From 2bc1076921c0633ec9dc20be0c54b148568b2a67 Mon Sep 17 00:00:00 2001 From: penguindark <57967770+penguindark@users.noreply.github.com> Date: Tue, 8 Dec 2020 19:38:25 +0100 Subject: [PATCH] regex: bug fixes, improved tests --- vlib/regex/regex.v | 13 ++++++++ vlib/regex/regex_test.v | 74 +++++++++++++++++++++++++++++------------ 2 files changed, 66 insertions(+), 21 deletions(-) diff --git a/vlib/regex/regex.v b/vlib/regex/regex.v index b5e393a1fe..44ea32420b 100644 --- a/vlib/regex/regex.v +++ b/vlib/regex/regex.v @@ -1814,6 +1814,12 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) { re.groups[g_index] = 0 } re.groups[g_index+1] = i + + // if a group end with a dot, manage the not increased char index + if i == re.groups[g_index] { + re.groups[g_index+1] = i+1 + } + //println("GROUP ${re.prog[pc].group_id} END [${re.groups[g_index]}, ${re.groups[g_index+1]}]") // continuous save, save until we have space @@ -2092,6 +2098,13 @@ pub fn (mut re RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) { re.prog[tmp_pc].group_rep = 0 // clear the repetitions group_index-- m_state = .ist_next + + // if dot char manage advance of the group + if l_ist == u32(ist_dot_char) { + //print("dot char next char") + i+=char_len + } + continue } else if rep >= re.prog[tmp_pc].rep_min { diff --git a/vlib/regex/regex_test.v b/vlib/regex/regex_test.v index e074268eea..aa5d334bbf 100644 --- a/vlib/regex/regex_test.v +++ b/vlib/regex/regex_test.v @@ -190,6 +190,18 @@ cgroups_test_suite = [ [3, 0, 0, 4, 1, 7, 11, 1, 11, 16], {'format':int(0)} }, + TestItemCGroup{ + "acc +13 pippo", + r"(\w+)\s(.)([0-9]+) \w+",0,13, + [0, 3, 4, 5, 5, 7], + map[string]int{} + }, + TestItemCGroup{ + "acc +13", + r"(\w+)\s(.)([0-9]+)",0,7, + [0, 3, 4, 5, 5, 7], + map[string]int{} + }, ] ) @@ -210,7 +222,12 @@ fn test_regex(){ continue } - re.group_csave = [-1].repeat(3*20+1) + if to.cgn.len > 0 { + re.group_csave = [-1].repeat(3*20+1) + if debug { println("continuous save")} + } else { + if debug { println("NO continuous save")} + } start, end := re.match_string(to.src) @@ -225,31 +242,46 @@ fn test_regex(){ C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e) assert false continue - } + } // check cgroups - if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] { - println("Capturing group len error! ${re.group_csave[0]}") - assert false - continue - } - - // check captured groups - mut ln := re.group_csave[0]*3 - for ln > 0 { - if re.group_csave[ln] != to.cg[ln] { - assert false - } - ln-- - } - - // check named captured groups - for k in to.cgn.keys() { - if to.cgn[k] != (re.group_map[k]-1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1 - println("Named capturing group error! [$k]") + if to.cgn.len > 0 { + if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] { + println("Capturing group len error! ${re.group_csave[0]}") assert false continue } + + // check captured groups + mut ln := re.group_csave[0]*3 + for ln > 0 { + if re.group_csave[ln] != to.cg[ln] { + assert false + } + ln-- + } + + // check named captured groups + for k in to.cgn.keys() { + if to.cgn[k] != (re.group_map[k]-1) { // we have -1 because the map not found is 0, in groups we start from 0 and we store using +1 + println("Named capturing group error! [$k]") + assert false + continue + } + } + } else { + // check normal captured groups + if re.groups.len != to.cg.len { + assert false + } + for ln:=0; ln < re.groups.len; ln++ { + if re.groups[ln] != to.cg[ln] { + println("Capture group doesn't match:") + println("true ground: [${to.cg}]") + println("elaborated : [${re.groups}]") + assert false + } + } } }