From 0fafefc07860bff250490b415c2fef98f1b56ce4 Mon Sep 17 00:00:00 2001 From: penguindark <57967770+penguindark@users.noreply.github.com> Date: Tue, 12 Oct 2021 05:03:23 +0200 Subject: [PATCH] regex: bug fix on find groups indexes (#12152) --- vlib/regex/regex_test.v | 61 +++++++++++++++++++++++++++++++++++++++++ vlib/regex/regex_util.v | 18 +++++++++--- 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/vlib/regex/regex_test.v b/vlib/regex/regex_test.v index e9a3baf1ce..bc1be2dfbb 100644 --- a/vlib/regex/regex_test.v +++ b/vlib/regex/regex_test.v @@ -645,3 +645,64 @@ fn test_quantifier_sequences(){ assert re_err == regex.err_syntax_error } } + +// Test that find() reports group indexes relative to the whole source string +struct Test_find_groups { + src string // text passed to find() + q string // regex query compiled with regex_opt() + s int // expected start index returned by find() + e int // expected end index returned by find() (exclusive) + res []int // expected re.groups after find(): flat start/end index pairs, one pair per group +} +const ( +find_groups_test_suite = [ + Test_find_groups{ + "aabbbccccdd", + r"(b+)(c+)", + 2, + 9, + [2, 5, 5, 9], + }, + Test_find_groups{ + "aabbbccccdd", + r"(a+).*(c+)", + 0, + 9, + [0, 2, 5, 9], + }, + Test_find_groups{ + "aabbbccccdd", + r"((b+).*)(d+)", + 2, + 11, + [2, 9, 2, 5, 9, 11], + }, +] +) +fn test_groups_in_find(){ + for test_obj in find_groups_test_suite { + src_text := test_obj.src + query := test_obj.q + mut re := regex.regex_opt(query) or { panic(err) } + start, end := re.find(src_text) + // Debug print, kept commented out; do not remove! 
+ /* + println("---------") + println("src_text:[${src_text}]") + println("query :[${query}]") + println("[${start}, ${end}]") + println(re.groups) + mut gi := 0 + for gi < re.groups.len { + if re.groups[gi] >= 0 { + println('${gi / 2} :[${src_text[re.groups[gi]..re.groups[gi + 1]]}]') + } + gi += 2 + } + */ + // check the match bounds and the group indexes against the expected values + assert start == test_obj.s + assert end == test_obj.e + assert re.groups == test_obj.res + } +} \ No newline at end of file diff --git a/vlib/regex/regex_util.v b/vlib/regex/regex_util.v index 060d01dabb..fa78f63499 100644 --- a/vlib/regex/regex_util.v +++ b/vlib/regex/regex_util.v @@ -194,6 +194,11 @@ pub fn (mut re RE) find(in_txt string) (int, int) { if s >= 0 && e > s { // println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]") // re.flag = old_flag + mut gi := 0 + for gi < re.groups.len { + re.groups[gi] += i // shift the group index by the same offset i that is applied to the returned bounds; NOTE(review): entries that are negative (the test debug loop guards on >= 0) are shifted too, confirm this is intended + gi++ + } return i + s, i + e } i++ @@ -229,6 +234,11 @@ pub fn (mut re RE) find_from(in_txt string, start int) (int, int) { if s >= 0 && e > s { // println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]") re.flag = old_flag + mut gi := 0 + for gi < re.groups.len { + re.groups[gi] += i // shift the group index by the same offset i that is applied to the returned bounds; NOTE(review): entries that are negative are shifted too, confirm this is intended + gi++ + } return i + s, i + e } else { i++ @@ -354,12 +364,12 @@ pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string { if last_end < s { res.write_string(in_txt[last_end..s]) } - + /* for g_i in 0 .. re.group_count { re.groups[g_i << 1] += i re.groups[(g_i << 1) + 1] += i } - + */ repl := repl_fn(re, in_txt, s, e) // println("repl res: $repl") res.write_string(repl) @@ -416,12 +426,12 @@ pub fn (mut re RE) replace(in_txt string, repl_str string) string { if last_end < s { res.write_string(in_txt[last_end..s]) } - + /* for g_i in 0 .. re.group_count { re.groups[g_i << 1] += i re.groups[(g_i << 1) + 1] += i } - + */ // repl := repl_fn(re, in_txt, s, e) repl := re.parsed_replace_string(in_txt, repl_str) // println("repl res: $repl")