mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
regex: fix a bug for CC token not correctly parsed in groups (#16272)
This commit is contained in:
parent
47a10f3181
commit
362adfae3a
@ -259,6 +259,7 @@ mut:
|
||||
// dot_char token variables
|
||||
dot_check_pc int = -1 // pc of the next token to check for dots
|
||||
bsls_check_pc int = -1 // pc of the next token to check for bsls
|
||||
cc_check_pc int = -1 // pc of the next token to check for CC
|
||||
last_dot_flag bool // if true indicate that is the last dot_char in the regex
|
||||
// debug fields
|
||||
source_index int
|
||||
@ -1270,7 +1271,7 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
|
||||
pc1++
|
||||
}
|
||||
|
||||
// println("last_dot_char_pc: $last_dot_char_pc")
|
||||
// println("last_dot_char_pc: ${last_dot_char_pc}")
|
||||
if last_dot_char_pc >= 0 {
|
||||
pc1 = last_dot_char_pc + 1
|
||||
mut is_last_dot := true
|
||||
@ -1313,7 +1314,7 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
|
||||
pc1++
|
||||
}
|
||||
|
||||
// println('last_bsls_char_pc: $last_bsls_char_pc')
|
||||
// println('last_bsls_char_pc: ${last_bsls_char_pc}')
|
||||
if last_bsls_char_pc >= 0 {
|
||||
pc1 = last_bsls_char_pc + 1
|
||||
mut is_last_bsls := true
|
||||
@ -1329,6 +1330,46 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// manage CC
|
||||
//
|
||||
pc1 = 0
|
||||
mut cc_char_count := 0
|
||||
mut last_cc_char_pc := -1
|
||||
for pc1 < pc {
|
||||
if re.prog[pc1].ist in [rune(regex.ist_char_class_pos), regex.ist_char_class_neg] {
|
||||
last_cc_char_pc = pc1
|
||||
cc_char_count++
|
||||
mut pc2 := pc1 + 1
|
||||
for pc2 < pc {
|
||||
if re.prog[pc2].ist !in [rune(regex.ist_prog_end), regex.ist_group_end,
|
||||
regex.ist_group_start] {
|
||||
// println("Next CC check is PC: ${pc2}")
|
||||
re.prog[pc1].cc_check_pc = pc2
|
||||
break
|
||||
}
|
||||
pc2++
|
||||
}
|
||||
}
|
||||
pc1++
|
||||
}
|
||||
|
||||
// println('last_cc_char_pc: ${last_cc_char_pc}')
|
||||
if last_cc_char_pc >= 0 {
|
||||
pc1 = last_cc_char_pc + 1
|
||||
mut is_last_cc := true
|
||||
for pc1 < pc {
|
||||
if re.prog[pc1].ist !in [rune(regex.ist_prog_end), regex.ist_group_end] {
|
||||
is_last_cc = false
|
||||
break
|
||||
}
|
||||
pc1++
|
||||
}
|
||||
if is_last_cc {
|
||||
re.prog[last_cc_char_pc].last_dot_flag = true
|
||||
}
|
||||
}
|
||||
|
||||
//******************************************
|
||||
|
||||
// OR branch
|
||||
@ -1417,6 +1458,9 @@ pub fn (re RE) get_code() string {
|
||||
ist := tk.ist
|
||||
if ist == regex.ist_bsls_char {
|
||||
res.write_string('[\\${tk.ch:1c}] BSLS')
|
||||
if tk.last_dot_flag == true {
|
||||
res.write_string(' last!')
|
||||
}
|
||||
} else if ist == regex.ist_prog_end {
|
||||
res.write_string('PROG_END')
|
||||
stop_flag = true
|
||||
@ -1424,8 +1468,14 @@ pub fn (re RE) get_code() string {
|
||||
res.write_string('OR ')
|
||||
} else if ist == regex.ist_char_class_pos {
|
||||
res.write_string('[${re.get_char_class(pc1)}] CHAR_CLASS_POS')
|
||||
if tk.last_dot_flag == true {
|
||||
res.write_string(' last!')
|
||||
}
|
||||
} else if ist == regex.ist_char_class_neg {
|
||||
res.write_string('[^${re.get_char_class(pc1)}] CHAR_CLASS_NEG')
|
||||
if tk.last_dot_flag == true {
|
||||
res.write_string(' last!')
|
||||
}
|
||||
} else if ist == regex.ist_dot_char {
|
||||
res.write_string('. DOT_CHAR nx chk: $tk.dot_check_pc')
|
||||
if tk.last_dot_flag == true {
|
||||
@ -1788,9 +1838,15 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
|
||||
// println("Finished text!!")
|
||||
src_end = true
|
||||
|
||||
// we have fished the text, we must manage out pf bound indexes
|
||||
if state.i >= in_txt_len {
|
||||
state.i = in_txt_len - 1
|
||||
}
|
||||
|
||||
// manage groups
|
||||
if state.group_index >= 0 && state.match_index >= 0 {
|
||||
// println("End text with open groups!")
|
||||
// println("state.group_index: ${state.group_index}")
|
||||
// close the groups
|
||||
for state.group_index >= 0 {
|
||||
tmp_pc := re.group_data[state.group_index]
|
||||
@ -1804,15 +1860,13 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
|
||||
|
||||
// save group results
|
||||
g_index := re.prog[tmp_pc].group_id * 2
|
||||
// println("group_id: ${re.prog[tmp_pc].group_id} g_index: ${g_index}")
|
||||
if start_i >= 0 {
|
||||
re.groups[g_index] = start_i
|
||||
} else {
|
||||
re.groups[g_index] = 0
|
||||
}
|
||||
// we have fished the text, we must manage out pf bound indexes
|
||||
if state.i >= in_txt_len {
|
||||
state.i = in_txt_len - 1
|
||||
}
|
||||
|
||||
re.groups[g_index + 1] = state.i
|
||||
|
||||
if re.groups[g_index + 1] >= in_txt_len {
|
||||
@ -1827,6 +1881,8 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
|
||||
}
|
||||
}
|
||||
|
||||
// println("re.groups: ${re.groups}")
|
||||
|
||||
// the text is finished and the groups closed and we are the last group, ok exit
|
||||
if ist == regex.ist_group_end && re.prog[state.pc + 1].ist == regex.ist_prog_end {
|
||||
// println("Last group end")
|
||||
@ -1847,12 +1903,17 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
|
||||
return state.first_match, state.i
|
||||
}
|
||||
|
||||
// we are in a last dot_ char case
|
||||
if l_ist == regex.ist_dot_char {
|
||||
// println("***** We have a last dot_char")
|
||||
if l_ist in [
|
||||
rune(regex.ist_char_class_neg),
|
||||
regex.ist_char_class_pos,
|
||||
regex.ist_bsls_char,
|
||||
regex.ist_dot_char,
|
||||
] {
|
||||
// println("***** We have a last special token")
|
||||
// println("PC: ${state.pc} last_dot_flag:${re.prog[state.pc].last_dot_flag}")
|
||||
// println("rep: ${re.prog[state.pc].group_rep} min: ${re.prog[state.pc].rep_min} max: ${re.prog[state.pc].rep_max}")
|
||||
// println("first match: ${state.first_match}")
|
||||
|
||||
if re.prog[state.pc].last_dot_flag == true
|
||||
&& re.prog[state.pc].rep >= re.prog[state.pc].rep_min
|
||||
&& re.prog[state.pc].rep <= re.prog[state.pc].rep_max {
|
||||
@ -1860,10 +1921,6 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
|
||||
}
|
||||
// println("Not fitted!!")
|
||||
}
|
||||
|
||||
// m_state = .end
|
||||
// break
|
||||
|
||||
// no groups open, check the last token quantifier
|
||||
if ist != regex.ist_group_end && re.prog[state.pc + 1].ist == regex.ist_prog_end {
|
||||
if re.prog[state.pc].rep >= re.prog[state.pc].rep_min
|
||||
@ -1873,7 +1930,7 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
|
||||
}
|
||||
}
|
||||
|
||||
// print("No good exit!!")
|
||||
// println("No good exit!!")
|
||||
return regex.no_match_found, state.i
|
||||
}
|
||||
|
||||
|
@ -364,6 +364,18 @@ find_all_test_suite = [
|
||||
r"\+{3}.*\+{3}",
|
||||
[0, 11, 18, 32, 33, 44],
|
||||
['+++pippo+++', '+++ pippo2 +++', '+++ oggi+++']
|
||||
},
|
||||
Test_find_all{
|
||||
"ab",
|
||||
r"[^\n]*",
|
||||
[0, 2],
|
||||
['ab']
|
||||
},
|
||||
Test_find_all{
|
||||
"ab",
|
||||
r"([^\n]*)",
|
||||
[0, 2],
|
||||
['ab']
|
||||
}
|
||||
|
||||
]
|
||||
|
Loading…
Reference in New Issue
Block a user