1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00

regex: bugfix for #18363, [^\s]+ acts differently from \S+ (#18371)

This commit is contained in:
penguindark 2023-06-09 13:34:06 +02:00 committed by GitHub
parent 5300441c09
commit 1de6523da5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 79 additions and 1 deletions

View File

@ -2202,6 +2202,77 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
} }
// char class IST
else if ist == regex.ist_char_class_pos || ist == regex.ist_char_class_neg { else if ist == regex.ist_char_class_pos || ist == regex.ist_char_class_neg {
// check next token to be false
mut next_check_flag := false
// if we have reached the max repetitions, drop the saved state and go on to the next instruction (dot chars are handled in a dedicated case)
if re.prog[state.pc].rep >= re.prog[state.pc].rep_max {
re.state_list.pop()
m_state = .ist_next
continue
}
if re.prog[state.pc].last_dot_flag == false && re.prog[state.pc].cc_check_pc >= 0
&& re.prog[state.pc].rep >= re.prog[state.pc].rep_min {
// load the char
// ch_t, _ := re.get_charb(in_txt, state.i+char_len)
ch_t := ch
chk_pc := re.prog[state.pc].cc_check_pc
// simple char
if re.prog[chk_pc].ist == regex.ist_simple_char {
if re.prog[chk_pc].ch == ch_t {
next_check_flag = true
}
// println("Check [ist_simple_char] [${re.prog[chk_pc].ch}]==[${ch_t:c}] => $next_check_flag")
}
// char class
else if re.prog[chk_pc].ist == regex.ist_char_class_pos
|| re.prog[chk_pc].ist == regex.ist_char_class_neg {
mut cc_neg := false
if re.prog[chk_pc].ist == regex.ist_char_class_neg {
cc_neg = true
}
mut cc_res := re.check_char_class(chk_pc, ch_t)
if cc_neg {
cc_res = !cc_res
}
next_check_flag = cc_res
// println("Check [ist_char_class] => $next_check_flag")
}
// check bsls
else if re.prog[chk_pc].ist == regex.ist_bsls_char {
next_check_flag = re.prog[chk_pc].validator(u8(ch_t))
// println("Check [ist_bsls_char] => $next_check_flag")
}
}
// check if we must continue or pass to the next IST
if next_check_flag == true && re.prog[state.pc + 1].ist != regex.ist_prog_end {
// println("save the state!!")
mut dot_state := StateObj{
group_index: state.group_index
match_flag: state.match_flag
match_index: state.match_index
first_match: state.first_match
pc: state.pc
i: state.i + char_len
char_len: char_len
last_dot_pc: state.pc
}
// if we are handling a [something]* quantifier (rep_min == 0), stay on the same char on return
if re.prog[state.pc].rep_min == 0 {
dot_state.i -= char_len
}
re.state_list << dot_state
m_state = .ist_quant_n
// println("dot_char stack len: ${re.state_list.len}")
continue
}
state.match_flag = false state.match_flag = false
mut cc_neg := false mut cc_neg := false

View File

@ -105,7 +105,6 @@ match_test_suite = [
TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}$",-1,0}, TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
TestItem{"1234this cpapaz adce aabe ter",r"(c(pa)+z)(\s[\a]+){2}$",-1,0}, TestItem{"1234this cpapaz adce aabe ter",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
TestItem{"cpapaz ole. pipipo,",r"^.*c.+ol?e.*p([ip])+o$",-1,0}, TestItem{"cpapaz ole. pipipo,",r"^.*c.+ol?e.*p([ip])+o$",-1,0},
TestItem{"/home/us_er/pippo/info-01.jpeg", r"(/?[-\w_]+)*\.txt$",-1,26}
// check unicode
TestItem{"this is a test",r".*a [-Ⅵ ]+",0,34}, TestItem{"this is a test",r".*a [-Ⅵ ]+",0,34},
@ -174,6 +173,14 @@ match_test_suite = [
TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(.*)",0,26}, TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(.*)",0,26},
TestItem{"refs/remotes/origin/mastep", r"refs/remotes/origin/(\w*)",0,26}, TestItem{"refs/remotes/origin/mastep", r"refs/remotes/origin/(\w*)",0,26},
TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(\w*)",0,26}, TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(\w*)",0,26},
// test \S+ vs [^\s]+
TestItem{"ab.c", r"\S+\.",0,3},
TestItem{"ab.c", r"[^\s]+\.",0,3},
TestItem{"ab.c", r"\S*\.",0,3},
TestItem{"ab.c", r"[^\s]*\.",0,3},
TestItem{"ab c", r"[\S]+\s",0,3},
TestItem{"ab c", r"[^\s]+\s",0,3},
] ]
) )