2020-08-08 09:04:12 +03:00
|
|
|
module regex
|
2021-05-08 13:32:29 +03:00
|
|
|
|
2020-08-08 09:04:12 +03:00
|
|
|
import strings
|
|
|
|
|
|
|
|
// compile_opt compiles the RE `pattern` string into the receiver `re`.
// The work is delegated to `re.impl_compile`. When that call reports anything
// other than `compile_ok`, an error is returned whose code is the reported
// status and whose message holds three lines: the query itself, a dashed
// marker line ending in `^` (one dash per unit of the reported position),
// and the text from `re.get_parse_error_string` for that status.
pub fn (mut re RE) compile_opt(pattern string) ! {
	status, fail_at := re.impl_compile(pattern)
	if status == compile_ok {
		// nothing to report, the pattern compiled cleanly
		return
	}

	// build the dashed marker line that ends under the reported position
	marker := '-'.repeat(fail_at)
	reason := re.get_parse_error_string(status)

	mut report := strings.new_builder(300)
	report.write_string('\nquery: ${pattern}\n')
	report.write_string('err : ${marker}^\n')
	report.write_string('ERROR: ${reason}\n')
	return error_with_code(report.str(), status)
}
|
|
|
|
|
|
|
|
// new creates a RE of small, fixed size, usually sufficient for ordinary use.
// All buffers are sized from `max_code_len` instead of from a concrete pattern:
// - `prog` holds `max_code_len + 1` tokens
// - `cc` holds `max_code_len` char classes
// - at most 128 nested groups are allowed
// - `group_max` is half of `max_code_len`, and both `group_stack` and
//   `group_data` get that many slots, each initialised to -1
// The continuous group saving flag is left off. No pattern is compiled here.
pub fn new() RE {
	// shared size of the group bookkeeping arrays
	group_cap := max_code_len >> 1
	return RE{
		prog:             []Token{len: max_code_len + 1}
		cc:               []CharClass{len: max_code_len}
		group_csave_flag: false
		group_max_nested: 128
		group_max:        group_cap
		group_stack:      []int{len: group_cap, init: -1}
		group_data:       []int{len: group_cap, init: -1}
	}
}
|
|
|
|
|
|
|
|
// regex_opt creates a new RE object from the RE `pattern` string.
// Every buffer is sized from the pattern length:
// - `prog` holds `pattern.len + 1` tokens (the program can not be longer than the pattern)
// - `cc` holds `pattern.len` char classes
// - `group_max_nested` and `group_max` are both half of the pattern length
// - `group_stack` and `group_data` get `group_max` slots, each initialised to -1
// The continuous group saving flag is left off.
// The pattern is then compiled with `compile_opt`; if that fails, its error
// is propagated to the caller, otherwise the ready-to-use RE is returned.
pub fn regex_opt(pattern string) !RE {
	// there can not be more groups than half of the pattern length
	half_len := pattern.len >> 1
	mut compiled := RE{
		prog:             []Token{len: pattern.len + 1}
		cc:               []CharClass{len: pattern.len}
		group_csave_flag: false
		group_max_nested: half_len
		group_max:        half_len
		group_stack:      []int{len: half_len, init: -1}
		group_data:       []int{len: half_len, init: -1}
	}

	// compile the pattern, bubbling up any compile error
	compiled.compile_opt(pattern)!

	return compiled
}
|