v/vlib/regex/regex_util.v

/*

regex 1.0 alpha

Copyright (c) 2019-2021 Dario Deledda. All rights reserved.
Use of this source code is governed by an MIT license
that can be found in the LICENSE file.

*/
module regex
import strings

/******************************************************************************
*
* Inits
*
******************************************************************************/
// regex_base creates a RE object from the `pattern` string.
// It returns the RE object followed by the two values produced by
// `impl_compile` (bound to `re_err` and `err_pos` below).
pub fn regex_base(pattern string) (RE, int, int) {
	// there cannot be more groups than half of the pattern length
	max_groups := pattern.len >> 1

	mut re := RE{
		// max program length, it cannot be longer than the pattern
		prog: []Token{len: pattern.len + 1}
		// there cannot be more char classes than the length of the pattern
		cc: []CharClass{len: pattern.len}
		// the continuous group saving flag starts as false
		group_csave_flag: false
		// at most 128 nested groups
		group_max_nested: 128
		group_max: max_groups
		group_stack: []int{len: max_groups, init: -1}
		group_data: []int{len: max_groups, init: -1}
	}

	re_err, err_pos := re.impl_compile(pattern)
	return re, re_err, err_pos
}

/******************************************************************************
*
* Utilities
*
******************************************************************************/
// get_group_bounds_by_name returns the (start, end) pair stored in `re.groups`
// for the group registered as `group_name` in `re.group_map`.
// It returns (-1, -1) when `group_name` is not a key of `re.group_map`.
pub fn (re RE) get_group_bounds_by_name(group_name string) (int, int) {
	if group_name !in re.group_map {
		return -1, -1
	}
	// the value stored in group_map is one more than the pair index in re.groups
	base := (re.group_map[group_name] - 1) * 2
	return re.groups[base], re.groups[base + 1]
}

// get_group_by_name returns the slice of `in_txt` delimited by the bounds of
// the group registered as `group_name`.
// It returns an empty string when the name is unknown or the stored bounds
// do not describe a non-empty range (start < 0 or end <= start).
pub fn (re RE) get_group_by_name(in_txt string, group_name string) string {
	// an unknown name yields (-1, -1), which is rejected by the check below
	start, end := re.get_group_bounds_by_name(group_name)
	if start < 0 || end <= start {
		return ''
	}
	return in_txt[start..end]
}

// get_group_by_id returns the slice of `in_txt` delimited by the bounds stored
// in `re.groups` for the group with index `group_id`.
// It returns an empty string when `group_id` is negative, when it is not
// smaller than the number of (start, end) pairs held by `re.groups`, or when
// the stored bounds do not describe a non-empty range.
pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {
	// a negative id would turn into a negative index into re.groups
	if group_id < 0 || group_id >= (re.groups.len >> 1) {
		return ''
	}
	index := group_id << 1
	start := re.groups[index]
	end := re.groups[index + 1]
	if start >= 0 && end > start {
		return in_txt[start..end]
	}
	return ''
}

// get_group_bounds_by_id returns the (start, end) pair stored in `re.groups`
// for the group with index `group_id`.
// It returns (-1, -1) when `group_id` is negative or not smaller than
// `re.group_count`.
pub fn (re RE) get_group_bounds_by_id(group_id int) (int, int) {
	// a negative id would turn into a negative index into re.groups
	if group_id < 0 || group_id >= re.group_count {
		return -1, -1
	}
	index := group_id << 1
	return re.groups[index], re.groups[index + 1]
}

// Re_group holds the bounds of a group as returned by get_group_list;
// both fields default to -1.
pub struct Re_group {
pub:
	start int = -1 // first bound of the group
	end   int = -1 // second bound of the group
}

// get_group_list returns one Re_group for every (start, end) pair held by `re.groups`.
// A pair with a non-negative start and end > start is copied into its slot;
// a pair with a non-negative start but end <= start is stored as `Re_group{}`;
// the slot of a pair with a negative start is left as created by the array init.
pub fn (re RE) get_group_list() []Re_group {
	mut list := []Re_group{len: re.groups.len >> 1}

	for pos := 0; pos < re.groups.len; pos += 2 {
		first := re.groups[pos]
		if first >= 0 {
			last := re.groups[pos + 1]
			slot := pos >> 1
			if last > first {
				list[slot] = Re_group{
					start: first
					end: last
				}
			} else {
				list[slot] = Re_group{}
			}
		}
	}
	return list
}

/******************************************************************************
*
* Matchers
*
******************************************************************************/
// match_string runs `match_base` on `in_txt` and returns the resulting (start, end),
// with `end` clamped to `in_txt.len`.
// When the result is a non-empty range, two flags of `re.flag` can reject it
// by returning (no_match_found, 0):
// - f_ms: the match is rejected when it does not start at index 0
// - f_me: the match is rejected when it ends before the end of `in_txt`
//   and the byte following it is not in `new_line_list`
[direct_array_access]
pub fn (mut re RE) match_string(in_txt string) (int, int) {
	start, raw_end := re.match_base(in_txt.str, in_txt.len + 1)
	end := if raw_end > in_txt.len { in_txt.len } else { raw_end }

	// not a non-empty range: hand the values back untouched
	if start < 0 || end <= start {
		return start, end
	}
	if (re.flag & f_ms) != 0 && start > 0 {
		return no_match_found, 0
	}
	// the short circuit on `end < in_txt.len` keeps in_txt[end] in range
	if (re.flag & f_me) != 0 && end < in_txt.len && in_txt[end] !in new_line_list {
		return no_match_found, 0
	}
	return start, end
}


/******************************************************************************
*
* Finders
*
******************************************************************************/
/*
// find internal implementation HERE for reference do not remove!!
[direct_array_access]
fn (mut re RE) find_imp(in_txt string) (int,int) {
	old_flag := re.flag
	re.flag |= f_src  // enable search mode

	start, mut end := re.match_base(in_txt.str, in_txt.len + 1)
	//print("Find [$start,$end] '${in_txt[start..end]}'")
	if end > in_txt.len {
		end = in_txt.len
	}
	re.flag = old_flag

	if start >= 0 && end > start {
		return start, end
	}
	return no_match_found, 0
}
*/

// find tries `match_string` on every suffix of `in_txt`, from the longest to
// the shortest, and returns the bounds of the first non-empty match expressed
// as indexes of `in_txt`. It returns (-1, -1) when no suffix matches.
[direct_array_access]
pub fn (mut re RE) find(in_txt string) (int, int) {
	for offset := 0; offset < in_txt.len; offset++ {
		// view on in_txt[offset..] built with tos on the shifted pointer
		// instead of the slice operator
		tail := unsafe { tos(in_txt.str + offset, in_txt.len - offset) }
		s, e := re.match_string(tail)
		if s >= 0 && e > s {
			// s and e are relative to the suffix, shift them back
			return offset + s, offset + e
		}
	}
	return -1, -1
}

// find_from tries to find the first match in `in_txt`, starting the scan at
// index `start`. The f_src flag (search mode) is set on `re.flag` for the
// duration of the scan and the previous flag value is restored before returning.
// It returns the bounds of the match as indexes of `in_txt`, or (-1, -1) when
// `start` is negative or nothing is found.
[direct_array_access]
pub fn (mut re RE) find_from(in_txt string, start int) (int, int) {
	// validate start before touching re.flag, so that this early return
	// cannot leave the search mode flag set on the RE
	if start < 0 {
		return -1, -1
	}

	old_flag := re.flag
	re.flag |= f_src // enable search mode

	mut i := start
	for i < in_txt.len {
		mut s := -1
		mut e := -1
		unsafe {
			// view on in_txt[i..] built with tos on the shifted pointer
			tmp_str := tos(in_txt.str + i, in_txt.len - i)
			s, e = re.match_string(tmp_str)
		}
		if s >= 0 && e > s {
			re.flag = old_flag
			// s and e are relative to the suffix, shift them back
			return i + s, i + e
		}
		i++
	}
	re.flag = old_flag
	return -1, -1
}

// find_all collects the non overlapping matches of the pattern in `in_txt`.
// The result is a flat list of indexes of `in_txt`: [start0, end0, start1, end1, ...].
[direct_array_access]
pub fn (mut re RE) find_all(in_txt string) []int {
	mut spans := []int{}
	mut cursor := 0
	mut prev_start := -1 // start of the last recorded match

	for cursor < in_txt.len {
		// view on in_txt[cursor..] built with tos on the shifted pointer
		tail := unsafe { tos(in_txt.str + cursor, in_txt.len - cursor) }
		s, e := re.match_string(tail)

		if s < 0 || e <= s || cursor + s <= prev_start {
			// nothing usable at this position, try the next byte
			cursor++
			continue
		}
		spans << cursor + s
		spans << cursor + e
		prev_start = cursor + s
		// resume right after the match
		cursor += e
	}
	return spans
}

// find_all_str collects the non overlapping matches of the pattern in `in_txt`
// and returns them as a list of strings.
[direct_array_access]
pub fn (mut re RE) find_all_str(in_txt string) []string {
	mut i := 0
	mut res := []string{}
	mut ls := -1 // start of the last recorded match

	for i < in_txt.len {
		mut s := -1
		mut e := -1
		unsafe {
			// view on in_txt[i..] built with tos on the shifted pointer
			tmp_str := tos(in_txt.str + i, in_txt.len - i)
			s, e = re.find(tmp_str)
		}
		if s < 0 || e <= s {
			// find has already tried every suffix of in_txt[i..]: advancing
			// by one byte would only rescan the same suffixes again
			break
		}
		if i + s > ls {
			res << in_txt[i + s..i + e]
			ls = i + s
			// resume right after the match
			i = i + e
		} else {
			i++
		}
	}
	return res
}
/******************************************************************************
*
* Replacers
*
******************************************************************************/
// replace_simple returns a copy of `in_txt` where every match reported by
// `find_all` is replaced with the `repl` string.
// `in_txt` is returned as is when there are no matches.
pub fn (mut re RE) replace_simple(in_txt string, repl string) string {
	// flat list of bounds: [start0, end0, start1, end1, ...]
	pos := re.find_all(in_txt)
	if pos.len == 0 {
		return in_txt
	}

	// a builder avoids re-copying the whole partial result at every match
	mut res := strings.new_builder(in_txt.len)
	mut last_end := 0
	for i := 0; i < pos.len; i += 2 {
		// text between the previous match and this one, then the replacement
		res.write_string(in_txt[last_end..pos[i]])
		res.write_string(repl)
		last_end = pos[i + 1]
	}
	// text after the last match
	res.write_string(in_txt[last_end..])
	return res.str()
}


// FnReplace is the type of the callback function used by replace_by_fn for custom replaces
// re      RE object passed by replace_by_fn after a match has been found
// in_txt  source text
// start   index of the start of the match in in_txt
// end     index of the end   of the match in in_txt
// the match is in in_txt[start..end]
// the returned string is written to the result in place of the match
pub type FnReplace = fn (re RE, in_txt string, start int, end int) string

// replace_by_fn returns a copy of `in_txt` where every match is replaced with
// the string returned by the `repl_fn` callback.
// The callback receives the RE object, the source text and the bounds of the
// match expressed as indexes of `in_txt`.
pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {
	mut i := 0
	mut res := strings.new_builder(in_txt.len)
	mut last_end := 0

	for i < in_txt.len {
		s, e := re.find_from(in_txt, i)
		if s < 0 || e <= s {
			// no more matches
			break
		}

		// text between the previous match and this one
		if last_end < s {
			res.write_string(in_txt[last_end..s])
		}

		// shift the group bounds by the offset the search started from, so
		// that the callback can use them as indexes of in_txt
		// (presumably the bounds are relative to in_txt[i..] - TODO confirm).
		// Negative bounds are left alone: the group getters treat a negative
		// start as "no group", and shifting it would turn it into a fake
		// position inside in_txt.
		for g_i in 0 .. re.group_count {
			if re.groups[g_i << 1] >= 0 {
				re.groups[g_i << 1] += i
			}
			if re.groups[(g_i << 1) + 1] >= 0 {
				re.groups[(g_i << 1) + 1] += i
			}
		}

		repl := repl_fn(re, in_txt, s, e)
		res.write_string(repl)

		// resume right after the match
		last_end = e
		i = e
	}

	// text after the last match
	if last_end < in_txt.len {
		res.write_string(in_txt[last_end..])
	}
	return res.str()
}


// parsed_replace_string expands the group references of the `repl` string.
// `repl` is split on the backslash character: every piece after the first
// that starts with a digit 0-9 has that digit replaced with the text of the
// group with that id, taken from `in_txt` through get_group_by_id; every
// other piece gets its leading backslash back.
fn (re RE) parsed_replace_string(in_txt string, repl string) string {
	pieces := repl.split("\\")
	// the first piece precedes any backslash and is copied as is
	mut out := pieces[0]

	for idx := 1; idx < pieces.len; idx++ {
		piece := pieces[idx]
		is_group_ref := piece.len > 0 && piece[0] >= `0` && piece[0] <= `9`
		if is_group_ref {
			// only a single digit is read, the rest of the piece is plain text
			group_txt := re.get_group_by_id(in_txt, int(piece[0] - `0`))
			out += group_txt + piece[1..]
		} else {
			out += '\\' + piece
		}
	}
	return out
}

// replace returns a copy of `in_txt` where every match is replaced with the
// `repl_str` string.
// Group references in `repl_str` (a backslash followed by a digit 0-9) are
// expanded with the text of the groups of each match through
// parsed_replace_string.
pub fn (mut re RE) replace(in_txt string, repl_str string) string {
	mut i := 0
	mut res := strings.new_builder(in_txt.len)
	mut last_end := 0

	for i < in_txt.len {
		s, e := re.find_from(in_txt, i)
		if s < 0 || e <= s {
			// no more matches
			break
		}

		// text between the previous match and this one
		if last_end < s {
			res.write_string(in_txt[last_end..s])
		}

		// shift the group bounds by the offset the search started from, so
		// that they can be used as indexes of in_txt
		// (presumably the bounds are relative to in_txt[i..] - TODO confirm).
		// Negative bounds are left alone: the group getters treat a negative
		// start as "no group", and shifting it would turn it into a fake
		// position inside in_txt.
		for g_i in 0 .. re.group_count {
			if re.groups[g_i << 1] >= 0 {
				re.groups[g_i << 1] += i
			}
			if re.groups[(g_i << 1) + 1] >= 0 {
				re.groups[(g_i << 1) + 1] += i
			}
		}

		repl := re.parsed_replace_string(in_txt, repl_str)
		res.write_string(repl)

		// resume right after the match
		last_end = e
		i = e
	}

	// text after the last match
	if last_end < in_txt.len {
		res.write_string(in_txt[last_end..])
	}
	return res.str()
}
regex: refactoring, documentation, examples (#7418) 2020-12-20 06:52:02 +03:00			`/*`

			`regex 1.0 alpha`

all: update copyright to 2019-2021 (#8029) 2021-01-18 15:20:06 +03:00			`Copyright (c) 2019-2021 Dario Deledda. All rights reserved.`
regex: refactoring, documentation, examples (#7418) 2020-12-20 06:52:02 +03:00			`Use of this source code is governed by an MIT license`
			`that can be found in the LICENSE file.`

			`*/`
			`module regex`
regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`import strings`
regex: refactoring, documentation, examples (#7418) 2020-12-20 06:52:02 +03:00
			`/******************************************************************************`
			`*`
			`* Inits`
			`*`
			`******************************************************************************/`
regex: remove [deprecated] functions/methods, code clean, add test for regex_base (#8862) 2021-02-20 22:39:08 +03:00			`// regex create a regex object from the query string, retunr RE object and errors as re_err, err_pos`
			`pub fn regex_base(pattern string) (RE,int,int){`
regex: speed optimization 2 (#7473) 2020-12-22 23:34:46 +03:00			`// init regex`
			`mut re := regex.RE{}`
			`re.prog = []Token {len: pattern.len + 1} // max program length, can not be longer then the pattern`
			`re.cc = []CharClass{len: pattern.len} // can not be more char class the the length of the pattern`
			`re.group_csave_flag = false // enable continuos group saving`
			`re.group_max_nested = 128 // set max 128 group nested`
			`re.group_max = pattern.len >> 1 // we can't have more groups than the half of the pattern legth`

			`re.group_stack = []int{len: re.group_max, init: -1}`
			`re.group_data = []int{len: re.group_max, init: -1}`

regex: remove [deprecated] functions/methods, code clean, add test for regex_base (#8862) 2021-02-20 22:39:08 +03:00			`re_err,err_pos := re.impl_compile(pattern)`
regex: refactoring, documentation, examples (#7418) 2020-12-20 06:52:02 +03:00			`return re, re_err, err_pos`
			`}`

			`/******************************************************************************`
			`*`
			`* Utilities`
			`*`
			`******************************************************************************/`
			`// get_group_bounds_by_name get a group boundaries by its name`
			`pub fn (re RE) get_group_bounds_by_name(group_name string) (int, int) {`
			`if group_name in re.group_map {`
			`tmp_index := re.group_map[group_name]-1`
			`start := re.groups[tmp_index * 2]`
			`end := re.groups[tmp_index * 2 + 1]`
			`return start,end`
			`}`
			`return -1, -1`
			`}`

			`// get_group_by_name get a group boundaries by its name`
			`pub fn (re RE) get_group_by_name(in_txt string, group_name string) string {`
			`if group_name in re.group_map {`
			`tmp_index := re.group_map[group_name]-1`
			`start := re.groups[tmp_index * 2]`
			`end := re.groups[tmp_index * 2 + 1]`
regex: added groups in replace strings (#9576) 2021-04-03 23:16:56 +03:00			`if start >= 0 && end > start {`
			`return in_txt[start..end]`
			`}`
regex: refactoring, documentation, examples (#7418) 2020-12-20 06:52:02 +03:00			`}`
			`return ""`
			`}`

			`// get_group_by_id get a group string by its id`
			`pub fn (re RE) get_group_by_id(in_txt string, group_id int) string {`
			`if group_id < (re.groups.len >> 1) {`
			`index := group_id << 1`
			`start := re.groups[index]`
			`end := re.groups[index + 1]`
regex: added groups in replace strings (#9576) 2021-04-03 23:16:56 +03:00			`if start >= 0 && end > start {`
			`return in_txt[start..end]`
			`}`
regex: refactoring, documentation, examples (#7418) 2020-12-20 06:52:02 +03:00			`}`
			`return ""`
			`}`

			`// get_group_by_id get a group boundaries by its id`
			`pub fn (re RE) get_group_bounds_by_id(group_id int) (int,int) {`
regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`if group_id < re.group_count {`
regex: refactoring, documentation, examples (#7418) 2020-12-20 06:52:02 +03:00			`index := group_id << 1`
			`return re.groups[index], re.groups[index + 1]`
			`}`
			`return -1, -1`
			`}`

			`pub`
			`struct Re_group {`
			`pub:`
			`start int = -1`
			`end int = -1`
			`}`

			`// get_group_list return a list of Re_group for the found groups`
			`pub fn (re RE) get_group_list() []Re_group {`
			`mut res := []Re_group{len: re.groups.len >> 1}`
			`mut gi := 0`
			`//println("len: ${re.groups.len} groups: ${re.groups}")`
regex: remove undocumented deprecated calls, optimize speed and memory usage (#7582) 2020-12-26 23:08:56 +03:00
regex: refactoring, documentation, examples (#7418) 2020-12-20 06:52:02 +03:00			`for gi < re.groups.len {`
			`if re.groups[gi] >= 0 {`
			`txt_st := re.groups[gi]`
			`txt_en := re.groups[gi+1]`

			`//println("#${gi/2} start: ${re.groups[gi]} end: ${re.groups[gi + 1]} ")`
			`if txt_st >= 0 && txt_en > txt_st {`
			`tmp := Re_group{ start: re.groups[gi], end: re.groups[gi + 1]}`
			`//println(tmp)`
			`res[gi >> 1] = tmp`
			`} else {`
			`res[gi >> 1] = Re_group{}`
			`}`
			`}`
			`gi += 2`
			`}`
			`return res`
			`}`

regex: remove [deprecated] functions/methods, code clean, add test for regex_base (#8862) 2021-02-20 22:39:08 +03:00			`/******************************************************************************`
			`*`
			`* Matchers`
			`*`
			`******************************************************************************/`
			`// match_string Match the pattern with the in_txt string`
			`[direct_array_access]`
			`pub fn (mut re RE) match_string(in_txt string) (int,int) {`

			`start, mut end := re.match_base(in_txt.str, in_txt.len + 1)`
			`if end > in_txt.len {`
			`end = in_txt.len`
			`}`

			`if start >= 0 && end > start {`
			`if (re.flag & f_ms) != 0 && start > 0 {`
			`return no_match_found, 0`
			`}`
			`if (re.flag & f_me) != 0 && end < in_txt.len {`
			`if in_txt[end] in new_line_list {`
			`return start, end`
			`}`
			`return no_match_found, 0`
			`}`
			`return start, end`
			`}`
			`return start, end`
			`}`


regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`/******************************************************************************`
			`*`
			`* Finders`
			`*`
			`******************************************************************************/`
regex: fix a bug in find and find_all (#7839) 2021-01-03 18:59:00 +03:00			`/*`
regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`// find internal implementation HERE for reference do not remove!!`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`[direct_array_access]`
regex: fix a bug in find and find_all (#7839) 2021-01-03 18:59:00 +03:00			`fn (mut re RE) find_imp(in_txt string) (int,int) {`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`old_flag := re.flag`
			`re.flag \|= f_src // enable search mode`

			`start, mut end := re.match_base(in_txt.str, in_txt.len + 1)`
			`//print("Find [$start,$end] '${in_txt[start..end]}'")`
			`if end > in_txt.len {`
			`end = in_txt.len`
			`}`
			`re.flag = old_flag`

			`if start >= 0 && end > start {`
			`return start, end`
			`}`
			`return no_match_found, 0`
			`}`
regex: fix a bug in find and find_all (#7839) 2021-01-03 18:59:00 +03:00			`*/`

			`// find try to find the first match in the input string`
			`[direct_array_access]`
			`pub fn (mut re RE) find(in_txt string) (int,int) {`
regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`//old_flag := re.flag`
			`//re.flag \|= f_src // enable search mode`

regex: fix a bug in find and find_all (#7839) 2021-01-03 18:59:00 +03:00			`mut i := 0`
			`for i < in_txt.len {`
			`//--- speed references ---`
			`mut s := -1`
			`mut e := -1`
			`unsafe {`
			`tmp_str := tos(in_txt.str+i, in_txt.len-i)`
			`s,e = re.match_string(tmp_str)`
			`}`
			`//------------------------`
			`//s,e := re.find_imp(in_txt[i..])`
			`//------------------------`
			`if s >= 0 && e > s {`
			`//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")`
regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`//re.flag = old_flag`
			`return i+s, i+e`
			`} else {`
			`i++`
			`}`

			`}`
			`//re.flag = old_flag`
			`return -1, -1`
			`}`

			`// find try to find the first match in the input string strarting from start index`
			`[direct_array_access]`
			`pub fn (mut re RE) find_from(in_txt string, start int) (int,int) {`
			`old_flag := re.flag`
			`re.flag \|= f_src // enable search mode`

			`mut i := start`
			`if i < 0 {`
			`return -1, -1`
			`}`
			`for i < in_txt.len {`
			`//--- speed references ---`

			`mut s := -1`
			`mut e := -1`

			`unsafe {`
			`tmp_str := tos(in_txt.str+i, in_txt.len-i)`
			`s,e = re.match_string(tmp_str)`
			`}`
			`//------------------------`
			`//s,e = re.find_imp(in_txt[i..])`
			`//------------------------`
			`if s >= 0 && e > s {`
			`//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}]")`
			`re.flag = old_flag`
regex: fix a bug in find and find_all (#7839) 2021-01-03 18:59:00 +03:00			`return i+s, i+e`
			`} else {`
			`i++`
			`}`

			`}`
regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`re.flag = old_flag`
regex: fix a bug in find and find_all (#7839) 2021-01-03 18:59:00 +03:00			`return -1, -1`
			`}`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00
			`// find_all find all the non overlapping occurrences of the match pattern`
			`[direct_array_access]`
			`pub fn (mut re RE) find_all(in_txt string) []int {`
regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`//old_flag := re.flag`
			`//re.flag \|= f_src // enable search mode`

regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`mut i := 0`
			`mut res := []int{}`
			`mut ls := -1`

			`for i < in_txt.len {`
regex: remove undocumented deprecated calls, optimize speed and memory usage (#7582) 2020-12-26 23:08:56 +03:00			`//--- speed references ---`
			`mut s := -1`
			`mut e := -1`
			`unsafe {`
			`tmp_str := tos(in_txt.str+i, in_txt.len-i)`
regex: fix a bug in find and find_all (#7839) 2021-01-03 18:59:00 +03:00			`s,e = re.match_string(tmp_str)`
regex: remove undocumented deprecated calls, optimize speed and memory usage (#7582) 2020-12-26 23:08:56 +03:00			`}`
			`//------------------------`
regex: fix a bug in find and find_all (#7839) 2021-01-03 18:59:00 +03:00			`//s,e := re.find_imp(in_txt[i..])`
regex: remove undocumented deprecated calls, optimize speed and memory usage (#7582) 2020-12-26 23:08:56 +03:00			`//------------------------`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`if s >= 0 && e > s && i+s > ls {`
			`//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")`
			`res << i+s`
			`res << i+e`
			`ls = i+s`
			`i = i+e`
			`continue`
			`} else {`
			`i++`
			`}`

			`}`
regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`//re.flag = old_flag`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`return res`
			`}`

			`// find_all_str find all the non overlapping occurrences of the match pattern, return a string list`
			`[direct_array_access]`
			`pub fn (mut re RE) find_all_str(in_txt string) []string {`
			`mut i := 0`
			`mut res := []string{}`
			`mut ls := -1`

			`for i < in_txt.len {`
regex: remove undocumented deprecated calls, optimize speed and memory usage (#7582) 2020-12-26 23:08:56 +03:00			`//--- speed references ---`
			`mut s := -1`
			`mut e := -1`
			`unsafe {`
			`tmp_str := tos(in_txt.str+i, in_txt.len-i)`
			`s,e = re.find(tmp_str)`
			`}`
			`//------------------------`
			`//s,e := re.find(in_txt[i..])`
			`//------------------------`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`if s >= 0 && e > s && i+s > ls {`
			`//println("find match in: ${i+s},${i+e} [${in_txt[i+s..i+e]}] ls:$ls")`
			`res << in_txt[i+s..i+e]`
			`ls = i+s`
			`i = i+e`
			`continue`
			`} else {`
			`i++`
			`}`

			`}`
			`return res`
			`}`
			`/******************************************************************************`
			`*`
			`* Replacers`
			`*`
			`******************************************************************************/`
regex: added groups in replace strings (#9576) 2021-04-03 23:16:56 +03:00			`// replace_simple return a string where the matches are replaced with the replace string`
			`pub fn (mut re RE) replace_simple(in_txt string, repl string) string {`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`pos := re.find_all(in_txt)`

			`if pos.len > 0 {`
			`mut res := ""`
			`mut i := 0`

			`mut s1 := 0`
			`mut e1 := in_txt.len`

			`for i < pos.len {`
			`e1 = pos[i]`
			`res += in_txt[s1..e1] + repl`
			`s1 = pos[i+1]`
			`i += 2`
			`}`

			`res += in_txt[s1..]`
			`return res`
			`}`
			`return in_txt`
			`}`

regex: added groups in replace strings (#9576) 2021-04-03 23:16:56 +03:00
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`// type of function used for custom replace`
			`// in_txt source text`
			`// start index of the start of the match in in_txt`
			`// end index of the end of the match in in_txt`
			`// the match is in in_txt[start..end]`
all: update copyright to 2019-2021 (#8029) 2021-01-18 15:20:06 +03:00			`pub type FnReplace = fn (re RE, in_txt string, start int, end int) string`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00
			`// replace_by_fn return a string where the matches are replaced with the string from the repl_fn callback function`
			`pub fn (mut re RE) replace_by_fn(in_txt string, repl_fn FnReplace) string {`
			`mut i := 0`
regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`mut res := strings.new_builder(in_txt.len)`
			`mut last_end := 0`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00
			`for i < in_txt.len {`
regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`//println("Find Start. $i [${in_txt[i..]}]")`
			`s, e := re.find_from(in_txt,i)`
			`//println("Find End.")`
			`if s >= 0 && e > s {`
			`//println("find match in: ${s},${e} [${in_txt[s..e]}]")`

			`if last_end < s {`
			`res.write_string(in_txt[last_end..s])`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`}`

regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`for g_i in 0..re.group_count {`
			`re.groups[g_i << 1 ] += i`
			`re.groups[(g_i << 1) + 1] += i`
			`}`

			`repl := repl_fn(re, in_txt, s, e)`
			`//println("repl res: $repl")`
			`res.write_string(repl)`
			`//res.write_string("[[${in_txt[s..e]}]]")`

			`last_end = e`
			`i = e`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`} else {`
regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`break`
			`//i++`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`}`
regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`//println(i)`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`}`
regex: bug fix in replace using function, added tests (#9381) 2021-03-20 02:54:12 +03:00			`if last_end >= 0 && last_end < in_txt.len {`
			`res.write_string(in_txt[last_end..])`
			`}`
			`return res.str()`
regex: add a find_all_str function (#7517) 2020-12-24 08:27:46 +03:00			`}`
regex: added groups in replace strings (#9576) 2021-04-03 23:16:56 +03:00

			`fn (re RE) parsed_replace_string(in_txt string, repl string) string {`
			`str_lst := repl.split("\\")`
			`mut res := str_lst[0]`
			`mut i := 1`
			`for i < str_lst.len {`
			`tmp := str_lst[i]`
			`//println("tmp: ${tmp}")`
			if tmp.len > 0 && tmp[0] >= `0` && tmp[0] <= `9` {
			group_id := int(tmp[0] - `0`)
			`group := re.get_group_by_id(in_txt, group_id)`
			`//println("group: $group_id [$group]")`
			`res += "${group}${tmp[1..]}"`
			`} else {`
			`res += '\\'+tmp`
			`}`
			`i++`
			`}`
			`return res`
			`}`

			`// replace return a string where the matches are replaced with the repl_str string,`
			`// this function support use groups in the replace string`
			`pub fn (mut re RE) replace(in_txt string, repl_str string) string {`
			`mut i := 0`
			`mut res := strings.new_builder(in_txt.len)`
			`mut last_end := 0`

			`for i < in_txt.len {`
			`//println("Find Start. $i [${in_txt[i..]}]")`
			`s, e := re.find_from(in_txt,i)`
			`//println("Find End.")`
			`if s >= 0 && e > s {`
			`//println("find match in: ${s},${e} [${in_txt[s..e]}]")`

			`if last_end < s {`
			`res.write_string(in_txt[last_end..s])`
			`}`

			`for g_i in 0..re.group_count {`
			`re.groups[g_i << 1 ] += i`
			`re.groups[(g_i << 1) + 1] += i`
			`}`

			`//repl := repl_fn(re, in_txt, s, e)`
			`repl := re.parsed_replace_string(in_txt, repl_str)`
			`//println("repl res: $repl")`
			`res.write_string(repl)`
			`//res.write_string("[[${in_txt[s..e]}]]")`

			`last_end = e`
			`i = e`
			`} else {`
			`break`
			`//i++`
			`}`
			`//println(i)`
			`}`
			`if last_end >= 0 && last_end < in_txt.len {`
			`res.write_string(in_txt[last_end..])`
			`}`
			`return res.str()`
			`}`