v/vlib/regex/regex.v

/**********************************************************************
*
* regex 0.9a
*
* Copyright (c) 2019 Dario Deledda. All rights reserved.
* Use of this source code is governed by an MIT license
* that can be found in the LICENSE file.
*
* This file contains regex module
*
* Known limitations:
* - max 8 stacked groups
* - find is implemented in a trivial way
*
*
**********************************************************************/
module regex

// Public constants of the regex module: version string, default limits,
// char lists and the result/error codes returned by the compiler and the parsers
pub const(
	V_REGEX_VERSION = "0.9a"      // regex module version

	MAX_CODE_LEN     = 256        // default small base code len for the regex programs
	MAX_QUANTIFIER   = 1073741824 // default max repetitions allowed for the quantifiers = 2^30

	// space chars (only the western ones!!) TODO: manage all the spaces from unicode
	SPACES = [` `, `\t`, `\n`, `\r`, `\v`, `\f`]
	// new line chars, for now only '\n' and '\r'
	NEW_LINE_LIST = [`\n`,`\r`]

	// Results
	NO_MATCH_FOUND          = -1

	// Errors
	COMPILE_OK              =  0   // the regex string compiled, all ok
	ERR_CHAR_UNKNOWN        = -2   // the char used is unknown to the system
	ERR_UNDEFINED           = -3   // the compiler symbol is undefined
	ERR_INTERNAL_ERROR      = -4   // Bug in the regex system!!
	ERR_CC_ALLOC_OVERFLOW   = -5   // memory for char class full!!
	ERR_SYNTAX_ERROR        = -6   // syntax error in regex compiling
	ERR_GROUPS_OVERFLOW     = -7   // max number of groups reached
	ERR_GROUPS_MAX_NESTED   = -8   // max number of nested groups reached
	ERR_GROUP_NOT_BALANCED  = -9   // group not balanced
)

// Instruction codes stored in Token.ist by the compiler.
// A code with the highest bit (SIMPLE_CHAR_MASK) clear is a simple char,
// all the other instructions have the highest bit set.
const(
	//*************************************
	// regex program instructions
	//*************************************
	SIMPLE_CHAR_MASK = u32(0x80000000)   // single char mask
	IST_SIMPLE_CHAR  = u32(0x7FFFFFFF)   // single char instruction, 31 bit available to char

	// char class 11 0100 AA xxxxxxxx
	// AA = 00  regular class
	// AA = 01  Negated class ^ char
	IST_CHAR_CLASS       = 0xD1000000   // MASK
	IST_CHAR_CLASS_POS   = 0xD0000000   // char class normal [abc]
	IST_CHAR_CLASS_NEG   = 0xD1000000   // char class negate [^abc]

	// dot char        10 0110 xx xxxxxxxx
	IST_DOT_CHAR         = 0x98000000   // match any char except \n

	// backslash chars 10 0100 xx xxxxxxxx
	IST_BSLS_CHAR        = 0x90000000   // backslash char

	// OR |            10 010Y xx xxxxxxxx
	IST_OR_BRANCH        = 0x91000000   // OR case

	// groups          10 010Y xx xxxxxxxx
	IST_GROUP_START      = 0x92000000   // group start (
	IST_GROUP_END        = 0x94000000   // group end   )

	// control instructions
	IST_PROG_END         = u32(0x88000000)      //10 0010 xx xxxxxxxx
	//*************************************
)

/******************************************************************************
*
* General Utilities
*
******************************************************************************/
// utf8util_char_len returns the length in bytes (1..4) of the utf8 char
// that has b as first byte
[inline]
fn utf8util_char_len(b byte) int {
	// 0xe5000000 is used as a small lookup table indexed by the high bits of b,
	// the two bits extracted are the number of bytes that follow the first one
	shift := (b >> 3) & 0x1e
	extra := (0xe5000000 >> shift) & 3
	return extra + 1
}

// get_char reads the utf8 char that starts at the byte position i of in_txt.
// It returns the char and its length in bytes; a multi byte char is returned
// as its raw utf8 bytes packed in an u32, the first byte in the highest position used
[inline]
fn get_char(in_txt string, i int) (u32,int) {
	first := in_txt.str[i]
	// 7 bit ascii, a single byte
	if first & 0x80 == 0 {
		return u32(first), 1
	}
	// multi byte char, accumulate all the bytes of the sequence
	n_bytes := utf8util_char_len(first)
	mut code := u32(0)
	for k := 0; k < n_bytes; k++ {
		code = (code << 8) | in_txt.str[i+k]
	}
	return code,n_bytes
}

// get_charb is the byteptr version of get_char: it reads the utf8 char that
// starts at the byte position i of in_txt.
// It returns the char and its length in bytes; a multi byte char is returned
// as its raw utf8 bytes packed in an u32, the first byte in the highest position used
[inline]
fn get_charb(in_txt byteptr, i int) (u32,int) {
	first := in_txt[i]
	// 7 bit ascii, a single byte
	if first & 0x80 == 0 {
		return u32(first), 1
	}
	// multi byte char, accumulate all the bytes of the sequence
	n_bytes := utf8util_char_len(first)
	mut code := u32(0)
	for k := 0; k < n_bytes; k++ {
		code = (code << 8) | in_txt[i+k]
	}
	return code,n_bytes
}

// is_alnum returns true if in_char is an ascii letter (A-Z, a-z) or an ascii digit (0-9)
[inline]
fn is_alnum(in_char byte) bool {
	if in_char >= `A` && in_char <= `Z` { return true }
	if in_char >= `a` && in_char <= `z` { return true }
	return in_char >= `0` && in_char <= `9`
}

// is_not_alnum is the negation of is_alnum
[inline]
fn is_not_alnum(in_char byte) bool {
	if is_alnum(in_char) {
		return false
	}
	return true
}

// is_space returns true if in_char is one of the chars listed in SPACES
[inline]
fn is_space(in_char byte) bool {
	for space_ch in SPACES {
		if in_char == space_ch {
			return true
		}
	}
	return false
}

// is_not_space is the negation of is_space
[inline]
fn is_not_space(in_char byte) bool {
	if is_space(in_char) {
		return false
	}
	return true
}

// is_digit returns true if in_char is an ascii digit (0-9)
[inline]
fn is_digit(in_char byte) bool {
	return in_char >= `0` && in_char <= `9`
}

// is_not_digit is the negation of is_digit
[inline]
fn is_not_digit(in_char byte) bool {
	if is_digit(in_char) {
		return false
	}
	return true
}

// is_wordchar returns true if in_char is the underscore or an alphanumeric ascii char
[inline]
fn is_wordchar(in_char byte) bool {
	if in_char == `_` {
		return true
	}
	return is_alnum(in_char)
}

// is_not_wordchar is the negation of is_wordchar: it returns true if in_char
// is neither an alphanumeric ascii char nor the underscore
[inline]
fn is_not_wordchar(in_char byte) bool {
	// negate is_wordchar and not is_alnum, otherwise `_` is reported as a non word char
	return !is_wordchar(in_char)
}

// is_lower returns true if in_char is a lowercase ascii letter (a-z)
[inline]
fn is_lower(in_char byte) bool {
	return in_char >= `a` && in_char <= `z`
}

// is_upper returns true if in_char is an uppercase ascii letter (A-Z)
[inline]
fn is_upper(in_char byte) bool {
	return in_char >= `A` && in_char <= `Z`
}

// get_parse_error_string returns the name of the result/error code err,
// "ERR_UNKNOWN" is returned for every code not defined by the module
pub fn (re RE) get_parse_error_string(err int) string {
	if err == COMPILE_OK             { return "COMPILE_OK" }
	if err == NO_MATCH_FOUND         { return "NO_MATCH_FOUND" }
	if err == ERR_CHAR_UNKNOWN       { return "ERR_CHAR_UNKNOWN" }
	if err == ERR_UNDEFINED          { return "ERR_UNDEFINED" }
	if err == ERR_INTERNAL_ERROR     { return "ERR_INTERNAL_ERROR" }
	if err == ERR_CC_ALLOC_OVERFLOW  { return "ERR_CC_ALLOC_OVERFLOW" }
	if err == ERR_SYNTAX_ERROR       { return "ERR_SYNTAX_ERROR" }
	if err == ERR_GROUPS_OVERFLOW    { return "ERR_GROUPS_OVERFLOW" }
	if err == ERR_GROUPS_MAX_NESTED  { return "ERR_GROUPS_MAX_NESTED" }
	if err == ERR_GROUP_NOT_BALANCED { return "ERR_GROUP_NOT_BALANCED" }
	return "ERR_UNKNOWN"
}

// simple_log is the default log function: it writes txt to the C stdout
// stream and flushes the stream immediately
fn simple_log(txt string) {
	C.fprintf(C.stdout, "%s",txt.str)
	// use the same C.stdout symbol used by the fprintf above
	C.fflush(C.stdout)
}

/******************************************************************************
*
* Token Structs
*
******************************************************************************/
// Token is a single instruction of the compiled regex program (RE.prog).
// ist holds the instruction code, the other fields are the operands and
// the counters used for that instruction
struct Token{
mut:
	ist u32 = u32(0)              // instruction code, one of the IST_* values or a simple char

	// Quantifiers / branch
	rep_min         int    = 0    // used also for jump next in the OR branch [no match] pc jump
	rep_max         int    = 0    // used also for jump next in the OR branch [   match] pc jump

	// Char class
	cc_index        int    = -1   // index in RE.cc of the first CharClass token of the class

	// counters for quantifier check (repetitions)
	rep int = 0

	// validator function pointer and control char
	validator fn (byte) bool
	v_ch u32               = u32(0) // debug, helper to recreate the query string

	// groups variables
	group_rep          int = 0    // repetition of the group
	group_id           int = -1   // id of the group
	goto_pc            int = -1   // jump to this PC if it is needed

	// OR flag for the token
	next_is_or bool = false       // true if the next token is an OR
}

// reset clears the repetition counter of the token
fn (tok mut Token) reset() {
	tok.rep = 0
}

/******************************************************************************
*
* Regex struct
*
******************************************************************************/
// Bit flags stored in RE.flag
pub const (
	//F_FND = 0x00000001  // check until the end of the input string, it acts like a "find first match", not efficient!!
	//F_NL  = 0x00000002  // end the match when a new line symbol is found
	//F_PM  = 0x00000004  // partial match: if the source text finishes and the match is positive until then return true

	F_MS  = 0x00000008  // match true only if the match is at the start of the string
	F_ME  = 0x00000010  // match true only if the match is at the end of the string

	F_EFM = 0x01000000  // exit on first token matched, used by search
)

// StateDotObj is one entry of RE.state_stack, it keeps a saved matcher state.
// The compiler allocates one entry for each IST_DOT_CHAR of the program plus one
struct StateDotObj{
mut:
	i  int                = 0   // char index in the input buffer
	pc int                = 0   // program counter saved
	mi int                = 0   // match_index saved
	group_stack_index int = -1  // group index stack pointer saved
}

// RE is the regex object: it holds the compiled program, the char classes
// storage, the groups data, the flags and the debug/log settings
pub
struct RE {
pub mut:
	prog []Token               // compiled program, filled by compile

	// char classes storage
	cc []CharClass             // char class list
	cc_index int         = 0   // index of the first free slot in cc

	// state index
	state_stack_index int= -1
	state_stack []StateDotObj


	// groups
	group_count int      = 0   // number of groups in this regex struct
	groups []int               // groups index results
	group_max_nested int = 3   // max nested group
	group_max int        = 8   // max allowed number of different groups

	// flags
	flag int             = 0   // flag for optional parameters

	// Debug/log
	debug int            = 0   // enable in order to have the unroll of the code 0 = NO_DEBUG, 1 = LIGHT 2 = VERBOSE
	log_func fn (string) = simple_log  // log function, can be customized by the user
	query string         = ""  // query string
}

// reset brings the RE object back to its initial matching state:
// indexes, groups results and the repetition counters of all the tokens
fn (re mut RE) reset(){
	//re.group_count      = 0
	re.cc_index          = 0
	re.state_stack_index = -1

	// two slots for each group, all set to -1
	re.groups = [-1].repeat(re.group_count*2)

	// clear the counters of every token of the program
	for k := 0; k < re.prog.len; k++ {
		re.prog[k].rep       = 0 // clear repetition of the token
		re.prog[k].group_rep = 0 // clear repetition of the group
	}
}

/******************************************************************************
*
* Backslashes chars
*
******************************************************************************/
// BslsStruct binds the meta char of a backslash sequence (the char after the \)
// to the function that validates a byte for that sequence
struct BslsStruct {
	ch u32                   // meta char
	validator fn (byte) bool // validator function pointer
}

const(
	// backslash sequences with a validator function: \w \W \s \S \d \D \a \A
	// NOTE(review): `w`/`W` are bound to is_alnum/is_not_alnum, so `_` is not
	// accepted by \w; is_wordchar/is_not_wordchar exist but are not used here - confirm the intent
	BSLS_VALIDATOR_ARRAY = [
		BslsStruct{`w`, is_alnum},
		BslsStruct{`W`, is_not_alnum},
		BslsStruct{`s`, is_space},
		BslsStruct{`S`, is_not_space},
		BslsStruct{`d`, is_digit},
		BslsStruct{`D`, is_not_digit},
		BslsStruct{`a`, is_lower},
		BslsStruct{`A`, is_upper},
	]

	// these chars are escaped if preceded by a \
	BSLS_ESCAPE_LIST = [ `\\`,`|`,`.`,`*`,`+`,`{`,`}`,`[`,`]` ]
)

// BSLS_parse_state lists the states of the small state machine used by parse_bsls
enum BSLS_parse_state {
		start,
		bsls_found,
		bsls_char,
		normal_char
}

// parse_bsls parses the backslash sequence that starts at the index in_i of in_txt.
// It returns (index, str_len) where index is:
// - >= 0             the index in BSLS_VALIDATOR_ARRAY of the meta char found after the \
// - NO_MATCH_FOUND   the char after the \ is in BSLS_ESCAPE_LIST, it is a simple escaped char
// - ERR_SYNTAX_ERROR no valid backslash sequence is present
// and str_len is the number of bytes scanned, always counted from in_i
fn (re RE) parse_bsls(in_txt string, in_i int) (int,int){
	mut status := BSLS_parse_state.start
	mut i := in_i

	for i < in_txt.len {
		// get our char
		char_tmp,char_len := get_char(in_txt,i)
		ch := byte(char_tmp)

		if status == .start && ch == `\\` {
			status = .bsls_found
			i += char_len
			continue
		}

		// check if it is our bsls char, for now only one length sequence
		if status == .bsls_found {
			for c,x in BSLS_VALIDATOR_ARRAY {
				if x.ch == ch {
					return c,i-in_i+1
				}
			}
			// no validator for this char: examine the same char again as a normal escaped char
			status = .normal_char
			continue
		}

		// no BSLS validator, manage it as a normal escaped char
		if status == .normal_char {
			if ch in BSLS_ESCAPE_LIST {
				return NO_MATCH_FOUND,i-in_i+1
			}
			return ERR_SYNTAX_ERROR,i-in_i+1
		}

		// at the present time we manage only one char after the \
		break

	}
	// not our bsls return KO.
	// The length must be relative to in_i like in all the other returns,
	// the caller adds it to its own index to report the error position
	return ERR_SYNTAX_ERROR, i-in_i
}

/******************************************************************************
*
* Char class
*
******************************************************************************/
// Types of the CharClass tokens stored in RE.cc
const(
	CC_NULL = 0    // empty cc token
	CC_CHAR = 1    // simple char: a
	CC_INT  = 2    // char interval: a-z
	CC_BSLS = 3    // backslash char
	CC_END  = 4    // cc sequence terminator
)

// CharClass is a single token of a char class: a char, an interval of chars
// or a backslash char; a char class is a sequence of tokens closed by a CC_END token
struct CharClass {
mut:
	cc_type int = CC_NULL      // type of cc token
	ch0 u32     = u32(0)       // first char of the interval a-b  a in this case
	ch1 u32     = u32(0)	   // second char of the interval a-b b in this case
	validator fn (byte) bool   // validator function pointer
}

// CharClass_parse_state lists the states of the state machine used by parse_char_class
enum CharClass_parse_state {
	start,
	in_char,
	in_bsls,
	separator,
	finish,
}

// get_char_class returns as a string the content of the char class used by
// the token at pc, without the enclosing [ ] and without the negation char
fn (re RE) get_char_class(pc int) string {
	// the worst case for a cc token is an interval: 4 bytes + `-` + 4 bytes,
	// one more byte is needed for the string terminator
	buf := [byte(0)].repeat(re.cc.len*9 + 1)
	// point to the data of the array, not to the array struct itself
	mut buf_ptr := *byte(&buf[0])

	mut cc_i := re.prog[pc].cc_index
	mut i := 0
	mut tmp := 0
	for cc_i >= 0 && cc_i < re.cc.len && re.cc[cc_i].cc_type != CC_END {

		if re.cc[cc_i].cc_type == CC_BSLS {
			// backslash char: \ followed by the meta char
			buf_ptr[i++] = `\\`
			buf_ptr[i++] = byte(re.cc[cc_i].ch0)
		}
		else if re.cc[cc_i].ch0 == re.cc[cc_i].ch1 {
			// single char: write its bytes from the highest to the lowest, skip the zero bytes
			tmp = 3
			for tmp >= 0 {
				x := byte((re.cc[cc_i].ch0 >> (tmp*8)) & 0xFF)
				if x != 0 {
					buf_ptr[i++] = x
				}
				tmp--
			}
		}
		else {
			// interval: first char, `-`, second char
			tmp = 3
			for tmp >= 0 {
				x := byte((re.cc[cc_i].ch0 >> (tmp*8)) & 0xFF)
				if x != 0 {
					buf_ptr[i++] = x
				}
				tmp--
			}
			buf_ptr[i++] = `-`
			tmp = 3
			for tmp >= 0 {
				x := byte((re.cc[cc_i].ch1 >> (tmp*8)) & 0xFF)
				if x != 0 {
					buf_ptr[i++] = x
				}
				tmp--
			}
		}
		cc_i++
	}
	buf_ptr[i] = byte(0)

	return tos_clone( buf_ptr )
}

// check_char_class returns true if ch is accepted by at least one token of
// the char class used by the token at pc; the negation of the class is not applied here
fn (re RE) check_char_class(pc int, ch u32) bool {
	mut idx := re.prog[pc].cc_index
	for {
		// stop on an invalid index or at the end of the class
		if idx < 0 || idx >= re.cc.len {
			break
		}
		if re.cc[idx].cc_type == CC_END {
			break
		}

		if re.cc[idx].cc_type == CC_BSLS {
			// backslash char, ask the validator
			if re.cc[idx].validator(byte(ch)) {
				return true
			}
		}
		else if re.cc[idx].ch0 <= ch && re.cc[idx].ch1 >= ch {
			// simple char or interval
			return true
		}
		idx++
	}
	return false
}

// parse_char_class parses a char class like [abcm-p], in_i is the index of the first char after the [ char.
// The tokens of the class are stored in re.cc starting from re.cc_index and are closed by a CC_END token.
// On success it returns (index, str_len, cc_type):
// - index   index in re.cc of the first token of the class
// - str_len number of bytes of the class in the query, the [ and ] chars included
// - cc_type IST_CHAR_CLASS_POS or IST_CHAR_CLASS_NEG
// On failure it returns (ERR_CC_ALLOC_OVERFLOW,0,0) if re.cc is full or
// (ERR_SYNTAX_ERROR,0,0) if the text ends before the closing ] char
fn (re mut RE) parse_char_class(in_txt string, in_i int) (int, int, u32) {
	mut status := CharClass_parse_state.start
	mut i := in_i

	mut tmp_index := re.cc_index
	res_index := re.cc_index

	mut cc_type := u32(IST_CHAR_CLASS_POS)

	for i < in_txt.len {

		// check if we are out of memory for char classes
		if tmp_index >= re.cc.len {
			return ERR_CC_ALLOC_OVERFLOW,0,u32(0)
		}

		// get our char
		char_tmp,char_len := get_char(in_txt,i)
		ch := byte(char_tmp)

		//C.printf("CC #%3d ch: %c\n",i,ch)

		// negation, the status is not changed so the next char is still examined in the start state
		if status == .start && ch == `^` {
			cc_type = u32(IST_CHAR_CLASS_NEG)
			i += char_len
			continue
		}

		// bsls
		if (status == .start || status == .in_char) && ch == `\\` {
			//C.printf("CC bsls.\n")
			status = .in_bsls
			i += char_len
			continue
		}

		if status == .in_bsls {
			//C.printf("CC bsls validation.\n")
			for c,x in BSLS_VALIDATOR_ARRAY {
				if x.ch == ch {
					//C.printf("CC bsls found \\%c.\n",ch)
					re.cc[tmp_index].cc_type   = CC_BSLS
					re.cc[tmp_index].ch0       = BSLS_VALIDATOR_ARRAY[c].ch
					re.cc[tmp_index].ch1       = BSLS_VALIDATOR_ARRAY[c].ch
					re.cc[tmp_index].validator = BSLS_VALIDATOR_ARRAY[c].validator
					i += char_len
					tmp_index++
					status = .in_char
					break
				}
			}
			// still in_bsls: no validator found, the char is managed below as a normal char
			// NOTE(review): an escaped `]` or `-` is not stored as a char here, the
			// simple char branch below excludes them - confirm if it is intended
			if status == .in_bsls {
				//C.printf("CC bsls not found \\%c.\n",ch)
				status = .in_char
			}else {
				continue
			}
		}

		// simple char
		if (status == .start || status == .in_char) &&
			ch != `-` && ch != `]`
		{
			status = .in_char

			re.cc[tmp_index].cc_type = CC_CHAR
			re.cc[tmp_index].ch0     = char_tmp
			re.cc[tmp_index].ch1     = char_tmp

			i += char_len
			tmp_index++
			continue
		}

		// check range separator
		if status == .in_char && ch == `-` {
			status = .separator
			i += char_len
			continue
		}

		// check range end, the previous token becomes an interval
		// NOTE(review): the previous token is overwritten also when it is a CC_BSLS token - confirm
		if status == .separator && ch != `]` && ch != `-` {
			status = .in_char
			re.cc[tmp_index-1].cc_type = CC_INT
			re.cc[tmp_index-1].ch1     = char_tmp
			i += char_len
			continue
		}

		// char class end
		if status == .in_char && ch == `]` {
			re.cc[tmp_index].cc_type = CC_END
			re.cc[tmp_index].ch0     = 0
			re.cc[tmp_index].ch1     = 0
			re.cc_index = tmp_index+1

			return res_index, i-in_i+2, cc_type
		}

		// none of the cases above matched this char in this state: skip one byte
		i++
	}
	return ERR_SYNTAX_ERROR,0,u32(0)
}

/******************************************************************************
*
* Re Compiler
*
******************************************************************************/
//
// Quantifier
//
// Quant_parse_state lists the states of the state machine used by parse_quantifier
enum Quant_parse_state {
	start,
	min_parse,
	comma_checked,
	max_parse,
	finish
}

// parse_quantifier parses a {min,max} quantifier, in_i is the index of the first char after the { char.
// The accepted forms are {n} {n,} {n,m} {,} {,m}:
// - when the min is omitted it is set to 1
// - when the max is omitted it is set to MAX_QUANTIFIER
// On success it returns (min, max, str_len) where str_len is the number of
// bytes of the quantifier in the query, the { and } chars included.
// On failure it returns (ERR_SYNTAX_ERROR, index of the failure, 0); a value
// greater than MAX_QUANTIFIER is refused as a syntax error
fn (re RE) parse_quantifier(in_txt string, in_i int) (int, int, int) {
	mut status := Quant_parse_state.start
	mut i := in_i

	mut q_min := 0 // accumulator of the min value, set to 1 if the min is omitted
	mut q_max := 0 // accumulator of the max value, set to MAX_QUANTIFIER if the max is omitted

	mut ch := byte(0)

	for i < in_txt.len {
		ch = in_txt.str[i]

		//C.printf("%c status: %d\n",ch,status)

		// exit on a char not compatible with the {} quantifier
		if utf8util_char_len(ch) != 1 {
			return ERR_SYNTAX_ERROR,i,0
		}

		// min parsing skipped if the comma is the first char
		if status == .start && ch == `,` {
			q_min = 1 // default min in a {} quantifier is 1
			status = .comma_checked
			i++
			continue
		}

		// parse the min
		if (status == .start || status == .min_parse) && is_digit( ch ) {
			digit := int(ch - `0`)
			// refuse the value before the int can overflow: a negative min
			// would be read by the caller as an error code
			if q_min > (MAX_QUANTIFIER - digit) / 10 {
				return ERR_SYNTAX_ERROR,i,0
			}
			status = .min_parse
			q_min = q_min * 10 + digit
			i++
			continue
		}

		// we have parsed the min, now check the max
		if status == .min_parse && ch == `,` {
			status = .comma_checked
			i++
			continue
		}

		// single value {4}
		if status == .min_parse && ch == `}` {
			q_max = q_min
			return q_min, q_max, i-in_i+2
		}

		// end without max
		if status == .comma_checked && ch == `}` {
			q_max = MAX_QUANTIFIER
			return q_min, q_max, i-in_i+2
		}

		// parse the max
		if (status == .comma_checked || status == .max_parse) && is_digit( ch ) {
			digit := int(ch - `0`)
			// same overflow guard used for the min
			if q_max > (MAX_QUANTIFIER - digit) / 10 {
				return ERR_SYNTAX_ERROR,i,0
			}
			status = .max_parse
			q_max = q_max * 10 + digit
			i++
			continue
		}

		// end the parsing
		if status == .max_parse && ch == `}` {
			return q_min, q_max, i-in_i+2
		}

		// not a {} quantifier, exit
		return ERR_SYNTAX_ERROR,i,0
	}

	// not a conform {} quantifier
	return ERR_SYNTAX_ERROR,i,0
}

//
// main compiler
//
// compile translates the query string in_txt into the regex program stored in re.prog.
// It returns (return code, index):
// - (COMPILE_OK, 0) if the query is compiled
// - (error code, index) on failure, where index is the index of the error in the query string
// A leading ^ sets the F_MS flag and a trailing $ sets the F_ME flag in re.flag.
// After the tokens are generated the state stack is sized on the number of
// IST_DOT_CHAR and the jumps of the OR branches are resolved.
// NOTE(review): pc is never checked against re.prog.len, re.prog must be big
// enough for the query plus the final IST_PROG_END
pub fn (re mut RE) compile(in_txt string) (int,int) {
	mut i        := 0      // input string index
	mut pc       := 0      // program counter
	mut tmp_code := u32(0)

	// group management variables
	mut group_count           := -1
	mut group_stack           := [0 ].repeat(re.group_max_nested)
	mut group_stack_txt_index := [-1].repeat(re.group_max_nested)
	mut group_stack_index     := -1

	// true if this query starts with the ^ anchor
	mut start_anchored := false

	re.query = in_txt      // save the query string

	i = 0
	for i < in_txt.len {
		tmp_code = u32(0)
		mut char_tmp := u32(0)
		mut char_len := 0
		//C.printf("i: %3d ch: %c\n", i, in_txt.str[i])

		char_tmp,char_len = get_char(in_txt,i)

		//
		// check special cases: $ ^
		//
		if char_len == 1 && i == 0 && byte(char_tmp) == `^` {
			re.flag = F_MS
			start_anchored = true
			i = i + char_len
			continue
		}
		if char_len == 1 && i == (in_txt.len-1) && byte(char_tmp) == `$` {
			// keep the F_MS set by a leading ^ of this same query,
			// a plain assignment of F_ME would drop it
			if start_anchored {
				re.flag = F_MS | F_ME
			} else {
				re.flag = F_ME
			}
			i = i + char_len
			continue
		}

		// IST_GROUP_START
		if char_len == 1 && pc >= 0 && byte(char_tmp) == `(` {

			//check max groups allowed
			// NOTE(review): group_count is the id of the last group opened, so this
			// check lets more than re.group_max groups pass - confirm the intended limit
			if group_count > re.group_max {
				return ERR_GROUPS_OVERFLOW,i+1
			}

			group_stack_index++

			// check max nested groups allowed, the stacks have re.group_max_nested
			// slots so the last valid index is re.group_max_nested-1
			if group_stack_index >= re.group_max_nested {
				return ERR_GROUPS_MAX_NESTED,i+1
			}

			group_count++

			group_stack_txt_index[group_stack_index] = i
			group_stack[group_stack_index] = pc

			re.prog[pc].ist = u32(0) | IST_GROUP_START
			re.prog[pc].group_id = group_count
			re.prog[pc].rep_min = 1
			re.prog[pc].rep_max = 1
			pc = pc + 1
			i = i + char_len
			continue

		}

		// IST_GROUP_END
		if char_len==1 && pc > 0 && byte(char_tmp) == `)` {
			if group_stack_index < 0 {
				return ERR_GROUP_NOT_BALANCED,i+1
			}

			goto_pc := group_stack[group_stack_index]
			group_stack_index--

			re.prog[pc].ist = u32(0) | IST_GROUP_END
			re.prog[pc].rep_min = 1
			re.prog[pc].rep_max = 1

			re.prog[pc].goto_pc = goto_pc			          // PC where to jump if a group need
			re.prog[pc].group_id = re.prog[goto_pc].group_id  // id of this group, used for storing data

			re.prog[goto_pc].goto_pc = pc                     // start goto point to the end group pc
			//re.prog[goto_pc].group_id = group_count         // id of this group, used for storing data

			pc = pc + 1
			i = i + char_len
			continue
		}

		// IST_DOT_CHAR match any char except the following token
		if char_len==1 && pc >= 0 && byte(char_tmp) == `.` {
			re.prog[pc].ist = u32(0) | IST_DOT_CHAR
			re.prog[pc].rep_min = 1
			re.prog[pc].rep_max = 1
			pc = pc + 1
			i = i + char_len
			continue
		}

		// OR branch
		if char_len==1 && pc > 0 && byte(char_tmp) == `|` {
			// two consecutive IST_OR_BRANCH are an error
			if pc > 0 && re.prog[pc-1].ist == IST_OR_BRANCH {
				return ERR_SYNTAX_ERROR,i
			}
			re.prog[pc].ist = u32(0) | IST_OR_BRANCH
			pc = pc + 1
			i = i + char_len
			continue
		}

		// Quantifiers, they are applied to the previous token
		if char_len==1 && pc > 0{
			mut quant_flag := true
			match byte(char_tmp) {
				`?` {
					//C.printf("q: %c\n",char_tmp)
					re.prog[pc-1].rep_min = 0
					re.prog[pc-1].rep_max = 1
				}

				`+` {
					//C.printf("q: %c\n",char_tmp)
					re.prog[pc-1].rep_min = 1
					re.prog[pc-1].rep_max = MAX_QUANTIFIER
				}

				`*` {
					//C.printf("q: %c\n",char_tmp)
					re.prog[pc-1].rep_min = 0
					re.prog[pc-1].rep_max = MAX_QUANTIFIER
				}

				`{` {
					min,max,tmp := re.parse_quantifier(in_txt, i+1)
					// it is a quantifier
					if min >= 0 {
						//C.printf("{%d,%d}\n str:[%s]\n",min,max,in_txt[i..i+tmp])
						i = i + tmp
						re.prog[pc-1].rep_min = min
						re.prog[pc-1].rep_max = max
						continue
					}
					else {
						// a negative min is the error code of the quantifier parser
						return min,i
					}
					// TODO: decide if the open bracket can be conform without the close bracket
					/*
					// no conform, parse as normal char
					else {
						quant_flag = false
					}
					*/
				}
				else{
					quant_flag = false
				}
			}

			if quant_flag {
				i = i + char_len
				continue
			}
		}

		// IST_CHAR_CLASS
		if char_len==1 && pc >= 0{
			if byte(char_tmp) == `[` {
				cc_index,tmp,cc_type := re.parse_char_class(in_txt, i+1)
				if cc_index >= 0 {
					//C.printf("index: %d str:%s\n",cc_index,in_txt[i..i+tmp])
					i = i + tmp
					re.prog[pc].ist      = u32(0) | cc_type
					re.prog[pc].cc_index = cc_index
					re.prog[pc].rep_min  = 1
					re.prog[pc].rep_max  = 1
					pc = pc + 1
					continue
				}

				// a negative cc_index is the error code of the char class parser
				else if cc_index < 0 {
					return cc_index, i
				}
			}
		}

		// IST_BSLS_CHAR
		if char_len==1 && pc >= 0{
			if byte(char_tmp) == `\\` {
				bsls_index,tmp := re.parse_bsls(in_txt,i)
				//C.printf("index: %d str:%s\n",bsls_index,in_txt[i..i+tmp])
				if bsls_index >= 0 {
					i = i + tmp
					re.prog[pc].ist       = u32(0) | IST_BSLS_CHAR
					re.prog[pc].rep_min   = 1
					re.prog[pc].rep_max   = 1
					re.prog[pc].validator = BSLS_VALIDATOR_ARRAY[bsls_index].validator
					re.prog[pc].v_ch      = BSLS_VALIDATOR_ARRAY[bsls_index].ch
					pc = pc + 1
					continue
				}
				// this is an escape char, skip the bsls and continue as a normal char
				else if bsls_index == NO_MATCH_FOUND {
					i += char_len
					char_tmp,char_len = get_char(in_txt,i)
					// continue as simple char
				}
				// if not an escape or a bsls char then it is an error (at least for now!)
				else {
					return bsls_index,i+tmp
				}
			}
		}

		// IST_SIMPLE_CHAR
		tmp_code            = (tmp_code | char_tmp) & IST_SIMPLE_CHAR
		re.prog[pc].ist     = tmp_code
		re.prog[pc].rep_min = 1
		re.prog[pc].rep_max = 1
		//C.printf("char: %c\n",char_tmp)
		pc = pc +1

		i+=char_len
	}

	// add end of the program
	re.prog[pc].ist = IST_PROG_END

	// check for unbalanced groups
	if group_stack_index != -1 {
		return ERR_GROUP_NOT_BALANCED, group_stack_txt_index[group_stack_index]+1
	}

	// check for OR at the end of the program
	if pc > 0 && re.prog[pc-1].ist == IST_OR_BRANCH {
		return ERR_SYNTAX_ERROR,in_txt.len
	}

	// store the number of groups in the query
	re.group_count = group_count+1

	//******************************************
	// Post processing
	//******************************************

	// count IST_DOT_CHAR to set the size of the state stack
	mut pc1 := 0
	mut tmp_count := 0
	for pc1 < pc {
		if re.prog[pc1].ist == IST_DOT_CHAR {
			tmp_count++
		}
		pc1++
	}
	// init the state stack
	re.state_stack = [StateDotObj{}].repeat(tmp_count+1)


	// OR branch
	// a|b|cd
	// d exit point
	// a,b,c branches
	// set the jump in the right places
	pc1 = 0
	for pc1 < pc-2 {
		// two consecutive OR are a syntax error
		if re.prog[pc1+1].ist == IST_OR_BRANCH && re.prog[pc1+2].ist == IST_OR_BRANCH {
			return ERR_SYNTAX_ERROR, i
		}

		// manage a|b chains like a|(b)|c|d...
		// standard solution
		if re.prog[pc1].ist != IST_OR_BRANCH &&
			re.prog[pc1+1].ist == IST_OR_BRANCH &&
			re.prog[pc1+2].ist != IST_OR_BRANCH
		{
			re.prog[pc1].next_is_or = true   // set that the next token is an  OR
			re.prog[pc1+1].rep_min = pc1+2   // failed match jump

			// match jump, if an OR chain the next token will be an OR token
			mut pc2 := pc1+2
			for pc2 < pc-1 {
				ist := re.prog[pc2].ist
				if  ist == IST_GROUP_START {
					// jump after the end of the group
					re.prog[pc1+1].rep_max = re.prog[pc2].goto_pc + 1
					break
				}
				if ist != IST_OR_BRANCH {
					re.prog[pc1+1].rep_max = pc2 + 1
					break
				}
				pc2++
			}
			//C.printf("Compile OR postproc. [%d,OR %d,%d]\n",pc1,pc1+1,pc2)
			pc1 = pc2
			continue
		}

		pc1++
	}


	//******************************************
	// DEBUG PRINT REGEX GENERATED CODE
	//******************************************
	if re.debug > 0 {
		re.log_func(re.get_code())
	}
	//******************************************

	return COMPILE_OK, 0
}

// get_code returns a printable listing of the compiled program, one line for
// each instruction up to IST_PROG_END, note: may be different from the source!
// NOTE(review): every line is formatted with sprintf in a fixed size buffer,
// a very long char class can still be bigger than the buffer
pub fn (re RE) get_code() string {
	mut result := ""

	// use the best buffer possible
	mut tmp_len := 256+128
	if tmp_len < re.cc.len+128 {
		tmp_len = re.cc.len+128
	}
	// some memory buffer
	buf1 := [byte(0)].repeat(tmp_len)
	buf := &buf1[0]

	mut buf_ptr := buf
	mut pc1 := 0
	// a V string can not be passed to a C %s, pass its .str pointer
	C.sprintf(buf_ptr, "========================================\nv RegEx compiler v%s output:\n", V_REGEX_VERSION.str)
	result += tos_clone(buf)

	mut stop_flag := false

	// the last valid index of the program is re.prog.len-1
	for pc1 < re.prog.len {
		buf_ptr = buf
		C.sprintf(buf_ptr, "PC:%3d ist:%08x ",pc1, re.prog[pc1].ist)
		buf_ptr += vstrlen(buf_ptr)
		ist :=re.prog[pc1].ist
		if ist == IST_BSLS_CHAR {
			C.sprintf(buf_ptr, "[\\%c]     BSLS", re.prog[pc1].v_ch)
		} else if ist == IST_PROG_END {
			C.sprintf(buf_ptr, "PROG_END")
			stop_flag = true
		} else if ist == IST_OR_BRANCH {
			C.sprintf(buf_ptr, "OR      ")
		} else if ist == IST_CHAR_CLASS_POS {
			cc_str := re.get_char_class(pc1)
			C.sprintf(buf_ptr, "[%s]     CHAR_CLASS_POS", cc_str.str)
		} else if ist == IST_CHAR_CLASS_NEG {
			cc_str := re.get_char_class(pc1)
			C.sprintf(buf_ptr, "[^]      CHAR_CLASS_NEG[%s]", cc_str.str)
		} else if ist == IST_DOT_CHAR {
			C.sprintf(buf_ptr, ".        DOT_CHAR")
		} else if ist == IST_GROUP_START {
			C.sprintf(buf_ptr, "(        GROUP_START #:%d", re.prog[pc1].group_id)
		} else if ist == IST_GROUP_END {
			C.sprintf(buf_ptr, ")        GROUP_END   #:%d", re.prog[pc1].group_id)
		} else if ist & SIMPLE_CHAR_MASK == 0 {
			C.sprintf(buf_ptr, "[%c]      query_ch", ist & IST_SIMPLE_CHAR)
		}
		buf_ptr += vstrlen(buf_ptr)

		// quantifier, or the jump targets for an OR
		if re.prog[pc1].rep_max == MAX_QUANTIFIER {
			C.sprintf(buf_ptr, " {%3d,MAX}",re.prog[pc1].rep_min)
		}else{
			if ist == IST_OR_BRANCH {
				C.sprintf(buf_ptr, " if false go: %3d if true go: %3d", re.prog[pc1].rep_min, re.prog[pc1].rep_max)
			} else {
				C.sprintf(buf_ptr, " {%3d,%3d}", re.prog[pc1].rep_min, re.prog[pc1].rep_max)
			}
		}
		buf_ptr += vstrlen(buf_ptr)
		C.sprintf(buf_ptr, "\n")
		buf_ptr += vstrlen(buf_ptr)
		result += tos_clone(buf)
		if stop_flag {
			break
		}
		pc1++
	}

	buf_ptr = buf
	C.sprintf(buf_ptr, "========================================\n")

	result += tos_clone(buf)
	return result
}

// get_query returns a string with a reconstruction of the query starting from the regex program code.
// When re.debug is > 0 the id of each group and the jump targets of each OR are added to the string.
// NOTE(review): the string is built with sprintf in a fixed size buffer that
// is not related to the length of the program, a long query can be bigger than the buffer
pub fn (re RE) get_query() string {
	// use the best buffer possible, never an empty one
	mut tmp_len := re.cc.len*2
	if tmp_len < 256 {
		tmp_len = 256
	}
	buf1 := [byte(0)].repeat(tmp_len)
	buf := &buf1[0]
	mut buf_ptr := buf

	if (re.flag & F_MS) != 0 {
		C.sprintf(buf_ptr, "^")
		buf_ptr += vstrlen(buf_ptr)
	}

	mut i := 0
	for i < re.prog.len && re.prog[i].ist != IST_PROG_END && re.prog[i].ist != 0{
		ch := re.prog[i].ist

		//C.printf("ty: %08x\n", ch)

		// GROUP start
		if ch == IST_GROUP_START {
			if re.debug == 0 {
				C.sprintf(buf_ptr, "(")
			} else {
				C.sprintf(buf_ptr, "#%d(", re.prog[i].group_id)
			}
			buf_ptr += vstrlen(buf_ptr)
			i++
			continue
		}

		// GROUP end, the quantifier of the group is written below
		if ch == IST_GROUP_END {
			C.sprintf(buf_ptr, ")")
			buf_ptr += vstrlen(buf_ptr)
		}

		// OR branch
		if ch == IST_OR_BRANCH {
			C.sprintf(buf_ptr, "|")
			// move after the | before writing the debug data, otherwise the | is overwritten
			buf_ptr += vstrlen(buf_ptr)
			if re.debug > 0 {
				C.sprintf(buf_ptr, "{%d,%d}", re.prog[i].rep_min, re.prog[i].rep_max)
				buf_ptr += vstrlen(buf_ptr)
			}
			i++
			continue
		}

		// char class
		if ch == IST_CHAR_CLASS_NEG || ch == IST_CHAR_CLASS_POS {
			C.sprintf(buf_ptr, "[")
			buf_ptr += vstrlen(buf_ptr)

			if ch == IST_CHAR_CLASS_NEG {
				C.sprintf(buf_ptr, "^")
				buf_ptr += vstrlen(buf_ptr)
			}

			// a V string can not be passed to a C %s, pass its .str pointer
			cc_str := re.get_char_class(i)
			C.sprintf(buf_ptr,"%s", cc_str.str)
			buf_ptr += vstrlen(buf_ptr)

			C.sprintf(buf_ptr, "]")
			buf_ptr += vstrlen(buf_ptr)
		}

		// bsls char
		if ch == IST_BSLS_CHAR {
			C.sprintf(buf_ptr, "\\%c", re.prog[i].v_ch)
			buf_ptr += vstrlen(buf_ptr)
		}

		// IST_DOT_CHAR
		if ch == IST_DOT_CHAR {
			C.sprintf(buf_ptr, ".")
			buf_ptr += vstrlen(buf_ptr)
		}

		// char alone, escaped if it is one of the chars of BSLS_ESCAPE_LIST
		if ch & SIMPLE_CHAR_MASK == 0 {
			if byte(ch) in BSLS_ESCAPE_LIST {
				C.sprintf(buf_ptr, "\\")
				buf_ptr += vstrlen(buf_ptr)
			}
			C.sprintf(buf_ptr, "%c", re.prog[i].ist)
			buf_ptr += vstrlen(buf_ptr)
		}

		// quantifier, nothing is written for the default {1,1}
		if !(re.prog[i].rep_min == 1 && re.prog[i].rep_max == 1) {
			if re.prog[i].rep_min == 0 && re.prog[i].rep_max == 1 {
				C.sprintf(buf_ptr, "?")
			} else if re.prog[i].rep_min == 1 && re.prog[i].rep_max == MAX_QUANTIFIER {
				C.sprintf(buf_ptr, "+")
			} else if re.prog[i].rep_min == 0 && re.prog[i].rep_max == MAX_QUANTIFIER {
				C.sprintf(buf_ptr, "*")
			} else {
				if re.prog[i].rep_max == MAX_QUANTIFIER {
					C.sprintf(buf_ptr, "{%d,MAX}", re.prog[i].rep_min)
				} else {
					C.sprintf(buf_ptr, "{%d,%d}", re.prog[i].rep_min, re.prog[i].rep_max)
				}
			}
			buf_ptr += vstrlen(buf_ptr)
		}

		i++
	}
	if (re.flag & F_ME) != 0 {
		C.sprintf(buf_ptr, "$")
		buf_ptr += vstrlen(buf_ptr)
	}
	res := tos_clone(buf)

	return res
}

/******************************************************************************
*
* Matching
*
******************************************************************************/
// match_state lists the states of the FSM of the matcher
enum match_state{
	start = 0,
	stop,
	end,

	ist_load,     // load and execute instruction
	ist_next,     // go to next instruction
	ist_next_ks,  // go to next instruction without cleaning the state
	ist_quant_p,  // match positive, quantifier check
	ist_quant_n,  // match negative, quantifier check
	ist_quant_pg, // match positive, group quantifier check
	ist_quant_ng, // match negative, group quantifier check
}

// state_str returns the name of the matcher state s, "UNKN" if the state is not known
fn state_str(s match_state) string {
	if s == .start        { return "start" }
	if s == .stop         { return "stop" }
	if s == .end          { return "end" }

	if s == .ist_load     { return "ist_load" }
	if s == .ist_next     { return "ist_next" }
	if s == .ist_next_ks  { return "ist_next_ks" }
	if s == .ist_quant_p  { return "ist_quant_p" }
	if s == .ist_quant_n  { return "ist_quant_n" }
	if s == .ist_quant_pg { return "ist_quant_pg" }
	if s == .ist_quant_ng { return "ist_quant_ng" }
	return "UNKN"
}

// StateObj holds the running match status used by match_base.
struct StateObj {
pub mut:
	match_flag bool = false // set to true by match_base when the current instruction matched the current char
	match_index int = -1    // text index of the last matched char, -1 while nothing has matched
	match_first int = -1    // defaults to -1; not referenced by match_base
}

// match_base runs the compiled program stored in re.prog against the first
// in_txt_len bytes of in_txt, using a small finite state machine (see match_state).
//
// Returns a pair (start, end):
// - on a match: the index of the first matched char and the index reached in the text
// - on no match: NO_MATCH_FOUND, 0
// - on an internal failure (program counter out of bounds, unexpected state): ERR_INTERNAL_ERROR, i
// When the F_EFM flag is set the function returns i, i+1 as soon as one token matches.
//
// re.reset() is called before matching; the group boundaries found are written to re.groups.
// When re.debug > 0 every step is reported through re.log_func.
pub fn (re mut RE) match_base(in_txt byteptr, in_txt_len int ) (int,int) {
	// result status
	mut result := NO_MATCH_FOUND     // function return
	mut first_match := -1             //index of the first match

	mut i := 0                       // source string index
	mut ch := u32(0)                 // examined char
	mut char_len := 0                // utf8 examined char len
	mut m_state := match_state.start // start point for the matcher FSM

	mut pc := -1                     // program counter
	mut state := StateObj{}          // actual state
	mut ist := u32(0)                // instruction loaded from re.prog[pc]

	mut group_stack      := [-1].repeat(re.group_max) // text index where each open group started
	mut group_data       := [-1].repeat(re.group_max) // pc of the IST_GROUP_END of each open group

	mut group_index := -1            // group id used to know how many groups are open

	mut step_count := 0              // stats for debug
	mut dbg_line   := 0              // count debug line printed

	re.reset()

	if re.debug>0 {
		// print header
		h_buf :=  [byte(0)].repeat(64)
		C.sprintf(&h_buf[0], "flags: %08x\n",re.flag)
		re.log_func(tos_clone(&h_buf[0]))
	}

	for m_state != .end {

		if pc >= 0 && pc < re.prog.len {
			ist = re.prog[pc].ist
		}else if pc >= re.prog.len {
			C.printf("ERROR!! PC overflow!!\n")
			return ERR_INTERNAL_ERROR, i
		}

		//******************************************
		// DEBUG LOG
		//******************************************
		if re.debug>0 {
			// use the best buffer possible
			mut tmp_len := 256
			if tmp_len < re.cc.len+128 {
				tmp_len = re.cc.len+128
			}

			// some memory buffer
			buf1 := [byte(0)].repeat(tmp_len)
			buf  := &buf1[0]

			// print all the instructions
			mut buf_ptr := buf

			// end of the input text
			if i >= in_txt_len {
				C.sprintf(buf_ptr, "# %3d END OF INPUT TEXT\n",step_count)
				re.log_func(tos_clone(buf))
			}else{

				// print only the executed instruction
				if (re.debug == 1 && m_state == .ist_load) ||
					re.debug == 2
				{

					if ist == IST_PROG_END {
						C.sprintf(buf_ptr, "# %3d PROG_END\n",step_count)
						buf_ptr += vstrlen(buf_ptr)
					}
					else if ist == 0 || m_state in [.start,.ist_next,.stop] {
						C.sprintf(buf_ptr, "# %3d s: %12s PC: NA\n",step_count, state_str(m_state).str)
						buf_ptr += vstrlen(buf_ptr)
					}else{
						ch, char_len = get_charb(in_txt,i)

						// rebuild the utf8 bytes of the char as a zero terminated string for printing
						tmp_bl:=[byte(ch >> 24), byte((ch >> 16) & 0xFF), byte((ch >> 8) & 0xFF), byte(ch & 0xFF), 0]
						tmp_un_ch := byteptr(&tmp_bl[4-char_len])

						C.sprintf(buf_ptr, "# %3d s: %12s PC: %3d=>%08x i,ch,len:[%3d,'%s',%d] f.m:[%3d,%3d] ",
							step_count, state_str(m_state).str , pc, ist, i, tmp_un_ch, char_len, first_match,state.match_index)
						buf_ptr += vstrlen(buf_ptr)

						if ist & SIMPLE_CHAR_MASK == 0 {
							if char_len < 4 {
								C.sprintf(buf_ptr, "query_ch: [%c]", ist & IST_SIMPLE_CHAR)
							} else {
								C.sprintf(buf_ptr, "query_ch: [%c]", ist | SIMPLE_CHAR_MASK)
							}
							buf_ptr += vstrlen(buf_ptr)
						} else {
							if ist == IST_BSLS_CHAR {
								C.sprintf(buf_ptr, "BSLS [\\%c]",re.prog[pc].v_ch)
							} else if ist == IST_PROG_END {
								C.sprintf(buf_ptr, "PROG_END")
							} else if ist == IST_OR_BRANCH {
								C.sprintf(buf_ptr, "OR")
							} else if ist == IST_CHAR_CLASS_POS {
								C.sprintf(buf_ptr, "CHAR_CLASS_POS[%s]",re.get_char_class(pc))
							} else if ist == IST_CHAR_CLASS_NEG {
								C.sprintf(buf_ptr, "CHAR_CLASS_NEG[%s]",re.get_char_class(pc))
							} else if ist == IST_DOT_CHAR {
								C.sprintf(buf_ptr, "DOT_CHAR")
							} else if ist == IST_GROUP_START {
								C.sprintf(buf_ptr, "GROUP_START #:%d rep:%d ",re.prog[pc].group_id, re.prog[re.prog[pc].goto_pc].group_rep)
							} else if ist == IST_GROUP_END {
								C.sprintf(buf_ptr, "GROUP_END   #:%d deep:%d ",re.prog[pc].group_id, group_index)
							}
							buf_ptr += vstrlen(buf_ptr)
						}
						if re.prog[pc].rep_max == MAX_QUANTIFIER {
							C.sprintf(buf_ptr, "{%d,MAX}:%d",re.prog[pc].rep_min,re.prog[pc].rep)
						} else {
							C.sprintf(buf_ptr, "{%d,%d}:%d",re.prog[pc].rep_min,re.prog[pc].rep_max,re.prog[pc].rep)
						}
						buf_ptr += vstrlen(buf_ptr)
						C.sprintf(buf_ptr, " (#%d)\n",group_index)

					}

					re.log_func(tos_clone(buf))

				}
			}
			step_count++
			dbg_line++
		}
		//******************************************

		// we're out of text, manage it
		if i >= in_txt_len {

			// manage groups
			if group_index >= 0 && state.match_index >= 0 {
				//C.printf("End text with open groups!\n")
				// close the groups
				for group_index >= 0 {
					tmp_pc := group_data[group_index]
					re.prog[tmp_pc].group_rep++
					/*
					C.printf("Closing group %d {%d,%d}:%d\n",
						group_index,
						re.prog[tmp_pc].rep_min,
						re.prog[tmp_pc].rep_max,
						re.prog[tmp_pc].group_rep
					)
					*/
					if re.prog[tmp_pc].group_rep >= re.prog[tmp_pc].rep_min{
						start_i   := group_stack[group_index]
						group_stack[group_index]=-1

						// save group results
						g_index := re.prog[tmp_pc].group_id*2
						if start_i >= 0 {
							re.groups[g_index] = start_i
						} else {
							re.groups[g_index] = 0
						}
						re.groups[g_index+1] = i
					}

					group_index--
				}
			}

			// manage IST_DOT_CHAR
			if re.state_stack_index >= 0 {
				//C.printf("DOT CHAR text end management!\n")
				// if DOT CHAR is not the last instruction and we are still going, then no match!!
				// the guard is on pc+1 because that is the element actually read
				if pc+1 < re.prog.len && re.prog[pc+1].ist != IST_PROG_END {
					return NO_MATCH_FOUND,0
				}
			}

			// natural end of the text: leave the FSM and let the final result check decide
			m_state = .end
			break
		}

		// starting and init
		if m_state == .start {
			pc = -1
			i = 0
			m_state = .ist_next
			continue
		}

		// ist_next, next instruction resetting its state
		if m_state == .ist_next {
			pc = pc + 1
			// check if we are in the program bounds before touching re.prog[pc]
			if pc < 0 || pc >= re.prog.len {
				C.printf("ERROR!! PC overflow!!\n")
				return ERR_INTERNAL_ERROR, i
			}
			re.prog[pc].reset()
			m_state = .ist_load
			continue
		}

		// ist_next_ks, next instruction keeping its state
		if m_state == .ist_next_ks {
			pc = pc + 1
			// check if we are in the program bounds
			if pc < 0 || pc >= re.prog.len {
				C.printf("ERROR!! PC overflow!!\n")
				return ERR_INTERNAL_ERROR, i
			}
			m_state = .ist_load
			continue
		}

		// load the char
		ch, char_len = get_charb(in_txt,i)

		// check if stop
		if m_state == .stop {
			// if we are in restore state, do it and restart
			if re.state_stack_index >= 0 {
				i = re.state_stack[re.state_stack_index].i
				pc = re.state_stack[re.state_stack_index].pc
				state.match_index =	re.state_stack[re.state_stack_index].mi
				group_index = re.state_stack[re.state_stack_index].group_stack_index

				m_state = .ist_load
				continue
			}

			if ist == IST_PROG_END {
				return first_match,i
			}

			// exit on no match
			return result,0
		}

		// ist_load
		if m_state == .ist_load {

			// program end
			if ist == IST_PROG_END {
				// if we are in match exit well
				if group_index >= 0 && state.match_index >= 0 {
					group_index = -1
				}

				m_state = .stop
				continue
			}

			// check GROUP start, no quantifier is checked for this token!!
			else if ist == IST_GROUP_START {
				group_index++
				group_data[group_index] = re.prog[pc].goto_pc  // save where is IST_GROUP_END, we will use it for escape
				group_stack[group_index]=i                     // index where we start to manage
				//C.printf("group_index %d rep %d\n", group_index, re.prog[re.prog[pc].goto_pc].group_rep)

				m_state = .ist_next
				continue
			}

			// check GROUP end
			else if ist == IST_GROUP_END {
				// we are in matching streak
				if state.match_index >= 0 {
					// restore txt index stack and save the group data

					//C.printf("g.id: %d group_index: %d\n", re.prog[pc].group_id, group_index)
					if group_index >= 0 {
						start_i   := group_stack[group_index]
						group_stack[group_index]=-1

						// save group results
						g_index := re.prog[pc].group_id*2
						if start_i >= 0 {
							re.groups[g_index] = start_i
						} else {
							re.groups[g_index] = 0
						}
						re.groups[g_index+1] = i
					}

					re.prog[pc].group_rep++ // increase repetitions
					//C.printf("GROUP %d END %d\n", group_index, re.prog[pc].group_rep)
					m_state = .ist_quant_pg
					continue

				}

				m_state = .ist_quant_ng
				continue
			}

			// check OR
			else if ist == IST_OR_BRANCH {
				// for an OR token rep_max / rep_min are used as jump targets
				if state.match_index >= 0 {
					pc = re.prog[pc].rep_max
					//C.printf("IST_OR_BRANCH True pc: %d\n", pc)
				}else{
					pc = re.prog[pc].rep_min
					//C.printf("IST_OR_BRANCH False pc: %d\n", pc)
				}
				re.prog[pc].reset()
				m_state = .ist_load
				continue
			}

			// check IST_DOT_CHAR
			else if ist == IST_DOT_CHAR {
				//C.printf("IST_DOT_CHAR rep: %d\n", re.prog[pc].rep)
				state.match_flag = true

				if first_match < 0 {
					first_match = i
				}
				state.match_index = i
				re.prog[pc].rep++

				if re.prog[pc].rep == 1 {
					// save the state
					re.state_stack_index++
					re.state_stack[re.state_stack_index].pc = pc
					re.state_stack[re.state_stack_index].mi = state.match_index
					re.state_stack[re.state_stack_index].group_stack_index = group_index
				}

				if re.prog[pc].rep >= 1 && re.state_stack_index >= 0 {
					re.state_stack[re.state_stack_index].i  = i + char_len
				}

				// manage * and {0,} quantifier
				if re.prog[pc].rep_min > 0 {
					i += char_len // next char
				}

				if re.prog[pc+1].ist !=  IST_GROUP_END {
					m_state = .ist_next
					continue
				}
				// IST_DOT_CHAR is the last instruction, get all
				else {
					//C.printf("We are the last one!\n")
					pc--
					m_state = .ist_next_ks
					continue
				}

			}

			// char class IST
			else if ist == IST_CHAR_CLASS_POS || ist == IST_CHAR_CLASS_NEG {
				state.match_flag = false
				mut cc_neg := false

				if ist == IST_CHAR_CLASS_NEG {
					cc_neg = true
				}
				mut cc_res := re.check_char_class(pc,ch)

				if cc_neg {
					cc_res = !cc_res
				}

				if cc_res {
					state.match_flag = true

					if first_match < 0 {
						first_match = i
					}

					state.match_index = i

					re.prog[pc].rep++ // increase repetitions
					i += char_len // next char
					m_state = .ist_quant_p
					continue
				}
				m_state = .ist_quant_n
				continue
			}

			// check bsls
			else if ist == IST_BSLS_CHAR {
				state.match_flag = false
				tmp_res := re.prog[pc].validator(byte(ch))
				//C.printf("BSLS in_ch: %c res: %d\n", ch, tmp_res)
				if tmp_res {
					state.match_flag = true

					if first_match < 0 {
						first_match = i
					}

					state.match_index = i

					re.prog[pc].rep++ // increase repetitions
					i += char_len // next char
					m_state = .ist_quant_p
					continue
				}
				m_state = .ist_quant_n
				continue
			}

			// simple char IST
			else if ist & IST_SIMPLE_CHAR != 0 {
				//C.printf("IST_SIMPLE_CHAR\n")
				state.match_flag = false

				if (char_len<4 && ist == ch) ||
					(char_len == 4 && (ist | SIMPLE_CHAR_MASK) == ch )
				{
					state.match_flag = true

					if first_match < 0 {
						first_match = i
					}
					//C.printf("state.match_index: %d\n", state.match_index)
					state.match_index = i

					re.prog[pc].rep++ // increase repetitions
					i += char_len // next char
					m_state = .ist_quant_p
					continue
				}
				m_state = .ist_quant_n
				continue
			}
			/* UNREACHABLE */
			//C.printf("PANIC2!! state: %d\n", m_state)
			return ERR_INTERNAL_ERROR, i

		}

		/***********************************
		* Quantifier management
		***********************************/
		// ist_quant_ng
		if m_state == .ist_quant_ng {

			// we are finished here
			if group_index < 0 {
				//C.printf("Early stop!\n")
				result = NO_MATCH_FOUND
				m_state = .stop
				continue
			}

			tmp_pc := group_data[group_index]    // PC to the end of the group token
			rep    := re.prog[tmp_pc].group_rep  // use a temp variable
			re.prog[tmp_pc].group_rep = 0        // clear the repetitions

			//C.printf(".ist_quant_ng group_pc_end: %d rep: %d\n", tmp_pc,rep)

			if rep >= re.prog[tmp_pc].rep_min {
				//C.printf("ist_quant_ng GROUP CLOSED OK group_index: %d\n", group_index)

				i = group_stack[group_index]
				pc = tmp_pc
				group_index--
				m_state = .ist_next
				continue
			}
			else if re.prog[tmp_pc].next_is_or {
				//C.printf("ist_quant_ng OR Negative branch\n")

				i = group_stack[group_index]
				pc = re.prog[tmp_pc+1].rep_min -1
				group_index--
				m_state = .ist_next
				continue
			}
			else if rep>0 && rep < re.prog[tmp_pc].rep_min {
				//C.printf("ist_quant_ng UNDER THE MINIMUM g.i: %d\n", group_index)

				// check if we are inside a group, if yes exit from the nested groups
				if group_index > 0{
					group_index--
					pc = tmp_pc
					m_state = .ist_quant_ng //.ist_next
					continue
				}

				if group_index == 0 {
					group_index--
					pc = tmp_pc // TEST
					m_state = .ist_next
					continue
				}

				result = NO_MATCH_FOUND
				m_state = .stop
				continue
			}
			else if rep==0 && rep < re.prog[tmp_pc].rep_min {
				//C.printf("ist_quant_ng ZERO UNDER THE MINIMUM g.i: %d\n", group_index)

				if group_index > 0{
					group_index--
					pc = tmp_pc
					m_state = .ist_quant_ng //.ist_next
					continue
				}

				result = NO_MATCH_FOUND
				m_state = .stop
				continue
			}

			//C.printf("DO NOT STAY HERE!! {%d,%d}:%d\n", re.prog[tmp_pc].rep_min, re.prog[tmp_pc].rep_max, rep)
			/* UNREACHABLE */
			return ERR_INTERNAL_ERROR, i

		}
		// ist_quant_pg
		else if m_state == .ist_quant_pg {
			//C.printf(".ist_quant_pg\n")
			mut tmp_pc := pc
			if group_index >= 0 {
				tmp_pc = group_data[group_index]
			}

			rep := re.prog[tmp_pc].group_rep

			if rep < re.prog[tmp_pc].rep_min {
				//C.printf("ist_quant_pg UNDER RANGE\n")
				pc = re.prog[tmp_pc].goto_pc
				//group_index--

				m_state = .ist_next
				continue
			}
			else if rep == re.prog[tmp_pc].rep_max {
				//C.printf("ist_quant_pg MAX RANGE\n")
				re.prog[tmp_pc].group_rep = 0 // clear the repetitions
				group_index--
				m_state = .ist_next
				continue
			}
			else if rep >= re.prog[tmp_pc].rep_min {
				//C.printf("ist_quant_pg IN RANGE group_index:%d\n", group_index)
				pc = re.prog[tmp_pc].goto_pc - 1
				group_index--
				m_state = .ist_next
				continue
			}

			/* UNREACHABLE */
			//C.printf("PANIC3!! state: %d\n", m_state)
			return ERR_INTERNAL_ERROR, i
		}

		// ist_quant_n
		else if m_state == .ist_quant_n {
			rep := re.prog[pc].rep
			//C.printf("Here!! PC %d is_next_or: %d \n", pc, re.prog[pc].next_is_or)

			// zero quantifier * or ?
			if rep == 0 && re.prog[pc].rep_min == 0 {
				//C.printf("ist_quant_n ZERO RANGE MIN\n")
				m_state = .ist_next // go to next ist
				continue
			}

			// match failed
			else if rep == 0 && re.prog[pc].rep_min > 0 {
				//C.printf("ist_quant_n NO MATCH\n")
				// dummy
			}
			// match + or *
			else if rep >= re.prog[pc].rep_min {
				//C.printf("ist_quant_n MATCH RANGE\n")
				m_state = .ist_next
				continue
			}

			// check the OR if present
			if re.prog[pc].next_is_or {
				//C.printf("OR present on failing\n")
				state.match_index = -1
				m_state = .ist_next
				continue
			}

			// we are in a group manage no match from here
			if group_index >= 0 {
				//C.printf("ist_quant_n FAILED inside a GROUP group_index:%d\n", group_index)
				m_state = .ist_quant_ng
				continue
			}

			// no other options
			//C.printf("NO_MATCH_FOUND\n")
			result = NO_MATCH_FOUND
			m_state = .stop
			continue
			//return NO_MATCH_FOUND, 0
		}

		// ist_quant_p
		else if m_state == .ist_quant_p {
			// exit on first match
			if (re.flag & F_EFM) != 0 {
				return i,i+1
			}

			rep := re.prog[pc].rep

			// clear the actual dot char capture state
			if re.state_stack_index >= 0 {
				//C.printf("Drop the DOT_CHAR state!\n")
				re.state_stack_index--
			}

			// under range
			if rep > 0 && rep < re.prog[pc].rep_min {
				//C.printf("ist_quant_p UNDER RANGE\n")
				m_state = .ist_load // continue the loop
				continue
			}

			// range ok, continue loop
			else if rep >= re.prog[pc].rep_min && rep < re.prog[pc].rep_max {
				//C.printf("ist_quant_p IN RANGE\n")
				m_state = .ist_load
				continue
			}

			// max reached
			else if rep == re.prog[pc].rep_max {
				//C.printf("ist_quant_p MAX RANGE\n")
				m_state = .ist_next
				continue
			}

		}
		/* UNREACHABLE */
		//C.printf("PANIC4!! state: %d\n", m_state)
		return ERR_INTERNAL_ERROR, i
	}

	// Check the results
	if state.match_index >= 0 {
		if group_index < 0 {
			//C.printf("OK match,natural end [%d,%d]\n", first_match, i)
			return first_match, i
		} else {
			//C.printf("Skip last group\n")
			return first_match,group_stack[group_index--]
		}
	}
	//C.printf("NO_MATCH_FOUND, natural end\n")
	return NO_MATCH_FOUND, 0
}

/******************************************************************************
*
* Public functions
*
******************************************************************************/

//
// Inits
//

// regex builds a RE whose program and char class buffers are sized from the
// query length, then compiles in_query into it.
// Returns the RE together with the two values returned by re.compile
// (the compile result code and the error position).
pub fn regex(in_query string) (RE,int,int){
	slots := in_query.len+1

	mut re := RE{}
	re.group_max_nested = 8
	re.cc = [CharClass{}].repeat(slots)
	re.prog = [Token{}].repeat(slots)

	re_err,err_pos := re.compile(in_query)
	return re, re_err, err_pos
}

// new_regex returns a RE allocated with the default size (scale factor 1),
// usually sufficient for ordinary use
pub fn new_regex() RE {
	default_mult := 1
	return new_regex_by_size(default_mult)
}

// new_regex_by_size returns an empty RE whose buffers are scaled by mult:
// MAX_CODE_LEN*mult program tokens, MAX_CODE_LEN*mult char classes
// and 3*mult as the max number of nested groups
pub fn new_regex_by_size(mult int) RE {
	code_len := MAX_CODE_LEN*mult   // max program length, MAX_CODE_LEN instructions per unit of mult

	mut re := RE{}
	re.group_max_nested = 3*mult               // max nested group
	re.cc = [CharClass{}].repeat(code_len)     // char class list
	re.prog = [Token{}].repeat(code_len)       // program tokens

	return re
}

//
// Matchers
//

// match_string runs match_base on the whole in_txt and then applies the
// F_MS / F_ME flags: a non empty match that does not begin at index 0 (F_MS)
// or does not reach the end of the text (F_ME) is turned into NO_MATCH_FOUND, 0.
// Any other result of match_base is returned unchanged.
pub fn (re mut RE) match_string(in_txt string) (int,int) {
	start, end := re.match_base(in_txt.str,in_txt.len)

	// no match, error or empty match: nothing to filter
	if start < 0 || end <= start {
		return start, end
	}

	misses_start := (re.flag & F_MS) != 0 && start > 0
	misses_end   := (re.flag & F_ME) != 0 && end < in_txt.len
	if misses_start || misses_end {
		return NO_MATCH_FOUND, 0
	}
	return start, end
}

//
// Finders
//

// find tries to find the first match in the input string.
// Every start position i of in_txt is probed in order:
// 1. a quick pre-test with F_EFM set (exit on the first token match) on a slice
//    of at most re.query.len bytes starting at i
// 2. if the pre-test passes, a complete match on the rest of the text with the caller's flags
// Returns the (start, end) indexes of the match relative to in_txt, or NO_MATCH_FOUND, 0.
// re.flag is always restored to the value it had on entry.
pub fn (re mut RE) find(in_txt string) (int,int) {
	old_flag := re.flag
	mut i     := 0
	mut start := -1
	mut end   := -1

	for i < in_txt.len {

		// test only the first part of the query string
		re.flag |= F_EFM // set to exit on the first token match
		mut tmp_end := i+re.query.len
		if tmp_end > in_txt.len { tmp_end = in_txt.len }
		tmp_txt := string{ str: in_txt.str+i, len: tmp_end-i }
		start, end = re.match_base(tmp_txt.str, tmp_txt.len)
		// restore the caller flags right away so no exit path can leak F_EFM
		re.flag = old_flag

		if start >= 0 && end > start {
			// test a complete match
			tmp_txt1 := string{ str: in_txt.str+i , len: in_txt.len-i }
			start, end = re.match_base(tmp_txt1.str, tmp_txt1.len)

			if start >= 0 && end > start {
				if (old_flag & F_MS) != 0 && (i+start) > 0 {
					return NO_MATCH_FOUND, 0
				}
				if (old_flag & F_ME) != 0 && (i+end) < in_txt.len {
					return NO_MATCH_FOUND, 0
				}

				return i+start, i+end
			}
		}

		i++
		// with F_MS only a match starting at index 0 is valid, later positions cannot succeed
		if (old_flag & F_MS) != 0 {
			return NO_MATCH_FOUND, 0
		}
	}
	return NO_MATCH_FOUND, 0
}