import regex

/******************************************************************************
*
* Test section
*
******************************************************************************/

// TestItem is one match/find case: the query `q` is compiled and run against
// `src`; the returned start/end indexes must equal `s` and `e`.
// Negative cases in the suite use s = -1, e = 0.
struct TestItem {
	src string
	q string
	s int = 0
	e int = 0
}

const(
match_test_suite = [
	// positive
	TestItem{"this is a good.",r"this",0,4},
	TestItem{"this is a good.",r"good",10,14},
	TestItem{"this is a good.",r"go+d",10,14},
	TestItem{"this is a good.",r"g[oae]+d",10,14},
	TestItem{"this is a goed.",r"g[oae]+d",10,14},
	TestItem{"this is a good.",r"g[oae]*d",10,14},
	TestItem{"this is a goaezd.",r"g[ea-cm-z]*d",10,16},
	TestItem{"this is a good.",r"this (\w+) a",0,9},
	TestItem{"this is a good.",r"this( \w+){2} g",0,11},
	TestItem{"this is a good.",r"( ?\w+){,1}",0,4},
	TestItem{"this is a good.",r"( ?\w+)+",0,14},
	TestItem{"this is a good.",r"this( \w+)+",0,14},
	TestItem{"this is a good sample.",r"( ?\w+){,2}",0,7},
	TestItem{"this is a good sample.",r"( ?\w+){,3}",0,9},
	TestItem{"this is a good sample.",r"( ?\w+){,4}",0,14},
	TestItem{"this is a good sample.",r"( ?\w+){,5}",0,21},
	TestItem{"this is a good sample.",r"( ?\w+){2,3}",0,9},
	TestItem{"this is a good sample.",r"(\s?\w+){2,3}",0,9},
	TestItem{"this these those.",r"(th[ei]se?\s|\.)+",0,11},
	TestItem{"this these those ",r"(th[eio]se? ?)+",0,17},
	TestItem{"this these those ",r"(th[eio]se? )+",0,17},
	TestItem{"this,these,those. over",r"(th[eio]se?[,. ])+",0,17},
	TestItem{"soday,this,these,those. over",r"(th[eio]se?[,. ])+",6,23},

	TestItem{"cpapaz",r"(c(pa)+z)",0,6},
	TestItem{"this is a cpapaz over",r"(c(pa)+z)",10,16},
	TestItem{"this is a cpapapez over",r"(c(p[ae])+z)",10,18},
	TestItem{"test@post.pip.com",r"[a-z0-9_]+@([a-z0-9_]+\.?)+",0,17},
	TestItem{"test1@post.pip.com, pera",r"[\w]+@([\w]+\.)+\w+",0,18},
	TestItem{"pippo@pera.com ",r"[a-z0-9_]+@([a-z0-9_]+\.?)+",0,14},
	TestItem{"adce aabe",r"(a(ab)+)|(a(dc)+)e",0,4},
	TestItem{"zadce aabe",r"(a(ab)+)|(a(dc)+)e",1,5},
	TestItem{"abbz accz addz.",r"c|(d)|e|(ab+)",0,3},
	TestItem{"this those these ciao",r"((t[hieo]+se?)\s*)+",0,17},
	TestItem{"this ciao",r"((t[hieo]+se?)\s*)+",0,5},
	TestItem{"this cpapaz adce aabe",r"(c(pa)+z)(\s[\a]+){2}",5,21},
	TestItem{"1234this cpapaz adce aabe",r"(c(pa)+z)(\s[\a]+){2}$",9,25},
	TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}",5,21},
	TestItem{"123cpapaz ole. pippo",r"(c(pa)+z)(\s+\a+[\.,]?)+",3,20},

	TestItem{"this is a good sample.",r".*i(\w)+",0,4},
	TestItem{"soday,this,these,those. over",r".*,(th[eio]se?[,. ])+",0,23},
	TestItem{"soday,this,these,thesa.thesi over",r".*,(th[ei]se?[,. ])+(thes[ai][,. ])+",0,29},
	TestItem{"cpapaz ole. pippo,",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
	TestItem{"cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,17},
	TestItem{"cpapaz ole. pippo, 852",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,18},
	TestItem{"123cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},
	TestItem{"...cpapaz ole. pippo",r".*(c(pa)+z)(\s+\a+[\.,]?)+",0,20},

	TestItem{"cpapaz ole. pippo,",r".*c.+ole.*pi",0,14},
	TestItem{"cpapaz ole. pipipo,",r".*c.+ole.*p([ip])+o",0,18},
	TestItem{"cpapaz ole. pipipo",r"^.*c.+ol?e.*p([ip])+o$",0,18},
	TestItem{"abbb",r"ab{2,3}?",0,3},
	TestItem{" pippo pera",r"\s(.*)pe(.*)",0,11},
	TestItem{" abb",r"\s(.*)",0,4},

	// negative
	TestItem{"zthis ciao",r"((t[hieo]+se?)\s*)+",-1,0},
	TestItem{"this is a good.",r"thes",-1,0},
	TestItem{"test1post.pip.com, pera",r"[\w]+@([\w]+\.)+\w+",-1,0},
	TestItem{"this cpapaz adce",r"(c(pa)+z)(\s[\a]+){2}",-1,0},
	TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
	TestItem{"1234this cpapaz adce aabe ter",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
	TestItem{"cpapaz ole. pipipo,",r"^.*c.+ol?e.*p([ip])+o$",-1,0},

	// check unicode
	TestItem{"this is a Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r".*a [Ⅰ-Ⅵ ]+",0,34},
	TestItem{"123Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r"[Ⅰ-Ⅴ\s]+",3,23},
]
)

// TestItemFa is one find_all case: `r` is the exact int array that
// `find_all` must return for query `q` on `src`
// (presumably flattened start/end index pairs - TODO confirm).
struct TestItemFa {
	src string
	q string
	r []int
}

const (
match_test_suite_fa = [
	// find_all tests
	TestItemFa{
		"oggi pippo è andato a casa di pluto ed ha trovato pippo",
		r"p[iplut]+o",
		[5, 10, 31, 36, 51, 56]
	},
	TestItemFa{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		[5, 10, 31, 39, 54, 65]
	},
]
)

// TestItemRe is one replace case: replacing the matches of `q` in `src`
// with `rep` must produce exactly `r`.
struct TestItemRe {
	src string
	q string
	rep string
	r string
}

const (
match_test_suite_re = [
	// replace tests
	TestItemRe{
		"oggi pibao è andato a casa di pbababao ed ha trovato pibabababao",
		r"(pi?(ba)+o)",
		"CIAO",
		"oggi CIAO è andato a casa di CIAO ed ha trovato CIAO"
	},
	TestItemRe{
		"Today is a good day and tomorrow will be for sure.",
		r"[Tt]o\w+",
		"CIAO",
		"CIAO is a good day and CIAO will be for sure."
	}
]
)

// TestItemCGroup is one capturing-group case.
// `s`/`e` are the expected match bounds, `cg` is compared against
// `re.group_csave` (element 0 is used as the record count, every record
// spanning three ints) and `cgn` maps a group name to its expected
// zero-based group index.
struct TestItemCGroup {
	src string
	q string
	s int = 0
	e int = 0
	cg []int
	cgn map[string]int
}

const (
// NOTE(review): the `<format>` / `<token>` group names in the patterns below
// were restored so that they agree with the expected `cgn` maps; without a
// `<name>` the `(?P...)` groups cannot feed `group_map`. The name used for
// the second group of items 2 and 3 is inferred - confirm against upstream.
cgroups_test_suite = [
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?:ftps?)://(?P<token>[\w_]+.)+",0,46,
		[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46],
		{'format':0,'token':1}
	},
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+",0,46,
		[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46],
		{'format':0,'token':1}
	},
	TestItemCGroup{
		"http://www.ciao.mondo/hello/pippo12_/pera.html",
		r"(?P<format>https?)|(?P<format>ftps?)://([\w_]+.)+",0,46,
		[8, 0, 0, 4, 1, 7, 11, 1, 11, 16, 1, 16, 22, 1, 22, 28, 1, 28, 37, 1, 37, 42, 1, 42, 46],
		{'format':0}
	},
]
)

// test_regex runs the four table-driven suites declared above:
// capturing groups, find_all, replace, and match/find.
// Any mismatch - or any query that fails to compile - fails the test.
fn test_regex(){

	// check capturing groups
	for c,to in cgroups_test_suite {
		// debug print
		//println("#$c [$to.src] q[$to.q] ($to.s, $to.e)")

		mut re, re_err, err_pos := regex.regex(to.q)
		// enable continuous group saving with room for 20 records of 3 ints
		// plus the leading counter
		re.group_csave = [-1].repeat(3*20+1)

		if re_err == regex.COMPILE_OK {
			start, end := re.match_string(to.src)

			mut tmp_str := ""
			if start >= 0 && end > start{
				tmp_str = to.src[start..end]
			}

			if start != to.s || end != to.e {
				println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
				println("ERROR!")
				//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
				assert false
				break
			}

			// check cgroups
			if re.group_csave.len == 0 || re.group_csave[0] != to.cg[0] {
				println("Capturing group len error!")
				assert false
			}

			// check captured groups: walk every saved int backwards,
			// index 0 (the counter) was already verified above
			mut ln := re.group_csave[0]*3
			for ln > 0 {
				if re.group_csave[ln] != to.cg[ln] {
					assert false
				}
				ln--
			}

			// check named captured groups
			for k in to.cgn.keys() {
				// group_map stores index+1 so that 0 can mean "name not found";
				// subtract 1 to compare with the zero-based expected index
				if to.cgn[k] != (re.group_map[k]-1) {
					println("Named capturing group error! [$k]")
					assert false
				}
			}
		} else {
			// a query that does not compile must fail the test instead of
			// being silently skipped (same handling as the other suites)
			println("query: $to.q")
			lc := "-".repeat(err_pos-1)
			println("err : $lc^")
			err_str := re.get_parse_error_string(re_err)
			println("ERROR: $err_str")
			assert false
		}
	}

	// check find_all
	for c,to in match_test_suite_fa{
		// debug print
		//println("#$c [$to.src] q[$to.q] $to.r")

		mut re, re_err, err_pos := regex.regex(to.q)
		if re_err == regex.COMPILE_OK {
			res := re.find_all(to.src)
			if res.len != to.r.len {
				println("ERROR: find_all, array of different size.")
				assert false
			}

			for c1,i in res {
				if i != to.r[c1] {
					println("ERROR: find_all, different indexes.")
					assert false
				}
			}
		} else {
			println("query: $to.q")
			lc := "-".repeat(err_pos-1)
			println("err : $lc^")
			err_str := re.get_parse_error_string(re_err)
			println("ERROR: $err_str")
			assert false
		}
	}

	// check replace
	for c,to in match_test_suite_re{
		// debug print
		//println("#$c [$to.src] q[$to.q] $to.r")

		mut re, re_err, err_pos := regex.regex(to.q)
		if re_err == regex.COMPILE_OK {
			res := re.replace(to.src,to.rep)
			if res != to.r {
				println("ERROR: replace.")
				assert false
			}
		} else {
			println("query: $to.q")
			lc := "-".repeat(err_pos-1)
			println("err : $lc^")
			err_str := re.get_parse_error_string(re_err)
			println("ERROR: $err_str")
			assert false
		}
	}

	// check match and find
	for c,to in match_test_suite {
		// debug print
		//println("#$c [$to.src] q[$to.q] $to.s")

		// test the find: only cases whose expected start is past index 0
		if to.s > 0 {
			mut re, re_err, err_pos := regex.regex(to.q)
			if re_err == regex.COMPILE_OK {
				//q_str := re.get_query()
				//println("Query: $q_str")
				start,end := re.find(to.src)

				if start != to.s || end != to.e {
					err_str := re.get_parse_error_string(start)
					println("ERROR : $err_str")
					assert false
				} else {
					//tmp_str := text[start..end]
					//println("found in [$start, $end] => [$tmp_str]")
					assert true
				}
			} else {
				println("query: $to.q")
				lc := "-".repeat(err_pos-1)
				println("err : $lc^")
				err_str := re.get_parse_error_string(re_err)
				println("ERROR: $err_str")
				assert false
			}
			continue
		}

		// test the match
		mut re := regex.new_regex()
		//re.debug = true

		re_err,err_pos := re.compile(to.q)
		if re_err == regex.COMPILE_OK {
			//println("#$c [$to.src] q[$to.q]")
			start, end := re.match_string(to.src)

			mut tmp_str := ""
			if start >= 0 && end > start{
				tmp_str = to.src[start..end]
			}

			if start != to.s || end != to.e {
				println("#$c [$to.src] q[$to.q] res[$tmp_str] $start, $end")
				println("ERROR!")
				//C.printf("ERROR!! res:(%d, %d) refh:(%d, %d)\n",start, end, to.s, to.e)
				assert false
				break
			}

			// rerun to test consistency
			tmp_str1 := to.src.clone()
			start1, end1 := re.match_string(tmp_str1)
			if start1 != start || end1 != end {
				println("two run ERROR!!")
				assert false
				break
			}
		} else {
			println("query: $to.q")
			lc := "-".repeat(err_pos-1)
			println("err : $lc^")
			err_str := re.get_parse_error_string(re_err)
			println("ERROR: $err_str")
			assert false
			break
		}
	}
}