regex: fix formatting inconsistencies in README.md (#17940)

2023-08-10 21:13:21 +03:00 · 2023-04-13 13:44:45 +02:00 · 2023-04-13 13:44:45 +02:00 · 489ac892b9
commit 489ac892b9
parent 524f7c3ead
1 changed files with 187 additions and 181 deletions
--- a/vlib/regex/README.md
+++ b/vlib/regex/README.md
@ -1,4 +1,5 @@
 # Description
 `regex` is a small but powerful regular expression library,
 written in pure V.
@ -15,8 +16,7 @@ are valid for all the `regex` module features:
 1. The matching stops at the end of the string, *not* at newline characters.
 2. The basic atomic elements of this regex engine are the tokens.
-In a query string a simple character is a token.
+   In a query string a simple character is a token.
 ## Differences with PCRE:
@ -28,36 +28,35 @@ In a query string a simple character is a token.
 The main differences can be summarized in the following points:
 - The basic element **is the token not the sequence of symbols**, and the most
-simple token, is a single character.
+  simple token, is a single character.
 - `|` **the OR operator acts on tokens,** for example `abc|ebc` is not
-`abc` OR `ebc`. Instead it is evaluated like `ab`, followed by `c OR e`,
+  `abc` OR `ebc`. Instead it is evaluated like `ab`, followed by `c OR e`,
-followed by `bc`, because the **token is the base element**,
+  followed by `bc`, because the **token is the base element**,
-not the sequence of symbols.
+  not the sequence of symbols.
-Note: **Two char classes with an `OR` in the middle is a syntax error.**
+  Note: **Two char classes with an `OR` in the middle is a syntax error.**
 - The **match operation stops at the end of the string**. It does *NOT* stop
-at new line characters.
+  at new line characters.
 - The **match operation stops at the end of the string**. It does *NOT* stop
  at new line characters.
 ## Tokens
 The tokens are the atomic units, used by this regex engine.
 They can be one of the following:
 ### Simple char
 This token is a simple single character like `a` or `b` etc.
 ### Match positional delimiters
 `^` Matches the start of the string.
 `$` Matches the end of the string.
 ### Char class (cc)
 The character classes match all the chars specified inside. Use square
@ -98,14 +97,14 @@ For example `\w` is the meta-char `w`.
 A meta-char can match different types of characters.
-* `\w` matches a word char char `[a-zA-Z0-9_]`
+- `\w` matches a word char `[a-zA-Z0-9_]`
-* `\W` matches a non word char
+- `\W` matches a non word char
-* `\d` matches a digit `[0-9]`
+- `\d` matches a digit `[0-9]`
-* `\D` matches a non digit
+- `\D` matches a non digit
-* `\s` matches a space char, one of `[' ','\t','\n','\r','\v','\f']`
+- `\s` matches a space char, one of `[' ','\t','\n','\r','\v','\f']`
-* `\S` matches a non space char
+- `\S` matches a non space char
-* `\a` matches only a lowercase char `[a-z]`
+- `\a` matches only a lowercase char `[a-z]`
-* `\A` matches only an uppercase char `[A-Z]`
+- `\A` matches only an uppercase char `[A-Z]`
 ### Quantifier
@ -142,11 +141,12 @@ with a regex. The following table show the query strings and the result of
 parsing source string.
 | query string | result      |
-|--------------|-------------|
+| ------------ | ----------- |
 | `.*c`        | `abc`       |
 | `.*dd`       | `abcc dd`   |
 | `ab.*e`      | `abccc dde` |
 | `ab.{3} .*e` | `abccc dde` |
 The dot matches any character, until the next token match is satisfied.
 > Important Note: Consecutive dots, for example `...`, are not allowed.
@ -195,7 +195,7 @@ i.e. the space char (ascii code 32) followed by the `?` quantifier,
 which means that the preceding space should be matched 0 or 1 time.
 This explains why the `(c(pa)+z ?)+` query string,
-can match `cpaz cpapaz cpapapaz` .
+can match `cpaz cpapaz cpapapaz`.
 In this implementation the groups are "capture groups". This means that the
 last temporal result for each group, can be retrieved from the `RE` struct.
@ -278,10 +278,10 @@ Others utility functions are `get_group_by_id` and `get_group_bounds_by_id`
 that get directly the string of a group using its `id`:
 ```v ignore
-txt := "my used string...."
+txt := 'my used string....'
-for g_index := 0; g_index < re.group_count ; g_index++ {
+for g_index := 0; g_index < re.group_count; g_index++ {
-	println("#${g_index} [${re.get_group_by_id(txt, g_index)}] \
+	println('#${g_index} [${re.get_group_by_id(txt, g_index)}] \
-    	bounds: ${re.get_group_bounds_by_id(g_index)}")
+		bounds: ${re.get_group_bounds_by_id(g_index)}')
 }
 ```
@ -311,32 +311,33 @@ not be saved.
 ```v ignore
 import regex
-fn main(){
+
-    txt   := "http://www.ciao.mondo/hello/pippo12_/pera.html"
+fn main() {
-    query := r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+"
+	txt := 'http://www.ciao.mondo/hello/pippo12_/pera.html'
 	query := r'(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+'
 	mut re := regex.regex_opt(query) or { panic(err) }
-    //println(re.get_code())   // uncomment to see the print of the regex execution code
+	// println(re.get_code())   // uncomment to see the print of the regex execution code
-    re.debug=2  // enable maximum log
+	re.debug = 2 // enable maximum log
-    println("String: ${txt}")
+	println('String: ${txt}')
-    println("Query : ${re.get_query()}")
+	println('Query : ${re.get_query()}')
-    re.debug=0  // disable log
+	re.debug = 0 // disable log
 	re.group_csave_flag = true
 	start, end := re.match_string(txt)
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
 	} else {
-        println("No Match")
+		println('No Match')
 	}
-    if re.group_csave_flag == true && start >= 0 && re.group_csave.len > 0{
+	if re.group_csave_flag == true && start >= 0 && re.group_csave.len > 0 {
-        println("cg: ${re.group_csave}")
+		println('cg: ${re.group_csave}')
 		mut cs_i := 1
-        for cs_i < re.group_csave[0]*3 {
+		for cs_i < re.group_csave[0] * 3 {
 			g_id := re.group_csave[cs_i]
-            st   := re.group_csave[cs_i+1]
+			st := re.group_csave[cs_i + 1]
-            en   := re.group_csave[cs_i+2]
+			en := re.group_csave[cs_i + 2]
-            println("cg[${g_id}] ${st} ${en}:[${txt[st..en]}]")
+			println('cg[${g_id}] ${st} ${en}:[${txt[st..en]}]')
 			cs_i += 3
 		}
 	}
@ -374,23 +375,25 @@ that is a map from `string` to `int`, where the value is the index in
 `group_csave` list of indexes.
 Here is an example for how to use them:
 ```v ignore
 import regex
-fn main(){
+
-    txt   := "http://www.ciao.mondo/hello/pippo12_/pera.html"
+fn main() {
-    query := r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+"
+	txt := 'http://www.ciao.mondo/hello/pippo12_/pera.html'
 	query := r'(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+'
 	mut re := regex.regex_opt(query) or { panic(err) }
-    //println(re.get_code())   // uncomment to see the print of the regex execution code
+	// println(re.get_code())   // uncomment to see the print of the regex execution code
-    re.debug=2  // enable maximum log
+	re.debug = 2 // enable maximum log
-    println("String: ${txt}")
+	println('String: ${txt}')
-    println("Query : ${re.get_query()}")
+	println('Query : ${re.get_query()}')
-    re.debug=0  // disable log
+	re.debug = 0 // disable log
 	start, end := re.match_string(txt)
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
 	} else {
-        println("No Match")
+		println('No Match')
 	}
 	for name in re.group_map.keys() {
@ -414,6 +417,7 @@ In order to simplify the use of the named groups, it is possible to
 use a name map in the `re` struct, using the function `re.get_group_by_name`.
 Here is a more complex example of using them:
 ```v oksyntax
 // This function demonstrates the use of the named groups
 fn convert_html_rgb_n(in_col string) u32 {
@ -443,15 +447,13 @@ Other utilities are `get_group_by_name` and `get_group_bounds_by_name`,
 that return the string of a group using its `name`:
 ```v ignore
-txt := "my used string...."
+txt := 'my used string....'
 for name in re.group_map.keys() {
 	println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
 		bounds: ${re.get_group_bounds_by_name(name)}")
 }
 ```
 ### Groups query functions
 These functions are helpers to query the captured groups
@ -522,32 +524,35 @@ pub fn regex_opt(in_query string) ?RE
 ```v ignore
 // new_regex create a REgex of small size, usually sufficient for ordinary use
 pub fn new() RE
 ```
 #### **Custom initialization**
 For some particular needs, it is possible to initialize a fully customized regex:
 ```v ignore
-pattern = r"ab(.*)(ac)"
+pattern = r'ab(.*)(ac)'
 // init custom regex
 mut re := regex.RE{}
 // max program length, can not be longer than the pattern
-re.prog = []Token    {len: pattern.len + 1}
+re.prog = []Token{len: pattern.len + 1}
 // there can not be more char classes than the length of the pattern
 re.cc = []CharClass{len: pattern.len}
 re.group_csave_flag = false // true enable continuous group saving if needed
 re.group_max_nested = 128 // set max 128 group nested possible
-re.group_max        = pattern.len>>1 // we can't have more groups than the half of the pattern length
+re.group_max = pattern.len >> 1 // we can't have more groups than the half of the pattern length
 re.group_stack = []int{len: re.group_max, init: -1}
 re.group_data = []int{len: re.group_max, init: -1}
 ```
 ### Compiling
 After an initializer is used, the regex expression must be compiled with:
 ```v ignore
 // compile compiles the REgex returning an error if the compilation fails
-pub fn (re mut RE) compile_opt(in_txt string)?
+pub fn (mut re RE) compile_opt(in_txt string) ?
 ```
 ### Matching Functions
@ -556,8 +561,7 @@ These are the matching functions
 ```v ignore
 // match_string try to match the input string, return start and end index if found else start is -1
-pub fn (re mut RE) match_string(in_txt string) (int,int)
+pub fn (mut re RE) match_string(in_txt string) (int, int)
 ```
 ## Find and Replace
@ -569,16 +573,16 @@ There are the following find  and replace functions:
 ```v ignore
 // find try to find the first match in the input string
 // return start and end index if found else start is -1
-pub fn (re mut RE) find(in_txt string) (int,int)
+pub fn (mut re RE) find(in_txt string) (int, int)
 // find_all find all the "non overlapping" occurrences of the matching pattern
 // return a list of start end indexes like: [3,4,6,8]
 // the matches are [3,4] and [6,8]
-pub fn (re mut RE) find_all(in_txt string) []int
+pub fn (mut re RE) find_all(in_txt string) []int
 // find_all find all the "non overlapping" occurrences of the matching pattern
 // return a list of strings
-// the result is like ["first match","secon match"]
+// the result is like ['first match','second match']
 pub fn (mut re RE) find_all_str(in_txt string) []string
 ```
@ -587,16 +591,16 @@ pub fn (mut re RE) find_all_str(in_txt string) []string
 ```v ignore
 // replace return a string where the matches are replaced with the repl_str string,
 // this function support groups in the replace string
-pub fn (re mut RE) replace(in_txt string, repl string) string
+pub fn (mut re RE) replace(in_txt string, repl string) string
 ```
 The replace string can include group references:
 ```v ignore
-txt   := "Today it is a good day."
+txt := 'Today it is a good day.'
 query := r'(a\w)[ ,.]'
 mut re := regex.regex_opt(query)?
-res := re.replace(txt, r"__[\0]__")
+res := re.replace(txt, r'__[\0]__')
 ```
 In this example we used the group `0` in the replace string: `\0`. The result will be:
@ -617,6 +621,7 @@ pub fn (mut re RE) replace_simple(in_txt string, repl string) string
 ```
 If it is needed to replace N instances of the found strings it is possible to use:
 ```v ignore
 // replace_n return a string where the first `count` matches are replaced with the repl_str string
 // `count` indicate the number of max replacements that will be done.
@ -650,16 +655,17 @@ The following example will clarify its usage:
 import regex
 // customized replace functions
 // it will be called on each non overlapped find
 fn my_repl(re regex.RE, in_txt string, start int, end int) string {
 	g0 := re.get_group_by_id(in_txt, 0)
 	g1 := re.get_group_by_id(in_txt, 1)
 	g2 := re.get_group_by_id(in_txt, 2)
-    return "*${g0}*${g1}*${g2}*"
+	return '*${g0}*${g1}*${g2}*'
 }
-fn main(){
+fn main() {
-    txt   := "today [John] is gone to his house with (Jack) and [Marie]."
+	txt := 'today [John] is gone to his house with (Jack) and [Marie].'
-    query := r"(.)(\A\w+)(.)"
+	query := r'(.)(\A\w+)(.)'
 	mut re := regex.regex_opt(query) or { panic(err) }
@ -674,8 +680,6 @@ Output:
 today *[*John*]* is gone to his house with *(*Jack*)* and *[*Marie*]*.
 ```
 ## Debugging
 This module has a few small utilities to help you write regex patterns.
@ -810,86 +814,88 @@ Here an example that perform some basically match of strings
 ```v ignore
 import regex
-fn main(){
+fn main() {
-    txt   := "http://www.ciao.mondo/hello/pippo12_/pera.html"
+	txt := 'http://www.ciao.mondo/hello/pippo12_/pera.html'
-    query := r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+"
+	query := r'(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+'
 	mut re := regex.regex_opt(query) or { panic(err) }
 	start, end := re.match_string(txt)
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
-        for g_index := 0; g_index < re.group_count ; g_index++ {
+		for g_index := 0; g_index < re.group_count; g_index++ {
-            println("#${g_index} [${re.get_group_by_id(txt, g_index)}] \
+			println('#${g_index} [${re.get_group_by_id(txt, g_index)}] \
-            bounds: ${re.get_group_bounds_by_id(g_index)}")
+				bounds: ${re.get_group_bounds_by_id(g_index)}')
 		}
 		for name in re.group_map.keys() {
 			println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
 				bounds: ${re.get_group_bounds_by_name(name)}")
 		}
 	} else {
-        println("No Match")
+		println('No Match')
 	}
 }
 ```
 Here is an example of total customization of the regex environment creation:
 ```v ignore
 import regex
-fn main(){
+fn main() {
-    txt   := "today John is gone to his house with Jack and Marie."
+	txt := 'today John is gone to his house with Jack and Marie.'
-    query := r"(?:(?P<word>\A\w+)|(?:\a\w+)[\s.]?)+"
+	query := r'(?:(?P<word>\A\w+)|(?:\a\w+)[\s.]?)+'
 	// init regex
 	mut re := regex.RE{}
 	// max program length, can not be longer than the query
-    re.prog = []regex.Token    {len: query.len + 1}
+	re.prog = []regex.Token{len: query.len + 1}
 	// there can not be more char classes than the length of the query
 	re.cc = []regex.CharClass{len: query.len}
-    re.prog = []regex.Token    {len: query.len+1}
+	re.prog = []regex.Token{len: query.len + 1}
 	// enable continuous group saving
 	re.group_csave_flag = true
 	// set max 128 group nested
 	re.group_max_nested = 128
 	// we can't have more groups than the half of the query length
-    re.group_max        = query.len>>1
+	re.group_max = query.len >> 1
 	// compile the query
 	re.compile_opt(query) or { panic(err) }
 	start, end := re.match_string(txt)
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
 	} else {
-        println("No Match")
+		println('No Match')
 	}
 	// show results for continuous group saving
-    if re.group_csave_flag == true && start >= 0 && re.group_csave.len > 0{
+	if re.group_csave_flag == true && start >= 0 && re.group_csave.len > 0 {
-        println("cg: ${re.group_csave}")
+		println('cg: ${re.group_csave}')
 		mut cs_i := 1
-        for cs_i < re.group_csave[0]*3 {
+		for cs_i < re.group_csave[0] * 3 {
 			g_id := re.group_csave[cs_i]
-            st   := re.group_csave[cs_i+1]
+			st := re.group_csave[cs_i + 1]
-            en   := re.group_csave[cs_i+2]
+			en := re.group_csave[cs_i + 2]
-            println("cg[${g_id}] ${st} ${en}:[${txt[st..en]}]")
+			println('cg[${g_id}] ${st} ${en}:[${txt[st..en]}]')
 			cs_i += 3
 		}
 	}
 	// show results for captured groups
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
-        for g_index := 0; g_index < re.group_count ; g_index++ {
+		for g_index := 0; g_index < re.group_count; g_index++ {
-            println("#${g_index} [${re.get_group_by_id(txt, g_index)}] \
+			println('#${g_index} [${re.get_group_by_id(txt, g_index)}] \
-            bounds: ${re.get_group_bounds_by_id(g_index)}")
+				bounds: ${re.get_group_bounds_by_id(g_index)}')
 		}
 		for name in re.group_map.keys() {
 			println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
 				bounds: ${re.get_group_bounds_by_name(name)}")
 		}
 	} else {
-        println("No Match")
+		println('No Match')
 	}
 }
 ```