regex: fix formatting inconsistencies in README.md (#17940)

2023-08-10 21:13:21 +03:00 · 2023-04-13 13:44:45 +02:00
parent 524f7c3ead
commit 489ac892b9
1 changed files with 187 additions and 181 deletions
--- a/vlib/regex/README.md
+++ b/vlib/regex/README.md
@@ -1,4 +1,5 @@
 # Description
 `regex` is a small but powerful regular expression library,
 written in pure V.
@@ -17,7 +18,6 @@ are valid for all the `regex` module features:
 2. The basic atomic elements of this regex engine are the tokens.
   In a query string a simple character is a token.
 ## Differences with PCRE:
 > **Note**
@@ -39,25 +39,24 @@ Note: **Two char classes with an `OR` in the middle is a syntax error.**
 - The **match operation stops at the end of the string**. It does *NOT* stop
  at new line characters.
 - The **match operation stops at the end of the string**. It does *NOT* stop
  at new line characters.
 ## Tokens
 The tokens are the atomic units, used by this regex engine.
 They can be one of the following:
 ### Simple char
 This token is a simple single character like `a` or `b` etc.
 ### Match positional delimiters
 `^` Matches the start of the string.
 `$` Matches the end of the string.
 ### Char class (cc)
 The character classes match all the chars specified inside. Use square
@@ -98,14 +97,14 @@ For example `\w` is the meta-char `w`.
 A meta-char can match different types of characters.
-* `\w` matches a word char char `[a-zA-Z0-9_]`
+- `\w` matches a word char `[a-zA-Z0-9_]`
-* `\W` matches a non word char
+- `\W` matches a non word char
-* `\d` matches a digit `[0-9]`
+- `\d` matches a digit `[0-9]`
-* `\D` matches a non digit
+- `\D` matches a non digit
-* `\s` matches a space char, one of `[' ','\t','\n','\r','\v','\f']`
+- `\s` matches a space char, one of `[' ','\t','\n','\r','\v','\f']`
-* `\S` matches a non space char
+- `\S` matches a non space char
-* `\a` matches only a lowercase char `[a-z]`
+- `\a` matches only a lowercase char `[a-z]`
-* `\A` matches only an uppercase char `[A-Z]`
+- `\A` matches only an uppercase char `[A-Z]`
 ### Quantifier
@@ -142,11 +141,12 @@ with a regex. The following table show the query strings and the result of
 parsing source string.
 | query string | result      |
-|--------------|-------------|
+| ------------ | ----------- |
 | `.*c`        | `abc`       |
 | `.*dd`       | `abcc dd`   |
 | `ab.*e`      | `abccc dde` |
 | `ab.{3} .*e` | `abccc dde` |
 The dot matches any character, until the next token match is satisfied.
 > Important Note: Consecutive dots, for example `...`, are not allowed.
@@ -278,10 +278,10 @@ Others utility functions are `get_group_by_id` and `get_group_bounds_by_id`
 that get directly the string of a group using its `id`:
 ```v ignore
-txt := "my used string...."
+txt := 'my used string....'
 for g_index := 0; g_index < re.group_count; g_index++ {
-	println("#${g_index} [${re.get_group_by_id(txt, g_index)}] \
+	println('#${g_index} [${re.get_group_by_id(txt, g_index)}] \
-    	bounds: ${re.get_group_bounds_by_id(g_index)}")
+		bounds: ${re.get_group_bounds_by_id(g_index)}')
 }
 ```
@@ -311,32 +311,33 @@ not be saved.
 ```v ignore
 import regex
 fn main() {
-    txt   := "http://www.ciao.mondo/hello/pippo12_/pera.html"
+	txt := 'http://www.ciao.mondo/hello/pippo12_/pera.html'
-    query := r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+"
+	query := r'(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+'
 	mut re := regex.regex_opt(query) or { panic(err) }
 	// println(re.get_code())   // uncomment to see the print of the regex execution code
 	re.debug = 2 // enable maximum log
-    println("String: ${txt}")
+	println('String: ${txt}')
-    println("Query : ${re.get_query()}")
+	println('Query : ${re.get_query()}')
 	re.debug = 0 // disable log
 	re.group_csave_flag = true
 	start, end := re.match_string(txt)
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
 	} else {
-        println("No Match")
+		println('No Match')
 	}
 	if re.group_csave_flag == true && start >= 0 && re.group_csave.len > 0 {
-        println("cg: ${re.group_csave}")
+		println('cg: ${re.group_csave}')
 		mut cs_i := 1
 		for cs_i < re.group_csave[0] * 3 {
 			g_id := re.group_csave[cs_i]
 			st := re.group_csave[cs_i + 1]
 			en := re.group_csave[cs_i + 2]
-            println("cg[${g_id}] ${st} ${en}:[${txt[st..en]}]")
+			println('cg[${g_id}] ${st} ${en}:[${txt[st..en]}]')
 			cs_i += 3
 		}
 	}
@@ -374,23 +375,25 @@ that is a map from `string` to `int`, where the value is the index in
 `group_csave` list of indexes.
 Here is an example for how to use them:
 ```v ignore
 import regex
 fn main() {
-    txt   := "http://www.ciao.mondo/hello/pippo12_/pera.html"
+	txt := 'http://www.ciao.mondo/hello/pippo12_/pera.html'
-    query := r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+"
+	query := r'(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+'
 	mut re := regex.regex_opt(query) or { panic(err) }
 	// println(re.get_code())   // uncomment to see the print of the regex execution code
 	re.debug = 2 // enable maximum log
-    println("String: ${txt}")
+	println('String: ${txt}')
-    println("Query : ${re.get_query()}")
+	println('Query : ${re.get_query()}')
 	re.debug = 0 // disable log
 	start, end := re.match_string(txt)
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
 	} else {
-        println("No Match")
+		println('No Match')
 	}
 	for name in re.group_map.keys() {
@@ -414,6 +417,7 @@ In order to simplify the use of the named groups, it is possible to
 use a name map in the `re` struct, using the function `re.get_group_by_name`.
 Here is a more complex example of using them:
 ```v oksyntax
 // This function demonstrates the use of the named groups
 fn convert_html_rgb_n(in_col string) u32 {
@@ -443,15 +447,13 @@ Other utilities are `get_group_by_name` and `get_group_bounds_by_name`,
 that return the string of a group using its `name`:
 ```v ignore
-txt := "my used string...."
+txt := 'my used string....'
 for name in re.group_map.keys() {
 	println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
 		bounds: ${re.get_group_bounds_by_name(name)}")
 }
 ```
 ### Groups query functions
 These functions are helpers to query the captured groups
@@ -522,12 +524,14 @@ pub fn regex_opt(in_query string) ?RE
 ```v ignore
 // new creates a regex of small size, usually sufficient for ordinary use
 pub fn new() RE
 ```
 #### **Custom initialization**
 For some particular needs, it is possible to initialize a fully customized regex:
 ```v ignore
-pattern = r"ab(.*)(ac)"
+pattern = r'ab(.*)(ac)'
 // init custom regex
 mut re := regex.RE{}
 // max program length, can not be longer than the pattern
@@ -541,13 +545,14 @@ re.group_max        = pattern.len>>1 // we can't have more groups than the half
 re.group_stack = []int{len: re.group_max, init: -1}
 re.group_data = []int{len: re.group_max, init: -1}
 ```
 ### Compiling
 After an initializer is used, the regex expression must be compiled with:
 ```v ignore
 // compile compiles the REgex returning an error if the compilation fails
-pub fn (re mut RE) compile_opt(in_txt string)?
+pub fn (mut re RE) compile_opt(in_txt string) ?
 ```
 ### Matching Functions
@@ -556,8 +561,7 @@ These are the matching functions
 ```v ignore
 // match_string tries to match the input string; it returns the start and end index if found, else start is -1
-pub fn (re mut RE) match_string(in_txt string) (int,int)
+pub fn (mut re RE) match_string(in_txt string) (int, int)
 ```
 ## Find and Replace
@@ -569,16 +573,16 @@ There are the following find  and replace functions:
 ```v ignore
 // find tries to find the first match in the input string;
 // it returns the start and end index if found, else start is -1
-pub fn (re mut RE) find(in_txt string) (int,int)
+pub fn (mut re RE) find(in_txt string) (int, int)
 // find_all finds all the "non overlapping" occurrences of the matching pattern;
 // it returns a list of start/end indexes like: [3,4,6,8],
 // where the matches are [3,4] and [6,8]
-pub fn (re mut RE) find_all(in_txt string) []int
+pub fn (mut re RE) find_all(in_txt string) []int
 // find_all_str finds all the "non overlapping" occurrences of the matching pattern;
 // it returns a list of strings
-// the result is like ["first match","secon match"]
+// the result is like ['first match','second match']
 pub fn (mut re RE) find_all_str(in_txt string) []string
 ```
@@ -587,16 +591,16 @@ pub fn (mut re RE) find_all_str(in_txt string) []string
 ```v ignore
 // replace returns a string where the matches are replaced with the `repl` string;
 // this function supports groups in the replace string
-pub fn (re mut RE) replace(in_txt string, repl string) string
+pub fn (mut re RE) replace(in_txt string, repl string) string
 ```
 The replace string can include group references:
 ```v ignore
-txt   := "Today it is a good day."
+txt := 'Today it is a good day.'
 query := r'(a\w)[ ,.]'
 mut re := regex.regex_opt(query)?
-res := re.replace(txt, r"__[\0]__")
+res := re.replace(txt, r'__[\0]__')
 ```
 In this example we used group `0` in the replace string: `\0`. The result will be:
@@ -617,6 +621,7 @@ pub fn (mut re RE) replace_simple(in_txt string, repl string) string
 ```
 If you need to replace only the first N matches, you can use:
 ```v ignore
 // replace_n returns a string where the first `count` matches are replaced with the repl_str string.
 // `count` indicates the maximum number of replacements that will be done.
@@ -650,16 +655,17 @@ The following example will clarify its usage:
 import regex
 // customized replace functions
 // it will be called on each non overlapped find
 fn my_repl(re regex.RE, in_txt string, start int, end int) string {
 	g0 := re.get_group_by_id(in_txt, 0)
 	g1 := re.get_group_by_id(in_txt, 1)
 	g2 := re.get_group_by_id(in_txt, 2)
-    return "*${g0}*${g1}*${g2}*"
+	return '*${g0}*${g1}*${g2}*'
 }
 fn main() {
-    txt   := "today [John] is gone to his house with (Jack) and [Marie]."
+	txt := 'today [John] is gone to his house with (Jack) and [Marie].'
-    query := r"(.)(\A\w+)(.)"
+	query := r'(.)(\A\w+)(.)'
 	mut re := regex.regex_opt(query) or { panic(err) }
@@ -674,8 +680,6 @@ Output:
 today *[*John*]* is gone to his house with *(*Jack*)* and *[*Marie*]*.
 ```
 ## Debugging
 This module has a few small utilities to help you write regex patterns.
@@ -811,34 +815,36 @@ Here an example that perform some basically match of strings
 import regex
 fn main() {
-    txt   := "http://www.ciao.mondo/hello/pippo12_/pera.html"
+	txt := 'http://www.ciao.mondo/hello/pippo12_/pera.html'
-    query := r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+"
+	query := r'(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+'
 	mut re := regex.regex_opt(query) or { panic(err) }
 	start, end := re.match_string(txt)
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
 		for g_index := 0; g_index < re.group_count; g_index++ {
-            println("#${g_index} [${re.get_group_by_id(txt, g_index)}] \
+			println('#${g_index} [${re.get_group_by_id(txt, g_index)}] \
-            bounds: ${re.get_group_bounds_by_id(g_index)}")
+				bounds: ${re.get_group_bounds_by_id(g_index)}')
 		}
 		for name in re.group_map.keys() {
 			println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
 				bounds: ${re.get_group_bounds_by_name(name)}")
 		}
 	} else {
-        println("No Match")
+		println('No Match')
 	}
 }
 ```
 Here is an example of total customization of the regex environment creation:
 ```v ignore
 import regex
 fn main() {
-    txt   := "today John is gone to his house with Jack and Marie."
+	txt := 'today John is gone to his house with Jack and Marie.'
-    query := r"(?:(?P<word>\A\w+)|(?:\a\w+)[\s.]?)+"
+	query := r'(?:(?P<word>\A\w+)|(?:\a\w+)[\s.]?)+'
 	// init regex
 	mut re := regex.RE{}
@@ -859,37 +865,37 @@ fn main(){
 	start, end := re.match_string(txt)
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
 	} else {
-        println("No Match")
+		println('No Match')
 	}
 	// show results for continuous group saving
 	if re.group_csave_flag == true && start >= 0 && re.group_csave.len > 0 {
-        println("cg: ${re.group_csave}")
+		println('cg: ${re.group_csave}')
 		mut cs_i := 1
 		for cs_i < re.group_csave[0] * 3 {
 			g_id := re.group_csave[cs_i]
 			st := re.group_csave[cs_i + 1]
 			en := re.group_csave[cs_i + 2]
-            println("cg[${g_id}] ${st} ${en}:[${txt[st..en]}]")
+			println('cg[${g_id}] ${st} ${en}:[${txt[st..en]}]')
 			cs_i += 3
 		}
 	}
 	// show results for captured groups
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
 		for g_index := 0; g_index < re.group_count; g_index++ {
-            println("#${g_index} [${re.get_group_by_id(txt, g_index)}] \
+			println('#${g_index} [${re.get_group_by_id(txt, g_index)}] \
-            bounds: ${re.get_group_bounds_by_id(g_index)}")
+				bounds: ${re.get_group_bounds_by_id(g_index)}')
 		}
 		for name in re.group_map.keys() {
 			println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
 				bounds: ${re.get_group_bounds_by_name(name)}")
 		}
 	} else {
-        println("No Match")
+		println('No Match')
 	}
 }
 ```