regex: fix formatting inconsistencies in README.md (#17940)

2023-08-10 21:13:21 +03:00 · 2023-04-13 13:44:45 +02:00
parent 524f7c3ead
commit 489ac892b9
1 changed files with 187 additions and 181 deletions
--- a/vlib/regex/README.md
+++ b/vlib/regex/README.md
@@ -1,4 +1,5 @@
 # Description
+
 `regex` is a small but powerful regular expression library,
 written in pure V.

@@ -17,7 +18,6 @@ are valid for all the `regex` module features:
 2. The basic atomic elements of this regex engine are the tokens.
   In a query string a simple character is a token.

-
 ## Differences with PCRE:

 > **Note**
@@ -39,25 +39,24 @@ Note: **Two char classes with an `OR` in the middle is a syntax error.**
 - The **match operation stops at the end of the string**. It does *NOT* stop
  at new line characters.

+- The **match operation stops at the end of the string**. It does *NOT* stop
+  at new line characters.

 ## Tokens

 The tokens are the atomic units, used by this regex engine.
 They can be one of the following:

-
 ### Simple char

 This token is a simple single character like `a` or `b` etc.

-
 ### Match positional delimiters

 `^` Matches the start of the string.

 `$` Matches the end of the string.

-
 ### Char class (cc)

 The character classes match all the chars specified inside. Use square
@@ -98,14 +97,14 @@ For example `\w` is the meta-char `w`.

 A meta-char can match different types of characters.

-* `\w` matches a word char char `[a-zA-Z0-9_]`
-* `\W` matches a non word char
-* `\d` matches a digit `[0-9]`
-* `\D` matches a non digit
-* `\s` matches a space char, one of `[' ','\t','\n','\r','\v','\f']`
-* `\S` matches a non space char
-* `\a` matches only a lowercase char `[a-z]`
-* `\A` matches only an uppercase char `[A-Z]`
+- `\w` matches a word char `[a-zA-Z0-9_]`
+- `\W` matches a non word char
+- `\d` matches a digit `[0-9]`
+- `\D` matches a non digit
+- `\s` matches a space char, one of `[' ','\t','\n','\r','\v','\f']`
+- `\S` matches a non space char
+- `\a` matches only a lowercase char `[a-z]`
+- `\A` matches only an uppercase char `[A-Z]`

 ### Quantifier

@@ -142,11 +141,12 @@ with a regex. The following table show the query strings and the result of
 parsing source string.

 | query string | result      |
-|--------------|-------------|
+| ------------ | ----------- |
 | `.*c`        | `abc`       |
 | `.*dd`       | `abcc dd`   |
 | `ab.*e`      | `abccc dde` |
 | `ab.{3} .*e` | `abccc dde` |
+
 The dot matches any character, until the next token match is satisfied.

 > Important Note: Consecutive dots, for example `...`, are not allowed.
@@ -278,10 +278,10 @@ Others utility functions are `get_group_by_id` and `get_group_bounds_by_id`
 that get directly the string of a group using its `id`:

 ```v ignore
-txt := "my used string...."
+txt := 'my used string....'
 for g_index := 0; g_index < re.group_count; g_index++ {
-	println("#${g_index} [${re.get_group_by_id(txt, g_index)}] \
-    	bounds: ${re.get_group_bounds_by_id(g_index)}")
+	println('#${g_index} [${re.get_group_by_id(txt, g_index)}] \
+		bounds: ${re.get_group_bounds_by_id(g_index)}')
 }
 ```

@@ -311,32 +311,33 @@ not be saved.

 ```v ignore
 import regex
+
 fn main() {
-    txt   := "http://www.ciao.mondo/hello/pippo12_/pera.html"
-    query := r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+"
+	txt := 'http://www.ciao.mondo/hello/pippo12_/pera.html'
+	query := r'(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+'

 	mut re := regex.regex_opt(query) or { panic(err) }
 	// println(re.get_code())   // uncomment to see the print of the regex execution code
 	re.debug = 2 // enable maximum log
-    println("String: ${txt}")
-    println("Query : ${re.get_query()}")
+	println('String: ${txt}')
+	println('Query : ${re.get_query()}')
 	re.debug = 0 // disable log
 	re.group_csave_flag = true
 	start, end := re.match_string(txt)
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
 	} else {
-        println("No Match")
+		println('No Match')
 	}

 	if re.group_csave_flag == true && start >= 0 && re.group_csave.len > 0 {
-        println("cg: ${re.group_csave}")
+		println('cg: ${re.group_csave}')
 		mut cs_i := 1
 		for cs_i < re.group_csave[0] * 3 {
 			g_id := re.group_csave[cs_i]
 			st := re.group_csave[cs_i + 1]
 			en := re.group_csave[cs_i + 2]
-            println("cg[${g_id}] ${st} ${en}:[${txt[st..en]}]")
+			println('cg[${g_id}] ${st} ${en}:[${txt[st..en]}]')
 			cs_i += 3
 		}
 	}
@@ -374,23 +375,25 @@ that is a map from `string` to `int`, where the value is the index in
 `group_csave` list of indexes.

 Here is an example for how to use them:
+
 ```v ignore
 import regex
+
 fn main() {
-    txt   := "http://www.ciao.mondo/hello/pippo12_/pera.html"
-    query := r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+"
+	txt := 'http://www.ciao.mondo/hello/pippo12_/pera.html'
+	query := r'(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+'

 	mut re := regex.regex_opt(query) or { panic(err) }
 	// println(re.get_code())   // uncomment to see the print of the regex execution code
 	re.debug = 2 // enable maximum log
-    println("String: ${txt}")
-    println("Query : ${re.get_query()}")
+	println('String: ${txt}')
+	println('Query : ${re.get_query()}')
 	re.debug = 0 // disable log
 	start, end := re.match_string(txt)
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
 	} else {
-        println("No Match")
+		println('No Match')
 	}

 	for name in re.group_map.keys() {
@@ -414,6 +417,7 @@ In order to simplify the use of the named groups, it is possible to
 use a name map in the `re` struct, using the function `re.get_group_by_name`.

 Here is a more complex example of using them:
+
 ```v oksyntax
 // This function demonstrates the use of the named groups
 fn convert_html_rgb_n(in_col string) u32 {
@@ -443,15 +447,13 @@ Other utilities are `get_group_by_name` and `get_group_bounds_by_name`,
 that return the string of a group using its `name`:

 ```v ignore
-txt := "my used string...."
+txt := 'my used string....'
 for name in re.group_map.keys() {
 	println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
 		bounds: ${re.get_group_bounds_by_name(name)}")
 }
 ```

-
-
 ### Groups query functions

 These functions are helpers to query the captured groups
@@ -522,12 +524,14 @@ pub fn regex_opt(in_query string) ?RE
 ```v ignore
 // new_regex create a REgex of small size, usually sufficient for ordinary use
 pub fn new() RE
-
 ```
+
 #### **Custom initialization**
+
 For some particular needs, it is possible to initialize a fully customized regex:
+
 ```v ignore
-pattern = r"ab(.*)(ac)"
+pattern = r'ab(.*)(ac)'
 // init custom regex
 mut re := regex.RE{}
 // max program length, can not be longer than the pattern
@@ -541,13 +545,14 @@ re.group_max        = pattern.len>>1 // we can't have more groups than the half
 re.group_stack = []int{len: re.group_max, init: -1}
 re.group_data = []int{len: re.group_max, init: -1}
 ```
+
 ### Compiling

 After an initializer is used, the regex expression must be compiled with:

 ```v ignore
 // compile compiles the REgex returning an error if the compilation fails
-pub fn (re mut RE) compile_opt(in_txt string)?
+pub fn (mut re RE) compile_opt(in_txt string) ?
 ```

 ### Matching Functions
@@ -556,8 +561,7 @@ These are the matching functions

 ```v ignore
 // match_string try to match the input string, return start and end index if found else start is -1
-pub fn (re mut RE) match_string(in_txt string) (int,int)
-
+pub fn (mut re RE) match_string(in_txt string) (int, int)
 ```

 ## Find and Replace
@@ -569,16 +573,16 @@ There are the following find  and replace functions:
 ```v ignore
 // find try to find the first match in the input string
 // return start and end index if found else start is -1
-pub fn (re mut RE) find(in_txt string) (int,int)
+pub fn (mut re RE) find(in_txt string) (int, int)

 // find_all find all the "non overlapping" occurrences of the matching pattern
 // return a list of start end indexes like: [3,4,6,8]
 // the matches are [3,4] and [6,8]
-pub fn (re mut RE) find_all(in_txt string) []int
+pub fn (mut re RE) find_all(in_txt string) []int

 // find_all_str find all the "non overlapping" occurrences of the matching pattern
 // return a list of strings
-// the result is like ["first match","secon match"]
+// the result is like ['first match','second match']
 pub fn (mut re RE) find_all_str(in_txt string) []string
 ```

@@ -587,16 +591,16 @@ pub fn (mut re RE) find_all_str(in_txt string) []string
 ```v ignore
 // replace return a string where the matches are replaced with the repl_str string,
 // this function support groups in the replace string
-pub fn (re mut RE) replace(in_txt string, repl string) string
+pub fn (mut re RE) replace(in_txt string, repl string) string
 ```

 The replace string can include group references:

 ```v ignore
-txt   := "Today it is a good day."
+txt := 'Today it is a good day.'
 query := r'(a\w)[ ,.]'
 mut re := regex.regex_opt(query)?
-res := re.replace(txt, r"__[\0]__")
+res := re.replace(txt, r'__[\0]__')
 ```

 in this example we used the group `0` in the replace string: `\0`, the result will be:
@@ -617,6 +621,7 @@ pub fn (mut re RE) replace_simple(in_txt string, repl string) string
 ```

 If it is needed to replace N instances of the found strings it is possible to use:
+
 ```v ignore
 // replace_n return a string where the first `count` matches are replaced with the repl_str string
 // `count` indicate the number of max replacements that will be done.
@@ -650,16 +655,17 @@ The following example will clarify its usage:
 import regex
 // customized replace functions
 // it will be called on each non overlapped find
+
 fn my_repl(re regex.RE, in_txt string, start int, end int) string {
 	g0 := re.get_group_by_id(in_txt, 0)
 	g1 := re.get_group_by_id(in_txt, 1)
 	g2 := re.get_group_by_id(in_txt, 2)
-    return "*${g0}*${g1}*${g2}*"
+	return '*${g0}*${g1}*${g2}*'
 }

 fn main() {
-    txt   := "today [John] is gone to his house with (Jack) and [Marie]."
-    query := r"(.)(\A\w+)(.)"
+	txt := 'today [John] is gone to his house with (Jack) and [Marie].'
+	query := r'(.)(\A\w+)(.)'

 	mut re := regex.regex_opt(query) or { panic(err) }

@@ -674,8 +680,6 @@ Output:
 today *[*John*]* is gone to his house with *(*Jack*)* and *[*Marie*]*.
 ```

-
-
 ## Debugging

 This module has a few small utilities to help you write regex patterns.
@@ -811,34 +815,36 @@ Here an example that perform some basically match of strings
 import regex

 fn main() {
-    txt   := "http://www.ciao.mondo/hello/pippo12_/pera.html"
-    query := r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+"
+	txt := 'http://www.ciao.mondo/hello/pippo12_/pera.html'
+	query := r'(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+'

 	mut re := regex.regex_opt(query) or { panic(err) }

 	start, end := re.match_string(txt)
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
 		for g_index := 0; g_index < re.group_count; g_index++ {
-            println("#${g_index} [${re.get_group_by_id(txt, g_index)}] \
-            bounds: ${re.get_group_bounds_by_id(g_index)}")
+			println('#${g_index} [${re.get_group_by_id(txt, g_index)}] \
+				bounds: ${re.get_group_bounds_by_id(g_index)}')
 		}
 		for name in re.group_map.keys() {
 			println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
 				bounds: ${re.get_group_bounds_by_name(name)}")
 		}
 	} else {
-        println("No Match")
+		println('No Match')
 	}
 }
 ```
+
 Here is an example of total customization of the regex environment creation:
+
 ```v ignore
 import regex

 fn main() {
-    txt   := "today John is gone to his house with Jack and Marie."
-    query := r"(?:(?P<word>\A\w+)|(?:\a\w+)[\s.]?)+"
+	txt := 'today John is gone to his house with Jack and Marie.'
+	query := r'(?:(?P<word>\A\w+)|(?:\a\w+)[\s.]?)+'

 	// init regex
 	mut re := regex.RE{}
@@ -859,37 +865,37 @@ fn main(){

 	start, end := re.match_string(txt)
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
 	} else {
-        println("No Match")
+		println('No Match')
 	}

 	// show results for continuous group saving
 	if re.group_csave_flag == true && start >= 0 && re.group_csave.len > 0 {
-        println("cg: ${re.group_csave}")
+		println('cg: ${re.group_csave}')
 		mut cs_i := 1
 		for cs_i < re.group_csave[0] * 3 {
 			g_id := re.group_csave[cs_i]
 			st := re.group_csave[cs_i + 1]
 			en := re.group_csave[cs_i + 2]
-            println("cg[${g_id}] ${st} ${en}:[${txt[st..en]}]")
+			println('cg[${g_id}] ${st} ${en}:[${txt[st..en]}]')
 			cs_i += 3
 		}
 	}

 	// show results for captured groups
 	if start >= 0 {
-        println("Match (${start}, ${end}) => [${txt[start..end]}]")
+		println('Match (${start}, ${end}) => [${txt[start..end]}]')
 		for g_index := 0; g_index < re.group_count; g_index++ {
-            println("#${g_index} [${re.get_group_by_id(txt, g_index)}] \
-            bounds: ${re.get_group_bounds_by_id(g_index)}")
+			println('#${g_index} [${re.get_group_by_id(txt, g_index)}] \
+				bounds: ${re.get_group_bounds_by_id(g_index)}')
 		}
 		for name in re.group_map.keys() {
 			println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
 				bounds: ${re.get_group_bounds_by_name(name)}")
 		}
 	} else {
-        println("No Match")
+		println('No Match')
 	}
 }
 ```