mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
regex: fix formatting inconsistencies in README.md (#17940)
This commit is contained in:
parent
524f7c3ead
commit
489ac892b9
@ -1,4 +1,5 @@
|
|||||||
# Description
|
# Description
|
||||||
|
|
||||||
`regex` is a small but powerful regular expression library,
|
`regex` is a small but powerful regular expression library,
|
||||||
written in pure V.
|
written in pure V.
|
||||||
|
|
||||||
@ -15,8 +16,7 @@ are valid for all the `regex` module features:
|
|||||||
1. The matching stops at the end of the string, *not* at newline characters.
|
1. The matching stops at the end of the string, *not* at newline characters.
|
||||||
|
|
||||||
2. The basic atomic elements of this regex engine are the tokens.
|
2. The basic atomic elements of this regex engine are the tokens.
|
||||||
In a query string a simple character is a token.
|
In a query string a simple character is a token.
|
||||||
|
|
||||||
|
|
||||||
## Differences with PCRE:
|
## Differences with PCRE:
|
||||||
|
|
||||||
@ -28,36 +28,35 @@ In a query string a simple character is a token.
|
|||||||
The main differences can be summarized in the following points:
|
The main differences can be summarized in the following points:
|
||||||
|
|
||||||
- The basic element **is the token not the sequence of symbols**, and the most
|
- The basic element **is the token not the sequence of symbols**, and the most
|
||||||
simple token, is a single character.
|
simple token, is a single character.
|
||||||
|
|
||||||
- `|` **the OR operator acts on tokens,** for example `abc|ebc` is not
|
- `|` **the OR operator acts on tokens,** for example `abc|ebc` is not
|
||||||
`abc` OR `ebc`. Instead it is evaluated like `ab`, followed by `c OR e`,
|
`abc` OR `ebc`. Instead it is evaluated like `ab`, followed by `c OR e`,
|
||||||
followed by `bc`, because the **token is the base element**,
|
followed by `bc`, because the **token is the base element**,
|
||||||
not the sequence of symbols.
|
not the sequence of symbols.
|
||||||
Note: **Two char classes with an `OR` in the middle is a syntax error.**
|
Note: **Two char classes with an `OR` in the middle is a syntax error.**
|
||||||
|
|
||||||
- The **match operation stops at the end of the string**. It does *NOT* stop
|
- The **match operation stops at the end of the string**. It does *NOT* stop
|
||||||
at new line characters.
|
at new line characters.
|
||||||
|
|
||||||
|
- The **match operation stops at the end of the string**. It does *NOT* stop
|
||||||
|
at new line characters.
|
||||||
|
|
||||||
## Tokens
|
## Tokens
|
||||||
|
|
||||||
The tokens are the atomic units, used by this regex engine.
|
The tokens are the atomic units, used by this regex engine.
|
||||||
They can be one of the following:
|
They can be one of the following:
|
||||||
|
|
||||||
|
|
||||||
### Simple char
|
### Simple char
|
||||||
|
|
||||||
This token is a simple single character like `a` or `b` etc.
|
This token is a simple single character like `a` or `b` etc.
|
||||||
|
|
||||||
|
|
||||||
### Match positional delimiters
|
### Match positional delimiters
|
||||||
|
|
||||||
`^` Matches the start of the string.
|
`^` Matches the start of the string.
|
||||||
|
|
||||||
`$` Matches the end of the string.
|
`$` Matches the end of the string.
|
||||||
|
|
||||||
|
|
||||||
### Char class (cc)
|
### Char class (cc)
|
||||||
|
|
||||||
The character classes match all the chars specified inside. Use square
|
The character classes match all the chars specified inside. Use square
|
||||||
@ -98,14 +97,14 @@ For example `\w` is the meta-char `w`.
|
|||||||
|
|
||||||
A meta-char can match different types of characters.
|
A meta-char can match different types of characters.
|
||||||
|
|
||||||
* `\w` matches a word char `[a-zA-Z0-9_]`
|
- `\w` matches a word char `[a-zA-Z0-9_]`
|
||||||
* `\W` matches a non word char
|
- `\W` matches a non word char
|
||||||
* `\d` matches a digit `[0-9]`
|
- `\d` matches a digit `[0-9]`
|
||||||
* `\D` matches a non digit
|
- `\D` matches a non digit
|
||||||
* `\s` matches a space char, one of `[' ','\t','\n','\r','\v','\f']`
|
- `\s` matches a space char, one of `[' ','\t','\n','\r','\v','\f']`
|
||||||
* `\S` matches a non space char
|
- `\S` matches a non space char
|
||||||
* `\a` matches only a lowercase char `[a-z]`
|
- `\a` matches only a lowercase char `[a-z]`
|
||||||
* `\A` matches only an uppercase char `[A-Z]`
|
- `\A` matches only an uppercase char `[A-Z]`
|
||||||
|
|
||||||
### Quantifier
|
### Quantifier
|
||||||
|
|
||||||
@ -123,9 +122,9 @@ must be matched.
|
|||||||
- `{x}` matches exactly x times, `a{2}` matches `aa`, but not `aaa` or `a`
|
- `{x}` matches exactly x times, `a{2}` matches `aa`, but not `aaa` or `a`
|
||||||
- `{min,}` matches at least min times, `a{2,}` matches `aaa` or `aa`, not `a`
|
- `{min,}` matches at least min times, `a{2,}` matches `aaa` or `aa`, not `a`
|
||||||
- `{,max}` matches at least 0 times and at maximum max times,
|
- `{,max}` matches at least 0 times and at maximum max times,
|
||||||
for example, `a{,2}` matches `a` and `aa`, but doesn't match `aaa`
|
for example, `a{,2}` matches `a` and `aa`, but doesn't match `aaa`
|
||||||
- `{min,max}` matches from min times, to max times, for example
|
- `{min,max}` matches from min times, to max times, for example
|
||||||
`a{2,3}` matches `aa` and `aaa`, but doesn't match `a` or `aaaa`
|
`a{2,3}` matches `aa` and `aaa`, but doesn't match `a` or `aaaa`
|
||||||
|
|
||||||
A long quantifier, may have a `greedy off` flag, that is the `?`
|
A long quantifier, may have a `greedy off` flag, that is the `?`
|
||||||
character after the brackets. `{2,4}?` means to match the minimum
|
character after the brackets. `{2,4}?` means to match the minimum
|
||||||
@ -141,12 +140,13 @@ Suppose you have `abccc ddeef` as a source string, that you want to parse
|
|||||||
with a regex. The following table shows the query strings and the result of
|
with a regex. The following table shows the query strings and the result of
|
||||||
parsing source string.
|
parsing source string.
|
||||||
|
|
||||||
| query string | result |
|
| query string | result |
|
||||||
|--------------|-------------|
|
| ------------ | ----------- |
|
||||||
| `.*c` | `abc` |
|
| `.*c` | `abc` |
|
||||||
| `.*dd` | `abcc dd` |
|
| `.*dd` | `abcc dd` |
|
||||||
| `ab.*e` | `abccc dde` |
|
| `ab.*e` | `abccc dde` |
|
||||||
| `ab.{3} .*e` | `abccc dde` |
|
| `ab.{3} .*e` | `abccc dde` |
|
||||||
|
|
||||||
The dot matches any character, until the next token match is satisfied.
|
The dot matches any character, until the next token match is satisfied.
|
||||||
|
|
||||||
> Important Note: Consecutive dots, for example `...`, are not allowed.
|
> Important Note: Consecutive dots, for example `...`, are not allowed.
|
||||||
@ -195,7 +195,7 @@ i.e. the space char (ascii code 32) followed by the `?` quantifier,
|
|||||||
which means that the preceding space should be matched 0 or 1 time.
|
which means that the preceding space should be matched 0 or 1 time.
|
||||||
|
|
||||||
This explains why the `(c(pa)+z ?)+` query string,
|
This explains why the `(c(pa)+z ?)+` query string,
|
||||||
can match `cpaz cpapaz cpapapaz` .
|
can match `cpaz cpapaz cpapapaz`.
|
||||||
|
|
||||||
In this implementation the groups are "capture groups". This means that the
|
In this implementation the groups are "capture groups". This means that the
|
||||||
last temporal result for each group, can be retrieved from the `RE` struct.
|
last temporal result for each group, can be retrieved from the `RE` struct.
|
||||||
@ -275,13 +275,13 @@ fn convert_html_rgb(in_col string) u32 {
|
|||||||
```
|
```
|
||||||
|
|
||||||
Other utility functions are `get_group_by_id` and `get_group_bounds_by_id`
|
Other utility functions are `get_group_by_id` and `get_group_bounds_by_id`
|
||||||
that directly get the string of a group using its `id`:
|
that directly get the string of a group using its `id`:
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
txt := "my used string...."
|
txt := 'my used string....'
|
||||||
for g_index := 0; g_index < re.group_count ; g_index++ {
|
for g_index := 0; g_index < re.group_count; g_index++ {
|
||||||
println("#${g_index} [${re.get_group_by_id(txt, g_index)}] \
|
println('#${g_index} [${re.get_group_by_id(txt, g_index)}] \
|
||||||
bounds: ${re.get_group_bounds_by_id(g_index)}")
|
bounds: ${re.get_group_bounds_by_id(g_index)}')
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -311,35 +311,36 @@ not be saved.
|
|||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
import regex
|
import regex
|
||||||
fn main(){
|
|
||||||
txt := "http://www.ciao.mondo/hello/pippo12_/pera.html"
|
|
||||||
query := r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+"
|
|
||||||
|
|
||||||
mut re := regex.regex_opt(query) or { panic(err) }
|
fn main() {
|
||||||
//println(re.get_code()) // uncomment to see the print of the regex execution code
|
txt := 'http://www.ciao.mondo/hello/pippo12_/pera.html'
|
||||||
re.debug=2 // enable maximum log
|
query := r'(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+'
|
||||||
println("String: ${txt}")
|
|
||||||
println("Query : ${re.get_query()}")
|
|
||||||
re.debug=0 // disable log
|
|
||||||
re.group_csave_flag = true
|
|
||||||
start, end := re.match_string(txt)
|
|
||||||
if start >= 0 {
|
|
||||||
println("Match (${start}, ${end}) => [${txt[start..end]}]")
|
|
||||||
} else {
|
|
||||||
println("No Match")
|
|
||||||
}
|
|
||||||
|
|
||||||
if re.group_csave_flag == true && start >= 0 && re.group_csave.len > 0{
|
mut re := regex.regex_opt(query) or { panic(err) }
|
||||||
println("cg: ${re.group_csave}")
|
// println(re.get_code()) // uncomment to see the print of the regex execution code
|
||||||
mut cs_i := 1
|
re.debug = 2 // enable maximum log
|
||||||
for cs_i < re.group_csave[0]*3 {
|
println('String: ${txt}')
|
||||||
g_id := re.group_csave[cs_i]
|
println('Query : ${re.get_query()}')
|
||||||
st := re.group_csave[cs_i+1]
|
re.debug = 0 // disable log
|
||||||
en := re.group_csave[cs_i+2]
|
re.group_csave_flag = true
|
||||||
println("cg[${g_id}] ${st} ${en}:[${txt[st..en]}]")
|
start, end := re.match_string(txt)
|
||||||
cs_i += 3
|
if start >= 0 {
|
||||||
}
|
println('Match (${start}, ${end}) => [${txt[start..end]}]')
|
||||||
}
|
} else {
|
||||||
|
println('No Match')
|
||||||
|
}
|
||||||
|
|
||||||
|
if re.group_csave_flag == true && start >= 0 && re.group_csave.len > 0 {
|
||||||
|
println('cg: ${re.group_csave}')
|
||||||
|
mut cs_i := 1
|
||||||
|
for cs_i < re.group_csave[0] * 3 {
|
||||||
|
g_id := re.group_csave[cs_i]
|
||||||
|
st := re.group_csave[cs_i + 1]
|
||||||
|
en := re.group_csave[cs_i + 2]
|
||||||
|
println('cg[${g_id}] ${st} ${en}:[${txt[st..en]}]')
|
||||||
|
cs_i += 3
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -364,7 +365,7 @@ cg[1] 42 46:[html]
|
|||||||
|
|
||||||
This regex module partially supports the question mark `?` PCRE syntax for groups.
|
This regex module partially supports the question mark `?` PCRE syntax for groups.
|
||||||
|
|
||||||
`(?:abcd)` **non capturing group**: the content of the group will not be saved.
|
`(?:abcd)` **non capturing group**: the content of the group will not be saved.
|
||||||
|
|
||||||
`(?P<mygroup>abcdef)` **named group:** the group content is saved and labeled
|
`(?P<mygroup>abcdef)` **named group:** the group content is saved and labeled
|
||||||
as `mygroup`.
|
as `mygroup`.
|
||||||
@ -374,29 +375,31 @@ that is a map from `string` to `int`, where the value is the index in
|
|||||||
`group_csave` list of indexes.
|
`group_csave` list of indexes.
|
||||||
|
|
||||||
Here is an example for how to use them:
|
Here is an example for how to use them:
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
import regex
|
import regex
|
||||||
fn main(){
|
|
||||||
txt := "http://www.ciao.mondo/hello/pippo12_/pera.html"
|
|
||||||
query := r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+"
|
|
||||||
|
|
||||||
mut re := regex.regex_opt(query) or { panic(err) }
|
fn main() {
|
||||||
//println(re.get_code()) // uncomment to see the print of the regex execution code
|
txt := 'http://www.ciao.mondo/hello/pippo12_/pera.html'
|
||||||
re.debug=2 // enable maximum log
|
query := r'(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+'
|
||||||
println("String: ${txt}")
|
|
||||||
println("Query : ${re.get_query()}")
|
|
||||||
re.debug=0 // disable log
|
|
||||||
start, end := re.match_string(txt)
|
|
||||||
if start >= 0 {
|
|
||||||
println("Match (${start}, ${end}) => [${txt[start..end]}]")
|
|
||||||
} else {
|
|
||||||
println("No Match")
|
|
||||||
}
|
|
||||||
|
|
||||||
for name in re.group_map.keys() {
|
mut re := regex.regex_opt(query) or { panic(err) }
|
||||||
println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
|
// println(re.get_code()) // uncomment to see the print of the regex execution code
|
||||||
bounds: ${re.get_group_bounds_by_name(name)}")
|
re.debug = 2 // enable maximum log
|
||||||
}
|
println('String: ${txt}')
|
||||||
|
println('Query : ${re.get_query()}')
|
||||||
|
re.debug = 0 // disable log
|
||||||
|
start, end := re.match_string(txt)
|
||||||
|
if start >= 0 {
|
||||||
|
println('Match (${start}, ${end}) => [${txt[start..end]}]')
|
||||||
|
} else {
|
||||||
|
println('No Match')
|
||||||
|
}
|
||||||
|
|
||||||
|
for name in re.group_map.keys() {
|
||||||
|
println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
|
||||||
|
bounds: ${re.get_group_bounds_by_name(name)}")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -414,6 +417,7 @@ In order to simplify the use of the named groups, it is possible to
|
|||||||
use a name map in the `re` struct, using the function `re.get_group_by_name`.
|
use a name map in the `re` struct, using the function `re.get_group_by_name`.
|
||||||
|
|
||||||
Here is a more complex example of using them:
|
Here is a more complex example of using them:
|
||||||
|
|
||||||
```v oksyntax
|
```v oksyntax
|
||||||
// This function demonstrates the use of the named groups
|
// This function demonstrates the use of the named groups
|
||||||
fn convert_html_rgb_n(in_col string) u32 {
|
fn convert_html_rgb_n(in_col string) u32 {
|
||||||
@ -443,15 +447,13 @@ Other utilities are `get_group_by_name` and `get_group_bounds_by_name`,
|
|||||||
that return the string of a group using its `name`:
|
that return the string of a group using its `name`:
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
txt := "my used string...."
|
txt := 'my used string....'
|
||||||
for name in re.group_map.keys() {
|
for name in re.group_map.keys() {
|
||||||
println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
|
println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
|
||||||
bounds: ${re.get_group_bounds_by_name(name)}")
|
bounds: ${re.get_group_bounds_by_name(name)}")
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### Groups query functions
|
### Groups query functions
|
||||||
|
|
||||||
These functions are helpers to query the captured groups
|
These functions are helpers to query the captured groups
|
||||||
@ -493,15 +495,15 @@ re.flag = regex.f_bin
|
|||||||
- `f_bin`: parse a string as bytes, utf-8 management disabled.
|
- `f_bin`: parse a string as bytes, utf-8 management disabled.
|
||||||
|
|
||||||
- `f_efm`: exit on the first char matches in the query, used by the
|
- `f_efm`: exit on the first char matches in the query, used by the
|
||||||
find function.
|
find function.
|
||||||
|
|
||||||
- `f_ms`: matches only if the index of the start match is 0,
|
- `f_ms`: matches only if the index of the start match is 0,
|
||||||
same as `^` at the start of the query string.
|
same as `^` at the start of the query string.
|
||||||
|
|
||||||
- `f_me`: matches only if the end index of the match is the last char
|
- `f_me`: matches only if the end index of the match is the last char
|
||||||
of the input string, same as `$` end of query string.
|
of the input string, same as `$` end of query string.
|
||||||
|
|
||||||
- `f_nl`: stop the matching if found a new line char `\n` or `\r`
|
- `f_nl`: stop the matching if found a new line char `\n` or `\r`
|
||||||
|
|
||||||
## Functions
|
## Functions
|
||||||
|
|
||||||
@ -522,32 +524,35 @@ pub fn regex_opt(in_query string) ?RE
|
|||||||
```v ignore
|
```v ignore
|
||||||
// new_regex create a REgex of small size, usually sufficient for ordinary use
|
// new_regex create a REgex of small size, usually sufficient for ordinary use
|
||||||
pub fn new() RE
|
pub fn new() RE
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### **Custom initialization**
|
#### **Custom initialization**
|
||||||
|
|
||||||
For some particular needs, it is possible to initialize a fully customized regex:
|
For some particular needs, it is possible to initialize a fully customized regex:
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
pattern = r"ab(.*)(ac)"
|
pattern = r'ab(.*)(ac)'
|
||||||
// init custom regex
|
// init custom regex
|
||||||
mut re := regex.RE{}
|
mut re := regex.RE{}
|
||||||
// max program length, can not be longer than the pattern
|
// max program length, can not be longer than the pattern
|
||||||
re.prog = []Token {len: pattern.len + 1}
|
re.prog = []Token{len: pattern.len + 1}
|
||||||
// there can not be more char classes than the length of the pattern
|
// there can not be more char classes than the length of the pattern
|
||||||
re.cc = []CharClass{len: pattern.len}
|
re.cc = []CharClass{len: pattern.len}
|
||||||
|
|
||||||
re.group_csave_flag = false // true enable continuous group saving if needed
|
re.group_csave_flag = false // true enable continuous group saving if needed
|
||||||
re.group_max_nested = 128 // set max 128 group nested possible
|
re.group_max_nested = 128 // set max 128 group nested possible
|
||||||
re.group_max = pattern.len>>1 // we can't have more groups than the half of the pattern length
|
re.group_max = pattern.len >> 1 // we can't have more groups than the half of the pattern length
|
||||||
re.group_stack = []int{len: re.group_max, init: -1}
|
re.group_stack = []int{len: re.group_max, init: -1}
|
||||||
re.group_data = []int{len: re.group_max, init: -1}
|
re.group_data = []int{len: re.group_max, init: -1}
|
||||||
```
|
```
|
||||||
|
|
||||||
### Compiling
|
### Compiling
|
||||||
|
|
||||||
After an initializer is used, the regex expression must be compiled with:
|
After an initializer is used, the regex expression must be compiled with:
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
// compile compiles the REgex returning an error if the compilation fails
|
// compile compiles the REgex returning an error if the compilation fails
|
||||||
pub fn (re mut RE) compile_opt(in_txt string)?
|
pub fn (mut re RE) compile_opt(in_txt string) ?
|
||||||
```
|
```
|
||||||
|
|
||||||
### Matching Functions
|
### Matching Functions
|
||||||
@ -556,29 +561,28 @@ These are the matching functions
|
|||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
// match_string try to match the input string, return start and end index if found else start is -1
|
// match_string try to match the input string, return start and end index if found else start is -1
|
||||||
pub fn (re mut RE) match_string(in_txt string) (int,int)
|
pub fn (mut re RE) match_string(in_txt string) (int, int)
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Find and Replace
|
## Find and Replace
|
||||||
|
|
||||||
There are the following find and replace functions:
|
There are the following find and replace functions:
|
||||||
|
|
||||||
#### Find functions
|
#### Find functions
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
// find try to find the first match in the input string
|
// find try to find the first match in the input string
|
||||||
// return start and end index if found else start is -1
|
// return start and end index if found else start is -1
|
||||||
pub fn (re mut RE) find(in_txt string) (int,int)
|
pub fn (mut re RE) find(in_txt string) (int, int)
|
||||||
|
|
||||||
// find_all find all the "non overlapping" occurrences of the matching pattern
|
// find_all find all the "non overlapping" occurrences of the matching pattern
|
||||||
// return a list of start end indexes like: [3,4,6,8]
|
// return a list of start end indexes like: [3,4,6,8]
|
||||||
// the matches are [3,4] and [6,8]
|
// the matches are [3,4] and [6,8]
|
||||||
pub fn (re mut RE) find_all(in_txt string) []int
|
pub fn (mut re RE) find_all(in_txt string) []int
|
||||||
|
|
||||||
// find_all find all the "non overlapping" occurrences of the matching pattern
|
// find_all find all the "non overlapping" occurrences of the matching pattern
|
||||||
// return a list of strings
|
// return a list of strings
|
||||||
// the result is like ["first match","second match"]
|
// the result is like ['first match','second match']
|
||||||
pub fn (mut re RE) find_all_str(in_txt string) []string
|
pub fn (mut re RE) find_all_str(in_txt string) []string
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -587,16 +591,16 @@ pub fn (mut re RE) find_all_str(in_txt string) []string
|
|||||||
```v ignore
|
```v ignore
|
||||||
// replace return a string where the matches are replaced with the repl_str string,
|
// replace return a string where the matches are replaced with the repl_str string,
|
||||||
// this function support groups in the replace string
|
// this function support groups in the replace string
|
||||||
pub fn (re mut RE) replace(in_txt string, repl string) string
|
pub fn (mut re RE) replace(in_txt string, repl string) string
|
||||||
```
|
```
|
||||||
|
|
||||||
The replace string can include group references:
|
The replace string can include group references:
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
txt := "Today it is a good day."
|
txt := 'Today it is a good day.'
|
||||||
query := r'(a\w)[ ,.]'
|
query := r'(a\w)[ ,.]'
|
||||||
mut re := regex.regex_opt(query)?
|
mut re := regex.regex_opt(query)?
|
||||||
res := re.replace(txt, r"__[\0]__")
|
res := re.replace(txt, r'__[\0]__')
|
||||||
```
|
```
|
||||||
|
|
||||||
In this example we used the group `0` in the replace string: `\0`. The result will be:
|
In this example we used the group `0` in the replace string: `\0`. The result will be:
|
||||||
@ -617,6 +621,7 @@ pub fn (mut re RE) replace_simple(in_txt string, repl string) string
|
|||||||
```
|
```
|
||||||
|
|
||||||
If it is needed to replace N instances of the found strings it is possible to use:
|
If it is needed to replace N instances of the found strings it is possible to use:
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
// replace_n return a string where the first `count` matches are replaced with the repl_str string
|
// replace_n return a string where the first `count` matches are replaced with the repl_str string
|
||||||
// `count` indicate the number of max replacements that will be done.
|
// `count` indicate the number of max replacements that will be done.
|
||||||
@ -650,21 +655,22 @@ The following example will clarify its usage:
|
|||||||
import regex
|
import regex
|
||||||
// customized replace functions
|
// customized replace functions
|
||||||
// it will be called on each non overlapped find
|
// it will be called on each non overlapped find
|
||||||
|
|
||||||
fn my_repl(re regex.RE, in_txt string, start int, end int) string {
|
fn my_repl(re regex.RE, in_txt string, start int, end int) string {
|
||||||
g0 := re.get_group_by_id(in_txt, 0)
|
g0 := re.get_group_by_id(in_txt, 0)
|
||||||
g1 := re.get_group_by_id(in_txt, 1)
|
g1 := re.get_group_by_id(in_txt, 1)
|
||||||
g2 := re.get_group_by_id(in_txt, 2)
|
g2 := re.get_group_by_id(in_txt, 2)
|
||||||
return "*${g0}*${g1}*${g2}*"
|
return '*${g0}*${g1}*${g2}*'
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main(){
|
fn main() {
|
||||||
txt := "today [John] is gone to his house with (Jack) and [Marie]."
|
txt := 'today [John] is gone to his house with (Jack) and [Marie].'
|
||||||
query := r"(.)(\A\w+)(.)"
|
query := r'(.)(\A\w+)(.)'
|
||||||
|
|
||||||
mut re := regex.regex_opt(query) or { panic(err) }
|
mut re := regex.regex_opt(query) or { panic(err) }
|
||||||
|
|
||||||
result := re.replace_by_fn(txt, my_repl)
|
result := re.replace_by_fn(txt, my_repl)
|
||||||
println(result)
|
println(result)
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -674,8 +680,6 @@ Output:
|
|||||||
today *[*John*]* is gone to his house with *(*Jack*)* and *[*Marie*]*.
|
today *[*John*]* is gone to his house with *(*Jack*)* and *[*Marie*]*.
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Debugging
|
## Debugging
|
||||||
|
|
||||||
This module has a few small utilities to help you write regex patterns.
|
This module has a few small utilities to help you write regex patterns.
|
||||||
@ -727,7 +731,7 @@ PC: 10 ist: 88000000 PROG_END { 0, 0}
|
|||||||
|
|
||||||
`query_ch` is the type of token.
|
`query_ch` is the type of token.
|
||||||
|
|
||||||
`{m,n}` is the quantifier, the greedy off flag `?` will be shown if present in the token
|
`{m,n}` is the quantifier, the greedy off flag `?` will be shown if present in the token
|
||||||
|
|
||||||
### **Log debug**
|
### **Log debug**
|
||||||
|
|
||||||
@ -810,87 +814,89 @@ Here an example that perform some basically match of strings
|
|||||||
```v ignore
|
```v ignore
|
||||||
import regex
|
import regex
|
||||||
|
|
||||||
fn main(){
|
fn main() {
|
||||||
txt := "http://www.ciao.mondo/hello/pippo12_/pera.html"
|
txt := 'http://www.ciao.mondo/hello/pippo12_/pera.html'
|
||||||
query := r"(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+"
|
query := r'(?P<format>https?)|(?P<format>ftps?)://(?P<token>[\w_]+.)+'
|
||||||
|
|
||||||
mut re := regex.regex_opt(query) or { panic(err) }
|
mut re := regex.regex_opt(query) or { panic(err) }
|
||||||
|
|
||||||
start, end := re.match_string(txt)
|
start, end := re.match_string(txt)
|
||||||
if start >= 0 {
|
if start >= 0 {
|
||||||
println("Match (${start}, ${end}) => [${txt[start..end]}]")
|
println('Match (${start}, ${end}) => [${txt[start..end]}]')
|
||||||
for g_index := 0; g_index < re.group_count ; g_index++ {
|
for g_index := 0; g_index < re.group_count; g_index++ {
|
||||||
println("#${g_index} [${re.get_group_by_id(txt, g_index)}] \
|
println('#${g_index} [${re.get_group_by_id(txt, g_index)}] \
|
||||||
bounds: ${re.get_group_bounds_by_id(g_index)}")
|
bounds: ${re.get_group_bounds_by_id(g_index)}')
|
||||||
}
|
}
|
||||||
for name in re.group_map.keys() {
|
for name in re.group_map.keys() {
|
||||||
println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
|
println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
|
||||||
bounds: ${re.get_group_bounds_by_name(name)}")
|
bounds: ${re.get_group_bounds_by_name(name)}")
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
println("No Match")
|
println('No Match')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Here is an example of total customization of the regex environment creation:
|
Here is an example of total customization of the regex environment creation:
|
||||||
|
|
||||||
```v ignore
|
```v ignore
|
||||||
import regex
|
import regex
|
||||||
|
|
||||||
fn main(){
|
fn main() {
|
||||||
txt := "today John is gone to his house with Jack and Marie."
|
txt := 'today John is gone to his house with Jack and Marie.'
|
||||||
query := r"(?:(?P<word>\A\w+)|(?:\a\w+)[\s.]?)+"
|
query := r'(?:(?P<word>\A\w+)|(?:\a\w+)[\s.]?)+'
|
||||||
|
|
||||||
// init regex
|
// init regex
|
||||||
mut re := regex.RE{}
|
mut re := regex.RE{}
|
||||||
// max program length, can not be longer than the query
|
// max program length, can not be longer than the query
|
||||||
re.prog = []regex.Token {len: query.len + 1}
|
re.prog = []regex.Token{len: query.len + 1}
|
||||||
// there can not be more char classes than the length of the query
|
// there can not be more char classes than the length of the query
|
||||||
re.cc = []regex.CharClass{len: query.len}
|
re.cc = []regex.CharClass{len: query.len}
|
||||||
re.prog = []regex.Token {len: query.len+1}
|
re.prog = []regex.Token{len: query.len + 1}
|
||||||
// enable continuous group saving
|
// enable continuous group saving
|
||||||
re.group_csave_flag = true
|
re.group_csave_flag = true
|
||||||
// set max 128 group nested
|
// set max 128 group nested
|
||||||
re.group_max_nested = 128
|
re.group_max_nested = 128
|
||||||
// we can't have more groups than the half of the query length
|
// we can't have more groups than the half of the query length
|
||||||
re.group_max = query.len>>1
|
re.group_max = query.len >> 1
|
||||||
|
|
||||||
// compile the query
|
// compile the query
|
||||||
re.compile_opt(query) or { panic(err) }
|
re.compile_opt(query) or { panic(err) }
|
||||||
|
|
||||||
start, end := re.match_string(txt)
|
start, end := re.match_string(txt)
|
||||||
if start >= 0 {
|
if start >= 0 {
|
||||||
println("Match (${start}, ${end}) => [${txt[start..end]}]")
|
println('Match (${start}, ${end}) => [${txt[start..end]}]')
|
||||||
} else {
|
} else {
|
||||||
println("No Match")
|
println('No Match')
|
||||||
}
|
}
|
||||||
|
|
||||||
// show results for continuous group saving
|
// show results for continuous group saving
|
||||||
if re.group_csave_flag == true && start >= 0 && re.group_csave.len > 0{
|
if re.group_csave_flag == true && start >= 0 && re.group_csave.len > 0 {
|
||||||
println("cg: ${re.group_csave}")
|
println('cg: ${re.group_csave}')
|
||||||
mut cs_i := 1
|
mut cs_i := 1
|
||||||
for cs_i < re.group_csave[0]*3 {
|
for cs_i < re.group_csave[0] * 3 {
|
||||||
g_id := re.group_csave[cs_i]
|
g_id := re.group_csave[cs_i]
|
||||||
st := re.group_csave[cs_i+1]
|
st := re.group_csave[cs_i + 1]
|
||||||
en := re.group_csave[cs_i+2]
|
en := re.group_csave[cs_i + 2]
|
||||||
println("cg[${g_id}] ${st} ${en}:[${txt[st..en]}]")
|
println('cg[${g_id}] ${st} ${en}:[${txt[st..en]}]')
|
||||||
cs_i += 3
|
cs_i += 3
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// show results for captured groups
|
// show results for captured groups
|
||||||
if start >= 0 {
|
if start >= 0 {
|
||||||
println("Match (${start}, ${end}) => [${txt[start..end]}]")
|
println('Match (${start}, ${end}) => [${txt[start..end]}]')
|
||||||
for g_index := 0; g_index < re.group_count ; g_index++ {
|
for g_index := 0; g_index < re.group_count; g_index++ {
|
||||||
println("#${g_index} [${re.get_group_by_id(txt, g_index)}] \
|
println('#${g_index} [${re.get_group_by_id(txt, g_index)}] \
|
||||||
bounds: ${re.get_group_bounds_by_id(g_index)}")
|
bounds: ${re.get_group_bounds_by_id(g_index)}')
|
||||||
}
|
}
|
||||||
for name in re.group_map.keys() {
|
for name in re.group_map.keys() {
|
||||||
println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
|
println("group:'${name}' \t=> [${re.get_group_by_name(txt, name)}] \
|
||||||
bounds: ${re.get_group_bounds_by_name(name)}")
|
bounds: ${re.get_group_bounds_by_name(name)}")
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
println("No Match")
|
println('No Match')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user