1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00

time: add more detailed error descriptions, add custom format parsing with time.parse_format (#18257)

This commit is contained in:
sandbankdisperser 2023-06-06 17:43:10 +02:00 committed by GitHub
parent 0bbbf1e801
commit e97aff8742
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 438 additions and 27 deletions

View File

@ -106,10 +106,13 @@ pub fn (s string) clone() string {
return string(s.str) return string(s.str)
} }
// contains returns `true` if the string contains `substr`.
// See also: [`string.index`](#string.index)
pub fn (s string) contains(substr string) bool { pub fn (s string) contains(substr string) bool {
return bool(s.str.includes(substr.str)) return bool(s.str.includes(substr.str))
} }
// contains_any returns `true` if the string contains any chars in `chars`.
pub fn (s string) contains_any(chars string) bool { pub fn (s string) contains_any(chars string) bool {
sep := '' sep := ''
res := chars.str.split(sep.str) res := chars.str.split(sep.str)
@ -121,6 +124,26 @@ pub fn (s string) contains_any(chars string) bool {
return false return false
} }
// contains_only reports whether every character of the string also occurs in `chars`.
// An empty `chars` always yields `false`; an empty string checked against a
// non-empty `chars` yields `true`, since there is no character that could fail.
pub fn (s string) contains_only(chars string) bool {
	if chars.len == 0 {
		return false
	}
	for ch in s {
		// look for the current character among the allowed ones
		mut allowed := false
		for candidate in chars {
			if candidate == ch {
				allowed = true
				break
			}
		}
		if !allowed {
			return false
		}
	}
	return true
}
pub fn (s string) contains_any_substr(chars []string) bool { pub fn (s string) contains_any_substr(chars []string) bool {
if chars.len == 0 { if chars.len == 0 {
return true return true

View File

@ -404,6 +404,13 @@ fn test_contains_any() {
assert !''.contains_any('') assert !''.contains_any('')
} }
// test_contains_only checks `string.contains_only` with rejected, empty and accepted inputs.
fn test_contains_only() {
	// an empty set of allowed characters never matches, not even for an empty string
	assert !''.contains_only('')
	// a single character outside of the allowed set makes the check fail
	assert !'hello;'.contains_only('hello')
	// every character of the string is part of the allowed set
	assert '23885'.contains_only('0123456789')
	assert '23gg885'.contains_only('01g23456789')
}
fn test_contains_any_substr() { fn test_contains_any_substr() {
s := 'Some random text' s := 'Some random text'
assert s.contains_any_substr(['false', 'not', 'rand']) assert s.contains_any_substr(['false', 'not', 'rand'])

View File

@ -0,0 +1,295 @@
module time
// DateTimeParser holds the state needed to match a `datetime` string against a `format` string.
struct DateTimeParser {
// the text that is being parsed
datetime string
// the format description that drives the parsing
format string
mut:
// index into `datetime` of the next character to be read; advanced by the consuming methods
current_pos_datetime int
}
// new_date_time_parser creates a parser for `datetime` that will be matched against `format`.
// The read position is left at its zero value, i.e. the start of `datetime`.
fn new_date_time_parser(datetime string, format string) DateTimeParser {
	parser := DateTimeParser{
		datetime: datetime
		format: format
	}
	return parser
}
// next consumes the following `length` characters of the datetime string and returns them.
// It returns the error 'end of string' (without moving the read position),
// when fewer than `length` characters are left.
fn (mut p DateTimeParser) next(length int) !string {
	start := p.current_pos_datetime
	stop := start + length
	if stop > p.datetime.len {
		return error('end of string')
	}
	chunk := p.datetime[start..stop]
	p.current_pos_datetime = stop
	return chunk
}
// peek returns the following `length` characters of the datetime string,
// without moving the read position.
// It returns the error 'end of string', when fewer than `length` characters are left.
fn (mut p DateTimeParser) peek(length int) !string {
	start := p.current_pos_datetime
	stop := start + length
	if stop <= p.datetime.len {
		return p.datetime[start..stop]
	}
	return error('end of string')
}
// must_be_int consumes exactly `length` characters and returns them converted to an int.
// It returns an error, when fewer than `length` characters are left, or when the
// consumed text contains anything other than decimal digits.
fn (mut p DateTimeParser) must_be_int(length int) !int {
	val := p.next(length)!
	// `string.int()` silently yields 0 for non numeric text, which would let
	// malformed input such as 'abcd' pass as the number 0, so validate first.
	if !val.contains_only('0123456789') {
		return error('expected int, found: ${val}')
	}
	return val.int()
}
// must_be_int_with_minimum_length consumes between `min` and `max` decimal digits,
// starting at the current read position, and returns them converted to an int.
// Reading stops at the first character that is not a digit, or at the end of the input.
// It returns an error, when fewer than `min` digits were found, or when
// `allow_leading_zero` is false and a value of more than one digit starts with `0`.
// A lone `0` is not considered to have a leading zero, so that fields whose range
// includes zero (for example hours) can be parsed; callers range check the result.
fn (mut p DateTimeParser) must_be_int_with_minimum_length(min int, max int, allow_leading_zero bool) !int {
	mut val := ''
	// Read at most `max` digits. The previous bound `max + 1 - min` was only
	// equal to `max` for `min == 1`.
	for _ in 0 .. max {
		maybe_int := p.peek(1) or { break }
		// Accept every decimal digit. The previous hand written comparison chain
		// left out '3', so values like 3, 13, 23, 30 and 31 could not be parsed.
		if !maybe_int[0].is_digit() {
			break
		}
		p.next(1)!
		val += maybe_int
	}
	if val.len < min {
		return error('expected int with a minimum length of ${min}, found: ${val.len}')
	}
	if !allow_leading_zero && val.len > 1 && val.starts_with('0') {
		return error('0 is not allowed for this format')
	}
	return val.int()
}
// must_be_single_int_with_optional_leading_zero consumes one character, and when that
// character is `0`, one more character if available. The consumed text is returned
// converted to an int. It returns an error, when no character is left to read.
fn (mut p DateTimeParser) must_be_single_int_with_optional_leading_zero() !int {
	first := p.next(1)!
	if first != '0' {
		return first.int()
	}
	// a leading zero may be followed by the actual digit; running out of input here is fine
	second := p.next(1) or { '' }
	return (first + second).int()
}
// must_be_string consumes `must.len` characters and checks that they are equal to `must`.
// It returns an error, when the input is too short, or when the consumed text differs.
// The position in the mismatch error is the read position after the text was consumed.
fn (mut p DateTimeParser) must_be_string(must string) ! {
	found := p.next(must.len)!
	if found == must {
		return
	}
	return error('invalid string: "${found}"!="${must}" at: ${p.current_pos_datetime}')
}
// must_be_string_one_of returns the first entry of `oneof`, that matches the text at the
// current read position. Entries that are longer than the remaining input are skipped.
// The input is only peeked at, so the read position is not advanced by this method.
// It returns an error, when none of the entries matches.
fn (mut p DateTimeParser) must_be_string_one_of(oneof []string) !string {
	for candidate in oneof {
		upcoming := p.peek(candidate.len) or { continue }
		if upcoming != candidate {
			continue
		}
		return candidate
	}
	return error('invalid string: must be one of ${oneof}, at ${p.current_pos_datetime}')
}
// must_be_valid_month consumes a month name, as listed in `long_months`, at the current
// read position, and returns its 1 based position in that list.
// It returns an error, without moving the read position, when no name matches.
fn (mut p DateTimeParser) must_be_valid_month() !int {
	for i, name in long_months {
		stop := p.current_pos_datetime + name.len
		// `stop` may be equal to the input length: a month name is allowed to be
		// the very last thing in the datetime string (the previous `<` check rejected that).
		if stop > p.datetime.len {
			continue
		}
		if p.datetime[p.current_pos_datetime..stop] == name {
			p.current_pos_datetime = stop
			return i + 1
		}
	}
	return error_invalid_time(0, 'invalid month name')
}
// must_be_valid_week_day consumes `letters` characters, and returns the first entry of
// `long_days`, whose first `letters` characters are equal to the consumed text.
// It returns an error, when the input is too short, or when no day name matches.
// Note that the characters stay consumed, even when no day name matches.
fn (mut p DateTimeParser) must_be_valid_week_day(letters int) !string {
	val := p.next(letters)!
	for day in long_days {
		// skip names shorter than the requested prefix, instead of slicing past their end
		if letters <= day.len && day[0..letters] == val {
			return day
		}
	}
	return error_invalid_time(0, 'invalid weekday name')
}
// extract_tokens splits `s` into runs of identical consecutive characters,
// for example 'YYYY-MM' becomes ['YYYY', '-', 'MM'].
// An empty `s` results in an empty array. No error is produced by this body.
fn extract_tokens(s string) ![]string {
	mut tokens := []string{}
	mut run := ''
	for r in s {
		ch := r.ascii_str()
		// extend the current run, while the same character repeats
		if run == '' || run.contains_only(ch) {
			run += ch
			continue
		}
		// a different character starts a new run
		tokens << run
		run = ch
	}
	if run.len > 0 {
		tokens << run
	}
	return tokens
}
// parse matches the parser's datetime string against its format string, and returns
// the resulting Time. The format is split into runs of identical characters; each run
// is either one of the specifiers below, or literal text that must appear verbatim:
// YYYY - 4 digit year
// YY - 2 digit year
// M - month, 1 or 2 digits, 1..12
// MM - month, 2 digits, 01..12
// MMMM - name of month
// D - day of the month, 1 or 2 digits, 1..31
// DD - day of the month, 2 digits, 01..31
// H - hour, 1 or 2 digits, range checked against 0..23
// HH - hour, 2 digits, 00..23
// h - hour, handled exactly like H (24 hour range)
// hh - hour, handled exactly like HH (24 hour range)
// k - hour, exactly 1 character is consumed
// kk - hour, 2 digits, 00..23
// m - minute, exactly 1 character is consumed
// mm - minute, 2 digits, 00..59
// s - second, exactly 1 character is consumed
// ss - second, 2 digits, 00..59
// Fields without a specifier in the format stay 0.
// Every failure is reported through `error_invalid_time` with code 0.
fn (mut p DateTimeParser) parse() !Time {
	mut year_ := 0
	mut month_ := 0
	mut day_in_month := 0
	mut hour_ := 0
	mut minute_ := 0
	mut second_ := 0
	tokens := extract_tokens(p.format) or {
		return error_invalid_time(0, 'malformed format string: ${err}')
	}
	for _, token in tokens {
		match token {
			'YYYY' {
				year_ = p.must_be_int(4) or {
					return error_invalid_time(0, 'end of string reached before the full year was specified')
				}
			}
			'YY' {
				year_ = p.must_be_int(2) or {
					return error_invalid_time(0, 'end of string reached before the full year was specified')
				}
			}
			'M' {
				month_ = p.must_be_int_with_minimum_length(1, 2, false) or {
					return error_invalid_time(0, 'end of string reached before the month was specified')
				}
				if month_ < 1 || month_ > 12 {
					return error_invalid_time(0, 'month must be between 1 and 12')
				}
			}
			'MM' {
				month_ = p.must_be_int(2) or {
					return error_invalid_time(0, 'end of string reached before the month was specified')
				}
				if month_ < 1 || month_ > 12 {
					return error_invalid_time(0, 'month must be between 01 and 12')
				}
			}
			'MMMM' {
				month_ = p.must_be_valid_month() or { return err }
			}
			'D' {
				day_in_month = p.must_be_int_with_minimum_length(1, 2, false) or {
					return error_invalid_time(0, 'end of string reached before the day was specified')
				}
				if day_in_month < 1 || day_in_month > 31 {
					return error_invalid_time(0, 'day must be between 1 and 31')
				}
			}
			'DD' {
				// this message used to talk about the month, although the day is parsed here
				day_in_month = p.must_be_int(2) or {
					return error_invalid_time(0, 'end of string reached before the day was specified')
				}
				if day_in_month < 1 || day_in_month > 31 {
					return error_invalid_time(0, 'day must be between 01 and 31')
				}
			}
			'H' {
				hour_ = p.must_be_int_with_minimum_length(1, 2, false) or {
					return error_invalid_time(0, 'end of string reached before hours where specified')
				}
				if hour_ < 0 || hour_ > 23 {
					return error_invalid_time(0, 'hour must be between 0 and 23')
				}
			}
			'HH' {
				hour_ = p.must_be_int(2) or {
					return error_invalid_time(0, 'end of string reached before hours where specified')
				}
				if hour_ < 0 || hour_ > 23 {
					return error_invalid_time(0, 'hour must be between 00 and 23')
				}
			}
			'h' {
				hour_ = p.must_be_int_with_minimum_length(1, 2, false) or {
					return error_invalid_time(0, 'end of string reached before hours where specified')
				}
				if hour_ < 0 || hour_ > 23 {
					return error_invalid_time(0, 'hour must be between 0 and 23')
				}
			}
			'hh' {
				hour_ = p.must_be_int(2) or {
					return error_invalid_time(0, 'end of string reached before hours where specified')
				}
				if hour_ < 0 || hour_ > 23 {
					return error_invalid_time(0, 'hour must be between 00 and 23')
				}
			}
			'k' {
				// NOTE(review): only a single character is consumed here, so a two
				// digit hour can not be matched by `k` - confirm whether that is intended.
				hour_ = p.must_be_int(1) or {
					return error_invalid_time(0, 'end of string reached before hours where specified')
				}
				if hour_ < 0 || hour_ > 23 {
					return error_invalid_time(0, 'hour must be between 0 and 23')
				}
			}
			'kk' {
				hour_ = p.must_be_int(2) or {
					return error_invalid_time(0, 'end of string reached before hours where specified')
				}
				if hour_ < 0 || hour_ > 23 {
					return error_invalid_time(0, 'hour must be between 00 and 23')
				}
			}
			'm' {
				// NOTE(review): only a single character is consumed here, so a two
				// digit minute can not be matched by `m` - confirm whether that is intended.
				minute_ = p.must_be_int(1) or {
					return error_invalid_time(0, 'end of string reached before minutes where specified')
				}
				if minute_ < 0 || minute_ > 59 {
					return error_invalid_time(0, 'minute must be between 0 and 59')
				}
			}
			'mm' {
				minute_ = p.must_be_int(2) or {
					return error_invalid_time(0, 'end of string reached before minutes where specified')
				}
				if minute_ < 0 || minute_ > 59 {
					return error_invalid_time(0, 'minute must be between 00 and 59')
				}
			}
			's' {
				// NOTE(review): only a single character is consumed here, so a two
				// digit second can not be matched by `s` - confirm whether that is intended.
				second_ = p.must_be_int(1) or {
					return error_invalid_time(0, 'end of string reached before seconds where specified')
				}
				if second_ < 0 || second_ > 59 {
					return error_invalid_time(0, 'second must be between 0 and 59')
				}
			}
			'ss' {
				second_ = p.must_be_int(2) or {
					return error_invalid_time(0, 'end of string reached before seconds where specified')
				}
				if second_ < 0 || second_ > 59 {
					return error_invalid_time(0, 'second must be between 00 and 59')
				}
			}
			else {
				// everything that is not a known specifier has to appear verbatim in the input
				p.must_be_string(token) or { return error_invalid_time(0, '${err}') }
			}
		}
	}
	return new_time(
		year: year_
		month: month_
		day: day_in_month
		hour: hour_
		minute: minute_
		second: second_
	)
}

View File

@ -8,7 +8,7 @@ module time
// the differences between ISO-8601 and RFC 3339. // the differences between ISO-8601 and RFC 3339.
pub fn parse_rfc3339(s string) !Time { pub fn parse_rfc3339(s string) !Time {
if s == '' { if s == '' {
return error_invalid_time(0) return error_invalid_time(0, 'datetime string is empty')
} }
// Normalize the input before parsing. Good since iso8601 doesn't permit lower case `t` and `z`. // Normalize the input before parsing. Good since iso8601 doesn't permit lower case `t` and `z`.
sn := s.replace_each(['t', 'T', 'z', 'Z']) sn := s.replace_each(['t', 'T', 'z', 'Z'])
@ -54,24 +54,26 @@ pub fn parse_rfc3339(s string) !Time {
return t return t
} }
return error_invalid_time(9) return error_invalid_time(9, 'malformed date')
} }
// parse returns time from a date string in "YYYY-MM-DD HH:mm:ss" format. // parse returns time from a date string in "YYYY-MM-DD HH:mm:ss" format.
pub fn parse(s string) !Time { pub fn parse(s string) !Time {
if s == '' { if s == '' {
return error_invalid_time(0) return error_invalid_time(0, 'datetime string is empty')
}
pos := s.index(' ') or {
return error_invalid_time(1, 'string has no space between date and time')
} }
pos := s.index(' ') or { return error_invalid_time(1) }
symd := s[..pos] symd := s[..pos]
ymd := symd.split('-') ymd := symd.split('-')
if ymd.len != 3 { if ymd.len != 3 {
return error_invalid_time(2) return error_invalid_time(2, 'date must be in the form of y-m-d')
} }
shms := s[pos..] shms := s[pos..]
hms := shms.split(':') hms := shms.split(':')
if hms.len != 3 { if hms.len != 3 {
return error_invalid_time(9) return error_invalid_time(9, 'time must be in the form of H:i:s')
} }
hour_ := hms[0][1..] hour_ := hms[0][1..]
minute_ := hms[1] minute_ := hms[1]
@ -85,22 +87,22 @@ pub fn parse(s string) !Time {
isecond := second_.int() isecond := second_.int()
// eprintln('>> iyear: $iyear | imonth: $imonth | iday: $iday | ihour: $ihour | iminute: $iminute | isecond: $isecond') // eprintln('>> iyear: $iyear | imonth: $imonth | iday: $iday | ihour: $ihour | iminute: $iminute | isecond: $isecond')
if iyear > 9999 || iyear < -9999 { if iyear > 9999 || iyear < -9999 {
return error_invalid_time(3) return error_invalid_time(3, 'year must be between -10000 and 10000')
} }
if imonth > 12 || imonth < 1 { if imonth > 12 || imonth < 1 {
return error_invalid_time(4) return error_invalid_time(4, 'month must be between 1 and 12')
} }
if iday > 31 || iday < 1 { if iday > 31 || iday < 1 {
return error_invalid_time(5) return error_invalid_time(5, 'day must be between 1 and 31')
} }
if ihour > 23 || ihour < 0 { if ihour > 23 || ihour < 0 {
return error_invalid_time(6) return error_invalid_time(6, 'hours must be between 0 and 24')
} }
if iminute > 59 || iminute < 0 { if iminute > 59 || iminute < 0 {
return error_invalid_time(7) return error_invalid_time(7, 'minutes must be between 0 and 60')
} }
if isecond > 59 || isecond < 0 { if isecond > 59 || isecond < 0 {
return error_invalid_time(8) return error_invalid_time(8, 'seconds must be between 0 and 60')
} }
res := new_time(Time{ res := new_time(Time{
year: iyear year: iyear
@ -113,6 +115,32 @@ pub fn parse(s string) !Time {
return res return res
} }
// parse_format parses the datetime string `s`, according to the custom `format`.
// It returns an error, when `s` is empty, or when `s` does not match `format`.
// Characters in `format` that are not one of the specifiers below must appear verbatim in `s`.
// Supported specifiers:
// YYYY - 4 digit year
// YY - 2 digit year
// M - month, 1..12
// MM - month, 2 digits, 01..12
// MMMM - name of month
// D - day of the month, 1..31
// DD - day of the month, 2 digits, 01..31
// H - hour, 0..23
// HH - hour, 2 digits, 00..23
// h - hour, handled like H
// hh - hour, handled like HH
// k - hour, a single character is read
// kk - hour, 2 digits, 00..23
// m - minute, a single character is read
// mm - minute, 2 digits, 00..59
// s - second, a single character is read
// ss - second, 2 digits, 00..59
pub fn parse_format(s string, format string) !Time {
	if s.len == 0 {
		return error_invalid_time(0, 'datetime string is empty')
	}
	mut parser := new_date_time_parser(s, format)
	parsed := parser.parse()!
	return parsed
}
// parse_iso8601 parses rfc8601 time format yyyy-MM-ddTHH:mm:ss.dddddd+dd:dd as local time // parse_iso8601 parses rfc8601 time format yyyy-MM-ddTHH:mm:ss.dddddd+dd:dd as local time
// the fraction part is difference in milli seconds and the last part is offset // the fraction part is difference in milli seconds and the last part is offset
// from UTC time and can be both +/- HH:mm // from UTC time and can be both +/- HH:mm
@ -120,12 +148,12 @@ pub fn parse(s string) !Time {
// also checks and support for leapseconds should be added in future PR // also checks and support for leapseconds should be added in future PR
pub fn parse_iso8601(s string) !Time { pub fn parse_iso8601(s string) !Time {
if s == '' { if s == '' {
return error_invalid_time(0) return error_invalid_time(0, 'datetime string is empty')
} }
t_i := s.index('T') or { -1 } t_i := s.index('T') or { -1 }
parts := if t_i != -1 { [s[..t_i], s[t_i + 1..]] } else { s.split(' ') } parts := if t_i != -1 { [s[..t_i], s[t_i + 1..]] } else { s.split(' ') }
if !(parts.len == 1 || parts.len == 2) { if !(parts.len == 1 || parts.len == 2) {
return error_invalid_time(12) return error_invalid_time(12, 'malformed date')
} }
year, month, day := parse_iso8601_date(parts[0])! year, month, day := parse_iso8601_date(parts[0])!
mut hour_, mut minute_, mut second_, mut microsecond_, mut unix_offset, mut is_local_time := 0, 0, 0, 0, i64(0), true mut hour_, mut minute_, mut second_, mut microsecond_, mut unix_offset, mut is_local_time := 0, 0, 0, 0, i64(0), true
@ -157,13 +185,15 @@ pub fn parse_iso8601(s string) !Time {
// parse_rfc2822 returns time from a date string in RFC 2822 datetime format. // parse_rfc2822 returns time from a date string in RFC 2822 datetime format.
pub fn parse_rfc2822(s string) !Time { pub fn parse_rfc2822(s string) !Time {
if s == '' { if s == '' {
return error_invalid_time(0) return error_invalid_time(0, 'datetime string is empty')
} }
fields := s.split(' ') fields := s.split(' ')
if fields.len < 5 { if fields.len < 5 {
return error_invalid_time(1) return error_invalid_time(1, 'datetime string must have 5 components, has: ${fields.len}')
}
pos := months_string.index(fields[2]) or {
return error_invalid_time(2, 'invalid month format')
} }
pos := months_string.index(fields[2]) or { return error_invalid_time(2) }
mm := pos / 3 + 1 mm := pos / 3 + 1
unsafe { unsafe {
tmstr := malloc_noscan(s.len * 2) tmstr := malloc_noscan(s.len * 2)
@ -178,16 +208,16 @@ fn parse_iso8601_date(s string) !(int, int, int) {
year, month, day, dummy := 0, 0, 0, u8(0) year, month, day, dummy := 0, 0, 0, u8(0)
count := unsafe { C.sscanf(&char(s.str), c'%4d-%2d-%2d%c', &year, &month, &day, &dummy) } count := unsafe { C.sscanf(&char(s.str), c'%4d-%2d-%2d%c', &year, &month, &day, &dummy) }
if count != 3 { if count != 3 {
return error_invalid_time(10) return error_invalid_time(10, 'datetime string must have 3 components, but has ${count}')
} }
if year > 9999 { if year > 9999 {
return error_invalid_time(13) return error_invalid_time(13, 'year must be smaller than 10000')
} }
if month > 12 { if month > 12 {
return error_invalid_time(14) return error_invalid_time(14, 'month must be smaller than 12')
} }
if day > 31 { if day > 31 {
return error_invalid_time(15) return error_invalid_time(15, 'day must be smaller than 31')
} }
return year, month, day return year, month, day
} }
@ -234,16 +264,16 @@ fn parse_iso8601_time(s string) !(int, int, int, int, i64, bool) {
count++ // Increment count because skipped microsecond count++ // Increment count because skipped microsecond
} }
if count < 4 { if count < 4 {
return error_invalid_time(10) return error_invalid_time(10, 'malformed date')
} }
} }
is_local_time := plus_min_z == `a` && count == 4 is_local_time := plus_min_z == `a` && count == 4
is_utc := plus_min_z == `Z` && count == 5 is_utc := plus_min_z == `Z` && count == 5
if !(count == 7 || is_local_time || is_utc) { if !(count == 7 || is_local_time || is_utc) {
return error_invalid_time(11) return error_invalid_time(11, 'malformed date')
} }
if plus_min_z != `+` && plus_min_z != `-` && !is_utc && !is_local_time { if plus_min_z != `+` && plus_min_z != `-` && !is_utc && !is_local_time {
return error_invalid_time(12) return error_invalid_time(12, 'missing timezone')
} }
mut unix_offset := 0 mut unix_offset := 0
if offset_hour > 0 { if offset_hour > 0 {

View File

@ -6,16 +6,18 @@ module time
// TimeParseError represents a time parsing error. // TimeParseError represents a time parsing error.
pub struct TimeParseError { pub struct TimeParseError {
Error Error
code int code int
message string
} }
// msg implements the `IError.msg()` method for `TimeParseError`. // msg implements the `IError.msg()` method for `TimeParseError`.
pub fn (err TimeParseError) msg() string { pub fn (err TimeParseError) msg() string {
return 'Invalid time format code: ${err.code}' return 'Invalid time format code: ${err.code}, error: ${err.message}'
} }
fn error_invalid_time(code int) IError { fn error_invalid_time(code int, message string) IError {
return TimeParseError{ return TimeParseError{
code: code code: code
message: message
} }
} }

View File

@ -201,3 +201,39 @@ fn test_ad_second_to_parse_result_pre_2001() {
assert future_tm.str() == '2000-01-01 04:01:00' assert future_tm.str() == '2000-01-01 04:01:00'
assert now_tm.unix < future_tm.unix assert now_tm.unix < future_tm.unix
} }
// test_parse_format checks `time.parse_format` with a numeric format, a format that uses
// a month name, a format with single letter specifiers, and an input that has to be rejected.
fn test_parse_format() {
	// fixed width numeric fields
	mut input := '2018-01-27 12:48:34'
	mut parsed := time.parse_format(input, 'YYYY-MM-DD HH:mm:ss') or {
		eprintln('> failing format: ${input} | err: ${err}')
		assert false
		return
	}
	assert parsed.year == 2018
	assert parsed.month == 1
	assert parsed.day == 27
	assert parsed.hour == 12
	assert parsed.minute == 48
	assert parsed.second == 34

	// the month is given by its name
	input = '2018-November-27 12:48:20'
	parsed = time.parse_format(input, 'YYYY-MMMM-DD HH:mm:ss') or {
		eprintln('> failing format: ${input} | err: ${err}')
		assert false
		return
	}
	assert parsed.year == 2018
	assert parsed.month == 11
	assert parsed.day == 27
	assert parsed.hour == 12
	assert parsed.minute == 48
	assert parsed.second == 20

	// single letter specifiers, without zero padding
	input = '2018-1-2 1:8:2'
	parsed = time.parse_format(input, 'YYYY-M-D H:m:s') or {
		eprintln('> failing format: ${input} | err: ${err}')
		assert false
		return
	}
	assert parsed.year == 2018
	assert parsed.month == 1
	assert parsed.day == 2
	assert parsed.hour == 1
	assert parsed.minute == 8
	assert parsed.second == 2

	// `M` and `D` must not accept zero padded values like 01, so parsing has to fail here
	input = '2018-01-02 1:8:2'
	parsed = time.parse_format(input, 'YYYY-M-D H:m:s') or { return }
	eprintln('> failing for datetime: ${input}, the datetime string should not have passed the format "YYYY-M-D H:m:s"')
	assert false
}

View File

@ -43,6 +43,24 @@ pub fn sleep(dur Duration) {
#while (new Date().getTime() < now + Number(toWait)) {} #while (new Date().getTime() < now + Number(toWait)) {}
} }
// new_time returns `t` unchanged, when its `unix` field is already set (non zero).
// Otherwise it returns a new Time, whose fields are copied one by one from `t`,
// through inline JS statements.
// NOTE(review): no Unix timestamp is calculated in this body. `unix` is only copied,
// and it is 0 whenever the copying path is reached - confirm whether that is intended.
pub fn new_time(t Time) Time {
// an already set `unix` field means that there is nothing to do
if t.unix != 0 {
return t
}
mut res := Time{}
// the lines starting with `#` are inline JS, copying the wrapped `.val` of each field
#res.year.val = t.year.val
#res.month.val = t.month.val
#res.day.val = t.day.val
#res.hour.val = t.hour.val
#res.minute.val = t.minute.val
#res.second.val = t.second.val
#res.microsecond.val = t.microsecond.val
#res.unix.val = t.unix.val
return res
}
pub fn ticks() i64 { pub fn ticks() i64 {
t := i64(0) t := i64(0)
#t.val = BigInt(new Date().getTime()) #t.val = BigInt(new Date().getTime())