2022-01-04 12:21:08 +03:00
|
|
|
// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved.
|
2019-08-14 09:45:56 +03:00
|
|
|
// Use of this source code is governed by an MIT license
|
|
|
|
// that can be found in the LICENSE file.
|
2019-08-17 15:51:20 +03:00
|
|
|
module csv
|
|
|
|
|
2019-08-14 09:45:56 +03:00
|
|
|
// Once interfaces are further along the idea would be to have something similar to
|
|
|
|
// go's io.reader & bufio.reader rather than reading the whole file into string, this
|
|
|
|
// would then satisfy that interface. I designed it this way to be easily adapted.
|
2021-03-30 15:27:26 +03:00
|
|
|
// ErrCommentIsDelimiter is the error reported by read_record when the
// reader's `comment` byte is equal to its `delimiter` byte.
// It is handed to callers wrapped as an IError.
struct ErrCommentIsDelimiter {
	// human readable description of the failure
	msg string = 'encoding.csv: comment cannot be the same as delimiter'
	// numeric error code; never set by this module, so it keeps its zero value
	code int
}
|
|
|
|
|
|
|
|
// ErrInvalidDelimiter is the error reported by read_record when the
// configured delimiter is rejected by valid_delim, or when a quoted first
// field is not followed by the delimiter.
// It is handed to callers wrapped as an IError.
struct ErrInvalidDelimiter {
	// human readable description of the failure
	msg string = 'encoding.csv: invalid delimiter'
	// numeric error code; never set by this module, so it keeps its zero value
	code int
}
|
|
|
|
|
|
|
|
// ErrEndOfFile is the error reported by read_line once the read position
// has reached the end of the data, i.e. there is nothing left to read.
// It is handed to callers wrapped as an IError.
struct ErrEndOfFile {
	// human readable description of the condition
	msg string = 'encoding.csv: end of file'
	// numeric error code; never set by this module, so it keeps its zero value
	code int
}
|
|
|
|
|
|
|
|
// ErrInvalidLineEnding is the error reported by read_line when the very
// first line lookup finds neither a '\n' nor a '\r' in the data.
// It is handed to callers wrapped as an IError.
struct ErrInvalidLineEnding {
	// human readable description of the failure
	msg string = 'encoding.csv: could not find any valid line endings'
	// numeric error code; never set by this module, so it keeps its zero value
	code int
}
|
2019-08-14 09:45:56 +03:00
|
|
|
|
2021-01-05 21:14:35 +03:00
|
|
|
// Reader keeps the complete CSV text in memory together with the parsing
// options and the position of the next line to be read.
struct Reader {
	// not used yet
	// has_header bool
	// headings []string
	// the full CSV input; only assigned when the Reader is constructed
	data string
pub mut:
	// byte that separates two fields of a record
	delimiter byte
	// a line whose first byte equals this value is treated as a comment
	comment byte
	// switched on by read_line when only '\r' line endings are found
	is_mac_pre_osx_le bool
	// byte offset into `data` at which the next line starts
	row_pos int
}
|
|
|
|
|
2020-11-14 20:49:36 +03:00
|
|
|
// new_reader creates a heap allocated Reader that parses `data`.
// The reader starts out with `,` as the field delimiter and `#` as the
// comment marker; both can be changed afterwards through the public fields.
pub fn new_reader(data string) &Reader {
	reader := &Reader{
		data: data
		comment: `#`
		delimiter: `,`
	}
	return reader
}
|
|
|
|
|
2020-11-14 20:49:36 +03:00
|
|
|
// read parses the next row of the CSV data.
// On success it returns one string per column of that row; any error
// produced while reading the record is passed on to the caller unchanged.
pub fn (mut r Reader) read() ?[]string {
	record := r.read_record() or { return err }
	return record
}
|
|
|
|
|
|
|
|
// Once we have multi dimensional array
|
2020-05-17 14:51:18 +03:00
|
|
|
// pub fn (mut r Reader) read_all() ?[][]string {
|
2020-04-26 14:49:31 +03:00
|
|
|
// mut records := []string{}
|
2019-08-14 09:45:56 +03:00
|
|
|
// for {
|
|
|
|
// record := r.read_record() or {
|
2021-02-28 23:20:21 +03:00
|
|
|
// if err.error == err_eof.error {
|
2019-08-14 09:45:56 +03:00
|
|
|
// return records
|
|
|
|
// } else {
|
2021-02-28 23:20:21 +03:00
|
|
|
// return err
|
2019-08-14 09:45:56 +03:00
|
|
|
// }
|
|
|
|
// }
|
|
|
|
// records << record
|
|
|
|
// }
|
|
|
|
// return records
|
|
|
|
// }
|
2020-05-17 14:51:18 +03:00
|
|
|
// read_line returns the next physical line of `r.data`, starting at
// `r.row_pos`, without its line terminator, and advances `r.row_pos` past it.
//
// Line endings:
// - '\n' is the default terminator; a '\r' directly before it (Windows
//   style) is stripped from the returned line.
// - if the very first lookup finds no '\n' but does find a '\r', the reader
//   switches to '\r'-only (pre OS X Mac) line endings for all later calls.
// - a final line that has no terminator is returned in full.
//
// Errors:
// - ErrEndOfFile when all data has already been consumed.
// - ErrInvalidLineEnding when the first lookup finds neither '\n' nor '\r'.
fn (mut r Reader) read_line() ?string {
	// `>=` rather than `==`: after an unterminated last line has been
	// consumed, row_pos ends up one past r.data.len.
	if r.row_pos >= r.data.len {
		return IError(&ErrEndOfFile{})
	}
	le := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
	mut i := r.data.index_after(le, r.row_pos)
	if i == -1 {
		if r.row_pos == 0 {
			// check for pre osx mac line endings
			i = r.data.index_after('\r', r.row_pos)
			if i != -1 {
				r.is_mac_pre_osx_le = true
			} else {
				// no valid line endings found
				return IError(&ErrInvalidLineEnding{})
			}
		} else {
			// No line ending on the last line: the line runs up to the end of
			// the data. The slice end below is exclusive, so it must be
			// r.data.len (using len - 1 would drop the final byte).
			i = r.data.len
		}
	}
	mut line := r.data[r.row_pos..i]
	// skip the terminator itself
	r.row_pos = i + 1
	// normalize win line endings (remove extra \r)
	if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len - 1] == `\r`) {
		line = line[..line.len - 1]
	}
	return line
}
|
|
|
|
|
2020-05-17 14:51:18 +03:00
|
|
|
// read_record parses the next logical record and returns its fields.
//
// Outside of a quoted field, empty lines and lines whose first byte is the
// comment byte are skipped. A field that starts with `"` may span several
// physical lines; the skipped line breaks (and any empty or comment lines in
// between) are re-inserted into the field text with '\n'. Inside a quoted
// field `""` stands for a single `"`.
//
// Errors:
// - ErrCommentIsDelimiter when the comment byte equals the delimiter.
// - ErrInvalidDelimiter when the delimiter is rejected by valid_delim, or
//   when a quoted first field is followed by something other than the
//   delimiter.
// - any error coming from read_line (e.g. end of file) is propagated.
fn (mut r Reader) read_record() ?[]string {
	if r.delimiter == r.comment {
		return IError(&ErrCommentIsDelimiter{})
	}
	if !valid_delim(r.delimiter) {
		return IError(&ErrInvalidDelimiter{})
	}
	delim := r.delimiter.ascii_str()
	mut fetch_line := true // a new physical line has to be read first
	mut in_multiline := false // we are continuing an unfinished quoted field
	mut buf := '' // the not yet consumed rest of the record
	mut fields := []string{}
	mut last_delim_pos := -1 // position of the last delimiter seen in an unquoted field
	for {
		if fetch_line {
			raw := r.read_line() ?
			if raw.len == 0 || raw[0] == r.comment {
				// Empty and comment lines are only kept when they are part
				// of a multi-line quoted field; for an empty line this
				// appends just the line break.
				if in_multiline {
					buf += '\n' + raw
				}
				continue
			}
			if in_multiline {
				buf += '\n'
			}
			buf += raw
			fetch_line = false
			in_multiline = false
		}
		if buf.len == 0 || buf[0] != `"` {
			// unquoted field: it ends at the next delimiter or at the end of the line
			pos := buf.index(delim) or {
				// no further delimiter, so this is the last field
				fields << buf[..buf.len]
				break
			}
			last_delim_pos = pos
			fields << buf[..pos]
			buf = buf[pos + 1..]
			continue
		}
		// quoted field: look for the closing quote, stepping over `""` pairs
		mut closed := false
		mut has_escaped_quotes := false
		mut quote_at := 0 // index of the closing quote once the opening quote is stripped
		mut k := 1
		for k < buf.len {
			if buf[k] == `"` {
				if k + 1 < buf.len && buf[k + 1] == `"` {
					has_escaped_quotes = true
					k += 2
					continue
				}
				closed = true
				quote_at = k - 1
				break
			}
			k++
		}
		if !closed {
			// the field continues on the next physical line
			fetch_line = true
			in_multiline = true
			continue
		}
		buf = buf[1..]
		content := buf[..quote_at]
		value := if has_escaped_quotes { content.replace('""', '"') } else { content }
		if quote_at + 1 == buf.len {
			// the closing quote is the last byte, so this is the last field
			fields << value
			break
		}
		if buf[quote_at + 1] == r.delimiter {
			fields << value
			buf = if quote_at + 2 == buf.len { '' } else { buf[quote_at + 2..] }
			continue
		}
		// The closing quote is followed by something other than the
		// delimiter. That is an error for the very first field; otherwise
		// the rest (already without its opening quote) is parsed again on
		// the next iteration.
		if last_delim_pos <= -1 && fields.len == 0 {
			return IError(&ErrInvalidDelimiter{})
		}
	}
	return fields
}
|
2019-08-17 15:51:20 +03:00
|
|
|
|
|
|
|
// valid_delim reports whether `b` may be used as a field delimiter.
// The NUL byte, the double quote and the two line ending bytes
// ('\r' and '\n') are rejected; every other byte is accepted.
fn valid_delim(b byte) bool {
	if b == 0 || b == `"` {
		return false
	}
	is_line_ending := b == `\r` || b == `\n`
	return !is_line_ending
}
|