1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00
v/vlib/encoding/csv/reader.v

165 lines
3.5 KiB
V
Raw Normal View History

2020-02-03 07:00:36 +03:00
// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
2019-08-14 09:45:56 +03:00
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
2019-08-17 15:51:20 +03:00
module csv
2019-08-14 09:45:56 +03:00
// Once interfaces are further along, the idea is to offer something similar to
// Go's io.Reader & bufio.Reader rather than reading the whole file into a string; this
// Reader would then satisfy that interface. It is designed to be easily adapted to that.
// Errors returned by the csv Reader.
const (
	// the comment byte and the delimiter byte are configured to the same value
	err_comment_is_delim = error('encoding.csv: comment cannot be the same as delimiter')
	// the delimiter is rejected by valid_delim, or a record could not be split into fields
	err_invalid_delim = error('encoding.csv: invalid delimiter')
	// all of the data has been consumed
	err_eof = error('encoding.csv: end of file')
	// the data contains neither a '\n' nor a '\r' line ending
	err_invalid_le = error('encoding.csv: could not find any valid line endings')
)
// Reader parses csv records out of a string that holds the complete csv data.
struct Reader {
	// not used yet
	// has_header bool
	// headings []string
	// the complete csv input that records are read from
	data string
pub mut:
	// byte that separates the fields of a record (new_reader sets it to `,`)
	delimiter byte
	// a line starting with this byte is skipped by read_record,
	// unless it is part of a multi-line quoted field (new_reader sets it to `#`)
	comment byte
	// set by read_line when the data only contains '\r' line endings
	is_mac_pre_osx_le bool
	// offset into `data` at which the next line starts
	row_pos int
}
2019-09-03 14:57:04 +03:00
// new_reader creates a Reader over `data`, using `,` as the field delimiter
// and `#` as the comment byte. Both can be changed on the returned Reader.
pub fn new_reader(data string) &Reader {
	return &Reader{
		delimiter: `,`,
		comment: `#`,
		data: data
	}
}
// read reads one row from the csv data and returns its fields.
// Any error reported by read_record is propagated to the caller.
pub fn (mut r Reader) read() ?[]string {
	l := r.read_record()?
	return l
}
// To be enabled once multi dimensional arrays are supported:
// pub fn (mut r Reader) read_all() ?[][]string {
// 	mut records := []string{}
// 	for {
// 		record := r.read_record() or {
// 			if error(err).error == err_eof.error {
// 				return records
// 			} else {
// 				return error(err)
// 			}
// 		}
// 		records << record
// 	}
// 	return records
// }
2020-05-17 14:51:18 +03:00
// read_line returns the next line of `r.data` without its line ending and
// moves `r.row_pos` to the start of the following line.
// Lines are terminated by '\n' (a preceding '\r' is stripped), or by '\r'
// alone when no '\n' exists in the data at all.
// It returns err_eof once all data has been consumed, and err_invalid_le when
// the data does not contain any line ending.
fn (mut r Reader) read_line() ?string {
	// last record was already consumed
	// (`>=` because row_pos is publicly mutable and may have been moved past the end)
	if r.row_pos >= r.data.len {
		return err_eof
	}
	le := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
	mut i := r.data.index_after(le, r.row_pos)
	if i == -1 {
		if r.row_pos == 0 {
			// check for pre osx mac line endings
			i = r.data.index_after('\r', r.row_pos)
			if i != -1 {
				r.is_mac_pre_osx_le = true
			} else {
				// no valid line endings found
				return err_invalid_le
			}
		} else {
			// No line ending at the end of the data: the last line runs up to
			// the very end, so that its final character is not cut off.
			i = r.data.len
		}
	}
	mut line := r.data[r.row_pos..i]
	// step over the line ending, but never beyond the end of the data,
	// so that the next call reports err_eof
	r.row_pos = if i < r.data.len { i + 1 } else { r.data.len }
	// normalize win line endings (remove extra \r)
	if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len - 1] == `\r`) {
		line = line[..line.len - 1]
	}
	return line
}
2020-05-17 14:51:18 +03:00
// read_record reads the next record from the data and splits it into fields.
// Empty lines and lines starting with `r.comment` are skipped. A field may be
// enclosed in double quotes; a quoted field that is not closed on its line is
// continued with the following lines, which are joined with '\n'.
// It returns err_comment_is_delim or err_invalid_delim for an unusable
// delimiter/comment configuration, and propagates the errors of read_line.
fn (mut r Reader) read_record() ?[]string {
	if r.delimiter == r.comment {
		return err_comment_is_delim
	}
	if !valid_delim(r.delimiter) {
		return err_invalid_delim
	}
	// need_read: another line has to be fetched before parsing can go on
	mut need_read := true
	// keep_raw: we are inside an unterminated quoted field, so the fetched
	// lines (including empty and comment lines) belong to that field
	mut keep_raw := false
	mut line := ''
	mut fields := []string{}
	mut i := -1
	for {
		if need_read {
			l := r.read_line()?
			if l.len <= 0 {
				if keep_raw {
					line += '\n'
				}
				continue
			} else if l[0] == r.comment {
				if keep_raw {
					line += '\n' + l
				}
				continue
			} else {
				if keep_raw {
					line += '\n'
				}
				line += l
			}
			need_read = false
			keep_raw = false
		}
		// An empty remainder (the record ended with a delimiter) is handled as
		// an unquoted, empty last field instead of indexing into it.
		if line.len == 0 || line[0] != `"` { // not quoted
			j := line.index(r.delimiter.str()) or {
				// last
				fields << line[..line.len]
				break
			}
			i = j
			fields << line[..i]
			line = line[i + 1..]
			continue
		} else { // quoted
			j := line[1..].index('"') or {
				// no closing quote on this line: continue with the next one
				need_read = true
				keep_raw = true
				continue
			}
			// drop the opening quote; `j` is now the index of the closing quote
			line = line[1..]
			if j + 1 == line.len {
				// last record
				fields << line[..j]
				break
			}
			next := line[j + 1]
			if next == r.delimiter {
				fields << line[..j]
				// Skip the closing quote and the delimiter, so that the next
				// field is parsed from its own first character, whether it is
				// quoted, unquoted or empty.
				if j + 2 == line.len {
					line = ''
				} else {
					line = line[j + 2..]
				}
				continue
			}
			line = line[1..]
		}
		if i <= -1 && fields.len == 0 {
			return err_invalid_delim
		}
	}
	return fields
}
2019-08-17 15:51:20 +03:00
// valid_delim reports whether `b` may be used as a field delimiter.
// The zero byte, the double quote and the line ending bytes '\r' and '\n'
// are rejected; every other byte is accepted.
fn valid_delim(b byte) bool {
	if b == 0 || b == `"` {
		return false
	}
	is_line_ending := b == `\r` || b == `\n`
	return !is_line_ending
}