From 7b48f7ac9ecad9e6d174592814533b79b925b495 Mon Sep 17 00:00:00 2001
From: joe-conigliaro
Date: Wed, 14 Aug 2019 16:45:56 +1000
Subject: [PATCH] encoding.csv module

---
 vlib/encoding/csv/csv_test.v |  69 ++++++++++++++
 vlib/encoding/csv/reader.v   | 168 +++++++++++++++++++++++++++++++++++
 2 files changed, 237 insertions(+)
 create mode 100644 vlib/encoding/csv/csv_test.v
 create mode 100644 vlib/encoding/csv/reader.v

diff --git a/vlib/encoding/csv/csv_test.v b/vlib/encoding/csv/csv_test.v
new file mode 100644
index 0000000000..7ecf29424a
--- /dev/null
+++ b/vlib/encoding/csv/csv_test.v
@@ -0,0 +1,69 @@
+import encoding.csv
+
+// test_encoding_csv reads a small document with a header, a commented row and quoted fields
+fn test_encoding_csv() {
+	// test reading
+	data := 'name,email,phone,other\njoe,joe@blow.com,0400000000,test\nsam,sam@likesham.com,0433000000,"test quoted field"\n#chris,chris@nomail.com,94444444,"commented row"\nmike,mike@mikesbikes.com,98888888,"bike store"\n'
+	mut csv_reader := csv.new_reader(data)
+
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or {
+			break
+		}
+		row_count++
+		// every row has four fields, the last unquoted one included
+		assert row.len == 4
+		if row_count == 1 {
+			assert row[0] == 'name'
+			assert row[3] == 'other'
+		}
+		if row_count == 2 {
+			assert row[0] == 'joe'
+			assert row[3] == 'test'
+		}
+		if row_count == 3 {
+			assert row[0] == 'sam'
+			// quoted field
+			assert row[3] == 'test quoted field'
+		}
+		if row_count == 4 {
+			// the commented row before this one is skipped
+			assert row[0] == 'mike'
+		}
+	}
+
+	assert row_count == 4
+
+	// test writing to come
+}
+
+// test_encoding_csv_edge_cases covers a quoted field followed by more fields, a blank line,
+// a trailing delimiter and a final row without a line ending
+fn test_encoding_csv_edge_cases() {
+	data := 'a,"b c",d\n\ne,f,\ng,h'
+	mut csv_reader := csv.new_reader(data)
+
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or {
+			break
+		}
+		row_count++
+		if row_count == 1 {
+			assert row.len == 3
+			assert row[1] == 'b c'
+			assert row[2] == 'd'
+		}
+		if row_count == 2 {
+			assert row.len == 3
+			assert row[2] == ''
+		}
+		if row_count == 3 {
+			assert row.len == 2
+			assert row[1] == 'h'
+		}
+	}
+
+	assert row_count == 3
+}
diff --git a/vlib/encoding/csv/reader.v b/vlib/encoding/csv/reader.v
new file mode 100644
index 0000000000..6ee69124ec
--- /dev/null
+++ b/vlib/encoding/csv/reader.v
@@ -0,0 +1,168 @@
+module csv
+
+// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+
+// Once interfaces are further along the idea would be to have something similar to
+// go's io.reader & bufio.reader rather than reading the whole file into string, this
+// would then satisfy that interface. I designed it this way to be easily adapted.
+
+const (
+	err_comment_is_delim = error('encoding.csv: comment cannot be the same as delimiter')
+	err_invalid_delim = error('encoding.csv: invalid delimiter')
+	err_eof = error('encoding.csv: end of file')
+	err_invalid_le = error('encoding.csv: could not find any valid line endings.')
+)
+
+
+// Reader parses csv rows out of an in-memory string, one row per read() call.
+struct Reader {
+	// not used yet
+	// has_header        bool
+	// headings          []string
+	// the complete csv text
+	data              string
+mut:
+	// byte that separates fields
+	delimiter         byte
+	// a row starting with this byte is skipped
+	comment           byte
+	// set once a bare \r (pre OS X Mac) line ending has been detected
+	is_mac_pre_osx_le bool
+	// offset into data where the next row starts
+	row_pos           int
+}
+
+// new_reader returns a Reader over data using `,` as delimiter and `#` as comment marker
+pub fn new_reader(data string) *Reader {
+	return &Reader{
+		delimiter: `,`,
+		comment: `#`,
+		data: data
+	}
+}
+
+// read() reads one row from the csv file
+// it returns the fields of the next row, or an error (err_eof once all rows were read)
+pub fn (r mut Reader) read() ?[]string {
+	l := r.read_record() or {
+		return error(err)
+	}
+	return l
+}
+
+// Once we have multi dimensional array
+// pub fn (r mut Reader) read_all() ?[][]string {
+// 	mut records := []string
+// 	for {
+// 		record := r.read_record() or {
+// 			if error(err).error == err_eof.error {
+// 				return records
+// 			} else {
+// 				return error(err)
+// 			}
+// 		}
+// 		records << record
+// 	}
+// 	return records
+// }
+
+// read_line returns the next raw line of data without its line ending.
+// Line endings may be \n, \r\n or, when the data contains no \n at all, a bare \r.
+// It returns err_eof once all data is consumed and err_invalid_le when the
+// data contains no line ending at all.
+fn (r mut Reader) read_line() ?string {
+	// everything has been consumed
+	if r.row_pos >= r.data.len {
+		return err_eof
+	}
+	le := if r.is_mac_pre_osx_le { '\r' } else { '\n' }
+	mut i := r.data.index_after(le, r.row_pos)
+	if i == -1 {
+		if r.row_pos == 0 {
+			// check for pre osx mac line endings
+			i = r.data.index_after('\r', r.row_pos)
+			if i != -1 {
+				r.is_mac_pre_osx_le = true
+			} else {
+				// no valid line endings found
+				return err_invalid_le
+			}
+		} else {
+			// the last row has no trailing line ending, it runs to the end of data
+			i = r.data.len
+		}
+	}
+	mut line := r.data.substr(r.row_pos, i)
+	r.row_pos = i+1
+	// normalize win line endings (remove extra \r)
+	if !r.is_mac_pre_osx_le && (line.len >= 1 && line[line.len-1] == `\r`) {
+		line = line.left(line.len-1)
+	}
+	return line
+}
+
+// read_record reads the next row that is neither blank nor a comment and splits it
+// into fields. A field may be wrapped in double quotes, in which case it may contain
+// the delimiter; escaped quotes ("") inside a quoted field are not supported.
+// It returns err_invalid_delim when a quoted field is not closed or is followed by
+// anything other than the delimiter, and passes on the errors of read_line.
+fn (r mut Reader) read_record() ?[]string {
+	if r.delimiter == r.comment {
+		return err_comment_is_delim
+	}
+	mut line := ''
+	for {
+		l := r.read_line() or {
+			return error(err)
+		}
+		line = l
+		// skip blank and commented lines
+		if line.len == 0 || line[0] == r.comment {
+			continue
+		}
+		break
+	}
+	mut fields := []string
+	mut i := -1
+	for {
+		// nothing left after a delimiter: the row ends with an empty field
+		if line.len == 0 {
+			fields << ''
+			break
+		}
+		// not quoted
+		if line[0] != `"` {
+			i = line.index(r.delimiter.str())
+			if i == -1 {
+				// last field, it takes the rest of the line
+				fields << line
+				break
+			}
+			fields << line.left(i)
+			line = line.right(i+1)
+			continue
+		}
+		// quoted: drop the opening quote and find the closing one
+		line = line.right(1)
+		i = line.index('"')
+		if i == -1 {
+			// the quote is never closed
+			return err_invalid_delim
+		}
+		fields << line.left(i)
+		if i+1 == line.len {
+			// the closing quote ends the row
+			break
+		}
+		if line[i+1] != r.delimiter {
+			// a closing quote must be followed by the delimiter
+			return err_invalid_delim
+		}
+		// continue after the closing quote and the delimiter
+		line = line.right(i+2)
+	}
+
+	return fields
+}