From 9a9d539e6f8dab33def5b0a04eb68815d2c187f0 Mon Sep 17 00:00:00 2001
From: yuyi
Date: Fri, 11 Jun 2021 00:24:20 +0800
Subject: [PATCH] csv: fix csv fields with double quotes (#10399)

---
Review notes (free-form text after the `---` marker; not part of the commit
message and not part of either diff):

  reader.v -- quoted-field branch of Reader.read_record
  * The removed line located the closing quote with `line[1..].index('"')`,
    i.e. the first `"` after the opening one. The added loop instead walks
    `line` from index 1 and inspects every `"` it meets.
  * A `"` at the last index of `line`, or one whose next character is not
    `"`, is taken as the closing quote: `need_more` is cleared and `j` is
    set to `n - 1`. Because `line = line[1..]` later drops the opening
    quote, `j` is the closing quote's index in the shortened string, so
    `line[..j]` is the field body without either surrounding quote.
  * A `"` immediately followed by another `"` is treated as an escaped
    quote: `has_double_quotes` is set and `n` is advanced past both.
  * If the loop ends without finding a closing quote, `need_more` is still
    true and the branch runs the same `need_read = true`, `keep_raw = true`,
    `continue` statements that the old `or { ... }` block ran. Presumably
    this pulls in the next input line; the code that consumes those flags
    is outside the hunks shown here.
  * Both places that append a quoted field to `fields` now append
    `line[..j].replace('""', '"')` when `has_double_quotes` is set, and the
    plain `line[..j]` slice otherwise.

  reader_test.v -- test_field_double_quotes
  * Feeds three rows to `csv.new_reader`: a quoted field containing a
    newline, a quoted field containing doubled quotes and a newline, and a
    leading quoted field whose last doubled quote sits directly before the
    closing quote.
  * Expects exactly three rows, with each `""` in the input read back as a
    single `"` and the embedded newline preserved.

  NOTE(review): this copy of the patch had its whitespace collapsed; line
  breaks and leading tabs below were reconstructed from the diff markers
  and V's usual tab indentation -- confirm against the upstream commit.
  NOTE(review): the second reader.v hunk header declares 12 lines on each
  side, but only 11 are present in this copy; a context line appears to be
  missing near `line = line[1..]`. Verify before applying.

 vlib/encoding/csv/reader.v      | 23 ++++++++++++++++++++---
 vlib/encoding/csv/reader_test.v | 24 ++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/vlib/encoding/csv/reader.v b/vlib/encoding/csv/reader.v
index 28b0497014..dafd02217b 100644
--- a/vlib/encoding/csv/reader.v
+++ b/vlib/encoding/csv/reader.v
@@ -145,7 +145,24 @@ fn (mut r Reader) read_record() ?[]string {
 			line = line[i + 1..]
 			continue
 		} else { // quoted
-			j := line[1..].index('"') or {
+			mut need_more := true
+			mut has_double_quotes := false
+			mut j := 0
+			mut n := 1
+			for n < line.len {
+				if line[n] == `"` {
+					if n == line.len - 1 || line[n + 1] != `"` {
+						need_more = false
+						j = n - 1
+						break
+					} else {
+						has_double_quotes = true
+						n++
+					}
+				}
+				n++
+			}
+			if need_more {
 				need_read = true
 				keep_raw = true
 				continue
@@ -153,12 +170,12 @@ fn (mut r Reader) read_record() ?[]string {
 			line = line[1..]
 			if j + 1 == line.len { // last record
-				fields << line[..j]
+				fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
 				break
 			}
 			next := line[j + 1]
 			if next == r.delimiter {
-				fields << line[..j]
+				fields << if has_double_quotes { line[..j].replace('""', '"') } else { line[..j] }
 				if j + 2 == line.len {
 					line = ''
 				} else {
diff --git a/vlib/encoding/csv/reader_test.v b/vlib/encoding/csv/reader_test.v
index b8958a3149..cd54827ad3 100644
--- a/vlib/encoding/csv/reader_test.v
+++ b/vlib/encoding/csv/reader_test.v
@@ -227,3 +227,27 @@ fn test_field_quotes_for_parts() {
 	}
 	assert row_count == 4
 }
+
+fn test_field_double_quotes() {
+	row1 := '11,"12\n13"\n'
+	row2 := '21,"2""2""\n23"\n'
+	row3 := '"3""1""",32\n'
+	data := row1 + row2 + row3
+	mut csv_reader := csv.new_reader(data)
+	mut row_count := 0
+	for {
+		row := csv_reader.read() or { break }
+		row_count++
+		if row_count == 1 {
+			assert row[0] == '11'
+			assert row[1] == '12\n13'
+		} else if row_count == 2 {
+			assert row[0] == '21'
+			assert row[1] == '2"2"\n23'
+		} else if row_count == 3 {
+			assert row[0] == '3"1"'
+			assert row[1] == '32'
+		}
+	}
+	assert row_count == 3
+}