From 5cd074a49ea52dc1fb7d292703e48efb3b889fb1 Mon Sep 17 00:00:00 2001 From: Roy Ivy III Date: Tue, 24 Jan 2023 02:02:25 -0600 Subject: [PATCH] builtin: improve multi-platform portability for `string.split_into_lines()` (#17078) --- vlib/builtin/js/string.js.v | 2 +- vlib/builtin/js/string_test.js.v | 34 +++++++++++++++++++++++++++----- vlib/builtin/string.v | 26 +++++++++++++++++------- vlib/builtin/string_test.v | 27 ++++++++++++++++++++++++- 4 files changed, 75 insertions(+), 14 deletions(-) diff --git a/vlib/builtin/js/string.js.v b/vlib/builtin/js/string.js.v index ad54e449f1..518779d21c 100644 --- a/vlib/builtin/js/string.js.v +++ b/vlib/builtin/js/string.js.v @@ -792,7 +792,7 @@ pub fn (s string) split_into_lines() []string { if s.len == 0 { return res } - #res.arr.arr = s.str.split("\n") + #res.arr.arr = s.str.split(/\r?\n|\r/) #if (res.arr.arr[res.arr.arr.length-1] == "") res.arr.arr.pop(); #res.arr.len = new int(res.arr.arr.length); #res.arr.cap = new int(res.arr.arr.length); diff --git a/vlib/builtin/js/string_test.js.v b/vlib/builtin/js/string_test.js.v index 042fca10e0..57437e44ee 100644 --- a/vlib/builtin/js/string_test.js.v +++ b/vlib/builtin/js/string_test.js.v @@ -791,10 +791,18 @@ fn filter(b u8) bool { return b != `a` } -/* fn test_split_into_lines() { - line_content := 'Line' - text_crlf := '$line_content\r\n$line_content\r\n$line_content' + line_content := 'line content' + + text_cr := '${line_content}\r${line_content}\r${line_content}' + lines_cr := text_cr.split_into_lines() + + assert lines_cr.len == 3 + for line in lines_cr { + assert line == line_content + } + + text_crlf := '${line_content}\r\n${line_content}\r\n${line_content}' lines_crlf := text_crlf.split_into_lines() assert lines_crlf.len == 3 @@ -802,15 +810,31 @@ fn test_split_into_lines() { assert line == line_content } - text_lf := '$line_content\n$line_content\n$line_content' + text_lf := '${line_content}\n${line_content}\n${line_content}' lines_lf := text_lf.split_into_lines() assert lines_lf.len == 3 for line in lines_lf { assert line == line_content } + + text_mixed := '${line_content}\n${line_content}\r${line_content}' + lines_mixed := text_mixed.split_into_lines() + + assert lines_mixed.len == 3 + for line in lines_mixed { + assert line == line_content + } + + text_mixed_trailers := '${line_content}\n${line_content}\r${line_content}\r\r\r\n\n\n\r\r' + lines_mixed_trailers := text_mixed_trailers.split_into_lines() + + assert lines_mixed_trailers.len == 9 + for line in lines_mixed_trailers { + assert (line == line_content) || (line == '') + } } -*/ + fn test_string_literal_with_backslash() { a := 'HelloWorld' assert a == 'HelloWorld' diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v index 37d427cab6..b2bbe3526d 100644 --- a/vlib/builtin/string.v +++ b/vlib/builtin/string.v @@ -841,22 +841,34 @@ pub fn (s string) split_nth(delim string, nth int) []string { // split_into_lines splits the string by newline characters. // newlines are stripped. -// Both `\n` and `\r\n` newline endings are supported. +// `\r` (MacOS), `\n` (POSIX), and `\r\n` (WinOS) line endings are all supported (including mixed line endings). +// NOTE: algorithm is "greedy", consuming '\r\n' as a single line ending with higher priority than '\r' and '\n' as multiple endings [direct_array_access] pub fn (s string) split_into_lines() []string { mut res := []string{} if s.len == 0 { return res } - mut start := 0 + cr := `\r` + lf := `\n` + mut line_start := 0 for i := 0; i < s.len; i++ { - if s[i] == 10 { - res << if start == i { '' } else { s[start..i].trim_right('\r') } - start = i + 1 + if line_start <= i { + if s[i] == lf { + res << if line_start == i { '' } else { s[line_start..i] } + line_start = i + 1 + } else if s[i] == cr { + res << if line_start == i { '' } else { s[line_start..i] } + if ((i + 1) < s.len) && (s[i + 1] == lf) { + line_start = i + 2 + } else { + line_start = i + 1 + } + } } } - if start < s.len { - res << s[start..] + if line_start < s.len { + res << s[line_start..] } return res } diff --git a/vlib/builtin/string_test.v b/vlib/builtin/string_test.v index 6697d53463..9d01d70bcb 100644 --- a/vlib/builtin/string_test.v +++ b/vlib/builtin/string_test.v @@ -883,7 +883,16 @@ fn filter(b u8) bool { } fn test_split_into_lines() { - line_content := 'Line' + line_content := 'line content' + + text_cr := '${line_content}\r${line_content}\r${line_content}' + lines_cr := text_cr.split_into_lines() + + assert lines_cr.len == 3 + for line in lines_cr { + assert line == line_content + } + text_crlf := '${line_content}\r\n${line_content}\r\n${line_content}' lines_crlf := text_crlf.split_into_lines() @@ -899,6 +908,22 @@ fn test_split_into_lines() { for line in lines_lf { assert line == line_content } + + text_mixed := '${line_content}\n${line_content}\r${line_content}' + lines_mixed := text_mixed.split_into_lines() + + assert lines_mixed.len == 3 + for line in lines_mixed { + assert line == line_content + } + + text_mixed_trailers := '${line_content}\n${line_content}\r${line_content}\r\r\r\n\n\n\r\r' + lines_mixed_trailers := text_mixed_trailers.split_into_lines() + + assert lines_mixed_trailers.len == 9 + for line in lines_mixed_trailers { + assert (line == line_content) || (line == '') + } } fn test_string_literal_with_backslash() {