From 04ea2824d317f8f358c027f62a74d1b40d5d3b41 Mon Sep 17 00:00:00 2001 From: Sebastian Schicho <64368773+schicho@users.noreply.github.com> Date: Fri, 21 May 2021 12:18:08 +0200 Subject: [PATCH] os: implement File.read_bytes_into_newline method for reading into a buffer, line by line (#10129) --- vlib/os/file.c.v | 40 +++++++++++++++++++++++++++++ vlib/os/file_test.v | 61 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) diff --git a/vlib/os/file.c.v b/vlib/os/file.c.v index 6f2af20bb1..beedbfb28c 100644 --- a/vlib/os/file.c.v +++ b/vlib/os/file.c.v @@ -17,6 +17,8 @@ fn C.fseeko(voidptr, u64, int) int fn C._fseeki64(voidptr, u64, int) int +fn C.getc(voidptr) int + // open_file can be used to open or create a file with custom flags and permissions and returns a `File` object. pub fn open_file(path string, mode string, options ...int) ?File { mut flags := 0 @@ -362,6 +364,44 @@ pub fn (f &File) read_bytes_at(size int, pos u64) []byte { return arr[0..nreadbytes] } +// read_bytes_into_newline reads the next bytes of the file into the provided buffer, one byte per `C.getc` call. +// Each consecutive call on the same file continues reading where the previous call ended. +// A call stops when the buffer is full, right after a newline byte (10) was stored, or at EOF, and returns the number of bytes stored (0 if EOF is hit before anything was stored); a read error is returned as an error, and an empty `buf` causes a panic. +pub fn (f &File) read_bytes_into_newline(mut buf []byte) ?int { + if buf.len == 0 { + panic(@FN + ': `buf.len` == 0') + } + newline := 10 + mut c := 0 + mut buf_ptr := 0 + mut nbytes := 0 + + for (buf_ptr < buf.len) { + c = C.getc(f.cfile) + match c { + C.EOF { // either end-of-file or a read error; feof/ferror below tell them apart + if C.feof(f.cfile) != 0 { + return nbytes + } + if C.ferror(f.cfile) != 0 { + return error('file read error') + } + } + newline { // the newline itself is stored and counted before returning + buf[buf_ptr] = byte(c) + nbytes++ + return nbytes + } + else { + buf[buf_ptr] = byte(c) + buf_ptr++ + nbytes++ + } + } + } + return nbytes +} + // read_bytes_into fills `buf` with bytes at the given position in the file. // `buf` *must* have length greater than zero. // Returns the number of read bytes, or an error. 
diff --git a/vlib/os/file_test.v b/vlib/os/file_test.v index c8ab607c7d..6144ad1aa1 100644 --- a/vlib/os/file_test.v +++ b/vlib/os/file_test.v @@ -59,6 +59,67 @@ fn testsuite_end() ? { assert !os.is_dir(tfolder) } +// test_read_bytes_into_newline_text tests reading text that contains a newline from a file. +// The 27-byte text is read step by step through an 8-byte buffer: a call returns +// as soon as a newline was stored, even before the buffer is full, and the last call +// reaches EOF before the buffer is completely filled. +fn test_read_bytes_into_newline_text() ? { + mut f := os.open_file(tfile, 'w') ? + f.write_string('Hello World!\nGood\r morning.') ? + f.close() + + f = os.open_file(tfile, 'r') ? + mut buf := []byte{len: 8} + + n0 := f.read_bytes_into_newline(mut buf) ? + assert n0 == 8 // 'Hello Wo': buffer full + + n1 := f.read_bytes_into_newline(mut buf) ? + assert n1 == 5 // 'rld!\n': stopped right after the newline + + n2 := f.read_bytes_into_newline(mut buf) ? + assert n2 == 8 // 'Good\r mo': buffer full, '\r' does not end the read + + n3 := f.read_bytes_into_newline(mut buf) ? + assert n3 == 6 // 'rning.': EOF reached before the buffer is full + + f.close() +} + +// test_read_bytes_into_newline_binary tests reading binary data that contains NUL bytes. +// The 15-byte stream is read through a 10-byte buffer: the first call fills the buffer, the second +// stops right after the newline byte at index 12, and the third reaches EOF before the buffer is full. +fn test_read_bytes_into_newline_binary() ? { + os.rm(tfile) or {} // FIXME This is a workaround for macos, because the file isn't truncated when opened with 'w' + mut bw := []byte{len: 15} + bw[9] = 0xff + bw[12] = 10 // newline + + n0_bytes := bw[0..10] + n1_bytes := bw[10..13] + n2_bytes := bw[13..] + + mut f := os.open_file(tfile, 'w') ? + f.write(bw) ? + f.close() + + f = os.open_file(tfile, 'r') ? + mut buf := []byte{len: 10} + + n0 := f.read_bytes_into_newline(mut buf) ? + assert n0 == 10 + assert buf[..n0] == n0_bytes + + n1 := f.read_bytes_into_newline(mut buf) ? + assert n1 == 3 + assert buf[..n1] == n1_bytes + + n2 := f.read_bytes_into_newline(mut buf) ? 
+ assert n2 == 2 + assert buf[..n2] == n2_bytes + f.close() +} + // test_read_eof_last_read_partial_buffer_fill tests that when reading a file // the end-of-file is detected and results in a none error being returned. This // test simulates file reading where the end-of-file is reached inside an fread