From 69dff4b384de4ebaacc6cf0932c0258101220d13 Mon Sep 17 00:00:00 2001 From: Bastian Buck <59334447+bstnbuck@users.noreply.github.com> Date: Fri, 26 Mar 2021 07:51:55 +0100 Subject: [PATCH] os: make os module handle large files (#9439) --- cmd/tools/modules/scripting/scripting.v | 2 +- vlib/io/reader.v | 2 +- vlib/io/writer.v | 2 +- vlib/os/file.c.v | 153 +++++++++++++++++++----- vlib/os/os_c.v | 45 +++++-- 5 files changed, 158 insertions(+), 46 deletions(-) diff --git a/cmd/tools/modules/scripting/scripting.v b/cmd/tools/modules/scripting/scripting.v index f9589118f7..323aca47b0 100644 --- a/cmd/tools/modules/scripting/scripting.v +++ b/cmd/tools/modules/scripting/scripting.v @@ -120,6 +120,6 @@ pub fn used_tools_must_exist(tools []string) { pub fn show_sizes_of_files(files []string) { for f in files { size := os.file_size(f) - println('${size:10d} $f') + println('$size $f') // println('${size:10d} $f') } } diff --git a/vlib/io/reader.v b/vlib/io/reader.v index 3b7a25ea9f..a3e89d8105 100644 --- a/vlib/io/reader.v +++ b/vlib/io/reader.v @@ -70,5 +70,5 @@ pub fn read_any(r Reader) ?[]byte { // RandomReader represents a stream of data that can be read from at a random location interface RandomReader { - read_from(pos int, mut buf []byte) ?int + read_from(pos u64, mut buf []byte) ?int } diff --git a/vlib/io/writer.v b/vlib/io/writer.v index e647e189a3..322dd45e63 100644 --- a/vlib/io/writer.v +++ b/vlib/io/writer.v @@ -8,5 +8,5 @@ pub interface Writer { // RandomWriter represents a stream of data that can be wrote to // at a random pos pub interface RandomWriter { - write_to(pos int, buf []byte) ?int + write_to(pos u64, buf []byte) ?int } diff --git a/vlib/os/file.c.v b/vlib/os/file.c.v index 8b89ed9345..5d0aa68e0c 100644 --- a/vlib/os/file.c.v +++ b/vlib/os/file.c.v @@ -13,6 +13,10 @@ struct FileInfo { size int } +fn C.fseeko(voidptr, u64, int) int + +fn C._fseeki64(voidptr, u64, int) int + // open_file can be used to open or create a file with custom flags and permissions and returns a `File` object. pub fn open_file(path string, mode string, options ...int) ?File { mut flags := 0 @@ -202,17 +206,39 @@ pub fn (mut f File) write_string(s string) ?int { // write_to implements the RandomWriter interface. // It returns how many bytes were actually written. // It resets the seek position to the end of the file. -pub fn (mut f File) write_to(pos int, buf []byte) ?int { +pub fn (mut f File) write_to(pos u64, buf []byte) ?int { if !f.is_opened { return error('file is not opened') } - C.fseek(f.cfile, pos, C.SEEK_SET) - res := int(C.fwrite(buf.data, 1, buf.len, f.cfile)) - if res == 0 && buf.len != 0 { - return error('0 bytes written') + $if x64 { + $if windows { + C._fseeki64(f.cfile, pos, C.SEEK_SET) + res := int(C.fwrite(buf.data, 1, buf.len, f.cfile)) + if res == 0 && buf.len != 0 { + return error('0 bytes written') + } + C._fseeki64(f.cfile, 0, C.SEEK_END) + return res + } $else { + C.fseeko(f.cfile, pos, C.SEEK_SET) + res := int(C.fwrite(buf.data, 1, buf.len, f.cfile)) + if res == 0 && buf.len != 0 { + return error('0 bytes written') + } + C.fseeko(f.cfile, 0, C.SEEK_END) + return res + } } - C.fseek(f.cfile, 0, C.SEEK_END) - return res + $if x32 { + C.fseek(f.cfile, pos, C.SEEK_SET) + res := int(C.fwrite(buf.data, 1, buf.len, f.cfile)) + if res == 0 && buf.len != 0 { + return error('0 bytes written') + } + C.fseek(f.cfile, 0, C.SEEK_END) + return res + } + return error('Could not write to file') } // write_bytes writes `size` bytes to the file, starting from the address in `data`. @@ -230,7 +256,7 @@ pub fn (mut f File) write_bytes(data voidptr, size int) int { // pointers to it, it will cause your programs to segfault. [deprecated: 'use File.write_ptr_at() instead'] [unsafe] -pub fn (mut f File) write_bytes_at(data voidptr, size int, pos int) int { +pub fn (mut f File) write_bytes_at(data voidptr, size int, pos u64) int { return unsafe { f.write_ptr_at(data, size, pos) } } @@ -247,11 +273,27 @@ pub fn (mut f File) write_ptr(data voidptr, size int) int { // NB: write_ptr_at is unsafe and should be used carefully, since if you pass invalid // pointers to it, it will cause your programs to segfault. [unsafe] -pub fn (mut f File) write_ptr_at(data voidptr, size int, pos int) int { - C.fseek(f.cfile, pos, C.SEEK_SET) - res := int(C.fwrite(data, 1, size, f.cfile)) - C.fseek(f.cfile, 0, C.SEEK_END) - return res +pub fn (mut f File) write_ptr_at(data voidptr, size int, pos u64) int { + $if x64 { + $if windows { + C._fseeki64(f.cfile, pos, C.SEEK_SET) + res := int(C.fwrite(data, 1, size, f.cfile)) + C._fseeki64(f.cfile, 0, C.SEEK_END) + return res + } $else { + C.fseeko(f.cfile, pos, C.SEEK_SET) + res := int(C.fwrite(data, 1, size, f.cfile)) + C.fseeko(f.cfile, 0, C.SEEK_END) + return res + } + } + $if x32 { + C.fseek(f.cfile, pos, C.SEEK_SET) + res := int(C.fwrite(data, 1, size, f.cfile)) + C.fseek(f.cfile, 0, C.SEEK_END) + return res + } + return 0 } // **************************** Read ops *************************** @@ -262,7 +304,7 @@ pub fn (f &File) read_bytes(size int) []byte { } // read_bytes_at reads `size` bytes at the given position in the file. -pub fn (f &File) read_bytes_at(size int, pos int) []byte { +pub fn (f &File) read_bytes_at(size int, pos u64) []byte { mut arr := []byte{len: size} nreadbytes := f.read_bytes_into(pos, mut arr) or { // return err @@ -274,22 +316,50 @@ pub fn (f &File) read_bytes_at(size int, pos int) []byte { // read_bytes_into fills `buf` with bytes at the given position in the file. // `buf` *must* have length greater than zero. // Returns the number of read bytes, or an error. -pub fn (f &File) read_bytes_into(pos int, mut buf []byte) ?int { +pub fn (f &File) read_bytes_into(pos u64, mut buf []byte) ?int { if buf.len == 0 { panic(@FN + ': `buf.len` == 0') } - // Note: fseek errors if pos == os.file_size, which we accept - C.fseek(f.cfile, pos, C.SEEK_SET) - // errno is only set if fread fails, so clear it first to tell - C.errno = 0 - nbytes := int(C.fread(buf.data, 1, buf.len, f.cfile)) - if C.errno != 0 { - return error(posix_get_error_msg(C.errno)) + $if x64 { + $if windows { + // Note: fseek errors if pos == os.file_size, which we accept + C._fseeki64(f.cfile, pos, C.SEEK_SET) + // errno is only set if fread fails, so clear it first to tell + C.errno = 0 + nbytes := int(C.fread(buf.data, 1, buf.len, f.cfile)) + if C.errno != 0 { + return error(posix_get_error_msg(C.errno)) + } + $if debug { + C._fseeki64(f.cfile, 0, C.SEEK_SET) + } + return nbytes + } $else { + C.fseeko(f.cfile, pos, C.SEEK_SET) + C.errno = 0 + nbytes := int(C.fread(buf.data, 1, buf.len, f.cfile)) + if C.errno != 0 { + return error(posix_get_error_msg(C.errno)) + } + $if debug { + C.fseeko(f.cfile, 0, C.SEEK_SET) + } + return nbytes + } } - $if debug { - C.fseek(f.cfile, 0, C.SEEK_SET) + $if x32 { + C.fseek(f.cfile, pos, C.SEEK_SET) + C.errno = 0 + nbytes := int(C.fread(buf.data, 1, buf.len, f.cfile)) + if C.errno != 0 { + return error(posix_get_error_msg(C.errno)) + } + $if debug { + C.fseek(f.cfile, 0, C.SEEK_SET) + } + return nbytes } - return nbytes + return error('Could not read file') } // read implements the Reader interface. @@ -307,22 +377,39 @@ pub fn (f &File) read(mut buf []byte) ?int { // read_at reads `buf.len` bytes starting at file byte offset `pos`, in `buf`. [deprecated: 'use File.read_from() instead'] -pub fn (f &File) read_at(pos int, mut buf []byte) ?int { +pub fn (f &File) read_at(pos u64, mut buf []byte) ?int { return f.read_from(pos, mut buf) } // read_from implements the RandomReader interface. -pub fn (f &File) read_from(pos int, mut buf []byte) ?int { +pub fn (f &File) read_from(pos u64, mut buf []byte) ?int { if buf.len == 0 { return 0 } - C.fseek(f.cfile, pos, C.SEEK_SET) - C.errno = 0 - nbytes := int(C.fread(buf.data, 1, buf.len, f.cfile)) - if C.errno != 0 { - return error(posix_get_error_msg(C.errno)) + $if x64 { + $if windows { + C._fseeki64(f.cfile, pos, C.SEEK_SET) + } $else { + C.fseeko(f.cfile, pos, C.SEEK_SET) + } + + C.errno = 0 + nbytes := int(C.fread(buf.data, 1, buf.len, f.cfile)) + if C.errno != 0 { + return error(posix_get_error_msg(C.errno)) + } + return nbytes } - return nbytes + $if x32 { + C.fseek(f.cfile, pos, C.SEEK_SET) + C.errno = 0 + nbytes := int(C.fread(buf.data, 1, buf.len, f.cfile)) + if C.errno != 0 { + return error(posix_get_error_msg(C.errno)) + } + return nbytes + } + return error('Could not read file') } // **************************** Utility ops *********************** diff --git a/vlib/os/os_c.v b/vlib/os/os_c.v index 6cba4bc738..5f5242f0eb 100644 --- a/vlib/os/os_c.v +++ b/vlib/os/os_c.v @@ -25,9 +25,19 @@ fn C.CopyFile(&u32, &u32, int) int fn C.execvp(file charptr, argv &charptr) int +// fn C.lstat(charptr, voidptr) u64 + +fn C._wstat64(charptr, voidptr) u64 + // fn C.proc_pidpath(int, byteptr, int) int struct C.stat { - st_size int + st_size u64 + st_mode u32 + st_mtime int +} + +struct C.__stat64 { + st_size u64 st_mode u32 st_mtime int } @@ -101,20 +111,33 @@ pub fn read_file(path string) ?string { // ***************************** OS ops ************************ // file_size returns the size of the file located in `path`. -pub fn file_size(path string) int { +pub fn file_size(path string) u64 { mut s := C.stat{} unsafe { - $if windows { - $if tinyc { - C.stat(charptr(path.str), &s) + $if x64 { + $if windows { + mut swin := C.__stat64{} + C._wstat64(path.to_wide(), voidptr(&swin)) + return swin.st_size } $else { - C._wstat(path.to_wide(), voidptr(&s)) + C.stat(charptr(path.str), &s) + return u64(s.st_size) + } + } + $if x32 { + $if debug { + println('Using os.file_size() on 32bit systems may not work on big files.') + } + $if windows { + C._wstat(path.to_wide(), voidptr(&s)) + return u64(s.st_size) + } $else { + C.stat(charptr(path.str), &s) + return u64(s.st_size) } - } $else { - C.stat(charptr(path.str), &s) } } - return s.st_size + return 0 } // mv moves files or folders from `src` to `dst`. @@ -172,7 +195,9 @@ pub fn cp(src string, dst string) ? { } } from_attr := C.stat{} - unsafe { C.stat(charptr(src.str), &from_attr) } + unsafe { + C.stat(charptr(src.str), &from_attr) + } if C.chmod(charptr(dst.str), from_attr.st_mode) < 0 { return error_with_code('failed to set permissions for $dst', int(-1)) }