diff --git a/vlib/compress/README.md b/vlib/compress/README.md index b7b2f513ab..7e4b087e9f 100644 --- a/vlib/compress/README.md +++ b/vlib/compress/README.md @@ -1,4 +1,8 @@ ## Description: `compress` is a namespace for (multiple) compression algorithms supported by V. -At the moment, only `compress.zlib` and `compress.deflate` are implemented. + +At the moment, the following compression algorithms are implemented: +- `compress.deflate` +- `compress.gzip` +- `compress.zlib` diff --git a/vlib/compress/compress.v b/vlib/compress/compress.v index 062efff48a..bc0c350ba4 100644 --- a/vlib/compress/compress.v +++ b/vlib/compress/compress.v @@ -9,7 +9,7 @@ fn C.tdefl_compress_mem_to_heap(source_buf voidptr, source_buf_len usize, out_le fn C.tinfl_decompress_mem_to_heap(source_buf voidptr, source_buf_len usize, out_len &usize, flags int) voidptr // compresses an array of bytes based on providing flags and returns the compressed bytes in a new array -// see `gzip.compress([]u8)` and `zlib.compress([]u8)` for default implementations. +// NB: this is a low level api, a high level implementation like zlib/gzip should be preferred [manualfree] pub fn compress(data []u8, flags int) ?[]u8 { if u64(data.len) > compress.max_size { @@ -28,7 +28,7 @@ pub fn compress(data []u8, flags int) ?[]u8 { } // decompresses an array of bytes based on providing flags and returns the decompressed bytes in a new array -// see `gzip.decompress([]u8)` and `zlib.decompress([]u8)` for default implementations. 
+// NB: this is a low level api, a high level implementation like zlib/gzip should be preferred [manualfree] pub fn decompress(data []u8, flags int) ?[]u8 { mut out_len := usize(0) diff --git a/vlib/compress/deflate/README.md b/vlib/compress/deflate/README.md index 40d70eda15..c4dca7b06e 100644 --- a/vlib/compress/deflate/README.md +++ b/vlib/compress/deflate/README.md @@ -3,9 +3,6 @@ `compress.deflate` is a module that assists in the compression and decompression of binary data using `deflate` compression -NOTE: To decompress gzip, discard first 10 bytes of compressed bytes -then use `compress.deflate.decompress`. (Header validation won't be -performed in this case) ## Examples: diff --git a/vlib/compress/deflate/deflate.v b/vlib/compress/deflate/deflate.v index 5f66630ab4..b283b2825b 100644 --- a/vlib/compress/deflate/deflate.v +++ b/vlib/compress/deflate/deflate.v @@ -2,15 +2,14 @@ module deflate import compress -// compresses an array of bytes using gzip and returns the compressed bytes in a new array -// Example: compressed := gzip.compress(b)? +// compresses an array of bytes using deflate and returns the compressed bytes in a new array +// Example: compressed := deflate.compress(b)? pub fn compress(data []u8) ?[]u8 { return compress.compress(data, 0) } -// decompresses an array of bytes using zlib and returns the decompressed bytes in a new array -// Example: decompressed := zlib.decompress(b)? -[manualfree] +// decompresses an array of bytes using deflate and returns the decompressed bytes in a new array +// Example: decompressed := deflate.decompress(b)? 
pub fn decompress(data []u8) ?[]u8 { return compress.decompress(data, 0) }
diff --git a/vlib/compress/gzip/README.md b/vlib/compress/gzip/README.md
new file mode 100644
index 0000000000..23196a6ec4
--- /dev/null
+++ b/vlib/compress/gzip/README.md
@@ -0,0 +1,18 @@
+## Description:
+
+`compress.gzip` is a module that assists in the compression and
+decompression of binary data using `gzip` compression
+
+
+## Examples:
+
+```v
+import compress.gzip
+
+fn main() {
+	uncompressed := 'Hello world!'
+	compressed := gzip.compress(uncompressed.bytes())?
+	decompressed := gzip.decompress(compressed)?
+	assert decompressed == uncompressed.bytes()
+}
+```
diff --git a/vlib/compress/gzip/gzip.v b/vlib/compress/gzip/gzip.v
new file mode 100644
index 0000000000..642dca94ed
--- /dev/null
+++ b/vlib/compress/gzip/gzip.v
@@ -0,0 +1,131 @@
+// [rfc1952](https://datatracker.ietf.org/doc/html/rfc1952) compliant
+// gzip compression/decompression
+
+module gzip
+
+import compress
+import hash.crc32
+
+// compresses an array of bytes using gzip and returns the compressed bytes in a new array
+// Example: compressed := gzip.compress(b)?
+pub fn compress(data []u8) ?[]u8 {
+	compressed := compress.compress(data, 0)?
+	// header
+	mut result := [
+		u8(0x1f), // magic numbers (1F 8B)
+		0x8b,
+		0x08, // deflate
+		0x00, // header flags
+		0x00, // 4-byte timestamp, 0 = no timestamp (00 00 00 00)
+		0x00,
+		0x00,
+		0x00,
+		0x00, // extra flags
+		0xff, // operating system id (0xff = unknown)
+	] // 10 bytes
+	result << compressed
+	// trailer: CRC32 and ISIZE, both stored least-significant byte first
+	// (rfc 1952 2.1: all multi-byte numbers are little-endian)
+	checksum := crc32.sum(data)
+	length := data.len
+	result << [
+		u8(checksum),
+		u8(checksum >> 8),
+		u8(checksum >> 16),
+		u8(checksum >> 24),
+		u8(length),
+		u8(length >> 8),
+		u8(length >> 16),
+		u8(length >> 24),
+	] // 8 bytes
+	return result
+}
+
+// read_u32_le reads the little-endian u32 stored in `data` at `pos`.
+// The caller must ensure that `pos + 4 <= data.len`.
+fn read_u32_le(data []u8, pos int) u32 {
+	return u32(data[pos]) | (u32(data[pos + 1]) << 8) | (u32(data[pos + 2]) << 16) | (u32(data[pos + 3]) << 24)
+}
+
+// decompresses an array of bytes using gzip and returns the decompressed bytes in a new array
+// Returns an error if the header is malformed or uses reserved flags, or if the
+// CRC32 / length stored in the trailer do not match the decompressed data.
+// Example: decompressed := gzip.decompress(b)?
+pub fn decompress(data []u8) ?[]u8 {
+	if data.len < 18 {
+		return error('data is too short, not gzip compressed?')
+	} else if data[0] != 0x1f || data[1] != 0x8b {
+		return error('wrong magic numbers, not gzip compressed?')
+	} else if data[2] != 0x08 {
+		return error('gzip data is not compressed with DEFLATE')
+	}
+	// fixed header layout (rfc 1952 2.3):
+	// ID1 ID2 CM FLG MTIME(4) XFL OS, so the flag byte is at index 3
+	flags := data[3]
+	mut header_length := 10
+
+	// parse flags, we ignore most of them, but we still need to parse them
+	// correctly, so we don't accidentally decompress something that belongs
+	// to the header
+
+	if flags & 0b1110_0000 > 0 { // reserved bits
+		// rfc 1952 2.3.1.2 Compliance
+		// A compliant decompressor must give an error indication if any
+		// reserved bit is non-zero, since such a bit could indicate the
+		// presence of a new field that would cause subsequent data to be
+		// interpreted incorrectly.
+		return error('reserved flags are set, unsupported field detected')
+	}
+
+	// if flags & 0b0000_0001 {} // FTEXT
+	if flags & 0b0000_0100 > 0 { // FEXTRA, extra data
+		// XLEN is a 2-byte little-endian length, followed by XLEN bytes of data;
+		// data.len >= 18 guarantees that both XLEN bytes are readable here
+		xlen := int(data[header_length]) | (int(data[header_length + 1]) << 8)
+		header_length += 2 + xlen
+	}
+	if flags & 0b0000_1000 > 0 { // FNAME, file name
+		// filename is zero-terminated, so skip until we hit a zero byte
+		for header_length < data.len && data[header_length] != 0x00 {
+			header_length++
+		}
+		header_length++
+	}
+	if flags & 0b0001_0000 > 0 { // FCOMMENT
+		// comment is zero-terminated, so skip until we hit a zero byte
+		for header_length < data.len && data[header_length] != 0x00 {
+			header_length++
+		}
+		header_length++
+	}
+	if flags & 0b0000_0010 > 0 { // FHCRC, flag header crc
+		// 2 bytes of CRC16 plus the 8 byte trailer have to follow the header
+		if header_length + 10 > data.len {
+			return error('data too short')
+		}
+		// CRC16 = the two least significant bytes of the CRC32 of all header
+		// bytes before it, stored little-endian
+		checksum_header := crc32.sum(data[..header_length]) & 0xffff
+		checksum_header_expected := u32(data[header_length]) | (u32(data[header_length + 1]) << 8)
+		if checksum_header != checksum_header_expected {
+			return error('header checksum verification failed')
+		}
+		header_length += 2
+	}
+	if header_length + 8 > data.len {
+		return error('data too short')
+	}
+
+	decompressed := compress.decompress(data[header_length..data.len - 8], 0)?
+	// trailer: CRC32 followed by ISIZE (uncompressed length modulo 2^32), both little-endian
+	length_expected := read_u32_le(data, data.len - 4)
+	if u32(decompressed.len) != length_expected {
+		return error('length verification failed, got $decompressed.len, expected $length_expected')
+	}
+	checksum := crc32.sum(decompressed)
+	checksum_expected := read_u32_le(data, data.len - 8)
+	if checksum != checksum_expected {
+		return error('checksum verification failed')
+	}
+	return decompressed
+}
diff --git a/vlib/compress/gzip/gzip_test.v b/vlib/compress/gzip/gzip_test.v
new file mode 100644
index 0000000000..7d561bfab8
--- /dev/null
+++ b/vlib/compress/gzip/gzip_test.v
@@ -0,0 +1,148 @@
+module gzip
+
+import hash.crc32
+
+fn test_gzip() ? {
+	uncompressed := 'Hello world!'
+	compressed := compress(uncompressed.bytes())?
+	decompressed := decompress(compressed)?
+	assert decompressed == uncompressed.bytes()
+}
+
+fn test_gzip_trailer_is_little_endian() ? {
+	uncompressed := 'Hello world!'
+	compressed := compress(uncompressed.bytes())?
+	checksum := crc32.sum(uncompressed.bytes())
+	// rfc 1952: CRC32 and ISIZE are stored least-significant byte first
+	assert compressed[compressed.len - 8..] == [
+		u8(checksum),
+		u8(checksum >> 8),
+		u8(checksum >> 16),
+		u8(checksum >> 24),
+		12,
+		0,
+		0,
+		0,
+	]
+}
+
+fn assert_decompress_error(data []u8, reason string) ? {
+	decompress(data) or {
+		assert err.msg() == reason
+		return
+	}
+	return error('did not error')
+}
+
+fn test_gzip_invalid_too_short() ? {
+	assert_decompress_error([]u8{}, 'data is too short, not gzip compressed?')?
+}
+
+fn test_gzip_invalid_magic_numbers() ? {
+	assert_decompress_error([]u8{len: 100}, 'wrong magic numbers, not gzip compressed?')?
+}
+
+fn test_gzip_invalid_compression() ? {
+	mut data := []u8{len: 100}
+	data[0] = 0x1f
+	data[1] = 0x8b
+	assert_decompress_error(data, 'gzip data is not compressed with DEFLATE')?
+}
+
+fn test_gzip_with_ftext() ? {
+	uncompressed := 'Hello world!'
+	mut compressed := compress(uncompressed.bytes())?
+	compressed[3] |= 0b0000_0001 // FTEXT
+	decompressed := decompress(compressed)?
+	assert decompressed == uncompressed.bytes()
+}
+
+fn test_gzip_with_fname() ? {
+	uncompressed := 'Hello world!'
+	mut compressed := compress(uncompressed.bytes())?
+	compressed[3] |= 0b0000_1000
+	compressed.insert(10, `h`)
+	compressed.insert(11, `i`)
+	compressed.insert(12, 0x00)
+	decompressed := decompress(compressed)?
+	assert decompressed == uncompressed.bytes()
+}
+
+fn test_gzip_with_fcomment() ? {
+	uncompressed := 'Hello world!'
+	mut compressed := compress(uncompressed.bytes())?
+	compressed[3] |= 0b0001_0000
+	compressed.insert(10, `h`)
+	compressed.insert(11, `i`)
+	compressed.insert(12, 0x00)
+	decompressed := decompress(compressed)?
+	assert decompressed == uncompressed.bytes()
+}
+
+fn test_gzip_with_fname_fcomment() ? {
+	uncompressed := 'Hello world!'
+	mut compressed := compress(uncompressed.bytes())?
+	compressed[3] |= 0b0001_1000
+	compressed.insert(10, `h`)
+	compressed.insert(11, `i`)
+	compressed.insert(12, 0x00)
+	compressed.insert(10, `h`)
+	compressed.insert(11, `i`)
+	compressed.insert(12, 0x00)
+	decompressed := decompress(compressed)?
+	assert decompressed == uncompressed.bytes()
+}
+
+fn test_gzip_with_fextra() ? {
+	uncompressed := 'Hello world!'
+	mut compressed := compress(uncompressed.bytes())?
+	compressed[3] |= 0b0000_0100
+	compressed.insert(10, 2) // XLEN, low byte
+	compressed.insert(11, 0) // XLEN, high byte
+	compressed.insert(12, `h`)
+	compressed.insert(13, `i`)
+	decompressed := decompress(compressed)?
+	assert decompressed == uncompressed.bytes()
+}
+
+fn test_gzip_with_hcrc() ? {
+	uncompressed := 'Hello world!'
+	mut compressed := compress(uncompressed.bytes())?
+	compressed[3] |= 0b0000_0010
+	checksum := crc32.sum(compressed[..10])
+	compressed.insert(10, u8(checksum)) // CRC16, low byte
+	compressed.insert(11, u8(checksum >> 8)) // CRC16, high byte
+	decompressed := decompress(compressed)?
+	assert decompressed == uncompressed.bytes()
+}
+
+fn test_gzip_with_invalid_hcrc() ? {
+	uncompressed := 'Hello world!'
+	mut compressed := compress(uncompressed.bytes())?
+	compressed[3] |= 0b0000_0010
+	checksum := crc32.sum(compressed[..10])
+	compressed.insert(10, u8(checksum + 1)) // corrupted CRC16 low byte
+	compressed.insert(11, u8(checksum >> 8))
+	assert_decompress_error(compressed, 'header checksum verification failed')?
+}
+
+fn test_gzip_with_invalid_checksum() ? {
+	uncompressed := 'Hello world!'
+	mut compressed := compress(uncompressed.bytes())?
+	compressed[compressed.len - 5] += 1
+	assert_decompress_error(compressed, 'checksum verification failed')?
+}
+
+fn test_gzip_with_invalid_length() ? {
+	uncompressed := 'Hello world!'
+	mut compressed := compress(uncompressed.bytes())?
+	compressed[compressed.len - 4] += 1 // least significant byte of ISIZE
+	assert_decompress_error(compressed, 'length verification failed, got 12, expected 13')?
+}
+
+fn test_gzip_with_invalid_flags() ? {
+	uncompressed := 'Hello world!'
+	mut compressed := compress(uncompressed.bytes())?
+	compressed[3] |= 0b1000_0000
+	assert_decompress_error(compressed, 'reserved flags are set, unsupported field detected')?
+}
diff --git a/vlib/compress/zlib/zlib.v b/vlib/compress/zlib/zlib.v
index 44e4e2abc1..80039b4c2f 100644
--- a/vlib/compress/zlib/zlib.v
+++ b/vlib/compress/zlib/zlib.v
@@ -4,7 +4,6 @@ import compress
 
 // compresses an array of bytes using zlib and returns the compressed bytes in a new array
 // Example: compressed := zlib.compress(b)?
-[manualfree]
 pub fn compress(data []u8) ?[]u8 {
 	// flags = TDEFL_WRITE_ZLIB_HEADER (0x01000)
 	return compress.compress(data, 0x01000)
@@ -12,7 +11,6 @@ pub fn compress(data []u8) ?[]u8 {
 
 // decompresses an array of bytes using zlib and returns the decompressed bytes in a new array
 // Example: decompressed := zlib.decompress(b)?
-[manualfree]
 pub fn decompress(data []u8) ?[]u8 {
 	// flags = TINFL_FLAG_PARSE_ZLIB_HEADER (0x1)
 	return compress.decompress(data, 0x1)