2022-06-05 18:53:45 +03:00
|
|
|
// [rfc1952](https://datatracker.ietf.org/doc/html/rfc1952) compliant
|
|
|
|
// gzip compression/decompression
|
|
|
|
|
|
|
|
module gzip
|
|
|
|
|
2023-02-08 21:37:04 +03:00
|
|
|
import compress as compr
|
2022-06-05 18:53:45 +03:00
|
|
|
import hash.crc32
|
|
|
|
|
|
|
|
// compress compresses an array of bytes using gzip and returns the compressed bytes in a new array.
// The result is a 10 byte header (no optional fields, no timestamp, unknown OS),
// followed by the raw DEFLATE stream and the 8 byte trailer (CRC32 + ISIZE).
// An error from the underlying DEFLATE compressor is propagated to the caller.
// Example: compressed := gzip.compress(b)!
pub fn compress(data []u8) ![]u8 {
	compressed := compr.compress(data, 0)!
	// header
	mut result := [
		u8(0x1f), // magic numbers (1F 8B)
		0x8b,
		0x08, // deflate
		0x00, // header flags
		0x00, // 4-byte timestamp, 0 = no timestamp (00 00 00 00)
		0x00,
		0x00,
		0x00,
		0x00, // extra flags
		0xff, // operating system id (0xff = unknown)
	] // 10 bytes
	result << compressed
	// trailer
	// rfc 1952 2.3: CRC32 and ISIZE are both stored least-significant byte first
	checksum := crc32.sum(data)
	length := u32(data.len)
	result << [
		u8(checksum),
		u8(checksum >> 8),
		u8(checksum >> 16),
		u8(checksum >> 24),
		u8(length),
		u8(length >> 8),
		u8(length >> 16),
		u8(length >> 24),
	] // 8 bytes
	return result
}

// DecompressParams selects which integrity checks `validate` and `decompress` perform.
// All checks are enabled by default.
[params]
pub struct DecompressParams {
	verify_header_checksum bool = true // compare the optional header CRC16 (FHCRC)
	verify_length          bool = true // compare the trailer ISIZE with the decompressed length
	verify_checksum        bool = true // compare the trailer CRC32 with the decompressed data
}

// bit masks for the FLG byte (byte 3) of the gzip header, see rfc 1952 2.3.1
pub const (
	reserved_bits = 0b1110_0000
	ftext         = 0b0000_0001
	fextra        = 0b0000_0100
	fname         = 0b0000_1000
	fcomment      = 0b0001_0000
	fhcrc         = 0b0000_0010
)

// smallest possible gzip member: 10 byte fixed header + 8 byte trailer
const min_header_length = 18

// GzipHeader holds the details that `validate` extracted from a gzip header.
[noinit]
pub struct GzipHeader {
pub mut:
	length            int = 10 // total header size in bytes, i.e. the offset of the DEFLATE stream
	extra             []u8 // content of the FEXTRA field (without its length prefix)
	filename          []u8 // content of the FNAME field (without the zero terminator)
	comment           []u8 // content of the FCOMMENT field (without the zero terminator)
	modification_time u32  // MTIME field, 0 means no timestamp is available
	operating_system  u8   // OS field
}

// read_u32_le decodes the 4 bytes starting at `data[pos]` as a little-endian u32.
// The caller has to make sure that `pos + 3` is a valid index.
fn read_u32_le(data []u8, pos int) u32 {
	return u32(data[pos]) | (u32(data[pos + 1]) << 8) | (u32(data[pos + 2]) << 16) | (u32(data[
		pos + 3]) << 24)
}

// validate validates the header and returns its details if valid.
// It returns an error when the data is too short, when the magic numbers or
// the compression method do not match, when reserved flag bits are set, or
// when the optional header checksum does not match (only checked when
// `params.verify_header_checksum` is true).
pub fn validate(data []u8, params DecompressParams) !GzipHeader {
	if data.len < gzip.min_header_length {
		return error('data is too short, not gzip compressed?')
	} else if data[0] != 0x1f || data[1] != 0x8b {
		return error('wrong magic numbers, not gzip compressed?')
	} else if data[2] != 0x08 {
		return error('gzip data is not compressed with DEFLATE')
	}
	mut header := GzipHeader{}

	// parse flags, we ignore most of them, but we still need to parse them
	// correctly, so we don't accidentally decompress something that belongs
	// to the header
	flags := data[3]

	if flags & gzip.reserved_bits > 0 {
		// rfc 1952 2.3.1.2 Compliance
		// A compliant decompressor must give an error indication if any
		// reserved bit is non-zero, since such a bit could indicate the
		// presence of a new field that would cause subsequent data to be
		// interpreted incorrectly.
		return error('reserved flags are set, unsupported field detected')
	}

	// fixed fields, always present (data.len >= 18 was checked above)
	header.modification_time = read_u32_le(data, 4)
	header.operating_system = data[9]

	if flags & gzip.fextra > 0 {
		// XLEN is a 2 byte little-endian length, followed by XLEN bytes of data
		xlen := int(data[header.length]) | (int(data[header.length + 1]) << 8)
		extra_start := header.length + 2
		extra_end := extra_start + xlen
		if extra_end > data.len {
			return error('data too short')
		}
		header.extra = data[extra_start..extra_end]
		header.length = extra_end
	}
	if flags & gzip.fname > 0 {
		// filename is zero-terminated, so skip until we hit a zero byte
		for header.length < data.len && data[header.length] != 0x00 {
			header.filename << data[header.length]
			header.length++
		}
		if header.length >= data.len {
			// ran off the end without finding the terminator
			return error('data too short')
		}
		header.length++
	}
	if flags & gzip.fcomment > 0 {
		// comment is zero-terminated, so skip until we hit a zero byte
		for header.length < data.len && data[header.length] != 0x00 {
			header.comment << data[header.length]
			header.length++
		}
		if header.length >= data.len {
			// ran off the end without finding the terminator
			return error('data too short')
		}
		header.length++
	}
	if flags & gzip.fhcrc > 0 {
		// 2 bytes CRC16 + 8 bytes trailer still have to fit
		if header.length + 10 > data.len {
			return error('data too short')
		}
		// the CRC16 consists of the two least significant bytes of the CRC32
		// of all header bytes before it, stored least-significant byte first
		checksum_header := u16(crc32.sum(data[..header.length]) & 0xffff)
		checksum_header_expected := u16(data[header.length]) | (u16(data[header.length + 1]) << 8)
		if params.verify_header_checksum && checksum_header != checksum_header_expected {
			return error('header checksum verification failed')
		}
		header.length += 2
	}
	// the 8 byte trailer has to fit behind the header
	if header.length + 8 > data.len {
		return error('data too short')
	}
	return header
}

// decompress decompresses an array of bytes using gzip and returns the decompressed bytes in a new array.
// The header is checked with `validate` first; afterwards the trailer (CRC32 and
// ISIZE, both little-endian) is compared against the decompressed data, unless
// the respective check is disabled in `params`.
// NOTE: data that was written with a big-endian trailer does not pass the length
// and checksum verification; it can still be read with both checks disabled.
// Example: decompressed := gzip.decompress(b)!
pub fn decompress(data []u8, params DecompressParams) ![]u8 {
	gzip_header := validate(data, params)!
	header_length := gzip_header.length
	trailer_start := data.len - 8

	decompressed := compr.decompress(data[header_length..trailer_start], 0)!

	if params.verify_length {
		// ISIZE is the size of the original input modulo 2^32
		length_expected := read_u32_le(data, trailer_start + 4)
		if u32(decompressed.len) != length_expected {
			return error('length verification failed, got ${decompressed.len}, expected ${length_expected}')
		}
	}
	if params.verify_checksum {
		checksum := crc32.sum(decompressed)
		checksum_expected := read_u32_le(data, trailer_start)
		if checksum != checksum_expected {
			return error('checksum verification failed')
		}
	}
	return decompressed
}
|