use crate::error::{NtfsError, Result};
/// Ceiling on the total decompressed length in bytes (1 GiB); `grow` refuses
/// to extend the output buffer beyond it.
const MAX_OUTPUT: usize = 1 << 30;
/// Size in bytes of the per-chunk decode window (4 KiB); `decompress_chunk`
/// uses it to reject chunks that decode past that window.
const CHUNK_MAX: usize = 4096;
/// Decompresses a chunked compressed stream into a freshly allocated buffer.
///
/// The input is read as consecutive chunks (the layout matches LZNT1 as used
/// by NTFS). Each chunk starts with a 16-bit little-endian header: the low
/// 12 bits hold the payload length minus one and bit 15 marks the payload as
/// compressed. Compressed payloads are handed to `decompress_chunk`; every
/// other payload is appended verbatim. Reading stops at an all-zero header or
/// as soon as fewer than two bytes remain.
///
/// # Errors
///
/// Returns `NtfsError::BadCompression` when a header announces more payload
/// than the input still holds, and propagates any error reported by
/// `decompress_chunk` or `grow`.
pub fn decompress(input: &[u8]) -> Result<Vec<u8>> {
    let mut output = Vec::new();
    // `rest` is the still-unread tail of the input; it shrinks chunk by chunk.
    let mut rest = input;
    while let [lo, hi, tail @ ..] = rest {
        let header = u16::from_le_bytes([*lo, *hi]);
        if header == 0 {
            // An all-zero header terminates the stream.
            break;
        }
        let payload_len = usize::from(header & 0x0FFF) + 1;
        if tail.len() < payload_len {
            return Err(NtfsError::BadCompression("chunk extends past input"));
        }
        let (payload, remaining) = tail.split_at(payload_len);
        rest = remaining;
        if header & 0x8000 == 0 {
            // Stored chunk: copy the payload through unchanged.
            let start = output.len();
            grow(&mut output, payload.len())?;
            output[start..].copy_from_slice(payload);
        } else {
            decompress_chunk(payload, &mut output)?;
        }
    }
    Ok(output)
}
/// Decodes the body of one compressed LZNT1 chunk, appending the bytes to `out`.
///
/// The body is a sequence of groups: one flag byte followed by up to eight
/// items. Flag bits are consumed least-significant first. A clear bit means
/// the item is a single literal byte; a set bit means it is a 16-bit
/// little-endian back-reference token into the bytes this chunk has already
/// produced (see `split_token` for the token layout).
///
/// A chunk never decodes to more than `CHUNK_MAX` bytes; this is enforced on
/// every literal and every back-reference, not only between tokens.
///
/// # Errors
///
/// Returns `NtfsError::BadCompression` when a token is truncated, appears
/// before any byte was produced, points before the start of the chunk, or when
/// the chunk would decode past the 4 KiB window. Errors from `grow` are
/// propagated.
fn decompress_chunk(chunk: &[u8], out: &mut Vec<u8>) -> Result<()> {
    let chunk_start = out.len();
    let mut i = 0;
    while i < chunk.len() {
        let flags = chunk[i];
        i += 1;
        for bit in 0..8 {
            if i >= chunk.len() {
                break;
            }
            // Bytes this chunk has emitted so far; invariant: <= CHUNK_MAX.
            let produced = out.len() - chunk_start;

            if flags & (1 << bit) == 0 {
                // Literal byte.
                if produced >= CHUNK_MAX {
                    return Err(NtfsError::BadCompression(
                        "chunk decodes past 4 KiB window",
                    ));
                }
                grow(out, 1)?;
                let end = out.len();
                out[end - 1] = chunk[i];
                i += 1;
                continue;
            }

            // Back-reference token.
            let token_bytes = chunk
                .get(i..i + 2)
                .ok_or(NtfsError::BadCompression("truncated back-reference"))?;
            let token = u16::from_le_bytes([token_bytes[0], token_bytes[1]]);
            i += 2;
            if produced == 0 {
                return Err(NtfsError::BadCompression("back-reference at chunk start"));
            }
            let (offset, length) = split_token(token, produced);
            if offset > produced {
                return Err(NtfsError::BadCompression(
                    "back-reference before chunk start",
                ));
            }
            if produced + length > CHUNK_MAX {
                return Err(NtfsError::BadCompression(
                    "chunk decodes past 4 KiB window",
                ));
            }
            let dst = out.len();
            grow(out, length)?;
            // Copy byte by byte: source and destination may overlap
            // (offset < length encodes a repeating run).
            for k in 0..length {
                out[dst + k] = out[dst - offset + k];
            }
        }
    }
    Ok(())
}

/// Splits a back-reference token into `(offset, length)`.
///
/// The high bits of the token hold `offset - 1` and the low bits hold
/// `length - 3`. The offset field is 4 bits wide while at most 16 bytes of
/// the chunk have been produced and gains one bit each time `produced - 1`
/// doubles past that, up to 12 bits; the length field takes the remaining
/// bits. The caller must pass `1 <= produced <= CHUNK_MAX`.
fn split_token(token: u16, produced: usize) -> (usize, usize) {
    let mut offset_bits = 4u32;
    let mut threshold = 0x10usize;
    // `produced > threshold` is the reference `produced - 1 >= threshold`.
    while produced > threshold && offset_bits < 12 {
        offset_bits += 1;
        threshold <<= 1;
    }
    let length_bits = 16 - offset_bits;
    let length_mask = (1u16 << length_bits) - 1;
    let offset = usize::from(token >> length_bits) + 1;
    let length = usize::from(token & length_mask) + 3;
    (offset, length)
}

/// Extends `out` by `n` zero bytes, refusing to let it exceed `MAX_OUTPUT`.
///
/// # Errors
///
/// Returns `NtfsError::BadCompression` if the new length would overflow
/// `usize` or exceed `MAX_OUTPUT`; `out` is left untouched in that case.
fn grow(out: &mut Vec<u8>, n: usize) -> Result<()> {
    let new_len = out
        .len()
        .checked_add(n)
        .filter(|&l| l <= MAX_OUTPUT)
        .ok_or(NtfsError::BadCompression("output exceeds ceiling"))?;
    out.resize(new_len, 0);
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps `body` (1..=4096 bytes) in a compressed-chunk header and appends
    /// the zero end-of-stream header.
    fn compressed_chunk(body: &[u8]) -> Vec<u8> {
        assert!((1..=4096).contains(&body.len()));
        let mut v = Vec::new();
        let header = 0x8000u16 | (3 << 12) | ((body.len() - 1) as u16);
        v.extend_from_slice(&header.to_le_bytes());
        v.extend_from_slice(body);
        v.extend_from_slice(&0u16.to_le_bytes());
        v
    }

    /// Wraps `body` (1..=4096 bytes) in a stored-chunk header and appends the
    /// zero end-of-stream header.
    fn uncompressed_chunk(body: &[u8]) -> Vec<u8> {
        assert!((1..=4096).contains(&body.len()));
        let mut v = Vec::new();
        let header = (3u16 << 12) | ((body.len() - 1) as u16);
        v.extend_from_slice(&header.to_le_bytes());
        v.extend_from_slice(body);
        v.extend_from_slice(&0u16.to_le_bytes());
        v
    }

    #[test]
    fn decompresses_uncompressed_chunk() {
        let stream = uncompressed_chunk(b"verbatim bytes");
        assert_eq!(decompress(&stream).unwrap(), b"verbatim bytes");
    }

    #[test]
    fn decompresses_all_literals() {
        let body = [0x00, b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h'];
        let out = decompress(&compressed_chunk(&body)).unwrap();
        assert_eq!(out, b"abcdefgh");
    }

    #[test]
    // The `- 1` / `- 3` biases are spelled out to mirror the token layout.
    #[allow(clippy::identity_op)]
    fn decompresses_back_reference() {
        // Three bytes produced: offset lives in the top 4 bits.
        let token: u16 = ((3 - 1) << 12) | (3 - 3);
        let tb = token.to_le_bytes();
        let body = [0x08, b'a', b'b', b'c', tb[0], tb[1]];
        let out = decompress(&compressed_chunk(&body)).unwrap();
        assert_eq!(out, b"abcabc");
    }

    #[test]
    // The `- 1` / `- 3` biases are spelled out to mirror the token layout.
    #[allow(clippy::identity_op)]
    fn decompresses_run_length() {
        let token: u16 = ((1 - 1) << 12) | (10 - 3);
        let tb = token.to_le_bytes();
        let body = [0x02, b'x', tb[0], tb[1]];
        let out = decompress(&compressed_chunk(&body)).unwrap();
        assert_eq!(out, b"xxxxxxxxxxx");
    }

    #[test]
    fn decodes_reference_vector_prefix() {
        // Opening bytes of the LZNT1 example in the MS-XCA specification.
        let body = [
            0x88, b'F', b'#', b' ', 0x00, 0x20, b'G', b' ', b'A', 0x00, 0x10,
        ];
        let out = decompress(&compressed_chunk(&body)).unwrap();
        assert_eq!(out, b"F# F# G A A ");
    }

    #[test]
    fn offset_field_widens_after_sixteen_bytes() {
        // 16 bytes produced: still a 4-bit offset, so 0xF000 is offset 16.
        let mut body = vec![0x00];
        body.extend_from_slice(b"abcdefgh");
        body.push(0x00);
        body.extend_from_slice(b"ijklmnop");
        body.extend_from_slice(&[0x01, 0x00, 0xF0]);
        let out = decompress(&compressed_chunk(&body)).unwrap();
        assert_eq!(out, b"abcdefghijklmnopabc");

        // 17 bytes produced: 5-bit offset, so 0x8000 is offset 17.
        let mut body = vec![0x00];
        body.extend_from_slice(b"abcdefgh");
        body.push(0x00);
        body.extend_from_slice(b"ijklmnop");
        body.extend_from_slice(&[0x02, b'q', 0x00, 0x80]);
        let out = decompress(&compressed_chunk(&body)).unwrap();
        assert_eq!(out, b"abcdefghijklmnopqabc");
    }

    #[test]
    fn empty_input_yields_empty() {
        assert!(decompress(&[]).unwrap().is_empty());
        assert!(decompress(&0u16.to_le_bytes()).unwrap().is_empty());
    }

    #[test]
    fn rejects_back_reference_at_chunk_start() {
        let body = [0x01u8, 0x00, 0x00];
        assert!(matches!(
            decompress(&compressed_chunk(&body)),
            Err(NtfsError::BadCompression(_))
        ));
    }

    #[test]
    fn fuzzer_crash_no_chunk_overflow() {
        let crash = [
            0x4a, 0xc0, 0x36, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x1a, 0x00, 0x4a,
            0xc0, 0x36, 0x36, 0x09, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
            0x18, 0x18, 0x18, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
            0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x0a, 0x00, 0x1a, 0x00, 0x4a, 0xc0, 0x36, 0xff,
            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0x01, 0x00, 0x41, 0xff, 0x00, 0x40, 0x00, 0x00, 0x40, 0x00, 0x00,
            0x40, 0x01, 0xff, 0x51, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
            0xff, 0xff, 0xff, 0xff,
        ];
        // Only requirement: malformed input must not panic.
        let _ = decompress(&crash);
    }

    #[test]
    fn accepts_chunk_that_exactly_fills_window() {
        // One literal plus a run of 4095 bytes: exactly 4096 bytes.
        let body = [0x02, b'x', 0xFC, 0x0F];
        let out = decompress(&compressed_chunk(&body)).unwrap();
        assert_eq!(out, vec![b'x'; 4096]);
    }

    #[test]
    fn rejects_literal_past_4096() {
        let body = [0x02, b'x', 0xFC, 0x0F, b'y'];
        assert!(matches!(
            decompress(&compressed_chunk(&body)),
            Err(NtfsError::BadCompression(_))
        ));
    }

    #[test]
    fn rejects_back_reference_past_4096() {
        // One literal plus a run of 4096 bytes: one byte too many.
        let body = [0x02, b'x', 0xFD, 0x0F];
        assert!(matches!(
            decompress(&compressed_chunk(&body)),
            Err(NtfsError::BadCompression(_))
        ));
    }

    #[test]
    fn rejects_chunk_decoding_past_4096() {
        // Offset 1, length 10 at every position within the window.
        let token: u16 = 0x07;
        let mut body = vec![0x02u8, b'x'];
        body.extend_from_slice(&token.to_le_bytes());
        let mut group = vec![0xFFu8];
        for _ in 0..8 {
            group.extend_from_slice(&token.to_le_bytes());
        }
        // 481 runs of 10 bytes in a 1024-byte body: far more than 4096 bytes.
        for _ in 0..60 {
            body.extend_from_slice(&group);
        }
        assert!(matches!(
            decompress(&compressed_chunk(&body)),
            Err(NtfsError::BadCompression(_))
        ));
    }

    #[test]
    fn rejects_truncated_chunk() {
        let header = 0x8000u16 | (3 << 12) | 9;
        let mut stream = header.to_le_bytes().to_vec();
        stream.extend_from_slice(&[0x00, b'a']);
        assert!(matches!(
            decompress(&stream),
            Err(NtfsError::BadCompression(_))
        ));
    }
}