Skip to main content

agentic_codebase/format/
compression.rs

1//! LZ4 string pool compression and decompression.
2//!
3//! The string pool stores all variable-length strings (names, paths, docs)
4//! in a single contiguous block, compressed with LZ4 for compactness.
5
6use crate::types::{AcbError, AcbResult};
7
8/// Compress data using LZ4 block compression.
9///
10/// The output includes a prepended size header for safe decompression.
11pub fn compress(data: &[u8]) -> Vec<u8> {
12    lz4_flex::compress_prepend_size(data)
13}
14
15/// Decompress LZ4-compressed data with prepended size.
16///
17/// # Errors
18///
19/// Returns `AcbError::Compression` if the data is corrupt or truncated.
20pub fn decompress(data: &[u8]) -> AcbResult<Vec<u8>> {
21    lz4_flex::decompress_size_prepended(data)
22        .map_err(|e| AcbError::Compression(format!("LZ4 decompression failed: {}", e)))
23}
24
25/// A string pool builder that collects strings and records their offsets.
26///
27/// Strings are stored contiguously as UTF-8. Each string is referenced
28/// by `(offset, length)`.
29#[derive(Debug, Default)]
30pub struct StringPoolBuilder {
31    data: Vec<u8>,
32}
33
34impl StringPoolBuilder {
35    /// Create a new empty string pool builder.
36    pub fn new() -> Self {
37        Self { data: Vec::new() }
38    }
39
40    /// Add a string to the pool and return its (offset, length).
41    pub fn add(&mut self, s: &str) -> (u32, u16) {
42        let offset = self.data.len() as u32;
43        let len = s.len() as u16;
44        self.data.extend_from_slice(s.as_bytes());
45        (offset, len)
46    }
47
48    /// Return the uncompressed data.
49    pub fn data(&self) -> &[u8] {
50        &self.data
51    }
52
53    /// Return the uncompressed size.
54    pub fn uncompressed_size(&self) -> usize {
55        self.data.len()
56    }
57
58    /// Compress and return the pool data.
59    pub fn compress(&self) -> Vec<u8> {
60        compress(&self.data)
61    }
62}
63
64/// A read-only string pool backed by decompressed data.
65#[derive(Debug, Clone)]
66pub struct StringPool {
67    data: Vec<u8>,
68}
69
70impl StringPool {
71    /// Create a string pool from decompressed data.
72    pub fn from_data(data: Vec<u8>) -> Self {
73        Self { data }
74    }
75
76    /// Create a string pool by decompressing LZ4 data.
77    pub fn from_compressed(compressed: &[u8]) -> AcbResult<Self> {
78        let data = decompress(compressed)?;
79        Ok(Self { data })
80    }
81
82    /// Get a string by offset and length.
83    ///
84    /// # Errors
85    ///
86    /// Returns `AcbError::Corrupt` if the range is out of bounds or not valid UTF-8.
87    pub fn get(&self, offset: u32, len: u16) -> AcbResult<&str> {
88        let start = offset as usize;
89        let end = start + len as usize;
90        if end > self.data.len() {
91            return Err(AcbError::Corrupt(offset as u64));
92        }
93        std::str::from_utf8(&self.data[start..end]).map_err(|_| AcbError::Corrupt(offset as u64))
94    }
95
96    /// Total size of the decompressed pool.
97    pub fn len(&self) -> usize {
98        self.data.len()
99    }
100
101    /// Returns true if the pool is empty.
102    pub fn is_empty(&self) -> bool {
103        self.data.is_empty()
104    }
105}