bytepunch_rs/
lib.rs

1//! # Byte Punch Compression
2//!
3//! Profile-aware semantic tokenization for CML documents and other structured content.
4//!
5//! Byte Punch achieves 40-70% compression by replacing common patterns with
6//! fixed-size tokens:
7//! - 2-byte tokens: Reserved words (e.g., "shall" → 0x2001)
8//! - 4-byte tokens: Common terms (e.g., "Congress" → 0x40000001)
9//! - 8-byte tokens: Phrases (e.g., "We the People" → 0x8000000000000001)
10//!
11//! ## Compression Goals by Profile
12//!
13//! - **Legal**: 60-70% (highest due to boilerplate repetition)
14//! - **Code**: 55-65% (method names, type signatures)
15//! - **Bookstack**: 50-60% (Markdown syntax, headings)
16//!
17//! ## Key Properties
18//!
19//! - **Predictable**: Same input → same output, always
20//! - **Bidirectional**: Perfect decompression, no data loss
21//! - **Profile-aware**: Uses domain-specific dictionaries
22//! - **Fast**: Simple byte replacement, no entropy encoding
23
24pub mod compressor;
25pub mod decompressor;
26pub mod dictionary;
27pub mod error;
28
29pub use compressor::Compressor;
30pub use decompressor::Decompressor;
31pub use dictionary::Dictionary;
32pub use error::{BytePunchError, Result};
33
/// Size accounting for a single compressed document.
///
/// `ratio` is derived from the two sizes by [`CompressionStats::new`]; the
/// three token counters start at zero and are public so they can be set
/// afterwards.
#[derive(Debug, Clone, PartialEq)]
pub struct CompressionStats {
    /// Original size in bytes
    pub original_size: usize,
    /// Compressed size in bytes
    pub compressed_size: usize,
    /// Compression ratio (compressed / original); lower means smaller output
    pub ratio: f64,
    /// Number of 2-byte tokens replaced
    pub two_byte_tokens: usize,
    /// Number of 4-byte tokens replaced
    pub four_byte_tokens: usize,
    /// Number of 8-byte tokens replaced
    pub eight_byte_tokens: usize,
}

impl CompressionStats {
    /// Create new compression stats from the original and compressed sizes.
    ///
    /// `ratio` is `compressed_size / original_size`. All token counters are
    /// initialised to zero.
    ///
    /// # Edge cases
    ///
    /// - Both sizes zero (empty document): `ratio` is `1.0`, i.e. nothing was
    ///   saved and nothing was added. A plain division would give `NaN` here.
    /// - `original_size == 0` with a non-empty output: `ratio` is
    ///   `f64::INFINITY`, since the output is unboundedly larger than the input.
    pub fn new(original_size: usize, compressed_size: usize) -> Self {
        // 0.0 / 0.0 is NaN, and NaN != NaN, which would make the derived
        // `PartialEq` report a value as different from its own clone.
        let ratio = if original_size == 0 && compressed_size == 0 {
            1.0
        } else {
            compressed_size as f64 / original_size as f64
        };

        Self {
            original_size,
            compressed_size,
            ratio,
            two_byte_tokens: 0,
            four_byte_tokens: 0,
            eight_byte_tokens: 0,
        }
    }

    /// Calculate percentage saved.
    ///
    /// Returns `(1 - ratio) * 100`: positive when the output is smaller than
    /// the input, `0.0` when the sizes match, negative when the output grew.
    pub fn percentage_saved(&self) -> f64 {
        (1.0 - self.ratio) * 100.0
    }
}
69
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used when comparing derived floating-point values.
    const EPSILON: f64 = 1e-9;

    #[test]
    fn test_compression_stats() {
        let stats = CompressionStats::new(1000, 400);

        // Sizes are stored verbatim and the token counters start at zero.
        assert_eq!(stats.original_size, 1000);
        assert_eq!(stats.compressed_size, 400);
        assert_eq!(stats.two_byte_tokens, 0);
        assert_eq!(stats.four_byte_tokens, 0);
        assert_eq!(stats.eight_byte_tokens, 0);

        // Compare floats within a tolerance: exact equality only holds when
        // the operands happen to round favourably.
        assert!(
            (stats.ratio - 0.4).abs() < EPSILON,
            "unexpected ratio: {}",
            stats.ratio
        );
        assert!(
            (stats.percentage_saved() - 60.0).abs() < EPSILON,
            "unexpected percentage saved: {}",
            stats.percentage_saved()
        );
    }
}