bytepunch_rs/lib.rs
1//! # Byte Punch Compression
2//!
3//! Profile-aware semantic tokenization for CML documents and other structured content.
4//!
5//! Byte Punch achieves 40-70% compression by replacing common patterns with
6//! fixed-size tokens:
7//! - 2-byte tokens: Reserved words (e.g., "shall" → 0x2001)
8//! - 4-byte tokens: Common terms (e.g., "Congress" → 0x40000001)
9//! - 8-byte tokens: Phrases (e.g., "We the People" → 0x8000000000000001)
10//!
11//! ## Compression Goals by Profile
12//!
13//! - **Legal**: 60-70% (highest due to boilerplate repetition)
14//! - **Code**: 55-65% (method names, type signatures)
15//! - **Bookstack**: 50-60% (Markdown syntax, headings)
16//!
17//! ## Key Properties
18//!
19//! - **Predictable**: Same input → same output, always
20//! - **Bidirectional**: Perfect decompression, no data loss
21//! - **Profile-aware**: Uses domain-specific dictionaries
22//! - **Fast**: Simple byte replacement, no entropy encoding
23
24pub mod compressor;
25pub mod decompressor;
26pub mod dictionary;
27pub mod error;
28
29pub use compressor::Compressor;
30pub use decompressor::Decompressor;
31pub use dictionary::Dictionary;
32pub use error::{BytePunchError, Result};
33
/// Compression statistics for a document.
///
/// Holds the input and output sizes of one compression run, the ratio derived
/// from them, and per-width token counters. [`CompressionStats::new`] fills in
/// the sizes and the ratio and initialises every token counter to zero.
#[derive(Debug, Clone, PartialEq)]
pub struct CompressionStats {
    /// Original size in bytes
    pub original_size: usize,
    /// Compressed size in bytes
    pub compressed_size: usize,
    /// Compression ratio (compressed / original).
    ///
    /// Values below `1.0` mean the output is smaller than the input; `1.0`
    /// means no change in size.
    pub ratio: f64,
    /// Number of 2-byte tokens replaced
    pub two_byte_tokens: usize,
    /// Number of 4-byte tokens replaced
    pub four_byte_tokens: usize,
    /// Number of 8-byte tokens replaced
    pub eight_byte_tokens: usize,
}

impl CompressionStats {
    /// Create new compression stats from the original and compressed sizes.
    ///
    /// The ratio is `compressed_size / original_size`. All token counters
    /// start at zero.
    ///
    /// # Edge cases
    ///
    /// - When both sizes are zero the ratio is defined as `1.0` (nothing was
    ///   saved and nothing was added) instead of the `NaN` that `0.0 / 0.0`
    ///   would produce. A `NaN` ratio would make `percentage_saved` return
    ///   `NaN` and would make a value compare unequal to its own clone.
    /// - When only `original_size` is zero the ratio is positive infinity,
    ///   which is the plain result of the floating-point division.
    pub fn new(original_size: usize, compressed_size: usize) -> Self {
        let ratio = if original_size == 0 && compressed_size == 0 {
            // Empty input, empty output: treat as "no change" rather than NaN.
            1.0
        } else {
            // `usize as f64` is lossy only above 2^53 bytes, far beyond any
            // realistic document size.
            compressed_size as f64 / original_size as f64
        };

        Self {
            original_size,
            compressed_size,
            ratio,
            two_byte_tokens: 0,
            four_byte_tokens: 0,
            eight_byte_tokens: 0,
        }
    }

    /// Calculate percentage saved.
    ///
    /// Returns `(1.0 - ratio) * 100.0`, so a ratio of `0.4` yields `60.0`.
    /// The result is negative when the compressed output is larger than the
    /// original.
    pub fn percentage_saved(&self) -> f64 {
        (1.0 - self.ratio) * 100.0
    }
}
69
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used when comparing computed `f64` values, so the test does
    /// not depend on exact floating-point rounding.
    const EPSILON: f64 = 1e-9;

    /// `new` stores both sizes, derives the ratio, and zeroes the token
    /// counters; `percentage_saved` is consistent with that ratio.
    #[test]
    fn test_compression_stats() {
        let stats = CompressionStats::new(1000, 400);

        assert_eq!(stats.original_size, 1000);
        assert_eq!(stats.compressed_size, 400);
        assert_eq!(stats.two_byte_tokens, 0);
        assert_eq!(stats.four_byte_tokens, 0);
        assert_eq!(stats.eight_byte_tokens, 0);

        assert!(
            (stats.ratio - 0.4).abs() < EPSILON,
            "unexpected ratio: {}",
            stats.ratio
        );
        assert!(
            (stats.percentage_saved() - 60.0).abs() < EPSILON,
            "unexpected percentage saved: {}",
            stats.percentage_saved()
        );
    }
}
80}