m2m/codec/
algorithm.rs

1//! Compression algorithm types and results.
2
3use serde::{Deserialize, Serialize};
4
5/// Available compression algorithms
6///
7/// M2M Protocol v0.4.0 supports three compression algorithms:
8/// - **M2M**: Default, 100% JSON fidelity with extracted routing headers
9/// - **TokenNative**: Token ID transmission for maximum compression  
10/// - **Brotli**: High-ratio compression for large content (>1KB)
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
12#[serde(rename_all = "lowercase")]
13pub enum Algorithm {
14    /// No compression (passthrough)
15    None,
16    /// M2M Wire Format v1 (default, 100% JSON fidelity)
17    ///
18    /// Binary protocol with header extraction for routing and Brotli-compressed
19    /// JSON payload. Provides 100% fidelity reconstruction of original JSON.
20    ///
21    /// Wire format: `#M2M|1|<fixed_header><routing_header><payload>`
22    #[default]
23    M2M,
24    /// Token-native compression (transmit token IDs directly)
25    ///
26    /// This algorithm tokenizes content using the negotiated tokenizer and
27    /// transmits token IDs with VarInt encoding. Achieves 50-60% compression
28    /// by leveraging the tokenizer as the compression dictionary.
29    ///
30    /// Wire format: `#TK|<tokenizer_id>|<varint_encoded_tokens>`
31    TokenNative,
32    /// Brotli compression (high ratio, base64 encoded)
33    ///
34    /// Best for large content (>1KB) with repetitive patterns.
35    /// Achieves 60-80% compression.
36    ///
37    /// Wire format: `#M2M[v3.0]|DATA:<base64_brotli>`
38    Brotli,
39}
40
41impl Algorithm {
42    /// Get the wire format prefix for this algorithm
43    pub fn prefix(&self) -> &'static str {
44        match self {
45            Algorithm::None => "",
46            Algorithm::M2M => "#M2M|1|",
47            Algorithm::TokenNative => "#TK|",
48            Algorithm::Brotli => "#M2M[v3.0]|DATA:",
49        }
50    }
51
52    /// Parse algorithm from wire format
53    pub fn from_prefix(content: &str) -> Option<Self> {
54        if content.starts_with("#M2M|1|") {
55            Some(Algorithm::M2M)
56        } else if content.starts_with("#TK|") {
57            Some(Algorithm::TokenNative)
58        } else if content.starts_with("#M2M[v3.0]|") {
59            Some(Algorithm::Brotli)
60        } else {
61            None
62        }
63    }
64
65    /// Get human-readable name
66    pub fn name(&self) -> &'static str {
67        match self {
68            Algorithm::None => "NONE",
69            Algorithm::M2M => "M2M",
70            Algorithm::TokenNative => "TOKEN_NATIVE",
71            Algorithm::Brotli => "BROTLI",
72        }
73    }
74
75    /// Get all available algorithms in preference order
76    pub fn all() -> &'static [Algorithm] {
77        &[
78            Algorithm::M2M,
79            Algorithm::TokenNative,
80            Algorithm::Brotli,
81            Algorithm::None,
82        ]
83    }
84}
85
86impl std::fmt::Display for Algorithm {
87    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
88        write!(f, "{}", self.name())
89    }
90}
91
92/// Result of compression operation
93#[derive(Debug, Clone)]
94pub struct CompressionResult {
95    /// Compressed data (wire format)
96    pub data: String,
97    /// Algorithm used
98    pub algorithm: Algorithm,
99    /// Original size in bytes
100    pub original_bytes: usize,
101    /// Compressed size in bytes
102    pub compressed_bytes: usize,
103    /// Original token count (if available)
104    pub original_tokens: Option<usize>,
105    /// Compressed token count (if available)
106    pub compressed_tokens: Option<usize>,
107}
108
109impl CompressionResult {
110    /// Create new compression result
111    pub fn new(
112        data: String,
113        algorithm: Algorithm,
114        original_bytes: usize,
115        compressed_bytes: usize,
116    ) -> Self {
117        Self {
118            data,
119            algorithm,
120            original_bytes,
121            compressed_bytes,
122            original_tokens: None,
123            compressed_tokens: None,
124        }
125    }
126
127    /// Set token counts
128    pub fn with_tokens(mut self, original: usize, compressed: usize) -> Self {
129        self.original_tokens = Some(original);
130        self.compressed_tokens = Some(compressed);
131        self
132    }
133
134    /// Calculate byte compression ratio
135    pub fn byte_ratio(&self) -> f64 {
136        if self.compressed_bytes == 0 {
137            0.0
138        } else {
139            self.original_bytes as f64 / self.compressed_bytes as f64
140        }
141    }
142
143    /// Calculate token savings percentage
144    pub fn token_savings_percent(&self) -> Option<f64> {
145        match (self.original_tokens, self.compressed_tokens) {
146            (Some(orig), Some(comp)) if orig > 0 => {
147                Some((orig as f64 - comp as f64) / orig as f64 * 100.0)
148            },
149            _ => None,
150        }
151    }
152
153    /// Check if compression was beneficial
154    pub fn is_beneficial(&self) -> bool {
155        match (self.original_tokens, self.compressed_tokens) {
156            (Some(orig), Some(comp)) => comp < orig,
157            _ => self.compressed_bytes < self.original_bytes,
158        }
159    }
160}