m2m/codec/
engine.rs

1//! Codec engine for automatic algorithm selection and compression.
2//!
3//! The engine analyzes content characteristics and selects the optimal
4//! compression algorithm. Can also be guided by ML inference for
5//! intelligent routing decisions.
6
7use serde_json::Value;
8
9use super::brotli::BrotliCodec;
10use super::m2m::M2MCodec;
11use super::token_native::TokenNativeCodec;
12use super::{Algorithm, CompressionResult};
13use crate::error::{M2MError, Result};
14use crate::inference::HydraModel;
15use crate::models::Encoding;
16use crate::security::SecurityScanner;
17use crate::tokenizer::count_tokens_with_encoding;
18
/// Content characteristics for algorithm selection.
///
/// Produced by [`ContentAnalysis::analyze`] and consumed by the engine's
/// heuristic selector to pick a compression algorithm.
#[derive(Debug, Clone)]
pub struct ContentAnalysis {
    /// Content length in bytes (not characters)
    pub length: usize,
    /// Is valid JSON (parsed successfully with serde_json)
    pub is_json: bool,
    /// Has LLM API structure (top-level `messages`, `model`, or `choices` key)
    pub is_llm_api: bool,
    /// Repetition ratio (0.0 = unique, 1.0 = highly repetitive)
    pub repetition_ratio: f32,
    /// Has tool/function calls (top-level `tools`, `tool_calls`, or `functions` key)
    pub has_tools: bool,
    /// Estimated token count (rough: bytes / 4, tuned for English text)
    pub estimated_tokens: usize,
}
35
36impl ContentAnalysis {
37    /// Analyze content for compression characteristics
38    pub fn analyze(content: &str) -> Self {
39        let length = content.len();
40        let parsed: Option<Value> = serde_json::from_str(content).ok();
41        let is_json = parsed.is_some();
42
43        let (is_llm_api, has_tools) = if let Some(ref value) = parsed {
44            let is_api = value.get("messages").is_some()
45                || value.get("model").is_some()
46                || value.get("choices").is_some();
47            let tools = value.get("tools").is_some()
48                || value.get("tool_calls").is_some()
49                || value.get("functions").is_some();
50            (is_api, tools)
51        } else {
52            (false, false)
53        };
54
55        // Simple repetition detection
56        let repetition_ratio = Self::calculate_repetition(content);
57
58        // Rough token estimate (chars / 4 for English)
59        let estimated_tokens = length / 4;
60
61        Self {
62            length,
63            is_json,
64            is_llm_api,
65            repetition_ratio,
66            has_tools,
67            estimated_tokens,
68        }
69    }
70
71    fn calculate_repetition(content: &str) -> f32 {
72        if content.len() < 100 {
73            return 0.0;
74        }
75
76        // Count unique 4-grams
77        let mut seen = std::collections::HashSet::new();
78        let chars: Vec<char> = content.chars().collect();
79        let total = chars.len().saturating_sub(3);
80
81        if total == 0 {
82            return 0.0;
83        }
84
85        for window in chars.windows(4) {
86            let gram: String = window.iter().collect();
87            seen.insert(gram);
88        }
89
90        1.0 - (seen.len() as f32 / total as f32)
91    }
92}
93
/// Codec engine with automatic algorithm selection.
///
/// Holds one instance of each codec and routes content to the best one,
/// either heuristically (size / structure / repetition) or via an optional
/// Hydra ML model.
#[derive(Clone)]
pub struct CodecEngine {
    /// Token-native codec instance
    token_native: TokenNativeCodec,
    /// M2M codec instance (default for M2M v1 wire format - 100% JSON fidelity)
    m2m: M2MCodec,
    /// Brotli codec instance
    brotli: BrotliCodec,
    /// Hydra model for ML routing (optional; set via `with_hydra`)
    hydra: Option<HydraModel>,
    /// ML routing enabled (requires inference module)
    pub ml_routing: bool,
    /// Minimum size for Brotli (bytes); content larger than this prefers Brotli
    pub brotli_threshold: usize,
    /// Prefer M2M for LLM API payloads (default: true)
    pub prefer_m2m_for_api: bool,
}
112
113impl Default for CodecEngine {
114    fn default() -> Self {
115        Self {
116            token_native: TokenNativeCodec::default(),
117            m2m: M2MCodec::new(),
118            brotli: BrotliCodec::new(),
119            hydra: None,
120            ml_routing: false,
121            brotli_threshold: 1024, // 1KB
122            prefer_m2m_for_api: true,
123        }
124    }
125}
126
127impl CodecEngine {
128    /// Create new codec engine
129    pub fn new() -> Self {
130        Self::default()
131    }
132
133    /// Enable ML-based routing (requires loaded model)
134    pub fn with_ml_routing(mut self, enabled: bool) -> Self {
135        self.ml_routing = enabled;
136        self
137    }
138
139    /// Set Hydra model for ML-based algorithm selection
140    pub fn with_hydra(mut self, model: HydraModel) -> Self {
141        self.hydra = Some(model);
142        self.ml_routing = true;
143        self
144    }
145
146    /// Set Brotli threshold
147    pub fn with_brotli_threshold(mut self, threshold: usize) -> Self {
148        self.brotli_threshold = threshold;
149        self
150    }
151
152    /// Set token-native encoding
153    pub fn with_encoding(mut self, encoding: Encoding) -> Self {
154        self.token_native = TokenNativeCodec::new(encoding);
155        self
156    }
157
158    /// Compress with specified algorithm and track token counts
159    ///
160    /// This method counts tokens before and after compression to provide
161    /// accurate token savings metrics. Use this when token efficiency matters
162    /// more than raw speed.
163    pub fn compress_with_tokens(
164        &self,
165        content: &str,
166        algorithm: Algorithm,
167        encoding: Encoding,
168    ) -> Result<CompressionResult> {
169        let original_tokens = count_tokens_with_encoding(content, encoding);
170        let mut result = self.compress(content, algorithm)?;
171
172        let compressed_tokens = count_tokens_with_encoding(&result.data, encoding);
173        result.original_tokens = Some(original_tokens);
174        result.compressed_tokens = Some(compressed_tokens);
175
176        Ok(result)
177    }
178
179    /// Secure compress: Run cognitive security scan before compression
180    ///
181    /// This method combines security scanning with compression:
182    /// 1. Scans plaintext for threats using Hydra/patterns
183    /// 2. If safe, compresses with optimal algorithm
184    /// 3. If threat detected and blocking enabled, returns error
185    ///
186    /// Epistemic basis:
187    /// - K: Threats exist in plaintext, not compressed form
188    /// - K: Security scan must happen before compression
189    /// - B: Combined scan provides defense in depth
190    pub fn secure_compress(
191        &self,
192        content: &str,
193        scanner: &SecurityScanner,
194    ) -> Result<CompressionResult> {
195        // 1. Security scan (cognitive security)
196        let scan_result = scanner.scan_and_validate(content)?;
197
198        if scan_result.should_block {
199            let threat_desc = scan_result
200                .threats
201                .first()
202                .map(|t| t.name.clone())
203                .unwrap_or_else(|| "unknown".to_string());
204
205            return Err(M2MError::ContentBlocked(format!(
206                "Content blocked: {} (confidence: {:.2})",
207                threat_desc, scan_result.confidence
208            )));
209        }
210
211        // 2. If safe (or not blocking), compress with optimal algorithm
212        let analysis = ContentAnalysis::analyze(content);
213        let algorithm = self.select_algorithm(&analysis);
214        self.compress(content, algorithm)
215    }
216
217    /// Secure compress with ML-based algorithm selection
218    ///
219    /// Uses Hydra for both security scanning and algorithm selection.
220    pub fn secure_compress_ml(&self, content: &str) -> Result<(CompressionResult, bool)> {
221        // Use Hydra for security if available
222        let is_safe = if let Some(ref hydra) = self.hydra {
223            let security = hydra.predict_security(content)?;
224            security.safe
225        } else {
226            // Fallback to heuristic
227            let fallback = HydraModel::fallback_only();
228            fallback.predict_security(content)?.safe
229        };
230
231        // Compress with ML-selected algorithm
232        let algorithm = self.select_algorithm_for_content(content);
233        let result = self.compress(content, algorithm)?;
234
235        Ok((result, is_safe))
236    }
237
238    /// Compress with automatic algorithm selection and token tracking
239    pub fn compress_auto_with_tokens(
240        &self,
241        content: &str,
242        encoding: Encoding,
243    ) -> Result<(CompressionResult, Algorithm)> {
244        let analysis = ContentAnalysis::analyze(content);
245        let algorithm = self.select_algorithm(&analysis);
246
247        let result = self.compress_with_tokens(content, algorithm, encoding)?;
248        Ok((result, algorithm))
249    }
250
251    /// Compress with specified algorithm
252    pub fn compress(&self, content: &str, algorithm: Algorithm) -> Result<CompressionResult> {
253        match algorithm {
254            Algorithm::None => Ok(CompressionResult::new(
255                content.to_string(),
256                Algorithm::None,
257                content.len(),
258                content.len(),
259            )),
260            Algorithm::M2M => {
261                // M2M wire format with 100% JSON fidelity
262                // Uses base64 encoding for text transport
263                let wire = self.m2m.encode_string(content)?;
264                Ok(CompressionResult::new(
265                    wire.clone(),
266                    Algorithm::M2M,
267                    content.len(),
268                    wire.len(),
269                ))
270            },
271            Algorithm::TokenNative => self.token_native.compress(content),
272            Algorithm::Brotli => self.brotli.compress(content),
273        }
274    }
275
276    /// Compress with automatic algorithm selection
277    pub fn compress_auto(&self, content: &str) -> Result<(CompressionResult, Algorithm)> {
278        let analysis = ContentAnalysis::analyze(content);
279        let algorithm = self.select_algorithm(&analysis);
280
281        let result = self.compress(content, algorithm)?;
282        Ok((result, algorithm))
283    }
284
285    /// Compress JSON value with automatic selection
286    pub fn compress_value(&self, value: &Value) -> Result<(CompressionResult, Algorithm)> {
287        let content = serde_json::to_string(value)?;
288        self.compress_auto(&content)
289    }
290
291    /// Select optimal algorithm based on content analysis
292    pub fn select_algorithm(&self, analysis: &ContentAnalysis) -> Algorithm {
293        // If ML routing is enabled and Hydra model is available, use ML
294        if self.ml_routing {
295            return self.ml_select_algorithm(analysis);
296        }
297
298        // Heuristic-based selection
299        self.heuristic_select_algorithm(analysis)
300    }
301
302    /// ML-based algorithm selection using Hydra SLM
303    fn ml_select_algorithm(&self, analysis: &ContentAnalysis) -> Algorithm {
304        // Use Hydra model if available
305        if let Some(ref hydra) = self.hydra {
306            // Hydra needs the raw content, but we only have analysis
307            // For full ML routing, we need to pass content through
308            // For now, use Hydra's heuristic which mirrors our logic
309            // but with TokenNative awareness
310            if let Ok(decision) = hydra.predict_compression("") {
311                return decision.algorithm;
312            }
313        }
314
315        // Fall back to heuristics
316        self.heuristic_select_algorithm(analysis)
317    }
318
319    /// Select algorithm with full content access (for ML routing)
320    pub fn select_algorithm_for_content(&self, content: &str) -> Algorithm {
321        // If ML routing is enabled and Hydra model is available, use ML
322        if self.ml_routing {
323            if let Some(ref hydra) = self.hydra {
324                if let Ok(decision) = hydra.predict_compression(content) {
325                    return decision.algorithm;
326                }
327            }
328        }
329
330        // Fall back to analysis-based selection
331        let analysis = ContentAnalysis::analyze(content);
332        self.heuristic_select_algorithm(&analysis)
333    }
334
335    /// Heuristic-based algorithm selection
336    ///
337    /// Epistemic basis:
338    /// - K: M2M achieves ~60-70% byte savings for LLM API JSON with 100% fidelity
339    /// - K: Brotli is optimal for large repetitive content (>1KB)
340    /// - B: M2M is best for small-medium LLM API JSON (<1KB)
341    fn heuristic_select_algorithm(&self, analysis: &ContentAnalysis) -> Algorithm {
342        // Small content: no compression (overhead not worth it)
343        // Epistemic: K - compression overhead exceeds savings
344        if analysis.length < 100 {
345            return Algorithm::None;
346        }
347
348        // Large content (>1KB): Brotli is almost always best
349        // Epistemic: K - Brotli achieves 40-60% savings on large content
350        if analysis.length > self.brotli_threshold {
351            return Algorithm::Brotli;
352        }
353
354        // Medium LLM API JSON (100-1KB): M2M compression (100% fidelity)
355        // Epistemic: K - M2M achieves ~60-70% compression with routing headers
356        if analysis.is_llm_api && self.prefer_m2m_for_api {
357            return Algorithm::M2M;
358        }
359
360        // Medium content with high repetition: Brotli
361        if analysis.repetition_ratio > 0.3 {
362            return Algorithm::Brotli;
363        }
364
365        // Default: M2M for JSON (optimal for M2M wire format), None for others
366        if analysis.is_json {
367            Algorithm::M2M
368        } else {
369            Algorithm::None
370        }
371    }
372
373    /// Decompress content (auto-detects algorithm from wire format)
374    pub fn decompress(&self, wire: &str) -> Result<String> {
375        let algorithm = super::detect_algorithm(wire).unwrap_or(Algorithm::None);
376
377        match algorithm {
378            Algorithm::None => Ok(wire.to_string()),
379            Algorithm::M2M => {
380                // M2M wire format - 100% JSON fidelity
381                self.m2m.decode_string(wire)
382            },
383            Algorithm::TokenNative => self.token_native.decompress(wire),
384            Algorithm::Brotli => self.brotli.decompress(wire),
385        }
386    }
387
388    /// Decompress to JSON value
389    pub fn decompress_value(&self, wire: &str) -> Result<Value> {
390        let json = self.decompress(wire)?;
391        serde_json::from_str(&json).map_err(|e| M2MError::Decompression(e.to_string()))
392    }
393
394    /// Try all algorithms and return best result
395    pub fn compress_best(&self, content: &str) -> Result<CompressionResult> {
396        let mut best: Option<CompressionResult> = None;
397
398        // Try each algorithm (M2M first as best for 100% fidelity)
399        for algo in [Algorithm::M2M, Algorithm::TokenNative, Algorithm::Brotli] {
400            if let Ok(result) = self.compress(content, algo) {
401                let is_better = match &best {
402                    None => true,
403                    Some(current) => result.compressed_bytes < current.compressed_bytes,
404                };
405
406                if is_better {
407                    best = Some(result);
408                }
409            }
410        }
411
412        best.ok_or_else(|| M2MError::Compression("All algorithms failed".to_string()))
413    }
414
415    /// Get analysis for content
416    pub fn analyze(&self, content: &str) -> ContentAnalysis {
417        ContentAnalysis::analyze(content)
418    }
419}
420
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_auto_select_small() {
        // Content under 100 bytes should bypass compression entirely.
        let engine = CodecEngine::new();
        let verdict = engine.select_algorithm(&ContentAnalysis::analyze("small"));
        assert_eq!(verdict, Algorithm::None);
    }

    #[test]
    fn test_auto_select_llm_api() {
        // Content must be 100-1024 bytes for M2M selection
        let payload = r#"{"model":"gpt-4o","messages":[{"role":"user","content":"Hello, how are you doing today? This is a longer message to test the compression algorithm selection."}]}"#;
        let analysis = ContentAnalysis::analyze(payload);

        assert!(analysis.is_json);
        assert!(analysis.is_llm_api);
        assert!((100..=1024).contains(&analysis.length));

        let engine = CodecEngine::new();
        assert_eq!(engine.select_algorithm(&analysis), Algorithm::M2M);
    }

    #[test]
    fn test_compress_decompress_auto() {
        let engine = CodecEngine::new();
        let payload = r#"{"model":"gpt-4o","messages":[{"role":"user","content":"Hello, how are you doing today?"}],"temperature":0.7}"#;

        let (result, algo) = engine.compress_auto(payload).unwrap();
        println!("Selected algorithm: {algo:?}");
        println!(
            "Original: {} bytes, Compressed: {} bytes",
            result.original_bytes, result.compressed_bytes
        );

        let roundtripped = engine.decompress(&result.data).unwrap();
        let original: Value = serde_json::from_str(payload).unwrap();
        let recovered: Value = serde_json::from_str(&roundtripped).unwrap();

        // Core content should match
        assert_eq!(
            original["messages"][0]["content"],
            recovered["messages"][0]["content"]
        );
    }

    #[test]
    fn test_compress_best() {
        let engine = CodecEngine::new();
        let payload = r#"{"model":"gpt-4o","messages":[{"role":"system","content":"You are a helpful assistant."},{"role":"user","content":"What is the capital of France?"},{"role":"assistant","content":"The capital of France is Paris."}]}"#;

        let best = engine.compress_best(payload).unwrap();
        println!(
            "Best algorithm: {:?}, ratio: {:.2}",
            best.algorithm,
            best.byte_ratio()
        );
    }

    #[test]
    fn test_content_analysis() {
        // A payload carrying a tools array should flag both API shape and tools.
        let payload = r#"{"model":"gpt-4o","messages":[{"role":"user","content":"test"}],"tools":[{"type":"function"}]}"#;
        let analysis = ContentAnalysis::analyze(payload);

        assert!(analysis.is_json);
        assert!(analysis.is_llm_api);
        assert!(analysis.has_tools);
    }

    #[test]
    fn test_large_content_selects_brotli() {
        // Create large content (>1024 bytes) - should select Brotli
        let big = "hello world ".repeat(100);
        let analysis = ContentAnalysis::analyze(&big);

        assert!(
            analysis.length > 1024,
            "Content should be >1024 bytes for Brotli selection"
        );

        let engine = CodecEngine::new();
        assert_eq!(engine.select_algorithm(&analysis), Algorithm::Brotli);
    }

    #[test]
    fn test_ml_routing_with_hydra() {
        // Should use Hydra's heuristics when ML routing is enabled
        let engine = CodecEngine::new().with_hydra(HydraModel::fallback_only());
        let payload = r#"{"model":"gpt-4o","messages":[{"role":"user","content":"Test message"}]}"#;

        // Hydra should select M2M for small LLM API content
        assert_eq!(
            engine.select_algorithm_for_content(payload),
            Algorithm::M2M
        );
    }

    #[test]
    fn test_token_native_roundtrip() {
        let engine = CodecEngine::new();
        let payload = r#"{"model":"gpt-4o","messages":[{"role":"user","content":"Hello!"}]}"#;

        let compressed = engine.compress(payload, Algorithm::TokenNative).unwrap();
        assert!(compressed.data.starts_with("#TK|"));

        let roundtripped = engine.decompress(&compressed.data).unwrap();
        assert_eq!(payload, roundtripped);
    }
}