Skip to main content

sqz_engine/
confidence_router.rs

//! Confidence-driven compression mode router.
//!
//! Analyzes input content and selects the appropriate compression aggressiveness:
//! - High-risk content (stack traces, configs, migrations) → safe preset
//! - Low-entropy repetitive content → aggressive preset
//! - High-entropy, information-dense content → safe preset
//! - Normal content → default preset
//!
//! Based on entropy analysis and content pattern detection.
9
10use crate::entropy_analyzer::EntropyAnalyzer;
11
/// How aggressively a piece of content should be compressed, as decided by the router.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionMode {
    /// Minimal compression that preserves all structure.
    /// Intended for stack traces, configs, migration files, and legal text.
    Safe,
    /// Balanced compression.
    Default,
    /// Maximum compression.
    /// Intended for repetitive or boilerplate content with low information density.
    Aggressive,
}

impl CompressionMode {
    /// Returns the lowercase name of the mode: `"safe"`, `"default"`, or `"aggressive"`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            CompressionMode::Aggressive => "aggressive",
            CompressionMode::Default => "default",
            CompressionMode::Safe => "safe",
        }
    }
}
34
35/// Routes content to the appropriate compression mode.
36pub struct ConfidenceRouter {
37    entropy_analyzer: EntropyAnalyzer,
38}
39
40impl Default for ConfidenceRouter {
41    fn default() -> Self {
42        Self::new()
43    }
44}
45
46impl ConfidenceRouter {
47    pub fn new() -> Self {
48        Self {
49            entropy_analyzer: EntropyAnalyzer::new(),
50        }
51    }
52
53    /// Analyze content and return the recommended compression mode.
54    pub fn route(&self, content: &str) -> CompressionMode {
55        if content.len() < 100 {
56            return CompressionMode::Default;
57        }
58
59        // Check for high-risk content patterns first (always → Safe)
60        if self.is_high_risk(content) {
61            return CompressionMode::Safe;
62        }
63
64        // Use entropy to distinguish repetitive from normal content
65        let blocks = self.entropy_analyzer.analyze(content);
66        if blocks.is_empty() {
67            return CompressionMode::Default;
68        }
69
70        let avg_entropy: f64 = blocks.iter().map(|b| b.entropy).sum::<f64>() / blocks.len() as f64;
71
72        // Very low average entropy → mostly boilerplate → aggressive
73        if avg_entropy < 2.5 {
74            return CompressionMode::Aggressive;
75        }
76
77        // High average entropy → information-dense → safe
78        if avg_entropy > 4.5 {
79            return CompressionMode::Safe;
80        }
81
82        CompressionMode::Default
83    }
84
85    /// Returns true if content contains high-risk patterns that should
86    /// never be aggressively compressed.
87    fn is_high_risk(&self, content: &str) -> bool {
88        let lower = content.to_lowercase();
89
90        // Stack traces
91        if lower.contains("stack trace") || lower.contains("traceback")
92            || lower.contains("at line ") || lower.contains("panicked at")
93        {
94            return true;
95        }
96
97        // Database migrations
98        if lower.contains("alter table") || lower.contains("create table")
99            || lower.contains("drop table") || lower.contains("migration")
100        {
101            return true;
102        }
103
104        // Security/auth configs
105        if lower.contains("private_key") || lower.contains("secret_key")
106            || lower.contains("api_key") || lower.contains("password")
107            || lower.contains("-----begin") // PEM headers
108        {
109            return true;
110        }
111
112        // Legal/compliance text
113        if lower.contains("terms of service") || lower.contains("privacy policy")
114            || lower.contains("license agreement") || lower.contains("gdpr")
115        {
116            return true;
117        }
118
119        // Kubernetes/infrastructure configs with critical fields
120        if lower.contains("apiversion:") && lower.contains("kind:")
121            && (lower.contains("secret") || lower.contains("configmap"))
122        {
123            return true;
124        }
125
126        false
127    }
128}
129
130// ---------------------------------------------------------------------------
131// Tests
132// ---------------------------------------------------------------------------
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Routes `input` through a freshly built router.
    fn mode_for(input: &str) -> CompressionMode {
        ConfidenceRouter::new().route(input)
    }

    #[test]
    fn routes_stack_trace_to_safe() {
        let input = "thread 'main' panicked at 'index out of bounds', src/main.rs:42\nstack trace:\n  0: std::panicking::begin_panic\n  1: myapp::process\n  2: main";
        assert_eq!(mode_for(input), CompressionMode::Safe);
    }

    #[test]
    fn routes_migration_to_safe() {
        let input = "ALTER TABLE users ADD COLUMN email VARCHAR(255) NOT NULL;\nCREATE TABLE sessions (id UUID PRIMARY KEY, user_id INT REFERENCES users(id));";
        assert_eq!(mode_for(input), CompressionMode::Safe);
    }

    #[test]
    fn routes_repetitive_logs_to_aggressive() {
        // The same short line over and over: expected to score as low entropy.
        let input = "// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n// comment\n";
        // Either Aggressive or Default is accepted; only Safe is a failure.
        assert!(matches!(
            mode_for(input),
            CompressionMode::Aggressive | CompressionMode::Default
        ));
    }

    #[test]
    fn routes_normal_code_to_default() {
        let input = "fn process(items: &[Item]) -> Result<Vec<Output>, Error> {\n    let mut results = Vec::new();\n    for item in items {\n        let output = transform(item)?;\n        results.push(output);\n    }\n    Ok(results)\n}";
        // Either Default or Safe is accepted; only Aggressive is a failure.
        assert!(matches!(
            mode_for(input),
            CompressionMode::Default | CompressionMode::Safe
        ));
    }

    #[test]
    fn short_content_is_default() {
        assert_eq!(mode_for("hello"), CompressionMode::Default);
    }

    #[test]
    fn pem_key_routes_to_safe() {
        // The input has to be at least 100 bytes long, otherwise `route` returns
        // Default before the PEM header is ever looked at.
        let input = "-----BEGIN RSA PRIVATE KEY-----\nMIIEowIBAAKCAQEA0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOP\nQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz==\n-----END RSA PRIVATE KEY-----\n";
        assert_eq!(mode_for(input), CompressionMode::Safe);
    }
}