compression_prompt/
compressor.rs

1//! Main compression pipeline and result structures.
2
3use crate::statistical_filter::{StatisticalFilter, StatisticalFilterConfig};
4use serde::{Deserialize, Serialize};
5use thiserror::Error;
6
7/// Output format for compression result.
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9pub enum OutputFormat {
10    /// Plain text output (default).
11    Text,
12    /// PNG image output (1024x1024 monospace).
13    Image,
14}
15
16/// Compression errors.
17#[derive(Error, Debug)]
18pub enum CompressionError {
19    /// Compression would increase token count.
20    #[error("Compression ratio {0:.2} < 1.0, would increase tokens")]
21    NegativeGain(f32),
22
23    /// Input too short to compress.
24    #[error("Input too short ({0} tokens), minimum is {1}")]
25    InputTooShort(usize, usize),
26}
27
/// Tunable limits and targets for a [`Compressor`].
#[derive(Clone, Debug)]
pub struct CompressorConfig {
    /// Desired size of the output relative to the input
    /// (default: 0.5, i.e. keep 50% of the original).
    pub target_ratio: f32,

    /// Inputs estimated at fewer tokens than this are rejected
    /// (default: 100).
    pub min_input_tokens: usize,

    /// Inputs shorter than this many bytes are rejected
    /// (default: 1024).
    pub min_input_bytes: usize,
}
40
41impl Default for CompressorConfig {
42    fn default() -> Self {
43        Self {
44            target_ratio: 0.5,
45            min_input_tokens: 100,
46            min_input_bytes: 1024,
47        }
48    }
49}
50
51/// Result of compression operation.
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct CompressionResult {
54    /// The compressed text (always included).
55    pub compressed: String,
56
57    /// Optional image output (PNG bytes).
58    #[serde(skip_serializing_if = "Option::is_none")]
59    pub image_data: Option<Vec<u8>>,
60
61    /// Output format used.
62    pub format: OutputFormat,
63
64    /// Original token count.
65    pub original_tokens: usize,
66
67    /// Compressed token count.
68    pub compressed_tokens: usize,
69
70    /// Compression ratio (compressed/original).
71    pub compression_ratio: f32,
72
73    /// Number of tokens removed.
74    pub tokens_removed: usize,
75}
76
77/// Main compressor.
78#[derive(Debug)]
79pub struct Compressor {
80    /// Compression configuration.
81    pub config: CompressorConfig,
82    /// Statistical filter instance
83    filter: StatisticalFilter,
84}
85
86impl Compressor {
87    /// Create a new compressor with given configuration.
88    pub fn new(config: CompressorConfig) -> Self {
89        let filter_config = StatisticalFilterConfig {
90            compression_ratio: config.target_ratio,
91            ..Default::default()
92        };
93        let filter = StatisticalFilter::new(filter_config);
94        Self { config, filter }
95    }
96
97    /// Create a new compressor with custom statistical filter configuration.
98    pub fn with_filter_config(
99        config: CompressorConfig,
100        filter_config: StatisticalFilterConfig,
101    ) -> Self {
102        let filter = StatisticalFilter::new(filter_config);
103        Self { config, filter }
104    }
105
106    /// Compress input text using statistical filtering.
107    ///
108    /// Returns an error if compression would be counterproductive.
109    pub fn compress(&self, input: &str) -> Result<CompressionResult, CompressionError> {
110        self.compress_with_format(input, OutputFormat::Text)
111    }
112
113    /// Compress input text with specified output format.
114    ///
115    /// # Arguments
116    ///
117    /// * `input` - The text to compress
118    /// * `format` - Output format (Text or Image)
119    ///
120    /// # Returns
121    ///
122    /// CompressionResult with compressed text and optional image data.
123    ///
124    /// # Errors
125    ///
126    /// Returns `CompressionError` if:
127    /// - Input is too short (< min_input_bytes or < min_input_tokens)
128    /// - Compression would increase size (ratio >= 1.0)
129    pub fn compress_with_format(
130        &self,
131        input: &str,
132        format: OutputFormat,
133    ) -> Result<CompressionResult, CompressionError> {
134        // Step 1: Check input size (bytes)
135        let input_bytes = input.len();
136        if input_bytes < self.config.min_input_bytes {
137            return Err(CompressionError::InputTooShort(
138                input_bytes,
139                self.config.min_input_bytes,
140            ));
141        }
142
143        // Step 2: Estimate tokens (using char count / 4 as rough estimate)
144        let original_tokens = input.chars().count() / 4;
145        if original_tokens < self.config.min_input_tokens {
146            return Err(CompressionError::InputTooShort(
147                original_tokens,
148                self.config.min_input_tokens,
149            ));
150        }
151
152        // Step 3: Apply statistical filtering
153        let compressed = self.filter.compress(input);
154
155        // Step 4: Validate compression ratio
156        let compressed_tokens = compressed.chars().count() / 4;
157        let compression_ratio = compressed_tokens as f32 / original_tokens as f32;
158
159        if compression_ratio >= 1.0 {
160            return Err(CompressionError::NegativeGain(compression_ratio));
161        }
162
163        let tokens_removed = original_tokens.saturating_sub(compressed_tokens);
164
165        // Step 5: Generate image if requested
166        let image_data = if format == OutputFormat::Image {
167            #[cfg(feature = "image")]
168            {
169                use crate::image_renderer::ImageRenderer;
170                let renderer = ImageRenderer::default();
171                match renderer.render_to_png(&compressed) {
172                    Ok(data) => Some(data),
173                    Err(_) => None, // Fallback: no image on error
174                }
175            }
176            #[cfg(not(feature = "image"))]
177            {
178                None // Image feature not enabled
179            }
180        } else {
181            None
182        };
183
184        Ok(CompressionResult {
185            compressed,
186            image_data,
187            format,
188            original_tokens,
189            compressed_tokens,
190            compression_ratio,
191            tokens_removed,
192        })
193    }
194}
195
196impl Default for Compressor {
197    fn default() -> Self {
198        Self::new(CompressorConfig::default())
199    }
200}
201
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when `outcome` is the `InputTooShort` rejection.
    fn is_too_short(outcome: &Result<CompressionResult, CompressionError>) -> bool {
        matches!(outcome, Err(CompressionError::InputTooShort(..)))
    }

    #[test]
    fn test_compression_too_short() {
        let outcome = Compressor::default().compress("short text");

        assert!(is_too_short(&outcome));
    }

    #[test]
    fn test_compression_min_bytes() {
        // 200 copies of a 3-byte chunk: 600 bytes, which is under the
        // 1024-byte default but over the 100-token default (600 / 4 = 150).
        let input = "ab ".repeat(200);
        assert!(input.len() < 1024);

        // Must be rejected by the byte threshold.
        let outcome = Compressor::default().compress(&input);
        assert!(is_too_short(&outcome));
    }

    #[test]
    fn test_compression_min_tokens() {
        // Byte threshold lowered so that only the token threshold can trip.
        let compressor = Compressor::new(CompressorConfig {
            min_input_bytes: 10,
            min_input_tokens: 500,
            ..Default::default()
        });

        // Longer than 10 bytes, far fewer than 500 tokens.
        let outcome = compressor.compress("some short text with few tokens");

        assert!(is_too_short(&outcome));
    }
}
247}