compression_prompt/
compressor.rs1use crate::statistical_filter::{StatisticalFilter, StatisticalFilterConfig};
4use serde::{Deserialize, Serialize};
5use thiserror::Error;
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9pub enum OutputFormat {
10 Text,
12 Image,
14}
15
16#[derive(Error, Debug)]
18pub enum CompressionError {
19 #[error("Compression ratio {0:.2} < 1.0, would increase tokens")]
21 NegativeGain(f32),
22
23 #[error("Input too short ({0} tokens), minimum is {1}")]
25 InputTooShort(usize, usize),
26}
27
/// Tunable thresholds for a `Compressor`.
#[derive(Debug, Clone)]
pub struct CompressorConfig {
    /// Compression ratio handed to the statistical filter by
    /// `Compressor::new`.
    pub target_ratio: f32,

    /// Minimum estimated token count an input must reach to be compressed.
    pub min_input_tokens: usize,

    /// Minimum input length in bytes an input must reach to be compressed.
    pub min_input_bytes: usize,
}
40
41impl Default for CompressorConfig {
42 fn default() -> Self {
43 Self {
44 target_ratio: 0.5,
45 min_input_tokens: 100,
46 min_input_bytes: 1024,
47 }
48 }
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct CompressionResult {
54 pub compressed: String,
56
57 #[serde(skip_serializing_if = "Option::is_none")]
59 pub image_data: Option<Vec<u8>>,
60
61 pub format: OutputFormat,
63
64 pub original_tokens: usize,
66
67 pub compressed_tokens: usize,
69
70 pub compression_ratio: f32,
72
73 pub tokens_removed: usize,
75}
76
77#[derive(Debug)]
79pub struct Compressor {
80 pub config: CompressorConfig,
82 filter: StatisticalFilter,
84}
85
86impl Compressor {
87 pub fn new(config: CompressorConfig) -> Self {
89 let filter_config = StatisticalFilterConfig {
90 compression_ratio: config.target_ratio,
91 ..Default::default()
92 };
93 let filter = StatisticalFilter::new(filter_config);
94 Self { config, filter }
95 }
96
97 pub fn with_filter_config(
99 config: CompressorConfig,
100 filter_config: StatisticalFilterConfig,
101 ) -> Self {
102 let filter = StatisticalFilter::new(filter_config);
103 Self { config, filter }
104 }
105
106 pub fn compress(&self, input: &str) -> Result<CompressionResult, CompressionError> {
110 self.compress_with_format(input, OutputFormat::Text)
111 }
112
113 pub fn compress_with_format(
130 &self,
131 input: &str,
132 format: OutputFormat,
133 ) -> Result<CompressionResult, CompressionError> {
134 let input_bytes = input.len();
136 if input_bytes < self.config.min_input_bytes {
137 return Err(CompressionError::InputTooShort(
138 input_bytes,
139 self.config.min_input_bytes,
140 ));
141 }
142
143 let original_tokens = input.chars().count() / 4;
145 if original_tokens < self.config.min_input_tokens {
146 return Err(CompressionError::InputTooShort(
147 original_tokens,
148 self.config.min_input_tokens,
149 ));
150 }
151
152 let compressed = self.filter.compress(input);
154
155 let compressed_tokens = compressed.chars().count() / 4;
157 let compression_ratio = compressed_tokens as f32 / original_tokens as f32;
158
159 if compression_ratio >= 1.0 {
160 return Err(CompressionError::NegativeGain(compression_ratio));
161 }
162
163 let tokens_removed = original_tokens.saturating_sub(compressed_tokens);
164
165 let image_data = if format == OutputFormat::Image {
167 #[cfg(feature = "image")]
168 {
169 use crate::image_renderer::ImageRenderer;
170 let renderer = ImageRenderer::default();
171 match renderer.render_to_png(&compressed) {
172 Ok(data) => Some(data),
173 Err(_) => None, }
175 }
176 #[cfg(not(feature = "image"))]
177 {
178 None }
180 } else {
181 None
182 };
183
184 Ok(CompressionResult {
185 compressed,
186 image_data,
187 format,
188 original_tokens,
189 compressed_tokens,
190 compression_ratio,
191 tokens_removed,
192 })
193 }
194}
195
196impl Default for Compressor {
197 fn default() -> Self {
198 Self::new(CompressorConfig::default())
199 }
200}
201
#[cfg(test)]
mod tests {
    use super::*;

    /// A tiny input is rejected by the default byte minimum.
    #[test]
    fn test_compression_too_short() {
        let compressor = Compressor::default();

        let result = compressor.compress("short text");

        // 10 bytes against the default 1024-byte minimum.
        assert!(matches!(
            result,
            Err(CompressionError::InputTooShort(10, 1024))
        ));
    }

    /// An input with many words but fewer than 1024 bytes is still rejected
    /// by the byte minimum.
    #[test]
    fn test_compression_min_bytes() {
        let compressor = Compressor::default();

        // 200 repetitions of a 3-byte chunk: 600 bytes in total.
        let input = "ab ".repeat(200);
        assert!(input.len() < 1024);

        let result = compressor.compress(&input);
        assert!(matches!(
            result,
            Err(CompressionError::InputTooShort(600, 1024))
        ));
    }

    /// An input that passes the byte minimum is rejected when its estimated
    /// token count is below the token minimum.
    #[test]
    fn test_compression_min_tokens() {
        let config = CompressorConfig {
            min_input_bytes: 10,
            min_input_tokens: 500,
            ..Default::default()
        };
        let compressor = Compressor::new(config);

        // 31 characters, i.e. 7 estimated tokens (31 / 4, rounded down).
        let input = "some short text with few tokens";

        let result = compressor.compress(input);
        assert!(matches!(
            result,
            Err(CompressionError::InputTooShort(7, 500))
        ));
    }
}