Skip to main content

amql_engine/
compression.rs

1//! Pure in-memory compression measurement for benchmarking.
2//!
3//! Computes the ratio of raw source bytes to serialized annotation bytes.
4//! No filesystem or subprocess access — works in WASM, CLI, MCP, and Rust.
5
6use crate::store::Annotation;
7use crate::types::RelativePath;
8use serde::{Deserialize, Serialize};
9
/// A single source file, held entirely in memory, used as input to
/// [`measure_compression`].
#[cfg_attr(feature = "ts", derive(ts_rs::TS))]
#[cfg_attr(feature = "flow", derive(flowjs_rs::Flow))]
#[cfg_attr(feature = "ts", ts(export))]
#[cfg_attr(feature = "flow", flow(export))]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceEntry {
    /// Relative file path. Annotations are matched to this entry by comparing
    /// their own `file` against this path as a string.
    pub file: RelativePath,
    /// Full source text. Its length in bytes is what gets measured.
    pub source: String,
}
22
/// Aggregate compression measurement produced by [`measure_compression`]
/// from in-memory data.
#[cfg_attr(feature = "ts", derive(ts_rs::TS))]
#[cfg_attr(feature = "flow", derive(flowjs_rs::Flow))]
#[cfg_attr(feature = "ts", ts(export))]
#[cfg_attr(feature = "flow", flow(export))]
#[derive(Debug, Clone, Serialize)]
pub struct CompressionResult {
    /// Total source bytes across all files (sum of each entry's source length).
    pub source_bytes: usize,
    /// Estimated source tokens: `source_bytes / 4`, using integer division
    /// (the result is rounded down).
    pub source_tokens: usize,
    /// Serialized annotation JSON bytes, summed over the per-file totals in
    /// `files`. Annotations whose file is not among the measured sources do
    /// not contribute.
    pub annotation_bytes: usize,
    /// Estimated annotation tokens: `annotation_bytes / 4`, using integer
    /// division (the result is rounded down).
    pub annotation_tokens: usize,
    /// Number of files that have at least one annotation.
    pub covered_files: usize,
    /// Total number of files, i.e. the number of source entries measured.
    pub total_files: usize,
    /// File coverage ratio (covered / total). `0.0` when there are no files.
    pub file_coverage: f64,
    /// Compression ratio: source_bytes / annotation_bytes. `None` when annotation_bytes = 0.
    pub ratio: Option<f64>,
    /// Per-file breakdown: one element per source entry, in input order.
    pub files: Vec<FileCompression>,
}
49
/// Per-file compression detail; one of these is produced for every source
/// entry passed to [`measure_compression`].
#[cfg_attr(feature = "ts", derive(ts_rs::TS))]
#[cfg_attr(feature = "flow", derive(flowjs_rs::Flow))]
#[cfg_attr(feature = "ts", ts(export))]
#[cfg_attr(feature = "flow", flow(export))]
#[derive(Debug, Clone, Serialize)]
pub struct FileCompression {
    /// Relative file path (cloned from the source entry).
    pub file: RelativePath,
    /// Source bytes for this file (byte length of its source text).
    pub source_bytes: usize,
    /// Annotation count for this file, including nested child annotations.
    pub annotation_count: usize,
    /// Annotation JSON bytes for this file: the byte length of the file's
    /// annotations serialized together as one JSON value. `0` when the file
    /// has no annotations or serialization fails.
    pub annotation_bytes: usize,
    /// Per-file ratio (source_bytes / annotation_bytes). `None` when annotation_bytes = 0.
    pub ratio: Option<f64>,
}
68
69/// Measure compression from pre-loaded source files and annotations.
70///
71/// Pure function — no filesystem, no subprocesses. Computes source bytes vs
72/// annotation JSON bytes per file and in aggregate. Returns the compression
73/// ratio (higher = more compression) and per-file breakdown.
74pub fn measure_compression(
75    sources: &[SourceEntry],
76    annotations: &[Annotation],
77) -> CompressionResult {
78    let total_files = sources.len();
79    let source_bytes: usize = sources.iter().map(|s| s.source.len()).sum();
80    let source_tokens = source_bytes / 4;
81
82    // Group annotations by file
83    let mut by_file: rustc_hash::FxHashMap<&str, Vec<&Annotation>> =
84        rustc_hash::FxHashMap::default();
85    for ann in annotations {
86        by_file.entry(ann.file.as_ref()).or_default().push(ann);
87    }
88
89    let mut files = Vec::with_capacity(total_files);
90    let mut covered_files = 0usize;
91    let mut total_ann_bytes = 0usize;
92
93    for entry in sources {
94        let file_anns = by_file.get(AsRef::<str>::as_ref(&entry.file));
95        let ann_count =
96            file_anns.map_or(0, |v| v.iter().map(|a| count_annotations(a)).sum::<usize>());
97        let ann_bytes = file_anns.map_or(0, |v| {
98            serde_json::to_string(v).map(|s| s.len()).unwrap_or(0)
99        });
100        if ann_count > 0 {
101            covered_files += 1;
102        }
103        total_ann_bytes += ann_bytes;
104
105        files.push(FileCompression {
106            file: entry.file.clone(),
107            source_bytes: entry.source.len(),
108            annotation_count: ann_count,
109            annotation_bytes: ann_bytes,
110            ratio: if ann_bytes > 0 {
111                Some(entry.source.len() as f64 / ann_bytes as f64)
112            } else {
113                None
114            },
115        });
116    }
117
118    let file_coverage = if total_files > 0 {
119        covered_files as f64 / total_files as f64
120    } else {
121        0.0
122    };
123
124    CompressionResult {
125        source_bytes,
126        source_tokens,
127        annotation_bytes: total_ann_bytes,
128        annotation_tokens: total_ann_bytes / 4,
129        covered_files,
130        total_files,
131        file_coverage,
132        ratio: if total_ann_bytes > 0 {
133            Some(source_bytes as f64 / total_ann_bytes as f64)
134        } else {
135            None
136        },
137        files,
138    }
139}
140
141/// Count annotations recursively (including nested children).
142fn count_annotations(ann: &Annotation) -> usize {
143    1 + ann.children.iter().map(count_annotations).sum::<usize>()
144}
145
#[cfg(test)]
mod tests {
    use super::*;
    use crate::store::Annotation;
    use crate::types::{Binding, RelativePath, TagName};

    /// Build an annotation for `file` with no attributes and no children.
    fn make_annotation(file: &str, tag: &str, binding: &str) -> Annotation {
        let tag = TagName::from(tag);
        let binding = Binding::from(binding);
        let file = RelativePath::from(file);
        Annotation {
            tag,
            binding,
            file,
            attrs: rustc_hash::FxHashMap::default(),
            children: Vec::new(),
        }
    }

    /// Build an in-memory source entry for `file` containing `source`.
    fn make_source(file: &str, source: &str) -> SourceEntry {
        SourceEntry {
            file: RelativePath::from(file),
            source: source.to_string(),
        }
    }

    /// No sources and no annotations: everything is zero and there is no ratio.
    #[test]
    fn empty_input() {
        // Arrange
        let sources: &[SourceEntry] = &[];
        let annotations: &[Annotation] = &[];

        // Act
        let result = measure_compression(sources, annotations);

        // Assert
        assert_eq!(result.total_files, 0, "no files");
        assert_eq!(result.source_bytes, 0, "no source bytes");
        assert!(result.ratio.is_none(), "no ratio when no annotations");
    }

    /// A single annotated file is covered and yields a positive ratio.
    #[test]
    fn basic_compression() {
        // Arrange
        let sources = [make_source(
            "src/main.ts",
            "function hello() {\n  console.log('hello');\n}\n",
        )];
        let annotations = [make_annotation("src/main.ts", "function", "hello")];

        // Act
        let result = measure_compression(&sources, &annotations);

        // Assert
        assert_eq!(result.total_files, 1, "one file");
        assert_eq!(result.covered_files, 1, "one covered");
        assert!(result.ratio.is_some(), "has ratio");
        assert!(result.ratio.unwrap() > 0.0, "positive ratio");
        assert_eq!(result.files.len(), 1, "one file detail");
        assert_eq!(result.files[0].annotation_count, 1, "one annotation");
    }

    /// A file without annotations lowers coverage and has no per-file ratio.
    #[test]
    fn uncovered_file() {
        // Arrange
        let sources = [
            make_source("src/a.ts", "export const a = 1;"),
            make_source("src/b.ts", "export const b = 2;"),
        ];
        let annotations = [make_annotation("src/a.ts", "const", "a")];

        // Act
        let result = measure_compression(&sources, &annotations);

        // Assert
        assert_eq!(result.total_files, 2, "two files");
        assert_eq!(result.covered_files, 1, "one covered");
        assert_eq!(result.file_coverage, 0.5, "50% coverage");
        assert!(
            result.files[1].ratio.is_none(),
            "uncovered file has no ratio"
        );
    }
}
221}