Skip to main content

ast_doc_core/parser/
strategy.rs

1//! Strategy engine: byte-range slicing for NoTests and Summary modes.
2//!
3//! Given a source string and sorted removal ranges, produces transformed
4//! output for each strategy variant using byte-range slicing.
5
6use crate::config::OutputStrategy;
7
/// Classifies why a [`RemovalRange`] was recorded.
///
/// `apply_no_tests` acts on the two test variants; `apply_summary` acts on
/// [`RemovalReason::Implementation`] only.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum RemovalReason {
    /// The range covers a whole test module, such as a Rust `#[cfg(test)]`
    /// module or the equivalent construct in another language.
    TestModule,
    /// The range covers a single test function or method
    /// (e.g., `#[test]`, `def test_*`, `func Test*`).
    TestFunction,
    /// The range covers an implementation body that Summary mode swaps for
    /// a marker.
    Implementation,
}
18
19/// A byte range in source to be removed or replaced.
20#[derive(Debug, Clone)]
21pub struct RemovalRange {
22    /// Start byte offset (inclusive).
23    pub start: usize,
24    /// End byte offset (exclusive).
25    pub end: usize,
26    /// Why this range is being removed.
27    pub reason: RemovalReason,
28}
29
30/// Apply the NoTests strategy: remove test ranges, insert omission markers.
31///
32/// Returns the transformed source with test code replaced by markers.
33#[must_use]
34pub fn apply_no_tests(source: &str, ranges: &[RemovalRange]) -> String {
35    let test_ranges: Vec<&RemovalRange> = ranges
36        .iter()
37        .filter(|r| {
38            r.reason == RemovalReason::TestModule || r.reason == RemovalReason::TestFunction
39        })
40        .collect();
41
42    if test_ranges.is_empty() {
43        return source.to_string();
44    }
45
46    let mut result = String::with_capacity(source.len());
47    let mut last_end = 0;
48
49    for range in &test_ranges {
50        if range.start > last_end {
51            result.push_str(&source[last_end..range.start]);
52        }
53        result.push_str("// ✂️ test module omitted\n");
54        last_end = range.end;
55    }
56
57    if last_end < source.len() {
58        result.push_str(&source[last_end..]);
59    }
60
61    result
62}
63
64/// Apply the Summary strategy: extract signatures only, replace bodies with markers.
65///
66/// Returns the transformed source with implementations replaced by markers.
67#[must_use]
68pub fn apply_summary(source: &str, ranges: &[RemovalRange]) -> String {
69    let impl_ranges: Vec<&RemovalRange> =
70        ranges.iter().filter(|r| r.reason == RemovalReason::Implementation).collect();
71
72    if impl_ranges.is_empty() {
73        return source.to_string();
74    }
75
76    let mut result = String::with_capacity(source.len());
77    let mut last_end = 0;
78
79    for range in &impl_ranges {
80        if range.start > last_end {
81            result.push_str(&source[last_end..range.start]);
82        }
83        result.push_str("// ✂️ implementations omitted");
84        last_end = range.end;
85    }
86
87    if last_end < source.len() {
88        result.push_str(&source[last_end..]);
89    }
90
91    result
92}
93
94/// Compute the token count of a string using `tiktoken-rs`.
95#[must_use]
96pub fn compute_token_count(content: &str) -> usize {
97    tiktoken_rs::cl100k_base().map_or(0, |bpe| bpe.encode_with_special_tokens(content).len())
98}
99
100/// Build strategy data for all three output modes.
101///
102/// This is the main entry point called by language parsers. Given the full
103/// source and computed removal ranges, produces `StrategyData` for each mode.
104#[must_use]
105pub fn build_strategies(
106    source: &str,
107    test_ranges: &[RemovalRange],
108    summary_ranges: &[RemovalRange],
109) -> std::collections::HashMap<OutputStrategy, crate::parser::StrategyData> {
110    use std::collections::HashMap;
111
112    let mut all_ranges = Vec::new();
113    all_ranges.extend_from_slice(test_ranges);
114    all_ranges.extend_from_slice(summary_ranges);
115
116    // Full: verbatim source
117    let full_content = source.to_string();
118    let full_tokens = compute_token_count(&full_content);
119
120    // NoTests: remove test ranges
121    let no_tests_content = apply_no_tests(source, &all_ranges);
122    let no_tests_tokens = compute_token_count(&no_tests_content);
123
124    // Summary: extract signatures only
125    let summary_content = apply_summary(source, &all_ranges);
126    let summary_tokens = compute_token_count(&summary_content);
127
128    let mut map = HashMap::new();
129    map.insert(
130        OutputStrategy::Full,
131        crate::parser::StrategyData { content: full_content, token_count: full_tokens },
132    );
133    map.insert(
134        OutputStrategy::NoTests,
135        crate::parser::StrategyData { content: no_tests_content, token_count: no_tests_tokens },
136    );
137    map.insert(
138        OutputStrategy::Summary,
139        crate::parser::StrategyData { content: summary_content, token_count: summary_tokens },
140    );
141    map
142}
143
#[cfg(test)]
#[expect(clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;

    /// A library function followed by a `#[cfg(test)]` module that runs to the
    /// end of the text; shared by the NoTests cases.
    const ADD_WITH_TEST_MODULE: &str = "pub fn add(a: i32, b: i32) -> i32 {\n    a + b\n}\n\n#[cfg(test)]\nmod tests {\n    #[test]\n    fn test_add() {\n        assert_eq!(add(1, 2), 3);\n    }\n}\n";

    /// One `TestModule` range spanning from the `#[cfg(test)]` attribute to
    /// the end of `source`.
    fn trailing_test_module(source: &str) -> Vec<RemovalRange> {
        vec![RemovalRange {
            start: source.find("#[cfg(test)]").unwrap(),
            end: source.len(),
            reason: RemovalReason::TestModule,
        }]
    }

    // With no ranges at all, NoTests must hand the input back untouched.
    #[test]
    fn test_apply_no_tests_empty_ranges() {
        let source = "fn main() {}\n";
        assert_eq!(apply_no_tests(source, &[]), source);
    }

    // The test module disappears, the library code stays, and a marker is left behind.
    #[test]
    fn test_apply_no_tests_removes_test_module() {
        let ranges = trailing_test_module(ADD_WITH_TEST_MODULE);
        let stripped = apply_no_tests(ADD_WITH_TEST_MODULE, &ranges);
        for gone in ["#[cfg(test)]", "test_add"] {
            assert!(!stripped.contains(gone));
        }
        for kept in ["pub fn add", "✂️ test module omitted"] {
            assert!(stripped.contains(kept));
        }
    }

    // Source without any tests passes through NoTests unchanged.
    #[test]
    fn test_apply_no_tests_preserves_non_test_code() {
        let source = "pub fn lib() -> i32 {\n    42\n}\n";
        assert_eq!(apply_no_tests(source, &[]), source);
    }

    // With no ranges at all, Summary must hand the input back untouched.
    #[test]
    fn test_apply_summary_empty_ranges() {
        let source = "fn main() {}\n";
        assert_eq!(apply_summary(source, &[]), source);
    }

    // The signature survives, the body is gone, and a marker takes its place.
    #[test]
    fn test_apply_summary_replaces_bodies() {
        let source = "pub fn add(a: i32, b: i32) -> i32 {\n    a + b\n}\n";
        let ranges = [RemovalRange {
            start: source.find('{').unwrap(),
            // Stop just before the trailing newline so only the body is covered.
            end: source.len() - 1,
            reason: RemovalReason::Implementation,
        }];
        let summarized = apply_summary(source, &ranges);
        assert!(summarized.contains("pub fn add(a: i32, b: i32) -> i32"));
        assert!(summarized.contains("✂️ implementations omitted"));
        assert!(!summarized.contains("a + b"));
    }

    // Non-empty input must tokenize to at least one token.
    #[test]
    fn test_compute_token_count() {
        let count = compute_token_count("fn main() {}");
        assert!(count > 0, "token count should be > 0");
    }

    // Empty input tokenizes to nothing.
    #[test]
    fn test_compute_token_count_empty() {
        assert_eq!(compute_token_count(""), 0);
    }

    // All three modes are present and, without ranges, identical to the input.
    #[test]
    fn test_build_strategies_produces_three_variants() {
        let source = "pub fn lib() -> i32 {\n    42\n}\n";
        let strategies = build_strategies(source, &[], &[]);
        for mode in [OutputStrategy::Full, OutputStrategy::NoTests, OutputStrategy::Summary] {
            assert!(strategies.contains_key(&mode));
            // With no ranges, every mode should match the source.
            assert_eq!(strategies[&mode].content, source);
        }
    }

    // Full mode carries the verbatim source and its own token count.
    #[test]
    fn test_full_mode_is_verbatim() {
        let source = "fn main() {\n    println!(\"hello\");\n}\n";
        let strategies = build_strategies(source, &[], &[]);
        let full = &strategies[&OutputStrategy::Full];
        assert_eq!(full.content, source);
        assert_eq!(full.token_count, compute_token_count(source));
    }

    // Dropping a test module must shrink the token count relative to Full.
    #[test]
    fn test_no_tests_less_than_full_tokens() {
        let test_ranges = trailing_test_module(ADD_WITH_TEST_MODULE);
        let strategies = build_strategies(ADD_WITH_TEST_MODULE, &test_ranges, &[]);
        let without_tests = strategies[&OutputStrategy::NoTests].token_count;
        let verbatim = strategies[&OutputStrategy::Full].token_count;
        assert!(without_tests < verbatim, "NoTests should have fewer tokens than Full");
    }
}