ast_doc_core/parser/lang/
generic_parser.rs1use std::path::Path;
8
9use tree_sitter_language_pack::{ProcessConfig, StructureKind};
10
11use crate::{
12 error::AstDocError,
13 parser::{
14 Language, LanguageParser, ParsedFile,
15 strategy::{self, RemovalRange, RemovalReason},
16 },
17};
18
/// Parser that hands source text to `tree_sitter_language_pack` for the
/// language it was constructed with.
#[derive(Debug)]
pub struct GenericParser {
    /// Language identifier supplied to [`GenericParser::new`].
    language_name: String,
}

impl GenericParser {
    /// Creates a parser for `language_name`.
    ///
    /// The name is copied into the parser as-is; no validation happens here.
    #[must_use]
    pub fn new(language_name: &str) -> Self {
        let language_name = String::from(language_name);
        Self { language_name }
    }
}
35
36impl LanguageParser for GenericParser {
37 fn parse(&self, source: &str, path: &Path) -> Result<ParsedFile, AstDocError> {
38 let config = ProcessConfig::new(&self.language_name).all();
39
40 let result = tree_sitter_language_pack::process(source, &config).map_err(|e| {
41 AstDocError::Parse {
42 path: path.to_path_buf(),
43 message: format!("Failed to process {} source: {e}", self.language_name),
44 }
45 })?;
46
47 let test_ranges = collect_test_ranges_from_structure(&result, source);
48 let summary_ranges = collect_summary_ranges_from_structure(&result, source);
49
50 let strategies_data = strategy::build_strategies(source, &test_ranges, &summary_ranges);
51
52 Ok(ParsedFile {
53 path: path.to_path_buf(),
54 language: Language::Generic(self.language_name.clone()),
55 source: source.to_string(),
56 strategies_data,
57 })
58 }
59}
60
/// Reports whether `name` follows one of the recognised test/benchmark naming
/// conventions.
///
/// A name matches when it starts with `test_`, `Test`, `it_`, `should_`,
/// `bench_` or `Benchmark`, or when it starts with `test` immediately followed
/// by an ASCII uppercase letter (for example `testAdd`).
fn is_test_name(name: &str) -> bool {
    const PREFIXES: [&str; 6] = ["test_", "Test", "it_", "should_", "bench_", "Benchmark"];

    if PREFIXES.iter().any(|&prefix| name.starts_with(prefix)) {
        return true;
    }

    // camelCase form: the byte right after `test` must be an ASCII capital.
    name.strip_prefix("test")
        .and_then(|rest| rest.bytes().next())
        .is_some_and(|next| next.is_ascii_uppercase())
}
72
73fn is_test_structure_item(item: &tree_sitter_language_pack::StructureItem) -> bool {
75 match &item.name {
76 Some(name) => is_test_name(name),
77 None => false,
78 }
79}
80
81fn collect_test_ranges_from_structure(
83 result: &tree_sitter_language_pack::ProcessResult,
84 _source: &str,
85) -> Vec<RemovalRange> {
86 let mut ranges = Vec::new();
87
88 for item in &result.structure {
89 if is_test_structure_item(item) {
90 ranges.push(RemovalRange {
91 start: item.span.start_byte,
92 end: item.span.end_byte,
93 reason: match item.kind {
94 StructureKind::Function | StructureKind::Method => RemovalReason::TestFunction,
95 StructureKind::Class | StructureKind::Struct => RemovalReason::TestModule,
96 _ => RemovalReason::TestFunction,
97 },
98 });
99 }
100 }
101
102 ranges
103}
104
105fn collect_summary_ranges_from_structure(
107 result: &tree_sitter_language_pack::ProcessResult,
108 source: &str,
109) -> Vec<RemovalRange> {
110 let mut ranges = Vec::new();
111
112 for item in &result.structure {
113 if is_test_structure_item(item) {
114 continue;
115 }
116
117 match item.kind {
118 StructureKind::Function | StructureKind::Method => {
119 if let Some(range) = extract_body_range(source, &item.span) {
123 ranges.push(range);
124 }
125 }
126 StructureKind::Class |
127 StructureKind::Struct |
128 StructureKind::Interface |
129 StructureKind::Enum |
130 StructureKind::Module |
131 StructureKind::Trait |
132 StructureKind::Impl |
133 StructureKind::Namespace |
134 StructureKind::Other(_) => {
135 }
137 }
138 }
139
140 ranges
141}
142
143fn extract_body_range(
148 source: &str,
149 span: &tree_sitter_language_pack::Span,
150) -> Option<RemovalRange> {
151 let start = span.start_byte;
152 let end = span.end_byte;
153 if end > source.len() || start >= end {
154 return None;
155 }
156
157 let item_text = &source[start..end];
158
159 let body_start_in_item = find_body_open_brace(item_text)?;
162
163 let abs_body_start = start + body_start_in_item;
164 let abs_body_end = start + find_matching_brace(item_text, body_start_in_item)?;
165
166 if abs_body_end <= abs_body_start {
167 return None;
168 }
169
170 Some(RemovalRange {
171 start: abs_body_start,
172 end: abs_body_end + 1, reason: RemovalReason::Implementation,
174 })
175}
176
/// Finds the byte index of the first `{` in `text` that is outside any
/// parentheses and outside any open angle bracket.
///
/// Nesting is tracked with two counters: `(`/`)` and `<`/`>`. A brace is
/// accepted when the parenthesis counter is exactly zero and the angle counter
/// is zero or negative (a `>` without a preceding `<`, as in `->`, drives it
/// below zero). Quotes and comments are not interpreted, so braces inside them
/// are treated like any other brace.
///
/// Returns `None` when no such brace exists.
fn find_body_open_brace(text: &str) -> Option<usize> {
    let mut depth_paren = 0i32;
    let mut depth_angle = 0i32;

    for (index, byte) in text.bytes().enumerate() {
        match byte {
            b'(' => depth_paren += 1,
            b')' => depth_paren -= 1,
            b'<' => depth_angle += 1,
            b'>' => depth_angle -= 1,
            b'{' if depth_paren == 0 && depth_angle <= 0 => return Some(index),
            _ => {}
        }
    }

    None
}
207
/// Scans `text` from byte index `open_pos` and returns the index of the `}`
/// that brings the brace nesting count back to zero.
///
/// Every `{` raises the count and every `}` lowers it; the check for zero is
/// made only after a `}`. Returns `None` when the count never returns to zero
/// that way, or when `open_pos` lies beyond the end of `text`.
fn find_matching_brace(text: &str, open_pos: usize) -> Option<usize> {
    let tail = text.as_bytes().get(open_pos..)?;
    let mut nesting = 0i32;

    for (offset, &current) in tail.iter().enumerate() {
        if current == b'{' {
            nesting += 1;
            continue;
        }
        if current == b'}' {
            nesting -= 1;
            if nesting == 0 {
                return Some(open_pos + offset);
            }
        }
    }

    None
}
227
#[cfg(test)]
#[expect(clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use crate::config::OutputStrategy;

    /// Returns the content stored for `strategy`, or `""` when the strategy is absent.
    fn get_strategy_content<'a>(parsed: &'a ParsedFile, strategy: &OutputStrategy) -> &'a str {
        parsed.strategies_data.get(strategy).map_or("", |s| s.content.as_str())
    }

    // Parser tests return early when the language pack does not report the
    // required language as available.

    #[test]
    fn test_generic_parser_full_is_verbatim() {
        if !tree_sitter_language_pack::has_language("rust") {
            return;
        }
        let source = "fn main() {\n println!(\"hello\");\n}\n";
        let parser = GenericParser::new("rust");
        let parsed = parser.parse(source, Path::new("test.rs")).unwrap();
        assert_eq!(get_strategy_content(&parsed, &OutputStrategy::Full), source);
    }

    #[test]
    fn test_generic_parser_creates_three_strategies() {
        if !tree_sitter_language_pack::has_language("rust") {
            return;
        }
        let source = "fn main() {\n println!(\"hello\");\n}\n";
        let parser = GenericParser::new("rust");
        let parsed = parser.parse(source, Path::new("test.rs")).unwrap();
        assert!(parsed.strategies_data.contains_key(&OutputStrategy::Full));
        assert!(parsed.strategies_data.contains_key(&OutputStrategy::NoTests));
        assert!(parsed.strategies_data.contains_key(&OutputStrategy::Summary));
    }

    #[test]
    fn test_generic_parser_language_stored() {
        if !tree_sitter_language_pack::has_language("java") {
            return;
        }
        // The snippet is not Java; this test only checks the recorded language.
        let source = "fn main() {}\n";
        let parser = GenericParser::new("java");
        let parsed = parser.parse(source, Path::new("Main.java")).unwrap();
        assert_eq!(parsed.language, Language::Generic("java".to_string()));
    }

    #[test]
    fn test_is_test_name() {
        assert!(is_test_name("test_add"));
        assert!(is_test_name("TestAdd"));
        assert!(is_test_name("should_work"));
        assert!(is_test_name("bench_sort"));
        assert!(is_test_name("BenchmarkSort"));
        // `it_` prefix and camelCase `testX` form.
        assert!(is_test_name("it_works"));
        assert!(is_test_name("testAdd"));

        assert!(!is_test_name("add"));
        assert!(!is_test_name("main"));
        assert!(!is_test_name("get_test_value"));
        // `test` alone, or followed by a non-uppercase byte, is not a match.
        assert!(!is_test_name("test"));
        assert!(!is_test_name("testing"));
        assert!(!is_test_name(""));
    }

    #[test]
    fn test_find_matching_brace() {
        let text = "{ body }";
        assert_eq!(find_matching_brace(text, 0), Some(7));

        let text = "{ { nested } }";
        assert_eq!(find_matching_brace(text, 0), Some(13));
        // Starting at the inner brace matches the inner closing brace.
        assert_eq!(find_matching_brace(text, 2), Some(11));

        let text = "{ unclosed";
        assert_eq!(find_matching_brace(text, 0), None);
    }

    #[test]
    fn test_find_body_open_brace() {
        assert_eq!(find_body_open_brace("fn main() {\n}"), Some(10));
        // A brace inside parentheses is skipped.
        assert_eq!(find_body_open_brace("f({a}) {b}"), Some(7));
        // A brace on the line after the signature is still found.
        assert_eq!(find_body_open_brace("void f()\n{\n}"), Some(9));
        assert_eq!(find_body_open_brace("def hello():\n pass\n"), None);
        assert_eq!(find_body_open_brace(""), None);
    }

    #[test]
    fn test_generic_parser_with_python() {
        if !tree_sitter_language_pack::has_language("python") {
            return;
        }
        let source = "def hello():\n pass\n";
        let parser = GenericParser::new("python");
        let parsed = parser.parse(source, Path::new("test.py")).unwrap();
        assert_eq!(get_strategy_content(&parsed, &OutputStrategy::Full), source);
    }

    #[test]
    fn test_generic_parser_empty_source() {
        if !tree_sitter_language_pack::has_language("java") {
            return;
        }
        let source = "";
        let parser = GenericParser::new("java");
        let parsed = parser.parse(source, Path::new("Empty.java")).unwrap();
        assert_eq!(get_strategy_content(&parsed, &OutputStrategy::Full), "");
    }

    proptest::proptest! {
        #[test]
        fn test_generic_parser_full_matches_source(source in "[a-zA-Z0-9 {}();\n\t]{0,200}") {
            if tree_sitter_language_pack::has_language("c") {
                let parser = GenericParser::new("c");
                let parsed = parser.parse(&source, Path::new("test.c")).unwrap();
                let full_data = parsed.strategies_data.get(&OutputStrategy::Full);
                proptest::prop_assert!(full_data.is_some());
                proptest::prop_assert_eq!(&full_data.unwrap().content, &source);
            }
        }
    }
}