1use crate::error::{Result, SqzError};
8use std::collections::HashMap;
9use tree_sitter::{Language, Parser};
10
/// One import/include statement found in a source file.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ImportDecl {
    /// Text of the statement as recorded by the extractor.
    pub text: String,
}
16
/// A function found in a source file, reduced to its name and signature line.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FunctionSignature {
    /// Identifier of the function (empty when no name could be determined).
    pub name: String,
    /// Signature text as recorded by the extractor.
    pub signature: String,
}
23
/// A class-like definition found in a source file, reduced to its name and
/// signature line.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ClassDefinition {
    /// Identifier of the definition (empty when no name could be determined).
    pub name: String,
    /// Signature text as recorded by the extractor.
    pub signature: String,
}
30
/// A type declaration found in a source file, reduced to its name and
/// signature line.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct TypeDeclaration {
    /// Identifier of the declared type (empty when no name could be determined).
    pub name: String,
    /// Signature text as recorded by the extractor.
    pub signature: String,
}
37
38#[derive(Debug, Clone)]
40pub struct CodeSummary {
41 pub imports: Vec<ImportDecl>,
42 pub functions: Vec<FunctionSignature>,
43 pub classes: Vec<ClassDefinition>,
44 pub types: Vec<TypeDeclaration>,
45 pub tokens_original: u32,
46 pub tokens_summary: u32,
47}
48
49impl CodeSummary {
50 pub fn to_text(&self) -> String {
52 let mut parts = Vec::new();
53 for imp in &self.imports {
54 parts.push(imp.text.clone());
55 }
56 for cls in &self.classes {
57 parts.push(cls.signature.clone());
58 }
59 for ty in &self.types {
60 parts.push(ty.signature.clone());
61 }
62 for func in &self.functions {
63 parts.push(func.signature.clone());
64 }
65 parts.join("\n")
66 }
67}
68
/// Estimates the token count of `s` as its byte length divided by four,
/// rounded up. Values that do not fit in `u32` saturate at `u32::MAX`.
fn approx_tokens(s: &str) -> u32 {
    let bytes = s.len();
    // Ceiling division without going through floating point.
    let quads = bytes / 4 + usize::from(bytes % 4 != 0);
    u32::try_from(quads).unwrap_or(u32::MAX)
}
73
74fn extract_line(source: &str, node: &tree_sitter::Node) -> String {
79 let start = node.start_position().row;
80 let end = node.end_position().row;
81 let lines: Vec<&str> = source.lines().collect();
82 if start < lines.len() {
83 if start == end {
84 lines[start].trim().to_string()
85 } else {
86 lines[start].trim().to_string()
88 }
89 } else {
90 String::new()
91 }
92}
93
94fn node_text<'a>(source: &'a str, node: &tree_sitter::Node) -> &'a str {
95 &source[node.byte_range()]
96}
97
98fn extract_rust(source: &str, language: Language) -> Result<CodeSummary> {
100 let mut parser = Parser::new();
101 parser
102 .set_language(&language)
103 .map_err(|e| SqzError::Other(format!("tree-sitter language error: {e}")))?;
104
105 let tree = parser
106 .parse(source, None)
107 .ok_or_else(|| SqzError::Other("tree-sitter parse failed".into()))?;
108
109 let root = tree.root_node();
110 let mut imports = Vec::new();
111 let mut functions = Vec::new();
112 let mut classes = Vec::new();
113 let mut types = Vec::new();
114
115 let mut cursor = root.walk();
116 for child in root.children(&mut cursor) {
118 match child.kind() {
119 "use_declaration" => {
120 imports.push(ImportDecl {
121 text: extract_line(source, &child),
122 });
123 }
124 "function_item" => {
125 let name = child
126 .child_by_field_name("name")
127 .map(|n| node_text(source, &n).to_string())
128 .unwrap_or_default();
129 functions.push(FunctionSignature {
130 name,
131 signature: extract_line(source, &child),
132 });
133 }
134 "struct_item" | "enum_item" | "impl_item" | "trait_item" => {
135 let name = child
136 .child_by_field_name("name")
137 .map(|n| node_text(source, &n).to_string())
138 .unwrap_or_default();
139 classes.push(ClassDefinition {
140 name,
141 signature: extract_line(source, &child),
142 });
143 }
144 "type_item" => {
145 let name = child
146 .child_by_field_name("name")
147 .map(|n| node_text(source, &n).to_string())
148 .unwrap_or_default();
149 types.push(TypeDeclaration {
150 name,
151 signature: extract_line(source, &child),
152 });
153 }
154 _ => {}
155 }
156 }
157
158 let summary_text = {
159 let mut parts = Vec::new();
160 for i in &imports {
161 parts.push(i.text.clone());
162 }
163 for c in &classes {
164 parts.push(c.signature.clone());
165 }
166 for t in &types {
167 parts.push(t.signature.clone());
168 }
169 for f in &functions {
170 parts.push(f.signature.clone());
171 }
172 parts.join("\n")
173 };
174
175 Ok(CodeSummary {
176 imports,
177 functions,
178 classes,
179 types,
180 tokens_original: approx_tokens(source),
181 tokens_summary: approx_tokens(&summary_text),
182 })
183}
184
185fn extract_python(source: &str, language: Language) -> Result<CodeSummary> {
187 let mut parser = Parser::new();
188 parser
189 .set_language(&language)
190 .map_err(|e| SqzError::Other(format!("tree-sitter language error: {e}")))?;
191
192 let tree = parser
193 .parse(source, None)
194 .ok_or_else(|| SqzError::Other("tree-sitter parse failed".into()))?;
195
196 let root = tree.root_node();
197 let mut imports = Vec::new();
198 let mut functions = Vec::new();
199 let mut classes = Vec::new();
200 let types = Vec::new();
201
202 let mut cursor = root.walk();
203 for child in root.children(&mut cursor) {
204 match child.kind() {
205 "import_statement" | "import_from_statement" => {
206 imports.push(ImportDecl {
207 text: extract_line(source, &child),
208 });
209 }
210 "function_definition" => {
211 let name = child
212 .child_by_field_name("name")
213 .map(|n| node_text(source, &n).to_string())
214 .unwrap_or_default();
215 functions.push(FunctionSignature {
216 name,
217 signature: extract_line(source, &child),
218 });
219 }
220 "class_definition" => {
221 let name = child
222 .child_by_field_name("name")
223 .map(|n| node_text(source, &n).to_string())
224 .unwrap_or_default();
225 classes.push(ClassDefinition {
226 name,
227 signature: extract_line(source, &child),
228 });
229 }
230 _ => {}
231 }
232 }
233
234 let summary_text = build_summary_text(&imports, &functions, &classes, &types);
235 Ok(CodeSummary {
236 imports,
237 functions,
238 classes,
239 types,
240 tokens_original: approx_tokens(source),
241 tokens_summary: approx_tokens(&summary_text),
242 })
243}
244
245fn extract_javascript(source: &str, language: Language) -> Result<CodeSummary> {
247 let mut parser = Parser::new();
248 parser
249 .set_language(&language)
250 .map_err(|e| SqzError::Other(format!("tree-sitter language error: {e}")))?;
251
252 let tree = parser
253 .parse(source, None)
254 .ok_or_else(|| SqzError::Other("tree-sitter parse failed".into()))?;
255
256 let root = tree.root_node();
257 let mut imports = Vec::new();
258 let mut functions = Vec::new();
259 let mut classes = Vec::new();
260 let types = Vec::new();
261
262 let mut cursor = root.walk();
263 for child in root.children(&mut cursor) {
264 match child.kind() {
265 "import_statement" => {
266 imports.push(ImportDecl {
267 text: extract_line(source, &child),
268 });
269 }
270 "function_declaration" => {
271 let name = child
272 .child_by_field_name("name")
273 .map(|n| node_text(source, &n).to_string())
274 .unwrap_or_default();
275 functions.push(FunctionSignature {
276 name,
277 signature: extract_line(source, &child),
278 });
279 }
280 "class_declaration" => {
281 let name = child
282 .child_by_field_name("name")
283 .map(|n| node_text(source, &n).to_string())
284 .unwrap_or_default();
285 classes.push(ClassDefinition {
286 name,
287 signature: extract_line(source, &child),
288 });
289 }
290 "lexical_declaration" | "variable_declaration" => {
291 let line = extract_line(source, &child);
293 if line.contains("function") || line.contains("=>") {
294 let name = child
295 .named_child(0)
296 .and_then(|d| d.child_by_field_name("name"))
297 .map(|n| node_text(source, &n).to_string())
298 .unwrap_or_default();
299 if !name.is_empty() {
300 functions.push(FunctionSignature {
301 name,
302 signature: line,
303 });
304 }
305 }
306 }
307 _ => {}
308 }
309 }
310
311 let summary_text = build_summary_text(&imports, &functions, &classes, &types);
312 Ok(CodeSummary {
313 imports,
314 functions,
315 classes,
316 types,
317 tokens_original: approx_tokens(source),
318 tokens_summary: approx_tokens(&summary_text),
319 })
320}
321
322fn extract_bash(source: &str, language: Language) -> Result<CodeSummary> {
324 let mut parser = Parser::new();
325 parser
326 .set_language(&language)
327 .map_err(|e| SqzError::Other(format!("tree-sitter language error: {e}")))?;
328
329 let tree = parser
330 .parse(source, None)
331 .ok_or_else(|| SqzError::Other("tree-sitter parse failed".into()))?;
332
333 let root = tree.root_node();
334 let mut functions = Vec::new();
335 let imports = Vec::new();
336 let classes = Vec::new();
337 let types = Vec::new();
338
339 let mut cursor = root.walk();
340 for child in root.children(&mut cursor) {
341 if child.kind() == "function_definition" {
342 let name = child
343 .child_by_field_name("name")
344 .map(|n| node_text(source, &n).to_string())
345 .unwrap_or_default();
346 functions.push(FunctionSignature {
347 name,
348 signature: extract_line(source, &child),
349 });
350 }
351 }
352
353 let summary_text = build_summary_text(&imports, &functions, &classes, &types);
354 Ok(CodeSummary {
355 imports,
356 functions,
357 classes,
358 types,
359 tokens_original: approx_tokens(source),
360 tokens_summary: approx_tokens(&summary_text),
361 })
362}
363
364fn build_summary_text(
365 imports: &[ImportDecl],
366 functions: &[FunctionSignature],
367 classes: &[ClassDefinition],
368 types: &[TypeDeclaration],
369) -> String {
370 let mut parts = Vec::new();
371 for i in imports {
372 parts.push(i.text.clone());
373 }
374 for c in classes {
375 parts.push(c.signature.clone());
376 }
377 for t in types {
378 parts.push(t.signature.clone());
379 }
380 for f in functions {
381 parts.push(f.signature.clone());
382 }
383 parts.join("\n")
384}
385
/// Line-oriented extractor configured with literal text patterns.
///
/// Despite the name, the patterns are plain string literals, not regular
/// expressions.
struct RegexExtractor {
    /// Literals that mark a line as an import.
    import_patterns: Vec<&'static str>,
    /// Literals that mark a line as a function declaration.
    function_patterns: Vec<&'static str>,
    /// Literals that mark a line as a class-like definition.
    class_patterns: Vec<&'static str>,
    /// Literals that mark a line as a type declaration.
    type_patterns: Vec<&'static str>,
}
397
398impl RegexExtractor {
399 fn extract(&self, source: &str) -> CodeSummary {
400 use std::collections::HashSet;
401
402 let mut imports = Vec::new();
403 let mut functions = Vec::new();
404 let mut classes = Vec::new();
405 let mut types = Vec::new();
406 let mut seen: HashSet<String> = HashSet::new();
407
408 for line in source.lines() {
409 let trimmed = line.trim();
410 if trimmed.is_empty() || trimmed.starts_with("//") || trimmed.starts_with('#') {
411 continue;
412 }
413
414 for pat in &self.import_patterns {
415 if trimmed.starts_with(pat) && seen.insert(trimmed.to_string()) {
416 imports.push(ImportDecl {
417 text: trimmed.to_string(),
418 });
419 break;
420 }
421 }
422
423 for pat in &self.function_patterns {
424 if trimmed.contains(pat) && seen.insert(trimmed.to_string()) {
425 let name = extract_name_after(trimmed, pat);
427 functions.push(FunctionSignature {
428 name,
429 signature: trimmed.to_string(),
430 });
431 break;
432 }
433 }
434
435 for pat in &self.class_patterns {
436 if trimmed.starts_with(pat) && seen.insert(trimmed.to_string()) {
437 let name = extract_name_after(trimmed, pat);
438 classes.push(ClassDefinition {
439 name,
440 signature: trimmed.to_string(),
441 });
442 break;
443 }
444 }
445
446 for pat in &self.type_patterns {
447 if trimmed.starts_with(pat) && seen.insert(trimmed.to_string()) {
448 let name = extract_name_after(trimmed, pat);
449 types.push(TypeDeclaration {
450 name,
451 signature: trimmed.to_string(),
452 });
453 break;
454 }
455 }
456 }
457
458 let summary_text = build_summary_text(&imports, &functions, &classes, &types);
459 CodeSummary {
460 imports,
461 functions,
462 classes,
463 types,
464 tokens_original: approx_tokens(source),
465 tokens_summary: approx_tokens(&summary_text),
466 }
467 }
468}
469
/// Returns the identifier that follows the first occurrence of `keyword` in
/// `line`.
///
/// Whitespace after the keyword is skipped, then characters are taken up to
/// the first one that is neither alphanumeric nor `_`. The result is empty
/// when `keyword` does not occur in `line` or when no identifier character
/// follows it (for example `func (recv) Name()`).
fn extract_name_after(line: &str, keyword: &str) -> String {
    match line.split_once(keyword) {
        Some((_, rest)) => rest
            .trim_start()
            .split(|c: char| !c.is_alphanumeric() && c != '_')
            .next()
            .unwrap_or("")
            .to_string(),
        // Previously this sliced at `keyword.len()` from the line start, which
        // could panic or return unrelated text.
        None => String::new(),
    }
}
477
478fn go_extractor() -> RegexExtractor {
479 RegexExtractor {
480 import_patterns: vec!["import "],
481 function_patterns: vec!["func "],
482 class_patterns: vec!["type "],
483 type_patterns: vec![],
484 }
485}
486
487fn java_extractor() -> RegexExtractor {
488 RegexExtractor {
489 import_patterns: vec!["import "],
490 function_patterns: vec![
491 "public ",
492 "private ",
493 "protected ",
494 "static ",
495 "void ",
496 "int ",
497 "String ",
498 ],
499 class_patterns: vec!["class ", "interface ", "enum ", "record "],
500 type_patterns: vec![],
501 }
502}
503
504fn c_extractor() -> RegexExtractor {
505 RegexExtractor {
506 import_patterns: vec!["#include"],
507 function_patterns: vec![],
508 class_patterns: vec!["struct ", "union ", "enum "],
509 type_patterns: vec!["typedef "],
510 }
511}
512
513fn cpp_extractor() -> RegexExtractor {
514 RegexExtractor {
515 import_patterns: vec!["#include"],
516 function_patterns: vec![],
517 class_patterns: vec!["class ", "struct ", "union ", "enum "],
518 type_patterns: vec!["typedef ", "using "],
519 }
520}
521
522fn ruby_extractor() -> RegexExtractor {
523 RegexExtractor {
524 import_patterns: vec!["require ", "require_relative "],
525 function_patterns: vec!["def "],
526 class_patterns: vec!["class ", "module "],
527 type_patterns: vec![],
528 }
529}
530
531fn json_extractor() -> RegexExtractor {
532 RegexExtractor {
534 import_patterns: vec![],
535 function_patterns: vec![],
536 class_patterns: vec![],
537 type_patterns: vec![],
538 }
539}
540
541fn html_extractor() -> RegexExtractor {
542 RegexExtractor {
543 import_patterns: vec!["<link", "<script"],
544 function_patterns: vec![],
545 class_patterns: vec![],
546 type_patterns: vec![],
547 }
548}
549
550fn css_extractor() -> RegexExtractor {
551 RegexExtractor {
552 import_patterns: vec!["@import"],
553 function_patterns: vec![],
554 class_patterns: vec![],
555 type_patterns: vec!["@keyframes", "@media", "@mixin"],
556 }
557}
558
559fn typescript_extractor() -> RegexExtractor {
560 RegexExtractor {
561 import_patterns: vec!["import "],
562 function_patterns: vec!["function ", "async function ", "export function ", "export async function "],
563 class_patterns: vec!["class ", "interface ", "abstract class "],
564 type_patterns: vec!["type ", "enum "],
565 }
566}
567
568fn csharp_extractor() -> RegexExtractor {
569 RegexExtractor {
570 import_patterns: vec!["using "],
571 function_patterns: vec![
572 "public ",
573 "private ",
574 "protected ",
575 "internal ",
576 "static ",
577 "override ",
578 "virtual ",
579 "abstract ",
580 ],
581 class_patterns: vec!["class ", "interface ", "struct ", "enum ", "record "],
582 type_patterns: vec![],
583 }
584}
585
586fn kotlin_extractor() -> RegexExtractor {
587 RegexExtractor {
588 import_patterns: vec!["import "],
589 function_patterns: vec!["fun "],
590 class_patterns: vec!["class ", "interface ", "object ", "data class ", "sealed class "],
591 type_patterns: vec!["typealias "],
592 }
593}
594
595fn swift_extractor() -> RegexExtractor {
596 RegexExtractor {
597 import_patterns: vec!["import "],
598 function_patterns: vec!["func "],
599 class_patterns: vec!["class ", "struct ", "enum ", "protocol ", "extension "],
600 type_patterns: vec!["typealias "],
601 }
602}
603
604fn toml_extractor() -> RegexExtractor {
605 RegexExtractor {
606 import_patterns: vec![],
607 function_patterns: vec![],
608 class_patterns: vec!["["],
609 type_patterns: vec![],
610 }
611}
612
613fn yaml_extractor() -> RegexExtractor {
614 RegexExtractor {
615 import_patterns: vec![],
616 function_patterns: vec![],
617 class_patterns: vec![],
618 type_patterns: vec![],
619 }
620}
621
622pub struct AstParser {
628 grammars: HashMap<String, Language>,
629}
630
631impl AstParser {
632 pub fn new() -> Self {
634 let mut grammars = HashMap::new();
635 grammars.insert("rust".to_string(), tree_sitter_rust::language());
636 grammars.insert("python".to_string(), tree_sitter_python::language());
637 grammars.insert("javascript".to_string(), tree_sitter_javascript::language());
638 grammars.insert("bash".to_string(), tree_sitter_bash::language());
639 AstParser { grammars }
640 }
641
642 pub fn supported_languages(&self) -> &[&'static str] {
644 &[
645 "rust",
646 "python",
647 "javascript",
648 "typescript",
649 "go",
650 "java",
651 "c",
652 "cpp",
653 "ruby",
654 "bash",
655 "json",
656 "html",
657 "css",
658 "csharp",
659 "kotlin",
660 "swift",
661 "toml",
662 "yaml",
663 ]
664 }
665
666 pub fn is_supported(&self, language: &str) -> bool {
668 self.supported_languages().contains(&language)
669 }
670
671 pub fn extract_signatures(&self, source: &str, language: &str) -> Result<CodeSummary> {
676 if !self.is_supported(language) {
677 eprintln!("AstParser: unsupported language '{language}', returning source unchanged");
678 return Err(SqzError::UnsupportedLanguage(language.to_string()));
679 }
680
681 match language {
682 "rust" => {
683 let lang = self.grammars["rust"].clone();
684 extract_rust(source, lang)
685 }
686 "python" => {
687 let lang = self.grammars["python"].clone();
688 extract_python(source, lang)
689 }
690 "javascript" => {
691 let lang = self.grammars["javascript"].clone();
692 extract_javascript(source, lang)
693 }
694 "bash" => {
695 let lang = self.grammars["bash"].clone();
696 extract_bash(source, lang)
697 }
698 "typescript" => Ok(typescript_extractor().extract(source)),
699 "go" => Ok(go_extractor().extract(source)),
700 "java" => Ok(java_extractor().extract(source)),
701 "c" => Ok(c_extractor().extract(source)),
702 "cpp" => Ok(cpp_extractor().extract(source)),
703 "ruby" => Ok(ruby_extractor().extract(source)),
704 "json" => Ok(json_extractor().extract(source)),
705 "html" => Ok(html_extractor().extract(source)),
706 "css" => Ok(css_extractor().extract(source)),
707 "csharp" => Ok(csharp_extractor().extract(source)),
708 "kotlin" => Ok(kotlin_extractor().extract(source)),
709 "swift" => Ok(swift_extractor().extract(source)),
710 "toml" => Ok(toml_extractor().extract(source)),
711 "yaml" => Ok(yaml_extractor().extract(source)),
712 _ => unreachable!("is_supported check above covers all cases"),
713 }
714 }
715}
716
717impl Default for AstParser {
718 fn default() -> Self {
719 Self::new()
720 }
721}
722
723#[cfg(test)]
728mod tests {
729 use super::*;
730
731 #[test]
732 fn test_supported_languages_count() {
733 let parser = AstParser::new();
734 assert!(
735 parser.supported_languages().len() >= 18,
736 "must support 18+ languages"
737 );
738 }
739
740 #[test]
741 fn test_is_supported() {
742 let parser = AstParser::new();
743 assert!(parser.is_supported("rust"));
744 assert!(parser.is_supported("python"));
745 assert!(parser.is_supported("go"));
746 assert!(!parser.is_supported("cobol"));
747 assert!(!parser.is_supported(""));
748 }
749
750 #[test]
751 fn test_unsupported_language_returns_error() {
752 let parser = AstParser::new();
753 let result = parser.extract_signatures("fn main() {}", "cobol");
754 assert!(matches!(result, Err(SqzError::UnsupportedLanguage(_))));
755 }
756
757 #[test]
758 fn test_rust_extraction() {
759 let parser = AstParser::new();
760 let source = r#"
761use std::collections::HashMap;
762
763pub struct Foo {
764 x: i32,
765}
766
767pub fn bar(x: i32) -> i32 {
768 x + 1
769}
770
771pub type MyType = Vec<i32>;
772"#;
773 let summary = parser.extract_signatures(source, "rust").unwrap();
774 assert!(!summary.functions.is_empty());
775 assert!(!summary.classes.is_empty());
776 assert!(!summary.imports.is_empty());
777 assert!(summary.tokens_summary < summary.tokens_original);
778 }
779
780 #[test]
781 fn test_python_extraction() {
782 let parser = AstParser::new();
783 let source = r#"
784import os
785from typing import List
786
787class MyClass:
788 def __init__(self):
789 pass
790
791def my_function(x: int) -> int:
792 return x + 1
793"#;
794 let summary = parser.extract_signatures(source, "python").unwrap();
795 assert!(!summary.functions.is_empty());
796 assert!(!summary.classes.is_empty());
797 assert!(!summary.imports.is_empty());
798 }
799
800 #[test]
801 fn test_go_extraction() {
802 let parser = AstParser::new();
803 let source = r#"
804package main
805
806import "fmt"
807
808type Server struct {
809 port int
810}
811
812func NewServer(port int) *Server {
813 return &Server{port: port}
814}
815
816func (s *Server) Start() error {
817 fmt.Println("starting")
818 return nil
819}
820"#;
821 let summary = parser.extract_signatures(source, "go").unwrap();
822 assert!(!summary.functions.is_empty());
823 assert!(!summary.imports.is_empty());
824 }
825
826 #[test]
827 fn test_compression_ratio() {
828 let parser = AstParser::new();
829 let source = r#"
831use std::collections::HashMap;
832use std::sync::Arc;
833
834/// A complex data structure with lots of implementation
835pub struct ComplexStruct {
836 field1: i32,
837 field2: String,
838 field3: Vec<u8>,
839 field4: HashMap<String, i32>,
840}
841
842impl ComplexStruct {
843 pub fn new() -> Self {
844 Self {
845 field1: 0,
846 field2: String::new(),
847 field3: Vec::new(),
848 field4: HashMap::new(),
849 }
850 }
851
852 pub fn process(&self, input: &str) -> Result<String, Box<dyn std::error::Error>> {
853 // lots of implementation
854 let mut result = String::new();
855 for c in input.chars() {
856 result.push(c);
857 result.push(' ');
858 }
859 Ok(result)
860 }
861
862 fn internal_helper(&self) -> i32 {
863 self.field1 * 2
864 }
865}
866
867pub fn standalone_function(x: i32, y: i32) -> i32 {
868 // implementation
869 let temp = x + y;
870 let temp2 = temp * 2;
871 temp2 - x
872}
873
874pub type MyAlias = Arc<ComplexStruct>;
875"#;
876 let summary = parser.extract_signatures(source, "rust").unwrap();
877 assert!(
878 summary.tokens_summary < summary.tokens_original,
879 "summary ({}) should be smaller than original ({})",
880 summary.tokens_summary,
881 summary.tokens_original
882 );
883 }
884
885 #[cfg(test)]
898 mod prop25 {
899 use super::*;
900 use proptest::prelude::*;
901
902 fn arb_rust_source() -> impl Strategy<Value = String> {
906 (
907 1usize..=5, 1usize..=3, 5usize..=20, )
911 .prop_map(|(n_fns, n_structs, body_lines)| {
912 let mut src = String::new();
913 src.push_str("use std::collections::HashMap;\n\n");
914
915 for i in 0..n_structs {
916 src.push_str(&format!("pub struct MyStruct{i} {{\n"));
917 src.push_str(" field_a: i32,\n");
918 src.push_str(" field_b: String,\n");
919 src.push_str(" field_c: Vec<u8>,\n");
920 src.push_str("}\n\n");
921 }
922
923 for i in 0..n_fns {
924 src.push_str(&format!(
925 "pub fn my_function_{i}(x: i32, y: i32) -> i32 {{\n"
926 ));
927 for j in 0..body_lines {
928 src.push_str(&format!(
929 " let _var_{j} = x + y + {j};\n"
930 ));
931 }
932 src.push_str(" x + y\n");
933 src.push_str("}\n\n");
934 }
935 src
936 })
937 }
938
939 fn arb_python_source() -> impl Strategy<Value = String> {
941 (
942 1usize..=5,
943 1usize..=3,
944 5usize..=20,
945 )
946 .prop_map(|(n_fns, n_classes, body_lines)| {
947 let mut src = String::new();
948 src.push_str("import os\nimport sys\nfrom typing import List, Dict\n\n");
949
950 for i in 0..n_classes {
951 src.push_str(&format!("class MyClass{i}:\n"));
952 src.push_str(" def __init__(self):\n");
953 for j in 0..body_lines {
954 src.push_str(&format!(" self.field_{j} = {j}\n"));
955 }
956 src.push('\n');
957 }
958
959 for i in 0..n_fns {
960 src.push_str(&format!("def my_function_{i}(x, y):\n"));
961 for j in 0..body_lines {
962 src.push_str(&format!(" var_{j} = x + y + {j}\n"));
963 }
964 src.push_str(" return x + y\n\n");
965 }
966 src
967 })
968 }
969
970 proptest! {
971 #[test]
975 fn prop25_ast_preserves_public_api_rust(source in arb_rust_source()) {
976 let parser = AstParser::new();
977 let summary = parser.extract_signatures(&source, "rust")
978 .expect("rust extraction should succeed");
979
980 prop_assert!(
982 summary.tokens_summary < summary.tokens_original,
983 "tokens_summary ({}) must be < tokens_original ({})",
984 summary.tokens_summary,
985 summary.tokens_original
986 );
987
988 let summary_text = summary.to_text();
990 for func in &summary.functions {
991 prop_assert!(
992 summary_text.contains(&func.name),
993 "function name '{}' must appear in summary",
994 func.name
995 );
996 }
997
998 for cls in &summary.classes {
1000 prop_assert!(
1001 summary_text.contains(&cls.name),
1002 "class name '{}' must appear in summary",
1003 cls.name
1004 );
1005 }
1006
1007 prop_assert!(
1009 !summary.functions.is_empty() || !summary.classes.is_empty(),
1010 "must extract at least one function or class"
1011 );
1012 }
1013
1014 #[test]
1018 fn prop25_ast_preserves_public_api_python(source in arb_python_source()) {
1019 let parser = AstParser::new();
1020 let summary = parser.extract_signatures(&source, "python")
1021 .expect("python extraction should succeed");
1022
1023 prop_assert!(
1025 summary.tokens_summary < summary.tokens_original,
1026 "tokens_summary ({}) must be < tokens_original ({})",
1027 summary.tokens_summary,
1028 summary.tokens_original
1029 );
1030
1031 let summary_text = summary.to_text();
1033 for func in &summary.functions {
1034 prop_assert!(
1035 summary_text.contains(&func.name),
1036 "function name '{}' must appear in summary",
1037 func.name
1038 );
1039 }
1040
1041 for cls in &summary.classes {
1043 prop_assert!(
1044 summary_text.contains(&cls.name),
1045 "class name '{}' must appear in summary",
1046 cls.name
1047 );
1048 }
1049
1050 prop_assert!(
1052 !summary.functions.is_empty() || !summary.classes.is_empty(),
1053 "must extract at least one function or class"
1054 );
1055 }
1056 }
1057 }
1058}