1use std::cell::RefCell;
22use std::collections::{BTreeSet, HashMap};
23use std::ops::Bound;
24use std::path::Path;
25use std::sync::Arc;
26
27use once_cell::sync::Lazy;
28use parking_lot::RwLock;
29use tree_sitter::{Language as TSLanguage, Node, Parser, Query, QueryCursor, QueryMatch, Tree};
30
31use crate::ast::types::{ClassInfo, FunctionInfo, ImportInfo, ModuleInfo};
32use crate::error::{Result, BrrrError};
33use crate::lang::{Language, LanguageRegistry};
34use crate::util::format_query_error;
35
36type QueryCacheKey = (&'static str, &'static str);
39
40static QUERY_CACHE: Lazy<RwLock<HashMap<QueryCacheKey, Arc<Query>>>> =
45 Lazy::new(|| RwLock::new(HashMap::new()));
46
47fn get_cached_query(
66 ts_lang: &TSLanguage,
67 lang_name: &'static str,
68 query_kind: &'static str,
69 query_str: &str,
70) -> Result<Arc<Query>> {
71 let key = (lang_name, query_kind);
72
73 {
75 let cache = QUERY_CACHE.read();
76 if let Some(query) = cache.get(&key) {
77 return Ok(Arc::clone(query));
78 }
79 }
80
81 let query = Query::new(ts_lang, query_str).map_err(|e| {
83 BrrrError::TreeSitter(format_query_error(lang_name, query_kind, query_str, &e))
84 })?;
85
86 let query_arc = Arc::new(query);
87
88 let mut cache = QUERY_CACHE.write();
89 cache.entry(key).or_insert_with(|| Arc::clone(&query_arc));
92
93 Ok(query_arc)
94}
95
96#[allow(dead_code)]
101pub fn clear_query_cache() {
102 let mut cache = QUERY_CACHE.write();
103 cache.clear();
104}
105
106#[allow(dead_code)]
110pub fn query_cache_stats() -> usize {
111 QUERY_CACHE.read().len()
112}
113
114thread_local! {
131 static PARSER_CACHE: RefCell<HashMap<&'static str, Parser>> = RefCell::new(HashMap::new());
137}
138
139const MAX_CACHED_PARSERS: usize = 16;
145
146struct CachedParser {
160 parser: Option<Parser>,
162 lang_name: &'static str,
164}
165
166impl CachedParser {
167 fn take(lang: &dyn Language) -> Result<Self> {
182 let lang_name = lang.name();
183
184 let cached = PARSER_CACHE.with(|cache| cache.borrow_mut().remove(lang_name));
186
187 let parser = match cached {
188 Some(mut p) => {
189 p.reset();
191 p
192 }
193 None => {
194 lang.parser()?
196 }
197 };
198
199 Ok(Self {
200 parser: Some(parser),
201 lang_name,
202 })
203 }
204
205 fn get_mut(&mut self) -> &mut Parser {
212 self.parser.as_mut().expect("Parser already consumed")
213 }
214}
215
216impl Drop for CachedParser {
217 fn drop(&mut self) {
218 if let Some(parser) = self.parser.take() {
219 PARSER_CACHE.with(|cache| {
220 let mut cache = cache.borrow_mut();
221 if cache.len() < MAX_CACHED_PARSERS {
223 cache.insert(self.lang_name, parser);
224 }
225 });
227 }
228 }
229}
230
231#[allow(dead_code)]
236pub fn clear_parser_cache() {
237 PARSER_CACHE.with(|cache| {
238 cache.borrow_mut().clear();
239 });
240}
241
242#[allow(dead_code)]
246pub fn parser_cache_stats() -> usize {
247 PARSER_CACHE.with(|cache| cache.borrow().len())
248}
249
/// Ordered set of byte offsets that answers "is there a recorded offset
/// within `tolerance` of this one?".
struct PositionSet {
    /// Recorded offsets, kept sorted so a range lookup can be used.
    positions: BTreeSet<usize>,
    /// Maximum distance (inclusive) at which two offsets count as the same.
    tolerance: usize,
}

impl PositionSet {
    /// Creates an empty set that treats offsets at most `tolerance` apart as
    /// duplicates.
    fn with_tolerance(tolerance: usize) -> Self {
        PositionSet {
            tolerance,
            positions: BTreeSet::new(),
        }
    }

    /// Returns `true` when some recorded offset lies in
    /// `[pos - tolerance, pos + tolerance]`; both ends saturate at the limits
    /// of `usize`.
    fn is_duplicate(&self, pos: usize) -> bool {
        let window = pos.saturating_sub(self.tolerance)..=pos.saturating_add(self.tolerance);
        self.positions.range(window).next().is_some()
    }

    /// Records `pos`.
    fn insert(&mut self, pos: usize) {
        self.positions.insert(pos);
    }
}
315
/// Tree-sitter node kinds accepted as "function-like" when a query match has
/// no capture named `function`.
///
/// Only membership is ever tested, so each kind is listed once even when
/// several grammars share the name.
const FUNCTION_NODE_KINDS: &[&str] = &[
    "function_definition",
    "decorated_definition",
    "function_declaration",
    "method_definition",
    "arrow_function",
    "function_expression",
    "generator_function_declaration",
    "function_signature",
    "ambient_declaration",
    "method_declaration",
    "function_item",
    "function_signature_item",
    "macro_definition",
    "declaration",
    "template_declaration",
    "preproc_def",
    "preproc_function_def",
    "type_definition",
    "constructor_declaration",
];
352
/// Tree-sitter node kinds accepted as "class-like" when a query match has no
/// capture named `class`.
///
/// Only membership is ever tested, so each kind is listed once even when
/// several grammars share the name.
const CLASS_NODE_KINDS: &[&str] = &[
    "class_definition",
    "decorated_definition",
    "class_declaration",
    "abstract_class_declaration",
    "class",
    "interface_declaration",
    "enum_declaration",
    "type_alias_declaration",
    "module",
    "type_declaration",
    "struct_item",
    "union_item",
    "impl_item",
    "trait_item",
    "enum_item",
    "const_item",
    "static_item",
    "type_item",
    "mod_item",
    "foreign_mod_item",
    "extern_crate_declaration",
    "struct_specifier",
    "enum_specifier",
    "union_specifier",
    "class_specifier",
    "namespace_definition",
    "type_definition",
    "preproc_ifdef",
    "preproc_if",
    "record_declaration",
    "annotation_type_declaration",
];
401
402fn get_function_node_from_match<'tree>(
414 match_: &QueryMatch<'_, 'tree>,
415 query: &Query,
416) -> Option<Node<'tree>> {
417 if let Some(idx) = query.capture_index_for_name("function") {
419 if let Some(capture) = match_.captures.iter().find(|c| c.index == idx) {
420 return Some(capture.node);
421 }
422 }
423
424 for capture in match_.captures.iter() {
427 if FUNCTION_NODE_KINDS.contains(&capture.node.kind()) {
428 return Some(capture.node);
429 }
430 }
431
432 match_.captures.first().map(|c| c.node)
435}
436
437fn get_class_node_from_match<'tree>(
449 match_: &QueryMatch<'_, 'tree>,
450 query: &Query,
451) -> Option<Node<'tree>> {
452 if let Some(idx) = query.capture_index_for_name("class") {
454 if let Some(capture) = match_.captures.iter().find(|c| c.index == idx) {
455 return Some(capture.node);
456 }
457 }
458
459 for capture in match_.captures.iter() {
462 if CLASS_NODE_KINDS.contains(&capture.node.kind()) {
463 return Some(capture.node);
464 }
465 }
466
467 match_.captures.first().map(|c| c.node)
469}
470
471pub struct AstExtractor;
476
477impl AstExtractor {
478 pub fn extract_file(path: &Path) -> Result<ModuleInfo> {
497 let registry = LanguageRegistry::global();
498 let lang = registry.detect_language(path).ok_or_else(|| {
499 BrrrError::UnsupportedLanguage(
500 path.extension()
501 .and_then(|e| e.to_str())
502 .unwrap_or("unknown")
503 .to_string(),
504 )
505 })?;
506
507 let source = std::fs::read(path)
508 .map_err(|e| BrrrError::io_with_path(e, path))?;
509
510 if lang.should_skip_file(path, &source) {
514 return Err(BrrrError::UnsupportedLanguage(format!(
515 "File content incompatible with {} parser: {}",
516 lang.name(),
517 path.display()
518 )));
519 }
520
521 let mut cached_parser = CachedParser::take(lang)?;
525 let tree = cached_parser
526 .get_mut()
527 .parse(&source, None)
528 .ok_or_else(|| BrrrError::Parse {
529 file: path.display().to_string(),
530 message: "Failed to parse file".to_string(),
531 })?;
532
533 Self::extract_module(&tree, &source, lang, path)
534 }
535
536 fn extract_module(
541 tree: &Tree,
542 source: &[u8],
543 lang: &dyn Language,
544 path: &Path,
545 ) -> Result<ModuleInfo> {
546 let functions = Self::extract_functions(tree, source, lang)?;
547 let classes = Self::extract_classes(tree, source, lang)?;
548 let imports = lang.extract_imports(tree, source);
549 let docstring = lang.extract_module_docstring(tree, source);
550
551 Ok(ModuleInfo {
552 path: path.display().to_string(),
553 language: lang.name().to_string(),
554 docstring,
555 functions,
556 classes,
557 imports,
558 call_graph: None, })
560 }
561
562 fn extract_functions(
576 tree: &Tree,
577 source: &[u8],
578 lang: &dyn Language,
579 ) -> Result<Vec<FunctionInfo>> {
580 let query_str = lang.function_query();
581 let ts_lang = tree.language();
582
583 let query = get_cached_query(&ts_lang, lang.name(), "function", query_str)?;
586
587 let mut cursor = QueryCursor::new();
588 let mut matches = cursor.matches(&query, tree.root_node(), source);
589
590 let mut functions = Vec::new();
591 let mut seen_positions = PositionSet::with_tolerance(2);
594
595 use streaming_iterator::StreamingIterator;
597 while let Some(match_) = matches.next() {
598 let node = get_function_node_from_match(match_, &query);
602
603 if let Some(node) = node {
604 let start = node.start_byte();
622
623 if seen_positions.is_duplicate(start) {
625 continue;
626 }
627 seen_positions.insert(start);
628
629 if let Some(func_info) = lang.extract_function(node, source) {
630 functions.push(func_info);
631 }
632 }
633 }
634
635 functions.sort_by_key(|f| f.line_number);
637 Ok(functions)
638 }
639
640 fn extract_classes(tree: &Tree, source: &[u8], lang: &dyn Language) -> Result<Vec<ClassInfo>> {
654 let query_str = lang.class_query();
655 let ts_lang = tree.language();
656
657 let query = get_cached_query(&ts_lang, lang.name(), "class", query_str)?;
660
661 let mut cursor = QueryCursor::new();
662 let mut matches = cursor.matches(&query, tree.root_node(), source);
663
664 let mut classes = Vec::new();
665 let mut seen_positions = PositionSet::with_tolerance(2);
668
669 use streaming_iterator::StreamingIterator;
671 while let Some(match_) = matches.next() {
672 let node = get_class_node_from_match(match_, &query);
677
678 if let Some(node) = node {
679 let start = node.start_byte();
687
688 if seen_positions.is_duplicate(start) {
690 continue;
691 }
692 seen_positions.insert(start);
693
694 if let Some(class_info) = lang.extract_class(node, source) {
695 classes.push(class_info);
701 }
702 }
703 }
704
705 classes.sort_by_key(|c| c.line_number);
707 Ok(classes)
708 }
709
710 #[allow(dead_code)]
722 pub fn extract_from_source(source: &str, language: &str) -> Result<ModuleInfo> {
723 let registry = LanguageRegistry::global();
724 let lang = registry
725 .get_by_name(language)
726 .ok_or_else(|| BrrrError::UnsupportedLanguage(language.to_string()))?;
727
728 let source_bytes = source.as_bytes();
729
730 let mut cached_parser = CachedParser::take(lang)?;
733 let tree = cached_parser
734 .get_mut()
735 .parse(source_bytes, None)
736 .ok_or_else(|| BrrrError::Parse {
737 file: "<string>".to_string(),
738 message: "Failed to parse source".to_string(),
739 })?;
740
741 let functions = Self::extract_functions(&tree, source_bytes, lang)?;
742 let classes = Self::extract_classes(&tree, source_bytes, lang)?;
743 let imports = lang.extract_imports(&tree, source_bytes);
744 let docstring = lang.extract_module_docstring(&tree, source_bytes);
745
746 Ok(ModuleInfo {
747 path: "<string>".to_string(),
748 language: lang.name().to_string(),
749 docstring,
750 functions,
751 classes,
752 imports,
753 call_graph: None, })
755 }
756
757 #[allow(dead_code)]
766 pub fn find_function(path: &Path, function_name: &str) -> Result<FunctionInfo> {
767 let module_info = Self::extract_file(path)?;
768
769 if let Some(func) = module_info
771 .functions
772 .iter()
773 .find(|f| f.name == function_name)
774 {
775 return Ok(func.clone());
776 }
777
778 for class in &module_info.classes {
780 if let Some(method) = class.methods.iter().find(|m| m.name == function_name) {
781 return Ok(method.clone());
782 }
783 }
784
785 Err(BrrrError::FunctionNotFound(function_name.to_string()))
786 }
787
788 #[allow(dead_code)]
797 pub fn find_class(path: &Path, class_name: &str) -> Result<ClassInfo> {
798 let module_info = Self::extract_file(path)?;
799
800 module_info
801 .classes
802 .into_iter()
803 .find(|c| c.name == class_name)
804 .ok_or_else(|| BrrrError::ClassNotFound(class_name.to_string()))
805 }
806}
807
808pub fn extract_imports(path: &Path) -> Result<Vec<ImportInfo>> {
812 let registry = LanguageRegistry::global();
813 let lang = registry.detect_language(path).ok_or_else(|| {
814 BrrrError::UnsupportedLanguage(
815 path.extension()
816 .and_then(|e| e.to_str())
817 .unwrap_or("unknown")
818 .to_string(),
819 )
820 })?;
821
822 let source = std::fs::read(path)
823 .map_err(|e| BrrrError::io_with_path(e, path))?;
824
825 let mut cached_parser = CachedParser::take(lang)?;
827 let tree = cached_parser
828 .get_mut()
829 .parse(&source, None)
830 .ok_or_else(|| BrrrError::Parse {
831 file: path.display().to_string(),
832 message: "Failed to parse file".to_string(),
833 })?;
834
835 Ok(lang.extract_imports(&tree, &source))
836}
837
838#[cfg(test)]
839mod tests {
840 use super::*;
841 use std::io::Write;
842 use tempfile::NamedTempFile;
843
844 fn create_temp_file(content: &str, extension: &str) -> NamedTempFile {
845 let mut file = tempfile::Builder::new()
846 .suffix(extension)
847 .tempfile()
848 .unwrap();
849 file.write_all(content.as_bytes()).unwrap();
850 file
851 }
852
853 #[test]
854 fn test_extract_python_functions() {
855 let source = r#"
856def hello(name: str) -> str:
857 """Say hello to someone."""
858 return f"Hello, {name}!"
859
860async def fetch_data(url: str) -> bytes:
861 """Fetch data from URL."""
862 pass
863
864class MyClass:
865 def method(self, x: int) -> int:
866 return x * 2
867"#;
868 let file = create_temp_file(source, ".py");
869 let result = AstExtractor::extract_file(file.path()).unwrap();
870
871 assert_eq!(result.language, "python");
872 assert!(result.functions.len() >= 2);
873
874 let hello = result.functions.iter().find(|f| f.name == "hello");
876 assert!(hello.is_some());
877 let hello = hello.unwrap();
878 assert_eq!(hello.return_type, Some("str".to_string()));
879 assert!(hello
880 .docstring
881 .as_ref()
882 .map_or(false, |d| d.contains("Say hello")));
883 assert!(!hello.is_async);
884
885 let fetch = result.functions.iter().find(|f| f.name == "fetch_data");
887 assert!(fetch.is_some());
888 assert!(fetch.unwrap().is_async);
889
890 assert_eq!(result.classes.len(), 1);
892 assert_eq!(result.classes[0].name, "MyClass");
893 assert!(!result.classes[0].methods.is_empty());
894 }
895
896 #[test]
897 fn test_extract_python_classes() {
898 let source = r#"
899class Animal:
900 """Base class for animals."""
901
902 def __init__(self, name: str):
903 self.name = name
904
905 def speak(self) -> str:
906 pass
907
908class Dog(Animal):
909 """A dog."""
910
911 def speak(self) -> str:
912 return "Woof!"
913"#;
914 let file = create_temp_file(source, ".py");
915 let result = AstExtractor::extract_file(file.path()).unwrap();
916
917 assert_eq!(result.classes.len(), 2);
918
919 let animal = result.classes.iter().find(|c| c.name == "Animal").unwrap();
920 assert!(animal
921 .docstring
922 .as_ref()
923 .map_or(false, |d| d.contains("Base class")));
924 assert!(animal.methods.len() >= 2);
925
926 let dog = result.classes.iter().find(|c| c.name == "Dog").unwrap();
927 assert!(dog.bases.contains(&"Animal".to_string()));
928 }
929
930 #[test]
931 fn test_extract_python_imports() {
932 let source = r#"
933import os
934import sys as system
935from pathlib import Path
936from collections import defaultdict as dd
937from . import local
938"#;
939 let file = create_temp_file(source, ".py");
940 let imports = extract_imports(file.path()).unwrap();
941
942 assert!(imports.len() >= 4);
943
944 let os_import = imports.iter().find(|i| i.module == "os");
946 assert!(os_import.is_some());
947 assert!(!os_import.unwrap().is_from);
948
949 let sys_import = imports.iter().find(|i| i.module == "sys");
951 assert!(sys_import.is_some());
952 assert!(sys_import.unwrap().aliases.contains_key("sys"));
953
954 let pathlib_import = imports.iter().find(|i| i.module == "pathlib");
956 assert!(pathlib_import.is_some());
957 assert!(pathlib_import.unwrap().is_from);
958 assert!(pathlib_import.unwrap().names.contains(&"Path".to_string()));
959 }
960
961 #[test]
962 fn test_extract_typescript_functions() {
963 let source = r#"
964function greet(name: string): string {
965 return "Hello, " + name;
966}
967
968async function fetchData(url: string): Promise<Response> {
969 return fetch(url);
970}
971
972const add = (a: number, b: number): number => a + b;
973"#;
974 let file = create_temp_file(source, ".ts");
975 let result = AstExtractor::extract_file(file.path()).unwrap();
976
977 assert_eq!(result.language, "typescript");
978 assert!(result.functions.len() >= 2);
979
980 let greet = result.functions.iter().find(|f| f.name == "greet");
981 assert!(greet.is_some());
982 assert_eq!(greet.unwrap().return_type, Some("string".to_string()));
983
984 let fetch_data = result.functions.iter().find(|f| f.name == "fetchData");
985 assert!(fetch_data.is_some());
986 assert!(fetch_data.unwrap().is_async);
987 }
988
989 #[test]
990 fn test_extract_typescript_classes() {
991 let source = r#"
992class Animal {
993 constructor(public name: string) {}
994
995 speak(): void {
996 console.log(this.name);
997 }
998}
999
1000class Dog extends Animal {
1001 bark(): void {
1002 console.log("Woof!");
1003 }
1004}
1005"#;
1006 let file = create_temp_file(source, ".ts");
1007 let result = AstExtractor::extract_file(file.path()).unwrap();
1008
1009 assert_eq!(result.classes.len(), 2);
1010
1011 let animal = result.classes.iter().find(|c| c.name == "Animal");
1012 assert!(animal.is_some());
1013
1014 let dog = result.classes.iter().find(|c| c.name == "Dog");
1015 assert!(dog.is_some());
1016 assert!(dog.unwrap().bases.contains(&"Animal".to_string()));
1017 }
1018
1019 #[test]
1020 fn test_extract_from_source() {
1021 let source = r#"
1022def add(a: int, b: int) -> int:
1023 return a + b
1024"#;
1025 let result = AstExtractor::extract_from_source(source, "python").unwrap();
1026
1027 assert_eq!(result.language, "python");
1028 assert_eq!(result.functions.len(), 1);
1029 assert_eq!(result.functions[0].name, "add");
1030 }
1031
1032 #[test]
1033 fn test_find_function() {
1034 let source = r#"
1035def target_function(x: int) -> int:
1036 return x * 2
1037
1038def other_function():
1039 pass
1040"#;
1041 let file = create_temp_file(source, ".py");
1042
1043 let func = AstExtractor::find_function(file.path(), "target_function");
1044 assert!(func.is_ok());
1045 assert_eq!(func.unwrap().name, "target_function");
1046
1047 let not_found = AstExtractor::find_function(file.path(), "nonexistent");
1048 assert!(not_found.is_err());
1049 }
1050
1051 #[test]
1052 fn test_find_class() {
1053 let source = r#"
1054class TargetClass:
1055 pass
1056
1057class OtherClass:
1058 pass
1059"#;
1060 let file = create_temp_file(source, ".py");
1061
1062 let class = AstExtractor::find_class(file.path(), "TargetClass");
1063 assert!(class.is_ok());
1064 assert_eq!(class.unwrap().name, "TargetClass");
1065
1066 let not_found = AstExtractor::find_class(file.path(), "NonexistentClass");
1067 assert!(not_found.is_err());
1068 assert!(matches!(not_found, Err(BrrrError::ClassNotFound(_))));
1069 }
1070
1071 #[test]
1072 fn test_unsupported_language() {
1073 let file = create_temp_file("some content", ".xyz");
1074 let result = AstExtractor::extract_file(file.path());
1075
1076 assert!(matches!(result, Err(BrrrError::UnsupportedLanguage(_))));
1077 }
1078
1079 #[test]
1080 fn test_decorated_python_function() {
1081 let source = r#"
1082@staticmethod
1083@cache
1084def cached_function(x: int) -> int:
1085 return x * 2
1086"#;
1087 let file = create_temp_file(source, ".py");
1088 let result = AstExtractor::extract_file(file.path()).unwrap();
1089
1090 assert_eq!(result.functions.len(), 1);
1091 let func = &result.functions[0];
1092 assert_eq!(func.name, "cached_function");
1093 assert!(!func.decorators.is_empty());
1094 }
1095
1096 #[test]
1097 fn test_decorated_python_class() {
1098 let source = r#"
1099@dataclass
1100class Point:
1101 x: float
1102 y: float
1103"#;
1104 let file = create_temp_file(source, ".py");
1105 let result = AstExtractor::extract_file(file.path()).unwrap();
1106
1107 assert_eq!(result.classes.len(), 1);
1108 let class = &result.classes[0];
1109 assert_eq!(class.name, "Point");
1110 assert!(!class.decorators.is_empty());
1111 }
1112
1113 #[test]
1114 fn test_multiple_decorated_functions_no_duplicates() {
1115 let source = r#"
1119@decorator1
1120def func1():
1121 pass
1122
1123@decorator2
1124@decorator3
1125def func2():
1126 pass
1127
1128@contextmanager
1129def func3():
1130 yield
1131
1132def plain_func():
1133 pass
1134"#;
1135 let file = create_temp_file(source, ".py");
1136 let result = AstExtractor::extract_file(file.path()).unwrap();
1137
1138 assert_eq!(
1140 result.functions.len(),
1141 4,
1142 "Expected 4 functions, got {}: {:?}",
1143 result.functions.len(),
1144 result.functions.iter().map(|f| &f.name).collect::<Vec<_>>()
1145 );
1146
1147 let names: Vec<&str> = result.functions.iter().map(|f| f.name.as_str()).collect();
1149 assert!(names.contains(&"func1"));
1150 assert!(names.contains(&"func2"));
1151 assert!(names.contains(&"func3"));
1152 assert!(names.contains(&"plain_func"));
1153 }
1154
1155 #[test]
1156 fn test_nested_decorated_classes_no_duplicates() {
1157 let source = r#"
1159@dataclass
1160class Point:
1161 x: float
1162 y: float
1163
1164@singleton
1165@validate
1166class Config:
1167 value: str
1168
1169class PlainClass:
1170 pass
1171"#;
1172 let file = create_temp_file(source, ".py");
1173 let result = AstExtractor::extract_file(file.path()).unwrap();
1174
1175 assert_eq!(
1177 result.classes.len(),
1178 3,
1179 "Expected 3 classes, got {}: {:?}",
1180 result.classes.len(),
1181 result.classes.iter().map(|c| &c.name).collect::<Vec<_>>()
1182 );
1183
1184 let names: Vec<&str> = result.classes.iter().map(|c| c.name.as_str()).collect();
1185 assert!(names.contains(&"Point"));
1186 assert!(names.contains(&"Config"));
1187 assert!(names.contains(&"PlainClass"));
1188 }
1189
1190 #[test]
1191 fn test_overlap_detection_algorithm() {
1192 fn overlaps(start: usize, end: usize, s: usize, e: usize) -> bool {
1197 start < e && s < end
1198 }
1199
1200 assert!(overlaps(10, 20, 15, 25), "Partial overlap should be detected");
1202
1203 assert!(overlaps(15, 25, 10, 20), "Partial overlap should be detected (reversed)");
1205
1206 assert!(overlaps(10, 30, 15, 20), "Containment should be detected");
1208
1209 assert!(overlaps(15, 20, 10, 30), "Containment should be detected (reversed)");
1211
1212 assert!(!overlaps(10, 20, 20, 30), "Adjacent intervals should not overlap");
1214
1215 assert!(!overlaps(10, 20, 25, 30), "Disjoint intervals should not overlap");
1217
1218 assert!(!overlaps(25, 30, 10, 20), "Disjoint intervals should not overlap (reversed)");
1220
1221 assert!(overlaps(10, 20, 10, 20), "Same interval should overlap");
1223
1224 assert!(overlaps(10, 20, 19, 25), "Should overlap when ranges share interior point");
1226 }
1227
1228 #[test]
1229 fn test_position_set_deduplication() {
1230 let mut set = PositionSet::with_tolerance(2);
1234
1235 assert!(!set.is_duplicate(100), "Empty set should have no duplicates");
1237
1238 set.insert(100);
1240
1241 assert!(set.is_duplicate(100), "Exact position should be duplicate");
1243
1244 assert!(set.is_duplicate(99), "Position 99 should be duplicate (within tolerance of 100)");
1246 assert!(set.is_duplicate(101), "Position 101 should be duplicate (within tolerance of 100)");
1247 assert!(set.is_duplicate(98), "Position 98 should be duplicate (within tolerance of 100)");
1248 assert!(set.is_duplicate(102), "Position 102 should be duplicate (within tolerance of 100)");
1249
1250 assert!(!set.is_duplicate(97), "Position 97 should NOT be duplicate (outside tolerance)");
1252 assert!(!set.is_duplicate(103), "Position 103 should NOT be duplicate (outside tolerance)");
1253 assert!(!set.is_duplicate(50), "Position 50 should NOT be duplicate");
1254 assert!(!set.is_duplicate(200), "Position 200 should NOT be duplicate");
1255
1256 set.insert(500);
1258 assert!(set.is_duplicate(500), "Position 500 should now be duplicate");
1259 assert!(set.is_duplicate(498), "Position 498 should be duplicate (within tolerance of 500)");
1260 assert!(!set.is_duplicate(495), "Position 495 should NOT be duplicate");
1261
1262 assert!(set.is_duplicate(100), "Position 100 should still be duplicate");
1264
1265 let mut set2 = PositionSet::with_tolerance(2);
1267 set2.insert(0);
1268 assert!(set2.is_duplicate(0), "Position 0 should be duplicate");
1269 assert!(set2.is_duplicate(1), "Position 1 should be duplicate (within tolerance of 0)");
1270 assert!(set2.is_duplicate(2), "Position 2 should be duplicate (within tolerance of 0)");
1271 assert!(!set2.is_duplicate(3), "Position 3 should NOT be duplicate");
1272
1273 set2.insert(1);
1275 assert!(set2.is_duplicate(0), "Position 0 should be duplicate");
1277 assert!(set2.is_duplicate(3), "Position 3 should be duplicate (within tolerance of 1)");
1278 }
1279
1280 #[test]
1281 fn test_position_set_performance_characteristics() {
1282 let mut set = PositionSet::with_tolerance(2);
1285
1286 for i in 0..1000 {
1289 let pos = i * 100;
1290 assert!(!set.is_duplicate(pos), "Position {} should not be duplicate before insert", pos);
1291 set.insert(pos);
1292 assert!(set.is_duplicate(pos), "Position {} should be duplicate after insert", pos);
1293 }
1294
1295 for i in 0..1000 {
1297 let pos = i * 100;
1298 assert!(set.is_duplicate(pos), "Position {} should be duplicate", pos);
1299 assert!(set.is_duplicate(pos + 1), "Position {} should be duplicate (tolerance)", pos + 1);
1300 if i < 999 {
1302 assert!(!set.is_duplicate(pos + 50), "Position {} should NOT be duplicate (between functions)", pos + 50);
1303 }
1304 }
1305 }
1306
1307 #[test]
1308 fn test_extract_java_methods_with_fallback() {
1309 let source = r#"
1312public class Calculator {
1313 public int add(int a, int b) {
1314 return a + b;
1315 }
1316
1317 public Calculator() {
1318 // constructor
1319 }
1320
1321 private void helper() {
1322 // helper method
1323 }
1324}
1325"#;
1326 let file = create_temp_file(source, ".java");
1327 let result = AstExtractor::extract_file(file.path()).unwrap();
1328
1329 assert_eq!(result.classes.len(), 1, "Should extract Calculator class");
1331 let calc = &result.classes[0];
1332 assert_eq!(calc.name, "Calculator");
1333
1334 assert!(
1337 calc.methods.len() >= 2,
1338 "Should extract at least 2 methods from Calculator, got {}",
1339 calc.methods.len()
1340 );
1341
1342 let method_names: Vec<&str> = calc.methods.iter().map(|m| m.name.as_str()).collect();
1344 assert!(
1345 method_names.contains(&"add"),
1346 "Should find 'add' method, found: {:?}",
1347 method_names
1348 );
1349 }
1350
1351 #[test]
1352 fn test_extract_go_structs_with_fallback() {
1353 let source = r#"
1356package main
1357
1358type Person struct {
1359 Name string
1360 Age int
1361}
1362
1363type Speaker interface {
1364 Speak() string
1365}
1366"#;
1367 let file = create_temp_file(source, ".go");
1368 let result = AstExtractor::extract_file(file.path()).unwrap();
1369
1370 assert!(
1372 result.classes.len() >= 2,
1373 "Should extract Person struct and Speaker interface, got {}",
1374 result.classes.len()
1375 );
1376
1377 let names: Vec<&str> = result.classes.iter().map(|c| c.name.as_str()).collect();
1378 assert!(
1379 names.contains(&"Person"),
1380 "Should find Person struct, found: {:?}",
1381 names
1382 );
1383 assert!(
1384 names.contains(&"Speaker"),
1385 "Should find Speaker interface, found: {:?}",
1386 names
1387 );
1388 }
1389
1390 #[test]
1391 fn test_fallback_node_selection_helper_functions() {
1392 assert!(
1397 FUNCTION_NODE_KINDS.contains(&"function_definition"),
1398 "Should contain Python function_definition"
1399 );
1400 assert!(
1401 FUNCTION_NODE_KINDS.contains(&"method_declaration"),
1402 "Should contain Java method_declaration"
1403 );
1404 assert!(
1405 FUNCTION_NODE_KINDS.contains(&"function_item"),
1406 "Should contain Rust function_item"
1407 );
1408 assert!(
1409 FUNCTION_NODE_KINDS.contains(&"arrow_function"),
1410 "Should contain TypeScript arrow_function"
1411 );
1412
1413 assert!(
1415 CLASS_NODE_KINDS.contains(&"class_definition"),
1416 "Should contain Python class_definition"
1417 );
1418 assert!(
1419 CLASS_NODE_KINDS.contains(&"type_declaration"),
1420 "Should contain Go type_declaration"
1421 );
1422 assert!(
1423 CLASS_NODE_KINDS.contains(&"struct_specifier"),
1424 "Should contain C struct_specifier"
1425 );
1426 assert!(
1427 CLASS_NODE_KINDS.contains(&"class_declaration"),
1428 "Should contain Java/TS class_declaration"
1429 );
1430 }
1431
1432 #[test]
1433 fn test_query_caching() {
1434 let _baseline_size = query_cache_stats();
1440
1441 let source = r#"
1444def hello():
1445 pass
1446
1447class World:
1448 pass
1449"#;
1450 let file = create_temp_file(source, ".py");
1451 let result = AstExtractor::extract_file(file.path()).unwrap();
1452
1453 assert!(!result.functions.is_empty(), "Should extract at least one function");
1455 assert!(!result.classes.is_empty(), "Should extract at least one class");
1456
1457 let cache_size_after_python = query_cache_stats();
1459 assert!(
1460 cache_size_after_python >= 2,
1461 "Cache should have at least 2 entries (function + class), got {}",
1462 cache_size_after_python
1463 );
1464
1465 let source2 = r#"
1467def another():
1468 return 42
1469"#;
1470 let file2 = create_temp_file(source2, ".py");
1471 let result2 = AstExtractor::extract_file(file2.path()).unwrap();
1472 assert!(!result2.functions.is_empty());
1473
1474 assert_eq!(
1476 query_cache_stats(),
1477 cache_size_after_python,
1478 "Cache size should remain the same when reusing same language"
1479 );
1480 }
1481
1482 #[test]
1483 fn test_query_cache_reuse() {
1484 let ts_source1 = "function greet(): string { return 'hello'; }";
1489 let ts_file1 = create_temp_file(ts_source1, ".ts");
1490 let _ = AstExtractor::extract_file(ts_file1.path()).unwrap();
1491
1492 let size_after_first = query_cache_stats();
1493
1494 let ts_source2 = "const add = (a: number, b: number) => a + b;";
1496 let ts_file2 = create_temp_file(ts_source2, ".ts");
1497 let ts_result = AstExtractor::extract_file(ts_file2.path()).unwrap();
1498 assert!(!ts_result.functions.is_empty(), "Should extract TypeScript function");
1499
1500 let size_after_second = query_cache_stats();
1501
1502 assert_eq!(
1504 size_after_first, size_after_second,
1505 "Cache size should remain the same when reprocessing same language"
1506 );
1507 }
1508
1509 #[test]
1510 fn test_query_cache_thread_safety() {
1511 use std::thread;
1514
1515 let handles: Vec<_> = (0..4)
1516 .map(|i| {
1517 thread::spawn(move || {
1518 let source = format!(
1519 r#"
1520def func_{}():
1521 pass
1522"#,
1523 i
1524 );
1525 let file = create_temp_file(&source, ".py");
1526 let result = AstExtractor::extract_file(file.path());
1527 assert!(result.is_ok(), "Extraction should succeed in thread {}", i);
1528 })
1529 })
1530 .collect();
1531
1532 for handle in handles {
1533 handle.join().expect("Thread should complete successfully");
1534 }
1535
1536 let cache_size = query_cache_stats();
1538 assert!(
1539 cache_size >= 2,
1540 "Cache should have entries after concurrent access, got {}",
1541 cache_size
1542 );
1543 }
1544
1545 #[test]
1550 fn test_parser_caching_basic() {
1551 let source1 = r#"
1556def hello():
1557 pass
1558"#;
1559 let source2 = r#"
1560def world():
1561 return 42
1562"#;
1563 let file1 = create_temp_file(source1, ".py");
1564 let file2 = create_temp_file(source2, ".py");
1565
1566 let result1 = AstExtractor::extract_file(file1.path());
1568 assert!(result1.is_ok(), "First extraction should succeed");
1569
1570 let result2 = AstExtractor::extract_file(file2.path());
1571 assert!(result2.is_ok(), "Second extraction should succeed (using cached parser)");
1572
1573 assert!(!result1.unwrap().functions.is_empty());
1575 assert!(!result2.unwrap().functions.is_empty());
1576 }
1577
1578 #[test]
1579 fn test_parser_caching_multiple_languages() {
1580 let py_source = "def hello(): pass";
1583 let ts_source = "function hello(): void {}";
1584 let go_source = "package main\nfunc hello() {}";
1585
1586 let py_file = create_temp_file(py_source, ".py");
1587 let ts_file = create_temp_file(ts_source, ".ts");
1588 let go_file = create_temp_file(go_source, ".go");
1589
1590 let py_result = AstExtractor::extract_file(py_file.path());
1592 let ts_result = AstExtractor::extract_file(ts_file.path());
1593 let go_result = AstExtractor::extract_file(go_file.path());
1594
1595 assert!(py_result.is_ok(), "Python extraction should succeed");
1597 assert!(ts_result.is_ok(), "TypeScript extraction should succeed");
1598 assert!(go_result.is_ok(), "Go extraction should succeed");
1599
1600 assert_eq!(py_result.unwrap().language, "python");
1602 assert_eq!(ts_result.unwrap().language, "typescript");
1603 assert_eq!(go_result.unwrap().language, "go");
1604
1605 let cache_size = parser_cache_stats();
1607 assert!(
1608 cache_size >= 3,
1609 "Cache should have at least 3 parsers (one per language), got {}",
1610 cache_size
1611 );
1612 }
1613
1614 #[test]
1615 fn test_parser_caching_extract_from_source() {
1616 let source1 = "def foo(): pass";
1619 let source2 = "def bar(): return 1";
1620
1621 let result1 = AstExtractor::extract_from_source(source1, "python");
1622 let result2 = AstExtractor::extract_from_source(source2, "python");
1623
1624 assert!(result1.is_ok(), "First extract_from_source should succeed");
1625 assert!(result2.is_ok(), "Second extract_from_source should succeed (cached)");
1626
1627 assert_eq!(result1.unwrap().functions[0].name, "foo");
1628 assert_eq!(result2.unwrap().functions[0].name, "bar");
1629 }
1630
1631 #[test]
1632 fn test_parser_cache_clear() {
1633 let source = "def test(): pass";
1637 let file = create_temp_file(source, ".py");
1638 let _ = AstExtractor::extract_file(file.path()).unwrap();
1639
1640 let before_clear = parser_cache_stats();
1642 assert!(before_clear >= 1, "Cache should have at least 1 parser before clear");
1643
1644 clear_parser_cache();
1646
1647 let after_clear = parser_cache_stats();
1649 assert_eq!(after_clear, 0, "Cache should be empty after clear");
1650
1651 let source2 = "def another(): pass";
1653 let file2 = create_temp_file(source2, ".py");
1654 let result = AstExtractor::extract_file(file2.path());
1655 assert!(result.is_ok(), "Extraction should work after cache clear");
1656
1657 let after_extraction = parser_cache_stats();
1659 assert_eq!(after_extraction, 1, "Cache should have 1 parser after extraction");
1660 }
1661
1662 #[test]
1663 fn test_parser_cache_thread_local() {
1664 use std::thread;
1666
1667 clear_parser_cache();
1669
1670 let source = "def main_thread(): pass";
1672 let file = create_temp_file(source, ".py");
1673 let _ = AstExtractor::extract_file(file.path()).unwrap();
1674
1675 let main_thread_cache = parser_cache_stats();
1676 assert!(main_thread_cache >= 1, "Main thread cache should have parser");
1677
1678 let handle = thread::spawn(|| {
1680 let child_cache_before = parser_cache_stats();
1682
1683 let source = "def child_thread(): pass";
1685 let file = create_temp_file(source, ".py");
1686 let _ = AstExtractor::extract_file(file.path()).unwrap();
1687
1688 let child_cache_after = parser_cache_stats();
1689
1690 (child_cache_before, child_cache_after)
1691 });
1692
1693 let (child_before, child_after) = handle.join().unwrap();
1694
1695 assert_eq!(
1697 child_before, 0,
1698 "Child thread should start with empty cache"
1699 );
1700 assert!(
1701 child_after >= 1,
1702 "Child thread should have parser after extraction"
1703 );
1704
1705 let main_thread_cache_after = parser_cache_stats();
1707 assert_eq!(
1708 main_thread_cache, main_thread_cache_after,
1709 "Main thread cache should be unchanged by child thread"
1710 );
1711 }
1712}