1pub mod types;
2
3pub use types::{CodeUnit, Language, UnitType};
4
5use std::path::Path;
6use tree_sitter::{Language as TsLanguage, Node, Parser};
7
8pub fn detect_language(path: &Path) -> Option<Language> {
10 if let Some(filename) = path.file_name().and_then(|f| f.to_str()) {
12 let filename_lower = filename.to_lowercase();
13 match filename_lower.as_str() {
14 "dockerfile" => return Some(Language::Dockerfile),
15 "makefile" | "gnumakefile" => return Some(Language::Makefile),
16 _ => {}
17 }
18 }
19
20 match path.extension()?.to_str()?.to_lowercase().as_str() {
22 "py" => Some(Language::Python),
24 "ts" | "tsx" => Some(Language::TypeScript),
25 "js" | "jsx" | "mjs" => Some(Language::JavaScript),
26 "go" => Some(Language::Go),
27 "rs" => Some(Language::Rust),
28 "java" => Some(Language::Java),
29 "c" | "h" => Some(Language::C),
30 "cpp" | "cc" | "cxx" | "hpp" | "hxx" => Some(Language::Cpp),
31 "rb" => Some(Language::Ruby),
32 "cs" => Some(Language::CSharp),
33 "kt" | "kts" => Some(Language::Kotlin),
35 "swift" => Some(Language::Swift),
36 "scala" | "sc" => Some(Language::Scala),
37 "php" => Some(Language::Php),
38 "lua" => Some(Language::Lua),
39 "ex" | "exs" => Some(Language::Elixir),
40 "hs" => Some(Language::Haskell),
41 "ml" | "mli" => Some(Language::Ocaml),
42 "md" | "markdown" => Some(Language::Markdown),
44 "txt" | "text" | "rst" => Some(Language::Text),
45 "adoc" | "asciidoc" => Some(Language::AsciiDoc),
46 "org" => Some(Language::Org),
47 "yaml" | "yml" => Some(Language::Yaml),
49 "toml" => Some(Language::Toml),
50 "json" => Some(Language::Json),
51 "sh" | "bash" | "zsh" => Some(Language::Shell),
53 "ps1" => Some(Language::Powershell),
54 _ => None,
55 }
56}
57
58pub fn is_text_format(lang: Language) -> bool {
60 matches!(
61 lang,
62 Language::Markdown
63 | Language::Text
64 | Language::Yaml
65 | Language::Toml
66 | Language::Json
67 | Language::Dockerfile
68 | Language::Makefile
69 | Language::Shell
70 | Language::Powershell
71 | Language::AsciiDoc
72 | Language::Org
73 )
74}
75
76fn get_tree_sitter_language(lang: Language) -> TsLanguage {
78 match lang {
79 Language::Python => tree_sitter_python::LANGUAGE.into(),
81 Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
82 Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
83 Language::Go => tree_sitter_go::LANGUAGE.into(),
84 Language::Rust => tree_sitter_rust::LANGUAGE.into(),
85 Language::Java => tree_sitter_java::LANGUAGE.into(),
86 Language::C => tree_sitter_c::LANGUAGE.into(),
87 Language::Cpp => tree_sitter_cpp::LANGUAGE.into(),
88 Language::Ruby => tree_sitter_ruby::LANGUAGE.into(),
89 Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(),
90 Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(),
92 Language::Swift => tree_sitter_swift::LANGUAGE.into(),
93 Language::Scala => tree_sitter_scala::LANGUAGE.into(),
94 Language::Php => tree_sitter_php::LANGUAGE_PHP.into(),
95 Language::Lua => tree_sitter_lua::LANGUAGE.into(),
96 Language::Elixir => tree_sitter_elixir::LANGUAGE.into(),
97 Language::Haskell => tree_sitter_haskell::LANGUAGE.into(),
98 Language::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(),
99 Language::Markdown
101 | Language::Text
102 | Language::Yaml
103 | Language::Toml
104 | Language::Json
105 | Language::Dockerfile
106 | Language::Makefile
107 | Language::Shell
108 | Language::Powershell
109 | Language::AsciiDoc
110 | Language::Org => unreachable!("Text/config formats don't use tree-sitter"),
111 }
112}
113
114pub fn extract_units(path: &Path, source: &str, lang: Language) -> Vec<CodeUnit> {
116 if is_text_format(lang) {
118 return extract_text_units(path, source, lang);
119 }
120
121 let mut parser = Parser::new();
122 if parser
123 .set_language(&get_tree_sitter_language(lang))
124 .is_err()
125 {
126 return Vec::new();
127 }
128
129 let tree = match parser.parse(source, None) {
130 Some(t) => t,
131 None => return Vec::new(),
132 };
133
134 let lines: Vec<&str> = source.lines().collect();
135 let bytes = source.as_bytes();
136 let file_imports = extract_file_imports(tree.root_node(), bytes, lang);
137
138 let mut units = Vec::new();
139 extract_from_node(
140 tree.root_node(),
141 path,
142 &lines,
143 bytes,
144 lang,
145 &mut units,
146 None,
147 &file_imports,
148 );
149
150 units
151}
152
153fn extract_text_units(path: &Path, source: &str, lang: Language) -> Vec<CodeUnit> {
155 let lines: Vec<&str> = source.lines().collect();
156
157 match lang {
158 Language::Markdown => extract_markdown_units(path, source, &lines),
159 _ => extract_plain_text_units(path, source, &lines, lang),
161 }
162}
163
164fn extract_markdown_units(path: &Path, _source: &str, lines: &[&str]) -> Vec<CodeUnit> {
166 if lines.is_empty() || lines.iter().all(|l| l.trim().is_empty()) {
167 return Vec::new();
168 }
169
170 let title = path
171 .file_stem()
172 .and_then(|s| s.to_str())
173 .unwrap_or("document")
174 .to_string();
175
176 let unit = create_text_unit(
177 path,
178 &title,
179 1,
180 Language::Markdown,
181 UnitType::Document,
182 lines,
183 );
184
185 vec![unit]
186}
187
188fn extract_plain_text_units(
190 path: &Path,
191 _source: &str,
192 lines: &[&str],
193 lang: Language,
194) -> Vec<CodeUnit> {
195 if lines.is_empty() || lines.iter().all(|l| l.trim().is_empty()) {
196 return Vec::new();
197 }
198
199 let title = path
200 .file_stem()
201 .and_then(|s| s.to_str())
202 .unwrap_or("document")
203 .to_string();
204
205 let unit = create_text_unit(path, &title, 1, lang, UnitType::Document, lines);
206
207 vec![unit]
208}
209
210fn create_text_unit(
212 path: &Path,
213 name: &str,
214 line: usize,
215 lang: Language,
216 unit_type: UnitType,
217 content_lines: &[&str],
218) -> CodeUnit {
219 let qualified_name = format!("{}::{}", path.display(), name);
220
221 let signature = content_lines
223 .iter()
224 .find(|l| !l.trim().is_empty())
225 .map(|l| l.trim().to_string())
226 .unwrap_or_default();
227
228 let docstring: Option<String> = {
230 let para: Vec<&str> = content_lines
231 .iter()
232 .take_while(|l| !l.trim().is_empty())
233 .map(|l| l.trim())
234 .filter(|l| !l.is_empty())
235 .take(5) .collect();
237 if para.is_empty() {
238 None
239 } else {
240 Some(para.join(" "))
241 }
242 };
243
244 let preview_lines: Vec<&str> = content_lines.iter().take(20).cloned().collect();
246 let code_preview = preview_lines.join("\n");
247
248 CodeUnit {
249 name: name.to_string(),
250 qualified_name,
251 file: path.to_path_buf(),
252 line,
253 language: lang,
254 unit_type,
255 signature,
256 docstring,
257 parameters: Vec::new(),
258 return_type: None,
259 calls: Vec::new(),
260 called_by: Vec::new(),
261 complexity: 1,
262 has_loops: false,
263 has_branches: false,
264 has_error_handling: false,
265 variables: Vec::new(),
266 imports: Vec::new(),
267 code_preview,
268 }
269}
270
271#[allow(clippy::too_many_arguments)]
273fn extract_from_node(
274 node: Node,
275 path: &Path,
276 lines: &[&str],
277 bytes: &[u8],
278 lang: Language,
279 units: &mut Vec<CodeUnit>,
280 parent_class: Option<&str>,
281 file_imports: &[String],
282) {
283 let kind = node.kind();
284
285 if is_function_node(kind, lang) {
287 if let Some(unit) =
288 extract_function(node, path, lines, bytes, lang, parent_class, file_imports)
289 {
290 units.push(unit);
291 }
292 }
293 else if is_class_node(kind, lang) {
295 if let Some(class_name) = get_node_name(node, bytes, lang) {
296 if let Some(unit) = extract_class(node, path, lines, bytes, lang, file_imports) {
298 units.push(unit);
299 }
300
301 if let Some(body) = find_class_body(node, lang) {
303 for child in body.children(&mut body.walk()) {
304 extract_from_node(
305 child,
306 path,
307 lines,
308 bytes,
309 lang,
310 units,
311 Some(&class_name),
312 file_imports,
313 );
314 }
315 }
316 return; }
318 }
319
320 for child in node.children(&mut node.walk()) {
322 extract_from_node(
323 child,
324 path,
325 lines,
326 bytes,
327 lang,
328 units,
329 parent_class,
330 file_imports,
331 );
332 }
333}
334
335fn is_function_node(kind: &str, lang: Language) -> bool {
336 match lang {
337 Language::Python => kind == "function_definition",
338 Language::Rust => kind == "function_item",
339 Language::TypeScript | Language::JavaScript => {
340 matches!(
341 kind,
342 "function_declaration" | "method_definition" | "arrow_function"
343 )
344 }
345 Language::Go => kind == "function_declaration" || kind == "method_declaration",
346 Language::Java => kind == "method_declaration" || kind == "constructor_declaration",
347 Language::C | Language::Cpp => kind == "function_definition",
348 Language::Ruby => kind == "method" || kind == "singleton_method",
349 Language::CSharp => kind == "method_declaration" || kind == "constructor_declaration",
350 Language::Kotlin => matches!(kind, "function_declaration" | "anonymous_function"),
352 Language::Swift => matches!(kind, "function_declaration" | "init_declaration"),
353 Language::Scala => matches!(kind, "function_definition" | "function_declaration"),
354 Language::Php => matches!(kind, "function_definition" | "method_declaration"),
355 Language::Lua => kind == "function_declaration",
356 Language::Elixir => matches!(kind, "call" | "anonymous_function"), Language::Haskell => kind == "function",
358 Language::Ocaml => matches!(kind, "let_binding" | "value_definition"),
359 _ => false,
361 }
362}
363
364fn is_class_node(kind: &str, lang: Language) -> bool {
365 match lang {
366 Language::Python => kind == "class_definition",
367 Language::Rust => kind == "impl_item" || kind == "struct_item",
368 Language::TypeScript | Language::JavaScript => kind == "class_declaration",
369 Language::Go => kind == "type_declaration",
370 Language::Java => kind == "class_declaration" || kind == "interface_declaration",
371 Language::Cpp => kind == "class_specifier" || kind == "struct_specifier",
372 Language::Ruby => kind == "class" || kind == "module",
373 Language::CSharp => kind == "class_declaration" || kind == "interface_declaration",
374 Language::Kotlin => matches!(kind, "class_declaration" | "object_declaration"),
376 Language::Swift => matches!(
377 kind,
378 "class_declaration" | "struct_declaration" | "protocol_declaration"
379 ),
380 Language::Scala => matches!(
381 kind,
382 "class_definition" | "object_definition" | "trait_definition"
383 ),
384 Language::Php => kind == "class_declaration",
385 Language::Lua => false, Language::Elixir => kind == "call", Language::Haskell => matches!(kind, "type_alias" | "newtype" | "adt"),
388 Language::Ocaml => matches!(kind, "type_definition" | "module_definition"),
389 _ => false,
391 }
392}
393
394fn find_class_body(node: Node, lang: Language) -> Option<Node> {
395 match lang {
396 Language::Python => node.child_by_field_name("body"),
397 Language::Rust => node.child_by_field_name("body"),
398 Language::TypeScript | Language::JavaScript => node.child_by_field_name("body"),
399 Language::Java | Language::CSharp => node.child_by_field_name("body"),
400 Language::Go => node.child_by_field_name("type"),
401 Language::Cpp => {
402 for child in node.children(&mut node.walk()) {
404 if child.kind() == "field_declaration_list" {
405 return Some(child);
406 }
407 }
408 None
409 }
410 Language::Ruby => node.child_by_field_name("body"),
411 Language::Kotlin | Language::Swift | Language::Scala | Language::Php => {
413 node.child_by_field_name("body")
414 }
415 Language::Elixir => node.child_by_field_name("body"),
416 Language::Haskell | Language::Ocaml => node.child_by_field_name("body"),
417 _ => None,
419 }
420}
421
422fn get_node_name(node: Node, bytes: &[u8], lang: Language) -> Option<String> {
423 let name_node = match lang {
424 Language::Python
425 | Language::Rust
426 | Language::Go
427 | Language::Java
428 | Language::Ruby
429 | Language::CSharp => node.child_by_field_name("name"),
430 Language::TypeScript | Language::JavaScript => node
431 .child_by_field_name("name")
432 .or_else(|| node.child_by_field_name("property")),
433 Language::C | Language::Cpp => {
434 node.child_by_field_name("declarator").and_then(|d| {
435 if d.kind() == "function_declarator" {
437 d.child_by_field_name("declarator")
438 } else {
439 Some(d)
440 }
441 })
442 }
443 Language::Kotlin
445 | Language::Swift
446 | Language::Scala
447 | Language::Php
448 | Language::Lua
449 | Language::Haskell => node.child_by_field_name("name"),
450 Language::Elixir => {
451 node.child_by_field_name("target")
453 .or_else(|| node.child_by_field_name("name"))
454 }
455 Language::Ocaml => node
456 .child_by_field_name("name")
457 .or_else(|| node.child_by_field_name("pattern")),
458 _ => None,
460 };
461
462 name_node.and_then(|n| {
463 let text = n.utf8_text(bytes).ok()?;
464 if text.is_empty() {
465 None
466 } else {
467 Some(text.to_string())
468 }
469 })
470}
471
472fn extract_function(
473 node: Node,
474 path: &Path,
475 lines: &[&str],
476 bytes: &[u8],
477 lang: Language,
478 parent_class: Option<&str>,
479 file_imports: &[String],
480) -> Option<CodeUnit> {
481 let name = get_node_name(node, bytes, lang)?;
482 let start_line = node.start_position().row;
483 let end_line = node.end_position().row;
484
485 let unit_type = if parent_class.is_some() {
486 UnitType::Method
487 } else {
488 UnitType::Function
489 };
490
491 let mut unit = CodeUnit::new(
492 name,
493 path.to_path_buf(),
494 start_line + 1,
495 lang,
496 unit_type,
497 parent_class,
498 );
499
500 unit.signature = lines
502 .get(start_line)
503 .map(|s| s.trim().to_string())
504 .unwrap_or_default();
505 unit.docstring = extract_docstring(node, lines, lang);
506 unit.parameters = extract_parameters(node, bytes, lang);
507 unit.return_type = extract_return_type(node, bytes, lang);
508
509 unit.calls = extract_function_calls(node, bytes, lang);
511 let (complexity, has_loops, has_branches, has_error_handling) =
515 extract_control_flow(node, lang);
516 unit.complexity = complexity;
517 unit.has_loops = has_loops;
518 unit.has_branches = has_branches;
519 unit.has_error_handling = has_error_handling;
520
521 unit.variables = extract_variables(node, bytes, lang);
523
524 unit.imports = filter_used_imports(&unit.calls, file_imports);
526
527 let preview_end = (start_line + 20).min(end_line + 1).min(lines.len());
529 unit.code_preview = lines[start_line..preview_end].join("\n");
530
531 Some(unit)
532}
533
534fn extract_class(
535 node: Node,
536 path: &Path,
537 lines: &[&str],
538 bytes: &[u8],
539 lang: Language,
540 file_imports: &[String],
541) -> Option<CodeUnit> {
542 let name = get_node_name(node, bytes, lang)?;
543 let start_line = node.start_position().row;
544 let end_line = node.end_position().row;
545
546 let mut unit = CodeUnit::new(
547 name,
548 path.to_path_buf(),
549 start_line + 1,
550 lang,
551 UnitType::Class,
552 None,
553 );
554
555 unit.signature = lines
557 .get(start_line)
558 .map(|s| s.trim().to_string())
559 .unwrap_or_default();
560 unit.docstring = extract_docstring(node, lines, lang);
561
562 unit.imports = file_imports.to_vec();
564
565 let preview_end = (start_line + 5).min(end_line + 1).min(lines.len());
567 unit.code_preview = lines[start_line..preview_end].join("\n");
568
569 Some(unit)
570}
571
572fn extract_docstring(node: Node, lines: &[&str], lang: Language) -> Option<String> {
573 match lang {
574 Language::Python => {
575 let body = node.child_by_field_name("body")?;
577 let first_child = body.child(0)?;
578 if first_child.kind() == "expression_statement" {
579 let expr = first_child.child(0)?;
580 if expr.kind() == "string" {
581 let start = expr.start_position().row;
582 let end = expr.end_position().row;
583 let doc_lines: Vec<&str> = lines[start..=end.min(lines.len() - 1)].to_vec();
584 let doc = doc_lines.join("\n");
585 return Some(
587 doc.trim_matches(|c| c == '"' || c == '\'')
588 .trim()
589 .to_string(),
590 );
591 }
592 }
593 None
594 }
595 Language::Rust => {
596 let mut doc_lines = Vec::new();
598 let start_row = node.start_position().row;
599 if start_row > 0 {
600 for i in (0..start_row).rev() {
601 let line = lines.get(i)?.trim();
602 if line.starts_with("///") {
603 doc_lines.insert(0, line.trim_start_matches("///").trim());
604 } else if line.starts_with("//!") || line.starts_with("#[") || line.is_empty() {
605 continue;
606 } else {
607 break;
608 }
609 }
610 }
611 if doc_lines.is_empty() {
612 None
613 } else {
614 Some(doc_lines.join(" "))
615 }
616 }
617 Language::JavaScript
618 | Language::TypeScript
619 | Language::Java
620 | Language::CSharp
621 | Language::Kotlin
622 | Language::Swift
623 | Language::Scala
624 | Language::Php => {
625 let start_row = node.start_position().row;
627 if start_row > 0 {
628 let prev_line = lines.get(start_row - 1)?.trim();
629 if prev_line.ends_with("*/") {
630 for i in (0..start_row).rev() {
632 let line = lines.get(i)?.trim();
633 if line.starts_with("/**") || line.starts_with("/*") {
634 let doc: String = lines[i..start_row]
635 .iter()
636 .map(|l| {
637 l.trim()
638 .trim_start_matches("/**")
639 .trim_start_matches("/*")
640 .trim_start_matches('*')
641 .trim_end_matches("*/")
642 .trim()
643 })
644 .filter(|l| !l.is_empty())
645 .collect::<Vec<_>>()
646 .join(" ");
647 return Some(doc);
648 }
649 }
650 }
651 }
652 None
653 }
654 Language::Haskell => {
655 let mut doc_lines = Vec::new();
657 let start_row = node.start_position().row;
658 if start_row > 0 {
659 for i in (0..start_row).rev() {
660 let line = lines.get(i)?.trim();
661 if line.starts_with("-- |") || line.starts_with("-- ^") {
662 doc_lines.insert(
663 0,
664 line.trim_start_matches("-- |")
665 .trim_start_matches("-- ^")
666 .trim(),
667 );
668 } else if line.starts_with("--") && !doc_lines.is_empty() {
669 doc_lines.insert(0, line.trim_start_matches("--").trim());
670 } else if !line.is_empty() {
671 break;
672 }
673 }
674 }
675 if doc_lines.is_empty() {
676 None
677 } else {
678 Some(doc_lines.join(" "))
679 }
680 }
681 Language::Elixir => {
682 let start_row = node.start_position().row;
684 if start_row > 0 {
685 for i in (0..start_row).rev() {
686 let line = lines.get(i)?.trim();
687 if line.starts_with("@doc") || line.starts_with("@moduledoc") {
688 if let Some(start) = line.find('"') {
690 return Some(line[start..].trim_matches('"').to_string());
691 }
692 } else if !line.is_empty() && !line.starts_with("#") && !line.starts_with("@") {
693 break;
694 }
695 }
696 }
697 None
698 }
699 _ => None,
700 }
701}
702
703fn extract_parameters(node: Node, bytes: &[u8], lang: Language) -> Vec<String> {
704 let params_node = match lang {
705 Language::Python | Language::Rust | Language::Go | Language::Java | Language::CSharp => {
706 node.child_by_field_name("parameters")
707 }
708 Language::TypeScript | Language::JavaScript => node
709 .child_by_field_name("parameters")
710 .or_else(|| node.child_by_field_name("formal_parameters")),
711 Language::C | Language::Cpp => node
712 .child_by_field_name("declarator")
713 .and_then(|d| d.child_by_field_name("parameters")),
714 Language::Ruby => node.child_by_field_name("parameters"),
715 Language::Kotlin
717 | Language::Swift
718 | Language::Scala
719 | Language::Php
720 | Language::Lua
721 | Language::Elixir
722 | Language::Haskell
723 | Language::Ocaml => node.child_by_field_name("parameters"),
724 _ => None,
726 };
727
728 let Some(params) = params_node else {
729 return Vec::new();
730 };
731
732 let mut result = Vec::new();
733 for child in params.children(&mut params.walk()) {
734 let kind = child.kind();
736 if kind.contains("parameter") || kind == "identifier" {
737 if let Some(name) = child.child_by_field_name("name").or_else(|| {
738 if child.kind() == "identifier" {
739 Some(child)
740 } else {
741 None
742 }
743 }) {
744 if let Ok(text) = name.utf8_text(bytes) {
745 if !text.is_empty() && text != "self" && text != "this" && text != "cls" {
746 result.push(text.to_string());
747 }
748 }
749 }
750 }
751 }
752 result
753}
754
755fn extract_return_type(node: Node, bytes: &[u8], lang: Language) -> Option<String> {
756 let ret_node = match lang {
757 Language::Python => node.child_by_field_name("return_type"),
758 Language::Rust => node.child_by_field_name("return_type"),
759 Language::TypeScript => node.child_by_field_name("return_type"),
760 Language::Go => node.child_by_field_name("result"),
761 Language::Java | Language::CSharp => node.child_by_field_name("type"),
762 Language::Cpp | Language::C => node.child_by_field_name("type"),
763 _ => None,
764 };
765
766 ret_node.and_then(|n| n.utf8_text(bytes).ok().map(|s| s.to_string()))
767}
768
769fn extract_function_calls(node: Node, bytes: &[u8], lang: Language) -> Vec<String> {
770 let mut calls = Vec::new();
771 let call_types: &[&str] = match lang {
772 Language::Python => &["call"],
773 Language::Rust => &["call_expression", "macro_invocation"],
774 Language::TypeScript | Language::JavaScript => &["call_expression"],
775 Language::Go => &["call_expression"],
776 Language::Java | Language::CSharp => &["method_invocation", "object_creation_expression"],
777 Language::C | Language::Cpp => &["call_expression"],
778 Language::Ruby => &["call", "method_call"],
779 Language::Kotlin => &["call_expression", "navigation_expression"],
781 Language::Swift => &["call_expression"],
782 Language::Scala => &["call_expression"],
783 Language::Php => &["function_call_expression", "method_call_expression"],
784 Language::Lua => &["function_call"],
785 Language::Elixir => &["call"],
786 Language::Haskell => &["function_application"],
787 Language::Ocaml => &["application"],
788 _ => return calls,
790 };
791
792 fn visit(node: Node, bytes: &[u8], call_types: &[&str], calls: &mut Vec<String>) {
793 if call_types.contains(&node.kind()) {
794 if let Some(name_node) = node
795 .child_by_field_name("function")
796 .or_else(|| node.child_by_field_name("name"))
797 .or_else(|| node.child_by_field_name("method"))
798 .or_else(|| node.child(0))
799 {
800 if let Ok(text) = name_node.utf8_text(bytes) {
801 #[allow(clippy::double_ended_iterator_last)]
803 let name = text.split('.').last().unwrap_or(text);
804 #[allow(clippy::double_ended_iterator_last)]
805 let name = name.split("::").last().unwrap_or(name);
806 let name = name.trim_end_matches('!'); if !name.is_empty()
808 && name
809 .chars()
810 .next()
811 .map(|c| c.is_alphabetic())
812 .unwrap_or(false)
813 {
814 calls.push(name.to_string());
815 }
816 }
817 }
818 }
819 for child in node.children(&mut node.walk()) {
820 visit(child, bytes, call_types, calls);
821 }
822 }
823
824 visit(node, bytes, call_types, &mut calls);
825 calls.sort();
826 calls.dedup();
827 calls
828}
829
830fn extract_control_flow(node: Node, _lang: Language) -> (usize, bool, bool, bool) {
831 let mut complexity = 1;
832 let mut has_loops = false;
833 let mut has_branches = false;
834 let mut has_error_handling = false;
835
836 fn visit(
837 node: Node,
838 complexity: &mut usize,
839 loops: &mut bool,
840 branches: &mut bool,
841 errors: &mut bool,
842 ) {
843 match node.kind() {
844 "if_statement"
846 | "if_expression"
847 | "match_expression"
848 | "match_statement"
849 | "switch_statement"
850 | "case_statement"
851 | "conditional_expression"
852 | "ternary_expression"
853 | "if"
854 | "unless"
855 | "when" => {
856 *complexity += 1;
857 *branches = true;
858 }
859 "for_statement" | "for_expression" | "while_statement" | "while_expression"
861 | "loop_expression" | "for_in_statement" | "foreach_statement" | "do_statement"
862 | "for" | "while" | "until" => {
863 *complexity += 1;
864 *loops = true;
865 }
866 "try_statement" | "try_expression" | "catch_clause" | "rescue" | "except_clause"
868 | "try" => {
869 *errors = true;
870 }
871 "?" | "try_operator" => {
873 *errors = true;
874 }
875 _ => {}
876 }
877 for child in node.children(&mut node.walk()) {
878 visit(child, complexity, loops, branches, errors);
879 }
880 }
881
882 visit(
883 node,
884 &mut complexity,
885 &mut has_loops,
886 &mut has_branches,
887 &mut has_error_handling,
888 );
889 (complexity, has_loops, has_branches, has_error_handling)
890}
891
892fn extract_variables(node: Node, bytes: &[u8], lang: Language) -> Vec<String> {
893 let mut vars = Vec::new();
894 let var_types: &[&str] = match lang {
895 Language::Python => &["assignment", "named_expression", "augmented_assignment"],
896 Language::Rust => &["let_declaration"],
897 Language::TypeScript | Language::JavaScript => {
898 &["variable_declarator", "lexical_declaration"]
899 }
900 Language::Go => &["short_var_declaration", "var_declaration"],
901 Language::Java | Language::CSharp => &["variable_declarator", "local_variable_declaration"],
902 Language::C | Language::Cpp => &["declaration", "init_declarator"],
903 Language::Ruby => &["assignment"],
904 Language::Kotlin => &["property_declaration", "variable_declaration"],
906 Language::Swift => &["property_declaration", "constant_declaration"],
907 Language::Scala => &["val_definition", "var_definition"],
908 Language::Php => &["simple_variable"],
909 Language::Lua => &["variable_declaration", "local_variable_declaration"],
910 Language::Elixir => &["match"],
911 Language::Haskell => &["function_binding"],
912 Language::Ocaml => &["let_binding"],
913 _ => return vars,
915 };
916
917 fn visit(node: Node, bytes: &[u8], var_types: &[&str], vars: &mut Vec<String>) {
918 if var_types.contains(&node.kind()) {
919 if let Some(name_node) = node
920 .child_by_field_name("left")
921 .or_else(|| node.child_by_field_name("name"))
922 .or_else(|| node.child_by_field_name("pattern"))
923 .or_else(|| node.child(0))
924 {
925 if let Ok(text) = name_node.utf8_text(bytes) {
926 let name = text.trim();
927 if !name.is_empty()
928 && name.len() < 50
929 && name
930 .chars()
931 .next()
932 .map(|c| c.is_alphabetic() || c == '_')
933 .unwrap_or(false)
934 {
935 vars.push(name.to_string());
936 }
937 }
938 }
939 }
940 for child in node.children(&mut node.walk()) {
941 visit(child, bytes, var_types, vars);
942 }
943 }
944
945 visit(node, bytes, var_types, &mut vars);
946 vars.sort();
947 vars.dedup();
948 vars
949}
950
951fn extract_file_imports(node: Node, bytes: &[u8], lang: Language) -> Vec<String> {
952 let mut imports = Vec::new();
953 let import_types: &[&str] = match lang {
954 Language::Python => &["import_statement", "import_from_statement"],
955 Language::Rust => &["use_declaration"],
956 Language::TypeScript | Language::JavaScript => &["import_statement"],
957 Language::Go => &["import_declaration"],
958 Language::Java => &["import_declaration"],
959 Language::CSharp => &["using_directive"],
960 Language::C | Language::Cpp => &["preproc_include"],
961 Language::Ruby => &["call"], Language::Kotlin => &["import_header"],
964 Language::Swift => &["import_declaration"],
965 Language::Scala => &["import_declaration"],
966 Language::Php => &["namespace_use_declaration"],
967 Language::Lua => &["call"], Language::Elixir => &["call"], Language::Haskell => &["import"],
970 Language::Ocaml => &["open_statement"],
971 _ => return imports,
973 };
974
975 fn visit(
976 node: Node,
977 bytes: &[u8],
978 import_types: &[&str],
979 imports: &mut Vec<String>,
980 lang: Language,
981 ) {
982 if import_types.contains(&node.kind()) {
983 if lang == Language::Ruby {
985 if let Some(name) = node.child_by_field_name("method") {
986 if let Ok(text) = name.utf8_text(bytes) {
987 if text != "require" && text != "require_relative" {
988 return;
989 }
990 }
991 }
992 }
993
994 if let Ok(text) = node.utf8_text(bytes) {
995 let text = text.trim();
997 let module = text
999 .split_whitespace()
1000 .find(|s| {
1001 !s.starts_with("import")
1002 && !s.starts_with("from")
1003 && !s.starts_with("use")
1004 && !s.starts_with("using")
1005 })
1006 .unwrap_or(text)
1007 .trim_matches(|c: char| !c.is_alphanumeric() && c != '_' && c != '.')
1008 .split("::")
1009 .next()
1010 .unwrap_or("")
1011 .split('.')
1012 .next()
1013 .unwrap_or("");
1014
1015 if !module.is_empty() {
1016 imports.push(module.to_string());
1017 }
1018 }
1019 }
1020 for child in node.children(&mut node.walk()) {
1021 visit(child, bytes, import_types, imports, lang);
1022 }
1023 }
1024
1025 visit(node, bytes, import_types, &mut imports, lang);
1026 imports.sort();
1027 imports.dedup();
1028 imports
1029}
1030
/// Keeps the file-level imports that match at least one call name.
///
/// An import matches a call when, ignoring case, either string contains the
/// other. The input order of `file_imports` is preserved.
fn filter_used_imports(calls: &[String], file_imports: &[String]) -> Vec<String> {
    // Lower-case each call once up front instead of once per (import, call)
    // pair inside the nested loops.
    let calls_lower: Vec<String> = calls.iter().map(|call| call.to_lowercase()).collect();

    file_imports
        .iter()
        .filter(|import| {
            let import_lower = import.to_lowercase();
            calls_lower.iter().any(|call| {
                call.contains(import_lower.as_str()) || import_lower.contains(call.as_str())
            })
        })
        .cloned()
        .collect()
}
1045
/// Public alias for [`is_text_format`]: returns exactly what that function
/// returns for `lang`.
pub fn is_text_format_check(lang: Language) -> bool {
    is_text_format(lang)
}
1050
1051pub fn build_call_graph(units: &mut [CodeUnit]) {
1053 use std::collections::HashMap;
1054
1055 let mut name_to_indices: HashMap<String, Vec<usize>> = HashMap::new();
1057 for (i, unit) in units.iter().enumerate() {
1058 name_to_indices
1059 .entry(unit.name.clone())
1060 .or_default()
1061 .push(i);
1062 }
1063
1064 let calls_map: Vec<(usize, Vec<String>)> = units
1066 .iter()
1067 .enumerate()
1068 .map(|(i, u)| (i, u.calls.clone()))
1069 .collect();
1070
1071 for (caller_idx, calls) in calls_map {
1073 let caller_name = units[caller_idx].name.clone();
1074 for callee_name in calls {
1075 if let Some(indices) = name_to_indices.get(&callee_name) {
1076 for &callee_idx in indices {
1077 if !units[callee_idx].called_by.contains(&caller_name) {
1078 units[callee_idx].called_by.push(caller_name.clone());
1079 }
1080 }
1081 }
1082 }
1083 }
1084}
1085
1086#[cfg(test)]
1087mod tests {
1088 use super::*;
1089 use std::path::Path;
1090
1091 #[test]
1094 fn test_detect_language_python() {
1095 assert_eq!(
1096 detect_language(Path::new("main.py")),
1097 Some(Language::Python)
1098 );
1099 assert_eq!(
1100 detect_language(Path::new("src/utils/helper.py")),
1101 Some(Language::Python)
1102 );
1103 }
1104
1105 #[test]
1106 fn test_detect_language_rust() {
1107 assert_eq!(detect_language(Path::new("main.rs")), Some(Language::Rust));
1108 assert_eq!(
1109 detect_language(Path::new("src/lib.rs")),
1110 Some(Language::Rust)
1111 );
1112 }
1113
1114 #[test]
1115 fn test_detect_language_typescript() {
1116 assert_eq!(
1117 detect_language(Path::new("app.ts")),
1118 Some(Language::TypeScript)
1119 );
1120 assert_eq!(
1121 detect_language(Path::new("Component.tsx")),
1122 Some(Language::TypeScript)
1123 );
1124 }
1125
1126 #[test]
1127 fn test_detect_language_javascript() {
1128 assert_eq!(
1129 detect_language(Path::new("app.js")),
1130 Some(Language::JavaScript)
1131 );
1132 assert_eq!(
1133 detect_language(Path::new("Component.jsx")),
1134 Some(Language::JavaScript)
1135 );
1136 assert_eq!(
1137 detect_language(Path::new("module.mjs")),
1138 Some(Language::JavaScript)
1139 );
1140 }
1141
1142 #[test]
1143 fn test_detect_language_go() {
1144 assert_eq!(detect_language(Path::new("main.go")), Some(Language::Go));
1145 }
1146
1147 #[test]
1148 fn test_detect_language_java() {
1149 assert_eq!(
1150 detect_language(Path::new("Main.java")),
1151 Some(Language::Java)
1152 );
1153 }
1154
1155 #[test]
1156 fn test_detect_language_c() {
1157 assert_eq!(detect_language(Path::new("main.c")), Some(Language::C));
1158 assert_eq!(detect_language(Path::new("header.h")), Some(Language::C));
1159 }
1160
1161 #[test]
1162 fn test_detect_language_cpp() {
1163 assert_eq!(detect_language(Path::new("main.cpp")), Some(Language::Cpp));
1164 assert_eq!(detect_language(Path::new("main.cc")), Some(Language::Cpp));
1165 assert_eq!(detect_language(Path::new("main.cxx")), Some(Language::Cpp));
1166 assert_eq!(
1167 detect_language(Path::new("header.hpp")),
1168 Some(Language::Cpp)
1169 );
1170 assert_eq!(
1171 detect_language(Path::new("header.hxx")),
1172 Some(Language::Cpp)
1173 );
1174 }
1175
1176 #[test]
1177 fn test_detect_language_ruby() {
1178 assert_eq!(detect_language(Path::new("main.rb")), Some(Language::Ruby));
1179 }
1180
1181 #[test]
1182 fn test_detect_language_csharp() {
1183 assert_eq!(
1184 detect_language(Path::new("Program.cs")),
1185 Some(Language::CSharp)
1186 );
1187 }
1188
1189 #[test]
1190 fn test_detect_language_kotlin() {
1191 assert_eq!(
1192 detect_language(Path::new("Main.kt")),
1193 Some(Language::Kotlin)
1194 );
1195 assert_eq!(
1196 detect_language(Path::new("build.gradle.kts")),
1197 Some(Language::Kotlin)
1198 );
1199 }
1200
1201 #[test]
1202 fn test_detect_language_swift() {
1203 assert_eq!(
1204 detect_language(Path::new("App.swift")),
1205 Some(Language::Swift)
1206 );
1207 }
1208
1209 #[test]
1210 fn test_detect_language_scala() {
1211 assert_eq!(
1212 detect_language(Path::new("Main.scala")),
1213 Some(Language::Scala)
1214 );
1215 assert_eq!(
1216 detect_language(Path::new("script.sc")),
1217 Some(Language::Scala)
1218 );
1219 }
1220
1221 #[test]
1222 fn test_detect_language_php() {
1223 assert_eq!(detect_language(Path::new("index.php")), Some(Language::Php));
1224 }
1225
1226 #[test]
1227 fn test_detect_language_lua() {
1228 assert_eq!(detect_language(Path::new("init.lua")), Some(Language::Lua));
1229 }
1230
1231 #[test]
1232 fn test_detect_language_elixir() {
1233 assert_eq!(detect_language(Path::new("app.ex")), Some(Language::Elixir));
1234 assert_eq!(
1235 detect_language(Path::new("test.exs")),
1236 Some(Language::Elixir)
1237 );
1238 }
1239
1240 #[test]
1241 fn test_detect_language_haskell() {
1242 assert_eq!(
1243 detect_language(Path::new("Main.hs")),
1244 Some(Language::Haskell)
1245 );
1246 }
1247
1248 #[test]
1249 fn test_detect_language_ocaml() {
1250 assert_eq!(detect_language(Path::new("main.ml")), Some(Language::Ocaml));
1251 assert_eq!(
1252 detect_language(Path::new("main.mli")),
1253 Some(Language::Ocaml)
1254 );
1255 }
1256
1257 #[test]
1258 fn test_detect_language_markdown() {
1259 assert_eq!(
1260 detect_language(Path::new("README.md")),
1261 Some(Language::Markdown)
1262 );
1263 assert_eq!(
1264 detect_language(Path::new("docs.markdown")),
1265 Some(Language::Markdown)
1266 );
1267 }
1268
1269 #[test]
1270 fn test_detect_language_text() {
1271 assert_eq!(
1272 detect_language(Path::new("notes.txt")),
1273 Some(Language::Text)
1274 );
1275 assert_eq!(detect_language(Path::new("doc.text")), Some(Language::Text));
1276 assert_eq!(
1277 detect_language(Path::new("readme.rst")),
1278 Some(Language::Text)
1279 );
1280 }
1281
1282 #[test]
1283 fn test_detect_language_yaml() {
1284 assert_eq!(
1285 detect_language(Path::new("config.yaml")),
1286 Some(Language::Yaml)
1287 );
1288 assert_eq!(
1289 detect_language(Path::new("config.yml")),
1290 Some(Language::Yaml)
1291 );
1292 }
1293
1294 #[test]
1295 fn test_detect_language_toml() {
1296 assert_eq!(
1297 detect_language(Path::new("Cargo.toml")),
1298 Some(Language::Toml)
1299 );
1300 }
1301
1302 #[test]
1303 fn test_detect_language_json() {
1304 assert_eq!(
1305 detect_language(Path::new("package.json")),
1306 Some(Language::Json)
1307 );
1308 }
1309
1310 #[test]
1311 fn test_detect_language_shell() {
1312 assert_eq!(
1313 detect_language(Path::new("script.sh")),
1314 Some(Language::Shell)
1315 );
1316 assert_eq!(
1317 detect_language(Path::new("script.bash")),
1318 Some(Language::Shell)
1319 );
1320 assert_eq!(
1321 detect_language(Path::new("script.zsh")),
1322 Some(Language::Shell)
1323 );
1324 }
1325
1326 #[test]
1327 fn test_detect_language_powershell() {
1328 assert_eq!(
1329 detect_language(Path::new("script.ps1")),
1330 Some(Language::Powershell)
1331 );
1332 }
1333
1334 #[test]
1335 fn test_detect_language_dockerfile() {
1336 assert_eq!(
1337 detect_language(Path::new("Dockerfile")),
1338 Some(Language::Dockerfile)
1339 );
1340 assert_eq!(
1341 detect_language(Path::new("dockerfile")),
1342 Some(Language::Dockerfile)
1343 );
1344 }
1345
1346 #[test]
1347 fn test_detect_language_makefile() {
1348 assert_eq!(
1349 detect_language(Path::new("Makefile")),
1350 Some(Language::Makefile)
1351 );
1352 assert_eq!(
1353 detect_language(Path::new("makefile")),
1354 Some(Language::Makefile)
1355 );
1356 assert_eq!(
1357 detect_language(Path::new("GNUmakefile")),
1358 Some(Language::Makefile)
1359 );
1360 }
1361
1362 #[test]
1363 fn test_detect_language_asciidoc() {
1364 assert_eq!(
1365 detect_language(Path::new("doc.adoc")),
1366 Some(Language::AsciiDoc)
1367 );
1368 assert_eq!(
1369 detect_language(Path::new("doc.asciidoc")),
1370 Some(Language::AsciiDoc)
1371 );
1372 }
1373
1374 #[test]
1375 fn test_detect_language_org() {
1376 assert_eq!(detect_language(Path::new("notes.org")), Some(Language::Org));
1377 }
1378
1379 #[test]
1380 fn test_detect_language_unknown() {
1381 assert_eq!(detect_language(Path::new("file.xyz")), None);
1382 assert_eq!(detect_language(Path::new("file.unknown")), None);
1383 assert_eq!(detect_language(Path::new("no_extension")), None);
1384 }
1385
1386 #[test]
1387 fn test_detect_language_case_insensitive() {
1388 assert_eq!(
1389 detect_language(Path::new("main.PY")),
1390 Some(Language::Python)
1391 );
1392 assert_eq!(detect_language(Path::new("Main.RS")), Some(Language::Rust));
1393 assert_eq!(
1394 detect_language(Path::new("app.TS")),
1395 Some(Language::TypeScript)
1396 );
1397 }
1398
1399 #[test]
1402 fn test_is_text_format_true() {
1403 assert!(is_text_format(Language::Markdown));
1404 assert!(is_text_format(Language::Text));
1405 assert!(is_text_format(Language::Yaml));
1406 assert!(is_text_format(Language::Toml));
1407 assert!(is_text_format(Language::Json));
1408 assert!(is_text_format(Language::Dockerfile));
1409 assert!(is_text_format(Language::Makefile));
1410 assert!(is_text_format(Language::Shell));
1411 assert!(is_text_format(Language::Powershell));
1412 assert!(is_text_format(Language::AsciiDoc));
1413 assert!(is_text_format(Language::Org));
1414 }
1415
1416 #[test]
1417 fn test_is_text_format_false() {
1418 assert!(!is_text_format(Language::Python));
1419 assert!(!is_text_format(Language::Rust));
1420 assert!(!is_text_format(Language::TypeScript));
1421 assert!(!is_text_format(Language::JavaScript));
1422 assert!(!is_text_format(Language::Go));
1423 assert!(!is_text_format(Language::Java));
1424 assert!(!is_text_format(Language::C));
1425 assert!(!is_text_format(Language::Cpp));
1426 assert!(!is_text_format(Language::Ruby));
1427 assert!(!is_text_format(Language::CSharp));
1428 assert!(!is_text_format(Language::Kotlin));
1429 assert!(!is_text_format(Language::Swift));
1430 assert!(!is_text_format(Language::Scala));
1431 assert!(!is_text_format(Language::Php));
1432 assert!(!is_text_format(Language::Lua));
1433 assert!(!is_text_format(Language::Elixir));
1434 assert!(!is_text_format(Language::Haskell));
1435 assert!(!is_text_format(Language::Ocaml));
1436 }
1437
1438 #[test]
1441 fn test_extract_python_function() {
1442 let source = r#"
1443def hello(name: str) -> str:
1444 """Say hello to someone."""
1445 return f"Hello, {name}!"
1446"#;
1447 let units = extract_units(Path::new("test.py"), source, Language::Python);
1448 assert_eq!(units.len(), 1);
1449 assert_eq!(units[0].name, "hello");
1450 assert_eq!(units[0].unit_type, UnitType::Function);
1451 assert!(units[0].docstring.is_some());
1454 }
1455
1456 #[test]
1457 fn test_extract_python_class() {
1458 let source = r#"
1459class Person:
1460 """A person class."""
1461 def __init__(self, name):
1462 self.name = name
1463
1464 def greet(self):
1465 return f"Hello, I'm {self.name}"
1466"#;
1467 let units = extract_units(Path::new("test.py"), source, Language::Python);
1468 assert!(units
1469 .iter()
1470 .any(|u| u.name == "Person" && u.unit_type == UnitType::Class));
1471 assert!(units
1472 .iter()
1473 .any(|u| u.name == "__init__" && u.unit_type == UnitType::Method));
1474 assert!(units
1475 .iter()
1476 .any(|u| u.name == "greet" && u.unit_type == UnitType::Method));
1477 }
1478
1479 #[test]
1480 fn test_extract_rust_function() {
1481 let source = r#"
1482/// Adds two numbers together.
1483fn add(a: i32, b: i32) -> i32 {
1484 a + b
1485}
1486"#;
1487 let units = extract_units(Path::new("test.rs"), source, Language::Rust);
1488 assert_eq!(units.len(), 1);
1489 assert_eq!(units[0].name, "add");
1490 assert_eq!(units[0].unit_type, UnitType::Function);
1491 assert!(units[0].docstring.is_some());
1492 assert!(units[0]
1493 .docstring
1494 .as_ref()
1495 .unwrap()
1496 .contains("Adds two numbers"));
1497 }
1498
1499 #[test]
1500 fn test_extract_rust_impl() {
1501 let source = r#"
1502struct Point {
1503 x: i32,
1504 y: i32,
1505}
1506
1507impl Point {
1508 fn new(x: i32, y: i32) -> Self {
1509 Self { x, y }
1510 }
1511}
1512"#;
1513 let units = extract_units(Path::new("test.rs"), source, Language::Rust);
1514 assert!(units
1516 .iter()
1517 .any(|u| u.name == "Point" && u.unit_type == UnitType::Class));
1518 assert!(units.iter().any(|u| u.name == "new"));
1521 }
1522
1523 #[test]
1524 fn test_extract_javascript_function() {
1525 let source = r#"
1526function greet(name) {
1527 return `Hello, ${name}!`;
1528}
1529"#;
1530 let units = extract_units(Path::new("test.js"), source, Language::JavaScript);
1531 assert_eq!(units.len(), 1);
1532 assert_eq!(units[0].name, "greet");
1533 assert_eq!(units[0].unit_type, UnitType::Function);
1534 }
1535
1536 #[test]
1537 fn test_extract_typescript_class() {
1538 let source = r#"
1539class Calculator {
1540 add(a: number, b: number): number {
1541 return a + b;
1542 }
1543}
1544"#;
1545 let units = extract_units(Path::new("test.ts"), source, Language::TypeScript);
1546 assert!(units
1547 .iter()
1548 .any(|u| u.name == "Calculator" && u.unit_type == UnitType::Class));
1549 assert!(units
1550 .iter()
1551 .any(|u| u.name == "add" && u.unit_type == UnitType::Method));
1552 }
1553
1554 #[test]
1555 fn test_extract_go_function() {
1556 let source = r#"
1557package main
1558
1559func Add(a, b int) int {
1560 return a + b
1561}
1562"#;
1563 let units = extract_units(Path::new("test.go"), source, Language::Go);
1564 assert_eq!(units.len(), 1);
1565 assert_eq!(units[0].name, "Add");
1566 assert_eq!(units[0].unit_type, UnitType::Function);
1567 }
1568
1569 #[test]
1570 fn test_extract_java_class() {
1571 let source = r#"
1572public class Calculator {
1573 public int add(int a, int b) {
1574 return a + b;
1575 }
1576}
1577"#;
1578 let units = extract_units(Path::new("Test.java"), source, Language::Java);
1579 assert!(units
1580 .iter()
1581 .any(|u| u.name == "Calculator" && u.unit_type == UnitType::Class));
1582 assert!(units
1583 .iter()
1584 .any(|u| u.name == "add" && u.unit_type == UnitType::Method));
1585 }
1586
1587 #[test]
1588 fn test_extract_markdown_document() {
1589 let source = r#"# My Document
1590
1591This is a paragraph.
1592
1593## Section 1
1594
1595Some content here.
1596"#;
1597 let units = extract_units(Path::new("README.md"), source, Language::Markdown);
1598 assert_eq!(units.len(), 1);
1599 assert_eq!(units[0].name, "README");
1600 assert_eq!(units[0].unit_type, UnitType::Document);
1601 }
1602
1603 #[test]
1604 fn test_extract_empty_source() {
1605 let units = extract_units(Path::new("test.py"), "", Language::Python);
1606 assert!(units.is_empty());
1607 }
1608
1609 #[test]
1610 fn test_extract_empty_markdown() {
1611 let units = extract_units(Path::new("empty.md"), "", Language::Markdown);
1612 assert!(units.is_empty());
1613 }
1614
1615 #[test]
1616 fn test_extract_whitespace_only_markdown() {
1617 let units = extract_units(
1618 Path::new("whitespace.md"),
1619 " \n\n \n",
1620 Language::Markdown,
1621 );
1622 assert!(units.is_empty());
1623 }
1624
1625 #[test]
1628 fn test_build_call_graph_simple() {
1629 let source = r#"
1630def caller():
1631 callee()
1632
1633def callee():
1634 pass
1635"#;
1636 let mut units = extract_units(Path::new("test.py"), source, Language::Python);
1637 build_call_graph(&mut units);
1638
1639 let caller = units.iter().find(|u| u.name == "caller").unwrap();
1640 let callee = units.iter().find(|u| u.name == "callee").unwrap();
1641
1642 assert!(caller.calls.contains(&"callee".to_string()));
1643 assert!(callee.called_by.contains(&"caller".to_string()));
1644 }
1645
1646 #[test]
1647 fn test_build_call_graph_multiple_callers() {
1648 let source = r#"
1649def helper():
1650 pass
1651
1652def caller1():
1653 helper()
1654
1655def caller2():
1656 helper()
1657"#;
1658 let mut units = extract_units(Path::new("test.py"), source, Language::Python);
1659 build_call_graph(&mut units);
1660
1661 let helper = units.iter().find(|u| u.name == "helper").unwrap();
1662 assert!(helper.called_by.contains(&"caller1".to_string()));
1663 assert!(helper.called_by.contains(&"caller2".to_string()));
1664 }
1665
1666 #[test]
1669 fn test_extract_control_flow_loops() {
1670 let source = r#"
1671def process_items(items):
1672 for item in items:
1673 print(item)
1674"#;
1675 let units = extract_units(Path::new("test.py"), source, Language::Python);
1676 assert_eq!(units.len(), 1);
1677 assert!(units[0].has_loops);
1678 }
1679
1680 #[test]
1681 fn test_extract_control_flow_branches() {
1682 let source = r#"
1683def check_value(x):
1684 if x > 0:
1685 return "positive"
1686 else:
1687 return "non-positive"
1688"#;
1689 let units = extract_units(Path::new("test.py"), source, Language::Python);
1690 assert_eq!(units.len(), 1);
1691 assert!(units[0].has_branches);
1692 }
1693
1694 #[test]
1695 fn test_extract_control_flow_error_handling() {
1696 let source = r#"
1697def safe_divide(a, b):
1698 try:
1699 return a / b
1700 except ZeroDivisionError:
1701 return None
1702"#;
1703 let units = extract_units(Path::new("test.py"), source, Language::Python);
1704 assert_eq!(units.len(), 1);
1705 assert!(units[0].has_error_handling);
1706 }
1707
1708 #[test]
1709 fn test_extract_complexity() {
1710 let source = r#"
1711def complex_function(x, y):
1712 if x > 0:
1713 if y > 0:
1714 return "both positive"
1715 return "not both positive"
1716"#;
1717 let units = extract_units(Path::new("test.py"), source, Language::Python);
1718 assert_eq!(units.len(), 1);
1719 assert!(units[0].complexity >= 3);
1721 }
1722
1723 #[test]
1726 fn test_language_from_str() {
1727 use std::str::FromStr;
1728
1729 assert_eq!(Language::from_str("python"), Ok(Language::Python));
1730 assert_eq!(Language::from_str("py"), Ok(Language::Python));
1731 assert_eq!(Language::from_str("PYTHON"), Ok(Language::Python));
1732
1733 assert_eq!(Language::from_str("rust"), Ok(Language::Rust));
1734 assert_eq!(Language::from_str("rs"), Ok(Language::Rust));
1735
1736 assert_eq!(Language::from_str("typescript"), Ok(Language::TypeScript));
1737 assert_eq!(Language::from_str("ts"), Ok(Language::TypeScript));
1738
1739 assert_eq!(Language::from_str("javascript"), Ok(Language::JavaScript));
1740 assert_eq!(Language::from_str("js"), Ok(Language::JavaScript));
1741
1742 assert_eq!(Language::from_str("go"), Ok(Language::Go));
1743 assert_eq!(Language::from_str("java"), Ok(Language::Java));
1744
1745 assert_eq!(Language::from_str("c"), Ok(Language::C));
1746 assert_eq!(Language::from_str("cpp"), Ok(Language::Cpp));
1747 assert_eq!(Language::from_str("c++"), Ok(Language::Cpp));
1748
1749 assert_eq!(Language::from_str("csharp"), Ok(Language::CSharp));
1750 assert_eq!(Language::from_str("c#"), Ok(Language::CSharp));
1751 assert_eq!(Language::from_str("cs"), Ok(Language::CSharp));
1752
1753 assert_eq!(Language::from_str("ruby"), Ok(Language::Ruby));
1754 assert_eq!(Language::from_str("rb"), Ok(Language::Ruby));
1755
1756 assert_eq!(
1757 Language::from_str("unknown"),
1758 Err("Unknown language: unknown".to_string())
1759 );
1760 }
1761}