Skip to main content

sift/
parser.rs

1use anyhow::{Context, Result};
2use serde::{Deserialize, Serialize};
3use std::path::Path;
4use tree_sitter::{Language, Parser, Query, QueryCursor, StreamingIterator};
5
/// Identifies a source language this module can parse.
///
/// A value is normally obtained from a file extension via
/// [`LanguageId::from_path`]; it selects both the tree-sitter grammar and the
/// set of query patterns used for extraction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LanguageId {
    /// Rust (`.rs`).
    Rust,
    /// Python (`.py`).
    Python,
    /// JavaScript, including JSX (`.js`, `.jsx`).
    JavaScript,
    /// TypeScript (`.ts`).
    TypeScript,
    /// TypeScript with JSX (`.tsx`).
    Tsx,
    /// Go (`.go`).
    Go,
    /// C (`.c`, `.h`).
    C,
    /// C++ (`.cpp`, `.cxx`, `.cc`, `.hpp`, `.hh`, `.hxx`).
    Cpp,
    /// Java (`.java`).
    Java,
    /// Ruby (`.rb`).
    Ruby,
    /// Zig (`.zig`).
    Zig,
    /// Bash / POSIX shell scripts (`.sh`, `.bash`).
    Bash,
}
21
22impl LanguageId {
23    pub fn from_path(path: &Path) -> Option<Self> {
24        let ext = path.extension()?.to_str()?;
25        match ext {
26            "rs" => Some(Self::Rust),
27            "py" => Some(Self::Python),
28            "js" | "jsx" => Some(Self::JavaScript),
29            "ts" => Some(Self::TypeScript),
30            "tsx" => Some(Self::Tsx),
31            "go" => Some(Self::Go),
32            "c" | "h" => Some(Self::C),
33            "cpp" | "cxx" | "cc" | "hpp" | "hh" | "hxx" => Some(Self::Cpp),
34            "java" => Some(Self::Java),
35            "rb" => Some(Self::Ruby),
36            "zig" => Some(Self::Zig),
37            "sh" | "bash" => Some(Self::Bash),
38            _ => None,
39        }
40    }
41
42    fn grammar(&self) -> Language {
43        match self {
44            Self::Rust => Language::new(tree_sitter_rust::LANGUAGE),
45            Self::Python => Language::new(tree_sitter_python::LANGUAGE),
46            Self::JavaScript => Language::new(tree_sitter_javascript::LANGUAGE),
47            Self::TypeScript => Language::new(tree_sitter_typescript::LANGUAGE_TYPESCRIPT),
48            Self::Tsx => Language::new(tree_sitter_typescript::LANGUAGE_TSX),
49            Self::Go => Language::new(tree_sitter_go::LANGUAGE),
50            Self::C => Language::new(tree_sitter_c::LANGUAGE),
51            Self::Cpp => Language::new(tree_sitter_cpp::LANGUAGE),
52            Self::Java => Language::new(tree_sitter_java::LANGUAGE),
53            Self::Ruby => Language::new(tree_sitter_ruby::LANGUAGE),
54            Self::Zig => Language::new(tree_sitter_zig::LANGUAGE),
55            Self::Bash => Language::new(tree_sitter_bash::LANGUAGE),
56        }
57    }
58
59    /// Returns (capture_kind, query_pattern) pairs.
60    /// Each pattern is tried independently; failed patterns are silently skipped.
61    /// All definition patterns capture both @name (for the identifier) and @node (for the whole definition).
62    fn patterns(&self) -> Vec<(CaptureKind, &'static str)> {
63        match self {
64            Self::Rust => vec![
65                (CaptureKind::Def(DefKind::Function), "(function_item name: (identifier) @name) @node"),
66                (CaptureKind::Def(DefKind::Struct), "(struct_item name: (type_identifier) @name) @node"),
67                (CaptureKind::Def(DefKind::Trait), "(trait_item name: (type_identifier) @name) @node"),
68                (CaptureKind::Def(DefKind::Impl), "(impl_item type: (type_identifier) @name) @node"),
69                (CaptureKind::Def(DefKind::Enum), "(enum_item name: (type_identifier) @name) @node"),
70                (CaptureKind::Def(DefKind::TypeAlias), "(type_item name: (type_identifier) @name) @node"),
71                (CaptureKind::Def(DefKind::Constant), "(const_item name: (identifier) @name) @node"),
72                (CaptureKind::Def(DefKind::Static), "(static_item name: (identifier) @name) @node"),
73                (CaptureKind::Def(DefKind::Function), "(function_signature_item name: (identifier) @name) @node"),
74                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
75                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (field_expression field: (field_identifier) @name))"),
76                (CaptureKind::Import, "(use_declaration (scoped_identifier name: (identifier) @name))"),
77                (CaptureKind::Import, "(use_declaration (scoped_use_list list: (use_list (identifier) @name)))"),
78                (CaptureKind::Import, "(use_declaration argument: (use_as_clause alias: (identifier) @name))"),
79                (CaptureKind::Import, "(use_declaration argument: (identifier) @name)"),
80            ],
81            Self::Python => vec![
82                (CaptureKind::Def(DefKind::Function), "(function_definition name: (identifier) @name) @node"),
83                (CaptureKind::Def(DefKind::Class), "(class_definition name: (identifier) @name) @node"),
84                (CaptureKind::Ref(RefKind::Call), "(call function: (identifier) @name)"),
85                (CaptureKind::Import, "(import_statement name: (dotted_name) @name)"),
86                (CaptureKind::Import, "(import_from_statement name: (dotted_name) @name)"),
87            ],
88            Self::JavaScript => vec![
89                (CaptureKind::Def(DefKind::Function), "(function_declaration name: (identifier) @name) @node"),
90                (CaptureKind::Def(DefKind::Class), "(class_declaration name: (identifier) @name) @node"),
91                (CaptureKind::Def(DefKind::Method), "(method_definition name: (property_identifier) @name) @node"),
92                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
93                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (member_expression property: (property_identifier) @name))"),
94                (CaptureKind::Import, "(import_statement source: (string) (import_clause name: (identifier) @name))"),
95                (CaptureKind::Import, "(import_statement source: (string) (import_clause (named_imports (import_specifier name: (identifier) @name))))"),
96            ],
97            Self::TypeScript | Self::Tsx => vec![
98                (CaptureKind::Def(DefKind::Function), "(function_declaration name: (identifier) @name) @node"),
99                (CaptureKind::Def(DefKind::Class), "(class_declaration name: (identifier) @name) @node"),
100                (CaptureKind::Def(DefKind::Method), "(method_definition name: (property_identifier) @name) @node"),
101                (CaptureKind::Def(DefKind::Trait), "(interface_declaration name: (type_identifier) @name) @node"),
102                (CaptureKind::Def(DefKind::TypeAlias), "(type_alias_declaration name: (type_identifier) @name) @node"),
103                (CaptureKind::Def(DefKind::Enum), "(enum_declaration name: (identifier) @name) @node"),
104                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
105                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (member_expression property: (property_identifier) @name))"),
106                (CaptureKind::Import, "(import_statement source: (string) (import_clause name: (identifier) @name))"),
107                (CaptureKind::Import, "(import_statement source: (string) (import_clause (named_imports (import_specifier name: (identifier) @name))))"),
108            ],
109            Self::Go => vec![
110                (CaptureKind::Def(DefKind::Function), "(function_declaration name: (identifier) @name) @node"),
111                (CaptureKind::Def(DefKind::Method), "(method_declaration name: (field_identifier) @name) @node"),
112                (CaptureKind::Def(DefKind::Struct), "(type_declaration (type_spec name: (type_identifier) @name)) @node"),
113                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
114                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (selector_expression field: (field_identifier) @name))"),
115                (CaptureKind::Import, "(import_declaration (import_spec name: (package_identifier)? path: (interpreted_string_literal) @name))"),
116            ],
117            Self::C => vec![
118                (CaptureKind::Def(DefKind::Function), "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @node"),
119                (CaptureKind::Def(DefKind::Struct), "(struct_specifier name: (type_identifier) @name) @node"),
120                (CaptureKind::Def(DefKind::Struct), "(union_specifier name: (type_identifier) @name) @node"),
121                (CaptureKind::Def(DefKind::Enum), "(enum_specifier name: (type_identifier) @name) @node"),
122                (CaptureKind::Def(DefKind::TypeAlias), "(type_definition declarator: (type_identifier) @name) @node"),
123                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
124                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (field_expression field: (field_identifier) @name))"),
125                (CaptureKind::Import, "(preproc_include path: (string_literal) @name)"),
126                (CaptureKind::Import, "(preproc_include path: (system_lib_string) @name)"),
127            ],
128            Self::Cpp => vec![
129                (CaptureKind::Def(DefKind::Function), "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @node"),
130                (CaptureKind::Def(DefKind::Function), "(template_declaration declaration: (function_definition declarator: (function_declarator declarator: (identifier) @name))) @node"),
131                (CaptureKind::Def(DefKind::Class), "(class_specifier name: (type_identifier) @name) @node"),
132                (CaptureKind::Def(DefKind::Struct), "(struct_specifier name: (type_identifier) @name) @node"),
133                (CaptureKind::Def(DefKind::Enum), "(enum_specifier name: (type_identifier) @name) @node"),
134                (CaptureKind::Def(DefKind::TypeAlias), "(type_definition declarator: (type_identifier) @name) @node"),
135                (CaptureKind::Def(DefKind::TypeAlias), "(alias_declaration name: (identifier) @name) @node"),
136                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
137                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (field_expression field: (field_identifier) @name))"),
138                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (qualified_identifier name: (identifier) @name))"),
139                (CaptureKind::Import, "(preproc_include path: (string_literal) @name)"),
140                (CaptureKind::Import, "(preproc_include path: (system_lib_string) @name)"),
141            ],
142            Self::Java => vec![
143                (CaptureKind::Def(DefKind::Class), "(class_declaration name: (identifier) @name) @node"),
144                (CaptureKind::Def(DefKind::Trait), "(interface_declaration name: (identifier) @name) @node"),
145                (CaptureKind::Def(DefKind::Method), "(method_declaration name: (identifier) @name) @node"),
146                (CaptureKind::Def(DefKind::Enum), "(enum_declaration name: (identifier) @name) @node"),
147                (CaptureKind::Def(DefKind::Class), "(record_declaration name: (identifier) @name) @node"),
148                (CaptureKind::Ref(RefKind::Call), "(method_invocation name: (identifier) @name)"),
149                (CaptureKind::Import, "(import_declaration name: (scoped_identifier name: (identifier) @name))"),
150            ],
151            Self::Ruby => vec![
152                (CaptureKind::Def(DefKind::Method), "(method name: (identifier) @name) @node"),
153                (CaptureKind::Def(DefKind::Method), "(singleton_method name: (identifier) @name) @node"),
154                (CaptureKind::Def(DefKind::Class), "(class name: (constant) @name) @node"),
155                (CaptureKind::Def(DefKind::Class), "(module name: (constant) @name) @node"),
156                (CaptureKind::Ref(RefKind::Call), "(call method: (identifier) @name)"),
157                (CaptureKind::Import, "(require path: (string) @name)"),
158            ],
159            Self::Zig => vec![
160                (CaptureKind::Def(DefKind::Function), "(function_declaration name: (identifier) @name) @node"),
161                (CaptureKind::Def(DefKind::Struct), "(container_declaration name: (identifier) @name (container_kind struct)) @node"),
162                (CaptureKind::Def(DefKind::Enum), "(container_declaration name: (identifier) @name (container_kind enum)) @node"),
163                (CaptureKind::Def(DefKind::TypeAlias), "(type_declaration name: (identifier) @name) @node"),
164                (CaptureKind::Def(DefKind::Constant), "(variable_declaration name: (identifier) @name (container_kind const)) @node"),
165                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
166                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (field_expression field: (identifier) @name))"),
167            ],
168            Self::Bash => vec![
169                (CaptureKind::Def(DefKind::Function), "(function_definition name: (word) @name) @node"),
170            ],
171        }
172    }
173}
174
175#[derive(Debug, Clone, PartialEq, Eq)]
176pub enum CaptureKind {
177    Def(DefKind),
178    Ref(RefKind),
179    Import,
180}
181
182#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
183pub enum DefKind {
184    Function,
185    Struct,
186    Trait,
187    Impl,
188    Enum,
189    TypeAlias,
190    Constant,
191    Static,
192    Class,
193    Method,
194}
195
196#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
197pub enum RefKind {
198    Call,
199}
200
201#[derive(Debug, Clone)]
202pub struct ParsedFile {
203    pub path: std::path::PathBuf,
204    pub language: LanguageId,
205    pub definitions: Vec<ParsedDef>,
206    pub references: Vec<ParsedRef>,
207    pub imports: Vec<ParsedImport>,
208}
209
210#[derive(Debug, Clone)]
211pub struct ParsedDef {
212    pub name: String,
213    pub kind: DefKind,
214    pub start_line: usize,
215    pub end_line: usize,
216    pub doc: Option<String>,
217}
218
219#[derive(Debug, Clone)]
220pub struct ParsedRef {
221    pub name: String,
222    pub kind: RefKind,
223    pub line: usize,
224}
225
/// A single imported name or path.
#[derive(Debug, Clone)]
pub struct ParsedImport {
    /// Source text of the captured import node, exactly as written (for C
    /// includes this keeps the surrounding quotes or angle brackets).
    pub name: String,
}
230
231pub fn parse_file(path: &Path) -> Result<ParsedFile> {
232    let language = LanguageId::from_path(path).context("unsupported language")?;
233    let source = std::fs::read_to_string(path)
234        .with_context(|| format!("reading {}", path.display()))?;
235    parse_source(path, language, &source)
236}
237
238pub fn parse_source(path: &Path, language: LanguageId, source: &str) -> Result<ParsedFile> {
239    let mut parser = Parser::new();
240    parser
241        .set_language(&language.grammar())
242        .context("setting language")?;
243
244    let tree = parser.parse(source, None).context("parsing")?;
245    let root = tree.root_node();
246
247    let mut definitions: Vec<ParsedDef> = Vec::new();
248    let mut references: Vec<ParsedRef> = Vec::new();
249    let mut imports: Vec<ParsedImport> = Vec::new();
250
251    let source_bytes = source.as_bytes();
252
253    for (kind, pattern_str) in language.patterns() {
254        let Ok(query) = Query::new(&language.grammar(), pattern_str) else {
255            continue;
256        };
257        process_pattern(
258            &query,
259            &kind,
260            root,
261            source_bytes,
262            &mut definitions,
263            &mut references,
264            &mut imports,
265        );
266    }
267
268    // Extract doc comments for each definition
269    for def in &mut definitions {
270        if def.doc.is_none() {
271            def.doc = extract_doc_comment(source, def.start_line);
272        }
273    }
274
275    Ok(ParsedFile {
276        path: path.to_path_buf(),
277        language,
278        definitions,
279        references,
280        imports,
281    })
282}
283
284fn process_pattern(
285    query: &Query,
286    kind: &CaptureKind,
287    root: tree_sitter::Node<'_>,
288    source_bytes: &[u8],
289    definitions: &mut Vec<ParsedDef>,
290    references: &mut Vec<ParsedRef>,
291    imports: &mut Vec<ParsedImport>,
292) {
293    let caps = query.capture_names();
294    let name_idx = caps.iter().position(|n| *n == "name").map(|i| i as u32);
295    let node_idx = caps.iter().position(|n| *n == "node").map(|i| i as u32);
296    let Some(name_capture_idx) = name_idx else { return };
297
298    let mut cursor = QueryCursor::new();
299    let mut query_matches = cursor.matches(query, root, source_bytes);
300
301    while let Some(match_) = query_matches.next() {
302        let mut name_node = None;
303        let mut span_node = None;
304        for capture in match_.captures {
305            if capture.index == node_idx.unwrap_or(u32::MAX) {
306                span_node = Some(capture.node);
307            } else if capture.index == name_capture_idx {
308                name_node = Some(capture.node);
309            }
310        }
311
312        let Some(name_node) = name_node else { continue };
313        let Ok(name) = name_node.utf8_text(source_bytes) else { continue };
314        let name = name.to_string();
315        let line = name_node.start_position().row + 1;
316
317        push_capture(kind, &name, line, span_node, definitions, references, imports);
318    }
319}
320
321fn push_capture(
322    kind: &CaptureKind,
323    name: &str,
324    line: usize,
325    span_node: Option<tree_sitter::Node<'_>>,
326    definitions: &mut Vec<ParsedDef>,
327    references: &mut Vec<ParsedRef>,
328    imports: &mut Vec<ParsedImport>,
329) {
330    match kind {
331        CaptureKind::Def(def_kind) => {
332            let (start_line, end_line) = if let Some(node) = span_node {
333                (node.start_position().row + 1, node.end_position().row + 1)
334            } else {
335                (line, line)
336            };
337            definitions.push(ParsedDef {
338                name: name.to_string(),
339                kind: *def_kind,
340                start_line,
341                end_line,
342                doc: None,
343            });
344        }
345        CaptureKind::Ref(_) => {
346            references.push(ParsedRef {
347                name: name.to_string(),
348                kind: RefKind::Call,
349                line,
350            });
351        }
352        CaptureKind::Import => {
353            imports.push(ParsedImport {
354                name: name.to_string(),
355            });
356        }
357    }
358}
359
/// Returns `true` when `trimmed` (a line with surrounding whitespace removed)
/// looks like a single-line comment that may belong to a doc comment.
///
/// Accepted forms:
/// * `///` and `//!` doc comments, `// ` / `//<TAB>` line comments, and a
///   bare `//` (an empty line inside a comment block);
/// * `#` comments, including a bare `#`.
///
/// Rejected `#` forms:
/// * `#[...]` (Rust attributes) and `#!...` (inner attributes, shebangs);
/// * C preprocessor directives such as `#include` or `#define`, which would
///   otherwise be mistaken for comments in C and C++ sources.
fn is_doc_line(trimmed: &str) -> bool {
    // Directives written directly after `#`. A match only counts when the
    // keyword ends there, so `#iffy note` is still treated as a comment.
    const PREPROCESSOR_DIRECTIVES: [&str; 10] = [
        "include", "define", "undef", "ifdef", "ifndef", "if", "elif", "else", "endif", "pragma",
    ];

    if trimmed == "//"
        || trimmed.starts_with("///")
        || trimmed.starts_with("//!")
        || trimmed.starts_with("// ")
        || trimmed.starts_with("//\t")
    {
        return true;
    }

    let Some(rest) = trimmed.strip_prefix('#') else {
        return false;
    };
    if rest.is_empty() {
        // A bare `#` separates paragraphs inside a `#` comment block.
        return true;
    }
    if rest.starts_with('[') || rest.starts_with('!') {
        return false;
    }

    let is_directive = PREPROCESSOR_DIRECTIVES.iter().any(|directive| {
        matches!(
            rest.strip_prefix(*directive),
            Some(tail) if !tail.starts_with(|c: char| c.is_alphanumeric() || c == '_')
        )
    });
    !is_directive
}
375
/// Returns `true` when `trimmed` opens a documentation block comment, that
/// is, when it begins with `/**` or `/*!`.
fn is_block_start(trimmed: &str) -> bool {
    ["/**", "/*!"]
        .iter()
        .any(|opener| trimmed.starts_with(*opener))
}
379
380/// Extract doc comment text preceding the given definition line (1-indexed).
381/// Looks backward for consecutive doc comment lines/blocks.
382fn extract_doc_comment(source: &str, def_line: usize) -> Option<String> {
383    if def_line <= 1 {
384        return None;
385    }
386    let lines: Vec<&str> = source.lines().collect();
387    let mut collected: Vec<&str> = Vec::new();
388    let mut cur = def_line.saturating_sub(2); // 0-indexed, line before def
389
390    loop {
391        let raw = lines[cur];
392        let trimmed = raw.trim();
393
394        if trimmed.is_empty() {
395            if collected.is_empty() {
396                if cur == 0 {
397                    break;
398                }
399                cur -= 1;
400                continue;
401            }
402            break;
403        }
404
405        if is_doc_line(trimmed) {
406            collected.push(raw);
407            if cur == 0 {
408                break;
409            }
410            cur -= 1;
411            continue;
412        }
413
414        // Block comment start+end on same line
415        if is_block_start(trimmed) && trimmed.contains("*/") {
416            collected.push(raw);
417            break;
418        }
419
420        // Block comment end (contains */) — seek backward for start
421        if trimmed.ends_with("*/") || trimmed == "*/" {
422            collected.push(raw);
423            if cur == 0 {
424                break;
425            }
426            cur -= 1;
427            loop {
428                let inner = lines[cur];
429                collected.push(inner);
430                if inner.trim_start().starts_with("/*") {
431                    break;
432                }
433                if cur == 0 {
434                    break;
435                }
436                cur -= 1;
437            }
438            break;
439        }
440
441        // Block comment start without end on same line
442        if is_block_start(trimmed) {
443            collected.push(raw);
444            break;
445        }
446
447        // Rust attributes between doc comment and definition
448        if trimmed.starts_with("#[") || trimmed.starts_with("#![") {
449            if cur == 0 {
450                break;
451            }
452            cur -= 1;
453            continue;
454        }
455
456        break;
457    }
458
459    if collected.is_empty() {
460        None
461    } else {
462        collected.reverse();
463        Some(collected.join("\n"))
464    }
465}
466
467#[cfg(test)]
468mod tests {
469    use super::*;
470
471    fn parse_rust(source: &str) -> ParsedFile {
472        let path = Path::new("test.rs");
473        parse_source(path, LanguageId::Rust, source).unwrap()
474    }
475
476    #[test]
477    fn test_parse_function_definition() {
478        let pf = parse_rust("fn hello() {}");
479        assert_eq!(pf.definitions.len(), 1);
480        assert_eq!(pf.definitions[0].name, "hello");
481        assert_eq!(pf.definitions[0].kind, DefKind::Function);
482        assert_eq!(pf.definitions[0].start_line, 1);
483        assert_eq!(pf.definitions[0].end_line, 1);
484    }
485
486    #[test]
487    fn test_parse_struct_definition() {
488        let pf = parse_rust("struct Point { x: i32, y: i32 }");
489        assert_eq!(pf.definitions.len(), 1);
490        assert_eq!(pf.definitions[0].name, "Point");
491        assert_eq!(pf.definitions[0].kind, DefKind::Struct);
492    }
493
494    #[test]
495    fn test_parse_trait_definition() {
496        let pf = parse_rust("trait Foo { fn bar(&self); }");
497        assert_eq!(pf.definitions.len(), 2); // trait Foo + method bar
498        assert_eq!(pf.definitions[0].name, "Foo");
499        assert_eq!(pf.definitions[0].kind, DefKind::Trait);
500        assert_eq!(pf.definitions[1].name, "bar");
501        assert_eq!(pf.definitions[1].kind, DefKind::Function);
502    }
503
504    #[test]
505    fn test_parse_function_calls() {
506        let pf = parse_rust("fn caller() { callee(); another() }");
507        let calls: Vec<_> = pf.references.iter().map(|r| r.name.as_str()).collect();
508        assert!(calls.contains(&"callee"));
509        assert!(calls.contains(&"another"));
510    }
511
512    #[test]
513    fn test_parse_method_calls() {
514        let pf = parse_rust("fn caller() { foo.bar(); baz.qux() }");
515        let calls: Vec<_> = pf.references.iter().map(|r| r.name.as_str()).collect();
516        assert!(calls.contains(&"bar"), "method bar not found in calls: {:?}", calls);
517        assert!(calls.contains(&"qux"), "method qux not found in calls: {:?}", calls);
518    }
519
520    #[test]
521    fn test_parse_imports() {
522        let pf = parse_rust("use std::collections::HashMap;");
523        let imports: Vec<_> = pf.imports.iter().map(|i| i.name.as_str()).collect();
524        assert!(imports.contains(&"HashMap"), "imports: {:?}", imports);
525    }
526
527    #[test]
528    fn test_parse_import_from_list() {
529        let pf = parse_rust("use std::io::{BufRead, Write};");
530        let imports: Vec<_> = pf.imports.iter().map(|i| i.name.as_str()).collect();
531        assert!(imports.contains(&"BufRead"), "imports: {:?}", imports);
532        assert!(imports.contains(&"Write"), "imports: {:?}", imports);
533    }
534
535    #[test]
536    fn test_parse_python_function() {
537        let path = Path::new("test.py");
538        let pf = parse_source(path, LanguageId::Python, "def hello():\n    pass\n").unwrap();
539        assert_eq!(pf.definitions.len(), 1);
540        assert_eq!(pf.definitions[0].name, "hello");
541        assert_eq!(pf.definitions[0].kind, DefKind::Function);
542    }
543
544    #[test]
545    fn test_parse_python_class() {
546        let path = Path::new("test.py");
547        let pf = parse_source(path, LanguageId::Python, "class MyClass:\n    pass\n").unwrap();
548        assert_eq!(pf.definitions.len(), 1);
549        assert_eq!(pf.definitions[0].name, "MyClass");
550        assert_eq!(pf.definitions[0].kind, DefKind::Class);
551    }
552
553    #[test]
554    fn test_parse_javascript_function() {
555        let path = Path::new("test.js");
556        let pf = parse_source(path, LanguageId::JavaScript, "function hello() {}\n").unwrap();
557        assert_eq!(pf.definitions.len(), 1);
558        assert_eq!(pf.definitions[0].name, "hello");
559        assert_eq!(pf.definitions[0].kind, DefKind::Function);
560    }
561
562    #[test]
563    fn test_parse_typescript_interface() {
564        let path = Path::new("test.ts");
565        let pf = parse_source(path, LanguageId::TypeScript, "interface Foo { bar(): void }\n").unwrap();
566        let iface = pf.definitions.iter().find(|d| d.kind == DefKind::Trait);
567        assert!(iface.is_some(), "no trait definition found");
568        assert_eq!(iface.unwrap().name, "Foo");
569    }
570
571    #[test]
572    fn test_parse_go_function() {
573        let path = Path::new("test.go");
574        let pf = parse_source(path, LanguageId::Go, "package main\nfunc hello() {}\n").unwrap();
575        assert_eq!(pf.definitions.len(), 1);
576        assert_eq!(pf.definitions[0].name, "hello");
577        assert_eq!(pf.definitions[0].kind, DefKind::Function);
578    }
579
580    #[test]
581    fn test_parse_go_struct() {
582        let path = Path::new("test.go");
583        let pf = parse_source(path, LanguageId::Go, "package main\ntype Point struct {\n  x int\n}\n").unwrap();
584        let s = pf.definitions.iter().find(|d| d.kind == DefKind::Struct);
585        assert!(s.is_some());
586        assert_eq!(s.unwrap().name, "Point");
587    }
588
589    #[test]
590    fn test_language_from_path() {
591        assert_eq!(LanguageId::from_path(Path::new("foo.rs")), Some(LanguageId::Rust));
592        assert_eq!(LanguageId::from_path(Path::new("foo.py")), Some(LanguageId::Python));
593        assert_eq!(LanguageId::from_path(Path::new("foo.js")), Some(LanguageId::JavaScript));
594        assert_eq!(LanguageId::from_path(Path::new("foo.ts")), Some(LanguageId::TypeScript));
595        assert_eq!(LanguageId::from_path(Path::new("foo.tsx")), Some(LanguageId::Tsx));
596        assert_eq!(LanguageId::from_path(Path::new("foo.go")), Some(LanguageId::Go));
597        assert_eq!(LanguageId::from_path(Path::new("foo.c")), Some(LanguageId::C));
598        assert_eq!(LanguageId::from_path(Path::new("foo.h")), Some(LanguageId::C));
599        assert_eq!(LanguageId::from_path(Path::new("foo.cpp")), Some(LanguageId::Cpp));
600        assert_eq!(LanguageId::from_path(Path::new("foo.hpp")), Some(LanguageId::Cpp));
601        assert_eq!(LanguageId::from_path(Path::new("foo.java")), Some(LanguageId::Java));
602        assert_eq!(LanguageId::from_path(Path::new("foo.rb")), Some(LanguageId::Ruby));
603        assert_eq!(LanguageId::from_path(Path::new("foo.zig")), Some(LanguageId::Zig));
604        assert_eq!(LanguageId::from_path(Path::new("foo.sh")), Some(LanguageId::Bash));
605        assert_eq!(LanguageId::from_path(Path::new("foo.bash")), Some(LanguageId::Bash));
606        assert_eq!(LanguageId::from_path(Path::new("foo.md")), None);
607        assert_eq!(LanguageId::from_path(Path::new("foo")), None);
608    }
609
610    #[test]
611    fn test_parse_c_function() {
612        let path = Path::new("test.c");
613        let pf = parse_source(path, LanguageId::C, "int add(int a, int b) { return a + b; }\n").unwrap();
614        assert_eq!(pf.definitions.len(), 1);
615        assert_eq!(pf.definitions[0].name, "add");
616        assert_eq!(pf.definitions[0].kind, DefKind::Function);
617    }
618
619    #[test]
620    fn test_parse_c_struct() {
621        let path = Path::new("test.c");
622        let pf = parse_source(path, LanguageId::C, "struct Point { int x; int y; };\n").unwrap();
623        let s = pf.definitions.iter().find(|d| d.kind == DefKind::Struct);
624        assert!(s.is_some());
625        assert_eq!(s.unwrap().name, "Point");
626    }
627
628    #[test]
629    fn test_parse_c_include() {
630        let path = Path::new("test.c");
631        let pf = parse_source(path, LanguageId::C, "#include <stdio.h>\n#include \"myheader.h\"\n").unwrap();
632        assert_eq!(pf.imports.len(), 2);
633        let names: Vec<_> = pf.imports.iter().map(|i| i.name.as_str()).collect();
634        assert!(names.contains(&"<stdio.h>"));
635        assert!(names.contains(&"\"myheader.h\""));
636    }
637
638    #[test]
639    fn test_parse_cpp_class() {
640        let path = Path::new("test.cpp");
641        let pf = parse_source(path, LanguageId::Cpp, "class MyClass {\npublic:\n  int getValue() { return 42; }\n};\n").unwrap();
642        let cls = pf.definitions.iter().find(|d| d.kind == DefKind::Class);
643        assert!(cls.is_some(), "no class found in {:?}", pf.definitions);
644        assert_eq!(cls.unwrap().name, "MyClass");
645    }
646
647    #[test]
648    fn test_parse_java_class() {
649        let path = Path::new("Test.java");
650        let pf = parse_source(path, LanguageId::Java, "public class Test {\n  public void hello() {}\n}\n").unwrap();
651        let cls = pf.definitions.iter().find(|d| d.kind == DefKind::Class);
652        assert!(cls.is_some());
653        assert_eq!(cls.unwrap().name, "Test");
654    }
655
656    #[test]
657    fn test_parse_ruby_method() {
658        let path = Path::new("test.rb");
659        let pf = parse_source(path, LanguageId::Ruby, "def hello(name)\n  puts name\nend\n").unwrap();
660        assert_eq!(pf.definitions.len(), 1);
661        assert_eq!(pf.definitions[0].name, "hello");
662        assert_eq!(pf.definitions[0].kind, DefKind::Method);
663    }
664
665    #[test]
666    fn test_parse_bash_function() {
667        let path = Path::new("test.sh");
668        let pf = parse_source(path, LanguageId::Bash, "function hello {\n  echo world\n}\n").unwrap();
669        assert_eq!(pf.definitions.len(), 1);
670        assert_eq!(pf.definitions[0].name, "hello");
671        assert_eq!(pf.definitions[0].kind, DefKind::Function);
672    }
673
674    #[test]
675    fn test_parse_bash_function_parens() {
676        let path = Path::new("test.sh");
677        let pf = parse_source(path, LanguageId::Bash, "hello() {\n  echo world\n}\n").unwrap();
678        assert_eq!(pf.definitions.len(), 1);
679        assert_eq!(pf.definitions[0].name, "hello");
680        assert_eq!(pf.definitions[0].kind, DefKind::Function);
681    }
682
683    #[test]
684    fn test_parse_bash_no_false_positives_on_commands() {
685        let path = Path::new("test.sh");
686        let pf = parse_source(path, LanguageId::Bash, "echo hello\nls -la\nsource utils.sh\n").unwrap();
687        assert_eq!(pf.definitions.len(), 0);
688    }
689
690    #[test]
691    fn test_parse_zig_function() {
692        let path = Path::new("test.zig");
693        let pf = parse_source(path, LanguageId::Zig, "fn hello() void {}\n").unwrap();
694        assert_eq!(pf.definitions.len(), 1);
695        assert_eq!(pf.definitions[0].name, "hello");
696        assert_eq!(pf.definitions[0].kind, DefKind::Function);
697    }
698
699    #[test]
700    fn test_parse_empty_file() {
701        let pf = parse_rust("");
702        assert_eq!(pf.definitions.len(), 0);
703        assert_eq!(pf.references.len(), 0);
704        assert_eq!(pf.imports.len(), 0);
705    }
706
707    #[test]
708    fn test_parse_multiple_definitions() {
709        let src = "fn a() {}\nfn b() {}\nstruct C {}";
710        let pf = parse_rust(src);
711        assert_eq!(pf.definitions.len(), 3);
712    }
713
714    #[test]
715    fn test_call_line_numbers() {
716        let pf = parse_rust("fn foo() {\n  bar()\n}\n");
717        assert_eq!(pf.references.len(), 1);
718        assert_eq!(pf.references[0].name, "bar");
719        assert_eq!(pf.references[0].line, 2);
720    }
721
722    #[test]
723    fn test_unsupported_language() {
724        let result = parse_file(Path::new("foo.txt"));
725        assert!(result.is_err());
726    }
727
728    // -- Doc comment extraction tests --
729
730    #[test]
731    fn test_extract_doc_line() {
732        assert!(is_doc_line("/// docs"));
733        assert!(is_doc_line("//! inner docs"));
734        assert!(is_doc_line("// comment"));
735        assert!(is_doc_line("# comment"));
736        assert!(is_doc_line("## doc"));
737        assert!(is_doc_line("//\t tabbed"));
738        assert!(!is_doc_line("#[derive(Debug)]"));
739        assert!(!is_doc_line("fn hello() {}"));
740        assert!(!is_doc_line("pub struct Foo;"));
741    }
742
743    #[test]
744    fn test_rust_doc_comment_three_slash() {
745        let src = "/// Adds two numbers together\nfn add() {}";
746        let pf = parse_rust(src);
747        assert_eq!(pf.definitions.len(), 1);
748        assert_eq!(pf.definitions[0].name, "add");
749        assert_eq!(pf.definitions[0].doc.as_deref(), Some("/// Adds two numbers together"));
750    }
751
752    #[test]
753    fn test_rust_doc_comment_multiple_lines() {
754        let src = "/// Adds two numbers\n/// # Example\n/// ```\n/// let x = add(2, 3);\n/// ```\nfn add() {}";
755        let pf = parse_rust(src);
756        assert_eq!(pf.definitions[0].doc.as_deref(), Some(
757            "/// Adds two numbers\n/// # Example\n/// ```\n/// let x = add(2, 3);\n/// ```"
758        ));
759    }
760
761    #[test]
762    fn test_rust_doc_with_attributes() {
763        let src = "/// Doc comment\n#[inline]\nfn foo() {}";
764        let pf = parse_rust(src);
765        assert_eq!(pf.definitions[0].doc.as_deref(), Some("/// Doc comment"));
766    }
767
768    #[test]
769    fn test_rust_block_doc_comment() {
770        let src = "/** Documentation */\nfn foo() {}";
771        let pf = parse_rust(src);
772        assert_eq!(pf.definitions[0].doc.as_deref(), Some("/** Documentation */"));
773    }
774
775    #[test]
776    fn test_rust_block_doc_multiline() {
777        let src = "/**\n * Documentation\n */\nfn foo() {}";
778        let pf = parse_rust(src);
779        let doc = pf.definitions[0].doc.as_deref().unwrap();
780        assert!(doc.contains("/**"));
781        assert!(doc.contains("*/"));
782        assert!(doc.contains("Documentation"));
783    }
784
785    #[test]
786    fn test_no_doc_comment() {
787        let pf = parse_rust("fn plain() {}");
788        assert!(pf.definitions[0].doc.is_none());
789    }
790
791    #[test]
792    fn test_def_on_line_one() {
793        let pf = parse_rust("fn top() {}");
794        assert!(pf.definitions[0].doc.is_none());
795    }
796
797    #[test]
798    fn test_struct_with_doc() {
799        let src = "/// A point in 2D space\nstruct Point { x: i32, y: i32 }";
800        let pf = parse_rust(src);
801        assert_eq!(pf.definitions[0].name, "Point");
802        assert_eq!(pf.definitions[0].doc.as_deref(), Some("/// A point in 2D space"));
803    }
804
805    #[test]
806    fn test_python_comment_doc() {
807        let path = Path::new("test.py");
808        let src = "# Add two numbers together\ndef add(a, b):\n    return a + b";
809        let pf = parse_source(path, LanguageId::Python, src).unwrap();
810        assert_eq!(pf.definitions[0].name, "add");
811        assert_eq!(pf.definitions[0].doc.as_deref(), Some("# Add two numbers together"));
812    }
813
814    #[test]
815    fn test_python_no_doc() {
816        let path = Path::new("test.py");
817        let pf = parse_source(path, LanguageId::Python, "def bare():\n    pass").unwrap();
818        assert!(pf.definitions[0].doc.is_none());
819    }
820
821    #[test]
822    fn test_jsdoc_block() {
823        let path = Path::new("test.js");
824        let src = "/** Calculate the total */\nfunction total() {}";
825        let pf = parse_source(path, LanguageId::JavaScript, src).unwrap();
826        assert_eq!(pf.definitions[0].doc.as_deref(), Some("/** Calculate the total */"));
827    }
828
829    #[test]
830    fn test_doc_with_blank_line() {
831        let pf = parse_rust("/// doc comment\n\nfn spaced() {}");
832        assert!(pf.definitions[0].doc.is_some());
833    }
834
835    #[test]
836    fn test_double_dash_comment_skipped() {
837        let pf = parse_rust("x = 1;\nfn later() {}");
838        assert!(pf.definitions[0].doc.is_none());
839    }
840}