Skip to main content

sift/
parser.rs

1use anyhow::{Context, Result};
2use serde::{Deserialize, Serialize};
3use std::path::Path;
4use tree_sitter::{Language, Parser, Query, QueryCursor, StreamingIterator};
5
/// Source languages this parser knows how to handle.
///
/// A value selects both the tree-sitter grammar and the set of query
/// patterns used to pull definitions, references and imports out of a file.
/// `Hash` is derived so the language can be used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LanguageId {
    /// Rust (`.rs`).
    Rust,
    /// Python (`.py`).
    Python,
    /// JavaScript, including JSX (`.js`, `.jsx`).
    JavaScript,
    /// TypeScript (`.ts`).
    TypeScript,
    /// TSX (`.tsx`).
    Tsx,
    /// Go (`.go`).
    Go,
    /// C sources and headers (`.c`, `.h`).
    C,
    /// C++ sources and headers (`.cpp`, `.cxx`, `.cc`, `.hpp`, `.hh`, `.hxx`).
    Cpp,
    /// Java (`.java`).
    Java,
    /// Ruby (`.rb`).
    Ruby,
    /// Zig (`.zig`).
    Zig,
}
20
21impl LanguageId {
22    pub fn from_path(path: &Path) -> Option<Self> {
23        let ext = path.extension()?.to_str()?;
24        match ext {
25            "rs" => Some(Self::Rust),
26            "py" => Some(Self::Python),
27            "js" | "jsx" => Some(Self::JavaScript),
28            "ts" => Some(Self::TypeScript),
29            "tsx" => Some(Self::Tsx),
30            "go" => Some(Self::Go),
31            "c" | "h" => Some(Self::C),
32            "cpp" | "cxx" | "cc" | "hpp" | "hh" | "hxx" => Some(Self::Cpp),
33            "java" => Some(Self::Java),
34            "rb" => Some(Self::Ruby),
35            "zig" => Some(Self::Zig),
36            _ => None,
37        }
38    }
39
40    fn grammar(&self) -> Language {
41        match self {
42            Self::Rust => Language::new(tree_sitter_rust::LANGUAGE),
43            Self::Python => Language::new(tree_sitter_python::LANGUAGE),
44            Self::JavaScript => Language::new(tree_sitter_javascript::LANGUAGE),
45            Self::TypeScript => Language::new(tree_sitter_typescript::LANGUAGE_TYPESCRIPT),
46            Self::Tsx => Language::new(tree_sitter_typescript::LANGUAGE_TSX),
47            Self::Go => Language::new(tree_sitter_go::LANGUAGE),
48            Self::C => Language::new(tree_sitter_c::LANGUAGE),
49            Self::Cpp => Language::new(tree_sitter_cpp::LANGUAGE),
50            Self::Java => Language::new(tree_sitter_java::LANGUAGE),
51            Self::Ruby => Language::new(tree_sitter_ruby::LANGUAGE),
52            Self::Zig => Language::new(tree_sitter_zig::LANGUAGE),
53        }
54    }
55
56    /// Returns (capture_kind, query_pattern) pairs.
57    /// Each pattern is tried independently; failed patterns are silently skipped.
58    /// All definition patterns capture both @name (for the identifier) and @node (for the whole definition).
59    fn patterns(&self) -> Vec<(CaptureKind, &'static str)> {
60        match self {
61            Self::Rust => vec![
62                (CaptureKind::Def(DefKind::Function), "(function_item name: (identifier) @name) @node"),
63                (CaptureKind::Def(DefKind::Struct), "(struct_item name: (type_identifier) @name) @node"),
64                (CaptureKind::Def(DefKind::Trait), "(trait_item name: (type_identifier) @name) @node"),
65                (CaptureKind::Def(DefKind::Impl), "(impl_item type: (type_identifier) @name) @node"),
66                (CaptureKind::Def(DefKind::Enum), "(enum_item name: (type_identifier) @name) @node"),
67                (CaptureKind::Def(DefKind::TypeAlias), "(type_item name: (type_identifier) @name) @node"),
68                (CaptureKind::Def(DefKind::Constant), "(const_item name: (identifier) @name) @node"),
69                (CaptureKind::Def(DefKind::Static), "(static_item name: (identifier) @name) @node"),
70                (CaptureKind::Def(DefKind::Function), "(function_signature_item name: (identifier) @name) @node"),
71                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
72                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (field_expression field: (field_identifier) @name))"),
73                (CaptureKind::Import, "(use_declaration (scoped_identifier name: (identifier) @name))"),
74                (CaptureKind::Import, "(use_declaration (scoped_use_list list: (use_list (identifier) @name)))"),
75                (CaptureKind::Import, "(use_declaration argument: (use_as_clause alias: (identifier) @name))"),
76                (CaptureKind::Import, "(use_declaration argument: (identifier) @name)"),
77            ],
78            Self::Python => vec![
79                (CaptureKind::Def(DefKind::Function), "(function_definition name: (identifier) @name) @node"),
80                (CaptureKind::Def(DefKind::Class), "(class_definition name: (identifier) @name) @node"),
81                (CaptureKind::Ref(RefKind::Call), "(call function: (identifier) @name)"),
82                (CaptureKind::Import, "(import_statement name: (dotted_name) @name)"),
83                (CaptureKind::Import, "(import_from_statement name: (dotted_name) @name)"),
84            ],
85            Self::JavaScript => vec![
86                (CaptureKind::Def(DefKind::Function), "(function_declaration name: (identifier) @name) @node"),
87                (CaptureKind::Def(DefKind::Class), "(class_declaration name: (identifier) @name) @node"),
88                (CaptureKind::Def(DefKind::Method), "(method_definition name: (property_identifier) @name) @node"),
89                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
90                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (member_expression property: (property_identifier) @name))"),
91                (CaptureKind::Import, "(import_statement source: (string) (import_clause name: (identifier) @name))"),
92                (CaptureKind::Import, "(import_statement source: (string) (import_clause (named_imports (import_specifier name: (identifier) @name))))"),
93            ],
94            Self::TypeScript | Self::Tsx => vec![
95                (CaptureKind::Def(DefKind::Function), "(function_declaration name: (identifier) @name) @node"),
96                (CaptureKind::Def(DefKind::Class), "(class_declaration name: (identifier) @name) @node"),
97                (CaptureKind::Def(DefKind::Method), "(method_definition name: (property_identifier) @name) @node"),
98                (CaptureKind::Def(DefKind::Trait), "(interface_declaration name: (type_identifier) @name) @node"),
99                (CaptureKind::Def(DefKind::TypeAlias), "(type_alias_declaration name: (type_identifier) @name) @node"),
100                (CaptureKind::Def(DefKind::Enum), "(enum_declaration name: (identifier) @name) @node"),
101                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
102                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (member_expression property: (property_identifier) @name))"),
103                (CaptureKind::Import, "(import_statement source: (string) (import_clause name: (identifier) @name))"),
104                (CaptureKind::Import, "(import_statement source: (string) (import_clause (named_imports (import_specifier name: (identifier) @name))))"),
105            ],
106            Self::Go => vec![
107                (CaptureKind::Def(DefKind::Function), "(function_declaration name: (identifier) @name) @node"),
108                (CaptureKind::Def(DefKind::Method), "(method_declaration name: (field_identifier) @name) @node"),
109                (CaptureKind::Def(DefKind::Struct), "(type_declaration (type_spec name: (type_identifier) @name)) @node"),
110                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
111                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (selector_expression field: (field_identifier) @name))"),
112                (CaptureKind::Import, "(import_declaration (import_spec name: (package_identifier)? path: (interpreted_string_literal) @name))"),
113            ],
114            Self::C => vec![
115                (CaptureKind::Def(DefKind::Function), "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @node"),
116                (CaptureKind::Def(DefKind::Struct), "(struct_specifier name: (type_identifier) @name) @node"),
117                (CaptureKind::Def(DefKind::Struct), "(union_specifier name: (type_identifier) @name) @node"),
118                (CaptureKind::Def(DefKind::Enum), "(enum_specifier name: (type_identifier) @name) @node"),
119                (CaptureKind::Def(DefKind::TypeAlias), "(type_definition declarator: (type_identifier) @name) @node"),
120                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
121                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (field_expression field: (field_identifier) @name))"),
122                (CaptureKind::Import, "(preproc_include path: (string_literal) @name)"),
123                (CaptureKind::Import, "(preproc_include path: (system_lib_string) @name)"),
124            ],
125            Self::Cpp => vec![
126                (CaptureKind::Def(DefKind::Function), "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @node"),
127                (CaptureKind::Def(DefKind::Function), "(template_declaration declaration: (function_definition declarator: (function_declarator declarator: (identifier) @name))) @node"),
128                (CaptureKind::Def(DefKind::Class), "(class_specifier name: (type_identifier) @name) @node"),
129                (CaptureKind::Def(DefKind::Struct), "(struct_specifier name: (type_identifier) @name) @node"),
130                (CaptureKind::Def(DefKind::Enum), "(enum_specifier name: (type_identifier) @name) @node"),
131                (CaptureKind::Def(DefKind::TypeAlias), "(type_definition declarator: (type_identifier) @name) @node"),
132                (CaptureKind::Def(DefKind::TypeAlias), "(alias_declaration name: (identifier) @name) @node"),
133                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
134                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (field_expression field: (field_identifier) @name))"),
135                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (qualified_identifier name: (identifier) @name))"),
136                (CaptureKind::Import, "(preproc_include path: (string_literal) @name)"),
137                (CaptureKind::Import, "(preproc_include path: (system_lib_string) @name)"),
138            ],
139            Self::Java => vec![
140                (CaptureKind::Def(DefKind::Class), "(class_declaration name: (identifier) @name) @node"),
141                (CaptureKind::Def(DefKind::Trait), "(interface_declaration name: (identifier) @name) @node"),
142                (CaptureKind::Def(DefKind::Method), "(method_declaration name: (identifier) @name) @node"),
143                (CaptureKind::Def(DefKind::Enum), "(enum_declaration name: (identifier) @name) @node"),
144                (CaptureKind::Def(DefKind::Class), "(record_declaration name: (identifier) @name) @node"),
145                (CaptureKind::Ref(RefKind::Call), "(method_invocation name: (identifier) @name)"),
146                (CaptureKind::Import, "(import_declaration name: (scoped_identifier name: (identifier) @name))"),
147            ],
148            Self::Ruby => vec![
149                (CaptureKind::Def(DefKind::Method), "(method name: (identifier) @name) @node"),
150                (CaptureKind::Def(DefKind::Method), "(singleton_method name: (identifier) @name) @node"),
151                (CaptureKind::Def(DefKind::Class), "(class name: (constant) @name) @node"),
152                (CaptureKind::Def(DefKind::Class), "(module name: (constant) @name) @node"),
153                (CaptureKind::Ref(RefKind::Call), "(call method: (identifier) @name)"),
154                (CaptureKind::Import, "(require path: (string) @name)"),
155            ],
156            Self::Zig => vec![
157                (CaptureKind::Def(DefKind::Function), "(function_declaration name: (identifier) @name) @node"),
158                (CaptureKind::Def(DefKind::Struct), "(container_declaration name: (identifier) @name (container_kind struct)) @node"),
159                (CaptureKind::Def(DefKind::Enum), "(container_declaration name: (identifier) @name (container_kind enum)) @node"),
160                (CaptureKind::Def(DefKind::TypeAlias), "(type_declaration name: (identifier) @name) @node"),
161                (CaptureKind::Def(DefKind::Constant), "(variable_declaration name: (identifier) @name (container_kind const)) @node"),
162                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (identifier) @name)"),
163                (CaptureKind::Ref(RefKind::Call), "(call_expression function: (field_expression field: (identifier) @name))"),
164            ],
165        }
166    }
167}
168
169#[derive(Debug, Clone, PartialEq, Eq)]
170pub enum CaptureKind {
171    Def(DefKind),
172    Ref(RefKind),
173    Import,
174}
175
176#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
177pub enum DefKind {
178    Function,
179    Struct,
180    Trait,
181    Impl,
182    Enum,
183    TypeAlias,
184    Constant,
185    Static,
186    Class,
187    Method,
188}
189
190#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
191pub enum RefKind {
192    Call,
193}
194
195#[derive(Debug, Clone)]
196pub struct ParsedFile {
197    pub path: std::path::PathBuf,
198    pub language: LanguageId,
199    pub definitions: Vec<ParsedDef>,
200    pub references: Vec<ParsedRef>,
201    pub imports: Vec<ParsedImport>,
202}
203
204#[derive(Debug, Clone)]
205pub struct ParsedDef {
206    pub name: String,
207    pub kind: DefKind,
208    pub start_line: usize,
209    pub end_line: usize,
210    pub doc: Option<String>,
211}
212
213#[derive(Debug, Clone)]
214pub struct ParsedRef {
215    pub name: String,
216    pub kind: RefKind,
217    pub line: usize,
218}
219
/// A name captured from an import-like statement.
///
/// Derives `PartialEq`/`Eq`/`Hash` so imports can be compared and deduplicated.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ParsedImport {
    /// Captured text exactly as it appears in the source (for C includes this
    /// keeps the surrounding `<...>` or quotes).
    pub name: String,
}
224
225pub fn parse_file(path: &Path) -> Result<ParsedFile> {
226    let language = LanguageId::from_path(path).context("unsupported language")?;
227    let source = std::fs::read_to_string(path)
228        .with_context(|| format!("reading {}", path.display()))?;
229    parse_source(path, language, &source)
230}
231
232pub fn parse_source(path: &Path, language: LanguageId, source: &str) -> Result<ParsedFile> {
233    let mut parser = Parser::new();
234    parser
235        .set_language(&language.grammar())
236        .context("setting language")?;
237
238    let tree = parser.parse(source, None).context("parsing")?;
239    let root = tree.root_node();
240
241    let mut definitions: Vec<ParsedDef> = Vec::new();
242    let mut references: Vec<ParsedRef> = Vec::new();
243    let mut imports: Vec<ParsedImport> = Vec::new();
244
245    let source_bytes = source.as_bytes();
246
247    for (kind, pattern_str) in language.patterns() {
248        let Ok(query) = Query::new(&language.grammar(), pattern_str) else {
249            continue;
250        };
251        process_pattern(
252            &query,
253            &kind,
254            root,
255            source_bytes,
256            &mut definitions,
257            &mut references,
258            &mut imports,
259        );
260    }
261
262    // Extract doc comments for each definition
263    for def in &mut definitions {
264        if def.doc.is_none() {
265            def.doc = extract_doc_comment(source, def.start_line);
266        }
267    }
268
269    Ok(ParsedFile {
270        path: path.to_path_buf(),
271        language,
272        definitions,
273        references,
274        imports,
275    })
276}
277
278fn process_pattern(
279    query: &Query,
280    kind: &CaptureKind,
281    root: tree_sitter::Node<'_>,
282    source_bytes: &[u8],
283    definitions: &mut Vec<ParsedDef>,
284    references: &mut Vec<ParsedRef>,
285    imports: &mut Vec<ParsedImport>,
286) {
287    let caps = query.capture_names();
288    let name_idx = caps.iter().position(|n| *n == "name").map(|i| i as u32);
289    let node_idx = caps.iter().position(|n| *n == "node").map(|i| i as u32);
290    let Some(name_capture_idx) = name_idx else { return };
291
292    let mut cursor = QueryCursor::new();
293    let mut query_matches = cursor.matches(query, root, source_bytes);
294
295    while let Some(match_) = query_matches.next() {
296        let mut name_node = None;
297        let mut span_node = None;
298        for capture in match_.captures {
299            if capture.index == node_idx.unwrap_or(u32::MAX) {
300                span_node = Some(capture.node);
301            } else if capture.index == name_capture_idx {
302                name_node = Some(capture.node);
303            }
304        }
305
306        let Some(name_node) = name_node else { continue };
307        let Ok(name) = name_node.utf8_text(source_bytes) else { continue };
308        let name = name.to_string();
309        let line = name_node.start_position().row + 1;
310
311        push_capture(kind, &name, line, span_node, definitions, references, imports);
312    }
313}
314
315fn push_capture(
316    kind: &CaptureKind,
317    name: &str,
318    line: usize,
319    span_node: Option<tree_sitter::Node<'_>>,
320    definitions: &mut Vec<ParsedDef>,
321    references: &mut Vec<ParsedRef>,
322    imports: &mut Vec<ParsedImport>,
323) {
324    match kind {
325        CaptureKind::Def(def_kind) => {
326            let (start_line, end_line) = if let Some(node) = span_node {
327                (node.start_position().row + 1, node.end_position().row + 1)
328            } else {
329                (line, line)
330            };
331            definitions.push(ParsedDef {
332                name: name.to_string(),
333                kind: *def_kind,
334                start_line,
335                end_line,
336                doc: None,
337            });
338        }
339        CaptureKind::Ref(_) => {
340            references.push(ParsedRef {
341                name: name.to_string(),
342                kind: RefKind::Call,
343                line,
344            });
345        }
346        CaptureKind::Import => {
347            imports.push(ParsedImport {
348                name: name.to_string(),
349            });
350        }
351    }
352}
353
/// Returns `true` when `trimmed` (a line with surrounding whitespace removed)
/// looks like a line comment that may belong to a doc comment.
///
/// Accepted forms are `///`, `//!`, `//` followed by a space or tab, and any
/// `#` comment except `#[...]` and `#!...`. A bare `//` or `#` — the empty
/// line inside a multi-paragraph comment — is accepted too, so such comments
/// are not cut short.
fn is_doc_line(trimmed: &str) -> bool {
    if trimmed == "//" || trimmed == "#" {
        return true;
    }
    if trimmed.starts_with("///")
        || trimmed.starts_with("//!")
        || trimmed.starts_with("// ")
        || trimmed.starts_with("//\t")
    {
        return true;
    }
    // `#` comments, but not `#[attr]`, `#![attr]` or `#!shebang` lines.
    match trimmed.as_bytes() {
        [b'#', second, ..] => *second != b'[' && *second != b'!',
        _ => false,
    }
}

/// Returns `true` when `trimmed` opens a doc-style block comment (`/**` or `/*!`).
fn is_block_start(trimmed: &str) -> bool {
    trimmed.starts_with("/**") || trimmed.starts_with("/*!")
}

/// Extract doc comment text preceding the given definition line (1-indexed).
///
/// Walks upward from the line above the definition and collects:
///
/// * consecutive line comments (see `is_doc_line`),
/// * or one block comment, either on a single line or spanning several.
///
/// Blank lines and Rust attributes between the comment and the definition
/// are skipped. The comment text is returned verbatim, markers included, in
/// source order. Returns `None` when nothing is found, when `def_line` is the
/// first line, or when `def_line` lies beyond the end of `source`.
fn extract_doc_comment(source: &str, def_line: usize) -> Option<String> {
    if def_line <= 1 {
        return None;
    }
    let lines: Vec<&str> = source.lines().collect();
    let mut cur = def_line - 2; // 0-indexed, line before def
    if cur >= lines.len() {
        // The definition line is past the end of the source; nothing to inspect.
        return None;
    }

    // Collected bottom-up; reversed before returning.
    let mut collected: Vec<&str> = Vec::new();

    loop {
        let raw = lines[cur];
        let trimmed = raw.trim();

        if trimmed.is_empty() {
            // Blank lines directly above the definition are skipped; a blank
            // line above an already collected comment ends it.
            if !collected.is_empty() || cur == 0 {
                break;
            }
            cur -= 1;
            continue;
        }

        if is_doc_line(trimmed) {
            collected.push(raw);
            if cur == 0 {
                break;
            }
            cur -= 1;
            continue;
        }

        // Doc block comment that opens and closes on this line.
        if is_block_start(trimmed) && trimmed.contains("*/") {
            collected.push(raw);
            break;
        }

        if trimmed.ends_with("*/") {
            if trimmed.starts_with("/*") {
                // A plain block comment contained on a single line.
                collected.push(raw);
            } else if !trimmed.contains("/*") {
                // Closing line of a multi-line block: find the opener above.
                // If none exists the `*/` is stray and nothing is collected.
                let opener = (0..cur)
                    .rev()
                    .find(|&i| lines[i].trim_start().starts_with("/*"));
                if let Some(start) = opener {
                    collected.extend(lines[start..=cur].iter().rev().copied());
                }
            }
            // Otherwise this is code followed by a trailing comment: not documentation.
            break;
        }

        // Block comment start without end on same line
        if is_block_start(trimmed) {
            collected.push(raw);
            break;
        }

        // Rust attributes between doc comment and definition
        if trimmed.starts_with("#[") || trimmed.starts_with("#![") {
            if cur == 0 {
                break;
            }
            cur -= 1;
            continue;
        }

        break;
    }

    if collected.is_empty() {
        None
    } else {
        collected.reverse();
        Some(collected.join("\n"))
    }
}
460
#[cfg(test)]
mod tests {
    use super::*;

    // -- Helpers --

    /// Parses `source` as `language`, labelled with `file_name`; panics on failure.
    fn parse_as(language: LanguageId, file_name: &str, source: &str) -> ParsedFile {
        parse_source(Path::new(file_name), language, source).unwrap()
    }

    /// Shorthand for parsing Rust source.
    fn parse_rust(source: &str) -> ParsedFile {
        parse_as(LanguageId::Rust, "test.rs", source)
    }

    /// Asserts that exactly one definition exists and that it has `name` and `kind`.
    fn assert_only_def(pf: &ParsedFile, name: &str, kind: DefKind) {
        assert_eq!(pf.definitions.len(), 1);
        let def = &pf.definitions[0];
        assert_eq!(def.name, name);
        assert_eq!(def.kind, kind);
    }

    /// First definition of the given kind, if any.
    fn first_def_of_kind(pf: &ParsedFile, kind: DefKind) -> Option<&ParsedDef> {
        pf.definitions.iter().find(|d| d.kind == kind)
    }

    /// Names of all recorded references.
    fn call_names(pf: &ParsedFile) -> Vec<&str> {
        pf.references.iter().map(|r| r.name.as_str()).collect()
    }

    /// Names of all recorded imports.
    fn import_names(pf: &ParsedFile) -> Vec<&str> {
        pf.imports.iter().map(|i| i.name.as_str()).collect()
    }

    /// Doc text attached to the first definition.
    fn first_doc(pf: &ParsedFile) -> Option<&str> {
        pf.definitions[0].doc.as_deref()
    }

    // -- Definition / reference / import extraction tests --

    #[test]
    fn test_parse_function_definition() {
        let pf = parse_rust("fn hello() {}");
        assert_only_def(&pf, "hello", DefKind::Function);
        assert_eq!(pf.definitions[0].start_line, 1);
        assert_eq!(pf.definitions[0].end_line, 1);
    }

    #[test]
    fn test_parse_struct_definition() {
        let pf = parse_rust("struct Point { x: i32, y: i32 }");
        assert_only_def(&pf, "Point", DefKind::Struct);
    }

    #[test]
    fn test_parse_trait_definition() {
        let pf = parse_rust("trait Foo { fn bar(&self); }");
        let found: Vec<(&str, DefKind)> = pf
            .definitions
            .iter()
            .map(|d| (d.name.as_str(), d.kind))
            .collect();
        // trait Foo + method bar
        assert_eq!(found, vec![("Foo", DefKind::Trait), ("bar", DefKind::Function)]);
    }

    #[test]
    fn test_parse_function_calls() {
        let pf = parse_rust("fn caller() { callee(); another() }");
        let calls = call_names(&pf);
        assert!(calls.contains(&"callee"));
        assert!(calls.contains(&"another"));
    }

    #[test]
    fn test_parse_method_calls() {
        let pf = parse_rust("fn caller() { foo.bar(); baz.qux() }");
        let calls = call_names(&pf);
        assert!(calls.contains(&"bar"), "method bar not found in calls: {:?}", calls);
        assert!(calls.contains(&"qux"), "method qux not found in calls: {:?}", calls);
    }

    #[test]
    fn test_parse_imports() {
        let pf = parse_rust("use std::collections::HashMap;");
        let imports = import_names(&pf);
        assert!(imports.contains(&"HashMap"), "imports: {:?}", imports);
    }

    #[test]
    fn test_parse_import_from_list() {
        let pf = parse_rust("use std::io::{BufRead, Write};");
        let imports = import_names(&pf);
        assert!(imports.contains(&"BufRead"), "imports: {:?}", imports);
        assert!(imports.contains(&"Write"), "imports: {:?}", imports);
    }

    #[test]
    fn test_parse_python_function() {
        let pf = parse_as(LanguageId::Python, "test.py", "def hello():\n    pass\n");
        assert_only_def(&pf, "hello", DefKind::Function);
    }

    #[test]
    fn test_parse_python_class() {
        let pf = parse_as(LanguageId::Python, "test.py", "class MyClass:\n    pass\n");
        assert_only_def(&pf, "MyClass", DefKind::Class);
    }

    #[test]
    fn test_parse_javascript_function() {
        let pf = parse_as(LanguageId::JavaScript, "test.js", "function hello() {}\n");
        assert_only_def(&pf, "hello", DefKind::Function);
    }

    #[test]
    fn test_parse_typescript_interface() {
        let pf = parse_as(LanguageId::TypeScript, "test.ts", "interface Foo { bar(): void }\n");
        let iface = first_def_of_kind(&pf, DefKind::Trait);
        assert!(iface.is_some(), "no trait definition found");
        assert_eq!(iface.unwrap().name, "Foo");
    }

    #[test]
    fn test_parse_go_function() {
        let pf = parse_as(LanguageId::Go, "test.go", "package main\nfunc hello() {}\n");
        assert_only_def(&pf, "hello", DefKind::Function);
    }

    #[test]
    fn test_parse_go_struct() {
        let pf = parse_as(
            LanguageId::Go,
            "test.go",
            "package main\ntype Point struct {\n  x int\n}\n",
        );
        let found = first_def_of_kind(&pf, DefKind::Struct);
        assert!(found.is_some());
        assert_eq!(found.unwrap().name, "Point");
    }

    #[test]
    fn test_language_from_path() {
        let expectations = [
            ("foo.rs", Some(LanguageId::Rust)),
            ("foo.py", Some(LanguageId::Python)),
            ("foo.js", Some(LanguageId::JavaScript)),
            ("foo.ts", Some(LanguageId::TypeScript)),
            ("foo.tsx", Some(LanguageId::Tsx)),
            ("foo.go", Some(LanguageId::Go)),
            ("foo.c", Some(LanguageId::C)),
            ("foo.h", Some(LanguageId::C)),
            ("foo.cpp", Some(LanguageId::Cpp)),
            ("foo.hpp", Some(LanguageId::Cpp)),
            ("foo.java", Some(LanguageId::Java)),
            ("foo.rb", Some(LanguageId::Ruby)),
            ("foo.zig", Some(LanguageId::Zig)),
            ("foo.md", None),
            ("foo", None),
        ];
        for (file_name, expected) in expectations {
            assert_eq!(LanguageId::from_path(Path::new(file_name)), expected);
        }
    }

    #[test]
    fn test_parse_c_function() {
        let pf = parse_as(LanguageId::C, "test.c", "int add(int a, int b) { return a + b; }\n");
        assert_only_def(&pf, "add", DefKind::Function);
    }

    #[test]
    fn test_parse_c_struct() {
        let pf = parse_as(LanguageId::C, "test.c", "struct Point { int x; int y; };\n");
        let found = first_def_of_kind(&pf, DefKind::Struct);
        assert!(found.is_some());
        assert_eq!(found.unwrap().name, "Point");
    }

    #[test]
    fn test_parse_c_include() {
        let pf = parse_as(
            LanguageId::C,
            "test.c",
            "#include <stdio.h>\n#include \"myheader.h\"\n",
        );
        assert_eq!(pf.imports.len(), 2);
        let names = import_names(&pf);
        assert!(names.contains(&"<stdio.h>"));
        assert!(names.contains(&"\"myheader.h\""));
    }

    #[test]
    fn test_parse_cpp_class() {
        let pf = parse_as(
            LanguageId::Cpp,
            "test.cpp",
            "class MyClass {\npublic:\n  int getValue() { return 42; }\n};\n",
        );
        let cls = first_def_of_kind(&pf, DefKind::Class);
        assert!(cls.is_some(), "no class found in {:?}", pf.definitions);
        assert_eq!(cls.unwrap().name, "MyClass");
    }

    #[test]
    fn test_parse_java_class() {
        let pf = parse_as(
            LanguageId::Java,
            "Test.java",
            "public class Test {\n  public void hello() {}\n}\n",
        );
        let cls = first_def_of_kind(&pf, DefKind::Class);
        assert!(cls.is_some());
        assert_eq!(cls.unwrap().name, "Test");
    }

    #[test]
    fn test_parse_ruby_method() {
        let pf = parse_as(LanguageId::Ruby, "test.rb", "def hello(name)\n  puts name\nend\n");
        assert_only_def(&pf, "hello", DefKind::Method);
    }

    #[test]
    fn test_parse_zig_function() {
        let pf = parse_as(LanguageId::Zig, "test.zig", "fn hello() void {}\n");
        assert_only_def(&pf, "hello", DefKind::Function);
    }

    #[test]
    fn test_parse_empty_file() {
        let pf = parse_rust("");
        assert_eq!(pf.definitions.len(), 0);
        assert_eq!(pf.references.len(), 0);
        assert_eq!(pf.imports.len(), 0);
    }

    #[test]
    fn test_parse_multiple_definitions() {
        let pf = parse_rust("fn a() {}\nfn b() {}\nstruct C {}");
        assert_eq!(pf.definitions.len(), 3);
    }

    #[test]
    fn test_call_line_numbers() {
        let pf = parse_rust("fn foo() {\n  bar()\n}\n");
        assert_eq!(pf.references.len(), 1);
        let call = &pf.references[0];
        assert_eq!(call.name, "bar");
        assert_eq!(call.line, 2);
    }

    #[test]
    fn test_unsupported_language() {
        assert!(parse_file(Path::new("foo.txt")).is_err());
    }

    // -- Doc comment extraction tests --

    #[test]
    fn test_extract_doc_line() {
        let accepted = [
            "/// docs",
            "//! inner docs",
            "// comment",
            "# comment",
            "## doc",
            "//\t tabbed",
        ];
        for line in accepted {
            assert!(is_doc_line(line));
        }

        let rejected = ["#[derive(Debug)]", "fn hello() {}", "pub struct Foo;"];
        for line in rejected {
            assert!(!is_doc_line(line));
        }
    }

    #[test]
    fn test_rust_doc_comment_three_slash() {
        let pf = parse_rust("/// Adds two numbers together\nfn add() {}");
        assert_eq!(pf.definitions.len(), 1);
        assert_eq!(pf.definitions[0].name, "add");
        assert_eq!(first_doc(&pf), Some("/// Adds two numbers together"));
    }

    #[test]
    fn test_rust_doc_comment_multiple_lines() {
        let pf = parse_rust(
            "/// Adds two numbers\n/// # Example\n/// ```\n/// let x = add(2, 3);\n/// ```\nfn add() {}",
        );
        assert_eq!(
            first_doc(&pf),
            Some("/// Adds two numbers\n/// # Example\n/// ```\n/// let x = add(2, 3);\n/// ```")
        );
    }

    #[test]
    fn test_rust_doc_with_attributes() {
        let pf = parse_rust("/// Doc comment\n#[inline]\nfn foo() {}");
        assert_eq!(first_doc(&pf), Some("/// Doc comment"));
    }

    #[test]
    fn test_rust_block_doc_comment() {
        let pf = parse_rust("/** Documentation */\nfn foo() {}");
        assert_eq!(first_doc(&pf), Some("/** Documentation */"));
    }

    #[test]
    fn test_rust_block_doc_multiline() {
        let pf = parse_rust("/**\n * Documentation\n */\nfn foo() {}");
        let doc = first_doc(&pf).unwrap();
        for fragment in ["/**", "*/", "Documentation"] {
            assert!(doc.contains(fragment));
        }
    }

    #[test]
    fn test_no_doc_comment() {
        let pf = parse_rust("fn plain() {}");
        assert!(first_doc(&pf).is_none());
    }

    #[test]
    fn test_def_on_line_one() {
        let pf = parse_rust("fn top() {}");
        assert!(first_doc(&pf).is_none());
    }

    #[test]
    fn test_struct_with_doc() {
        let pf = parse_rust("/// A point in 2D space\nstruct Point { x: i32, y: i32 }");
        assert_eq!(pf.definitions[0].name, "Point");
        assert_eq!(first_doc(&pf), Some("/// A point in 2D space"));
    }

    #[test]
    fn test_python_comment_doc() {
        let pf = parse_as(
            LanguageId::Python,
            "test.py",
            "# Add two numbers together\ndef add(a, b):\n    return a + b",
        );
        assert_eq!(pf.definitions[0].name, "add");
        assert_eq!(first_doc(&pf), Some("# Add two numbers together"));
    }

    #[test]
    fn test_python_no_doc() {
        let pf = parse_as(LanguageId::Python, "test.py", "def bare():\n    pass");
        assert!(first_doc(&pf).is_none());
    }

    #[test]
    fn test_jsdoc_block() {
        let pf = parse_as(
            LanguageId::JavaScript,
            "test.js",
            "/** Calculate the total */\nfunction total() {}",
        );
        assert_eq!(first_doc(&pf), Some("/** Calculate the total */"));
    }

    #[test]
    fn test_doc_with_blank_line() {
        // Blank lines between the comment and the definition are skipped.
        let pf = parse_rust("/// doc comment\n\nfn spaced() {}");
        assert!(first_doc(&pf).is_some());
    }

    #[test]
    fn test_double_dash_comment_skipped() {
        // A plain code line above the definition must not be taken as documentation.
        let pf = parse_rust("x = 1;\nfn later() {}");
        assert!(first_doc(&pf).is_none());
    }
}