Skip to main content

mcp_memory/code/
mod.rs

1//! Tree-sitter code-symbol indexing.
2//!
3//! Pure parsing layer: turns source files into [`ParsedFile`]s (definitions and
4//! references) and provides a gitignore-aware directory walk. It has no
5//! knowledge of the graph store — [`crate::actions::code`] maps the parsed
6//! output onto entities/relations and handles incremental hashing.
7
8pub mod lang;
9
10use std::path::{Path, PathBuf};
11use std::sync::atomic::AtomicUsize;
12
13pub use lang::{Lang, detect};
14
/// Files larger than this are skipped (parsing huge generated/minified files
/// is slow and rarely useful for a symbol map).
///
/// 10 MiB. [`walk`] takes its size limit as a parameter rather than reading
/// this constant, so callers are expected to pass it in.
pub const MAX_FILE_BYTES: u64 = 10 * 1024 * 1024;

/// Max defs emitted per file (safety cap against pathological inputs).
///
/// NOTE(review): [`parse_source`] does not apply this cap itself; presumably
/// the indexing action that consumes the parsed output enforces it — confirm
/// against the caller.
pub const MAX_SYMBOLS_PER_FILE: usize = 20_000;

/// Max tags (defs + refs) accepted from tree-sitter for a single file,
/// preventing OOM from pathological generated code.
///
/// [`parse_source`] stops collecting once defs + refs reach this count.
pub const MAX_TAGS_PER_FILE: usize = 100_000;

/// Max files collected by [`walk`] before stopping early.
pub const MAX_WALK_FILES: usize = 200_000;

/// Signatures and doc lines are capped to keep observations compact.
/// Both limits are counted in `char`s, not bytes.
const MAX_SIGNATURE_CHARS: usize = 512;
const MAX_DOC_CHARS: usize = 512;
32
/// A symbol definition extracted from a file.
///
/// Implements `PartialEq`/`Eq` so parsed output can be compared directly,
/// e.g. with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Def {
    /// Normalized kind: `function`, `method`, `class`, `module`, `constant`, …
    pub kind: String,
    /// Bare symbol name.
    pub name: String,
    /// 1-based line on which the definition starts (inclusive).
    pub line_start: usize,
    /// 1-based line on which the definition ends (inclusive).
    pub line_end: usize,
    /// First (declaration) line of the definition, trimmed.
    pub signature: String,
    /// First line of the associated doc comment, if any.
    pub doc: Option<String>,
}
48
/// A reference (call / type use) extracted from a file.
///
/// Implements `PartialEq`/`Eq` so parsed output can be compared directly,
/// e.g. with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Ref {
    /// Reference kind from the grammar's tags query, e.g. `call`, `type`.
    pub kind: String,
    /// Bare name referenced.
    pub name: String,
    /// 1-based line of the reference.
    pub line: usize,
}
59
60/// Parsed symbols for a single file.
61#[derive(Debug, Clone, Default)]
62pub struct ParsedFile {
63    pub defs: Vec<Def>,
64    pub refs: Vec<Ref>,
65}
66
67/// BLAKE3 content hash (hex) used for incremental change detection.
68pub fn hash_bytes(bytes: &[u8]) -> String {
69    blake3::hash(bytes).to_hex().to_string()
70}
71
/// Map a grammar-specific definition kind onto our small entity vocabulary.
///
/// Kinds that are not listed below (including `constant`) are returned
/// unchanged.
fn normalize_def_kind(raw: &str) -> &str {
    // Every type-like construct collapses to `class`.
    const CLASS_LIKE: [&str; 11] = [
        "class",
        "interface",
        "struct",
        "type",
        "enum",
        "trait",
        "union",
        "concept",
        "object",
        "annotation",
        "typealias",
    ];

    if matches!(raw, "function" | "macro") {
        "function"
    } else if matches!(raw, "method" | "delegate") {
        "method"
    } else if CLASS_LIKE.contains(&raw) {
        "class"
    } else if matches!(raw, "module" | "namespace") {
        "module"
    } else {
        raw
    }
}
84
85/// Extract the trimmed first line of `source` starting at byte offset `start`.
86fn first_line(source: &[u8], start: usize) -> String {
87    let end = source[start..]
88        .iter()
89        .position(|&b| b == b'\n')
90        .map(|p| start + p)
91        .unwrap_or(source.len());
92    let mut s = String::from_utf8_lossy(&source[start..end]).trim().to_string();
93    if s.chars().count() > MAX_SIGNATURE_CHARS {
94        s = s.chars().take(MAX_SIGNATURE_CHARS).collect::<String>() + "…";
95    }
96    s
97}
98
99fn clamp_doc(doc: &str) -> Option<String> {
100    let line = doc.lines().find(|l| !l.trim().is_empty())?.trim();
101    if line.is_empty() {
102        return None;
103    }
104    let s = if line.chars().count() > MAX_DOC_CHARS {
105        line.chars().take(MAX_DOC_CHARS).collect::<String>() + "…"
106    } else {
107        line.to_string()
108    };
109    Some(s)
110}
111
112/// Parse one in-memory source buffer into defs/refs. Returns an empty result
113/// for unsupported languages or unbuildable tag configs.
114pub fn parse_source(lang: Lang, source: &[u8]) -> ParsedFile {
115    let Some(config) = lang::config(lang) else {
116        return ParsedFile::default();
117    };
118
119    let mut ctx = tree_sitter_tags::TagsContext::new();
120    let cancel = AtomicUsize::new(0);
121    let (tags, _failed) = match ctx.generate_tags(config, source, Some(&cancel)) {
122        Ok(v) => v,
123        Err(_) => return ParsedFile::default(),
124    };
125
126    // Byte offset of the start of each line, for O(log n) byte→line lookups.
127    // `tag.range` spans the whole definition node (body included), while
128    // `tag.span` is only the name; we derive the def's line range from `range`.
129    let line_starts: Vec<usize> = std::iter::once(0)
130        .chain(source.iter().enumerate().filter(|&(_, &b)| b == b'\n').map(|(i, _)| i + 1))
131        .collect();
132    let line_of = |byte: usize| line_starts.partition_point(|&s| s <= byte).max(1);
133
134    let mut out = ParsedFile::default();
135    for tag in tags.flatten() {
136        if out.defs.len() + out.refs.len() >= MAX_TAGS_PER_FILE {
137            break;
138        }
139        let name = String::from_utf8_lossy(&source[tag.name_range.clone()]).to_string();
140        if name.is_empty() {
141            continue;
142        }
143        let kind = config.syntax_type_name(tag.syntax_type_id).to_string();
144        if tag.is_definition {
145            let end_byte = tag.range.end.saturating_sub(1).max(tag.range.start);
146            out.defs.push(Def {
147                kind: normalize_def_kind(&kind).to_string(),
148                name,
149                line_start: line_of(tag.range.start),
150                line_end: line_of(end_byte),
151                signature: first_line(source, tag.range.start),
152                doc: tag.docs.as_deref().and_then(clamp_doc),
153            });
154        } else {
155            out.refs.push(Ref {
156                kind,
157                name,
158                line: tag.span.start.row + 1,
159            });
160        }
161    }
162    out
163}
164
165/// Walk `root` (a file or directory) and collect indexable source files,
166/// honoring `.gitignore`/hidden-file rules and skipping oversized files.
167pub fn walk(root: &Path, max_bytes: u64) -> Vec<PathBuf> {
168    let mut files = Vec::new();
169    if root.is_file() {
170        if detect(root).is_some()
171            && std::fs::metadata(root).map(|m| m.len() <= max_bytes).unwrap_or(false)
172        {
173            files.push(root.to_path_buf());
174        }
175        return files;
176    }
177
178    let walker = ignore::WalkBuilder::new(root)
179        .standard_filters(true)
180        .hidden(true)
181        .git_ignore(true)
182        .git_global(true)
183        .require_git(false)
184        .filter_entry(|e| {
185            // Belt-and-suspenders: skip common build/vendor dirs even when no
186            // .gitignore is present.
187            let name = e.file_name().to_string_lossy();
188            !matches!(name.as_ref(), "target" | "node_modules" | ".git" | "dist" | "build")
189        })
190        .build();
191
192    for entry in walker.flatten() {
193        if files.len() >= MAX_WALK_FILES {
194            break;
195        }
196        let path = entry.path();
197        if !path.is_file() || detect(path).is_none() {
198            continue;
199        }
200        if std::fs::metadata(path).map(|m| m.len() > max_bytes).unwrap_or(true) {
201            continue;
202        }
203        files.push(path.to_path_buf());
204    }
205    files
206}
207
208#[cfg(test)]
209mod tests {
210    use super::*;
211
212    // -----------------------------------------------------------------------
213    // Parser unit tests — each new language's tags query is exercised.
214    // -----------------------------------------------------------------------
215
216    fn count_defs_of_kind(parsed: &ParsedFile, kind: &str) -> usize {
217        parsed.defs.iter().filter(|d| d.kind == kind).count()
218    }
219
220    fn find_def<'a>(parsed: &'a ParsedFile, name: &str) -> Option<&'a Def> {
221        parsed.defs.iter().find(|d| d.name == name)
222    }
223
224    // ── Rust ──────────────────────────────────────────────────────────────
225
226    #[test]
227    fn test_parse_rust() {
228        let src = b"/// Docs
229pub fn alpha(x: i32) -> i32 { x + 1 }
230
231fn beta() {}
232
233pub struct Thing { pub x: i32 }
234
235pub enum Color { Red, Blue }
236
237trait Foo { fn bar(&self); }
238";
239        let parsed = parse_source(Lang::Rust, src);
240        assert!(!parsed.defs.is_empty(), "expected defs");
241
242        let alpha = find_def(&parsed, "alpha").expect("alpha");
243        assert_eq!(alpha.kind, "function");
244        assert!(alpha.signature.contains("fn alpha"));
245
246        let beta = find_def(&parsed, "beta").expect("beta");
247        assert_eq!(beta.kind, "function");
248
249        let thing = find_def(&parsed, "Thing").expect("Thing");
250        assert_eq!(thing.kind, "class");
251
252        let color = find_def(&parsed, "Color").expect("Color");
253        assert_eq!(color.kind, "class");
254
255        let foo = find_def(&parsed, "Foo").expect("Foo");
256        assert_eq!(foo.kind, "class");
257    }
258
259    #[test]
260    fn test_parse_rust_calls() {
261        let src = b"fn alpha() { beta() + gamma() }
262fn beta() -> i32 { 1 }
263fn gamma() -> i32 { 2 }";
264        let parsed = parse_source(Lang::Rust, src);
265        let call_refs: Vec<&str> = parsed.refs.iter()
266            .filter(|r| r.kind == "call")
267            .map(|r| r.name.as_str())
268            .collect();
269        assert!(call_refs.contains(&"beta"), "alpha should call beta");
270        assert!(call_refs.contains(&"gamma"), "alpha should call gamma");
271    }
272
273    // ── Python ────────────────────────────────────────────────────────────
274
275    #[test]
276    fn test_parse_python() {
277        let src = b"def greet(name):
278    return 'hello ' + name
279
280class Greeter:
281    def hello(self):
282        return greet('world')
283
284MAX_RETRIES = 3
285";
286        let parsed = parse_source(Lang::Python, src);
287        assert_eq!(count_defs_of_kind(&parsed, "function"), 2);
288        assert_eq!(count_defs_of_kind(&parsed, "class"), 1);
289        assert_eq!(count_defs_of_kind(&parsed, "constant"), 1);
290        assert!(find_def(&parsed, "greet").is_some());
291        assert!(find_def(&parsed, "Greeter").is_some());
292    }
293
294    // ── JavaScript ─────────────────────────────────────────────────────────
295
296    #[test]
297    fn test_parse_javascript() {
298        let src = b"function alpha(x) { return beta(x) + 1; }
299class Thing { constructor(v) { this.v = v; } }
300";
301        let parsed = parse_source(Lang::JavaScript, src);
302        assert!(!parsed.defs.is_empty(), "JS should produce defs");
303        assert!(find_def(&parsed, "alpha").is_some(), "alpha function");
304        assert!(find_def(&parsed, "Thing").is_some(), "Thing class");
305    }
306
307    // ── Go ────────────────────────────────────────────────────────────────
308
309    #[test]
310    fn test_parse_go() {
311        let src = b"package main
312
313func alpha(x int) int { return beta(x) + 1 }
314
315func beta(x int) int { return x * 2 }
316
317type Thing struct { Value int }
318";
319        let parsed = parse_source(Lang::Go, src);
320        assert!(!parsed.defs.is_empty(), "Go should produce defs");
321        assert!(find_def(&parsed, "alpha").is_some(), "alpha function");
322        assert!(find_def(&parsed, "beta").is_some(), "beta function");
323        assert!(find_def(&parsed, "Thing").is_some(), "Thing type");
324    }
325
326    // ── Java ──────────────────────────────────────────────────────────────
327
328    #[test]
329    fn test_parse_java() {
330        let src = b"package com.example;
331
332class Hello {
333    private int x;
334
335    public int add(int a, int b) { return a + b; }
336}
337
338interface Worker {
339    void doWork();
340}
341";
342        let parsed = parse_source(Lang::Java, src);
343        assert!(!parsed.defs.is_empty(), "Java should produce defs");
344        assert!(find_def(&parsed, "Hello").is_some(), "Hello class");
345        assert!(find_def(&parsed, "Worker").is_some(), "Worker interface");
346    }
347
348    // ═══════════════════════════════════════════════════════════════════════
349    // NEW LANGUAGE TESTS
350    // ═══════════════════════════════════════════════════════════════════════
351
352    // ── C ─────────────────────────────────────────────────────────────────
353
354    #[test]
355    fn test_parse_c() {
356        let src = b"#include <stdio.h>
357
358int add(int a, int b) {
359    return a + b;
360}
361
362struct Point {
363    int x;
364    int y;
365};
366
367enum Color { RED, GREEN, BLUE };
368
369#define MAX_SIZE 1024
370
371void greet(const char *name) {
372    printf(\"Hello %s\", name);
373}
374";
375        let parsed = parse_source(Lang::C, src);
376        // C functions: add, greet
377        assert_eq!(count_defs_of_kind(&parsed, "function"), 2);
378        // struct Point
379        assert_eq!(count_defs_of_kind(&parsed, "class"), 2); // struct + enum
380        assert!(find_def(&parsed, "add").is_some());
381        assert!(find_def(&parsed, "greet").is_some());
382        assert!(find_def(&parsed, "Point").is_some());
383        assert!(find_def(&parsed, "Color").is_some());
384
385        // Verify line ranges
386        let add = find_def(&parsed, "add").unwrap();
387        assert_eq!(add.line_start, 3);
388    }
389
390    // ── C++ ───────────────────────────────────────────────────────────────
391
392    #[test]
393    fn test_parse_cpp() {
394        let src = b"#include <vector>
395
396class Calculator {
397public:
398    int add(int a, int b) { return a + b; }
399
400    int multiply(int a, int b);
401};
402
403struct Config {
404    int timeout;
405};
406
407enum Status { OK, ERROR };
408
409namespace util {
410    int helper(int x) { return x * 2; }
411}
412
413template<typename T>
414T max(T a, T b) { return a > b ? a : b; }
415";
416        let parsed = parse_source(Lang::Cpp, src);
417        // C++: class, struct, enum, namespace + functions
418        assert!(find_def(&parsed, "Calculator").is_some());
419        assert!(find_def(&parsed, "Config").is_some());
420        assert!(find_def(&parsed, "Status").is_some());
421        assert!(find_def(&parsed, "helper").is_some());
422        assert!(find_def(&parsed, "max").is_some());
423
424        // Calculator is a class
425        let calc = find_def(&parsed, "Calculator").unwrap();
426        assert_eq!(calc.kind, "class");
427
428        // Config is a struct → normalized to "class"
429        let config = find_def(&parsed, "Config").unwrap();
430        assert_eq!(config.kind, "class");
431    }
432
433    // ── Ruby ──────────────────────────────────────────────────────────────
434
435    #[test]
436    fn test_parse_ruby() {
437        let src = b"# Adds two numbers
438def add(a, b)
439  a + b
440end
441
442class Greeter
443  def initialize(name)
444    @name = name
445  end
446
447  def hello
448    \"Hello, #{@name}\"
449  end
450end
451
452module Utils
453  MAX_RETRIES = 3
454
455  def self.format(s)
456    s.strip
457  end
458end
459";
460        let parsed = parse_source(Lang::Ruby, src);
461        // Ruby: method definitions
462        assert!(find_def(&parsed, "add").is_some(), "add method");
463        assert!(find_def(&parsed, "hello").is_some(), "hello method");
464        // class + module
465        assert!(find_def(&parsed, "Greeter").is_some(), "Greeter class");
466        assert!(find_def(&parsed, "Utils").is_some(), "Utils module");
467
468        // Verify doc extraction
469        let add = find_def(&parsed, "add").unwrap();
470        assert_eq!(add.kind, "method");
471        assert_eq!(add.doc.as_deref(), Some("Adds two numbers"));
472    }
473
474    #[test]
475    fn test_parse_ruby_constant() {
476        let src = b"MAX_VALUE = 1000
477MIN_VALUE = 1
478";
479        let parsed = parse_source(Lang::Ruby, src);
480        // Ruby may or may not extract top-level constants; just verify no crash
481        assert!(parsed.refs.is_empty() || parsed.defs.len() <= 4);
482    }
483
484    // ── PHP ───────────────────────────────────────────────────────────────
485
486    #[test]
487    fn test_parse_php() {
488        let src = b"<?php
489
490namespace App\\Service;
491
492class UserService {
493    public function find(int $id): ?User {
494        return $this->repo->find($id);
495    }
496
497    private function validate(array $data): bool {
498        return !empty($data['name']);
499    }
500}
501
502interface Logger {
503    public function log(string $msg): void;
504}
505
506trait Timestampable {
507    public function getCreatedAt(): \\DateTime {
508        return $this->createdAt;
509    }
510}
511
512function helper_sort(array &$arr): void {
513    sort($arr);
514}
515";
516        let parsed = parse_source(Lang::Php, src);
517        // PHP: class, interface, trait + methods
518        assert!(find_def(&parsed, "UserService").is_some(), "UserService class");
519        assert!(find_def(&parsed, "Logger").is_some(), "Logger interface");
520        assert!(find_def(&parsed, "Timestampable").is_some(), "Timestampable trait");
521        assert!(find_def(&parsed, "find").is_some(), "find method");
522        assert!(find_def(&parsed, "validate").is_some(), "validate method");
523        assert!(find_def(&parsed, "log").is_some(), "log method");
524        assert!(find_def(&parsed, "helper_sort").is_some(), "helper_sort function");
525
526        let svc = find_def(&parsed, "UserService").unwrap();
527        assert_eq!(svc.kind, "class");
528    }
529
530    // ═══════════════════════════════════════════════════════════════════════
531    // COMPLEX LANGUAGE PATTERNS
532    // ═══════════════════════════════════════════════════════════════════════
533
534    // ── Rust (generics, impl, async, closures, macros) ────────────────────
535
536    #[test]
537    fn test_parse_rust_complex() {
538        let src = b"pub struct Pair<T, U> { first: T, second: U }
539
540impl<T, U> Pair<T, U> {
541    fn new(first: T, second: U) -> Self { Pair { first, second } }
542    fn first(&self) -> &T { &self.first }
543}
544
545pub trait Into {
546    fn into(self) -> i32;
547}
548
549impl Into for i32 {
550    fn into(self) -> i32 { self }
551}
552
553pub async fn fetch(url: &str) -> Result<String, String> {
554    Ok(String::new())
555}
556
557pub fn handler() {
558    let add = |a: i32, b: i32| a + b;
559    let _ = add(1, 2);
560}
561
562macro_rules! vec_of {
563    ($x:expr) => { vec![$x] };
564}
565";
566        let parsed = parse_source(Lang::Rust, src);
567        assert!(find_def(&parsed, "Pair").is_some(), "Pair generic struct");
568        assert!(find_def(&parsed, "Into").is_some(), "Into trait");
569        assert!(find_def(&parsed, "fetch").is_some(), "fetch async fn");
570        assert!(find_def(&parsed, "handler").is_some(), "handler fn");
571        // Methods inside impl blocks
572        assert!(find_def(&parsed, "new").is_some(), "Pair::new method");
573        assert!(find_def(&parsed, "first").is_some(), "Pair::first method");
574        // References (calls inside handler)
575        let call_refs: Vec<&str> = parsed.refs.iter()
576            .filter(|r| r.kind == "call")
577            .map(|r| r.name.as_str())
578            .collect();
579        assert!(call_refs.contains(&"add"), "handler calls add closure");
580    }
581
582    // ── Python (decorators, inheritance, type hints, classmethod, staticmethod, lambdas) ──
583
584    #[test]
585    fn test_parse_python_complex() {
586        let src = b"from typing import Optional, List
587
588class Repository:
589    def __init__(self, db: str) -> None:
590        self.db = db
591
592    async def find(self, id: int) -> Optional[dict]:
593        return None
594
595    @classmethod
596    def default(cls) -> 'Repository':
597        return cls('sqlite')
598
599    @staticmethod
600    def version() -> str:
601        return '1.0'
602
603class UserService(Repository):
604    def __init__(self) -> None:
605        super().__init__('users')
606
607    async def find(self, id: int) -> Optional[dict]:
608        return {'id': id}
609
610def compute(items: List[int]) -> int:
611    return sum(filter(None, map(lambda x: x * 2, items)))
612";
613        let parsed = parse_source(Lang::Python, src);
614        assert!(find_def(&parsed, "Repository").is_some(), "Repository class");
615        assert!(find_def(&parsed, "UserService").is_some(), "UserService class");
616        assert!(find_def(&parsed, "find").is_some(), "find method (both classes)");
617        assert!(find_def(&parsed, "default").is_some(), "default classmethod");
618        assert!(find_def(&parsed, "version").is_some(), "version staticmethod");
619        assert!(find_def(&parsed, "compute").is_some(), "compute function");
620        assert!(find_def(&parsed, "__init__").is_some(), "__init__ method");
621        // Should have at least these functions/methods
622        assert!(count_defs_of_kind(&parsed, "function") >= 3);
623        assert!(count_defs_of_kind(&parsed, "class") >= 2);
624    }
625
626    // ── JavaScript (classes, methods, arrow functions, async) ──────────────
627
628    #[test]
629    fn test_parse_javascript_complex() {
630        let src = b"class Repository {
631    constructor(db) { this.db = db; }
632    async find(id) { return null; }
633    static default() { return new Repository('sqlite'); }
634}
635
636class UserService extends Repository {
637    constructor() { super('users'); }
638    async find(id) { return { id }; }
639}
640
641function compute(arr) {
642    return arr.filter(x => x != null).map(x => x * 2);
643}
644";
645        let parsed = parse_source(Lang::JavaScript, src);
646        assert!(find_def(&parsed, "Repository").is_some(), "Repository class");
647        assert!(find_def(&parsed, "UserService").is_some(), "UserService class");
648        assert!(find_def(&parsed, "find").is_some(), "find method");
649        assert!(find_def(&parsed, "default").is_some(), "default static method");
650        assert!(find_def(&parsed, "compute").is_some(), "compute function");
651        assert!(find_def(&parsed, "default").is_some(), "default static method");
652    }
653
654    // ── Go (interfaces, methods on structs, variadic functions) ────────────
655
656    #[test]
657    fn test_parse_go_complex() {
658        let src = b"package main
659
660import \"fmt\"
661
662type Walker interface {
663    Walk() int
664}
665
666type Thing struct {
667    value int
668}
669
670func (t *Thing) Walk() int {
671    return t.value
672}
673
674func NewThing(v int) *Thing {
675    return &Thing{value: v}
676}
677
678func sum(vals ...int) int {
679    total := 0
680    for _, v := range vals {
681        total += v
682    }
683    return total
684}
685";
686        let parsed = parse_source(Lang::Go, src);
687        assert!(find_def(&parsed, "Walker").is_some(), "Walker interface");
688        assert!(find_def(&parsed, "Thing").is_some(), "Thing struct");
689        assert!(find_def(&parsed, "Walk").is_some(), "Walk method");
690        assert!(find_def(&parsed, "NewThing").is_some(), "NewThing constructor");
691        assert!(find_def(&parsed, "sum").is_some(), "sum variadic function");
692    }
693
694    // ── Java (generics, inheritance, annotations, enums, inner classes) ────
695
696    #[test]
697    fn test_parse_java_complex() {
698        let src = b"package com.example;
699
700import java.util.List;
701
702class Repository<T> {
703    public T find(int id) { return null; }
704
705    public List<T> findAll() { return null; }
706}
707
708class UserService extends Repository<String> {
709    @Override
710    public String find(int id) {
711        return \"user\";
712    }
713}
714
715enum Status { ACTIVE, INACTIVE }
716
717interface Cache<K, V> {
718    V get(K key);
719    void put(K key, V value);
720}
721";
722        let parsed = parse_source(Lang::Java, src);
723        assert!(find_def(&parsed, "Repository").is_some(), "Repository generic class");
724        assert!(find_def(&parsed, "UserService").is_some(), "UserService class");
725        assert!(find_def(&parsed, "Cache").is_some(), "Cache interface");
726        assert!(find_def(&parsed, "find").is_some(), "find method");
727        assert!(find_def(&parsed, "findAll").is_some(), "findAll method");
728    }
729
730    // ── C (typedef, union, function pointers, static, inline) ─────────────
731
732    #[test]
733    fn test_parse_c_complex() {
734        let src = b"#include <stddef.h>
735
736typedef struct Buffer Buffer;
737
738struct Buffer {
739    char *data;
740    size_t len;
741};
742
743static inline int max(int a, int b) {
744    return a > b ? a : b;
745}
746
747static void internal_cleanup(Buffer *buf) {
748    if (buf) buf->len = 0;
749}
750
751int process(Buffer *buf) {
752    return (int)buf->len;
753}
754";
755        let parsed = parse_source(Lang::C, src);
756        assert!(find_def(&parsed, "Buffer").is_some(), "Buffer struct");
757        assert!(find_def(&parsed, "max").is_some(), "max inline function");
758        assert!(find_def(&parsed, "internal_cleanup").is_some(), "internal_cleanup static");
759        assert!(find_def(&parsed, "process").is_some(), "process function");
760        // Struct + typedef produce class-kind defs
761        let types = count_defs_of_kind(&parsed, "class");
762        assert!(types >= 1, "should have at least Buffer struct, got {types}");
763    }
764
765    // ── C++ (virtual inheritance, operator overloads, lambdas, constexpr) ──
766
767    #[test]
768    fn test_parse_cpp_complex() {
769        let src = b"#include <vector>
770
771class Base {
772public:
773    virtual ~Base() = default;
774    virtual int compute() const = 0;
775};
776
777class Derived final : public Base {
778public:
779    int compute() const override { return value_; }
780
781    Derived& operator=(const Derived& other) {
782        value_ = other.value_;
783        return *this;
784    }
785
786private:
787    int value_ = 42;
788};
789
790template<typename T>
791constexpr T pi = T(3.1415926535);
792
793namespace detail {
794    template<typename T>
795    class ScopedPtr {
796    public:
797        explicit ScopedPtr(T* ptr) : ptr_(ptr) {}
798        ~ScopedPtr() { delete ptr_; }
799        T& operator*() const { return *ptr_; }
800    private:
801        T* ptr_;
802    };
803}
804";
805        let parsed = parse_source(Lang::Cpp, src);
806        assert!(find_def(&parsed, "Base").is_some(), "Base abstract class");
807        assert!(find_def(&parsed, "Derived").is_some(), "Derived class");
808        assert!(find_def(&parsed, "ScopedPtr").is_some(), "ScopedPtr template class");
809        assert!(find_def(&parsed, "compute").is_some(), "compute method");
810    }
811
812    // ── Ruby (blocks, modules, mixins, inheritance, attr_accessor) ───────
813
814    #[test]
815    fn test_parse_ruby_complex() {
816        let src = b"module Persistence
817  def save
818    'saved'
819  end
820end
821
822class BaseRecord
823  attr_accessor :id
824
825  def initialize(id = nil)
826    @id = id
827  end
828end
829
830class User < BaseRecord
831  include Persistence
832
833  attr_reader :name
834
835  def initialize(id, name)
836    super(id)
837    @name = name
838  end
839
840  def self.find(id)
841    new(id, 'default')
842  end
843
844  def to_s
845    \"User(#{@id}, #{@name})\"
846  end
847end
848";
849        let parsed = parse_source(Lang::Ruby, src);
850        assert!(find_def(&parsed, "Persistence").is_some(), "Persistence module");
851        assert!(find_def(&parsed, "BaseRecord").is_some(), "BaseRecord class");
852        assert!(find_def(&parsed, "User").is_some(), "User class");
853        assert!(find_def(&parsed, "save").is_some(), "save method");
854        assert!(find_def(&parsed, "initialize").is_some(), "initialize");
855        assert!(find_def(&parsed, "find").is_some(), "find class method");
856        assert!(find_def(&parsed, "to_s").is_some(), "to_s method");
857    }
858
859    // ── PHP (constructor promotion, attributes, union types, static) ─────
860
861    #[test]
862    fn test_parse_php_complex() {
863        let src = b"<?php
864
865namespace App\\Service;
866
867interface CacheInterface
868{
869    public function get(string $key): mixed;
870    public function set(string $key, mixed $value, int $ttl = 0): void;
871}
872
873trait Loggable
874{
875    public function log(string $msg): void
876    {
877        echo \\date('[Y-m-d] ') . $msg;
878    }
879}
880
881class UserService implements CacheInterface
882{
883    use Loggable;
884
885    public function __construct(
886        private string $prefix = 'usr'
887    ) {}
888
889    public function get(string $key): mixed
890    {
891        $this->log(\"get: $key\");
892        return null;
893    }
894
895    public function set(string $key, mixed $value, int $ttl = 0): void
896    {
897        $this->log(\"set: $key\");
898    }
899
900    public static function createDefault(): self
901    {
902        return new self();
903    }
904}
905";
906        let parsed = parse_source(Lang::Php, src);
907        assert!(find_def(&parsed, "CacheInterface").is_some(), "CacheInterface");
908        assert!(find_def(&parsed, "Loggable").is_some(), "Loggable trait");
909        assert!(find_def(&parsed, "UserService").is_some(), "UserService class");
910        assert!(find_def(&parsed, "get").is_some(), "get method");
911        assert!(find_def(&parsed, "set").is_some(), "set method");
912        assert!(find_def(&parsed, "log").is_some(), "log method");
913        assert!(find_def(&parsed, "createDefault").is_some(), "createDefault static");
914    }
915
916    // ── Edge cases ────────────────────────────────────────────────────────
917
918    #[test]
919    fn test_parse_empty_source() {
920        let src = b"";
921        for lang in Lang::all() {
922            let parsed = parse_source(lang, src);
923            assert!(parsed.defs.is_empty(), "{:?} should produce no defs from empty input", lang);
924            assert!(parsed.refs.is_empty(), "{:?} should produce no refs from empty input", lang);
925        }
926    }
927
928    #[test]
929    fn test_parse_whitespace_only() {
930        for lang in Lang::all() {
931            let parsed = parse_source(lang, b"\n\n   \n\t\n");
932            assert!(parsed.defs.is_empty(), "{:?} whitespace should produce no defs", lang);
933        }
934    }
935
936    #[test]
937    fn test_parse_syntax_error_recovers_gracefully() {
938        // Rust file with missing semicolons and incomplete expressions
939        let src = b"fn broken(x: i32) -> i32 {
940    x +
941}
942fn fine() {}
943";
944        let parsed = parse_source(Lang::Rust, src);
945        // Even with syntax errors, tree-sitter should still extract some symbols
946        assert!(!parsed.defs.is_empty(), "should recover and find some defs");
947        assert!(find_def(&parsed, "fine").is_some(), "fine should be found");
948    }
949
950    #[test]
951    fn test_parse_large_file_truncated() {
952        // parse_source itself does not cap (capping is in handle_code_index);
953        // verify it can handle a large number of definitions without OOM.
954        let mut src = String::new();
955        for i in 0..MAX_SYMBOLS_PER_FILE + 100 {
956            src.push_str(&format!("fn func_{i}() {{}}\n"));
957        }
958        let parsed = parse_source(Lang::Rust, src.as_bytes());
959        // Should parse all defs without crashing
960        assert!(parsed.defs.len() > MAX_SYMBOLS_PER_FILE,
961            "should parse more than cap without truncation, got {}", parsed.defs.len());
962    }
963
964    #[test]
965    fn test_parse_all_languages_produce_defs() {
966        let samples: Vec<(Lang, &[u8])> = vec![
967            (Lang::Rust, b"fn foo() {}\nconst X: i32 = 1;\n"),
968            (Lang::Python, b"def foo(): pass\nX = 1\n"),
969            (Lang::JavaScript, b"function foo() {}\nconst X = 1;\n"),
970            (Lang::TypeScript, b"abstract class Foo {}\ninterface Bar {}\n"),
971            (Lang::Tsx, b"abstract class Foo {}\ninterface Bar {}\n"),
972            (Lang::Go, b"func foo() {}\nconst X = 1\n"),
973            (Lang::Java, b"class Foo {}\n"),
974            (Lang::C, b"int foo() { return 1; }\n"),
975            (Lang::Cpp, b"int foo() { return 1; }\nclass Bar {};\n"),
976            (Lang::Ruby, b"def foo; end\nX = 1\n"),
977            (Lang::Php, b"<?php function foo() {}\n"),
978        ];
979        for (lang, src) in samples {
980            let parsed = parse_source(lang, src);
981            assert!(!parsed.defs.is_empty(),
982                "{:?} should produce at least one def", lang);
983        }
984    }
985
986    #[test]
987    fn test_parse_c_header_file() {
988        let src = b"#ifndef FOO_H
989#define FOO_H
990
991typedef struct Buffer Buffer;
992
993struct Buffer {
994    char *data;
995    size_t len;
996};
997
998int process(Buffer *buf);
999void free_buffer(Buffer *buf);
1000
1001#endif
1002";
1003        let parsed = parse_source(Lang::C, src);
1004        assert!(find_def(&parsed, "process").is_some(), "process function");
1005        assert!(find_def(&parsed, "free_buffer").is_some(), "free_buffer function");
1006        assert!(find_def(&parsed, "Buffer").is_some(), "Buffer struct");
1007    }
1008
1009    #[test]
1010    fn test_parse_cpp_template_and_methods() {
1011        let src = b"template<typename T>
1012class Vector {
1013public:
1014    void push(const T& val);
1015    T pop();
1016private:
1017    T* data_;
1018    size_t size_;
1019};
1020
1021template<>
1022class Vector<bool> {
1023public:
1024    void push(bool val);
1025};
1026";
1027        let parsed = parse_source(Lang::Cpp, src);
1028        assert!(find_def(&parsed, "Vector").is_some(), "Vector template class");
1029        assert!(find_def(&parsed, "push").is_some(), "push method");
1030        assert!(find_def(&parsed, "pop").is_some(), "pop method");
1031    }
1032
1033    #[test]
1034    fn test_parse_php_without_opening_tag() {
1035        // PHP without <?php should still parse (many real files are tag-only)
1036        let src = b"<?php
1037function foo() {}
1038";
1039        let parsed = parse_source(Lang::Php, src);
1040        assert!(find_def(&parsed, "foo").is_some(), "foo function expected");
1041    }
1042
1043    #[test]
1044    fn test_parse_ruby_singleton_methods() {
1045        let src = b"class Foo
1046  def self.bar
1047    'class method'
1048  end
1049
1050  def instance_method
1051    'instance'
1052  end
1053end
1054";
1055        let parsed = parse_source(Lang::Ruby, src);
1056        assert!(find_def(&parsed, "bar").is_some(), "self.bar method");
1057        assert!(find_def(&parsed, "instance_method").is_some(), "instance_method");
1058    }
1059}