1pub mod lang;
9
10use std::path::{Path, PathBuf};
11use std::sync::atomic::AtomicUsize;
12
13pub use lang::{Lang, detect};
14
/// File-size limit of 10 MiB.
///
/// Not referenced elsewhere in this file; presumably the value callers pass
/// to [`walk`] as `max_bytes` — TODO confirm against callers.
pub const MAX_FILE_BYTES: u64 = 10 * 1024 * 1024;

/// Nominal per-file symbol cap.
///
/// NOTE(review): [`parse_source`] does not enforce this value; within this
/// file it is only used by a test that expects *more* definitions than this
/// to be returned. Presumably enforced by a caller — confirm.
pub const MAX_SYMBOLS_PER_FILE: usize = 20_000;

/// Maximum number of tags (definitions plus references combined) that
/// [`parse_source`] collects from a single file before it stops.
pub const MAX_TAGS_PER_FILE: usize = 100_000;

/// Maximum number of paths [`walk`] returns from one directory traversal.
pub const MAX_WALK_FILES: usize = 200_000;

/// Maximum number of characters kept in a definition signature before it is
/// cut and suffixed with `…`.
const MAX_SIGNATURE_CHARS: usize = 512;
/// Maximum number of characters kept in a definition doc line before it is
/// cut and suffixed with `…`.
const MAX_DOC_CHARS: usize = 512;
32
/// A symbol definition extracted from a source file by [`parse_source`].
#[derive(Debug, Clone)]
pub struct Def {
    /// Normalized kind: `"function"`, `"method"`, `"class"`, `"module"`,
    /// `"constant"`, or the raw tag kind when no normalization applies.
    pub kind: String,
    /// Symbol name, decoded lossily from the tag's name byte range.
    pub name: String,
    /// 1-based line on which the definition's byte range starts.
    pub line_start: usize,
    /// 1-based line containing the last byte of the definition's range.
    pub line_end: usize,
    /// Trimmed text from the definition start to the end of that line,
    /// truncated to a bounded number of characters.
    pub signature: String,
    /// First non-blank line of the attached documentation, if any.
    pub doc: Option<String>,
}
48
/// A non-definition tag (for example a call) extracted by [`parse_source`].
#[derive(Debug, Clone)]
pub struct Ref {
    /// Raw syntax-type name reported by the tags configuration
    /// (not normalized, unlike [`Def::kind`]).
    pub kind: String,
    /// Referenced symbol name, decoded lossily from the tag's name byte range.
    pub name: String,
    /// 1-based line of the reference (the tag span's start row plus one).
    pub line: usize,
}
59
/// Result of parsing one source file: its definitions and references.
///
/// The `Default` value (both lists empty) is what [`parse_source`] returns
/// when a language has no tags configuration or tag generation fails.
#[derive(Debug, Clone, Default)]
pub struct ParsedFile {
    /// Definitions, in the order the tags were produced.
    pub defs: Vec<Def>,
    /// References, in the order the tags were produced.
    pub refs: Vec<Ref>,
}
66
67pub fn hash_bytes(bytes: &[u8]) -> String {
69 blake3::hash(bytes).to_hex().to_string()
70}
71
/// Collapses a language-specific definition kind into a small shared
/// vocabulary.
///
/// Function-like kinds become `"function"`, method-like kinds `"method"`,
/// type-like kinds `"class"`, and namespace-like kinds `"module"`. Every
/// other kind — including `"constant"`, which already is its own canonical
/// form — is returned unchanged.
fn normalize_def_kind(raw: &str) -> &str {
    if matches!(raw, "function" | "macro") {
        return "function";
    }
    if matches!(raw, "method" | "delegate") {
        return "method";
    }
    if matches!(
        raw,
        "class"
            | "interface"
            | "struct"
            | "type"
            | "enum"
            | "trait"
            | "union"
            | "concept"
            | "object"
            | "annotation"
            | "typealias"
    ) {
        return "class";
    }
    if matches!(raw, "module" | "namespace") {
        return "module";
    }
    raw
}
84
85fn first_line(source: &[u8], start: usize) -> String {
87 let end = source[start..]
88 .iter()
89 .position(|&b| b == b'\n')
90 .map(|p| start + p)
91 .unwrap_or(source.len());
92 let mut s = String::from_utf8_lossy(&source[start..end]).trim().to_string();
93 if s.chars().count() > MAX_SIGNATURE_CHARS {
94 s = s.chars().take(MAX_SIGNATURE_CHARS).collect::<String>() + "…";
95 }
96 s
97}
98
99fn clamp_doc(doc: &str) -> Option<String> {
100 let line = doc.lines().find(|l| !l.trim().is_empty())?.trim();
101 if line.is_empty() {
102 return None;
103 }
104 let s = if line.chars().count() > MAX_DOC_CHARS {
105 line.chars().take(MAX_DOC_CHARS).collect::<String>() + "…"
106 } else {
107 line.to_string()
108 };
109 Some(s)
110}
111
112pub fn parse_source(lang: Lang, source: &[u8]) -> ParsedFile {
115 let Some(config) = lang::config(lang) else {
116 return ParsedFile::default();
117 };
118
119 let mut ctx = tree_sitter_tags::TagsContext::new();
120 let cancel = AtomicUsize::new(0);
121 let (tags, _failed) = match ctx.generate_tags(config, source, Some(&cancel)) {
122 Ok(v) => v,
123 Err(_) => return ParsedFile::default(),
124 };
125
126 let line_starts: Vec<usize> = std::iter::once(0)
130 .chain(source.iter().enumerate().filter(|&(_, &b)| b == b'\n').map(|(i, _)| i + 1))
131 .collect();
132 let line_of = |byte: usize| line_starts.partition_point(|&s| s <= byte).max(1);
133
134 let mut out = ParsedFile::default();
135 for tag in tags.flatten() {
136 if out.defs.len() + out.refs.len() >= MAX_TAGS_PER_FILE {
137 break;
138 }
139 let name = String::from_utf8_lossy(&source[tag.name_range.clone()]).to_string();
140 if name.is_empty() {
141 continue;
142 }
143 let kind = config.syntax_type_name(tag.syntax_type_id).to_string();
144 if tag.is_definition {
145 let end_byte = tag.range.end.saturating_sub(1).max(tag.range.start);
146 out.defs.push(Def {
147 kind: normalize_def_kind(&kind).to_string(),
148 name,
149 line_start: line_of(tag.range.start),
150 line_end: line_of(end_byte),
151 signature: first_line(source, tag.range.start),
152 doc: tag.docs.as_deref().and_then(clamp_doc),
153 });
154 } else {
155 out.refs.push(Ref {
156 kind,
157 name,
158 line: tag.span.start.row + 1,
159 });
160 }
161 }
162 out
163}
164
165pub fn walk(root: &Path, max_bytes: u64) -> Vec<PathBuf> {
168 let mut files = Vec::new();
169 if root.is_file() {
170 if detect(root).is_some()
171 && std::fs::metadata(root).map(|m| m.len() <= max_bytes).unwrap_or(false)
172 {
173 files.push(root.to_path_buf());
174 }
175 return files;
176 }
177
178 let walker = ignore::WalkBuilder::new(root)
179 .standard_filters(true)
180 .hidden(true)
181 .git_ignore(true)
182 .git_global(true)
183 .require_git(false)
184 .filter_entry(|e| {
185 let name = e.file_name().to_string_lossy();
188 !matches!(name.as_ref(), "target" | "node_modules" | ".git" | "dist" | "build")
189 })
190 .build();
191
192 for entry in walker.flatten() {
193 if files.len() >= MAX_WALK_FILES {
194 break;
195 }
196 let path = entry.path();
197 if !path.is_file() || detect(path).is_none() {
198 continue;
199 }
200 if std::fs::metadata(path).map(|m| m.len() > max_bytes).unwrap_or(true) {
201 continue;
202 }
203 files.push(path.to_path_buf());
204 }
205 files
206}
207
#[cfg(test)]
mod tests {
    //! Parser smoke tests: one small fixture per supported language, plus
    //! edge cases (empty input, syntax errors, large inputs).

    use super::*;

    /// Number of definitions in `parsed` whose normalized kind equals `kind`.
    fn count_defs_of_kind(parsed: &ParsedFile, kind: &str) -> usize {
        parsed.defs.iter().filter(|d| d.kind == kind).count()
    }

    /// First definition in `parsed` named `name`, if any.
    fn find_def<'a>(parsed: &'a ParsedFile, name: &str) -> Option<&'a Def> {
        parsed.defs.iter().find(|d| d.name == name)
    }

    /// Rust functions map to `function`; structs, enums and traits to `class`.
    #[test]
    fn test_parse_rust() {
        let src = b"/// Docs
pub fn alpha(x: i32) -> i32 { x + 1 }

fn beta() {}

pub struct Thing { pub x: i32 }

pub enum Color { Red, Blue }

trait Foo { fn bar(&self); }
";
        let parsed = parse_source(Lang::Rust, src);
        assert!(!parsed.defs.is_empty(), "expected defs");

        let alpha = find_def(&parsed, "alpha").expect("alpha");
        assert_eq!(alpha.kind, "function");
        assert!(alpha.signature.contains("fn alpha"));

        let beta = find_def(&parsed, "beta").expect("beta");
        assert_eq!(beta.kind, "function");

        let thing = find_def(&parsed, "Thing").expect("Thing");
        assert_eq!(thing.kind, "class");

        let color = find_def(&parsed, "Color").expect("Color");
        assert_eq!(color.kind, "class");

        let foo = find_def(&parsed, "Foo").expect("Foo");
        assert_eq!(foo.kind, "class");
    }

    /// Call expressions are reported as references of kind `call`.
    #[test]
    fn test_parse_rust_calls() {
        let src = b"fn alpha() { beta() + gamma() }
fn beta() -> i32 { 1 }
fn gamma() -> i32 { 2 }";
        let parsed = parse_source(Lang::Rust, src);
        let call_refs: Vec<&str> = parsed
            .refs
            .iter()
            .filter(|r| r.kind == "call")
            .map(|r| r.name.as_str())
            .collect();
        assert!(call_refs.contains(&"beta"), "alpha should call beta");
        assert!(call_refs.contains(&"gamma"), "alpha should call gamma");
    }

    /// Python functions, classes and module-level constants.
    #[test]
    fn test_parse_python() {
        let src = b"def greet(name):
    return 'hello ' + name

class Greeter:
    def hello(self):
        return greet('world')

MAX_RETRIES = 3
";
        let parsed = parse_source(Lang::Python, src);
        assert_eq!(count_defs_of_kind(&parsed, "function"), 2);
        assert_eq!(count_defs_of_kind(&parsed, "class"), 1);
        assert_eq!(count_defs_of_kind(&parsed, "constant"), 1);
        assert!(find_def(&parsed, "greet").is_some());
        assert!(find_def(&parsed, "Greeter").is_some());
    }

    /// JavaScript function and class declarations.
    #[test]
    fn test_parse_javascript() {
        let src = b"function alpha(x) { return beta(x) + 1; }
class Thing { constructor(v) { this.v = v; } }
";
        let parsed = parse_source(Lang::JavaScript, src);
        assert!(!parsed.defs.is_empty(), "JS should produce defs");
        assert!(find_def(&parsed, "alpha").is_some(), "alpha function");
        assert!(find_def(&parsed, "Thing").is_some(), "Thing class");
    }

    /// Go functions and type declarations.
    #[test]
    fn test_parse_go() {
        let src = b"package main

func alpha(x int) int { return beta(x) + 1 }

func beta(x int) int { return x * 2 }

type Thing struct { Value int }
";
        let parsed = parse_source(Lang::Go, src);
        assert!(!parsed.defs.is_empty(), "Go should produce defs");
        assert!(find_def(&parsed, "alpha").is_some(), "alpha function");
        assert!(find_def(&parsed, "beta").is_some(), "beta function");
        assert!(find_def(&parsed, "Thing").is_some(), "Thing type");
    }

    /// Java classes and interfaces.
    #[test]
    fn test_parse_java() {
        let src = b"package com.example;

class Hello {
    private int x;

    public int add(int a, int b) { return a + b; }
}

interface Worker {
    void doWork();
}
";
        let parsed = parse_source(Lang::Java, src);
        assert!(!parsed.defs.is_empty(), "Java should produce defs");
        assert!(find_def(&parsed, "Hello").is_some(), "Hello class");
        assert!(find_def(&parsed, "Worker").is_some(), "Worker interface");
    }

    /// C functions, structs and enums, plus a 1-based line-number check.
    #[test]
    fn test_parse_c() {
        let src = b"#include <stdio.h>

int add(int a, int b) {
    return a + b;
}

struct Point {
    int x;
    int y;
};

enum Color { RED, GREEN, BLUE };

#define MAX_SIZE 1024

void greet(const char *name) {
    printf(\"Hello %s\", name);
}
";
        let parsed = parse_source(Lang::C, src);
        assert_eq!(count_defs_of_kind(&parsed, "function"), 2);
        assert_eq!(count_defs_of_kind(&parsed, "class"), 2);
        assert!(find_def(&parsed, "add").is_some());
        assert!(find_def(&parsed, "greet").is_some());
        assert!(find_def(&parsed, "Point").is_some());
        assert!(find_def(&parsed, "Color").is_some());

        // `add` starts on the third line of the fixture.
        let add = find_def(&parsed, "add").unwrap();
        assert_eq!(add.line_start, 3);
    }

    /// C++ classes, structs, enums, namespaced functions and templates.
    #[test]
    fn test_parse_cpp() {
        let src = b"#include <vector>

class Calculator {
public:
    int add(int a, int b) { return a + b; }

    int multiply(int a, int b);
};

struct Config {
    int timeout;
};

enum Status { OK, ERROR };

namespace util {
    int helper(int x) { return x * 2; }
}

template<typename T>
T max(T a, T b) { return a > b ? a : b; }
";
        let parsed = parse_source(Lang::Cpp, src);
        assert!(find_def(&parsed, "Calculator").is_some());
        assert!(find_def(&parsed, "Config").is_some());
        assert!(find_def(&parsed, "Status").is_some());
        assert!(find_def(&parsed, "helper").is_some());
        assert!(find_def(&parsed, "max").is_some());

        let calc = find_def(&parsed, "Calculator").unwrap();
        assert_eq!(calc.kind, "class");

        let config = find_def(&parsed, "Config").unwrap();
        assert_eq!(config.kind, "class");
    }

    /// Ruby methods, classes and modules; a leading comment becomes the doc.
    #[test]
    fn test_parse_ruby() {
        let src = b"# Adds two numbers
def add(a, b)
  a + b
end

class Greeter
  def initialize(name)
    @name = name
  end

  def hello
    \"Hello, #{@name}\"
  end
end

module Utils
  MAX_RETRIES = 3

  def self.format(s)
    s.strip
  end
end
";
        let parsed = parse_source(Lang::Ruby, src);
        assert!(find_def(&parsed, "add").is_some(), "add method");
        assert!(find_def(&parsed, "hello").is_some(), "hello method");
        assert!(find_def(&parsed, "Greeter").is_some(), "Greeter class");
        assert!(find_def(&parsed, "Utils").is_some(), "Utils module");

        let add = find_def(&parsed, "add").unwrap();
        assert_eq!(add.kind, "method");
        assert_eq!(add.doc.as_deref(), Some("Adds two numbers"));
    }

    /// Parsing Ruby constant assignments must not panic.
    ///
    /// NOTE(review): the assertion is very permissive — it passes whenever
    /// at most four definitions are found — so it pins almost no behaviour.
    #[test]
    fn test_parse_ruby_constant() {
        let src = b"MAX_VALUE = 1000
MIN_VALUE = 1
";
        let parsed = parse_source(Lang::Ruby, src);
        assert!(parsed.refs.is_empty() || parsed.defs.len() <= 4);
    }

    /// PHP classes, interfaces, traits, methods and free functions.
    #[test]
    fn test_parse_php() {
        let src = b"<?php

namespace App\\Service;

class UserService {
    public function find(int $id): ?User {
        return $this->repo->find($id);
    }

    private function validate(array $data): bool {
        return !empty($data['name']);
    }
}

interface Logger {
    public function log(string $msg): void;
}

trait Timestampable {
    public function getCreatedAt(): \\DateTime {
        return $this->createdAt;
    }
}

function helper_sort(array &$arr): void {
    sort($arr);
}
";
        let parsed = parse_source(Lang::Php, src);
        assert!(find_def(&parsed, "UserService").is_some(), "UserService class");
        assert!(find_def(&parsed, "Logger").is_some(), "Logger interface");
        assert!(find_def(&parsed, "Timestampable").is_some(), "Timestampable trait");
        assert!(find_def(&parsed, "find").is_some(), "find method");
        assert!(find_def(&parsed, "validate").is_some(), "validate method");
        assert!(find_def(&parsed, "log").is_some(), "log method");
        assert!(find_def(&parsed, "helper_sort").is_some(), "helper_sort function");

        let svc = find_def(&parsed, "UserService").unwrap();
        assert_eq!(svc.kind, "class");
    }

    /// Rust generics, impl blocks, async fns, closures and macros.
    #[test]
    fn test_parse_rust_complex() {
        let src = b"pub struct Pair<T, U> { first: T, second: U }

impl<T, U> Pair<T, U> {
    fn new(first: T, second: U) -> Self { Pair { first, second } }
    fn first(&self) -> &T { &self.first }
}

pub trait Into {
    fn into(self) -> i32;
}

impl Into for i32 {
    fn into(self) -> i32 { self }
}

pub async fn fetch(url: &str) -> Result<String, String> {
    Ok(String::new())
}

pub fn handler() {
    let add = |a: i32, b: i32| a + b;
    let _ = add(1, 2);
}

macro_rules! vec_of {
    ($x:expr) => { vec![$x] };
}
";
        let parsed = parse_source(Lang::Rust, src);
        assert!(find_def(&parsed, "Pair").is_some(), "Pair generic struct");
        assert!(find_def(&parsed, "Into").is_some(), "Into trait");
        assert!(find_def(&parsed, "fetch").is_some(), "fetch async fn");
        assert!(find_def(&parsed, "handler").is_some(), "handler fn");
        assert!(find_def(&parsed, "new").is_some(), "Pair::new method");
        assert!(find_def(&parsed, "first").is_some(), "Pair::first method");
        let call_refs: Vec<&str> = parsed
            .refs
            .iter()
            .filter(|r| r.kind == "call")
            .map(|r| r.name.as_str())
            .collect();
        assert!(call_refs.contains(&"add"), "handler calls add closure");
    }

    /// Python inheritance, async methods and decorated methods.
    #[test]
    fn test_parse_python_complex() {
        let src = b"from typing import Optional, List

class Repository:
    def __init__(self, db: str) -> None:
        self.db = db

    async def find(self, id: int) -> Optional[dict]:
        return None

    @classmethod
    def default(cls) -> 'Repository':
        return cls('sqlite')

    @staticmethod
    def version() -> str:
        return '1.0'

class UserService(Repository):
    def __init__(self) -> None:
        super().__init__('users')

    async def find(self, id: int) -> Optional[dict]:
        return {'id': id}

def compute(items: List[int]) -> int:
    return sum(filter(None, map(lambda x: x * 2, items)))
";
        let parsed = parse_source(Lang::Python, src);
        assert!(find_def(&parsed, "Repository").is_some(), "Repository class");
        assert!(find_def(&parsed, "UserService").is_some(), "UserService class");
        assert!(find_def(&parsed, "find").is_some(), "find method (both classes)");
        assert!(find_def(&parsed, "default").is_some(), "default classmethod");
        assert!(find_def(&parsed, "version").is_some(), "version staticmethod");
        assert!(find_def(&parsed, "compute").is_some(), "compute function");
        assert!(find_def(&parsed, "__init__").is_some(), "__init__ method");
        assert!(count_defs_of_kind(&parsed, "function") >= 3);
        assert!(count_defs_of_kind(&parsed, "class") >= 2);
    }

    /// JavaScript class inheritance, async and static methods.
    #[test]
    fn test_parse_javascript_complex() {
        let src = b"class Repository {
    constructor(db) { this.db = db; }
    async find(id) { return null; }
    static default() { return new Repository('sqlite'); }
}

class UserService extends Repository {
    constructor() { super('users'); }
    async find(id) { return { id }; }
}

function compute(arr) {
    return arr.filter(x => x != null).map(x => x * 2);
}
";
        let parsed = parse_source(Lang::JavaScript, src);
        assert!(find_def(&parsed, "Repository").is_some(), "Repository class");
        assert!(find_def(&parsed, "UserService").is_some(), "UserService class");
        assert!(find_def(&parsed, "find").is_some(), "find method");
        assert!(find_def(&parsed, "default").is_some(), "default static method");
        assert!(find_def(&parsed, "compute").is_some(), "compute function");
    }

    /// Go interfaces, structs, receiver methods and variadic functions.
    #[test]
    fn test_parse_go_complex() {
        let src = b"package main

import \"fmt\"

type Walker interface {
    Walk() int
}

type Thing struct {
    value int
}

func (t *Thing) Walk() int {
    return t.value
}

func NewThing(v int) *Thing {
    return &Thing{value: v}
}

func sum(vals ...int) int {
    total := 0
    for _, v := range vals {
        total += v
    }
    return total
}
";
        let parsed = parse_source(Lang::Go, src);
        assert!(find_def(&parsed, "Walker").is_some(), "Walker interface");
        assert!(find_def(&parsed, "Thing").is_some(), "Thing struct");
        assert!(find_def(&parsed, "Walk").is_some(), "Walk method");
        assert!(find_def(&parsed, "NewThing").is_some(), "NewThing constructor");
        assert!(find_def(&parsed, "sum").is_some(), "sum variadic function");
    }

    /// Java generics, inheritance, enums and generic interfaces.
    #[test]
    fn test_parse_java_complex() {
        let src = b"package com.example;

import java.util.List;

class Repository<T> {
    public T find(int id) { return null; }

    public List<T> findAll() { return null; }
}

class UserService extends Repository<String> {
    @Override
    public String find(int id) {
        return \"user\";
    }
}

enum Status { ACTIVE, INACTIVE }

interface Cache<K, V> {
    V get(K key);
    void put(K key, V value);
}
";
        let parsed = parse_source(Lang::Java, src);
        assert!(find_def(&parsed, "Repository").is_some(), "Repository generic class");
        assert!(find_def(&parsed, "UserService").is_some(), "UserService class");
        assert!(find_def(&parsed, "Cache").is_some(), "Cache interface");
        assert!(find_def(&parsed, "find").is_some(), "find method");
        assert!(find_def(&parsed, "findAll").is_some(), "findAll method");
    }

    /// C typedefs, static/inline functions and struct definitions.
    #[test]
    fn test_parse_c_complex() {
        let src = b"#include <stddef.h>

typedef struct Buffer Buffer;

struct Buffer {
    char *data;
    size_t len;
};

static inline int max(int a, int b) {
    return a > b ? a : b;
}

static void internal_cleanup(Buffer *buf) {
    if (buf) buf->len = 0;
}

int process(Buffer *buf) {
    return (int)buf->len;
}
";
        let parsed = parse_source(Lang::C, src);
        assert!(find_def(&parsed, "Buffer").is_some(), "Buffer struct");
        assert!(find_def(&parsed, "max").is_some(), "max inline function");
        assert!(find_def(&parsed, "internal_cleanup").is_some(), "internal_cleanup static");
        assert!(find_def(&parsed, "process").is_some(), "process function");
        let types = count_defs_of_kind(&parsed, "class");
        assert!(types >= 1, "should have at least Buffer struct, got {types}");
    }

    /// C++ virtual/override methods, operators and namespaced templates.
    #[test]
    fn test_parse_cpp_complex() {
        let src = b"#include <vector>

class Base {
public:
    virtual ~Base() = default;
    virtual int compute() const = 0;
};

class Derived final : public Base {
public:
    int compute() const override { return value_; }

    Derived& operator=(const Derived& other) {
        value_ = other.value_;
        return *this;
    }

private:
    int value_ = 42;
};

template<typename T>
constexpr T pi = T(3.1415926535);

namespace detail {
    template<typename T>
    class ScopedPtr {
    public:
        explicit ScopedPtr(T* ptr) : ptr_(ptr) {}
        ~ScopedPtr() { delete ptr_; }
        T& operator*() const { return *ptr_; }
    private:
        T* ptr_;
    };
}
";
        let parsed = parse_source(Lang::Cpp, src);
        assert!(find_def(&parsed, "Base").is_some(), "Base abstract class");
        assert!(find_def(&parsed, "Derived").is_some(), "Derived class");
        assert!(find_def(&parsed, "ScopedPtr").is_some(), "ScopedPtr template class");
        assert!(find_def(&parsed, "compute").is_some(), "compute method");
    }

    /// Ruby mixins, inheritance, class methods and instance methods.
    #[test]
    fn test_parse_ruby_complex() {
        let src = b"module Persistence
  def save
    'saved'
  end
end

class BaseRecord
  attr_accessor :id

  def initialize(id = nil)
    @id = id
  end
end

class User < BaseRecord
  include Persistence

  attr_reader :name

  def initialize(id, name)
    super(id)
    @name = name
  end

  def self.find(id)
    new(id, 'default')
  end

  def to_s
    \"User(#{@id}, #{@name})\"
  end
end
";
        let parsed = parse_source(Lang::Ruby, src);
        assert!(find_def(&parsed, "Persistence").is_some(), "Persistence module");
        assert!(find_def(&parsed, "BaseRecord").is_some(), "BaseRecord class");
        assert!(find_def(&parsed, "User").is_some(), "User class");
        assert!(find_def(&parsed, "save").is_some(), "save method");
        assert!(find_def(&parsed, "initialize").is_some(), "initialize");
        assert!(find_def(&parsed, "find").is_some(), "find class method");
        assert!(find_def(&parsed, "to_s").is_some(), "to_s method");
    }

    /// PHP interfaces, traits, constructor promotion and static methods.
    #[test]
    fn test_parse_php_complex() {
        let src = b"<?php

namespace App\\Service;

interface CacheInterface
{
    public function get(string $key): mixed;
    public function set(string $key, mixed $value, int $ttl = 0): void;
}

trait Loggable
{
    public function log(string $msg): void
    {
        echo \\date('[Y-m-d] ') . $msg;
    }
}

class UserService implements CacheInterface
{
    use Loggable;

    public function __construct(
        private string $prefix = 'usr'
    ) {}

    public function get(string $key): mixed
    {
        $this->log(\"get: $key\");
        return null;
    }

    public function set(string $key, mixed $value, int $ttl = 0): void
    {
        $this->log(\"set: $key\");
    }

    public static function createDefault(): self
    {
        return new self();
    }
}
";
        let parsed = parse_source(Lang::Php, src);
        assert!(find_def(&parsed, "CacheInterface").is_some(), "CacheInterface");
        assert!(find_def(&parsed, "Loggable").is_some(), "Loggable trait");
        assert!(find_def(&parsed, "UserService").is_some(), "UserService class");
        assert!(find_def(&parsed, "get").is_some(), "get method");
        assert!(find_def(&parsed, "set").is_some(), "set method");
        assert!(find_def(&parsed, "log").is_some(), "log method");
        assert!(find_def(&parsed, "createDefault").is_some(), "createDefault static");
    }

    /// Empty input yields no definitions and no references for any language.
    #[test]
    fn test_parse_empty_source() {
        let src = b"";
        for lang in Lang::all() {
            let parsed = parse_source(lang, src);
            assert!(parsed.defs.is_empty(), "{:?} should produce no defs from empty input", lang);
            assert!(parsed.refs.is_empty(), "{:?} should produce no refs from empty input", lang);
        }
    }

    /// Whitespace-only input yields no definitions for any language.
    #[test]
    fn test_parse_whitespace_only() {
        for lang in Lang::all() {
            let parsed = parse_source(lang, b"\n\n \n\t\n");
            assert!(parsed.defs.is_empty(), "{:?} whitespace should produce no defs", lang);
        }
    }

    /// A syntax error in one function must not hide later definitions.
    #[test]
    fn test_parse_syntax_error_recovers_gracefully() {
        let src = b"fn broken(x: i32) -> i32 {
    x +
}
fn fine() {}
";
        let parsed = parse_source(Lang::Rust, src);
        assert!(!parsed.defs.is_empty(), "should recover and find some defs");
        assert!(find_def(&parsed, "fine").is_some(), "fine should be found");
    }

    /// A file with more than `MAX_SYMBOLS_PER_FILE` definitions is returned
    /// in full.
    ///
    /// NOTE(review): despite the test name, this asserts that the parser does
    /// NOT truncate at `MAX_SYMBOLS_PER_FILE`.
    #[test]
    fn test_parse_large_file_truncated() {
        let mut src = String::new();
        for i in 0..MAX_SYMBOLS_PER_FILE + 100 {
            src.push_str(&format!("fn func_{i}() {{}}\n"));
        }
        let parsed = parse_source(Lang::Rust, src.as_bytes());
        assert!(
            parsed.defs.len() > MAX_SYMBOLS_PER_FILE,
            "should parse more than cap without truncation, got {}",
            parsed.defs.len()
        );
    }

    /// Every listed language produces at least one definition from a minimal
    /// sample.
    #[test]
    fn test_parse_all_languages_produce_defs() {
        let samples: Vec<(Lang, &[u8])> = vec![
            (Lang::Rust, b"fn foo() {}\nconst X: i32 = 1;\n"),
            (Lang::Python, b"def foo(): pass\nX = 1\n"),
            (Lang::JavaScript, b"function foo() {}\nconst X = 1;\n"),
            (Lang::TypeScript, b"abstract class Foo {}\ninterface Bar {}\n"),
            (Lang::Tsx, b"abstract class Foo {}\ninterface Bar {}\n"),
            (Lang::Go, b"func foo() {}\nconst X = 1\n"),
            (Lang::Java, b"class Foo {}\n"),
            (Lang::C, b"int foo() { return 1; }\n"),
            (Lang::Cpp, b"int foo() { return 1; }\nclass Bar {};\n"),
            (Lang::Ruby, b"def foo; end\nX = 1\n"),
            (Lang::Php, b"<?php function foo() {}\n"),
        ];
        for (lang, src) in samples {
            let parsed = parse_source(lang, src);
            assert!(
                !parsed.defs.is_empty(),
                "{:?} should produce at least one def",
                lang
            );
        }
    }

    /// Header-style C: prototypes without bodies and include guards.
    #[test]
    fn test_parse_c_header_file() {
        let src = b"#ifndef FOO_H
#define FOO_H

typedef struct Buffer Buffer;

struct Buffer {
    char *data;
    size_t len;
};

int process(Buffer *buf);
void free_buffer(Buffer *buf);

#endif
";
        let parsed = parse_source(Lang::C, src);
        assert!(find_def(&parsed, "process").is_some(), "process function");
        assert!(find_def(&parsed, "free_buffer").is_some(), "free_buffer function");
        assert!(find_def(&parsed, "Buffer").is_some(), "Buffer struct");
    }

    /// C++ class templates, specializations and declared-only methods.
    #[test]
    fn test_parse_cpp_template_and_methods() {
        let src = b"template<typename T>
class Vector {
public:
    void push(const T& val);
    T pop();
private:
    T* data_;
    size_t size_;
};

template<>
class Vector<bool> {
public:
    void push(bool val);
};
";
        let parsed = parse_source(Lang::Cpp, src);
        assert!(find_def(&parsed, "Vector").is_some(), "Vector template class");
        assert!(find_def(&parsed, "push").is_some(), "push method");
        assert!(find_def(&parsed, "pop").is_some(), "pop method");
    }

    /// A minimal PHP file with a single function.
    ///
    /// NOTE(review): despite the test name, the fixture DOES begin with the
    /// `<?php` opening tag.
    #[test]
    fn test_parse_php_without_opening_tag() {
        let src = b"<?php
function foo() {}
";
        let parsed = parse_source(Lang::Php, src);
        assert!(find_def(&parsed, "foo").is_some(), "foo function expected");
    }

    /// Ruby `def self.x` singleton methods are found alongside instance
    /// methods.
    #[test]
    fn test_parse_ruby_singleton_methods() {
        let src = b"class Foo
  def self.bar
    'class method'
  end

  def instance_method
    'instance'
  end
end
";
        let parsed = parse_source(Lang::Ruby, src);
        assert!(find_def(&parsed, "bar").is_some(), "self.bar method");
        assert!(find_def(&parsed, "instance_method").is_some(), "instance_method");
    }
}