1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::ops::Range;
9
/// Numeric identifier for a symbol, stored as a `u32` newtype.
///
/// `#[repr(transparent)]` gives it the same layout as the wrapped `u32`.
/// The derived `Default` is `SymbolId(0)`, and the `Display` form is
/// `sym#<n>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[repr(transparent)]
pub struct SymbolId(pub u32);
15
16impl SymbolId {
17 #[inline]
19 pub const fn new(id: u32) -> Self {
20 Self(id)
21 }
22
23 #[inline]
25 pub const fn as_u32(self) -> u32 {
26 self.0
27 }
28}
29
30impl From<u32> for SymbolId {
31 #[inline]
32 fn from(id: u32) -> Self {
33 Self(id)
34 }
35}
36
37impl From<SymbolId> for u32 {
38 #[inline]
39 fn from(id: SymbolId) -> Self {
40 id.0
41 }
42}
43
44impl std::fmt::Display for SymbolId {
45 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46 write!(f, "sym#{}", self.0)
47 }
48}
49
/// Numeric identifier for a file, stored as a `u32` newtype.
///
/// `#[repr(transparent)]` gives it the same layout as the wrapped `u32`.
/// The derived `Default` is `FileId(0)`, and the `Display` form is
/// `file#<n>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[repr(transparent)]
pub struct FileId(pub u32);
55
56impl FileId {
57 #[inline]
59 pub const fn new(id: u32) -> Self {
60 Self(id)
61 }
62
63 #[inline]
65 pub const fn as_u32(self) -> u32 {
66 self.0
67 }
68}
69
70impl From<u32> for FileId {
71 #[inline]
72 fn from(id: u32) -> Self {
73 Self(id)
74 }
75}
76
77impl From<FileId> for u32 {
78 #[inline]
79 fn from(id: FileId) -> Self {
80 id.0
81 }
82}
83
84impl std::fmt::Display for FileId {
85 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
86 write!(f, "file#{}", self.0)
87 }
88}
89
/// One symbol record held in `SymbolIndex::symbols`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndexSymbol {
    /// Identifier of this symbol. `SymbolIndex` lookups use the raw value
    /// as a position in `SymbolIndex::symbols`.
    pub id: SymbolId,
    /// Symbol name; this is the key `SymbolIndex::find_symbols` matches on.
    pub name: String,
    /// Category of the symbol (function, class, ...).
    pub kind: IndexSymbolKind,
    /// Id of the associated file — presumably the file that defines the
    /// symbol; confirm against the indexer.
    pub file_id: FileId,
    /// Line/column range of the symbol; used by
    /// `SymbolIndex::find_symbol_at_line`.
    pub span: Span,
    /// Optional signature text (the tests in this file use e.g. `fn main()`).
    pub signature: Option<String>,
    /// Optional id of another symbol — presumably the enclosing scope;
    /// confirm against the indexer.
    pub parent: Option<SymbolId>,
    /// Visibility level recorded for the symbol.
    pub visibility: Visibility,
    /// Optional documentation text attached to the symbol.
    pub docstring: Option<String>,
}
112
/// Category of an indexed symbol.
///
/// `IndexSymbolKind::name` gives the lowercase label for each variant and
/// `IndexSymbolKind::is_scope` reports which variants count as scopes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum IndexSymbolKind {
    Function,
    Method,
    Class,
    Struct,
    Interface,
    Trait,
    Enum,
    Constant,
    Variable,
    Module,
    Import,
    Export,
    /// Labelled `"type"` by `IndexSymbolKind::name`.
    TypeAlias,
    Macro,
}
131
132impl IndexSymbolKind {
133 pub fn name(&self) -> &'static str {
134 match self {
135 Self::Function => "function",
136 Self::Method => "method",
137 Self::Class => "class",
138 Self::Struct => "struct",
139 Self::Interface => "interface",
140 Self::Trait => "trait",
141 Self::Enum => "enum",
142 Self::Constant => "constant",
143 Self::Variable => "variable",
144 Self::Module => "module",
145 Self::Import => "import",
146 Self::Export => "export",
147 Self::TypeAlias => "type",
148 Self::Macro => "macro",
149 }
150 }
151
152 pub fn is_scope(&self) -> bool {
154 matches!(
155 self,
156 Self::Class | Self::Struct | Self::Interface | Self::Trait | Self::Module | Self::Enum
157 )
158 }
159}
160
/// Visibility level recorded for a symbol. The default is `Public`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
pub enum Visibility {
    #[default]
    Public,
    Private,
    Protected,
    Internal,
}
170
/// A line/column range in a source file.
///
/// `Span::contains_line` treats both `start_line` and `end_line` as
/// inclusive. Whether lines and columns are 0- or 1-based is not fixed by
/// this type.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
pub struct Span {
    /// First line of the range (inclusive).
    pub start_line: u32,
    /// Column on `start_line` where the range begins.
    pub start_col: u16,
    /// Last line of the range (inclusive).
    pub end_line: u32,
    /// Column on `end_line` where the range ends.
    pub end_col: u16,
}
179
180impl Span {
181 pub fn new(start_line: u32, start_col: u16, end_line: u32, end_col: u16) -> Self {
182 Self { start_line, start_col, end_line, end_col }
183 }
184
185 pub fn contains_line(&self, line: u32) -> bool {
187 line >= self.start_line && line <= self.end_line
188 }
189
190 pub fn line_count(&self) -> u32 {
192 if self.end_line >= self.start_line {
193 self.end_line - self.start_line + 1
194 } else {
195 1
196 }
197 }
198}
199
/// One file record held in `SymbolIndex::files`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
    /// Identifier of this file. `SymbolIndex` lookups use the raw value as
    /// a position in `SymbolIndex::files`.
    pub id: FileId,
    /// File path; the key used by `SymbolIndex::get_file`.
    pub path: String,
    /// Language recorded for the file.
    pub language: Language,
    /// 32-byte content hash. The hash algorithm is not visible here.
    pub content_hash: [u8; 32],
    /// Half-open range of positions in `SymbolIndex::symbols` holding this
    /// file's symbols (see `SymbolIndex::get_file_symbols`).
    pub symbols: Range<u32>,
    /// Imports recorded for this file.
    pub imports: Vec<Import>,
    /// Presumably the number of lines in the file — confirm with the indexer.
    pub lines: u32,
    /// Presumably a token count for the file — confirm with the indexer.
    pub tokens: u32,
}
220
/// Programming language associated with a file.
///
/// The default is `Unknown`, which is also what `Language::from_extension`
/// returns for extensions it does not recognise.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
pub enum Language {
    Rust,
    Python,
    JavaScript,
    TypeScript,
    Go,
    Java,
    C,
    Cpp,
    CSharp,
    Ruby,
    Bash,
    Php,
    Kotlin,
    Swift,
    Scala,
    Haskell,
    Elixir,
    Clojure,
    OCaml,
    Lua,
    R,
    /// Fallback for unrecognised extensions.
    #[default]
    Unknown,
}
248
249impl Language {
250 pub fn from_extension(ext: &str) -> Self {
251 match ext.to_lowercase().as_str() {
252 "rs" => Self::Rust,
253 "py" | "pyi" | "pyw" => Self::Python,
254 "js" | "mjs" | "cjs" => Self::JavaScript,
255 "ts" | "mts" | "cts" => Self::TypeScript,
256 "tsx" | "jsx" => Self::TypeScript,
257 "go" => Self::Go,
258 "java" => Self::Java,
259 "c" | "h" => Self::C,
260 "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Self::Cpp,
261 "cs" => Self::CSharp,
262 "rb" => Self::Ruby,
263 "sh" | "bash" | "zsh" => Self::Bash,
264 "php" | "php3" | "php4" | "php5" | "phtml" => Self::Php,
265 "kt" | "kts" => Self::Kotlin,
266 "swift" => Self::Swift,
267 "scala" | "sc" => Self::Scala,
268 "hs" | "lhs" => Self::Haskell,
269 "ex" | "exs" => Self::Elixir,
270 "clj" | "cljs" | "cljc" | "edn" => Self::Clojure,
271 "ml" | "mli" => Self::OCaml,
272 "lua" => Self::Lua,
273 "r" | "rmd" => Self::R,
274 _ => Self::Unknown,
275 }
276 }
277
278 pub fn name(&self) -> &'static str {
279 match self {
280 Self::Rust => "rust",
281 Self::Python => "python",
282 Self::JavaScript => "javascript",
283 Self::TypeScript => "typescript",
284 Self::Go => "go",
285 Self::Java => "java",
286 Self::C => "c",
287 Self::Cpp => "cpp",
288 Self::CSharp => "csharp",
289 Self::Ruby => "ruby",
290 Self::Bash => "bash",
291 Self::Php => "php",
292 Self::Kotlin => "kotlin",
293 Self::Swift => "swift",
294 Self::Scala => "scala",
295 Self::Haskell => "haskell",
296 Self::Elixir => "elixir",
297 Self::Clojure => "clojure",
298 Self::OCaml => "ocaml",
299 Self::Lua => "lua",
300 Self::R => "r",
301 Self::Unknown => "unknown",
302 }
303 }
304}
305
/// One import statement recorded for a file (see `FileEntry::imports`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Import {
    /// Import source text — presumably the module or path as written in
    /// the importing file; confirm with the indexer.
    pub source: String,
    /// Raw id of the file the import resolves to, when known. Stored as a
    /// bare `u32` rather than a `FileId`.
    pub resolved_file: Option<u32>,
    /// Names of the imported symbols.
    pub symbols: Vec<String>,
    /// Location of the import in the importing file.
    pub span: Span,
    /// Flag marking the import as external — presumably meaning it points
    /// outside the indexed repository; confirm with the indexer.
    pub is_external: bool,
}
320
/// Index of files and symbols, with derived lookup tables.
///
/// The two lookup maps are marked `#[serde(skip)]`, so they are not
/// serialized and are empty after deserialization; call
/// `SymbolIndex::rebuild_lookups` to repopulate them from `files` and
/// `symbols`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SymbolIndex {
    /// Format version; `SymbolIndex::new` sets it to `CURRENT_VERSION`.
    pub version: u32,
    /// Name of the indexed repository.
    pub repo_name: String,
    /// Commit hash associated with the index, if any.
    pub commit_hash: Option<String>,
    /// Creation timestamp; `SymbolIndex::new` sets it to `0`. The unit is
    /// not visible here.
    pub created_at: u64,
    /// File records. Lookups index this vector by raw file id.
    pub files: Vec<FileEntry>,
    /// Symbol records. Lookups index this vector by raw symbol id.
    pub symbols: Vec<IndexSymbol>,

    /// File path -> raw file id. Not serialized.
    #[serde(skip)]
    pub file_by_path: HashMap<String, u32>,
    /// Symbol name -> raw ids of every symbol with that name. Not serialized.
    #[serde(skip)]
    pub symbols_by_name: HashMap<String, Vec<u32>>,
}
343
344impl Default for SymbolIndex {
345 fn default() -> Self {
346 Self::new()
347 }
348}
349
350impl SymbolIndex {
351 pub const CURRENT_VERSION: u32 = 1;
352
353 pub fn new() -> Self {
354 Self {
355 version: Self::CURRENT_VERSION,
356 repo_name: String::new(),
357 commit_hash: None,
358 created_at: 0,
359 files: Vec::new(),
360 symbols: Vec::new(),
361 file_by_path: HashMap::new(),
362 symbols_by_name: HashMap::new(),
363 }
364 }
365
366 pub fn rebuild_lookups(&mut self) {
368 self.file_by_path.clear();
369 self.symbols_by_name.clear();
370
371 for file in &self.files {
372 self.file_by_path
373 .insert(file.path.clone(), file.id.as_u32());
374 }
375
376 for symbol in &self.symbols {
377 self.symbols_by_name
378 .entry(symbol.name.clone())
379 .or_default()
380 .push(symbol.id.as_u32());
381 }
382 }
383
384 pub fn get_file(&self, path: &str) -> Option<&FileEntry> {
386 self.file_by_path
387 .get(path)
388 .and_then(|&id| self.files.get(id as usize))
389 }
390
391 pub fn get_file_by_id(&self, id: u32) -> Option<&FileEntry> {
393 self.files.get(id as usize)
394 }
395
396 pub fn get_symbol(&self, id: u32) -> Option<&IndexSymbol> {
398 self.symbols.get(id as usize)
399 }
400
401 pub fn get_file_symbols(&self, file_id: FileId) -> &[IndexSymbol] {
403 if let Some(file) = self.get_file_by_id(file_id.as_u32()) {
404 &self.symbols[file.symbols.start as usize..file.symbols.end as usize]
405 } else {
406 &[]
407 }
408 }
409
410 pub fn find_symbols(&self, name: &str) -> Vec<&IndexSymbol> {
412 self.symbols_by_name
413 .get(name)
414 .map(|ids| ids.iter().filter_map(|&id| self.get_symbol(id)).collect())
415 .unwrap_or_default()
416 }
417
418 pub fn find_symbol_at_line(&self, file_id: FileId, line: u32) -> Option<&IndexSymbol> {
420 self.get_file_symbols(file_id)
421 .iter()
422 .filter(|s| s.span.contains_line(line))
423 .min_by_key(|s| s.span.line_count())
425 }
426}
427
/// Dependency edges between files and between symbols, stored as flat
/// lists of raw-id pairs.
///
/// Each relation is kept in two lists: a forward list and a reverse list
/// with the pair swapped. The `add_*` methods on `DepGraph` push to both
/// at once.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DepGraph {
    /// `(importing file, imported file)` pairs.
    pub file_imports: Vec<(u32, u32)>,
    /// `(referencing symbol, referenced symbol)` pairs.
    pub symbol_refs: Vec<(u32, u32)>,

    /// `(imported file, importing file)` pairs; reverse of `file_imports`.
    pub file_imported_by: Vec<(u32, u32)>,
    /// `(referenced symbol, referencing symbol)` pairs; reverse of
    /// `symbol_refs`.
    pub symbol_ref_by: Vec<(u32, u32)>,

    /// `(caller, callee)` pairs.
    pub calls: Vec<(u32, u32)>,
    /// `(callee, caller)` pairs; reverse of `calls`.
    pub called_by: Vec<(u32, u32)>,

    /// Per-file scores — presumably PageRank values indexed by raw file
    /// id; nothing in this file writes or reads them, so confirm at the
    /// producer.
    pub file_pagerank: Vec<f32>,
    /// Per-symbol scores — presumably PageRank values indexed by raw
    /// symbol id; confirm at the producer.
    pub symbol_pagerank: Vec<f32>,
}
455
456impl DepGraph {
457 pub fn new() -> Self {
458 Self::default()
459 }
460
461 pub fn add_file_import(&mut self, from_file: u32, to_file: u32) {
463 self.file_imports.push((from_file, to_file));
464 self.file_imported_by.push((to_file, from_file));
465 }
466
467 pub fn add_symbol_ref(&mut self, from_symbol: u32, to_symbol: u32) {
469 self.symbol_refs.push((from_symbol, to_symbol));
470 self.symbol_ref_by.push((to_symbol, from_symbol));
471 }
472
473 pub fn add_call(&mut self, caller: u32, callee: u32) {
475 self.calls.push((caller, callee));
476 self.called_by.push((callee, caller));
477 }
478
479 pub fn get_importers(&self, file_id: u32) -> Vec<u32> {
481 self.file_imported_by
482 .iter()
483 .filter_map(|&(f, importer)| if f == file_id { Some(importer) } else { None })
484 .collect()
485 }
486
487 pub fn get_imports(&self, file_id: u32) -> Vec<u32> {
489 self.file_imports
490 .iter()
491 .filter_map(|&(f, imported)| if f == file_id { Some(imported) } else { None })
492 .collect()
493 }
494
495 pub fn get_referencers(&self, symbol_id: u32) -> Vec<u32> {
497 self.symbol_ref_by
498 .iter()
499 .filter_map(|&(s, referencer)| {
500 if s == symbol_id {
501 Some(referencer)
502 } else {
503 None
504 }
505 })
506 .collect()
507 }
508
509 pub fn get_callers(&self, symbol_id: u32) -> Vec<u32> {
511 self.called_by
512 .iter()
513 .filter_map(|&(callee, caller)| {
514 if callee == symbol_id {
515 Some(caller)
516 } else {
517 None
518 }
519 })
520 .collect()
521 }
522
523 pub fn get_callees(&self, symbol_id: u32) -> Vec<u32> {
525 self.calls
526 .iter()
527 .filter_map(|&(caller, callee)| {
528 if caller == symbol_id {
529 Some(callee)
530 } else {
531 None
532 }
533 })
534 .collect()
535 }
536}
537
/// A single recorded use of a symbol.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Reference {
    /// Raw id of the symbol being referenced.
    pub symbol_id: u32,
    /// Raw id of a file — presumably the file containing the reference;
    /// confirm with the producer.
    pub file_id: u32,
    /// Location of the reference.
    pub span: Span,
    /// How the symbol is used at this location.
    pub kind: RefKind,
}
550
/// The way a `Reference` uses its symbol.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RefKind {
    Call,
    Read,
    Write,
    Import,
    TypeRef,
    Inheritance,
}
567
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a parentless function symbol in file 0 covering the given
    /// `(first, last)` lines, with signature `fn <name>()`.
    fn function_symbol(
        id: u32,
        name: &str,
        lines: (u32, u32),
        visibility: Visibility,
    ) -> IndexSymbol {
        let (first, last) = lines;
        IndexSymbol {
            id: SymbolId::new(id),
            name: name.to_owned(),
            kind: IndexSymbolKind::Function,
            file_id: FileId::new(0),
            span: Span::new(first, 0, last, 0),
            signature: Some(format!("fn {name}()")),
            parent: None,
            visibility,
            docstring: None,
        }
    }

    #[test]
    fn test_span_contains_line() {
        let span = Span::new(10, 0, 20, 0);
        // Both end lines are inclusive; the neighbours just outside are not.
        for inside in [10, 15, 20] {
            assert!(span.contains_line(inside));
        }
        for outside in [9, 21] {
            assert!(!span.contains_line(outside));
        }
    }

    #[test]
    fn test_language_from_extension() {
        let cases = [
            ("rs", Language::Rust),
            ("py", Language::Python),
            ("ts", Language::TypeScript),
            ("xyz", Language::Unknown),
        ];
        for (ext, expected) in cases {
            assert_eq!(Language::from_extension(ext), expected);
        }
    }

    #[test]
    fn test_symbol_index_lookups() {
        let mut index = SymbolIndex::new();
        index.files.push(FileEntry {
            id: FileId::new(0),
            path: "src/main.rs".to_owned(),
            language: Language::Rust,
            content_hash: [0; 32],
            symbols: 0..2,
            imports: vec![],
            lines: 100,
            tokens: 500,
        });
        index
            .symbols
            .push(function_symbol(0, "main", (1, 10), Visibility::Public));
        index
            .symbols
            .push(function_symbol(1, "helper", (15, 25), Visibility::Private));

        index.rebuild_lookups();

        // Path lookup.
        assert!(index.get_file("src/main.rs").is_some());
        assert!(index.get_file("nonexistent.rs").is_none());

        // Name lookup.
        let main_symbols = index.find_symbols("main");
        assert_eq!(main_symbols.len(), 1);
        assert_eq!(main_symbols[0].name, "main");

        // Line lookup resolves to the symbol whose span covers the line.
        let file = FileId::new(0);
        let at_line_5 = index.find_symbol_at_line(file, 5);
        assert_eq!(at_line_5.map(|s| s.name.as_str()), Some("main"));

        let at_line_20 = index.find_symbol_at_line(file, 20);
        assert_eq!(at_line_20.map(|s| s.name.as_str()), Some("helper"));
    }

    #[test]
    fn test_dep_graph() {
        let mut graph = DepGraph::new();
        for (from, to) in [(0, 1), (0, 2), (1, 2)] {
            graph.add_file_import(from, to);
        }

        assert_eq!(graph.get_imports(0), vec![1, 2]);
        assert_eq!(graph.get_importers(2), vec![0, 1]);

        for callee in [20, 21] {
            graph.add_call(10, callee);
        }

        assert_eq!(graph.get_callees(10), vec![20, 21]);
        assert_eq!(graph.get_callers(20), vec![10]);
    }
}