use std::collections::HashMap;
use std::path::{Path, PathBuf};
use crate::types::*;
use crate::{parse_file, extract_source};
/// A single symbol stored in a [`ProjectIndex`], together with where it was
/// found and the source text that was extracted for it.
#[derive(Debug, Clone)]
pub struct IndexedSymbol {
    /// Path of the file the symbol was parsed from, as recorded by the index
    /// (relative to the index root whenever the file lies beneath it).
    pub file_path: String,
    /// The parsed symbol itself.
    pub symbol: Symbol,
    /// Source text extracted for this symbol from its file.
    pub source: String,
}
/// A textual reference from one file to a symbol defined in another file.
#[derive(Debug, Clone)]
pub struct CrossReference {
    /// Path of the file that mentions the target symbol.
    pub from_file: String,
    /// Name of a symbol in `from_file` whose source mentions the target, or
    /// the file path itself when no indexed symbol in that file does.
    pub from_symbol: String,
    /// Path of the file that defines the target symbol.
    pub to_file: String,
    /// Bare name of the target symbol.
    pub to_symbol: String,
}
/// In-memory index of the source files, symbols and cross-file references
/// found beneath a project root.
#[derive(Debug)]
pub struct ProjectIndex {
    /// Successfully parsed files, keyed by their recorded (root-relative) path.
    pub files: HashMap<String, ParsedFile>,
    /// Indexed symbols keyed by `path::name` for top-level symbols and
    /// `path::parent::child` for their direct children.
    pub symbols: HashMap<String, IndexedSymbol>,
    /// Cross-file references discovered while building the index.
    pub references: Vec<CrossReference>,
    /// Directory the index was built from.
    pub root: PathBuf,
}
/// File extensions (without the leading dot) of the files that are collected
/// for indexing.
const EXTENSIONS: &[&str] = &[
    "rs",
    "py",
    "ts",
    "tsx",
    "js",
    "jsx",
    "mjs",
    "cjs",
    "go",
];
/// Directory names that the file collector never descends into.
const SKIP_DIRS: &[&str] = &[
    "target",
    "node_modules",
    ".git",
    "__pycache__",
    "dist",
    "build",
    ".next",
    "vendor",
    ".cargo",
];
impl ProjectIndex {
pub fn build(root: &Path) -> Self {
let mut index = Self {
files: HashMap::new(),
symbols: HashMap::new(),
references: Vec::new(),
root: root.to_path_buf(),
};
let files = collect_source_files(root);
for file_path in &files {
let rel_path = file_path.strip_prefix(root)
.unwrap_or(file_path)
.to_string_lossy()
.to_string();
let content = match std::fs::read_to_string(file_path) {
Ok(c) => c,
Err(_) => continue,
};
if let Some(parsed) = parse_file(&content, &rel_path) {
for sym in &parsed.symbols {
let key = format!("{}::{}", rel_path, sym.name);
index.symbols.insert(key, IndexedSymbol {
file_path: rel_path.clone(),
symbol: sym.clone(),
source: extract_source(sym, &content),
});
for child in &sym.children {
let child_key = format!("{}::{}::{}", rel_path, sym.name, child.name);
index.symbols.insert(child_key, IndexedSymbol {
file_path: rel_path.clone(),
symbol: child.clone(),
source: extract_source(child, &content),
});
}
}
index.files.insert(rel_path, parsed);
}
}
index.build_cross_references(&files, root);
index
}
pub fn find(&self, name: &str) -> Vec<&IndexedSymbol> {
self.symbols.values()
.filter(|s| s.symbol.name == name)
.collect()
}
pub fn find_fuzzy(&self, query: &str) -> Vec<&IndexedSymbol> {
let query_lower = query.to_lowercase();
self.symbols.values()
.filter(|s| s.symbol.name.to_lowercase().contains(&query_lower))
.collect()
}
pub fn callers_of(&self, symbol_name: &str) -> Vec<&CrossReference> {
self.references.iter()
.filter(|r| r.to_symbol == symbol_name)
.collect()
}
pub fn callees_of(&self, symbol_name: &str) -> Vec<&CrossReference> {
self.references.iter()
.filter(|r| r.from_symbol == symbol_name)
.collect()
}
fn build_cross_references(&mut self, files: &[PathBuf], root: &Path) {
let symbol_names: Vec<(String, String)> = self.symbols.values()
.filter(|s| !matches!(s.symbol.kind, SymbolKind::Import | SymbolKind::Const))
.map(|s| (s.symbol.name.clone(), s.file_path.clone()))
.collect();
for file_path in files {
let rel_path = file_path.strip_prefix(root)
.unwrap_or(file_path)
.to_string_lossy()
.to_string();
let content = match std::fs::read_to_string(file_path) {
Ok(c) => c,
Err(_) => continue,
};
let file_symbols: Vec<&IndexedSymbol> = self.symbols.values()
.filter(|s| s.file_path == rel_path)
.collect();
for (target_name, target_file) in &symbol_names {
if *target_file == rel_path { continue; }
if target_name.len() < 3 { continue; }
if !contains_identifier(&content, target_name) {
continue;
}
let referencing_symbol = file_symbols.iter()
.find(|s| {
let src = &s.source;
contains_identifier(src, target_name)
})
.map(|s| s.symbol.name.clone())
.unwrap_or_else(|| rel_path.clone());
self.references.push(CrossReference {
from_file: rel_path.clone(),
from_symbol: referencing_symbol,
to_file: target_file.clone(),
to_symbol: target_name.clone(),
});
}
}
}
pub fn stats(&self) -> IndexStats {
IndexStats {
files: self.files.len(),
symbols: self.symbols.len(),
references: self.references.len(),
}
}
}
/// Summary counts describing a [`ProjectIndex`].
#[derive(Debug, Clone)]
pub struct IndexStats {
    /// Number of parsed files in the index.
    pub files: usize,
    /// Number of indexed symbols.
    pub symbols: usize,
    /// Number of recorded cross-references.
    pub references: usize,
}
impl std::fmt::Display for IndexStats {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{} files, {} symbols, {} cross-references",
self.files, self.symbols, self.references)
}
}
/// Returns the paths of all supported source files found beneath `dir`,
/// searching subdirectories recursively.
fn collect_source_files(dir: &Path) -> Vec<PathBuf> {
    let mut found: Vec<PathBuf> = Vec::new();
    collect_recursive(dir, &mut found);
    found
}
/// Appends to `files` every file beneath `dir` whose extension is listed in
/// `EXTENSIONS`, descending into subdirectories depth-first.
///
/// Directories named in `SKIP_DIRS` are not entered. A directory that cannot
/// be listed, and individual entries that fail to read, are ignored.
fn collect_recursive(dir: &Path, files: &mut Vec<PathBuf>) {
    let listing = match std::fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(_) => return,
    };

    for path in listing.flatten().map(|entry| entry.path()) {
        if path.is_dir() {
            // A name that is missing or not valid UTF-8 is treated as "".
            let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if !SKIP_DIRS.contains(&dir_name) {
                collect_recursive(&path, files);
            }
            continue;
        }

        if !path.is_file() {
            continue;
        }

        let wanted = {
            let extension = path.extension().and_then(|e| e.to_str()).unwrap_or("");
            EXTENSIONS.contains(&extension)
        };
        if wanted {
            files.push(path);
        }
    }
}
/// Reports whether `name` occurs in `source` as a standalone identifier.
///
/// An occurrence counts when the byte immediately before it and the byte
/// immediately after it are not identifier bytes (see [`is_ident_char`]); the
/// start and end of `source` count as boundaries. Only ASCII letters, digits
/// and `_` are identifier bytes, so any non-ASCII neighbour is a boundary.
///
/// An empty `name` never matches.
fn contains_identifier(source: &str, name: &str) -> bool {
    // After a rejected occurrence the search resumes one *character* further
    // on. Stepping a single byte would land inside a multi-byte UTF-8
    // character when `name` starts with one, and slicing there panics.
    let step = match name.chars().next() {
        Some(first) => first.len_utf8(),
        None => return false,
    };

    let bytes = source.as_bytes();
    let mut pos = 0;
    // `pos` is always a char boundary no greater than `source.len()`: it is
    // either 0 or the end of the first character of a previous match.
    while let Some(found) = source[pos..].find(name) {
        let start = pos + found;
        let end = start + name.len();

        let boundary_before = start == 0 || !is_ident_char(bytes[start - 1]);
        let boundary_after = end >= bytes.len() || !is_ident_char(bytes[end]);
        if boundary_before && boundary_after {
            return true;
        }

        pos = start + step;
    }
    false
}

/// Returns `true` for bytes that can be part of an ASCII identifier:
/// letters, digits and the underscore.
fn is_ident_char(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_'
}
// Unit tests for the project index and the identifier matcher.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
// End-to-end check: writes two small Rust files into a temporary directory,
// builds an index over it and inspects the stats, lookups and cross-references.
#[test]
fn test_project_index() {
let dir = tempfile::tempdir().unwrap();
// Fixture 1: defines `Parser`, its methods and `create_parser`.
fs::write(dir.path().join("lib.rs"), r#"
pub struct Parser {
lang: String,
}
impl Parser {
pub fn new(lang: String) -> Self {
Self { lang }
}
pub fn parse(&self, source: &str) -> Vec<String> {
vec![]
}
}
pub fn create_parser() -> Parser {
Parser::new("rust".into())
}
"#).unwrap();
// Fixture 2: calls `create_parser`, which is defined in the other file.
fs::write(dir.path().join("main.rs"), r#"
mod lib;
fn main() {
let p = create_parser();
let result = p.parse("fn foo() {}");
println!("{:?}", result);
}
"#).unwrap();
let index = ProjectIndex::build(dir.path());
// Both fixture files must be indexed, with at least four symbols overall.
let stats = index.stats();
assert_eq!(stats.files, 2);
assert!(stats.symbols >= 4);
// Exact-name lookup.
let parsers = index.find("Parser");
assert!(!parsers.is_empty());
// The mention of `create_parser` in main.rs must be recorded as a reference.
let refs = index.callers_of("create_parser");
assert!(!refs.is_empty(), "main.rs should reference create_parser");
assert!(refs.iter().any(|r| r.from_file == "main.rs"));
// Case-insensitive substring lookup should hit at least two symbols.
let fuzzy = index.find_fuzzy("pars");
assert!(fuzzy.len() >= 2); }
// A name only matches when it is not embedded in a longer identifier.
#[test]
fn test_contains_identifier() {
assert!(contains_identifier("let x = foo();", "foo"));
assert!(!contains_identifier("let x = foobar();", "foo"));
assert!(contains_identifier("use crate::Parser;", "Parser"));
assert!(!contains_identifier("use crate::ParserBuilder;", "Parser"));
}
}