use super::{Symbol, SymbolKind, IndexStats};
use dashmap::DashMap;
use rayon::prelude::*;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::SystemTime;
use walkdir::WalkDir;
/// In-memory symbol table with secondary lookup indices.
///
/// Every map is wrapped in an `Arc`, so cloning a `SymbolIndex` produces
/// another handle to the same underlying maps rather than a deep copy.
#[derive(Clone)]
pub struct SymbolIndex {
/// Primary storage: symbol id -> symbol.
symbols: Arc<DashMap<String, Symbol>>,
/// Symbol name -> ids of the symbols carrying that name.
name_index: Arc<DashMap<String, Vec<String>>>,
/// Symbol `file_path` -> ids of the symbols recorded for that file.
file_index: Arc<DashMap<String, Vec<String>>>,
/// `Debug` rendering of the symbol kind -> ids of the symbols of that kind.
kind_index: Arc<DashMap<String, Vec<String>>>,
/// NOTE(review): only ever cleared in this file; nothing here populates or
/// reads it, so the meaning of its keys and values must be confirmed elsewhere.
relationships: Arc<DashMap<String, Vec<String>>>,
}
/// A `SymbolIndex` paired with per-file bookkeeping that is written to disk.
pub struct SymbolIndexStore {
/// The in-memory symbol index that directory scans populate.
index: SymbolIndex,
/// Base path of the store; the metadata file lives at this path with its
/// extension replaced by `meta.json`.
store_path: PathBuf,
/// Lossy string form of a file path -> metadata captured when the file was
/// last indexed.
file_metadata: Arc<DashMap<String, FileMetadata>>,
}
/// Snapshot of a source file taken when it was indexed; serialized as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FileMetadata {
/// Lossy string form of the file's path (the same string used as the map key).
path: String,
/// File length in bytes as reported by the filesystem.
size: u64,
/// Last-modification time as reported by the filesystem.
modified: SystemTime,
/// Lowercase hexadecimal MD5 digest of the file's contents.
hash: String,
}
impl SymbolIndex {
pub fn new() -> Self {
Self {
symbols: Arc::new(DashMap::new()),
name_index: Arc::new(DashMap::new()),
file_index: Arc::new(DashMap::new()),
kind_index: Arc::new(DashMap::new()),
relationships: Arc::new(DashMap::new()),
}
}
pub fn add_symbol(&self, symbol: Symbol) {
let symbol_id = symbol.id.clone();
self.symbols.insert(symbol_id.clone(), symbol.clone());
self.name_index
.entry(symbol.name.clone())
.or_insert_with(Vec::new)
.push(symbol_id.clone());
self.file_index
.entry(symbol.file_path.clone())
.or_insert_with(Vec::new)
.push(symbol_id.clone());
let kind_str = format!("{:?}", symbol.kind);
self.kind_index
.entry(kind_str)
.or_insert_with(Vec::new)
.push(symbol_id.clone());
}
pub fn get_symbol(&self, id: &str) -> Option<Symbol> {
self.symbols.get(id).map(|s| s.value().clone())
}
pub fn find_by_name(&self, name: &str) -> Vec<Symbol> {
self.name_index
.get(name)
.map(|ids| {
ids.iter()
.filter_map(|id| self.get_symbol(id))
.collect()
})
.unwrap_or_default()
}
pub fn find_by_name_pattern(&self, pattern: &str) -> Result<Vec<Symbol>, Box<dyn std::error::Error>> {
let regex = Regex::new(pattern)?;
let mut symbols = Vec::new();
for entry in self.name_index.iter() {
if regex.is_match(entry.key()) {
for id in entry.value() {
if let Some(symbol) = self.get_symbol(id) {
symbols.push(symbol);
}
}
}
}
Ok(symbols)
}
pub fn get_symbols_in_file(&self, file_path: &str) -> Vec<Symbol> {
self.file_index
.get(file_path)
.map(|ids| {
ids.iter()
.filter_map(|id| self.get_symbol(id))
.collect()
})
.unwrap_or_default()
}
pub fn get_symbols_by_kind(&self, kind: &SymbolKind) -> Vec<Symbol> {
let kind_str = format!("{:?}", kind);
self.kind_index
.get(&kind_str)
.map(|ids| {
ids.iter()
.filter_map(|id| self.get_symbol(id))
.collect()
})
.unwrap_or_default()
}
pub fn search_symbols(
&self,
name_pattern: Option<&str>,
kind: Option<&SymbolKind>,
file_path: Option<&str>,
visibility: Option<&super::SymbolVisibility>,
) -> Vec<Symbol> {
let mut results = Vec::new();
let candidates: Vec<Symbol> = if let Some(pattern) = name_pattern {
if let Ok(regex) = Regex::new(pattern) {
let mut symbols = Vec::new();
for entry in self.name_index.iter() {
if regex.is_match(entry.key()) {
for id in entry.value() {
if let Some(symbol) = self.get_symbol(id) {
symbols.push(symbol);
}
}
}
}
symbols
} else {
Vec::new()
}
} else if let Some(k) = kind {
self.get_symbols_by_kind(k)
} else if let Some(fp) = file_path {
self.get_symbols_in_file(fp)
} else {
self.symbols.iter().map(|e| e.value().clone()).collect()
};
for symbol in candidates {
if let Some(k) = kind {
if symbol.kind != *k {
continue;
}
}
if let Some(fp) = file_path {
if symbol.file_path != fp {
continue;
}
}
if let Some(v) = visibility {
if &symbol.visibility != v {
continue;
}
}
results.push(symbol);
}
results
}
pub fn get_stats(&self) -> IndexStats {
let mut symbols_by_kind = HashMap::new();
let mut symbols_by_language = HashMap::new();
for symbol in self.symbols.iter() {
let kind_str = format!("{:?}", symbol.kind);
*symbols_by_kind.entry(kind_str).or_insert(0) += 1;
if let Some(ext) = Path::new(&symbol.file_path).extension() {
let lang = match ext.to_str().unwrap_or("") {
"rs" => "Rust",
"py" | "pyw" | "pyi" => "Python",
"js" | "jsx" | "mjs" | "cjs" => "JavaScript",
"ts" | "tsx" => "TypeScript",
"go" => "Go",
"java" => "Java",
_ => "Other",
};
*symbols_by_language.entry(lang.to_string()).or_insert(0) += 1;
}
}
let total_files = self.file_index.len();
IndexStats {
total_symbols: self.symbols.len(),
total_files,
symbols_by_kind,
symbols_by_language,
index_size_bytes: 0, last_updated: chrono::Utc::now(),
}
}
pub fn clear(&self) {
self.symbols.clear();
self.name_index.clear();
self.file_index.clear();
self.kind_index.clear();
self.relationships.clear();
}
}
impl Default for SymbolIndex {
fn default() -> Self {
Self::new()
}
}
impl SymbolIndexStore {
pub fn new(store_path: PathBuf) -> Self {
let index = SymbolIndex::new();
let file_metadata = Arc::new(DashMap::new());
let metadata = if store_path.exists() {
Self::load_metadata(&store_path).unwrap_or_default()
} else {
HashMap::new()
};
for (path, meta) in metadata {
file_metadata.insert(path, meta);
}
Self {
index,
store_path,
file_metadata,
}
}
pub fn index_directory(
&self,
path: &Path,
extensions: Option<&[String]>,
exclude: Option<&[String]>,
) -> Result<usize, Box<dyn std::error::Error>> {
let mut indexed_count = 0;
let walker = WalkDir::new(path)
.into_iter()
.filter_entry(|e| {
if let Some(name) = e.file_name().to_str() {
if let Some(exclude_dirs) = exclude {
for exclude_dir in exclude_dirs {
if name == exclude_dir {
return false;
}
}
}
}
true
})
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file());
let files: Vec<PathBuf> = walker
.filter(|entry| {
let file_path = entry.path();
if let Some(exts) = extensions {
if let Some(ext) = file_path.extension().and_then(|s| s.to_str()) {
exts.iter().any(|e| e == ext)
} else {
false
}
} else {
true
}
})
.map(|e| e.path().to_path_buf())
.collect();
use rayon::prelude::*;
let results: Vec<Vec<Symbol>> = files
.par_iter()
.filter_map(|file_path| {
if self.should_reindex(file_path) {
if let Ok(symbols) = self.index_file(file_path) {
Some(symbols)
} else {
None
}
} else {
None
}
})
.collect();
for symbols in results {
for symbol in symbols {
self.index.add_symbol(symbol);
}
}
indexed_count = self.index.symbols.len();
self.save()?;
Ok(indexed_count)
}
fn index_file(&self, file_path: &Path) -> Result<Vec<Symbol>, Box<dyn std::error::Error>> {
use super::extractor::SymbolExtractor;
let extractor = SymbolExtractor::new();
let symbols = extractor.extract_from_file(file_path)?;
if let Ok(metadata) = fs::metadata(file_path) {
let modified = metadata.modified()?;
let size = metadata.len();
let content = fs::read_to_string(file_path)?;
let hash = format!("{:x}", md5::compute(content.as_bytes()));
let file_meta = FileMetadata {
path: file_path.to_string_lossy().to_string(),
size,
modified,
hash,
};
self.file_metadata.insert(
file_path.to_string_lossy().to_string(),
file_meta,
);
}
Ok(symbols)
}
fn should_reindex(&self, file_path: &Path) -> bool {
if let Ok(metadata) = fs::metadata(file_path) {
if let Ok(modified) = metadata.modified() {
if let Some(existing_meta) = self.file_metadata.get(
&file_path.to_string_lossy().to_string()
) {
return existing_meta.modified != modified || existing_meta.size != metadata.len();
}
}
}
true
}
pub fn index(&self) -> &SymbolIndex {
&self.index
}
pub fn save(&self) -> Result<(), Box<dyn std::error::Error>> {
self.save_metadata()?;
Ok(())
}
fn save_metadata(&self) -> Result<(), Box<dyn std::error::Error>> {
let metadata_map: HashMap<String, FileMetadata> = self
.file_metadata
.iter()
.map(|entry| (entry.key().clone(), entry.value().clone()))
.collect();
let json = serde_json::to_string_pretty(&metadata_map)?;
if let Some(parent) = self.store_path.parent() {
fs::create_dir_all(parent)?;
}
let metadata_path = self.store_path.with_extension("meta.json");
fs::write(&metadata_path, json)?;
Ok(())
}
fn load_metadata(path: &Path) -> Result<HashMap<String, FileMetadata>, Box<dyn std::error::Error>> {
let metadata_path = path.with_extension("meta.json");
if metadata_path.exists() {
let content = fs::read_to_string(&metadata_path)?;
let metadata: HashMap<String, FileMetadata> = serde_json::from_str(&content)?;
Ok(metadata)
} else {
Ok(HashMap::new())
}
}
pub fn clear(&self) -> Result<(), Box<dyn std::error::Error>> {
self.index.clear();
self.file_metadata.clear();
if self.store_path.exists() {
fs::remove_file(&self.store_path)?;
}
let metadata_path = self.store_path.with_extension("meta.json");
if metadata_path.exists() {
fs::remove_file(&metadata_path)?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// A single added symbol is reachable by id, by name, by file and by kind.
    #[test]
    fn test_symbol_index() {
        let sample = Symbol {
            id: "test_id".to_string(),
            name: "test_function".to_string(),
            kind: SymbolKind::Function,
            file_path: "test.rs".to_string(),
            line: 10,
            column: 0,
            end_line: 15,
            signature: "()".to_string(),
            documentation: None,
            visibility: super::super::SymbolVisibility::Public,
            parent: None,
            type_info: None,
            generics: vec![],
            annotations: vec![],
            attributes: vec![],
            metadata: HashMap::new(),
        };

        let index = SymbolIndex::new();
        index.add_symbol(sample.clone());

        // Lookup by id.
        let fetched = index.get_symbol("test_id");
        assert!(fetched.is_some());
        assert_eq!(fetched.unwrap().name, "test_function");

        // Lookup by exact name.
        let named = index.find_by_name("test_function");
        assert_eq!(named.len(), 1);
        assert_eq!(named[0].name, "test_function");

        // Lookup by file and by kind.
        assert_eq!(index.get_symbols_in_file("test.rs").len(), 1);
        assert_eq!(index.get_symbols_by_kind(&SymbolKind::Function).len(), 1);
    }

    /// Indexing a directory containing one Rust file yields its function and
    /// a symbol count that agrees with the index statistics.
    #[test]
    fn test_index_store() {
        let workspace = tempdir().unwrap();
        let store = SymbolIndexStore::new(workspace.path().join("test_index"));

        let source_file = workspace.path().join("test.rs");
        let source_text = r#"
fn test_function() -> i32 {
42
}
"#;
        fs::write(&source_file, source_text).unwrap();

        let wanted_extensions = ["rs".to_string()];
        let total = store
            .index_directory(workspace.path(), Some(&wanted_extensions[..]), None)
            .unwrap();
        assert!(total > 0);

        let found = store.index().find_by_name("test_function");
        assert!(!found.is_empty());

        let stats = store.index().get_stats();
        assert_eq!(stats.total_symbols, total);
    }
}