use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::SystemTime;
use dashmap::DashMap;
use crate::lang::detect_file_type;
use crate::lang::outline::outline_language;
use crate::lang::treesitter::{extract_definition_name, DEFINITION_KINDS};
use crate::types::FileType;
/// Largest file size, in bytes, that `SymbolIndex::build` will index.
/// It is compared against `fs::metadata(..).len()`; bigger files are skipped.
const MAX_FILE_SIZE: u64 = 500_000;
/// Per-file result of the parallel parse step in `SymbolIndex::build`:
/// `(file path, file mtime, symbols)`, where each symbol is
/// `(name, 1-based line, is_definition)`.
type FileSymbols = (PathBuf, SystemTime, Vec<(Arc<str>, u32, bool)>);
/// One place where a symbol name was found by the indexer.
#[derive(Clone, Debug)]
pub struct SymbolLocation {
/// File in which the symbol was found.
pub path: PathBuf,
/// 1-based line number (tree-sitter start row + 1) of the syntax node.
pub line: u32,
/// Whether this location is a definition. The extractor in this file
/// records every symbol with `true`.
pub is_definition: bool,
/// Modification time of `path` captured when it was indexed;
/// `SystemTime::UNIX_EPOCH` when the metadata could not be read.
pub mtime: SystemTime,
}
/// Concurrent index from symbol name to the locations where it appears.
pub struct SymbolIndex {
// Symbol name -> every recorded location of that name.
symbols: DashMap<Arc<str>, Vec<SymbolLocation>>,
// Every file that has been indexed -> the mtime captured at indexing time.
indexed_files: DashMap<PathBuf, SystemTime>,
}
impl Default for SymbolIndex {
fn default() -> Self {
Self::new()
}
}
impl SymbolIndex {
    /// Creates an empty index: no symbols and no indexed files.
    #[must_use]
    pub fn new() -> Self {
        Self {
            symbols: DashMap::new(),
            indexed_files: DashMap::new(),
        }
    }

    /// Indexes every supported source file found under `scope`.
    ///
    /// The walk follows symlinks, includes hidden files and disables all
    /// gitignore/ignore-file handling; only directories whose name is listed
    /// in `crate::search::io::SKIP_DIRS` are pruned. A file is indexed when
    /// `detect_file_type` classifies it as code, `outline_language` provides a
    /// grammar for it, and it is at most `MAX_FILE_SIZE` bytes. Files that
    /// cannot be read into a `String` are skipped.
    ///
    /// Files are parsed in parallel and merged into the index afterwards.
    /// Locations recorded earlier for a file that is indexed again are removed
    /// first, so calling `build` repeatedly does not duplicate entries. Files
    /// indexed earlier that this walk no longer finds are left untouched.
    pub fn build(&self, scope: &Path) {
        use ignore::WalkBuilder;
        use rayon::prelude::*;

        let files: Vec<PathBuf> = WalkBuilder::new(scope)
            .follow_links(true)
            .hidden(false)
            .git_ignore(false)
            .git_global(false)
            .git_exclude(false)
            .ignore(false)
            .parents(false)
            .filter_entry(|entry| {
                if entry.file_type().is_some_and(|ft| ft.is_dir()) {
                    if let Some(name) = entry.file_name().to_str() {
                        return !crate::search::io::SKIP_DIRS.contains(&name);
                    }
                }
                true
            })
            .build()
            .filter_map(|entry| {
                let entry = entry.ok()?;
                if !entry.file_type()?.is_file() {
                    return None;
                }
                let path = entry.into_path();
                let FileType::Code(lang) = detect_file_type(&path) else {
                    return None;
                };
                // Only languages with an outline grammar can be parsed.
                outline_language(lang)?;
                let meta = fs::metadata(&path).ok()?;
                (meta.len() <= MAX_FILE_SIZE).then_some(path)
            })
            .collect();

        let results: Vec<FileSymbols> = files
            .par_iter()
            .filter_map(|path| {
                let content = fs::read_to_string(path).ok()?;
                let mtime = Self::modified_or_epoch(path);
                Some((path.clone(), mtime, extract_symbols(path, &content)))
            })
            .collect();

        // Replace, rather than append to, whatever an earlier build or
        // `index_file` call recorded for the files parsed just now. The set
        // borrows from `results`, so it is scoped to end before the merge loop
        // consumes `results`.
        {
            let reindexed: std::collections::HashSet<&Path> = results
                .iter()
                .map(|(path, _, _)| path.as_path())
                .filter(|path| self.indexed_files.contains_key(*path))
                .collect();
            if !reindexed.is_empty() {
                self.purge_locations(|path| reindexed.contains(path));
            }
        }

        for (path, mtime, symbols) in results {
            self.store_file(path, mtime, symbols);
        }
    }

    /// Returns `true` when at least one indexed file lies under `scope`
    /// (component-wise prefix match via `Path::starts_with`).
    #[must_use]
    pub fn is_built(&self, scope: &Path) -> bool {
        self.indexed_files
            .iter()
            .any(|entry| entry.key().starts_with(scope))
    }

    /// Returns every recorded location of `name` whose file lies under `scope`.
    ///
    /// The result is empty when the name is unknown or has no location inside
    /// `scope`.
    #[must_use]
    pub fn lookup(&self, name: &str, scope: &Path) -> Vec<SymbolLocation> {
        self.collect_locations(name, |loc| loc.path.starts_with(scope))
    }

    /// Like [`SymbolIndex::lookup`], but keeps only locations flagged as
    /// definitions.
    #[must_use]
    pub fn lookup_definitions(&self, name: &str, scope: &Path) -> Vec<SymbolLocation> {
        self.collect_locations(name, |loc| {
            loc.is_definition && loc.path.starts_with(scope)
        })
    }

    /// Indexes (or re-indexes) a single file from already loaded `content`.
    ///
    /// `path` is used for language detection and for the mtime lookup; the
    /// text that is parsed is `content`. If the file was indexed before, its
    /// old locations are removed first, and names left without any location
    /// are dropped so `symbol_count` stays accurate.
    pub fn index_file(&self, path: &Path, content: &str) {
        let mtime = Self::modified_or_epoch(path);
        if self.indexed_files.contains_key(path) {
            self.purge_locations(|indexed| indexed == path);
        }
        let symbols = extract_symbols(path, content);
        self.store_file(path.to_path_buf(), mtime, symbols);
    }

    /// Number of distinct symbol names currently in the index.
    #[must_use]
    pub fn symbol_count(&self) -> usize {
        self.symbols.len()
    }

    /// Number of files that have been indexed.
    #[must_use]
    pub fn file_count(&self) -> usize {
        self.indexed_files.len()
    }

    /// Modification time of `path`, or `UNIX_EPOCH` when it cannot be read.
    fn modified_or_epoch(path: &Path) -> SystemTime {
        fs::metadata(path)
            .and_then(|meta| meta.modified())
            .unwrap_or(SystemTime::UNIX_EPOCH)
    }

    /// Clones the locations of `name` accepted by `keep`.
    fn collect_locations(
        &self,
        name: &str,
        keep: impl Fn(&SymbolLocation) -> bool,
    ) -> Vec<SymbolLocation> {
        // `Arc<str>: Borrow<str>`, so the map can be queried with the `&str`
        // directly instead of allocating a temporary `Arc` per lookup.
        let Some(locations) = self.symbols.get(name) else {
            return Vec::new();
        };
        locations.iter().filter(|loc| keep(loc)).cloned().collect()
    }

    /// Removes every location whose file satisfies `is_stale`, dropping symbol
    /// names that end up with no locations at all.
    fn purge_locations(&self, is_stale: impl Fn(&Path) -> bool) {
        self.symbols.retain(|_, locations| {
            locations.retain(|loc| !is_stale(loc.path.as_path()));
            !locations.is_empty()
        });
    }

    /// Records `symbols` as belonging to `path` and marks the file as indexed.
    fn store_file(&self, path: PathBuf, mtime: SystemTime, symbols: Vec<(Arc<str>, u32, bool)>) {
        for (name, line, is_definition) in symbols {
            self.symbols.entry(name).or_default().push(SymbolLocation {
                path: path.clone(),
                line,
                is_definition,
                mtime,
            });
        }
        // Inserted last so `path` can be moved instead of cloned once more.
        self.indexed_files.insert(path, mtime);
    }
}
/// Parses `content` with the tree-sitter grammar selected for `path` and
/// returns the symbols found as `(name, 1-based line, is_definition)` tuples.
///
/// The result is empty when `path` is not detected as code, when no outline
/// grammar exists for its language, when the parser rejects the grammar, or
/// when parsing yields no tree.
fn extract_symbols(path: &Path, content: &str) -> Vec<(Arc<str>, u32, bool)> {
    let mut found = Vec::new();

    let lang = match detect_file_type(path) {
        FileType::Code(lang) => lang,
        _ => return found,
    };
    let Some(grammar) = outline_language(lang) else {
        return found;
    };

    let mut parser = tree_sitter::Parser::new();
    let tree = match parser.set_language(&grammar) {
        Ok(()) => parser.parse(content, None),
        Err(_) => None,
    };

    if let Some(tree) = tree {
        let source_lines: Vec<&str> = content.lines().collect();
        walk_definitions(tree.root_node(), &source_lines, &mut found, lang, 0);
    }
    found
}
fn walk_definitions(
node: tree_sitter::Node,
lines: &[&str],
symbols: &mut Vec<(Arc<str>, u32, bool)>,
lang: crate::types::Lang,
depth: usize,
) {
if depth > 3 {
return;
}
let kind = node.kind();
if DEFINITION_KINDS.contains(&kind) {
if let Some(name) = extract_definition_name(node, lines) {
let line = node.start_position().row as u32 + 1;
symbols.push((Arc::from(name.as_str()), line, true));
}
if kind == "impl_item" {
if let Some(trait_name) = crate::lang::treesitter::extract_impl_trait(node, lines) {
let line = node.start_position().row as u32 + 1;
symbols.push((Arc::from(trait_name.as_str()), line, true));
}
if let Some(type_name) = crate::lang::treesitter::extract_impl_type(node, lines) {
let line = node.start_position().row as u32 + 1;
symbols.push((Arc::from(type_name.as_str()), line, true));
}
}
if kind == "class_declaration" || kind == "class_definition" {
let interfaces = crate::lang::treesitter::extract_implemented_interfaces(node, lines);
for iface in interfaces {
let line = node.start_position().row as u32 + 1;
symbols.push((Arc::from(iface.as_str()), line, true));
}
}
} else if lang == crate::types::Lang::Elixir
&& crate::lang::treesitter::is_elixir_definition(node, lines)
{
if let Some(name) = crate::lang::treesitter::extract_elixir_definition_name(node, lines) {
let line = node.start_position().row as u32 + 1;
symbols.push((Arc::from(name.as_str()), line, true));
}
}
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
walk_definitions(child, lines, symbols, lang, depth + 1);
}
}
// Unit tests for the symbol index. Each test that needs a file writes its
// fixture into its own directory under the system temp dir and removes the
// file again at the end (best effort).
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
// A freshly created index reports zero symbols and files, is not built for
// any scope, and returns no lookup results.
#[test]
fn test_empty_index() {
let index = SymbolIndex::new();
assert_eq!(index.symbol_count(), 0);
assert_eq!(index.file_count(), 0);
assert!(!index.is_built(Path::new("/tmp")));
assert!(index.lookup("foo", Path::new("/tmp")).is_empty());
}
// Rust fixture: a struct, an inherent method, a trait and a trait impl must
// all be extracted, and every extracted symbol must be flagged as a
// definition.
#[test]
fn test_extract_symbols_rust() {
let content = r#"
pub struct Foo {
bar: u32,
}
impl Foo {
pub fn baz(&self) -> u32 {
self.bar
}
}
trait MyTrait {
fn do_thing(&self);
}
impl MyTrait for Foo {
fn do_thing(&self) {}
}
"#;
let dir = std::env::temp_dir().join("srcwalk_test_extract_symbols");
let _ = fs::create_dir_all(&dir);
let path = dir.join("test.rs");
let mut f = fs::File::create(&path).unwrap();
f.write_all(content.as_bytes()).unwrap();
let symbols = extract_symbols(&path, content);
let names: Vec<&str> = symbols.iter().map(|(n, _, _)| n.as_ref()).collect();
assert!(names.contains(&"Foo"), "should find struct Foo: {names:?}");
assert!(names.contains(&"baz"), "should find fn baz: {names:?}");
assert!(
names.contains(&"MyTrait"),
"should find trait MyTrait: {names:?}"
);
assert!(
names.contains(&"do_thing"),
"should find fn do_thing: {names:?}"
);
assert!(symbols.iter().all(|(_, _, is_def)| *is_def));
let _ = fs::remove_file(&path);
}
// `index_file` records symbols with 1-based line numbers, and indexing the
// same path again with new content replaces the old symbols (`world`
// disappears, `updated` appears).
#[test]
fn test_index_file() {
let content = "pub fn hello() {}\npub fn world() {}";
let dir = std::env::temp_dir().join("srcwalk_test_index_file");
let _ = fs::create_dir_all(&dir);
let path = dir.join("test.rs");
fs::write(&path, content).unwrap();
let index = SymbolIndex::new();
index.index_file(&path, content);
assert_eq!(index.file_count(), 1);
let results = index.lookup("hello", &dir);
assert_eq!(results.len(), 1);
assert!(results[0].is_definition);
assert_eq!(results[0].line, 1);
let results = index.lookup("world", &dir);
assert_eq!(results.len(), 1);
assert_eq!(results[0].line, 2);
// Re-index the same path with changed content.
let new_content = "pub fn hello() {}\npub fn updated() {}";
fs::write(&path, new_content).unwrap();
index.index_file(&path, new_content);
assert!(index.lookup("world", &dir).is_empty());
assert_eq!(index.lookup("updated", &dir).len(), 1);
let _ = fs::remove_file(&path);
}
// `lookup_definitions` returns the definition inside the matching scope and
// nothing for a scope that does not contain the file.
#[test]
fn test_lookup_definitions_filter() {
let content = "pub fn target() {}";
let dir = std::env::temp_dir().join("srcwalk_test_lookup_defs");
let _ = fs::create_dir_all(&dir);
let path = dir.join("test.rs");
fs::write(&path, content).unwrap();
let index = SymbolIndex::new();
index.index_file(&path, content);
let defs = index.lookup_definitions("target", &dir);
assert_eq!(defs.len(), 1);
assert!(defs[0].is_definition);
let defs = index.lookup_definitions("target", Path::new("/nonexistent"));
assert!(defs.is_empty());
let _ = fs::remove_file(&path);
}
// TypeScript fixture: a function, a class and an interface must be extracted.
#[test]
fn test_extract_symbols_typescript() {
let content = r#"
function greet(name: string): string {
return `Hello, ${name}!`;
}
class Greeter {
greeting: string;
constructor(message: string) {
this.greeting = message;
}
}
interface Printable {
print(): void;
}
"#;
let dir = std::env::temp_dir().join("srcwalk_test_extract_ts");
let _ = fs::create_dir_all(&dir);
let path = dir.join("test.ts");
fs::write(&path, content).unwrap();
let symbols = extract_symbols(&path, content);
let names: Vec<&str> = symbols.iter().map(|(n, _, _)| n.as_ref()).collect();
assert!(
names.contains(&"greet"),
"should find function greet: {names:?}"
);
assert!(
names.contains(&"Greeter"),
"should find class Greeter: {names:?}"
);
assert!(
names.contains(&"Printable"),
"should find interface Printable: {names:?}"
);
let _ = fs::remove_file(&path);
}
// Python fixture: a top-level function and a class must be extracted.
#[test]
fn test_extract_symbols_python() {
let content = r#"
def hello():
pass
class MyClass:
def method(self):
pass
"#;
let dir = std::env::temp_dir().join("srcwalk_test_extract_py");
let _ = fs::create_dir_all(&dir);
let path = dir.join("test.py");
fs::write(&path, content).unwrap();
let symbols = extract_symbols(&path, content);
let names: Vec<&str> = symbols.iter().map(|(n, _, _)| n.as_ref()).collect();
assert!(names.contains(&"hello"), "should find def hello: {names:?}");
assert!(
names.contains(&"MyClass"),
"should find class MyClass: {names:?}"
);
let _ = fs::remove_file(&path);
}
}