use std::path::Path;
use std::time::SystemTime;
use anyhow::{Context, Result};
use sha2::{Digest, Sha256};
use tracing::warn;
use walkdir::WalkDir;
use crate::db::Database;
use crate::languages::{detect_language, get_extractor, Extractor};
use crate::types::{FileInfo, Symbol};
/// Summary counters for one indexing run, returned by [`index_directory`].
///
/// Serialized for CLI/JSON reporting; the incremental (merkle-diff) counters
/// are omitted when zero so a plain full run produces a compact report.
#[derive(Debug, Default, serde::Serialize)]
pub struct IndexResult {
    // Files whose symbols were (re-)extracted this run.
    pub files_indexed: u32,
    // Files left untouched (git reported unchanged, or content hash matched).
    pub files_skipped: u32,
    // Previously indexed files that no longer exist on disk.
    pub files_removed: u32,
    pub symbols_added: u32,
    // Symbols whose own content hash changed (incremental runs only).
    #[serde(skip_serializing_if = "is_zero")]
    pub symbols_modified: u32,
    #[serde(skip_serializing_if = "is_zero")]
    pub symbols_unchanged: u32,
    #[serde(skip_serializing_if = "is_zero")]
    pub symbols_removed: u32,
    pub edges_added: u32,
    pub edges_resolved: u32,
    // Edges resolved via LSP; only populated when built with the `lsp` feature.
    #[serde(skip_serializing_if = "is_zero")]
    pub edges_lsp_resolved: u32,
}
/// Serde helper: lets counter fields be skipped while they are still zero.
fn is_zero(v: &u32) -> bool {
    v == &0
}
/// Index all supported source files under `root` into `db`.
///
/// With `force` every file is re-extracted from scratch. Otherwise the run is
/// incremental: files are skipped when git reports them unchanged since the
/// stored `last_commit`, or when their content hash matches what the database
/// already holds; changed files are merkle-diffed so only added / modified /
/// removed symbols are written. `lsp` additionally resolves edges through an
/// LSP server, but only when the `lsp` cargo feature is compiled in.
///
/// Returns counters describing what was done. Errors carry context from the
/// failing filesystem or database operation; per-file read/extract failures
/// are logged and skipped rather than aborting the run.
pub fn index_directory(db: &Database, root: &Path, force: bool, lsp: bool) -> Result<IndexResult> {
    let mut result = IndexResult::default();
    let root = root.canonicalize().context("Failed to resolve root path")?;
    // Extractors are created lazily, one per language seen during the walk.
    let mut extractors: std::collections::HashMap<&'static str, Box<dyn Extractor>> =
        std::collections::HashMap::new();
    let mut current_files = std::collections::HashSet::new();
    // Files whose symbol set changed this run; scopes edge re-resolution below.
    let mut dirty_files: std::collections::HashSet<String> = std::collections::HashSet::new();
    let last_commit = if force {
        None
    } else {
        db.get_metadata("last_commit")?
    };
    // `None` means "no usable git change info" -> fall back to hash checks.
    let changed_files = if force {
        None
    } else {
        git_changed_files(&root, last_commit.as_deref())
    };
    for entry in WalkDir::new(&root)
        .follow_links(true)
        .into_iter()
        .filter_entry(|e| !is_ignored(e))
    {
        let entry = match entry {
            Ok(e) => e,
            Err(e) => {
                // Unreadable directory entries are logged and skipped, not fatal.
                warn!(error = %e, "directory walk error");
                continue;
            }
        };
        if !entry.file_type().is_file() {
            continue;
        }
        let path = entry.path();
        // Paths are stored relative to the canonicalized root.
        let rel_path = match path.strip_prefix(&root) {
            Ok(p) => p.to_string_lossy().to_string(),
            Err(_) => continue,
        };
        let lang = match detect_language(Path::new(&rel_path)) {
            Some(l) => l,
            None => continue, // unsupported file type
        };
        current_files.insert(rel_path.clone());
        if !force {
            // Fast path 1: git says the file is untouched and it is already
            // indexed -> skip without even reading it.
            if let Some(ref changed) = changed_files {
                if !changed.contains(&rel_path) && db.get_file(&rel_path)?.is_some() {
                    result.files_skipped += 1;
                    continue;
                }
            }
        }
        let source = match std::fs::read_to_string(path) {
            Ok(s) => s,
            // Binary / non-UTF-8 files are silently ignored.
            Err(e) if e.kind() == std::io::ErrorKind::InvalidData => continue,
            Err(e) => {
                warn!(file = %rel_path, error = %e, "cannot read file");
                continue;
            }
        };
        let hash = file_hash(&source);
        if !force {
            // Fast path 2: content hash unchanged -> nothing to do.
            if let Ok(Some(existing)) = db.get_file(&rel_path) {
                if existing.hash == hash {
                    result.files_skipped += 1;
                    continue;
                }
            }
        }
        let modified = file_modified(path);
        let extractor = extractors
            .entry(lang)
            .or_insert_with(|| get_extractor(lang).expect("lang was validated by detect_language"))
            .as_mut();
        let mut extraction = match extractor.extract(&source, &rel_path) {
            Ok(e) => e,
            Err(err) => {
                warn!(file = %rel_path, error = %err, "extraction failed");
                continue;
            }
        };
        // Make symbol IDs unique within the file, then hash each symbol and
        // its subtree so we can diff against the previous index state.
        dedup_symbol_ids(&mut extraction.symbols, &mut extraction.edges);
        compute_merkle_hashes(&mut extraction.symbols, &source);
        let old_hashes = db.get_symbol_hashes_for_file(&rel_path)?;
        // Legacy rows may lack content hashes; those can't be diffed.
        let has_old_hashes =
            !old_hashes.is_empty() && old_hashes.iter().any(|(_, ch, _)| ch.is_some());
        if has_old_hashes {
            // Incremental update: apply only the merkle diff.
            let diff = merkle_diff(&extraction.symbols, &old_hashes);
            dirty_files.insert(rel_path.clone());
            for id in &diff.removed {
                db.delete_symbol(id)?;
                result.symbols_removed += 1;
            }
            for &idx in &diff.added {
                db.insert_symbol(&extraction.symbols[idx])?;
                result.symbols_added += 1;
            }
            for &idx in &diff.modified {
                let sym = &extraction.symbols[idx];
                db.insert_symbol(sym)?;
                result.symbols_modified += 1;
            }
            // Parents whose children changed are rewritten (their subtree
            // hash moved) but are not counted as modified.
            for &idx in &diff.children_changed {
                let sym = &extraction.symbols[idx];
                db.insert_symbol(sym)?;
            }
            result.symbols_unchanged += diff.unchanged as u32;
            // Edges are cheap; they are always rebuilt wholesale per file.
            db.clear_edges_for_file(&rel_path)?;
            db.insert_edges(&extraction.edges)?;
            result.edges_added += extraction.edges.len() as u32;
            // Refresh stored source snippets only for new/modified symbols.
            let dirty_indices: Vec<usize> = diff
                .added
                .iter()
                .chain(diff.modified.iter())
                .copied()
                .collect();
            let contents: Vec<(String, String, String, String)> = dirty_indices
                .iter()
                .map(|&i| &extraction.symbols[i])
                .filter(|sym| sym.kind != crate::types::SymbolKind::Import)
                .filter_map(|sym| {
                    extract_symbol_content(&source, sym).map(|(content, header)| {
                        (sym.id.clone(), sym.name.clone(), content, header)
                    })
                })
                .collect();
            if !contents.is_empty() {
                db.insert_symbol_contents(&contents)?;
            }
        } else {
            // First sight of this file (or legacy rows without hashes):
            // drop whatever is stored and insert everything fresh.
            dirty_files.insert(rel_path.clone());
            db.clear_file_data(&rel_path)?;
            db.insert_symbols(&extraction.symbols)?;
            db.insert_edges(&extraction.edges)?;
            result.symbols_added += extraction.symbols.len() as u32;
            result.edges_added += extraction.edges.len() as u32;
            let contents: Vec<(String, String, String, String)> = extraction
                .symbols
                .iter()
                .filter(|sym| sym.kind != crate::types::SymbolKind::Import)
                .filter_map(|sym| {
                    extract_symbol_content(&source, sym).map(|(content, header)| {
                        (sym.id.clone(), sym.name.clone(), content, header)
                    })
                })
                .collect();
            if !contents.is_empty() {
                db.insert_symbol_contents(&contents)?;
            }
        }
        let num_symbols = extraction.symbols.len() as u32;
        db.upsert_file(&FileInfo {
            path: rel_path,
            last_modified: modified,
            hash,
            language: lang.to_string(),
            num_symbols,
        })?;
        result.files_indexed += 1;
    }
    // Purge files that vanished from disk since the last run.
    let all_indexed = db.all_files()?;
    for indexed_path in all_indexed {
        if !current_files.contains(&indexed_path) {
            db.remove_file(&indexed_path)?;
            result.files_removed += 1;
        }
    }
    if force || dirty_files.len() == current_files.len() {
        // Everything changed: resolve all edges globally.
        result.edges_resolved = db.resolve_edges()?;
        db.compute_in_degrees()?;
    } else if !dirty_files.is_empty() {
        // Only re-resolve edges touching the dirty files.
        db.invalidate_edges_targeting(&dirty_files)?;
        result.edges_resolved = db.resolve_edges_scoped(&dirty_files)?;
        db.compute_in_degrees_scoped(&dirty_files)?;
    }
    #[cfg(feature = "lsp")]
    if lsp {
        result.edges_lsp_resolved = crate::lsp::lsp_resolve_edges(db, &root, None)?;
    }
    #[cfg(not(feature = "lsp"))]
    let _ = lsp; // silence unused-variable warning without the feature
    // Record where we indexed up to, for the next incremental run.
    if let Some(commit) = git_head_commit(&root) {
        db.set_metadata("last_commit", &commit)?;
    }
    Ok(result)
}
/// Walk filter: prune ignored directories; individual files are never pruned
/// here (language detection filters them later).
fn is_ignored(entry: &walkdir::DirEntry) -> bool {
    if !entry.file_type().is_dir() {
        return false;
    }
    is_ignored_dirname(&entry.file_name().to_string_lossy())
}
/// Returns true for directory names that should never be indexed: any hidden
/// (dot-prefixed) directory, plus well-known dependency/build/virtualenv dirs.
///
/// The `starts_with('.')` clause already covers every dot-directory
/// (.git, .hg, .svn, .venv, .mypy_cache, .next, ...), so only non-dot names
/// need explicit entries — the previous dot-prefixed list entries were dead.
pub fn is_ignored_dirname(name: &str) -> bool {
    name.starts_with('.')
        || matches!(
            name,
            "node_modules"
                | "__pycache__"
                | "venv"
                | "env"
                | "target"
                | "dist"
                | "build"
                | "vendor"
        )
}
/// Lowercase hex SHA-256 of the file's content, used for change detection.
fn file_hash(content: &str) -> String {
    format!("{:x}", Sha256::digest(content.as_bytes()))
}
/// Modification time of `path` as fractional seconds since the Unix epoch;
/// 0.0 when the metadata or timestamp is unavailable.
fn file_modified(path: &Path) -> f64 {
    let mtime = path
        .metadata()
        .and_then(|m| m.modified())
        .ok()
        .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok());
    match mtime {
        Some(d) => d.as_secs_f64(),
        None => 0.0,
    }
}
/// Make symbol IDs unique within a file by suffixing duplicates with `:2`,
/// `:3`, ... — the first occurrence keeps its original ID — and then patch
/// edge sources and child `parent_id`s that referenced a renamed ID.
fn dedup_symbol_ids(symbols: &mut [Symbol], edges: &mut [crate::types::Edge]) {
    use std::collections::HashMap;
    // id -> number of occurrences seen so far
    let mut seen: HashMap<String, u32> = HashMap::new();
    // old reference string -> new unique id
    let mut renames: HashMap<String, String> = HashMap::new();
    for sym in symbols.iter_mut() {
        let count = seen.entry(sym.id.clone()).or_insert(0);
        *count += 1;
        if *count > 1 {
            let old_id = sym.id.clone();
            sym.id = format!("{}:{}", old_id, count);
            // NOTE(review): nothing in this file produces references in the
            // "id@n" shape — presumably an extractor emits them for the n-th
            // duplicate; confirm against the extractors before removing.
            renames.insert(format!("{}@{}", old_id, count), sym.id.clone());
            // Later duplicates overwrite this entry, so references to the
            // bare old ID end up remapped to the LAST duplicate (the first
            // occurrence, which kept the original ID, is shadowed here).
            renames.insert(old_id, sym.id.clone());
        }
    }
    if renames.is_empty() {
        return;
    }
    // Repoint edge sources that used a renamed ID.
    for edge in edges.iter_mut() {
        if let Some(new_id) = renames.get(&edge.source_id) {
            edge.source_id = new_id.clone();
        }
    }
    // Repoint children whose parent_id used a renamed ID.
    for sym in symbols.iter_mut() {
        if let Some(ref pid) = sym.parent_id {
            if let Some(new_id) = renames.get(pid) {
                sym.parent_id = Some(new_id.clone());
            }
        }
    }
}
/// Populate `content_hash` and `subtree_hash` for every symbol.
///
/// `content_hash` covers kind, name, signature, and the symbol's own source
/// bytes — deliberately NOT its position, so hashes stay stable when code
/// merely moves within the file. `subtree_hash` folds a symbol's content hash
/// with its children's subtree hashes (sorted, so sibling order is
/// irrelevant), which lets `merkle_diff` distinguish "I changed" from "only
/// my children changed".
fn compute_merkle_hashes(symbols: &mut [Symbol], source: &str) {
    use std::collections::HashMap;
    // Pass 1: per-symbol content hash over "kind:name:signature:body".
    for sym in symbols.iter_mut() {
        // Out-of-range or non-boundary spans degrade to "" instead of panicking.
        let body = source
            .get(sym.start_byte as usize..sym.end_byte as usize)
            .unwrap_or("");
        let mut hasher = Sha256::new();
        hasher.update(sym.kind.as_str().as_bytes());
        hasher.update(b":");
        hasher.update(sym.name.as_bytes());
        hasher.update(b":");
        if let Some(ref sig) = sym.signature {
            hasher.update(sig.as_bytes());
        }
        hasher.update(b":");
        hasher.update(body.as_bytes());
        sym.content_hash = Some(format!("{:x}", hasher.finalize()));
    }
    // Build the parent -> children index; symbols with no parent or with a
    // dangling parent reference are treated as roots.
    let id_to_idx: HashMap<&str, usize> = symbols
        .iter()
        .enumerate()
        .map(|(i, s)| (s.id.as_str(), i))
        .collect();
    let mut children: HashMap<usize, Vec<usize>> = HashMap::new();
    let mut roots: Vec<usize> = Vec::new();
    for (i, sym) in symbols.iter().enumerate() {
        if let Some(ref pid) = sym.parent_id {
            if let Some(&parent_idx) = id_to_idx.get(pid.as_str()) {
                children.entry(parent_idx).or_default().push(i);
            } else {
                roots.push(i);
            }
        } else {
            roots.push(i);
        }
    }
    // Pass 2: iterative post-order DFS (two-phase stack) so children are
    // hashed before their parents, without recursion.
    let mut subtree_hashes: Vec<String> = vec![String::new(); symbols.len()];
    let mut stack: Vec<(usize, bool)> = roots.iter().rev().map(|&i| (i, false)).collect();
    while let Some((idx, visited)) = stack.pop() {
        if visited {
            // Post-visit: all children's subtree hashes are ready; combine
            // own content hash with the sorted child hashes.
            let mut hasher = Sha256::new();
            hasher.update(
                symbols[idx]
                    .content_hash
                    .as_deref()
                    .unwrap_or("")
                    .as_bytes(),
            );
            if let Some(kids) = children.get(&idx) {
                let mut kid_hashes: Vec<&str> =
                    kids.iter().map(|&k| subtree_hashes[k].as_str()).collect();
                // Sorting makes the parent hash independent of sibling order.
                kid_hashes.sort();
                for h in kid_hashes {
                    hasher.update(h.as_bytes());
                }
            }
            subtree_hashes[idx] = format!("{:x}", hasher.finalize());
        } else {
            // Pre-visit: re-push marked as visited, then push children so
            // they are processed first.
            stack.push((idx, true));
            if let Some(kids) = children.get(&idx) {
                for &kid in kids.iter().rev() {
                    stack.push((kid, false));
                }
            }
        }
    }
    for (i, sym) in symbols.iter_mut().enumerate() {
        // `take` moves the computed hash out without cloning.
        sym.subtree_hash = Some(std::mem::take(&mut subtree_hashes[i]));
    }
}
/// Result of diffing a file's freshly extracted symbols against the hashes
/// stored from the previous index run.
#[derive(Debug, Default)]
struct SymbolDiff {
    /// Indices (into the new symbol slice) of symbols not previously stored.
    added: Vec<usize>,
    /// IDs of stored symbols that no longer exist.
    removed: Vec<String>,
    /// Indices of symbols whose own content hash changed.
    modified: Vec<usize>,
    /// Indices of symbols unchanged themselves but with changed descendants.
    children_changed: Vec<usize>,
    /// Count of symbols identical to their stored version.
    unchanged: usize,
}
/// Compare freshly hashed symbols against the `(id, content_hash,
/// subtree_hash)` rows stored from the last run and classify each symbol as
/// added, modified, children-changed, or unchanged; stored IDs absent from
/// the new set are reported as removed.
fn merkle_diff(
    new_symbols: &[Symbol],
    old_hashes: &[(String, Option<String>, Option<String>)],
) -> SymbolDiff {
    use std::collections::{HashMap, HashSet};
    let mut diff = SymbolDiff::default();
    let old_map: HashMap<&str, (&Option<String>, &Option<String>)> = old_hashes
        .iter()
        .map(|(id, ch, sh)| (id.as_str(), (ch, sh)))
        .collect();
    let new_ids: HashSet<&str> = new_symbols.iter().map(|s| s.id.as_str()).collect();
    for (i, sym) in new_symbols.iter().enumerate() {
        match old_map.get(sym.id.as_str()) {
            None => diff.added.push(i),
            Some(&(old_ch, old_sh)) => {
                let content_same = sym.content_hash.as_ref() == old_ch.as_ref();
                let subtree_same = sym.subtree_hash.as_ref() == old_sh.as_ref();
                if content_same && subtree_same {
                    diff.unchanged += 1;
                } else if !content_same {
                    diff.modified.push(i);
                } else {
                    // Own hash matches but the subtree moved: a descendant changed.
                    diff.children_changed.push(i);
                }
            }
        }
    }
    for (old_id, _, _) in old_hashes {
        if !new_ids.contains(old_id.as_str()) {
            diff.removed.push(old_id.clone());
        }
    }
    diff
}
/// Files changed between `last_commit` and the current working tree,
/// including untracked, unstaged, and staged changes.
///
/// Returns `None` when there is nothing usable — no recorded commit, the
/// commit no longer exists (rebase, fresh clone), or git itself fails —
/// which makes the caller fall back to per-file hash comparison.
fn git_changed_files(
    root: &Path,
    last_commit: Option<&str>,
) -> Option<std::collections::HashSet<String>> {
    let last_commit = last_commit?;
    // Verify the recorded commit still exists before diffing against it.
    let verify = git_cmd(root, &["cat-file", "-t", last_commit])?;
    if !verify.status.success() {
        return None;
    }
    let diff_output = git_cmd(root, &["diff", "--name-only", last_commit, "HEAD"])?;
    if !diff_output.status.success() {
        return None;
    }
    let mut changed: std::collections::HashSet<String> =
        parse_git_lines(&diff_output.stdout).collect();
    // Fold in uncommitted work: untracked, unstaged, and staged files.
    // Same arg-list loop shape as `git_recently_changed_files`; failures of
    // these best-effort queries are ignored rather than aborting.
    for args in [
        &["ls-files", "--others", "--exclude-standard"][..],
        &["diff", "--name-only"][..],
        &["diff", "--name-only", "--cached"][..],
    ] {
        if let Some(out) = git_cmd(root, args) {
            if out.status.success() {
                changed.extend(parse_git_lines(&out.stdout));
            }
        }
    }
    Some(changed)
}
/// The current HEAD commit hash, or `None` when git fails or `root` is not a
/// repository.
fn git_head_commit(root: &Path) -> Option<String> {
    let output = git_cmd(root, &["rev-parse", "HEAD"])?;
    if !output.status.success() {
        return None;
    }
    let text = String::from_utf8(output.stdout).ok()?;
    Some(text.trim().to_string())
}
/// Files touched in the last `commits` commits plus any uncommitted work
/// (unstaged, staged, untracked), deduplicated and sorted.
///
/// Errors only when git itself cannot be run; the best-effort working-tree
/// queries are silently skipped on failure.
pub fn git_recently_changed_files(root: &Path, commits: u32) -> Result<Vec<String>> {
    use std::collections::BTreeSet;
    // BTreeSet gives dedup plus stable alphabetical output.
    let mut changed = BTreeSet::new();
    let limit = format!("-{commits}");
    let log_args = ["log", "--name-only", "--pretty=format:", limit.as_str()];
    let output =
        git_cmd(root, &log_args).context("Failed to run git — are you in a git repository?")?;
    if output.status.success() {
        changed.extend(parse_git_lines(&output.stdout));
    }
    let working_tree_queries: [&[&str]; 3] = [
        &["diff", "--name-only"],
        &["diff", "--name-only", "--cached"],
        &["ls-files", "--others", "--exclude-standard"],
    ];
    for args in working_tree_queries {
        if let Some(out) = git_cmd(root, args) {
            if out.status.success() {
                changed.extend(parse_git_lines(&out.stdout));
            }
        }
    }
    Ok(changed.into_iter().collect())
}
/// Run `git <args>` in `root` with stdin closed; `None` when the process
/// cannot be spawned (e.g. git not installed).
fn git_cmd(root: &Path, args: &[&str]) -> Option<std::process::Output> {
    let mut cmd = std::process::Command::new("git");
    cmd.args(args)
        .current_dir(root)
        .stdin(std::process::Stdio::null());
    cmd.output().ok()
}
/// Lazily yield the non-empty lines of git's stdout as owned strings.
///
/// Splits on `\n` and strips a trailing `\r`, matching `str::lines`
/// semantics, but without the intermediate `Vec` the previous version
/// collected only to immediately re-iterate (clippy `needless_collect`).
/// Splitting bytes on `\n` is UTF-8 safe: 0x0A never occurs inside a
/// multi-byte sequence, so per-line lossy decoding equals whole-buffer
/// decoding.
fn parse_git_lines(stdout: &[u8]) -> impl Iterator<Item = String> + '_ {
    stdout
        .split(|&b| b == b'\n')
        .map(|line| line.strip_suffix(b"\r").unwrap_or(line))
        .filter(|line| !line.is_empty())
        .map(|line| String::from_utf8_lossy(line).into_owned())
}
/// Largest byte index `<= index` that lies on a UTF-8 char boundary of `s`
/// (clamped to `s.len()`); stable stand-in for `str::floor_char_boundary`.
fn floor_char_boundary(s: &str, index: usize) -> usize {
    if index >= s.len() {
        return s.len();
    }
    // Index 0 is always a boundary, so `find` cannot come up empty.
    (0..=index)
        .rev()
        .find(|&i| s.is_char_boundary(i))
        .unwrap_or(0)
}
// Upper bound on the stored source snippet per symbol, in bytes.
const MAX_CONTENT_BYTES: usize = 2048;
// Snippets shorter than this after trimming are not worth storing.
const MIN_CONTENT_BYTES: usize = 50;
/// Slice `source` to the symbol's byte span for storage, truncated to
/// `MAX_CONTENT_BYTES` at a valid char boundary.
///
/// Returns `(content, header)` where `header` is a small comment block
/// (file / kind / name) stored alongside the snippet. Returns `None` for
/// imports, invalid or out-of-range spans, and snippets shorter than
/// `MIN_CONTENT_BYTES` after trimming.
fn extract_symbol_content(source: &str, sym: &crate::types::Symbol) -> Option<(String, String)> {
    if sym.kind == crate::types::SymbolKind::Import {
        return None;
    }
    let start = sym.start_byte as usize;
    let end = sym.end_byte as usize;
    if start >= end || end > source.len() {
        return None;
    }
    // Round `start` UP to a char boundary — extractor offsets can land
    // inside a multi-byte character.
    let safe_start = if source.is_char_boundary(start) {
        start
    } else {
        let mut s = start;
        while s < source.len() && !source.is_char_boundary(s) {
            s += 1;
        }
        s
    };
    // Truncate, rounding the end DOWN so a character is never split.
    let truncated_end = end.min(safe_start + MAX_CONTENT_BYTES);
    let safe_end = floor_char_boundary(source, truncated_end);
    if safe_start >= safe_end {
        return None;
    }
    let raw = &source[safe_start..safe_end];
    // Too-short snippets are skipped; `< MIN_CONTENT_BYTES` also covers the
    // all-whitespace case (the previous separate `is_empty()` check was
    // redundant since MIN_CONTENT_BYTES > 0).
    if raw.trim().len() < MIN_CONTENT_BYTES {
        return None;
    }
    let header = format!(
        "// File: {}\n// Type: {}\n// Name: {}",
        sym.file_path, sym.kind, sym.name
    );
    Some((raw.to_string(), header))
}
#[cfg(test)]
mod tests {
    use super::*;

    // Hashing the same content twice must give the same digest.
    #[test]
    fn test_file_hash_deterministic() {
        let h1 = file_hash("def foo(): pass");
        let h2 = file_hash("def foo(): pass");
        assert_eq!(h1, h2);
    }

    #[test]
    fn test_file_hash_different_content() {
        let h1 = file_hash("def foo(): pass");
        let h2 = file_hash("def bar(): pass");
        assert_ne!(h1, h2);
    }

    // End-to-end check of the walk filter against real directories on disk.
    #[test]
    fn test_is_ignored_directories() {
        let tmp = std::env::temp_dir().join("cartog_test_ignored");
        let _ = std::fs::remove_dir_all(&tmp);
        std::fs::create_dir_all(&tmp).unwrap();
        let ignored_dirs = [
            ".git",
            "node_modules",
            "__pycache__",
            "target",
            "dist",
            "build",
            ".venv",
        ];
        let allowed_dirs = ["src", "lib", "tests", "docs"];
        for name in ignored_dirs.iter().chain(allowed_dirs.iter()) {
            std::fs::create_dir_all(tmp.join(name)).unwrap();
        }
        let entries: Vec<_> = WalkDir::new(&tmp)
            .min_depth(1)
            .max_depth(1)
            .into_iter()
            .filter_map(|e| e.ok())
            .collect();
        for entry in &entries {
            let name = entry.file_name().to_string_lossy();
            if ignored_dirs.contains(&name.as_ref()) {
                assert!(is_ignored(entry), "{name} should be ignored");
            }
            if allowed_dirs.contains(&name.as_ref()) {
                assert!(!is_ignored(entry), "{name} should NOT be ignored");
            }
        }
        let _ = std::fs::remove_dir_all(&tmp);
    }

    // No stored commit -> no git fast path.
    #[test]
    fn test_git_changed_files_no_commit() {
        let result = git_changed_files(Path::new("."), None);
        assert!(result.is_none());
    }

    // A commit hash that doesn't exist (e.g. after a rebase) must disable
    // the git fast path rather than erroring.
    #[test]
    fn test_git_changed_files_invalid_commit() {
        let result = git_changed_files(
            Path::new("."),
            Some("0000000000000000000000000000000000000000"),
        );
        assert!(result.is_none());
    }

    #[test]
    fn test_git_changed_files_valid_head() {
        let head = git_head_commit(Path::new("."));
        // Only meaningful when the test runs inside a git checkout.
        if let Some(commit) = head {
            let result = git_changed_files(Path::new("."), Some(&commit));
            assert!(result.is_some());
        }
    }

    // Second run skips everything; force re-indexes everything.
    #[test]
    fn test_index_directory_force() {
        use crate::db::Database;
        let db = Database::open_memory().unwrap();
        let fixtures = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/auth");
        if fixtures.exists() {
            let r1 = index_directory(&db, &fixtures, false, false).unwrap();
            assert!(r1.files_indexed > 0);
            let r2 = index_directory(&db, &fixtures, false, false).unwrap();
            assert_eq!(r2.files_indexed, 0);
            assert!(r2.files_skipped > 0);
            let r3 = index_directory(&db, &fixtures, true, false).unwrap();
            assert_eq!(r3.files_indexed, r1.files_indexed);
            assert_eq!(r3.files_skipped, 0);
        }
    }

    #[test]
    fn test_floor_char_boundary_ascii() {
        let s = "hello world";
        assert_eq!(floor_char_boundary(s, 5), 5);
        assert_eq!(floor_char_boundary(s, 0), 0);
        assert_eq!(floor_char_boundary(s, 100), s.len());
    }

    #[test]
    fn test_floor_char_boundary_multibyte() {
        // '─' occupies bytes 3..6; indices inside it must round down to 3.
        let s = "abc─def";
        assert_eq!(floor_char_boundary(s, 3), 3);
        assert_eq!(floor_char_boundary(s, 4), 3);
        assert_eq!(floor_char_boundary(s, 5), 3);
        assert_eq!(floor_char_boundary(s, 6), 6);
    }

    // Truncation at MAX_CONTENT_BYTES must never split a multi-byte char.
    #[test]
    fn test_extract_symbol_content_truncates_at_char_boundary() {
        let padding = "x".repeat(MAX_CONTENT_BYTES - 1);
        let source = format!("{padding}─after");
        let sym = crate::types::Symbol::new(
            "test_sym",
            crate::types::SymbolKind::Function,
            "test.rb",
            1,
            100,
            0,
            source.len() as u32,
            None,
        );
        let result = extract_symbol_content(&source, &sym);
        assert!(result.is_some());
        let (content, _header) = result.unwrap();
        assert_eq!(content.len(), MAX_CONTENT_BYTES - 1);
        assert!(content.is_char_boundary(content.len()));
    }

    #[test]
    fn test_compute_merkle_hashes_populates_fields() {
        let source = "def foo():\n    pass\n";
        let mut symbols = vec![crate::types::Symbol::new(
            "foo",
            crate::types::SymbolKind::Function,
            "test.py",
            1,
            2,
            0,
            source.len() as u32,
            None,
        )];
        compute_merkle_hashes(&mut symbols, source);
        assert!(symbols[0].content_hash.is_some());
        assert!(symbols[0].subtree_hash.is_some());
    }

    // Moving a symbol within the file (same body) must not change its hash.
    #[test]
    fn test_merkle_hashes_stable_across_position_changes() {
        let source_v1 = "def foo():\n    pass\n";
        let source_v2 = "\n\ndef foo():\n    pass\n";
        let mut sym_v1 = vec![crate::types::Symbol::new(
            "foo",
            crate::types::SymbolKind::Function,
            "test.py",
            1,
            2,
            0,
            source_v1.len() as u32,
            None,
        )];
        let mut sym_v2 = vec![crate::types::Symbol::new(
            "foo",
            crate::types::SymbolKind::Function,
            "test.py",
            3,
            4,
            2,
            source_v2.len() as u32,
            None,
        )];
        compute_merkle_hashes(&mut sym_v1, source_v1);
        compute_merkle_hashes(&mut sym_v2, source_v2);
        assert_eq!(sym_v1[0].content_hash, sym_v2[0].content_hash);
    }

    #[test]
    fn test_merkle_diff_detects_added_symbol() {
        let old_hashes: Vec<(String, Option<String>, Option<String>)> = vec![];
        let mut new_symbols = vec![crate::types::Symbol::new(
            "foo",
            crate::types::SymbolKind::Function,
            "test.py",
            1,
            5,
            0,
            50,
            None,
        )];
        new_symbols[0].content_hash = Some("abc".to_string());
        new_symbols[0].subtree_hash = Some("def".to_string());
        let diff = merkle_diff(&new_symbols, &old_hashes);
        assert_eq!(diff.added.len(), 1);
        assert_eq!(diff.removed.len(), 0);
        assert_eq!(diff.modified.len(), 0);
    }

    #[test]
    fn test_merkle_diff_detects_removed_symbol() {
        let old_hashes = vec![(
            "test.py:function:foo".to_string(),
            Some("abc".to_string()),
            Some("def".to_string()),
        )];
        let new_symbols: Vec<crate::types::Symbol> = vec![];
        let diff = merkle_diff(&new_symbols, &old_hashes);
        assert_eq!(diff.added.len(), 0);
        assert_eq!(diff.removed.len(), 1);
        assert_eq!(diff.removed[0], "test.py:function:foo");
    }

    #[test]
    fn test_merkle_diff_detects_unchanged() {
        let old_hashes = vec![(
            "test.py:function:foo".to_string(),
            Some("abc".to_string()),
            Some("def".to_string()),
        )];
        let mut new_symbols = vec![crate::types::Symbol::new(
            "foo",
            crate::types::SymbolKind::Function,
            "test.py",
            1,
            5,
            0,
            50,
            None,
        )];
        new_symbols[0].content_hash = Some("abc".to_string());
        new_symbols[0].subtree_hash = Some("def".to_string());
        let diff = merkle_diff(&new_symbols, &old_hashes);
        assert_eq!(diff.unchanged, 1);
        assert_eq!(diff.added.len(), 0);
        assert_eq!(diff.modified.len(), 0);
    }

    #[test]
    fn test_merkle_diff_detects_modified() {
        let old_hashes = vec![(
            "test.py:function:foo".to_string(),
            Some("old_hash".to_string()),
            Some("old_subtree".to_string()),
        )];
        let mut new_symbols = vec![crate::types::Symbol::new(
            "foo",
            crate::types::SymbolKind::Function,
            "test.py",
            1,
            5,
            0,
            50,
            None,
        )];
        new_symbols[0].content_hash = Some("new_hash".to_string());
        new_symbols[0].subtree_hash = Some("new_subtree".to_string());
        let diff = merkle_diff(&new_symbols, &old_hashes);
        assert_eq!(diff.modified.len(), 1);
        assert_eq!(diff.unchanged, 0);
    }

    // End-to-end incremental pipeline: initial index, add a symbol, remove a
    // symbol, verifying skip counts, outlines, and stable symbol IDs.
    #[test]
    fn test_incremental_merkle_diff_pipeline() {
        use crate::db::Database;
        let tmp = tempfile::TempDir::new().unwrap();
        let dir = tmp.path().join("project");
        std::fs::create_dir(&dir).unwrap();
        let a_py = dir.join("a.py");
        let b_py = dir.join("b.py");
        std::fs::write(
            &a_py,
            r#"class Greeter:
    def hello(self):
        return "hi"
    def goodbye(self):
        return "bye"
"#,
        )
        .unwrap();
        std::fs::write(
            &b_py,
            r#"from a import Greeter
def main():
    g = Greeter()
    g.hello()
"#,
        )
        .unwrap();
        let db = Database::open_memory().unwrap();
        let r1 = index_directory(&db, &dir, true, false).unwrap();
        assert_eq!(r1.files_indexed, 2);
        assert!(r1.symbols_added > 0, "should have symbols");
        let outline_a = db.outline("a.py").unwrap();
        assert_eq!(outline_a.len(), 3, "Greeter + hello + goodbye");
        let names_a: Vec<&str> = outline_a.iter().map(|s| s.name.as_str()).collect();
        assert!(names_a.contains(&"Greeter"));
        assert!(names_a.contains(&"hello"));
        assert!(names_a.contains(&"goodbye"));
        let hello_id_v1 = outline_a
            .iter()
            .find(|s| s.name == "hello")
            .unwrap()
            .id
            .clone();
        let greeter_id_v1 = outline_a
            .iter()
            .find(|s| s.name == "Greeter")
            .unwrap()
            .id
            .clone();
        let hashes = db.get_symbol_hashes_for_file("a.py").unwrap();
        assert!(
            hashes
                .iter()
                .all(|(_, ch, sh)| ch.is_some() && sh.is_some()),
            "all symbols should have hashes after indexing"
        );
        // Add a new top-level function; existing symbols must diff as
        // unchanged and keep their IDs.
        std::fs::write(
            &a_py,
            r#"class Greeter:
    def hello(self):
        return "hi"
    def goodbye(self):
        return "bye"
def standalone():
    return "I am new"
"#,
        )
        .unwrap();
        let r2 = index_directory(&db, &dir, false, false).unwrap();
        assert_eq!(r2.files_indexed, 1, "only a.py changed");
        assert!(r2.files_skipped > 0, "b.py should be skipped");
        assert_eq!(r2.symbols_added, 1, "standalone is new");
        assert!(
            r2.symbols_unchanged >= 2,
            "hello and goodbye should be unchanged, got {}",
            r2.symbols_unchanged
        );
        let outline_a2 = db.outline("a.py").unwrap();
        assert_eq!(
            outline_a2.len(),
            4,
            "Greeter + hello + goodbye + standalone"
        );
        assert!(outline_a2.iter().any(|s| s.name == "standalone"));
        let hello_id_v2 = outline_a2
            .iter()
            .find(|s| s.name == "hello")
            .unwrap()
            .id
            .clone();
        let greeter_id_v2 = outline_a2
            .iter()
            .find(|s| s.name == "Greeter")
            .unwrap()
            .id
            .clone();
        assert_eq!(hello_id_v1, hello_id_v2, "hello ID should be stable");
        assert_eq!(greeter_id_v1, greeter_id_v2, "Greeter ID should be stable");
        // Remove a method; the diff must delete it and leave siblings stable.
        std::fs::write(
            &a_py,
            r#"class Greeter:
    def hello(self):
        return "hi"
def standalone():
    return "I am new"
"#,
        )
        .unwrap();
        let r3 = index_directory(&db, &dir, false, false).unwrap();
        assert_eq!(r3.files_indexed, 1);
        assert!(r3.symbols_removed >= 1, "goodbye should be removed");
        let outline_a3 = db.outline("a.py").unwrap();
        assert_eq!(outline_a3.len(), 3, "Greeter + hello + standalone");
        assert!(
            !outline_a3.iter().any(|s| s.name == "goodbye"),
            "goodbye should be gone"
        );
        let hello_id_v3 = outline_a3
            .iter()
            .find(|s| s.name == "hello")
            .unwrap()
            .id
            .clone();
        assert_eq!(
            hello_id_v1, hello_id_v3,
            "hello ID stable after sibling removal"
        );
    }
}