use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::time::Instant;
use rayon::prelude::*;
use walkdir::WalkDir;
use crate::branch;
use crate::branch_meta::{self, BranchMeta};
use crate::config::{
get_tokensave_dir, is_excluded, is_excluded_dir, is_included, load_config, save_config,
TokenSaveConfig,
};
use crate::context::ContextBuilder;
use crate::db::Database;
use crate::errors::{Result, TokenSaveError};
use crate::extraction::LanguageRegistry;
use crate::graph::{GraphQueryManager, GraphTraverser};
use crate::resolution::ReferenceResolver;
use crate::sync;
use crate::types::*;
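/// Runs `extractor.extract` inside `catch_unwind` so a panic while parsing a
/// single file cannot abort the whole indexing run; logs and returns `None`
/// instead of propagating the panic.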
fn safe_extract(
extractor: &dyn crate::extraction::LanguageExtractor,
file_path: &str,
source: &str,
) -> Option<ExtractionResult> {
std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
extractor.extract(file_path, source)
}))
.map_err(|_| {
eprintln!("[tokensave] extraction panicked for {file_path}, skipping");
})
.ok()
}
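/// (relative path, extraction result, content hash, file size in bytes, mtime).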
type ExtractTuple = (String, ExtractionResult, String, u64, i64);
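/// Extracts `files`, preferring an isolated subprocess worker pool so a
/// crashing parser cannot take down the main process, and falling back to
/// in-process extraction when subprocess mode is disabled or the pool cannot
/// be spawned. Also returns any (path, reason) pairs the pool skipped.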
fn extract_files_isolated(
project_root: &Path,
registry: &crate::extraction::LanguageRegistry,
files: Vec<String>,
) -> (Vec<ExtractTuple>, Vec<(String, String)>) {
if should_use_subprocess() {
let workers = std::thread::available_parallelism().map_or(4, std::num::NonZeroUsize::get);
let timeout = std::time::Duration::from_secs(
crate::user_config::UserConfig::load().extraction_timeout_secs,
);
match crate::extraction_worker::WorkerPool::new(workers, project_root.to_path_buf()) {
Ok(pool) => {
let outcome = pool.extract_files(files, |_, _, _| {}, timeout);
return (outcome.results, outcome.skipped);
}
Err(e) => eprintln!(
"[tokensave] could not spawn extraction worker pool ({e}), \
falling back to in-process extraction"
),
}
}
(
extract_files_in_process(project_root, registry, &files),
Vec::new(),
)
}
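/// In-process fallback: extracts files in parallel with rayon, silently
/// skipping files that cannot be read or have no registered extractor.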
fn extract_files_in_process(
project_root: &Path,
registry: &crate::extraction::LanguageRegistry,
files: &[String],
) -> Vec<ExtractTuple> {
files
.par_iter()
.filter_map(|file_path| {
let abs_path = project_root.join(file_path);
let source = sync::read_source_file(&abs_path).ok()?;
let extractor = registry.extractor_for_file(file_path)?;
let mut result = safe_extract(extractor, file_path, &source)?;
result.sanitize();
let hash = sync::content_hash(&source);
let size = source.len() as u64;
let mtime = sync::file_stat(&abs_path).map_or_else(current_timestamp, |(m, _)| m);
Some((file_path.clone(), result, hash, size, mtime))
})
.collect()
}
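/// Subprocess isolation is used only when running as the real `tokensave`
/// binary and `TOKENSAVE_DISABLE_SUBPROCESS` is not set.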
fn should_use_subprocess() -> bool {
if std::env::var_os("TOKENSAVE_DISABLE_SUBPROCESS").is_some() {
return false;
}
let Ok(path) = std::env::current_exe() else {
return false;
};
matches!(path.file_stem().and_then(|s| s.to_str()), Some("tokensave"))
}
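/// Handle to a project's code-graph index: the database, configuration,
/// language registry, and the branch the index was resolved for.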
pub struct TokenSave {
db: Database,
config: TokenSaveConfig,
project_root: PathBuf,
registry: LanguageRegistry,
active_branch: Option<String>,
serving_branch: Option<String>,
fallback_warning: Option<String>,
}
#[derive(Debug, Clone, serde::Serialize)]
pub struct DecisionRecord {
pub id: i64,
pub text: String,
pub reason: Option<String>,
pub created_at: i64,
pub files: Vec<String>,
pub tags: Vec<String>,
}
#[derive(Debug, Clone, serde::Serialize)]
pub struct CodeAreaRecord {
pub id: i64,
pub path: String,
pub description: Option<String>,
pub last_touched_at: i64,
pub touch_count: u32,
}
pub struct IndexResult {
pub file_count: usize,
pub node_count: usize,
pub edge_count: usize,
pub duration_ms: u64,
}
#[derive(Debug)]
pub struct SyncResult {
pub files_added: usize,
pub files_modified: usize,
pub files_removed: usize,
pub duration_ms: u64,
pub added_paths: Vec<String>,
pub modified_paths: Vec<String>,
pub removed_paths: Vec<String>,
pub skipped_paths: Vec<(String, String)>,
}
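/// Seconds since the Unix epoch, or 0 if the system clock predates the epoch.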
pub fn current_timestamp() -> i64 {
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs() as i64
}
impl TokenSave {
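/// Creates a fresh index for `project_root`: saves the config, initializes
/// the database, and records branch metadata for the detected default branch.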
pub async fn init(project_root: &Path) -> Result<Self> {
let config = TokenSaveConfig {
root_dir: project_root.to_string_lossy().to_string(),
..TokenSaveConfig::default()
};
save_config(project_root, &config)?;
let db_path = get_tokensave_dir(project_root).join("tokensave.db");
let (db, _migrated) = Database::initialize(&db_path).await?;
let active_branch = branch::current_branch(project_root);
let default_branch =
branch::detect_default_branch(project_root).or_else(|| active_branch.clone());
if let Some(ref default) = default_branch {
let meta = BranchMeta::new(default);
let _ = branch_meta::save_branch_meta(&get_tokensave_dir(project_root), &meta);
}
Ok(Self {
db,
config,
project_root: project_root.to_path_buf(),
registry: LanguageRegistry::new(),
active_branch,
serving_branch: None,
fallback_warning: None,
})
}
pub fn db(&self) -> &Database {
&self.db
}
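/// Opens an existing index, transparently rebuilding it when the database is
/// corrupted or a previous write was interrupted, and re-indexing after a
/// schema migration.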
pub async fn open(project_root: &Path) -> Result<Self> {
let config = load_config(project_root)?;
let tokensave_dir = get_tokensave_dir(project_root);
let active_branch = branch::current_branch(project_root);
let (db_path, serving_branch, fallback_warning) =
Self::resolve_db_for_branch(project_root, &tokensave_dir, active_branch.as_deref());
if !db_path.exists() {
return Err(TokenSaveError::Config {
message: format!(
"no TokenSave database found at '{}'; run 'tokensave init' first",
db_path.display()
),
});
}
let crashed = has_dirty_sentinel(project_root);
if crashed {
eprintln!(
"[tokensave] previous operation was interrupted — checking database integrity…"
);
}
let open_result = Database::open(&db_path).await;
let (db, migrated) = match open_result {
Ok(pair) => pair,
Err(ref e) if Database::is_corruption_error(e) || crashed => {
print_corruption_warning();
delete_db_files(&db_path);
clear_dirty_sentinel(project_root);
let (db, _) = Database::initialize(&db_path).await?;
let ts = Self {
db,
config,
project_root: project_root.to_path_buf(),
registry: LanguageRegistry::new(),
active_branch: active_branch.clone(),
serving_branch: serving_branch.clone(),
fallback_warning: fallback_warning.clone(),
};
ts.index_all_with_progress(|c, t, f| {
eprintln!("[tokensave] re-indexing [{c}/{t}] {f}");
})
.await?;
eprintln!("[tokensave] re-index complete.");
return Ok(ts);
}
Err(e) => return Err(e),
};
if crashed {
let intact = db.quick_check().await.unwrap_or(false);
if !intact {
print_corruption_warning();
drop(db);
delete_db_files(&db_path);
clear_dirty_sentinel(project_root);
let (new_db, _) = Database::initialize(&db_path).await?;
let ts = Self {
db: new_db,
config,
project_root: project_root.to_path_buf(),
registry: LanguageRegistry::new(),
active_branch: active_branch.clone(),
serving_branch: serving_branch.clone(),
fallback_warning: fallback_warning.clone(),
};
ts.index_all_with_progress(|c, t, f| {
eprintln!("[tokensave] re-indexing [{c}/{t}] {f}");
})
.await?;
eprintln!("[tokensave] re-index complete.");
return Ok(ts);
}
clear_dirty_sentinel(project_root);
}
let ts = Self {
db,
config,
project_root: project_root.to_path_buf(),
registry: LanguageRegistry::new(),
active_branch,
serving_branch,
fallback_warning,
};
if migrated {
eprintln!("[tokensave] schema changed — performing full re-index…");
ts.index_all_with_progress(|current, total, file| {
eprintln!("[tokensave] re-indexing [{current}/{total}] {file}");
})
.await?;
eprintln!("[tokensave] re-index complete.");
}
Ok(ts)
}
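/// Picks the database to serve for `branch`: the branch's own DB if it is
/// tracked, else the nearest tracked ancestor's, else the default branch's,
/// returning a warning message in the fallback cases.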
fn resolve_db_for_branch(
project_root: &Path,
tokensave_dir: &Path,
branch: Option<&str>,
) -> (PathBuf, Option<String>, Option<String>) {
let default_db = tokensave_dir.join("tokensave.db");
let Some(meta) = branch_meta::load_branch_meta(tokensave_dir) else {
return (default_db, None, None);
};
let Some(branch) = branch else {
return (
default_db,
Some(meta.default_branch.clone()),
Some("detached HEAD — using default branch index".to_string()),
);
};
if let Some(path) = branch::resolve_branch_db_path(tokensave_dir, branch, &meta) {
if path.exists() {
return (path, Some(branch.to_string()), None);
}
}
if let Some(ancestor) = branch::find_nearest_tracked_ancestor(project_root, branch, &meta) {
if let Some(path) = branch::resolve_branch_db_path(tokensave_dir, &ancestor, &meta) {
if path.exists() {
return (
path,
Some(ancestor.clone()),
Some(format!(
"branch '{branch}' is not tracked — serving from '{ancestor}'. \
Run `tokensave branch add {branch}` to track it."
)),
);
}
}
}
let serving = meta.default_branch.clone();
(
default_db,
Some(serving),
Some(format!(
"branch '{branch}' is not tracked — serving from '{}'. \
Run `tokensave branch add {branch}` to track it.",
meta.default_branch
)),
)
}
pub async fn open_branch(project_root: &Path, branch_name: &str) -> Result<Self> {
let config = load_config(project_root)?;
let tokensave_dir = get_tokensave_dir(project_root);
let meta = branch_meta::load_branch_meta(&tokensave_dir).ok_or_else(|| {
TokenSaveError::Config {
message: "no branch tracking configured — run `tokensave branch add` first"
.to_string(),
}
})?;
let db_path = branch::resolve_branch_db_path(&tokensave_dir, branch_name, &meta)
.ok_or_else(|| TokenSaveError::Config {
message: format!("branch '{branch_name}' is not tracked"),
})?;
if !db_path.exists() {
return Err(TokenSaveError::Config {
message: format!(
"DB for branch '{branch_name}' not found at '{}'",
db_path.display()
),
});
}
let (db, _) = Database::open(&db_path).await?;
Ok(Self {
db,
config,
project_root: project_root.to_path_buf(),
registry: LanguageRegistry::new(),
active_branch: Some(branch_name.to_string()),
serving_branch: Some(branch_name.to_string()),
fallback_warning: None,
})
}
pub fn list_tracked_branches(project_root: &Path) -> Option<Vec<String>> {
let tokensave_dir = get_tokensave_dir(project_root);
let meta = branch_meta::load_branch_meta(&tokensave_dir)?;
Some(meta.branches.keys().cloned().collect())
}
pub fn is_initialized(project_root: &Path) -> bool {
get_tokensave_dir(project_root)
.join("tokensave.db")
.exists()
}
}
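/// The dirty sentinel marks a write in progress; if it still exists on the
/// next open, the previous run was interrupted and the DB gets an integrity
/// check.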
fn write_dirty_sentinel(project_root: &Path) {
let path = get_tokensave_dir(project_root).join("dirty");
let _ = std::fs::write(
&path,
format!(
"pid={}\ntime={}\nversion={}",
std::process::id(),
current_timestamp(),
env!("CARGO_PKG_VERSION"),
),
);
}
fn clear_dirty_sentinel(project_root: &Path) {
let path = get_tokensave_dir(project_root).join("dirty");
let _ = std::fs::remove_file(path);
}
fn has_dirty_sentinel(project_root: &Path) -> bool {
get_tokensave_dir(project_root).join("dirty").exists()
}
fn delete_db_files(db_path: &std::path::Path) {
let _ = std::fs::remove_file(db_path);
let mut wal = db_path.to_path_buf();
wal.set_extension("db-wal");
let _ = std::fs::remove_file(&wal);
wal.set_extension("db-shm");
let _ = std::fs::remove_file(&wal);
}
fn print_corruption_warning() {
let version = env!("CARGO_PKG_VERSION");
eprintln!("[tokensave] \x1b[33m⚠database corruption detected — rebuilding index\x1b[0m");
eprintln!("[tokensave]");
eprintln!("[tokensave] This was likely caused by a crash or kill during indexing.");
eprintln!("[tokensave] Please report this at:");
eprintln!("[tokensave] https://github.com/aovestdipaperino/tokensave/issues");
eprintln!(
"[tokensave] Include: tokensave version (v{version}), OS, and what happened before the crash."
);
eprintln!("[tokensave]");
}
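/// RAII guard for the sync lockfile; removes the file on drop.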
struct SyncLockGuard {
path: PathBuf,
}
impl Drop for SyncLockGuard {
fn drop(&mut self) {
let _ = std::fs::remove_file(&self.path);
}
}
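/// Atomically creates `sync.lock` containing our PID. If the file already
/// exists it is reclaimed only when the recorded PID is no longer alive.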
fn try_acquire_sync_lock(project_root: &Path) -> Result<SyncLockGuard> {
use std::io::Write;
let lock_path = get_tokensave_dir(project_root).join("sync.lock");
let pid = std::process::id();
match std::fs::OpenOptions::new()
.write(true)
.create_new(true)
.open(&lock_path)
{
Ok(mut f) => {
let _ = write!(f, "{pid}");
return Ok(SyncLockGuard { path: lock_path });
}
Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
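// Lockfile already exists — fall through to the stale-lock check below.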
}
Err(e) => {
return Err(TokenSaveError::SyncLock {
message: format!("could not create lockfile: {e}"),
});
}
}
let contents = std::fs::read_to_string(&lock_path).unwrap_or_default();
if let Ok(existing_pid) = contents.trim().parse::<u32>() {
if is_pid_alive(existing_pid) {
return Err(TokenSaveError::SyncLock {
message: format!(
"another sync is already in progress (PID {existing_pid}). \
If this is stale, remove {}",
lock_path.display()
),
});
}
}
let _ = std::fs::remove_file(&lock_path);
let mut f = std::fs::OpenOptions::new()
.write(true)
.create_new(true)
.open(&lock_path)
.map_err(|e| TokenSaveError::SyncLock {
message: format!("could not reclaim lockfile: {e}"),
})?;
let _ = write!(f, "{pid}");
Ok(SyncLockGuard { path: lock_path })
}
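/// Best-effort liveness probe: `kill -0` on Unix, `tasklist` on Windows,
/// `false` on other platforms.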
fn is_pid_alive(pid: u32) -> bool {
#[cfg(unix)]
{
std::process::Command::new("kill")
.args(["-0", &pid.to_string()])
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.is_ok_and(|s| s.success())
}
#[cfg(windows)]
{
std::process::Command::new("tasklist")
.args(["/FI", &format!("PID eq {pid}"), "/NH"])
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::null())
.output()
.map(|o| String::from_utf8_lossy(&o.stdout).contains(&pid.to_string()))
.unwrap_or(false)
}
#[cfg(not(any(unix, windows)))]
{
let _ = pid;
false
}
}
impl TokenSave {
pub fn add_skip_folders(&mut self, folders: &[String]) {
for folder in folders {
// Trim trailing slashes so the glob doesn't become "dir//**".
let folder = folder.trim_end_matches('/');
self.config.exclude.push(format!("{folder}/**"));
}
}
pub async fn index_all(&self) -> Result<IndexResult> {
self.index_all_with_progress(|_, _, _| {}).await
}
pub async fn index_all_with_progress<F>(&self, on_file: F) -> Result<IndexResult>
where
F: Fn(usize, usize, &str),
{
self.index_all_with_progress_verbose(on_file, |_| {}).await
}
pub async fn index_all_with_progress_verbose<F, V>(
&self,
on_file: F,
on_verbose: V,
) -> Result<IndexResult>
where
F: Fn(usize, usize, &str),
V: Fn(&str),
{
debug_assert!(self.project_root.exists(), "project root does not exist");
debug_assert!(
self.project_root.is_dir(),
"project root is not a directory"
);
let _lock = try_acquire_sync_lock(&self.project_root)?;
write_dirty_sentinel(&self.project_root);
let start = Instant::now();
self.db.clear().await?;
self.db.begin_bulk_load().await?;
let phase_start = Instant::now();
let files = self.scan_files();
let total = files.len();
on_verbose(&format!(
"scanned {} files in {:.1}s",
total,
phase_start.elapsed().as_secs_f64()
));
let project_root = self.project_root.clone();
let registry = &self.registry;
let phase_start = Instant::now();
let (extractions, _skipped) =
extract_files_isolated(&project_root, registry, files.clone());
let mut all_nodes = Vec::new();
let mut all_edges = Vec::new();
let mut all_unresolved = Vec::new();
let mut file_records = Vec::new();
let mut total_nodes = 0;
for (idx, (file_path, result, hash, size, mtime)) in extractions.iter().enumerate() {
on_file(idx + 1, total, file_path);
total_nodes += result.nodes.len();
all_nodes.extend_from_slice(&result.nodes);
all_edges.extend_from_slice(&result.edges);
all_unresolved.extend_from_slice(&result.unresolved_refs);
file_records.push(FileRecord {
path: file_path.clone(),
content_hash: hash.clone(),
size: *size,
modified_at: *mtime,
indexed_at: current_timestamp(),
node_count: result.nodes.len() as u32,
});
}
on_verbose(&format!(
"extracted {} nodes, {} edges from {} files in {:.1}s",
total_nodes,
all_edges.len(),
extractions.len(),
phase_start.elapsed().as_secs_f64()
));
let phase_start = Instant::now();
if !all_unresolved.is_empty() {
let resolver = ReferenceResolver::from_nodes(&self.db, &all_nodes);
let resolution = resolver.resolve_all(&all_unresolved);
all_edges.extend(resolver.create_edges(&resolution.resolved));
}
on_verbose(&format!(
"resolved {} references in {:.1}s",
all_unresolved.len(),
phase_start.elapsed().as_secs_f64()
));
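// Sort nodes, edges, and file records so inserts are deterministic and
// duplicate edges can be removed by the dedup_by pass below.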
all_nodes.sort_unstable_by(|a, b| a.id.cmp(&b.id));
all_edges.sort_unstable_by(|a, b| {
(&a.source, &a.target, a.kind.as_str(), &a.line).cmp(&(
&b.source,
&b.target,
b.kind.as_str(),
&b.line,
))
});
all_edges.dedup_by(|a, b| {
a.source == b.source && a.target == b.target && a.kind == b.kind && a.line == b.line
});
file_records.sort_unstable_by(|a, b| a.path.cmp(&b.path));
let total_edges = all_edges.len();
let phase_start = Instant::now();
self.db.insert_nodes(&all_nodes).await?;
self.db.insert_edges(&all_edges).await?;
self.db.upsert_files(&file_records).await?;
self.db.end_bulk_load().await?;
on_verbose(&format!(
"wrote to database in {:.1}s",
phase_start.elapsed().as_secs_f64()
));
let duration_ms = start.elapsed().as_millis() as u64;
let now_str = current_timestamp().to_string();
self.db.set_metadata("last_full_sync_at", &now_str).await?;
self.db.set_metadata("last_sync_at", &now_str).await?;
self.db
.set_metadata("last_sync_duration_ms", &duration_ms.to_string())
.await?;
let result = IndexResult {
file_count: files.len(),
node_count: total_nodes,
edge_count: total_edges,
duration_ms,
};
debug_assert!(
result.node_count >= result.file_count || result.file_count == 0,
"fewer nodes than files is unexpected"
);
debug_assert!(
result.duration_ms > 0 || result.file_count == 0,
"non-empty index completed in zero milliseconds"
);
clear_dirty_sentinel(&self.project_root);
Ok(result)
}
pub async fn sync(&self) -> Result<SyncResult> {
self.sync_with_progress(|_, _, _| {}).await
}
pub async fn sync_with_progress<F>(&self, on_progress: F) -> Result<SyncResult>
where
F: Fn(usize, usize, &str),
{
self.sync_with_progress_verbose(on_progress, |_| {}).await
}
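/// Opportunistically re-syncs the given files if they are stale, returning
/// whether any of them remain stale afterwards. If another sync already
/// holds the lock, reports `true` without blocking.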
pub async fn sync_if_stale(&self, stale_files: &[String]) -> Result<bool> {
if stale_files.is_empty() {
return Ok(false);
}
let still_stale_before = self.check_file_staleness(stale_files).await;
if still_stale_before.is_empty() {
return Ok(false);
}
let Ok(lock) = try_acquire_sync_lock(&self.project_root) else {
return Ok(true);
};
let result = self.sync_single_files(stale_files).await;
drop(lock);
match result {
Ok(()) => {
let still_stale_after = self.check_file_staleness(stale_files).await;
Ok(!still_stale_after.is_empty())
}
Err(_) => Ok(true), // the sync failed — assume the files are still stale
}
}
async fn sync_single_files(&self, file_paths: &[String]) -> Result<()> {
use crate::sync as sync_mod;
let start = Instant::now();
let project_root = &self.project_root;
let registry = &self.registry;
let mut hash_map: HashMap<String, String> = HashMap::new();
let mut stat_map: HashMap<String, (i64, u64)> = HashMap::new();
for path in file_paths {
let abs_path = project_root.join(path);
if let Some((mtime, size)) = sync_mod::file_stat(&abs_path) {
stat_map.insert(path.clone(), (mtime, size));
}
if let Ok(source) = sync_mod::read_source_file(&abs_path) {
let hash = sync_mod::content_hash(&source);
hash_map.insert(path.clone(), hash);
}
}
// stat_map is not needed here; extraction results carry their own size/mtime.
let _ = stat_map;
let (sync_extractions, _skipped_extractions) =
extract_files_isolated(project_root, registry, file_paths.to_vec());
let mut queued_edges: Vec<&Edge> = Vec::new();
for (file_path, result, hash, size, mtime) in &sync_extractions {
self.db.delete_nodes_by_file(file_path).await?;
self.db.insert_nodes(&result.nodes).await?;
queued_edges.extend(&result.edges);
if !result.unresolved_refs.is_empty() {
self.db
.insert_unresolved_refs(&result.unresolved_refs)
.await?;
}
let file_record = FileRecord {
path: (*file_path).clone(),
content_hash: (*hash).clone(),
size: *size,
modified_at: *mtime,
indexed_at: current_timestamp(),
node_count: result.nodes.len() as u32,
};
self.db.upsert_file(&file_record).await?;
}
if !queued_edges.is_empty() {
let owned: Vec<Edge> = queued_edges.into_iter().cloned().collect();
self.db.insert_edges(&owned).await?;
}
if !file_paths.is_empty() {
let all_nodes = self.db.get_all_nodes().await.unwrap_or_default();
let resolver = ReferenceResolver::from_nodes(&self.db, &all_nodes);
let unresolved = self.db.get_unresolved_refs().await?;
if !unresolved.is_empty() {
let resolution = resolver.resolve_all(&unresolved);
let edges = resolver.create_edges(&resolution.resolved);
if !edges.is_empty() {
self.db.insert_edges(&edges).await?;
}
}
}
self.db
.set_metadata("last_sync_at", ¤t_timestamp().to_string())
.await?;
self.db
.set_metadata(
"last_sync_duration_ms",
&start.elapsed().as_millis().to_string(),
)
.await?;
clear_dirty_sentinel(&self.project_root);
Ok(())
}
pub async fn sync_with_progress_verbose<F, V>(
&self,
on_progress: F,
on_verbose: V,
) -> Result<SyncResult>
where
F: Fn(usize, usize, &str),
V: Fn(&str),
{
debug_assert!(
self.project_root.exists(),
"sync: project root does not exist"
);
debug_assert!(
self.project_root.is_dir(),
"sync: project root is not a directory"
);
let _lock = try_acquire_sync_lock(&self.project_root)?;
write_dirty_sentinel(&self.project_root);
let start = Instant::now();
on_progress(0, 0, "scanning files");
let phase_start = Instant::now();
let current_files = self.scan_files();
on_verbose(&format!(
"scanned {} files in {:.1}s",
current_files.len(),
phase_start.elapsed().as_secs_f64()
));
on_progress(0, 0, "checking file timestamps");
let phase_start = Instant::now();
let project_root = &self.project_root;
let file_stats: Vec<(String, i64, u64)> = current_files
.par_iter()
.filter_map(|path| {
let abs_path = project_root.join(path);
let (mtime, size) = sync::file_stat(&abs_path)?;
Some((path.clone(), mtime, size))
})
.collect();
on_verbose(&format!(
"stat-checked {} files in {:.1}s",
file_stats.len(),
phase_start.elapsed().as_secs_f64()
));
let db_files = self.db.get_all_files().await?;
let db_map: HashMap<String, FileRecord> =
db_files.into_iter().map(|f| (f.path.clone(), f)).collect();
let mut new_files: Vec<String> = Vec::new();
let mut stat_changed: Vec<String> = Vec::new();
let mut current_set: HashSet<&str> = HashSet::with_capacity(file_stats.len());
let mut stat_map: HashMap<String, (i64, u64)> = HashMap::with_capacity(file_stats.len());
for (path, mtime, size) in &file_stats {
current_set.insert(path.as_str());
stat_map.insert(path.clone(), (*mtime, *size));
match db_map.get(path) {
None => new_files.push(path.clone()),
Some(record) => {
if record.modified_at != *mtime || record.size != *size {
stat_changed.push(path.clone());
}
}
}
}
let removed: Vec<String> = db_map
.keys()
.filter(|path| !current_set.contains(path.as_str()))
.cloned()
.collect();
on_verbose(&format!(
"changes: {} new, {} stat-changed, {} removed, {} unchanged",
new_files.len(),
stat_changed.len(),
removed.len(),
file_stats.len() - new_files.len() - stat_changed.len()
));
on_progress(0, 0, "hashing changed files");
let phase_start = Instant::now();
let needs_read: Vec<&String> = new_files.iter().chain(stat_changed.iter()).collect();
let hash_results: Vec<_> = needs_read
.par_iter()
.map(|path| {
let abs_path = project_root.join(path.as_str());
match sync::read_source_file(&abs_path) {
Ok(source) => Ok(((*path).clone(), sync::content_hash(&source))),
Err(e) => Err(((*path).clone(), e.to_string())),
}
})
.collect();
let mut skipped: Vec<(String, String)> = Vec::new();
let mut hash_map: HashMap<String, String> = HashMap::new();
for result in hash_results {
match result {
Ok((path, hash)) => {
hash_map.insert(path, hash);
}
Err((path, reason)) => {
skipped.push((path, reason));
}
}
}
on_verbose(&format!(
"hashed {} files in {:.1}s ({} read errors)",
hash_map.len(),
phase_start.elapsed().as_secs_f64(),
skipped.len()
));
on_progress(0, 0, "detecting changes");
let mut stale: Vec<String> = Vec::new();
let mut mtime_only_changed: Vec<String> = Vec::new();
for path in &stat_changed {
if let Some(new_hash) = hash_map.get(path) {
if let Some(record) = db_map.get(path) {
if record.content_hash == *new_hash {
mtime_only_changed.push(path.clone());
} else {
stale.push(path.clone());
}
}
}
}
on_verbose(&format!(
"content check: {} modified, {} mtime-only",
stale.len(),
mtime_only_changed.len()
));
for path in &mtime_only_changed {
if let (Some(record), Some(&(mtime, size))) = (db_map.get(path), stat_map.get(path)) {
let updated = FileRecord {
modified_at: mtime,
size,
..record.clone()
};
self.db.upsert_file(&updated).await?;
}
}
for path in &removed {
on_progress(0, 0, &format!("removing {path}"));
self.db.delete_file(path).await?;
}
let to_index: Vec<String> = stale.iter().chain(new_files.iter()).cloned().collect();
let registry = &self.registry;
let phase_start = Instant::now();
// stat_map is no longer needed past this point.
let _ = stat_map;
let (sync_extractions, sync_skipped): (Vec<_>, Vec<_>) =
extract_files_isolated(project_root, registry, to_index.clone());
skipped.extend(sync_skipped);
let total = sync_extractions.len();
let mut total_nodes = 0usize;
let mut total_edges = 0usize;
let mut queued_edges: Vec<&Edge> = Vec::new();
for (idx, (file_path, result, hash, size, mtime)) in sync_extractions.iter().enumerate() {
on_progress(idx + 1, total, file_path);
total_nodes += result.nodes.len();
total_edges += result.edges.len();
self.db.delete_nodes_by_file(file_path).await?;
self.db.insert_nodes(&result.nodes).await?;
queued_edges.extend(&result.edges);
if !result.unresolved_refs.is_empty() {
self.db
.insert_unresolved_refs(&result.unresolved_refs)
.await?;
}
let file_record = FileRecord {
path: file_path.clone(),
content_hash: hash.clone(),
size: *size,
modified_at: *mtime,
indexed_at: current_timestamp(),
node_count: result.nodes.len() as u32,
};
self.db.upsert_file(&file_record).await?;
}
if !queued_edges.is_empty() {
let owned: Vec<Edge> = queued_edges.into_iter().cloned().collect();
self.db.insert_edges(&owned).await?;
}
if !to_index.is_empty() {
on_verbose(&format!(
"indexed {} files ({} nodes, {} edges) in {:.1}s",
to_index.len(),
total_nodes,
total_edges,
phase_start.elapsed().as_secs_f64()
));
}
if !to_index.is_empty() {
on_progress(0, 0, "resolving references");
let phase_start = Instant::now();
let unresolved = self.db.get_unresolved_refs().await?;
if !unresolved.is_empty() {
let all_nodes = self.db.get_all_nodes().await.unwrap_or_default();
let resolver = ReferenceResolver::from_nodes(&self.db, &all_nodes);
let resolution = resolver.resolve_all(&unresolved);
let edges = resolver.create_edges(&resolution.resolved);
if !edges.is_empty() {
self.db.insert_edges(&edges).await?;
}
}
on_verbose(&format!(
"resolved {} references in {:.1}s",
unresolved.len(),
phase_start.elapsed().as_secs_f64()
));
}
let duration_ms = start.elapsed().as_millis() as u64;
self.db
.set_metadata("last_sync_at", ¤t_timestamp().to_string())
.await?;
self.db
.set_metadata("last_sync_duration_ms", &duration_ms.to_string())
.await?;
clear_dirty_sentinel(&self.project_root);
Ok(SyncResult {
files_added: new_files.len(),
files_modified: stale.len(),
files_removed: removed.len(),
duration_ms,
added_paths: new_files,
modified_paths: stale,
removed_paths: removed,
skipped_paths: skipped,
})
}
fn scan_files(&self) -> Vec<String> {
debug_assert!(
self.project_root.is_dir(),
"scan_files: project_root is not a directory"
);
let supported_exts = self.registry.supported_extensions();
debug_assert!(
!supported_exts.is_empty(),
"scan_files: no supported extensions registered"
);
if self.config.git_ignore {
let files = self.scan_files_with_gitignore(&supported_exts);
if files.is_empty() {
let has_source = WalkDir::new(&self.project_root)
.follow_links(true)
.max_depth(2)
.into_iter()
.filter_map(std::result::Result::ok)
.any(|e| {
e.file_type().is_file()
&& e.path()
.extension()
.and_then(|ext| ext.to_str())
.is_some_and(|ext| supported_exts.contains(&ext))
});
if has_source {
eprintln!("warning: gitignore-aware scan found no files; falling back to plain walk (project may be gitignored by parent repo)");
return self.scan_files_walkdir(&supported_exts);
}
}
files
} else {
self.scan_files_walkdir(&supported_exts)
}
}
fn scan_files_walkdir(&self, supported_exts: &[&str]) -> Vec<String> {
let mut files = Vec::new();
let root = &self.project_root;
let config = &self.config;
for entry in WalkDir::new(root)
.follow_links(true)
.into_iter()
.filter_entry(|e| {
if e.depth() == 0 {
return true;
}
let name = e.file_name().to_string_lossy();
if name.starts_with('.') || name == "target" {
if let Ok(rel) = e.path().strip_prefix(root) {
let rel_str = rel.to_string_lossy().replace('\\', "/");
return is_included(&rel_str, config);
}
return false;
}
if e.file_type().is_dir() {
if let Ok(rel) = e.path().strip_prefix(root) {
let rel_str = rel.to_string_lossy().replace('\\', "/");
if is_excluded_dir(&rel_str, config) {
return false;
}
}
}
true
})
{
let Ok(entry) = entry else { continue };
if !entry.file_type().is_file() {
continue;
}
if let Some(rel_str) = self.accept_file(entry.path(), supported_exts) {
files.push(rel_str);
}
}
files
}
fn scan_files_with_gitignore(&self, supported_exts: &[&str]) -> Vec<String> {
let has_includes = !self.config.include.is_empty();
let mut files = Vec::new();
let walker = ignore::WalkBuilder::new(&self.project_root)
.follow_links(true)
// With include patterns, hidden entries must be walked so the manual
// is_included filter below can opt them back in.
.hidden(!has_includes)
.git_ignore(true)
.git_global(true)
.git_exclude(true)
.add_custom_ignore_filename(".gitignore")
.build();
for entry in walker {
let Ok(entry) = entry else { continue };
let Some(ft) = entry.file_type() else {
continue;
};
if has_includes && entry.depth() > 0 {
let name = entry.file_name().to_string_lossy();
if name.starts_with('.') {
if let Ok(rel) = entry.path().strip_prefix(&self.project_root) {
let rel_str = rel.to_string_lossy().replace('\\', "/");
if !is_included(&rel_str, &self.config) {
continue;
}
} else {
continue;
}
}
}
if !ft.is_file() {
continue;
}
if let Some(rel_str) = self.accept_file(entry.path(), supported_exts) {
files.push(rel_str);
}
}
files
}
fn accept_file(&self, path: &Path, supported_exts: &[&str]) -> Option<String> {
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
if !supported_exts.contains(&ext) {
return None;
}
let relative = path.strip_prefix(&self.project_root).ok()?;
let rel_str = relative.to_string_lossy().replace('\\', "/");
if is_excluded(&rel_str, &self.config) {
return None;
}
let metadata = std::fs::metadata(path).ok()?;
if metadata.len() > self.config.max_file_size {
return None;
}
Some(rel_str)
}
fn resolve_path(&self, path: &str) -> Option<String> {
let path = Path::new(path);
if path.is_absolute() {
let relative = path.strip_prefix(&self.project_root).ok()?;
Some(relative.to_string_lossy().replace('\\', "/"))
} else {
Some(path.to_string_lossy().replace('\\', "/"))
}
}
fn absolute_path(&self, relative_path: &str) -> PathBuf {
self.project_root.join(relative_path)
}
async fn reindex_file(&self, file_path: &str) -> Result<()> {
let abs_path = self.absolute_path(file_path);
let source = std::fs::read_to_string(&abs_path).map_err(|e| TokenSaveError::Config {
message: format!("failed to read file {file_path}: {e}"),
})?;
let Some(extractor) = self.registry.extractor_for_file(file_path) else {
return Ok(());
};
let mut result =
safe_extract(extractor, file_path, &source).ok_or_else(|| TokenSaveError::Config {
message: format!("extraction panicked for {file_path}"),
})?;
result.sanitize();
let hash = sync::content_hash(&source);
let size = source.len() as u64;
let mtime = sync::file_stat(&abs_path).map_or_else(current_timestamp, |(m, _)| m);
self.db.delete_nodes_by_file(file_path).await?;
self.db.insert_nodes(&result.nodes).await?;
self.db.insert_edges(&result.edges).await?;
if !result.unresolved_refs.is_empty() {
self.db
.insert_unresolved_refs(&result.unresolved_refs)
.await?;
}
let file_record = FileRecord {
path: file_path.to_string(),
content_hash: hash,
size,
modified_at: mtime,
indexed_at: current_timestamp(),
node_count: result.nodes.len() as u32,
};
self.db.upsert_file(&file_record).await?;
Ok(())
}
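/// Replaces exactly one occurrence of `old_str` in `path` and re-indexes the
/// file; reports a soft failure (success = false) when the match count is
/// not exactly one.
///
/// A minimal usage sketch (paths and strings are illustrative):
/// ```ignore
/// let ts = TokenSave::open(Path::new("/path/to/project")).await?;
/// let res = ts.str_replace("src/lib.rs", "fn old_name", "fn new_name").await?;
/// assert!(res.success, "{}", res.message);
/// ```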
pub async fn str_replace(
&self,
path: &str,
old_str: &str,
new_str: &str,
) -> Result<EditResult> {
let rel_path = self
.resolve_path(path)
.ok_or_else(|| TokenSaveError::Config {
message: "path is not within the project".to_string(),
})?;
let abs_path = self.absolute_path(&rel_path);
let source = std::fs::read_to_string(&abs_path).map_err(|e| TokenSaveError::Config {
message: format!("failed to read {path}: {e}"),
})?;
let matches: Vec<_> = source.match_indices(old_str).collect();
match matches.len() {
0 => {
return Ok(EditResult {
success: false,
file_path: rel_path.clone(),
matched_str: old_str.to_string(),
new_str: new_str.to_string(),
message: format!("old_str not found in {path}"),
})
}
1 => {}
n => {
return Ok(EditResult {
success: false,
file_path: rel_path.clone(),
matched_str: old_str.to_string(),
new_str: new_str.to_string(),
message: format!("old_str matches {n} times, must match exactly once"),
})
}
}
let modified = source.replacen(old_str, new_str, 1);
tokio::fs::write(&abs_path, &modified)
.await
.map_err(|e| TokenSaveError::Config {
message: format!("failed to write {path}: {e}"),
})?;
self.reindex_file(&rel_path).await?;
Ok(EditResult {
success: true,
file_path: rel_path,
matched_str: old_str.to_string(),
new_str: new_str.to_string(),
message: "replacement successful".to_string(),
})
}
pub async fn multi_str_replace(
&self,
path: &str,
replacements: &[(&str, &str)],
) -> Result<MultiEditResult> {
let rel_path = self
.resolve_path(path)
.ok_or_else(|| TokenSaveError::Config {
message: "path is not within the project".to_string(),
})?;
let abs_path = self.absolute_path(&rel_path);
let source = std::fs::read_to_string(&abs_path).map_err(|e| TokenSaveError::Config {
message: format!("failed to read {path}: {e}"),
})?;
for (old, _) in replacements {
let count = source.matches(old).count();
if count != 1 {
return Ok(MultiEditResult {
success: false,
file_path: rel_path.clone(),
applied_count: 0,
message: format!(
"replacement '{}' matches {} times, must match exactly once",
// Truncate by characters, not bytes: byte slicing can panic on a
// multi-byte UTF-8 boundary.
old.chars().take(20).collect::<String>(),
count
),
});
}
}
let mut modified = source;
for (old, new) in replacements {
modified = modified.replacen(old, new, 1);
}
tokio::fs::write(&abs_path, &modified)
.await
.map_err(|e| TokenSaveError::Config {
message: format!("failed to write {path}: {e}"),
})?;
self.reindex_file(&rel_path).await?;
Ok(MultiEditResult {
success: true,
file_path: rel_path,
applied_count: replacements.len(),
message: format!("applied {} replacements", replacements.len()),
})
}
pub async fn insert_at(
&self,
path: &str,
anchor: &str,
content: &str,
before: bool,
) -> Result<InsertResult> {
let rel_path = self
.resolve_path(path)
.ok_or_else(|| TokenSaveError::Config {
message: "path is not within the project".to_string(),
})?;
let abs_path = self.absolute_path(&rel_path);
let source = std::fs::read_to_string(&abs_path).map_err(|e| TokenSaveError::Config {
message: format!("failed to read {path}: {e}"),
})?;
let lines: Vec<&str> = source.lines().collect();
// An empty anchor would vacuously pass the all-digits test, so require at
// least one character before treating the anchor as a line number.
let anchor_line = if !anchor.is_empty() && anchor.chars().all(|c| c.is_ascii_digit()) {
let line_num: usize = anchor.parse().map_err(|_| TokenSaveError::Config {
message: format!("invalid line number: {anchor}"),
})?;
if line_num == 0 || line_num > lines.len() {
return Ok(InsertResult {
success: false,
file_path: rel_path.clone(),
anchor_line: line_num as u32,
content: content.to_string(),
before,
message: format!(
"line number {line_num} out of range (file has {} lines)",
lines.len()
),
});
}
line_num - 1
} else {
// Truncate the anchor by characters, not bytes, to avoid panicking on a
// multi-byte UTF-8 boundary.
let needle: String = anchor.chars().take(100).collect();
let matching_lines: Vec<usize> = lines
.iter()
.enumerate()
.filter(|(_, line)| line.contains(needle.as_str()))
.map(|(i, _)| i)
.collect();
if matching_lines.is_empty() {
return Ok(InsertResult {
success: false,
file_path: rel_path.clone(),
anchor_line: 0,
content: content.to_string(),
before,
message: format!("anchor '{anchor}' not found"),
});
}
if matching_lines.len() > 1 {
return Ok(InsertResult {
success: false,
file_path: rel_path.clone(),
anchor_line: matching_lines.len() as u32,
content: content.to_string(),
before,
message: format!(
"anchor '{anchor}' matches {} lines, must match exactly one",
matching_lines.len()
),
});
}
matching_lines[0]
};
let insert_idx = if before { anchor_line } else { anchor_line + 1 };
let mut new_lines: Vec<&str> = lines[..insert_idx].to_vec();
new_lines.push(content);
new_lines.extend_from_slice(&lines[insert_idx..]);
let mut modified = new_lines.join("\n");
if source.ends_with('\n') {
modified.push('\n');
}
tokio::fs::write(&abs_path, &modified)
.await
.map_err(|e| TokenSaveError::Config {
message: format!("failed to write {path}: {e}"),
})?;
self.reindex_file(&rel_path).await?;
Ok(InsertResult {
success: true,
file_path: rel_path,
anchor_line: (anchor_line + 1) as u32,
content: content.to_string(),
before,
message: format!("inserted at line {}", anchor_line + 1),
})
}
pub async fn ast_grep_rewrite(
&self,
path: &str,
pattern: &str,
rewrite: &str,
) -> Result<AstGrepResult> {
use std::process::Command;
let rel_path = self
.resolve_path(path)
.ok_or_else(|| TokenSaveError::Config {
message: "path is not within the project".to_string(),
})?;
let abs_path = self.absolute_path(&rel_path);
let check_output = Command::new("ast-grep").args(["--version"]).output();
if check_output.is_err() {
return Ok(AstGrepResult {
success: false,
file_path: rel_path.clone(),
pattern: pattern.to_string(),
rewrite: rewrite.to_string(),
message: "ast-grep is not installed. Install with: cargo install ast-grep"
.to_string(),
});
}
let output = Command::new("ast-grep")
.args([
"run",
"-p",
pattern,
"-r",
rewrite,
"-d",
abs_path.to_string_lossy().as_ref(),
])
.output()
.map_err(|e| TokenSaveError::Config {
message: format!("failed to run ast-grep: {e}"),
})?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
return Ok(AstGrepResult {
success: false,
file_path: rel_path.clone(),
pattern: pattern.to_string(),
rewrite: rewrite.to_string(),
message: format!("ast-grep failed: {stderr}"),
});
}
self.reindex_file(&rel_path).await?;
Ok(AstGrepResult {
success: true,
file_path: rel_path,
pattern: pattern.to_string(),
rewrite: rewrite.to_string(),
message: "ast-grep rewrite completed".to_string(),
})
}
}
impl TokenSave {
pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
self.db.search_nodes(query, limit).await
}
pub async fn get_stats(&self) -> Result<GraphStats> {
self.db.get_stats().await
}
pub async fn get_node(&self, id: &str) -> Result<Option<Node>> {
self.db.get_node_by_id(id).await
}
pub async fn get_callers(&self, node_id: &str, max_depth: usize) -> Result<Vec<(Node, Edge)>> {
let traverser = GraphTraverser::new(&self.db);
traverser.get_callers(node_id, max_depth).await
}
pub async fn get_callees(&self, node_id: &str, max_depth: usize) -> Result<Vec<(Node, Edge)>> {
let traverser = GraphTraverser::new(&self.db);
traverser.get_callees(node_id, max_depth).await
}
pub async fn get_impact_radius(&self, node_id: &str, max_depth: usize) -> Result<Subgraph> {
let traverser = GraphTraverser::new(&self.db);
traverser.get_impact_radius(node_id, max_depth).await
}
pub async fn get_call_graph(&self, node_id: &str, depth: usize) -> Result<Subgraph> {
let traverser = GraphTraverser::new(&self.db);
traverser.get_call_graph(node_id, depth).await
}
pub async fn find_dead_code(&self, kinds: &[NodeKind]) -> Result<Vec<Node>> {
let qm = GraphQueryManager::new(&self.db);
qm.find_dead_code(kinds).await
}
pub async fn get_nodes_by_file(&self, file_path: &str) -> Result<Vec<Node>> {
self.db.get_nodes_by_file(file_path).await
}
pub async fn get_all_nodes(&self) -> Result<Vec<Node>> {
self.db.get_all_nodes().await
}
pub async fn get_incoming_edges(&self, node_id: &str) -> Result<Vec<Edge>> {
self.db.get_incoming_edges(node_id, &[]).await
}
pub async fn get_test_annotated_node_ids(
&self,
candidate_ids: &[String],
) -> Result<HashSet<String>> {
self.db.get_test_annotated_node_ids(candidate_ids).await
}
pub async fn get_files_with_test_annotations(&self) -> Result<HashSet<String>> {
self.db.get_files_with_test_annotations().await
}
pub async fn get_skip_test_coverage_node_ids(&self) -> Result<HashSet<String>> {
self.db.get_skip_test_coverage_node_ids().await
}
pub async fn get_incoming_edges_bulk(
&self,
target_ids: &[String],
kinds: &[EdgeKind],
) -> Result<Vec<Edge>> {
self.db.get_incoming_edges_bulk(target_ids, kinds).await
}
pub async fn get_nodes_by_qualified_name(&self, qname: &str) -> Result<Vec<Node>> {
self.db.get_nodes_by_qualified_name(qname).await
}
pub async fn get_outgoing_edges(&self, node_id: &str) -> Result<Vec<Edge>> {
self.db.get_outgoing_edges(node_id, &[]).await
}
pub async fn get_all_edges(&self) -> Result<Vec<Edge>> {
self.db.get_all_edges().await
}
pub async fn get_ranked_nodes_by_edge_kind(
&self,
edge_kind: &EdgeKind,
node_kind: Option<&NodeKind>,
incoming: bool,
path_prefix: Option<&str>,
limit: usize,
) -> Result<Vec<(Node, u64)>> {
self.db
.get_ranked_nodes_by_edge_kind(edge_kind, node_kind, incoming, path_prefix, limit)
.await
}
pub async fn get_largest_nodes(
&self,
node_kind: Option<&NodeKind>,
path_prefix: Option<&str>,
limit: usize,
) -> Result<Vec<(Node, u32)>> {
self.db
.get_largest_nodes(node_kind, path_prefix, limit)
.await
}
pub async fn get_file_coupling(
&self,
fan_in: bool,
path_prefix: Option<&str>,
limit: usize,
) -> Result<Vec<(String, u64)>> {
self.db.get_file_coupling(fan_in, path_prefix, limit).await
}
pub async fn get_inheritance_depth(
&self,
path_prefix: Option<&str>,
limit: usize,
) -> Result<Vec<(Node, u64)>> {
self.db.get_inheritance_depth(path_prefix, limit).await
}
pub async fn get_node_distribution(
&self,
path_prefix: Option<&str>,
) -> Result<Vec<(String, String, u64)>> {
self.db.get_node_distribution(path_prefix).await
}
pub async fn get_call_edges(&self, path_prefix: Option<&str>) -> Result<Vec<(String, String)>> {
self.db.get_call_edges(path_prefix).await
}
pub async fn get_complexity_ranked(
&self,
node_kind: Option<&NodeKind>,
path_prefix: Option<&str>,
limit: usize,
) -> Result<Vec<(Node, u32, u64, u64, u64)>> {
self.db
.get_complexity_ranked(node_kind, path_prefix, limit)
.await
}
pub async fn get_undocumented_public_symbols(
&self,
path_prefix: Option<&str>,
limit: usize,
) -> Result<Vec<Node>> {
self.db
.get_undocumented_public_symbols(path_prefix, limit)
.await
}
pub async fn get_god_classes(
&self,
path_prefix: Option<&str>,
limit: usize,
) -> Result<Vec<(Node, u64, u64, u64)>> {
self.db.get_god_classes(path_prefix, limit).await
}
pub async fn find_circular_dependencies(&self) -> Result<Vec<Vec<String>>> {
let qm = GraphQueryManager::new(&self.db);
qm.find_circular_dependencies().await
}
pub async fn build_context(
&self,
task: &str,
options: &BuildContextOptions,
) -> Result<TaskContext> {
let builder = ContextBuilder::new(&self.db, &self.project_root);
builder.build_context(task, options).await
}
pub async fn get_all_files(&self) -> Result<Vec<FileRecord>> {
self.db.get_all_files().await
}
pub async fn get_file_dependents(&self, file_path: &str) -> Result<Vec<String>> {
let qm = GraphQueryManager::new(&self.db);
qm.get_file_dependents(file_path).await
}
pub async fn get_file_token_map(&self) -> Result<HashMap<String, u64>> {
let files = self.db.get_all_files().await?;
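// Rough token estimate: assume ~4 bytes per token.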
Ok(files.into_iter().map(|f| (f.path, f.size / 4)).collect())
}
pub async fn get_tokens_saved(&self) -> Result<u64> {
match self.db.get_metadata("tokens_saved").await? {
Some(v) => Ok(v.parse::<u64>().unwrap_or(0)),
None => Ok(0),
}
}
pub async fn set_tokens_saved(&self, value: u64) -> Result<()> {
self.db
.set_metadata("tokens_saved", &value.to_string())
.await
}
pub async fn get_local_counter(&self) -> Result<u64> {
match self.db.get_metadata("local_counter").await? {
Some(v) => Ok(v.parse::<u64>().unwrap_or(0)),
None => Ok(0),
}
}
pub async fn reset_local_counter(&self) -> Result<()> {
self.db.set_metadata("local_counter", "0").await
}
pub async fn add_local_counter(&self, delta: u64) -> Result<()> {
let current = self.get_local_counter().await?;
self.db
.set_metadata("local_counter", &(current + delta).to_string())
.await
}
pub async fn get_nodes_by_dir(&self, dir: &str, kinds: &[NodeKind]) -> Result<Vec<Node>> {
self.db.get_nodes_by_dir(dir, kinds).await
}
pub async fn get_internal_edges(&self, node_ids: &[String]) -> Result<Vec<Edge>> {
self.db.get_internal_edges(node_ids).await
}
pub async fn checkpoint(&self) -> Result<()> {
self.db.checkpoint().await
}
pub async fn optimize(&self) -> Result<()> {
self.db.optimize().await
}
pub fn get_config(&self) -> &TokenSaveConfig {
&self.config
}
pub fn project_root(&self) -> &Path {
&self.project_root
}
pub fn active_branch(&self) -> Option<&str> {
self.active_branch.as_deref()
}
pub fn serving_branch(&self) -> Option<&str> {
self.serving_branch.as_deref()
}
pub fn fallback_warning(&self) -> Option<&str> {
self.fallback_warning.as_deref()
}
pub fn is_fallback(&self) -> bool {
self.fallback_warning.is_some()
}
}
impl TokenSave {
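/// Returns the subset of `file_paths` whose on-disk mtime is newer than the
/// time they were last indexed.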
pub async fn check_file_staleness(&self, file_paths: &[String]) -> Vec<String> {
let mut stale = Vec::new();
for path in file_paths {
if let Ok(Some(record)) = self.db.get_file(path).await {
let abs_path = self.project_root.join(path);
if let Ok(metadata) = std::fs::metadata(&abs_path) {
if let Ok(mtime) = metadata.modified() {
let mtime_secs = mtime
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs() as i64;
if mtime_secs > record.indexed_at {
stale.push(path.clone());
}
}
}
}
}
stale
}
pub async fn last_index_time(&self) -> Result<i64> {
self.db.last_index_time().await
}
pub fn git_commits_since(&self, since_timestamp: i64) -> usize {
let Ok(repo) = gix::open(&self.project_root) else {
return 0;
};
let Ok(head) = repo.head_commit() else {
return 0;
};
let sorting = gix::revision::walk::Sorting::ByCommitTimeCutoff {
order: gix::traverse::commit::simple::CommitTimeOrder::NewestFirst,
seconds: since_timestamp,
};
let Ok(walk) = head.ancestors().sorting(sorting).all() else {
return 0;
};
walk.filter_map(std::result::Result::ok).count()
}
}
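// Upper bounds on rows returned by the session-memory queries below.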
const MAX_RECALL_LIMIT: usize = 200;
const MAX_CODE_AREAS_LIMIT: usize = 200;
impl TokenSave {
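/// Persists a decision with an optional reason plus associated files and
/// tags (stored as JSON arrays); returns the new row id.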
pub async fn record_decision(
&self,
text: &str,
reason: Option<&str>,
files: &[String],
tags: &[String],
) -> crate::errors::Result<i64> {
debug_assert!(!text.is_empty(), "decision text must not be empty");
let files_json = serde_json::to_string(files).map_err(|e| crate::errors::TokenSaveError::Database {
message: format!("record_decision files serialization failed: {e}"),
operation: "record_decision".to_string(),
})?;
let tags_json = serde_json::to_string(tags).map_err(|e| crate::errors::TokenSaveError::Database {
message: format!("record_decision tags serialization failed: {e}"),
operation: "record_decision".to_string(),
})?;
let now = current_timestamp();
let conn = self.db.conn();
conn.execute(
"INSERT INTO memory_decisions (text, reason, created_at, files, tags) \
VALUES (?1, ?2, ?3, ?4, ?5)",
libsql::params![text, reason, now, files_json, tags_json],
)
.await
.map_err(|e| crate::errors::TokenSaveError::Database {
message: format!("record_decision insert failed: {e}"),
operation: "record_decision".to_string(),
})?;
Ok(conn.last_insert_rowid())
}
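/// Recalls recorded decisions, newest first, optionally filtered by an FTS
/// match and/or a minimum `created_at` timestamp; `limit` is clamped to
/// `MAX_RECALL_LIMIT`.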
pub async fn session_recall(
&self,
query: Option<&str>,
since: Option<i64>,
limit: usize,
) -> crate::errors::Result<Vec<DecisionRecord>> {
let limit = limit.clamp(1, MAX_RECALL_LIMIT) as i64;
let conn = self.db.conn();
let db_err = |e: libsql::Error| crate::errors::TokenSaveError::Database {
message: format!("session_recall query failed: {e}"),
operation: "session_recall".to_string(),
};
let mut rows = match (query, since) {
(Some(q), Some(ts)) => {
conn.query(
"SELECT d.id, d.text, d.reason, d.created_at, d.files, d.tags \
FROM memory_decisions d \
JOIN memory_decisions_fts f ON f.rowid = d.id \
WHERE memory_decisions_fts MATCH ?1 AND d.created_at >= ?2 \
ORDER BY d.created_at DESC LIMIT ?3",
libsql::params![q, ts, limit],
)
.await
.map_err(db_err)?
}
(Some(q), None) => {
conn.query(
"SELECT d.id, d.text, d.reason, d.created_at, d.files, d.tags \
FROM memory_decisions d \
JOIN memory_decisions_fts f ON f.rowid = d.id \
WHERE memory_decisions_fts MATCH ?1 \
ORDER BY d.created_at DESC LIMIT ?2",
libsql::params![q, limit],
)
.await
.map_err(db_err)?
}
(None, Some(ts)) => {
conn.query(
"SELECT id, text, reason, created_at, files, tags \
FROM memory_decisions WHERE created_at >= ?1 \
ORDER BY created_at DESC LIMIT ?2",
libsql::params![ts, limit],
)
.await
.map_err(db_err)?
}
(None, None) => {
conn.query(
"SELECT id, text, reason, created_at, files, tags \
FROM memory_decisions ORDER BY created_at DESC LIMIT ?1",
libsql::params![limit],
)
.await
.map_err(db_err)?
}
};
let row_err = |e: libsql::Error| crate::errors::TokenSaveError::Database {
message: format!("session_recall row read failed: {e}"),
operation: "session_recall".to_string(),
};
let json_err = |e: serde_json::Error| crate::errors::TokenSaveError::Database {
message: format!("session_recall JSON parse failed: {e}"),
operation: "session_recall".to_string(),
};
let mut out = Vec::new();
while let Some(row) = rows.next().await.map_err(row_err)? {
let files_json: String = row.get(4).map_err(row_err)?;
let tags_json: String = row.get(5).map_err(row_err)?;
out.push(DecisionRecord {
id: row.get(0).map_err(row_err)?,
text: row.get(1).map_err(row_err)?,
reason: row.get::<Option<String>>(2).map_err(row_err)?,
created_at: row.get(3).map_err(row_err)?,
files: serde_json::from_str(&files_json).map_err(json_err)?,
tags: serde_json::from_str(&tags_json).map_err(json_err)?,
});
}
Ok(out)
}
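/// Upserts a code area: increments `touch_count` and refreshes
/// `last_touched_at`, keeping the stored description unless a new one is
/// provided.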
pub async fn record_code_area(
&self,
path: &str,
description: Option<&str>,
) -> crate::errors::Result<()> {
debug_assert!(!path.is_empty(), "code area path must not be empty");
let now = current_timestamp();
let conn = self.db.conn();
conn.execute(
"INSERT INTO memory_code_areas (path, description, last_touched_at, touch_count) \
VALUES (?1, ?2, ?3, 1) \
ON CONFLICT(path) DO UPDATE SET \
description = COALESCE(excluded.description, memory_code_areas.description), \
last_touched_at = excluded.last_touched_at, \
touch_count = memory_code_areas.touch_count + 1",
libsql::params![path, description, now],
)
.await
.map_err(|e| crate::errors::TokenSaveError::Database {
message: format!("record_code_area upsert failed: {e}"),
operation: "record_code_area".to_string(),
})?;
Ok(())
}
pub async fn list_code_areas(&self, limit: usize) -> crate::errors::Result<Vec<CodeAreaRecord>> {
let limit = limit.clamp(1, MAX_CODE_AREAS_LIMIT) as i64;
let conn = self.db.conn();
let mut rows = conn
.query(
"SELECT id, path, description, last_touched_at, touch_count \
FROM memory_code_areas ORDER BY last_touched_at DESC LIMIT ?1",
libsql::params![limit],
)
.await
.map_err(|e| crate::errors::TokenSaveError::Database {
message: format!("list_code_areas query failed: {e}"),
operation: "list_code_areas".to_string(),
})?;
let row_err = |e: libsql::Error| crate::errors::TokenSaveError::Database {
message: format!("list_code_areas row read failed: {e}"),
operation: "list_code_areas".to_string(),
};
let mut out = Vec::new();
while let Some(row) = rows.next().await.map_err(row_err)? {
out.push(CodeAreaRecord {
id: row.get(0).map_err(row_err)?,
path: row.get(1).map_err(row_err)?,
description: row.get::<Option<String>>(2).map_err(row_err)?,
last_touched_at: row.get(3).map_err(row_err)?,
touch_count: row.get::<i64>(4).map_err(row_err)? as u32,
});
}
Ok(out)
}
}
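/// Heuristic: a path counts as a test file when it contains a common test
/// directory segment or filename infix (case-insensitive).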
pub fn is_test_file(path: &str) -> bool {
let test_segments = [
"test/",
"tests/",
"__tests__/",
"spec/",
"e2e/",
".test.",
".spec.",
"_test.",
"_spec.",
];
let lower = path.to_ascii_lowercase();
test_segments.iter().any(|s| lower.contains(s))
}