use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::time::Instant;
use rayon::prelude::*;
use walkdir::WalkDir;
use crate::config::{get_tokensave_dir, is_excluded, load_config, save_config, TokenSaveConfig};
use crate::context::ContextBuilder;
use crate::db::Database;
use crate::errors::{TokenSaveError, Result};
use crate::extraction::LanguageRegistry;
use crate::graph::{GraphQueryManager, GraphTraverser};
use crate::resolution::ReferenceResolver;
use crate::sync;
use crate::types::*;
/// Top-level handle for a TokenSave project: wraps the graph database,
/// the loaded configuration, the project root path, and the registry of
/// per-language extractors used to parse source files.
pub struct TokenSave {
    // Graph/database connection (async API).
    db: Database,
    // In-memory configuration; mutations (e.g. `add_skip_folders`) are not
    // automatically persisted back to disk.
    config: TokenSaveConfig,
    // Absolute root of the indexed project; scanned paths are stored
    // relative to it.
    project_root: PathBuf,
    // Maps file extensions to language extractors.
    registry: LanguageRegistry,
}
/// Summary of a full (re-)index run, returned by `index_all*`.
// Derive Debug for consistency with `SyncResult`, which already has it;
// callers can log either result uniformly.
#[derive(Debug)]
pub struct IndexResult {
    /// Number of files discovered by the scan.
    pub file_count: usize,
    /// Total nodes inserted into the graph.
    pub node_count: usize,
    /// Total edges inserted (after deduplication).
    pub edge_count: usize,
    /// Wall-clock duration of the run, in milliseconds.
    pub duration_ms: u64,
}
/// Summary of an incremental sync run, returned by `sync*`.
#[derive(Debug)]
pub struct SyncResult {
    /// Files present on disk but not previously indexed.
    pub files_added: usize,
    /// Files whose content hash differs from the indexed one.
    pub files_modified: usize,
    /// Files no longer on disk, purged from the index.
    pub files_removed: usize,
    /// Wall-clock duration of the sync, in milliseconds.
    pub duration_ms: u64,
}
/// Current time as whole seconds since the Unix epoch.
///
/// If the system clock reads earlier than the epoch (so `duration_since`
/// fails), this yields 0 rather than erroring.
fn current_timestamp() -> i64 {
    use std::time::{SystemTime, UNIX_EPOCH};
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);
    since_epoch.map(|d| d.as_secs()).unwrap_or(0) as i64
}
impl TokenSave {
    /// Create a fresh TokenSave workspace rooted at `project_root`:
    /// writes a default config with `root_dir` pointed at the project,
    /// then initializes the database file under the tokensave directory.
    ///
    /// # Errors
    /// Fails if the config cannot be saved or the database cannot be
    /// initialized.
    pub async fn init(project_root: &Path) -> Result<Self> {
        let config = TokenSaveConfig {
            root_dir: project_root.to_string_lossy().to_string(),
            ..TokenSaveConfig::default()
        };
        // Persist the config before touching the database so a failed DB
        // init still leaves a readable config on disk.
        save_config(project_root, &config)?;
        let db_path = get_tokensave_dir(project_root).join("tokensave.db");
        let (db, _migrated) = Database::initialize(&db_path).await?;
        Ok(Self {
            db,
            config,
            project_root: project_root.to_path_buf(),
            registry: LanguageRegistry::new(),
        })
    }
    /// Open an existing workspace. If the database schema was migrated on
    /// open, the whole project is re-indexed (with progress on stderr)
    /// before this returns, so callers never observe a stale-schema graph.
    ///
    /// # Errors
    /// Returns a `Config` error when no database file exists yet, and
    /// propagates config-load, database, or re-index failures.
    pub async fn open(project_root: &Path) -> Result<Self> {
        let config = load_config(project_root)?;
        let db_path = get_tokensave_dir(project_root).join("tokensave.db");
        if !db_path.exists() {
            return Err(TokenSaveError::Config {
                message: format!(
                    "no TokenSave database found at '{}'; run 'tokensave sync' first",
                    db_path.display()
                ),
            });
        }
        let (db, migrated) = Database::open(&db_path).await?;
        let ts = Self {
            db,
            config,
            project_root: project_root.to_path_buf(),
            registry: LanguageRegistry::new(),
        };
        if migrated {
            eprintln!("[tokensave] schema changed — performing full re-index…");
            ts.index_all_with_progress(|current, total, file| {
                eprintln!("[tokensave] re-indexing [{current}/{total}] {file}");
            }).await?;
            eprintln!("[tokensave] re-index complete.");
        }
        Ok(ts)
    }
    /// Cheap check for whether `init` has ever been run here: tests for
    /// the database file on disk without opening it.
    pub fn is_initialized(project_root: &Path) -> bool {
        get_tokensave_dir(project_root)
            .join("tokensave.db")
            .exists()
    }
}
/// RAII guard for the on-disk `sync.lock` file: dropping the guard
/// releases the lock, including on early return or panic unwind.
struct SyncLockGuard {
    // Absolute path of the lockfile to delete on drop.
    path: PathBuf,
}
impl Drop for SyncLockGuard {
    fn drop(&mut self) {
        // Best-effort removal; if the file is already gone there is
        // nothing left to release.
        let _ = std::fs::remove_file(&self.path);
    }
}
/// Exclusively create `lock_path` (via `create_new`, i.e. O_EXCL) and
/// record `pid` in it. The PID is informational — it feeds the staleness
/// check — so a failed write must not fail lock acquisition.
fn create_lock_file(lock_path: &Path, pid: u32) -> std::io::Result<SyncLockGuard> {
    use std::io::Write;
    let mut f = std::fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open(lock_path)?;
    let _ = write!(f, "{pid}");
    Ok(SyncLockGuard {
        path: lock_path.to_path_buf(),
    })
}
/// Acquire the per-project sync lock, failing if another live process
/// holds it.
///
/// A lock whose recorded PID is no longer alive (or unparsable) is
/// treated as stale and reclaimed.
///
/// NOTE(review): the stale-check → remove → recreate sequence is not
/// atomic; two processes racing on the same stale lock can in principle
/// both reclaim it. The `create_new` on the reclaim path narrows but does
/// not fully close that window.
///
/// # Errors
/// `SyncLock` if the lock is held by a live process or the lockfile
/// cannot be created.
fn try_acquire_sync_lock(project_root: &Path) -> Result<SyncLockGuard> {
    let lock_path = get_tokensave_dir(project_root).join("sync.lock");
    let pid = std::process::id();
    match create_lock_file(&lock_path, pid) {
        Ok(guard) => return Ok(guard),
        // Lock already exists — fall through to the staleness check.
        Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {}
        Err(e) => {
            return Err(TokenSaveError::SyncLock {
                message: format!("could not create lockfile: {e}"),
            });
        }
    }
    let contents = std::fs::read_to_string(&lock_path).unwrap_or_default();
    if let Ok(existing_pid) = contents.trim().parse::<u32>() {
        if is_pid_alive(existing_pid) {
            return Err(TokenSaveError::SyncLock {
                message: format!(
                    "another sync is already in progress (PID {existing_pid}). \
                     If this is stale, remove {}",
                    lock_path.display()
                ),
            });
        }
    }
    // Holder is gone (or the file held garbage): reclaim the lock.
    let _ = std::fs::remove_file(&lock_path);
    create_lock_file(&lock_path, pid).map_err(|e| TokenSaveError::SyncLock {
        message: format!("could not reclaim lockfile: {e}"),
    })
}
fn is_pid_alive(pid: u32) -> bool {
std::process::Command::new("kill")
.args(["-0", &pid.to_string()])
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.map(|s| s.success())
.unwrap_or(false)
}
impl TokenSave {
/// Append a glob exclusion of the form `<folder>/**` for every given
/// folder. Mutates only the in-memory config; nothing is persisted here.
pub fn add_skip_folders(&mut self, folders: &[String]) {
    let globs = folders.iter().map(|folder| format!("{folder}/**"));
    self.config.exclude.extend(globs);
}
/// Full re-index with no progress reporting; see
/// [`Self::index_all_with_progress`] for the worker and error behavior.
pub async fn index_all(&self) -> Result<IndexResult> {
    self.index_all_with_progress(|_, _, _| {}).await
}
/// Rebuild the entire index from scratch: clears the database, re-scans
/// and re-extracts every supported file in parallel, resolves cross-file
/// references, and records fresh file metadata.
///
/// `on_file(current, total, path)` is invoked once per successfully
/// extracted file. Note that `total` is the number of *scanned* files;
/// files that fail to read or lack an extractor are skipped, so `current`
/// may never reach `total`.
///
/// # Errors
/// Fails if the sync lock is held by a live process, the scan fails, or
/// any database operation fails.
pub async fn index_all_with_progress<F>(&self, on_file: F) -> Result<IndexResult>
where
    F: Fn(usize, usize, &str),
{
    debug_assert!(self.project_root.exists(), "project root does not exist");
    debug_assert!(self.project_root.is_dir(), "project root is not a directory");
    let _lock = try_acquire_sync_lock(&self.project_root)?;
    let start = Instant::now();
    self.db.clear().await?;
    self.db.begin_bulk_load().await?;
    let files = self.scan_files()?;
    let total = files.len();
    let project_root = &self.project_root;
    let registry = &self.registry;
    // Extraction is CPU-bound and per-file independent: parse in parallel.
    let extractions: Vec<_> = files
        .par_iter()
        .filter_map(|file_path| {
            let abs_path = project_root.join(file_path);
            let source = std::fs::read_to_string(&abs_path).ok()?;
            let extractor = registry.extractor_for_file(file_path)?;
            let result = extractor.extract(file_path, &source);
            let hash = sync::content_hash(&source);
            let size = source.len() as u64;
            Some((file_path.clone(), result, hash, size))
        })
        .collect();
    let mut all_nodes = Vec::new();
    let mut all_edges = Vec::new();
    let mut all_unresolved = Vec::new();
    let mut file_records = Vec::new();
    let mut total_nodes = 0;
    let total_edges;
    for (idx, (file_path, result, hash, size)) in extractions.iter().enumerate() {
        on_file(idx + 1, total, file_path);
        total_nodes += result.nodes.len();
        all_nodes.extend_from_slice(&result.nodes);
        all_edges.extend_from_slice(&result.edges);
        all_unresolved.extend_from_slice(&result.unresolved_refs);
        file_records.push(FileRecord {
            path: file_path.clone(),
            content_hash: hash.clone(),
            size: *size,
            modified_at: current_timestamp(),
            indexed_at: current_timestamp(),
            node_count: result.nodes.len() as u32,
        });
    }
    // Resolve cross-file references now that every node is known.
    if !all_unresolved.is_empty() {
        let resolver = ReferenceResolver::from_nodes(&self.db, &all_nodes);
        let resolution = resolver.resolve_all(&all_unresolved);
        all_edges.extend(resolver.create_edges(&resolution.resolved));
    }
    // Sort for deterministic insert order; dedup_by only drops
    // *consecutive* duplicates, hence the sort must come first.
    all_nodes.sort_unstable_by(|a, b| a.id.cmp(&b.id));
    all_edges.sort_unstable_by(|a, b| {
        (&a.source, &a.target, a.kind.as_str(), &a.line)
            .cmp(&(&b.source, &b.target, b.kind.as_str(), &b.line))
    });
    all_edges.dedup_by(|a, b| {
        a.source == b.source && a.target == b.target && a.kind == b.kind && a.line == b.line
    });
    file_records.sort_unstable_by(|a, b| a.path.cmp(&b.path));
    total_edges = all_edges.len();
    self.db.insert_nodes(&all_nodes).await?;
    self.db.insert_edges(&all_edges).await?;
    self.db.upsert_files(&file_records).await?;
    self.db.end_bulk_load().await?;
    let now_str = current_timestamp().to_string();
    self.db.set_metadata("last_full_sync_at", &now_str).await?;
    self.db.set_metadata("last_sync_at", &now_str).await?;
    // NOTE(review): two debug_asserts previously lived here claiming
    // node_count >= file_count and duration_ms > 0. Both can fire on
    // valid input (a file with zero extractable nodes; a sub-millisecond
    // index), panicking debug builds, so they were removed.
    Ok(IndexResult {
        file_count: files.len(),
        node_count: total_nodes,
        edge_count: total_edges,
        duration_ms: start.elapsed().as_millis() as u64,
    })
}
/// Incremental sync with no progress reporting; see
/// [`Self::sync_with_progress`] for the worker and error behavior.
pub async fn sync(&self) -> Result<SyncResult> {
    self.sync_with_progress(|_, _| {}).await
}
/// Incrementally bring the index up to date: purge records for deleted
/// files, re-extract new and content-changed files, then re-resolve any
/// outstanding unresolved references.
///
/// `on_progress(phase, detail)` is invoked as each phase starts and once
/// per touched file; `detail` is the file path where applicable, else "".
///
/// Fix: the `find_stale_files`/`find_new_files`/`find_removed_files`/
/// `set_metadata` call sites contained mojibake (`¤t_…`) where the
/// original `&current_…` arguments had been mangled through the HTML
/// entity `&curren;` — restored the intended borrows.
///
/// # Errors
/// Fails if the sync lock is held by a live process, the scan fails, or
/// any database operation fails.
pub async fn sync_with_progress<F>(&self, on_progress: F) -> Result<SyncResult>
where
    F: Fn(&str, &str),
{
    debug_assert!(self.project_root.exists(), "sync: project root does not exist");
    debug_assert!(self.project_root.is_dir(), "sync: project root is not a directory");
    let _lock = try_acquire_sync_lock(&self.project_root)?;
    let start = Instant::now();
    on_progress("scanning files", "");
    let current_files = self.scan_files()?;
    on_progress("hashing files", "");
    let project_root = &self.project_root;
    // Hash in parallel; unreadable files are silently skipped.
    let current_hashes: Vec<_> = current_files
        .par_iter()
        .filter_map(|path| {
            let abs_path = project_root.join(path);
            let source = std::fs::read_to_string(&abs_path).ok()?;
            Some((path.clone(), sync::content_hash(&source)))
        })
        .collect();
    on_progress("detecting changes", "");
    let stale = sync::find_stale_files(&self.db, &current_hashes).await?;
    let new = sync::find_new_files(&self.db, &current_files).await?;
    let removed = sync::find_removed_files(&self.db, &current_files).await?;
    for path in &removed {
        on_progress("removing", path);
        self.db.delete_file(path).await?;
    }
    // Changed and brand-new files share the same extract/replace path.
    let to_index: Vec<String> = stale.iter().chain(new.iter()).cloned().collect();
    let registry = &self.registry;
    let sync_extractions: Vec<_> = to_index
        .par_iter()
        .filter_map(|file_path| {
            let abs_path = project_root.join(file_path);
            let source = std::fs::read_to_string(&abs_path).ok()?;
            let extractor = registry.extractor_for_file(file_path)?;
            let result = extractor.extract(file_path, &source);
            let hash = sync::content_hash(&source);
            let size = source.len() as u64;
            Some((file_path.clone(), result, hash, size))
        })
        .collect();
    for (file_path, result, hash, size) in &sync_extractions {
        on_progress("syncing", file_path);
        // Replace the file's nodes wholesale before inserting the new set.
        self.db.delete_nodes_by_file(file_path).await?;
        self.db.insert_nodes(&result.nodes).await?;
        self.db.insert_edges(&result.edges).await?;
        if !result.unresolved_refs.is_empty() {
            self.db.insert_unresolved_refs(&result.unresolved_refs).await?;
        }
        let file_record = FileRecord {
            path: file_path.clone(),
            content_hash: hash.clone(),
            size: *size,
            modified_at: current_timestamp(),
            indexed_at: current_timestamp(),
            node_count: result.nodes.len() as u32,
        };
        self.db.upsert_file(&file_record).await?;
    }
    if !to_index.is_empty() {
        on_progress("resolving references", "");
        let unresolved = self.db.get_unresolved_refs().await?;
        if !unresolved.is_empty() {
            let resolver = ReferenceResolver::new(&self.db).await;
            let resolution = resolver.resolve_all(&unresolved);
            let edges = resolver.create_edges(&resolution.resolved);
            if !edges.is_empty() {
                self.db.insert_edges(&edges).await?;
            }
        }
    }
    self.db
        .set_metadata("last_sync_at", &current_timestamp().to_string())
        .await?;
    Ok(SyncResult {
        files_added: new.len(),
        files_modified: stale.len(),
        files_removed: removed.len(),
        duration_ms: start.elapsed().as_millis() as u64,
    })
}
/// Discover indexable files under the project root, dispatching to the
/// gitignore-aware walker when the config enables it and to the plain
/// walkdir walker otherwise.
fn scan_files(&self) -> Result<Vec<String>> {
    debug_assert!(self.project_root.is_dir(), "scan_files: project_root is not a directory");
    let supported_exts = self.registry.supported_extensions();
    debug_assert!(!supported_exts.is_empty(), "scan_files: no supported extensions registered");
    if !self.config.git_ignore {
        return self.scan_files_walkdir(&supported_exts);
    }
    self.scan_files_with_gitignore(&supported_exts)
}
fn scan_files_walkdir(
&self,
supported_exts: &[&str],
) -> Result<Vec<String>> {
let mut files = Vec::new();
for entry in WalkDir::new(&self.project_root)
.follow_links(false)
.into_iter()
.filter_entry(|e| {
if e.depth() == 0 {
return true;
}
let name = e.file_name().to_string_lossy();
!name.starts_with('.') && name != "target"
})
{
let entry = match entry {
Ok(e) => e,
Err(_) => continue,
};
if !entry.file_type().is_file() {
continue;
}
if let Some(rel_str) = self.accept_file(entry.path(), supported_exts) {
files.push(rel_str);
}
}
Ok(files)
}
/// Walk the tree with the `ignore` crate so `.gitignore`, git global and
/// per-repo excludes, and hidden-file filtering are all honored.
fn scan_files_with_gitignore(&self, supported_exts: &[&str]) -> Result<Vec<String>> {
    let walker = ignore::WalkBuilder::new(&self.project_root)
        .follow_links(false)
        .hidden(true)
        .git_ignore(true)
        .git_global(true)
        .git_exclude(true)
        .build();
    let mut files = Vec::new();
    // Walk errors are skipped, matching the walkdir variant.
    for entry in walker.filter_map(|e| e.ok()) {
        // Entries without a file type, and non-files, are skipped.
        let is_file = entry.file_type().map_or(false, |ft| ft.is_file());
        if !is_file {
            continue;
        }
        if let Some(rel_str) = self.accept_file(entry.path(), supported_exts) {
            files.push(rel_str);
        }
    }
    Ok(files)
}
/// Decide whether `path` belongs in the index. Returns its normalized
/// project-relative path (forward slashes) when the extension is
/// supported, the path is not excluded by config, and the file is not
/// over the configured size limit; `None` otherwise.
fn accept_file(&self, path: &Path, supported_exts: &[&str]) -> Option<String> {
    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
    if !supported_exts.contains(&ext) {
        return None;
    }
    // Normalize to forward slashes so stored paths are OS-independent.
    let rel_str = path
        .strip_prefix(&self.project_root)
        .ok()?
        .to_string_lossy()
        .replace('\\', "/");
    if is_excluded(&rel_str, &self.config) {
        return None;
    }
    // Oversized files are skipped entirely rather than truncated; files
    // whose metadata cannot be read are skipped too.
    let size_ok = std::fs::metadata(path).ok()?.len() <= self.config.max_file_size;
    size_ok.then_some(rel_str)
}
}
impl TokenSave {
    // Read-side query API: thin delegations to the database layer and the
    // graph traversal/query helpers. No method here mutates the index.

    /// Search indexed nodes matching `query`, capped at `limit` results.
    pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
        self.db.search_nodes(query, limit).await
    }
    /// Aggregate statistics for the whole graph.
    pub async fn get_stats(&self) -> Result<GraphStats> {
        self.db.get_stats().await
    }
    /// Look up a single node by id; `Ok(None)` when absent.
    pub async fn get_node(&self, id: &str) -> Result<Option<Node>> {
        self.db.get_node_by_id(id).await
    }
    /// Callers of `node_id` (traversed up to `max_depth`), each paired
    /// with the connecting edge.
    pub async fn get_callers(&self, node_id: &str, max_depth: usize) -> Result<Vec<(Node, Edge)>> {
        let traverser = GraphTraverser::new(&self.db);
        traverser.get_callers(node_id, max_depth).await
    }
    /// Callees of `node_id` (traversed up to `max_depth`), each paired
    /// with the connecting edge.
    pub async fn get_callees(&self, node_id: &str, max_depth: usize) -> Result<Vec<(Node, Edge)>> {
        let traverser = GraphTraverser::new(&self.db);
        traverser.get_callees(node_id, max_depth).await
    }
    /// Subgraph around `node_id` within `max_depth` hops.
    pub async fn get_impact_radius(&self, node_id: &str, max_depth: usize) -> Result<Subgraph> {
        let traverser = GraphTraverser::new(&self.db);
        traverser.get_impact_radius(node_id, max_depth).await
    }
    /// Dead-code candidates of the given node kinds, per
    /// `GraphQueryManager`'s definition of dead code.
    pub async fn find_dead_code(&self, kinds: &[NodeKind]) -> Result<Vec<Node>> {
        let qm = GraphQueryManager::new(&self.db);
        qm.find_dead_code(kinds).await
    }
    /// All nodes extracted from one file path.
    pub async fn get_nodes_by_file(&self, file_path: &str) -> Result<Vec<Node>> {
        self.db.get_nodes_by_file(file_path).await
    }
    /// Every node in the graph.
    pub async fn get_all_nodes(&self) -> Result<Vec<Node>> {
        self.db.get_all_nodes().await
    }
    /// Edges pointing at `node_id`; the empty slice means no kind filter.
    pub async fn get_incoming_edges(&self, node_id: &str) -> Result<Vec<Edge>> {
        self.db.get_incoming_edges(node_id, &[]).await
    }
    /// Edges originating from `node_id`; the empty slice means no kind
    /// filter.
    pub async fn get_outgoing_edges(&self, node_id: &str) -> Result<Vec<Edge>> {
        self.db.get_outgoing_edges(node_id, &[]).await
    }
    /// Every edge in the graph.
    pub async fn get_all_edges(&self) -> Result<Vec<Edge>> {
        self.db.get_all_edges().await
    }
    /// Nodes ranked by their `edge_kind` edge count (incoming vs outgoing
    /// per `incoming`), optionally filtered to one node kind.
    pub async fn get_ranked_nodes_by_edge_kind(
        &self,
        edge_kind: &EdgeKind,
        node_kind: Option<&NodeKind>,
        incoming: bool,
        limit: usize,
    ) -> Result<Vec<(Node, u64)>> {
        self.db
            .get_ranked_nodes_by_edge_kind(edge_kind, node_kind, incoming, limit)
            .await
    }
    /// Largest nodes, optionally filtered by kind. The u32 is the size
    /// metric as defined by the db layer.
    pub async fn get_largest_nodes(
        &self,
        node_kind: Option<&NodeKind>,
        limit: usize,
    ) -> Result<Vec<(Node, u32)>> {
        self.db.get_largest_nodes(node_kind, limit).await
    }
    /// Files ranked by coupling; `fan_in` selects direction.
    pub async fn get_file_coupling(
        &self,
        fan_in: bool,
        limit: usize,
    ) -> Result<Vec<(String, u64)>> {
        self.db.get_file_coupling(fan_in, limit).await
    }
    /// Nodes ranked by inheritance depth.
    pub async fn get_inheritance_depth(&self, limit: usize) -> Result<Vec<(Node, u64)>> {
        self.db.get_inheritance_depth(limit).await
    }
    /// Node distribution, optionally restricted to a path prefix.
    /// NOTE(review): tuple layout is defined by the db layer — presumably
    /// (path, kind, count); confirm in `Database::get_node_distribution`.
    pub async fn get_node_distribution(
        &self,
        path_prefix: Option<&str>,
    ) -> Result<Vec<(String, String, u64)>> {
        self.db.get_node_distribution(path_prefix).await
    }
    /// Id pairs for call edges — presumably (source, target); confirm in
    /// the db layer.
    pub async fn get_call_edges(&self) -> Result<Vec<(String, String)>> {
        self.db.get_call_edges().await
    }
    /// Nodes ranked by complexity. The four numeric fields are defined by
    /// the db layer — see `Database::get_complexity_ranked`.
    pub async fn get_complexity_ranked(
        &self,
        node_kind: Option<&NodeKind>,
        limit: usize,
    ) -> Result<Vec<(Node, u32, u64, u64, u64)>> {
        self.db.get_complexity_ranked(node_kind, limit).await
    }
    /// Public symbols lacking documentation, optionally under a path
    /// prefix, capped at `limit`.
    pub async fn get_undocumented_public_symbols(
        &self,
        path_prefix: Option<&str>,
        limit: usize,
    ) -> Result<Vec<Node>> {
        self.db
            .get_undocumented_public_symbols(path_prefix, limit)
            .await
    }
    /// "God class" candidates; the numeric fields are db-layer metrics.
    pub async fn get_god_classes(&self, limit: usize) -> Result<Vec<(Node, u64, u64, u64)>> {
        self.db.get_god_classes(limit).await
    }
    /// Cycles in the dependency graph, each as a list of ids.
    pub async fn find_circular_dependencies(&self) -> Result<Vec<Vec<String>>> {
        let qm = GraphQueryManager::new(&self.db);
        qm.find_circular_dependencies().await
    }
    /// Assemble a task-focused context via `ContextBuilder`.
    pub async fn build_context(&self, task: &str, options: &BuildContextOptions) -> Result<TaskContext> {
        let builder = ContextBuilder::new(&self.db, &self.project_root);
        builder.build_context(task, options).await
    }
    /// Every file record in the index.
    pub async fn get_all_files(&self) -> Result<Vec<FileRecord>> {
        self.db.get_all_files().await
    }
    /// Files that depend on `file_path`.
    pub async fn get_file_dependents(&self, file_path: &str) -> Result<Vec<String>> {
        let qm = GraphQueryManager::new(&self.db);
        qm.get_file_dependents(file_path).await
    }
    /// Per-file token estimates: file size in bytes divided by 4 —
    /// presumably a ~4-bytes-per-token heuristic; confirm if precision
    /// matters.
    pub async fn get_file_token_map(&self) -> Result<HashMap<String, u64>> {
        let files = self.db.get_all_files().await?;
        Ok(files.into_iter().map(|f| (f.path, f.size / 4)).collect())
    }
    /// Running "tokens saved" counter from metadata; 0 when unset or
    /// unparsable.
    pub async fn get_tokens_saved(&self) -> Result<u64> {
        match self.db.get_metadata("tokens_saved").await? {
            Some(v) => Ok(v.parse::<u64>().unwrap_or(0)),
            None => Ok(0),
        }
    }
    /// Overwrite the "tokens saved" counter.
    pub async fn set_tokens_saved(&self, value: u64) -> Result<()> {
        self.db
            .set_metadata("tokens_saved", &value.to_string())
            .await
    }
    /// Nodes of the given kinds under a directory.
    pub async fn get_nodes_by_dir(&self, dir: &str, kinds: &[NodeKind]) -> Result<Vec<Node>> {
        self.db.get_nodes_by_dir(dir, kinds).await
    }
    /// Edges whose endpoints are both within `node_ids`.
    pub async fn get_internal_edges(&self, node_ids: &[String]) -> Result<Vec<Edge>> {
        self.db.get_internal_edges(node_ids).await
    }
    /// Database checkpoint (see the db layer for semantics).
    pub async fn checkpoint(&self) -> Result<()> {
        self.db.checkpoint().await
    }
    /// Database optimization pass (see the db layer for semantics).
    pub async fn optimize(&self) -> Result<()> {
        self.db.optimize().await
    }
    /// Borrow the in-memory configuration.
    pub fn get_config(&self) -> &TokenSaveConfig {
        &self.config
    }
    /// Borrow the project root path.
    pub fn project_root(&self) -> &Path {
        &self.project_root
    }
}
impl TokenSave {
    /// Of the given paths, return those whose on-disk mtime is strictly
    /// newer than the recorded `indexed_at`. Paths missing from the
    /// index, missing from disk, or with unreadable metadata/mtime are
    /// treated as not stale (silently skipped).
    pub async fn check_file_staleness(&self, file_paths: &[String]) -> Vec<String> {
        let mut stale = Vec::new();
        for path in file_paths {
            if let Ok(Some(record)) = self.db.get_file(path).await {
                let abs_path = self.project_root.join(path);
                if let Ok(metadata) = std::fs::metadata(&abs_path) {
                    if let Ok(mtime) = metadata.modified() {
                        // Compare in whole seconds since the epoch, the
                        // same unit `indexed_at` is stored in.
                        let mtime_secs = mtime
                            .duration_since(std::time::UNIX_EPOCH)
                            .unwrap_or_default()
                            .as_secs() as i64;
                        if mtime_secs > record.indexed_at {
                            stale.push(path.clone());
                        }
                    }
                }
            }
        }
        stale
    }
    /// Timestamp of the last index operation, as recorded in the database.
    pub async fn last_index_time(&self) -> Result<i64> {
        self.db.last_index_time().await
    }
    /// Count commits reachable from HEAD whose commit time is at or after
    /// `since_timestamp` (seconds since epoch). Returns 0 on any git
    /// failure: not a repository, unborn HEAD, or an unwalkable history.
    pub fn git_commits_since(&self, since_timestamp: i64) -> usize {
        let repo = match gix::open(&self.project_root) {
            Ok(r) => r,
            Err(_) => return 0,
        };
        let head = match repo.head_commit() {
            Ok(h) => h,
            Err(_) => return 0,
        };
        // NOTE(review): ByCommitTimeCutoff prunes the walk at the cutoff
        // time; presumably the yielded commits are exactly those newer
        // than the cutoff, but confirm the boundary behavior against the
        // gix docs.
        let sorting = gix::revision::walk::Sorting::ByCommitTimeCutoff {
            order: gix::traverse::commit::simple::CommitTimeOrder::NewestFirst,
            seconds: since_timestamp,
        };
        let walk = match head.ancestors().sorting(sorting).all() {
            Ok(w) => w,
            Err(_) => return 0,
        };
        // Errors during the walk are dropped from the count.
        walk.filter_map(|r| r.ok()).count()
    }
}
/// Heuristic test-file detector: a path counts as test code when any
/// common test directory segment (`tests/`, `spec/`, …) or test-file
/// infix (`.test.`, `_spec.`, …) appears anywhere in it,
/// case-insensitively.
pub fn is_test_file(path: &str) -> bool {
    const TEST_MARKERS: [&str; 9] = [
        "test/", "tests/", "__tests__/", "spec/", "e2e/",
        ".test.", ".spec.", "_test.", "_spec.",
    ];
    let needle = path.to_ascii_lowercase();
    TEST_MARKERS.into_iter().any(|marker| needle.contains(marker))
}