use std::fs::{self, File};
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use fs2::FileExt;
use serde::{Deserialize, Serialize};
use xxhash_rust::xxh3::xxh3_64;
// Names of the entries that live inside a single index directory.

// File name returned (joined onto the index directory) by `get_state_path`.
const STATE_FILE: &str = "state.json";
// File name that `ProjectMetadata::load` / `ProjectMetadata::save` read and write.
const PROJECT_FILE: &str = "project.json";
// Sub-directory returned by `get_vector_index_path`; `find_index_for_project`
// looks for `metadata.json` inside it.
const INDEX_SUBDIR: &str = "index";
// File name returned by `get_lock_path` and opened by the lock helpers.
const LOCK_FILE: &str = ".lock";
/// Description of an indexed project, serialized as JSON into the
/// `project.json` file of an index directory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectMetadata {
/// Path of the project, stored exactly as it was passed to `new`.
pub project_path: PathBuf,
/// Final component of `project_path`, or `"project"` when the path has none.
pub project_name: String,
/// Model identifier the index was built with. Because of `serde(default)`,
/// a JSON document without this field deserializes to `None`.
#[serde(default)]
pub model: Option<String>,
}
impl ProjectMetadata {
pub fn new(project_path: &Path, model: &str) -> Self {
let project_name = project_path
.file_name()
.map(|n| n.to_string_lossy().to_string())
.unwrap_or_else(|| "project".to_string());
Self {
project_path: project_path.to_path_buf(),
project_name,
model: Some(model.to_string()),
}
}
pub fn load(index_dir: &Path) -> Result<Self> {
let path = index_dir.join(PROJECT_FILE);
let content = fs::read_to_string(&path)
.with_context(|| format!("Failed to read {}", path.display()))?;
Ok(serde_json::from_str(&content)?)
}
pub fn save(&self, index_dir: &Path) -> Result<()> {
fs::create_dir_all(index_dir)?;
let path = index_dir.join(PROJECT_FILE);
let content = serde_json::to_string_pretty(self)?;
fs::write(&path, content)?;
Ok(())
}
}
/// Returns the directory under which all index directories are stored.
///
/// A non-empty `COLGREP_DATA_DIR` environment variable takes precedence and
/// is used verbatim. Otherwise the result is `<data dir>/colgrep/indices`,
/// where `<data dir>` is whatever `dirs::data_dir()` reports.
///
/// # Errors
///
/// Fails when no override is set and `dirs::data_dir()` returns `None`.
pub fn get_colgrep_data_dir() -> Result<PathBuf> {
    // `var_os` (rather than `var`) so that an override containing non-UTF-8
    // bytes is still honoured instead of being silently ignored.
    let override_dir = std::env::var_os("COLGREP_DATA_DIR").filter(|dir| !dir.is_empty());
    if let Some(env_dir) = override_dir {
        return Ok(PathBuf::from(env_dir));
    }
    let data_dir = dirs::data_dir().context("Could not determine data directory")?;
    Ok(data_dir.join("colgrep").join("indices"))
}
/// Derives the directory name used for the index of `project_path` built
/// with `model`.
///
/// The name has the form `<sanitized project name>-<hash prefix>`:
/// * the project name is the last path component (or `"project"`), with every
///   character that is not alphanumeric, `-` or `_` replaced by `_`;
/// * the hash prefix is the first eight characters of the hex-formatted xxh3
///   hash of `"<path>|<model>"`.
fn compute_index_dir_name(project_path: &Path, model: &str) -> String {
    // Hash the lossy path text and the model, separated by '|'.
    let path_text = project_path.to_string_lossy();
    let key: Vec<u8> = [path_text.as_bytes(), &b"|"[..], model.as_bytes()].concat();
    let digest_hex = format!("{:08x}", xxh3_64(&key));
    let short_digest: String = digest_hex.chars().take(8).collect();

    let raw_name = match project_path.file_name() {
        Some(component) => component.to_string_lossy().to_string(),
        None => "project".to_string(),
    };

    let mut dir_name = String::new();
    for ch in raw_name.chars() {
        let keep = ch.is_alphanumeric() || ch == '-' || ch == '_';
        dir_name.push(if keep { ch } else { '_' });
    }
    dir_name.push('-');
    dir_name.push_str(&short_digest);
    dir_name
}
/// Returns the index directory for `project_path` and `model`: the colgrep
/// data directory joined with the name from `compute_index_dir_name`.
///
/// The directory is not created and need not exist.
///
/// # Errors
///
/// Propagates any failure of `get_colgrep_data_dir`.
pub fn get_index_dir_for_project(project_path: &Path, model: &str) -> Result<PathBuf> {
    get_colgrep_data_dir().map(|root| root.join(compute_index_dir_name(project_path, model)))
}
/// Looks up an existing index for `project_path` built with `model`.
///
/// Returns `Ok(Some(dir))` when `dir/index/metadata.json` exists and the
/// directory is not known to belong to something else, `Ok(None)` otherwise.
///
/// The directory name only embeds a short hash, so `project.json` is used to
/// guard against collisions:
/// * readable metadata with a different `project_path` is rejected;
/// * readable metadata with a different model is rejected;
/// * metadata without a model (legacy format) is accepted;
/// * missing or unreadable `project.json` is accepted, because the vector
///   index is present and nothing contradicts it.
///
/// # Errors
///
/// Propagates any failure of `get_index_dir_for_project`.
pub fn find_index_for_project(project_path: &Path, model: &str) -> Result<Option<PathBuf>> {
    let index_dir = get_index_dir_for_project(project_path, model)?;
    let metadata_path = index_dir.join(INDEX_SUBDIR).join("metadata.json");
    if !metadata_path.exists() {
        return Ok(None);
    }

    let belongs_to_request = match ProjectMetadata::load(&index_dir) {
        Ok(meta) => {
            // A path mismatch means another project hashed to the same
            // directory name; its index must not be handed out.
            let same_project = meta.project_path == project_path;
            let same_model = match meta.model.as_deref() {
                Some(recorded) => recorded == model,
                None => true,
            };
            same_project && same_model
        }
        // No usable project.json: keep treating the index as valid.
        Err(_) => true,
    };

    Ok(if belongs_to_request {
        Some(index_dir)
    } else {
        None
    })
}
/// Reports whether `find_index_for_project` finds an index for
/// `project_path` and `model`. Lookup errors count as "no index".
pub fn index_exists(project_path: &Path, model: &str) -> bool {
    match find_index_for_project(project_path, model) {
        Ok(found) => found.is_some(),
        Err(_) => false,
    }
}
/// Result of `find_parent_index`: an existing index whose project directory
/// is an ancestor of the searched path.
#[derive(Debug, Clone)]
pub struct ParentIndexInfo {
/// Index directory (an entry of the colgrep data directory) that matched.
pub index_dir: PathBuf,
/// Project path recorded in that index's `project.json`.
pub project_path: PathBuf,
/// The searched path with `project_path` stripped from its front.
pub relative_subdir: PathBuf,
}
/// Searches the colgrep data directory for an index whose project is a
/// strict ancestor of `search_path` and was built with `model`.
///
/// Every sub-directory with a loadable `project.json` is considered. Entries
/// whose recorded model differs from `model` are skipped, as are entries with
/// no recorded model and entries whose project path equals `search_path`.
/// Among the remaining ancestors, the one with the most path components
/// wins; on a tie the first one encountered is kept.
///
/// Returns `Ok(None)` when the data directory does not exist or nothing
/// matches.
///
/// # Errors
///
/// Propagates failures of `get_colgrep_data_dir` and of listing the data
/// directory. Unreadable individual entries are ignored.
pub fn find_parent_index(search_path: &Path, model: &str) -> Result<Option<ParentIndexInfo>> {
    let indices_root = get_colgrep_data_dir()?;
    if !indices_root.exists() {
        return Ok(None);
    }

    let mut winner: Option<ParentIndexInfo> = None;
    let mut winner_depth = 0;

    for dir_entry in fs::read_dir(&indices_root)? {
        let candidate_dir = match dir_entry {
            Ok(item) => item.path(),
            Err(_) => continue,
        };
        if !candidate_dir.is_dir() {
            continue;
        }

        let meta = match ProjectMetadata::load(&candidate_dir) {
            Ok(loaded) => loaded,
            Err(_) => continue,
        };
        if meta.model.as_deref() != Some(model) {
            continue;
        }
        // An index for the path itself is not a *parent* index.
        if search_path == meta.project_path {
            continue;
        }

        let remainder = match search_path.strip_prefix(&meta.project_path) {
            Ok(rest) => rest,
            Err(_) => continue,
        };

        let depth = meta.project_path.components().count();
        if depth > winner_depth {
            winner_depth = depth;
            winner = Some(ParentIndexInfo {
                index_dir: candidate_dir,
                relative_subdir: remainder.to_path_buf(),
                project_path: meta.project_path,
            });
        }
    }

    Ok(winner)
}
/// Returns the path of the state file (`STATE_FILE`) inside `index_dir`.
pub fn get_state_path(index_dir: &Path) -> PathBuf {
    let mut state_path = index_dir.to_path_buf();
    state_path.push(STATE_FILE);
    state_path
}
/// Returns the vector index sub-directory (`INDEX_SUBDIR`) inside `index_dir`.
pub fn get_vector_index_path(index_dir: &Path) -> PathBuf {
    let mut vector_dir = index_dir.to_path_buf();
    vector_dir.push(INDEX_SUBDIR);
    vector_dir
}
/// Returns the path of the lock file (`LOCK_FILE`) inside `index_dir`.
pub fn get_lock_path(index_dir: &Path) -> PathBuf {
    let mut lock_path = index_dir.to_path_buf();
    lock_path.push(LOCK_FILE);
    lock_path
}
/// Makes a single, non-blocking attempt to take the exclusive lock on
/// `index_dir`.
///
/// Creates `index_dir` and its lock file if needed. Returns `Ok(Some(file))`
/// when the lock was obtained and `Ok(None)` when `try_lock_exclusive`
/// reported any error. The caller should keep the returned handle alive for
/// as long as the lock is needed (presumably the lock is released when the
/// handle is dropped; confirm against the `fs2` documentation).
///
/// # Errors
///
/// Fails when the directory or the lock file cannot be created.
pub fn try_acquire_index_lock(index_dir: &Path) -> Result<Option<File>> {
    fs::create_dir_all(index_dir)?;

    let lock_path = get_lock_path(index_dir);
    let handle = File::create(&lock_path)
        .with_context(|| format!("Failed to create lock file at {}", lock_path.display()))?;

    if handle.try_lock_exclusive().is_ok() {
        Ok(Some(handle))
    } else {
        Ok(None)
    }
}
pub fn acquire_index_lock(index_dir: &Path) -> Result<File> {
use std::time::{Duration, Instant};
const TIMEOUT: Duration = Duration::from_secs(5);
const RETRY_INTERVAL: Duration = Duration::from_millis(500);
fs::create_dir_all(index_dir)?;
let lock_path = get_lock_path(index_dir);
let lock_file = File::create(&lock_path)
.with_context(|| format!("Failed to create lock file at {}", lock_path.display()))?;
let start = Instant::now();
loop {
match lock_file.try_lock_exclusive() {
Ok(()) => return Ok(lock_file),
Err(_) if start.elapsed() < TIMEOUT => {
std::thread::sleep(RETRY_INTERVAL);
}
Err(_) => {
return Err(anyhow::anyhow!(
"Timed out waiting for index lock after 5 seconds. \
Another colgrep instance may be updating this index."
));
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The directory name is the project name, a dash and eight hash characters.
    #[test]
    fn test_compute_index_dir_name() {
        let project = PathBuf::from("/Users/foo/myproject");
        let dir_name = compute_index_dir_name(&project, "lightonai/LateOn");
        let expected_prefix = "myproject-";
        assert!(dir_name.starts_with(expected_prefix));
        assert_eq!(dir_name.len(), expected_prefix.len() + 8);
    }

    /// Spaces and parentheses in the project name are replaced by underscores.
    #[test]
    fn test_compute_index_dir_name_with_special_chars() {
        let project = PathBuf::from("/Users/foo/my project (1)");
        let dir_name = compute_index_dir_name(&project, "lightonai/LateOn");
        assert!(dir_name.starts_with("my_project__1_-"));
    }

    /// Two different project paths must not map to the same directory name.
    #[test]
    fn test_different_paths_different_hashes() {
        let first = compute_index_dir_name(&PathBuf::from("/Users/foo/project1"), "lightonai/LateOn");
        let second =
            compute_index_dir_name(&PathBuf::from("/Users/foo/project2"), "lightonai/LateOn");
        assert_ne!(first, second);
    }

    /// The model takes part in the hash, while the name prefix stays the same.
    #[test]
    fn test_different_models_different_hashes() {
        let project = PathBuf::from("/Users/foo/project");
        let base_model = compute_index_dir_name(&project, "lightonai/LateOn");
        let edge_model = compute_index_dir_name(&project, "lightonai/LateOn-Code-edge");
        assert_ne!(base_model, edge_model);
        assert!(base_model.starts_with("project-"));
        assert!(edge_model.starts_with("project-"));
    }

    /// Identical inputs always give the identical directory name.
    #[test]
    fn test_same_path_and_model_stable_hash() {
        let project = PathBuf::from("/Users/foo/project");
        let first_call = compute_index_dir_name(&project, "lightonai/LateOn");
        let second_call = compute_index_dir_name(&project, "lightonai/LateOn");
        assert_eq!(first_call, second_call);
    }

    /// An empty model string hashes differently from a real model name.
    #[test]
    fn test_empty_model_does_not_collide_with_populated_model() {
        let project = PathBuf::from("/Users/foo/project");
        let without_model = compute_index_dir_name(&project, "");
        let with_model = compute_index_dir_name(&project, "lightonai/LateOn");
        assert_ne!(without_model, with_model);
    }

    /// Saving and loading metadata preserves path, name and model.
    #[test]
    fn test_project_metadata_roundtrip_with_model() {
        let scratch = tempfile::tempdir().unwrap();
        let index_dir = scratch.path();
        let project_path = PathBuf::from("/some/project");

        ProjectMetadata::new(&project_path, "lightonai/LateOn-Code-edge")
            .save(index_dir)
            .unwrap();
        let restored = ProjectMetadata::load(index_dir).unwrap();

        assert_eq!(restored.project_path, project_path);
        assert_eq!(restored.project_name, "project");
        assert_eq!(
            restored.model.as_deref(),
            Some("lightonai/LateOn-Code-edge")
        );
    }

    /// A `project.json` written without a `model` field still loads.
    #[test]
    fn test_project_metadata_legacy_json_without_model_field() {
        let scratch = tempfile::tempdir().unwrap();
        let index_dir = scratch.path();
        let legacy_json = r#"{
"project_path": "/some/project",
"project_name": "project"
}"#;
        std::fs::write(index_dir.join("project.json"), legacy_json).unwrap();

        let restored = ProjectMetadata::load(index_dir).unwrap();

        assert_eq!(restored.project_path, PathBuf::from("/some/project"));
        assert_eq!(restored.project_name, "project");
        assert!(
            restored.model.is_none(),
            "legacy project.json must deserialize with model=None"
        );
    }

    /// Indexes of one project built with two models live in separate
    /// directories, each keeping its own metadata.
    #[test]
    fn test_two_models_same_project_do_not_overwrite_metadata() {
        let scratch = tempfile::tempdir().unwrap();
        let project_path = PathBuf::from("/some/project");
        let dir_for_a = scratch
            .path()
            .join(compute_index_dir_name(&project_path, "model-a"));
        let dir_for_b = scratch
            .path()
            .join(compute_index_dir_name(&project_path, "model-b"));
        std::fs::create_dir_all(&dir_for_a).unwrap();
        std::fs::create_dir_all(&dir_for_b).unwrap();

        ProjectMetadata::new(&project_path, "model-a")
            .save(&dir_for_a)
            .unwrap();
        ProjectMetadata::new(&project_path, "model-b")
            .save(&dir_for_b)
            .unwrap();

        let meta_a = ProjectMetadata::load(&dir_for_a).unwrap();
        let meta_b = ProjectMetadata::load(&dir_for_b).unwrap();
        assert_eq!(meta_a.model.as_deref(), Some("model-a"));
        assert_eq!(meta_b.model.as_deref(), Some("model-b"));
        assert_ne!(dir_for_a, dir_for_b);
    }

    /// Resolving the index directory twice gives the same path.
    #[test]
    fn test_get_index_dir_for_project_is_deterministic() {
        let project = PathBuf::from("/Users/foo/project");
        let first_call = get_index_dir_for_project(&project, "lightonai/LateOn").unwrap();
        let second_call = get_index_dir_for_project(&project, "lightonai/LateOn").unwrap();
        assert_eq!(first_call, second_call);
    }
}