use std::collections::HashSet;
use std::path::PathBuf;
use std::sync::Arc;
use anyhow::{bail, Context, Result};
use cqs::config::{add_reference_to_config, remove_reference_from_config, ReferenceConfig};
use cqs::reference;
use cqs::{ModelInfo, Parser as CqParser, Store};
use crate::cli::commands::index::build_hnsw_index;
use crate::cli::{enumerate_files, find_project_root, run_index_pipeline, Cli};
#[derive(Debug, serde::Serialize)]
pub(crate) struct RefListEntry {
pub name: String,
pub path: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub source: Option<String>,
pub weight: f32,
pub chunks: u64,
}
#[derive(clap::Subcommand)]
pub(crate) enum RefCommand {
Add {
name: String,
source: PathBuf,
#[arg(long, default_value = "0.8")]
weight: f32,
},
List {
#[arg(long)]
json: bool,
},
Remove {
name: String,
},
Update {
name: String,
},
}
pub(crate) fn cmd_ref(cli: &Cli, subcmd: &RefCommand) -> Result<()> {
let _span = tracing::info_span!("cmd_ref").entered();
match subcmd {
RefCommand::Add {
name,
source,
weight,
} => cmd_ref_add(cli, name, source, *weight),
RefCommand::List { json } => cmd_ref_list(cli, *json),
RefCommand::Remove { name } => cmd_ref_remove(name),
RefCommand::Update { name } => cmd_ref_update(cli, name),
}
}
fn cmd_ref_add(cli: &Cli, name: &str, source: &std::path::Path, weight: f32) -> Result<()> {
cqs::reference::validate_ref_name(name)
.map_err(|e| anyhow::anyhow!("Invalid reference name '{}': {}", name, e))?;
if !(0.0..=1.0).contains(&weight) {
bail!("Weight must be between 0.0 and 1.0 (got {})", weight);
}
let root = find_project_root();
let config = cqs::config::Config::load(&root);
if config.references.iter().any(|r| r.name == name) {
bail!(
"Reference '{}' already exists. Use 'cqs ref update {}' to re-index.",
name,
name
);
}
let source = dunce::canonicalize(source)
.map_err(|e| anyhow::anyhow!("Source path '{}' not found: {}", source.display(), e))?;
let ref_dir = reference::ref_path(name)
.ok_or_else(|| anyhow::anyhow!("Could not determine reference storage directory"))?;
std::fs::create_dir_all(&ref_dir)
.with_context(|| format!("Failed to create {}", ref_dir.display()))?;
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
if let Err(e) = std::fs::set_permissions(&ref_dir, std::fs::Permissions::from_mode(0o700)) {
tracing::debug!(path = %ref_dir.display(), error = %e, "Failed to set file permissions");
}
}
let db_path = ref_dir.join("index.db");
let parser = CqParser::new()?;
let files = enumerate_files(&source, &parser, false)?;
if files.is_empty() {
bail!("No supported source files found in '{}'", source.display());
}
if !cli.quiet {
println!(
"Indexing {} files from '{}'...",
files.len(),
source.display()
);
}
let store = Arc::new(
Store::open(&db_path)
.with_context(|| format!("Failed to open reference store at {}", db_path.display()))?,
);
let mc = cli.try_model_config()?;
store.init(&ModelInfo::new(&mc.repo, mc.dim))?;
let stats = run_index_pipeline(
&source,
files,
Arc::clone(&store),
false,
cli.quiet,
cli.try_model_config()?.clone(),
)?;
if !cli.quiet {
println!(" Embedded: {} chunks", stats.total_embedded);
}
if let Some(count) = build_hnsw_index(&store, &ref_dir)? {
if !cli.quiet {
println!(" HNSW: {} vectors", count);
}
}
let ref_config = ReferenceConfig {
name: name.to_string(),
path: ref_dir,
source: Some(source),
weight,
};
let config_path = root.join(".cqs.toml");
add_reference_to_config(&config_path, &ref_config)?;
if !cli.quiet {
println!("Reference '{}' added.", name);
}
Ok(())
}
fn cmd_ref_list(cli: &Cli, json: bool) -> Result<()> {
let root = find_project_root();
let config = cqs::config::Config::load(&root);
if config.references.is_empty() {
println!("No references configured.");
return Ok(());
}
if json || cli.json {
let refs: Vec<_> = config
.references
.iter()
.map(|r| {
let chunks = Store::open_readonly(&r.path.join("index.db"))
.map_err(|e| {
tracing::warn!(
name = %r.name,
path = %r.path.display(),
error = %e,
"Failed to open reference store, showing 0 chunks"
);
e
})
.ok()
.and_then(|s| {
s.chunk_count().map_err(|e| {
tracing::warn!(name = %r.name, error = %e, "Failed to count chunks in reference store");
}).ok()
})
.unwrap_or(0);
RefListEntry {
name: r.name.clone(),
path: cqs::normalize_path(&r.path),
source: r.source.as_ref().map(|p| cqs::normalize_path(p)),
weight: r.weight,
chunks,
}
})
.collect();
println!("{}", serde_json::to_string_pretty(&refs)?);
return Ok(());
}
println!("{:<15} {:<8} {:<10} SOURCE", "NAME", "WEIGHT", "CHUNKS");
println!("{}", "─".repeat(60));
for r in &config.references {
let chunks = Store::open(&r.path.join("index.db"))
.map_err(|e| {
tracing::warn!(
name = %r.name,
path = %r.path.display(),
error = %e,
"Failed to open reference store, showing 0 chunks"
);
e
})
.ok()
.and_then(|s| {
s.chunk_count().map_err(|e| {
tracing::warn!(name = %r.name, error = %e, "Failed to count chunks in reference store");
}).ok()
})
.unwrap_or(0);
let source_str = r
.source
.as_ref()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_else(|| "(none)".to_string());
println!(
"{:<15} {:<8.2} {:<10} {}",
r.name, r.weight, chunks, source_str
);
}
Ok(())
}
fn cmd_ref_remove(name: &str) -> Result<()> {
let root = find_project_root();
let config_path = root.join(".cqs.toml");
let removed = remove_reference_from_config(&config_path, name)?;
if !removed {
bail!("Reference '{}' not found in config.", name);
}
if let Some(refs_root) = reference::refs_dir() {
let ref_dir = refs_root.join(name);
if ref_dir.exists() {
if let (Ok(canonical_dir), Ok(canonical_root)) = (
dunce::canonicalize(&ref_dir),
dunce::canonicalize(&refs_root),
) {
if canonical_dir.starts_with(&canonical_root) {
std::fs::remove_dir_all(&canonical_dir)
.with_context(|| format!("Failed to remove {}", ref_dir.display()))?;
} else {
tracing::warn!(
path = %canonical_dir.display(),
"Refusing to delete reference directory outside refs root"
);
}
}
}
}
println!("Reference '{}' removed.", name);
Ok(())
}
fn cmd_ref_update(cli: &Cli, name: &str) -> Result<()> {
let root = find_project_root();
let config = cqs::config::Config::load(&root);
let ref_config = config
.references
.iter()
.find(|r| r.name == name)
.ok_or_else(|| anyhow::anyhow!("Reference '{}' not found in config.", name))?;
let source = ref_config
.source
.as_ref()
.ok_or_else(|| anyhow::anyhow!("Reference '{}' has no source path configured.", name))?;
if !source.exists() {
bail!(
"Source path '{}' does not exist. Update the config or remove and re-add the reference.",
source.display()
);
}
let db_path = ref_config.path.join("index.db");
let ref_dir = &ref_config.path;
let existing_chunks = if db_path.exists() {
Store::open(&db_path)
.map_err(|e| {
tracing::warn!(error = %e, "Failed to open reference store for chunk count");
})
.ok()
.and_then(|s| {
s.chunk_count()
.map_err(|e| {
tracing::warn!(error = %e, "Failed to count chunks in reference store");
})
.ok()
})
.unwrap_or(0)
} else {
0
};
let parser = CqParser::new()?;
let files = enumerate_files(source, &parser, false)?;
if files.is_empty() && existing_chunks > 0 {
bail!(
"No supported files found in '{}', but the index has {} chunks.\n\
This usually means the binary doesn't support the language(s) in the index.\n\
Rebuild with a binary that supports the required languages, or use \
'cqs ref remove {name}' and re-add.",
source.display(),
existing_chunks,
);
}
if !cli.quiet {
println!("Updating reference '{}' ({} files)...", name, files.len());
}
let store = Arc::new(
Store::open(&db_path)
.with_context(|| format!("Failed to open reference store at {}", db_path.display()))?,
);
let stats = run_index_pipeline(
source,
files.clone(),
Arc::clone(&store),
false,
cli.quiet,
cli.try_model_config()?.clone(),
)?;
if !cli.quiet {
let newly = stats.total_embedded - stats.total_cached;
println!(
" Chunks: {} ({} cached, {} embedded)",
stats.total_embedded, stats.total_cached, newly
);
}
let existing_files: HashSet<_> = files.into_iter().collect();
let pruned = store.prune_missing(&existing_files)?;
if pruned > 0 && existing_chunks > 0 {
let remaining = existing_chunks.saturating_sub(pruned as u64);
if remaining == 0 {
tracing::warn!(
pruned,
name,
"All chunks were pruned. The index is now empty. \
If this was unintentional, re-index with 'cqs ref update'.",
);
} else if (pruned as u64) > existing_chunks / 2 {
tracing::warn!(
pruned,
existing_chunks,
pct = (pruned as f64 / existing_chunks as f64) * 100.0,
"Pruned over 50% of chunks. Verify source path is correct.",
);
}
}
if !cli.quiet && pruned > 0 {
println!(" Pruned: {} (deleted files)", pruned);
}
if let Some(count) = build_hnsw_index(&store, ref_dir)? {
if !cli.quiet {
println!(" HNSW: {} vectors", count);
}
}
if !cli.quiet {
println!("Reference '{}' updated.", name);
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_ref_list_entry_serialization() {
let entry = RefListEntry {
name: "stdlib".into(),
path: "/home/user/.cqs/refs/stdlib".into(),
source: Some("/usr/src/rust/library".into()),
weight: 0.8,
chunks: 1234,
};
let json = serde_json::to_value(&entry).unwrap();
assert_eq!(json["name"], "stdlib");
assert_eq!(json["path"], "/home/user/.cqs/refs/stdlib");
assert_eq!(json["source"], "/usr/src/rust/library");
assert_eq!(json["weight"], 0.8f32 as f64);
assert_eq!(json["chunks"], 1234);
}
#[test]
fn test_ref_list_entry_no_source() {
let entry = RefListEntry {
name: "external".into(),
path: "/home/user/.cqs/refs/external".into(),
source: None,
weight: 0.5,
chunks: 0,
};
let json = serde_json::to_value(&entry).unwrap();
assert!(json.get("source").is_none());
assert_eq!(json["chunks"], 0);
}
}