use crate::core::indexer::CodeIndexer;
use dashmap::DashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
/// Progress marker for a single indexing stage.
///
/// Serialized and deserialized in `snake_case`, so `InProgress` travels as
/// `"in_progress"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum StageStatus {
/// Initial state; this is the `Default` value.
#[default]
Pending,
/// Presumably: work on the stage has started but not finished — confirm
/// against the code that drives the stages.
InProgress,
/// The only variant for which `is_ready` returns `true`.
Ready,
/// Presumably: the stage was deliberately not run — confirm against the
/// code that drives the stages.
Skipped,
}
impl StageStatus {
    /// Reports whether this status is exactly [`StageStatus::Ready`].
    ///
    /// `Pending`, `InProgress` and `Skipped` all yield `false`.
    pub fn is_ready(self) -> bool {
        // Spelled out exhaustively so that adding a variant forces a decision here.
        match self {
            StageStatus::Ready => true,
            StageStatus::Pending | StageStatus::InProgress | StageStatus::Skipped => false,
        }
    }
}
/// Status of one indexing stage plus optional bookkeeping values.
///
/// Every `Option` field is left out of the serialized output while it is
/// `None`. The `Default` value has `status == StageStatus::Pending` and all
/// optional fields unset.
#[derive(Debug, Clone, Default, serde::Serialize)]
pub struct StageState {
/// Current progress marker for the stage.
pub status: StageStatus,
/// Presumably a timestamp recorded when the stage began; the string format
/// is not fixed by this file — TODO confirm with the writer.
#[serde(skip_serializing_if = "Option::is_none")]
pub started_at: Option<String>,
/// Presumably a timestamp recorded when the stage finished; format not
/// fixed by this file — TODO confirm with the writer.
#[serde(skip_serializing_if = "Option::is_none")]
pub completed_at: Option<String>,
/// Presumably the number of files handled by the stage — TODO confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub files: Option<usize>,
/// Presumably the number of chunks produced by the stage — TODO confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub chunks: Option<usize>,
/// Presumably the number of items embedded so far — TODO confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub embedded: Option<usize>,
/// Presumably the denominator for a progress counter such as `embedded` —
/// TODO confirm.
#[serde(skip_serializing_if = "Option::is_none")]
pub total: Option<usize>,
}
impl StageState {
    /// Returns the default state: status `Pending`, every optional field `None`.
    pub fn pending() -> Self {
        Default::default()
    }

    /// Returns a state marked `Skipped` whose optional fields are all `None`.
    pub fn skipped() -> Self {
        let status = StageStatus::Skipped;
        Self {
            status,
            ..Self::pending()
        }
    }
}
/// State of the three indexing stages tracked for an index.
///
/// `search_capabilities` and `lifecycle_status` derive their answers from the
/// `status` of each field. The `Default` value has every stage `Pending`.
#[derive(Debug, Clone, Default, serde::Serialize)]
pub struct IndexStages {
/// Lexical stage; when ready it enables the `"bm25"`, `"literal"` and
/// `"exact_match"` capabilities.
pub lexical: StageState,
/// Semantic stage; when ready it enables the `"vector"` capability.
pub semantic: StageState,
/// Graph stage; when ready it enables the `"kg"` capability.
pub graph: StageState,
}
impl IndexStages {
    /// Lists the search capabilities unlocked by the stages that are `Ready`.
    ///
    /// Entries appear in stage order: the lexical stage contributes `"bm25"`,
    /// `"literal"` and `"exact_match"`; the semantic stage contributes
    /// `"vector"`; the graph stage contributes `"kg"`. A stage in any other
    /// status contributes nothing.
    pub fn search_capabilities(&self) -> Vec<&'static str> {
        let mut capabilities = Vec::with_capacity(5);
        if self.lexical.status.is_ready() {
            capabilities.extend(["bm25", "literal", "exact_match"]);
        }
        if self.semantic.status.is_ready() {
            capabilities.extend(["vector"]);
        }
        if self.graph.status.is_ready() {
            capabilities.extend(["kg"]);
        }
        capabilities
    }

    /// Collapses the three stage statuses into one lifecycle label.
    ///
    /// The stages are consulted in order (lexical, semantic, graph) and the
    /// first one that settles the answer wins:
    ///
    /// * lexical `Pending` -> `"created"`, `InProgress` -> `"walking"`,
    ///   `Skipped` -> `"ready"`;
    /// * lexical `Ready`, semantic `Skipped` -> `"ready"`;
    /// * lexical `Ready`, semantic `Pending`/`InProgress` -> `"indexed_lexical"`;
    /// * lexical and semantic `Ready`, graph `Pending`/`InProgress` ->
    ///   `"indexed_vector"`;
    /// * lexical and semantic `Ready`, graph `Ready`/`Skipped` -> `"ready"`.
    pub fn lifecycle_status(&self) -> &'static str {
        match self.lexical.status {
            StageStatus::Pending => "created",
            StageStatus::InProgress => "walking",
            StageStatus::Skipped => "ready",
            StageStatus::Ready => match self.semantic.status {
                // A skipped semantic stage is terminal regardless of the graph stage.
                StageStatus::Skipped => "ready",
                StageStatus::Pending | StageStatus::InProgress => "indexed_lexical",
                StageStatus::Ready => match self.graph.status {
                    StageStatus::Pending | StageStatus::InProgress => "indexed_vector",
                    StageStatus::Ready | StageStatus::Skipped => "ready",
                },
            },
        }
    }
}
/// String-backed identifier of an index.
///
/// Hashable and comparable, which lets `IndexRegistry` use it as a map key.
/// The wrapped string is public and is what `Display` prints.
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct IndexId(pub String);
impl IndexId {
pub fn new(s: impl Into<String>) -> Self {
Self(s.into())
}
}
impl std::fmt::Display for IndexId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
/// Everything the process keeps for one index: its identity, the shared
/// indexer, walk configuration and shared mutable state.
///
/// `IndexHandle::bare` builds one with default configuration; the registry
/// stores handles behind an `Arc`, keyed by `id`.
pub struct IndexHandle {
/// Identifier of the index; `IndexRegistry::register` uses it as the map key.
pub id: IndexId,
/// The indexer for this index, shared and guarded by an async read/write lock.
pub indexer: Arc<RwLock<CodeIndexer>>,
/// Root directory of the index. Presumably the `root_path` handed to
/// `path_matches_filter` — verify against callers.
pub root_path: std::path::PathBuf,
/// NOTE(review): consumers are not visible in this file; presumably extra
/// paths to include in the walk. Empty in `bare`.
pub include_paths: Vec<std::path::PathBuf>,
/// NOTE(review): presumably glob patterns for paths to leave out of the
/// walk — confirm with the walker. Empty in `bare`.
pub exclude_globs: Vec<String>,
/// NOTE(review): presumably the file extensions to index — confirm with the
/// walker. Empty in `bare`.
pub extensions: Vec<String>,
/// NOTE(review): purpose not visible in this file. Empty in `bare`.
pub domain_terms: Vec<String>,
/// `bare` sets this to `true`. Presumably controls whether documentation
/// files are indexed — TODO confirm.
pub include_docs: bool,
/// `bare` sets this to `true`. Presumably controls whether `.gitignore`
/// rules are honoured during the walk — TODO confirm.
pub respect_gitignore: bool,
/// Presumably the patterns passed to `path_matches_filter` (its warning
/// message names `path_filter`) — verify against callers. Empty in `bare`.
pub path_filter: Vec<String>,
/// Optional embedding vector, shared behind a lock; starts as `None` in `bare`.
pub context_embedding: Arc<RwLock<Option<Vec<f32>>>>,
/// Optional summary text, shared behind a lock; starts as `None` in `bare`.
pub context_summary: Arc<RwLock<Option<String>>>,
/// Optional commit identifier, shared behind a lock; starts as `None` in
/// `bare`. Presumably the HEAD SHA at the time of the last index — TODO confirm.
pub indexed_head_sha: Arc<RwLock<Option<String>>>,
/// `bare` sets this to `false`. Presumably, when `true`, the semantic and
/// graph stages are skipped — TODO confirm with the stage driver.
pub lexical_only: bool,
/// Per-stage progress, shared behind a lock; starts as `IndexStages::default()`.
pub stages: Arc<RwLock<IndexStages>>,
/// Shared notification primitive. NOTE(review): who notifies and who waits
/// is not visible in this file.
pub search_pressure: Arc<tokio::sync::Notify>,
/// Diagnostics about the most recent walk, shared behind a lock.
pub walk_diagnostics: Arc<tokio::sync::RwLock<WalkDiagnostics>>,
}
/// Diagnostics describing the most recent walk.
///
/// Every field falls back to its default when missing from deserialized
/// input, and the two `Option` fields are omitted from serialized output
/// while they are `None`.
#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize)]
pub struct WalkDiagnostics {
/// Presumably a timestamp for when the last walk began; the string format
/// is not fixed by this file — TODO confirm with the writer.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub last_walk_started_at: Option<String>,
/// Presumably the number of files the last walk encountered — TODO confirm.
#[serde(default)]
pub last_walk_files_seen: u64,
/// Presumably the number of files the last walk chose not to index —
/// TODO confirm.
#[serde(default)]
pub last_walk_files_skipped: u64,
/// Presumably the error message from the last walk, if it failed —
/// TODO confirm.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub last_walk_error: Option<String>,
}
impl IndexHandle {
    /// Builds a handle from its three mandatory parts, with default
    /// configuration for everything else.
    ///
    /// Defaults: all list-valued settings are empty, `include_docs` and
    /// `respect_gitignore` are `true`, `lexical_only` is `false`, the optional
    /// shared values start as `None`, and `stages` / `walk_diagnostics` start
    /// from their `Default` values.
    pub fn bare(
        id: IndexId,
        indexer: Arc<RwLock<CodeIndexer>>,
        root_path: std::path::PathBuf,
    ) -> Self {
        // Wraps a value in the `Arc<RwLock<_>>` shape used by the shared fields.
        fn shared<T>(value: T) -> Arc<RwLock<T>> {
            Arc::new(RwLock::new(value))
        }

        Self {
            id,
            indexer,
            root_path,
            include_paths: vec![],
            exclude_globs: vec![],
            extensions: vec![],
            domain_terms: vec![],
            include_docs: true,
            respect_gitignore: true,
            path_filter: vec![],
            context_embedding: shared(None),
            context_summary: shared(None),
            indexed_head_sha: shared(None),
            lexical_only: false,
            stages: shared(IndexStages::default()),
            search_pressure: Arc::new(tokio::sync::Notify::new()),
            walk_diagnostics: shared(WalkDiagnostics::default()),
        }
    }
}
/// Decides whether `path` passes a filter on the first path component below
/// `root_path`.
///
/// * An empty `patterns` slice accepts every path, even one outside `root_path`.
/// * Otherwise `path` must lie under `root_path`, and the first component of
///   the remainder (a directory name, or the file name for a file sitting
///   directly in the root) must match at least one pattern.
/// * Each pattern is compiled as a glob. A pattern that fails to compile logs
///   a warning and is compared to the component as a literal string instead.
///
/// Returns `false` when `path` is not under `root_path`, when nothing remains
/// after stripping the root, or when the first component is not valid UTF-8.
pub fn path_matches_filter(
    path: &std::path::Path,
    root_path: &std::path::Path,
    patterns: &[String],
) -> bool {
    if patterns.is_empty() {
        return true;
    }

    let top_level = path
        .strip_prefix(root_path)
        .ok()
        .and_then(|relative| relative.components().next())
        .and_then(|component| component.as_os_str().to_str());
    let Some(subdir) = top_level else {
        return false;
    };

    // `any` stops at the first hit, so later patterns are neither compiled nor logged.
    patterns.iter().any(|pat| match glob::Pattern::new(pat) {
        Ok(compiled) => compiled.matches(subdir),
        Err(_) => {
            tracing::warn!("path_filter pattern '{pat}' is not a valid glob; using exact match");
            pat == subdir
        }
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Calls `path_matches_filter` with plain strings, converting them to the
    /// `Path` / `String` types the function expects.
    fn filter_allows(root: &str, path: &str, patterns: &[&str]) -> bool {
        let owned: Vec<String> = patterns.iter().map(|p| (*p).to_owned()).collect();
        path_matches_filter(Path::new(path), Path::new(root), &owned)
    }

    #[test]
    fn path_filter_matches_immediate_subdir() {
        let root = "/data/repos";

        // No patterns: everything passes.
        assert!(filter_allows(root, "/data/repos/anything/src/lib.rs", &[]));

        // A literal pattern matches only the identically named top-level directory.
        let exact = ["duetto-common"];
        assert!(filter_allows(
            root,
            "/data/repos/duetto-common/src/lib.rs",
            &exact
        ));
        assert!(!filter_allows(root, "/data/repos/other/src/lib.rs", &exact));

        // Wildcards are applied to the top-level directory name.
        let globs = ["common-*", "duetto-common*"];
        assert!(filter_allows(root, "/data/repos/common-utils/foo.rs", &globs));
        assert!(filter_allows(
            root,
            "/data/repos/duetto-common-events/lib.rs",
            &globs
        ));
        assert!(!filter_allows(root, "/data/repos/totally-other/lib.rs", &globs));

        // Paths outside the root never match a non-empty filter.
        assert!(!filter_allows(root, "/elsewhere/duetto-common/lib.rs", &globs));
    }

    #[test]
    fn path_filter_matches_any_pattern() {
        let root = "/repos";
        let either = ["api", "frontend"];

        assert!(filter_allows(root, "/repos/api/handlers.rs", &either));
        assert!(filter_allows(root, "/repos/frontend/app.tsx", &either));
        assert!(!filter_allows(root, "/repos/docs/README.md", &either));
    }

    #[test]
    fn stage_status_capabilities_grow_with_stages() {
        let mut stages = IndexStages::default();
        assert!(stages.search_capabilities().is_empty());
        assert_eq!(stages.lifecycle_status(), "created");

        stages.lexical.status = StageStatus::Ready;
        assert_eq!(
            stages.search_capabilities(),
            ["bm25", "literal", "exact_match"]
        );
        assert_eq!(stages.lifecycle_status(), "indexed_lexical");

        stages.semantic.status = StageStatus::Ready;
        let with_semantic = stages.search_capabilities();
        assert!(with_semantic.contains(&"vector"));
        assert!(!with_semantic.contains(&"kg"));
        assert_eq!(stages.lifecycle_status(), "indexed_vector");

        stages.graph.status = StageStatus::Ready;
        let with_graph = stages.search_capabilities();
        assert!(with_graph.contains(&"kg"));
        assert_eq!(stages.lifecycle_status(), "ready");
    }

    #[test]
    fn stage_status_lexical_only_treats_skipped_as_terminal() {
        let lexical = StageState {
            status: StageStatus::Ready,
            ..StageState::pending()
        };
        let stages = IndexStages {
            lexical,
            semantic: StageState::skipped(),
            graph: StageState::skipped(),
        };

        let available = stages.search_capabilities();
        assert_eq!(available, ["bm25", "literal", "exact_match"]);
        assert!(!available.contains(&"vector"));
        assert!(!available.contains(&"kg"));
        assert_eq!(stages.lifecycle_status(), "ready");
    }

    #[test]
    fn stage_status_walking_during_stage_1() {
        let mut stages = IndexStages::default();
        stages.lexical.status = StageStatus::InProgress;

        assert_eq!(stages.lifecycle_status(), "walking");
        assert!(stages.search_capabilities().is_empty());
    }

    #[test]
    fn path_filter_malformed_pattern_falls_back_to_exact() {
        // "[unclosed" is not a valid glob, so it is compared as a literal name.
        let broken = ["[unclosed"];

        assert!(filter_allows("/r", "/r/[unclosed/file.rs", &broken));
        assert!(!filter_allows("/r", "/r/other/file.rs", &broken));
    }
}
/// Concurrent registry of index handles keyed by `IndexId`.
///
/// The map lives behind an `Arc`, so cloning the registry yields another
/// handle onto the same underlying map rather than a copy of its contents.
#[derive(Default, Clone)]
pub struct IndexRegistry {
// Shared id -> handle map; each handle is itself reference-counted.
indexes: Arc<DashMap<IndexId, Arc<IndexHandle>>>,
}
impl IndexRegistry {
    /// Creates an empty registry.
    pub fn new() -> Self {
        Default::default()
    }

    /// Stores `handle` under its own `id` and returns the shared pointer that
    /// was stored.
    ///
    /// Per `DashMap::insert`, an entry already present under the same id is
    /// replaced.
    pub fn register(&self, handle: IndexHandle) -> Arc<IndexHandle> {
        let shared = Arc::new(handle);
        let key = shared.id.clone();
        self.indexes.insert(key, Arc::clone(&shared));
        shared
    }

    /// Looks up the handle registered under `id`, if any.
    pub fn get(&self, id: &IndexId) -> Option<Arc<IndexHandle>> {
        let entry = self.indexes.get(id)?;
        Some(Arc::clone(entry.value()))
    }

    /// Returns the ids of all registered indexes, in the map's iteration order.
    pub fn list(&self) -> Vec<IndexId> {
        let mut ids = Vec::new();
        for entry in self.indexes.iter() {
            ids.push(entry.key().clone());
        }
        ids
    }

    /// Removes the entry for `id`; returns `true` if there was one to remove.
    pub fn unregister(&self, id: &IndexId) -> bool {
        let removed = self.indexes.remove(id);
        removed.is_some()
    }

    /// Number of registered indexes.
    pub fn len(&self) -> usize {
        self.indexes.len()
    }

    /// `true` when no index is registered.
    pub fn is_empty(&self) -> bool {
        self.indexes.is_empty()
    }
}