use std::path::PathBuf;
use crate::diff::semantic_diff;
use crate::store::{Store, StoreError};
#[derive(Debug, Clone, serde::Serialize)]
pub struct DriftEntry {
pub name: String,
#[serde(serialize_with = "crate::serialize_path_normalized")]
pub file: PathBuf,
pub chunk_type: crate::language::ChunkType,
pub similarity: f32,
pub drift: f32,
}
#[derive(Debug, Clone, serde::Serialize)]
pub struct DriftResult {
pub reference: String,
pub threshold: f32,
pub min_drift: f32,
pub drifted: Vec<DriftEntry>,
pub total_compared: usize,
pub unchanged: usize,
}
pub fn detect_drift<Mode1, Mode2>(
ref_store: &Store<Mode1>,
project_store: &Store<Mode2>,
ref_name: &str,
threshold: f32,
min_drift: f32,
language_filter: Option<&str>,
) -> Result<DriftResult, StoreError> {
let _span =
tracing::info_span!("detect_drift", reference = ref_name, threshold, min_drift).entered();
let diff = semantic_diff(
ref_store,
project_store,
ref_name,
"project",
threshold,
language_filter,
)?;
let total_compared = diff.modified.len() + diff.unchanged_count;
let mut drifted: Vec<DriftEntry> = diff
.modified
.into_iter()
.filter_map(|entry| {
let sim = entry.similarity?; let drift = 1.0 - sim;
if drift >= min_drift {
Some(DriftEntry {
name: entry.name,
file: entry.file,
chunk_type: entry.chunk_type,
similarity: sim,
drift,
})
} else {
None
}
})
.collect();
drifted.sort_by(|a, b| {
b.drift
.total_cmp(&a.drift)
.then(a.file.cmp(&b.file))
.then(a.name.cmp(&b.name))
});
tracing::info!(
drifted = drifted.len(),
total_compared,
"Drift detection complete"
);
Ok(DriftResult {
reference: ref_name.to_string(),
threshold,
min_drift,
drifted,
total_compared,
unchanged: diff.unchanged_count,
})
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
fn make_store() -> (Store, TempDir) {
let dir = TempDir::new().unwrap();
let db_path = dir.path().join(crate::INDEX_DB_FILENAME);
let store = Store::open(&db_path).unwrap();
store.init(&crate::store::ModelInfo::default()).unwrap();
(store, dir)
}
#[test]
fn test_drift_empty_stores() {
let (ref_store, _d1) = make_store();
let (proj_store, _d2) = make_store();
let result = detect_drift(&ref_store, &proj_store, "test-ref", 0.95, 0.0, None).unwrap();
assert!(result.drifted.is_empty());
assert_eq!(result.total_compared, 0);
assert_eq!(result.unchanged, 0);
}
#[test]
fn test_drift_entry_fields() {
let entry = DriftEntry {
name: "foo".into(),
file: "src/foo.rs".into(),
chunk_type: ChunkType::Function,
similarity: 0.7,
drift: 0.3,
};
assert!((entry.drift - (1.0 - entry.similarity)).abs() < f32::EPSILON);
}
#[test]
fn test_drift_sort_order() {
let mut entries = vec![
DriftEntry {
name: "a".into(),
file: "a.rs".into(),
chunk_type: ChunkType::Function,
similarity: 0.9,
drift: 0.1,
},
DriftEntry {
name: "b".into(),
file: "b.rs".into(),
chunk_type: ChunkType::Function,
similarity: 0.5,
drift: 0.5,
},
DriftEntry {
name: "c".into(),
file: "c.rs".into(),
chunk_type: ChunkType::Function,
similarity: 0.7,
drift: 0.3,
},
];
entries.sort_by(|a, b| b.drift.total_cmp(&a.drift));
assert_eq!(entries[0].name, "b"); assert_eq!(entries[1].name, "c");
assert_eq!(entries[2].name, "a"); }
#[test]
fn test_drift_min_filter() {
let entries = vec![
DriftEntry {
name: "small".into(),
file: "a.rs".into(),
chunk_type: ChunkType::Function,
similarity: 0.92,
drift: 0.08,
},
DriftEntry {
name: "big".into(),
file: "b.rs".into(),
chunk_type: ChunkType::Function,
similarity: 0.5,
drift: 0.5,
},
];
let min_drift = 0.1;
let filtered: Vec<_> = entries
.into_iter()
.filter(|e| e.drift >= min_drift)
.collect();
assert_eq!(filtered.len(), 1);
assert_eq!(filtered[0].name, "big");
}
use crate::embedder::Embedding;
use crate::language::ChunkType;
use crate::parser::types::{Chunk, Language};
use std::path::PathBuf;
fn make_chunk(name: &str, file: &str, lang: Language) -> Chunk {
let content = format!("fn {}() {{ /* body */ }}", name);
let hash = blake3::hash(content.as_bytes()).to_hex().to_string();
Chunk {
id: format!("{}:1:{}", file, &hash[..8]),
file: PathBuf::from(file),
language: lang,
chunk_type: ChunkType::Function,
name: name.to_string(),
signature: format!("fn {}()", name),
content,
doc: None,
line_start: 1,
line_end: 5,
content_hash: hash,
parent_id: None,
window_idx: None,
parent_type_name: None,
}
}
fn make_emb(seed: f32) -> Embedding {
let mut v = vec![0.0f32; crate::EMBEDDING_DIM];
let idx = (seed.abs() as usize) % crate::EMBEDDING_DIM;
v[idx] = 1.0;
Embedding::new(v)
}
#[test]
fn test_drift_with_matching_functions() {
let (ref_store, _d1) = make_store();
let (proj_store, _d2) = make_store();
let chunk = make_chunk("process_data", "src/lib.rs", Language::Rust);
let emb = make_emb(1.0);
ref_store.upsert_chunk(&chunk, &emb, Some(100)).unwrap();
proj_store.upsert_chunk(&chunk, &emb, Some(100)).unwrap();
let result = detect_drift(&ref_store, &proj_store, "test-ref", 0.95, 0.0, None).unwrap();
assert_eq!(result.total_compared, 1);
assert_eq!(result.unchanged, 1);
assert!(
result.drifted.is_empty(),
"identical embeddings should produce no drift"
);
}
#[test]
fn test_drift_with_different_embeddings() {
let (ref_store, _d1) = make_store();
let (proj_store, _d2) = make_store();
let chunk = make_chunk("process_data", "src/lib.rs", Language::Rust);
let emb_ref = make_emb(1.0);
let emb_proj = make_emb(2.0);
ref_store.upsert_chunk(&chunk, &emb_ref, Some(100)).unwrap();
proj_store
.upsert_chunk(&chunk, &emb_proj, Some(100))
.unwrap();
let result = detect_drift(&ref_store, &proj_store, "test-ref", 0.95, 0.0, None).unwrap();
assert!(
result.total_compared >= 1,
"should compare at least one pair"
);
assert!(
!result.drifted.is_empty(),
"different embeddings should produce drift"
);
let entry = &result.drifted[0];
assert_eq!(entry.name, "process_data");
assert!(entry.drift > 0.0, "drift should be positive");
assert!(
entry.similarity < 0.95,
"similarity should be below threshold"
);
}
#[test]
fn test_drift_min_drift_filter_with_stores() {
let (ref_store, _d1) = make_store();
let (proj_store, _d2) = make_store();
let mut ref_v = vec![0.0f32; crate::EMBEDDING_DIM];
ref_v[0] = 1.0;
let emb_ref = Embedding::new(ref_v);
let mut proj_v = vec![0.0f32; crate::EMBEDDING_DIM];
proj_v[0] = 1.0;
proj_v[1] = 1.0;
let norm = (2.0f32).sqrt();
proj_v[0] /= norm;
proj_v[1] /= norm;
let emb_proj = Embedding::new(proj_v);
let chunk = make_chunk("process_data", "src/lib.rs", Language::Rust);
ref_store.upsert_chunk(&chunk, &emb_ref, Some(100)).unwrap();
proj_store
.upsert_chunk(&chunk, &emb_proj, Some(100))
.unwrap();
let baseline = detect_drift(&ref_store, &proj_store, "test-ref", 0.95, 0.0, None).unwrap();
assert!(
!baseline.drifted.is_empty(),
"precondition: drift should exist (drift ≈ 0.29)"
);
let actual_drift = baseline.drifted[0].drift;
assert!(
actual_drift > 0.2 && actual_drift < 0.4,
"expected drift ≈ 0.29, got {actual_drift}"
);
let result = detect_drift(&ref_store, &proj_store, "test-ref", 0.95, 0.5, None).unwrap();
assert!(
result.drifted.is_empty(),
"min_drift=0.5 should filter out drift of ≈0.29"
);
assert!(result.total_compared >= 1);
}
#[test]
fn test_drift_language_filter() {
let (ref_store, _d1) = make_store();
let (proj_store, _d2) = make_store();
let rust_chunk = make_chunk("rust_func", "src/lib.rs", Language::Rust);
let emb_a = make_emb(1.0);
let emb_b = make_emb(2.0);
ref_store
.upsert_chunk(&rust_chunk, &emb_a, Some(100))
.unwrap();
proj_store
.upsert_chunk(&rust_chunk, &emb_b, Some(100))
.unwrap();
let py_chunk = make_chunk("py_func", "src/lib.py", Language::Python);
let emb_c = make_emb(3.0);
let emb_d = make_emb(4.0);
ref_store
.upsert_chunk(&py_chunk, &emb_c, Some(100))
.unwrap();
proj_store
.upsert_chunk(&py_chunk, &emb_d, Some(100))
.unwrap();
let result =
detect_drift(&ref_store, &proj_store, "test-ref", 0.95, 0.0, Some("rust")).unwrap();
let names: Vec<&str> = result.drifted.iter().map(|e| e.name.as_str()).collect();
assert!(
!names.contains(&"py_func"),
"Python function should be excluded by language filter"
);
assert!(
names.contains(&"rust_func"),
"Rust function should appear in drift results"
);
assert_eq!(
result.total_compared, 1,
"only one pair (rust) should be compared"
);
}
}