#![allow(clippy::expect_used, clippy::missing_panics_doc)]
use fathomdb::{
ChunkInsert, ChunkPolicy, EmbedderChoice, EmbedderError, Engine, EngineError, EngineOptions,
NodeInsert, QueryEmbedder, QueryEmbedderIdentity, VectorRegenerationConfig, WriteRequest,
};
use std::sync::Arc;
use tempfile::NamedTempFile;
#[derive(Debug)]
struct FakeEmbedder {
vector: Vec<f32>,
}
impl QueryEmbedder for FakeEmbedder {
fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
Ok(self.vector.clone())
}
fn identity(&self) -> QueryEmbedderIdentity {
QueryEmbedderIdentity {
model_identity: "fake-test-embedder".to_owned(),
model_version: "1".to_owned(),
dimension: self.vector.len(),
normalization_policy: "none".to_owned(),
}
}
fn max_tokens(&self) -> usize {
512
}
}
#[derive(Debug)]
struct FakeUnavailableEmbedder;
impl QueryEmbedder for FakeUnavailableEmbedder {
fn embed_query(&self, _text: &str) -> Result<Vec<f32>, EmbedderError> {
Err(EmbedderError::Unavailable("test".to_owned()))
}
fn identity(&self) -> QueryEmbedderIdentity {
QueryEmbedderIdentity {
model_identity: "fake-unavailable-embedder".to_owned(),
model_version: "1".to_owned(),
dimension: 4,
normalization_policy: "none".to_owned(),
}
}
fn max_tokens(&self) -> usize {
512
}
}
fn seed_goal(engine: &Engine) {
engine
.writer()
.submit(WriteRequest {
label: "seed".to_owned(),
nodes: vec![NodeInsert {
row_id: "goal-row".to_owned(),
logical_id: "goal-1".to_owned(),
kind: "Goal".to_owned(),
properties: r#"{"name":"ship docs"}"#.to_owned(),
source_ref: Some("seed".to_owned()),
upsert: false,
chunk_policy: ChunkPolicy::Preserve,
content_ref: None,
}],
node_retires: vec![],
edges: vec![],
edge_retires: vec![],
chunks: vec![ChunkInsert {
id: "goal-chunk".to_owned(),
node_logical_id: "goal-1".to_owned(),
text_content: "ship the quarterly documentation plan".to_owned(),
byte_start: None,
byte_end: None,
content_hash: None,
}],
runs: vec![],
steps: vec![],
actions: vec![],
optional_backfills: vec![],
vec_inserts: vec![],
operational_writes: vec![],
})
.expect("seed write");
}
fn open_engine(choice: EmbedderChoice) -> (NamedTempFile, Engine) {
let db = NamedTempFile::new().expect("temporary db");
let options = EngineOptions::new(db.path()).with_embedder(choice);
let engine = Engine::open(options).expect("engine opens");
(db, engine)
}
#[test]
fn search_with_no_embedder_leaves_vector_branch_dormant() {
let (_db, engine) = open_engine(EmbedderChoice::None);
seed_goal(&engine);
let builder = engine
.query("Goal")
.search("totally novel nonsense query", 10);
let plan = builder.compile_plan().expect("compile plan");
assert!(
plan.vector.is_none(),
"EmbedderChoice::None must leave CompiledRetrievalPlan::vector empty"
);
let rows = builder.execute().expect("search executes");
assert_eq!(
rows.vector_hit_count, 0,
"no embedder means the vector branch never fires"
);
assert!(
!rows.was_degraded,
"EmbedderChoice::None is a deliberate opt-out, not a capability miss"
);
}
#[test]
fn search_with_fake_embedder_runs_three_branch_pipeline() {
const DIM: usize = 4;
let fake = Arc::new(FakeEmbedder {
vector: vec![0.0; DIM],
});
let (_db, engine) = open_engine(EmbedderChoice::InProcess(fake));
seed_goal(&engine);
let builder = engine
.query("Goal")
.search("xyzzy-plover-zort-grue-xyzzy", 10);
let rows = builder.execute().expect("search executes");
assert_eq!(
rows.strict_hit_count, 0,
"synthetic query must not match seeded content"
);
assert_eq!(
rows.vector_hit_count, 0,
"no vector capability means the vector stage returns no hits"
);
assert!(
rows.was_degraded,
"vector capability miss while the embedder was invoked must set was_degraded"
);
}
#[test]
fn search_with_fake_embedder_populates_plan_vector_slot() {
const DIM: usize = 3;
let original = vec![0.25_f32, -0.5, 1.0];
let literal = serde_json::to_string(&original).expect("serialize");
let parsed: Vec<f32> = serde_json::from_str(&literal).expect("parse");
assert_eq!(parsed.len(), DIM, "dimension preserved through JSON");
assert!(
(parsed[0] - 0.25).abs() < f32::EPSILON,
"JSON round trip preserves component 0"
);
let (_db, engine) = open_engine(EmbedderChoice::None);
seed_goal(&engine);
let rows = engine
.query("Goal")
.search("xyzzy-plover-zort-grue-xyzzy", 10)
.execute()
.expect("search executes");
assert!(
!rows.was_degraded,
"no embedder => no capability-miss degradation"
);
}
#[test]
fn search_with_unavailable_embedder_degrades_gracefully() {
let unavailable = Arc::new(FakeUnavailableEmbedder);
let (_db, engine) = open_engine(EmbedderChoice::InProcess(unavailable));
seed_goal(&engine);
let rows = engine
.query("Goal")
.search("xyzzy-plover-zort-grue-xyzzy", 10)
.execute()
.expect("search executes without panic");
assert_eq!(rows.vector_hit_count, 0);
assert!(
rows.was_degraded,
"EmbedderError::Unavailable must surface as was_degraded == true"
);
}
#[test]
fn regenerate_vector_embeddings_errors_when_embedder_is_none() {
let (_db, engine) = open_engine(EmbedderChoice::None);
let config = VectorRegenerationConfig {
kind: "Document".to_owned(),
profile: "default".to_owned(),
chunking_policy: "per_chunk".to_owned(),
preprocessing_policy: "trim".to_owned(),
};
let error = engine
.regenerate_vector_embeddings(&config)
.expect_err("regen must fail without an embedder");
assert!(
matches!(error, EngineError::EmbedderNotConfigured),
"expected EmbedderNotConfigured, got: {error:?}"
);
let message = error.to_string();
assert!(
message.contains("embedder not configured"),
"error message must mention missing embedder, got: {message}"
);
}
#[cfg(feature = "sqlite-vec")]
#[test]
fn engine_regenerate_vector_embeddings_uses_open_time_embedder_identity() {
use fathomdb::{ChunkInsert, ChunkPolicy, NodeInsert, WriteRequest};
const DIM: usize = 4;
let fake = Arc::new(FakeEmbedder {
vector: vec![0.5; DIM],
});
let (db, engine) = open_engine(EmbedderChoice::InProcess(fake));
engine
.writer()
.submit(WriteRequest {
label: "seed".to_owned(),
nodes: vec![NodeInsert {
row_id: "doc-row".to_owned(),
logical_id: "doc-1".to_owned(),
kind: "Document".to_owned(),
properties: r#"{"title":"Budget"}"#.to_owned(),
source_ref: Some("seed".to_owned()),
upsert: false,
chunk_policy: ChunkPolicy::Preserve,
content_ref: None,
}],
node_retires: vec![],
edges: vec![],
edge_retires: vec![],
chunks: vec![ChunkInsert {
id: "chunk-1".to_owned(),
node_logical_id: "doc-1".to_owned(),
text_content: "budget narrative".to_owned(),
byte_start: None,
byte_end: None,
content_hash: None,
}],
runs: vec![],
steps: vec![],
actions: vec![],
optional_backfills: vec![],
vec_inserts: vec![],
operational_writes: vec![],
})
.expect("seed");
let config = VectorRegenerationConfig {
kind: "Document".to_owned(),
profile: "default".to_owned(),
chunking_policy: "per_chunk".to_owned(),
preprocessing_policy: "trim".to_owned(),
};
let report = engine
.regenerate_vector_embeddings(&config)
.expect("regenerate via open-time embedder");
assert_eq!(report.dimension, DIM);
assert_eq!(report.total_chunks, 1);
assert_eq!(report.regenerated_rows, 1);
drop(engine);
let conn = rusqlite::Connection::open(db.path()).expect("open db");
let (model_identity, model_version, dimension, normalization_policy): (
String,
String,
i64,
String,
) = conn
.query_row(
"SELECT model_identity, model_version, dimension, normalization_policy \
FROM vector_embedding_contracts WHERE profile = 'default'",
[],
|row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
)
.expect("contract row");
assert_eq!(model_identity, "fake-test-embedder");
assert_eq!(model_version, "1");
assert_eq!(dimension, DIM as i64);
assert_eq!(normalization_policy, "none");
}
#[test]
fn search_with_builtin_embedder_is_stubbed_to_none() {
let (_db, engine) = open_engine(EmbedderChoice::Builtin);
seed_goal(&engine);
let builder = engine
.query("Goal")
.search("xyzzy-plover-zort-grue-xyzzy", 10);
let plan = builder.compile_plan().expect("compile plan");
assert!(
plan.vector.is_none(),
"Phase 12.5a Builtin stub must leave the vector slot empty"
);
let rows = builder.execute().expect("search executes");
assert!(
!rows.was_degraded,
"Phase 12.5a Builtin stub resolves to None with no degradation signal"
);
}