use super::*;
use smos_domain::{Embedding, Fact, FactId, NewPendingRequest, Timestamp};
/// Layer-2 dedup: a scripted dedup hit scored 0.98 against a seeded fact must
/// make `execute` confirm that fact rather than persist the rephrased text.
///
/// Verifies that `execute` reports zero new facts, that the seeded fact ends
/// up with two distinct source sessions, that nothing is stored under the
/// rephrased content's `FactId`, and that the pending list stays empty.
#[tokio::test]
async fn persist_facts_layer2_semantic_match_confirms_existing_fact() {
    let rephrased = "token cache TTL is 60 to prevent stale entries";

    let facts = InMemoryFacts::default();
    let sessions = RecordingSessions::default();

    // Seed the store with the original wording, attributed to session 1.
    let request = NewPendingRequest {
        content: "the token cache uses TTL=60 to avoid stale entries",
        memory_key: mk(),
        session: sid(1),
        embedding: Embedding::new(vec![1.0]).unwrap(),
        extracted_at: Timestamp::from_unix_secs(1_700_000_000).unwrap(),
        base_confidence: ConfidenceConfig::default().base,
    };
    let seeded = Fact::new_pending(request).unwrap();
    let seeded_id = seeded.id().clone();
    facts.seed(seeded);

    // Script the dedup lookup to return the seeded fact with a 0.98 score.
    let seeded_snapshot = facts.get_clone(&seeded_id).expect("seeded fact");
    facts.script_dedup_hits(vec![hit_for(&seeded_snapshot, 0.98, mk())]);

    // The extractor yields exactly the rephrased sentence.
    let extractor = ScriptedExtractor::new(vec![Ok(vec![rephrased.to_string()])]);
    let fix = Fix::new();
    let uc = build(
        &facts,
        &sessions,
        &extractor,
        &fix.embedder,
        &fix.clock,
        &fix.cfg,
        &fix.extraction_cfg,
    );

    // Run the use case from a second session.
    let created = uc.execute(rephrased, &[], &mk(), &sid(2)).await.unwrap();
    assert_eq!(
        created, 0,
        "semantic duplicate must confirm, not create a new fact"
    );

    let after = facts
        .get_clone(&seeded_id)
        .expect("seeded fact still present");
    let distinct_sessions = after.source_sessions().distinct_count();
    assert_eq!(
        distinct_sessions, 2,
        "semantic match grows provenance to two sessions"
    );

    let rephrased_id = FactId::from_content(rephrased);
    assert!(
        facts.get_clone(&rephrased_id).is_none(),
        "no new fact id created for the rephrased variant"
    );

    let pending = sessions.pending.lock().unwrap();
    assert!(
        pending.is_empty(),
        "semantic confirmation must not register on the pending list"
    );
}
/// Layer-2 dedup: a scripted dedup hit scored 0.80 (with the default
/// extraction config from `Fix::new()`) must not collapse the incoming
/// content onto the seeded fact.
///
/// Verifies that `execute` reports one new fact, that the fact is stored
/// under the `FactId` derived from its own content, and that exactly one
/// entry lands on the pending list.
#[tokio::test]
async fn persist_facts_layer2_below_threshold_creates_new_fact() {
    let new_content = "TLS handshake failure in the upstream pool";

    let facts = InMemoryFacts::default();
    let sessions = RecordingSessions::default();

    // Seed an unrelated fact, attributed to session 1.
    let request = NewPendingRequest {
        content: "auth module uses Argon2id for password hashing",
        memory_key: mk(),
        session: sid(1),
        embedding: Embedding::new(vec![1.0]).unwrap(),
        extracted_at: Timestamp::from_unix_secs(1_700_000_000).unwrap(),
        base_confidence: ConfidenceConfig::default().base,
    };
    let seeded = Fact::new_pending(request).unwrap();
    let seeded_id = seeded.id().clone();
    facts.seed(seeded);

    // Script the dedup lookup to return the seeded fact with a 0.80 score.
    let seeded_snapshot = facts.get_clone(&seeded_id).expect("seeded fact");
    facts.script_dedup_hits(vec![hit_for(&seeded_snapshot, 0.80, mk())]);

    let extractor = ScriptedExtractor::new(vec![Ok(vec![new_content.to_string()])]);
    let fix = Fix::new();
    let uc = build(
        &facts,
        &sessions,
        &extractor,
        &fix.embedder,
        &fix.clock,
        &fix.cfg,
        &fix.extraction_cfg,
    );

    // Run the use case from a second session.
    let created = uc.execute(new_content, &[], &mk(), &sid(2)).await.unwrap();
    assert_eq!(created, 1, "below-threshold similarity must create a new fact");

    let expected_id = FactId::from_content(new_content);
    assert!(
        facts.contains(&expected_id),
        "new fact persisted under its own FactId"
    );

    // The guard is dropped at the end of this statement; only the length is kept.
    let pending_len = sessions.pending.lock().unwrap().len();
    assert_eq!(pending_len, 1, "new fact registered on the pending list");
}
#[tokio::test]
async fn persist_facts_layer2_missing_distance_falls_through_to_new_fact() {
let facts = InMemoryFacts::default();
let sessions = RecordingSessions::default();
let stored = Fact::new_pending(NewPendingRequest {
content: "config reload triggers a graceful drain",
memory_key: mk(),
session: sid(1),
embedding: Embedding::new(vec![1.0]).unwrap(),
extracted_at: Timestamp::from_unix_secs(1_700_000_000).unwrap(),
base_confidence: ConfidenceConfig::default().base,
})
.unwrap();
let stored_id = stored.id().clone();
facts.seed(stored);
let mut hit = hit_for(
&facts.get_clone(&stored_id).expect("seeded fact"),
1.0,
mk(),
);
hit.metadata.distance = None;
facts.script_dedup_hits(vec![hit]);
let new_content = "config reload drains gracefully on SIGHUP";
let extractor = ScriptedExtractor::new(vec![Ok(vec![new_content.to_string()])]);
let fix = Fix::new();
let uc = build(
&facts,
&sessions,
&extractor,
&fix.embedder,
&fix.clock,
&fix.cfg,
&fix.extraction_cfg,
);
let n = uc.execute(new_content, &[], &mk(), &sid(2)).await.unwrap();
assert_eq!(
n, 1,
"missing distance must not collapse — fall through to new fact"
);
}
/// Layer-2 dedup with a configurable threshold: once
/// `dedup_cosine_threshold` is overridden to 0.80, a scripted dedup hit
/// scored 0.85 must be treated as a semantic match.
///
/// Verifies that `execute` reports zero new facts and that the seeded fact
/// ends up with two distinct source sessions.
#[tokio::test]
async fn persist_facts_layer2_threshold_lowered_collapses_0_85_pair() {
    let rephrased = "the indexer caps batches at 1024 documents";

    let facts = InMemoryFacts::default();
    let sessions = RecordingSessions::default();

    // Seed the original wording, attributed to session 1.
    let request = NewPendingRequest {
        content: "indexer batches at most 1024 documents per commit",
        memory_key: mk(),
        session: sid(1),
        embedding: Embedding::new(vec![1.0]).unwrap(),
        extracted_at: Timestamp::from_unix_secs(1_700_000_000).unwrap(),
        base_confidence: ConfidenceConfig::default().base,
    };
    let seeded = Fact::new_pending(request).unwrap();
    let seeded_id = seeded.id().clone();
    facts.seed(seeded);

    // Script the dedup lookup to return the seeded fact with a 0.85 score.
    let seeded_snapshot = facts.get_clone(&seeded_id).expect("seeded fact");
    facts.script_dedup_hits(vec![hit_for(&seeded_snapshot, 0.85, mk())]);

    let extractor = ScriptedExtractor::new(vec![Ok(vec![rephrased.to_string()])]);

    // Replace the fixture's extraction config with a 0.80 threshold.
    let mut fix = Fix::new();
    fix.extraction_cfg = ExtractionConfig {
        dedup_cosine_threshold: 0.80,
    };
    let uc = build(
        &facts,
        &sessions,
        &extractor,
        &fix.embedder,
        &fix.clock,
        &fix.cfg,
        &fix.extraction_cfg,
    );

    // Run the use case from a second session.
    let created = uc.execute(rephrased, &[], &mk(), &sid(2)).await.unwrap();
    assert_eq!(
        created, 0,
        "lowered threshold collapses the 0.85 pair via semantic match"
    );

    let after = facts
        .get_clone(&seeded_id)
        .expect("seeded fact still present");
    let distinct_sessions = after.source_sessions().distinct_count();
    assert_eq!(distinct_sessions, 2, "semantic collapse grows provenance");
}