use crate::schema::{Contradiction, EvidenceRecord, SourceRef};
use std::collections::BTreeSet;
/// Source-id prefixes that mark a source id as "direct": `is_direct_source_id`
/// accepts an id when it starts with any one of these.
const DIRECT_PREFIXES: &[&str] = &["file:", "command:", "test:"];
/// Token-overlap ratio below which `detect_auto_contradictions` treats two
/// summaries as diverging. It is only decisive when both records have the same
/// kind, because a kind difference already counts as divergence.
const TOKEN_OVERLAP_FLOOR: f64 = 0.5;
/// Returns `true` when the two evidence kinds form a pairing that is never
/// reported as a contradiction.
///
/// The check is symmetric: the arguments are put into lexicographic order
/// before the lookup, so every table entry is written smaller-kind-first.
fn is_non_contradicting_pair(a: &str, b: &str) -> bool {
    // Each entry is (smaller, larger) under `str` ordering.
    const COMPATIBLE_PAIRS: [(&str, &str); 7] = [
        ("observation", "proposal"),
        ("observation", "root-cause"),
        ("gap", "proposal"),
        ("missing-check", "proposal"),
        ("root-cause", "symptom"),
        ("failure-mode", "proposal"),
        ("proposal", "risk"),
    ];

    let (smaller, larger) = if b < a { (b, a) } else { (a, b) };
    COMPATIBLE_PAIRS
        .iter()
        .any(|&(lo, hi)| lo == smaller && hi == larger)
}
/// Returns `true` for the evidence kinds that `detect_auto_contradictions`
/// never pairs up: `subagent-session`, `blocker` and `objective`.
fn is_infrastructure_kind(kind: &str) -> bool {
    ["subagent-session", "blocker", "objective"].contains(&kind)
}
/// Evidence kinds that make a contradiction "high" severity when both sides
/// of the pair are in this list.
const HIGH_SEVERITY_KINDS: &[&str] = &["code-change", "root-cause", "test-change"];
/// Evidence kinds that make a contradiction "low" severity when both sides
/// of the pair are in this list.
const LOW_SEVERITY_KINDS: &[&str] = &["observation", "measurement", "symptom"];

/// Grades a contradiction between two evidence kinds.
///
/// Returns `"high"` when both kinds are high-severity kinds, `"low"` when both
/// are low-severity kinds, and `"medium"` for every other combination
/// (including mixed pairs and kinds in neither list). The result does not
/// depend on argument order.
fn severity_for_pair(left_kind: &str, right_kind: &str) -> &'static str {
    let both_high =
        HIGH_SEVERITY_KINDS.contains(&left_kind) && HIGH_SEVERITY_KINDS.contains(&right_kind);
    let both_low =
        LOW_SEVERITY_KINDS.contains(&left_kind) && LOW_SEVERITY_KINDS.contains(&right_kind);

    match (both_high, both_low) {
        (true, _) => "high",
        (false, true) => "low",
        (false, false) => "medium",
    }
}
/// Returns `true` when `source_id` starts with one of the `DIRECT_PREFIXES`.
fn is_direct_source_id(source_id: &str) -> bool {
    for prefix in DIRECT_PREFIXES {
        if source_id.starts_with(*prefix) {
            return true;
        }
    }
    false
}
/// Collects the record's source ids that pass `is_direct_source_id` into an
/// ordered, de-duplicated set of owned strings.
fn direct_source_set(record: &EvidenceRecord) -> BTreeSet<String> {
    let mut direct = BTreeSet::new();
    for source_id in record.source_ids.iter() {
        if is_direct_source_id(source_id) {
            direct.insert(source_id.clone());
        }
    }
    direct
}
/// Splits a summary into its set of distinct lowercase word tokens.
///
/// The text is lowercased first, then cut at every character that is not an
/// ASCII letter or digit; empty pieces are dropped. Non-ASCII characters
/// therefore act as separators rather than as part of a token.
fn tokens(summary: &str) -> BTreeSet<String> {
    let lowered = summary.to_lowercase();
    let mut words = BTreeSet::new();
    for piece in lowered.split(|ch: char| !ch.is_ascii_alphanumeric()) {
        if !piece.is_empty() {
            words.insert(piece.to_owned());
        }
    }
    words
}
/// Computes the Jaccard similarity `|a ∩ b| / |a ∪ b|` of two token sets.
///
/// Returns a value in `0.0..=1.0`. Two empty sets are defined to overlap
/// completely (`1.0`), which also avoids a division by zero.
fn token_overlap_ratio(a: &BTreeSet<String>, b: &BTreeSet<String>) -> f64 {
    let shared = a.intersection(b).count();
    // |a ∪ b| = |a| + |b| - |a ∩ b|; `shared` can never exceed either length,
    // so the subtraction cannot underflow.
    let combined = a.len() + b.len() - shared;
    if combined == 0 {
        return 1.0;
    }
    shared as f64 / combined as f64
}
/// Returns `true` when the two records are attributed differently: both carry
/// an agent id and the ids differ, or both carry a lane and the lanes differ.
///
/// A field that is missing on either record never counts as a difference.
fn different_attribution(a: &EvidenceRecord, b: &EvidenceRecord) -> bool {
    /// True only when both values are present and unequal.
    fn present_and_unequal(left: Option<&str>, right: Option<&str>) -> bool {
        matches!((left, right), (Some(l), Some(r)) if l != r)
    }

    present_and_unequal(a.agent_id.as_deref(), b.agent_id.as_deref())
        || present_and_unequal(a.lane.as_deref(), b.lane.as_deref())
}
/// Builds the id `con:auto:<first>:<second>` for a pair of evidence ids, with
/// the two ids placed in lexicographic order so that swapping the arguments
/// yields the same id.
fn contradiction_id(left: &str, right: &str) -> String {
    let mut ordered = [left, right];
    ordered.sort_unstable();
    let [first, second] = ordered;
    format!("con:auto:{first}:{second}")
}
pub fn detect_auto_contradictions(evidence: &[EvidenceRecord]) -> Vec<Contradiction> {
let mut out: Vec<Contradiction> = Vec::new();
let mut emitted: BTreeSet<String> = BTreeSet::new();
for left_idx in 0..evidence.len() {
for right_idx in (left_idx + 1)..evidence.len() {
let left = &evidence[left_idx];
let right = &evidence[right_idx];
if left.summary == right.summary {
continue;
}
if is_infrastructure_kind(&left.kind) || is_infrastructure_kind(&right.kind) {
continue;
}
if is_non_contradicting_pair(&left.kind, &right.kind) {
continue;
}
let left_sources = direct_source_set(left);
if left_sources.is_empty() {
continue;
}
let right_sources = direct_source_set(right);
if right_sources.is_empty() {
continue;
}
let shared: BTreeSet<String> =
left_sources.intersection(&right_sources).cloned().collect();
if shared.is_empty() {
continue;
}
if !different_attribution(left, right) {
continue;
}
let kind_diff = left.kind != right.kind;
let overlap = token_overlap_ratio(&tokens(&left.summary), &tokens(&right.summary));
let summary_diverges = kind_diff || overlap < TOKEN_OVERLAP_FLOOR;
if !summary_diverges {
continue;
}
let id = contradiction_id(&left.id, &right.id);
if !emitted.insert(id.clone()) {
continue;
}
let (first_id, second_id) = if left.id <= right.id {
(left.id.clone(), right.id.clone())
} else {
(right.id.clone(), left.id.clone())
};
let summary = format!(
"Evidence {first_id} and {second_id} disagree on the same direct span ({}): \"{}\" vs \"{}\"",
shared.iter().cloned().collect::<Vec<_>>().join(", "),
left.summary,
right.summary
);
let mut source_ids: Vec<String> = shared.iter().cloned().collect();
source_ids.sort();
let mut collected_refs: Vec<SourceRef> = Vec::new();
let mut seen_ref_ids: BTreeSet<String> = BTreeSet::new();
for candidate in left.source_refs.iter().chain(right.source_refs.iter()) {
if !shared.contains(&candidate.source_id) {
continue;
}
if !seen_ref_ids.insert(candidate.source_id.clone()) {
continue;
}
collected_refs.push(candidate.clone());
}
collected_refs.sort_by(|a, b| a.source_id.cmp(&b.source_id));
let source_refs = if collected_refs.is_empty() {
None
} else {
Some(collected_refs)
};
let severity = severity_for_pair(&left.kind, &right.kind).to_string();
out.push(Contradiction {
id,
summary,
conflicting_item_ids: vec![first_id, second_id],
severity,
source_ids,
source_refs,
});
}
}
out.sort_by(|a, b| a.id.cmp(&b.id));
out
}
// Unit tests for `detect_auto_contradictions`: one positive case and one case
// for each skip rule exercised below.
#[cfg(test)]
mod tests {
use super::detect_auto_contradictions;
use crate::schema::EvidenceRecord;
// Builds an `EvidenceRecord` fixture from the fields the detector reads.
// `source_refs` is left empty, `observed_at` is a fixed timestamp, and the
// remaining optional fields are `None`.
fn record(
id: &str,
kind: &str,
summary: &str,
source_ids: &[&str],
agent: Option<&str>,
lane: Option<&str>,
) -> EvidenceRecord {
EvidenceRecord {
id: id.to_string(),
kind: kind.to_string(),
summary: summary.to_string(),
source_ids: source_ids.iter().map(|s| (*s).to_string()).collect(),
source_refs: vec![],
observed_at: "2026-04-21T00:00:00Z".to_string(),
agent_id: agent.map(|s| s.to_string()),
lane: lane.map(|s| s.to_string()),
confidence: None,
rationale: None,
diff_ref: None,
span_before: None,
span_after: None,
}
}
// Positive case: both records cite the same `file:` source id, their kinds
// differ, and both agent and lane differ, so exactly one contradiction fires.
#[test]
fn fires_on_same_span_different_kind_and_agent() {
let left = record(
"ev-a",
"code-change",
"Patched strict-gate.mjs:42 to require direct refs",
&["file:scripts/strict-gate.mjs:42"],
Some("agent-alice"),
Some("impl"),
);
let right = record(
"ev-b",
"observation",
"strict-gate.mjs:42 already validates directly; no change needed",
&["file:scripts/strict-gate.mjs:42"],
Some("agent-bob"),
Some("verify"),
);
let contradictions = detect_auto_contradictions(&[left, right]);
assert_eq!(contradictions.len(), 1, "expected 1 contradiction");
// "code-change" is a high-severity kind and "observation" a low-severity
// kind; a mixed pair is graded "medium".
assert_eq!(contradictions[0].severity, "medium");
assert_eq!(contradictions[0].conflicting_item_ids, vec!["ev-a", "ev-b"]);
}
// The two records cite different `file:` source ids, so there is no shared
// direct source and nothing is reported.
#[test]
fn skips_when_no_shared_direct_source() {
let left = record(
"ev-a",
"code-change",
"summary one",
&["file:foo.rs:1"],
Some("a"),
None,
);
let right = record(
"ev-b",
"observation",
"summary two",
&["file:bar.rs:1"],
Some("b"),
None,
);
let contradictions = detect_auto_contradictions(&[left, right]);
assert!(contradictions.is_empty());
}
// Same agent and same lane on both records: the attribution does not differ,
// so the pair is skipped even though source, kind and wording would qualify.
#[test]
fn skips_same_agent_same_lane() {
let left = record(
"ev-a",
"code-change",
"first take",
&["file:foo.rs:1"],
Some("alice"),
Some("impl"),
);
let right = record(
"ev-b",
"observation",
"different take entirely, unrelated words",
&["file:foo.rs:1"],
Some("alice"),
Some("impl"),
);
let contradictions = detect_auto_contradictions(&[left, right]);
assert!(
contradictions.is_empty(),
"same-agent same-lane pairs must not fire"
);
}
// Byte-identical summaries are skipped before any other check runs.
#[test]
fn skips_exact_duplicate_summaries() {
let left = record(
"ev-a",
"code-change",
"identical summary",
&["file:foo.rs:1"],
Some("a"),
None,
);
let right = record(
"ev-b",
"observation",
"identical summary",
&["file:foo.rs:1"],
Some("b"),
None,
);
let contradictions = detect_auto_contradictions(&[left, right]);
assert!(
contradictions.is_empty(),
"exact duplicates are dedupe candidates, not contradictions"
);
}
// Same kind on both sides, and the summaries contain the same words in a
// different order, so the token sets are equal and the overlap ratio is not
// below the floor.
#[test]
fn skips_high_token_overlap_same_kind() {
let left = record(
"ev-a",
"observation",
"the file has twenty lines of code now",
&["file:foo.rs:1"],
Some("a"),
None,
);
let right = record(
"ev-b",
"observation",
"the file now has twenty lines of code",
&["file:foo.rs:1"],
Some("b"),
None,
);
let contradictions = detect_auto_contradictions(&[left, right]);
assert!(
contradictions.is_empty(),
"high-overlap same-kind summaries must not fire"
);
}
}