use std::collections::HashSet;
use chrono::Utc;
use super::llm::LlmProvider;
use super::types::*;
fn jaccard_similarity(a: &HashSet<&str>, b: &HashSet<&str>) -> f64 {
let intersection = a.intersection(b).count();
let union = a.union(b).count();
if union == 0 {
return 0.0;
}
intersection as f64 / union as f64
}
fn source_overlap(a: &TopicPage, b: &TopicPage) -> f64 {
let set_a: HashSet<&str> = a.metadata.source_memory_ids.iter().map(|s| s.as_str()).collect();
let set_b: HashSet<&str> = b.metadata.source_memory_ids.iter().map(|s| s.as_str()).collect();
jaccard_similarity(&set_a, &set_b)
}
fn content_similarity(a: &TopicPage, b: &TopicPage) -> f64 {
let words_a: HashSet<&str> = a.content.split_whitespace().collect();
let words_b: HashSet<&str> = b.content.split_whitespace().collect();
jaccard_similarity(&words_a, &words_b)
}
fn severity_from_overlap(overlap: f64) -> ConflictSeverity {
if overlap >= 0.8 {
ConflictSeverity::Critical
} else if overlap >= 0.6 {
ConflictSeverity::High
} else if overlap >= 0.3 {
ConflictSeverity::Medium
} else {
ConflictSeverity::Low
}
}
pub struct ConflictDetector {
similarity_threshold: f64,
duplicate_threshold: f64,
}
impl ConflictDetector {
pub fn new() -> Self {
Self {
similarity_threshold: 0.1,
duplicate_threshold: 0.6,
}
}
pub fn detect_conflicts(
&self,
topics: &[TopicPage],
scope: &ConflictScope,
llm: Option<&dyn LlmProvider>,
) -> Result<Vec<ConflictRecord>, KcError> {
let pairs = self.select_pairs(topics, scope);
let mut results = Vec::new();
for (i, j) in pairs {
let topic_a = &topics[i];
let topic_b = &topics[j];
let overlap = source_overlap(topic_a, topic_b);
let content_sim = content_similarity(topic_a, topic_b);
if overlap <= self.similarity_threshold && content_sim <= self.similarity_threshold {
continue;
}
let conflict_type = if overlap > self.duplicate_threshold {
ConflictType::Redundant
} else if let Some(llm_provider) = llm {
match self.llm_contradiction_check(topic_a, topic_b, llm_provider) {
Ok(true) => ConflictType::Contradiction,
Ok(false) => continue, Err(_) => {
self.heuristic_classify(overlap)
}
}
} else {
self.heuristic_classify(overlap)
};
let severity = severity_from_overlap(overlap);
let conflict_id = ConflictId(format!("conflict-{}-{}", topic_a.id, topic_b.id));
let description = match &conflict_type {
ConflictType::Redundant => format!(
"Topics '{}' and '{}' are near-duplicates (source overlap: {:.0}%)",
topic_a.title, topic_b.title, overlap * 100.0
),
ConflictType::Contradiction => format!(
"Topics '{}' and '{}' may contain contradictory information (source overlap: {:.0}%)",
topic_a.title, topic_b.title, overlap * 100.0
),
ConflictType::Outdated => format!(
"Topics '{}' and '{}' may have temporal conflict (source overlap: {:.0}%)",
topic_a.title, topic_b.title, overlap * 100.0
),
};
let evidence = vec![
format!("Source memory overlap: {:.1}%", overlap * 100.0),
format!("Content similarity: {:.1}%", content_sim * 100.0),
];
let conflict = Conflict {
id: conflict_id,
conflict_type,
scope: ConflictScope::BetweenTopics(topic_a.id.clone(), topic_b.id.clone()),
description,
status: ConflictStatus::Detected,
detected_at: Utc::now(),
resolved_at: None,
};
results.push(ConflictRecord {
conflict,
severity,
evidence,
});
}
Ok(results)
}
pub fn detect_duplicates(&self, topics: &[TopicPage]) -> Vec<DuplicateGroup> {
let n = topics.len();
let mut parent: Vec<usize> = (0..n).collect();
fn find(parent: &mut [usize], x: usize) -> usize {
if parent[x] != x {
parent[x] = find(parent, parent[x]);
}
parent[x]
}
fn union(parent: &mut [usize], a: usize, b: usize) {
let ra = find(parent, a);
let rb = find(parent, b);
if ra != rb {
parent[rb] = ra;
}
}
let mut max_overlap: std::collections::HashMap<(usize, usize), f64> =
std::collections::HashMap::new();
for i in 0..n {
for j in (i + 1)..n {
let overlap = source_overlap(&topics[i], &topics[j]);
if overlap > self.duplicate_threshold {
union(&mut parent, i, j);
let key = (i.min(j), i.max(j));
let entry = max_overlap.entry(key).or_insert(0.0);
if overlap > *entry {
*entry = overlap;
}
}
}
}
let mut groups: std::collections::HashMap<usize, Vec<usize>> =
std::collections::HashMap::new();
for i in 0..n {
let root = find(&mut parent, i);
groups.entry(root).or_default().push(i);
}
groups
.into_values()
.filter(|members| members.len() > 1)
.map(|members| {
let canonical_idx = *members
.iter()
.max_by_key(|&&idx| topics[idx].metadata.source_memory_ids.len())
.unwrap();
let mut total_sim = 0.0;
let mut pair_count = 0;
for a in &members {
for b in &members {
if a < b {
let key = (*a.min(b), *a.max(b));
if let Some(&sim) = max_overlap.get(&key) {
total_sim += sim;
pair_count += 1;
}
}
}
}
let avg_sim = if pair_count > 0 {
total_sim / pair_count as f64
} else {
0.0
};
let duplicates: Vec<TopicId> = members
.iter()
.filter(|&&idx| idx != canonical_idx)
.map(|&idx| topics[idx].id.clone())
.collect();
DuplicateGroup {
canonical: topics[canonical_idx].id.clone(),
duplicates,
similarity: avg_sim,
}
})
.collect()
}
pub fn suggest_resolutions(&self, conflict: &ConflictRecord) -> Vec<String> {
match &conflict.conflict.conflict_type {
ConflictType::Contradiction => vec![
"Merge topics keeping newer content".to_string(),
"Review both and manually resolve".to_string(),
"Archive older topic".to_string(),
],
ConflictType::Redundant => vec![
"Merge into single topic".to_string(),
"Archive the smaller topic".to_string(),
],
ConflictType::Outdated => vec![
"Recompile from latest sources".to_string(),
"Archive outdated version".to_string(),
],
}
}
fn select_pairs(&self, topics: &[TopicPage], scope: &ConflictScope) -> Vec<(usize, usize)> {
let n = topics.len();
match scope {
ConflictScope::WithinTopic(target_id) => {
if let Some(target_idx) = topics.iter().position(|t| t.id == *target_id) {
(0..n)
.filter(|&i| i != target_idx)
.map(|i| {
if target_idx < i {
(target_idx, i)
} else {
(i, target_idx)
}
})
.collect()
} else {
Vec::new()
}
}
ConflictScope::BetweenTopics(id_a, id_b) => {
let pos_a = topics.iter().position(|t| t.id == *id_a);
let pos_b = topics.iter().position(|t| t.id == *id_b);
match (pos_a, pos_b) {
(Some(a), Some(b)) if a != b => vec![(a.min(b), a.max(b))],
_ => Vec::new(),
}
}
}
}
fn heuristic_classify(&self, overlap: f64) -> ConflictType {
if overlap > self.duplicate_threshold {
ConflictType::Redundant
} else if overlap > 0.3 {
ConflictType::Outdated
} else {
ConflictType::Outdated
}
}
fn llm_contradiction_check(
&self,
topic_a: &TopicPage,
topic_b: &TopicPage,
llm: &dyn LlmProvider,
) -> Result<bool, KcError> {
let prompt = format!(
"Do these two topic summaries contradict each other? \
Topic A: {}. Topic B: {}. \
Answer YES or NO, then briefly explain.",
topic_a.summary, topic_b.summary
);
let request = LlmRequest {
task: LlmTask::DetectConflict,
prompt,
max_tokens: Some(256),
temperature: Some(0.0),
};
let response = llm
.complete(&request)
.map_err(|e| KcError::ConflictDetection(format!("LLM error: {e}")))?;
let content = response.content.trim().to_uppercase();
Ok(content.starts_with("YES"))
}
}
impl Default for ConflictDetector {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::Utc;
fn make_topic(id: &str, title: &str, sources: &[&str]) -> TopicPage {
TopicPage {
id: TopicId(id.to_string()),
title: title.to_string(),
content: format!("Content about {title}"),
sections: Vec::new(),
summary: format!("Summary of {title}"),
metadata: TopicMetadata {
created_at: Utc::now(),
updated_at: Utc::now(),
compilation_count: 1,
source_memory_ids: sources.iter().map(|s| s.to_string()).collect(),
tags: vec![],
quality_score: Some(0.8),
},
status: TopicStatus::Active,
version: 1,
}
}
fn make_topic_with_content(
id: &str,
title: &str,
content: &str,
sources: &[&str],
) -> TopicPage {
TopicPage {
id: TopicId(id.to_string()),
title: title.to_string(),
content: content.to_string(),
sections: Vec::new(),
summary: format!("Summary of {title}"),
metadata: TopicMetadata {
created_at: Utc::now(),
updated_at: Utc::now(),
compilation_count: 1,
source_memory_ids: sources.iter().map(|s| s.to_string()).collect(),
tags: vec![],
quality_score: Some(0.8),
},
status: TopicStatus::Active,
version: 1,
}
}
#[test]
fn test_detect_conflict_overlapping() {
let topic_a = make_topic("a", "Topic A", &["m1", "m2", "m3", "m4", "m5", "m6", "m7"]);
let topic_b = make_topic("b", "Topic B", &["m3", "m4", "m5", "m6", "m7", "m8", "m9", "m10"]);
let topics = vec![topic_a, topic_b];
let detector = ConflictDetector::new();
let scope = ConflictScope::BetweenTopics(TopicId("a".into()), TopicId("b".into()));
let conflicts = detector.detect_conflicts(&topics, &scope, None).unwrap();
assert_eq!(conflicts.len(), 1);
assert_eq!(conflicts[0].conflict.conflict_type, ConflictType::Outdated);
}
#[test]
fn test_detect_no_conflict() {
let topic_a = make_topic_with_content(
"a",
"Rust Programming",
"systems language focusing on safety and performance",
&["m1", "m2", "m3"],
);
let topic_b = make_topic_with_content(
"b",
"French Cooking",
"culinary arts and gourmet recipes from Paris",
&["m10", "m11", "m12"],
);
let topics = vec![topic_a, topic_b];
let detector = ConflictDetector::new();
let scope = ConflictScope::BetweenTopics(TopicId("a".into()), TopicId("b".into()));
let conflicts = detector.detect_conflicts(&topics, &scope, None).unwrap();
assert!(conflicts.is_empty());
}
#[test]
fn test_detect_duplicates_basic() {
let topic_a = make_topic("a", "Topic A", &["m1", "m2", "m3", "m4", "m5", "m6", "m7", "m8", "m9"]);
let topic_b = make_topic("b", "Topic B", &["m2", "m3", "m4", "m5", "m6", "m7", "m8", "m9", "m10"]);
let topics = vec![topic_a, topic_b];
let detector = ConflictDetector::new();
let groups = detector.detect_duplicates(&topics);
assert_eq!(groups.len(), 1);
assert_eq!(groups[0].duplicates.len(), 1);
assert!(groups[0].similarity > 0.7);
}
#[test]
fn test_detect_duplicates_none() {
let topic_a = make_topic_with_content(
"a",
"Rust Programming",
"systems language focusing on safety and performance",
&["m1", "m2", "m3"],
);
let topic_b = make_topic_with_content(
"b",
"French Cooking",
"culinary arts and gourmet recipes from Paris",
&["m10", "m11", "m12"],
);
let topics = vec![topic_a, topic_b];
let detector = ConflictDetector::new();
let groups = detector.detect_duplicates(&topics);
assert!(groups.is_empty());
}
#[test]
fn test_scope_within_topic() {
let topic_a = make_topic("a", "Topic A", &["m1", "m2", "m3", "m4", "m5"]);
let topic_b = make_topic("b", "Topic B", &["m3", "m4", "m5", "m6", "m7"]);
let topic_c = make_topic("c", "Topic C", &["m1", "m2", "m3", "m8", "m9"]);
let topics = vec![topic_a, topic_b, topic_c];
let detector = ConflictDetector::new();
let scope = ConflictScope::WithinTopic(TopicId("b".into()));
let conflicts = detector.detect_conflicts(&topics, &scope, None).unwrap();
for c in &conflicts {
match &c.conflict.scope {
ConflictScope::BetweenTopics(a, b) => {
assert!(
a.0 == "b" || b.0 == "b",
"Expected conflict to involve topic 'b', got ({}, {})",
a, b
);
}
_ => panic!("Expected BetweenTopics scope"),
}
}
assert!(!conflicts.is_empty());
}
#[test]
fn test_scope_between_topics() {
let topic_a = make_topic("a", "Topic A", &["m1", "m2", "m3", "m4", "m5"]);
let topic_b = make_topic("b", "Topic B", &["m3", "m4", "m5", "m6", "m7"]);
let topic_c = make_topic("c", "Topic C", &["m1", "m2", "m3", "m8", "m9"]);
let topics = vec![topic_a, topic_b, topic_c];
let detector = ConflictDetector::new();
let scope = ConflictScope::BetweenTopics(TopicId("a".into()), TopicId("c".into()));
let conflicts = detector.detect_conflicts(&topics, &scope, None).unwrap();
for c in &conflicts {
match &c.conflict.scope {
ConflictScope::BetweenTopics(a, b) => {
let pair = (&a.0 as &str, &b.0 as &str);
assert!(
pair == ("a", "c") || pair == ("c", "a"),
"Expected conflict between 'a' and 'c', got ({}, {})",
a, b
);
}
_ => panic!("Expected BetweenTopics scope"),
}
}
}
#[test]
fn test_suggest_resolutions() {
let detector = ConflictDetector::new();
let contradiction_record = ConflictRecord {
conflict: Conflict {
id: ConflictId("c1".into()),
conflict_type: ConflictType::Contradiction,
scope: ConflictScope::BetweenTopics(TopicId("a".into()), TopicId("b".into())),
description: "test".into(),
status: ConflictStatus::Detected,
detected_at: Utc::now(),
resolved_at: None,
},
severity: ConflictSeverity::High,
evidence: vec![],
};
let suggestions = detector.suggest_resolutions(&contradiction_record);
assert_eq!(suggestions.len(), 3);
assert!(suggestions[0].contains("newer content"));
let redundant_record = ConflictRecord {
conflict: Conflict {
id: ConflictId("c2".into()),
conflict_type: ConflictType::Redundant,
scope: ConflictScope::BetweenTopics(TopicId("a".into()), TopicId("b".into())),
description: "test".into(),
status: ConflictStatus::Detected,
detected_at: Utc::now(),
resolved_at: None,
},
severity: ConflictSeverity::Medium,
evidence: vec![],
};
let suggestions = detector.suggest_resolutions(&redundant_record);
assert_eq!(suggestions.len(), 2);
assert!(suggestions[0].contains("Merge"));
let outdated_record = ConflictRecord {
conflict: Conflict {
id: ConflictId("c3".into()),
conflict_type: ConflictType::Outdated,
scope: ConflictScope::BetweenTopics(TopicId("a".into()), TopicId("b".into())),
description: "test".into(),
status: ConflictStatus::Detected,
detected_at: Utc::now(),
resolved_at: None,
},
severity: ConflictSeverity::Low,
evidence: vec![],
};
let suggestions = detector.suggest_resolutions(&outdated_record);
assert_eq!(suggestions.len(), 2);
assert!(suggestions[0].contains("Recompile"));
}
#[test]
fn test_conflict_severity() {
assert_eq!(severity_from_overlap(0.05), ConflictSeverity::Low);
assert_eq!(severity_from_overlap(0.15), ConflictSeverity::Low);
assert_eq!(severity_from_overlap(0.35), ConflictSeverity::Medium);
assert_eq!(severity_from_overlap(0.5), ConflictSeverity::Medium);
assert_eq!(severity_from_overlap(0.65), ConflictSeverity::High);
assert_eq!(severity_from_overlap(0.75), ConflictSeverity::High);
assert_eq!(severity_from_overlap(0.85), ConflictSeverity::Critical);
assert_eq!(severity_from_overlap(0.95), ConflictSeverity::Critical);
let topic_a = make_topic("a", "Topic A", &["m1", "m2", "m3", "m4", "m5"]);
let topic_b = make_topic("b", "Topic B", &["m3", "m4", "m5", "m6", "m7"]);
let topics = vec![topic_a, topic_b];
let detector = ConflictDetector::new();
let scope = ConflictScope::BetweenTopics(TopicId("a".into()), TopicId("b".into()));
let conflicts = detector.detect_conflicts(&topics, &scope, None).unwrap();
assert_eq!(conflicts.len(), 1);
assert_eq!(conflicts[0].severity, ConflictSeverity::Medium);
}
#[test]
fn test_empty_topics() {
let detector = ConflictDetector::new();
let topics: Vec<TopicPage> = vec![];
let scope = ConflictScope::WithinTopic(TopicId("x".into()));
let conflicts = detector.detect_conflicts(&topics, &scope, None).unwrap();
assert!(conflicts.is_empty());
let groups = detector.detect_duplicates(&topics);
assert!(groups.is_empty());
}
#[test]
fn test_single_topic() {
let detector = ConflictDetector::new();
let topics = vec![make_topic("a", "Topic A", &["m1", "m2"])];
let scope = ConflictScope::WithinTopic(TopicId("a".into()));
let conflicts = detector.detect_conflicts(&topics, &scope, None).unwrap();
assert!(conflicts.is_empty());
}
#[test]
fn test_redundant_conflict_high_overlap() {
let topic_a = make_topic(
"a",
"Topic A",
&["m1", "m2", "m3", "m4", "m5", "m6", "m7", "m8", "m9", "m10"],
);
let topic_b = make_topic(
"b",
"Topic B",
&["m1", "m2", "m3", "m4", "m5", "m6", "m7", "m8", "m9", "m11"],
);
let topics = vec![topic_a, topic_b];
let detector = ConflictDetector::new();
let scope = ConflictScope::BetweenTopics(TopicId("a".into()), TopicId("b".into()));
let conflicts = detector.detect_conflicts(&topics, &scope, None).unwrap();
assert_eq!(conflicts.len(), 1);
assert_eq!(conflicts[0].conflict.conflict_type, ConflictType::Redundant);
}
#[test]
fn test_content_similarity_candidate() {
let topic_a = make_topic_with_content(
"a",
"Topic A",
"the quick brown fox jumps over the lazy dog every day",
&["m1", "m2"],
);
let topic_b = make_topic_with_content(
"b",
"Topic B",
"the quick brown fox jumps over the lazy dog every night",
&["m3", "m4"],
);
let topics = vec![topic_a, topic_b];
let detector = ConflictDetector::new();
let scope = ConflictScope::BetweenTopics(TopicId("a".into()), TopicId("b".into()));
let conflicts = detector.detect_conflicts(&topics, &scope, None).unwrap();
assert!(!conflicts.is_empty());
}
#[test]
fn test_duplicate_canonical_has_most_sources() {
let topic_a = make_topic("a", "Smaller Topic", &["m1", "m2", "m3", "m4", "m5"]);
let topic_b = make_topic(
"b",
"Bigger Topic",
&["m1", "m2", "m3", "m4", "m5", "m6", "m7", "m8"],
);
let topics = vec![topic_a, topic_b];
let detector = ConflictDetector::new();
let groups = detector.detect_duplicates(&topics);
assert_eq!(groups.len(), 1);
assert_eq!(groups[0].canonical.0, "b");
assert_eq!(groups[0].duplicates.len(), 1);
assert_eq!(groups[0].duplicates[0].0, "a");
}
#[test]
fn test_default_impl() {
let detector = ConflictDetector::default();
assert_eq!(detector.similarity_threshold, 0.1);
assert_eq!(detector.duplicate_threshold, 0.6);
}
}