#![allow(dead_code)]
use crate::traits::Fact;
use std::collections::HashSet;
/// Lowercase root words that [`is_relationship_key`] treats as relationship markers.
///
/// A non-namespaced key matches when, after ASCII lowercasing, it equals a root,
/// starts with `<root>_`, or ends with `_<root>`.
const RELATIONSHIP_ROOTS: &[&str] = &[
    // Parents.
    "mother",
    "father",
    "mom",
    "dad",
    "parent",
    // Partners.
    "partner",
    "spouse",
    "wife",
    "husband",
    // Children.
    "child",
    "children",
    "son",
    "daughter",
    "kid",
    // Siblings.
    "sibling",
    "brother",
    "sister",
    // Grandparents.
    "grandmother",
    "grandfather",
];
/// Size limits applied by [`select_neighborhood_facts`].
#[derive(Debug, Clone, Copy)]
pub struct NeighborhoodCaps {
    /// How many of the leading resolved names are considered.
    pub max_entities: usize,
    /// Maximum number of facts returned.
    pub max_facts: usize,
}

impl Default for NeighborhoodCaps {
    /// Returns the default limits: 6 entities and 16 facts.
    fn default() -> Self {
        let max_entities = 6;
        let max_facts = 16;
        NeighborhoodCaps {
            max_entities,
            max_facts,
        }
    }
}
/// Returns the namespace of a fact key: the text before the first `:`.
///
/// Yields `None` when the key contains no `:` or when the text before the
/// first `:` is empty.
pub fn fact_namespace(key: &str) -> Option<&str> {
    match key.split_once(':') {
        Some((prefix, _)) if !prefix.is_empty() => Some(prefix),
        _ => None,
    }
}
/// Reports whether `key` names a relationship (e.g. `mother_name`, `father`).
///
/// Namespaced keys (those for which [`fact_namespace`] returns `Some`) are never
/// relationship keys. Otherwise the key is ASCII-lowercased and matches when it
/// equals one of [`RELATIONSHIP_ROOTS`], starts with `<root>_`, or ends with
/// `_<root>`.
pub fn is_relationship_key(key: &str) -> bool {
    if fact_namespace(key).is_some() {
        return false;
    }

    let lower = key.to_ascii_lowercase();
    RELATIONSHIP_ROOTS.iter().any(|&root| {
        // `strip_prefix`/`strip_suffix` check the `root_` / `_root` shapes without
        // building a temporary `String` for every root.
        let has_root_prefix = lower
            .strip_prefix(root)
            .map_or(false, |rest| rest.starts_with('_'));
        let has_root_suffix = lower
            .strip_suffix(root)
            .map_or(false, |rest| rest.ends_with('_'));

        lower == root || has_root_prefix || has_root_suffix
    })
}
/// Splits `s` into lowercase word tokens.
///
/// Any character that is not alphanumeric (per `char::is_alphanumeric`, which is
/// Unicode-aware) acts as a separator; empty pieces are dropped.
///
/// Case folding uses the Unicode-aware `str::to_lowercase` so that it agrees
/// with the Unicode-aware splitting: with ASCII-only folding, tokens such as
/// "ÉMILE" and "Émile" would never compare equal.
fn word_tokens(s: &str) -> Vec<String> {
    s.split(|c: char| !c.is_alphanumeric())
        .filter(|piece| !piece.is_empty())
        .map(str::to_lowercase)
        .collect()
}
/// Reports whether `entity` is mentioned in `haystack` as whole words.
///
/// Both strings are tokenized with [`word_tokens`]. Entity words shorter than two
/// bytes (`String::len`) are ignored. The result is `true` only when at least one
/// entity word remains and every remaining word occurs among the haystack tokens;
/// word order and adjacency are not checked.
pub(crate) fn entity_mentioned_as_words(haystack: &str, entity: &str) -> bool {
    let present: HashSet<String> = word_tokens(haystack).into_iter().collect();

    let mut saw_significant_word = false;
    for word in word_tokens(entity) {
        if word.len() < 2 {
            continue;
        }
        if !present.contains(&word) {
            return false;
        }
        saw_significant_word = true;
    }

    // An entity with no significant words never counts as mentioned.
    saw_significant_word
}
/// Ranking score for a fact: `recall_count * 1_000_000 + updated_at.timestamp()`.
///
/// Higher scores rank first in [`select_neighborhood_facts`].
fn salience(f: &Fact) -> i64 {
    let recall_weight = (f.recall_count as i64) * 1_000_000;
    let recency = f.updated_at.timestamp();
    recall_weight + recency
}
/// Selects the facts surrounding the resolved entities.
///
/// A fact from `all_facts` qualifies when its id is not in `initial_ids` and at
/// least one of the following holds:
///
/// - its key namespace ([`fact_namespace`]) equals one of the names, ignoring
///   ASCII case;
/// - one of the names is mentioned as whole words in its key or value
///   ([`entity_mentioned_as_words`]);
/// - `owner_relationship` is `true` and its key is a relationship key
///   ([`is_relationship_key`]).
///
/// Only the first `caps.max_entities` entries of `resolved_names` are used.
/// Qualifying facts are de-duplicated by id (first occurrence wins), ordered by
/// descending [`salience`] (ties keep their input order), and truncated to
/// `caps.max_facts`.
///
/// Returns an empty vector when `resolved_names` is empty, regardless of
/// `owner_relationship`.
///
/// NOTE(review): `superseded_at` is not inspected here; presumably the caller
/// passes only live facts — confirm.
pub fn select_neighborhood_facts(
    all_facts: &[Fact],
    resolved_names: &[String],
    owner_relationship: bool,
    initial_ids: &HashSet<i64>,
    caps: NeighborhoodCaps,
) -> Vec<Fact> {
    if resolved_names.is_empty() {
        return Vec::new();
    }

    // Borrow the leading names instead of cloning them; the bound cannot exceed
    // the slice length, so the slicing never panics.
    let names = &resolved_names[..resolved_names.len().min(caps.max_entities)];
    let target_ns: HashSet<String> = names.iter().map(|n| n.to_ascii_lowercase()).collect();

    // Work on references throughout so that only the facts that survive the cap
    // are cloned at the very end.
    let mut seen: HashSet<i64> = HashSet::new();
    let mut picked: Vec<&Fact> = all_facts
        .iter()
        .filter(|f| !initial_ids.contains(&f.id))
        .filter(|f| {
            let ns_hit = fact_namespace(&f.key)
                .map_or(false, |ns| target_ns.contains(&ns.to_ascii_lowercase()));
            let mention_hit = names.iter().any(|n| {
                entity_mentioned_as_words(&f.key, n) || entity_mentioned_as_words(&f.value, n)
            });
            let rel_hit = owner_relationship && is_relationship_key(&f.key);
            ns_hit || mention_hit || rel_hit
        })
        // `insert` returns false for an id that was already kept.
        .filter(|f| seen.insert(f.id))
        .collect();

    // Stable sort: facts with equal salience keep their input order.
    picked.sort_by_key(|f| std::cmp::Reverse(salience(f)));
    picked.truncate(caps.max_facts);
    picked.into_iter().cloned().collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::traits::Fact;
    use std::collections::HashSet;

    /// Builds a fixture fact with the given identity and content.
    ///
    /// Every fixture has `recall_count` 0, source `"test"`, global privacy, and
    /// creation/update timestamps taken at call time.
    fn fact(id: i64, category: &str, key: &str, value: &str) -> Fact {
        Fact {
            id,
            category: category.into(),
            key: key.into(),
            value: value.into(),
            source: "test".into(),
            created_at: chrono::Utc::now(),
            updated_at: chrono::Utc::now(),
            superseded_at: None,
            recall_count: 0,
            last_recalled_at: None,
            channel_id: None,
            privacy: crate::types::FactPrivacy::Global,
            first_seen_at: None,
            source_excerpt: None,
        }
    }

    /// Collects the ids of the selected facts for membership assertions.
    fn picked_ids(facts: &[Fact]) -> HashSet<i64> {
        facts.iter().map(|f| f.id).collect()
    }

    #[test]
    fn namespace_is_prefix_before_colon() {
        let namespaced = fact_namespace("LearnEnglishSounds:path");
        assert_eq!(namespaced, Some("LearnEnglishSounds"));

        let plain = fact_namespace("partner_name");
        assert_eq!(plain, None);
    }

    #[test]
    fn relationship_keys_detected() {
        assert!(is_relationship_key("mother_name"));
        assert!(is_relationship_key("father"));
        assert!(is_relationship_key("partner_name"));
        // A namespaced key is never a relationship key.
        assert!(!is_relationship_key("LearnEnglishSounds:path"));
    }

    #[test]
    fn owner_relationship_query_pulls_the_whole_family_cluster() {
        let facts = [
            fact(1, "user", "mother_name", "Carol Mendez"),
            fact(2, "user", "father", "Frank Mendez"),
            fact(3, "user", "partner_name", "Alice Rivera"),
            fact(4, "project", "LearnEnglishSounds:path", "~/projects/LES"),
        ];
        let already_matched = HashSet::from([1_i64]);
        let resolved = [String::from("Carol")];

        let expanded = select_neighborhood_facts(
            &facts,
            &resolved,
            true,
            &already_matched,
            NeighborhoodCaps::default(),
        );
        let ids = picked_ids(&expanded);

        assert!(
            ids.contains(&2),
            "father=Frank must be pulled into the cluster"
        );
        assert!(
            ids.contains(&3),
            "partner is part of the owner relationship set"
        );
        assert!(!ids.contains(&1), "initial match is deduped out");
        assert!(!ids.contains(&4), "unrelated project fact is not pulled");
    }

    #[test]
    fn namespace_query_pulls_the_concept_cluster() {
        let facts = [
            fact(10, "project", "LearnEnglishSounds:path", "~/p/LES"),
            fact(11, "technical", "LearnEnglishSounds:tech_stack", "Next.js"),
            fact(12, "user", "partner_name", "Alice"),
        ];
        let already_matched = HashSet::from([10_i64]);
        let resolved = [String::from("LearnEnglishSounds")];

        let expanded = select_neighborhood_facts(
            &facts,
            &resolved,
            false,
            &already_matched,
            NeighborhoodCaps::default(),
        );
        let ids = picked_ids(&expanded);

        assert!(ids.contains(&11), "same-namespace fact pulled");
        assert!(!ids.contains(&12), "unrelated fact not pulled");
    }

    #[test]
    fn empty_resolved_names_returns_empty() {
        let facts = [fact(1, "user", "partner_name", "Alice")];
        let no_initial: HashSet<i64> = HashSet::new();

        let expanded = select_neighborhood_facts(
            &facts,
            &[],
            false,
            &no_initial,
            NeighborhoodCaps::default(),
        );

        assert!(expanded.is_empty(), "no resolved entities -> no expansion");
    }

    #[test]
    fn cap_is_enforced() {
        // Twenty relationship facts: more than the default `max_facts`.
        let mut facts: Vec<Fact> = Vec::with_capacity(20);
        for i in 1..=20 {
            facts.push(fact(i, "user", "partner_name", &format!("Person {i}")));
        }
        let no_initial: HashSet<i64> = HashSet::new();
        let caps = NeighborhoodCaps::default();
        let resolved = [String::from("someone")];

        let expanded = select_neighborhood_facts(&facts, &resolved, true, &no_initial, caps);

        assert_eq!(
            expanded.len(),
            caps.max_facts,
            "output must be capped at max_facts even when more facts qualify"
        );
    }

    #[test]
    fn co_mention_is_word_boundary_not_substring() {
        let banana_fact = fact(1, "food", "preference", "I like banana bread");
        let frank_fact = fact(2, "user", "father", "Frank Mendez");
        let no_initial: HashSet<i64> = HashSet::new();

        let for_ana = select_neighborhood_facts(
            std::slice::from_ref(&banana_fact),
            &[String::from("Ana")],
            false,
            &no_initial,
            NeighborhoodCaps::default(),
        );
        assert!(
            for_ana.is_empty(),
            "'Ana' must not match 'banana' via substring — word boundary required"
        );

        let for_frank = select_neighborhood_facts(
            std::slice::from_ref(&frank_fact),
            &[String::from("Frank")],
            false,
            &no_initial,
            NeighborhoodCaps::default(),
        );
        assert!(
            for_frank.iter().any(|f| f.id == 2),
            "'Frank' must match 'Frank Mendez' as a whole word"
        );
    }
}