use anyhow::{Result, bail};
use crate::cli::EntryFilter;
use crate::index::IndexConfig;
use crate::knowledge;
use crate::store;
use crate::surreal_db::SurrealDatabase;
/// Keeps only the entries that satisfy every active criterion in `filter`,
/// then caps the result at `filter.limit` entries when a limit is set.
///
/// Each boolean flag on `filter` is ignored when `false`. When `filter.tags`
/// is present, an entry passes if it carries at least one of the listed tags.
/// The relative order of the surviving entries is preserved.
pub(crate) fn apply_entry_filters(
    entries: Vec<knowledge::KnowledgeEntry>,
    filter: &EntryFilter,
) -> Vec<knowledge::KnowledgeEntry> {
    /// Returns `true` when `entry` satisfies every active criterion.
    fn passes(entry: &knowledge::KnowledgeEntry, filter: &EntryFilter) -> bool {
        if filter.has_wake_phrase && !entry.has_any_wake_phrase() {
            return false;
        }
        if filter.missing_wake_phrase && entry.has_any_wake_phrase() {
            return false;
        }
        if filter.has_anchors && entry.anchors.is_empty() {
            return false;
        }
        if filter.missing_anchors && !entry.anchors.is_empty() {
            return false;
        }
        // A resonance type counts as present only when it is a non-empty value.
        if filter.has_resonance_type
            && !entry.resonance_type.as_ref().is_some_and(|s| !s.is_empty())
        {
            return false;
        }
        if filter.missing_resonance_type
            && !entry.resonance_type.as_ref().is_none_or(|s| s.is_empty())
        {
            return false;
        }
        match filter.tags.as_ref() {
            None => true,
            Some(wanted) => wanted.iter().any(|tag| entry.tags.contains(tag)),
        }
    }

    let mut kept: Vec<_> = entries
        .into_iter()
        .filter(|entry| passes(entry, filter))
        .collect();
    // The limit is applied after filtering, so it counts matching entries only.
    if let Some(max_entries) = filter.limit {
        kept.truncate(max_entries);
    }
    kept
}
/// Returns `id` with the `kn-` prefix, adding the prefix only when it is
/// not already there.
///
/// The check is case-sensitive and only looks at the very start of `id`.
pub(crate) fn normalize_id(id: &str) -> String {
    const PREFIX: &str = "kn-";
    match id.strip_prefix(PREFIX) {
        Some(_) => id.to_owned(),
        None => [PREFIX, id].concat(),
    }
}
/// Storage routing for a fact type: the category it is filed under and the
/// tags attached to it. Produced by `route_fact_type`.
pub(crate) struct FactRouting {
/// Category name the fact is stored under (e.g. `"decision"`, `"thread"`).
pub(crate) category: &'static str,
/// Tags attached to the fact; may be empty.
pub(crate) tags: Vec<&'static str>,
}
/// Finds the first open thread whose body matches `content` and returns its id.
///
/// Threads are listed from the `"thread"` category using a context built for
/// `agent_id`. A thread counts as open when its summary state is absent or
/// equal to `"open"`. Bodies and `content` are compared after
/// `KnowledgeEntry::normalize_content`; threads without a body never match.
///
/// # Errors
///
/// Propagates any error from the store lookup, and fails when no open thread
/// matches.
pub(crate) fn find_open_thread_by_content(
    db: &dyn store::KnowledgeStore,
    content: &str,
    agent_id: &str,
) -> Result<String> {
    use crate::knowledge::KnowledgeEntry;

    let ctx = store::AgentContext::for_agent(agent_id);
    let filter = store::KnowledgeFilter {
        categories: Some(vec!["thread".to_string()]),
        ..Default::default()
    };
    let threads = db.list_by_category("thread", &ctx, &filter)?;
    let wanted = KnowledgeEntry::normalize_content(content);

    let hit = threads.into_iter().find(|thread| {
        let open = matches!(
            thread.get_summary_state().as_deref(),
            None | Some("open")
        );
        open
            && thread
                .body
                .as_ref()
                .is_some_and(|body| KnowledgeEntry::normalize_content(body) == wanted)
    });

    match hit {
        Some(thread) => Ok(thread.id),
        None => bail!("No open thread found matching content: '{}'", content),
    }
}
/// Maps a fact type name to the category and tags it is stored under.
///
/// # Errors
///
/// Fails for any name not in the routing table; the error message lists the
/// accepted fact types.
pub(crate) fn route_fact_type(fact_type: &str) -> Result<FactRouting> {
    // (fact type, category, tags) — the single table drives both the lookup
    // and the list of valid names shown in the error message.
    const ROUTES: &[(&str, &str, &[&str])] = &[
        ("decision", "decision", &[]),
        ("insight", "insight", &[]),
        ("person", "reference", &["person"]),
        ("quote", "reference", &["quote"]),
        ("thread_opened", "thread", &["question"]),
        ("commitment", "thread", &["commitment"]),
        ("thread_closed", "thread", &[]),
    ];

    match ROUTES.iter().find(|&&(name, _, _)| name == fact_type) {
        Some(&(_, category, tags)) => Ok(FactRouting {
            category,
            tags: tags.to_vec(),
        }),
        None => bail!(
            "Invalid fact type '{}'. Valid types: {}",
            fact_type,
            ROUTES
                .iter()
                .map(|&(name, _, _)| name)
                .collect::<Vec<_>>()
                .join(", ")
        ),
    }
}
/// Builds the agent context for the current process from `MX_CURRENT_AGENT`.
///
/// * Variable unset, unreadable, or empty: `AgentContext::public_only()`.
/// * `mine` or `include_private` set: `AgentContext::for_agent(agent)`.
/// * Otherwise: `AgentContext::public_for_agent(agent)`.
///
/// NOTE(review): `mine` and `include_private` select the same context here;
/// confirm that any narrowing implied by `mine` is applied by the caller.
pub(crate) fn resolve_agent_context(mine: bool, include_private: bool) -> store::AgentContext {
    let agent = match std::env::var("MX_CURRENT_AGENT") {
        Ok(name) if !name.is_empty() => name,
        _ => return store::AgentContext::public_only(),
    };
    if mine || include_private {
        store::AgentContext::for_agent(agent)
    } else {
        store::AgentContext::public_for_agent(agent)
    }
}
/// Upper cosine-similarity bound used by `auto_anchor`: candidates scoring
/// above this are not anchored, and existing anchors above it are dropped.
/// NOTE(review): presumably such pairs are treated as near-duplicates — confirm.
pub(crate) const NEAR_DUPLICATE_CEILING: f32 = 0.95;
/// Lower cosine-similarity bound used by `auto_anchor`: candidates scoring
/// below this are not anchored, and existing anchors below it are dropped.
pub(crate) const DEFAULT_ANCHOR_THRESHOLD: f32 = 0.75;
/// Computes the cosine similarity of two vectors.
///
/// Returns `0.0` when the slices differ in length or when either vector has
/// zero magnitude (which includes empty slices).
pub(crate) fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return 0.0;
    }

    let norm = |v: &[f32]| v.iter().map(|c| c * c).sum::<f32>().sqrt();
    let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    let (norm_a, norm_b) = (norm(a), norm(b));

    // Guard the division: a zero-length vector has no direction.
    if norm_a == 0.0 || norm_b == 0.0 {
        0.0
    } else {
        dot / (norm_a * norm_b)
    }
}
/// Computes and stores an embedding for the entry with id `entry_id`.
///
/// The entry is looked up with a context for the agent named in
/// `MX_CURRENT_AGENT` (or a public-only context when that variable is unset
/// or empty). If the entry is not found the function does nothing and
/// returns `Ok(())`. Otherwise the embedding, model id, `embedded_at` and
/// `updated_at` fields are set and the entry is written back.
///
/// # Errors
///
/// Propagates failures from the store lookup, provider construction,
/// embedding computation, and the final upsert.
pub(crate) fn auto_embed(entry_id: &str, db: &dyn store::KnowledgeStore) -> Result<()> {
    use crate::embeddings::{EmbeddingProvider, TractProvider};

    let current_agent = std::env::var("MX_CURRENT_AGENT")
        .ok()
        .filter(|name| !name.is_empty());
    let ctx = match current_agent {
        Some(name) => store::AgentContext::for_agent(name),
        None => store::AgentContext::public_only(),
    };

    let Some(mut entry) = db.get(entry_id, &ctx)? else {
        return Ok(());
    };

    let mut provider = TractProvider::new()?;
    let vector = provider.embed(&entry.embedding_text())?;
    let model = provider.model_id().to_string();

    entry.embedding = Some(vector);
    entry.embedding_model = Some(model);
    // NOTE(review): the two timestamps are taken separately, so `updated_at`
    // is marginally later than `embedded_at` — confirm nothing compares them.
    entry.embedded_at = Some(chrono::Utc::now().to_rfc3339());
    entry.updated_at = Some(chrono::Utc::now().to_rfc3339());

    db.upsert_knowledge(&entry)?;
    Ok(())
}
/// Refreshes the anchors of the entry with id `entry_id` based on embedding
/// similarity to the other entries visible to the current agent.
///
/// Behaviour:
/// * Returns `Ok(())` without writing when the entry is not found or has no
///   embedding.
/// * An existing anchor is dropped when its similarity to the entry falls
///   below `DEFAULT_ANCHOR_THRESHOLD` or above `NEAR_DUPLICATE_CEILING`.
///   Existing anchors whose target has no embedding (or is not listed) are
///   left untouched.
/// * A new candidate is considered only when it is not in
///   `explicitly_removed`, passes the visibility rule (public candidates are
///   always eligible; a private candidate only for a private entry with the
///   same owner), and its similarity lies within the two bounds. At most five
///   new anchors, highest similarity first, are added per call.
/// * Ids in `explicitly_removed` are stripped from the entry's anchors.
/// * The resulting anchor list is sorted and de-duplicated, `updated_at` is
///   refreshed, and the entry is written back. Nothing is written when there
///   is nothing to drop, strip, or add.
///
/// # Errors
///
/// Propagates failures from the store lookup, listing, and upsert.
pub(crate) fn auto_anchor(
    entry_id: &str,
    db: &dyn store::KnowledgeStore,
    explicitly_removed: Option<&[String]>,
) -> Result<()> {
    // Cap on anchors added in one call; existing anchors do not count.
    const MAX_NEW_ANCHORS: usize = 5;

    let ctx = match std::env::var("MX_CURRENT_AGENT") {
        Ok(agent) if !agent.is_empty() => store::AgentContext::for_agent(agent),
        _ => store::AgentContext::public_only(),
    };
    let Some(entry) = db.get(entry_id, &ctx)? else {
        return Ok(());
    };
    let Some(entry_embedding) = entry.embedding.as_ref() else {
        return Ok(());
    };

    let is_removed =
        |id: &String| explicitly_removed.is_some_and(|removed| removed.contains(id));

    let mut new_matches: Vec<(String, f32)> = Vec::new();
    let mut stale_anchors: Vec<String> = Vec::new();

    for candidate in db.list_all(&ctx)? {
        // Entries without an embedding cannot be scored at all.
        let Some(candidate_embedding) = candidate.embedding.as_ref() else {
            continue;
        };
        if candidate.id == entry.id {
            continue;
        }

        // Existing anchors are only re-validated; visibility and explicit
        // removal are not re-checked for them here.
        if entry.anchors.contains(&candidate.id) {
            let similarity = cosine_similarity(entry_embedding, candidate_embedding);
            if similarity < DEFAULT_ANCHOR_THRESHOLD || similarity > NEAR_DUPLICATE_CEILING {
                stale_anchors.push(candidate.id.clone());
            }
            continue;
        }

        if is_removed(&candidate.id) {
            continue;
        }

        let can_anchor = candidate.visibility == "public"
            || (entry.visibility == "private"
                && candidate.visibility == "private"
                && candidate.owner == entry.owner);
        if !can_anchor {
            continue;
        }

        let similarity = cosine_similarity(entry_embedding, candidate_embedding);
        if similarity >= DEFAULT_ANCHOR_THRESHOLD && similarity <= NEAR_DUPLICATE_CEILING {
            new_matches.push((candidate.id.clone(), similarity));
        }
    }

    // Explicitly removed ids that are still anchored must be stripped even
    // when no anchor went stale and no new match was found.
    let has_pending_removal = entry.anchors.iter().any(|id| is_removed(id));
    if stale_anchors.is_empty() && new_matches.is_empty() && !has_pending_removal {
        return Ok(());
    }

    // Highest similarity first; the stable sort keeps listing order for ties.
    new_matches.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
    new_matches.truncate(MAX_NEW_ANCHORS);

    let mut updated_anchors: Vec<String> = entry
        .anchors
        .iter()
        .filter(|id| !stale_anchors.contains(*id) && !is_removed(*id))
        .cloned()
        .collect();
    updated_anchors.extend(new_matches.into_iter().map(|(id, _)| id));
    updated_anchors.sort();
    updated_anchors.dedup();

    // The entry is not needed in its old form any more, so reuse it rather
    // than cloning it (which would copy the embedding vector).
    let mut updated_entry = entry;
    updated_entry.anchors = updated_anchors;
    updated_entry.updated_at = Some(chrono::Utc::now().to_rfc3339());
    db.upsert_knowledge(&updated_entry)?;
    Ok(())
}
/// Opens the SurrealDB database stored next to the index database.
///
/// The path is `config.db_path` with its extension replaced by `surreal`;
/// `verbose` is forwarded to `SurrealDatabase::open_with_verbose`.
///
/// # Errors
///
/// Propagates any error returned by `SurrealDatabase::open_with_verbose`.
pub(crate) fn open_surreal(config: &IndexConfig, verbose: bool) -> Result<SurrealDatabase> {
    SurrealDatabase::open_with_verbose(config.db_path.with_extension("surreal"), verbose)
}