use anyhow::{Context, Result};
use base_d::{DictionaryRegistry, HashAlgorithm, encode, hash};
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::Path;
/// A single knowledge entry, (de)serializable through serde.
///
/// Only `id`, `category_id` and `title` are required when deserializing;
/// every other field carries a serde default and may be omitted from the
/// serialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeEntry {
    /// Identifier of the entry (required).
    pub id: String,
    /// Identifier of the category the entry belongs to (required).
    pub category_id: String,
    /// Human-readable title (required).
    pub title: String,

    /// Entry text; `None` when absent.
    #[serde(default)]
    pub body: Option<String>,
    /// Short summary; `None` when absent.
    #[serde(default)]
    pub summary: Option<String>,
    /// Applicability labels; empty when absent.
    #[serde(default)]
    pub applicability: Vec<String>,
    /// Optional identifier of the originating project.
    #[serde(default)]
    pub source_project_id: Option<String>,
    /// Optional identifier of the originating agent.
    #[serde(default)]
    pub source_agent_id: Option<String>,
    /// Optional path of the backing file.
    #[serde(default)]
    pub file_path: Option<String>,
    /// Free-form tags; empty when absent.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Optional creation timestamp, kept as a string.
    #[serde(default)]
    pub created_at: Option<String>,
    /// Optional last-update timestamp, kept as a string.
    #[serde(default)]
    pub updated_at: Option<String>,
    /// Optional hash of the entry content.
    #[serde(default)]
    pub content_hash: Option<String>,

    // Classification identifiers. All optional; their vocabularies are
    // defined outside this type.
    #[serde(default)]
    pub source_type_id: Option<String>,
    #[serde(default)]
    pub entry_type_id: Option<String>,
    #[serde(default)]
    pub session_id: Option<String>,
    /// Defaults to `false`.
    #[serde(default)]
    pub ephemeral: bool,
    #[serde(default)]
    pub content_type_id: Option<String>,

    /// Optional owner.
    #[serde(default)]
    pub owner: Option<String>,
    /// Defaults to `"public"` when absent.
    #[serde(default = "default_visibility")]
    pub visibility: String,

    // Resonance / activation bookkeeping. Numeric fields default to zero.
    // NOTE(review): the meaning of these values is not visible from this type.
    #[serde(default)]
    pub resonance: i32,
    #[serde(default)]
    pub resonance_type: Option<String>,
    #[serde(default)]
    pub last_activated: Option<String>,
    #[serde(default)]
    pub activation_count: i32,
    /// Defaults to `0.0` when absent.
    #[serde(default = "default_decay_rate")]
    pub decay_rate: f64,
    #[serde(default)]
    pub anchors: Vec<String>,

    /// List of wake phrases; empty when absent.
    #[serde(default)]
    pub wake_phrases: Vec<String>,
    #[serde(default)]
    pub wake_order: Option<i32>,
    /// Single wake phrase, kept alongside the `wake_phrases` list.
    #[serde(default)]
    pub wake_phrase: Option<String>,

    /// Optional embedding vector.
    #[serde(default)]
    pub embedding: Option<Vec<f32>>,
    /// Optional name of the model that produced `embedding`.
    #[serde(default)]
    pub embedding_model: Option<String>,
    /// Optional timestamp of the embedding, kept as a string.
    #[serde(default)]
    pub embedded_at: Option<String>,

    /// Content format; defaults to `"markdown"` when absent.
    #[serde(default = "default_format")]
    pub format: String,
    /// Optional effective resonance value; absent by default.
    #[serde(default)]
    pub effective_resonance: Option<f64>,
}
/// Serde default for the `format` field: `"markdown"`.
fn default_format() -> String {
    String::from("markdown")
}
/// Serde default for the `visibility` field: `"public"`.
fn default_visibility() -> String {
    String::from("public")
}
/// Serde default for the `decay_rate` field: `0.0`.
fn default_decay_rate() -> f64 {
    0.0_f64
}
/// Deserializes `applicability` from either a single string or a list of strings.
///
/// * a scalar string becomes a one-element vector;
/// * a sequence must contain only strings, otherwise deserialization fails;
/// * any other value (null, number, mapping, ...) yields an empty vector.
///
/// # Errors
///
/// Returns the deserializer's error if the value cannot be read, or a custom
/// error when a sequence element is not a string.
fn deserialize_applicability<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::Error;
    use serde_yaml::Value;

    let items = match Value::deserialize(deserializer)? {
        Value::String(single) => return Ok(vec![single]),
        Value::Sequence(items) => items,
        _ => return Ok(vec![]),
    };

    let mut labels = Vec::with_capacity(items.len());
    for item in items {
        // Stop at the first non-string element, like a fallible `collect` would.
        let Value::String(label) = item else {
            return Err(D::Error::custom("Expected string in applicability array"));
        };
        labels.push(label);
    }
    Ok(labels)
}
/// Metadata fields read from the YAML frontmatter of a markdown entry.
///
/// Every field is optional; anything missing falls back to its `Default`.
#[derive(Debug, Default, Deserialize)]
pub struct Frontmatter {
    /// Explicit entry id.
    #[serde(default)]
    pub id: Option<String>,
    /// Explicit title.
    #[serde(default)]
    pub title: Option<String>,
    /// Explicit category.
    #[serde(default)]
    pub category: Option<String>,
    /// Tags; empty when absent.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Applicability labels; accepts a single string or a list of strings.
    #[serde(default, deserialize_with = "deserialize_applicability")]
    pub applicability: Vec<String>,
    /// Originating project.
    #[serde(default)]
    pub source_project: Option<String>,
    /// Originating agent.
    #[serde(default)]
    pub source_agent: Option<String>,
    /// Creation timestamp, kept as the raw string from the frontmatter.
    #[serde(default)]
    pub created: Option<String>,
}
impl KnowledgeEntry {
    /// Returns the wake phrases that apply to this entry.
    ///
    /// The `wake_phrases` list is used whenever it is non-empty; otherwise the
    /// single `wake_phrase` field is the fallback. An empty `wake_phrase`
    /// counts as "no phrase", so this agrees with [`Self::has_any_wake_phrase`].
    pub fn active_wake_phrases(&self) -> Vec<&str> {
        if self.wake_phrases.is_empty() {
            self.wake_phrase
                .as_deref()
                // `Some("")` must not surface as a phrase.
                .filter(|phrase| !phrase.is_empty())
                .into_iter()
                .collect()
        } else {
            self.wake_phrases.iter().map(String::as_str).collect()
        }
    }

    /// Returns `true` when `wake_phrases` is non-empty or `wake_phrase` holds
    /// a non-empty string.
    pub fn has_any_wake_phrase(&self) -> bool {
        !self.wake_phrases.is_empty()
            || self
                .wake_phrase
                .as_deref()
                .is_some_and(|phrase| !phrase.is_empty())
    }

    /// Builds the text handed to an embedding model.
    ///
    /// The parts, separated by blank lines, are: the title; the summary if
    /// present, otherwise the first 2000 characters of the body; and a
    /// `Tags: a, b` line when the entry has tags.
    pub fn embedding_text(&self) -> String {
        let mut parts = vec![self.title.clone()];
        if let Some(summary) = &self.summary {
            parts.push(summary.clone());
        } else if let Some(body) = &self.body {
            // Truncate by characters, not bytes, to stay on UTF-8 boundaries.
            parts.push(body.chars().take(2000).collect());
        }
        if !self.tags.is_empty() {
            parts.push(format!("Tags: {}", self.tags.join(", ")));
        }
        parts.join("\n\n")
    }

    /// Lowercases `content` and collapses every run of whitespace into a
    /// single space, dropping leading and trailing whitespace.
    pub fn normalize_content(content: &str) -> String {
        content
            .trim()
            .to_lowercase()
            .split_whitespace()
            .collect::<Vec<_>>()
            .join(" ")
    }

    /// Interprets `summary` as JSON and returns its top-level `"state"` string.
    ///
    /// Returns `None` when there is no summary, the summary is not valid
    /// JSON, or `"state"` is missing or not a string.
    pub fn get_summary_state(&self) -> Option<String> {
        self.summary
            .as_ref()
            .and_then(|s| serde_json::from_str::<serde_json::Value>(s).ok())
            .and_then(|v| v.get("state").and_then(|s| s.as_str()).map(String::from))
    }

    /// Derives an id of the form `kn-` plus the first eight hex digits of the
    /// BLAKE3 hash of `"{path}:{title}"`.
    ///
    /// # Panics
    ///
    /// Panics if the base-d default dictionaries cannot be loaded.
    pub fn generate_id(path: &str, title: &str) -> String {
        let input = format!("{}:{}", path, title);
        let hex = Self::blake3_hex(input.as_bytes());
        format!("kn-{}", &hex[..8])
    }

    /// Returns the lowercase hex BLAKE3 hash of `content`.
    ///
    /// # Panics
    ///
    /// Panics if the base-d default dictionaries cannot be loaded.
    pub fn compute_hash(content: &str) -> String {
        Self::blake3_hex(content.as_bytes())
    }

    /// Hashes `data` with BLAKE3 and encodes the digest with base-d's
    /// `base16` dictionary, lowercased.
    ///
    /// # Panics
    ///
    /// Panics if the default dictionary registry fails to load or has no
    /// `base16` entry.
    fn blake3_hex(data: &[u8]) -> String {
        let hash_bytes = hash(data, HashAlgorithm::Blake3);
        let registry = DictionaryRegistry::load_default().expect("base-d dictionaries");
        let dict = registry.dictionary("base16").expect("base16 dictionary");
        encode(&hash_bytes, &dict).to_lowercase()
    }

    /// Loads a markdown file and turns it into an entry.
    ///
    /// Frontmatter values take precedence. Missing values are derived:
    /// * category: the first directory of the path relative to `memory_root`,
    ///   or `"unknown"` when the file has no parent directory there;
    /// * title: the first markdown heading, then the file stem, then `"Untitled"`;
    /// * id: generated from the relative path and the title;
    /// * created_at: the current time.
    ///
    /// `content_hash` covers the whole file, frontmatter included.
    ///
    /// # Errors
    ///
    /// Fails when the file cannot be read or `parse_frontmatter` returns an error.
    pub fn from_markdown(path: &Path, memory_root: &Path) -> Result<Self> {
        let content =
            fs::read_to_string(path).with_context(|| format!("Failed to read {:?}", path))?;
        let (frontmatter, body) = parse_frontmatter(&content)?;
        let relative = path.strip_prefix(memory_root).unwrap_or(path);

        let category_id = frontmatter.category.unwrap_or_else(|| {
            relative
                // Look at the containing directory so that a file directly
                // under the root does not use its own file name as category.
                .parent()
                .and_then(|dir| dir.components().next())
                .and_then(|c| c.as_os_str().to_str())
                .unwrap_or("unknown")
                .to_string()
        });

        let title = frontmatter
            .title
            .or_else(|| extract_first_heading(&body))
            .unwrap_or_else(|| {
                path.file_stem()
                    .and_then(|s| s.to_str())
                    .unwrap_or("Untitled")
                    .to_string()
            });

        let summary = extract_summary(&body);
        let path_str = relative.to_string_lossy().to_string();
        let id = frontmatter
            .id
            .unwrap_or_else(|| Self::generate_id(&path_str, &title));
        let now = chrono::Utc::now().to_rfc3339();

        Ok(Self {
            id,
            category_id,
            title,
            body: Some(body),
            summary,
            applicability: frontmatter.applicability,
            source_project_id: frontmatter.source_project,
            source_agent_id: frontmatter.source_agent,
            file_path: Some(path_str),
            tags: frontmatter.tags,
            created_at: frontmatter.created.or_else(|| Some(now.clone())),
            updated_at: Some(now),
            content_hash: Some(Self::compute_hash(&content)),
            source_type_id: Some("manual".to_string()),
            entry_type_id: Some("primary".to_string()),
            session_id: None,
            ephemeral: false,
            content_type_id: Some("text".to_string()),
            owner: None,
            visibility: "public".to_string(),
            resonance: 0,
            resonance_type: None,
            last_activated: None,
            activation_count: 0,
            decay_rate: 0.0,
            anchors: vec![],
            wake_phrases: vec![],
            wake_order: None,
            wake_phrase: None,
            embedding: None,
            embedding_model: None,
            embedded_at: None,
            format: "markdown".to_string(),
            effective_resonance: None,
        })
    }
}
/// Splits a markdown document into its YAML frontmatter and body.
///
/// Leading whitespace is ignored. The document has frontmatter when it starts
/// with `---` and a later line starts with `---`; the text in between is
/// parsed as YAML. Otherwise the (left-trimmed) document is returned as the
/// body together with a default [`Frontmatter`].
///
/// Parsing is best-effort: YAML that fails to deserialize yields a default
/// [`Frontmatter`] rather than an error, so this currently never returns `Err`.
fn parse_frontmatter(content: &str) -> Result<(Frontmatter, String)> {
    const CLOSING: &str = "\n---";

    let trimmed = content.trim_start();

    // Searching for "\n---" also covers CRLF files, since "\r\n---" contains
    // it; the stray '\r' stays at the end of the YAML text.
    let split = trimmed
        .strip_prefix("---")
        .and_then(|after_open| after_open.find(CLOSING).map(|at| after_open.split_at(at)));

    let Some((yaml, tail)) = split else {
        return Ok((Frontmatter::default(), trimmed.to_string()));
    };

    let body = tail[CLOSING.len()..]
        .trim_start_matches(['\n', '\r'])
        .to_string();
    let frontmatter = serde_yaml::from_str::<Frontmatter>(yaml).unwrap_or_default();
    Ok((frontmatter, body))
}
/// Returns the text of the first markdown heading in `content`.
///
/// A heading is a line whose first non-blank character is `#`; the leading
/// `#` run and surrounding whitespace are stripped. Lines inside fenced code
/// blocks (``` or ~~~) are ignored, so a `# comment` in a shell snippet is
/// not mistaken for a title. Headings with no text are skipped.
///
/// Returns `None` when no such heading exists.
fn extract_first_heading(content: &str) -> Option<String> {
    // Fence character of the code block currently open, if any.
    let mut open_fence: Option<char> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        let fence_char = if trimmed.starts_with("```") {
            Some('`')
        } else if trimmed.starts_with("~~~") {
            Some('~')
        } else {
            None
        };

        match (open_fence, fence_char) {
            // A fence of the same kind closes the open block.
            (Some(open), Some(seen)) if open == seen => {
                open_fence = None;
                continue;
            }
            // Anything else while a block is open is code.
            (Some(_), _) => continue,
            (None, Some(seen)) => {
                // A backtick fence's info string may not contain backticks;
                // such a line is inline code, not a block opener.
                let info = trimmed.trim_start_matches(seen);
                if seen == '~' || !info.contains('`') {
                    open_fence = Some(seen);
                }
                continue;
            }
            (None, None) => {}
        }

        if trimmed.starts_with('#') {
            let text = trimmed.trim_start_matches('#').trim();
            if !text.is_empty() {
                return Some(text.to_string());
            }
        }
    }
    None
}
/// Extracts the first plain paragraph of a markdown document as a summary.
///
/// Consecutive text lines are joined with single spaces until a blank line
/// ends the paragraph. Headings, list items (`-`, `*`), block quotes (`>`)
/// and table rows (`|`) are skipped, as is everything inside fenced code
/// blocks (``` or ~~~), so code is never returned as the summary.
///
/// Returns `None` when the document contains no such paragraph.
fn extract_summary(content: &str) -> Option<String> {
    let mut paragraph = String::new();
    // Fence character of the code block currently open, if any.
    let mut open_fence: Option<char> = None;

    for line in content.lines() {
        let trimmed = line.trim();

        let fence_char = if trimmed.starts_with("```") {
            Some('`')
        } else if trimmed.starts_with("~~~") {
            Some('~')
        } else {
            None
        };

        match (open_fence, fence_char) {
            // A fence of the same kind closes the open block.
            (Some(open), Some(seen)) if open == seen => {
                open_fence = None;
                continue;
            }
            // Anything else while a block is open is code, blank lines included.
            (Some(_), _) => continue,
            (None, Some(seen)) => {
                // A backtick fence's info string may not contain backticks;
                // such a line is inline code, which is skipped but opens nothing.
                let info = trimmed.trim_start_matches(seen);
                if seen == '~' || !info.contains('`') {
                    open_fence = Some(seen);
                }
                continue;
            }
            (None, None) => {}
        }

        if trimmed.starts_with('#') {
            continue;
        }
        if trimmed.is_empty() {
            // A blank line ends the paragraph once some text was collected.
            if !paragraph.is_empty() {
                return Some(paragraph);
            }
            continue;
        }
        if matches!(trimmed.chars().next(), Some('-' | '*' | '>' | '|')) {
            continue;
        }

        if !paragraph.is_empty() {
            paragraph.push(' ');
        }
        paragraph.push_str(trimmed);
    }

    (!paragraph.is_empty()).then_some(paragraph)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the fixture entry shared by the `embedding_text` tests; only the
    /// body, summary and tags vary between them.
    fn sample_entry(body: &str, summary: Option<&str>, tags: &[&str]) -> KnowledgeEntry {
        KnowledgeEntry {
            id: "kn-test".to_string(),
            category_id: "technique".to_string(),
            title: "Test Entry".to_string(),
            body: Some(body.to_string()),
            summary: summary.map(String::from),
            applicability: vec![],
            source_project_id: None,
            source_agent_id: None,
            file_path: None,
            tags: tags.iter().map(|tag| tag.to_string()).collect(),
            created_at: None,
            updated_at: None,
            content_hash: None,
            source_type_id: None,
            entry_type_id: None,
            session_id: None,
            ephemeral: false,
            content_type_id: None,
            owner: None,
            visibility: "public".to_string(),
            resonance: 0,
            resonance_type: None,
            last_activated: None,
            activation_count: 0,
            decay_rate: 0.0,
            anchors: vec![],
            wake_phrases: vec![],
            wake_order: None,
            wake_phrase: None,
            embedding: None,
            embedding_model: None,
            embedded_at: None,
            format: "markdown".to_string(),
            effective_resonance: None,
        }
    }

    // Frontmatter with inline YAML lists is parsed and the body is kept.
    #[test]
    fn test_parse_frontmatter() {
        let content = r#"---
id: test-123
title: Test Entry
tags: [rust, testing]
applicability: [cross-platform, rust]
---
# Content Here
This is the body."#;
        let (fm, body) = parse_frontmatter(content).unwrap();
        assert_eq!(fm.id, Some("test-123".to_string()));
        assert_eq!(fm.title, Some("Test Entry".to_string()));
        assert_eq!(fm.tags, vec!["rust", "testing"]);
        assert_eq!(fm.applicability, vec!["cross-platform", "rust"]);
        assert!(body.contains("# Content Here"));
    }

    // A scalar `applicability` becomes a one-element list.
    #[test]
    fn test_parse_frontmatter_applicability_string() {
        let content = r#"---
title: Test
applicability: rust
---
Body"#;
        let (fm, _) = parse_frontmatter(content).unwrap();
        assert_eq!(fm.applicability, vec!["rust"]);
    }

    // A block-style YAML list is accepted as well.
    #[test]
    fn test_parse_frontmatter_applicability_array() {
        let content = r#"---
title: Test
applicability:
- rust
- async
- cli
---
Body"#;
        let (fm, _) = parse_frontmatter(content).unwrap();
        assert_eq!(fm.applicability, vec!["rust", "async", "cli"]);
    }

    // Without a leading `---` the whole document is the body.
    #[test]
    fn test_no_frontmatter() {
        let content = "# Just Content\n\nNo frontmatter here.";
        let (fm, body) = parse_frontmatter(content).unwrap();
        assert!(fm.id.is_none());
        assert!(body.contains("# Just Content"));
    }

    #[test]
    fn test_extract_heading() {
        assert_eq!(
            extract_first_heading("# Hello World"),
            Some("Hello World".to_string())
        );
        assert_eq!(
            extract_first_heading("## Subheading"),
            Some("Subheading".to_string())
        );
    }

    #[test]
    fn test_generate_id() {
        let id = KnowledgeEntry::generate_id("pattern/test.md", "Test Pattern");
        assert!(id.starts_with("kn-"));
        // "kn-" plus eight hex characters.
        assert_eq!(id.len(), 11);
    }

    #[test]
    fn test_normalize_content() {
        assert_eq!(
            KnowledgeEntry::normalize_content("  hello   world  "),
            "hello world"
        );
        assert_eq!(
            KnowledgeEntry::normalize_content("Hello World"),
            "hello world"
        );
        assert_eq!(
            KnowledgeEntry::normalize_content("hello\n  world\n  test"),
            "hello world test"
        );
        assert_eq!(
            KnowledgeEntry::normalize_content("hello\tworld"),
            "hello world"
        );
    }

    // With no summary, the body is used and tags are appended.
    #[test]
    fn test_embedding_text() {
        let entry = sample_entry("This is the body content.", None, &["rust", "test"]);
        let text = entry.embedding_text();
        assert!(text.contains("Test Entry"));
        assert!(text.contains("This is the body content."));
        assert!(text.contains("Tags: rust, test"));
    }

    // A summary, when present, replaces the body entirely.
    #[test]
    fn test_embedding_text_with_summary() {
        let entry = sample_entry(
            "Long body that should be ignored when summary exists.",
            Some("Short summary"),
            &[],
        );
        let text = entry.embedding_text();
        assert!(text.contains("Test Entry"));
        assert!(text.contains("Short summary"));
        assert!(!text.contains("Long body"));
    }
}