use crate::domain::{RouteInput, Section};
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
use std::path::PathBuf;
use ts_rs::TS;
/// Three-level confidence label carried by `ScoredNote` and `CandidateNote`.
///
/// serde reads and writes the variants in `snake_case`, and `Medium` is the
/// `Default` value. The `ts` attribute requests a TypeScript export of this
/// type into the frontend's generated-types directory.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
#[serde(rename_all = "snake_case")]
pub enum ConfidenceTier {
High,
#[default]
Medium,
Low,
}
/// Category tag stored in the `source` field of every `ScoreContribution`.
///
/// Serialized in `snake_case` and exported to TypeScript. The scoring rules
/// that emit each variant are not defined in this file.
// NOTE(review): the variant names suggest one variant per scoring signal —
// confirm against the scorer before documenting each one individually.
#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
#[serde(rename_all = "snake_case")]
pub enum ScoreSource {
NamedMatch,
MemoryType,
Frontmatter,
ScenePreferred,
DefaultTag,
Sensitivity,
TaskToken,
Confidence,
Staleness,
}
/// One entry of a note's score breakdown (see `score_breakdown` on
/// `ScoredNote` and `CandidateNote`).
///
/// Serialize-only; exported to TypeScript.
#[derive(Debug, Clone, Serialize, PartialEq, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct ScoreContribution {
/// Category of the signal behind this entry.
pub source: ScoreSource,
// NOTE(review): presumably the note field that matched — confirm with the scorer.
pub field: String,
// NOTE(review): presumably the matched term — confirm with the scorer.
pub term: String,
/// Signed amount recorded for this entry.
pub weight: i32,
}
/// A parsed note together with the search index derived from it.
///
/// Construct through `Note::new`, which builds `search_index` from the other
/// fields. The index is left out of serialized output.
#[derive(Debug, Clone, Serialize)]
pub struct Note {
/// Location of the note file.
pub path: PathBuf,
// NOTE(review): presumably relative to the vault root — confirm with the loader.
pub relative_path: String,
pub title: String,
/// Frontmatter entries keyed by name, held as JSON values.
pub frontmatter: BTreeMap<String, serde_json::Value>,
/// Sections of the note; each has an optional heading and a content string.
pub sections: Vec<Section>,
pub wikilinks: Vec<String>,
/// Raw note text; used as the excerpt fallback and as the body source for
/// the search index.
pub raw_content: String,
/// Precomputed lookup data; never serialized.
#[serde(skip_serializing)]
pub search_index: NoteSearchIndex,
}
/// Precomputed, normalized view of a note used by the `matches_*` queries.
///
/// `NoteSearchIndex::build` fills the `normalized_*` fields with
/// `normalize_text` output and the `*_tokens` fields with `tokenize` output.
/// All fields are private; read them through the `matches_*` methods.
#[derive(Debug, Clone, Default)]
pub struct NoteSearchIndex {
// Normalized strings, used for multi-word (phrase) matching.
normalized_path: String,
normalized_title: String,
normalized_body: String,
// One entry per heading / wikilink; entries that normalize to "" are dropped.
normalized_headings: Vec<String>,
normalized_wikilinks: Vec<String>,
// Token sets, used for single-word matching.
path_tokens: BTreeSet<String>,
title_tokens: BTreeSet<String>,
body_tokens: BTreeSet<String>,
// Tokens pooled across all headings / all wikilinks of the note.
heading_tokens: BTreeSet<String>,
wikilink_tokens: BTreeSet<String>,
}
/// Serializable summary of a scored note, produced by
/// `ScoredNote::to_candidate`.
///
/// The last three fields are marked `skip_serializing` and `ts(skip)`, so they
/// appear neither in serialized output nor in the exported TypeScript type.
#[derive(Debug, Clone, Serialize, PartialEq, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct CandidateNote {
pub relative_path: String,
pub title: String,
pub score: i32,
pub reasons: Vec<String>,
// NOTE(review): `default` only influences deserialization, and this struct
// derives only `Serialize` — confirm whether the attribute is still needed.
#[serde(default)]
pub score_breakdown: Vec<ScoreContribution>,
pub confidence: ConfidenceTier,
pub excerpt: String,
/// Copy of the note's `memory_type` frontmatter string, if present.
#[serde(skip_serializing)]
#[ts(skip)]
pub memory_type: Option<String>,
/// Copy of the note's `sensitivity` frontmatter string, if present.
#[serde(skip_serializing)]
#[ts(skip)]
pub sensitivity: Option<String>,
/// Copy of the note's `source_of_truth` frontmatter flag.
#[serde(skip_serializing)]
#[ts(skip)]
pub source_of_truth: bool,
}
/// A full `Note` paired with its scoring result.
///
/// Not serializable itself; convert it with `to_candidate` (or the `From`
/// impls) to get the serializable `CandidateNote`.
#[derive(Debug, Clone)]
pub struct ScoredNote {
/// Owned copy of the note that was scored.
pub note: Note,
pub score: i32,
pub reasons: Vec<String>,
pub score_breakdown: Vec<ScoreContribution>,
pub confidence: ConfidenceTier,
pub excerpt: String,
}
impl ScoredNote {
    /// Builds the serializable [`CandidateNote`] view of this scored note.
    ///
    /// Every owned value is cloned, so `self` is left untouched. The
    /// `memory_type`, `sensitivity` and `source_of_truth` fields are read from
    /// the wrapped note's frontmatter accessors.
    pub fn to_candidate(&self) -> CandidateNote {
        let note = &self.note;

        let memory_type = note.memory_type().map(str::to_owned);
        let sensitivity = note.sensitivity().map(str::to_owned);
        let source_of_truth = note.source_of_truth();

        CandidateNote {
            relative_path: note.relative_path.clone(),
            title: note.title.clone(),
            score: self.score,
            reasons: self.reasons.clone(),
            score_breakdown: self.score_breakdown.clone(),
            confidence: self.confidence,
            excerpt: self.excerpt.clone(),
            memory_type,
            sensitivity,
            source_of_truth,
        }
    }
}
impl CandidateNote {
pub fn from_scored(scored: &ScoredNote) -> Self {
scored.to_candidate()
}
}
impl From<&ScoredNote> for CandidateNote {
fn from(value: &ScoredNote) -> Self {
value.to_candidate()
}
}
impl From<ScoredNote> for CandidateNote {
fn from(value: ScoredNote) -> Self {
value.to_candidate()
}
}
impl Note {
    /// Wraps a clone of this note in a [`ScoredNote`].
    ///
    /// The excerpt is the default one capped at 220 characters, the score
    /// breakdown starts empty, and the confidence is `Medium`.
    pub fn to_scored(&self, score: i32, reasons: Vec<String>) -> ScoredNote {
        let excerpt = self.excerpt(220);
        let note = self.clone();
        ScoredNote {
            note,
            score,
            reasons,
            score_breakdown: Vec::new(),
            confidence: ConfidenceTier::Medium,
            excerpt,
        }
    }
}
impl Note {
    /// Builds a note and precomputes its [`NoteSearchIndex`] from the path,
    /// title, sections, wikilinks and raw content supplied here.
    pub fn new(
        path: PathBuf,
        relative_path: String,
        title: String,
        frontmatter: BTreeMap<String, serde_json::Value>,
        sections: Vec<Section>,
        wikilinks: Vec<String>,
        raw_content: String,
    ) -> Self {
        let search_index =
            NoteSearchIndex::build(&relative_path, &title, &sections, &wikilinks, &raw_content);
        Self {
            path,
            relative_path,
            title,
            frontmatter,
            sections,
            wikilinks,
            raw_content,
            search_index,
        }
    }

    /// Returns the frontmatter value for `key` when it exists and is a JSON
    /// string; `None` otherwise.
    pub fn frontmatter_str(&self, key: &str) -> Option<&str> {
        self.frontmatter.get(key).and_then(|value| value.as_str())
    }

    /// Returns the frontmatter value for `key` when it is a JSON boolean;
    /// a missing key or a non-boolean value yields `false`.
    pub fn frontmatter_bool(&self, key: &str) -> bool {
        self.frontmatter
            .get(key)
            .and_then(|value| value.as_bool())
            .unwrap_or(false)
    }

    /// The `memory_type` frontmatter string, if present.
    pub fn memory_type(&self) -> Option<&str> {
        self.frontmatter_str("memory_type")
    }

    /// The `sensitivity` frontmatter string, if present.
    pub fn sensitivity(&self) -> Option<&str> {
        self.frontmatter_str("sensitivity")
    }

    /// The `source_of_truth` frontmatter flag; `false` when absent.
    pub fn source_of_truth(&self) -> bool {
        self.frontmatter_bool("source_of_truth")
    }

    /// Returns at most `max_chars` characters from the first section whose
    /// trimmed content is non-empty, falling back to the trimmed raw content
    /// when every section is blank.
    pub fn excerpt(&self, max_chars: usize) -> String {
        self.sections
            .iter()
            .map(|section| section.content.trim())
            .find(|content| !content.is_empty())
            .unwrap_or(self.raw_content.trim())
            .chars()
            .take(max_chars)
            .collect()
    }

    /// Picks the excerpt that best matches the routing input.
    ///
    /// Search terms are the tokens of `input.task` followed by the tokens of
    /// every entry in `input.files` that are at least three characters long.
    /// Each section is scored against those terms; the highest strictly
    /// positive score that also yields a non-empty excerpt wins, and on ties
    /// the earlier section is kept. When nothing qualifies the result is
    /// [`Note::excerpt`].
    pub fn excerpt_for_input(&self, input: &RouteInput, max_chars: usize) -> String {
        let terms: Vec<String> = tokenize(&input.task)
            .into_iter()
            .chain(
                input
                    .files
                    .iter()
                    .flat_map(|file| tokenize(file).into_iter())
                    // The length filter applies to file-derived tokens only.
                    .filter(|segment| segment.chars().count() >= 3),
            )
            .collect();

        let mut best_score = 0;
        let mut best_excerpt: Option<String> = None;
        for section in &self.sections {
            let score = score_section_for_terms(self, section, &terms);
            if score > best_score {
                let candidate = build_section_excerpt_for_terms(section, &terms, max_chars);
                // A section that cannot produce any text never becomes the best pick.
                if !candidate.is_empty() {
                    best_score = score;
                    best_excerpt = Some(candidate);
                }
            }
        }
        best_excerpt.unwrap_or_else(|| self.excerpt(max_chars))
    }
}
/// Scores one section of `note` against `terms`.
///
/// Each term adds 5 if it matches the note title, 8 if it is a token of the
/// section heading, 6 if it matches any wikilink of the note, and 4 if it is
/// a token of the section body. The title and wikilink parts depend on the
/// note only, so they are the same for every section of that note.
fn score_section_for_terms(note: &Note, section: &Section, terms: &[String]) -> i32 {
    // Tokenize once up front; both sets are the same for every term.
    // A missing or empty heading simply produces an empty token set.
    let heading_tokens = tokenize(section.heading.as_deref().unwrap_or_default());
    let body_tokens = tokenize(section.content.as_str());

    terms
        .iter()
        .map(|term| {
            let mut points = 0;
            if note.search_index.matches_title(term) {
                points += 5;
            }
            if heading_tokens.contains(term) {
                points += 8;
            }
            if note.search_index.matches_wikilink(term) {
                points += 6;
            }
            if body_tokens.contains(term) {
                points += 4;
            }
            points
        })
        .sum()
}
/// Renders a section as `heading: body` (or whichever part is non-empty after
/// trimming) and keeps at most the first `max_chars` characters.
fn build_section_excerpt(section: &Section, max_chars: usize) -> String {
    let heading = section.heading.as_deref().unwrap_or_default().trim();
    let body = section.content.trim();

    let rendered = match (heading.is_empty(), body.is_empty()) {
        (true, _) => body.to_string(),
        (false, true) => heading.to_string(),
        (false, false) => format!("{heading}: {body}"),
    };

    rendered.chars().take(max_chars).collect()
}
/// Renders a section excerpt whose body part is centred on the earliest
/// occurrence of any search term.
///
/// Falls back to [`build_section_excerpt`] when there are no terms or when
/// `max_chars` leaves no room for body text after the `heading: ` prefix.
/// When no term occurs in the body, the body part is its leading characters.
fn build_section_excerpt_for_terms(
    section: &Section,
    terms: &[String],
    max_chars: usize,
) -> String {
    let heading = section.heading.as_deref().unwrap_or_default().trim();
    // Width of the "heading: " prefix in characters; zero without a heading.
    let label_width = match heading {
        "" => 0,
        text => text.chars().count() + 2,
    };
    if terms.is_empty() || max_chars <= label_width {
        return build_section_excerpt(section, max_chars);
    }

    let body = section.content.trim();
    let budget = max_chars - label_width;
    let window: String = locate_first_term(body, terms)
        .map(|hit| window_around_byte(body, hit, budget))
        .unwrap_or_else(|| body.chars().take(budget).collect());

    match (heading.is_empty(), window.is_empty()) {
        (true, _) => window,
        (false, true) => heading.to_string(),
        (false, false) => format!("{heading}: {window}"),
    }
}
/// Finds the earliest case-insensitive occurrence of any non-empty term in
/// `body` and returns its byte offset **into `body`**.
///
/// The search runs on a lowercased copy of `body`. Lowercasing can change the
/// byte length of a character (for example U+212A KELVIN SIGN, three bytes,
/// lowercases to the one-byte `k`), so the offset found in the copy is mapped
/// back to the start of the corresponding character in the original string.
/// The returned offset therefore always lies on a char boundary of `body`.
///
/// Returns `None` when no term occurs in `body`; empty terms are ignored.
fn locate_first_term(body: &str, terms: &[String]) -> Option<usize> {
    let body_lower = body.to_lowercase();
    let lowered_pos = terms
        .iter()
        .filter(|term| !term.is_empty())
        .filter_map(|term| body_lower.find(&term.to_lowercase()))
        .min()?;

    // Walk the original characters while tracking how many bytes each one
    // occupies in the lowercased copy, until the match offset is covered.
    let mut lowered_offset = 0;
    for (original_offset, ch) in body.char_indices() {
        let lowered_len: usize = ch.to_lowercase().map(char::len_utf8).sum();
        if lowered_pos < lowered_offset + lowered_len {
            return Some(original_offset);
        }
        lowered_offset += lowered_len;
    }
    // Not expected to be reached: a match offset is always inside the copy.
    Some(body.len())
}
/// Cuts a window of at most `max_chars` characters out of `body`, positioned
/// so that roughly 30% of the window precedes the character at `byte_pos`.
///
/// * Returns an empty string when `max_chars` is zero or `body` is empty.
/// * Returns `body` unchanged when it already fits in `max_chars`.
/// * `byte_pos` is clamped to `body.len()` and, if it falls inside a
///   multi-byte character, moved back to the start of that character so that
///   slicing cannot panic.
/// * A `…` is added on each side where text was cut off. The markers are not
///   counted against `max_chars`, so the result may be up to two characters
///   longer than the budget.
fn window_around_byte(body: &str, byte_pos: usize, max_chars: usize) -> String {
    if max_chars == 0 || body.is_empty() {
        return String::new();
    }
    let total_chars = body.chars().count();
    if total_chars <= max_chars {
        return body.to_string();
    }

    // Offset 0 is always a boundary, so this loop stops before underflowing.
    let mut safe_pos = byte_pos.min(body.len());
    while !body.is_char_boundary(safe_pos) {
        safe_pos -= 1;
    }

    let char_pos = body[..safe_pos].chars().count();
    let padding_before = (max_chars as f64 * 0.3) as usize;
    let start_char = char_pos.saturating_sub(padding_before);
    let end_char = (start_char + max_chars).min(total_chars);

    let mut out: String = body
        .chars()
        .skip(start_char)
        .take(end_char - start_char)
        .collect();
    if end_char < total_chars {
        out.push('…');
    }
    if start_char > 0 {
        out.insert(0, '…');
    }
    out
}
impl NoteSearchIndex {
pub fn build(
relative_path: &str,
title: &str,
sections: &[Section],
wikilinks: &[String],
raw_content: &str,
) -> Self {
let normalized_path = normalize_text(relative_path);
let normalized_title = normalize_text(title);
let normalized_body = normalize_text(raw_content);
let normalized_headings = sections
.iter()
.filter_map(|section| section.heading.as_ref())
.map(|heading| normalize_text(heading))
.filter(|heading| !heading.is_empty())
.collect::<Vec<_>>();
let normalized_wikilinks = wikilinks
.iter()
.map(|link| normalize_text(link))
.filter(|link| !link.is_empty())
.collect::<Vec<_>>();
Self {
normalized_path,
normalized_title,
normalized_body,
normalized_headings,
normalized_wikilinks,
path_tokens: tokenize(relative_path),
title_tokens: tokenize(title),
body_tokens: tokenize(raw_content),
heading_tokens: sections
.iter()
.filter_map(|section| section.heading.as_ref())
.flat_map(|heading| tokenize(heading))
.collect(),
wikilink_tokens: wikilinks.iter().flat_map(|link| tokenize(link)).collect(),
}
}
pub fn matches_path(&self, term: &str) -> bool {
normalized_contains(&self.normalized_path, &self.path_tokens, term)
}
pub fn matches_title(&self, term: &str) -> bool {
normalized_contains(&self.normalized_title, &self.title_tokens, term)
}
pub fn matches_body(&self, term: &str) -> bool {
normalized_contains(&self.normalized_body, &self.body_tokens, term)
}
pub fn matches_heading(&self, term: &str) -> bool {
self.normalized_headings
.iter()
.any(|heading| normalized_contains(heading, &self.heading_tokens, term))
}
pub fn matches_wikilink(&self, term: &str) -> bool {
self.normalized_wikilinks
.iter()
.any(|link| normalized_contains(link, &self.wikilink_tokens, term))
}
}
/// Lowercases `input` and collapses it into space-separated words.
///
/// * Each run of non-alphanumeric characters becomes a single space.
/// * A space is inserted before an uppercase letter that directly follows a
///   lowercase letter or a digit, which splits `camelCase` into two words.
/// * The result carries no leading or trailing whitespace.
pub(crate) fn normalize_text(input: &str) -> String {
    let mut out = String::new();
    // True while the previous character was alphanumeric.
    let mut in_word = false;
    // True when the previous character was a lowercase letter or a digit, so
    // that an uppercase letter arriving next starts a new word.
    let mut split_before_upper = false;

    for ch in input.chars() {
        if !ch.is_alphanumeric() {
            if in_word && !out.ends_with(' ') {
                out.push(' ');
            }
            in_word = false;
            split_before_upper = false;
            continue;
        }

        if split_before_upper && ch.is_uppercase() && !out.ends_with(' ') {
            out.push(' ');
        }
        out.extend(ch.to_lowercase());
        in_word = true;
        split_before_upper = ch.is_lowercase() || ch.is_numeric();
    }

    out.trim().to_owned()
}
/// Splits `input` into its set of normalized words, discarding words shorter
/// than two characters.
pub(crate) fn tokenize(input: &str) -> BTreeSet<String> {
    let normalized = normalize_text(input);
    let mut tokens = BTreeSet::new();
    for word in normalized.split_whitespace() {
        // Having a second character is the same as a length of at least two.
        if word.chars().nth(1).is_some() {
            tokens.insert(word.to_owned());
        }
    }
    tokens
}
/// Tests whether `term` occurs in an already-normalized text.
///
/// The term is normalized first. An empty result never matches. A multi-word
/// term must appear in `haystack` as a whole-word phrase. A single-word term
/// is looked up in `tokens` and `haystack` is not consulted.
fn normalized_contains(haystack: &str, tokens: &BTreeSet<String>, term: &str) -> bool {
    let needle = normalize_text(term);
    match needle.as_str() {
        "" => false,
        phrase if phrase.contains(' ') => {
            // Pad both sides so the phrase only matches on word boundaries.
            let padded_haystack = format!(" {haystack} ");
            let padded_phrase = format!(" {phrase} ");
            padded_haystack.contains(&padded_phrase)
        }
        word => tokens.contains(word),
    }
}
#[cfg(test)]
mod tests {
    use super::Note;
    use crate::domain::{RouteInput, Section};
    use serde_json::json;
    use std::collections::BTreeMap;
    use std::path::PathBuf;

    /// Level-1 section with the given heading and content.
    fn section(heading: &str, content: &str) -> Section {
        Section {
            heading: Some(heading.to_string()),
            level: 1,
            content: content.to_string(),
        }
    }

    /// Note stored as `/tmp/vault/<file_name>` with the relative path
    /// `10-Projects/<file_name>` and no wikilinks.
    fn note_fixture(
        file_name: &str,
        title: &str,
        frontmatter: BTreeMap<String, serde_json::Value>,
        sections: Vec<Section>,
        raw_content: &str,
    ) -> Note {
        Note::new(
            PathBuf::from(format!("/tmp/vault/{file_name}")),
            format!("10-Projects/{file_name}"),
            title.to_string(),
            frontmatter,
            sections,
            Vec::new(),
            raw_content.to_string(),
        )
    }

    /// Routing input for the Codex target in prompt format.
    fn route_input(task: &str, files: &[&str]) -> RouteInput {
        RouteInput {
            task: task.to_string(),
            cwd: PathBuf::from("/tmp/repo"),
            files: files.iter().map(|file| (*file).to_string()).collect(),
            target: crate::domain::TargetTool::Codex,
            format: crate::domain::OutputFormat::Prompt,
        }
    }

    #[test]
    fn note_should_expose_structured_frontmatter_fields() {
        let frontmatter = BTreeMap::from([
            ("memory_type".to_string(), json!("constraint")),
            ("sensitivity".to_string(), json!("internal")),
            ("source_of_truth".to_string(), json!(true)),
        ]);
        let note = note_fixture(
            "note.md",
            "Note",
            frontmatter,
            vec![section("Heading", "Body")],
            "Body",
        );

        assert_eq!(note.memory_type(), Some("constraint"));
        assert_eq!(note.sensitivity(), Some("internal"));
        assert!(note.source_of_truth());
    }

    #[test]
    fn excerpt_should_prefer_first_non_empty_section() {
        let note = note_fixture(
            "note.md",
            "Note",
            BTreeMap::new(),
            vec![
                section("Empty", " "),
                section("Real", "Useful excerpt lives here"),
            ],
            "Fallback body",
        );

        assert_eq!(note.excerpt(12), "Useful excer");
    }

    #[test]
    fn excerpt_for_input_should_prefer_best_matching_section() {
        let note = note_fixture(
            "note.md",
            "Project Notes",
            BTreeMap::new(),
            vec![
                section("Background", "General overview"),
                section(
                    "Deploy Constraints",
                    "Use internal rollout policy for deploy credentials",
                ),
            ],
            "Fallback body",
        );
        let input = route_input("deploy credentials", &["infra/deploy.rs"]);

        let excerpt = note.excerpt_for_input(&input, 80);

        assert!(excerpt.contains("Deploy Constraints"));
    }

    #[test]
    fn excerpt_for_input_should_anchor_window_around_first_term_hit() {
        let prefix = "Lorem ipsum dolor sit amet ".repeat(30);
        let body = format!("{prefix}repo_path matcher inside body section");
        let note = note_fixture(
            "long.md",
            "Long Note",
            BTreeMap::new(),
            vec![section("Background", &body)],
            &body,
        );
        let input = route_input("fix repo_path matcher", &[]);

        let excerpt = note.excerpt_for_input(&input, 120);

        assert!(
            excerpt.to_lowercase().contains("repo_path"),
            "term-window excerpt must contain the matched term: {excerpt}"
        );
        assert!(
            excerpt.contains('…'),
            "ellipsis required when the window is not at the section boundary: {excerpt}"
        );
    }

    #[test]
    fn excerpt_for_input_should_fall_back_to_start_when_no_term_hits() {
        let body = "Just some general background text without matches";
        let note = note_fixture(
            "n.md",
            "Title",
            BTreeMap::new(),
            vec![section("Heading", body)],
            body,
        );
        // The task matches the note title only, so the section still scores
        // above zero while no term occurs in the body text.
        let input = route_input("Title", &[]);

        let excerpt = note.excerpt_for_input(&input, 200);

        assert!(
            excerpt.contains("Heading") || excerpt.contains("background"),
            "fall-through excerpt must still include some content: {excerpt}"
        );
    }
}