1use crate::domain::{RouteInput, Section};
2use serde::{Deserialize, Serialize};
3use std::collections::{BTreeMap, BTreeSet};
4use std::path::PathBuf;
5use ts_rs::TS;
6
7#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, TS)]
8#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
9#[serde(rename_all = "snake_case")]
10pub enum ConfidenceTier {
11 High,
12 #[default]
13 Medium,
14 Low,
15}
16
17#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq, TS)]
18#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
19#[serde(rename_all = "snake_case")]
20pub enum ScoreSource {
21 NamedMatch,
24 MemoryType,
27 Frontmatter,
29 ScenePreferred,
31 DefaultTag,
33 Sensitivity,
35 TaskToken,
37 Confidence,
39 Staleness,
41}
42
43#[derive(Debug, Clone, Serialize, PartialEq, TS)]
49#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
50pub struct ScoreContribution {
51 pub source: ScoreSource,
53 pub field: String,
57 pub term: String,
59 pub weight: i32,
61}
62
63#[derive(Debug, Clone, Serialize)]
64pub struct Note {
65 pub path: PathBuf,
66 pub relative_path: String,
67 pub title: String,
68 pub frontmatter: BTreeMap<String, serde_json::Value>,
69 pub sections: Vec<Section>,
70 pub wikilinks: Vec<String>,
71 pub raw_content: String,
72 #[serde(skip_serializing)]
73 pub search_index: NoteSearchIndex,
74}
75
/// Precomputed, normalized view of a note used for term matching.
///
/// Each searchable field is kept in two forms: the normalized text (used for
/// multi-word phrase checks) and the set of its tokens (used for single-word
/// lookups). The default value is an empty index.
#[derive(Debug, Clone, Default)]
pub struct NoteSearchIndex {
    // Normalized text forms.
    normalized_path: String,
    normalized_title: String,
    normalized_body: String,
    normalized_headings: Vec<String>,
    normalized_wikilinks: Vec<String>,
    // Token sets for the same fields.
    path_tokens: BTreeSet<String>,
    title_tokens: BTreeSet<String>,
    body_tokens: BTreeSet<String>,
    heading_tokens: BTreeSet<String>,
    wikilink_tokens: BTreeSet<String>,
}
89
90#[derive(Debug, Clone, Serialize, PartialEq, TS)]
91#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
92pub struct CandidateNote {
93 pub relative_path: String,
94 pub title: String,
95 pub score: i32,
96 pub reasons: Vec<String>,
97 #[serde(default)]
102 pub score_breakdown: Vec<ScoreContribution>,
103 pub confidence: ConfidenceTier,
104 pub excerpt: String,
105 #[serde(skip_serializing)]
106 #[ts(skip)]
107 pub memory_type: Option<String>,
108 #[serde(skip_serializing)]
109 #[ts(skip)]
110 pub sensitivity: Option<String>,
111 #[serde(skip_serializing)]
112 #[ts(skip)]
113 pub source_of_truth: bool,
114}
115
116#[derive(Debug, Clone)]
117pub struct ScoredNote {
118 pub note: Note,
119 pub score: i32,
120 pub reasons: Vec<String>,
121 pub score_breakdown: Vec<ScoreContribution>,
122 pub confidence: ConfidenceTier,
123 pub excerpt: String,
124}
125
126impl ScoredNote {
127 pub fn to_candidate(&self) -> CandidateNote {
128 CandidateNote {
129 relative_path: self.note.relative_path.clone(),
130 title: self.note.title.clone(),
131 score: self.score,
132 reasons: self.reasons.clone(),
133 score_breakdown: self.score_breakdown.clone(),
134 confidence: self.confidence,
135 excerpt: self.excerpt.clone(),
136 memory_type: self.note.memory_type().map(ToString::to_string),
137 sensitivity: self.note.sensitivity().map(ToString::to_string),
138 source_of_truth: self.note.source_of_truth(),
139 }
140 }
141}
142
143impl CandidateNote {
144 pub fn from_scored(scored: &ScoredNote) -> Self {
145 scored.to_candidate()
146 }
147}
148
149impl From<&ScoredNote> for CandidateNote {
150 fn from(value: &ScoredNote) -> Self {
151 value.to_candidate()
152 }
153}
154
155impl From<ScoredNote> for CandidateNote {
156 fn from(value: ScoredNote) -> Self {
157 value.to_candidate()
158 }
159}
160
161impl Note {
162 pub fn to_scored(&self, score: i32, reasons: Vec<String>) -> ScoredNote {
163 ScoredNote {
164 note: self.clone(),
165 score,
166 excerpt: self.excerpt(220),
167 reasons,
168 score_breakdown: Vec::new(),
169 confidence: ConfidenceTier::Medium,
170 }
171 }
172}
173
174impl Note {
175 pub fn new(
176 path: PathBuf,
177 relative_path: String,
178 title: String,
179 frontmatter: BTreeMap<String, serde_json::Value>,
180 sections: Vec<Section>,
181 wikilinks: Vec<String>,
182 raw_content: String,
183 ) -> Self {
184 let search_index =
185 NoteSearchIndex::build(&relative_path, &title, §ions, &wikilinks, &raw_content);
186
187 Self {
188 path,
189 relative_path,
190 title,
191 frontmatter,
192 sections,
193 wikilinks,
194 raw_content,
195 search_index,
196 }
197 }
198
199 pub fn frontmatter_str(&self, key: &str) -> Option<&str> {
200 self.frontmatter.get(key).and_then(|value| value.as_str())
201 }
202
203 pub fn frontmatter_bool(&self, key: &str) -> bool {
204 self.frontmatter
205 .get(key)
206 .and_then(|value| value.as_bool())
207 .unwrap_or(false)
208 }
209
210 pub fn memory_type(&self) -> Option<&str> {
211 self.frontmatter_str("memory_type")
212 }
213
214 pub fn sensitivity(&self) -> Option<&str> {
215 self.frontmatter_str("sensitivity")
216 }
217
218 pub fn source_of_truth(&self) -> bool {
219 self.frontmatter_bool("source_of_truth")
220 }
221
222 pub fn excerpt(&self, max_chars: usize) -> String {
223 self.sections
224 .iter()
225 .map(|section| section.content.trim())
226 .find(|content| !content.is_empty())
227 .unwrap_or(self.raw_content.trim())
228 .chars()
229 .take(max_chars)
230 .collect()
231 }
232
233 pub fn excerpt_for_input(&self, input: &RouteInput, max_chars: usize) -> String {
234 let terms: Vec<String> = tokenize(&input.task)
235 .into_iter()
236 .chain(
237 input
238 .files
239 .iter()
240 .flat_map(|file| tokenize(file).into_iter())
241 .filter(|segment| segment.chars().count() >= 3),
242 )
243 .collect();
244
245 let mut best_score = 0;
246 let mut best_excerpt: Option<String> = None;
247 for section in &self.sections {
248 let score = score_section_for_terms(self, section, &terms);
249 if score > best_score {
250 let candidate = build_section_excerpt_for_terms(section, &terms, max_chars);
251 if !candidate.is_empty() {
252 best_score = score;
253 best_excerpt = Some(candidate);
254 }
255 }
256 }
257
258 best_excerpt.unwrap_or_else(|| self.excerpt(max_chars))
259 }
260}
261
262fn score_section_for_terms(note: &Note, section: &Section, terms: &[String]) -> i32 {
263 let heading = section.heading.as_deref().unwrap_or_default();
264 let body = section.content.as_str();
265 terms.iter().fold(0, |score, term| {
266 let mut next = score;
267 if note.search_index.matches_title(term) {
268 next += 5;
269 }
270 if !heading.is_empty() && tokenize(heading).contains(term) {
271 next += 8;
272 }
273 if note.search_index.matches_wikilink(term) {
274 next += 6;
275 }
276 if tokenize(body).contains(term) {
277 next += 4;
278 }
279 next
280 })
281}
282
283fn build_section_excerpt(section: &Section, max_chars: usize) -> String {
284 let heading = section.heading.as_deref().unwrap_or_default().trim();
285 let body = section.content.trim();
286 let combined = if heading.is_empty() {
287 body.to_string()
288 } else if body.is_empty() {
289 heading.to_string()
290 } else {
291 format!("{heading}: {body}")
292 };
293 combined.chars().take(max_chars).collect()
294}
295
296fn build_section_excerpt_for_terms(
301 section: &Section,
302 terms: &[String],
303 max_chars: usize,
304) -> String {
305 if terms.is_empty() {
306 return build_section_excerpt(section, max_chars);
307 }
308 let heading = section.heading.as_deref().unwrap_or_default().trim();
309 let body = section.content.trim();
310 let prefix_len = if heading.is_empty() {
311 0
312 } else {
313 heading.chars().count() + 2
314 };
315 if max_chars <= prefix_len {
316 return build_section_excerpt(section, max_chars);
319 }
320 let body_budget = max_chars - prefix_len;
321 let body_window = match locate_first_term(body, terms) {
322 Some(byte_pos) => window_around_byte(body, byte_pos, body_budget),
323 None => body.chars().take(body_budget).collect(),
324 };
325 if heading.is_empty() {
326 body_window
327 } else if body_window.is_empty() {
328 heading.to_string()
329 } else {
330 format!("{heading}: {body_window}")
331 }
332}
333
/// Finds the earliest case-insensitive occurrence of any term in `body`.
///
/// Returns the byte offset **into `body` itself** of the character where the
/// earliest match begins, or `None` when no non-empty term occurs. The offset
/// always lies on a character boundary of `body`.
///
/// Case folding lowercases each character individually, which is how
/// `normalize_text` folds case as well. Lowercasing can change a character's
/// UTF-8 width (for example the 3-byte KELVIN SIGN becomes the 1-byte `k`), so
/// a position found in the folded text is translated back to the original
/// text rather than returned directly.
fn locate_first_term(body: &str, terms: &[String]) -> Option<usize> {
    fn fold_case(text: &str) -> String {
        text.chars().flat_map(char::to_lowercase).collect()
    }

    let folded_body = fold_case(body);
    let folded_hit = terms
        .iter()
        .filter(|term| !term.is_empty())
        .filter_map(|term| folded_body.find(fold_case(term).as_str()))
        .min()?;

    // Walk the original characters, tracking where each one ends in the folded
    // text; the first character whose folded span covers the hit is the answer.
    let mut folded_end = 0usize;
    for (offset, ch) in body.char_indices() {
        folded_end += ch.to_lowercase().map(char::len_utf8).sum::<usize>();
        if folded_hit < folded_end {
            return Some(offset);
        }
    }
    None
}
354
/// Cuts a window of at most `max_chars` characters out of `body` near `byte_pos`.
///
/// * Returns an empty string when `max_chars` is zero or `body` is empty.
/// * Returns `body` unchanged when it already fits in `max_chars` characters.
/// * Otherwise the window starts about 30% of `max_chars` before the character
///   at `byte_pos` and runs for up to `max_chars` characters. An ellipsis (`…`)
///   is added on each side where text was cut off; the ellipses are not counted
///   against `max_chars`.
///
/// `byte_pos` is clamped to the length of `body` and, if it falls inside a
/// multi-byte character, moved back to the start of that character, so any
/// value is accepted without panicking.
fn window_around_byte(body: &str, byte_pos: usize, max_chars: usize) -> String {
    if max_chars == 0 || body.is_empty() {
        return String::new();
    }
    let total_chars = body.chars().count();
    if total_chars <= max_chars {
        return body.to_string();
    }

    // Slicing at a non-boundary offset would panic; snap back to the nearest
    // boundary (offset 0 is always one, so the loop terminates).
    let mut safe_pos = byte_pos.min(body.len());
    while !body.is_char_boundary(safe_pos) {
        safe_pos -= 1;
    }

    let char_pos = body[..safe_pos].chars().count();
    // Keep some leading context before the hit.
    let padding_before = (max_chars as f64 * 0.3) as usize;
    let start_char = char_pos.saturating_sub(padding_before);
    let end_char = (start_char + max_chars).min(total_chars);

    let mut out = String::new();
    if start_char > 0 {
        out.push('…');
    }
    out.extend(body.chars().skip(start_char).take(end_char - start_char));
    if end_char < total_chars {
        out.push('…');
    }
    out
}
386
387impl NoteSearchIndex {
388 pub fn build(
389 relative_path: &str,
390 title: &str,
391 sections: &[Section],
392 wikilinks: &[String],
393 raw_content: &str,
394 ) -> Self {
395 let normalized_path = normalize_text(relative_path);
396 let normalized_title = normalize_text(title);
397 let normalized_body = normalize_text(raw_content);
398 let normalized_headings = sections
399 .iter()
400 .filter_map(|section| section.heading.as_ref())
401 .map(|heading| normalize_text(heading))
402 .filter(|heading| !heading.is_empty())
403 .collect::<Vec<_>>();
404 let normalized_wikilinks = wikilinks
405 .iter()
406 .map(|link| normalize_text(link))
407 .filter(|link| !link.is_empty())
408 .collect::<Vec<_>>();
409
410 Self {
411 normalized_path,
412 normalized_title,
413 normalized_body,
414 normalized_headings,
415 normalized_wikilinks,
416 path_tokens: tokenize(relative_path),
417 title_tokens: tokenize(title),
418 body_tokens: tokenize(raw_content),
419 heading_tokens: sections
420 .iter()
421 .filter_map(|section| section.heading.as_ref())
422 .flat_map(|heading| tokenize(heading))
423 .collect(),
424 wikilink_tokens: wikilinks.iter().flat_map(|link| tokenize(link)).collect(),
425 }
426 }
427
428 pub fn matches_path(&self, term: &str) -> bool {
429 normalized_contains(&self.normalized_path, &self.path_tokens, term)
430 }
431
432 pub fn matches_title(&self, term: &str) -> bool {
433 normalized_contains(&self.normalized_title, &self.title_tokens, term)
434 }
435
436 pub fn matches_body(&self, term: &str) -> bool {
437 normalized_contains(&self.normalized_body, &self.body_tokens, term)
438 }
439
440 pub fn matches_heading(&self, term: &str) -> bool {
441 self.normalized_headings
442 .iter()
443 .any(|heading| normalized_contains(heading, &self.heading_tokens, term))
444 }
445
446 pub fn matches_wikilink(&self, term: &str) -> bool {
447 self.normalized_wikilinks
448 .iter()
449 .any(|link| normalized_contains(link, &self.wikilink_tokens, term))
450 }
451}
452
/// Lowercases `input` and reduces it to alphanumeric words separated by single spaces.
///
/// * Any run of non-alphanumeric characters becomes one space.
/// * A space is inserted before an uppercase character that directly follows
///   a lowercase letter or a digit, so `fooBar` becomes `foo bar`.
/// * The result has no leading or trailing whitespace.
pub(crate) fn normalize_text(input: &str) -> String {
    /// What the previously seen character was.
    #[derive(Clone, Copy, PartialEq)]
    enum Prev {
        /// Start of input or a non-alphanumeric character.
        Separator,
        /// A lowercase letter or a digit.
        LowerOrDigit,
        /// Any other alphanumeric character (e.g. an uppercase letter).
        OtherAlnum,
    }

    let mut out = String::new();
    let mut prev = Prev::Separator;

    for ch in input.chars() {
        if !ch.is_alphanumeric() {
            // Close the current word, collapsing separator runs into one space.
            if prev != Prev::Separator && !out.ends_with(' ') {
                out.push(' ');
            }
            prev = Prev::Separator;
            continue;
        }

        // lower/digit -> UPPER marks a camelCase word boundary.
        if prev == Prev::LowerOrDigit && ch.is_uppercase() && !out.ends_with(' ') {
            out.push(' ');
        }
        out.extend(ch.to_lowercase());

        prev = if ch.is_lowercase() || ch.is_numeric() {
            Prev::LowerOrDigit
        } else {
            Prev::OtherAlnum
        };
    }

    out.trim().to_string()
}
481
482pub(crate) fn tokenize(input: &str) -> BTreeSet<String> {
483 normalize_text(input)
484 .split_whitespace()
485 .filter(|token| token.chars().count() >= 2)
486 .map(ToString::to_string)
487 .collect()
488}
489
490fn normalized_contains(haystack: &str, tokens: &BTreeSet<String>, term: &str) -> bool {
491 let normalized_term = normalize_text(term);
492 if normalized_term.is_empty() {
493 return false;
494 }
495
496 if normalized_term.contains(' ') {
497 let bounded_haystack = format!(" {haystack} ");
498 let bounded_term = format!(" {normalized_term} ");
499 bounded_haystack.contains(&bounded_term)
500 } else {
501 tokens.contains(&normalized_term)
502 }
503}
504
#[cfg(test)]
mod tests {
    use super::Note;
    use crate::domain::{RouteInput, Section};
    use serde_json::json;
    use std::collections::BTreeMap;
    use std::path::PathBuf;

    /// Level-1 section with the given heading and content.
    fn section(heading: &str, content: &str) -> Section {
        Section {
            heading: Some(heading.to_string()),
            level: 1,
            content: content.to_string(),
        }
    }

    /// Note without wikilinks built from borrowed fixture data.
    fn build_note(
        path: &str,
        relative_path: &str,
        title: &str,
        frontmatter: BTreeMap<String, serde_json::Value>,
        sections: Vec<Section>,
        raw_content: &str,
    ) -> Note {
        Note::new(
            PathBuf::from(path),
            relative_path.to_string(),
            title.to_string(),
            frontmatter,
            sections,
            Vec::new(),
            raw_content.to_string(),
        )
    }

    /// Route input for the given task and files with fixed cwd, target and format.
    fn route_input(task: &str, files: &[&str]) -> RouteInput {
        RouteInput {
            task: task.to_string(),
            cwd: PathBuf::from("/tmp/repo"),
            files: files.iter().map(|file| file.to_string()).collect(),
            target: crate::domain::TargetTool::Codex,
            format: crate::domain::OutputFormat::Prompt,
        }
    }

    #[test]
    fn note_should_expose_structured_frontmatter_fields() {
        let frontmatter = BTreeMap::from([
            ("memory_type".to_string(), json!("constraint")),
            ("sensitivity".to_string(), json!("internal")),
            ("source_of_truth".to_string(), json!(true)),
        ]);
        let note = build_note(
            "/tmp/vault/note.md",
            "10-Projects/note.md",
            "Note",
            frontmatter,
            vec![section("Heading", "Body")],
            "Body",
        );

        assert_eq!(note.memory_type(), Some("constraint"));
        assert_eq!(note.sensitivity(), Some("internal"));
        assert!(note.source_of_truth());
    }

    #[test]
    fn excerpt_should_prefer_first_non_empty_section() {
        let note = build_note(
            "/tmp/vault/note.md",
            "10-Projects/note.md",
            "Note",
            BTreeMap::new(),
            vec![
                section("Empty", "   "),
                section("Real", "Useful excerpt lives here"),
            ],
            "Fallback body",
        );

        assert_eq!(note.excerpt(12), "Useful excer");
    }

    #[test]
    fn excerpt_for_input_should_prefer_best_matching_section() {
        let note = build_note(
            "/tmp/vault/note.md",
            "10-Projects/note.md",
            "Project Notes",
            BTreeMap::new(),
            vec![
                section("Background", "General overview"),
                section(
                    "Deploy Constraints",
                    "Use internal rollout policy for deploy credentials",
                ),
            ],
            "Fallback body",
        );
        let input = route_input("deploy credentials", &["infra/deploy.rs"]);

        let excerpt = note.excerpt_for_input(&input, 80);
        assert!(excerpt.contains("Deploy Constraints"));
    }

    #[test]
    fn excerpt_for_input_should_anchor_window_around_first_term_hit() {
        // Long filler pushes the matching text far beyond the excerpt budget.
        let prefix = "Lorem ipsum dolor sit amet ".repeat(30);
        let body = format!("{prefix}repo_path matcher inside body section");
        let note = build_note(
            "/tmp/vault/long.md",
            "10-Projects/long.md",
            "Long Note",
            BTreeMap::new(),
            vec![section("Background", &body)],
            &body,
        );
        let input = route_input("fix repo_path matcher", &[]);

        let excerpt = note.excerpt_for_input(&input, 120);
        assert!(
            excerpt.to_lowercase().contains("repo_path"),
            "term-window excerpt must contain the matched term: {excerpt}"
        );
        assert!(
            excerpt.contains('…'),
            "ellipsis required when the window is not at the section boundary: {excerpt}"
        );
    }

    #[test]
    fn excerpt_for_input_should_fall_back_to_start_when_no_term_hits() {
        let body = "Just some general background text without matches";
        let note = build_note(
            "/tmp/vault/n.md",
            "10-Projects/n.md",
            "Title",
            BTreeMap::new(),
            vec![section("Heading", body)],
            body,
        );
        // The task matches the note title only, never the section body.
        let input = route_input("Title", &[]);

        let excerpt = note.excerpt_for_input(&input, 200);
        assert!(
            excerpt.contains("Heading") || excerpt.contains("background"),
            "fall-through excerpt must still include some content: {excerpt}"
        );
    }
}