spool/domain/
note.rs

1use crate::domain::{RouteInput, Section};
2use serde::{Deserialize, Serialize};
3use std::collections::{BTreeMap, BTreeSet};
4use std::path::PathBuf;
5use ts_rs::TS;
6
/// Coarse confidence bucket carried by scored notes and candidates.
///
/// Serialized in `snake_case` and exported as a generated TypeScript type
/// for the frontend. `Medium` is the `Default` value.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
#[serde(rename_all = "snake_case")]
pub enum ConfidenceTier {
    /// Highest confidence bucket.
    High,
    /// Middle bucket; used whenever no tier is supplied explicitly.
    #[default]
    Medium,
    /// Lowest confidence bucket.
    Low,
}
16
/// Scoring family that produced a [`ScoreContribution`].
///
/// Serialize-only (there is no `Deserialize` derive); variants are written
/// in `snake_case` and exported as a generated TypeScript type.
#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
#[serde(rename_all = "snake_case")]
pub enum ScoreSource {
    /// Project id / name / module / scene match against the note's
    /// indexed fields (title, heading, wikilink, path, body).
    NamedMatch,
    /// Frontmatter `memory_type` aligned with the configured project
    /// retrieval priority.
    MemoryType,
    /// Frontmatter `project_id` / `source_of_truth` boost.
    Frontmatter,
    /// Scene-preferred-note allowlist hit.
    ScenePreferred,
    /// Default-tag / preferred-note-root soft boost.
    DefaultTag,
    /// Sensitivity penalty (negative weight).
    Sensitivity,
    /// Free-form task token / file token match (rendered surface only).
    TaskToken,
    /// Derived confidence tier contribution (high/medium/low).
    Confidence,
    /// Time-decay penalty for lifecycle candidates not recently retrieved.
    Staleness,
}
42
/// Single contribution to a [`CandidateNote`]'s `score`. The sum of
/// every contribution's `weight` MUST equal `CandidateNote::score`,
/// so downstream tooling (explain output, future eval harnesses) can
/// reconstruct the exact rationale rather than relying on
/// human-readable `reasons` strings.
///
/// Serialize-only (there is no `Deserialize` derive) and exported as a
/// generated TypeScript type.
#[derive(Debug, Clone, Serialize, PartialEq, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct ScoreContribution {
    /// Which scoring family produced this contribution.
    pub source: ScoreSource,
    /// Field that matched, e.g. `title`, `heading`, `wikilink`,
    /// `path`, `body` for [`ScoreSource::NamedMatch`]; for other
    /// sources this is the configured key (`memory_type`, etc.).
    pub field: String,
    /// The matched term or label (lowercased / normalized).
    pub term: String,
    /// Score weight added (or subtracted) by this contribution.
    /// Signed: penalties are expressed as negative weights.
    pub weight: i32,
}
62
/// A parsed note together with the search index derived from it.
///
/// Construct through [`Note::new`] so that `search_index` is built from the
/// same path, title, sections, wikilinks and raw content stored here.
#[derive(Debug, Clone, Serialize)]
pub struct Note {
    /// Path of the note file (the tests use an absolute path).
    pub path: PathBuf,
    /// Path used for display and indexing — presumably relative to the vault
    /// root; confirm against the loader.
    pub relative_path: String,
    /// Note title; indexed for title matching.
    pub title: String,
    /// Frontmatter key/value pairs. The accessors on `Note` read the
    /// `memory_type`, `sensitivity` and `source_of_truth` keys.
    pub frontmatter: BTreeMap<String, serde_json::Value>,
    /// Sections in order; excerpts are taken from these.
    pub sections: Vec<Section>,
    /// Wikilink targets; indexed for wikilink matching.
    pub wikilinks: Vec<String>,
    /// Full note text; indexed as the body and used as the excerpt fallback
    /// when every section is blank.
    pub raw_content: String,
    /// Precomputed normalized text and token sets; never serialized.
    #[serde(skip_serializing)]
    pub search_index: NoteSearchIndex,
}
75
/// Precomputed lookup data for matching terms against a note.
///
/// The `normalized_*` fields hold `normalize_text` output and serve
/// multi-word (phrase) lookups; the `*_tokens` sets hold `tokenize` output
/// and serve single-word lookups. Fields are private and are populated by
/// `NoteSearchIndex::build`.
#[derive(Debug, Clone, Default)]
pub struct NoteSearchIndex {
    // Normalized text, one entry per indexed field.
    normalized_path: String,
    normalized_title: String,
    normalized_body: String,
    // Only headings / wikilinks that are non-empty after normalization.
    normalized_headings: Vec<String>,
    normalized_wikilinks: Vec<String>,
    // Token sets; heading and wikilink tokens are pooled across all entries.
    path_tokens: BTreeSet<String>,
    title_tokens: BTreeSet<String>,
    body_tokens: BTreeSet<String>,
    heading_tokens: BTreeSet<String>,
    wikilink_tokens: BTreeSet<String>,
}
89
/// Serializable view of a scored note, exported to the frontend as a
/// generated TypeScript type.
///
/// Built from a [`ScoredNote`]; the frontmatter-derived fields at the bottom
/// are kept for in-process use only and are omitted from both the serialized
/// output and the TypeScript type.
#[derive(Debug, Clone, Serialize, PartialEq, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct CandidateNote {
    /// Copied from the note's `relative_path`.
    pub relative_path: String,
    /// Title of the underlying note.
    pub title: String,
    /// Total score assigned to the note.
    pub score: i32,
    /// Human-readable scoring reasons.
    pub reasons: Vec<String>,
    /// Structured rationale that always sums to `score`. Empty when a
    /// scored note is constructed without per-contribution tracking
    /// (legacy paths). Tools should prefer this over `reasons` when
    /// available.
    // NOTE(review): `#[serde(default)]` only affects deserialization, and
    // this type derives `Serialize` only — the attribute is currently inert.
    #[serde(default)]
    pub score_breakdown: Vec<ScoreContribution>,
    /// Confidence bucket of the score.
    pub confidence: ConfidenceTier,
    /// Short text excerpt from the note.
    pub excerpt: String,
    /// Frontmatter `memory_type`, if it is a string; not serialized.
    #[serde(skip_serializing)]
    #[ts(skip)]
    pub memory_type: Option<String>,
    /// Frontmatter `sensitivity`, if it is a string; not serialized.
    #[serde(skip_serializing)]
    #[ts(skip)]
    pub sensitivity: Option<String>,
    /// Frontmatter `source_of_truth` flag; not serialized.
    #[serde(skip_serializing)]
    #[ts(skip)]
    pub source_of_truth: bool,
}
115
/// A full [`Note`] paired with its scoring result.
///
/// This is the in-process form; convert it to a [`CandidateNote`] to obtain
/// the serializable projection.
#[derive(Debug, Clone)]
pub struct ScoredNote {
    /// The scored note (owned copy).
    pub note: Note,
    /// Total score assigned to the note.
    pub score: i32,
    /// Human-readable scoring reasons.
    pub reasons: Vec<String>,
    /// Per-contribution rationale; may be empty (e.g. `Note::to_scored`
    /// leaves it empty).
    pub score_breakdown: Vec<ScoreContribution>,
    /// Confidence bucket of the score.
    pub confidence: ConfidenceTier,
    /// Short text excerpt from the note.
    pub excerpt: String,
}
125
126impl ScoredNote {
127    pub fn to_candidate(&self) -> CandidateNote {
128        CandidateNote {
129            relative_path: self.note.relative_path.clone(),
130            title: self.note.title.clone(),
131            score: self.score,
132            reasons: self.reasons.clone(),
133            score_breakdown: self.score_breakdown.clone(),
134            confidence: self.confidence,
135            excerpt: self.excerpt.clone(),
136            memory_type: self.note.memory_type().map(ToString::to_string),
137            sensitivity: self.note.sensitivity().map(ToString::to_string),
138            source_of_truth: self.note.source_of_truth(),
139        }
140    }
141}
142
143impl CandidateNote {
144    pub fn from_scored(scored: &ScoredNote) -> Self {
145        scored.to_candidate()
146    }
147}
148
149impl From<&ScoredNote> for CandidateNote {
150    fn from(value: &ScoredNote) -> Self {
151        value.to_candidate()
152    }
153}
154
155impl From<ScoredNote> for CandidateNote {
156    fn from(value: ScoredNote) -> Self {
157        value.to_candidate()
158    }
159}
160
161impl Note {
162    pub fn to_scored(&self, score: i32, reasons: Vec<String>) -> ScoredNote {
163        ScoredNote {
164            note: self.clone(),
165            score,
166            excerpt: self.excerpt(220),
167            reasons,
168            score_breakdown: Vec::new(),
169            confidence: ConfidenceTier::Medium,
170        }
171    }
172}
173
174impl Note {
175    pub fn new(
176        path: PathBuf,
177        relative_path: String,
178        title: String,
179        frontmatter: BTreeMap<String, serde_json::Value>,
180        sections: Vec<Section>,
181        wikilinks: Vec<String>,
182        raw_content: String,
183    ) -> Self {
184        let search_index =
185            NoteSearchIndex::build(&relative_path, &title, &sections, &wikilinks, &raw_content);
186
187        Self {
188            path,
189            relative_path,
190            title,
191            frontmatter,
192            sections,
193            wikilinks,
194            raw_content,
195            search_index,
196        }
197    }
198
199    pub fn frontmatter_str(&self, key: &str) -> Option<&str> {
200        self.frontmatter.get(key).and_then(|value| value.as_str())
201    }
202
203    pub fn frontmatter_bool(&self, key: &str) -> bool {
204        self.frontmatter
205            .get(key)
206            .and_then(|value| value.as_bool())
207            .unwrap_or(false)
208    }
209
210    pub fn memory_type(&self) -> Option<&str> {
211        self.frontmatter_str("memory_type")
212    }
213
214    pub fn sensitivity(&self) -> Option<&str> {
215        self.frontmatter_str("sensitivity")
216    }
217
218    pub fn source_of_truth(&self) -> bool {
219        self.frontmatter_bool("source_of_truth")
220    }
221
222    pub fn excerpt(&self, max_chars: usize) -> String {
223        self.sections
224            .iter()
225            .map(|section| section.content.trim())
226            .find(|content| !content.is_empty())
227            .unwrap_or(self.raw_content.trim())
228            .chars()
229            .take(max_chars)
230            .collect()
231    }
232
233    pub fn excerpt_for_input(&self, input: &RouteInput, max_chars: usize) -> String {
234        let terms: Vec<String> = tokenize(&input.task)
235            .into_iter()
236            .chain(
237                input
238                    .files
239                    .iter()
240                    .flat_map(|file| tokenize(file).into_iter())
241                    .filter(|segment| segment.chars().count() >= 3),
242            )
243            .collect();
244
245        let mut best_score = 0;
246        let mut best_excerpt: Option<String> = None;
247        for section in &self.sections {
248            let score = score_section_for_terms(self, section, &terms);
249            if score > best_score {
250                let candidate = build_section_excerpt_for_terms(section, &terms, max_chars);
251                if !candidate.is_empty() {
252                    best_score = score;
253                    best_excerpt = Some(candidate);
254                }
255            }
256        }
257
258        best_excerpt.unwrap_or_else(|| self.excerpt(max_chars))
259    }
260}
261
262fn score_section_for_terms(note: &Note, section: &Section, terms: &[String]) -> i32 {
263    let heading = section.heading.as_deref().unwrap_or_default();
264    let body = section.content.as_str();
265    terms.iter().fold(0, |score, term| {
266        let mut next = score;
267        if note.search_index.matches_title(term) {
268            next += 5;
269        }
270        if !heading.is_empty() && tokenize(heading).contains(term) {
271            next += 8;
272        }
273        if note.search_index.matches_wikilink(term) {
274            next += 6;
275        }
276        if tokenize(body).contains(term) {
277            next += 4;
278        }
279        next
280    })
281}
282
283fn build_section_excerpt(section: &Section, max_chars: usize) -> String {
284    let heading = section.heading.as_deref().unwrap_or_default().trim();
285    let body = section.content.trim();
286    let combined = if heading.is_empty() {
287        body.to_string()
288    } else if body.is_empty() {
289        heading.to_string()
290    } else {
291        format!("{heading}: {body}")
292    };
293    combined.chars().take(max_chars).collect()
294}
295
296/// Like [`build_section_excerpt`] but centers the body window around
297/// the first matched term so the excerpt actually shows what was
298/// scored. If no term hits the body we fall back to the simpler
299/// "from the start" rendering above.
300fn build_section_excerpt_for_terms(
301    section: &Section,
302    terms: &[String],
303    max_chars: usize,
304) -> String {
305    if terms.is_empty() {
306        return build_section_excerpt(section, max_chars);
307    }
308    let heading = section.heading.as_deref().unwrap_or_default().trim();
309    let body = section.content.trim();
310    let prefix_len = if heading.is_empty() {
311        0
312    } else {
313        heading.chars().count() + 2
314    };
315    if max_chars <= prefix_len {
316        // Not enough budget for a meaningful body window — return what
317        // build_section_excerpt would have given us.
318        return build_section_excerpt(section, max_chars);
319    }
320    let body_budget = max_chars - prefix_len;
321    let body_window = match locate_first_term(body, terms) {
322        Some(byte_pos) => window_around_byte(body, byte_pos, body_budget),
323        None => body.chars().take(body_budget).collect(),
324    };
325    if heading.is_empty() {
326        body_window
327    } else if body_window.is_empty() {
328        heading.to_string()
329    } else {
330        format!("{heading}: {body_window}")
331    }
332}
333
/// Case-insensitive substring search across all terms; returns the byte
/// offset **into `body`** of the earliest hit so the excerpt window can be
/// anchored there.
///
/// The search runs on a lowercased copy of `body`. Lowercasing can change
/// byte lengths (e.g. U+212A KELVIN SIGN is three bytes and lowercases to
/// the one-byte `k`), so the hit position is mapped back to the original
/// string; the returned offset always lies on a character boundary of
/// `body`. Empty terms are ignored. Returns `None` when no term occurs.
fn locate_first_term(body: &str, terms: &[String]) -> Option<usize> {
    let body_lower = body.to_lowercase();
    let lowered_pos = terms
        .iter()
        .filter(|term| !term.is_empty())
        .filter_map(|term| body_lower.find(&term.to_lowercase()))
        .min()?;
    Some(original_byte_offset(body, lowered_pos))
}

/// Maps a byte offset in `body.to_lowercase()` back to the byte offset in
/// `body` of the character whose lowercase form covers that offset.
fn original_byte_offset(body: &str, lowered_pos: usize) -> usize {
    let mut lowered_len = 0;
    for (orig_idx, ch) in body.char_indices() {
        // Byte width of this character once lowercased (may be several chars).
        let lowered_width: usize = ch.to_lowercase().map(char::len_utf8).sum();
        if lowered_pos < lowered_len + lowered_width {
            return orig_idx;
        }
        lowered_len += lowered_width;
    }
    body.len()
}
354
/// Take a `max_chars`-wide window of `body` anchored on the character
/// containing `byte_pos`. ~30% of the budget is reserved for context
/// BEFORE the hit, the rest goes after; the result is prefixed/suffixed
/// with `…` when it does not start/end at the section boundary.
///
/// `byte_pos` is clamped to `body.len()` and rounded down to the nearest
/// character boundary, so an offset that falls inside a multi-byte
/// character cannot cause a slicing panic. Returns an empty string when
/// `max_chars` is 0 or `body` is empty, and the whole body when it already
/// fits the budget. The ellipsis markers are added on top of the
/// `max_chars` window, so the result can be up to two characters longer.
fn window_around_byte(body: &str, byte_pos: usize, max_chars: usize) -> String {
    if max_chars == 0 || body.is_empty() {
        return String::new();
    }
    let total_chars = body.chars().count();
    if total_chars <= max_chars {
        return body.to_string();
    }

    // Floor to a char boundary; offset 0 is always one, so this terminates.
    let mut safe_pos = byte_pos.min(body.len());
    while !body.is_char_boundary(safe_pos) {
        safe_pos -= 1;
    }

    let char_pos = body[..safe_pos].chars().count();
    let padding_before = (max_chars as f64 * 0.3) as usize;
    let start_char = char_pos.saturating_sub(padding_before);
    let end_char = (start_char + max_chars).min(total_chars);
    let mut out: String = body
        .chars()
        .skip(start_char)
        .take(end_char - start_char)
        .collect();
    if end_char < total_chars {
        out.push('…');
    }
    if start_char > 0 {
        out.insert(0, '…');
    }
    out
}
386
387impl NoteSearchIndex {
388    pub fn build(
389        relative_path: &str,
390        title: &str,
391        sections: &[Section],
392        wikilinks: &[String],
393        raw_content: &str,
394    ) -> Self {
395        let normalized_path = normalize_text(relative_path);
396        let normalized_title = normalize_text(title);
397        let normalized_body = normalize_text(raw_content);
398        let normalized_headings = sections
399            .iter()
400            .filter_map(|section| section.heading.as_ref())
401            .map(|heading| normalize_text(heading))
402            .filter(|heading| !heading.is_empty())
403            .collect::<Vec<_>>();
404        let normalized_wikilinks = wikilinks
405            .iter()
406            .map(|link| normalize_text(link))
407            .filter(|link| !link.is_empty())
408            .collect::<Vec<_>>();
409
410        Self {
411            normalized_path,
412            normalized_title,
413            normalized_body,
414            normalized_headings,
415            normalized_wikilinks,
416            path_tokens: tokenize(relative_path),
417            title_tokens: tokenize(title),
418            body_tokens: tokenize(raw_content),
419            heading_tokens: sections
420                .iter()
421                .filter_map(|section| section.heading.as_ref())
422                .flat_map(|heading| tokenize(heading))
423                .collect(),
424            wikilink_tokens: wikilinks.iter().flat_map(|link| tokenize(link)).collect(),
425        }
426    }
427
428    pub fn matches_path(&self, term: &str) -> bool {
429        normalized_contains(&self.normalized_path, &self.path_tokens, term)
430    }
431
432    pub fn matches_title(&self, term: &str) -> bool {
433        normalized_contains(&self.normalized_title, &self.title_tokens, term)
434    }
435
436    pub fn matches_body(&self, term: &str) -> bool {
437        normalized_contains(&self.normalized_body, &self.body_tokens, term)
438    }
439
440    pub fn matches_heading(&self, term: &str) -> bool {
441        self.normalized_headings
442            .iter()
443            .any(|heading| normalized_contains(heading, &self.heading_tokens, term))
444    }
445
446    pub fn matches_wikilink(&self, term: &str) -> bool {
447        self.normalized_wikilinks
448            .iter()
449            .any(|link| normalized_contains(link, &self.wikilink_tokens, term))
450    }
451}
452
/// Lowercases `input` and reduces it to alphanumeric words separated by
/// single spaces.
///
/// Every run of non-alphanumeric characters becomes one space, and a space
/// is also inserted where an uppercase letter directly follows a lowercase
/// letter or digit (`repoPath` -> `repo path`, `v2Beta` -> `v2 beta`).
/// Runs of capitals are not split (`HTTPServer` -> `httpserver`). The
/// result has no leading or trailing whitespace.
pub(crate) fn normalize_text(input: &str) -> String {
    let mut out = String::new();
    let mut after_alnum = false;
    let mut after_lower_or_digit = false;

    for ch in input.chars() {
        if !ch.is_alphanumeric() {
            // Separator: emit at most one space, and only after a word.
            if after_alnum && !out.ends_with(' ') {
                out.push(' ');
            }
            after_alnum = false;
            after_lower_or_digit = false;
            continue;
        }

        // camelCase / digit-to-capital boundary starts a new word.
        let starts_new_word = ch.is_uppercase() && after_lower_or_digit;
        if starts_new_word && !out.ends_with(' ') {
            out.push(' ');
        }

        out.extend(ch.to_lowercase());
        after_alnum = true;
        after_lower_or_digit = ch.is_lowercase() || ch.is_numeric();
    }

    out.trim().to_string()
}
481
482pub(crate) fn tokenize(input: &str) -> BTreeSet<String> {
483    normalize_text(input)
484        .split_whitespace()
485        .filter(|token| token.chars().count() >= 2)
486        .map(ToString::to_string)
487        .collect()
488}
489
490fn normalized_contains(haystack: &str, tokens: &BTreeSet<String>, term: &str) -> bool {
491    let normalized_term = normalize_text(term);
492    if normalized_term.is_empty() {
493        return false;
494    }
495
496    if normalized_term.contains(' ') {
497        let bounded_haystack = format!(" {haystack} ");
498        let bounded_term = format!(" {normalized_term} ");
499        bounded_haystack.contains(&bounded_term)
500    } else {
501        tokens.contains(&normalized_term)
502    }
503}
504
#[cfg(test)]
mod tests {
    use super::Note;
    use crate::domain::{RouteInput, Section};
    use serde_json::json;
    use std::collections::BTreeMap;
    use std::path::PathBuf;

    /// Builds a level-1 section with the given heading and content.
    fn section(heading: &str, content: &str) -> Section {
        Section {
            heading: Some(heading.to_string()),
            level: 1,
            content: content.to_string(),
        }
    }

    /// Builds a note named `file_name` in the test vault, with no wikilinks.
    fn build_note(
        file_name: &str,
        title: &str,
        frontmatter: BTreeMap<String, serde_json::Value>,
        sections: Vec<Section>,
        raw_content: &str,
    ) -> Note {
        Note::new(
            PathBuf::from(format!("/tmp/vault/{file_name}")),
            format!("10-Projects/{file_name}"),
            title.to_string(),
            frontmatter,
            sections,
            Vec::new(),
            raw_content.to_string(),
        )
    }

    /// Builds a route input for `task` touching `files`.
    fn route_input(task: &str, files: &[&str]) -> RouteInput {
        RouteInput {
            task: task.to_string(),
            cwd: PathBuf::from("/tmp/repo"),
            files: files.iter().map(|file| file.to_string()).collect(),
            target: crate::domain::TargetTool::Codex,
            format: crate::domain::OutputFormat::Prompt,
        }
    }

    #[test]
    fn note_should_expose_structured_frontmatter_fields() {
        let note = build_note(
            "note.md",
            "Note",
            BTreeMap::from([
                ("memory_type".to_string(), json!("constraint")),
                ("sensitivity".to_string(), json!("internal")),
                ("source_of_truth".to_string(), json!(true)),
            ]),
            vec![section("Heading", "Body")],
            "Body",
        );

        assert_eq!(note.memory_type(), Some("constraint"));
        assert_eq!(note.sensitivity(), Some("internal"));
        assert!(note.source_of_truth());
    }

    #[test]
    fn excerpt_should_prefer_first_non_empty_section() {
        let note = build_note(
            "note.md",
            "Note",
            BTreeMap::new(),
            vec![
                section("Empty", "   "),
                section("Real", "Useful excerpt lives here"),
            ],
            "Fallback body",
        );

        assert_eq!(note.excerpt(12), "Useful excer");
    }

    #[test]
    fn excerpt_for_input_should_prefer_best_matching_section() {
        let note = build_note(
            "note.md",
            "Project Notes",
            BTreeMap::new(),
            vec![
                section("Background", "General overview"),
                section(
                    "Deploy Constraints",
                    "Use internal rollout policy for deploy credentials",
                ),
            ],
            "Fallback body",
        );
        let input = route_input("deploy credentials", &["infra/deploy.rs"]);

        let excerpt = note.excerpt_for_input(&input, 80);
        assert!(excerpt.contains("Deploy Constraints"));
    }

    #[test]
    fn excerpt_for_input_should_anchor_window_around_first_term_hit() {
        // Long body where the matching term lives well past the
        // start. The legacy renderer would have truncated before the
        // hit; the term-window renderer must include the term and
        // mark the trailing/leading edges with ellipses.
        let prefix = "Lorem ipsum dolor sit amet ".repeat(30);
        let body = format!("{prefix}repo_path matcher inside body section");
        let note = build_note(
            "long.md",
            "Long Note",
            BTreeMap::new(),
            vec![section("Background", &body)],
            &body,
        );
        let input = route_input("fix repo_path matcher", &[]);

        let excerpt = note.excerpt_for_input(&input, 120);
        assert!(
            excerpt.to_lowercase().contains("repo_path"),
            "term-window excerpt must contain the matched term: {excerpt}"
        );
        assert!(
            excerpt.contains('…'),
            "ellipsis required when the window is not at the section boundary: {excerpt}"
        );
    }

    #[test]
    fn excerpt_for_input_should_fall_back_to_start_when_no_term_hits() {
        // The only term matches the note title, so the section still
        // scores above zero, but nothing hits the body. The renderer
        // must return the heading plus the head of the body, with no
        // ellipsis and no panic. The body fits the budget, so the
        // whole of it is expected.
        let body = "Just some general background text without matches";
        let note = build_note(
            "n.md",
            "Title",
            BTreeMap::new(),
            vec![section("Heading", body)],
            body,
        );
        let input = route_input("Title", &[]); // matches title only

        let excerpt = note.excerpt_for_input(&input, 200);
        assert_eq!(
            excerpt,
            format!("Heading: {body}"),
            "fall-through excerpt must be the untruncated heading and body"
        );
        assert!(
            !excerpt.contains('…'),
            "no ellipsis expected when the body fits the budget: {excerpt}"
        );
    }
}
spool/domain/note.rs

spool/domain/
note.rs