spool-memory 0.2.3

use crate::domain::{RouteInput, Section};
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
use std::path::PathBuf;
use ts_rs::TS;

/// Coarse confidence bucket attached to a scored note.
///
/// Variants are (de)serialized with `snake_case` names (`high`, `medium`,
/// `low`) and the type is exported as a generated TypeScript definition.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
#[serde(rename_all = "snake_case")]
pub enum ConfidenceTier {
    /// Highest tier.
    High,
    /// Middle tier; this is the `Default` value.
    #[default]
    Medium,
    /// Lowest tier.
    Low,
}

/// Scoring family that produced a [`ScoreContribution`].
///
/// Serialized with `snake_case` variant names (e.g. `named_match`) and
/// exported as a generated TypeScript definition.
#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
#[serde(rename_all = "snake_case")]
pub enum ScoreSource {
    /// Project id / name / module / scene match against the note's
    /// indexed fields (title, heading, wikilink, path, body).
    NamedMatch,
    /// Frontmatter `memory_type` aligned with the configured project
    /// retrieval priority.
    MemoryType,
    /// Frontmatter `project_id` / `source_of_truth` boost.
    Frontmatter,
    /// Scene-preferred-note allowlist hit.
    ScenePreferred,
    /// Default-tag / preferred-note-root soft boost.
    DefaultTag,
    /// Sensitivity penalty (negative weight).
    Sensitivity,
    /// Free-form task token / file token match (rendered surface only).
    TaskToken,
    /// Derived confidence tier contribution (high/medium/low).
    Confidence,
    /// Time-decay penalty for lifecycle candidates not recently retrieved.
    Staleness,
}

/// Single contribution to a [`CandidateNote`]'s `score`. The sum of
/// every contribution's `weight` MUST equal `CandidateNote::score`,
/// so downstream tooling (explain output, future eval harnesses) can
/// reconstruct the exact rationale rather than relying on
/// human-readable `reasons` strings.
///
/// The one visible exception is a note scored without per-contribution
/// tracking: `Note::to_scored` leaves the breakdown empty regardless of
/// the score it is given.
#[derive(Debug, Clone, Serialize, PartialEq, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct ScoreContribution {
    /// Which scoring family produced this contribution.
    pub source: ScoreSource,
    /// Field that matched, e.g. `title`, `heading`, `wikilink`,
    /// `path`, `body` for [`ScoreSource::NamedMatch`]; for other
    /// sources this is the configured key (`memory_type`, etc.).
    pub field: String,
    /// The matched term or label (lowercased / normalized).
    pub term: String,
    /// Score weight added (or subtracted) by this contribution; signed so
    /// that penalties can be represented as negative values.
    pub weight: i32,
}

/// A parsed note together with a precomputed search index.
///
/// Construct values through [`Note::new`], which derives `search_index`
/// from the same path, title, sections, wikilinks and raw content that are
/// stored on the note.
#[derive(Debug, Clone, Serialize)]
pub struct Note {
    /// Filesystem location of the note.
    /// NOTE(review): presumably an absolute path — confirm against the loader.
    pub path: PathBuf,
    /// Path string used for display and for path matching in the index.
    /// NOTE(review): presumably relative to the vault root — confirm.
    pub relative_path: String,
    /// Note title; indexed for title matching.
    pub title: String,
    /// Frontmatter key/value pairs. The accessors on `Note` read the
    /// `memory_type`, `sensitivity` and `source_of_truth` keys.
    pub frontmatter: BTreeMap<String, serde_json::Value>,
    /// Sections of the note; their headings are indexed and their contents
    /// are the preferred source for excerpts.
    pub sections: Vec<Section>,
    /// Wikilink targets found in the note; indexed for wikilink matching.
    pub wikilinks: Vec<String>,
    /// Full note text; indexed as the body and used as the excerpt fallback
    /// when no section has non-blank content.
    pub raw_content: String,
    /// Derived lookup data; never serialized.
    #[serde(skip_serializing)]
    pub search_index: NoteSearchIndex,
}

/// Precomputed, normalized views of a note used by the `matches_*` lookups.
///
/// Each surface (path, title, body, headings, wikilinks) is stored twice:
/// as `normalize_text` output for multi-word phrase matching and as a
/// `tokenize` token set for single-word matching.
#[derive(Debug, Clone, Default)]
pub struct NoteSearchIndex {
    /// `normalize_text` of the note's relative path.
    normalized_path: String,
    /// `normalize_text` of the note's title.
    normalized_title: String,
    /// `normalize_text` of the note's raw content.
    normalized_body: String,
    /// `normalize_text` of each section heading; empty results are dropped.
    normalized_headings: Vec<String>,
    /// `normalize_text` of each wikilink; empty results are dropped.
    normalized_wikilinks: Vec<String>,
    /// `tokenize` of the note's relative path.
    path_tokens: BTreeSet<String>,
    /// `tokenize` of the note's title.
    title_tokens: BTreeSet<String>,
    /// `tokenize` of the note's raw content.
    body_tokens: BTreeSet<String>,
    /// Union of `tokenize` over every section heading.
    heading_tokens: BTreeSet<String>,
    /// Union of `tokenize` over every wikilink.
    wikilink_tokens: BTreeSet<String>,
}

/// Serializable view of a [`ScoredNote`]: the ranking result without the
/// full note.
///
/// `memory_type`, `sensitivity` and `source_of_truth` are carried for
/// in-process use only: they are skipped during serialization and omitted
/// from the generated TypeScript type.
#[derive(Debug, Clone, Serialize, PartialEq, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct CandidateNote {
    /// Relative path of the underlying note.
    pub relative_path: String,
    /// Title of the underlying note.
    pub title: String,
    /// Total score assigned to the note.
    pub score: i32,
    /// Human-readable explanations for the score.
    pub reasons: Vec<String>,
    /// Structured rationale. When non-empty, the contribution weights are
    /// expected to sum to `score`. Empty when a scored note is constructed
    /// without per-contribution tracking (legacy paths). Tools should
    /// prefer this over `reasons` when available.
    // NOTE(review): `default` only influences deserialization, and this
    // type derives only `Serialize` here — confirm whether it is still needed.
    #[serde(default)]
    pub score_breakdown: Vec<ScoreContribution>,
    /// Confidence tier assigned to the note.
    pub confidence: ConfidenceTier,
    /// Short text snippet shown alongside the candidate.
    pub excerpt: String,
    /// Frontmatter `memory_type`, if it is present as a string.
    #[serde(skip_serializing)]
    #[ts(skip)]
    pub memory_type: Option<String>,
    /// Frontmatter `sensitivity`, if it is present as a string.
    #[serde(skip_serializing)]
    #[ts(skip)]
    pub sensitivity: Option<String>,
    /// Frontmatter `source_of_truth` flag; `false` when absent or not a
    /// boolean.
    #[serde(skip_serializing)]
    #[ts(skip)]
    pub source_of_truth: bool,
}

/// A full [`Note`] paired with its ranking result.
///
/// This is the in-process form; `to_candidate` produces the lighter
/// [`CandidateNote`] that can be serialized.
#[derive(Debug, Clone)]
pub struct ScoredNote {
    /// The note that was scored (owned copy).
    pub note: Note,
    /// Total score assigned to the note.
    pub score: i32,
    /// Human-readable explanations for the score.
    pub reasons: Vec<String>,
    /// Structured per-contribution rationale; may be empty when the score
    /// was produced without contribution tracking.
    pub score_breakdown: Vec<ScoreContribution>,
    /// Confidence tier assigned to the note.
    pub confidence: ConfidenceTier,
    /// Short text snippet taken from the note.
    pub excerpt: String,
}

impl ScoredNote {
    /// Builds the serializable [`CandidateNote`] view of this scored note.
    ///
    /// All owned data is cloned, so `self` stays usable afterwards. The
    /// frontmatter-derived fields (`memory_type`, `sensitivity`,
    /// `source_of_truth`) are read through the note's accessors.
    pub fn to_candidate(&self) -> CandidateNote {
        let note = &self.note;
        let memory_type = note.memory_type().map(str::to_owned);
        let sensitivity = note.sensitivity().map(str::to_owned);
        let source_of_truth = note.source_of_truth();

        CandidateNote {
            relative_path: note.relative_path.clone(),
            title: note.title.clone(),
            score: self.score,
            reasons: self.reasons.clone(),
            score_breakdown: self.score_breakdown.clone(),
            confidence: self.confidence,
            excerpt: self.excerpt.clone(),
            memory_type,
            sensitivity,
            source_of_truth,
        }
    }
}

impl CandidateNote {
    pub fn from_scored(scored: &ScoredNote) -> Self {
        scored.to_candidate()
    }
}

impl From<&ScoredNote> for CandidateNote {
    fn from(value: &ScoredNote) -> Self {
        value.to_candidate()
    }
}

impl From<ScoredNote> for CandidateNote {
    fn from(value: ScoredNote) -> Self {
        value.to_candidate()
    }
}

impl Note {
    /// Wraps a clone of this note in a [`ScoredNote`] with the given score
    /// and reasons.
    ///
    /// The result carries no per-contribution breakdown, a `Medium`
    /// confidence tier, and the note's plain excerpt capped at 220
    /// characters.
    pub fn to_scored(&self, score: i32, reasons: Vec<String>) -> ScoredNote {
        const EXCERPT_CHARS: usize = 220;

        let excerpt = self.excerpt(EXCERPT_CHARS);
        let note = self.clone();
        ScoredNote {
            note,
            score,
            reasons,
            score_breakdown: Vec::new(),
            confidence: ConfidenceTier::Medium,
            excerpt,
        }
    }
}

impl Note {
    /// Creates a note and derives its search index from the supplied path,
    /// title, sections, wikilinks and raw content.
    pub fn new(
        path: PathBuf,
        relative_path: String,
        title: String,
        frontmatter: BTreeMap<String, serde_json::Value>,
        sections: Vec<Section>,
        wikilinks: Vec<String>,
        raw_content: String,
    ) -> Self {
        // The index only borrows its inputs, so build it before the owned
        // values are moved into the struct.
        let index = NoteSearchIndex::build(
            relative_path.as_str(),
            title.as_str(),
            sections.as_slice(),
            wikilinks.as_slice(),
            raw_content.as_str(),
        );

        Self {
            path,
            relative_path,
            title,
            frontmatter,
            sections,
            wikilinks,
            raw_content,
            search_index: index,
        }
    }

    /// Returns the frontmatter value stored under `key` when it is a string;
    /// `None` if the key is missing or holds another JSON type.
    pub fn frontmatter_str(&self, key: &str) -> Option<&str> {
        self.frontmatter.get(key)?.as_str()
    }

    /// Returns `true` only when the frontmatter value under `key` is the
    /// boolean `true`; missing keys and non-boolean values yield `false`.
    pub fn frontmatter_bool(&self, key: &str) -> bool {
        matches!(
            self.frontmatter.get(key).and_then(|value| value.as_bool()),
            Some(true)
        )
    }

    /// Frontmatter `memory_type`, if present as a string.
    pub fn memory_type(&self) -> Option<&str> {
        self.frontmatter_str("memory_type")
    }

    /// Frontmatter `sensitivity`, if present as a string.
    pub fn sensitivity(&self) -> Option<&str> {
        self.frontmatter_str("sensitivity")
    }

    /// Frontmatter `source_of_truth` flag (`false` when absent).
    pub fn source_of_truth(&self) -> bool {
        self.frontmatter_bool("source_of_truth")
    }

    /// Returns up to `max_chars` characters from the first section whose
    /// trimmed content is non-empty, falling back to the trimmed raw
    /// content when every section is blank.
    pub fn excerpt(&self, max_chars: usize) -> String {
        let mut source = self.raw_content.trim();
        for section in &self.sections {
            let trimmed = section.content.trim();
            if !trimmed.is_empty() {
                source = trimmed;
                break;
            }
        }
        source.chars().take(max_chars).collect()
    }

    /// Returns an excerpt from the section that best matches the route
    /// input.
    ///
    /// Search terms are the tokens of `input.task` followed by the tokens
    /// of each entry in `input.files` that are at least three characters
    /// long. Sections are visited in order and a later section replaces the
    /// current pick only when it scores strictly higher and yields a
    /// non-empty excerpt. If no section scores above zero, the plain
    /// [`Note::excerpt`] is returned.
    pub fn excerpt_for_input(&self, input: &RouteInput, max_chars: usize) -> String {
        let mut terms: Vec<String> = tokenize(&input.task).into_iter().collect();
        for file in &input.files {
            terms.extend(
                tokenize(file)
                    .into_iter()
                    .filter(|segment| segment.chars().count() >= 3),
            );
        }

        // (score, excerpt) of the best section seen so far.
        let mut best: Option<(i32, String)> = None;
        for section in &self.sections {
            let score = score_section_for_terms(self, section, &terms);
            let score_to_beat = best.as_ref().map_or(0, |(top, _)| *top);
            if score <= score_to_beat {
                continue;
            }
            let candidate = build_section_excerpt_for_terms(section, &terms, max_chars);
            if !candidate.is_empty() {
                best = Some((score, candidate));
            }
        }

        match best {
            Some((_, excerpt)) => excerpt,
            None => self.excerpt(max_chars),
        }
    }
}

/// Scores how well `section` (within `note`) matches the given terms.
///
/// Each term adds, independently:
/// * 5 when it matches the note title,
/// * 8 when it is one of the section heading's tokens,
/// * 6 when it matches any of the note's wikilinks,
/// * 4 when it is one of the section body's tokens.
///
/// Title and wikilink matches are note-level, so they contribute the same
/// amount to every section of a note; only the heading and body parts
/// distinguish sections from one another.
fn score_section_for_terms(note: &Note, section: &Section, terms: &[String]) -> i32 {
    if terms.is_empty() {
        return 0;
    }

    // Tokenize the section once instead of once per term. A missing or
    // blank heading yields an empty token set, so it can never match.
    let heading_tokens = tokenize(section.heading.as_deref().unwrap_or_default());
    let body_tokens = tokenize(section.content.as_str());

    terms.iter().fold(0, |score, term| {
        let mut next = score;
        if note.search_index.matches_title(term) {
            next += 5;
        }
        if heading_tokens.contains(term) {
            next += 8;
        }
        if note.search_index.matches_wikilink(term) {
            next += 6;
        }
        if body_tokens.contains(term) {
            next += 4;
        }
        next
    })
}

/// Renders a section as `"heading: body"` (or whichever of the two is
/// non-empty after trimming) and truncates the result to `max_chars`
/// characters, counted from the start.
fn build_section_excerpt(section: &Section, max_chars: usize) -> String {
    let title = section.heading.as_deref().unwrap_or_default().trim();
    let text = section.content.trim();

    let rendered = match (title.is_empty(), text.is_empty()) {
        (true, _) => text.to_string(),
        (false, true) => title.to_string(),
        (false, false) => format!("{title}: {text}"),
    };

    rendered.chars().take(max_chars).collect()
}

/// Term-aware variant of [`build_section_excerpt`]: the body part of the
/// excerpt is a window positioned around the earliest term hit, so the
/// excerpt shows the text that was actually scored.
///
/// Falls back to the plain start-of-section rendering when there are no
/// terms or when `max_chars` leaves no room for body text after the
/// `"heading: "` prefix. When no term occurs in the body, the body part is
/// simply the head of the body.
fn build_section_excerpt_for_terms(
    section: &Section,
    terms: &[String],
    max_chars: usize,
) -> String {
    let heading = section.heading.as_deref().unwrap_or_default().trim();
    // Characters consumed by "heading: " (the heading plus ": ").
    let prefix_len = match heading {
        "" => 0,
        text => text.chars().count() + 2,
    };
    if terms.is_empty() || max_chars <= prefix_len {
        return build_section_excerpt(section, max_chars);
    }

    let body = section.content.trim();
    let body_budget = max_chars - prefix_len;
    let body_window = locate_first_term(body, terms).map_or_else(
        || body.chars().take(body_budget).collect::<String>(),
        |byte_pos| window_around_byte(body, byte_pos, body_budget),
    );

    match (heading.is_empty(), body_window.is_empty()) {
        (true, _) => body_window,
        (false, true) => heading.to_string(),
        (false, false) => format!("{heading}: {body_window}"),
    }
}

/// Case-insensitive substring search across all terms; returns the byte
/// offset **into `body`** of the earliest hit so the excerpt window can be
/// anchored there. Empty terms are ignored; `None` means no term occurs.
///
/// The search runs on a lowercased copy of `body`. Lowercasing can change
/// byte lengths (for example `İ` grows from two to three bytes and the
/// Kelvin sign shrinks from three bytes to one), so an offset found in the
/// lowered copy is not necessarily a valid — or even char-aligned — offset
/// in `body`. The hit is therefore mapped back to the start of the
/// original character that produced it.
fn locate_first_term(body: &str, terms: &[String]) -> Option<usize> {
    let body_lower = body.to_lowercase();
    let lowered_pos = terms
        .iter()
        .filter(|term| !term.is_empty())
        .filter_map(|term| {
            let needle = term.to_lowercase();
            body_lower.find(needle.as_str())
        })
        .min()?;

    // ASCII lowercasing never changes byte lengths, so offsets coincide.
    if body.is_ascii() {
        return Some(lowered_pos);
    }

    // Walk the original characters while tracking how many bytes each one
    // occupies once lowercased; the first character whose lowered form
    // extends past `lowered_pos` is the one containing the hit.
    let mut lowered_end = 0usize;
    for (original_offset, ch) in body.char_indices() {
        lowered_end += ch.to_lowercase().map(char::len_utf8).sum::<usize>();
        if lowered_end > lowered_pos {
            return Some(original_offset);
        }
    }

    // Not expected to be reached (a hit always lies inside the lowered
    // text); clamp to the end of the body rather than panic.
    Some(body.len())
}

/// Take a `max_chars`-wide window of `body` centered roughly on the
/// character containing `byte_pos`. ~30% of the budget is reserved
/// for context BEFORE the hit, the rest goes after; the result is
/// prefixed/suffixed with `…` when it does not start/end at the
/// section boundary.
///
/// The ellipses are added on top of the `max_chars` window, so the
/// returned string can be up to two characters longer than `max_chars`.
///
/// `byte_pos` may be any value: offsets past the end are clamped to
/// `body.len()`, and an offset that falls inside a multi-byte character
/// is moved back to the start of that character instead of panicking.
/// Returns an empty string when `max_chars` is 0 or `body` is empty, and
/// the whole body when it already fits within `max_chars`.
fn window_around_byte(body: &str, byte_pos: usize, max_chars: usize) -> String {
    if max_chars == 0 || body.is_empty() {
        return String::new();
    }
    let total_chars = body.chars().count();
    if total_chars <= max_chars {
        return body.to_string();
    }

    // Slicing a `str` at a non-boundary offset panics, so floor the offset
    // to a char boundary first. Offset 0 is always a boundary, which
    // guarantees the loop terminates.
    let mut safe_pos = byte_pos.min(body.len());
    while !body.is_char_boundary(safe_pos) {
        safe_pos -= 1;
    }

    let char_pos = body[..safe_pos].chars().count();
    let padding_before = (max_chars as f64 * 0.3) as usize;
    let start_char = char_pos.saturating_sub(padding_before);
    // No overflow: `max_chars < total_chars` at this point.
    let end_char = (start_char + max_chars).min(total_chars);
    let mut out: String = body
        .chars()
        .skip(start_char)
        .take(end_char - start_char)
        .collect();
    if end_char < total_chars {
        out.push('…');
    }
    if start_char > 0 {
        out.insert(0, '…');
    }
    out
}

impl NoteSearchIndex {
    pub fn build(
        relative_path: &str,
        title: &str,
        sections: &[Section],
        wikilinks: &[String],
        raw_content: &str,
    ) -> Self {
        let normalized_path = normalize_text(relative_path);
        let normalized_title = normalize_text(title);
        let normalized_body = normalize_text(raw_content);
        let normalized_headings = sections
            .iter()
            .filter_map(|section| section.heading.as_ref())
            .map(|heading| normalize_text(heading))
            .filter(|heading| !heading.is_empty())
            .collect::<Vec<_>>();
        let normalized_wikilinks = wikilinks
            .iter()
            .map(|link| normalize_text(link))
            .filter(|link| !link.is_empty())
            .collect::<Vec<_>>();

        Self {
            normalized_path,
            normalized_title,
            normalized_body,
            normalized_headings,
            normalized_wikilinks,
            path_tokens: tokenize(relative_path),
            title_tokens: tokenize(title),
            body_tokens: tokenize(raw_content),
            heading_tokens: sections
                .iter()
                .filter_map(|section| section.heading.as_ref())
                .flat_map(|heading| tokenize(heading))
                .collect(),
            wikilink_tokens: wikilinks.iter().flat_map(|link| tokenize(link)).collect(),
        }
    }

    pub fn matches_path(&self, term: &str) -> bool {
        normalized_contains(&self.normalized_path, &self.path_tokens, term)
    }

    pub fn matches_title(&self, term: &str) -> bool {
        normalized_contains(&self.normalized_title, &self.title_tokens, term)
    }

    pub fn matches_body(&self, term: &str) -> bool {
        normalized_contains(&self.normalized_body, &self.body_tokens, term)
    }

    pub fn matches_heading(&self, term: &str) -> bool {
        self.normalized_headings
            .iter()
            .any(|heading| normalized_contains(heading, &self.heading_tokens, term))
    }

    pub fn matches_wikilink(&self, term: &str) -> bool {
        self.normalized_wikilinks
            .iter()
            .any(|link| normalized_contains(link, &self.wikilink_tokens, term))
    }
}

/// Lowercases `input` and reduces it to alphanumeric words separated by
/// single spaces.
///
/// * Every run of non-alphanumeric characters becomes one space.
/// * A space is inserted where an uppercase letter directly follows a
///   lowercase letter or a digit, so `repoPath` becomes `repo path`;
///   runs of capitals such as `HTTPServer` are not split.
/// * Leading and trailing separators are dropped.
pub(crate) fn normalize_text(input: &str) -> String {
    let mut out = String::new();
    // True while the previous character was a lowercase letter or a digit,
    // i.e. an uppercase letter arriving next starts a new camelCase word.
    let mut word_break_armed = false;

    for ch in input.chars() {
        if !ch.is_alphanumeric() {
            // Collapse separator runs and never emit a leading space.
            if !out.is_empty() && !out.ends_with(' ') {
                out.push(' ');
            }
            word_break_armed = false;
            continue;
        }

        if word_break_armed && ch.is_uppercase() {
            out.push(' ');
        }
        out.extend(ch.to_lowercase());
        word_break_armed = ch.is_lowercase() || ch.is_numeric();
    }

    out.trim().to_string()
}

/// Splits `input` into its set of normalized words, keeping only words that
/// are at least two characters long.
pub(crate) fn tokenize(input: &str) -> BTreeSet<String> {
    let normalized = normalize_text(input);
    let mut tokens = BTreeSet::new();
    for word in normalized.split_whitespace() {
        // A second character exists exactly when the word has >= 2 chars.
        if word.chars().nth(1).is_some() {
            tokens.insert(word.to_owned());
        }
    }
    tokens
}

/// Checks whether `term` occurs in an indexed surface.
///
/// `term` is normalized first. A term that normalizes to nothing never
/// matches. A single-word term must be present in `tokens`; a multi-word
/// term must appear in `haystack` as a whole-word phrase, which is enforced
/// by padding both sides with spaces before the substring test.
fn normalized_contains(haystack: &str, tokens: &BTreeSet<String>, term: &str) -> bool {
    let needle = normalize_text(term);
    match needle.as_str() {
        "" => false,
        phrase if phrase.contains(' ') => {
            let padded_haystack = [" ", haystack, " "].concat();
            let padded_phrase = [" ", phrase, " "].concat();
            padded_haystack.contains(padded_phrase.as_str())
        }
        _ => tokens.contains(&needle),
    }
}

#[cfg(test)]
mod tests {
    use super::Note;
    use crate::domain::{RouteInput, Section};
    use serde_json::json;
    use std::collections::BTreeMap;
    use std::path::PathBuf;

    /// Level-1 section with the given heading and content.
    fn section(heading: &str, content: &str) -> Section {
        Section {
            heading: Some(heading.to_string()),
            level: 1,
            content: content.to_string(),
        }
    }

    /// Note stored as `/tmp/vault/<file>` (relative path
    /// `10-Projects/<file>`) with no wikilinks.
    fn note(
        file: &str,
        title: &str,
        frontmatter: BTreeMap<String, serde_json::Value>,
        sections: Vec<Section>,
        raw_content: &str,
    ) -> Note {
        Note::new(
            PathBuf::from(format!("/tmp/vault/{file}")),
            format!("10-Projects/{file}"),
            title.to_string(),
            frontmatter,
            sections,
            Vec::new(),
            raw_content.to_string(),
        )
    }

    /// Route input for the Codex target in prompt format, rooted at
    /// `/tmp/repo`.
    fn route_input(task: &str, files: &[&str]) -> RouteInput {
        RouteInput {
            task: task.to_string(),
            cwd: PathBuf::from("/tmp/repo"),
            files: files.iter().map(|file| file.to_string()).collect(),
            target: crate::domain::TargetTool::Codex,
            format: crate::domain::OutputFormat::Prompt,
        }
    }

    #[test]
    fn note_should_expose_structured_frontmatter_fields() {
        let frontmatter = BTreeMap::from([
            ("memory_type".to_string(), json!("constraint")),
            ("sensitivity".to_string(), json!("internal")),
            ("source_of_truth".to_string(), json!(true)),
        ]);
        let note = note(
            "note.md",
            "Note",
            frontmatter,
            vec![section("Heading", "Body")],
            "Body",
        );

        assert_eq!(note.memory_type(), Some("constraint"));
        assert_eq!(note.sensitivity(), Some("internal"));
        assert!(note.source_of_truth());
    }

    #[test]
    fn excerpt_should_prefer_first_non_empty_section() {
        // The first section is whitespace-only and must be skipped.
        let note = note(
            "note.md",
            "Note",
            BTreeMap::new(),
            vec![
                section("Empty", "   "),
                section("Real", "Useful excerpt lives here"),
            ],
            "Fallback body",
        );

        assert_eq!(note.excerpt(12), "Useful excer");
    }

    #[test]
    fn excerpt_for_input_should_prefer_best_matching_section() {
        let note = note(
            "note.md",
            "Project Notes",
            BTreeMap::new(),
            vec![
                section("Background", "General overview"),
                section(
                    "Deploy Constraints",
                    "Use internal rollout policy for deploy credentials",
                ),
            ],
            "Fallback body",
        );
        let input = route_input("deploy credentials", &["infra/deploy.rs"]);

        let excerpt = note.excerpt_for_input(&input, 80);
        assert!(excerpt.contains("Deploy Constraints"));
    }

    #[test]
    fn excerpt_for_input_should_anchor_window_around_first_term_hit() {
        // Long body where the matching term lives well past the
        // start. The legacy renderer would have truncated before the
        // hit; the term-window renderer must include the term and
        // mark the trailing/leading edges with ellipses.
        let prefix = "Lorem ipsum dolor sit amet ".repeat(30);
        let body = format!("{prefix}repo_path matcher inside body section");
        let note = note(
            "long.md",
            "Long Note",
            BTreeMap::new(),
            vec![section("Background", &body)],
            &body,
        );
        let input = route_input("fix repo_path matcher", &[]);

        let excerpt = note.excerpt_for_input(&input, 120);
        assert!(
            excerpt.to_lowercase().contains("repo_path"),
            "term-window excerpt must contain the matched term: {excerpt}"
        );
        assert!(
            excerpt.contains('…'),
            "ellipsis required when the window is not at the section boundary: {excerpt}"
        );
    }

    #[test]
    fn excerpt_for_input_should_fall_back_to_start_when_no_term_hits() {
        // Section that does not contain any of the input terms — the
        // renderer should silently return the head of the body
        // without ellipsis or panic.
        let body = "Just some general background text without matches";
        let note = note(
            "n.md",
            "Title",
            BTreeMap::new(),
            vec![section("Heading", body)],
            body,
        );
        // The task matches the note title only.
        let input = route_input("Title", &[]);

        let excerpt = note.excerpt_for_input(&input, 200);
        assert!(
            excerpt.contains("Heading") || excerpt.contains("background"),
            "fall-through excerpt must still include some content: {excerpt}"
        );
    }
}