cartulary 0.3.0-alpha.1

The knowledge layer of your project — decisions, issues, docs, all in one place.
Documentation
use std::path::PathBuf;

use nucleo::pattern::{CaseMatching, Normalization, Pattern};
use nucleo::{Config, Matcher, Utf32Str};

use crate::domain::model::decision_record::DecisionRecord;
use crate::domain::model::entity_ref::EntityRef;
use crate::domain::model::issue::Issue;
use crate::domain::model::record_kind::RecordKind;
use crate::domain::model::search::SearchHit;
use crate::domain::model::title::Title;
use crate::domain::usecases::search::SearchRepository;
use crate::infra::driven::fs::decision_record_repository::DrParseCtx;
use crate::infra::driven::fs::repository_pipeline;

/// A candidate record to be matched against the query.
///
/// One value is built by `NucleoSearchRepository::collect_candidates` for
/// every issue or decision record that parses successfully and whose id and
/// kind pass validation.
struct Candidate {
    /// Reference built from the parsed record's `id`.
    id: EntityRef,
    /// Record kind: `"issue"` for issues, otherwise the configured decision
    /// record kind string.
    kind: RecordKind,
    /// Title of the parsed record; cloned into the resulting search hit.
    title: Title,
    /// The text fed to the matcher: `"<title> <body>"`. Also the text the
    /// hit's excerpt is cut from.
    haystack: String,
}

/// Filesystem search adapter powered by `nucleo-matcher`.
///
/// Scans all issues and decision records on each invocation (no persistent
/// index). Acceptable for knowledge bases up to a few hundred records.
///
/// Records are expected to live one per sub-directory, each holding an
/// `index.md` file; entries without such a file, or whose file fails to
/// parse, are skipped silently when candidates are collected.
pub struct NucleoSearchRepository {
    /// Issues directory. Each entry inside it is probed for an `index.md`.
    pub issues_dir: PathBuf,
    /// `(kind, dir)` pairs for each configured decision record kind. The
    /// kind string is handed to the parser context and becomes the kind of
    /// every hit found under `dir`.
    pub decision_kinds: Vec<(String, PathBuf)>,
    /// Workspace schema version, threaded into the shared parse pipeline so
    /// canonical ids and link targets are validated against the right shape.
    pub schema_version: u32,
}

impl NucleoSearchRepository {
    /// Walks the issues directory and every configured decision record
    /// directory, returning one [`Candidate`] per record that could be parsed.
    ///
    /// The scan is best-effort: unreadable directories, unreadable entries,
    /// entries without an `index.md`, parse failures and records whose id or
    /// kind fail validation are all skipped without reporting an error.
    /// Issues come first, followed by decision records in the order of
    /// `decision_kinds`; within a directory the order is whatever
    /// `read_dir` yields.
    fn collect_candidates(&self) -> Vec<Candidate> {
        let mut found = Vec::new();

        // ── Issues ────────────────────────────────────────────────────────────
        // A failed `read_dir` yields no entries; failed entries are dropped.
        let issue_entries = std::fs::read_dir(&self.issues_dir)
            .into_iter()
            .flatten()
            .flatten();
        for dir_entry in issue_entries {
            let index_path = dir_entry.path().join("index.md");
            if !index_path.exists() {
                continue;
            }

            let parsed =
                repository_pipeline::parse_one::<Issue>(&index_path, self.schema_version, &());
            let issue = match parsed {
                Ok((issue, _raw_events)) => issue,
                Err(_) => continue,
            };

            // Build the matcher text before the title is moved into the candidate.
            let haystack = format!("{} {}", issue.title.as_str(), issue.content.as_str());
            let id = EntityRef::new(issue.id.to_string());
            let kind = RecordKind::new("issue");
            if let (Ok(id), Ok(kind)) = (id, kind) {
                found.push(Candidate {
                    id,
                    kind,
                    title: issue.title,
                    haystack,
                });
            }
        }

        // ── Decision records ──────────────────────────────────────────────────
        for (kind_str, dir) in &self.decision_kinds {
            let record_entries = std::fs::read_dir(dir).into_iter().flatten().flatten();
            for dir_entry in record_entries {
                let record_dir = dir_entry.path();
                if !record_dir.is_dir() {
                    continue;
                }
                let index_path = record_dir.join("index.md");
                if !index_path.exists() {
                    continue;
                }

                let ctx = DrParseCtx {
                    kind: kind_str.clone(),
                };
                let parsed = repository_pipeline::parse_one::<DecisionRecord>(
                    &index_path,
                    self.schema_version,
                    &ctx,
                );
                let record = match parsed {
                    Ok((record, _raw_events)) => record,
                    Err(_) => continue,
                };

                // Build the matcher text before the title is moved into the candidate.
                let haystack = format!("{} {}", record.title.as_str(), record.content.as_str());
                let id = EntityRef::new(record.id.to_string());
                let kind = RecordKind::new(kind_str);
                if let (Ok(id), Ok(kind)) = (id, kind) {
                    found.push(Candidate {
                        id,
                        kind,
                        title: record.title,
                        haystack,
                    });
                }
            }
        }

        found
    }
}

impl SearchRepository for NucleoSearchRepository {
    /// Matches `query` against every record currently on disk and returns
    /// the matching records, highest score first.
    ///
    /// The query is parsed as a case-insensitive pattern with smart
    /// normalization. Candidates the pattern does not match are dropped.
    /// Candidates with equal scores keep their collection order because the
    /// sort is stable. As written, this method always returns `Ok`.
    fn search(&self, query: &str) -> anyhow::Result<Vec<SearchHit>> {
        let candidates = self.collect_candidates();

        let pattern = Pattern::parse(query, CaseMatching::Ignore, Normalization::Smart);
        let mut matcher = Matcher::new(Config::DEFAULT);

        // Each candidate is scored on its own instead of going through
        // `match_list`: that helper resolves items opaquely, so candidates
        // whose displayed text is equal as strings would collide.
        let mut utf32_scratch: Vec<char> = Vec::new();
        let mut scored: Vec<(usize, u32)> = Vec::new();
        for (position, candidate) in candidates.iter().enumerate() {
            utf32_scratch.clear();
            let text = Utf32Str::new(&candidate.haystack, &mut utf32_scratch);
            if let Some(score) = pattern.score(text, &mut matcher) {
                scored.push((position, score));
            }
        }

        // Stable sort, best score first.
        scored.sort_by(|left, right| right.1.cmp(&left.1));

        let mut hits: Vec<SearchHit> = Vec::with_capacity(scored.len());
        for (position, _score) in scored {
            let candidate = &candidates[position];
            hits.push(SearchHit {
                id: candidate.id.clone(),
                kind: candidate.kind.clone(),
                title: candidate.title.clone(),
                excerpt: make_excerpt(&candidate.haystack, query),
            });
        }

        Ok(hits)
    }
}

/// Build a short excerpt of `haystack` around the first case-insensitive
/// occurrence of `query`.
///
/// The excerpt keeps up to 30 bytes of context on each side of the match,
/// widened outwards to the nearest char boundaries so a multi-byte character
/// is never split. A leading `…` is added when text before the window was cut
/// off, and a trailing `…` when text after it was.
///
/// Returns `None` if the query is not found verbatim (case-insensitive). An
/// empty query matches at the start of the haystack.
fn make_excerpt(haystack: &str, query: &str) -> Option<String> {
    // Bytes of context kept on each side of the match.
    const CONTEXT_BYTES: usize = 30;

    // Lowercase char by char, recording for every byte of the lowered text the
    // byte offset in `haystack` of the char that produced it. Lowercasing can
    // change a char's byte length (e.g. `İ` grows from 2 to 3 bytes), so an
    // offset found in the lowered text cannot be used on the original as-is.
    let mut lowered = String::with_capacity(haystack.len());
    let mut origin: Vec<usize> = Vec::with_capacity(haystack.len() + 1);
    for (offset, ch) in haystack.char_indices() {
        lowered.extend(ch.to_lowercase());
        origin.resize(lowered.len(), offset);
    }
    // Sentinel so a position equal to `lowered.len()` still maps to something.
    origin.push(haystack.len());

    // Lowercase the query the same per-char way so both sides fold identically.
    let lowered_query: String = query.chars().flat_map(char::to_lowercase).collect();
    let pos = lowered.find(&lowered_query)?;

    // Translate the match back into byte offsets of the original text.
    let match_start = origin[pos];
    let match_end = match lowered_query.len() {
        0 => match_start,
        len => {
            // End of the original char that produced the last matched byte.
            let last_char_start = origin[pos + len - 1];
            let last_char_len = haystack[last_char_start..]
                .chars()
                .next()
                .map_or(0, char::len_utf8);
            last_char_start + last_char_len
        }
    };

    let start = match_start.saturating_sub(CONTEXT_BYTES);
    let end = match_end.saturating_add(CONTEXT_BYTES).min(haystack.len());

    // Snap to char boundaries: start moves backwards, end moves forwards.
    let snapped_start = (0..=start)
        .rev()
        .find(|&i| haystack.is_char_boundary(i))
        .unwrap_or(0);
    let snapped_end = (end..=haystack.len())
        .find(|&i| haystack.is_char_boundary(i))
        .unwrap_or(haystack.len());

    // Mark truncation only where text was actually left out.
    let prefix = if snapped_start > 0 { "…" } else { "" };
    let suffix = if snapped_end < haystack.len() {
        "…"
    } else {
        ""
    };

    Some(format!(
        "{}{}{}",
        prefix,
        &haystack[snapped_start..snapped_end],
        suffix
    ))
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Writes `content` to `dir/path`, creating any missing parent directories.
    fn write_file(dir: &std::path::Path, path: &str, content: &str) {
        let target = dir.join(path);
        let parent = target.parent().unwrap();
        std::fs::create_dir_all(parent).unwrap();
        std::fs::write(&target, content).unwrap();
    }

    /// Repository over `docs/issues` and a single `adr` kind at `docs/adr`,
    /// both under `root`, using schema version 3.
    fn repo_under(root: &std::path::Path) -> NucleoSearchRepository {
        NucleoSearchRepository {
            issues_dir: root.join("docs/issues"),
            decision_kinds: vec![("adr".into(), root.join("docs/adr"))],
            schema_version: 3,
        }
    }

    /// A lone ADR mentioning the query is returned as the first hit.
    #[test]
    fn search_returns_decision_record_hit() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        write_file(
            root,
            "docs/adr/0001-use-rust/index.md",
            "---\nid: ADR-0001\ntitle: \"Use Rust\"\nstatus: accepted\ndate: 2026-01-01\n---\n\nWe chose Rust for performance.\n",
        );

        let hits = repo_under(root).search("Rust").unwrap();

        assert!(!hits.is_empty(), "expected hits for 'Rust', got none");
        assert_eq!(hits[0].id.to_string(), "ADR-0001");
    }

    /// An ADR stays in the results when an issue matches the same query.
    #[test]
    fn search_returns_dr_hit_when_issue_also_matches() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        // ADR with the keyword in its title.
        write_file(
            root,
            "docs/adr/0002-hexagonal/index.md",
            "---\nid: ADR-0002\ntitle: \"Hexagonal Architecture\"\nstatus: accepted\ndate: 2026-01-01\n---\n\nDomain layer is isolated.\n",
        );
        // Issue that mentions "hexagonal" in its body.
        write_file(
            root,
            "docs/issues/0001-something/index.md",
            "---\nid: ISSUE-0001\ntitle: \"Something\"\ntype: feature\nstatus: open\ndate: 2026-01-01\n---\n\nSee ADR-0002 for hexagonal details.\n",
        );

        let hits = repo_under(root).search("hexagonal").unwrap();

        let mut ids: Vec<String> = Vec::with_capacity(hits.len());
        for hit in &hits {
            ids.push(hit.id.to_string());
        }
        assert!(
            ids.iter().any(|id| id.as_str() == "ADR-0002"),
            "ADR-0002 missing from hits: {ids:?}"
        );
    }

    /// Candidate collection picks up a decision record with its id and kind.
    #[test]
    fn collects_decision_record_candidates() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        write_file(
            root,
            "docs/adr/0001-use-rust/index.md",
            "---\nid: ADR-0001\ntitle: \"Use Rust\"\nstatus: accepted\ndate: 2026-01-01\n---\n\nWe chose Rust for performance.\n",
        );

        let candidates = repo_under(root).collect_candidates();

        assert_eq!(
            candidates.len(),
            1,
            "expected 1 candidate, got {}",
            candidates.len()
        );
        let only = &candidates[0];
        assert_eq!(only.id.to_string(), "ADR-0001");
        assert_eq!(only.kind.as_str(), "adr");
    }
}