use std::path::PathBuf;
use nucleo::pattern::{CaseMatching, Normalization, Pattern};
use nucleo::{Config, Matcher, Utf32Str};
use crate::domain::model::decision_record::DecisionRecord;
use crate::domain::model::entity_ref::EntityRef;
use crate::domain::model::issue::Issue;
use crate::domain::model::record_kind::RecordKind;
use crate::domain::model::search::SearchHit;
use crate::domain::model::title::Title;
use crate::domain::usecases::search::SearchRepository;
use crate::infra::driven::fs::decision_record_repository::DrParseCtx;
use crate::infra::driven::fs::repository_pipeline;
/// One searchable record gathered from disk, together with the text it is
/// matched against.
struct Candidate {
/// Reference built from the parsed record's id.
id: EntityRef,
/// Record kind: `"issue"` for issues, or the configured kind name for
/// decision records.
kind: RecordKind,
/// Title taken from the parsed record.
title: Title,
/// Text fed to the fuzzy matcher: the record's title and content joined by a
/// single space.
haystack: String,
}
/// Search repository that re-reads records from the filesystem on every query
/// and ranks them with the `nucleo` fuzzy matcher.
pub struct NucleoSearchRepository {
/// Directory whose entries are scanned for `<entry>/index.md` issue files.
pub issues_dir: PathBuf,
/// `(kind name, directory)` pairs; each directory's sub-directories are
/// scanned for `<entry>/index.md` decision-record files of that kind.
pub decision_kinds: Vec<(String, PathBuf)>,
/// Schema version forwarded to the parsing pipeline for every file.
pub schema_version: u32,
}
impl NucleoSearchRepository {
    /// Scans the configured directories and returns one [`Candidate`] per
    /// record that could be parsed.
    ///
    /// Issues (from `issues_dir`) come first, followed by decision records in
    /// `decision_kinds` order. Every record is read from `<entry>/index.md`.
    ///
    /// The scan is best-effort: unreadable directories or entries, missing
    /// index files, parse failures, and ids or kinds rejected by their
    /// constructors are all skipped silently.
    fn collect_candidates(&self) -> Vec<Candidate> {
        let mut found: Vec<Candidate> = Vec::new();

        // `Result::into_iter` yields the listing only when the directory could
        // be opened; the second `flatten` drops entries that failed to read.
        let issue_entries = std::fs::read_dir(&self.issues_dir)
            .into_iter()
            .flatten()
            .flatten();
        for dir_entry in issue_entries {
            let index_path = dir_entry.path().join("index.md");
            if !index_path.exists() {
                continue;
            }
            let issue = match repository_pipeline::parse_one::<Issue>(
                &index_path,
                self.schema_version,
                &(),
            ) {
                Ok((issue, _raw_events)) => issue,
                Err(_) => continue,
            };
            let haystack = format!("{} {}", issue.title.as_str(), issue.content.as_str());
            let (id, kind) = match (
                EntityRef::new(issue.id.to_string()),
                RecordKind::new("issue"),
            ) {
                (Ok(id), Ok(kind)) => (id, kind),
                _ => continue,
            };
            found.push(Candidate {
                id,
                kind,
                title: issue.title,
                haystack,
            });
        }

        for (kind_str, dir) in &self.decision_kinds {
            let record_entries = std::fs::read_dir(dir).into_iter().flatten().flatten();
            for dir_entry in record_entries {
                let record_dir = dir_entry.path();
                // Only sub-directories can hold a decision record.
                if !record_dir.is_dir() {
                    continue;
                }
                let index_path = record_dir.join("index.md");
                if !index_path.exists() {
                    continue;
                }
                let ctx = DrParseCtx {
                    kind: kind_str.clone(),
                };
                let record = match repository_pipeline::parse_one::<DecisionRecord>(
                    &index_path,
                    self.schema_version,
                    &ctx,
                ) {
                    Ok((record, _raw_events)) => record,
                    Err(_) => continue,
                };
                let haystack = format!("{} {}", record.title.as_str(), record.content.as_str());
                let (id, kind) = match (
                    EntityRef::new(record.id.to_string()),
                    RecordKind::new(kind_str),
                ) {
                    (Ok(id), Ok(kind)) => (id, kind),
                    _ => continue,
                };
                found.push(Candidate {
                    id,
                    kind,
                    title: record.title,
                    haystack,
                });
            }
        }

        found
    }
}
impl SearchRepository for NucleoSearchRepository {
fn search(&self, query: &str) -> anyhow::Result<Vec<SearchHit>> {
let candidates = self.collect_candidates();
let mut matcher = Matcher::new(Config::DEFAULT);
let pattern = Pattern::parse(query, CaseMatching::Ignore, Normalization::Smart);
let mut char_buf: Vec<char> = Vec::new();
let mut scored: Vec<(usize, u32)> = candidates
.iter()
.enumerate()
.filter_map(|(i, c)| {
char_buf.clear();
let haystack = Utf32Str::new(&c.haystack, &mut char_buf);
pattern.score(haystack, &mut matcher).map(|s| (i, s))
})
.collect();
scored.sort_by_key(|b| std::cmp::Reverse(b.1));
let hits: Vec<SearchHit> = scored
.into_iter()
.map(|(idx, _score)| {
let c = &candidates[idx];
SearchHit {
id: c.id.clone(),
kind: c.kind.clone(),
title: c.title.clone(),
excerpt: make_excerpt(&c.haystack, query),
}
})
.collect();
Ok(hits)
}
}
/// Builds a short excerpt of `haystack` around the first case-insensitive
/// literal occurrence of `query`.
///
/// Returns `None` when the lowercased `query` is not a substring of the
/// lowercased `haystack`. Otherwise the excerpt covers the match plus up to 30
/// bytes of context on each side, widened outward to the nearest character
/// boundaries. A `…` is added on each side where the excerpt does not reach
/// the corresponding end of `haystack`.
fn make_excerpt(haystack: &str, query: &str) -> Option<String> {
    // Bytes of context kept on each side of the match, before snapping.
    const CONTEXT_BYTES: usize = 30;

    let lower_haystack = haystack.to_lowercase();
    let lower_query = query.to_lowercase();
    let lower_start = lower_haystack.find(&lower_query)?;
    let lower_end = lower_start + lower_query.len();

    // Lowercasing can change a character's UTF-8 length (for example 'İ' grows
    // from 2 to 3 bytes), so offsets into `lower_haystack` are not valid
    // offsets into `haystack`. Walk the original characters, tracking how many
    // lowercased bytes each one produces, to translate the match range back.
    let mut match_start: Option<usize> = None;
    let mut match_end = haystack.len();
    let mut lowered_offset = 0usize;
    for (offset, ch) in haystack.char_indices() {
        let lowered_next = lowered_offset + ch.to_lowercase().map(char::len_utf8).sum::<usize>();
        if match_start.is_none() && lower_start < lowered_next {
            match_start = Some(offset);
        }
        if lower_end <= lowered_next {
            match_end = offset + ch.len_utf8();
            break;
        }
        lowered_offset = lowered_next;
    }
    let match_start = match_start.unwrap_or(haystack.len());
    // An empty query matches a zero-width span at the start.
    let match_end = if lower_query.is_empty() {
        match_start
    } else {
        match_end
    };

    let window_start = match_start.saturating_sub(CONTEXT_BYTES);
    let window_end = (match_end + CONTEXT_BYTES).min(haystack.len());

    // Widen the window outward so slicing never splits a multi-byte character.
    let start = (0..=window_start)
        .rev()
        .find(|&i| haystack.is_char_boundary(i))
        .unwrap_or(0);
    let end = (window_end..=haystack.len())
        .find(|&i| haystack.is_char_boundary(i))
        .unwrap_or(haystack.len());

    // Decide the ellipses from the snapped window so they reflect what was
    // actually cut off.
    let prefix = if start > 0 { "…" } else { "" };
    let suffix = if end < haystack.len() { "…" } else { "" };

    Some(format!("{prefix}{}{suffix}", &haystack[start..end]))
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// ADR fixture shared by the tests that only need a single decision record.
    const USE_RUST_ADR: &str = "---\nid: ADR-0001\ntitle: \"Use Rust\"\nstatus: accepted\ndate: 2026-01-01\n---\n\nWe chose Rust for performance.\n";

    /// Writes `content` to `dir/path`, creating any missing parent directories.
    fn write_file(dir: &std::path::Path, path: &str, content: &str) {
        let target = dir.join(path);
        let parent = target.parent().unwrap();
        std::fs::create_dir_all(parent).unwrap();
        std::fs::write(target, content).unwrap();
    }

    /// Builds a repository over the `docs/issues` and `docs/adr` trees under `root`.
    fn repo_under(root: &std::path::Path) -> NucleoSearchRepository {
        NucleoSearchRepository {
            issues_dir: root.join("docs/issues"),
            decision_kinds: vec![("adr".into(), root.join("docs/adr"))],
            schema_version: 3,
        }
    }

    #[test]
    fn search_returns_decision_record_hit() {
        let workspace = TempDir::new().unwrap();
        write_file(
            workspace.path(),
            "docs/adr/0001-use-rust/index.md",
            USE_RUST_ADR,
        );

        let hits = repo_under(workspace.path()).search("Rust").unwrap();

        assert!(!hits.is_empty(), "expected hits for 'Rust', got none");
        assert_eq!(hits[0].id.to_string(), "ADR-0001");
    }

    #[test]
    fn search_returns_dr_hit_when_issue_also_matches() {
        let workspace = TempDir::new().unwrap();
        write_file(
            workspace.path(),
            "docs/adr/0002-hexagonal/index.md",
            "---\nid: ADR-0002\ntitle: \"Hexagonal Architecture\"\nstatus: accepted\ndate: 2026-01-01\n---\n\nDomain layer is isolated.\n",
        );
        write_file(
            workspace.path(),
            "docs/issues/0001-something/index.md",
            "---\nid: ISSUE-0001\ntitle: \"Something\"\ntype: feature\nstatus: open\ndate: 2026-01-01\n---\n\nSee ADR-0002 for hexagonal details.\n",
        );

        let hits = repo_under(workspace.path()).search("hexagonal").unwrap();

        let mut ids: Vec<String> = Vec::with_capacity(hits.len());
        for hit in &hits {
            ids.push(hit.id.to_string());
        }
        assert!(
            ids.iter().any(|id| id == "ADR-0002"),
            "ADR-0002 missing from hits: {ids:?}"
        );
    }

    #[test]
    fn collects_decision_record_candidates() {
        let workspace = TempDir::new().unwrap();
        write_file(
            workspace.path(),
            "docs/adr/0001-use-rust/index.md",
            USE_RUST_ADR,
        );

        let candidates = repo_under(workspace.path()).collect_candidates();

        assert_eq!(
            candidates.len(),
            1,
            "expected 1 candidate, got {}",
            candidates.len()
        );
        let only = &candidates[0];
        assert_eq!(only.id.to_string(), "ADR-0001");
        assert_eq!(only.kind.as_str(), "adr");
    }
}