use std::path::PathBuf;
/// Builds a short plain-text preview of a document body.
///
/// The text returned by `super::frontmatter::get_body` is left-trimmed. If it
/// then begins with a level-one heading (`# `), that entire first line is
/// dropped and the remainder is left-trimmed again; deeper headings such as
/// `## ` are kept. The result is cut to at most `max_chars` characters
/// (Unicode scalar values, not bytes).
pub(crate) fn body_preview(content: &str, max_chars: usize) -> String {
    let text = super::frontmatter::get_body(content).trim_start();

    // Skip a leading title line. A title with nothing after it yields an
    // empty preview.
    let text = if text.starts_with("# ") {
        match text.split_once('\n') {
            Some((_title, rest)) => rest.trim_start(),
            None => "",
        }
    } else {
        text
    };

    // Cut on a char boundary so multi-byte characters are never split.
    match text.char_indices().nth(max_chars) {
        Some((cut, _)) => text[..cut].to_owned(),
        None => text.to_owned(),
    }
}
/// Compiles a case-insensitive regex that matches any word of `query`.
///
/// `query` is split on whitespace, each word is passed through
/// `regex::escape`, and the escaped words are joined into a single `|`
/// alternation prefixed with the `(?i)` flag. No word-boundary anchors are
/// added, so a query word also matches inside a longer word.
///
/// Returns `None` when `query` contains no words, or when the assembled
/// pattern fails to compile.
pub(crate) fn compile_query_word_regex(query: &str) -> Option<regex::Regex> {
    let mut pattern = String::new();
    for word in query.split_whitespace() {
        // Separator goes between alternatives only, never before the first.
        if !pattern.is_empty() {
            pattern.push('|');
        }
        pattern.push_str(&regex::escape(word));
    }

    if pattern.is_empty() {
        return None;
    }

    let compiled = regex::Regex::new(&format!("(?i){pattern}"));
    compiled.ok()
}
/// Min-max normalizes BM25 scores into the `[0.0, 1.0]` range.
///
/// Hits whose score is NaN or infinite are dropped; the remaining hits keep
/// their input order. The lowest finite score maps to `0.0` and the highest
/// to `1.0`. When all finite scores are equal (including the single-hit
/// case) every hit is assigned `1.0`.
///
/// Returns an empty vector when `hits` is empty or contains no finite score.
/// Every returned score is finite, even when the inputs span the whole `f32`
/// range.
// The only narrowing cast is applied to a value already known to lie in
// [0.0, 1.0], so it can round but never overflow.
#[allow(clippy::cast_possible_truncation)]
pub(crate) fn normalize_bm25_scores(hits: &[(PathBuf, f32)]) -> Vec<(PathBuf, f32)> {
    let finite_hits: Vec<&(PathBuf, f32)> = hits
        .iter()
        .filter(|(_, score)| score.is_finite())
        .collect();
    if finite_hits.is_empty() {
        return Vec::new();
    }

    // Single pass for both extremes.
    let (min, max) = finite_hits.iter().fold(
        (f32::INFINITY, f32::NEG_INFINITY),
        |(lo, hi), (_, score)| (lo.min(*score), hi.max(*score)),
    );

    // Do the arithmetic in f64: `max - min` can exceed `f32::MAX` (e.g. for
    // scores `f32::MAX` and `f32::MIN`), which in f32 would overflow to +inf
    // and turn the top hit into `inf / inf = NaN`.
    let min = f64::from(min);
    let range = f64::from(max) - min;

    finite_hits
        .into_iter()
        .map(|(path, score)| {
            let normalized = if range == 0.0 {
                // All scores identical: treat every hit as a full match.
                1.0
            } else {
                ((f64::from(*score) - min) / range) as f32
            };
            (path.clone(), normalized)
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use std::path::Path;

    use super::*;

    /// Tolerance used when comparing normalized scores.
    const EPS: f32 = 1e-6;

    /// Builds an owned hit list from `(file name, score)` pairs.
    fn hits_from(pairs: &[(&str, f32)]) -> Vec<(PathBuf, f32)> {
        pairs
            .iter()
            .map(|&(name, score)| (PathBuf::from(name), score))
            .collect()
    }

    /// Returns the normalized score recorded for `name`; panics if it is absent.
    fn score_of(normalized: &[(PathBuf, f32)], name: &str) -> f32 {
        normalized
            .iter()
            .find(|(p, _)| p == Path::new(name))
            .unwrap()
            .1
    }

    /// Reports whether `normalized` contains an entry for `name`.
    fn has_path(normalized: &[(PathBuf, f32)], name: &str) -> bool {
        normalized.iter().any(|(p, _)| p == Path::new(name))
    }

    // ---- body_preview ----

    #[test]
    fn body_preview_strips_frontmatter() {
        assert_eq!(
            body_preview("---\ntags: [a]\n---\nHello world", 100),
            "Hello world"
        );
    }

    #[test]
    fn body_preview_no_frontmatter() {
        assert_eq!(
            body_preview("# Title\nSome body text", 100),
            "Some body text"
        );
    }

    #[test]
    fn body_preview_truncates() {
        assert_eq!(body_preview("---\nk: v\n---\nABCDEFGHIJ", 5), "ABCDE");
    }

    #[test]
    fn body_preview_empty_content() {
        assert_eq!(body_preview("", 100), "");
    }

    #[test]
    fn body_preview_unclosed_frontmatter() {
        // Without a closing delimiter the frontmatter lines are expected to
        // show up in the preview.
        let preview = body_preview("---\ntags: [a]\nNo closing delimiter here", 200);
        assert!(preview.contains("tags:"));
    }

    #[test]
    fn body_preview_skips_title_heading() {
        assert_eq!(
            body_preview(
                "---\ntags: [a]\n---\n# My Title\n\nActual content here",
                100
            ),
            "Actual content here"
        );
    }

    #[test]
    fn body_preview_preserves_h2_start() {
        // Only a level-one heading is treated as a title line.
        assert_eq!(
            body_preview("## Section Header\n\nBody text follows", 100),
            "## Section Header\n\nBody text follows"
        );
    }

    #[test]
    fn body_preview_skips_title_without_frontmatter() {
        assert_eq!(
            body_preview("# Title Only\n\nThe real content starts here", 100),
            "The real content starts here"
        );
    }

    #[test]
    fn body_preview_title_with_no_body_after() {
        assert_eq!(body_preview("# Just a Title", 100), "");
    }

    // ---- normalize_bm25_scores ----

    #[test]
    fn normalize_empty() {
        let norm = normalize_bm25_scores(&[]);
        assert!(norm.is_empty());
    }

    #[test]
    fn normalize_single_score() {
        let norm = normalize_bm25_scores(&hits_from(&[("a.md", 5.0)]));
        assert_eq!(norm.len(), 1);
        assert!(
            (norm[0].1 - 1.0).abs() < EPS,
            "single item normalizes to 1.0"
        );
    }

    #[test]
    fn normalize_identical_scores() {
        let norm = normalize_bm25_scores(&hits_from(&[
            ("a.md", 3.0),
            ("b.md", 3.0),
            ("c.md", 3.0),
        ]));
        assert!(
            norm.iter().all(|(_, score)| (score - 1.0).abs() < EPS),
            "identical scores should all normalize to 1.0"
        );
    }

    #[test]
    fn normalize_min_max_range() {
        let norm = normalize_bm25_scores(&hits_from(&[
            ("high.md", 10.0),
            ("mid.md", 5.0),
            ("low.md", 0.0),
        ]));
        assert!((score_of(&norm, "high.md") - 1.0).abs() < EPS);
        assert!((score_of(&norm, "mid.md") - 0.5).abs() < EPS);
        assert!(score_of(&norm, "low.md").abs() < EPS);
    }

    #[test]
    fn normalize_preserves_order() {
        let norm = normalize_bm25_scores(&hits_from(&[
            ("first.md", 8.0),
            ("second.md", 4.0),
            ("third.md", 2.0),
        ]));
        let scores: Vec<f32> = norm.iter().map(|(_, score)| *score).collect();
        assert!(scores[0] > scores[1]);
        assert!(scores[1] > scores[2]);
    }

    #[test]
    fn normalize_filters_non_finite_scores() {
        let norm = normalize_bm25_scores(&hits_from(&[
            ("valid.md", 10.0),
            ("nan.md", f32::NAN),
            ("inf.md", f32::INFINITY),
            ("neginf.md", f32::NEG_INFINITY),
            ("low.md", 0.0),
        ]));
        assert_eq!(norm.len(), 2);
        assert!(has_path(&norm, "valid.md"));
        assert!(has_path(&norm, "low.md"));
    }

    #[test]
    fn normalize_all_non_finite_returns_empty() {
        let norm = normalize_bm25_scores(&hits_from(&[
            ("nan.md", f32::NAN),
            ("inf.md", f32::INFINITY),
        ]));
        assert!(norm.is_empty());
    }
}