use rusqlite::Connection;
use rusqlite::OptionalExtension;
use crate::search::constants::{BM25_MIN_TOKENS, BM25_TOKENS_PER_CHAR_DIV, DEFAULT_SNIPPET_LENGTH};
use crate::search::text_fts::{FtsOperator, to_fts_query};
use crate::search::{AnchorKind, MatchAnchor};
use super::match_text::build_match_text;
use super::types::RawSearchResult;
#[must_use]
pub fn build_anchors(conn: &Connection, raw: &RawSearchResult) -> Vec<MatchAnchor> {
let mut anchors: Vec<MatchAnchor> = Vec::with_capacity(2);
if !raw.snippet.is_empty() && raw.scores.bm25.is_some() {
let snippet = raw.snippet.as_str();
let heading = lookup_bm25_heading_strategy1(conn, &raw.path, snippet)
.or_else(|| scan_content_for_heading(conn, &raw.path, snippet));
let match_text = build_match_text(snippet);
if !match_text.is_empty() {
anchors.push(MatchAnchor {
kind: AnchorKind::Bm25,
heading_path: heading,
match_text,
char_start: None,
char_end: None,
});
}
}
let has_semantic = raw.semantic_heading.is_some()
|| raw.semantic_char_start.is_some()
|| raw.scores.semantic.is_some();
if has_semantic {
let match_text = build_match_text(raw.snippet.as_str());
if !match_text.is_empty() {
let heading_opt = raw
.semantic_heading
.as_deref()
.filter(|h| !h.is_empty())
.map(ToOwned::to_owned);
let sem_anchor = MatchAnchor {
kind: AnchorKind::Semantic,
heading_path: heading_opt,
match_text,
char_start: raw.semantic_char_start,
char_end: raw.semantic_char_end,
};
let duplicate = anchors
.first()
.is_some_and(|a| a.match_text == sem_anchor.match_text);
if !duplicate {
anchors.push(sem_anchor);
}
}
}
anchors
}
#[must_use]
pub(crate) fn maybe_expand_bm25_snippet(
conn: &Connection,
note_id: i64,
query: &str,
snippet: &str,
) -> Option<String> {
let snippet_chars = snippet.chars().count();
if snippet_chars * 2 >= DEFAULT_SNIPPET_LENGTH as usize {
return None;
}
if query.trim().is_empty() {
return None;
}
let fts_query = to_fts_query(query, FtsOperator::Or);
if fts_query.is_empty() {
return None;
}
let num_tokens = BM25_MIN_TOKENS.max(DEFAULT_SNIPPET_LENGTH.div_ceil(BM25_TOKENS_PER_CHAR_DIV));
let Ok(fallback) = conn.query_row(
"SELECT snippet(notes_fts_bm25, 2, '', '', '...', ?) AS snippet
FROM notes_fts_bm25
JOIN notes n ON n.id = notes_fts_bm25.rowid
WHERE n.id = ? AND n.active = 1
AND notes_fts_bm25 MATCH ?
LIMIT 1",
rusqlite::params![num_tokens, note_id, fts_query],
|row| row.get::<_, Option<String>>(0),
) else {
return None;
};
let fallback = fallback?.trim().to_owned();
if fallback.chars().count() > snippet_chars {
Some(fallback)
} else {
None
}
}
fn lookup_bm25_heading_strategy1(
conn: &Connection,
vault_path: &str,
snippet: &str,
) -> Option<String> {
let clean = snippet.trim_start_matches("...").trim();
if clean.len() < 8 {
return None;
}
let probe: String = clean.chars().take(40).collect();
conn.query_row(
"SELECT c.heading_path
FROM chunks c
JOIN notes n ON n.id = c.note_id
WHERE n.vault_path = ? AND n.active = 1
AND c.text LIKE '%' || ? || '%'
ORDER BY c.chunk_index
LIMIT 1",
rusqlite::params![vault_path, probe],
|row| row.get::<_, Option<String>>(0),
)
.ok()
.flatten()
.filter(|s| !s.is_empty())
}
fn lookup_bm25_heading_or_scan_from_chunk(
conn: &Connection,
vault_path: &str,
snippet: &str,
) -> Option<String> {
let clean = snippet.trim_start_matches("...").trim();
if clean.is_empty() {
return None;
}
let probe: String = clean.chars().take(40).collect();
let chunk = conn
.query_row(
"SELECT c.heading_path, c.char_start, n.content
FROM chunks c
JOIN notes n ON n.id = c.note_id
WHERE n.vault_path = ? AND n.active = 1
AND c.text LIKE '%' || ? || '%'
ORDER BY c.chunk_index
LIMIT 1",
rusqlite::params![vault_path, probe],
|row| {
Ok((
row.get::<_, Option<String>>(0)?,
row.get::<_, Option<i64>>(1)?,
row.get::<_, String>(2)?,
))
},
)
.optional()
.ok()??;
if let Some(heading) = chunk.0.filter(|s| !s.is_empty()) {
return Some(heading);
}
let char_start = usize::try_from(chunk.1?).ok()?;
heading_breadcrumb_before(&chunk.2, char_start)
}
fn scan_content_for_heading(conn: &Connection, vault_path: &str, snippet: &str) -> Option<String> {
let clean = snippet.trim_start_matches("...").trim();
if clean.is_empty() {
return None;
}
let probe: String = clean.chars().take(40).collect();
let content: String = conn
.query_row(
"SELECT content FROM notes WHERE vault_path = ? AND active = 1",
[vault_path],
|row| row.get(0),
)
.ok()?;
let pos = content.find(probe.as_str())?;
heading_breadcrumb_before(&content, pos)
}
fn heading_breadcrumb_before(content: &str, byte_pos: usize) -> Option<String> {
let before = content.get(..byte_pos)?;
let mut headings = [const { None::<String> }; 4];
let mut max_level = 4;
for line in before.lines().rev() {
let Some((level, text)) = parse_heading_line(line) else {
continue;
};
if level > max_level {
continue;
}
let index = level.saturating_sub(1);
if headings[index].is_none() {
headings[index] = Some(text.to_owned());
max_level = index;
}
if index == 0 {
break;
}
}
let breadcrumb: Vec<&str> = headings
.iter()
.filter_map(std::option::Option::as_deref)
.collect();
(!breadcrumb.is_empty()).then(|| breadcrumb.join(" > "))
}
fn parse_heading_line(line: &str) -> Option<(usize, &str)> {
let trimmed = line.trim_start();
let level = trimmed.bytes().take_while(|b| *b == b'#').count();
if !(1..).contains(&level)
|| !trimmed
.as_bytes()
.get(level)
.is_some_and(u8::is_ascii_whitespace)
{
return None;
}
let folded_level = level.min(4);
let text = trimmed[level..].trim();
(!text.is_empty()).then_some((folded_level, text))
}
#[must_use]
pub fn resolve_snippet_heading(
conn: &Connection,
raw: &RawSearchResult,
snippet: &str,
) -> Option<String> {
if let Some(h) = raw.semantic_heading.as_deref().filter(|h| !h.is_empty()) {
return Some(h.to_owned());
}
if snippet.is_empty() {
return None;
}
lookup_bm25_heading_or_scan_from_chunk(conn, &raw.path, snippet)
}
#[cfg(test)]
mod tests;