use rusqlite::types::Value;
use rusqlite::{Connection, params_from_iter};
use crate::text::frontmatter::normalize_keyword;
use super::constants::{BM25_FTS_SCORES, BM25_MIN_TOKENS, BM25_TOKENS_PER_CHAR_DIV};
use super::pre_filter::PreFilter;
use super::text_fts::{FtsOperator, build_bm25_score, to_fts_query};
use super::types::{RawSearchResult, SearchScores};
/// Decodes an optional JSON-encoded array of strings.
///
/// Yields an empty vector when `raw` is `None` or when its text cannot be
/// deserialized as a JSON array of strings; the parse error is discarded.
fn parse_string_array(raw: Option<String>) -> Vec<String> {
    raw.as_deref()
        .and_then(|text| serde_json::from_str::<Vec<String>>(text).ok())
        .unwrap_or_default()
}
/// Runs a BM25-ranked full-text search against the `notes_fts_bm25` table.
///
/// `query` is converted to a match expression with [`to_fts_query`] using
/// [`FtsOperator::Or`]. Matches are joined to `notes`, restricted to rows with
/// `active = 1` plus whatever conditions `pre_filter` contributes, ordered by
/// ascending `rank` and capped at `limit` rows.
///
/// The snippet is taken from column index 2 of the full-text table with empty
/// highlight markers and `...` as the ellipsis. Its token budget is
/// `snippet_length` divided by `BM25_TOKENS_PER_CHAR_DIV` (rounded up), but
/// never less than `BM25_MIN_TOKENS`.
///
/// # Returns
///
/// The matching notes, or an empty vector when `pre_filter.is_impossible()`
/// is true or when preparing the statement, executing it, or decoding any row
/// fails. Errors are swallowed, not reported.
#[must_use]
pub fn search_bm25(
    conn: &Connection,
    query: &str,
    limit: u32,
    snippet_length: u32,
    pre_filter: &PreFilter,
) -> Vec<RawSearchResult> {
    /// Turns one row of the BM25 query into a [`RawSearchResult`].
    fn hit_from_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<RawSearchResult> {
        let score = build_bm25_score(row.get::<_, f64>(5)?);
        Ok(RawSearchResult {
            path: row.get(0)?,
            title: row.get::<_, Option<String>>(1)?.unwrap_or_default(),
            tags: parse_string_array(row.get(2)?),
            aliases: parse_string_array(row.get(3)?),
            snippet: row.get::<_, Option<String>>(4)?.unwrap_or_default(),
            score,
            scores: SearchScores {
                bm25: Some(score),
                ..Default::default()
            },
            semantic_heading: None,
            semantic_char_start: None,
            semantic_char_end: None,
        })
    }

    // A filter that can never match makes the query pointless.
    if pre_filter.is_impossible() {
        return Vec::new();
    }

    let snippet_tokens = snippet_length
        .div_ceil(BM25_TOKENS_PER_CHAR_DIV)
        .max(BM25_MIN_TOKENS);
    let match_expr = to_fts_query(query, FtsOperator::Or);
    let (filter_sql, filter_params) = pre_filter.sql_fragment();

    let sql = format!(
        "SELECT n.vault_path, n.title, n.tags, n.aliases,
snippet(notes_fts_bm25, 2, '', '', '...', ?) AS snippet,
bm25(notes_fts_bm25, {title}, {alias}, {content}) AS rank
FROM notes_fts_bm25
JOIN notes n ON n.id = notes_fts_bm25.rowid
WHERE notes_fts_bm25 MATCH ? AND n.active = 1{filter_sql}
ORDER BY rank
LIMIT ?",
        title = BM25_FTS_SCORES.title,
        alias = BM25_FTS_SCORES.alias,
        content = BM25_FTS_SCORES.content,
    );

    // Bind order follows the placeholders in the SQL text: snippet token
    // count, match expression, pre-filter values, then the row limit.
    let mut bound: Vec<Value> = vec![
        Value::Integer(i64::from(snippet_tokens)),
        Value::Text(match_expr),
    ];
    bound.reserve(filter_params.len() + 1);
    bound.extend(filter_params);
    bound.push(Value::Integer(i64::from(limit)));

    conn.prepare(&sql)
        .and_then(|mut stmt| {
            let rows = stmt.query_map(params_from_iter(bound), hit_from_row)?;
            // One failing row discards the whole result set.
            rows.collect::<rusqlite::Result<Vec<RawSearchResult>>>()
        })
        .unwrap_or_default()
}
/// Finds active notes that have an alias exactly equal to the normalized query.
///
/// `query` is passed through [`normalize_keyword`] and compared against
/// `note_aliases.alias_norm`. Rows are joined to `notes`, restricted to
/// `active = 1` plus whatever conditions `pre_filter` contributes,
/// de-duplicated with `DISTINCT`, and capped at `limit`.
///
/// Every hit has an empty snippet, a `score` of `1.0`, and `fuzzy_title` set
/// to `Some(1.0)` in its [`SearchScores`].
///
/// NOTE: the statement has no `ORDER BY`, so which rows survive the `LIMIT`
/// is left to SQLite.
///
/// # Returns
///
/// The matching notes, or an empty vector when `pre_filter.is_impossible()`
/// is true or when preparing the statement, executing it, or decoding any row
/// fails. Errors are swallowed, not reported.
#[must_use]
pub fn search_by_alias_exact(
    conn: &Connection,
    query: &str,
    limit: u32,
    pre_filter: &PreFilter,
) -> Vec<RawSearchResult> {
    /// Turns one row of the alias lookup into a [`RawSearchResult`].
    fn alias_hit_from_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<RawSearchResult> {
        Ok(RawSearchResult {
            path: row.get(0)?,
            title: row.get::<_, Option<String>>(1)?.unwrap_or_default(),
            tags: parse_string_array(row.get(2)?),
            aliases: parse_string_array(row.get(3)?),
            snippet: String::new(),
            score: 1.0,
            scores: SearchScores {
                fuzzy_title: Some(1.0),
                ..Default::default()
            },
            semantic_heading: None,
            semantic_char_start: None,
            semantic_char_end: None,
        })
    }

    // A filter that can never match makes the query pointless.
    if pre_filter.is_impossible() {
        return Vec::new();
    }

    let alias_key = normalize_keyword(query);
    let (filter_sql, filter_params) = pre_filter.sql_fragment();

    let sql = format!(
        "SELECT DISTINCT n.vault_path, n.title, n.tags, n.aliases
FROM note_aliases a
JOIN notes n ON n.id = a.note_id
WHERE a.alias_norm = ? AND n.active = 1{filter_sql}
LIMIT ?"
    );

    // Bind order follows the placeholders in the SQL text: normalized alias,
    // pre-filter values, then the row limit.
    let mut bound: Vec<Value> = vec![Value::Text(alias_key)];
    bound.reserve(filter_params.len() + 1);
    bound.extend(filter_params);
    bound.push(Value::Integer(i64::from(limit)));

    conn.prepare(&sql)
        .and_then(|mut stmt| {
            let rows = stmt.query_map(params_from_iter(bound), alias_hit_from_row)?;
            // One failing row discards the whole result set.
            rows.collect::<rusqlite::Result<Vec<RawSearchResult>>>()
        })
        .unwrap_or_default()
}
// Unit tests are kept out of line in `bm25/tests.rs` and are compiled only
// for test builds. The `unwrap_used` / `expect_used` Clippy lints are allowed
// for that module.
#[cfg(test)]
#[path = "bm25/tests.rs"]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests;