use crate::output::{CliError, OutputMode, render_error, render_mode};
use bones_core::config::load_project_config;
use bones_core::db::fts;
use bones_core::db::query;
use bones_search::fusion::hybrid_search_with_threshold;
use bones_search::semantic::{SemanticModel, knn_search, sync_projection_embeddings};
use clap::Args;
use serde::Serialize;
use std::io::Write;
/// Default per-hit score floor used by `filter_semantic_hits` when the caller
/// supplies no explicit threshold: hits scoring below it are dropped.
const MIN_SEMANTIC_SCORE: f32 = 0.15;
/// Default floor for the first (top) hit used by `filter_semantic_hits` when the
/// caller supplies no explicit threshold: if the first hit scores below it, the
/// whole result list is discarded.
const MIN_SEMANTIC_TOP_SCORE: f32 = 0.20;
// Command-line arguments for the `bn search` subcommand.
//
// NOTE: plain `//` comments are used in this block on purpose. clap's derive
// macro turns `///` doc comments on fields into user-visible help text, so
// adding them here would change the CLI output.
#[derive(Args, Debug)]
#[command(
about = "Search bones using full-text search",
long_about = "Search bones using hybrid ranking (lexical BM25 + optional semantic + structural fusion).\n\n\
FTS5 syntax is supported for lexical query parsing: stemming ('run' matches 'running'), \
prefix search ('auth*'), boolean operators (AND, OR, NOT).",
after_help = "EXAMPLES:\n # Search for bones about authentication\n bn search authentication\n\n\
# Prefix search\n bn search 'auth*'\n\n\
# Limit results\n bn search timeout -n 5\n\n\
# Machine-readable output\n bn search authentication --format json"
)]
pub struct SearchArgs {
// Query text (positional). `run_search` rejects it when it is empty after
// trimming whitespace.
pub query: String,
// Maximum number of results (`-n` / `--limit`, default 10). `run_search`
// caps the effective value at 1000.
#[arg(short = 'n', long, default_value = "10")]
pub limit: usize,
// `--lexical`: selects `SearchMode::LexicalOnly`. Combining it with
// `--semantic` is rejected by `resolve_mode`.
#[arg(long)]
pub lexical: bool,
// `--semantic`: selects `SearchMode::SemanticOnly`. Combining it with
// `--lexical` is rejected by `resolve_mode`.
#[arg(long)]
pub semantic: bool,
// `--semantic-threshold SCORE`: optional score floor forwarded to the hybrid
// and semantic-only search paths. When set, it replaces both default floors
// in `filter_semantic_hits`.
#[arg(long, value_name = "SCORE")]
pub semantic_threshold: Option<f32>,
}
/// Which ranking layers a single search invocation runs.
///
/// Chosen by `resolve_mode` from the `--lexical` / `--semantic` flags.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SearchMode {
    /// Neither flag given: fused ranking via `hybrid_search_with_threshold`.
    Hybrid,
    /// `--lexical`: full-text (BM25) search only.
    LexicalOnly,
    /// `--semantic`: embedding KNN search only.
    SemanticOnly,
}
/// One ranked search hit, enriched with item metadata for display.
#[derive(Debug, Serialize)]
pub struct SearchResult {
    /// Identifier of the matching item.
    pub id: String,
    /// Item title, or `"<unknown>"` when the metadata lookup in `run_search` fails.
    pub title: String,
    /// Score reported by the active search mode: `rank` from `fts::search_bm25`
    /// in lexical-only mode, otherwise the hit's `score` widened to `f64`.
    pub score: f64,
    /// Item state, or `"unknown"` when the metadata lookup in `run_search` fails.
    pub state: String,
}
/// Full payload handed to the renderers (and serialized as-is for JSON output).
#[derive(Debug, Serialize)]
pub struct SearchOutput {
    /// The query exactly as the user supplied it.
    pub query: String,
    /// Effective result limit (after `run_search` caps it at 1000).
    pub limit: usize,
    /// Number of entries in `results`.
    pub count: usize,
    /// Ranked hits in the order returned by the search backend.
    pub results: Vec<SearchResult>,
    /// The rewritten OR query, set only when the original query found nothing
    /// and the OR fallback produced at least one result.
    pub fallback_query: Option<String>,
}
#[tracing::instrument(skip_all, name = "cmd.search")]
pub fn run_search(
args: &SearchArgs,
output: OutputMode,
project_root: &std::path::Path,
) -> anyhow::Result<()> {
if args.query.trim().is_empty() {
render_error(
output,
&CliError::with_details(
"search query must not be empty",
"provide a non-empty query string",
"empty_query",
),
)?;
anyhow::bail!("empty search query");
}
let mode = resolve_mode(args)?;
let db_path = project_root.join(".bones/bones.db");
let conn = if let Some(c) = query::try_open_projection(&db_path)? {
c
} else {
render_error(
output,
&CliError::with_details(
"projection database not found",
"run `bn admin rebuild` to initialize the projection",
"projection_missing",
),
)?;
anyhow::bail!("projection not found");
};
let limit = args.limit.min(1000);
let cfg = load_project_config(project_root).unwrap_or_default();
let mut results = execute_search_mode(
mode,
&conn,
&args.query,
limit,
cfg.search.semantic,
args.semantic_threshold,
)?;
let mut fallback_query = None;
if results.is_empty()
&& mode != SearchMode::SemanticOnly
&& let Some(or_query) = or_fallback_query(&args.query)
{
results = execute_search_mode(
mode,
&conn,
&or_query,
limit,
cfg.search.semantic,
args.semantic_threshold,
)?;
if !results.is_empty() {
fallback_query = Some(or_query);
}
}
let mut results_with_meta: Vec<SearchResult> = Vec::with_capacity(results.len());
for (item_id, score) in results {
let (title, state) = conn
.query_row(
"SELECT title, state FROM items WHERE item_id = ?1",
rusqlite::params![&item_id],
|row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)),
)
.unwrap_or_else(|_| ("<unknown>".to_string(), "unknown".to_string()));
results_with_meta.push(SearchResult {
id: item_id,
title,
score,
state,
});
}
let search_output = SearchOutput {
query: args.query.clone(),
limit,
count: results_with_meta.len(),
results: results_with_meta,
fallback_query,
};
render_mode(
output,
&search_output,
|out, w| render_search_text(out, w),
|out, w| render_search_human(out, w),
)
}
/// Map the `--lexical` / `--semantic` flags onto a [`SearchMode`].
///
/// Neither flag selects [`SearchMode::Hybrid`].
///
/// # Errors
///
/// Fails when both flags are set, since they are mutually exclusive.
fn resolve_mode(args: &SearchArgs) -> anyhow::Result<SearchMode> {
    match (args.lexical, args.semantic) {
        (true, true) => anyhow::bail!("--lexical and --semantic are mutually exclusive"),
        (true, false) => Ok(SearchMode::LexicalOnly),
        (false, true) => Ok(SearchMode::SemanticOnly),
        (false, false) => Ok(SearchMode::Hybrid),
    }
}
fn execute_search_mode(
mode: SearchMode,
conn: &rusqlite::Connection,
query_text: &str,
limit: usize,
semantic_enabled: bool,
semantic_threshold: Option<f32>,
) -> anyhow::Result<Vec<(String, f64)>> {
match mode {
SearchMode::LexicalOnly => lexical_only_search(conn, query_text, limit),
SearchMode::SemanticOnly => {
semantic_only_search(conn, query_text, limit, semantic_threshold)
}
SearchMode::Hybrid => {
let semantic_model = if semantic_enabled {
match SemanticModel::load() {
Ok(model) => Some(model),
Err(err) => {
tracing::warn!(
"semantic model unavailable; using lexical+structural search only: {err}"
);
None
}
}
} else {
None
};
hybrid_search_with_threshold(
query_text,
conn,
semantic_model.as_ref(),
limit,
60,
semantic_threshold,
)
.map_err(|e| anyhow::anyhow!("search error: {e}. Check query syntax (use 'auth*' for prefix, AND/OR/NOT for boolean)."))
.map(|hits| {
hits.into_iter()
.map(|hit| (hit.item_id, f64::from(hit.score)))
.collect()
})
}
}
}
/// Build a broader `term OR term OR ...` query from a plain multi-word query.
///
/// Returns `None` (no fallback) when:
/// - the query is empty after trimming,
/// - it contains any of the characters `"`, `*`, `(`, `)`, `:` or `^`,
/// - any whitespace-separated word is `AND`, `OR` or `NOT` in any ASCII case,
/// - fewer than two distinct usable terms remain.
///
/// Terms are obtained by splitting on every character that is not
/// alphanumeric, `-` or `_`, stripping leading/trailing `-` and `_`,
/// discarding terms shorter than two bytes, ASCII-lowercasing, and removing
/// duplicates while keeping first-occurrence order.
fn or_fallback_query(query: &str) -> Option<String> {
    const FTS_SYNTAX_CHARS: [char; 6] = ['"', '*', '(', ')', ':', '^'];
    const FTS_OPERATORS: [&str; 3] = ["AND", "OR", "NOT"];

    let text = query.trim();
    if text.is_empty() || text.contains(FTS_SYNTAX_CHARS) {
        return None;
    }

    // The user wrote an explicit boolean query; do not second-guess it.
    let has_operator = text
        .split_whitespace()
        .any(|word| FTS_OPERATORS.iter().any(|op| word.eq_ignore_ascii_case(op)));
    if has_operator {
        return None;
    }

    let mut seen = std::collections::HashSet::new();
    let mut terms: Vec<String> = Vec::new();
    for piece in text.split(|c: char| !(c.is_alphanumeric() || c == '-' || c == '_')) {
        let core = piece.trim_matches(['-', '_']);
        // Length is measured in bytes, matching the established behavior.
        if core.len() < 2 {
            continue;
        }
        let term = core.to_ascii_lowercase();
        if seen.insert(term.clone()) {
            terms.push(term);
        }
    }

    // OR-ing a single term would just repeat the original search.
    (terms.len() >= 2).then(|| terms.join(" OR "))
}
fn lexical_only_search(
conn: &rusqlite::Connection,
query_text: &str,
limit: usize,
) -> anyhow::Result<Vec<(String, f64)>> {
let hits = fts::search_bm25(conn, query_text, limit as u32)
.map_err(|e| anyhow::anyhow!("lexical search error: {e}"))?;
Ok(hits
.into_iter()
.map(|hit| (hit.item_id, hit.rank))
.collect())
}
/// Run a semantic-only search: load the model, sync the embedding index,
/// embed the query, fetch the nearest neighbours and apply the score filter.
///
/// Unlike hybrid mode, a missing semantic model is a hard error here.
///
/// # Errors
///
/// Each stage (model load, index sync, query embedding, KNN lookup) is
/// reported with its own message prefix.
fn semantic_only_search(
    conn: &rusqlite::Connection,
    query_text: &str,
    limit: usize,
    threshold: Option<f32>,
) -> anyhow::Result<Vec<(String, f64)>> {
    let model = SemanticModel::load()
        .map_err(|e| anyhow::anyhow!("semantic model unavailable for --semantic mode: {e}"))?;

    // Must run before the KNN lookup; any value it returns is not needed here.
    sync_projection_embeddings(conn, &model)
        .map_err(|e| anyhow::anyhow!("semantic index sync failed: {e}"))?;

    let nearest = model
        .embed(query_text)
        .map_err(|e| anyhow::anyhow!("semantic embedding failed: {e}"))
        .and_then(|query_vec| {
            knn_search(conn, &query_vec, limit)
                .map_err(|e| anyhow::anyhow!("semantic KNN search failed: {e}"))
        })?;

    Ok(filter_semantic_hits(nearest, threshold))
}
/// Drop low-confidence semantic hits and convert the rest to `(item_id, score)`.
///
/// Two floors apply:
/// - a *top* floor checked against the first hit only: if the first hit scores
///   below it, the whole list is discarded;
/// - a per-hit floor: every hit scoring below it is removed.
///
/// An explicit `threshold` is used for both floors; otherwise they default to
/// `MIN_SEMANTIC_TOP_SCORE` and `MIN_SEMANTIC_SCORE` respectively.
///
/// NOTE(review): only `hits[0]` is treated as the best hit, so this presumably
/// expects `hits` sorted by descending score — confirm against `knn_search`.
fn filter_semantic_hits(
    hits: Vec<bones_search::semantic::SemanticSearchResult>,
    threshold: Option<f32>,
) -> Vec<(String, f64)> {
    let (floor, top_floor) = match threshold {
        Some(explicit) => (explicit, explicit),
        None => (MIN_SEMANTIC_SCORE, MIN_SEMANTIC_TOP_SCORE),
    };

    let Some(best) = hits.first() else {
        return Vec::new();
    };
    // Written as `<` so that a NaN top score does not short-circuit (NaN
    // compares false); such a hit is removed by the per-hit filter below.
    if best.score < top_floor {
        return Vec::new();
    }

    hits.into_iter()
        .filter_map(|hit| (hit.score >= floor).then(|| (hit.item_id, f64::from(hit.score))))
        .collect()
}
/// Render search results as a human-readable table.
///
/// With no results, prints a "No results" line plus a hint and stops.
/// Otherwise prints a summary line, an optional note about the fallback
/// query, and a fixed-width table with ID, state, score and title columns.
/// The summary switches to the "Showing first ..." wording whenever
/// `count >= limit`, i.e. also when exactly `limit` results exist.
///
/// # Errors
///
/// Propagates any I/O error from the writer.
fn render_search_human(out: &SearchOutput, w: &mut dyn Write) -> std::io::Result<()> {
    if out.results.is_empty() {
        writeln!(w, "No results for '{}'", out.query)?;
        return writeln!(
            w,
            "Try broader terms or use prefix search (example: 'auth*')"
        );
    }

    if out.count < out.limit {
        writeln!(w, "{} result(s) for '{}':", out.count, out.query)?;
    } else {
        // The result list may have been truncated by the limit.
        writeln!(
            w,
            "Showing first {} result(s) for '{}' (use -n to increase limit):",
            out.count, out.query
        )?;
    }

    if let Some(fallback_query) = out.fallback_query.as_deref() {
        writeln!(w, "(fallback query applied: {fallback_query})")?;
    }

    // Table header framed by two 90-character rules.
    writeln!(w, "{:-<90}", "")?;
    writeln!(w, "{:<16} {:<8} {:>8} TITLE", "ID", "STATE", "SCORE")?;
    writeln!(w, "{:-<90}", "")?;

    out.results.iter().try_for_each(|hit| {
        writeln!(
            w,
            "{:<16} {:<8} {:>8.3} {}",
            hit.id, hit.state, hit.score, hit.title
        )
    })
}
/// Render search results in the compact line-oriented text format.
///
/// Emits `advice ...` lines for the no-results, result-limit (`count >= limit`)
/// and fallback-query conditions, followed by one line per hit of the form
/// `<id> <state> score=<score> <title>`.
///
/// # Errors
///
/// Propagates any I/O error from the writer.
fn render_search_text(out: &SearchOutput, w: &mut dyn Write) -> std::io::Result<()> {
    if out.results.is_empty() {
        return writeln!(w, "advice no-results query={}", out.query);
    }

    let at_capacity = out.count >= out.limit;
    if at_capacity {
        writeln!(
            w,
            "advice result-limit limit={} query={}",
            out.limit, out.query
        )?;
    }

    if let Some(fallback_query) = out.fallback_query.as_deref() {
        writeln!(
            w,
            "advice fallback-query original={} effective={}",
            out.query, fallback_query
        )?;
    }

    out.results.iter().try_for_each(|hit| {
        writeln!(
            w,
            "{} {} score={:.3} {}",
            hit.id, hit.state, hit.score, hit.title
        )
    })
}
// Unit tests for the search command: argument parsing, mode resolution, the
// OR-fallback query builder, both text renderers, semantic hit filtering, and
// end-to-end `run_search` smoke tests against a temporary projection database.
#[cfg(test)]
mod tests {
use super::*;
use bones_core::db::migrations;
use bones_core::db::project::{Projector, ensure_tracking_table};
use bones_core::event::data::*;
use bones_core::event::types::EventType;
use bones_core::event::{Event, EventData};
use bones_core::model::item::{Kind, Size, Urgency};
use bones_core::model::item_id::ItemId;
use rusqlite::Connection;
use std::collections::BTreeMap;
// Build a `Create` event for a task item with the given id, title, optional
// description and labels. `hash` is embedded as `blake3:<hash>` so each
// fixture event can be given a distinct event hash.
fn make_create(
id: &str,
title: &str,
desc: Option<&str>,
labels: &[&str],
hash: &str,
) -> Event {
Event {
wall_ts_us: 1000,
agent: "test-agent".into(),
itc: "itc:AQ".into(),
parents: vec![],
event_type: EventType::Create,
item_id: ItemId::new_unchecked(id),
data: EventData::Create(CreateData {
title: title.into(),
kind: Kind::Task,
size: Some(Size::M),
urgency: Urgency::Default,
labels: labels.iter().map(|s| s.to_string()).collect(),
parent: None,
causation: None,
description: desc.map(String::from),
extra: BTreeMap::new(),
}),
event_hash: format!("blake3:{hash}"),
}
}
// A bare positional argument becomes the query; the limit defaults to 10.
#[test]
fn search_args_parse_query() {
use clap::Parser;
// `SearchArgs` derives `Args`, not `Parser`, so it is flattened into a
// throwaway parser for the test.
#[derive(Parser)]
struct Wrapper {
#[command(flatten)]
args: SearchArgs,
}
let w = Wrapper::parse_from(["test", "authentication"]);
assert_eq!(w.args.query, "authentication");
assert_eq!(w.args.limit, 10);
}
// `-n 5` overrides the limit; the mode flags default to false.
#[test]
fn search_args_parse_limit() {
use clap::Parser;
#[derive(Parser)]
struct Wrapper {
#[command(flatten)]
args: SearchArgs,
}
let w = Wrapper::parse_from(["test", "auth*", "-n", "5"]);
assert_eq!(w.args.query, "auth*");
assert_eq!(w.args.limit, 5);
assert!(!w.args.lexical);
assert!(!w.args.semantic);
}
// `--lexical` and `--semantic` each set only their own flag.
#[test]
fn search_args_parse_layer_flags() {
use clap::Parser;
#[derive(Parser)]
struct Wrapper {
#[command(flatten)]
args: SearchArgs,
}
let lexical = Wrapper::parse_from(["test", "auth", "--lexical"]);
assert!(lexical.args.lexical);
assert!(!lexical.args.semantic);
let semantic = Wrapper::parse_from(["test", "auth", "--semantic"]);
assert!(semantic.args.semantic);
assert!(!semantic.args.lexical);
}
// Setting both mode flags is rejected by `resolve_mode` (clap itself does not
// declare them as conflicting, so the args are constructed directly).
#[test]
fn resolve_mode_rejects_conflicting_flags() {
let args = SearchArgs {
query: "auth".into(),
limit: 10,
lexical: true,
semantic: true,
semantic_threshold: None,
};
assert!(resolve_mode(&args).is_err());
}
// Each valid flag combination maps to its corresponding `SearchMode`.
#[test]
fn resolve_mode_selects_expected_mode() {
let lexical = SearchArgs {
query: "auth".into(),
limit: 10,
lexical: true,
semantic: false,
semantic_threshold: None,
};
assert!(matches!(
resolve_mode(&lexical).expect("mode"),
SearchMode::LexicalOnly
));
let semantic = SearchArgs {
query: "auth".into(),
limit: 10,
lexical: false,
semantic: true,
semantic_threshold: None,
};
assert!(matches!(
resolve_mode(&semantic).expect("mode"),
SearchMode::SemanticOnly
));
let hybrid = SearchArgs {
query: "auth".into(),
limit: 10,
lexical: false,
semantic: false,
semantic_threshold: None,
};
assert!(matches!(
resolve_mode(&hybrid).expect("mode"),
SearchMode::Hybrid
));
}
// Plain multi-word input is rewritten to an OR query in original term order.
#[test]
fn or_fallback_query_builds_or_query_for_plain_multi_term_input() {
let fallback = or_fallback_query("semantic ranking rollout");
assert_eq!(fallback.as_deref(), Some("semantic OR ranking OR rollout"));
}
// Queries that already use explicit query syntax (prefix `*`, boolean
// operators, `:` or quotes) are never rewritten.
#[test]
fn or_fallback_query_skips_explicit_fts_syntax() {
assert!(or_fallback_query("auth* AND service").is_none());
assert!(or_fallback_query("title:auth").is_none());
assert!(or_fallback_query("\"auth service\"").is_none());
}
// The empty-result message mentions the query that found nothing.
#[test]
fn render_search_human_no_results() {
let out = SearchOutput {
query: "nonexistent".into(),
limit: 10,
count: 0,
results: vec![],
fallback_query: None,
};
let mut buf = Vec::new();
render_search_human(&out, &mut buf).unwrap();
let text = String::from_utf8(buf).unwrap();
assert!(text.contains("No results"));
assert!(text.contains("nonexistent"));
}
// With fewer results than the limit (2 < 10), the plain "N result(s)" header
// is used and the "Showing first" hint is absent.
#[test]
fn render_search_human_with_results() {
let out = SearchOutput {
query: "auth".into(),
limit: 10,
count: 2,
results: vec![
SearchResult {
id: "bn-001".into(),
title: "Authentication timeout".into(),
score: -3.5,
state: "open".into(),
},
SearchResult {
id: "bn-002".into(),
title: "Auth service broken".into(),
score: -2.1,
state: "doing".into(),
},
],
fallback_query: None,
};
let mut buf = Vec::new();
render_search_human(&out, &mut buf).unwrap();
let text = String::from_utf8(buf).unwrap();
assert!(text.contains("2 result(s)"));
assert!(text.contains("bn-001"));
assert!(text.contains("Authentication timeout"));
assert!(text.contains("open"));
assert!(text.contains("bn-002"));
assert!(text.contains("doing"));
assert!(!text.contains("Showing first"));
}
// When the result count reaches the limit (2 == 2), the header switches to
// the "Showing first" wording and points at `-n`.
#[test]
fn render_search_human_shows_limit_hint_when_at_capacity() {
let out = SearchOutput {
query: "auth".into(),
limit: 2,
count: 2,
results: vec![
SearchResult {
id: "bn-001".into(),
title: "Authentication timeout".into(),
score: -3.5,
state: "open".into(),
},
SearchResult {
id: "bn-002".into(),
title: "Auth service broken".into(),
score: -2.1,
state: "doing".into(),
},
],
fallback_query: None,
};
let mut buf = Vec::new();
render_search_human(&out, &mut buf).unwrap();
let text = String::from_utf8(buf).unwrap();
assert!(
text.contains("Showing first"),
"should show limit hint when count >= limit"
);
assert!(
text.contains("-n"),
"should mention -n flag to increase limit"
);
}
// The text renderer emits an `advice result-limit` line when the result
// count reaches the limit.
#[test]
fn render_search_text_shows_limit_advice_when_at_capacity() {
let out = SearchOutput {
query: "auth".into(),
limit: 1,
count: 1,
results: vec![SearchResult {
id: "bn-001".into(),
title: "Auth bug".into(),
score: -2.5,
state: "open".into(),
}],
fallback_query: None,
};
let mut buf = Vec::new();
render_search_text(&out, &mut buf).unwrap();
let text = String::from_utf8(buf).unwrap();
assert!(
text.contains("advice result-limit"),
"text format should emit limit advice"
);
assert!(text.contains("limit=1"));
}
// Create a temporary project directory containing `.bones/bones.db` with the
// schema migrated, the tracking table ensured, and two projected items
// (`bn-001` about authentication, `bn-002` about documentation).
//
// Returns the `TempDir` guard together with the project root path; callers
// keep the guard bound (as `_dir`) for the duration of the test.
fn setup_test_dir() -> (tempfile::TempDir, std::path::PathBuf) {
let dir = tempfile::tempdir().expect("tempdir");
let bones_dir = dir.path().join(".bones");
std::fs::create_dir_all(&bones_dir).unwrap();
let db_path = bones_dir.join("bones.db");
let mut conn = Connection::open(&db_path).expect("open db");
migrations::migrate(&mut conn).expect("migrate");
ensure_tracking_table(&conn).expect("tracking");
let proj = Projector::new(&conn);
proj.project_event(&make_create(
"bn-001",
"Authentication timeout regression",
Some("Auth service fails after 30 seconds"),
&["auth", "backend"],
"h1",
))
.unwrap();
proj.project_event(&make_create(
"bn-002",
"Update documentation",
Some("Fix typos in README"),
&["docs"],
"h2",
))
.unwrap();
let root = dir.path().to_path_buf();
(dir, root)
}
// Smoke test: a default-mode search over the fixture database returns `Ok`.
// The rendered output itself is not inspected.
#[test]
fn run_search_finds_results() {
let (_dir, root) = setup_test_dir();
let args = SearchArgs {
query: "authentication".into(),
limit: 10,
lexical: false,
semantic: false,
semantic_threshold: None,
};
run_search(&args, OutputMode::Pretty, &root).unwrap();
}
// Smoke test: the same pipeline succeeds with JSON output.
#[test]
fn run_search_json_output() {
let (_dir, root) = setup_test_dir();
let args = SearchArgs {
query: "auth".into(),
limit: 10,
lexical: false,
semantic: false,
semantic_threshold: None,
};
run_search(&args, OutputMode::Json, &root).unwrap();
}
// A query that is not expected to match the fixtures still returns `Ok`
// rather than an error. Being a single term, it also yields no OR fallback.
#[test]
fn run_search_no_results() {
let (_dir, root) = setup_test_dir();
let args = SearchArgs {
query: "zzznomatch".into(),
limit: 10,
lexical: false,
semantic: false,
semantic_threshold: None,
};
run_search(&args, OutputMode::Pretty, &root).unwrap();
}
// A prefix query (`auth*`) is accepted by the search pipeline.
#[test]
fn run_search_prefix_query() {
let (_dir, root) = setup_test_dir();
let args = SearchArgs {
query: "auth*".into(),
limit: 10,
lexical: false,
semantic: false,
semantic_threshold: None,
};
run_search(&args, OutputMode::Pretty, &root).unwrap();
}
// Without a `.bones/bones.db` under the project root, `run_search` fails.
#[test]
fn run_search_missing_projection() {
let dir = tempfile::tempdir().expect("tempdir");
let args = SearchArgs {
query: "test".into(),
limit: 10,
lexical: false,
semantic: false,
semantic_threshold: None,
};
assert!(run_search(&args, OutputMode::Pretty, dir.path()).is_err());
}
// A whitespace-only query is rejected even though a projection exists.
#[test]
fn run_search_empty_query_errors() {
let (_dir, root) = setup_test_dir();
let args = SearchArgs {
query: " ".into(),
limit: 10,
lexical: false,
semantic: false,
semantic_threshold: None,
};
assert!(run_search(&args, OutputMode::Pretty, &root).is_err());
}
// `SearchOutput` serializes to JSON and includes the effective limit.
#[test]
fn search_output_json_serializable() {
let out = SearchOutput {
query: "auth".into(),
limit: 10,
count: 1,
results: vec![SearchResult {
id: "bn-001".into(),
title: "Auth bug".into(),
score: -2.5,
state: "open".into(),
}],
fallback_query: None,
};
let json = serde_json::to_string(&out).unwrap();
assert!(json.contains("bn-001"));
assert!(json.contains("auth"));
assert!(json.contains("Auth bug"));
assert!(
json.contains("\"limit\":10"),
"JSON output should include the effective limit"
);
}
// If the first hit scores below the default top floor (0.12 < 0.20), the
// whole result list is discarded.
#[test]
fn filter_semantic_hits_drops_low_confidence_queries() {
let hits = vec![
bones_search::semantic::SemanticSearchResult {
item_id: "bn-001".into(),
score: 0.12,
},
bones_search::semantic::SemanticSearchResult {
item_id: "bn-002".into(),
score: 0.10,
},
];
assert!(filter_semantic_hits(hits, None).is_empty());
}
// With a confident first hit, only rows below the default per-hit floor
// (0.10 < 0.15) are removed; input order is preserved.
#[test]
fn filter_semantic_hits_keeps_high_confidence_rows() {
let hits = vec![
bones_search::semantic::SemanticSearchResult {
item_id: "bn-001".into(),
score: 0.70,
},
bones_search::semantic::SemanticSearchResult {
item_id: "bn-002".into(),
score: 0.25,
},
bones_search::semantic::SemanticSearchResult {
item_id: "bn-003".into(),
score: 0.10,
},
];
let filtered = filter_semantic_hits(hits, None);
assert_eq!(filtered.len(), 2);
assert_eq!(filtered[0].0, "bn-001");
assert_eq!(filtered[1].0, "bn-002");
}
}