use crate::config::EmbeddingsConfig;
use crate::search::SearchHit;
use crate::store;
use crate::vec_ext::VecConnection;
use normalize_output::OutputFormatter;
use serde::{Deserialize, Serialize};
use std::io::IsTerminal;
#[cfg(feature = "cli")]
use schemars::JsonSchema;
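
/// One semantic-search hit, flattened from a [`SearchHit`] into a
/// serializable row: source path and type, the matched chunk text, and
/// the similarity, staleness, and combined scores.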
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "cli", derive(JsonSchema))]
pub struct SearchResultEntry {
pub path: String,
pub source_type: String,
pub chunk_text: String,
pub similarity: f32,
pub staleness: f32,
pub score: f32,
pub last_commit: Option<String>,
}

impl From<SearchHit> for SearchResultEntry {
fn from(h: SearchHit) -> Self {
Self {
path: h.source_path,
source_type: h.source_type,
chunk_text: h.chunk_text,
similarity: h.similarity,
staleness: h.staleness,
score: h.score,
last_commit: h.last_commit,
}
}
}
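
/// The full result of one semantic search: the query and model used, the
/// ranked hits, how many embeddings were scanned, and whether the ANN
/// index supplied the candidates.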
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "cli", derive(JsonSchema))]
pub struct SearchReport {
pub query: String,
pub model: String,
pub results: Vec<SearchResultEntry>,
pub total_scanned: usize,
pub ann_used: bool,
}

impl OutputFormatter for SearchReport {
fn format_text(&self) -> String {
if self.results.is_empty() {
return format!(
"No results for query: {}\n(Hint: run `normalize structure rebuild` to populate embeddings)",
self.query
);
}
let search_mode = if self.ann_used { "ANN" } else { "brute-force" };
let mut out = format!(
"Semantic search results for: \"{}\"\nModel: {} — scanned {} embeddings ({})\n\n",
self.query, self.model, self.total_scanned, search_mode
);
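        // One numbered line per hit, followed by a one-line preview of the
        // chunk truncated to 120 characters.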
for (i, r) in self.results.iter().enumerate() {
out.push_str(&format!(
"{}. [score={:.3}] {} ({})\n",
i + 1,
r.score,
r.path,
r.source_type,
));
let snippet = r
.chunk_text
.lines()
.next()
.unwrap_or("")
.chars()
.take(120)
.collect::<String>();
out.push_str(&format!(" {}\n", snippet));
}
out
}
}
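
/// Embeds `query` with the configured model and returns the `top_k`
/// best-scoring chunks. The ANN path is tried first; if it is unavailable
/// or yields no candidates, every stored embedding is loaded and scored
/// brute-force.
///
/// A usage sketch (assumes the caller is already inside an async runtime
/// and that `normalize structure rebuild` has populated the index):
///
/// ```ignore
/// let report = run_search(std::path::Path::new("."), "retry logic".to_string(), 10).await?;
/// println!("{}", report.format_text());
/// ```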
pub async fn run_search(
root: &std::path::Path,
query: String,
top_k: usize,
) -> Result<SearchReport, String> {
let config = load_embeddings_config(root);
if !config.enabled {
        let is_tty = std::io::stderr().is_terminal();
if is_tty {
eprintln!(
"Semantic search is not enabled. Add to .normalize/config.toml:\n\n [embeddings]\n enabled = true\n"
);
} else {
eprintln!(
"error: semantic search not enabled (embeddings.enabled = false in config.toml)"
);
}
return Err("Semantic search not enabled.".to_string());
}
let idx = crate::open_index(root)
.await
.map_err(|e| format!("Failed to open index: {e}"))?;
let conn = idx.connection();
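    // The vector extension is optional: `VecConnection::open` returns None
    // when it is unavailable, and the ANN search below degrades to a
    // brute-force scan in that case.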
let db_path = root.join(".normalize").join("index.sqlite");
let vec_conn: Option<VecConnection> = VecConnection::open(&db_path);
store::ensure_schema(conn)
.await
.map_err(|e| format!("Schema error: {e}"))?;
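    // Resolve the embedding width for the configured model; 768 is the
    // fallback when the model is not recognized.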
let dims = crate::embedder::dims_for_model(&config.model).unwrap_or(768);
store::ensure_vec_schema(conn, dims, vec_conn.as_ref()).await;
let total = store::count_embeddings(conn, &config.model)
.await
.map_err(|e| format!("DB error: {e}"))?;
if total == 0 {
        let is_tty = std::io::stderr().is_terminal();
if is_tty {
eprintln!(
"No embeddings found. Run `normalize structure rebuild` to populate the semantic index."
);
} else {
eprintln!(
"error: no embeddings for model '{}'. Run `normalize structure rebuild` first.",
config.model
);
}
return Ok(SearchReport {
query,
model: config.model,
results: Vec::new(),
total_scanned: 0,
ann_used: false,
});
}
let mut embedder = crate::embedder::Embedder::load(&config.model, None)
.map_err(|e| format!("Failed to load embedding model: {e}"))?;
let query_vec = embedder
.embed_one(&query)
.map_err(|e| format!("Embedding failed: {e}"))?;
let query_bytes = crate::embedder::encode_vector(&query_vec);
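    // Over-fetch ANN candidates (never fewer than `top_k`) so reranking has
    // headroom; an empty ANN result counts as a miss and triggers the
    // brute-force fallback.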
let ann_candidate_count = std::cmp::max(store::ANN_CANDIDATE_COUNT, top_k);
let (candidates, ann_used) = if let Some(ann_results) = store::ann_search(
conn,
&config.model,
&query_bytes,
ann_candidate_count,
vec_conn.as_ref(),
)
.await
.filter(|r| !r.is_empty())
{
(ann_results, true)
} else {
let all = store::load_all_embeddings(conn, &config.model)
.await
.map_err(|e| format!("Failed to load embeddings: {e}"))?;
(all, false)
};
let total_scanned = candidates.len();
let hits = crate::search::rerank(&query_vec, candidates, top_k);
Ok(SearchReport {
query,
model: config.model,
results: hits.into_iter().map(SearchResultEntry::from).collect(),
total_scanned,
ann_used,
})
}
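
/// A context block matched by [`run_context_search`], carrying the full
/// chunk text plus its similarity and combined score.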
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "cli", derive(JsonSchema))]
pub struct ContextSearchEntry {
pub path: String,
pub content: String,
pub similarity: f32,
pub score: f32,
}

impl From<SearchHit> for ContextSearchEntry {
fn from(h: SearchHit) -> Self {
Self {
path: h.source_path,
content: h.chunk_text,
similarity: h.similarity,
score: h.score,
}
}
}
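
/// Results of a context-block search. `format_text` concatenates the raw
/// block contents separated by blank lines, with no per-hit headers.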
#[derive(Debug, Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "cli", derive(JsonSchema))]
pub struct ContextSearchReport {
pub query: String,
pub model: String,
pub results: Vec<ContextSearchEntry>,
pub total_scanned: usize,
}

impl OutputFormatter for ContextSearchReport {
fn format_text(&self) -> String {
if self.results.is_empty() {
return format!(
"No context blocks found for query: {}\n(Hint: run `normalize structure rebuild` to populate embeddings for context blocks)",
self.query
);
}
let mut out = String::new();
for r in &self.results {
out.push_str(&r.content);
if !r.content.ends_with('\n') {
out.push('\n');
}
out.push('\n');
}
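        // Collapse trailing separators so the output ends with a single
        // newline.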
while out.ends_with("\n\n") {
out.pop();
}
out
}
}
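
/// Variant of [`run_search`] restricted to embeddings whose source type is
/// `"context"`. Candidates are always loaded in full and reranked in
/// memory; the ANN index is not consulted here.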
pub async fn run_context_search(
root: &std::path::Path,
query: String,
top_k: usize,
) -> Result<ContextSearchReport, String> {
let config = load_embeddings_config(root);
if !config.enabled {
return Err(
"Semantic search not enabled. Add [embeddings] enabled = true to .normalize/config.toml, then run `normalize structure rebuild`.".to_string(),
);
}
let idx = crate::open_index(root)
.await
.map_err(|e| format!("Failed to open index: {e}"))?;
let conn = idx.connection();
let db_path = root.join(".normalize").join("index.sqlite");
let vec_conn: Option<VecConnection> = VecConnection::open(&db_path);
store::ensure_schema(conn)
.await
.map_err(|e| format!("Schema error: {e}"))?;
let dims = crate::embedder::dims_for_model(&config.model).unwrap_or(768);
store::ensure_vec_schema(conn, dims, vec_conn.as_ref()).await;
let mut embedder = crate::embedder::Embedder::load(&config.model, None)
.map_err(|e| format!("Failed to load embedding model: {e}"))?;
let query_vec = embedder
.embed_one(&query)
.map_err(|e| format!("Embedding failed: {e}"))?;
let candidates = store::load_embeddings_for_type(conn, &config.model, "context")
.await
.map_err(|e| format!("Failed to load context embeddings: {e}"))?;
if candidates.is_empty() {
return Ok(ContextSearchReport {
query,
model: config.model,
results: Vec::new(),
total_scanned: 0,
});
}
let total_scanned = candidates.len();
let hits = crate::search::rerank(&query_vec, candidates, top_k);
Ok(ContextSearchReport {
query,
model: config.model,
results: hits.into_iter().map(ContextSearchEntry::from).collect(),
total_scanned,
})
}
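
/// Reads the `[embeddings]` table from `.normalize/config.toml` under
/// `root`. A missing or unparseable file falls back to
/// `EmbeddingsConfig::default()` rather than returning an error.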
pub fn load_embeddings_config(root: &std::path::Path) -> EmbeddingsConfig {
let config_path = root.join(".normalize").join("config.toml");
let Ok(contents) = std::fs::read_to_string(&config_path) else {
return EmbeddingsConfig::default();
};
#[derive(serde::Deserialize, Default)]
struct PartialConfig {
#[serde(default)]
embeddings: EmbeddingsConfig,
}
toml::from_str::<PartialConfig>(&contents)
.map(|c| c.embeddings)
.unwrap_or_default()
}
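
// A minimal test sketch for the config loader. It assumes a scratch
// directory under the system temp dir is acceptable in tests and that the
// other `[embeddings]` keys are optional when deserializing (i.e.
// `enabled = true` alone parses); adjust if `EmbeddingsConfig` requires
// more fields.
#[cfg(test)]
mod tests {
    use super::load_embeddings_config;

    #[test]
    fn missing_config_falls_back_to_defaults() {
        // No `.normalize/config.toml` here: the loader must not error and
        // should return the default embeddings configuration.
        let dir = std::env::temp_dir().join("normalize-search-test-missing");
        std::fs::create_dir_all(&dir).unwrap();
        let config = load_embeddings_config(&dir);
        assert_eq!(
            config.enabled,
            crate::config::EmbeddingsConfig::default().enabled
        );
    }

    #[test]
    fn enabled_flag_round_trips_through_toml() {
        let norm_dir = std::env::temp_dir()
            .join("normalize-search-test-enabled")
            .join(".normalize");
        std::fs::create_dir_all(&norm_dir).unwrap();
        std::fs::write(norm_dir.join("config.toml"), "[embeddings]\nenabled = true\n").unwrap();
        assert!(load_embeddings_config(norm_dir.parent().unwrap()).enabled);
    }
}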