use rusqlite::{Connection, params};
use crate::TalonError;
use crate::inference::redact;
use super::pending::NoteWithChunks;
/// Upper bound on the number of diagnostic lines kept in
/// `EmbedRunContext::diagnostics`; `EmbedRunContext::record_diagnostic`
/// ignores further entries once this many are stored.
pub const MAX_DIAGNOSTICS: usize = 20;
/// Owned copy of an embedding run's counters and diagnostic lines.
///
/// Produced by `EmbedRunContext::snapshot`, which copies each field from the
/// run-context field of the same name.
#[derive(Debug, Clone, Default)]
pub struct EmbedDiagnostics {
/// Copy of `EmbedRunContext::processed`.
/// NOTE(review): presumably the number of items attempted — the code that
/// increments it is not visible here; confirm against the caller.
pub processed: u32,
/// Copy of `EmbedRunContext::succeeded` (presumably successful items — confirm).
pub succeeded: u32,
/// Copy of `EmbedRunContext::failed` (presumably failed items — confirm).
pub failed: u32,
/// Copy of `EmbedRunContext::dimension_mismatch`, which
/// `align_embedding_dimensions` sets when a reported dimension differs from
/// the one already pinned for the run.
pub dimension_mismatch: bool,
/// Clone of the redacted diagnostic lines collected by
/// `EmbedRunContext::record_diagnostic`.
pub diagnostics: Vec<String>,
}
/// Mutable state carried through an embedding run.
///
/// `Default` yields no pinned dimension, no mismatch, zeroed counters and an
/// empty diagnostics list.
#[derive(Debug, Default)]
pub struct EmbedRunContext {
/// Embedding dimension pinned for this run; `None` until
/// `align_embedding_dimensions` is first called with this context.
pub current_dimensions: Option<u32>,
/// Set to `true` by `align_embedding_dimensions` when it is given a dimension
/// that differs from `current_dimensions`. Nothing in this file resets it.
pub dimension_mismatch: bool,
/// Run counter; not updated by any code in this file.
/// NOTE(review): presumably the number of items attempted — confirm against the caller.
pub processed: u32,
/// Run counter; not updated by any code in this file (presumably successes — confirm).
pub succeeded: u32,
/// Run counter; not updated by any code in this file (presumably failures — confirm).
pub failed: u32,
/// Redacted diagnostic lines appended by `record_diagnostic`, which stops
/// appending once `MAX_DIAGNOSTICS` lines are stored.
pub diagnostics: Vec<String>,
}
impl EmbedRunContext {
    /// Returns an owned [`EmbedDiagnostics`] holding the current counters, the
    /// mismatch flag and a clone of the diagnostic lines.
    ///
    /// `current_dimensions` is not part of the snapshot.
    #[must_use]
    pub fn snapshot(&self) -> EmbedDiagnostics {
        let Self {
            processed,
            succeeded,
            failed,
            dimension_mismatch,
            diagnostics,
            ..
        } = self;
        EmbedDiagnostics {
            processed: *processed,
            succeeded: *succeeded,
            failed: *failed,
            dimension_mismatch: *dimension_mismatch,
            diagnostics: diagnostics.clone(),
        }
    }

    /// Stores one diagnostic line of the form `"{vault_path}: {detail}"`.
    ///
    /// The formatted text is passed through `redact` before it is logged at
    /// warn level (target `talon::embed`) and appended to `self.diagnostics`.
    /// Once `MAX_DIAGNOSTICS` lines are already stored the call does nothing:
    /// the line is neither logged nor kept.
    pub fn record_diagnostic(&mut self, vault_path: &str, detail: &str) {
        if self.diagnostics.len() < MAX_DIAGNOSTICS {
            let raw = format!("{vault_path}: {detail}");
            let line = redact(&raw);
            tracing::warn!(target: "talon::embed", "{line}");
            self.diagnostics.push(line);
        }
    }
}
/// Marks every chunk of `note` as failed in the `chunks` table.
///
/// Runs `UPDATE chunks SET embedding_status = 'failed' WHERE id = ?` once per
/// entry in `note.chunks`, binding that entry's `chunk_id`.
///
/// # Errors
///
/// Returns `TalonError::Sqlite` with context `"mark chunk failed"` as soon as
/// one update fails. This function opens no transaction of its own, so
/// updates issued before the failing one are not rolled back here.
pub fn mark_note_chunks_failed(conn: &Connection, note: &NoteWithChunks) -> Result<(), TalonError> {
    // Iterate by shared reference: the note is only borrowed.
    for chunk in &note.chunks {
        conn.execute(
            "UPDATE chunks SET embedding_status = 'failed' WHERE id = ?",
            params![chunk.chunk_id],
        )
        .map_err(|source| TalonError::Sqlite {
            context: "mark chunk failed",
            source,
        })?;
    }
    Ok(())
}
/// Pins the embedding dimension for a run and flags later disagreements.
///
/// Behaviour depends on `ctx.current_dimensions`:
///
/// * `None` — calls `crate::vec_ext::ensure_vec_chunks(conn, dims)` and, only
///   if that succeeds, records `dims` as the run's dimension.
/// * `Some(d)` with `d == dims` — nothing to do.
/// * `Some(d)` with `d != dims` — sets `ctx.dimension_mismatch`, logs an error
///   (target `talon::embed`) and still returns `Ok(())`; the pinned dimension
///   is left unchanged.
///
/// # Errors
///
/// Propagates any error from `ensure_vec_chunks`. In that case
/// `ctx.current_dimensions` stays `None`, so a later call retries the ensure
/// step instead of treating the dimension as already aligned.
pub fn align_embedding_dimensions(
    conn: &Connection,
    ctx: &mut EmbedRunContext,
    dims: u32,
) -> Result<(), TalonError> {
    match ctx.current_dimensions {
        None => {
            // Run the fallible step first: pinning before it succeeds would make
            // a retry with the same `dims` take the no-op branch below.
            crate::vec_ext::ensure_vec_chunks(conn, dims)?;
            ctx.current_dimensions = Some(dims);
            Ok(())
        }
        Some(existing) if existing == dims => Ok(()),
        Some(existing) => {
            ctx.dimension_mismatch = true;
            tracing::error!(
                target: "talon::embed",
                expected = existing,
                got = dims,
                "embedding dimension mismatch — semantic search disabled until consistent"
            );
            Ok(())
        }
    }
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::store::open_database;
    use crate::vec_ext::register_sqlite_vec;
    use std::path::{Path, PathBuf};
    use std::sync::atomic::{AtomicU64, Ordering};

    /// Builds a database path in the temp directory that is unique per process
    /// id and per call within this process.
    fn unique_path() -> PathBuf {
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        let seq = COUNTER.fetch_add(1, Ordering::Relaxed);
        let pid = std::process::id();
        let file_name = format!("talon-embed-diag-test-{pid}-{seq}.sqlite");
        std::env::temp_dir().join(file_name)
    }

    /// Best-effort removal of the database file and its `-wal` / `-shm`
    /// siblings; removal errors are deliberately ignored.
    fn cleanup(path: &Path) {
        let targets = [
            path.to_path_buf(),
            path.with_extension("sqlite-wal"),
            path.with_extension("sqlite-shm"),
        ];
        for target in targets {
            let _ = fs_err::remove_file(target);
        }
    }

    #[test]
    fn record_diagnostic_caps_at_max() {
        let mut ctx = EmbedRunContext::default();
        let attempts = MAX_DIAGNOSTICS + 5;
        (0..attempts).for_each(|i| ctx.record_diagnostic("a.md", &format!("detail {i}")));
        assert_eq!(ctx.diagnostics.len(), MAX_DIAGNOSTICS);
    }

    #[test]
    fn record_diagnostic_redacts_paths() {
        let mut ctx = EmbedRunContext::default();
        ctx.record_diagnostic("note.md", "POST https://localhost:8080/embed timed out");
        let first_line = &ctx.diagnostics[0];
        assert!(first_line.contains("[sidecar]"));
    }

    #[test]
    fn align_dimensions_pins_then_detects_mismatch() {
        register_sqlite_vec().unwrap();
        let db_path = unique_path();
        let conn = open_database(&db_path).unwrap();
        let mut ctx = EmbedRunContext::default();

        // The first call pins the dimension.
        align_embedding_dimensions(&conn, &mut ctx, 768).unwrap();
        assert_eq!(ctx.current_dimensions, Some(768));
        assert!(!ctx.dimension_mismatch);

        // Repeating the pinned dimension is fine; a different one raises the flag.
        for (dims, mismatch_expected) in [(768, false), (384, true)] {
            align_embedding_dimensions(&conn, &mut ctx, dims).unwrap();
            assert_eq!(ctx.dimension_mismatch, mismatch_expected);
        }

        drop(conn);
        cleanup(&db_path);
    }
}