use crate::message::ConversationMessage;
use anyhow::{Context as _, anyhow};
use rusqlite::{Connection, OptionalExtension};
use std::fmt::Write as _;
use std::path::{Path, PathBuf};
/// Schema version recorded in SQLite's `user_version` pragma.
///
/// `IndexCache::migrate` compares the stored value against this constant and
/// drops all cached tables when they differ, so bump it whenever the table
/// layout changes.
const DB_SCHEMA_VERSION: i64 = 1;
/// Tokenizer specification interpolated into the `tokenize = '...'` option of
/// every per-context FTS5 table created by `IndexCache::reindex`.
const TOKENIZE: &str = "unicode61 remove_diacritics 2";
/// Identifies one indexed context together with the state of its source file.
///
/// `client` and `context_id` select the row in the `contexts` table and are
/// also used to derive the name of the per-context FTS table. `source_path`
/// and `mtime` are the values `IndexCache::is_fresh` compares against the
/// stored row to decide whether the index is still up to date.
#[derive(Debug, Clone)]
pub struct CacheKey {
/// Client name; first component of the `contexts` primary key.
pub client: String,
/// Context identifier; second component of the `contexts` primary key.
pub context_id: String,
/// Path of the file the context came from; stored in the database through
/// `Path::display`.
pub source_path: PathBuf,
/// Modification stamp of `source_path`.
/// NOTE(review): presumably whole seconds since the Unix epoch as produced by
/// `mtime_of` — confirm against callers.
pub mtime: i64,
}
/// One row returned by `IndexCache::query`.
#[derive(Debug, Clone)]
pub struct CachedHit {
/// Value of the `entry_id` column of the matching FTS row.
pub entry_id: String,
/// Result of FTS5 `bm25()` for the row. `IndexCache::query` returns hits in
/// ascending order of this value (FTS5 gives better matches lower scores).
pub score: f64,
/// Excerpt produced by FTS5 `snippet()` over the `content` column, with no
/// highlight markers, `...` as the ellipsis text and a limit of 6 tokens.
pub snippet: String,
}
/// SQLite-backed full-text index cache.
///
/// Holds the `rusqlite` connection through which the `contexts` bookkeeping
/// table and the per-context FTS5 tables are read and written.
pub struct IndexCache {
/// Open connection to the cache database.
conn: Connection,
}
impl IndexCache {
pub fn open() -> anyhow::Result<Self> {
let path = cache_db_path()?;
Self::open_at(&path)
}
pub fn open_at(path: &Path) -> anyhow::Result<Self> {
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent)
.with_context(|| format!("failed to create cache dir {}", parent.display()))?;
}
let conn = Connection::open(path)
.with_context(|| format!("failed to open cache at {}", path.display()))?;
let mut cache = Self { conn };
cache.migrate()?;
Ok(cache)
}
fn migrate(&mut self) -> anyhow::Result<()> {
let current_version: i64 = self
.conn
.query_row("PRAGMA user_version", [], |row| row.get(0))?;
if current_version != DB_SCHEMA_VERSION {
self.drop_cached_data()?;
}
self.create_schema()?;
self.conn
.execute_batch(&format!("PRAGMA user_version = {DB_SCHEMA_VERSION};"))?;
Ok(())
}
fn create_schema(&self) -> anyhow::Result<()> {
self.conn.execute_batch(
"CREATE TABLE IF NOT EXISTS contexts (
client TEXT NOT NULL,
context_id TEXT NOT NULL,
source_path TEXT NOT NULL,
mtime INTEGER NOT NULL,
entry_count INTEGER NOT NULL,
indexed_at INTEGER NOT NULL,
PRIMARY KEY (client, context_id)
);
CREATE INDEX IF NOT EXISTS contexts_source_path_idx
ON contexts(source_path);",
)?;
Ok(())
}
fn drop_cached_data(&mut self) -> anyhow::Result<()> {
let table_names = {
let mut stmt = self.conn.prepare(
"SELECT name FROM sqlite_master
WHERE type = 'table'
AND sql LIKE 'CREATE VIRTUAL TABLE%USING fts5%'",
)?;
stmt.query_map([], |row| row.get::<_, String>(0))?
.collect::<Result<Vec<_>, _>>()?
};
let tx = self.conn.transaction()?;
tx.execute("DROP TABLE IF EXISTS contexts", [])?;
for table_name in table_names {
tx.execute(
&format!("DROP TABLE IF EXISTS {}", quote_ident(&table_name)),
[],
)?;
}
tx.commit()?;
Ok(())
}
pub fn is_fresh(&self, key: &CacheKey) -> anyhow::Result<bool> {
let row = self
.conn
.query_row(
"SELECT source_path, mtime \
FROM contexts WHERE client = ?1 AND context_id = ?2",
rusqlite::params![key.client, key.context_id],
|row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)?)),
)
.optional()?;
let Some((source_path, mtime)) = row else {
return Ok(false);
};
Ok(source_path == key.source_path.display().to_string() && mtime == key.mtime)
}
pub fn index(
&mut self,
key: &CacheKey,
messages: &[ConversationMessage],
) -> anyhow::Result<()> {
if self.is_fresh(key)? {
return Ok(());
}
self.reindex(key, messages)
}
fn reindex(&mut self, key: &CacheKey, messages: &[ConversationMessage]) -> anyhow::Result<()> {
let tx = self.conn.transaction()?;
let table_name = fts_table_for(key);
tx.execute_batch(&format!(
"DROP TABLE IF EXISTS fts_{table_name};
CREATE VIRTUAL TABLE fts_{table_name} USING fts5(
entry_id UNINDEXED,
content,
tokenize = '{TOKENIZE}'
);"
))?;
{
let mut stmt = tx.prepare(&format!(
"INSERT INTO fts_{table_name}(entry_id, content) VALUES (?1, ?2)"
))?;
for message in messages {
let searchable = crate::display::searchable_text(message);
if searchable.is_empty() {
continue;
}
stmt.execute(rusqlite::params![message.entry_id, searchable])?;
}
}
let now = unix_now();
let entry_count: i64 = messages.len().try_into().unwrap_or(i64::MAX);
tx.execute(
"INSERT OR REPLACE INTO contexts
(client, context_id, source_path, mtime, entry_count, indexed_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
rusqlite::params![
key.client,
key.context_id,
key.source_path.display().to_string(),
key.mtime,
entry_count,
now,
],
)?;
tx.commit()?;
Ok(())
}
pub fn query(&self, key: &CacheKey, pattern: &str) -> anyhow::Result<Vec<CachedHit>> {
let table = fts_table_for(key);
let mut stmt = self.conn.prepare(&format!(
"SELECT entry_id, bm25(fts_{table}) AS score, \
snippet(fts_{table}, 1, '', '', '...', 6) AS snip \
FROM fts_{table} \
WHERE fts_{table} MATCH ?1 \
ORDER BY score ASC"
))?;
let rows = stmt
.query_map(rusqlite::params![pattern], |row| {
Ok(CachedHit {
entry_id: row.get::<_, String>(0)?,
score: row.get::<_, f64>(1)?,
snippet: row.get::<_, String>(2)?,
})
})?
.collect::<Result<Vec<_>, _>>()?;
Ok(rows)
}
}
/// Renders a fixed, human-readable tag for the cache.
///
/// The output is always the literal `IndexCache(<fts5>)`; nothing about the
/// underlying connection is printed.
impl std::fmt::Display for IndexCache {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep the parentheses balanced: the tag is meant to read like a
        // constructor call.
        f.write_str("IndexCache(<fts5>)")
    }
}
/// Builds the suffix of the FTS table name for `key`.
///
/// The result is the client name with every character that is not ASCII
/// alphanumeric replaced by `_`, then `_`, then the sanitised context id.
/// Callers prepend `fts_` themselves.
///
/// NOTE(review): the mapping is not injective — for example the context ids
/// `a-b` and `a_b` under the same client yield the same name.
fn fts_table_for(key: &CacheKey) -> String {
    let reserve = key.client.len() + key.context_id.len() + 4;
    let mut name = String::with_capacity(reserve);
    name.extend(
        key.client
            .chars()
            .map(|ch| if ch.is_ascii_alphanumeric() { ch } else { '_' }),
    );
    // Writing into a `String` cannot fail, so the result is deliberately ignored.
    let _ = write!(name, "_{}", sanitize_context_id(&key.context_id));
    name
}
/// Turns a context id into text that is safe to embed in a table name.
///
/// Every character that is not ASCII alphanumeric becomes `_` (one underscore
/// per character, not per byte). An empty id maps to the literal `empty`.
fn sanitize_context_id(id: &str) -> String {
    if id.is_empty() {
        return String::from("empty");
    }
    id.chars()
        .map(|ch| if ch.is_ascii_alphanumeric() { ch } else { '_' })
        .collect()
}
/// Wraps `ident` in double quotes for use as an SQL identifier, doubling any
/// double quote it already contains.
fn quote_ident(ident: &str) -> String {
    let mut quoted = String::with_capacity(ident.len() + 2);
    quoted.push('"');
    for ch in ident.chars() {
        if ch == '"' {
            // An embedded quote is escaped by writing it twice.
            quoted.push('"');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
/// Returns the default database location: `goosedump/index.db` beneath the
/// directory reported by `dirs::cache_dir`.
///
/// # Errors
/// Fails when `dirs::cache_dir` reports no cache directory.
fn cache_db_path() -> anyhow::Result<PathBuf> {
    dirs::cache_dir()
        .map(|base| base.join("goosedump").join("index.db"))
        .ok_or_else(|| anyhow!("cache directory not available on this platform"))
}
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Returns 0 when the system clock is set before the epoch or when the
/// second count does not fit in an `i64`.
fn unix_now() -> i64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |elapsed| {
            i64::try_from(elapsed.as_secs()).unwrap_or_default()
        })
}
/// Modification time of `path` as whole seconds since the Unix epoch.
///
/// Best effort: returns 0 when the metadata cannot be read, the platform does
/// not report a modification time, the time lies before the epoch, or the
/// second count does not fit in an `i64`.
pub fn mtime_of(path: &Path) -> i64 {
    std::fs::metadata(path)
        .and_then(|meta| meta.modified())
        .ok()
        .and_then(|stamp| stamp.duration_since(std::time::UNIX_EPOCH).ok())
        .and_then(|age| i64::try_from(age.as_secs()).ok())
        .unwrap_or(0)
}