lantern 0.2.3

Local-first, provenance-aware semantic search for agent activity
Documentation
//! Rebuild the full-text index from the canonical chunk rows.
//!
//! Only the FTS shadow structure is touched: the `chunks_fts` virtual table
//! and its three synchronization triggers are dropped and recreated, then
//! populated from `chunks`. The `sources` and `chunks` tables — the
//! authoritative data — are read-only during the operation, so there is no
//! risk of losing ingested content even if the reindex is interrupted. The
//! whole sequence runs inside one transaction, so either every FTS row is
//! rebuilt or nothing changes.

use anyhow::Result;
use serde::Serialize;

use crate::store::Store;

/// Summary of one [`reindex`] run.
///
/// Derives `Serialize` so it can be emitted as JSON by [`print_json`];
/// [`print_text`] renders the same fields as a single line.
#[derive(Debug, Clone, Serialize)]
pub struct ReindexReport {
    /// Schema version reported by the store just before the rebuild started.
    pub schema_version: i64,
    /// Number of rows in `chunks_fts` after the rebuild, counted inside the
    /// reindex transaction.
    pub chunks_reindexed: i64,
}

/// SQL executed inside the reindex transaction. Duplicates the FTS portion
/// of the v2 schema on purpose: reindex should not depend on migration
/// ordering and should be readable as a single self-contained recipe.
///
/// The batch performs four steps, in order:
///
/// 1. Drop the three synchronization triggers and the `chunks_fts` table.
///    Every drop is guarded by `IF EXISTS`, so the batch also runs when
///    some of those objects are missing.
/// 2. Recreate `chunks_fts` as an FTS5 table with a single `text` column,
///    tokenized with `unicode61 remove_diacritics 2`.
/// 3. Recreate the triggers that mirror inserts, deletes, and updates of
///    `chunks.text` into `chunks_fts`, matching rows by `rowid`.
/// 4. Backfill `chunks_fts` with the `rowid` and `text` of every row
///    currently in `chunks`.
///
/// NOTE(review): the schema migrations are not visible from this file; when
/// the FTS definition changes there, confirm this copy is updated to match.
const REINDEX_SQL: &str = r#"
DROP TRIGGER IF EXISTS chunks_fts_ai;
DROP TRIGGER IF EXISTS chunks_fts_ad;
DROP TRIGGER IF EXISTS chunks_fts_au;
DROP TABLE    IF EXISTS chunks_fts;

CREATE VIRTUAL TABLE chunks_fts USING fts5(
    text,
    tokenize = 'unicode61 remove_diacritics 2'
);

CREATE TRIGGER chunks_fts_ai AFTER INSERT ON chunks BEGIN
    INSERT INTO chunks_fts(rowid, text) VALUES (new.rowid, new.text);
END;

CREATE TRIGGER chunks_fts_ad AFTER DELETE ON chunks BEGIN
    DELETE FROM chunks_fts WHERE rowid = old.rowid;
END;

CREATE TRIGGER chunks_fts_au AFTER UPDATE OF text ON chunks BEGIN
    UPDATE chunks_fts SET text = new.text WHERE rowid = old.rowid;
END;

INSERT INTO chunks_fts(rowid, text) SELECT rowid, text FROM chunks;
"#;

/// Drops and rebuilds the `chunks_fts` index from the rows in `chunks`.
///
/// Runs [`REINDEX_SQL`] and the follow-up row count inside a single
/// transaction, committing only after both succeed.
///
/// # Errors
///
/// Propagates any failure from reading the schema version, opening the
/// transaction, executing the SQL batch, counting the rebuilt rows, or
/// committing.
pub fn reindex(store: &mut Store) -> Result<ReindexReport> {
    // Fetched up front, before the connection is handed to the transaction.
    let version = store.schema_version()?;

    let txn = store.conn_mut().transaction()?;
    txn.execute_batch(REINDEX_SQL)?;

    // Count what actually landed in the FTS table, while still inside the
    // transaction, so the report reflects exactly what gets committed.
    let fts_rows: i64 = txn.query_row("SELECT COUNT(*) FROM chunks_fts", [], |r| r.get(0))?;

    txn.commit()?;

    let report = ReindexReport {
        schema_version: version,
        chunks_reindexed: fts_rows,
    };
    Ok(report)
}

/// Writes a one-line, human-readable summary of `report` to stdout.
///
/// The line has the form `reindexed chunks=<count> schema=v<version>`.
pub fn print_text(report: &ReindexReport) {
    let chunks = report.chunks_reindexed;
    let version = report.schema_version;
    println!("reindexed chunks={} schema=v{}", chunks, version);
}

/// Writes `report` to stdout as pretty-printed JSON.
///
/// # Errors
///
/// Returns the serialization error if `report` cannot be rendered as JSON;
/// nothing is printed in that case.
pub fn print_json(report: &ReindexReport) -> Result<()> {
    let rendered = serde_json::to_string_pretty(report)?;
    println!("{}", rendered);
    Ok(())
}