lantern 0.2.3

Local-first, provenance-aware semantic search for agent activity
Documentation
//! Explicit deletion of indexed sources (and their chunks).
//!
//! `forget` removes every source whose path or URI contains the given
//! substring, alongside its chunks. The pattern argument is required — we
//! reject empty (or whitespace-only) patterns and patterns shorter than three
//! characters, so the command cannot be used as a bulk-wipe shortcut. Chunks
//! and the FTS index are cleaned up transitively via the `ON DELETE CASCADE`
//! foreign key and the `chunks_fts_ad` trigger, so callers do not need to
//! maintain that invariant by hand.

use anyhow::Result;
use rusqlite::params;
use serde::Serialize;

use crate::store::Store;

/// Outcome of a [`forget`] call, suitable for text or JSON rendering.
#[derive(Debug, Clone, Serialize)]
pub struct ForgetReport {
    /// The pattern exactly as the caller passed it (not trimmed).
    pub pattern: String,
    /// Mirrors the `apply` flag given to [`forget`]: `true` means matching
    /// sources were deleted, `false` means this was a dry run. It is `true`
    /// for an applied run even when nothing matched.
    pub applied: bool,
    /// Sources that matched the pattern — deleted when `applied` is `true`,
    /// otherwise the ones that would be deleted.
    pub removed: Vec<RemovedSource>,
}

/// One row of the `sources` table that matched a forget pattern.
#[derive(Debug, Clone, Serialize)]
pub struct RemovedSource {
    /// Value of `sources.id`.
    pub source_id: String,
    /// Value of `sources.uri`.
    pub uri: String,
    /// Value of `sources.path`; `None` when the column is NULL.
    pub path: Option<String>,
    /// Value of `sources.kind` (presumably the source type — confirm against
    /// the ingest code).
    pub kind: String,
    /// Value of `sources.bytes` (presumably the size of the ingested content —
    /// confirm against the ingest code).
    pub bytes: i64,
    /// Number of rows in `chunks` whose `source_id` refers to this source,
    /// counted at the time the sources were selected.
    pub chunks: i64,
}

/// Lists — and, when `apply` is `true`, deletes — every source whose `path`
/// or `uri` contains `pattern` as a literal substring.
///
/// The pattern is matched with SQLite `LIKE`, but the `LIKE` metacharacters
/// `%` and `_` (and the escape character `\`) are escaped first, so they only
/// ever match themselves. Without that, a pattern such as `%%%` or `___`
/// would pass the length guard below and still match (nearly) every source.
/// Note that SQLite's `LIKE` is case-insensitive for ASCII characters unless
/// the connection has `case_sensitive_like` enabled.
///
/// The length guard is evaluated on the trimmed pattern, while matching uses
/// the pattern exactly as given (surrounding whitespace included), which can
/// only make the match narrower than the guard assumes.
///
/// # Arguments
///
/// * `store` - the store whose `sources` table is searched and modified.
/// * `pattern` - substring to look for in each source's path or URI.
/// * `apply` - `false` performs a dry run that only reports the matches;
///   `true` deletes them inside a transaction.
///
/// # Returns
///
/// A [`ForgetReport`] listing the matched sources, newest `ingested_at`
/// first. `applied` mirrors `apply`, even when nothing matched.
///
/// # Errors
///
/// Fails when the trimmed pattern is empty or shorter than three characters,
/// or when any of the underlying database operations fails.
pub fn forget(store: &mut Store, pattern: &str, apply: bool) -> Result<ForgetReport> {
    let trimmed = pattern.trim();
    if trimmed.is_empty() {
        anyhow::bail!("forget pattern must not be empty");
    }
    // Require at least 3 characters so a typo or stray substring can't
    // accidentally match every source in the store.
    if trimmed.chars().count() < 3 {
        anyhow::bail!(
            "forget pattern must be at least 3 characters (got {:?}) to avoid accidental full wipes",
            trimmed
        );
    }
    // Neutralise LIKE wildcards in the user input; only the surrounding `%`
    // added here act as wildcards. Must stay in sync with the `ESCAPE '\'`
    // clauses in both statements below.
    let like = format!("%{}%", escape_like(pattern));

    let removed = {
        let conn = store.conn();
        let mut stmt = conn.prepare(
            "SELECT s.id, s.uri, s.path, s.kind, s.bytes,
                    (SELECT COUNT(*) FROM chunks c WHERE c.source_id = s.id) AS chunks
             FROM sources s
             WHERE s.path LIKE ?1 ESCAPE '\\' OR s.uri LIKE ?1 ESCAPE '\\'
             ORDER BY s.ingested_at DESC, s.id DESC",
        )?;
        let rows = stmt.query_map(params![&like], |row| {
            Ok(RemovedSource {
                source_id: row.get(0)?,
                uri: row.get(1)?,
                path: row.get(2)?,
                kind: row.get(3)?,
                bytes: row.get(4)?,
                chunks: row.get(5)?,
            })
        })?;
        rows.collect::<Result<Vec<_>, _>>()?
    };

    // Skip the write transaction entirely when there is nothing to delete.
    if apply && !removed.is_empty() {
        let tx = store.conn_mut().transaction()?;
        // Same predicate as the SELECT above so the deleted rows correspond
        // to the reported ones.
        tx.execute(
            "DELETE FROM sources WHERE path LIKE ?1 ESCAPE '\\' OR uri LIKE ?1 ESCAPE '\\'",
            params![&like],
        )?;
        tx.commit()?;
    }

    Ok(ForgetReport {
        pattern: pattern.to_string(),
        applied: apply,
        removed,
    })
}

/// Escapes `\`, `%` and `_` with a leading `\` so `raw` is matched literally
/// by a `LIKE ... ESCAPE '\'` expression.
fn escape_like(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        if matches!(ch, '\\' | '%' | '_') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}

/// Writes a human-readable rendering of `report` to stdout.
///
/// With no matches a single "no sources match" line is printed. Otherwise
/// one line per source is followed by a summary line and, for dry runs, a
/// hint about `--apply`.
pub fn print_text(report: &ForgetReport) {
    if report.removed.is_empty() {
        println!("no sources match pattern {:?}", report.pattern);
        return;
    }

    // Dry runs describe what *would* happen rather than what did.
    let verb = if report.applied {
        "removed"
    } else {
        "would remove"
    };

    // Accumulate the chunk total while printing the per-source lines.
    let mut total_chunks: i64 = 0;
    for source in &report.removed {
        println!(
            "{verb} source={} chunks={} bytes={} uri={}",
            source.source_id, source.chunks, source.bytes, source.uri
        );
        total_chunks += source.chunks;
    }

    let source_count = report.removed.len();
    println!(
        "summary pattern={:?} sources={} chunks={} applied={}",
        report.pattern, source_count, total_chunks, report.applied,
    );

    if report.applied {
        return;
    }
    println!("(dry run; pass --apply to actually delete)");
}

/// Writes `report` to stdout as pretty-printed JSON.
///
/// # Errors
///
/// Returns an error if serializing the report fails.
pub fn print_json(report: &ForgetReport) -> Result<()> {
    let rendered = serde_json::to_string_pretty(report)?;
    println!("{}", rendered);
    Ok(())
}