lantern 0.2.3

Local-first, provenance-aware semantic search for agent activity
Documentation
//! Background maintenance for access metadata.
//!
//! Lantern already bumps `access_count` and `last_accessed_at` on retrieval so
//! recently-used chunks surface as fresher, more confident hits. This module
//! provides the complementary maintenance pass: apply a time-based decay to
//! stale access counts, but only after a separate decay checkpoint says enough
//! time has elapsed. That keeps the pass idempotent across repeated runs while
//! preserving `last_accessed_at` for confidence scoring.

use anyhow::Result;
use rusqlite::params;
use serde::Serialize;

use crate::inspect::now_unix;
use crate::store::Store;

/// Number of seconds in one day; the default thresholds are expressed in whole days.
const SECS_PER_DAY: u64 = 24 * 60 * 60;

/// Default value for [`CompactOptions::half_life_secs`]: 30 days, in seconds.
pub const DEFAULT_HALF_LIFE_SECS: u64 = 30 * SECS_PER_DAY;
/// Default value for [`CompactOptions::minimum_age_secs`]: 14 days, in seconds.
pub const DEFAULT_MINIMUM_AGE_SECS: u64 = 14 * SECS_PER_DAY;

/// Tunables for [`compact_access_metadata`].
#[derive(Debug, Clone, Copy)]
pub struct CompactOptions {
    /// Half-life, in seconds, of the exponential decay applied to `access_count`.
    /// The compaction pass clamps this to at least 1 before using it as a divisor.
    pub half_life_secs: u64,
    /// Minimum age, in seconds, a chunk's reference timestamp must have reached
    /// before its count is decayed; younger chunks are reported as skipped.
    pub minimum_age_secs: u64,
}

impl Default for CompactOptions {
    fn default() -> Self {
        Self {
            half_life_secs: DEFAULT_HALF_LIFE_SECS,
            minimum_age_secs: DEFAULT_MINIMUM_AGE_SECS,
        }
    }
}

/// Summary of one [`compact_access_metadata`] pass; serializable for JSON output.
#[derive(Debug, Clone, Serialize)]
pub struct CompactReport {
    /// Schema version reported by the store at the start of the pass.
    pub schema_version: i64,
    /// Number of chunks with `access_count > 0` that were examined.
    pub scanned_chunks: i64,
    /// Number of chunks whose `access_count` was lowered and checkpointed.
    pub decayed_chunks: i64,
    /// Number of chunks skipped because their age was below `minimum_age_secs`.
    pub skipped_recent_chunks: i64,
    /// Sum of `access_count` over all scanned chunks before the pass.
    pub access_count_before: i64,
    /// Sum of `access_count` over all scanned chunks after the pass.
    pub access_count_after: i64,
    /// Half-life actually used by the pass (the requested value clamped to >= 1).
    pub half_life_secs: u64,
    /// Minimum age threshold used by the pass, as supplied in the options.
    pub minimum_age_secs: u64,
}

/// One row of the `chunks` table as read by the compaction pass.
#[derive(Debug, Clone)]
struct CompactCandidate {
    /// Value of the `id` column; used to target the follow-up `UPDATE`.
    chunk_id: String,
    /// Value of the `timestamp_unix` column; last-resort reference for the chunk's age.
    timestamp_unix: Option<i64>,
    /// Current `access_count` (always > 0 because of the query filter).
    access_count: i64,
    /// Value of the `last_accessed_at` column; second-choice age reference.
    last_accessed_at: Option<i64>,
    /// Value of the `access_decay_at` column (written by a previous decay);
    /// preferred age reference when present.
    access_decay_at: Option<i64>,
}

/// Decays stale `access_count` values in the `chunks` table and reports what changed.
///
/// Every chunk with `access_count > 0` is scanned. For each one the pass picks a
/// reference timestamp — `access_decay_at`, else `last_accessed_at`, else
/// `timestamp_unix` — and computes the chunk's age relative to "now":
///
/// * no reference timestamp at all: the count is carried over unchanged;
/// * age below `opts.minimum_age_secs`: counted in `skipped_recent_chunks`;
/// * otherwise the count is multiplied by `0.5^(age / half_life)` and rounded down.
///   If that lowers the count, the new value is stored and `access_decay_at` is set
///   to "now"; `last_accessed_at` is never modified.
///
/// `opts.half_life_secs` is clamped to at least 1 so it can be used as a divisor; the
/// clamped value is what the report carries.
///
/// # Errors
///
/// Returns an error if reading the schema version, opening the transaction, or any
/// SQL statement fails. All updates are committed together at the end, so an early
/// error return leaves the transaction uncommitted.
pub fn compact_access_metadata(store: &mut Store, opts: CompactOptions) -> Result<CompactReport> {
    let schema_version = store.schema_version()?;
    let now = now_unix();
    let half_life_secs = opts.half_life_secs.max(1);
    let minimum_age_secs = opts.minimum_age_secs;

    let tx = store.conn_mut().transaction()?;

    // Materialize the candidates first so the SELECT statement is finished before
    // any row is updated.
    let candidates = {
        let mut select = tx.prepare(
            "SELECT id, timestamp_unix, access_count, last_accessed_at, access_decay_at
             FROM chunks
             WHERE access_count > 0",
        )?;
        let rows = select.query_map([], |row| {
            Ok(CompactCandidate {
                chunk_id: row.get(0)?,
                timestamp_unix: row.get(1)?,
                access_count: row.get(2)?,
                last_accessed_at: row.get(3)?,
                access_decay_at: row.get(4)?,
            })
        })?;
        rows.collect::<std::result::Result<Vec<_>, _>>()?
    };

    let mut report = CompactReport {
        schema_version,
        scanned_chunks: 0,
        decayed_chunks: 0,
        skipped_recent_chunks: 0,
        access_count_before: 0,
        access_count_after: 0,
        half_life_secs,
        minimum_age_secs,
    };

    {
        // Prepared once and reused for every decayed row. The statement borrows the
        // transaction, so it lives in its own scope and is dropped before `commit`.
        let mut update = tx.prepare(
            "UPDATE chunks
             SET access_count = ?1,
                 access_decay_at = ?2
             WHERE id = ?3",
        )?;

        for candidate in candidates {
            report.scanned_chunks += 1;
            report.access_count_before += candidate.access_count;

            // NOTE(review): the decay checkpoint takes precedence even when the chunk
            // was accessed after it, so such a chunk is aged from the checkpoint rather
            // than from its latest access — confirm this is intended.
            let reference_ts = candidate
                .access_decay_at
                .or(candidate.last_accessed_at)
                .or(candidate.timestamp_unix);
            let Some(reference_ts) = reference_ts else {
                // Nothing to measure age against: leave the count untouched.
                report.access_count_after += candidate.access_count;
                continue;
            };

            // Saturating subtraction keeps a corrupt or extreme stored timestamp from
            // overflowing; timestamps in the future count as age zero.
            let age_secs = now.saturating_sub(reference_ts).max(0).unsigned_abs();
            if age_secs < minimum_age_secs {
                report.skipped_recent_chunks += 1;
                report.access_count_after += candidate.access_count;
                continue;
            }

            let decayed_count =
                decayed_access_count(candidate.access_count, age_secs, half_life_secs);
            if decayed_count >= candidate.access_count {
                // Rounding left the count unchanged; skip the write so the checkpoint
                // is not advanced without an actual decay.
                report.access_count_after += candidate.access_count;
                continue;
            }

            update.execute(params![decayed_count, now, candidate.chunk_id])?;
            report.decayed_chunks += 1;
            report.access_count_after += decayed_count;
        }
    }

    tx.commit()?;

    Ok(report)
}

/// Returns `access_count` scaled by `0.5^(age_secs / half_life_secs)`, rounded down and
/// clamped at zero. `half_life_secs` must be non-zero.
fn decayed_access_count(access_count: i64, age_secs: u64, half_life_secs: u64) -> i64 {
    let decay_factor = 0.5f64.powf(age_secs as f64 / half_life_secs as f64);
    ((access_count as f64 * decay_factor).floor() as i64).max(0)
}

pub fn print_text(report: &CompactReport) {
    println!(
        "compacted chunks={} decayed={} skipped_recent={} access_count={}→{} half_life={}s min_age={}s schema=v{}",
        report.scanned_chunks,
        report.decayed_chunks,
        report.skipped_recent_chunks,
        report.access_count_before,
        report.access_count_after,
        report.half_life_secs,
        report.minimum_age_secs,
        report.schema_version,
    );
}

/// Prints the report to stdout as pretty-printed JSON.
///
/// # Errors
///
/// Returns an error if the report cannot be serialized.
pub fn print_json(report: &CompactReport) -> Result<()> {
    let rendered = serde_json::to_string_pretty(report)?;
    println!("{}", rendered);
    Ok(())
}

#[cfg(test)]
mod tests {
    use std::fs;

    use crate::ingest::ingest_path;
    use crate::search::{SearchOptions, search};
    use crate::store::Store;
    use rusqlite::params;
    use tempfile::tempdir;

    use super::{CompactOptions, compact_access_metadata};

    /// Seconds in one day, for expressing fixture ages readably.
    const DAY_SECS: i64 = 24 * 60 * 60;

    /// Creates a store under a fresh temp dir and ingests the given `(name, body)` files.
    ///
    /// The `TempDir` is returned so the caller keeps the directory alive for the test.
    fn setup_store_with(files: &[(&str, &str)]) -> (tempfile::TempDir, Store) {
        let root = tempdir().unwrap();
        let mut store = Store::initialize(&root.path().join("store")).unwrap();
        let data = root.path().join("data");
        fs::create_dir_all(&data).unwrap();
        for (name, body) in files {
            fs::write(data.join(name), body).unwrap();
        }
        ingest_path(&mut store, &data).unwrap();
        (root, store)
    }

    /// Returns the id of one chunk; used by fixtures that ingest a single small file.
    fn first_chunk_id(store: &Store) -> String {
        store
            .conn()
            .query_row("SELECT id FROM chunks LIMIT 1", [], |row| row.get(0))
            .unwrap()
    }

    /// Overwrites one chunk's access metadata and clears its decay checkpoint.
    fn set_access_state(store: &Store, chunk_id: &str, access_count: i64, last_accessed_at: i64) {
        store
            .conn()
            .execute(
                "UPDATE chunks
                 SET access_count = ?1,
                     last_accessed_at = ?2,
                     access_decay_at = NULL
                 WHERE id = ?3",
                params![access_count, last_accessed_at, chunk_id],
            )
            .unwrap();
    }

    /// Same as `set_access_state`, but targets the chunks of the source whose URI ends
    /// with `uri_suffix`.
    fn set_access_state_for_source(
        store: &Store,
        uri_suffix: &str,
        access_count: i64,
        last_accessed_at: i64,
    ) {
        let uri_pattern = format!("%{uri_suffix}");
        store
            .conn()
            .execute(
                "UPDATE chunks
                 SET access_count = ?1,
                     last_accessed_at = ?2,
                     access_decay_at = NULL
                 WHERE source_id = (SELECT id FROM sources WHERE uri LIKE ?3)",
                params![access_count, last_accessed_at, uri_pattern],
            )
            .unwrap();
    }

    /// Reads back `(access_count, access_decay_at)` for one chunk.
    fn access_state(store: &Store, chunk_id: &str) -> (i64, Option<i64>) {
        store
            .conn()
            .query_row(
                "SELECT access_count, access_decay_at FROM chunks WHERE id = ?1",
                params![chunk_id],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .unwrap()
    }

    /// A chunk touched one day ago is below the default minimum age and must be left alone.
    #[test]
    fn compact_skips_recently_touched_chunks() {
        let (_root, mut store) = setup_store_with(&[("a.md", "needle in haystack")]);
        let chunk_id = first_chunk_id(&store);
        let now = crate::inspect::now_unix();
        set_access_state(&store, &chunk_id, 8, now - DAY_SECS);

        let report = compact_access_metadata(&mut store, CompactOptions::default()).unwrap();
        assert_eq!(report.decayed_chunks, 0);
        assert_eq!(report.skipped_recent_chunks, 1);
        assert_eq!(report.scanned_chunks, 1);

        let (access_count, decay_at) = access_state(&store, &chunk_id);
        assert_eq!(access_count, 8);
        assert_eq!(decay_at, None);
    }

    /// One recent and one stale chunk in the same pass are counted separately.
    #[test]
    fn compact_reports_recent_skips_and_decays_independently() {
        let (_root, mut store) = setup_store_with(&[
            ("recent.md", "first chunk body"),
            ("stale.md", "second chunk body"),
        ]);
        let now = crate::inspect::now_unix();
        set_access_state_for_source(&store, "recent.md", 6, now - DAY_SECS);
        set_access_state_for_source(&store, "stale.md", 6, now - 90 * DAY_SECS);

        let report = compact_access_metadata(&mut store, CompactOptions::default()).unwrap();
        assert_eq!(report.scanned_chunks, 2);
        assert_eq!(report.skipped_recent_chunks, 1);
        assert_eq!(report.decayed_chunks, 1);
        assert_eq!(report.access_count_before, 12);
        assert!(report.access_count_after < report.access_count_before);
        assert!(report.access_count_after >= 6);
    }

    /// 21 days is past the 14-day minimum age: 8 * 0.5^(21/30) ≈ 4.9, which floors to 4.
    #[test]
    fn compact_decays_moderately_stale_counts_with_default_thresholds() {
        let (_root, mut store) = setup_store_with(&[("a.md", "needle in haystack")]);
        let chunk_id = first_chunk_id(&store);
        let now = crate::inspect::now_unix();
        set_access_state(&store, &chunk_id, 8, now - 21 * DAY_SECS);

        let report = compact_access_metadata(&mut store, CompactOptions::default()).unwrap();
        assert_eq!(report.decayed_chunks, 1);
        assert_eq!(report.access_count_before, 8);
        assert_eq!(report.access_count_after, 4);

        let (access_count, decay_at) = access_state(&store, &chunk_id);
        assert_eq!(access_count, 4);
        assert!(decay_at.is_some());
        assert!(decay_at.unwrap() >= now - 5);
    }

    /// Roughly three half-lives reduce a count of 8 to 1 and record the checkpoint.
    #[test]
    fn compact_decays_stale_counts_and_records_checkpoint() {
        let (_root, mut store) = setup_store_with(&[("a.md", "needle in haystack")]);
        let chunk_id = first_chunk_id(&store);
        let now = crate::inspect::now_unix();
        // Exactly 90 days would put 8 * 0.5^3 right on 1.0; if the clock ticked between
        // this `now` and the one read during compaction, the product would drop just
        // below 1.0 and floor to 0. Backdating 60 seconds short of 90 days keeps the
        // result safely above the boundary.
        set_access_state(&store, &chunk_id, 8, now - 90 * DAY_SECS + 60);

        let report = compact_access_metadata(&mut store, CompactOptions::default()).unwrap();
        assert_eq!(report.decayed_chunks, 1);
        assert_eq!(report.access_count_before, 8);
        assert_eq!(report.access_count_after, 1);

        let (access_count, decay_at) = access_state(&store, &chunk_id);
        assert_eq!(access_count, 1);
        assert!(decay_at.is_some());
        assert!(decay_at.unwrap() >= now - 5);
    }

    /// Compaction only touches access metadata; the chunk must still be searchable.
    #[test]
    fn compact_leaves_search_results_intact() {
        let (_root, mut store) = setup_store_with(&[("a.md", "Lanterns glow in the dark forest.")]);
        let hits = search(&store, "lantern", SearchOptions::default()).unwrap();
        assert_eq!(hits.len(), 1);
        let chunk_id = hits[0].chunk_id.clone();

        // Make the hit stale enough to decay, then compact it.
        let now = crate::inspect::now_unix();
        set_access_state(&store, &chunk_id, 4, now - 45 * DAY_SECS);
        compact_access_metadata(&mut store, CompactOptions::default()).unwrap();

        let hits = search(&store, "lantern", SearchOptions::default()).unwrap();
        assert_eq!(hits.len(), 1);
        assert!(hits[0].access_count >= 1);
        assert!(hits[0].uri.ends_with("/a.md"));
    }
}