use anyhow::Result;
use rusqlite::params;
use serde::Serialize;
use crate::inspect::now_unix;
use crate::store::Store;
/// Number of seconds in one day; used to spell the defaults below in days.
const SECS_PER_DAY: u64 = 24 * 60 * 60;

/// Default half-life applied to access counts: 30 days, expressed in seconds.
pub const DEFAULT_HALF_LIFE_SECS: u64 = 30 * SECS_PER_DAY;

/// Default minimum age a chunk must reach before it is decayed: 14 days, in seconds.
pub const DEFAULT_MINIMUM_AGE_SECS: u64 = 14 * SECS_PER_DAY;
/// Tunables for [`compact_access_metadata`].
#[derive(Debug, Clone, Copy)]
pub struct CompactOptions {
    /// Time, in seconds, after which an access count is halved.
    /// A value of `0` is treated as `1` by the compaction pass.
    pub half_life_secs: u64,
    /// Chunks whose reference timestamp is younger than this many seconds are
    /// left untouched and reported as skipped.
    pub minimum_age_secs: u64,
}

impl Default for CompactOptions {
    /// Builds options from [`DEFAULT_HALF_LIFE_SECS`] and [`DEFAULT_MINIMUM_AGE_SECS`].
    fn default() -> Self {
        let half_life_secs = DEFAULT_HALF_LIFE_SECS;
        let minimum_age_secs = DEFAULT_MINIMUM_AGE_SECS;
        CompactOptions {
            half_life_secs,
            minimum_age_secs,
        }
    }
}
/// Summary of one compaction pass, as populated by `compact_access_metadata`.
///
/// Serializable so it can be emitted as JSON (see `print_json`).
#[derive(Debug, Clone, Serialize)]
pub struct CompactReport {
/// Schema version reported by the store at the start of the pass.
pub schema_version: i64,
/// Number of chunks with `access_count > 0` that were examined.
pub scanned_chunks: i64,
/// Number of chunks whose `access_count` was lowered and written back.
pub decayed_chunks: i64,
/// Number of chunks left untouched because their age was below `minimum_age_secs`.
pub skipped_recent_chunks: i64,
/// Sum of `access_count` over all scanned chunks before the pass.
pub access_count_before: i64,
/// Sum of `access_count` over all scanned chunks after the pass.
pub access_count_after: i64,
/// Half-life, in seconds, actually applied (the requested value clamped to at least 1).
pub half_life_secs: u64,
/// Minimum age, in seconds, a chunk needed to be eligible for decay.
pub minimum_age_secs: u64,
}
/// One row of the `chunks` table loaded for compaction (only rows with `access_count > 0`).
#[derive(Debug, Clone)]
struct CompactCandidate {
// Value of the `id` column; used as the key for the write-back UPDATE.
chunk_id: String,
// Value of the `timestamp_unix` column; `None` when the column is NULL.
timestamp_unix: Option<i64>,
// Current value of the `access_count` column.
access_count: i64,
// Value of the `last_accessed_at` column; `None` when the column is NULL.
last_accessed_at: Option<i64>,
// Value of the `access_decay_at` column (set to "now" by a previous decay); `None` when NULL.
access_decay_at: Option<i64>,
}
/// Returns `access_count` scaled by `0.5^(age_secs / half_life_secs)`, rounded down
/// and clamped so it is never negative.
///
/// `half_life_secs` must be non-zero; the caller clamps it.
fn decayed_access_count(access_count: i64, age_secs: u64, half_life_secs: u64) -> i64 {
    let decay_factor = 0.5f64.powf(age_secs as f64 / half_life_secs as f64);
    ((access_count as f64 * decay_factor).floor() as i64).max(0)
}

/// Applies exponential half-life decay to stale `access_count` values in the `chunks` table.
///
/// Every chunk with `access_count > 0` is loaded and aged against the first non-NULL of
/// `access_decay_at`, `last_accessed_at` and `timestamp_unix`:
///
/// * chunks with no usable timestamp are left untouched (they are counted as scanned only);
/// * chunks younger than `opts.minimum_age_secs` are left untouched and counted as skipped;
/// * all other chunks have their count multiplied by `0.5^(age / half_life)` and rounded
///   down; when that lowers the count, the new value is stored and `access_decay_at` is
///   set to the current time.
///
/// `opts.half_life_secs` is clamped to at least one second. All writes happen inside a
/// single transaction that is committed at the end.
///
/// # Errors
///
/// Returns an error if reading the schema version, opening the transaction, any query or
/// update, or the final commit fails. On error the transaction is dropped without commit.
pub fn compact_access_metadata(store: &mut Store, opts: CompactOptions) -> Result<CompactReport> {
    let schema_version = store.schema_version()?;
    let now = now_unix();
    // Clamp so the division in the decay exponent can never be by zero.
    let half_life_secs = opts.half_life_secs.max(1);
    let minimum_age_secs = opts.minimum_age_secs;

    let tx = store.conn_mut().transaction()?;

    // Materialize all candidates up front so the SELECT is finished before rows are updated.
    let candidates = {
        let mut stmt = tx.prepare(
            "SELECT id, timestamp_unix, access_count, last_accessed_at, access_decay_at
             FROM chunks
             WHERE access_count > 0",
        )?;
        let rows = stmt.query_map([], |row| {
            Ok(CompactCandidate {
                chunk_id: row.get(0)?,
                timestamp_unix: row.get(1)?,
                access_count: row.get(2)?,
                last_accessed_at: row.get(3)?,
                access_decay_at: row.get(4)?,
            })
        })?;
        rows.collect::<std::result::Result<Vec<_>, _>>()?
    };

    let mut scanned_chunks = 0i64;
    let mut decayed_chunks = 0i64;
    let mut skipped_recent_chunks = 0i64;
    let mut access_count_before = 0i64;
    let mut access_count_after = 0i64;

    {
        // Prepared once and reused for every decayed row. The inner scope ends the
        // statement's borrow of `tx` before `commit` consumes the transaction.
        let mut update = tx.prepare(
            "UPDATE chunks
             SET access_count = ?1,
                 access_decay_at = ?2
             WHERE id = ?3",
        )?;

        for candidate in candidates {
            scanned_chunks += 1;
            access_count_before += candidate.access_count;

            // NOTE(review): the previous decay checkpoint wins over `last_accessed_at`, so a
            // chunk accessed after its last decay is still aged from the checkpoint —
            // confirm this precedence is intended.
            let reference_ts = candidate
                .access_decay_at
                .or(candidate.last_accessed_at)
                .or(candidate.timestamp_unix);
            let Some(reference_ts) = reference_ts else {
                // No timestamp to age against: carry the count over unchanged.
                access_count_after += candidate.access_count;
                continue;
            };

            // Saturating so an extreme stored timestamp cannot overflow; timestamps in the
            // future yield an age of zero.
            let age_secs = now.saturating_sub(reference_ts).max(0) as u64;
            if age_secs < minimum_age_secs {
                skipped_recent_chunks += 1;
                access_count_after += candidate.access_count;
                continue;
            }

            let decayed_count =
                decayed_access_count(candidate.access_count, age_secs, half_life_secs);
            if decayed_count >= candidate.access_count {
                // Nothing to write; leave the row (and its checkpoint) untouched.
                access_count_after += candidate.access_count;
                continue;
            }

            update.execute(params![decayed_count, now, candidate.chunk_id])?;
            decayed_chunks += 1;
            access_count_after += decayed_count;
        }
    }

    tx.commit()?;

    Ok(CompactReport {
        schema_version,
        scanned_chunks,
        decayed_chunks,
        skipped_recent_chunks,
        access_count_before,
        access_count_after,
        half_life_secs,
        minimum_age_secs,
    })
}
pub fn print_text(report: &CompactReport) {
println!(
"compacted chunks={} decayed={} skipped_recent={} access_count={}→{} half_life={}s min_age={}s schema=v{}",
report.scanned_chunks,
report.decayed_chunks,
report.skipped_recent_chunks,
report.access_count_before,
report.access_count_after,
report.half_life_secs,
report.minimum_age_secs,
report.schema_version,
);
}
/// Prints `report` to stdout as pretty-printed JSON.
///
/// # Errors
///
/// Returns an error if the report cannot be serialized.
pub fn print_json(report: &CompactReport) -> Result<()> {
    let rendered = serde_json::to_string_pretty(report)?;
    println!("{}", rendered);
    Ok(())
}
#[cfg(test)]
mod tests {
    use std::fs;

    use crate::ingest::ingest_path;
    use crate::search::{SearchOptions, search};
    use crate::store::Store;
    use rusqlite::{ToSql, params};
    use tempfile::tempdir;

    use super::{CompactOptions, compact_access_metadata};

    /// Seconds in one day, for backdating timestamps in whole days.
    const DAY_SECS: i64 = 24 * 60 * 60;

    /// Creates a fresh store in a temp dir and ingests the given `(file name, body)` pairs.
    ///
    /// The returned `TempDir` must be kept alive for as long as the store is used.
    fn setup_store_with(files: &[(&str, &str)]) -> (tempfile::TempDir, Store) {
        let root = tempdir().unwrap();
        let mut store = Store::initialize(&root.path().join("store")).unwrap();
        let data = root.path().join("data");
        fs::create_dir_all(&data).unwrap();
        for (name, body) in files {
            fs::write(data.join(name), body).unwrap();
        }
        ingest_path(&mut store, &data).unwrap();
        (root, store)
    }

    /// Returns the id of an arbitrary chunk (the only one in single-file fixtures).
    fn first_chunk_id(store: &Store) -> String {
        store
            .conn()
            .query_row("SELECT id FROM chunks LIMIT 1", [], |row| row.get(0))
            .unwrap()
    }

    /// Sets the access metadata of one chunk and clears its decay checkpoint.
    fn seed_access(
        store: &Store,
        chunk_id: &impl ToSql,
        access_count: i64,
        last_accessed_at: i64,
    ) {
        store
            .conn()
            .execute(
                "UPDATE chunks
                 SET access_count = ?1,
                     last_accessed_at = ?2,
                     access_decay_at = NULL
                 WHERE id = ?3",
                params![access_count, last_accessed_at, chunk_id],
            )
            .unwrap();
    }

    /// Same as [`seed_access`], but targets the chunks of the source whose URI ends with
    /// `uri_suffix`.
    fn seed_access_for_source(
        store: &Store,
        uri_suffix: &str,
        access_count: i64,
        last_accessed_at: i64,
    ) {
        let pattern = format!("%{}", uri_suffix);
        store
            .conn()
            .execute(
                "UPDATE chunks
                 SET access_count = ?1,
                     last_accessed_at = ?2,
                     access_decay_at = NULL
                 WHERE source_id = (SELECT id FROM sources WHERE uri LIKE ?3)",
                params![access_count, last_accessed_at, pattern],
            )
            .unwrap();
    }

    /// Reads back `(access_count, access_decay_at)` for one chunk.
    fn read_access(store: &Store, chunk_id: &impl ToSql) -> (i64, Option<i64>) {
        store
            .conn()
            .query_row(
                "SELECT access_count, access_decay_at FROM chunks WHERE id = ?1",
                params![chunk_id],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .unwrap()
    }

    #[test]
    fn compact_skips_recently_touched_chunks() {
        let (_root, mut store) = setup_store_with(&[("a.md", "needle in haystack")]);
        let chunk_id = first_chunk_id(&store);
        let now = crate::inspect::now_unix();
        seed_access(&store, &chunk_id, 8, now - DAY_SECS);

        let report = compact_access_metadata(&mut store, CompactOptions::default()).unwrap();
        assert_eq!(report.decayed_chunks, 0);
        assert_eq!(report.skipped_recent_chunks, 1);
        assert_eq!(report.scanned_chunks, 1);

        let (access_count, decay_at) = read_access(&store, &chunk_id);
        assert_eq!(access_count, 8);
        assert_eq!(decay_at, None);
    }

    #[test]
    fn compact_reports_recent_skips_and_decays_independently() {
        let (_root, mut store) = setup_store_with(&[
            ("recent.md", "first chunk body"),
            ("stale.md", "second chunk body"),
        ]);
        let now = crate::inspect::now_unix();
        seed_access_for_source(&store, "recent.md", 6, now - DAY_SECS);
        seed_access_for_source(&store, "stale.md", 6, now - 90 * DAY_SECS);

        let report = compact_access_metadata(&mut store, CompactOptions::default()).unwrap();
        assert_eq!(report.scanned_chunks, 2);
        assert_eq!(report.skipped_recent_chunks, 1);
        assert_eq!(report.decayed_chunks, 1);
        assert_eq!(report.access_count_before, 12);
        assert!(report.access_count_after < report.access_count_before);
        assert!(report.access_count_after >= 6);
    }

    #[test]
    fn compact_decays_moderately_stale_counts_with_default_thresholds() {
        let (_root, mut store) = setup_store_with(&[("a.md", "needle in haystack")]);
        let chunk_id = first_chunk_id(&store);
        let now = crate::inspect::now_unix();
        // 21 days at a 30-day half-life: 8 * 0.5^0.7 ≈ 4.92, which floors to 4.
        seed_access(&store, &chunk_id, 8, now - 21 * DAY_SECS);

        let report = compact_access_metadata(&mut store, CompactOptions::default()).unwrap();
        assert_eq!(report.decayed_chunks, 1);
        assert_eq!(report.access_count_before, 8);
        assert_eq!(report.access_count_after, 4);

        let (access_count, decay_at) = read_access(&store, &chunk_id);
        assert_eq!(access_count, 4);
        let decay_at = decay_at.expect("decay checkpoint should be recorded");
        assert!(decay_at >= now - 5);
    }

    #[test]
    fn compact_decays_stale_counts_and_records_checkpoint() {
        let (_root, mut store) = setup_store_with(&[("a.md", "needle in haystack")]);
        let chunk_id = first_chunk_id(&store);
        let now = crate::inspect::now_unix();
        // Backdate one hour short of three half-lives. At exactly 90 days the result is
        // 8 * 0.5^3 = 1.0, right on the `floor` boundary: if the clock ticked between this
        // `now` and the one taken inside the compaction pass, the count would floor to 0
        // and the assertions below would fail intermittently.
        seed_access(&store, &chunk_id, 8, now - (90 * DAY_SECS - 60 * 60));

        let report = compact_access_metadata(&mut store, CompactOptions::default()).unwrap();
        assert_eq!(report.decayed_chunks, 1);
        assert_eq!(report.access_count_before, 8);
        assert_eq!(report.access_count_after, 1);

        let (access_count, decay_at) = read_access(&store, &chunk_id);
        assert_eq!(access_count, 1);
        let decay_at = decay_at.expect("decay checkpoint should be recorded");
        assert!(decay_at >= now - 5);
    }

    #[test]
    fn compact_leaves_search_results_intact() {
        let (_root, mut store) =
            setup_store_with(&[("a.md", "Lanterns glow in the dark forest.")]);
        let hits = search(&store, "lantern", SearchOptions::default()).unwrap();
        assert_eq!(hits.len(), 1);
        let chunk_id = hits[0].chunk_id.clone();
        let now = crate::inspect::now_unix();
        seed_access(&store, &chunk_id, 4, now - 45 * DAY_SECS);

        compact_access_metadata(&mut store, CompactOptions::default()).unwrap();

        let hits = search(&store, "lantern", SearchOptions::default()).unwrap();
        assert_eq!(hits.len(), 1);
        assert!(hits[0].access_count >= 1);
        assert!(hits[0].uri.ends_with("/a.md"));
    }
}