use crate::core::cache;
use crate::core::session::Session;
use anyhow::{Context, Result};
use std::collections::HashSet;
use std::path::Path;
/// Tally produced by one cache garbage-collection pass (`run_gc`).
///
/// Only regular files whose name looks like a blob (`<64 hex chars>.bin`)
/// are counted; symlinks that `run_gc` removes are not included.
#[derive(Debug, Default, serde::Serialize)]
pub struct GcReport {
/// Number of blob files deleted because their hash was not in the active set.
pub removed: u64,
/// Sum of the sizes (in bytes) of the deleted blob files.
pub bytes_freed: u64,
/// Number of blob files left in place because their hash is still active.
pub kept: u64,
/// Sum of the sizes (in bytes) of the blob files left in place.
pub kept_bytes: u64,
}
/// Snapshot of cache and database usage, populated by `collect_stats` and
/// rendered by `render_stats`.
#[derive(Debug, Default, serde::Serialize)]
pub struct CacheStats {
/// Current inline threshold as reported by `cache::inline_max_bytes()`.
pub inline_max_bytes: u64,
/// Number of `reads` rows with `content_storage = 'inline'`.
pub inline_rows: i64,
/// Total content size of the inline rows, as summed by the SQL query in
/// `collect_stats`.
pub inline_bytes: i64,
/// Number of `reads` rows with `content_storage = 'file'`.
pub file_rows: i64,
/// Size of the active hash set returned by `active_blob_hashes`.
/// NOTE(review): despite the name this is a count of distinct hashes, not rows.
pub linked_file_rows: i64,
/// Number of distinct `content_hash` values among the `'file'` rows of `reads`.
pub unique_hashes: i64,
/// Number of regular blob files (`<64 hex chars>.bin`) found in the cache directory.
pub cache_files: u64,
/// Combined on-disk size of those blob files, in bytes.
pub cache_size_bytes: u64,
/// Size of `sessions.db` in bytes (0 when its metadata cannot be read).
pub db_size_bytes: u64,
/// Number of blob files whose hash is not in the active set.
pub orphan_files: u64,
/// Combined size of the orphan blob files, in bytes.
pub orphan_bytes: u64,
/// `file_rows - unique_hashes`, clamped at zero.
pub dedup_savings: i64,
/// Number of inline rows whose content exceeds the current inline threshold.
pub compactable_rows: i64,
/// Total content size of the compactable rows, as summed by the SQL query in
/// `collect_stats`.
pub compactable_bytes: i64,
}
pub fn run_gc() -> Result<String> {
let session = Session::open_readonly()?;
let active: HashSet<String> = active_blob_hashes(&session)?;
let data_dir = crate::core::session::data_dir()?;
let cache_dir = cache::cache_dir(&data_dir);
let mut report = GcReport::default();
if !cache_dir.exists() {
return Ok(render_gc(&report, &cache_dir));
}
for entry in
std::fs::read_dir(&cache_dir).with_context(|| format!("listing cache dir {cache_dir:?}"))?
{
let entry = entry?;
let path = entry.path();
let Some(hash) = blob_hash_from_path(&path) else {
continue;
};
let ft = match entry.file_type() {
Ok(ft) => ft,
Err(_) => continue,
};
if ft.is_symlink() {
let _ = std::fs::remove_file(&path);
continue;
}
if !ft.is_file() {
continue;
}
let size = entry.metadata().map(|m| m.len()).unwrap_or(0);
if active.contains(&hash) {
report.kept += 1;
report.kept_bytes += size;
} else {
std::fs::remove_file(&path).with_context(|| format!("removing orphan {path:?}"))?;
report.removed += 1;
report.bytes_freed += size;
}
}
Ok(render_gc(&report, &cache_dir))
}
/// Collects the current cache statistics and renders them as text.
///
/// # Errors
///
/// Propagates any error returned by `collect_stats`.
pub fn run_stats() -> Result<String> {
    collect_stats().map(|stats| render_stats(&stats))
}
/// Gathers a [`CacheStats`] snapshot from the session database and the
/// on-disk blob cache.
///
/// The three aggregate queries are best-effort: if one fails, its figures
/// fall back to zero instead of failing the whole call. Directory entries
/// that cannot be read are skipped.
///
/// # Errors
///
/// Fails if the session or data directory cannot be opened, the active hash
/// set cannot be read, or the cache directory exists but cannot be listed.
pub fn collect_stats() -> Result<CacheStats> {
    let session = Session::open_readonly()?;
    let mut s = CacheStats {
        inline_max_bytes: cache::inline_max_bytes() as u64,
        ..CacheStats::default()
    };

    // SQLite's LENGTH() on a TEXT value counts characters, not bytes. The
    // figures here are displayed as bytes and compared against a byte limit,
    // so measure the BLOB form of the column instead.
    let (inline_rows, inline_bytes): (i64, i64) = session
        .conn
        .query_row(
            "SELECT COUNT(*), COALESCE(SUM(LENGTH(CAST(content AS BLOB))), 0)\n\
             FROM reads WHERE content_storage = 'inline'",
            [],
            |r| Ok((r.get(0)?, r.get(1)?)),
        )
        .unwrap_or((0, 0));
    s.inline_rows = inline_rows;
    s.inline_bytes = inline_bytes;

    let (file_rows, unique_hashes): (i64, i64) = session
        .conn
        .query_row(
            "SELECT COUNT(*), COUNT(DISTINCT content_hash)\n\
             FROM reads WHERE content_storage = 'file'",
            [],
            |r| Ok((r.get(0)?, r.get(1)?)),
        )
        .unwrap_or((0, 0));
    s.file_rows = file_rows;
    s.unique_hashes = unique_hashes;
    // Every row beyond the first that shares a hash is one blob not stored twice.
    s.dedup_savings = (file_rows - unique_hashes).max(0);

    let data_dir = crate::core::session::data_dir()?;
    let cache_dir = cache::cache_dir(&data_dir);
    let active = active_blob_hashes(&session)?;

    if cache_dir.exists() {
        for entry in std::fs::read_dir(&cache_dir)?.flatten() {
            let path = entry.path();
            let Some(hash) = blob_hash_from_path(&path) else {
                continue;
            };
            let Ok(ft) = entry.file_type() else {
                continue;
            };
            // Only regular files count; symlinks and directories are ignored.
            if !ft.is_file() {
                continue;
            }
            let size = entry.metadata().map(|m| m.len()).unwrap_or(0);
            s.cache_files += 1;
            s.cache_size_bytes += size;
            if !active.contains(&hash) {
                s.orphan_files += 1;
                s.orphan_bytes += size;
            }
        }
    }

    // NOTE(review): this stores the number of distinct active hashes, which
    // is not a row count despite the field name — confirm intended meaning.
    s.linked_file_rows = active.len() as i64;

    let db_path = data_dir.join("sessions.db");
    s.db_size_bytes = std::fs::metadata(&db_path).map(|m| m.len()).unwrap_or(0);

    // A limit too large for i64 saturates, so no row can exceed it.
    let limit_i64 = i64::try_from(cache::inline_max_bytes()).unwrap_or(i64::MAX);
    let (compactable_rows, compactable_bytes): (i64, i64) = session
        .conn
        .query_row(
            "SELECT COUNT(*), COALESCE(SUM(LENGTH(CAST(content AS BLOB))), 0)\n\
             FROM reads\n\
             WHERE content_storage = 'inline' AND LENGTH(CAST(content AS BLOB)) > ?1",
            rusqlite::params![limit_i64],
            |r| Ok((r.get(0)?, r.get(1)?)),
        )
        .unwrap_or((0, 0));
    s.compactable_rows = compactable_rows;
    s.compactable_bytes = compactable_bytes;

    Ok(s)
}
/// Outcome of one `run_compact` pass, rendered by `render_compact`.
#[derive(Debug, Default, serde::Serialize)]
pub struct CompactReport {
/// Number of inline rows whose content was moved to the blob cache.
pub rows_moved: i64,
/// Total size in bytes of the content moved out of the database.
pub bytes_moved: i64,
/// Size of `sessions.db` in bytes before compaction (0 if unreadable).
pub db_size_before: u64,
/// Size of `sessions.db` in bytes after compaction (0 if unreadable).
pub db_size_after: u64,
}
pub fn run_compact() -> Result<String> {
use rusqlite::params;
let session = crate::core::session::Session::open()?;
let data_dir = crate::core::session::data_dir()?;
let db_path = data_dir.join("sessions.db");
let limit = cache::inline_max_bytes();
let limit_i64 = i64::try_from(limit).unwrap_or(i64::MAX);
let mut report = CompactReport {
db_size_before: std::fs::metadata(&db_path).map(|m| m.len()).unwrap_or(0),
..CompactReport::default()
};
let mut stmt = session.conn.prepare(
"SELECT session_id, file_path, content_hash
FROM reads
WHERE content_storage = 'inline' AND LENGTH(content) > ?1",
)?;
let candidates: Vec<(String, String, String)> = stmt
.query_map(params![limit_i64], |r| {
Ok((r.get(0)?, r.get(1)?, r.get(2)?))
})?
.collect::<rusqlite::Result<Vec<_>>>()?;
drop(stmt);
if candidates.is_empty() {
report.db_size_after = report.db_size_before;
return Ok(render_compact(&report, false));
}
for (session_id, file_path, content_hash) in &candidates {
let content: String = session.conn.query_row(
"SELECT content FROM reads WHERE session_id = ?1 AND file_path = ?2",
params![session_id, file_path],
|r| r.get(0),
)?;
let bytes = content.len() as i64;
cache::write_blob(&data_dir, content_hash, content.as_bytes())?;
session.conn.execute(
"UPDATE reads
SET content = '', content_storage = 'file'
WHERE session_id = ?1 AND file_path = ?2",
params![session_id, file_path],
)?;
report.rows_moved += 1;
report.bytes_moved += bytes;
}
session.conn.execute("VACUUM", [])?;
let _: i64 = session
.conn
.query_row("PRAGMA wal_checkpoint(TRUNCATE)", [], |r| r.get(0))
.unwrap_or(0);
drop(session);
report.db_size_after = std::fs::metadata(&db_path).map(|m| m.len()).unwrap_or(0);
Ok(render_compact(&report, true))
}
/// Formats a [`CompactReport`] as the text shown by the compact command.
///
/// When no rows were moved only a "nothing to do" line follows the header.
/// The database size line is included only if `vacuumed` is true.
fn render_compact(r: &CompactReport, vacuumed: bool) -> String {
    let mut text = String::from("DRIP cache compact\n");

    if r.rows_moved == 0 {
        text += "  Nothing to do — no inline rows above the current threshold.\n";
        return text;
    }

    // Negative totals are clamped to zero before display.
    let hoisted = format_bytes(r.bytes_moved.max(0) as u64);
    text += &format!(
        "  Compacted: {} row(s), {} hoisted to cache\n",
        r.rows_moved, hoisted,
    );

    if vacuumed {
        let before = format_bytes(r.db_size_before);
        let after = format_bytes(r.db_size_after);
        // saturating_sub: a database that grew reports zero reclaimed.
        let reclaimed = format_bytes(r.db_size_before.saturating_sub(r.db_size_after));
        text += &format!(
            "  sessions.db: {} → {} (reclaimed {})\n",
            before, after, reclaimed,
        );
    }

    text
}
/// Returns every `content_hash` that a `'file'`-storage row in `reads` or
/// `file_registry` still references.
///
/// `run_gc` deletes any blob whose hash is missing from this set, so a row
/// that fails to decode is reported as an error instead of being dropped:
/// a silently shortened set would cause live blobs to be deleted. NULL
/// hashes carry no blob reference and are skipped.
///
/// # Errors
///
/// Fails if the statement cannot be prepared or executed, or if any row
/// cannot be read as text.
fn active_blob_hashes(session: &Session) -> Result<HashSet<String>> {
    let mut stmt = session.conn.prepare(
        "SELECT content_hash FROM reads WHERE content_storage = 'file'\n\
         UNION\n\
         SELECT content_hash FROM file_registry WHERE content_storage = 'file'",
    )?;

    let mut hashes = HashSet::new();
    for row in stmt.query_map([], |r| r.get::<_, Option<String>>(0))? {
        if let Some(hash) = row.context("reading active blob hash")? {
            hashes.insert(hash);
        }
    }
    Ok(hashes)
}
/// Extracts the blob hash from a cache file path.
///
/// Returns `Some(stem)` only when the extension is exactly `bin` and the
/// file stem is 64 ASCII hex digits (either case); otherwise `None`.
fn blob_hash_from_path(path: &Path) -> Option<String> {
    let ext = path.extension()?.to_str()?;
    if ext != "bin" {
        return None;
    }

    let stem = path.file_stem()?.to_str()?;
    // A 64-byte stem made solely of ASCII hex digits is also 64 characters.
    let looks_like_hash = stem.len() == 64 && stem.bytes().all(|b| b.is_ascii_hexdigit());
    looks_like_hash.then(|| stem.to_owned())
}
/// Formats a [`GcReport`] as the three-line summary shown by the GC command:
/// a header naming the cache directory, then the removed and kept totals.
fn render_gc(r: &GcReport, dir: &Path) -> String {
    let header = format!("DRIP cache GC ({})\n", dir.display());
    let removed = format!(
        "  Removed: {} file(s), {} freed\n",
        r.removed,
        format_bytes(r.bytes_freed),
    );
    let kept = format!(
        "  Kept:    {} file(s), {} in use\n",
        r.kept,
        format_bytes(r.kept_bytes),
    );
    [header, removed, kept].concat()
}
/// Formats a [`CacheStats`] snapshot as the multi-line report shown by the
/// stats command.
///
/// A trailing warning section is appended only when at least one inline row
/// is over the current threshold.
fn render_stats(s: &CacheStats) -> String {
    // An inline limit of usize::MAX is reported as the all-inline mode.
    let all_inline = s.inline_max_bytes == usize::MAX as u64;
    let mode = if all_inline {
        "all-inline (DRIP_INLINE_MAX_BYTES=-1)".to_string()
    } else {
        format!("hybrid (inline ≤ {})", format_bytes(s.inline_max_bytes))
    };

    let mut lines: Vec<String> = vec![
        "DRIP Cache Stats\n".to_string(),
        format!("  Mode              : {}\n", mode),
        format!(
            "  Inline rows       : {} ({} stored in DB)\n",
            s.inline_rows,
            format_bytes(s.inline_bytes.max(0) as u64),
        ),
        format!(
            "  Cached files      : {} ({} on disk, {} unique blobs)\n",
            s.cache_files,
            format_bytes(s.cache_size_bytes),
            s.unique_hashes,
        ),
        format!(
            "  Dedup savings     : {} row(s) sharing a blob\n",
            s.dedup_savings,
        ),
        format!(
            "  Orphan blobs      : {} ({}) — run `drip cache gc` to reclaim\n",
            s.orphan_files,
            format_bytes(s.orphan_bytes),
        ),
        format!(
            "  sessions.db size  : {}\n",
            format_bytes(s.db_size_bytes),
        ),
    ];

    if s.compactable_rows > 0 {
        lines.push("\n".to_string());
        lines.push(format!(
            "  ⚠ Compactable     : {} inline row(s) over the threshold ({})\n",
            s.compactable_rows,
            format_bytes(s.compactable_bytes.max(0) as u64),
        ));
        lines.push(
            "    Run `drip cache compact` to hoist them to the file cache and VACUUM the DB.\n"
                .to_string(),
        );
    }

    lines.concat()
}
/// Renders a byte count with a binary-scaled unit.
///
/// Values below 1024 are printed as whole bytes (`"512 B"`); larger values
/// use KB with one decimal, or MB / GB with two decimals, where each unit is
/// 1024 times the previous one.
fn format_bytes(n: u64) -> String {
    // (threshold in bytes, unit suffix, decimals), largest unit first.
    const UNITS: [(u64, &str, usize); 3] = [
        (1 << 30, "GB", 2),
        (1 << 20, "MB", 2),
        (1 << 10, "KB", 1),
    ];

    for &(threshold, suffix, decimals) in UNITS.iter() {
        if n >= threshold {
            let scaled = n as f64 / threshold as f64;
            return format!("{:.*} {}", decimals, scaled, suffix);
        }
    }
    format!("{} B", n)
}