use std::fs;
use anyhow::Result;
use rusqlite::params;
use serde::Serialize;
use crate::embed::{EmbeddingStats, embedding_stats};
use crate::store::Store;
/// Summary of a store's contents, as assembled by [`inspect`].
///
/// Serializable so it can be emitted as JSON (see [`print_json`]) or rendered
/// as plain text (see [`print_text`]).
#[derive(Debug, Clone, Serialize)]
pub struct InspectReport {
/// Schema version reported by `Store::schema_version`.
pub schema_version: i64,
/// Path returned by `Store::root`, converted lossily to a string.
pub store_path: String,
/// Path returned by `Store::db_path`, converted lossily to a string.
pub db_path: String,
/// Size of the file at `db_path` in bytes; 0 when its metadata cannot be read.
pub db_bytes: u64,
/// Number of rows in the `sources` table.
pub source_count: i64,
/// Number of rows in the `chunks` table.
pub chunk_count: i64,
/// Sum of the `bytes` column over all rows in `sources` (0 when there are none).
pub indexed_bytes: i64,
/// Embedding statistics as returned by `embedding_stats` for this store.
pub embeddings: Vec<EmbeddingStats>,
/// Sources ordered by `ingested_at` (then `id`) descending, capped by
/// `InspectOptions::recent_limit`.
pub recent_sources: Vec<RecentSource>,
}
/// One row of the "recent sources" listing in an [`InspectReport`].
///
/// Each field mirrors a column of the `sources` table, except `chunks`, which
/// is computed by a sub-query.
#[derive(Debug, Clone, Serialize)]
pub struct RecentSource {
/// Value of `sources.id`.
pub source_id: String,
/// Value of `sources.uri`.
pub uri: String,
/// Value of `sources.path`; `None` when the column is NULL.
pub path: Option<String>,
/// Value of `sources.kind`.
pub kind: String,
/// Value of `sources.bytes`.
pub bytes: i64,
/// Number of rows in `chunks` whose `source_id` equals this source's id.
pub chunks: i64,
/// Value of `sources.ingested_at`.
// NOTE(review): `print_text` compares this against `now_unix()`, so it is
// presumably Unix seconds — confirm against the ingest code.
pub ingested_at: i64,
}
/// Tunables for [`inspect`].
#[derive(Debug, Clone, Copy)]
pub struct InspectOptions {
    /// Maximum number of entries to return in `InspectReport::recent_sources`.
    pub recent_limit: usize,
}

impl Default for InspectOptions {
    /// Builds the default options: list at most ten recent sources.
    fn default() -> Self {
        const DEFAULT_RECENT_LIMIT: usize = 10;
        InspectOptions {
            recent_limit: DEFAULT_RECENT_LIMIT,
        }
    }
}
/// Gathers an [`InspectReport`] describing the current contents of `store`.
///
/// Reads the schema version, the size of the database file, aggregate counts
/// from the `sources` and `chunks` tables, the `opts.recent_limit` most
/// recently ingested sources, and the embedding statistics.
///
/// # Errors
///
/// Fails if the schema version cannot be read, if any SQL statement fails to
/// prepare or execute, if a column cannot be converted to the expected type,
/// or if `embedding_stats` fails. An unreadable database file is *not* an
/// error: `db_bytes` is reported as 0 in that case.
pub fn inspect(store: &Store, opts: InspectOptions) -> Result<InspectReport> {
    let conn = store.conn();
    let schema_version = store.schema_version()?;

    // Best-effort: a missing or unreadable database file is reported as 0 bytes
    // instead of aborting the whole inspection.
    let db_path = store.db_path();
    let db_bytes = match fs::metadata(&db_path) {
        Ok(meta) => meta.len(),
        Err(_) => 0,
    };

    let source_count = conn.query_row("SELECT COUNT(*) FROM sources", [], |row| {
        row.get::<_, i64>(0)
    })?;
    let chunk_count = conn.query_row("SELECT COUNT(*) FROM chunks", [], |row| {
        row.get::<_, i64>(0)
    })?;
    // COALESCE keeps the sum at 0 (rather than NULL) when there are no sources.
    let indexed_bytes = conn.query_row(
        "SELECT COALESCE(SUM(bytes), 0) FROM sources",
        [],
        |row| row.get::<_, i64>(0),
    )?;

    // Newest first; `id` breaks ties between sources ingested at the same time.
    let mut stmt = conn.prepare(
        "SELECT s.id, s.uri, s.path, s.kind, s.bytes, s.ingested_at,
(SELECT COUNT(*) FROM chunks c WHERE c.source_id = s.id) AS chunks
FROM sources s
ORDER BY s.ingested_at DESC, s.id DESC
LIMIT ?1",
    )?;
    let limit = opts.recent_limit as i64;
    let recent_sources = {
        let mut rows = stmt.query(params![limit])?;
        let mut collected = Vec::new();
        while let Some(row) = rows.next()? {
            // Column indices follow the SELECT list above.
            collected.push(RecentSource {
                source_id: row.get(0)?,
                uri: row.get(1)?,
                path: row.get(2)?,
                kind: row.get(3)?,
                bytes: row.get(4)?,
                ingested_at: row.get(5)?,
                chunks: row.get(6)?,
            });
        }
        collected
    };

    let embeddings = embedding_stats(store)?;

    let store_path = store.root().to_string_lossy().into_owned();
    let db_path = db_path.to_string_lossy().into_owned();
    Ok(InspectReport {
        schema_version,
        store_path,
        db_path,
        db_bytes,
        source_count,
        chunk_count,
        indexed_bytes,
        embeddings,
        recent_sources,
    })
}
pub fn print_text(report: &InspectReport) {
println!("lantern store");
println!(" path: {}", report.store_path);
println!(
" database: {} ({})",
report.db_path,
format_bytes(report.db_bytes)
);
println!(" schema: v{}", report.schema_version);
println!(" sources: {}", report.source_count);
println!(" chunks: {}", report.chunk_count);
println!(" indexed: {} bytes of source text", report.indexed_bytes);
if report.embeddings.is_empty() {
println!(" embeds: none");
} else {
for e in &report.embeddings {
println!(
" embeds: {count} chunks model={model} dim={dim}",
count = e.count,
model = e.model,
dim = e.dim,
);
}
}
if report.recent_sources.is_empty() {
println!();
println!("no sources ingested yet");
return;
}
let now = now_unix();
println!();
println!("recent sources:");
for s in &report.recent_sources {
println!(
" {id} {ago:>8} chunks={chunks:<3} bytes={bytes:<6} {kind:<14} {uri}",
id = s.source_id,
ago = ago(now, s.ingested_at),
chunks = s.chunks,
bytes = s.bytes,
kind = s.kind,
uri = s.uri,
);
}
}
/// Writes `report` to standard output as pretty-printed JSON.
///
/// # Errors
///
/// Returns an error if the report cannot be serialized.
pub fn print_json(report: &InspectReport) -> Result<()> {
    let rendered = serde_json::to_string_pretty(report)?;
    println!("{}", rendered);
    Ok(())
}
/// Returns the current wall-clock time as whole seconds since the Unix epoch.
///
/// Falls back to 0 when the system clock is set before the epoch.
pub(crate) fn now_unix() -> i64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs() as i64,
        Err(_) => 0,
    }
}
/// Formats the time elapsed between `then` and `now` as a short relative age.
///
/// Both arguments are timestamps in seconds. The result uses the largest unit
/// that fits, truncating toward zero: `"42s ago"`, `"5m ago"`, `"3h ago"` or
/// `"2d ago"`. A `then` that lies in the future is clamped to `"0s ago"`.
///
/// The difference is computed with saturating arithmetic, so extreme or
/// corrupt timestamps cannot overflow (which would panic in debug builds and
/// silently wrap to a wrong age in release builds).
pub(crate) fn ago(now: i64, then: i64) -> String {
    const SECS_PER_MIN: i64 = 60;
    const MINS_PER_HOUR: i64 = 60;
    const HOURS_PER_DAY: i64 = 24;

    // Saturate instead of overflowing, then clamp future timestamps to zero.
    let delta = now.saturating_sub(then).max(0);
    if delta < SECS_PER_MIN {
        return format!("{delta}s ago");
    }
    let m = delta / SECS_PER_MIN;
    if m < MINS_PER_HOUR {
        return format!("{m}m ago");
    }
    let h = m / MINS_PER_HOUR;
    if h < HOURS_PER_DAY {
        return format!("{h}h ago");
    }
    let d = h / HOURS_PER_DAY;
    format!("{d}d ago")
}
/// Renders a byte count using binary (1024-based) units.
///
/// Values below 1024 are printed exactly as `"<n> B"`; larger values are
/// printed with one decimal place in the largest of KB, MB or GB that does
/// not exceed the value.
fn format_bytes(n: u64) -> String {
    // Ordered from largest to smallest so the first match is the best fit.
    const UNITS: [(u64, &str); 3] = [
        (1024 * 1024 * 1024, "GB"),
        (1024 * 1024, "MB"),
        (1024, "KB"),
    ];

    match UNITS.iter().find(|&&(threshold, _)| n >= threshold) {
        Some(&(threshold, suffix)) => {
            format!("{:.1} {suffix}", n as f64 / threshold as f64)
        }
        None => format!("{n} B"),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Each unit bucket of `ago` is hit once, plus the future-timestamp clamp.
    #[test]
    fn ago_buckets() {
        let cases = [
            (1_000, 1_000, "0s ago"),
            (1_050, 1_000, "50s ago"),
            (1_000 + 60 * 5, 1_000, "5m ago"),
            (1_000 + 3600 * 3, 1_000, "3h ago"),
            (1_000 + 86400 * 2, 1_000, "2d ago"),
            // `then` in the future clamps to zero elapsed seconds.
            (500, 1_000, "0s ago"),
        ];
        for (now, then, expected) in cases {
            assert_eq!(ago(now, then), expected, "ago({now}, {then})");
        }
    }

    /// `format_bytes` switches unit exactly at each power of 1024.
    #[test]
    fn format_bytes_thresholds() {
        let cases = [
            (0, "0 B"),
            (512, "512 B"),
            (1024, "1.0 KB"),
            (1024 * 1024, "1.0 MB"),
            (1024 * 1024 * 1024, "1.0 GB"),
        ];
        for (n, expected) in cases {
            assert_eq!(format_bytes(n), expected, "format_bytes({n})");
        }
    }
}