use crate::errors::AppError;
use crate::output;
use crate::paths::AppPaths;
use crate::storage::connection::open_ro;
use serde::Serialize;
// Command-line arguments of the `stats` subcommand.
//
// Plain `//` comments are used here on purpose: clap's derive macro turns `///`
// doc comments into help text, which would change the program's output.
// The `after_long_help` text below is appended to the long help output.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n \
# Show database statistics (memory counts, sizes, namespace breakdown)\n \
sqlite-graphrag stats\n\n \
# Stats for a database at a custom path\n \
sqlite-graphrag stats --db /path/to/graphrag.sqlite\n\n \
# Use SQLITE_GRAPHRAG_DB_PATH env var\n \
SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag stats")]
pub struct StatsArgs {
// Optional database path given as `--db`; clap falls back to the
// `SQLITE_GRAPHRAG_DB_PATH` environment variable. `run` forwards the value
// (or `None`) to `AppPaths::resolve`.
#[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
pub db: Option<String>,
// `--json` switch, off by default. `run` in this file discards it and always
// emits JSON.
#[arg(long, default_value_t = false)]
pub json: bool,
// `--format json|text`, hidden from the help output. `run` in this file
// discards it as well.
#[arg(long, value_parser = ["json", "text"], hide = true)]
pub format: Option<String>,
}
/// JSON payload emitted by `run`.
///
/// Several fields carry the same value under more than one key
/// (`memories`/`memories_total`, `entities`/`entities_total`,
/// `relationships`/`relationships_total`/`edges`, `db_size_bytes`/`db_bytes`);
/// `run` fills each group from a single measurement.
#[derive(Serialize)]
struct StatsResponse {
/// Number of rows in `memories` whose `deleted_at` is NULL.
memories: i64,
/// Same value as `memories`.
memories_total: i64,
/// Number of rows in `entities`.
entities: i64,
/// Same value as `entities`.
entities_total: i64,
/// Number of rows in `relationships`.
relationships: i64,
/// Same value as `relationships`.
relationships_total: i64,
/// Same value as `relationships`.
edges: i64,
/// Number of rows in `memory_chunks`; 0 when that count cannot be obtained.
chunks_total: i64,
/// Average `LENGTH(body)` over non-deleted memories; 0.0 when there are none
/// or the query fails.
avg_body_len: f64,
/// Distinct namespaces of non-deleted memories, in `ORDER BY namespace` order.
namespaces: Vec<String>,
/// Size of the database file as reported by the filesystem; 0 when the
/// metadata cannot be read.
db_size_bytes: u64,
/// Same value as `db_size_bytes`.
db_bytes: u64,
/// `MAX(version)` from `refinery_schema_history`; 0 when unavailable.
schema_version: u32,
/// Milliseconds elapsed between the start of `run` and building this response.
elapsed_ms: u64,
}
pub fn run(args: StatsArgs) -> Result<(), AppError> {
let start = std::time::Instant::now();
let _ = args.json; let _ = args.format; let paths = AppPaths::resolve(args.db.as_deref())?;
crate::storage::connection::ensure_db_ready(&paths)?;
let conn = open_ro(&paths.db)?;
let memories: i64 = conn.query_row(
"SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
[],
|r| r.get(0),
)?;
let entities: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
let relationships: i64 =
conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
let mut stmt = conn.prepare(
"SELECT DISTINCT namespace FROM memories WHERE deleted_at IS NULL ORDER BY namespace",
)?;
let namespaces: Vec<String> = stmt
.query_map([], |r| r.get(0))?
.collect::<Result<Vec<_>, _>>()?;
let schema_version: u32 = conn
.query_row(
"SELECT MAX(version) FROM refinery_schema_history",
[],
|row| row.get::<_, Option<i64>>(0),
)
.ok()
.flatten()
.map(|v| v.max(0) as u32)
.unwrap_or(0);
let db_size_bytes = std::fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
let chunks_total: i64 = match conn.query_row("SELECT COUNT(*) FROM memory_chunks", [], |r| {
r.get::<_, i64>(0)
}) {
Ok(n) => n,
Err(rusqlite::Error::SqliteFailure(_, Some(msg))) if msg.contains("no such table") => 0,
Err(e) => {
tracing::warn!("failed to count memory_chunks: {e}");
0
}
};
let avg_body_len: f64 = conn
.query_row(
"SELECT COALESCE(AVG(LENGTH(body)), 0.0) FROM memories WHERE deleted_at IS NULL",
[],
|r| r.get(0),
)
.unwrap_or(0.0);
output::emit_json(&StatsResponse {
memories,
memories_total: memories,
entities,
entities_total: entities,
relationships,
relationships_total: relationships,
edges: relationships,
chunks_total,
avg_body_len,
namespaces,
db_size_bytes,
db_bytes: db_size_bytes,
schema_version,
elapsed_ms: start.elapsed().as_millis() as u64,
})?;
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a response with every numeric field zeroed and no namespaces, for
    /// tests that only care about a few fields (combine with struct-update syntax).
    fn zeroed_response() -> StatsResponse {
        StatsResponse {
            memories: 0,
            memories_total: 0,
            entities: 0,
            entities_total: 0,
            relationships: 0,
            relationships_total: 0,
            edges: 0,
            chunks_total: 0,
            avg_body_len: 0.0,
            namespaces: vec![],
            db_size_bytes: 0,
            db_bytes: 0,
            schema_version: 0,
            elapsed_ms: 0,
        }
    }

    /// Serializes `resp` into a `serde_json::Value`, panicking if serialization fails.
    fn to_json(resp: &StatsResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialization failed")
    }

    #[test]
    fn stats_response_serializes_all_fields() {
        // Spelled out in full (no struct-update) so that adding a field to
        // `StatsResponse` forces this test to be revisited.
        let resp = StatsResponse {
            memories: 10,
            memories_total: 10,
            entities: 5,
            entities_total: 5,
            relationships: 3,
            relationships_total: 3,
            edges: 3,
            chunks_total: 20,
            avg_body_len: 42.5,
            namespaces: vec!["global".to_string(), "project".to_string()],
            db_size_bytes: 8192,
            db_bytes: 8192,
            schema_version: 6,
            elapsed_ms: 7,
        };
        let json = to_json(&resp);
        assert_eq!(json["memories"], 10);
        assert_eq!(json["memories_total"], 10);
        assert_eq!(json["entities"], 5);
        assert_eq!(json["entities_total"], 5);
        assert_eq!(json["relationships"], 3);
        assert_eq!(json["relationships_total"], 3);
        assert_eq!(json["edges"], 3);
        assert_eq!(json["chunks_total"], 20);
        assert_eq!(json["avg_body_len"], 42.5);
        assert_eq!(json["namespaces"], serde_json::json!(["global", "project"]));
        assert_eq!(json["db_size_bytes"], 8192u64);
        assert_eq!(json["db_bytes"], 8192u64);
        assert_eq!(json["schema_version"], 6);
        assert_eq!(json["elapsed_ms"], 7u64);
        // Every struct field must appear as exactly one key, and nothing else.
        let object = json
            .as_object()
            .expect("response must serialize as an object");
        assert_eq!(object.len(), 14);
    }

    #[test]
    fn stats_response_namespaces_is_string_array() {
        let resp = StatsResponse {
            namespaces: vec!["ns1".to_string(), "ns2".to_string(), "ns3".to_string()],
            ..zeroed_response()
        };
        let json = to_json(&resp);
        let arr = json["namespaces"]
            .as_array()
            .expect("namespaces must be array");
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0], "ns1");
        assert_eq!(arr[1], "ns2");
        assert_eq!(arr[2], "ns3");
    }

    #[test]
    fn stats_response_namespaces_empty_serializes_empty_array() {
        let json = to_json(&zeroed_response());
        let arr = json["namespaces"]
            .as_array()
            .expect("namespaces must be array");
        assert!(arr.is_empty(), "empty namespaces must serialize as []");
    }

    #[test]
    fn stats_response_aliases_memories_total_and_memories_equal() {
        let resp = StatsResponse {
            memories: 42,
            memories_total: 42,
            entities: 7,
            entities_total: 7,
            relationships: 2,
            relationships_total: 2,
            edges: 2,
            schema_version: 6,
            ..zeroed_response()
        };
        let json = to_json(&resp);
        assert_eq!(json["memories"], json["memories_total"]);
        assert_eq!(json["entities"], json["entities_total"]);
        assert_eq!(json["relationships"], json["relationships_total"]);
        assert_eq!(json["relationships"], json["edges"]);
        assert_eq!(json["db_size_bytes"], json["db_bytes"]);
    }
}