sqlite_graphrag/commands/
stats.rs1use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8
9#[derive(clap::Args)]
10#[command(after_long_help = "EXAMPLES:\n \
11 # Show database statistics (memory counts, sizes, namespace breakdown)\n \
12 sqlite-graphrag stats\n\n \
13 # Stats for a database at a custom path\n \
14 sqlite-graphrag stats --db /path/to/graphrag.sqlite\n\n \
15 # Use SQLITE_GRAPHRAG_DB_PATH env var\n \
16 SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag stats")]
17pub struct StatsArgs {
18 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
19 pub db: Option<String>,
20 #[arg(long, default_value_t = false)]
22 pub json: bool,
23 #[arg(long, value_parser = ["json", "text"], hide = true)]
25 pub format: Option<String>,
26}
27
28#[derive(Serialize)]
29struct StatsResponse {
30 memories: i64,
31 memories_total: i64,
33 entities: i64,
34 entities_total: i64,
36 relationships: i64,
37 relationships_total: i64,
39 edges: i64,
41 chunks_total: i64,
43 avg_body_len: f64,
45 namespaces: Vec<String>,
46 db_size_bytes: u64,
47 db_bytes: u64,
49 schema_version: u32,
53 elapsed_ms: u64,
55}
56
57pub fn run(args: StatsArgs) -> Result<(), AppError> {
58 let start = std::time::Instant::now();
59 let _ = args.json; let _ = args.format; let paths = AppPaths::resolve(args.db.as_deref())?;
62
63 crate::storage::connection::ensure_db_ready(&paths)?;
64
65 let conn = open_ro(&paths.db)?;
66
67 let memories: i64 = conn.query_row(
68 "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
69 [],
70 |r| r.get(0),
71 )?;
72 let entities: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
73 let relationships: i64 =
74 conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
75
76 let mut stmt = conn.prepare(
77 "SELECT DISTINCT namespace FROM memories WHERE deleted_at IS NULL ORDER BY namespace",
78 )?;
79 let namespaces: Vec<String> = stmt
80 .query_map([], |r| r.get(0))?
81 .collect::<Result<Vec<_>, _>>()?;
82
83 let schema_version: u32 = conn
84 .query_row(
85 "SELECT MAX(version) FROM refinery_schema_history",
86 [],
87 |row| row.get::<_, Option<i64>>(0),
88 )
89 .ok()
90 .flatten()
91 .map(|v| v.max(0) as u32)
92 .unwrap_or(0);
93
94 let db_size_bytes = std::fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
95
96 let chunks_total: i64 = match conn.query_row("SELECT COUNT(*) FROM memory_chunks", [], |r| {
100 r.get::<_, i64>(0)
101 }) {
102 Ok(n) => n,
103 Err(rusqlite::Error::SqliteFailure(_, Some(msg))) if msg.contains("no such table") => 0,
104 Err(e) => {
105 tracing::warn!("failed to count memory_chunks: {e}");
106 0
107 }
108 };
109
110 let avg_body_len: f64 = conn
111 .query_row(
112 "SELECT COALESCE(AVG(LENGTH(body)), 0.0) FROM memories WHERE deleted_at IS NULL",
113 [],
114 |r| r.get(0),
115 )
116 .unwrap_or(0.0);
117
118 output::emit_json(&StatsResponse {
119 memories,
120 memories_total: memories,
121 entities,
122 entities_total: entities,
123 relationships,
124 relationships_total: relationships,
125 edges: relationships,
126 chunks_total,
127 avg_body_len,
128 namespaces,
129 db_size_bytes,
130 db_bytes: db_size_bytes,
131 schema_version,
132 elapsed_ms: start.elapsed().as_millis() as u64,
133 })?;
134
135 Ok(())
136}
137
#[cfg(test)]
mod tests {
    use super::*;

    /// Number of fields `StatsResponse` is expected to serialize.
    const EXPECTED_FIELD_COUNT: usize = 14;

    /// Baseline response with every numeric field zeroed and no namespaces.
    /// Tests override only the fields they care about via struct-update syntax.
    fn zeroed() -> StatsResponse {
        StatsResponse {
            memories: 0,
            memories_total: 0,
            entities: 0,
            entities_total: 0,
            relationships: 0,
            relationships_total: 0,
            edges: 0,
            chunks_total: 0,
            avg_body_len: 0.0,
            namespaces: vec![],
            db_size_bytes: 0,
            db_bytes: 0,
            schema_version: 0,
            elapsed_ms: 0,
        }
    }

    /// Serializes a response into a `serde_json::Value`, failing the test on error.
    fn to_json(resp: &StatsResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialization failed")
    }

    #[test]
    fn stats_response_serializes_all_fields() {
        let json = to_json(&StatsResponse {
            memories: 10,
            memories_total: 10,
            entities: 5,
            entities_total: 5,
            relationships: 3,
            relationships_total: 3,
            edges: 3,
            chunks_total: 20,
            avg_body_len: 42.5,
            namespaces: vec!["global".to_string(), "project".to_string()],
            db_size_bytes: 8192,
            db_bytes: 8192,
            schema_version: 6,
            elapsed_ms: 7,
        });
        assert_eq!(json["memories"], 10);
        assert_eq!(json["memories_total"], 10);
        assert_eq!(json["entities"], 5);
        assert_eq!(json["entities_total"], 5);
        assert_eq!(json["relationships"], 3);
        assert_eq!(json["relationships_total"], 3);
        assert_eq!(json["edges"], 3);
        assert_eq!(json["chunks_total"], 20);
        // Previously unchecked even though the test claims to cover every field.
        assert_eq!(json["avg_body_len"], 42.5);
        assert_eq!(json["namespaces"], serde_json::json!(["global", "project"]));
        assert_eq!(json["db_size_bytes"], 8192u64);
        assert_eq!(json["db_bytes"], 8192u64);
        assert_eq!(json["schema_version"], 6);
        assert_eq!(json["elapsed_ms"], 7u64);
        // Guards against a new field being added without a matching assertion above.
        assert_eq!(
            json.as_object().expect("response must be an object").len(),
            EXPECTED_FIELD_COUNT
        );
    }

    #[test]
    fn stats_response_namespaces_is_string_array() {
        let json = to_json(&StatsResponse {
            namespaces: vec!["ns1".to_string(), "ns2".to_string(), "ns3".to_string()],
            ..zeroed()
        });
        let arr = json["namespaces"]
            .as_array()
            .expect("namespaces must be array");
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0], "ns1");
        assert_eq!(arr[1], "ns2");
        assert_eq!(arr[2], "ns3");
    }

    #[test]
    fn stats_response_namespaces_empty_serializes_empty_array() {
        let json = to_json(&zeroed());
        let arr = json["namespaces"]
            .as_array()
            .expect("namespaces must be array");
        assert!(arr.is_empty(), "empty namespaces must serialize as []");
    }

    #[test]
    fn stats_response_aliases_memories_total_and_memories_equal() {
        let json = to_json(&StatsResponse {
            memories: 42,
            memories_total: 42,
            entities: 7,
            entities_total: 7,
            relationships: 2,
            relationships_total: 2,
            edges: 2,
            schema_version: 6,
            ..zeroed()
        });
        assert_eq!(json["memories"], json["memories_total"]);
        assert_eq!(json["entities"], json["entities_total"]);
        assert_eq!(json["relationships"], json["relationships_total"]);
        assert_eq!(json["relationships"], json["edges"]);
        assert_eq!(json["db_size_bytes"], json["db_bytes"]);
    }
}