// sqlite_graphrag/commands/stats.rs
use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8
9#[derive(clap::Args)]
10#[command(after_long_help = "EXAMPLES:\n \
11 # Show database statistics (memory counts, sizes, namespace breakdown)\n \
12 sqlite-graphrag stats\n\n \
13 # Stats for a database at a custom path\n \
14 sqlite-graphrag stats --db /path/to/graphrag.sqlite\n\n \
15 # Use SQLITE_GRAPHRAG_DB_PATH env var\n \
16 SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag stats")]
17pub struct StatsArgs {
18 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
19 pub db: Option<String>,
20 #[arg(long, default_value_t = false)]
22 pub json: bool,
23 #[arg(long, value_parser = ["json", "text"], hide = true)]
25 pub format: Option<String>,
26}
27
28#[derive(Serialize)]
29struct StatsResponse {
30 memories: i64,
31 memories_total: i64,
33 total_memories: i64,
37 entities: i64,
38 entities_total: i64,
40 relationships: i64,
41 relationships_total: i64,
43 edges: i64,
45 chunks_total: i64,
47 avg_body_len: f64,
49 namespaces: Vec<String>,
50 db_size_bytes: u64,
51 db_bytes: u64,
53 schema_version: u32,
57 elapsed_ms: u64,
59}
60
61pub fn run(args: StatsArgs) -> Result<(), AppError> {
62 let start = std::time::Instant::now();
63 let _ = args.json; let _ = args.format; let paths = AppPaths::resolve(args.db.as_deref())?;
66
67 crate::storage::connection::ensure_db_ready(&paths)?;
68
69 let conn = open_ro(&paths.db)?;
70
71 let memories: i64 = conn.query_row(
72 "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
73 [],
74 |r| r.get(0),
75 )?;
76 let entities: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
77 let relationships: i64 =
78 conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
79
80 let mut stmt = conn.prepare_cached(
81 "SELECT DISTINCT namespace FROM memories WHERE deleted_at IS NULL ORDER BY namespace",
82 )?;
83 let namespaces: Vec<String> = stmt
84 .query_map([], |r| r.get(0))?
85 .collect::<Result<Vec<_>, _>>()?;
86
87 let schema_version: u32 = conn
88 .query_row(
89 "SELECT MAX(version) FROM refinery_schema_history",
90 [],
91 |row| row.get::<_, Option<i64>>(0),
92 )
93 .ok()
94 .flatten()
95 .map(|v| v.max(0) as u32)
96 .unwrap_or(0);
97
98 let db_size_bytes = std::fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
99
100 let chunks_total: i64 = match conn.query_row("SELECT COUNT(*) FROM memory_chunks", [], |r| {
104 r.get::<_, i64>(0)
105 }) {
106 Ok(n) => n,
107 Err(rusqlite::Error::SqliteFailure(_, Some(msg))) if msg.contains("no such table") => 0,
108 Err(e) => {
109 tracing::warn!(target: "stats", error = %e, "memory_chunks count failed");
110 0
111 }
112 };
113
114 let avg_body_len: f64 = conn
115 .query_row(
116 "SELECT COALESCE(AVG(LENGTH(body)), 0.0) FROM memories WHERE deleted_at IS NULL",
117 [],
118 |r| r.get(0),
119 )
120 .unwrap_or(0.0);
121
122 output::emit_json(&StatsResponse {
123 memories,
124 memories_total: memories,
125 total_memories: memories,
126 entities,
127 entities_total: entities,
128 relationships,
129 relationships_total: relationships,
130 edges: relationships,
131 chunks_total,
132 avg_body_len,
133 namespaces,
134 db_size_bytes,
135 db_bytes: db_size_bytes,
136 schema_version,
137 elapsed_ms: start.elapsed().as_millis() as u64,
138 })?;
139
140 Ok(())
141}
142
#[cfg(test)]
mod tests {
    use super::*;

    /// All-zero response with no namespaces; each test overrides only the
    /// fields it cares about via struct-update syntax.
    fn zeroed() -> StatsResponse {
        StatsResponse {
            memories: 0,
            memories_total: 0,
            total_memories: 0,
            entities: 0,
            entities_total: 0,
            relationships: 0,
            relationships_total: 0,
            edges: 0,
            chunks_total: 0,
            avg_body_len: 0.0,
            namespaces: Vec::new(),
            db_size_bytes: 0,
            db_bytes: 0,
            schema_version: 0,
            elapsed_ms: 0,
        }
    }

    /// Serializes a response into a generic JSON value for inspection.
    fn to_json(resp: &StatsResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialization failed")
    }

    #[test]
    fn stats_response_serializes_all_fields() {
        let resp = StatsResponse {
            memories: 10,
            memories_total: 10,
            total_memories: 10,
            entities: 5,
            entities_total: 5,
            relationships: 3,
            relationships_total: 3,
            edges: 3,
            chunks_total: 20,
            avg_body_len: 42.5,
            namespaces: vec!["global".to_string(), "project".to_string()],
            db_size_bytes: 8192,
            db_bytes: 8192,
            schema_version: 6,
            elapsed_ms: 7,
        };
        let json = to_json(&resp);
        assert_eq!(json["memories"], 10);
        assert_eq!(json["memories_total"], 10);
        assert_eq!(json["total_memories"], 10);
        assert!(
            json["total_memories"].is_number(),
            "total_memories must be a number, not null"
        );
        assert_eq!(json["entities"], 5);
        assert_eq!(json["entities_total"], 5);
        assert_eq!(json["relationships"], 3);
        assert_eq!(json["relationships_total"], 3);
        assert_eq!(json["edges"], 3);
        assert_eq!(json["chunks_total"], 20);
        // These two fields were previously left unchecked despite the test name.
        assert_eq!(json["avg_body_len"], 42.5);
        assert_eq!(json["namespaces"], serde_json::json!(["global", "project"]));
        assert_eq!(json["db_size_bytes"], 8192u64);
        assert_eq!(json["db_bytes"], 8192u64);
        assert_eq!(json["schema_version"], 6);
        assert_eq!(json["elapsed_ms"], 7u64);
    }

    #[test]
    fn stats_response_namespaces_is_string_array() {
        let resp = StatsResponse {
            namespaces: vec!["ns1".to_string(), "ns2".to_string(), "ns3".to_string()],
            ..zeroed()
        };
        let json = to_json(&resp);
        let arr = json["namespaces"]
            .as_array()
            .expect("namespaces must be array");
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0], "ns1");
        assert_eq!(arr[1], "ns2");
        assert_eq!(arr[2], "ns3");
    }

    #[test]
    fn stats_response_namespaces_empty_serializes_empty_array() {
        let json = to_json(&zeroed());
        let arr = json["namespaces"]
            .as_array()
            .expect("namespaces must be array");
        assert!(arr.is_empty(), "empty namespaces must serialize as []");
    }

    #[test]
    fn stats_response_aliases_memories_total_and_memories_equal() {
        let resp = StatsResponse {
            memories: 42,
            memories_total: 42,
            total_memories: 42,
            entities: 7,
            entities_total: 7,
            relationships: 2,
            relationships_total: 2,
            edges: 2,
            schema_version: 6,
            ..zeroed()
        };
        let json = to_json(&resp);
        assert_eq!(json["memories"], json["memories_total"]);
        // The third memories alias was previously not covered.
        assert_eq!(json["memories"], json["total_memories"]);
        assert_eq!(json["entities"], json["entities_total"]);
        assert_eq!(json["relationships"], json["relationships_total"]);
        assert_eq!(json["relationships"], json["edges"]);
        assert_eq!(json["db_size_bytes"], json["db_bytes"]);
    }
}