// sqlite_graphrag/commands/stats.rs

1//! Handler for the `stats` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8
// Command-line arguments accepted by the `stats` subcommand.
//
// NOTE(review): the comments added here use `//` rather than `///` on purpose,
// so that the `--help` text generated from this struct is left unchanged.
// The `after_long_help` text below is the EXAMPLES section shown after the
// long help output.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Show database statistics (memory counts, sizes, namespace breakdown)\n  \
    sqlite-graphrag stats\n\n  \
    # Stats for a database at a custom path\n  \
    sqlite-graphrag stats --db /path/to/graphrag.sqlite\n\n  \
    # Use SQLITE_GRAPHRAG_DB_PATH env var\n  \
    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag stats")]
pub struct StatsArgs {
    // Optional database path override, taken from `--db` or from the
    // `SQLITE_GRAPHRAG_DB_PATH` environment variable. `run` forwards it to
    // `AppPaths::resolve`, which decides what to do when it is `None`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
    #[arg(long, default_value_t = false)]
    pub json: bool,
    /// Output format: `json` or `text`. JSON is always emitted on stdout regardless of the value.
    // Hidden from help output; only the two listed values are accepted, and
    // `run` ignores the value entirely.
    #[arg(long, value_parser = ["json", "text"], hide = true)]
    pub format: Option<String>,
}
27
/// JSON payload emitted by the `stats` subcommand.
///
/// Several fields are deliberate aliases carrying the same value under
/// different names; `run` fills each alias from the same local variable as
/// its primary field.
#[derive(Serialize)]
struct StatsResponse {
    /// Number of rows in `memories` whose `deleted_at` is NULL.
    memories: i64,
    /// Alias of `memories` for the documented contract in SKILL.md.
    memories_total: i64,
    /// GAP-SG-43: documented alias consumers read as `.total_memories`. Was
    /// absent (so clients saw `null`); now populated with the live memory count
    /// so `stats --json` is self-sufficient without a `list` fallback.
    total_memories: i64,
    /// Number of rows in the `entities` table.
    entities: i64,
    /// Alias of `entities` for the documented contract.
    entities_total: i64,
    /// Number of rows in the `relationships` table.
    relationships: i64,
    /// Alias of `relationships` for the documented contract.
    relationships_total: i64,
    /// Semantic alias of `relationships` per the contract in SKILL.md.
    edges: i64,
    /// Total indexed chunks (one row per chunk in `memory_chunks`).
    chunks_total: i64,
    /// Average length of the body field in active (non-deleted) memories.
    avg_body_len: f64,
    /// Distinct namespaces of non-deleted memories, sorted by namespace.
    namespaces: Vec<String>,
    /// Size of the database file in bytes as reported by the filesystem;
    /// `0` when the file metadata cannot be read.
    db_size_bytes: u64,
    /// Semantic alias of `db_size_bytes` for the documented contract.
    db_bytes: u64,
    /// Latest applied migration number from `refinery_schema_history`.
    /// Emitted as a JSON number for cross-command consistency with `health` (since v1.0.35).
    /// Returns `0` when the database has no recorded migrations yet.
    schema_version: u32,
    /// Total execution time in milliseconds from handler start to serialisation.
    elapsed_ms: u64,
}
60
61pub fn run(args: StatsArgs) -> Result<(), AppError> {
62    let start = std::time::Instant::now();
63    let _ = args.json; // --json is a no-op because output is already JSON by default
64    let _ = args.format; // --format is a no-op; JSON is always emitted on stdout
65    let paths = AppPaths::resolve(args.db.as_deref())?;
66
67    crate::storage::connection::ensure_db_ready(&paths)?;
68
69    let conn = open_ro(&paths.db)?;
70
71    let memories: i64 = conn.query_row(
72        "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
73        [],
74        |r| r.get(0),
75    )?;
76    let entities: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
77    let relationships: i64 =
78        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
79
80    let mut stmt = conn.prepare_cached(
81        "SELECT DISTINCT namespace FROM memories WHERE deleted_at IS NULL ORDER BY namespace",
82    )?;
83    let namespaces: Vec<String> = stmt
84        .query_map([], |r| r.get(0))?
85        .collect::<Result<Vec<_>, _>>()?;
86
87    let schema_version: u32 = conn
88        .query_row(
89            "SELECT MAX(version) FROM refinery_schema_history",
90            [],
91            |row| row.get::<_, Option<i64>>(0),
92        )
93        .ok()
94        .flatten()
95        .map(|v| v.max(0) as u32)
96        .unwrap_or(0);
97
98    let db_size_bytes = std::fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
99
100    // v1.0.21 P1-C: query uses the (correct) `memory_chunks` table.
101    // If the table does not exist (legacy pre-chunking DB), the error is "no such table"
102    // and the fallback returns 0. Other errors are logged via tracing for audit.
103    let chunks_total: i64 = match conn.query_row("SELECT COUNT(*) FROM memory_chunks", [], |r| {
104        r.get::<_, i64>(0)
105    }) {
106        Ok(n) => n,
107        Err(rusqlite::Error::SqliteFailure(_, Some(msg))) if msg.contains("no such table") => 0,
108        Err(e) => {
109            tracing::warn!(target: "stats", error = %e, "memory_chunks count failed");
110            0
111        }
112    };
113
114    let avg_body_len: f64 = conn
115        .query_row(
116            "SELECT COALESCE(AVG(LENGTH(body)), 0.0) FROM memories WHERE deleted_at IS NULL",
117            [],
118            |r| r.get(0),
119        )
120        .unwrap_or(0.0);
121
122    output::emit_json(&StatsResponse {
123        memories,
124        memories_total: memories,
125        total_memories: memories,
126        entities,
127        entities_total: entities,
128        relationships,
129        relationships_total: relationships,
130        edges: relationships,
131        chunks_total,
132        avg_body_len,
133        namespaces,
134        db_size_bytes,
135        db_bytes: db_size_bytes,
136        schema_version,
137        elapsed_ms: start.elapsed().as_millis() as u64,
138    })?;
139
140    Ok(())
141}
142
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns a response with every counter at zero and no namespaces.
    ///
    /// Tests override only the fields they care about through struct-update
    /// syntax, which keeps each case focused on what it actually checks.
    fn empty_response() -> StatsResponse {
        StatsResponse {
            memories: 0,
            memories_total: 0,
            total_memories: 0,
            entities: 0,
            entities_total: 0,
            relationships: 0,
            relationships_total: 0,
            edges: 0,
            chunks_total: 0,
            avg_body_len: 0.0,
            namespaces: vec![],
            db_size_bytes: 0,
            db_bytes: 0,
            schema_version: 0,
            elapsed_ms: 0,
        }
    }

    #[test]
    fn stats_response_serializes_all_fields() {
        let resp = StatsResponse {
            memories: 10,
            memories_total: 10,
            total_memories: 10,
            entities: 5,
            entities_total: 5,
            relationships: 3,
            relationships_total: 3,
            edges: 3,
            chunks_total: 20,
            avg_body_len: 42.5,
            namespaces: vec!["global".to_string(), "project".to_string()],
            db_size_bytes: 8192,
            db_bytes: 8192,
            schema_version: 6,
            elapsed_ms: 7,
        };
        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["memories"], 10);
        assert_eq!(json["memories_total"], 10);
        // GAP-SG-43: total_memories must be a populated number, never null.
        assert_eq!(json["total_memories"], 10);
        assert!(
            json["total_memories"].is_number(),
            "total_memories must be a number, not null"
        );
        assert_eq!(json["entities"], 5);
        assert_eq!(json["entities_total"], 5);
        assert_eq!(json["relationships"], 3);
        assert_eq!(json["relationships_total"], 3);
        assert_eq!(json["edges"], 3);
        assert_eq!(json["chunks_total"], 20);
        // These two fields were previously left unchecked despite the test name.
        assert_eq!(json["avg_body_len"], 42.5);
        assert_eq!(json["namespaces"], serde_json::json!(["global", "project"]));
        assert_eq!(json["db_size_bytes"], 8192u64);
        assert_eq!(json["db_bytes"], 8192u64);
        assert_eq!(json["schema_version"], 6);
        assert_eq!(json["elapsed_ms"], 7u64);
    }

    #[test]
    fn stats_response_namespaces_is_string_array() {
        let resp = StatsResponse {
            namespaces: vec!["ns1".to_string(), "ns2".to_string(), "ns3".to_string()],
            ..empty_response()
        };
        let json = serde_json::to_value(&resp).expect("serialization failed");
        let arr = json["namespaces"]
            .as_array()
            .expect("namespaces must be array");
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0], "ns1");
        assert_eq!(arr[1], "ns2");
        assert_eq!(arr[2], "ns3");
    }

    #[test]
    fn stats_response_namespaces_empty_serializes_empty_array() {
        let resp = empty_response();
        let json = serde_json::to_value(&resp).expect("serialization failed");
        let arr = json["namespaces"]
            .as_array()
            .expect("namespaces must be array");
        assert!(arr.is_empty(), "empty namespaces must serialize as []");
    }

    #[test]
    fn stats_response_aliases_memories_total_and_memories_equal() {
        let resp = StatsResponse {
            memories: 42,
            memories_total: 42,
            total_memories: 42,
            entities: 7,
            entities_total: 7,
            relationships: 2,
            relationships_total: 2,
            edges: 2,
            schema_version: 6,
            ..empty_response()
        };
        let json = serde_json::to_value(&resp).expect("serialization failed");
        assert_eq!(json["memories"], json["memories_total"]);
        // GAP-SG-43 alias must track the primary counter as well.
        assert_eq!(json["memories"], json["total_memories"]);
        assert_eq!(json["entities"], json["entities_total"]);
        assert_eq!(json["relationships"], json["relationships_total"]);
        assert_eq!(json["relationships"], json["edges"]);
        assert_eq!(json["db_size_bytes"], json["db_bytes"]);
    }
}
268}