Skip to main content

sqlite_graphrag/commands/
stats.rs

1//! Handler for the `stats` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8
// Command-line arguments accepted by the `stats` subcommand.
//
// NOTE(review): the additions below are plain `//` comments on purpose. Presumably clap's
// derive turns `///` doc comments into help text, so converting them would change the
// user-visible `--help` output — confirm before promoting any of them to `///`.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Show database statistics (memory counts, sizes, namespace breakdown)\n  \
    sqlite-graphrag stats\n\n  \
    # Stats for a database at a custom path\n  \
    sqlite-graphrag stats --db /path/to/graphrag.sqlite\n\n  \
    # Use SQLITE_GRAPHRAG_DB_PATH env var\n  \
    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag stats")]
pub struct StatsArgs {
    // Optional database path, given as `--db` or through the `SQLITE_GRAPHRAG_DB_PATH`
    // environment variable. `run` hands it to `AppPaths::resolve`, which decides what to do
    // when it is absent.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
    #[arg(long, default_value_t = false)]
    pub json: bool,
    /// Output format: `json` or `text`. JSON is always emitted on stdout regardless of the value.
    // Hidden from help (`hide = true`); the value is parsed and validated but `run` ignores it.
    #[arg(long, value_parser = ["json", "text"], hide = true)]
    pub format: Option<String>,
}
27
/// JSON payload emitted on stdout by the `stats` subcommand.
///
/// Several fields are intentional duplicates of one another (`*_total`, `edges`, `db_bytes`):
/// they carry the same value under the alternative names used by the documented contracts.
#[derive(Serialize)]
struct StatsResponse {
    /// Number of rows in `memories` whose `deleted_at` is NULL.
    memories: i64,
    /// Alias of `memories` for the contract documented in SKILL.md and AGENT_PROTOCOL.md.
    memories_total: i64,
    /// Number of rows in `entities`.
    entities: i64,
    /// Alias of `entities` for the documented contract.
    entities_total: i64,
    /// Number of rows in `relationships`.
    relationships: i64,
    /// Alias of `relationships` for the documented contract.
    relationships_total: i64,
    /// Semantic alias of `relationships` per the contract in AGENT_PROTOCOL.md.
    edges: i64,
    /// Total indexed chunks (one row per chunk in `memory_chunks`).
    chunks_total: i64,
    /// Average length of the body field in active (non-deleted) memories.
    avg_body_len: f64,
    /// Distinct namespaces of non-deleted memories, ordered by namespace.
    namespaces: Vec<String>,
    /// Size of the database file in bytes as reported by filesystem metadata;
    /// `0` when the metadata cannot be read.
    db_size_bytes: u64,
    /// Semantic alias of `db_size_bytes` for the documented contract.
    db_bytes: u64,
    /// Latest applied migration number from `refinery_schema_history`.
    /// Emitted as a JSON number for cross-command consistency with `health` (since v1.0.35).
    /// Returns `0` when the database has no recorded migrations yet.
    schema_version: u32,
    /// Total execution time in milliseconds from handler start to serialisation.
    elapsed_ms: u64,
}
56
57pub fn run(args: StatsArgs) -> Result<(), AppError> {
58    let start = std::time::Instant::now();
59    let _ = args.json; // --json is a no-op because output is already JSON by default
60    let _ = args.format; // --format is a no-op; JSON is always emitted on stdout
61    let paths = AppPaths::resolve(args.db.as_deref())?;
62
63    crate::storage::connection::ensure_db_ready(&paths)?;
64
65    let conn = open_ro(&paths.db)?;
66
67    let memories: i64 = conn.query_row(
68        "SELECT COUNT(*) FROM memories WHERE deleted_at IS NULL",
69        [],
70        |r| r.get(0),
71    )?;
72    let entities: i64 = conn.query_row("SELECT COUNT(*) FROM entities", [], |r| r.get(0))?;
73    let relationships: i64 =
74        conn.query_row("SELECT COUNT(*) FROM relationships", [], |r| r.get(0))?;
75
76    let mut stmt = conn.prepare(
77        "SELECT DISTINCT namespace FROM memories WHERE deleted_at IS NULL ORDER BY namespace",
78    )?;
79    let namespaces: Vec<String> = stmt
80        .query_map([], |r| r.get(0))?
81        .collect::<Result<Vec<_>, _>>()?;
82
83    let schema_version: u32 = conn
84        .query_row(
85            "SELECT MAX(version) FROM refinery_schema_history",
86            [],
87            |row| row.get::<_, Option<i64>>(0),
88        )
89        .ok()
90        .flatten()
91        .map(|v| v.max(0) as u32)
92        .unwrap_or(0);
93
94    let db_size_bytes = std::fs::metadata(&paths.db).map(|m| m.len()).unwrap_or(0);
95
96    // v1.0.21 P1-C: query uses the (correct) `memory_chunks` table.
97    // If the table does not exist (legacy pre-chunking DB), the error is "no such table"
98    // and the fallback returns 0. Other errors are logged via tracing for audit.
99    let chunks_total: i64 = match conn.query_row("SELECT COUNT(*) FROM memory_chunks", [], |r| {
100        r.get::<_, i64>(0)
101    }) {
102        Ok(n) => n,
103        Err(rusqlite::Error::SqliteFailure(_, Some(msg))) if msg.contains("no such table") => 0,
104        Err(e) => {
105            tracing::warn!("failed to count memory_chunks: {e}");
106            0
107        }
108    };
109
110    let avg_body_len: f64 = conn
111        .query_row(
112            "SELECT COALESCE(AVG(LENGTH(body)), 0.0) FROM memories WHERE deleted_at IS NULL",
113            [],
114            |r| r.get(0),
115        )
116        .unwrap_or(0.0);
117
118    output::emit_json(&StatsResponse {
119        memories,
120        memories_total: memories,
121        entities,
122        entities_total: entities,
123        relationships,
124        relationships_total: relationships,
125        edges: relationships,
126        chunks_total,
127        avg_body_len,
128        namespaces,
129        db_size_bytes,
130        db_bytes: db_size_bytes,
131        schema_version,
132        elapsed_ms: start.elapsed().as_millis() as u64,
133    })?;
134
135    Ok(())
136}
137
#[cfg(test)]
mod tests {
    use super::*;

    /// Number of fields `StatsResponse` is expected to serialize.
    const EXPECTED_FIELD_COUNT: usize = 14;

    /// Baseline response with every numeric field zeroed and no namespaces. Tests override
    /// only the fields they care about through struct-update syntax.
    fn zeroed_response() -> StatsResponse {
        StatsResponse {
            memories: 0,
            memories_total: 0,
            entities: 0,
            entities_total: 0,
            relationships: 0,
            relationships_total: 0,
            edges: 0,
            chunks_total: 0,
            avg_body_len: 0.0,
            namespaces: vec![],
            db_size_bytes: 0,
            db_bytes: 0,
            schema_version: 0,
            elapsed_ms: 0,
        }
    }

    /// Serializes a response into a `serde_json::Value`, panicking on failure.
    fn to_json(resp: &StatsResponse) -> serde_json::Value {
        serde_json::to_value(resp).expect("serialization failed")
    }

    #[test]
    fn stats_response_serializes_all_fields() {
        // Every field is spelled out here on purpose: adding a field to the struct must
        // force this test to be revisited.
        let resp = StatsResponse {
            memories: 10,
            memories_total: 10,
            entities: 5,
            entities_total: 5,
            relationships: 3,
            relationships_total: 3,
            edges: 3,
            chunks_total: 20,
            avg_body_len: 42.5,
            namespaces: vec!["global".to_string(), "project".to_string()],
            db_size_bytes: 8192,
            db_bytes: 8192,
            schema_version: 6,
            elapsed_ms: 7,
        };
        let json = to_json(&resp);
        assert_eq!(json["memories"], 10);
        assert_eq!(json["memories_total"], 10);
        assert_eq!(json["entities"], 5);
        assert_eq!(json["entities_total"], 5);
        assert_eq!(json["relationships"], 3);
        assert_eq!(json["relationships_total"], 3);
        assert_eq!(json["edges"], 3);
        assert_eq!(json["chunks_total"], 20);
        assert_eq!(json["avg_body_len"], 42.5);
        assert_eq!(json["namespaces"], serde_json::json!(["global", "project"]));
        assert_eq!(json["db_size_bytes"], 8192u64);
        assert_eq!(json["db_bytes"], 8192u64);
        assert_eq!(json["schema_version"], 6);
        assert_eq!(json["elapsed_ms"], 7u64);

        // Guards against a field being skipped or renamed during serialization.
        let field_count = json
            .as_object()
            .expect("response must serialize as a JSON object")
            .len();
        assert_eq!(field_count, EXPECTED_FIELD_COUNT);
    }

    #[test]
    fn stats_response_namespaces_is_string_array() {
        let resp = StatsResponse {
            namespaces: vec!["ns1".to_string(), "ns2".to_string(), "ns3".to_string()],
            ..zeroed_response()
        };
        let json = to_json(&resp);
        let arr = json["namespaces"]
            .as_array()
            .expect("namespaces must be array");
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0], "ns1");
        assert_eq!(arr[1], "ns2");
        assert_eq!(arr[2], "ns3");
    }

    #[test]
    fn stats_response_namespaces_empty_serializes_empty_array() {
        let json = to_json(&zeroed_response());
        let arr = json["namespaces"]
            .as_array()
            .expect("namespaces must be array");
        assert!(arr.is_empty(), "empty namespaces must serialize as []");
    }

    #[test]
    fn stats_response_aliases_memories_total_and_memories_equal() {
        let resp = StatsResponse {
            memories: 42,
            memories_total: 42,
            entities: 7,
            entities_total: 7,
            relationships: 2,
            relationships_total: 2,
            edges: 2,
            schema_version: 6,
            ..zeroed_response()
        };
        let json = to_json(&resp);
        assert_eq!(json["memories"], json["memories_total"]);
        assert_eq!(json["entities"], json["entities_total"]);
        assert_eq!(json["relationships"], json["relationships_total"]);
        assert_eq!(json["relationships"], json["edges"]);
        assert_eq!(json["db_size_bytes"], json["db_bytes"]);
    }
}