Skip to main content

sqlite_graphrag/commands/
debug_schema.rs

1//! Handler for the `debug-schema` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_ro;
7use serde::Serialize;
8use std::time::Instant;
9
// Command-line arguments for the schema-dump subcommand handled by `run`.
//
// Plain `//` comments are used here on purpose: clap's derive turns `///` doc
// comments into help text, which would change the CLI output.
// NOTE(review): the examples below invoke `__debug_schema` while the module
// doc calls the subcommand `debug-schema` — confirm which spelling is current.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Dump the SQLite schema (tables, indices, triggers) as JSON\n  \
    sqlite-graphrag __debug_schema\n\n  \
    # Dump schema of a database at a custom path\n  \
    sqlite-graphrag __debug_schema --db /path/to/graphrag.sqlite\n\n  \
    # Use SQLITE_GRAPHRAG_DB_PATH env var\n  \
    SQLITE_GRAPHRAG_DB_PATH=/data/graphrag.sqlite sqlite-graphrag __debug_schema")]
pub struct DebugSchemaArgs {
    // Hidden flag. Per its own help text it is a no-op (JSON is always
    // emitted); `run` never reads this field.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
    // Optional database path. When `--db` is not given, clap reads the
    // SQLITE_GRAPHRAG_DB_PATH environment variable instead. `run` forwards the
    // value (or `None`) to `AppPaths::resolve`.
    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
    pub db: Option<String>,
}
24
/// One entry of the `objects` array in the schema dump.
///
/// `run` builds these from the `name` and `type` columns of `sqlite_master`,
/// restricted to tables, views, triggers and indexes.
#[derive(Serialize)]
struct SchemaObject {
    /// Object name as stored in `sqlite_master.name`.
    name: String,
    /// Object kind as stored in `sqlite_master.type`; serialized under the
    /// JSON key `type` because `type` is a reserved word in Rust.
    #[serde(rename = "type")]
    object_type: String,
}
31
/// One applied migration, read by `run` from a row of the
/// `refinery_schema_history` table.
#[derive(Serialize)]
struct MigrationRecord {
    /// Value of the `version` column; `run` orders its query by this column.
    version: i64,
    /// Value of the `name` column.
    name: String,
    /// Value of the `applied_on` column, passed through as text.
    /// NOTE(review): the unit tests use RFC 3339-style timestamps here, but
    /// nothing in this file enforces a format — confirm against the migration tool.
    applied_on: String,
}
38
/// JSON payload emitted by `run` for the schema-dump subcommand.
///
/// Fields are serialized in declaration order.
#[derive(Serialize)]
struct DebugSchemaResponse {
    /// Internal SQLite counter incremented on each DDL (PRAGMA schema_version).
    /// Distinct from `user_version`: this one is managed automatically by SQLite.
    schema_version: i64,
    /// Canonical SCHEMA_USER_VERSION value set explicitly by migrations
    /// (PRAGMA user_version). Distinct from `schema_version` (SQLite DDL counter)
    /// and from `health.schema_version` (MAX version in refinery_schema_history).
    user_version: i64,
    /// Tables, views, triggers and indexes listed in `sqlite_master`,
    /// ordered by type and then by name.
    objects: Vec<SchemaObject>,
    /// Rows of `refinery_schema_history` ordered by version; empty when that
    /// table does not exist.
    migrations: Vec<MigrationRecord>,
    /// Milliseconds elapsed between the start of `run` and the moment the
    /// response is assembled (just before it is emitted).
    elapsed_ms: u64,
}
52
53pub fn run(args: DebugSchemaArgs) -> Result<(), AppError> {
54    let inicio = Instant::now();
55    let paths = AppPaths::resolve(args.db.as_deref())?;
56
57    crate::storage::connection::ensure_db_ready(&paths)?;
58
59    let conn = open_ro(&paths.db)?;
60
61    let schema_version: i64 = conn
62        .query_row("PRAGMA schema_version", [], |r| r.get(0))
63        .unwrap_or(0);
64
65    // PRAGMA user_version is set explicitly after migrations (canonical value SCHEMA_USER_VERSION).
66    let user_version: i64 = conn
67        .query_row("PRAGMA user_version", [], |r| r.get(0))
68        .unwrap_or(0);
69
70    let mut stmt = conn.prepare(
71        "SELECT name, type FROM sqlite_master \
72         WHERE type IN ('table','view','trigger','index') \
73         ORDER BY type, name",
74    )?;
75    let objects: Vec<SchemaObject> = stmt
76        .query_map([], |r| {
77            Ok(SchemaObject {
78                name: r.get(0)?,
79                object_type: r.get(1)?,
80            })
81        })?
82        .collect::<Result<Vec<_>, _>>()?;
83
84    let existe_hist: i64 = conn
85        .query_row(
86            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='refinery_schema_history'",
87            [],
88            |r| r.get(0),
89        )
90        .unwrap_or(0);
91
92    let migrations: Vec<MigrationRecord> = if existe_hist > 0 {
93        let mut stmt_mig = conn.prepare(
94            "SELECT version, name, applied_on \
95             FROM refinery_schema_history \
96             ORDER BY version",
97        )?;
98        let rows: Vec<MigrationRecord> = stmt_mig
99            .query_map([], |r| {
100                Ok(MigrationRecord {
101                    version: r.get(0)?,
102                    name: r.get(1)?,
103                    applied_on: r.get(2)?,
104                })
105            })?
106            .collect::<Result<Vec<_>, _>>()?;
107        rows
108    } else {
109        Vec::new()
110    };
111
112    let elapsed_ms = inicio.elapsed().as_millis() as u64;
113
114    output::emit_json(&DebugSchemaResponse {
115        schema_version,
116        user_version,
117        objects,
118        migrations,
119        elapsed_ms,
120    })?;
121
122    Ok(())
123}
124
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::Value;

    /// Builds a `SchemaObject` of type `table` with the given name.
    fn table_object(name: &str) -> SchemaObject {
        SchemaObject {
            name: name.to_string(),
            object_type: "table".to_string(),
        }
    }

    /// Builds a `MigrationRecord` from borrowed string parts.
    fn migration(version: i64, name: &str, applied_on: &str) -> MigrationRecord {
        MigrationRecord {
            version,
            name: name.to_string(),
            applied_on: applied_on.to_string(),
        }
    }

    /// The response must expose every required top-level field, with nested
    /// objects and migrations serialized as arrays.
    #[test]
    fn debug_schema_response_serializa_campos_obrigatorios() {
        let response = DebugSchemaResponse {
            schema_version: 42,
            user_version: 49,
            objects: vec![table_object("memories")],
            migrations: vec![migration(1, "V001__init", "2026-01-01T00:00:00Z")],
            elapsed_ms: 7,
        };
        let value: Value = serde_json::to_value(&response).unwrap();

        assert_eq!(value["schema_version"], 42);
        assert_eq!(value["user_version"], 49);

        assert!(value["objects"].is_array());
        let first_object = &value["objects"][0];
        assert_eq!(first_object["name"], "memories");
        assert_eq!(first_object["type"], "table");

        assert!(value["migrations"].is_array());
        assert_eq!(value["migrations"][0]["version"], 1);

        assert_eq!(value["elapsed_ms"], 7);
    }

    /// `object_type` must be serialized under the key `type`, never under its
    /// Rust field name.
    #[test]
    fn schema_object_renomeia_campo_type() {
        let value: Value = serde_json::to_value(&table_object("entities")).unwrap();

        assert!(value.get("object_type").is_none());
        assert_eq!(value["type"], "table");
    }

    /// Every `MigrationRecord` field must appear in the serialized output.
    #[test]
    fn migration_record_serializa_todos_campos() {
        let record = migration(3, "V003__indexes", "2026-04-19T12:00:00Z");
        let value: Value = serde_json::to_value(&record).unwrap();

        assert_eq!(value["version"], 3);
        assert_eq!(value["name"], "V003__indexes");
        assert_eq!(value["applied_on"], "2026-04-19T12:00:00Z");
    }
}