Skip to main content

sqlrite/ask/
schema.rs

1//! Schema introspection — turn an open `Connection` (or raw
2//! `Database`) into the textual schema dump the LLM uses to ground
3//! its SQL generation.
4//!
5//! ## Why we don't `SELECT FROM sqlrite_master`
6//!
7//! The `sqlrite_master` catalog persists the original `CREATE TABLE`
8//! SQL, so reflecting via SQL would work. But we already have the
9//! same information typed in `Database.tables` — going through SQL
10//! would mean parsing the round-tripped CREATE statement a second
11//! time. Walking the typed structure is cheaper and matches what
12//! the engine considers authoritative *right now* (relevant inside
13//! an open transaction, where the persisted catalog may already be
14//! stale relative to the in-memory snapshot).
15//!
16//! ## Determinism (matters for prompt caching)
17//!
18//! Tables are dumped in alphabetical order, columns in declaration
19//! order. The output is byte-stable for a fixed schema — that's
20//! what lets `cache_control: ephemeral` actually hit on repeat
21//! calls. Any change to the format invalidates the cache once for
22//! everyone, but steady-state hits are cheap.
23
24use std::fmt::Write;
25
26use crate::Connection;
27use crate::sql::db::database::Database;
28use crate::sql::db::table::{DataType, Table};
29
30/// Render the schema of every user-visible table on `conn` as a
31/// sequence of `CREATE TABLE … (…);` statements, sorted
32/// alphabetically by table name.
33pub fn dump_schema_for_connection(conn: &Connection) -> String {
34    let db = conn.database();
35    dump_schema_for_database(&db)
36}
37
38/// Same as [`dump_schema_for_connection`], but takes a `Database`
39/// reference directly. Used by the REPL's `.ask` meta-command
40/// (which holds `&Database`, not `&Connection`).
41pub fn dump_schema_for_database(db: &Database) -> String {
42    let mut names: Vec<&str> = db
43        .tables
44        .keys()
45        .filter(|k| k.as_str() != "sqlrite_master")
46        .map(String::as_str)
47        .collect();
48    names.sort_unstable();
49
50    let mut out = String::new();
51    for (i, name) in names.iter().enumerate() {
52        if i > 0 {
53            out.push('\n');
54        }
55        let table = match db.tables.get(*name) {
56            Some(t) => t,
57            None => continue,
58        };
59        format_create_table(table, &mut out);
60    }
61    out
62}
63
64/// Format one `Table` as a single `CREATE TABLE` statement.
65///
66/// Secondary indexes are deliberately not included — they're a
67/// `WHERE col = literal` performance concern, not something the LLM
68/// needs to reason about when choosing what columns to project or
69/// filter on. (HNSW indexes affect what queries are even *possible*
70/// efficiently. A future follow-up can add an `[indexed: hnsw,
71/// M=16, ef=200]` annotation on indexed vector columns once the
72/// prompt budget can absorb it.)
73fn format_create_table(table: &Table, out: &mut String) {
74    let _ = writeln!(out, "CREATE TABLE {} (", table.tb_name);
75    for (i, col) in table.columns.iter().enumerate() {
76        let datatype = render_datatype(&col.datatype);
77        let mut clauses: Vec<&'static str> = Vec::new();
78        if col.is_pk {
79            clauses.push("PRIMARY KEY");
80        }
81        if col.is_unique && !col.is_pk {
82            // PRIMARY KEY already implies UNIQUE; SQLite's reflection
83            // doesn't double-print it and neither do we.
84            clauses.push("UNIQUE");
85        }
86        if col.not_null && !col.is_pk {
87            clauses.push("NOT NULL");
88        }
89
90        let trailing = if i + 1 == table.columns.len() {
91            ""
92        } else {
93            ","
94        };
95        if clauses.is_empty() {
96            let _ = writeln!(out, "  {} {}{}", col.column_name, datatype, trailing);
97        } else {
98            let _ = writeln!(
99                out,
100                "  {} {} {}{}",
101                col.column_name,
102                datatype,
103                clauses.join(" "),
104                trailing
105            );
106        }
107    }
108    out.push_str(");\n");
109}
110
111fn render_datatype(dt: &DataType) -> String {
112    // Match the canonical SQL the parser accepts on the way in (so
113    // round-trip is `dump → CREATE TABLE` → identical schema). The
114    // engine's internal `Display` impl is debug-leaning ("Boolean",
115    // "Vector(384)") — different enough from the on-the-wire form
116    // that we render explicitly here.
117    match dt {
118        DataType::Integer => "INTEGER".to_string(),
119        DataType::Text => "TEXT".to_string(),
120        DataType::Real => "REAL".to_string(),
121        DataType::Bool => "BOOLEAN".to_string(),
122        DataType::Vector(dim) => format!("VECTOR({dim})"),
123        DataType::Json => "JSON".to_string(),
124        DataType::None => "TEXT".to_string(),
125        // Invalid columns shouldn't reach a schema dump in practice
126        // (the parser rejects them at CREATE time), but if one slips
127        // through we render it as TEXT rather than panicking — the
128        // LLM will at worst suggest a TEXT-shaped query.
129        DataType::Invalid => "TEXT".to_string(),
130    }
131}
132
133#[cfg(test)]
134mod tests {
135    use super::*;
136    use crate::Connection;
137
138    fn open() -> Connection {
139        Connection::open_in_memory().expect("open in-memory db")
140    }
141
142    #[test]
143    fn empty_schema_returns_empty_string() {
144        let conn = open();
145        assert_eq!(dump_schema_for_connection(&conn), "");
146    }
147
148    #[test]
149    fn single_table_round_trips() {
150        let mut conn = open();
151        conn.execute(
152            "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT UNIQUE)",
153        )
154        .unwrap();
155        let dump = dump_schema_for_connection(&conn);
156        assert!(dump.contains("CREATE TABLE users ("), "got: {dump}");
157        assert!(dump.contains("id INTEGER PRIMARY KEY"));
158        assert!(dump.contains("name TEXT NOT NULL"));
159        assert!(dump.contains("email TEXT UNIQUE"));
160    }
161
162    #[test]
163    fn vector_and_json_columns_render_with_canonical_keywords() {
164        let mut conn = open();
165        conn.execute(
166            "CREATE TABLE docs (id INTEGER PRIMARY KEY, embedding VECTOR(384), payload JSON)",
167        )
168        .unwrap();
169        let dump = dump_schema_for_connection(&conn);
170        assert!(dump.contains("embedding VECTOR(384)"), "got: {dump}");
171        assert!(dump.contains("payload JSON"), "got: {dump}");
172    }
173
174    #[test]
175    fn tables_emitted_in_alphabetical_order() {
176        let mut conn = open();
177        conn.execute("CREATE TABLE zebra (id INTEGER PRIMARY KEY)")
178            .unwrap();
179        conn.execute("CREATE TABLE alpha (id INTEGER PRIMARY KEY)")
180            .unwrap();
181        conn.execute("CREATE TABLE mango (id INTEGER PRIMARY KEY)")
182            .unwrap();
183        let dump = dump_schema_for_connection(&conn);
184        let alpha = dump.find("CREATE TABLE alpha").unwrap();
185        let mango = dump.find("CREATE TABLE mango").unwrap();
186        let zebra = dump.find("CREATE TABLE zebra").unwrap();
187        assert!(
188            alpha < mango && mango < zebra,
189            "non-deterministic order: {dump}"
190        );
191    }
192
193    #[test]
194    fn sqlrite_master_is_excluded() {
195        let mut conn = open();
196        conn.execute("CREATE TABLE users (id INTEGER PRIMARY KEY)")
197            .unwrap();
198        let dump = dump_schema_for_connection(&conn);
199        assert!(
200            !dump.contains("sqlrite_master"),
201            "internal catalog leaked: {dump}"
202        );
203    }
204
205    #[test]
206    fn dump_is_byte_stable_across_calls() {
207        // The whole point of putting the schema dump behind a
208        // `cache_control: ephemeral` breakpoint is that this is
209        // true. If sort order ever becomes non-deterministic
210        // (e.g. by walking the HashMap directly), prompt caching
211        // silently degrades.
212        let mut conn = open();
213        conn.execute("CREATE TABLE a (id INTEGER PRIMARY KEY, x TEXT)")
214            .unwrap();
215        conn.execute("CREATE TABLE b (id INTEGER PRIMARY KEY, y REAL)")
216            .unwrap();
217        conn.execute("CREATE TABLE c (id INTEGER PRIMARY KEY, z BOOLEAN)")
218            .unwrap();
219        let first = dump_schema_for_connection(&conn);
220        for _ in 0..20 {
221            assert_eq!(dump_schema_for_connection(&conn), first);
222        }
223    }
224}