Skip to main content

sqlrite/ask/
schema.rs

1//! Schema introspection — turn an open `Connection` (or raw
2//! `Database`) into the textual schema dump the LLM uses to ground
3//! its SQL generation.
4//!
5//! ## Why we don't `SELECT FROM sqlrite_master`
6//!
7//! The `sqlrite_master` catalog persists the original `CREATE TABLE`
8//! SQL, so reflecting via SQL would work. But we already have the
9//! same information typed in `Database.tables` — going through SQL
10//! would mean parsing the round-tripped CREATE statement a second
11//! time. Walking the typed structure is cheaper and matches what
12//! the engine considers authoritative *right now* (relevant inside
13//! an open transaction, where the persisted catalog may already be
14//! stale relative to the in-memory snapshot).
15//!
16//! ## Determinism (matters for prompt caching)
17//!
18//! Tables are dumped in alphabetical order, columns in declaration
19//! order. The output is byte-stable for a fixed schema — that's
20//! what lets `cache_control: ephemeral` actually hit on repeat
21//! calls. Any change to the format invalidates the cache once for
22//! everyone, but steady-state hits are cheap.
23
24use std::fmt::Write;
25
26use crate::Connection;
27use crate::sql::db::database::Database;
28use crate::sql::db::table::{DataType, Table};
29
30/// Render the schema of every user-visible table on `conn` as a
31/// sequence of `CREATE TABLE … (…);` statements, sorted
32/// alphabetically by table name.
33pub fn dump_schema_for_connection(conn: &Connection) -> String {
34    dump_schema_for_database(conn.database())
35}
36
37/// Same as [`dump_schema_for_connection`], but takes a `Database`
38/// reference directly. Used by the REPL's `.ask` meta-command
39/// (which holds `&Database`, not `&Connection`).
40pub fn dump_schema_for_database(db: &Database) -> String {
41    let mut names: Vec<&str> = db
42        .tables
43        .keys()
44        .filter(|k| k.as_str() != "sqlrite_master")
45        .map(String::as_str)
46        .collect();
47    names.sort_unstable();
48
49    let mut out = String::new();
50    for (i, name) in names.iter().enumerate() {
51        if i > 0 {
52            out.push('\n');
53        }
54        let table = match db.tables.get(*name) {
55            Some(t) => t,
56            None => continue,
57        };
58        format_create_table(table, &mut out);
59    }
60    out
61}
62
63/// Format one `Table` as a single `CREATE TABLE` statement.
64///
65/// Secondary indexes are deliberately not included — they're a
66/// `WHERE col = literal` performance concern, not something the LLM
67/// needs to reason about when choosing what columns to project or
68/// filter on. (HNSW indexes affect what queries are even *possible*
69/// efficiently. A future follow-up can add an `[indexed: hnsw,
70/// M=16, ef=200]` annotation on indexed vector columns once the
71/// prompt budget can absorb it.)
72fn format_create_table(table: &Table, out: &mut String) {
73    let _ = writeln!(out, "CREATE TABLE {} (", table.tb_name);
74    for (i, col) in table.columns.iter().enumerate() {
75        let datatype = render_datatype(&col.datatype);
76        let mut clauses: Vec<&'static str> = Vec::new();
77        if col.is_pk {
78            clauses.push("PRIMARY KEY");
79        }
80        if col.is_unique && !col.is_pk {
81            // PRIMARY KEY already implies UNIQUE; SQLite's reflection
82            // doesn't double-print it and neither do we.
83            clauses.push("UNIQUE");
84        }
85        if col.not_null && !col.is_pk {
86            clauses.push("NOT NULL");
87        }
88
89        let trailing = if i + 1 == table.columns.len() {
90            ""
91        } else {
92            ","
93        };
94        if clauses.is_empty() {
95            let _ = writeln!(out, "  {} {}{}", col.column_name, datatype, trailing);
96        } else {
97            let _ = writeln!(
98                out,
99                "  {} {} {}{}",
100                col.column_name,
101                datatype,
102                clauses.join(" "),
103                trailing
104            );
105        }
106    }
107    out.push_str(");\n");
108}
109
110fn render_datatype(dt: &DataType) -> String {
111    // Match the canonical SQL the parser accepts on the way in (so
112    // round-trip is `dump → CREATE TABLE` → identical schema). The
113    // engine's internal `Display` impl is debug-leaning ("Boolean",
114    // "Vector(384)") — different enough from the on-the-wire form
115    // that we render explicitly here.
116    match dt {
117        DataType::Integer => "INTEGER".to_string(),
118        DataType::Text => "TEXT".to_string(),
119        DataType::Real => "REAL".to_string(),
120        DataType::Bool => "BOOLEAN".to_string(),
121        DataType::Vector(dim) => format!("VECTOR({dim})"),
122        DataType::Json => "JSON".to_string(),
123        DataType::None => "TEXT".to_string(),
124        // Invalid columns shouldn't reach a schema dump in practice
125        // (the parser rejects them at CREATE time), but if one slips
126        // through we render it as TEXT rather than panicking — the
127        // LLM will at worst suggest a TEXT-shaped query.
128        DataType::Invalid => "TEXT".to_string(),
129    }
130}
131
132#[cfg(test)]
133mod tests {
134    use super::*;
135    use crate::Connection;
136
137    fn open() -> Connection {
138        Connection::open_in_memory().expect("open in-memory db")
139    }
140
141    #[test]
142    fn empty_schema_returns_empty_string() {
143        let conn = open();
144        assert_eq!(dump_schema_for_connection(&conn), "");
145    }
146
147    #[test]
148    fn single_table_round_trips() {
149        let mut conn = open();
150        conn.execute(
151            "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT UNIQUE)",
152        )
153        .unwrap();
154        let dump = dump_schema_for_connection(&conn);
155        assert!(dump.contains("CREATE TABLE users ("), "got: {dump}");
156        assert!(dump.contains("id INTEGER PRIMARY KEY"));
157        assert!(dump.contains("name TEXT NOT NULL"));
158        assert!(dump.contains("email TEXT UNIQUE"));
159    }
160
161    #[test]
162    fn vector_and_json_columns_render_with_canonical_keywords() {
163        let mut conn = open();
164        conn.execute(
165            "CREATE TABLE docs (id INTEGER PRIMARY KEY, embedding VECTOR(384), payload JSON)",
166        )
167        .unwrap();
168        let dump = dump_schema_for_connection(&conn);
169        assert!(dump.contains("embedding VECTOR(384)"), "got: {dump}");
170        assert!(dump.contains("payload JSON"), "got: {dump}");
171    }
172
173    #[test]
174    fn tables_emitted_in_alphabetical_order() {
175        let mut conn = open();
176        conn.execute("CREATE TABLE zebra (id INTEGER PRIMARY KEY)")
177            .unwrap();
178        conn.execute("CREATE TABLE alpha (id INTEGER PRIMARY KEY)")
179            .unwrap();
180        conn.execute("CREATE TABLE mango (id INTEGER PRIMARY KEY)")
181            .unwrap();
182        let dump = dump_schema_for_connection(&conn);
183        let alpha = dump.find("CREATE TABLE alpha").unwrap();
184        let mango = dump.find("CREATE TABLE mango").unwrap();
185        let zebra = dump.find("CREATE TABLE zebra").unwrap();
186        assert!(
187            alpha < mango && mango < zebra,
188            "non-deterministic order: {dump}"
189        );
190    }
191
192    #[test]
193    fn sqlrite_master_is_excluded() {
194        let mut conn = open();
195        conn.execute("CREATE TABLE users (id INTEGER PRIMARY KEY)")
196            .unwrap();
197        let dump = dump_schema_for_connection(&conn);
198        assert!(
199            !dump.contains("sqlrite_master"),
200            "internal catalog leaked: {dump}"
201        );
202    }
203
204    #[test]
205    fn dump_is_byte_stable_across_calls() {
206        // The whole point of putting the schema dump behind a
207        // `cache_control: ephemeral` breakpoint is that this is
208        // true. If sort order ever becomes non-deterministic
209        // (e.g. by walking the HashMap directly), prompt caching
210        // silently degrades.
211        let mut conn = open();
212        conn.execute("CREATE TABLE a (id INTEGER PRIMARY KEY, x TEXT)")
213            .unwrap();
214        conn.execute("CREATE TABLE b (id INTEGER PRIMARY KEY, y REAL)")
215            .unwrap();
216        conn.execute("CREATE TABLE c (id INTEGER PRIMARY KEY, z BOOLEAN)")
217            .unwrap();
218        let first = dump_schema_for_connection(&conn);
219        for _ in 0..20 {
220            assert_eq!(dump_schema_for_connection(&conn), first);
221        }
222    }
223}