Skip to main content

sqlrite_ask/
schema.rs

1//! Schema introspection — turn an open `Connection` into a textual
2//! schema dump the LLM can ground its SQL generation on.
3//!
4//! ## Why we don't `SELECT FROM sqlrite_master`
5//!
6//! The `sqlrite_master` catalog persists the original `CREATE TABLE`
7//! SQL, so reflecting via SQL would work. But we already have the
8//! same information typed in `Database.tables` (declared columns,
9//! primary key, NOT NULL / UNIQUE, vector dimensions, …) — going
10//! through SQL would mean parsing the round-tripped CREATE statement
11//! a second time. Walking the typed structure is cheaper and matches
12//! whatever the engine considers authoritative *right now* (a
13//! relevant distinction inside an open transaction, where the
14//! catalog's persisted state may already be stale).
15//!
16//! ## Determinism (matters for prompt caching)
17//!
18//! Tables are dumped in alphabetical order, columns in declaration
19//! order. The output is byte-stable for a fixed schema — that's
20//! what lets `cache_control: ephemeral` actually hit on repeat
21//! calls. Any change to the dump format (adding a clause, changing
22//! whitespace) will invalidate the cache once for everyone, but
23//! steady-state hits are cheap.
24
25use std::fmt::Write;
26
27use sqlrite::Connection;
28use sqlrite::sql::db::database::Database;
29use sqlrite::sql::db::table::{DataType, Table};
30
31/// Render the schema of every user-visible table as a sequence of
32/// `CREATE TABLE … (…);` statements, sorted alphabetically by name.
33///
34/// The internal `sqlrite_master` catalog is filtered out — the LLM
35/// doesn't need to know about it and including it would confuse the
36/// generation.
37pub fn dump_schema(conn: &Connection) -> String {
38    dump_database(conn.database())
39}
40
41/// Same as [`dump_schema`], but takes a `Database` reference directly.
42/// Useful for callers that already hold a `Database` (e.g., advanced
43/// embedders bypassing the public `Connection` API).
44pub fn dump_database(db: &Database) -> String {
45    let mut names: Vec<&str> = db
46        .tables
47        .keys()
48        .filter(|k| k.as_str() != "sqlrite_master")
49        .map(String::as_str)
50        .collect();
51    names.sort_unstable();
52
53    let mut out = String::new();
54    for (i, name) in names.iter().enumerate() {
55        if i > 0 {
56            out.push('\n');
57        }
58        let table = match db.tables.get(*name) {
59            Some(t) => t,
60            None => continue,
61        };
62        format_create_table(table, &mut out);
63    }
64    out
65}
66
67/// Format one `Table` as a single `CREATE TABLE` statement.
68///
69/// We don't emit the secondary-index DDL here — indexes are an
70/// implementation detail of `WHERE col = literal` performance, not
71/// something the LLM needs to reason about when choosing what columns
72/// to project or filter on. (HNSW indexes are different — they affect
73/// what queries are even *possible* efficiently. Phase 7g.x follow-up
74/// will add an `[indexed: hnsw, M=16, ef=200]` annotation on indexed
75/// vector columns once the prompt budget can absorb it.)
76fn format_create_table(table: &Table, out: &mut String) {
77    let _ = writeln!(out, "CREATE TABLE {} (", table.tb_name);
78    for (i, col) in table.columns.iter().enumerate() {
79        let datatype = render_datatype(&col.datatype);
80        let mut clauses: Vec<&'static str> = Vec::new();
81        if col.is_pk {
82            clauses.push("PRIMARY KEY");
83        }
84        if col.is_unique && !col.is_pk {
85            // PRIMARY KEY already implies UNIQUE; SQLite's reflection
86            // never double-prints it and neither do we.
87            clauses.push("UNIQUE");
88        }
89        if col.not_null && !col.is_pk {
90            clauses.push("NOT NULL");
91        }
92
93        let trailing = if i + 1 == table.columns.len() {
94            ""
95        } else {
96            ","
97        };
98        if clauses.is_empty() {
99            let _ = writeln!(out, "  {} {}{}", col.column_name, datatype, trailing);
100        } else {
101            let _ = writeln!(
102                out,
103                "  {} {} {}{}",
104                col.column_name,
105                datatype,
106                clauses.join(" "),
107                trailing
108            );
109        }
110    }
111    out.push_str(");\n");
112}
113
114fn render_datatype(dt: &DataType) -> String {
115    // Match the canonical SQL the parser accepts on the way in (so
116    // round-trip is `dump → CREATE TABLE` → identical schema). The
117    // engine's internal `Display` impl is debug-leaning ("Boolean",
118    // "Vector(384)") — different enough from the on-the-wire form
119    // that we render explicitly here.
120    match dt {
121        DataType::Integer => "INTEGER".to_string(),
122        DataType::Text => "TEXT".to_string(),
123        DataType::Real => "REAL".to_string(),
124        DataType::Bool => "BOOLEAN".to_string(),
125        DataType::Vector(dim) => format!("VECTOR({dim})"),
126        DataType::Json => "JSON".to_string(),
127        DataType::None => "TEXT".to_string(),
128        // Invalid columns shouldn't reach a schema dump in practice
129        // (the parser rejects them at CREATE time), but if one slips
130        // through we render it as TEXT rather than panicking — the
131        // LLM will at worst suggest a TEXT-shaped query.
132        DataType::Invalid => "TEXT".to_string(),
133    }
134}
135
136#[cfg(test)]
137mod tests {
138    use super::*;
139    use sqlrite::Connection;
140
141    fn open() -> Connection {
142        Connection::open_in_memory().expect("open in-memory db")
143    }
144
145    #[test]
146    fn empty_schema_returns_empty_string() {
147        let conn = open();
148        assert_eq!(dump_schema(&conn), "");
149    }
150
151    #[test]
152    fn single_table_round_trips() {
153        let mut conn = open();
154        conn.execute(
155            "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT NOT NULL, email TEXT UNIQUE)",
156        )
157        .unwrap();
158        let dump = dump_schema(&conn);
159        assert!(dump.contains("CREATE TABLE users ("), "got: {dump}");
160        assert!(dump.contains("id INTEGER PRIMARY KEY"));
161        assert!(dump.contains("name TEXT NOT NULL"));
162        assert!(dump.contains("email TEXT UNIQUE"));
163    }
164
165    #[test]
166    fn vector_and_json_columns_render_with_canonical_keywords() {
167        let mut conn = open();
168        conn.execute(
169            "CREATE TABLE docs (id INTEGER PRIMARY KEY, embedding VECTOR(384), payload JSON)",
170        )
171        .unwrap();
172        let dump = dump_schema(&conn);
173        assert!(dump.contains("embedding VECTOR(384)"), "got: {dump}");
174        assert!(dump.contains("payload JSON"), "got: {dump}");
175    }
176
177    #[test]
178    fn tables_emitted_in_alphabetical_order() {
179        let mut conn = open();
180        conn.execute("CREATE TABLE zebra (id INTEGER PRIMARY KEY)")
181            .unwrap();
182        conn.execute("CREATE TABLE alpha (id INTEGER PRIMARY KEY)")
183            .unwrap();
184        conn.execute("CREATE TABLE mango (id INTEGER PRIMARY KEY)")
185            .unwrap();
186        let dump = dump_schema(&conn);
187        let alpha = dump.find("CREATE TABLE alpha").unwrap();
188        let mango = dump.find("CREATE TABLE mango").unwrap();
189        let zebra = dump.find("CREATE TABLE zebra").unwrap();
190        assert!(
191            alpha < mango && mango < zebra,
192            "non-deterministic order: {dump}"
193        );
194    }
195
196    #[test]
197    fn sqlrite_master_is_excluded() {
198        let mut conn = open();
199        conn.execute("CREATE TABLE users (id INTEGER PRIMARY KEY)")
200            .unwrap();
201        let dump = dump_schema(&conn);
202        assert!(
203            !dump.contains("sqlrite_master"),
204            "internal catalog leaked: {dump}"
205        );
206    }
207
208    #[test]
209    fn dump_is_byte_stable_across_calls() {
210        // The whole point of putting the schema dump behind a
211        // `cache_control: ephemeral` breakpoint is that this is true.
212        // If sort order ever becomes non-deterministic (e.g. by walking
213        // the HashMap directly), prompt caching silently degrades.
214        let mut conn = open();
215        conn.execute("CREATE TABLE a (id INTEGER PRIMARY KEY, x TEXT)")
216            .unwrap();
217        conn.execute("CREATE TABLE b (id INTEGER PRIMARY KEY, y REAL)")
218            .unwrap();
219        conn.execute("CREATE TABLE c (id INTEGER PRIMARY KEY, z BOOLEAN)")
220            .unwrap();
221        let first = dump_schema(&conn);
222        for _ in 0..20 {
223            assert_eq!(dump_schema(&conn), first);
224        }
225    }
226}