1use serde::Serialize;
32
/// Fixed instruction text that [`build_system`] places in the first system block.
///
/// It describes the SQL dialect the model may use, lists the constructs that
/// are not supported, and asks for the reply as a single-line JSON object with
/// `sql` and `explanation` fields.
///
/// A `\` at the end of a source line is a string continuation: the line break
/// and the leading whitespace of the following line are not part of the
/// constant's value.
pub const SYSTEM_RULES: &str = "\
You translate natural-language questions into SQL queries against a SQLRite database.

SQLRite is a small SQLite-compatible database. The dialect supported here is a strict subset of SQLite:

- SELECT with WHERE, ORDER BY (single sort key, can be an expression), LIMIT.
- INSERT, UPDATE, DELETE.
- CREATE TABLE, CREATE [UNIQUE] INDEX [IF NOT EXISTS] <name> ON <table> (<col>).
- BEGIN / COMMIT / ROLLBACK.
- Operators: = <> < <= > >= AND OR NOT + - * / % ||.
- Functions: vec_distance_l2(a, b), vec_distance_cosine(a, b), vec_distance_dot(a, b),
 json_extract(json, path), json_type(json[, path]), json_array_length(json[, path]),
 json_object_keys(json[, path]).
- Vector literals are bracket arrays: [0.1, 0.2, 0.3]. Vector columns are VECTOR(N).
- JSON columns store text; query with the json_* functions and a JSONPath subset
 ($, .key, [N], chained).
- Composite-column ORDER BY, JOIN, GROUP BY, aggregates, subqueries, CTEs, LIKE,
 IN, IS NULL, BETWEEN, OFFSET, column aliases (AS), and DISTINCT are NOT supported
 yet. If the user's question requires any of those, return SQL that's as close as
 possible and explain the limitation in the explanation field.

You will see the database schema as a list of CREATE TABLE statements. Use only
those tables and columns; never invent columns that aren't in the schema.

Respond with a single JSON object on one line, no surrounding prose, no Markdown
code fences:

 {\"sql\": \"<the SQL query, single statement, no trailing semicolon required>\", \
\"explanation\": \"<one short sentence on what the query does or why it can't be answered>\"}

If the question can't be answered with the available schema, set sql to an empty
string and explain in the explanation field.\n";
70
/// One text block of the system prompt, as returned by [`build_system`].
///
/// Serializes to an object with the keys `type`, `text` and, only when a
/// marker is present, `cache_control`.
#[derive(Serialize, Debug)]
pub struct SystemBlock {
    /// Block type tag; emitted under the JSON key `type`.
    #[serde(rename = "type")]
    pub kind: &'static str,
    /// The text carried by this block.
    pub text: String,
    /// Optional cache marker; the key is left out of the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_control: Option<CacheControl>,
}
86
/// Cache marker that can be attached to a [`SystemBlock`].
///
/// Serializes to an object with the key `type` and, only when set, `ttl`.
#[derive(Serialize, Debug)]
pub struct CacheControl {
    /// Marker type; emitted under the JSON key `type`.
    #[serde(rename = "type")]
    pub kind: &'static str,
    /// Optional time-to-live string; the key is left out of the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ttl: Option<&'static str>,
}
97
98impl CacheControl {
99 pub fn ephemeral() -> Self {
100 Self {
101 kind: "ephemeral",
102 ttl: None,
103 }
104 }
105
106 pub fn ephemeral_1h() -> Self {
107 Self {
108 kind: "ephemeral",
109 ttl: Some("1h"),
110 }
111 }
112}
113
/// A single message made of a role and its text content.
///
/// [`UserMessage::new`] always sets the role to `"user"`.
#[derive(Serialize, Debug)]
pub struct UserMessage {
    /// Role label of the message author.
    pub role: &'static str,
    /// The message text.
    pub content: String,
}
121
122impl UserMessage {
123 pub fn new(question: &str) -> Self {
124 Self {
125 role: "user",
126 content: question.to_string(),
127 }
128 }
129}
130
131pub fn build_system(schema_dump: &str, cache_schema: Option<CacheControl>) -> Vec<SystemBlock> {
139 vec![
140 SystemBlock {
141 kind: "text",
142 text: SYSTEM_RULES.to_string(),
143 cache_control: None,
144 },
145 SystemBlock {
146 kind: "text",
147 text: format!("<schema>\n{schema_dump}</schema>\n"),
148 cache_control: cache_schema,
149 },
150 ]
151}
152
#[cfg(test)]
mod tests {
    use super::*;

    /// Renders `value` as a JSON string, panicking if serialization fails.
    fn to_json(value: &impl serde::Serialize) -> String {
        serde_json::to_string(value).unwrap()
    }

    /// Builds a minimal text block carrying the given cache marker.
    fn hi_block(cache_control: Option<CacheControl>) -> SystemBlock {
        SystemBlock {
            kind: "text",
            text: "hi".to_string(),
            cache_control,
        }
    }

    // A block without a marker must not mention `cache_control` at all.
    #[test]
    fn cache_control_omitted_when_none() {
        let json = to_json(&hi_block(None));
        assert!(!json.contains("cache_control"), "got: {json}");
    }

    // The default ephemeral marker is emitted, but without a `ttl` key.
    #[test]
    fn cache_control_emits_ephemeral_when_set() {
        let json = to_json(&hi_block(Some(CacheControl::ephemeral())));
        assert!(json.contains("\"cache_control\""), "got: {json}");
        assert!(json.contains("\"ephemeral\""));
        assert!(!json.contains("\"ttl\""), "got: {json}");
    }

    // The one-hour variant adds the `ttl` key.
    #[test]
    fn cache_control_1h_emits_ttl() {
        let json = to_json(&hi_block(Some(CacheControl::ephemeral_1h())));
        assert!(json.contains("\"ttl\":\"1h\""), "got: {json}");
    }

    // Only the second (schema) block receives the marker.
    #[test]
    fn build_system_places_cache_marker_only_on_schema_block() {
        let marker = Some(CacheControl::ephemeral());
        let blocks = build_system("CREATE TABLE x (id INTEGER);\n", marker);
        assert_eq!(blocks.len(), 2);

        let rules = &blocks[0];
        let schema = &blocks[1];
        assert!(
            rules.cache_control.is_none(),
            "rules block must not be marked"
        );
        assert!(
            schema.cache_control.is_some(),
            "schema block must be marked"
        );
    }

    // The schema text is wrapped in opening and closing tags.
    #[test]
    fn schema_block_wraps_dump_in_xml_tags() {
        let blocks = build_system("CREATE TABLE foo (id INT);\n", None);
        let text = &blocks[1].text;
        assert!(text.starts_with("<schema>\n"), "got: {text}");
        assert!(text.ends_with("</schema>\n"), "got: {text}");
    }

    // The constructor fixes the role and keeps the question text.
    #[test]
    fn user_message_roles_are_always_user() {
        let question = "how many users are over 30?";
        let json = to_json(&UserMessage::new(question));
        assert!(json.contains("\"role\":\"user\""));
        assert!(json.contains("how many users are over 30?"));
    }
}