use serde::Serialize;
pub const SYSTEM_RULES: &str = "\
You translate natural-language questions into SQL queries against a SQLRite database.
SQLRite is a small SQLite-compatible database. The dialect supported here is a strict subset of SQLite:
- SELECT with WHERE, ORDER BY (single sort key, can be an expression), LIMIT.
- INSERT, UPDATE, DELETE.
- CREATE TABLE, CREATE [UNIQUE] INDEX [IF NOT EXISTS] <name> ON <table> (<col>).
- BEGIN / COMMIT / ROLLBACK.
- Operators: = <> < <= > >= AND OR NOT + - * / % ||.
- Functions: vec_distance_l2(a, b), vec_distance_cosine(a, b), vec_distance_dot(a, b),
json_extract(json, path), json_type(json[, path]), json_array_length(json[, path]),
json_object_keys(json[, path]).
- Vector literals are bracket arrays: [0.1, 0.2, 0.3]. Vector columns are VECTOR(N).
- JSON columns store text; query with the json_* functions and a JSONPath subset
($, .key, [N], chained).
- Composite-column ORDER BY, JOIN, GROUP BY, aggregates, subqueries, CTEs, LIKE,
IN, IS NULL, BETWEEN, OFFSET, column aliases (AS), and DISTINCT are NOT supported
yet. If the user's question requires any of those, return SQL that's as close as
possible and explain the limitation in the explanation field.
You will see the database schema as a list of CREATE TABLE statements. Use only
those tables and columns; never invent columns that aren't in the schema.
Respond with a single JSON object on one line, no surrounding prose, no Markdown
code fences:
{\"sql\": \"<the SQL query, single statement, no trailing semicolon required>\", \
\"explanation\": \"<one short sentence on what the query does or why it can't be answered>\"}
If the question can't be answered with the available schema, set sql to an empty
string and explain in the explanation field.\n";
#[derive(Serialize, Debug)]
pub struct SystemBlock {
#[serde(rename = "type")]
pub kind: &'static str,
pub text: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_control: Option<CacheControl>,
}
#[derive(Serialize, Debug)]
pub struct CacheControl {
#[serde(rename = "type")]
pub kind: &'static str,
#[serde(skip_serializing_if = "Option::is_none")]
pub ttl: Option<&'static str>,
}
impl CacheControl {
pub fn ephemeral() -> Self {
Self {
kind: "ephemeral",
ttl: None,
}
}
pub fn ephemeral_1h() -> Self {
Self {
kind: "ephemeral",
ttl: Some("1h"),
}
}
}
#[derive(Serialize, Debug)]
pub struct UserMessage {
pub role: &'static str,
pub content: String,
}
impl UserMessage {
pub fn new(question: &str) -> Self {
Self {
role: "user",
content: question.to_string(),
}
}
}
pub fn build_system(schema_dump: &str, cache_schema: Option<CacheControl>) -> Vec<SystemBlock> {
vec![
SystemBlock {
kind: "text",
text: SYSTEM_RULES.to_string(),
cache_control: None,
},
SystemBlock {
kind: "text",
text: format!("<schema>\n{schema_dump}</schema>\n"),
cache_control: cache_schema,
},
]
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn cache_control_omitted_when_none() {
let block = SystemBlock {
kind: "text",
text: "hi".to_string(),
cache_control: None,
};
let json = serde_json::to_string(&block).unwrap();
assert!(!json.contains("cache_control"), "got: {json}");
}
#[test]
fn cache_control_emits_ephemeral_when_set() {
let block = SystemBlock {
kind: "text",
text: "hi".to_string(),
cache_control: Some(CacheControl::ephemeral()),
};
let json = serde_json::to_string(&block).unwrap();
assert!(json.contains("\"cache_control\""), "got: {json}");
assert!(json.contains("\"ephemeral\""));
assert!(!json.contains("\"ttl\""), "got: {json}");
}
#[test]
fn cache_control_1h_emits_ttl() {
let block = SystemBlock {
kind: "text",
text: "hi".to_string(),
cache_control: Some(CacheControl::ephemeral_1h()),
};
let json = serde_json::to_string(&block).unwrap();
assert!(json.contains("\"ttl\":\"1h\""), "got: {json}");
}
#[test]
fn build_system_places_cache_marker_only_on_schema_block() {
let blocks = build_system(
"CREATE TABLE x (id INTEGER);\n",
Some(CacheControl::ephemeral()),
);
assert_eq!(blocks.len(), 2);
assert!(
blocks[0].cache_control.is_none(),
"rules block must not be marked"
);
assert!(
blocks[1].cache_control.is_some(),
"schema block must be marked"
);
}
#[test]
fn schema_block_wraps_dump_in_xml_tags() {
let blocks = build_system("CREATE TABLE foo (id INT);\n", None);
let text = &blocks[1].text;
assert!(text.starts_with("<schema>\n"), "got: {text}");
assert!(text.ends_with("</schema>\n"), "got: {text}");
}
#[test]
fn user_message_roles_are_always_user() {
let m = UserMessage::new("how many users are over 30?");
let json = serde_json::to_string(&m).unwrap();
assert!(json.contains("\"role\":\"user\""));
assert!(json.contains("how many users are over 30?"));
}
}