faucet_transform_sql/config.rs
1//! Config types for the SQL transform. No I/O or DuckDB here.
2
3use schemars::JsonSchema;
4use serde::{Deserialize, Serialize};
5use serde_json::Value;
6
7/// Configuration for the `sql` transform.
8#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
9pub struct SqlTransformConfig {
10 /// The SQL statement. The page's records are the relation `batch`. Must
11 /// produce a result set; each result row becomes one output record.
12 pub query: String,
13 /// Reference relations loaded once at compile time and joinable by name.
14 #[serde(default, skip_serializing_if = "Vec::is_empty")]
15 pub relations: Vec<RelationSpec>,
16 /// Optional DuckDB `memory_limit` pragma (e.g. "1GB"). Default: DuckDB's own.
17 #[serde(default, skip_serializing_if = "Option::is_none")]
18 pub memory_limit: Option<String>,
19 /// Optional DuckDB `threads` pragma. Default: DuckDB's own. Set to 1–2 for
20 /// high-fan-out matrices to avoid CPU over-subscription across rows.
21 #[serde(default, skip_serializing_if = "Option::is_none")]
22 pub threads: Option<usize>,
23}
24
25/// A reference relation registered before the first page.
26#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
27pub struct RelationSpec {
28 /// Relation name as referenced in the query. Must be a safe SQL identifier
29 /// and must not be `batch` (reserved for the page).
30 pub name: String,
31 /// Where the relation's data comes from.
32 pub source: RelationSource,
33 /// Re-stat the file's mtime before each page; rebuild + atomic swap if it
34 /// changed. Default false. Ignored for `values`.
35 #[serde(default)]
36 pub reload_on_change: bool,
37}
38
/// Supplies `true` for serde's `default = "..."` attribute, which takes a
/// function path rather than a literal value.
fn default_true() -> bool {
    true
}
43
44/// The data source for a reference relation.
45#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
46#[serde(tag = "type", rename_all = "snake_case")]
47pub enum RelationSource {
48 /// Delimited file loaded via DuckDB `read_csv_auto`.
49 Csv {
50 /// Filesystem path to the CSV file (absolute, or relative to the working directory).
51 path: String,
52 /// Whether the first row is a header row. Default: `true`.
53 #[serde(default = "default_true")]
54 has_header: bool,
55 },
56 /// Newline-delimited JSON loaded via DuckDB `read_json_auto`.
57 Jsonl {
58 /// Filesystem path to the JSONL file (absolute, or relative to the working directory).
59 path: String,
60 },
61 /// Inline rows materialized into a table.
62 Values {
63 /// Column names, in declaration order.
64 columns: Vec<String>,
65 /// Rows of cell values; each inner row must have the same length as `columns`.
66 rows: Vec<Vec<Value>>,
67 },
68}
69
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a config with one CSV relation, checks the parsed shape, verifies
    /// that serializing and re-parsing is lossless, and confirms the JSON
    /// schema can be generated.
    #[test]
    fn config_round_trips_and_schema_builds() {
        let cfg: SqlTransformConfig = serde_json::from_value(serde_json::json!({
            "query": "SELECT * FROM batch",
            "relations": [
                {"name": "countries",
                 "source": {"type": "csv", "path": "c.csv", "has_header": true}}
            ]
        }))
        .unwrap();
        assert_eq!(cfg.query, "SELECT * FROM batch");
        assert_eq!(cfg.relations.len(), 1);
        assert_eq!(cfg.relations[0].name, "countries");
        assert!(matches!(
            &cfg.relations[0].source,
            RelationSource::Csv { path, has_header: true } if path == "c.csv"
        ));

        // The actual round trip: serialize, parse again, and serialize once
        // more; both JSON forms must be identical.
        let encoded = serde_json::to_value(&cfg).unwrap();
        let reparsed: SqlTransformConfig = serde_json::from_value(encoded.clone()).unwrap();
        assert_eq!(serde_json::to_value(&reparsed).unwrap(), encoded);

        // Unset optional fields are skipped on output, not written as null.
        assert!(encoded.get("memory_limit").is_none());
        assert!(encoded.get("threads").is_none());

        // schema_for! must succeed (used by `faucet schema transform sql`).
        let schema = schemars::schema_for!(SqlTransformConfig);
        let json = serde_json::to_value(&schema).unwrap();
        assert!(
            json.get("properties")
                .and_then(|p| p.get("query"))
                .is_some()
        );
    }

    /// Fields left out of the input must fall back to their serde defaults.
    #[test]
    fn omitted_fields_take_documented_defaults() {
        let cfg: SqlTransformConfig = serde_json::from_value(serde_json::json!({
            "query": "SELECT 1",
            "relations": [
                {"name": "lookup", "source": {"type": "csv", "path": "l.csv"}}
            ]
        }))
        .unwrap();
        assert!(cfg.memory_limit.is_none());
        assert!(cfg.threads.is_none());
        assert!(!cfg.relations[0].reload_on_change);
        // `has_header` defaults to true through `default_true`.
        assert!(matches!(
            &cfg.relations[0].source,
            RelationSource::Csv { has_header: true, .. }
        ));

        // `relations` itself is optional and defaults to an empty list.
        let bare: SqlTransformConfig =
            serde_json::from_value(serde_json::json!({"query": "SELECT 1"})).unwrap();
        assert!(bare.relations.is_empty());
    }
}