Skip to main content

faucet_transform_sql/
config.rs

1//! Config types for the SQL transform. No I/O or DuckDB here.
2
3use schemars::JsonSchema;
4use serde::{Deserialize, Serialize};
5use serde_json::Value;
6
7/// Configuration for the `sql` transform.
8#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
9pub struct SqlTransformConfig {
10    /// The SQL statement. The page's records are the relation `batch`. Must
11    /// produce a result set; each result row becomes one output record.
12    pub query: String,
13    /// Reference relations loaded once at compile time and joinable by name.
14    #[serde(default, skip_serializing_if = "Vec::is_empty")]
15    pub relations: Vec<RelationSpec>,
16    /// Optional DuckDB `memory_limit` pragma (e.g. "1GB"). Default: DuckDB's own.
17    #[serde(default, skip_serializing_if = "Option::is_none")]
18    pub memory_limit: Option<String>,
19    /// Optional DuckDB `threads` pragma. Default: DuckDB's own. Set to 1–2 for
20    /// high-fan-out matrices to avoid CPU over-subscription across rows.
21    #[serde(default, skip_serializing_if = "Option::is_none")]
22    pub threads: Option<usize>,
23}
24
25/// A reference relation registered before the first page.
26#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
27pub struct RelationSpec {
28    /// Relation name as referenced in the query. Must be a safe SQL identifier
29    /// and must not be `batch` (reserved for the page).
30    pub name: String,
31    /// Where the relation's data comes from.
32    pub source: RelationSource,
33    /// Re-stat the file's mtime before each page; rebuild + atomic swap if it
34    /// changed. Default false. Ignored for `values`.
35    #[serde(default)]
36    pub reload_on_change: bool,
37}
38
/// Always yields `true`.
///
/// Exists only because serde's `default = "..."` attribute takes the path of a
/// function and cannot be given a literal value.
fn default_true() -> bool {
    true
}
43
44/// The data source for a reference relation.
45#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
46#[serde(tag = "type", rename_all = "snake_case")]
47pub enum RelationSource {
48    /// Delimited file loaded via DuckDB `read_csv_auto`.
49    Csv {
50        /// Filesystem path to the CSV file (absolute, or relative to the working directory).
51        path: String,
52        /// Whether the first row is a header row. Default: `true`.
53        #[serde(default = "default_true")]
54        has_header: bool,
55    },
56    /// Newline-delimited JSON loaded via DuckDB `read_json_auto`.
57    Jsonl {
58        /// Filesystem path to the JSONL file (absolute, or relative to the working directory).
59        path: String,
60    },
61    /// Inline rows materialized into a table.
62    Values {
63        /// Column names, in declaration order.
64        columns: Vec<String>,
65        /// Rows of cell values; each inner row must have the same length as `columns`.
66        rows: Vec<Vec<Value>>,
67    },
68}
69
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a representative config, verifies that it survives a
    /// serialize → deserialize round trip, and verifies that a JSON schema can
    /// be generated for the config type.
    #[test]
    fn config_round_trips_and_schema_builds() {
        let cfg: SqlTransformConfig = serde_json::from_value(serde_json::json!({
            "query": "SELECT * FROM batch",
            "relations": [
                {"name": "countries",
                 "source": {"type": "csv", "path": "c.csv", "has_header": true}}
            ]
        }))
        .unwrap();
        assert_eq!(cfg.relations.len(), 1);
        assert!(matches!(
            cfg.relations[0].source,
            RelationSource::Csv { .. }
        ));

        // The actual round trip: what we serialize must parse back into a
        // config that serializes to exactly the same JSON. Comparing the JSON
        // values avoids needing `PartialEq` on the config types.
        let encoded = serde_json::to_value(&cfg).unwrap();
        let decoded: SqlTransformConfig = serde_json::from_value(encoded.clone()).unwrap();
        assert_eq!(serde_json::to_value(&decoded).unwrap(), encoded);

        // schema_for! must succeed (used by `faucet schema transform sql`).
        let schema = schemars::schema_for!(SqlTransformConfig);
        let json = serde_json::to_value(&schema).unwrap();
        assert!(
            json.get("properties")
                .and_then(|p| p.get("query"))
                .is_some()
        );
    }

    /// A CSV source that omits `has_header` must fall back to `true`, as wired
    /// up through `#[serde(default = "default_true")]`.
    #[test]
    fn csv_has_header_defaults_to_true() {
        let source: RelationSource =
            serde_json::from_value(serde_json::json!({"type": "csv", "path": "c.csv"})).unwrap();
        match source {
            RelationSource::Csv { has_header, .. } => assert!(has_header),
            other => panic!("expected a csv source, got {:?}", other),
        }
    }
}