Skip to main content

datapress_core/
schema.rs

//! Backend-agnostic schema model for a registered dataset.
//!
//! Both backends introspect their Parquet source at startup and produce a
//! [`DatasetSchema`]. Predicate validation, identifier quoting, and the
//! `GET /api/datasets/{name}/schema` response all go through this type.
6
7use std::collections::HashMap;
8
9use serde::Serialize;
10
11use crate::errors::AppError;
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
14#[serde(rename_all = "lowercase")]
15pub enum LogicalType {
16    Bool,
17    Int,
18    Float,
19    Utf8,
20    /// Timestamp / Date / Time — serialised as string, requires CAST when
21    /// projected from DuckDB.
22    Temporal,
23    /// Anything else (Decimal, Binary, List, Struct …) — round-tripped as
24    /// best the encoder can manage; predicates over these are rejected.
25    Other,
26}
27
28impl LogicalType {
29    /// True iff the type must be cast to VARCHAR when projected through
30    /// DuckDB's `json_object()` call.
31    pub fn needs_cast(self) -> bool {
32        matches!(self, LogicalType::Temporal)
33    }
34}
35
36#[derive(Debug, Clone, Serialize)]
37pub struct ColumnInfo {
38    pub name: String,
39    pub logical: LogicalType,
40    /// Original backend-specific type name (e.g. "TIMESTAMP", "VARCHAR",
41    /// "Float64") — included in the schema response for clients.
42    pub sql_type: String,
43    pub nullable: bool,
44}
45
46#[derive(Debug, Clone)]
47pub struct DatasetSchema {
48    pub name: String,
49    pub columns: Vec<ColumnInfo>,
50    /// lowercase name → index in `columns`.
51    pub by_name: HashMap<String, usize>,
52}
53
54impl DatasetSchema {
55    pub fn new(name: impl Into<String>, columns: Vec<ColumnInfo>) -> Self {
56        let by_name = columns
57            .iter()
58            .enumerate()
59            .map(|(i, c)| (c.name.to_lowercase(), i))
60            .collect();
61        Self {
62            name: name.into(),
63            columns,
64            by_name,
65        }
66    }
67
68    /// Case-insensitive lookup. Returns the canonical `ColumnInfo`.
69    pub fn find(&self, name: &str) -> Result<&ColumnInfo, AppError> {
70        self.by_name
71            .get(&name.to_lowercase())
72            .map(|&i| &self.columns[i])
73            .ok_or_else(|| AppError::UnknownColumn(name.into()))
74    }
75
76    /// Quoted identifier safe for WHERE / SELECT clauses.
77    /// Double-quotes embedded `"` per SQL spec.
78    pub fn quote_ident(name: &str) -> String {
79        format!("\"{}\"", name.replace('"', "\"\""))
80    }
81}
82
#[cfg(test)]
mod tests {
    use super::*;

    /// Two-column fixture: a non-nullable integer `Id` and a nullable
    /// temporal `When`, both with mixed-case names.
    fn s() -> DatasetSchema {
        let id = ColumnInfo {
            name: "Id".into(),
            logical: LogicalType::Int,
            sql_type: "BIGINT".into(),
            nullable: false,
        };
        let when = ColumnInfo {
            name: "When".into(),
            logical: LogicalType::Temporal,
            sql_type: "TIMESTAMP".into(),
            nullable: true,
        };
        DatasetSchema::new("ds", vec![id, when])
    }

    /// A plain identifier is only wrapped in double quotes.
    #[test]
    fn quote_ident_plain() {
        let quoted = DatasetSchema::quote_ident("foo");
        assert_eq!(quoted, r#""foo""#);
    }

    /// An embedded double quote is doubled inside the quoted identifier.
    #[test]
    fn quote_ident_escapes_inner_quote() {
        let quoted = DatasetSchema::quote_ident(r#"a"b"#);
        assert_eq!(quoted, r#""a""b""#);
    }

    /// Lookup ignores case but hands back the name with its stored casing.
    #[test]
    fn find_case_insensitive_returns_canonical_name() {
        let schema = s();
        let column = schema.find("ID").expect("found");
        assert_eq!(column.name, "Id");
    }

    /// A name that matches no column yields `AppError::UnknownColumn`.
    #[test]
    fn find_unknown_column() {
        let schema = s();
        let error = schema.find("nope").unwrap_err();
        assert!(matches!(error, AppError::UnknownColumn(_)));
    }

    /// `Temporal` is the only logical type that requires a cast.
    #[test]
    fn needs_cast_only_temporal() {
        assert!(LogicalType::Temporal.needs_cast());

        let no_cast = [
            LogicalType::Bool,
            LogicalType::Int,
            LogicalType::Float,
            LogicalType::Utf8,
            LogicalType::Other,
        ];
        for logical in no_cast {
            assert!(!logical.needs_cast());
        }
    }
}