Skip to main content

fraiseql_core/compiler/
window_allowlist.rs

1//! Schema-based allowlist for window query identifiers.
2//!
3//! Provides defence-in-depth on top of the character-level validation in
4//! `window_functions/planner.rs`: every identifier used in a window query's
5//! PARTITION BY, ORDER BY, or SELECT clauses is checked against the set of
6//! fields declared in the compiled schema.
7//!
8//! When the allowlist is empty (i.e. the schema declares no fact-table metadata
9//! for this type) the validation is skipped, matching the behaviour of
10//! `compiler/aggregation.rs` when `metadata.dimensions.paths` is empty.
11
12use std::collections::HashSet;
13
14use crate::{
15    compiler::fact_table::FactTableMetadata,
16    error::{FraiseQLError, Result},
17};
18
19/// Validated allowlist of identifiers permitted in window queries for a given type.
20///
21/// Built at request-planning time from the compiled schema's `FactTableMetadata`.
22/// An empty allowlist means "no schema constraints are declared; character-level
23/// validation still applies".
24#[derive(Debug, Clone, Default)]
25pub struct WindowAllowlist {
26    /// All valid field expressions.
27    ///
28    /// Contains:
29    /// - measure column names (e.g. `"revenue"`)
30    /// - denormalised filter column names (e.g. `"occurred_at"`)
31    /// - dimension JSONB path expressions (e.g. `"dimensions->>'category'"`)
32    fields: HashSet<String>,
33}
34
35impl WindowAllowlist {
36    /// Build an allowlist from compiled fact-table metadata.
37    #[must_use]
38    pub fn from_metadata(metadata: &FactTableMetadata) -> Self {
39        let mut fields = HashSet::new();
40        for m in &metadata.measures {
41            fields.insert(m.name.clone());
42        }
43        for f in &metadata.denormalized_filters {
44            fields.insert(f.name.clone());
45        }
46        for p in &metadata.dimensions.paths {
47            // Store both the short name ("category") and the full JSONB expression
48            // ("dimensions->>'category'") so callers can use either form.
49            fields.insert(p.name.clone());
50            fields.insert(p.json_path.clone());
51        }
52        Self { fields }
53    }
54
55    /// Returns `true` if no schema constraints are declared.
56    ///
57    /// An empty allowlist does not block any identifier; character-level
58    /// validation in the planner still applies.
59    #[must_use]
60    pub fn is_empty(&self) -> bool {
61        self.fields.is_empty()
62    }
63
64    /// Validate that `identifier` is in the allowlist.
65    ///
66    /// When the allowlist is empty (no schema constraints), this is a no-op.
67    ///
68    /// # Errors
69    ///
70    /// Returns `FraiseQLError::Validation` if the identifier is not in the
71    /// allowlist and the allowlist is non-empty.
72    pub fn validate(&self, identifier: &str, context: &str) -> Result<()> {
73        if self.fields.is_empty() || self.fields.contains(identifier) {
74            Ok(())
75        } else {
76            Err(FraiseQLError::Validation {
77                message: format!(
78                    "Field '{identifier}' is not a known {context} field for this window query. \
79                     Only fields declared in the compiled schema are permitted."
80                ),
81                path:    None,
82            })
83        }
84    }
85}
86
87#[cfg(test)]
88mod tests {
89    #![allow(clippy::unwrap_used)] // Reason: test code, panics are acceptable
90
91    use super::*;
92    use crate::compiler::fact_table::{
93        DimensionColumn, DimensionPath, FactTableMetadata, FilterColumn, MeasureColumn, SqlType,
94    };
95
96    fn test_metadata() -> FactTableMetadata {
97        FactTableMetadata {
98            table_name:           "tf_sales".to_string(),
99            measures:             vec![
100                MeasureColumn {
101                    name:     "revenue".to_string(),
102                    sql_type: SqlType::Decimal,
103                    nullable: false,
104                },
105                MeasureColumn {
106                    name:     "units".to_string(),
107                    sql_type: SqlType::Int,
108                    nullable: false,
109                },
110            ],
111            dimensions:           DimensionColumn {
112                name:  "dimensions".to_string(),
113                paths: vec![DimensionPath {
114                    name:      "category".to_string(),
115                    json_path: "dimensions->>'category'".to_string(),
116                    data_type: "text".to_string(),
117                }],
118            },
119            denormalized_filters: vec![FilterColumn {
120                name:     "occurred_at".to_string(),
121                sql_type: SqlType::Timestamp,
122                indexed:  true,
123            }],
124            calendar_dimensions:  vec![],
125        }
126    }
127
128    #[test]
129    fn test_measure_name_accepted() {
130        let al = WindowAllowlist::from_metadata(&test_metadata());
131        al.validate("revenue", "PARTITION BY")
132            .unwrap_or_else(|e| panic!("expected Ok: {e}"));
133    }
134
135    #[test]
136    fn test_filter_name_accepted() {
137        let al = WindowAllowlist::from_metadata(&test_metadata());
138        al.validate("occurred_at", "ORDER BY")
139            .unwrap_or_else(|e| panic!("expected Ok: {e}"));
140    }
141
142    #[test]
143    fn test_dimension_short_name_accepted() {
144        let al = WindowAllowlist::from_metadata(&test_metadata());
145        al.validate("category", "PARTITION BY")
146            .unwrap_or_else(|e| panic!("expected Ok: {e}"));
147    }
148
149    #[test]
150    fn test_dimension_full_json_path_accepted() {
151        let al = WindowAllowlist::from_metadata(&test_metadata());
152        al.validate("dimensions->>'category'", "PARTITION BY")
153            .unwrap_or_else(|e| panic!("expected Ok: {e}"));
154    }
155
156    #[test]
157    fn test_unknown_field_rejected() {
158        let al = WindowAllowlist::from_metadata(&test_metadata());
159        assert!(
160            matches!(
161                al.validate("secret_column", "PARTITION BY"),
162                Err(FraiseQLError::Validation { .. })
163            ),
164            "expected Validation error for unknown field"
165        );
166    }
167
168    #[test]
169    fn test_sql_injection_payloads_rejected() {
170        let al = WindowAllowlist::from_metadata(&test_metadata());
171        let payloads = [
172            "'; DROP TABLE users; --",
173            "1 UNION SELECT * FROM secrets",
174            "field; DELETE FROM logs",
175            "x\x00y",
176            "field' OR '1'='1",
177            "revenue--",
178            "revenue UNION SELECT password FROM admin",
179        ];
180        for payload in &payloads {
181            assert!(
182                al.validate(payload, "PARTITION BY").is_err(),
183                "Should reject payload: {payload}"
184            );
185        }
186    }
187
188    #[test]
189    fn test_empty_allowlist_accepts_anything() {
190        // When metadata has no known fields, allowlist is empty and validation is
191        // skipped (character-level validation in the planner still applies).
192        let al = WindowAllowlist::default();
193        assert!(al.is_empty());
194        al.validate("any_field", "PARTITION BY")
195            .unwrap_or_else(|e| panic!("expected Ok for empty allowlist: {e}"));
196        al.validate("'; DROP TABLE users; --", "ORDER BY")
197            .unwrap_or_else(|e| panic!("expected Ok for empty allowlist: {e}"));
198    }
199}