Skip to main content

qail_core/optimizer/
normalized_select.rs

1use crate::ast::{
2    Action, Cage, CageKind, Condition, Expr, Join, JoinKind, LogicalOp, Qail, SortOrder, Value,
3};
4use std::collections::HashSet;
5
6/// Canonical representation for the rewrite-safe subset of `SELECT`.
7#[derive(Debug, Clone, PartialEq)]
8pub struct NormalizedSelect {
9    pub table: String,
10    pub columns: Vec<Expr>,
11    pub joins: Vec<NormalizedJoin>,
12    pub filters: Vec<FilterClause>,
13    pub order_by: Vec<OrderByItem>,
14    pub limit: Option<usize>,
15    pub offset: Option<usize>,
16}
17
18/// Canonical JOIN item.
19#[derive(Debug, Clone, PartialEq)]
20pub struct NormalizedJoin {
21    pub table: String,
22    pub kind: JoinKind,
23    pub on: Vec<Condition>,
24    pub on_true: bool,
25}
26
27/// Canonical WHERE clause block.
28#[derive(Debug, Clone, PartialEq)]
29pub struct FilterClause {
30    pub logical_op: LogicalOp,
31    pub conditions: Vec<Condition>,
32}
33
34/// Canonical ORDER BY item.
35#[derive(Debug, Clone, PartialEq)]
36pub struct OrderByItem {
37    pub expr: Expr,
38    pub order: SortOrder,
39}
40
41/// Errors returned when a query cannot be normalized into the supported subset.
42#[derive(Debug, Clone, PartialEq, Eq)]
43pub enum NormalizeError {
44    UnsupportedAction(Action),
45    UnsupportedFeature(&'static str),
46    DuplicateClause(&'static str),
47}
48
49impl std::fmt::Display for NormalizeError {
50    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51        match self {
52            Self::UnsupportedAction(action) => {
53                write!(f, "normalization only supports SELECT, got {}", action)
54            }
55            Self::UnsupportedFeature(feature) => {
56                write!(f, "unsupported SELECT feature: {}", feature)
57            }
58            Self::DuplicateClause(clause) => {
59                write!(f, "duplicate {} clause is not supported", clause)
60            }
61        }
62    }
63}
64
65impl std::error::Error for NormalizeError {}
66
67/// Normalize a QAIL `SELECT` into a canonical representation.
68pub fn normalize_select(qail: &Qail) -> Result<NormalizedSelect, NormalizeError> {
69    NormalizedSelect::try_from(qail)
70}
71
72impl TryFrom<&Qail> for NormalizedSelect {
73    type Error = NormalizeError;
74
75    fn try_from(qail: &Qail) -> Result<Self, Self::Error> {
76        if qail.action != Action::Get {
77            return Err(NormalizeError::UnsupportedAction(qail.action));
78        }
79
80        reject_unsupported_select_features(qail)?;
81
82        let mut filters = Vec::new();
83        let mut order_by = Vec::new();
84        let mut limit = None;
85        let mut offset = None;
86
87        for cage in &qail.cages {
88            match &cage.kind {
89                CageKind::Filter => filters.push(FilterClause {
90                    logical_op: cage.logical_op,
91                    conditions: cage.conditions.clone(),
92                }),
93                CageKind::Sort(order) => {
94                    for condition in &cage.conditions {
95                        order_by.push(OrderByItem {
96                            expr: condition.left.clone(),
97                            order: *order,
98                        });
99                    }
100                }
101                CageKind::Limit(value) => {
102                    if limit.replace(*value).is_some() {
103                        return Err(NormalizeError::DuplicateClause("LIMIT"));
104                    }
105                }
106                CageKind::Offset(value) => {
107                    if offset.replace(*value).is_some() {
108                        return Err(NormalizeError::DuplicateClause("OFFSET"));
109                    }
110                }
111                CageKind::Payload => {
112                    return Err(NormalizeError::UnsupportedFeature("payload cages"));
113                }
114                CageKind::Sample(_) => {
115                    return Err(NormalizeError::UnsupportedFeature("sample cages"));
116                }
117                CageKind::Qualify => {
118                    return Err(NormalizeError::UnsupportedFeature("QUALIFY cages"));
119                }
120                CageKind::Partition => {
121                    return Err(NormalizeError::UnsupportedFeature("GROUP BY cages"));
122                }
123            }
124        }
125
126        let columns = if qail.columns.is_empty() {
127            vec![Expr::Star]
128        } else {
129            qail.columns.clone()
130        };
131
132        let joins = qail
133            .joins
134            .iter()
135            .map(|join| NormalizedJoin {
136                table: join.table.clone(),
137                kind: join.kind.clone(),
138                on: join.on.clone().unwrap_or_default(),
139                on_true: join.on_true,
140            })
141            .collect();
142
143        Ok(Self {
144            table: qail.table.clone(),
145            columns,
146            joins,
147            filters,
148            order_by,
149            limit,
150            offset,
151        })
152    }
153}
154
155impl NormalizedSelect {
156    /// Apply a deterministic cleanup rewrite that preserves the supported-subset semantics.
157    ///
158    /// Cleanup rules:
159    /// - merge all `AND` filter clauses into one clause
160    /// - merge all `OR` filter clauses into one clause
161    /// - remove empty filter clauses
162    /// - sort and dedupe conditions in filter and join ON clauses
163    /// - dedupe repeated ORDER BY items while preserving the first occurrence
164    pub fn cleaned(&self) -> Self {
165        let mut cleaned = self.clone();
166
167        for join in &mut cleaned.joins {
168            if join.on_true {
169                join.on.clear();
170            } else {
171                join.on = dedupe_conditions_sorted(std::mem::take(&mut join.on));
172            }
173        }
174
175        let mut and_conditions = Vec::new();
176        let mut or_conditions = Vec::new();
177        for filter in &cleaned.filters {
178            match filter.logical_op {
179                LogicalOp::And => and_conditions.extend(filter.conditions.clone()),
180                LogicalOp::Or => or_conditions.extend(filter.conditions.clone()),
181            }
182        }
183
184        and_conditions = dedupe_conditions_sorted(and_conditions);
185        or_conditions = dedupe_conditions_sorted(or_conditions);
186
187        cleaned.filters.clear();
188        if !and_conditions.is_empty() {
189            cleaned.filters.push(FilterClause {
190                logical_op: LogicalOp::And,
191                conditions: and_conditions,
192            });
193        }
194        if !or_conditions.is_empty() {
195            cleaned.filters.push(FilterClause {
196                logical_op: LogicalOp::Or,
197                conditions: or_conditions,
198            });
199        }
200
201        cleaned.order_by = dedupe_order_by_stable(std::mem::take(&mut cleaned.order_by));
202
203        cleaned
204    }
205
206    /// Return a canonicalized clone for structural comparison.
207    ///
208    /// This only reorders semantically commutative components:
209    /// - condition order within a filter clause
210    /// - condition order within a join `ON` conjunction
211    ///
212    /// It intentionally preserves the order of:
213    /// - projection columns
214    /// - joins
215    /// - filter clauses
216    /// - ORDER BY items
217    pub fn canonicalized(&self) -> Self {
218        self.cleaned()
219    }
220
221    /// Compare two normalized queries under the supported-subset semantics.
222    pub fn equivalent_shape(&self, other: &Self) -> bool {
223        self.canonicalized() == other.canonicalized()
224    }
225
226    /// Lower the normalized form back into a canonical QAIL `SELECT`.
227    pub fn to_qail(&self) -> Qail {
228        let mut qail = Qail {
229            action: Action::Get,
230            table: self.table.clone(),
231            columns: self.columns.clone(),
232            joins: self
233                .joins
234                .iter()
235                .map(|join| Join {
236                    table: join.table.clone(),
237                    kind: join.kind.clone(),
238                    on: if join.on_true {
239                        None
240                    } else {
241                        Some(join.on.clone())
242                    },
243                    on_true: join.on_true,
244                })
245                .collect(),
246            ..Default::default()
247        };
248
249        for filter in &self.filters {
250            qail.cages.push(Cage {
251                kind: CageKind::Filter,
252                conditions: filter.conditions.clone(),
253                logical_op: filter.logical_op,
254            });
255        }
256
257        for item in &self.order_by {
258            qail.cages.push(Cage {
259                kind: CageKind::Sort(item.order),
260                conditions: vec![Condition {
261                    left: item.expr.clone(),
262                    op: crate::ast::Operator::Eq,
263                    value: Value::Null,
264                    is_array_unnest: false,
265                }],
266                logical_op: LogicalOp::And,
267            });
268        }
269
270        if let Some(limit) = self.limit {
271            qail.cages.push(Cage {
272                kind: CageKind::Limit(limit),
273                conditions: Vec::new(),
274                logical_op: LogicalOp::And,
275            });
276        }
277
278        if let Some(offset) = self.offset {
279            qail.cages.push(Cage {
280                kind: CageKind::Offset(offset),
281                conditions: Vec::new(),
282                logical_op: LogicalOp::And,
283            });
284        }
285
286        qail
287    }
288}
289
290fn reject_unsupported_select_features(qail: &Qail) -> Result<(), NormalizeError> {
291    if qail.distinct {
292        return Err(NormalizeError::UnsupportedFeature("DISTINCT"));
293    }
294    if !qail.distinct_on.is_empty() {
295        return Err(NormalizeError::UnsupportedFeature("DISTINCT ON"));
296    }
297    if !qail.table_constraints.is_empty() {
298        return Err(NormalizeError::UnsupportedFeature("table constraints"));
299    }
300    if !qail.set_ops.is_empty() {
301        return Err(NormalizeError::UnsupportedFeature("set operations"));
302    }
303    if !qail.having.is_empty() {
304        return Err(NormalizeError::UnsupportedFeature("HAVING"));
305    }
306    if !qail.group_by_mode.is_simple() {
307        return Err(NormalizeError::UnsupportedFeature("GROUP BY mode"));
308    }
309    if !qail.ctes.is_empty() {
310        return Err(NormalizeError::UnsupportedFeature("CTEs"));
311    }
312    if qail.returning.is_some() {
313        return Err(NormalizeError::UnsupportedFeature("RETURNING"));
314    }
315    if qail.on_conflict.is_some() {
316        return Err(NormalizeError::UnsupportedFeature("ON CONFLICT"));
317    }
318    if qail.source_query.is_some() {
319        return Err(NormalizeError::UnsupportedFeature("source query"));
320    }
321    if qail.channel.is_some() || qail.payload.is_some() {
322        return Err(NormalizeError::UnsupportedFeature("LISTEN/NOTIFY metadata"));
323    }
324    if qail.savepoint_name.is_some() {
325        return Err(NormalizeError::UnsupportedFeature("savepoint metadata"));
326    }
327    if !qail.from_tables.is_empty() || !qail.using_tables.is_empty() {
328        return Err(NormalizeError::UnsupportedFeature(
329            "auxiliary FROM/USING tables",
330        ));
331    }
332    if qail.lock_mode.is_some() || qail.skip_locked {
333        return Err(NormalizeError::UnsupportedFeature("row locks"));
334    }
335    if qail.fetch.is_some() {
336        return Err(NormalizeError::UnsupportedFeature("FETCH FIRST"));
337    }
338    if qail.default_values {
339        return Err(NormalizeError::UnsupportedFeature("DEFAULT VALUES"));
340    }
341    if qail.overriding.is_some() {
342        return Err(NormalizeError::UnsupportedFeature("OVERRIDING"));
343    }
344    if qail.sample.is_some() {
345        return Err(NormalizeError::UnsupportedFeature("TABLESAMPLE"));
346    }
347    if qail.only_table {
348        return Err(NormalizeError::UnsupportedFeature("ONLY"));
349    }
350    if qail.vector.is_some()
351        || qail.score_threshold.is_some()
352        || qail.vector_name.is_some()
353        || qail.with_vector
354        || qail.vector_size.is_some()
355        || qail.distance.is_some()
356        || qail.on_disk.is_some()
357    {
358        return Err(NormalizeError::UnsupportedFeature("vector search fields"));
359    }
360    if qail.function_def.is_some() || qail.trigger_def.is_some() {
361        return Err(NormalizeError::UnsupportedFeature("procedural objects"));
362    }
363
364    Ok(())
365}
366
367fn condition_signature(condition: &Condition) -> String {
368    format!(
369        "{}|{}|{}|{}",
370        expr_signature(&condition.left),
371        condition.op.sql_symbol(),
372        value_signature(&condition.value),
373        condition.is_array_unnest
374    )
375}
376
377fn expr_signature(expr: &Expr) -> String {
378    format!("{}", expr)
379}
380
381fn value_signature(value: &Value) -> String {
382    format!("{}", value)
383}
384
385fn order_item_signature(item: &OrderByItem) -> String {
386    format!("{}|{:?}", expr_signature(&item.expr), item.order)
387}
388
389fn dedupe_conditions_sorted(mut conditions: Vec<Condition>) -> Vec<Condition> {
390    conditions.sort_by_key(condition_signature);
391
392    let mut seen = HashSet::new();
393    let mut deduped = Vec::with_capacity(conditions.len());
394    for condition in conditions {
395        let signature = condition_signature(&condition);
396        if seen.insert(signature) {
397            deduped.push(condition);
398        }
399    }
400    deduped
401}
402
403fn dedupe_order_by_stable(items: Vec<OrderByItem>) -> Vec<OrderByItem> {
404    let mut seen = HashSet::new();
405    let mut deduped = Vec::with_capacity(items.len());
406    for item in items {
407        let signature = order_item_signature(&item);
408        if seen.insert(signature) {
409            deduped.push(item);
410        }
411    }
412    deduped
413}
414
415#[cfg(test)]
416mod tests {
417    use super::*;
418    use crate::ast::{Cage, Operator};
419    use crate::optimizer::cleanup_select;
420
421    #[test]
422    fn normalize_supported_select_into_canonical_form() {
423        let qail = Qail::get("users")
424            .filter("active", Operator::Eq, true)
425            .or_filter("role", Operator::Eq, "admin")
426            .left_join("teams", "users.team_id", "teams.id")
427            .order_desc("created_at")
428            .limit(10)
429            .offset(5);
430
431        let normalized = normalize_select(&qail).expect("supported select should normalize");
432
433        assert_eq!(normalized.table, "users");
434        assert_eq!(normalized.columns, vec![Expr::Star]);
435        assert_eq!(normalized.joins.len(), 1);
436        assert_eq!(normalized.filters.len(), 2);
437        assert_eq!(normalized.filters[0].logical_op, LogicalOp::And);
438        assert_eq!(normalized.filters[1].logical_op, LogicalOp::Or);
439        assert_eq!(normalized.order_by.len(), 1);
440        assert_eq!(normalized.limit, Some(10));
441        assert_eq!(normalized.offset, Some(5));
442    }
443
444    #[test]
445    fn normalize_flattens_multi_expr_sort_cages() {
446        let qail = Qail {
447            action: Action::Get,
448            table: "users".to_string(),
449            cages: vec![Cage {
450                kind: CageKind::Sort(SortOrder::Asc),
451                conditions: vec![
452                    Condition {
453                        left: Expr::Named("last_name".to_string()),
454                        op: Operator::Eq,
455                        value: Value::Null,
456                        is_array_unnest: false,
457                    },
458                    Condition {
459                        left: Expr::Named("first_name".to_string()),
460                        op: Operator::Eq,
461                        value: Value::Null,
462                        is_array_unnest: false,
463                    },
464                ],
465                logical_op: LogicalOp::And,
466            }],
467            ..Default::default()
468        };
469
470        let normalized = normalize_select(&qail).expect("sort cage should normalize");
471
472        assert_eq!(
473            normalized.order_by,
474            vec![
475                OrderByItem {
476                    expr: Expr::Named("last_name".to_string()),
477                    order: SortOrder::Asc,
478                },
479                OrderByItem {
480                    expr: Expr::Named("first_name".to_string()),
481                    order: SortOrder::Asc,
482                },
483            ]
484        );
485    }
486
487    #[test]
488    fn normalize_rejects_unsupported_features() {
489        let qail = Qail::get("users").distinct_on(["email"]);
490        let err = normalize_select(&qail).expect_err("DISTINCT ON should be rejected");
491        assert_eq!(err, NormalizeError::UnsupportedFeature("DISTINCT ON"));
492    }
493
494    #[test]
495    fn normalize_rejects_duplicate_limit() {
496        let qail = Qail::get("users").limit(10).limit(20);
497        let err = normalize_select(&qail).expect_err("duplicate LIMIT should be rejected");
498        assert_eq!(err, NormalizeError::DuplicateClause("LIMIT"));
499    }
500
501    #[test]
502    fn normalized_select_roundtrips_to_canonical_qail() {
503        let qail = Qail::get("users")
504            .column("id")
505            .column("email")
506            .filter("active", Operator::Eq, true)
507            .order_asc("email")
508            .limit(25);
509
510        let normalized = normalize_select(&qail).expect("supported select should normalize");
511        let roundtrip = normalized.to_qail();
512
513        assert_eq!(roundtrip.action, Action::Get);
514        assert_eq!(roundtrip.table, "users");
515        assert_eq!(
516            roundtrip.columns,
517            vec![Expr::Named("id".into()), Expr::Named("email".into())]
518        );
519        assert_eq!(
520            normalize_select(&roundtrip).expect("roundtrip should normalize"),
521            normalized
522        );
523    }
524
525    #[test]
526    fn equivalent_shape_ignores_condition_order_within_filter_clause() {
527        let left = Qail::get("users")
528            .filter("active", Operator::Eq, true)
529            .filter("role", Operator::Eq, "admin");
530
531        let right = Qail {
532            action: Action::Get,
533            table: "users".to_string(),
534            cages: vec![Cage {
535                kind: CageKind::Filter,
536                conditions: vec![
537                    Condition {
538                        left: Expr::Named("role".to_string()),
539                        op: Operator::Eq,
540                        value: Value::String("admin".to_string()),
541                        is_array_unnest: false,
542                    },
543                    Condition {
544                        left: Expr::Named("active".to_string()),
545                        op: Operator::Eq,
546                        value: Value::Bool(true),
547                        is_array_unnest: false,
548                    },
549                ],
550                logical_op: LogicalOp::And,
551            }],
552            ..Default::default()
553        };
554
555        let left = normalize_select(&left).expect("left query should normalize");
556        let right = normalize_select(&right).expect("right query should normalize");
557
558        assert!(left.equivalent_shape(&right));
559    }
560
561    #[test]
562    fn equivalent_shape_preserves_order_sensitive_parts() {
563        let left = normalize_select(
564            &Qail::get("users")
565                .column("id")
566                .column("email")
567                .order_asc("email"),
568        )
569        .expect("left query should normalize");
570
571        let right = normalize_select(
572            &Qail::get("users")
573                .column("email")
574                .column("id")
575                .order_asc("email"),
576        )
577        .expect("right query should normalize");
578
579        assert!(!left.equivalent_shape(&right));
580    }
581
582    #[test]
583    fn cleanup_merges_filter_clauses_and_dedupes_conditions() {
584        let qail = Qail {
585            action: Action::Get,
586            table: "users".to_string(),
587            cages: vec![
588                Cage {
589                    kind: CageKind::Filter,
590                    conditions: vec![Condition {
591                        left: Expr::Named("active".to_string()),
592                        op: Operator::Eq,
593                        value: Value::Bool(true),
594                        is_array_unnest: false,
595                    }],
596                    logical_op: LogicalOp::And,
597                },
598                Cage {
599                    kind: CageKind::Filter,
600                    conditions: vec![Condition {
601                        left: Expr::Named("active".to_string()),
602                        op: Operator::Eq,
603                        value: Value::Bool(true),
604                        is_array_unnest: false,
605                    }],
606                    logical_op: LogicalOp::And,
607                },
608                Cage {
609                    kind: CageKind::Filter,
610                    conditions: vec![Condition {
611                        left: Expr::Named("role".to_string()),
612                        op: Operator::Eq,
613                        value: Value::String("admin".to_string()),
614                        is_array_unnest: false,
615                    }],
616                    logical_op: LogicalOp::Or,
617                },
618            ],
619            ..Default::default()
620        };
621
622        let normalized = normalize_select(&qail).expect("query should normalize");
623        let cleaned = cleanup_select(&normalized);
624
625        assert_eq!(cleaned.filters.len(), 2);
626        assert_eq!(cleaned.filters[0].logical_op, LogicalOp::And);
627        assert_eq!(cleaned.filters[0].conditions.len(), 1);
628        assert_eq!(cleaned.filters[1].logical_op, LogicalOp::Or);
629        assert_eq!(cleaned.filters[1].conditions.len(), 1);
630    }
631
632    #[test]
633    fn cleanup_dedupes_order_by_stably() {
634        let qail = Qail::get("users")
635            .order_asc("email")
636            .order_asc("email")
637            .order_desc("created_at");
638        let normalized = normalize_select(&qail).expect("query should normalize");
639        let cleaned = cleanup_select(&normalized);
640
641        assert_eq!(
642            cleaned.order_by,
643            vec![
644                OrderByItem {
645                    expr: Expr::Named("email".to_string()),
646                    order: SortOrder::Asc,
647                },
648                OrderByItem {
649                    expr: Expr::Named("created_at".to_string()),
650                    order: SortOrder::Desc,
651                },
652            ]
653        );
654    }
655
656    #[test]
657    fn cleanup_is_idempotent() {
658        let qail = Qail::get("users")
659            .filter("active", Operator::Eq, true)
660            .filter("active", Operator::Eq, true)
661            .order_asc("email")
662            .order_asc("email");
663
664        let normalized = normalize_select(&qail).expect("query should normalize");
665        let cleaned = cleanup_select(&normalized);
666        let cleaned_twice = cleanup_select(&cleaned);
667
668        assert_eq!(cleaned, cleaned_twice);
669    }
670
671    #[test]
672    fn cleanup_preserves_equivalent_shape() {
673        let qail = Qail::get("users")
674            .filter("active", Operator::Eq, true)
675            .filter("active", Operator::Eq, true)
676            .or_filter("role", Operator::Eq, "admin");
677
678        let normalized = normalize_select(&qail).expect("query should normalize");
679        let cleaned = cleanup_select(&normalized);
680
681        assert!(normalized.equivalent_shape(&cleaned));
682    }
683}