Skip to main content

qail_core/optimizer/
normalized_select.rs

1use crate::ast::{
2    Action, Cage, CageKind, Condition, Expr, Join, JoinKind, LogicalOp, Qail, SortOrder, Value,
3};
4use std::collections::HashSet;
5
6/// Canonical representation for the rewrite-safe subset of `SELECT`.
7#[derive(Debug, Clone, PartialEq)]
8pub struct NormalizedSelect {
9    pub table: String,
10    pub columns: Vec<Expr>,
11    pub joins: Vec<NormalizedJoin>,
12    pub filters: Vec<FilterClause>,
13    pub order_by: Vec<OrderByItem>,
14    pub limit: Option<usize>,
15    pub offset: Option<usize>,
16}
17
18/// Canonical JOIN item.
19#[derive(Debug, Clone, PartialEq)]
20pub struct NormalizedJoin {
21    pub table: String,
22    pub kind: JoinKind,
23    pub on: Option<Vec<Condition>>,
24    pub on_true: bool,
25}
26
27/// Canonical WHERE clause block.
28#[derive(Debug, Clone, PartialEq)]
29pub struct FilterClause {
30    pub logical_op: LogicalOp,
31    pub conditions: Vec<Condition>,
32}
33
34/// Canonical ORDER BY item.
35#[derive(Debug, Clone, PartialEq)]
36pub struct OrderByItem {
37    pub expr: Expr,
38    pub order: SortOrder,
39}
40
41/// Errors returned when a query cannot be normalized into the supported subset.
42#[derive(Debug, Clone, PartialEq, Eq)]
43pub enum NormalizeError {
44    UnsupportedAction(Action),
45    UnsupportedFeature(&'static str),
46    DuplicateClause(&'static str),
47}
48
49impl std::fmt::Display for NormalizeError {
50    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51        match self {
52            Self::UnsupportedAction(action) => {
53                write!(f, "normalization only supports SELECT, got {}", action)
54            }
55            Self::UnsupportedFeature(feature) => {
56                write!(f, "unsupported SELECT feature: {}", feature)
57            }
58            Self::DuplicateClause(clause) => {
59                write!(f, "duplicate {} clause is not supported", clause)
60            }
61        }
62    }
63}
64
65impl std::error::Error for NormalizeError {}
66
67/// Normalize a QAIL `SELECT` into a canonical representation.
68pub fn normalize_select(qail: &Qail) -> Result<NormalizedSelect, NormalizeError> {
69    NormalizedSelect::try_from(qail)
70}
71
72impl TryFrom<&Qail> for NormalizedSelect {
73    type Error = NormalizeError;
74
75    fn try_from(qail: &Qail) -> Result<Self, Self::Error> {
76        if qail.action != Action::Get {
77            return Err(NormalizeError::UnsupportedAction(qail.action));
78        }
79
80        reject_unsupported_select_features(qail)?;
81
82        let mut filters = Vec::new();
83        let mut order_by = Vec::new();
84        let mut limit = None;
85        let mut offset = None;
86
87        for cage in &qail.cages {
88            match &cage.kind {
89                CageKind::Filter => filters.push(FilterClause {
90                    logical_op: cage.logical_op,
91                    conditions: cage.conditions.clone(),
92                }),
93                CageKind::Sort(order) => {
94                    for condition in &cage.conditions {
95                        order_by.push(OrderByItem {
96                            expr: condition.left.clone(),
97                            order: *order,
98                        });
99                    }
100                }
101                CageKind::Limit(value) => {
102                    if limit.replace(*value).is_some() {
103                        return Err(NormalizeError::DuplicateClause("LIMIT"));
104                    }
105                }
106                CageKind::Offset(value) => {
107                    if offset.replace(*value).is_some() {
108                        return Err(NormalizeError::DuplicateClause("OFFSET"));
109                    }
110                }
111                CageKind::Payload => {
112                    return Err(NormalizeError::UnsupportedFeature("payload cages"));
113                }
114                CageKind::Sample(_) => {
115                    return Err(NormalizeError::UnsupportedFeature("sample cages"));
116                }
117                CageKind::Qualify => {
118                    return Err(NormalizeError::UnsupportedFeature("QUALIFY cages"));
119                }
120                CageKind::Partition => {
121                    return Err(NormalizeError::UnsupportedFeature("GROUP BY cages"));
122                }
123            }
124        }
125
126        let columns = if qail.columns.is_empty() {
127            vec![Expr::Star]
128        } else {
129            qail.columns.clone()
130        };
131
132        let joins = qail
133            .joins
134            .iter()
135            .map(|join| NormalizedJoin {
136                table: join.table.clone(),
137                kind: join.kind.clone(),
138                on: join.on.clone(),
139                on_true: join.on_true,
140            })
141            .collect();
142
143        Ok(Self {
144            table: qail.table.clone(),
145            columns,
146            joins,
147            filters,
148            order_by,
149            limit,
150            offset,
151        })
152    }
153}
154
155impl NormalizedSelect {
156    /// Apply a deterministic cleanup rewrite that preserves the supported-subset semantics.
157    ///
158    /// Cleanup rules:
159    /// - merge all `AND` filter clauses into one clause
160    /// - merge all `OR` filter clauses into one clause
161    /// - remove empty filter clauses
162    /// - sort and dedupe conditions in filter and join ON clauses
163    /// - dedupe repeated ORDER BY items while preserving the first occurrence
164    pub fn cleaned(&self) -> Self {
165        let mut cleaned = self.clone();
166
167        for join in &mut cleaned.joins {
168            if join.on_true {
169                join.on = None;
170            } else if let Some(on) = &mut join.on {
171                *on = dedupe_conditions_sorted(std::mem::take(on));
172            }
173        }
174
175        let mut and_conditions = Vec::new();
176        let mut or_conditions = Vec::new();
177        for filter in &cleaned.filters {
178            match filter.logical_op {
179                LogicalOp::And => and_conditions.extend(filter.conditions.clone()),
180                LogicalOp::Or => or_conditions.extend(filter.conditions.clone()),
181            }
182        }
183
184        and_conditions = dedupe_conditions_sorted(and_conditions);
185        or_conditions = dedupe_conditions_sorted(or_conditions);
186
187        cleaned.filters.clear();
188        if !and_conditions.is_empty() {
189            cleaned.filters.push(FilterClause {
190                logical_op: LogicalOp::And,
191                conditions: and_conditions,
192            });
193        }
194        if !or_conditions.is_empty() {
195            cleaned.filters.push(FilterClause {
196                logical_op: LogicalOp::Or,
197                conditions: or_conditions,
198            });
199        }
200
201        cleaned.order_by = dedupe_order_by_stable(std::mem::take(&mut cleaned.order_by));
202
203        cleaned
204    }
205
206    /// Return a canonicalized clone for structural comparison.
207    ///
208    /// This only reorders semantically commutative components:
209    /// - condition order within a filter clause
210    /// - condition order within a join `ON` conjunction
211    ///
212    /// It intentionally preserves the order of:
213    /// - projection columns
214    /// - joins
215    /// - filter clauses
216    /// - ORDER BY items
217    pub fn canonicalized(&self) -> Self {
218        self.cleaned()
219    }
220
221    /// Compare two normalized queries under the supported-subset semantics.
222    pub fn equivalent_shape(&self, other: &Self) -> bool {
223        self.canonicalized() == other.canonicalized()
224    }
225
226    /// Lower the normalized form back into a canonical QAIL `SELECT`.
227    pub fn to_qail(&self) -> Qail {
228        let mut qail = Qail {
229            action: Action::Get,
230            table: self.table.clone(),
231            columns: self.columns.clone(),
232            joins: self
233                .joins
234                .iter()
235                .map(|join| Join {
236                    table: join.table.clone(),
237                    kind: join.kind.clone(),
238                    on: if join.on_true { None } else { join.on.clone() },
239                    on_true: join.on_true,
240                })
241                .collect(),
242            ..Default::default()
243        };
244
245        for filter in &self.filters {
246            qail.cages.push(Cage {
247                kind: CageKind::Filter,
248                conditions: filter.conditions.clone(),
249                logical_op: filter.logical_op,
250            });
251        }
252
253        for item in &self.order_by {
254            qail.cages.push(Cage {
255                kind: CageKind::Sort(item.order),
256                conditions: vec![Condition {
257                    left: item.expr.clone(),
258                    op: crate::ast::Operator::Eq,
259                    value: Value::Null,
260                    is_array_unnest: false,
261                }],
262                logical_op: LogicalOp::And,
263            });
264        }
265
266        if let Some(limit) = self.limit {
267            qail.cages.push(Cage {
268                kind: CageKind::Limit(limit),
269                conditions: Vec::new(),
270                logical_op: LogicalOp::And,
271            });
272        }
273
274        if let Some(offset) = self.offset {
275            qail.cages.push(Cage {
276                kind: CageKind::Offset(offset),
277                conditions: Vec::new(),
278                logical_op: LogicalOp::And,
279            });
280        }
281
282        qail
283    }
284}
285
286fn reject_unsupported_select_features(qail: &Qail) -> Result<(), NormalizeError> {
287    if qail.distinct {
288        return Err(NormalizeError::UnsupportedFeature("DISTINCT"));
289    }
290    if !qail.distinct_on.is_empty() {
291        return Err(NormalizeError::UnsupportedFeature("DISTINCT ON"));
292    }
293    if !qail.table_constraints.is_empty() {
294        return Err(NormalizeError::UnsupportedFeature("table constraints"));
295    }
296    if !qail.set_ops.is_empty() {
297        return Err(NormalizeError::UnsupportedFeature("set operations"));
298    }
299    if !qail.having.is_empty() {
300        return Err(NormalizeError::UnsupportedFeature("HAVING"));
301    }
302    if !qail.group_by_mode.is_simple() {
303        return Err(NormalizeError::UnsupportedFeature("GROUP BY mode"));
304    }
305    if !qail.ctes.is_empty() {
306        return Err(NormalizeError::UnsupportedFeature("CTEs"));
307    }
308    if qail.returning.is_some() {
309        return Err(NormalizeError::UnsupportedFeature("RETURNING"));
310    }
311    if qail.on_conflict.is_some() {
312        return Err(NormalizeError::UnsupportedFeature("ON CONFLICT"));
313    }
314    if qail.source_query.is_some() {
315        return Err(NormalizeError::UnsupportedFeature("source query"));
316    }
317    if qail.channel.is_some() || qail.payload.is_some() {
318        return Err(NormalizeError::UnsupportedFeature("LISTEN/NOTIFY metadata"));
319    }
320    if qail.savepoint_name.is_some() {
321        return Err(NormalizeError::UnsupportedFeature("savepoint metadata"));
322    }
323    if !qail.from_tables.is_empty() || !qail.using_tables.is_empty() {
324        return Err(NormalizeError::UnsupportedFeature(
325            "auxiliary FROM/USING tables",
326        ));
327    }
328    if qail.lock_mode.is_some() || qail.skip_locked {
329        return Err(NormalizeError::UnsupportedFeature("row locks"));
330    }
331    if qail.fetch.is_some() {
332        return Err(NormalizeError::UnsupportedFeature("FETCH FIRST"));
333    }
334    if qail.default_values {
335        return Err(NormalizeError::UnsupportedFeature("DEFAULT VALUES"));
336    }
337    if qail.overriding.is_some() {
338        return Err(NormalizeError::UnsupportedFeature("OVERRIDING"));
339    }
340    if qail.sample.is_some() {
341        return Err(NormalizeError::UnsupportedFeature("TABLESAMPLE"));
342    }
343    if qail.only_table {
344        return Err(NormalizeError::UnsupportedFeature("ONLY"));
345    }
346    if qail.vector.is_some()
347        || qail.score_threshold.is_some()
348        || qail.vector_name.is_some()
349        || qail.with_vector
350        || qail.vector_size.is_some()
351        || qail.distance.is_some()
352        || qail.on_disk.is_some()
353    {
354        return Err(NormalizeError::UnsupportedFeature("vector search fields"));
355    }
356    if qail.function_def.is_some() || qail.trigger_def.is_some() {
357        return Err(NormalizeError::UnsupportedFeature("procedural objects"));
358    }
359
360    Ok(())
361}
362
363fn condition_signature(condition: &Condition) -> String {
364    format!(
365        "{}|{}|{}|{}",
366        expr_signature(&condition.left),
367        condition.op.sql_symbol(),
368        value_signature(&condition.value),
369        condition.is_array_unnest
370    )
371}
372
373fn expr_signature(expr: &Expr) -> String {
374    format!("{}", expr)
375}
376
377fn value_signature(value: &Value) -> String {
378    format!("{}", value)
379}
380
381fn order_item_signature(item: &OrderByItem) -> String {
382    format!("{}|{:?}", expr_signature(&item.expr), item.order)
383}
384
385fn dedupe_conditions_sorted(mut conditions: Vec<Condition>) -> Vec<Condition> {
386    conditions.sort_by_key(condition_signature);
387
388    let mut seen = HashSet::new();
389    let mut deduped = Vec::with_capacity(conditions.len());
390    for condition in conditions {
391        let signature = condition_signature(&condition);
392        if seen.insert(signature) {
393            deduped.push(condition);
394        }
395    }
396    deduped
397}
398
399fn dedupe_order_by_stable(items: Vec<OrderByItem>) -> Vec<OrderByItem> {
400    let mut seen = HashSet::new();
401    let mut deduped = Vec::with_capacity(items.len());
402    for item in items {
403        let signature = order_item_signature(&item);
404        if seen.insert(signature) {
405            deduped.push(item);
406        }
407    }
408    deduped
409}
410
411#[cfg(test)]
412mod tests {
413    use super::*;
414    use crate::ast::{Cage, Operator};
415    use crate::optimizer::cleanup_select;
416
417    #[test]
418    fn normalize_supported_select_into_canonical_form() {
419        let qail = Qail::get("users")
420            .filter("active", Operator::Eq, true)
421            .or_filter("role", Operator::Eq, "admin")
422            .left_join("teams", "users.team_id", "teams.id")
423            .order_desc("created_at")
424            .limit(10)
425            .offset(5);
426
427        let normalized = normalize_select(&qail).expect("supported select should normalize");
428
429        assert_eq!(normalized.table, "users");
430        assert_eq!(normalized.columns, vec![Expr::Star]);
431        assert_eq!(normalized.joins.len(), 1);
432        assert_eq!(normalized.filters.len(), 2);
433        assert_eq!(normalized.filters[0].logical_op, LogicalOp::And);
434        assert_eq!(normalized.filters[1].logical_op, LogicalOp::Or);
435        assert_eq!(normalized.order_by.len(), 1);
436        assert_eq!(normalized.limit, Some(10));
437        assert_eq!(normalized.offset, Some(5));
438    }
439
440    #[test]
441    fn normalize_flattens_multi_expr_sort_cages() {
442        let qail = Qail {
443            action: Action::Get,
444            table: "users".to_string(),
445            cages: vec![Cage {
446                kind: CageKind::Sort(SortOrder::Asc),
447                conditions: vec![
448                    Condition {
449                        left: Expr::Named("last_name".to_string()),
450                        op: Operator::Eq,
451                        value: Value::Null,
452                        is_array_unnest: false,
453                    },
454                    Condition {
455                        left: Expr::Named("first_name".to_string()),
456                        op: Operator::Eq,
457                        value: Value::Null,
458                        is_array_unnest: false,
459                    },
460                ],
461                logical_op: LogicalOp::And,
462            }],
463            ..Default::default()
464        };
465
466        let normalized = normalize_select(&qail).expect("sort cage should normalize");
467
468        assert_eq!(
469            normalized.order_by,
470            vec![
471                OrderByItem {
472                    expr: Expr::Named("last_name".to_string()),
473                    order: SortOrder::Asc,
474                },
475                OrderByItem {
476                    expr: Expr::Named("first_name".to_string()),
477                    order: SortOrder::Asc,
478                },
479            ]
480        );
481    }
482
483    #[test]
484    fn normalize_rejects_unsupported_features() {
485        let qail = Qail::get("users").distinct_on(["email"]);
486        let err = normalize_select(&qail).expect_err("DISTINCT ON should be rejected");
487        assert_eq!(err, NormalizeError::UnsupportedFeature("DISTINCT ON"));
488    }
489
490    #[test]
491    fn normalize_rejects_duplicate_limit() {
492        let qail = Qail::get("users").limit(10).limit(20);
493        let err = normalize_select(&qail).expect_err("duplicate LIMIT should be rejected");
494        assert_eq!(err, NormalizeError::DuplicateClause("LIMIT"));
495    }
496
497    #[test]
498    fn normalized_select_roundtrips_to_canonical_qail() {
499        let qail = Qail::get("users")
500            .column("id")
501            .column("email")
502            .filter("active", Operator::Eq, true)
503            .order_asc("email")
504            .limit(25);
505
506        let normalized = normalize_select(&qail).expect("supported select should normalize");
507        let roundtrip = normalized.to_qail();
508
509        assert_eq!(roundtrip.action, Action::Get);
510        assert_eq!(roundtrip.table, "users");
511        assert_eq!(
512            roundtrip.columns,
513            vec![Expr::Named("id".into()), Expr::Named("email".into())]
514        );
515        assert_eq!(
516            normalize_select(&roundtrip).expect("roundtrip should normalize"),
517            normalized
518        );
519    }
520
521    #[test]
522    fn normalize_preserves_implicit_join_conditions() {
523        let qail = Qail {
524            action: Action::Get,
525            table: "users".to_string(),
526            joins: vec![Join {
527                table: "posts".to_string(),
528                kind: JoinKind::Left,
529                on: None,
530                on_true: false,
531            }],
532            ..Default::default()
533        };
534
535        let normalized = normalize_select(&qail).expect("implicit joins should normalize");
536        assert_eq!(normalized.joins[0].on, None);
537
538        let cleaned = cleanup_select(&normalized);
539        assert_eq!(cleaned.joins[0].on, None);
540        assert_eq!(cleaned.to_qail().joins[0].on, None);
541    }
542
543    #[test]
544    fn equivalent_shape_ignores_condition_order_within_filter_clause() {
545        let left = Qail::get("users")
546            .filter("active", Operator::Eq, true)
547            .filter("role", Operator::Eq, "admin");
548
549        let right = Qail {
550            action: Action::Get,
551            table: "users".to_string(),
552            cages: vec![Cage {
553                kind: CageKind::Filter,
554                conditions: vec![
555                    Condition {
556                        left: Expr::Named("role".to_string()),
557                        op: Operator::Eq,
558                        value: Value::String("admin".to_string()),
559                        is_array_unnest: false,
560                    },
561                    Condition {
562                        left: Expr::Named("active".to_string()),
563                        op: Operator::Eq,
564                        value: Value::Bool(true),
565                        is_array_unnest: false,
566                    },
567                ],
568                logical_op: LogicalOp::And,
569            }],
570            ..Default::default()
571        };
572
573        let left = normalize_select(&left).expect("left query should normalize");
574        let right = normalize_select(&right).expect("right query should normalize");
575
576        assert!(left.equivalent_shape(&right));
577    }
578
579    #[test]
580    fn equivalent_shape_preserves_order_sensitive_parts() {
581        let left = normalize_select(
582            &Qail::get("users")
583                .column("id")
584                .column("email")
585                .order_asc("email"),
586        )
587        .expect("left query should normalize");
588
589        let right = normalize_select(
590            &Qail::get("users")
591                .column("email")
592                .column("id")
593                .order_asc("email"),
594        )
595        .expect("right query should normalize");
596
597        assert!(!left.equivalent_shape(&right));
598    }
599
600    #[test]
601    fn cleanup_merges_filter_clauses_and_dedupes_conditions() {
602        let qail = Qail {
603            action: Action::Get,
604            table: "users".to_string(),
605            cages: vec![
606                Cage {
607                    kind: CageKind::Filter,
608                    conditions: vec![Condition {
609                        left: Expr::Named("active".to_string()),
610                        op: Operator::Eq,
611                        value: Value::Bool(true),
612                        is_array_unnest: false,
613                    }],
614                    logical_op: LogicalOp::And,
615                },
616                Cage {
617                    kind: CageKind::Filter,
618                    conditions: vec![Condition {
619                        left: Expr::Named("active".to_string()),
620                        op: Operator::Eq,
621                        value: Value::Bool(true),
622                        is_array_unnest: false,
623                    }],
624                    logical_op: LogicalOp::And,
625                },
626                Cage {
627                    kind: CageKind::Filter,
628                    conditions: vec![Condition {
629                        left: Expr::Named("role".to_string()),
630                        op: Operator::Eq,
631                        value: Value::String("admin".to_string()),
632                        is_array_unnest: false,
633                    }],
634                    logical_op: LogicalOp::Or,
635                },
636            ],
637            ..Default::default()
638        };
639
640        let normalized = normalize_select(&qail).expect("query should normalize");
641        let cleaned = cleanup_select(&normalized);
642
643        assert_eq!(cleaned.filters.len(), 2);
644        assert_eq!(cleaned.filters[0].logical_op, LogicalOp::And);
645        assert_eq!(cleaned.filters[0].conditions.len(), 1);
646        assert_eq!(cleaned.filters[1].logical_op, LogicalOp::Or);
647        assert_eq!(cleaned.filters[1].conditions.len(), 1);
648    }
649
650    #[test]
651    fn cleanup_dedupes_order_by_stably() {
652        let qail = Qail::get("users")
653            .order_asc("email")
654            .order_asc("email")
655            .order_desc("created_at");
656        let normalized = normalize_select(&qail).expect("query should normalize");
657        let cleaned = cleanup_select(&normalized);
658
659        assert_eq!(
660            cleaned.order_by,
661            vec![
662                OrderByItem {
663                    expr: Expr::Named("email".to_string()),
664                    order: SortOrder::Asc,
665                },
666                OrderByItem {
667                    expr: Expr::Named("created_at".to_string()),
668                    order: SortOrder::Desc,
669                },
670            ]
671        );
672    }
673
674    #[test]
675    fn cleanup_is_idempotent() {
676        let qail = Qail::get("users")
677            .filter("active", Operator::Eq, true)
678            .filter("active", Operator::Eq, true)
679            .order_asc("email")
680            .order_asc("email");
681
682        let normalized = normalize_select(&qail).expect("query should normalize");
683        let cleaned = cleanup_select(&normalized);
684        let cleaned_twice = cleanup_select(&cleaned);
685
686        assert_eq!(cleaned, cleaned_twice);
687    }
688
689    #[test]
690    fn cleanup_preserves_equivalent_shape() {
691        let qail = Qail::get("users")
692            .filter("active", Operator::Eq, true)
693            .filter("active", Operator::Eq, true)
694            .or_filter("role", Operator::Eq, "admin");
695
696        let normalized = normalize_select(&qail).expect("query should normalize");
697        let cleaned = cleanup_select(&normalized);
698
699        assert!(normalized.equivalent_shape(&cleaned));
700    }
701}