vibesql_executor/cache/
query_signature.rs

1//! Query signature generation for cache keys
2//!
3//! Generates deterministic cache keys from SQL queries by normalizing the AST
4//! and creating a hash. Queries with identical structure (different literals)
5//! will have the same signature.
6
7use std::{
8    collections::hash_map::DefaultHasher,
9    hash::{Hash, Hasher},
10};
11
12use vibesql_ast::{
13    arena::{
14        Expression as ArenaExpression, ExtendedExpr as ArenaExtendedExpr,
15        FromClause as ArenaFromClause, GroupByClause as ArenaGroupByClause,
16        GroupingElement as ArenaGroupingElement, GroupingSet as ArenaGroupingSet,
17        MixedGroupingItem as ArenaMixedGroupingItem, SelectItem as ArenaSelectItem,
18        SelectStmt as ArenaSelectStmt, WindowFunctionSpec as ArenaWindowFunctionSpec,
19    },
20    Expression, Statement,
21};
22
/// Structural fingerprint of a query, used as a cache key.
///
/// Wraps a single 64-bit hash value; two signatures compare equal exactly when
/// their hash values are equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct QuerySignature {
    /// The 64-bit hash identifying the query.
    hash: u64,
}
28
29impl QuerySignature {
30    /// Create a signature from SQL text (legacy string-based approach)
31    pub fn from_sql(sql: &str) -> Self {
32        let normalized = Self::normalize(sql);
33        let mut hasher = DefaultHasher::new();
34        normalized.hash(&mut hasher);
35        let hash = hasher.finish();
36        Self { hash }
37    }
38
39    /// Create a signature from parsed AST, ignoring literal values
40    /// This allows queries with different literals but identical structure to share cached plans
41    pub fn from_ast(stmt: &Statement) -> Self {
42        let mut hasher = DefaultHasher::new();
43        Self::hash_statement(stmt, &mut hasher);
44        let hash = hasher.finish();
45        Self { hash }
46    }
47
48    /// Create a signature from arena-allocated SelectStmt, ignoring literal values
49    pub fn from_arena_select(select: &ArenaSelectStmt<'_>) -> Self {
50        let mut hasher = DefaultHasher::new();
51        "SELECT".hash(&mut hasher);
52        Self::hash_arena_select(select, &mut hasher);
53        let hash = hasher.finish();
54        Self { hash }
55    }
56
57    /// Get the underlying hash
58    pub fn hash(&self) -> u64 {
59        self.hash
60    }
61
62    /// Normalize SQL: trim and collapse whitespace
63    fn normalize(sql: &str) -> String {
64        sql.split_whitespace().collect::<Vec<_>>().join(" ").to_lowercase()
65    }
66
67    /// Hash a statement, replacing literals with a placeholder marker
68    fn hash_statement(stmt: &Statement, hasher: &mut DefaultHasher) {
69        match stmt {
70            Statement::Select(select) => {
71                "SELECT".hash(hasher);
72                Self::hash_select(select, hasher);
73            }
74            Statement::Insert(insert) => {
75                "INSERT".hash(hasher);
76                insert.table_name.hash(hasher);
77                for col in &insert.columns {
78                    col.hash(hasher);
79                }
80                // Hash the insert source structure without literals
81                match &insert.source {
82                    vibesql_ast::InsertSource::Values(rows) => {
83                        "VALUES".hash(hasher);
84                        rows.len().hash(hasher);
85                        for row in rows {
86                            row.len().hash(hasher);
87                            for expr in row {
88                                Self::hash_expression(expr, hasher);
89                            }
90                        }
91                    }
92                    vibesql_ast::InsertSource::Select(select) => {
93                        "SELECT".hash(hasher);
94                        Self::hash_select(select, hasher);
95                    }
96                    vibesql_ast::InsertSource::DefaultValues => {
97                        "DEFAULT_VALUES".hash(hasher);
98                    }
99                }
100            }
101            Statement::Update(update) => {
102                "UPDATE".hash(hasher);
103                update.table_name.hash(hasher);
104                for assignment in &update.assignments {
105                    assignment.column.hash(hasher);
106                    Self::hash_expression(&assignment.value, hasher);
107                }
108                if let Some(ref where_clause) = update.where_clause {
109                    match where_clause {
110                        vibesql_ast::WhereClause::Condition(expr) => {
111                            Self::hash_expression(expr, hasher);
112                        }
113                        vibesql_ast::WhereClause::CurrentOf(cursor) => {
114                            "CURRENT_OF".hash(hasher);
115                            cursor.hash(hasher);
116                        }
117                    }
118                }
119            }
120            Statement::Delete(delete) => {
121                "DELETE".hash(hasher);
122                delete.table_name.hash(hasher);
123                if let Some(ref where_clause) = delete.where_clause {
124                    match where_clause {
125                        vibesql_ast::WhereClause::Condition(expr) => {
126                            Self::hash_expression(expr, hasher);
127                        }
128                        vibesql_ast::WhereClause::CurrentOf(cursor) => {
129                            "CURRENT_OF".hash(hasher);
130                            cursor.hash(hasher);
131                        }
132                    }
133                }
134            }
135            // For other statement types, fall back to discriminant hashing
136            _ => {
137                std::mem::discriminant(stmt).hash(hasher);
138            }
139        }
140    }
141
142    /// Hash a SELECT statement structure
143    fn hash_select(select: &vibesql_ast::SelectStmt, hasher: &mut DefaultHasher) {
144        // Hash DISTINCT
145        select.distinct.hash(hasher);
146
147        // Hash select items
148        for item in &select.select_list {
149            match item {
150                vibesql_ast::SelectItem::Wildcard { .. } => "WILDCARD".hash(hasher),
151                vibesql_ast::SelectItem::QualifiedWildcard { qualifier, .. } => {
152                    "QUALIFIED_WILDCARD".hash(hasher);
153                    qualifier.hash(hasher);
154                }
155                vibesql_ast::SelectItem::Expression { expr, alias, .. } => {
156                    Self::hash_expression(expr, hasher);
157                    alias.hash(hasher);
158                }
159            }
160        }
161
162        // Hash FROM clause
163        if let Some(ref from) = select.from {
164            Self::hash_from_clause(from, hasher);
165        }
166
167        // Hash WHERE clause
168        if let Some(ref where_clause) = select.where_clause {
169            Self::hash_expression(where_clause, hasher);
170        }
171
172        // Hash GROUP BY
173        if let Some(ref group_by) = select.group_by {
174            Self::hash_group_by(group_by, hasher);
175        }
176
177        // Hash HAVING
178        if let Some(ref having) = select.having {
179            Self::hash_expression(having, hasher);
180        }
181
182        // Hash ORDER BY
183        if let Some(ref order_by) = select.order_by {
184            for item in order_by {
185                Self::hash_expression(&item.expr, hasher);
186                std::mem::discriminant(&item.direction).hash(hasher);
187            }
188        }
189
190        // Hash LIMIT/OFFSET expressions
191        // Note: We hash the presence of limit/offset but not the expression itself
192        // since Expression doesn't implement Hash
193        select.limit.is_some().hash(hasher);
194        select.offset.is_some().hash(hasher);
195    }
196
197    /// Hash a FROM clause structure
198    fn hash_from_clause(from: &vibesql_ast::FromClause, hasher: &mut DefaultHasher) {
199        match from {
200            vibesql_ast::FromClause::Table { name, alias, .. } => {
201                "TABLE".hash(hasher);
202                name.hash(hasher);
203                alias.hash(hasher);
204            }
205            vibesql_ast::FromClause::Join { left, join_type, right, condition, .. } => {
206                "JOIN".hash(hasher);
207                Self::hash_from_clause(left, hasher);
208                std::mem::discriminant(join_type).hash(hasher);
209                Self::hash_from_clause(right, hasher);
210                if let Some(expr) = condition {
211                    Self::hash_expression(expr, hasher);
212                }
213            }
214            vibesql_ast::FromClause::Subquery { query, alias, .. } => {
215                "SUBQUERY".hash(hasher);
216                Self::hash_select(query, hasher);
217                alias.hash(hasher);
218            }
219            vibesql_ast::FromClause::Values { rows, alias, column_aliases } => {
220                "VALUES".hash(hasher);
221                rows.len().hash(hasher);
222                if let Some(first_row) = rows.first() {
223                    first_row.len().hash(hasher);
224                }
225                for row in rows {
226                    for expr in row {
227                        Self::hash_expression(expr, hasher);
228                    }
229                }
230                alias.hash(hasher);
231                column_aliases.hash(hasher);
232            }
233        }
234    }
235
236    fn hash_group_by(group_by: &vibesql_ast::GroupByClause, hasher: &mut DefaultHasher) {
237        match group_by {
238            vibesql_ast::GroupByClause::Simple(exprs) => {
239                "SIMPLE".hash(hasher);
240                for expr in exprs {
241                    Self::hash_expression(expr, hasher);
242                }
243            }
244            vibesql_ast::GroupByClause::Rollup(elements) => {
245                "ROLLUP".hash(hasher);
246                Self::hash_grouping_elements(elements, hasher);
247            }
248            vibesql_ast::GroupByClause::Cube(elements) => {
249                "CUBE".hash(hasher);
250                Self::hash_grouping_elements(elements, hasher);
251            }
252            vibesql_ast::GroupByClause::GroupingSets(sets) => {
253                "GROUPING_SETS".hash(hasher);
254                Self::hash_grouping_sets(sets, hasher);
255            }
256            vibesql_ast::GroupByClause::Mixed(items) => {
257                "MIXED".hash(hasher);
258                for item in items {
259                    match item {
260                        vibesql_ast::MixedGroupingItem::Simple(expr) => {
261                            "SIMPLE".hash(hasher);
262                            Self::hash_expression(expr, hasher);
263                        }
264                        vibesql_ast::MixedGroupingItem::Rollup(elements) => {
265                            "ROLLUP".hash(hasher);
266                            Self::hash_grouping_elements(elements, hasher);
267                        }
268                        vibesql_ast::MixedGroupingItem::Cube(elements) => {
269                            "CUBE".hash(hasher);
270                            Self::hash_grouping_elements(elements, hasher);
271                        }
272                        vibesql_ast::MixedGroupingItem::GroupingSets(sets) => {
273                            "GROUPING_SETS".hash(hasher);
274                            Self::hash_grouping_sets(sets, hasher);
275                        }
276                    }
277                }
278            }
279        }
280    }
281
282    fn hash_grouping_sets(sets: &[vibesql_ast::GroupingSet], hasher: &mut DefaultHasher) {
283        for set in sets {
284            "SET".hash(hasher);
285            for expr in &set.columns {
286                Self::hash_expression(expr, hasher);
287            }
288        }
289    }
290
291    fn hash_grouping_elements(
292        elements: &[vibesql_ast::GroupingElement],
293        hasher: &mut DefaultHasher,
294    ) {
295        for element in elements {
296            match element {
297                vibesql_ast::GroupingElement::Single(expr) => {
298                    "SINGLE".hash(hasher);
299                    Self::hash_expression(expr, hasher);
300                }
301                vibesql_ast::GroupingElement::Composite(exprs) => {
302                    "COMPOSITE".hash(hasher);
303                    for expr in exprs {
304                        Self::hash_expression(expr, hasher);
305                    }
306                }
307            }
308        }
309    }
310
311    /// Hash an expression, replacing literals with a placeholder marker
312    fn hash_expression(expr: &Expression, hasher: &mut DefaultHasher) {
313        match expr {
314            // Key difference: All literals and placeholders hash to the same value
315            // This allows parameterized queries to match with literal values
316            Expression::Literal(_)
317            | Expression::Placeholder(_)
318            | Expression::NumberedPlaceholder(_)
319            | Expression::NamedPlaceholder(_) => "LITERAL_PLACEHOLDER".hash(hasher),
320
321            Expression::ColumnRef(col_id) => {
322                "COLUMN".hash(hasher);
323                col_id.table_canonical().hash(hasher);
324                col_id.column_canonical().hash(hasher);
325            }
326
327            Expression::PseudoVariable { pseudo_table, column } => {
328                "PSEUDO_VARIABLE".hash(hasher);
329                std::mem::discriminant(pseudo_table).hash(hasher);
330                column.hash(hasher);
331            }
332
333            Expression::BinaryOp { op, left, right } => {
334                "BINARY_OP".hash(hasher);
335                std::mem::discriminant(op).hash(hasher);
336                Self::hash_expression(left, hasher);
337                Self::hash_expression(right, hasher);
338            }
339
340            Expression::UnaryOp { op, expr } => {
341                "UNARY_OP".hash(hasher);
342                std::mem::discriminant(op).hash(hasher);
343                Self::hash_expression(expr, hasher);
344            }
345
346            Expression::Function { name, args, character_unit } => {
347                "FUNCTION".hash(hasher);
348                name.canonical().hash(hasher);
349                for arg in args {
350                    Self::hash_expression(arg, hasher);
351                }
352                if let Some(ref unit) = character_unit {
353                    std::mem::discriminant(unit).hash(hasher);
354                }
355            }
356
357            Expression::AggregateFunction { name, distinct, args, order_by, filter } => {
358                "AGGREGATE".hash(hasher);
359                name.canonical().hash(hasher);
360                distinct.hash(hasher);
361                for arg in args {
362                    Self::hash_expression(arg, hasher);
363                }
364                // Hash order_by clause if present
365                if let Some(items) = order_by {
366                    "ORDER_BY".hash(hasher);
367                    for item in items {
368                        Self::hash_expression(&item.expr, hasher);
369                        std::mem::discriminant(&item.direction).hash(hasher);
370                    }
371                }
372                // Hash filter clause if present
373                if let Some(f) = filter {
374                    "FILTER".hash(hasher);
375                    Self::hash_expression(f, hasher);
376                }
377            }
378
379            Expression::IsNull { expr, negated } => {
380                "IS_NULL".hash(hasher);
381                Self::hash_expression(expr, hasher);
382                negated.hash(hasher);
383            }
384
385            Expression::IsDistinctFrom { left, right, negated } => {
386                "IS_DISTINCT_FROM".hash(hasher);
387                Self::hash_expression(left, hasher);
388                Self::hash_expression(right, hasher);
389                negated.hash(hasher);
390            }
391
392            Expression::IsTruthValue { expr, truth_value, negated } => {
393                "IS_TRUTH_VALUE".hash(hasher);
394                Self::hash_expression(expr, hasher);
395                std::mem::discriminant(truth_value).hash(hasher);
396                negated.hash(hasher);
397            }
398
399            Expression::Wildcard => "WILDCARD".hash(hasher),
400
401            Expression::Case { operand, when_clauses, else_result } => {
402                "CASE".hash(hasher);
403                if let Some(ref op) = operand {
404                    Self::hash_expression(op, hasher);
405                }
406                for when in when_clauses {
407                    for cond in &when.conditions {
408                        Self::hash_expression(cond, hasher);
409                    }
410                    Self::hash_expression(&when.result, hasher);
411                }
412                if let Some(ref else_expr) = else_result {
413                    Self::hash_expression(else_expr, hasher);
414                }
415            }
416
417            Expression::ScalarSubquery(subquery) => {
418                "SCALAR_SUBQUERY".hash(hasher);
419                Self::hash_select(subquery, hasher);
420            }
421
422            Expression::In { expr, subquery, negated } => {
423                "IN_SUBQUERY".hash(hasher);
424                Self::hash_expression(expr, hasher);
425                Self::hash_select(subquery, hasher);
426                negated.hash(hasher);
427            }
428
429            Expression::InList { expr, values, negated } => {
430                "IN_LIST".hash(hasher);
431                Self::hash_expression(expr, hasher);
432                values.len().hash(hasher);
433                for val in values {
434                    Self::hash_expression(val, hasher);
435                }
436                negated.hash(hasher);
437            }
438
439            Expression::Between { expr, low, high, negated, symmetric } => {
440                "BETWEEN".hash(hasher);
441                Self::hash_expression(expr, hasher);
442                Self::hash_expression(low, hasher);
443                Self::hash_expression(high, hasher);
444                negated.hash(hasher);
445                symmetric.hash(hasher);
446            }
447
448            Expression::Cast { expr, data_type } => {
449                "CAST".hash(hasher);
450                Self::hash_expression(expr, hasher);
451                std::mem::discriminant(data_type).hash(hasher);
452            }
453
454            Expression::Position { substring, string, character_unit } => {
455                "POSITION".hash(hasher);
456                Self::hash_expression(substring, hasher);
457                Self::hash_expression(string, hasher);
458                if let Some(ref unit) = character_unit {
459                    std::mem::discriminant(unit).hash(hasher);
460                }
461            }
462
463            Expression::Trim { position, removal_char, string } => {
464                "TRIM".hash(hasher);
465                if let Some(ref pos) = position {
466                    std::mem::discriminant(pos).hash(hasher);
467                }
468                if let Some(ref ch) = removal_char {
469                    Self::hash_expression(ch, hasher);
470                }
471                Self::hash_expression(string, hasher);
472            }
473
474            Expression::Extract { field, expr } => {
475                "EXTRACT".hash(hasher);
476                std::mem::discriminant(field).hash(hasher);
477                Self::hash_expression(expr, hasher);
478            }
479
480            Expression::Like { expr, pattern, negated, .. } => {
481                "LIKE".hash(hasher);
482                Self::hash_expression(expr, hasher);
483                Self::hash_expression(pattern, hasher);
484                negated.hash(hasher);
485            }
486
487            Expression::Glob { expr, pattern, negated, .. } => {
488                "GLOB".hash(hasher);
489                Self::hash_expression(expr, hasher);
490                Self::hash_expression(pattern, hasher);
491                negated.hash(hasher);
492            }
493
494            Expression::Exists { subquery, negated } => {
495                "EXISTS".hash(hasher);
496                Self::hash_select(subquery, hasher);
497                negated.hash(hasher);
498            }
499
500            Expression::QuantifiedComparison { expr, op, quantifier, subquery } => {
501                "QUANTIFIED".hash(hasher);
502                Self::hash_expression(expr, hasher);
503                std::mem::discriminant(op).hash(hasher);
504                std::mem::discriminant(quantifier).hash(hasher);
505                Self::hash_select(subquery, hasher);
506            }
507
508            Expression::CurrentDate => "CURRENT_DATE".hash(hasher),
509
510            Expression::CurrentTime { precision } => {
511                "CURRENT_TIME".hash(hasher);
512                precision.hash(hasher);
513            }
514
515            Expression::CurrentTimestamp { precision } => {
516                "CURRENT_TIMESTAMP".hash(hasher);
517                precision.hash(hasher);
518            }
519
520            Expression::Interval { value, unit, leading_precision, fractional_precision } => {
521                "INTERVAL".hash(hasher);
522                Self::hash_expression(value, hasher);
523                format!("{:?}", unit).hash(hasher);
524                leading_precision.hash(hasher);
525                fractional_precision.hash(hasher);
526            }
527
528            Expression::Default => "DEFAULT".hash(hasher),
529
530            Expression::DuplicateKeyValue { column } => {
531                "DUPLICATE_KEY_VALUE".hash(hasher);
532                column.hash(hasher);
533            }
534
535            Expression::WindowFunction { function, over } => {
536                "WINDOW_FUNCTION".hash(hasher);
537                // Hash function type and arguments
538                match function {
539                    vibesql_ast::WindowFunctionSpec::Aggregate { name, args, filter } => {
540                        "AGGREGATE".hash(hasher);
541                        name.canonical().hash(hasher);
542                        for arg in args {
543                            Self::hash_expression(arg, hasher);
544                        }
545                        if let Some(f) = filter {
546                            "FILTER".hash(hasher);
547                            Self::hash_expression(f, hasher);
548                        }
549                    }
550                    vibesql_ast::WindowFunctionSpec::Ranking { name, args } => {
551                        "RANKING".hash(hasher);
552                        name.canonical().hash(hasher);
553                        for arg in args {
554                            Self::hash_expression(arg, hasher);
555                        }
556                    }
557                    vibesql_ast::WindowFunctionSpec::Value { name, args } => {
558                        "VALUE".hash(hasher);
559                        name.canonical().hash(hasher);
560                        for arg in args {
561                            Self::hash_expression(arg, hasher);
562                        }
563                    }
564                }
565
566                // Hash OVER clause components
567                if let Some(ref partition_by) = over.partition_by {
568                    for expr in partition_by {
569                        Self::hash_expression(expr, hasher);
570                    }
571                }
572                if let Some(ref order_by) = over.order_by {
573                    for item in order_by {
574                        Self::hash_expression(&item.expr, hasher);
575                        std::mem::discriminant(&item.direction).hash(hasher);
576                    }
577                }
578                if let Some(ref frame) = over.frame {
579                    std::mem::discriminant(&frame.unit).hash(hasher);
580                    std::mem::discriminant(&frame.start).hash(hasher);
581                    if let Some(ref end) = frame.end {
582                        std::mem::discriminant(end).hash(hasher);
583                    }
584                }
585            }
586
587            Expression::NextValue { sequence_name } => {
588                "NEXT_VALUE".hash(hasher);
589                sequence_name.hash(hasher);
590            }
591
592            Expression::MatchAgainst { columns, search_modifier, mode } => {
593                "MATCH_AGAINST".hash(hasher);
594                for col in columns {
595                    col.hash(hasher);
596                }
597                Self::hash_expression(search_modifier, hasher);
598                std::mem::discriminant(mode).hash(hasher);
599            }
600
601            Expression::SessionVariable { name } => {
602                "SESSION_VARIABLE".hash(hasher);
603                name.hash(hasher);
604            }
605
606            Expression::Conjunction(children)
607            | Expression::Disjunction(children)
608            | Expression::RowValueConstructor(children) => {
609                for child in children {
610                    Self::hash_expression(child, hasher);
611                }
612            }
613
614            Expression::Collate { expr, collation } => {
615                "COLLATE".hash(hasher);
616                Self::hash_expression(expr, hasher);
617                collation.hash(hasher);
618            }
619        }
620    }
621
622    // ========================================================================
623    // Arena-allocated type hashing
624    // ========================================================================
625
626    /// Hash an arena-allocated SELECT statement structure
627    fn hash_arena_select(select: &ArenaSelectStmt<'_>, hasher: &mut DefaultHasher) {
628        // Hash DISTINCT
629        select.distinct.hash(hasher);
630
631        // Hash select items
632        for item in &select.select_list {
633            match item {
634                ArenaSelectItem::Wildcard { .. } => "WILDCARD".hash(hasher),
635                ArenaSelectItem::QualifiedWildcard { qualifier, .. } => {
636                    "QUALIFIED_WILDCARD".hash(hasher);
637                    qualifier.hash(hasher);
638                }
639                ArenaSelectItem::Expression { expr, alias, .. } => {
640                    Self::hash_arena_expression(expr, hasher);
641                    alias.hash(hasher);
642                }
643            }
644        }
645
646        // Hash FROM clause
647        if let Some(ref from) = select.from {
648            Self::hash_arena_from_clause(from, hasher);
649        }
650
651        // Hash WHERE clause
652        if let Some(ref where_clause) = select.where_clause {
653            Self::hash_arena_expression(where_clause, hasher);
654        }
655
656        // Hash GROUP BY
657        if let Some(ref group_by) = select.group_by {
658            Self::hash_arena_group_by(group_by, hasher);
659        }
660
661        // Hash HAVING
662        if let Some(ref having) = select.having {
663            Self::hash_arena_expression(having, hasher);
664        }
665
666        // Hash ORDER BY
667        if let Some(ref order_by) = select.order_by {
668            for item in order_by {
669                Self::hash_arena_expression(&item.expr, hasher);
670                std::mem::discriminant(&item.direction).hash(hasher);
671            }
672        }
673
674        // Hash LIMIT/OFFSET expressions
675        // Note: We hash the presence of limit/offset but not the expression itself
676        // since arena Expression doesn't implement Hash
677        select.limit.is_some().hash(hasher);
678        select.offset.is_some().hash(hasher);
679    }
680
681    /// Hash an arena-allocated FROM clause structure
682    fn hash_arena_from_clause(from: &ArenaFromClause<'_>, hasher: &mut DefaultHasher) {
683        match from {
684            ArenaFromClause::Table { name, alias, .. } => {
685                "TABLE".hash(hasher);
686                name.hash(hasher);
687                alias.hash(hasher);
688            }
689            ArenaFromClause::Join { left, join_type, right, condition, .. } => {
690                "JOIN".hash(hasher);
691                Self::hash_arena_from_clause(left, hasher);
692                std::mem::discriminant(join_type).hash(hasher);
693                Self::hash_arena_from_clause(right, hasher);
694                if let Some(expr) = condition {
695                    Self::hash_arena_expression(expr, hasher);
696                }
697            }
698            ArenaFromClause::Subquery { query, alias, .. } => {
699                "SUBQUERY".hash(hasher);
700                Self::hash_arena_select(query, hasher);
701                alias.hash(hasher);
702            }
703        }
704    }
705
706    fn hash_arena_group_by(group_by: &ArenaGroupByClause<'_>, hasher: &mut DefaultHasher) {
707        match group_by {
708            ArenaGroupByClause::Simple(exprs) => {
709                "SIMPLE".hash(hasher);
710                for expr in exprs {
711                    Self::hash_arena_expression(expr, hasher);
712                }
713            }
714            ArenaGroupByClause::Rollup(elements) => {
715                "ROLLUP".hash(hasher);
716                Self::hash_arena_grouping_elements(elements, hasher);
717            }
718            ArenaGroupByClause::Cube(elements) => {
719                "CUBE".hash(hasher);
720                Self::hash_arena_grouping_elements(elements, hasher);
721            }
722            ArenaGroupByClause::GroupingSets(sets) => {
723                "GROUPING_SETS".hash(hasher);
724                Self::hash_arena_grouping_sets(sets, hasher);
725            }
726            ArenaGroupByClause::Mixed(items) => {
727                "MIXED".hash(hasher);
728                for item in items {
729                    match item {
730                        ArenaMixedGroupingItem::Simple(expr) => {
731                            "SIMPLE".hash(hasher);
732                            Self::hash_arena_expression(expr, hasher);
733                        }
734                        ArenaMixedGroupingItem::Rollup(elements) => {
735                            "ROLLUP".hash(hasher);
736                            Self::hash_arena_grouping_elements(elements, hasher);
737                        }
738                        ArenaMixedGroupingItem::Cube(elements) => {
739                            "CUBE".hash(hasher);
740                            Self::hash_arena_grouping_elements(elements, hasher);
741                        }
742                        ArenaMixedGroupingItem::GroupingSets(sets) => {
743                            "GROUPING_SETS".hash(hasher);
744                            Self::hash_arena_grouping_sets(sets, hasher);
745                        }
746                    }
747                }
748            }
749        }
750    }
751
752    fn hash_arena_grouping_sets(
753        sets: &bumpalo::collections::Vec<'_, ArenaGroupingSet<'_>>,
754        hasher: &mut DefaultHasher,
755    ) {
756        for set in sets {
757            "SET".hash(hasher);
758            for expr in &set.columns {
759                Self::hash_arena_expression(expr, hasher);
760            }
761        }
762    }
763
764    fn hash_arena_grouping_elements(
765        elements: &bumpalo::collections::Vec<'_, ArenaGroupingElement<'_>>,
766        hasher: &mut DefaultHasher,
767    ) {
768        for element in elements {
769            match element {
770                ArenaGroupingElement::Single(expr) => {
771                    "SINGLE".hash(hasher);
772                    Self::hash_arena_expression(expr, hasher);
773                }
774                ArenaGroupingElement::Composite(exprs) => {
775                    "COMPOSITE".hash(hasher);
776                    for expr in exprs {
777                        Self::hash_arena_expression(expr, hasher);
778                    }
779                }
780            }
781        }
782    }
783
784    /// Hash an arena-allocated expression, replacing literals with a placeholder marker
785    fn hash_arena_expression(expr: &ArenaExpression<'_>, hasher: &mut DefaultHasher) {
786        match expr {
787            // Hot-path inline variants
788            // All literals and placeholders hash to the same value
789            ArenaExpression::Literal(_)
790            | ArenaExpression::Placeholder(_)
791            | ArenaExpression::NumberedPlaceholder(_)
792            | ArenaExpression::NamedPlaceholder(_) => "LITERAL_PLACEHOLDER".hash(hasher),
793
794            ArenaExpression::ColumnRef { table, column, .. } => {
795                "COLUMN".hash(hasher);
796                table.hash(hasher);
797                column.hash(hasher);
798            }
799
800            ArenaExpression::BinaryOp { op, left, right } => {
801                "BINARY_OP".hash(hasher);
802                std::mem::discriminant(op).hash(hasher);
803                Self::hash_arena_expression(left, hasher);
804                Self::hash_arena_expression(right, hasher);
805            }
806
807            ArenaExpression::UnaryOp { op, expr } => {
808                "UNARY_OP".hash(hasher);
809                std::mem::discriminant(op).hash(hasher);
810                Self::hash_arena_expression(expr, hasher);
811            }
812
813            ArenaExpression::IsNull { expr, negated } => {
814                "IS_NULL".hash(hasher);
815                Self::hash_arena_expression(expr, hasher);
816                negated.hash(hasher);
817            }
818
819            ArenaExpression::IsDistinctFrom { left, right, negated } => {
820                "IS_DISTINCT_FROM".hash(hasher);
821                Self::hash_arena_expression(left, hasher);
822                Self::hash_arena_expression(right, hasher);
823                negated.hash(hasher);
824            }
825
826            ArenaExpression::IsTruthValue { expr, truth_value, negated } => {
827                "IS_TRUTH_VALUE".hash(hasher);
828                Self::hash_arena_expression(expr, hasher);
829                std::mem::discriminant(truth_value).hash(hasher);
830                negated.hash(hasher);
831            }
832
833            ArenaExpression::Wildcard => "WILDCARD".hash(hasher),
834
835            ArenaExpression::CurrentDate => "CURRENT_DATE".hash(hasher),
836
837            ArenaExpression::CurrentTime { precision } => {
838                "CURRENT_TIME".hash(hasher);
839                precision.hash(hasher);
840            }
841
842            ArenaExpression::CurrentTimestamp { precision } => {
843                "CURRENT_TIMESTAMP".hash(hasher);
844                precision.hash(hasher);
845            }
846
847            ArenaExpression::Default => "DEFAULT".hash(hasher),
848
849            ArenaExpression::Conjunction(children) | ArenaExpression::Disjunction(children) => {
850                for child in children.iter() {
851                    Self::hash_arena_expression(child, hasher);
852                }
853            }
854
855            // Cold-path extended variants
856            ArenaExpression::Extended(ext) => Self::hash_arena_extended_expr(ext, hasher),
857        }
858    }
859
860    /// Hash an arena-allocated extended expression
861    fn hash_arena_extended_expr(ext: &ArenaExtendedExpr<'_>, hasher: &mut DefaultHasher) {
862        match ext {
863            ArenaExtendedExpr::Function { name, args, character_unit } => {
864                "FUNCTION".hash(hasher);
865                name.hash(hasher);
866                for arg in args {
867                    Self::hash_arena_expression(arg, hasher);
868                }
869                if let Some(ref unit) = character_unit {
870                    std::mem::discriminant(unit).hash(hasher);
871                }
872            }
873
874            ArenaExtendedExpr::AggregateFunction { name, distinct, args } => {
875                "AGGREGATE".hash(hasher);
876                name.hash(hasher);
877                distinct.hash(hasher);
878                for arg in args {
879                    Self::hash_arena_expression(arg, hasher);
880                }
881            }
882
883            ArenaExtendedExpr::Case { operand, when_clauses, else_result } => {
884                "CASE".hash(hasher);
885                if let Some(op) = operand {
886                    Self::hash_arena_expression(op, hasher);
887                }
888                for when in when_clauses {
889                    for cond in &when.conditions {
890                        Self::hash_arena_expression(cond, hasher);
891                    }
892                    Self::hash_arena_expression(&when.result, hasher);
893                }
894                if let Some(else_expr) = else_result {
895                    Self::hash_arena_expression(else_expr, hasher);
896                }
897            }
898
899            ArenaExtendedExpr::ScalarSubquery(subquery) => {
900                "SCALAR_SUBQUERY".hash(hasher);
901                Self::hash_arena_select(subquery, hasher);
902            }
903
904            ArenaExtendedExpr::In { expr, subquery, negated } => {
905                "IN_SUBQUERY".hash(hasher);
906                Self::hash_arena_expression(expr, hasher);
907                Self::hash_arena_select(subquery, hasher);
908                negated.hash(hasher);
909            }
910
911            ArenaExtendedExpr::InList { expr, values, negated } => {
912                "IN_LIST".hash(hasher);
913                Self::hash_arena_expression(expr, hasher);
914                values.len().hash(hasher);
915                for val in values {
916                    Self::hash_arena_expression(val, hasher);
917                }
918                negated.hash(hasher);
919            }
920
921            ArenaExtendedExpr::Between { expr, low, high, negated, symmetric } => {
922                "BETWEEN".hash(hasher);
923                Self::hash_arena_expression(expr, hasher);
924                Self::hash_arena_expression(low, hasher);
925                Self::hash_arena_expression(high, hasher);
926                negated.hash(hasher);
927                symmetric.hash(hasher);
928            }
929
930            ArenaExtendedExpr::Cast { expr, data_type } => {
931                "CAST".hash(hasher);
932                Self::hash_arena_expression(expr, hasher);
933                std::mem::discriminant(data_type).hash(hasher);
934            }
935
936            ArenaExtendedExpr::Position { substring, string, character_unit } => {
937                "POSITION".hash(hasher);
938                Self::hash_arena_expression(substring, hasher);
939                Self::hash_arena_expression(string, hasher);
940                if let Some(unit) = character_unit {
941                    std::mem::discriminant(unit).hash(hasher);
942                }
943            }
944
945            ArenaExtendedExpr::Trim { position, removal_char, string } => {
946                "TRIM".hash(hasher);
947                if let Some(pos) = position {
948                    std::mem::discriminant(pos).hash(hasher);
949                }
950                if let Some(ch) = removal_char {
951                    Self::hash_arena_expression(ch, hasher);
952                }
953                Self::hash_arena_expression(string, hasher);
954            }
955
956            ArenaExtendedExpr::Extract { field, expr } => {
957                "EXTRACT".hash(hasher);
958                std::mem::discriminant(field).hash(hasher);
959                Self::hash_arena_expression(expr, hasher);
960            }
961
962            ArenaExtendedExpr::Like { expr, pattern, negated, .. } => {
963                "LIKE".hash(hasher);
964                Self::hash_arena_expression(expr, hasher);
965                Self::hash_arena_expression(pattern, hasher);
966                negated.hash(hasher);
967            }
968
969            ArenaExtendedExpr::Glob { expr, pattern, negated, .. } => {
970                "GLOB".hash(hasher);
971                Self::hash_arena_expression(expr, hasher);
972                Self::hash_arena_expression(pattern, hasher);
973                negated.hash(hasher);
974            }
975
976            ArenaExtendedExpr::Exists { subquery, negated } => {
977                "EXISTS".hash(hasher);
978                Self::hash_arena_select(subquery, hasher);
979                negated.hash(hasher);
980            }
981
982            ArenaExtendedExpr::QuantifiedComparison { expr, op, quantifier, subquery } => {
983                "QUANTIFIED".hash(hasher);
984                Self::hash_arena_expression(expr, hasher);
985                std::mem::discriminant(op).hash(hasher);
986                std::mem::discriminant(quantifier).hash(hasher);
987                Self::hash_arena_select(subquery, hasher);
988            }
989
990            ArenaExtendedExpr::Interval {
991                value,
992                unit,
993                leading_precision,
994                fractional_precision,
995            } => {
996                "INTERVAL".hash(hasher);
997                Self::hash_arena_expression(value, hasher);
998                format!("{:?}", unit).hash(hasher);
999                leading_precision.hash(hasher);
1000                fractional_precision.hash(hasher);
1001            }
1002
1003            ArenaExtendedExpr::DuplicateKeyValue { column } => {
1004                "DUPLICATE_KEY_VALUE".hash(hasher);
1005                column.hash(hasher);
1006            }
1007
1008            ArenaExtendedExpr::WindowFunction { function, over } => {
1009                "WINDOW_FUNCTION".hash(hasher);
1010                match function {
1011                    ArenaWindowFunctionSpec::Aggregate { name, args } => {
1012                        "AGGREGATE".hash(hasher);
1013                        name.hash(hasher);
1014                        for arg in args {
1015                            Self::hash_arena_expression(arg, hasher);
1016                        }
1017                    }
1018                    ArenaWindowFunctionSpec::Ranking { name, args } => {
1019                        "RANKING".hash(hasher);
1020                        name.hash(hasher);
1021                        for arg in args {
1022                            Self::hash_arena_expression(arg, hasher);
1023                        }
1024                    }
1025                    ArenaWindowFunctionSpec::Value { name, args } => {
1026                        "VALUE".hash(hasher);
1027                        name.hash(hasher);
1028                        for arg in args {
1029                            Self::hash_arena_expression(arg, hasher);
1030                        }
1031                    }
1032                }
1033
1034                if let Some(ref partition_by) = over.partition_by {
1035                    for expr in partition_by {
1036                        Self::hash_arena_expression(expr, hasher);
1037                    }
1038                }
1039                if let Some(ref order_by) = over.order_by {
1040                    for item in order_by {
1041                        Self::hash_arena_expression(&item.expr, hasher);
1042                        std::mem::discriminant(&item.direction).hash(hasher);
1043                    }
1044                }
1045                if let Some(ref frame) = over.frame {
1046                    std::mem::discriminant(&frame.unit).hash(hasher);
1047                    std::mem::discriminant(&frame.start).hash(hasher);
1048                    if let Some(ref end) = frame.end {
1049                        std::mem::discriminant(end).hash(hasher);
1050                    }
1051                }
1052            }
1053
1054            ArenaExtendedExpr::NextValue { sequence_name } => {
1055                "NEXT_VALUE".hash(hasher);
1056                sequence_name.hash(hasher);
1057            }
1058
1059            ArenaExtendedExpr::MatchAgainst { columns, search_modifier, mode } => {
1060                "MATCH_AGAINST".hash(hasher);
1061                for col in columns {
1062                    col.hash(hasher);
1063                }
1064                Self::hash_arena_expression(search_modifier, hasher);
1065                std::mem::discriminant(mode).hash(hasher);
1066            }
1067
1068            ArenaExtendedExpr::PseudoVariable { pseudo_table, column } => {
1069                "PSEUDO_VARIABLE".hash(hasher);
1070                std::mem::discriminant(pseudo_table).hash(hasher);
1071                column.hash(hasher);
1072            }
1073
1074            ArenaExtendedExpr::SessionVariable { name } => {
1075                "SESSION_VARIABLE".hash(hasher);
1076                name.hash(hasher);
1077            }
1078
1079            ArenaExtendedExpr::RowValueConstructor(children) => {
1080                "ROW_VALUE_CONSTRUCTOR".hash(hasher);
1081                for child in children {
1082                    Self::hash_arena_expression(child, hasher);
1083                }
1084            }
1085        }
1086    }
1087}
1088
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the AST for `SELECT col0 FROM tab WHERE col1 <op> <literal>`.
    ///
    /// Shared by the AST-based tests so that each test only states what varies
    /// between the two statements it compares (the operator or the literal).
    fn select_col0_where_col1(
        op: vibesql_ast::BinaryOperator,
        literal: vibesql_types::SqlValue,
    ) -> vibesql_ast::Statement {
        use vibesql_ast::{Expression, FromClause, SelectItem, SelectStmt, Statement};

        Statement::Select(Box::new(SelectStmt {
            with_clause: None,
            distinct: false,
            select_list: vec![SelectItem::Expression {
                expr: Expression::ColumnRef(vibesql_ast::ColumnIdentifier::simple("col0", false)),
                alias: None,
                source_text: None,
            }],
            into_table: None,
            into_variables: None,
            from: Some(FromClause::Table {
                name: "tab".to_string(),
                alias: None,
                column_aliases: None,
                quoted: false,
            }),
            where_clause: Some(Expression::BinaryOp {
                op,
                left: Box::new(Expression::ColumnRef(vibesql_ast::ColumnIdentifier::simple(
                    "col1", false,
                ))),
                right: Box::new(Expression::Literal(literal)),
            }),
            group_by: None,
            having: None,
            order_by: None,
            limit: None,
            offset: None,
            set_operation: None,
            values: None,
        }))
    }

    #[test]
    fn test_same_query_same_signature() {
        let sig1 = QuerySignature::from_sql("SELECT * FROM users");
        let sig2 = QuerySignature::from_sql("SELECT * FROM users");
        assert_eq!(sig1, sig2);
    }

    #[test]
    fn test_whitespace_normalization() {
        let sig1 = QuerySignature::from_sql("SELECT * FROM users");
        let sig2 = QuerySignature::from_sql("SELECT  *  FROM  users");
        assert_eq!(sig1, sig2);
    }

    #[test]
    fn test_case_insensitive() {
        let sig1 = QuerySignature::from_sql("SELECT * FROM users");
        let sig2 = QuerySignature::from_sql("select * from users");
        assert_eq!(sig1, sig2);
    }

    #[test]
    fn test_different_queries_different_signature() {
        let sig1 = QuerySignature::from_sql("SELECT * FROM users");
        let sig2 = QuerySignature::from_sql("SELECT * FROM orders");
        assert_ne!(sig1, sig2);
    }

    #[test]
    fn test_different_literals_different_signature_string_based() {
        // Different literals create different signatures with string-based hashing
        let sig1 = QuerySignature::from_sql("SELECT col0 FROM tab WHERE col1 > 5");
        let sig2 = QuerySignature::from_sql("SELECT col0 FROM tab WHERE col1 > 10");
        // String-based hashing includes literals in the signature
        assert_ne!(sig1, sig2);
    }

    #[test]
    fn test_ast_based_same_structure_different_literals() {
        use vibesql_ast::BinaryOperator;
        use vibesql_types::SqlValue;

        // SELECT col0 FROM tab WHERE col1 > 5
        let stmt1 = select_col0_where_col1(BinaryOperator::GreaterThan, SqlValue::Integer(5));

        // SELECT col0 FROM tab WHERE col1 > 10 (different literal)
        let stmt2 = select_col0_where_col1(BinaryOperator::GreaterThan, SqlValue::Integer(10));

        let sig1 = QuerySignature::from_ast(&stmt1);
        let sig2 = QuerySignature::from_ast(&stmt2);

        // AST-based signatures should be the same despite different literals
        assert_eq!(sig1, sig2);
    }

    #[test]
    fn test_ast_based_different_structure() {
        use vibesql_ast::BinaryOperator;
        use vibesql_types::SqlValue;

        // SELECT col0 FROM tab WHERE col1 > 5
        let stmt1 = select_col0_where_col1(BinaryOperator::GreaterThan, SqlValue::Integer(5));

        // SELECT col0 FROM tab WHERE col1 < 5 (different operator)
        let stmt2 = select_col0_where_col1(BinaryOperator::LessThan, SqlValue::Integer(5));

        let sig1 = QuerySignature::from_ast(&stmt1);
        let sig2 = QuerySignature::from_ast(&stmt2);

        // Different structure should produce different signatures
        assert_ne!(sig1, sig2);
    }
}