vibesql_executor/cache/
query_signature.rs

1//! Query signature generation for cache keys
2//!
3//! Generates deterministic cache keys from SQL queries by normalizing the AST
4//! and creating a hash. Queries with identical structure (different literals)
5//! will have the same signature.
6
7use std::{
8    collections::hash_map::DefaultHasher,
9    hash::{Hash, Hasher},
10};
11
12use vibesql_ast::{Expression, Statement};
13use vibesql_ast::arena::{
14    Expression as ArenaExpression, ExtendedExpr as ArenaExtendedExpr,
15    FromClause as ArenaFromClause, GroupByClause as ArenaGroupByClause,
16    GroupingElement as ArenaGroupingElement, GroupingSet as ArenaGroupingSet,
17    MixedGroupingItem as ArenaMixedGroupingItem, SelectItem as ArenaSelectItem,
18    SelectStmt as ArenaSelectStmt, WindowFunctionSpec as ArenaWindowFunctionSpec,
19};
20
/// Opaque cache key that identifies a query by its structure.
///
/// Wraps a single 64-bit hash; two signatures compare equal exactly when
/// their hashes are equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct QuerySignature {
    /// Hash value produced when the signature was constructed.
    hash: u64,
}
26
27impl QuerySignature {
28    /// Create a signature from SQL text (legacy string-based approach)
29    pub fn from_sql(sql: &str) -> Self {
30        let normalized = Self::normalize(sql);
31        let mut hasher = DefaultHasher::new();
32        normalized.hash(&mut hasher);
33        let hash = hasher.finish();
34        Self { hash }
35    }
36
37    /// Create a signature from parsed AST, ignoring literal values
38    /// This allows queries with different literals but identical structure to share cached plans
39    pub fn from_ast(stmt: &Statement) -> Self {
40        let mut hasher = DefaultHasher::new();
41        Self::hash_statement(stmt, &mut hasher);
42        let hash = hasher.finish();
43        Self { hash }
44    }
45
46    /// Create a signature from arena-allocated SelectStmt, ignoring literal values
47    pub fn from_arena_select(select: &ArenaSelectStmt<'_>) -> Self {
48        let mut hasher = DefaultHasher::new();
49        "SELECT".hash(&mut hasher);
50        Self::hash_arena_select(select, &mut hasher);
51        let hash = hasher.finish();
52        Self { hash }
53    }
54
55    /// Get the underlying hash
56    pub fn hash(&self) -> u64 {
57        self.hash
58    }
59
60    /// Normalize SQL: trim and collapse whitespace
61    fn normalize(sql: &str) -> String {
62        sql.split_whitespace().collect::<Vec<_>>().join(" ").to_lowercase()
63    }
64
65    /// Hash a statement, replacing literals with a placeholder marker
66    fn hash_statement(stmt: &Statement, hasher: &mut DefaultHasher) {
67        match stmt {
68            Statement::Select(select) => {
69                "SELECT".hash(hasher);
70                Self::hash_select(select, hasher);
71            }
72            Statement::Insert(insert) => {
73                "INSERT".hash(hasher);
74                insert.table_name.hash(hasher);
75                for col in &insert.columns {
76                    col.hash(hasher);
77                }
78                // Hash the insert source structure without literals
79                match &insert.source {
80                    vibesql_ast::InsertSource::Values(rows) => {
81                        "VALUES".hash(hasher);
82                        rows.len().hash(hasher);
83                        for row in rows {
84                            row.len().hash(hasher);
85                            for expr in row {
86                                Self::hash_expression(expr, hasher);
87                            }
88                        }
89                    }
90                    vibesql_ast::InsertSource::Select(select) => {
91                        "SELECT".hash(hasher);
92                        Self::hash_select(select, hasher);
93                    }
94                }
95            }
96            Statement::Update(update) => {
97                "UPDATE".hash(hasher);
98                update.table_name.hash(hasher);
99                for assignment in &update.assignments {
100                    assignment.column.hash(hasher);
101                    Self::hash_expression(&assignment.value, hasher);
102                }
103                if let Some(ref where_clause) = update.where_clause {
104                    match where_clause {
105                        vibesql_ast::WhereClause::Condition(expr) => {
106                            Self::hash_expression(expr, hasher);
107                        }
108                        vibesql_ast::WhereClause::CurrentOf(cursor) => {
109                            "CURRENT_OF".hash(hasher);
110                            cursor.hash(hasher);
111                        }
112                    }
113                }
114            }
115            Statement::Delete(delete) => {
116                "DELETE".hash(hasher);
117                delete.table_name.hash(hasher);
118                if let Some(ref where_clause) = delete.where_clause {
119                    match where_clause {
120                        vibesql_ast::WhereClause::Condition(expr) => {
121                            Self::hash_expression(expr, hasher);
122                        }
123                        vibesql_ast::WhereClause::CurrentOf(cursor) => {
124                            "CURRENT_OF".hash(hasher);
125                            cursor.hash(hasher);
126                        }
127                    }
128                }
129            }
130            // For other statement types, fall back to discriminant hashing
131            _ => {
132                std::mem::discriminant(stmt).hash(hasher);
133            }
134        }
135    }
136
137    /// Hash a SELECT statement structure
138    fn hash_select(select: &vibesql_ast::SelectStmt, hasher: &mut DefaultHasher) {
139        // Hash DISTINCT
140        select.distinct.hash(hasher);
141
142        // Hash select items
143        for item in &select.select_list {
144            match item {
145                vibesql_ast::SelectItem::Wildcard { .. } => "WILDCARD".hash(hasher),
146                vibesql_ast::SelectItem::QualifiedWildcard { qualifier, .. } => {
147                    "QUALIFIED_WILDCARD".hash(hasher);
148                    qualifier.hash(hasher);
149                }
150                vibesql_ast::SelectItem::Expression { expr, alias } => {
151                    Self::hash_expression(expr, hasher);
152                    alias.hash(hasher);
153                }
154            }
155        }
156
157        // Hash FROM clause
158        if let Some(ref from) = select.from {
159            Self::hash_from_clause(from, hasher);
160        }
161
162        // Hash WHERE clause
163        if let Some(ref where_clause) = select.where_clause {
164            Self::hash_expression(where_clause, hasher);
165        }
166
167        // Hash GROUP BY
168        if let Some(ref group_by) = select.group_by {
169            Self::hash_group_by(group_by, hasher);
170        }
171
172        // Hash HAVING
173        if let Some(ref having) = select.having {
174            Self::hash_expression(having, hasher);
175        }
176
177        // Hash ORDER BY
178        if let Some(ref order_by) = select.order_by {
179            for item in order_by {
180                Self::hash_expression(&item.expr, hasher);
181                std::mem::discriminant(&item.direction).hash(hasher);
182            }
183        }
184
185        // Hash LIMIT/OFFSET (these are often literals, but we treat them as part of structure)
186        select.limit.hash(hasher);
187        select.offset.hash(hasher);
188    }
189
190    /// Hash a FROM clause structure
191    fn hash_from_clause(from: &vibesql_ast::FromClause, hasher: &mut DefaultHasher) {
192        match from {
193            vibesql_ast::FromClause::Table { name, alias, .. } => {
194                "TABLE".hash(hasher);
195                name.hash(hasher);
196                alias.hash(hasher);
197            }
198            vibesql_ast::FromClause::Join { left, join_type, right, condition, .. } => {
199                "JOIN".hash(hasher);
200                Self::hash_from_clause(left, hasher);
201                std::mem::discriminant(join_type).hash(hasher);
202                Self::hash_from_clause(right, hasher);
203                if let Some(expr) = condition {
204                    Self::hash_expression(expr, hasher);
205                }
206            }
207            vibesql_ast::FromClause::Subquery { query, alias, .. } => {
208                "SUBQUERY".hash(hasher);
209                Self::hash_select(query, hasher);
210                alias.hash(hasher);
211            }
212        }
213    }
214
215    fn hash_group_by(group_by: &vibesql_ast::GroupByClause, hasher: &mut DefaultHasher) {
216        match group_by {
217            vibesql_ast::GroupByClause::Simple(exprs) => {
218                "SIMPLE".hash(hasher);
219                for expr in exprs {
220                    Self::hash_expression(expr, hasher);
221                }
222            }
223            vibesql_ast::GroupByClause::Rollup(elements) => {
224                "ROLLUP".hash(hasher);
225                Self::hash_grouping_elements(elements, hasher);
226            }
227            vibesql_ast::GroupByClause::Cube(elements) => {
228                "CUBE".hash(hasher);
229                Self::hash_grouping_elements(elements, hasher);
230            }
231            vibesql_ast::GroupByClause::GroupingSets(sets) => {
232                "GROUPING_SETS".hash(hasher);
233                Self::hash_grouping_sets(sets, hasher);
234            }
235            vibesql_ast::GroupByClause::Mixed(items) => {
236                "MIXED".hash(hasher);
237                for item in items {
238                    match item {
239                        vibesql_ast::MixedGroupingItem::Simple(expr) => {
240                            "SIMPLE".hash(hasher);
241                            Self::hash_expression(expr, hasher);
242                        }
243                        vibesql_ast::MixedGroupingItem::Rollup(elements) => {
244                            "ROLLUP".hash(hasher);
245                            Self::hash_grouping_elements(elements, hasher);
246                        }
247                        vibesql_ast::MixedGroupingItem::Cube(elements) => {
248                            "CUBE".hash(hasher);
249                            Self::hash_grouping_elements(elements, hasher);
250                        }
251                        vibesql_ast::MixedGroupingItem::GroupingSets(sets) => {
252                            "GROUPING_SETS".hash(hasher);
253                            Self::hash_grouping_sets(sets, hasher);
254                        }
255                    }
256                }
257            }
258        }
259    }
260
261    fn hash_grouping_sets(sets: &[vibesql_ast::GroupingSet], hasher: &mut DefaultHasher) {
262        for set in sets {
263            "SET".hash(hasher);
264            for expr in &set.columns {
265                Self::hash_expression(expr, hasher);
266            }
267        }
268    }
269
270    fn hash_grouping_elements(
271        elements: &[vibesql_ast::GroupingElement],
272        hasher: &mut DefaultHasher,
273    ) {
274        for element in elements {
275            match element {
276                vibesql_ast::GroupingElement::Single(expr) => {
277                    "SINGLE".hash(hasher);
278                    Self::hash_expression(expr, hasher);
279                }
280                vibesql_ast::GroupingElement::Composite(exprs) => {
281                    "COMPOSITE".hash(hasher);
282                    for expr in exprs {
283                        Self::hash_expression(expr, hasher);
284                    }
285                }
286            }
287        }
288    }
289
290    /// Hash an expression, replacing literals with a placeholder marker
291    fn hash_expression(expr: &Expression, hasher: &mut DefaultHasher) {
292        match expr {
293            // Key difference: All literals and placeholders hash to the same value
294            // This allows parameterized queries to match with literal values
295            Expression::Literal(_)
296            | Expression::Placeholder(_)
297            | Expression::NumberedPlaceholder(_)
298            | Expression::NamedPlaceholder(_) => {
299                "LITERAL_PLACEHOLDER".hash(hasher)
300            }
301
302            Expression::ColumnRef { table, column } => {
303                "COLUMN".hash(hasher);
304                table.hash(hasher);
305                column.hash(hasher);
306            }
307
308            Expression::PseudoVariable { pseudo_table, column } => {
309                "PSEUDO_VARIABLE".hash(hasher);
310                std::mem::discriminant(pseudo_table).hash(hasher);
311                column.hash(hasher);
312            }
313
314            Expression::BinaryOp { op, left, right } => {
315                "BINARY_OP".hash(hasher);
316                std::mem::discriminant(op).hash(hasher);
317                Self::hash_expression(left, hasher);
318                Self::hash_expression(right, hasher);
319            }
320
321            Expression::UnaryOp { op, expr } => {
322                "UNARY_OP".hash(hasher);
323                std::mem::discriminant(op).hash(hasher);
324                Self::hash_expression(expr, hasher);
325            }
326
327            Expression::Function { name, args, character_unit } => {
328                "FUNCTION".hash(hasher);
329                name.to_lowercase().hash(hasher);
330                for arg in args {
331                    Self::hash_expression(arg, hasher);
332                }
333                if let Some(ref unit) = character_unit {
334                    std::mem::discriminant(unit).hash(hasher);
335                }
336            }
337
338            Expression::AggregateFunction { name, distinct, args } => {
339                "AGGREGATE".hash(hasher);
340                name.to_lowercase().hash(hasher);
341                distinct.hash(hasher);
342                for arg in args {
343                    Self::hash_expression(arg, hasher);
344                }
345            }
346
347            Expression::IsNull { expr, negated } => {
348                "IS_NULL".hash(hasher);
349                Self::hash_expression(expr, hasher);
350                negated.hash(hasher);
351            }
352
353            Expression::Wildcard => "WILDCARD".hash(hasher),
354
355            Expression::Case { operand, when_clauses, else_result } => {
356                "CASE".hash(hasher);
357                if let Some(ref op) = operand {
358                    Self::hash_expression(op, hasher);
359                }
360                for when in when_clauses {
361                    for cond in &when.conditions {
362                        Self::hash_expression(cond, hasher);
363                    }
364                    Self::hash_expression(&when.result, hasher);
365                }
366                if let Some(ref else_expr) = else_result {
367                    Self::hash_expression(else_expr, hasher);
368                }
369            }
370
371            Expression::ScalarSubquery(subquery) => {
372                "SCALAR_SUBQUERY".hash(hasher);
373                Self::hash_select(subquery, hasher);
374            }
375
376            Expression::In { expr, subquery, negated } => {
377                "IN_SUBQUERY".hash(hasher);
378                Self::hash_expression(expr, hasher);
379                Self::hash_select(subquery, hasher);
380                negated.hash(hasher);
381            }
382
383            Expression::InList { expr, values, negated } => {
384                "IN_LIST".hash(hasher);
385                Self::hash_expression(expr, hasher);
386                values.len().hash(hasher);
387                for val in values {
388                    Self::hash_expression(val, hasher);
389                }
390                negated.hash(hasher);
391            }
392
393            Expression::Between { expr, low, high, negated, symmetric } => {
394                "BETWEEN".hash(hasher);
395                Self::hash_expression(expr, hasher);
396                Self::hash_expression(low, hasher);
397                Self::hash_expression(high, hasher);
398                negated.hash(hasher);
399                symmetric.hash(hasher);
400            }
401
402            Expression::Cast { expr, data_type } => {
403                "CAST".hash(hasher);
404                Self::hash_expression(expr, hasher);
405                std::mem::discriminant(data_type).hash(hasher);
406            }
407
408            Expression::Position { substring, string, character_unit } => {
409                "POSITION".hash(hasher);
410                Self::hash_expression(substring, hasher);
411                Self::hash_expression(string, hasher);
412                if let Some(ref unit) = character_unit {
413                    std::mem::discriminant(unit).hash(hasher);
414                }
415            }
416
417            Expression::Trim { position, removal_char, string } => {
418                "TRIM".hash(hasher);
419                if let Some(ref pos) = position {
420                    std::mem::discriminant(pos).hash(hasher);
421                }
422                if let Some(ref ch) = removal_char {
423                    Self::hash_expression(ch, hasher);
424                }
425                Self::hash_expression(string, hasher);
426            }
427
428            Expression::Extract { field, expr } => {
429                "EXTRACT".hash(hasher);
430                std::mem::discriminant(field).hash(hasher);
431                Self::hash_expression(expr, hasher);
432            }
433
434            Expression::Like { expr, pattern, negated } => {
435                "LIKE".hash(hasher);
436                Self::hash_expression(expr, hasher);
437                Self::hash_expression(pattern, hasher);
438                negated.hash(hasher);
439            }
440
441            Expression::Exists { subquery, negated } => {
442                "EXISTS".hash(hasher);
443                Self::hash_select(subquery, hasher);
444                negated.hash(hasher);
445            }
446
447            Expression::QuantifiedComparison { expr, op, quantifier, subquery } => {
448                "QUANTIFIED".hash(hasher);
449                Self::hash_expression(expr, hasher);
450                std::mem::discriminant(op).hash(hasher);
451                std::mem::discriminant(quantifier).hash(hasher);
452                Self::hash_select(subquery, hasher);
453            }
454
455            Expression::CurrentDate => "CURRENT_DATE".hash(hasher),
456
457            Expression::CurrentTime { precision } => {
458                "CURRENT_TIME".hash(hasher);
459                precision.hash(hasher);
460            }
461
462            Expression::CurrentTimestamp { precision } => {
463                "CURRENT_TIMESTAMP".hash(hasher);
464                precision.hash(hasher);
465            }
466
467            Expression::Interval {
468                value,
469                unit,
470                leading_precision,
471                fractional_precision,
472            } => {
473                "INTERVAL".hash(hasher);
474                Self::hash_expression(value, hasher);
475                format!("{:?}", unit).hash(hasher);
476                leading_precision.hash(hasher);
477                fractional_precision.hash(hasher);
478            }
479
480            Expression::Default => "DEFAULT".hash(hasher),
481
482            Expression::DuplicateKeyValue { column } => {
483                "DUPLICATE_KEY_VALUE".hash(hasher);
484                column.hash(hasher);
485            }
486
487            Expression::WindowFunction { function, over } => {
488                "WINDOW_FUNCTION".hash(hasher);
489                // Hash function type and arguments
490                match function {
491                    vibesql_ast::WindowFunctionSpec::Aggregate { name, args } => {
492                        "AGGREGATE".hash(hasher);
493                        name.to_lowercase().hash(hasher);
494                        for arg in args {
495                            Self::hash_expression(arg, hasher);
496                        }
497                    }
498                    vibesql_ast::WindowFunctionSpec::Ranking { name, args } => {
499                        "RANKING".hash(hasher);
500                        name.to_lowercase().hash(hasher);
501                        for arg in args {
502                            Self::hash_expression(arg, hasher);
503                        }
504                    }
505                    vibesql_ast::WindowFunctionSpec::Value { name, args } => {
506                        "VALUE".hash(hasher);
507                        name.to_lowercase().hash(hasher);
508                        for arg in args {
509                            Self::hash_expression(arg, hasher);
510                        }
511                    }
512                }
513
514                // Hash OVER clause components
515                if let Some(ref partition_by) = over.partition_by {
516                    for expr in partition_by {
517                        Self::hash_expression(expr, hasher);
518                    }
519                }
520                if let Some(ref order_by) = over.order_by {
521                    for item in order_by {
522                        Self::hash_expression(&item.expr, hasher);
523                        std::mem::discriminant(&item.direction).hash(hasher);
524                    }
525                }
526                if let Some(ref frame) = over.frame {
527                    std::mem::discriminant(&frame.unit).hash(hasher);
528                    std::mem::discriminant(&frame.start).hash(hasher);
529                    if let Some(ref end) = frame.end {
530                        std::mem::discriminant(end).hash(hasher);
531                    }
532                }
533            }
534
535            Expression::NextValue { sequence_name } => {
536                "NEXT_VALUE".hash(hasher);
537                sequence_name.hash(hasher);
538            }
539
540            Expression::MatchAgainst { columns, search_modifier, mode } => {
541                "MATCH_AGAINST".hash(hasher);
542                for col in columns {
543                    col.hash(hasher);
544                }
545                Self::hash_expression(search_modifier, hasher);
546                std::mem::discriminant(mode).hash(hasher);
547            }
548
549            Expression::SessionVariable { name } => {
550                "SESSION_VARIABLE".hash(hasher);
551                name.hash(hasher);
552            }
553
554            Expression::Conjunction(children) | Expression::Disjunction(children) => {
555                for child in children {
556                    Self::hash_expression(child, hasher);
557                }
558            }
559        }
560    }
561
562    // ========================================================================
563    // Arena-allocated type hashing
564    // ========================================================================
565
566    /// Hash an arena-allocated SELECT statement structure
567    fn hash_arena_select(select: &ArenaSelectStmt<'_>, hasher: &mut DefaultHasher) {
568        // Hash DISTINCT
569        select.distinct.hash(hasher);
570
571        // Hash select items
572        for item in &select.select_list {
573            match item {
574                ArenaSelectItem::Wildcard { .. } => "WILDCARD".hash(hasher),
575                ArenaSelectItem::QualifiedWildcard { qualifier, .. } => {
576                    "QUALIFIED_WILDCARD".hash(hasher);
577                    qualifier.hash(hasher);
578                }
579                ArenaSelectItem::Expression { expr, alias } => {
580                    Self::hash_arena_expression(expr, hasher);
581                    alias.hash(hasher);
582                }
583            }
584        }
585
586        // Hash FROM clause
587        if let Some(ref from) = select.from {
588            Self::hash_arena_from_clause(from, hasher);
589        }
590
591        // Hash WHERE clause
592        if let Some(ref where_clause) = select.where_clause {
593            Self::hash_arena_expression(where_clause, hasher);
594        }
595
596        // Hash GROUP BY
597        if let Some(ref group_by) = select.group_by {
598            Self::hash_arena_group_by(group_by, hasher);
599        }
600
601        // Hash HAVING
602        if let Some(ref having) = select.having {
603            Self::hash_arena_expression(having, hasher);
604        }
605
606        // Hash ORDER BY
607        if let Some(ref order_by) = select.order_by {
608            for item in order_by {
609                Self::hash_arena_expression(&item.expr, hasher);
610                std::mem::discriminant(&item.direction).hash(hasher);
611            }
612        }
613
614        // Hash LIMIT/OFFSET
615        select.limit.hash(hasher);
616        select.offset.hash(hasher);
617    }
618
619    /// Hash an arena-allocated FROM clause structure
620    fn hash_arena_from_clause(from: &ArenaFromClause<'_>, hasher: &mut DefaultHasher) {
621        match from {
622            ArenaFromClause::Table { name, alias, .. } => {
623                "TABLE".hash(hasher);
624                name.hash(hasher);
625                alias.hash(hasher);
626            }
627            ArenaFromClause::Join {
628                left,
629                join_type,
630                right,
631                condition,
632                ..
633            } => {
634                "JOIN".hash(hasher);
635                Self::hash_arena_from_clause(left, hasher);
636                std::mem::discriminant(join_type).hash(hasher);
637                Self::hash_arena_from_clause(right, hasher);
638                if let Some(expr) = condition {
639                    Self::hash_arena_expression(expr, hasher);
640                }
641            }
642            ArenaFromClause::Subquery { query, alias, .. } => {
643                "SUBQUERY".hash(hasher);
644                Self::hash_arena_select(query, hasher);
645                alias.hash(hasher);
646            }
647        }
648    }
649
650    fn hash_arena_group_by(group_by: &ArenaGroupByClause<'_>, hasher: &mut DefaultHasher) {
651        match group_by {
652            ArenaGroupByClause::Simple(exprs) => {
653                "SIMPLE".hash(hasher);
654                for expr in exprs {
655                    Self::hash_arena_expression(expr, hasher);
656                }
657            }
658            ArenaGroupByClause::Rollup(elements) => {
659                "ROLLUP".hash(hasher);
660                Self::hash_arena_grouping_elements(elements, hasher);
661            }
662            ArenaGroupByClause::Cube(elements) => {
663                "CUBE".hash(hasher);
664                Self::hash_arena_grouping_elements(elements, hasher);
665            }
666            ArenaGroupByClause::GroupingSets(sets) => {
667                "GROUPING_SETS".hash(hasher);
668                Self::hash_arena_grouping_sets(sets, hasher);
669            }
670            ArenaGroupByClause::Mixed(items) => {
671                "MIXED".hash(hasher);
672                for item in items {
673                    match item {
674                        ArenaMixedGroupingItem::Simple(expr) => {
675                            "SIMPLE".hash(hasher);
676                            Self::hash_arena_expression(expr, hasher);
677                        }
678                        ArenaMixedGroupingItem::Rollup(elements) => {
679                            "ROLLUP".hash(hasher);
680                            Self::hash_arena_grouping_elements(elements, hasher);
681                        }
682                        ArenaMixedGroupingItem::Cube(elements) => {
683                            "CUBE".hash(hasher);
684                            Self::hash_arena_grouping_elements(elements, hasher);
685                        }
686                        ArenaMixedGroupingItem::GroupingSets(sets) => {
687                            "GROUPING_SETS".hash(hasher);
688                            Self::hash_arena_grouping_sets(sets, hasher);
689                        }
690                    }
691                }
692            }
693        }
694    }
695
696    fn hash_arena_grouping_sets(
697        sets: &bumpalo::collections::Vec<'_, ArenaGroupingSet<'_>>,
698        hasher: &mut DefaultHasher,
699    ) {
700        for set in sets {
701            "SET".hash(hasher);
702            for expr in &set.columns {
703                Self::hash_arena_expression(expr, hasher);
704            }
705        }
706    }
707
708    fn hash_arena_grouping_elements(
709        elements: &bumpalo::collections::Vec<'_, ArenaGroupingElement<'_>>,
710        hasher: &mut DefaultHasher,
711    ) {
712        for element in elements {
713            match element {
714                ArenaGroupingElement::Single(expr) => {
715                    "SINGLE".hash(hasher);
716                    Self::hash_arena_expression(expr, hasher);
717                }
718                ArenaGroupingElement::Composite(exprs) => {
719                    "COMPOSITE".hash(hasher);
720                    for expr in exprs {
721                        Self::hash_arena_expression(expr, hasher);
722                    }
723                }
724            }
725        }
726    }
727
728    /// Hash an arena-allocated expression, replacing literals with a placeholder marker
729    fn hash_arena_expression(expr: &ArenaExpression<'_>, hasher: &mut DefaultHasher) {
730        match expr {
731            // Hot-path inline variants
732            // All literals and placeholders hash to the same value
733            ArenaExpression::Literal(_)
734            | ArenaExpression::Placeholder(_)
735            | ArenaExpression::NumberedPlaceholder(_)
736            | ArenaExpression::NamedPlaceholder(_) => {
737                "LITERAL_PLACEHOLDER".hash(hasher)
738            }
739
740            ArenaExpression::ColumnRef { table, column } => {
741                "COLUMN".hash(hasher);
742                table.hash(hasher);
743                column.hash(hasher);
744            }
745
746            ArenaExpression::BinaryOp { op, left, right } => {
747                "BINARY_OP".hash(hasher);
748                std::mem::discriminant(op).hash(hasher);
749                Self::hash_arena_expression(left, hasher);
750                Self::hash_arena_expression(right, hasher);
751            }
752
753            ArenaExpression::UnaryOp { op, expr } => {
754                "UNARY_OP".hash(hasher);
755                std::mem::discriminant(op).hash(hasher);
756                Self::hash_arena_expression(expr, hasher);
757            }
758
759            ArenaExpression::IsNull { expr, negated } => {
760                "IS_NULL".hash(hasher);
761                Self::hash_arena_expression(expr, hasher);
762                negated.hash(hasher);
763            }
764
765            ArenaExpression::Wildcard => "WILDCARD".hash(hasher),
766
767            ArenaExpression::CurrentDate => "CURRENT_DATE".hash(hasher),
768
769            ArenaExpression::CurrentTime { precision } => {
770                "CURRENT_TIME".hash(hasher);
771                precision.hash(hasher);
772            }
773
774            ArenaExpression::CurrentTimestamp { precision } => {
775                "CURRENT_TIMESTAMP".hash(hasher);
776                precision.hash(hasher);
777            }
778
779            ArenaExpression::Default => "DEFAULT".hash(hasher),
780
781            ArenaExpression::Conjunction(children) | ArenaExpression::Disjunction(children) => {
782                for child in children.iter() {
783                    Self::hash_arena_expression(child, hasher);
784                }
785            }
786
787            // Cold-path extended variants
788            ArenaExpression::Extended(ext) => Self::hash_arena_extended_expr(ext, hasher),
789        }
790    }
791
792    /// Hash an arena-allocated extended expression
793    fn hash_arena_extended_expr(ext: &ArenaExtendedExpr<'_>, hasher: &mut DefaultHasher) {
794        match ext {
795            ArenaExtendedExpr::Function { name, args, character_unit } => {
796                "FUNCTION".hash(hasher);
797                name.hash(hasher);
798                for arg in args {
799                    Self::hash_arena_expression(arg, hasher);
800                }
801                if let Some(ref unit) = character_unit {
802                    std::mem::discriminant(unit).hash(hasher);
803                }
804            }
805
806            ArenaExtendedExpr::AggregateFunction { name, distinct, args } => {
807                "AGGREGATE".hash(hasher);
808                name.hash(hasher);
809                distinct.hash(hasher);
810                for arg in args {
811                    Self::hash_arena_expression(arg, hasher);
812                }
813            }
814
815            ArenaExtendedExpr::Case { operand, when_clauses, else_result } => {
816                "CASE".hash(hasher);
817                if let Some(op) = operand {
818                    Self::hash_arena_expression(op, hasher);
819                }
820                for when in when_clauses {
821                    for cond in &when.conditions {
822                        Self::hash_arena_expression(cond, hasher);
823                    }
824                    Self::hash_arena_expression(&when.result, hasher);
825                }
826                if let Some(else_expr) = else_result {
827                    Self::hash_arena_expression(else_expr, hasher);
828                }
829            }
830
831            ArenaExtendedExpr::ScalarSubquery(subquery) => {
832                "SCALAR_SUBQUERY".hash(hasher);
833                Self::hash_arena_select(subquery, hasher);
834            }
835
836            ArenaExtendedExpr::In { expr, subquery, negated } => {
837                "IN_SUBQUERY".hash(hasher);
838                Self::hash_arena_expression(expr, hasher);
839                Self::hash_arena_select(subquery, hasher);
840                negated.hash(hasher);
841            }
842
843            ArenaExtendedExpr::InList { expr, values, negated } => {
844                "IN_LIST".hash(hasher);
845                Self::hash_arena_expression(expr, hasher);
846                values.len().hash(hasher);
847                for val in values {
848                    Self::hash_arena_expression(val, hasher);
849                }
850                negated.hash(hasher);
851            }
852
853            ArenaExtendedExpr::Between { expr, low, high, negated, symmetric } => {
854                "BETWEEN".hash(hasher);
855                Self::hash_arena_expression(expr, hasher);
856                Self::hash_arena_expression(low, hasher);
857                Self::hash_arena_expression(high, hasher);
858                negated.hash(hasher);
859                symmetric.hash(hasher);
860            }
861
862            ArenaExtendedExpr::Cast { expr, data_type } => {
863                "CAST".hash(hasher);
864                Self::hash_arena_expression(expr, hasher);
865                std::mem::discriminant(data_type).hash(hasher);
866            }
867
868            ArenaExtendedExpr::Position { substring, string, character_unit } => {
869                "POSITION".hash(hasher);
870                Self::hash_arena_expression(substring, hasher);
871                Self::hash_arena_expression(string, hasher);
872                if let Some(unit) = character_unit {
873                    std::mem::discriminant(unit).hash(hasher);
874                }
875            }
876
877            ArenaExtendedExpr::Trim { position, removal_char, string } => {
878                "TRIM".hash(hasher);
879                if let Some(pos) = position {
880                    std::mem::discriminant(pos).hash(hasher);
881                }
882                if let Some(ch) = removal_char {
883                    Self::hash_arena_expression(ch, hasher);
884                }
885                Self::hash_arena_expression(string, hasher);
886            }
887
888            ArenaExtendedExpr::Extract { field, expr } => {
889                "EXTRACT".hash(hasher);
890                std::mem::discriminant(field).hash(hasher);
891                Self::hash_arena_expression(expr, hasher);
892            }
893
894            ArenaExtendedExpr::Like { expr, pattern, negated } => {
895                "LIKE".hash(hasher);
896                Self::hash_arena_expression(expr, hasher);
897                Self::hash_arena_expression(pattern, hasher);
898                negated.hash(hasher);
899            }
900
901            ArenaExtendedExpr::Exists { subquery, negated } => {
902                "EXISTS".hash(hasher);
903                Self::hash_arena_select(subquery, hasher);
904                negated.hash(hasher);
905            }
906
907            ArenaExtendedExpr::QuantifiedComparison { expr, op, quantifier, subquery } => {
908                "QUANTIFIED".hash(hasher);
909                Self::hash_arena_expression(expr, hasher);
910                std::mem::discriminant(op).hash(hasher);
911                std::mem::discriminant(quantifier).hash(hasher);
912                Self::hash_arena_select(subquery, hasher);
913            }
914
915            ArenaExtendedExpr::Interval {
916                value,
917                unit,
918                leading_precision,
919                fractional_precision,
920            } => {
921                "INTERVAL".hash(hasher);
922                Self::hash_arena_expression(value, hasher);
923                format!("{:?}", unit).hash(hasher);
924                leading_precision.hash(hasher);
925                fractional_precision.hash(hasher);
926            }
927
928            ArenaExtendedExpr::DuplicateKeyValue { column } => {
929                "DUPLICATE_KEY_VALUE".hash(hasher);
930                column.hash(hasher);
931            }
932
933            ArenaExtendedExpr::WindowFunction { function, over } => {
934                "WINDOW_FUNCTION".hash(hasher);
935                match function {
936                    ArenaWindowFunctionSpec::Aggregate { name, args } => {
937                        "AGGREGATE".hash(hasher);
938                        name.hash(hasher);
939                        for arg in args {
940                            Self::hash_arena_expression(arg, hasher);
941                        }
942                    }
943                    ArenaWindowFunctionSpec::Ranking { name, args } => {
944                        "RANKING".hash(hasher);
945                        name.hash(hasher);
946                        for arg in args {
947                            Self::hash_arena_expression(arg, hasher);
948                        }
949                    }
950                    ArenaWindowFunctionSpec::Value { name, args } => {
951                        "VALUE".hash(hasher);
952                        name.hash(hasher);
953                        for arg in args {
954                            Self::hash_arena_expression(arg, hasher);
955                        }
956                    }
957                }
958
959                if let Some(ref partition_by) = over.partition_by {
960                    for expr in partition_by {
961                        Self::hash_arena_expression(expr, hasher);
962                    }
963                }
964                if let Some(ref order_by) = over.order_by {
965                    for item in order_by {
966                        Self::hash_arena_expression(&item.expr, hasher);
967                        std::mem::discriminant(&item.direction).hash(hasher);
968                    }
969                }
970                if let Some(ref frame) = over.frame {
971                    std::mem::discriminant(&frame.unit).hash(hasher);
972                    std::mem::discriminant(&frame.start).hash(hasher);
973                    if let Some(ref end) = frame.end {
974                        std::mem::discriminant(end).hash(hasher);
975                    }
976                }
977            }
978
979            ArenaExtendedExpr::NextValue { sequence_name } => {
980                "NEXT_VALUE".hash(hasher);
981                sequence_name.hash(hasher);
982            }
983
984            ArenaExtendedExpr::MatchAgainst { columns, search_modifier, mode } => {
985                "MATCH_AGAINST".hash(hasher);
986                for col in columns {
987                    col.hash(hasher);
988                }
989                Self::hash_arena_expression(search_modifier, hasher);
990                std::mem::discriminant(mode).hash(hasher);
991            }
992
993            ArenaExtendedExpr::PseudoVariable { pseudo_table, column } => {
994                "PSEUDO_VARIABLE".hash(hasher);
995                std::mem::discriminant(pseudo_table).hash(hasher);
996                column.hash(hasher);
997            }
998
999            ArenaExtendedExpr::SessionVariable { name } => {
1000                "SESSION_VARIABLE".hash(hasher);
1001                name.hash(hasher);
1002            }
1003        }
1004    }
1005}
1006
#[cfg(test)]
mod tests {
    use vibesql_ast::{BinaryOperator, Expression, FromClause, SelectItem, SelectStmt, Statement};
    use vibesql_types::SqlValue;

    use super::*;

    /// Builds the owned-AST statement `SELECT col0 FROM tab WHERE col1 <op> <literal>`.
    ///
    /// Only the comparison operator and the right-hand literal vary between the
    /// AST-based tests, so they share this single fixture.
    fn select_col0_where_col1(op: BinaryOperator, literal: SqlValue) -> Statement {
        Statement::Select(Box::new(SelectStmt {
            with_clause: None,
            distinct: false,
            select_list: vec![SelectItem::Expression {
                expr: Expression::ColumnRef { table: None, column: "col0".to_string() },
                alias: None,
            }],
            into_table: None,
            into_variables: None,
            from: Some(FromClause::Table {
                name: "tab".to_string(),
                alias: None,
                column_aliases: None,
            }),
            where_clause: Some(Expression::BinaryOp {
                op,
                left: Box::new(Expression::ColumnRef { table: None, column: "col1".to_string() }),
                right: Box::new(Expression::Literal(literal)),
            }),
            group_by: None,
            having: None,
            order_by: None,
            limit: None,
            offset: None,
            set_operation: None,
        }))
    }

    #[test]
    fn test_same_query_same_signature() {
        let sig1 = QuerySignature::from_sql("SELECT * FROM users");
        let sig2 = QuerySignature::from_sql("SELECT * FROM users");
        assert_eq!(sig1, sig2);
    }

    #[test]
    fn test_whitespace_normalization() {
        let sig1 = QuerySignature::from_sql("SELECT * FROM users");
        let sig2 = QuerySignature::from_sql("SELECT  *  FROM  users");
        assert_eq!(sig1, sig2);
    }

    #[test]
    fn test_case_insensitive() {
        let sig1 = QuerySignature::from_sql("SELECT * FROM users");
        let sig2 = QuerySignature::from_sql("select * from users");
        assert_eq!(sig1, sig2);
    }

    #[test]
    fn test_different_queries_different_signature() {
        let sig1 = QuerySignature::from_sql("SELECT * FROM users");
        let sig2 = QuerySignature::from_sql("SELECT * FROM orders");
        assert_ne!(sig1, sig2);
    }

    #[test]
    fn test_different_literals_different_signature_string_based() {
        // Different literals create different signatures with string-based hashing
        let sig1 = QuerySignature::from_sql("SELECT col0 FROM tab WHERE col1 > 5");
        let sig2 = QuerySignature::from_sql("SELECT col0 FROM tab WHERE col1 > 10");
        // String-based hashing includes literals in the signature
        assert_ne!(sig1, sig2);
    }

    #[test]
    fn test_ast_based_same_structure_different_literals() {
        // SELECT col0 FROM tab WHERE col1 > 5
        let stmt1 = select_col0_where_col1(BinaryOperator::GreaterThan, SqlValue::Integer(5));
        // SELECT col0 FROM tab WHERE col1 > 10 (different literal)
        let stmt2 = select_col0_where_col1(BinaryOperator::GreaterThan, SqlValue::Integer(10));

        let sig1 = QuerySignature::from_ast(&stmt1);
        let sig2 = QuerySignature::from_ast(&stmt2);

        // AST-based signatures should be the same despite different literals
        assert_eq!(sig1, sig2);
    }

    #[test]
    fn test_ast_based_different_structure() {
        // SELECT col0 FROM tab WHERE col1 > 5
        let stmt1 = select_col0_where_col1(BinaryOperator::GreaterThan, SqlValue::Integer(5));
        // SELECT col0 FROM tab WHERE col1 < 5 (different operator)
        let stmt2 = select_col0_where_col1(BinaryOperator::LessThan, SqlValue::Integer(5));

        let sig1 = QuerySignature::from_ast(&stmt1);
        let sig2 = QuerySignature::from_ast(&stmt2);

        // Different structure should produce different signatures
        assert_ne!(sig1, sig2);
    }
}