vibesql_executor/cache/
query_signature.rs

1//! Query signature generation for cache keys
2//!
3//! Generates deterministic cache keys from SQL queries by normalizing the AST
4//! and creating a hash. Queries with identical structure (different literals)
5//! will have the same signature.
6
7use std::{
8    collections::hash_map::DefaultHasher,
9    hash::{Hash, Hasher},
10};
11
12use vibesql_ast::arena::{
13    Expression as ArenaExpression, ExtendedExpr as ArenaExtendedExpr,
14    FromClause as ArenaFromClause, GroupByClause as ArenaGroupByClause,
15    GroupingElement as ArenaGroupingElement, GroupingSet as ArenaGroupingSet,
16    MixedGroupingItem as ArenaMixedGroupingItem, SelectItem as ArenaSelectItem,
17    SelectStmt as ArenaSelectStmt, WindowFunctionSpec as ArenaWindowFunctionSpec,
18};
19use vibesql_ast::{Expression, Statement};
20
/// Cache key identifying a query by its shape.
///
/// Wraps a single 64-bit hash; two signatures compare equal exactly when
/// their stored hashes are equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct QuerySignature {
    /// Hash value produced by one of the `from_*` constructors.
    hash: u64,
}
26
27impl QuerySignature {
28    /// Create a signature from SQL text (legacy string-based approach)
29    pub fn from_sql(sql: &str) -> Self {
30        let normalized = Self::normalize(sql);
31        let mut hasher = DefaultHasher::new();
32        normalized.hash(&mut hasher);
33        let hash = hasher.finish();
34        Self { hash }
35    }
36
37    /// Create a signature from parsed AST, ignoring literal values
38    /// This allows queries with different literals but identical structure to share cached plans
39    pub fn from_ast(stmt: &Statement) -> Self {
40        let mut hasher = DefaultHasher::new();
41        Self::hash_statement(stmt, &mut hasher);
42        let hash = hasher.finish();
43        Self { hash }
44    }
45
46    /// Create a signature from arena-allocated SelectStmt, ignoring literal values
47    pub fn from_arena_select(select: &ArenaSelectStmt<'_>) -> Self {
48        let mut hasher = DefaultHasher::new();
49        "SELECT".hash(&mut hasher);
50        Self::hash_arena_select(select, &mut hasher);
51        let hash = hasher.finish();
52        Self { hash }
53    }
54
55    /// Get the underlying hash
56    pub fn hash(&self) -> u64 {
57        self.hash
58    }
59
60    /// Normalize SQL: trim and collapse whitespace
61    fn normalize(sql: &str) -> String {
62        sql.split_whitespace().collect::<Vec<_>>().join(" ").to_lowercase()
63    }
64
65    /// Hash a statement, replacing literals with a placeholder marker
66    fn hash_statement(stmt: &Statement, hasher: &mut DefaultHasher) {
67        match stmt {
68            Statement::Select(select) => {
69                "SELECT".hash(hasher);
70                Self::hash_select(select, hasher);
71            }
72            Statement::Insert(insert) => {
73                "INSERT".hash(hasher);
74                insert.table_name.hash(hasher);
75                for col in &insert.columns {
76                    col.hash(hasher);
77                }
78                // Hash the insert source structure without literals
79                match &insert.source {
80                    vibesql_ast::InsertSource::Values(rows) => {
81                        "VALUES".hash(hasher);
82                        rows.len().hash(hasher);
83                        for row in rows {
84                            row.len().hash(hasher);
85                            for expr in row {
86                                Self::hash_expression(expr, hasher);
87                            }
88                        }
89                    }
90                    vibesql_ast::InsertSource::Select(select) => {
91                        "SELECT".hash(hasher);
92                        Self::hash_select(select, hasher);
93                    }
94                }
95            }
96            Statement::Update(update) => {
97                "UPDATE".hash(hasher);
98                update.table_name.hash(hasher);
99                for assignment in &update.assignments {
100                    assignment.column.hash(hasher);
101                    Self::hash_expression(&assignment.value, hasher);
102                }
103                if let Some(ref where_clause) = update.where_clause {
104                    match where_clause {
105                        vibesql_ast::WhereClause::Condition(expr) => {
106                            Self::hash_expression(expr, hasher);
107                        }
108                        vibesql_ast::WhereClause::CurrentOf(cursor) => {
109                            "CURRENT_OF".hash(hasher);
110                            cursor.hash(hasher);
111                        }
112                    }
113                }
114            }
115            Statement::Delete(delete) => {
116                "DELETE".hash(hasher);
117                delete.table_name.hash(hasher);
118                if let Some(ref where_clause) = delete.where_clause {
119                    match where_clause {
120                        vibesql_ast::WhereClause::Condition(expr) => {
121                            Self::hash_expression(expr, hasher);
122                        }
123                        vibesql_ast::WhereClause::CurrentOf(cursor) => {
124                            "CURRENT_OF".hash(hasher);
125                            cursor.hash(hasher);
126                        }
127                    }
128                }
129            }
130            // For other statement types, fall back to discriminant hashing
131            _ => {
132                std::mem::discriminant(stmt).hash(hasher);
133            }
134        }
135    }
136
137    /// Hash a SELECT statement structure
138    fn hash_select(select: &vibesql_ast::SelectStmt, hasher: &mut DefaultHasher) {
139        // Hash DISTINCT
140        select.distinct.hash(hasher);
141
142        // Hash select items
143        for item in &select.select_list {
144            match item {
145                vibesql_ast::SelectItem::Wildcard { .. } => "WILDCARD".hash(hasher),
146                vibesql_ast::SelectItem::QualifiedWildcard { qualifier, .. } => {
147                    "QUALIFIED_WILDCARD".hash(hasher);
148                    qualifier.hash(hasher);
149                }
150                vibesql_ast::SelectItem::Expression { expr, alias } => {
151                    Self::hash_expression(expr, hasher);
152                    alias.hash(hasher);
153                }
154            }
155        }
156
157        // Hash FROM clause
158        if let Some(ref from) = select.from {
159            Self::hash_from_clause(from, hasher);
160        }
161
162        // Hash WHERE clause
163        if let Some(ref where_clause) = select.where_clause {
164            Self::hash_expression(where_clause, hasher);
165        }
166
167        // Hash GROUP BY
168        if let Some(ref group_by) = select.group_by {
169            Self::hash_group_by(group_by, hasher);
170        }
171
172        // Hash HAVING
173        if let Some(ref having) = select.having {
174            Self::hash_expression(having, hasher);
175        }
176
177        // Hash ORDER BY
178        if let Some(ref order_by) = select.order_by {
179            for item in order_by {
180                Self::hash_expression(&item.expr, hasher);
181                std::mem::discriminant(&item.direction).hash(hasher);
182            }
183        }
184
185        // Hash LIMIT/OFFSET (these are often literals, but we treat them as part of structure)
186        select.limit.hash(hasher);
187        select.offset.hash(hasher);
188    }
189
190    /// Hash a FROM clause structure
191    fn hash_from_clause(from: &vibesql_ast::FromClause, hasher: &mut DefaultHasher) {
192        match from {
193            vibesql_ast::FromClause::Table { name, alias, .. } => {
194                "TABLE".hash(hasher);
195                name.hash(hasher);
196                alias.hash(hasher);
197            }
198            vibesql_ast::FromClause::Join { left, join_type, right, condition, .. } => {
199                "JOIN".hash(hasher);
200                Self::hash_from_clause(left, hasher);
201                std::mem::discriminant(join_type).hash(hasher);
202                Self::hash_from_clause(right, hasher);
203                if let Some(expr) = condition {
204                    Self::hash_expression(expr, hasher);
205                }
206            }
207            vibesql_ast::FromClause::Subquery { query, alias, .. } => {
208                "SUBQUERY".hash(hasher);
209                Self::hash_select(query, hasher);
210                alias.hash(hasher);
211            }
212        }
213    }
214
215    fn hash_group_by(group_by: &vibesql_ast::GroupByClause, hasher: &mut DefaultHasher) {
216        match group_by {
217            vibesql_ast::GroupByClause::Simple(exprs) => {
218                "SIMPLE".hash(hasher);
219                for expr in exprs {
220                    Self::hash_expression(expr, hasher);
221                }
222            }
223            vibesql_ast::GroupByClause::Rollup(elements) => {
224                "ROLLUP".hash(hasher);
225                Self::hash_grouping_elements(elements, hasher);
226            }
227            vibesql_ast::GroupByClause::Cube(elements) => {
228                "CUBE".hash(hasher);
229                Self::hash_grouping_elements(elements, hasher);
230            }
231            vibesql_ast::GroupByClause::GroupingSets(sets) => {
232                "GROUPING_SETS".hash(hasher);
233                Self::hash_grouping_sets(sets, hasher);
234            }
235            vibesql_ast::GroupByClause::Mixed(items) => {
236                "MIXED".hash(hasher);
237                for item in items {
238                    match item {
239                        vibesql_ast::MixedGroupingItem::Simple(expr) => {
240                            "SIMPLE".hash(hasher);
241                            Self::hash_expression(expr, hasher);
242                        }
243                        vibesql_ast::MixedGroupingItem::Rollup(elements) => {
244                            "ROLLUP".hash(hasher);
245                            Self::hash_grouping_elements(elements, hasher);
246                        }
247                        vibesql_ast::MixedGroupingItem::Cube(elements) => {
248                            "CUBE".hash(hasher);
249                            Self::hash_grouping_elements(elements, hasher);
250                        }
251                        vibesql_ast::MixedGroupingItem::GroupingSets(sets) => {
252                            "GROUPING_SETS".hash(hasher);
253                            Self::hash_grouping_sets(sets, hasher);
254                        }
255                    }
256                }
257            }
258        }
259    }
260
261    fn hash_grouping_sets(sets: &[vibesql_ast::GroupingSet], hasher: &mut DefaultHasher) {
262        for set in sets {
263            "SET".hash(hasher);
264            for expr in &set.columns {
265                Self::hash_expression(expr, hasher);
266            }
267        }
268    }
269
270    fn hash_grouping_elements(
271        elements: &[vibesql_ast::GroupingElement],
272        hasher: &mut DefaultHasher,
273    ) {
274        for element in elements {
275            match element {
276                vibesql_ast::GroupingElement::Single(expr) => {
277                    "SINGLE".hash(hasher);
278                    Self::hash_expression(expr, hasher);
279                }
280                vibesql_ast::GroupingElement::Composite(exprs) => {
281                    "COMPOSITE".hash(hasher);
282                    for expr in exprs {
283                        Self::hash_expression(expr, hasher);
284                    }
285                }
286            }
287        }
288    }
289
290    /// Hash an expression, replacing literals with a placeholder marker
291    fn hash_expression(expr: &Expression, hasher: &mut DefaultHasher) {
292        match expr {
293            // Key difference: All literals and placeholders hash to the same value
294            // This allows parameterized queries to match with literal values
295            Expression::Literal(_)
296            | Expression::Placeholder(_)
297            | Expression::NumberedPlaceholder(_)
298            | Expression::NamedPlaceholder(_) => "LITERAL_PLACEHOLDER".hash(hasher),
299
300            Expression::ColumnRef { table, column } => {
301                "COLUMN".hash(hasher);
302                table.hash(hasher);
303                column.hash(hasher);
304            }
305
306            Expression::PseudoVariable { pseudo_table, column } => {
307                "PSEUDO_VARIABLE".hash(hasher);
308                std::mem::discriminant(pseudo_table).hash(hasher);
309                column.hash(hasher);
310            }
311
312            Expression::BinaryOp { op, left, right } => {
313                "BINARY_OP".hash(hasher);
314                std::mem::discriminant(op).hash(hasher);
315                Self::hash_expression(left, hasher);
316                Self::hash_expression(right, hasher);
317            }
318
319            Expression::UnaryOp { op, expr } => {
320                "UNARY_OP".hash(hasher);
321                std::mem::discriminant(op).hash(hasher);
322                Self::hash_expression(expr, hasher);
323            }
324
325            Expression::Function { name, args, character_unit } => {
326                "FUNCTION".hash(hasher);
327                name.to_lowercase().hash(hasher);
328                for arg in args {
329                    Self::hash_expression(arg, hasher);
330                }
331                if let Some(ref unit) = character_unit {
332                    std::mem::discriminant(unit).hash(hasher);
333                }
334            }
335
336            Expression::AggregateFunction { name, distinct, args } => {
337                "AGGREGATE".hash(hasher);
338                name.to_lowercase().hash(hasher);
339                distinct.hash(hasher);
340                for arg in args {
341                    Self::hash_expression(arg, hasher);
342                }
343            }
344
345            Expression::IsNull { expr, negated } => {
346                "IS_NULL".hash(hasher);
347                Self::hash_expression(expr, hasher);
348                negated.hash(hasher);
349            }
350
351            Expression::Wildcard => "WILDCARD".hash(hasher),
352
353            Expression::Case { operand, when_clauses, else_result } => {
354                "CASE".hash(hasher);
355                if let Some(ref op) = operand {
356                    Self::hash_expression(op, hasher);
357                }
358                for when in when_clauses {
359                    for cond in &when.conditions {
360                        Self::hash_expression(cond, hasher);
361                    }
362                    Self::hash_expression(&when.result, hasher);
363                }
364                if let Some(ref else_expr) = else_result {
365                    Self::hash_expression(else_expr, hasher);
366                }
367            }
368
369            Expression::ScalarSubquery(subquery) => {
370                "SCALAR_SUBQUERY".hash(hasher);
371                Self::hash_select(subquery, hasher);
372            }
373
374            Expression::In { expr, subquery, negated } => {
375                "IN_SUBQUERY".hash(hasher);
376                Self::hash_expression(expr, hasher);
377                Self::hash_select(subquery, hasher);
378                negated.hash(hasher);
379            }
380
381            Expression::InList { expr, values, negated } => {
382                "IN_LIST".hash(hasher);
383                Self::hash_expression(expr, hasher);
384                values.len().hash(hasher);
385                for val in values {
386                    Self::hash_expression(val, hasher);
387                }
388                negated.hash(hasher);
389            }
390
391            Expression::Between { expr, low, high, negated, symmetric } => {
392                "BETWEEN".hash(hasher);
393                Self::hash_expression(expr, hasher);
394                Self::hash_expression(low, hasher);
395                Self::hash_expression(high, hasher);
396                negated.hash(hasher);
397                symmetric.hash(hasher);
398            }
399
400            Expression::Cast { expr, data_type } => {
401                "CAST".hash(hasher);
402                Self::hash_expression(expr, hasher);
403                std::mem::discriminant(data_type).hash(hasher);
404            }
405
406            Expression::Position { substring, string, character_unit } => {
407                "POSITION".hash(hasher);
408                Self::hash_expression(substring, hasher);
409                Self::hash_expression(string, hasher);
410                if let Some(ref unit) = character_unit {
411                    std::mem::discriminant(unit).hash(hasher);
412                }
413            }
414
415            Expression::Trim { position, removal_char, string } => {
416                "TRIM".hash(hasher);
417                if let Some(ref pos) = position {
418                    std::mem::discriminant(pos).hash(hasher);
419                }
420                if let Some(ref ch) = removal_char {
421                    Self::hash_expression(ch, hasher);
422                }
423                Self::hash_expression(string, hasher);
424            }
425
426            Expression::Extract { field, expr } => {
427                "EXTRACT".hash(hasher);
428                std::mem::discriminant(field).hash(hasher);
429                Self::hash_expression(expr, hasher);
430            }
431
432            Expression::Like { expr, pattern, negated } => {
433                "LIKE".hash(hasher);
434                Self::hash_expression(expr, hasher);
435                Self::hash_expression(pattern, hasher);
436                negated.hash(hasher);
437            }
438
439            Expression::Exists { subquery, negated } => {
440                "EXISTS".hash(hasher);
441                Self::hash_select(subquery, hasher);
442                negated.hash(hasher);
443            }
444
445            Expression::QuantifiedComparison { expr, op, quantifier, subquery } => {
446                "QUANTIFIED".hash(hasher);
447                Self::hash_expression(expr, hasher);
448                std::mem::discriminant(op).hash(hasher);
449                std::mem::discriminant(quantifier).hash(hasher);
450                Self::hash_select(subquery, hasher);
451            }
452
453            Expression::CurrentDate => "CURRENT_DATE".hash(hasher),
454
455            Expression::CurrentTime { precision } => {
456                "CURRENT_TIME".hash(hasher);
457                precision.hash(hasher);
458            }
459
460            Expression::CurrentTimestamp { precision } => {
461                "CURRENT_TIMESTAMP".hash(hasher);
462                precision.hash(hasher);
463            }
464
465            Expression::Interval { value, unit, leading_precision, fractional_precision } => {
466                "INTERVAL".hash(hasher);
467                Self::hash_expression(value, hasher);
468                format!("{:?}", unit).hash(hasher);
469                leading_precision.hash(hasher);
470                fractional_precision.hash(hasher);
471            }
472
473            Expression::Default => "DEFAULT".hash(hasher),
474
475            Expression::DuplicateKeyValue { column } => {
476                "DUPLICATE_KEY_VALUE".hash(hasher);
477                column.hash(hasher);
478            }
479
480            Expression::WindowFunction { function, over } => {
481                "WINDOW_FUNCTION".hash(hasher);
482                // Hash function type and arguments
483                match function {
484                    vibesql_ast::WindowFunctionSpec::Aggregate { name, args } => {
485                        "AGGREGATE".hash(hasher);
486                        name.to_lowercase().hash(hasher);
487                        for arg in args {
488                            Self::hash_expression(arg, hasher);
489                        }
490                    }
491                    vibesql_ast::WindowFunctionSpec::Ranking { name, args } => {
492                        "RANKING".hash(hasher);
493                        name.to_lowercase().hash(hasher);
494                        for arg in args {
495                            Self::hash_expression(arg, hasher);
496                        }
497                    }
498                    vibesql_ast::WindowFunctionSpec::Value { name, args } => {
499                        "VALUE".hash(hasher);
500                        name.to_lowercase().hash(hasher);
501                        for arg in args {
502                            Self::hash_expression(arg, hasher);
503                        }
504                    }
505                }
506
507                // Hash OVER clause components
508                if let Some(ref partition_by) = over.partition_by {
509                    for expr in partition_by {
510                        Self::hash_expression(expr, hasher);
511                    }
512                }
513                if let Some(ref order_by) = over.order_by {
514                    for item in order_by {
515                        Self::hash_expression(&item.expr, hasher);
516                        std::mem::discriminant(&item.direction).hash(hasher);
517                    }
518                }
519                if let Some(ref frame) = over.frame {
520                    std::mem::discriminant(&frame.unit).hash(hasher);
521                    std::mem::discriminant(&frame.start).hash(hasher);
522                    if let Some(ref end) = frame.end {
523                        std::mem::discriminant(end).hash(hasher);
524                    }
525                }
526            }
527
528            Expression::NextValue { sequence_name } => {
529                "NEXT_VALUE".hash(hasher);
530                sequence_name.hash(hasher);
531            }
532
533            Expression::MatchAgainst { columns, search_modifier, mode } => {
534                "MATCH_AGAINST".hash(hasher);
535                for col in columns {
536                    col.hash(hasher);
537                }
538                Self::hash_expression(search_modifier, hasher);
539                std::mem::discriminant(mode).hash(hasher);
540            }
541
542            Expression::SessionVariable { name } => {
543                "SESSION_VARIABLE".hash(hasher);
544                name.hash(hasher);
545            }
546
547            Expression::Conjunction(children) | Expression::Disjunction(children) => {
548                for child in children {
549                    Self::hash_expression(child, hasher);
550                }
551            }
552        }
553    }
554
555    // ========================================================================
556    // Arena-allocated type hashing
557    // ========================================================================
558
559    /// Hash an arena-allocated SELECT statement structure
560    fn hash_arena_select(select: &ArenaSelectStmt<'_>, hasher: &mut DefaultHasher) {
561        // Hash DISTINCT
562        select.distinct.hash(hasher);
563
564        // Hash select items
565        for item in &select.select_list {
566            match item {
567                ArenaSelectItem::Wildcard { .. } => "WILDCARD".hash(hasher),
568                ArenaSelectItem::QualifiedWildcard { qualifier, .. } => {
569                    "QUALIFIED_WILDCARD".hash(hasher);
570                    qualifier.hash(hasher);
571                }
572                ArenaSelectItem::Expression { expr, alias } => {
573                    Self::hash_arena_expression(expr, hasher);
574                    alias.hash(hasher);
575                }
576            }
577        }
578
579        // Hash FROM clause
580        if let Some(ref from) = select.from {
581            Self::hash_arena_from_clause(from, hasher);
582        }
583
584        // Hash WHERE clause
585        if let Some(ref where_clause) = select.where_clause {
586            Self::hash_arena_expression(where_clause, hasher);
587        }
588
589        // Hash GROUP BY
590        if let Some(ref group_by) = select.group_by {
591            Self::hash_arena_group_by(group_by, hasher);
592        }
593
594        // Hash HAVING
595        if let Some(ref having) = select.having {
596            Self::hash_arena_expression(having, hasher);
597        }
598
599        // Hash ORDER BY
600        if let Some(ref order_by) = select.order_by {
601            for item in order_by {
602                Self::hash_arena_expression(&item.expr, hasher);
603                std::mem::discriminant(&item.direction).hash(hasher);
604            }
605        }
606
607        // Hash LIMIT/OFFSET
608        select.limit.hash(hasher);
609        select.offset.hash(hasher);
610    }
611
612    /// Hash an arena-allocated FROM clause structure
613    fn hash_arena_from_clause(from: &ArenaFromClause<'_>, hasher: &mut DefaultHasher) {
614        match from {
615            ArenaFromClause::Table { name, alias, .. } => {
616                "TABLE".hash(hasher);
617                name.hash(hasher);
618                alias.hash(hasher);
619            }
620            ArenaFromClause::Join { left, join_type, right, condition, .. } => {
621                "JOIN".hash(hasher);
622                Self::hash_arena_from_clause(left, hasher);
623                std::mem::discriminant(join_type).hash(hasher);
624                Self::hash_arena_from_clause(right, hasher);
625                if let Some(expr) = condition {
626                    Self::hash_arena_expression(expr, hasher);
627                }
628            }
629            ArenaFromClause::Subquery { query, alias, .. } => {
630                "SUBQUERY".hash(hasher);
631                Self::hash_arena_select(query, hasher);
632                alias.hash(hasher);
633            }
634        }
635    }
636
637    fn hash_arena_group_by(group_by: &ArenaGroupByClause<'_>, hasher: &mut DefaultHasher) {
638        match group_by {
639            ArenaGroupByClause::Simple(exprs) => {
640                "SIMPLE".hash(hasher);
641                for expr in exprs {
642                    Self::hash_arena_expression(expr, hasher);
643                }
644            }
645            ArenaGroupByClause::Rollup(elements) => {
646                "ROLLUP".hash(hasher);
647                Self::hash_arena_grouping_elements(elements, hasher);
648            }
649            ArenaGroupByClause::Cube(elements) => {
650                "CUBE".hash(hasher);
651                Self::hash_arena_grouping_elements(elements, hasher);
652            }
653            ArenaGroupByClause::GroupingSets(sets) => {
654                "GROUPING_SETS".hash(hasher);
655                Self::hash_arena_grouping_sets(sets, hasher);
656            }
657            ArenaGroupByClause::Mixed(items) => {
658                "MIXED".hash(hasher);
659                for item in items {
660                    match item {
661                        ArenaMixedGroupingItem::Simple(expr) => {
662                            "SIMPLE".hash(hasher);
663                            Self::hash_arena_expression(expr, hasher);
664                        }
665                        ArenaMixedGroupingItem::Rollup(elements) => {
666                            "ROLLUP".hash(hasher);
667                            Self::hash_arena_grouping_elements(elements, hasher);
668                        }
669                        ArenaMixedGroupingItem::Cube(elements) => {
670                            "CUBE".hash(hasher);
671                            Self::hash_arena_grouping_elements(elements, hasher);
672                        }
673                        ArenaMixedGroupingItem::GroupingSets(sets) => {
674                            "GROUPING_SETS".hash(hasher);
675                            Self::hash_arena_grouping_sets(sets, hasher);
676                        }
677                    }
678                }
679            }
680        }
681    }
682
683    fn hash_arena_grouping_sets(
684        sets: &bumpalo::collections::Vec<'_, ArenaGroupingSet<'_>>,
685        hasher: &mut DefaultHasher,
686    ) {
687        for set in sets {
688            "SET".hash(hasher);
689            for expr in &set.columns {
690                Self::hash_arena_expression(expr, hasher);
691            }
692        }
693    }
694
695    fn hash_arena_grouping_elements(
696        elements: &bumpalo::collections::Vec<'_, ArenaGroupingElement<'_>>,
697        hasher: &mut DefaultHasher,
698    ) {
699        for element in elements {
700            match element {
701                ArenaGroupingElement::Single(expr) => {
702                    "SINGLE".hash(hasher);
703                    Self::hash_arena_expression(expr, hasher);
704                }
705                ArenaGroupingElement::Composite(exprs) => {
706                    "COMPOSITE".hash(hasher);
707                    for expr in exprs {
708                        Self::hash_arena_expression(expr, hasher);
709                    }
710                }
711            }
712        }
713    }
714
715    /// Hash an arena-allocated expression, replacing literals with a placeholder marker
716    fn hash_arena_expression(expr: &ArenaExpression<'_>, hasher: &mut DefaultHasher) {
717        match expr {
718            // Hot-path inline variants
719            // All literals and placeholders hash to the same value
720            ArenaExpression::Literal(_)
721            | ArenaExpression::Placeholder(_)
722            | ArenaExpression::NumberedPlaceholder(_)
723            | ArenaExpression::NamedPlaceholder(_) => "LITERAL_PLACEHOLDER".hash(hasher),
724
725            ArenaExpression::ColumnRef { table, column } => {
726                "COLUMN".hash(hasher);
727                table.hash(hasher);
728                column.hash(hasher);
729            }
730
731            ArenaExpression::BinaryOp { op, left, right } => {
732                "BINARY_OP".hash(hasher);
733                std::mem::discriminant(op).hash(hasher);
734                Self::hash_arena_expression(left, hasher);
735                Self::hash_arena_expression(right, hasher);
736            }
737
738            ArenaExpression::UnaryOp { op, expr } => {
739                "UNARY_OP".hash(hasher);
740                std::mem::discriminant(op).hash(hasher);
741                Self::hash_arena_expression(expr, hasher);
742            }
743
744            ArenaExpression::IsNull { expr, negated } => {
745                "IS_NULL".hash(hasher);
746                Self::hash_arena_expression(expr, hasher);
747                negated.hash(hasher);
748            }
749
750            ArenaExpression::Wildcard => "WILDCARD".hash(hasher),
751
752            ArenaExpression::CurrentDate => "CURRENT_DATE".hash(hasher),
753
754            ArenaExpression::CurrentTime { precision } => {
755                "CURRENT_TIME".hash(hasher);
756                precision.hash(hasher);
757            }
758
759            ArenaExpression::CurrentTimestamp { precision } => {
760                "CURRENT_TIMESTAMP".hash(hasher);
761                precision.hash(hasher);
762            }
763
764            ArenaExpression::Default => "DEFAULT".hash(hasher),
765
766            ArenaExpression::Conjunction(children) | ArenaExpression::Disjunction(children) => {
767                for child in children.iter() {
768                    Self::hash_arena_expression(child, hasher);
769                }
770            }
771
772            // Cold-path extended variants
773            ArenaExpression::Extended(ext) => Self::hash_arena_extended_expr(ext, hasher),
774        }
775    }
776
777    /// Hash an arena-allocated extended expression
778    fn hash_arena_extended_expr(ext: &ArenaExtendedExpr<'_>, hasher: &mut DefaultHasher) {
779        match ext {
780            ArenaExtendedExpr::Function { name, args, character_unit } => {
781                "FUNCTION".hash(hasher);
782                name.hash(hasher);
783                for arg in args {
784                    Self::hash_arena_expression(arg, hasher);
785                }
786                if let Some(ref unit) = character_unit {
787                    std::mem::discriminant(unit).hash(hasher);
788                }
789            }
790
791            ArenaExtendedExpr::AggregateFunction { name, distinct, args } => {
792                "AGGREGATE".hash(hasher);
793                name.hash(hasher);
794                distinct.hash(hasher);
795                for arg in args {
796                    Self::hash_arena_expression(arg, hasher);
797                }
798            }
799
800            ArenaExtendedExpr::Case { operand, when_clauses, else_result } => {
801                "CASE".hash(hasher);
802                if let Some(op) = operand {
803                    Self::hash_arena_expression(op, hasher);
804                }
805                for when in when_clauses {
806                    for cond in &when.conditions {
807                        Self::hash_arena_expression(cond, hasher);
808                    }
809                    Self::hash_arena_expression(&when.result, hasher);
810                }
811                if let Some(else_expr) = else_result {
812                    Self::hash_arena_expression(else_expr, hasher);
813                }
814            }
815
816            ArenaExtendedExpr::ScalarSubquery(subquery) => {
817                "SCALAR_SUBQUERY".hash(hasher);
818                Self::hash_arena_select(subquery, hasher);
819            }
820
821            ArenaExtendedExpr::In { expr, subquery, negated } => {
822                "IN_SUBQUERY".hash(hasher);
823                Self::hash_arena_expression(expr, hasher);
824                Self::hash_arena_select(subquery, hasher);
825                negated.hash(hasher);
826            }
827
828            ArenaExtendedExpr::InList { expr, values, negated } => {
829                "IN_LIST".hash(hasher);
830                Self::hash_arena_expression(expr, hasher);
831                values.len().hash(hasher);
832                for val in values {
833                    Self::hash_arena_expression(val, hasher);
834                }
835                negated.hash(hasher);
836            }
837
838            ArenaExtendedExpr::Between { expr, low, high, negated, symmetric } => {
839                "BETWEEN".hash(hasher);
840                Self::hash_arena_expression(expr, hasher);
841                Self::hash_arena_expression(low, hasher);
842                Self::hash_arena_expression(high, hasher);
843                negated.hash(hasher);
844                symmetric.hash(hasher);
845            }
846
847            ArenaExtendedExpr::Cast { expr, data_type } => {
848                "CAST".hash(hasher);
849                Self::hash_arena_expression(expr, hasher);
850                std::mem::discriminant(data_type).hash(hasher);
851            }
852
853            ArenaExtendedExpr::Position { substring, string, character_unit } => {
854                "POSITION".hash(hasher);
855                Self::hash_arena_expression(substring, hasher);
856                Self::hash_arena_expression(string, hasher);
857                if let Some(unit) = character_unit {
858                    std::mem::discriminant(unit).hash(hasher);
859                }
860            }
861
862            ArenaExtendedExpr::Trim { position, removal_char, string } => {
863                "TRIM".hash(hasher);
864                if let Some(pos) = position {
865                    std::mem::discriminant(pos).hash(hasher);
866                }
867                if let Some(ch) = removal_char {
868                    Self::hash_arena_expression(ch, hasher);
869                }
870                Self::hash_arena_expression(string, hasher);
871            }
872
873            ArenaExtendedExpr::Extract { field, expr } => {
874                "EXTRACT".hash(hasher);
875                std::mem::discriminant(field).hash(hasher);
876                Self::hash_arena_expression(expr, hasher);
877            }
878
879            ArenaExtendedExpr::Like { expr, pattern, negated } => {
880                "LIKE".hash(hasher);
881                Self::hash_arena_expression(expr, hasher);
882                Self::hash_arena_expression(pattern, hasher);
883                negated.hash(hasher);
884            }
885
886            ArenaExtendedExpr::Exists { subquery, negated } => {
887                "EXISTS".hash(hasher);
888                Self::hash_arena_select(subquery, hasher);
889                negated.hash(hasher);
890            }
891
892            ArenaExtendedExpr::QuantifiedComparison { expr, op, quantifier, subquery } => {
893                "QUANTIFIED".hash(hasher);
894                Self::hash_arena_expression(expr, hasher);
895                std::mem::discriminant(op).hash(hasher);
896                std::mem::discriminant(quantifier).hash(hasher);
897                Self::hash_arena_select(subquery, hasher);
898            }
899
900            ArenaExtendedExpr::Interval {
901                value,
902                unit,
903                leading_precision,
904                fractional_precision,
905            } => {
906                "INTERVAL".hash(hasher);
907                Self::hash_arena_expression(value, hasher);
908                format!("{:?}", unit).hash(hasher);
909                leading_precision.hash(hasher);
910                fractional_precision.hash(hasher);
911            }
912
913            ArenaExtendedExpr::DuplicateKeyValue { column } => {
914                "DUPLICATE_KEY_VALUE".hash(hasher);
915                column.hash(hasher);
916            }
917
918            ArenaExtendedExpr::WindowFunction { function, over } => {
919                "WINDOW_FUNCTION".hash(hasher);
920                match function {
921                    ArenaWindowFunctionSpec::Aggregate { name, args } => {
922                        "AGGREGATE".hash(hasher);
923                        name.hash(hasher);
924                        for arg in args {
925                            Self::hash_arena_expression(arg, hasher);
926                        }
927                    }
928                    ArenaWindowFunctionSpec::Ranking { name, args } => {
929                        "RANKING".hash(hasher);
930                        name.hash(hasher);
931                        for arg in args {
932                            Self::hash_arena_expression(arg, hasher);
933                        }
934                    }
935                    ArenaWindowFunctionSpec::Value { name, args } => {
936                        "VALUE".hash(hasher);
937                        name.hash(hasher);
938                        for arg in args {
939                            Self::hash_arena_expression(arg, hasher);
940                        }
941                    }
942                }
943
944                if let Some(ref partition_by) = over.partition_by {
945                    for expr in partition_by {
946                        Self::hash_arena_expression(expr, hasher);
947                    }
948                }
949                if let Some(ref order_by) = over.order_by {
950                    for item in order_by {
951                        Self::hash_arena_expression(&item.expr, hasher);
952                        std::mem::discriminant(&item.direction).hash(hasher);
953                    }
954                }
955                if let Some(ref frame) = over.frame {
956                    std::mem::discriminant(&frame.unit).hash(hasher);
957                    std::mem::discriminant(&frame.start).hash(hasher);
958                    if let Some(ref end) = frame.end {
959                        std::mem::discriminant(end).hash(hasher);
960                    }
961                }
962            }
963
964            ArenaExtendedExpr::NextValue { sequence_name } => {
965                "NEXT_VALUE".hash(hasher);
966                sequence_name.hash(hasher);
967            }
968
969            ArenaExtendedExpr::MatchAgainst { columns, search_modifier, mode } => {
970                "MATCH_AGAINST".hash(hasher);
971                for col in columns {
972                    col.hash(hasher);
973                }
974                Self::hash_arena_expression(search_modifier, hasher);
975                std::mem::discriminant(mode).hash(hasher);
976            }
977
978            ArenaExtendedExpr::PseudoVariable { pseudo_table, column } => {
979                "PSEUDO_VARIABLE".hash(hasher);
980                std::mem::discriminant(pseudo_table).hash(hasher);
981                column.hash(hasher);
982            }
983
984            ArenaExtendedExpr::SessionVariable { name } => {
985                "SESSION_VARIABLE".hash(hasher);
986                name.hash(hasher);
987            }
988        }
989    }
990}
991
#[cfg(test)]
mod tests {
    use super::*;

    use vibesql_ast::{BinaryOperator, Expression, FromClause, SelectItem, SelectStmt, Statement};
    use vibesql_types::SqlValue;

    /// Build an unqualified column reference.
    fn column(name: &str) -> Expression {
        Expression::ColumnRef { table: None, column: name.to_string() }
    }

    /// Build `SELECT col0 FROM tab WHERE col1 <op> <rhs>` as a parsed statement.
    fn select_col0_where_col1(op: BinaryOperator, rhs: SqlValue) -> Statement {
        let predicate = Expression::BinaryOp {
            op,
            left: Box::new(column("col1")),
            right: Box::new(Expression::Literal(rhs)),
        };

        Statement::Select(Box::new(SelectStmt {
            with_clause: None,
            distinct: false,
            select_list: vec![SelectItem::Expression { expr: column("col0"), alias: None }],
            into_table: None,
            into_variables: None,
            from: Some(FromClause::Table {
                name: "tab".to_string(),
                alias: None,
                column_aliases: None,
            }),
            where_clause: Some(predicate),
            group_by: None,
            having: None,
            order_by: None,
            limit: None,
            offset: None,
            set_operation: None,
        }))
    }

    #[test]
    fn test_same_query_same_signature() {
        let sql = "SELECT * FROM users";
        assert_eq!(QuerySignature::from_sql(sql), QuerySignature::from_sql(sql));
    }

    #[test]
    fn test_whitespace_normalization() {
        let compact = QuerySignature::from_sql("SELECT * FROM users");
        let padded = QuerySignature::from_sql("SELECT  *  FROM  users");
        assert_eq!(compact, padded);
    }

    #[test]
    fn test_case_insensitive() {
        let upper = QuerySignature::from_sql("SELECT * FROM users");
        let lower = QuerySignature::from_sql("select * from users");
        assert_eq!(upper, lower);
    }

    #[test]
    fn test_different_queries_different_signature() {
        let users = QuerySignature::from_sql("SELECT * FROM users");
        let orders = QuerySignature::from_sql("SELECT * FROM orders");
        assert_ne!(users, orders);
    }

    #[test]
    fn test_different_literals_different_signature_string_based() {
        // The string-based path keeps literals in the hashed text, so changing
        // a literal changes the signature.
        let with_five = QuerySignature::from_sql("SELECT col0 FROM tab WHERE col1 > 5");
        let with_ten = QuerySignature::from_sql("SELECT col0 FROM tab WHERE col1 > 10");
        assert_ne!(with_five, with_ten);
    }

    #[test]
    fn test_ast_based_same_structure_different_literals() {
        // SELECT col0 FROM tab WHERE col1 > 5
        let with_five =
            select_col0_where_col1(BinaryOperator::GreaterThan, SqlValue::Integer(5));
        // SELECT col0 FROM tab WHERE col1 > 10 (different literal)
        let with_ten =
            select_col0_where_col1(BinaryOperator::GreaterThan, SqlValue::Integer(10));

        // AST-based signatures should be the same despite different literals
        assert_eq!(QuerySignature::from_ast(&with_five), QuerySignature::from_ast(&with_ten));
    }

    #[test]
    fn test_ast_based_different_structure() {
        // SELECT col0 FROM tab WHERE col1 > 5
        let greater = select_col0_where_col1(BinaryOperator::GreaterThan, SqlValue::Integer(5));
        // SELECT col0 FROM tab WHERE col1 < 5 (different operator)
        let less = select_col0_where_col1(BinaryOperator::LessThan, SqlValue::Integer(5));

        // Different structure should produce different signatures
        assert_ne!(QuerySignature::from_ast(&greater), QuerySignature::from_ast(&less));
    }
}