Skip to main content

fraiseql_core/cache/
key.rs

1//! Cache key generation for query results.
2//!
3//! # Security Critical
4//!
5//! This module is **security-critical**. Cache keys MUST include variable values
6//! to prevent data leakage between different users or requests. Incorrect key
7//! generation could allow User A to see User B's cached data.
8//!
9//! # Key Composition
10//!
11//! Cache keys are generated from a single-pass ahash over:
12//! 1. Query string bytes
13//! 2. Recursively hashed variable values (canonical ordering)
14//! 3. WHERE clause structure (hashed structurally, not via serde)
15//! 4. Schema version string
16//!
17//! The hasher uses fixed seeds so that keys are deterministic across restarts.
18//!
19//! # Example
20//!
21//! ```rust
22//! use fraiseql_core::cache::generate_cache_key;
23//! use fraiseql_core::db::{WhereClause, WhereOperator};
24//! use serde_json::json;
25//!
26//! // Two different users querying their own data
27//! let key1 = generate_cache_key(
28//!     "query { user(id: $id) { name } }",
29//!     &json!({"id": "alice"}),
30//!     None,
31//!     "v1"
32//! );
33//!
34//! let key2 = generate_cache_key(
35//!     "query { user(id: $id) { name } }",
36//!     &json!({"id": "bob"}),
37//!     None,
38//!     "v1"
39//! );
40//!
41//! // Different variables MUST produce different keys (security requirement)
42//! assert_ne!(key1, key2);
43//! ```
44
45use std::hash::{BuildHasher, Hash, Hasher};
46
47use ahash::RandomState;
48use serde_json::Value as JsonValue;
49
50use crate::{
51    db::{WhereOperator, where_clause::WhereClause},
52    schema::QueryDefinition,
53};
54
// Fixed hasher seeds, so that hashing is deterministic across process restarts.
// Each seed is an arbitrary 8-byte ASCII tag read as a big-endian integer;
// changing any of them invalidates all cached entries.
const SEED_K0: u64 = u64::from_be_bytes(*b"RAISEON1"); // 0x5241_4953_454F_4E31
const SEED_K1: u64 = u64::from_be_bytes(*b"FRAISEQL"); // 0x4652_4149_5345_514C
const SEED_K2: u64 = u64::from_be_bytes(*b"CACHEKEY"); // 0x4341_4348_454B_4559
const SEED_K3: u64 = u64::from_be_bytes(*b"V2_CACHE"); // 0x5632_5F43_4143_4845
61
62/// Create a new hasher from the fixed-seed `RandomState`.
63fn new_hasher() -> impl Hasher {
64    RandomState::with_seeds(SEED_K0, SEED_K1, SEED_K2, SEED_K3).build_hasher()
65}
66
67/// Generate cache key for query result.
68///
69/// # Security Critical
70///
71/// **DIFFERENT VARIABLE VALUES MUST PRODUCE DIFFERENT KEYS** to prevent data
72/// leakage between users. This function feeds the full query, variables, WHERE
73/// clause, and schema version into a single-pass ahash for a fast, deterministic
74/// `u64` key.
75///
76/// # Key Composition
77///
78/// The cache key is a single ahash pass over:
79/// ```text
80/// ahash(
81///   query_bytes          +
82///   hash(variables)      +   ← recursive, canonical key ordering
83///   hash(WHERE_clause)   +   ← structural, not serde-dependent
84///   schema_version_bytes
85/// )
86/// ```
87///
88/// This ensures:
89/// - Same query + variables = same key (cache hit)
90/// - Different variables = different key (security)
91/// - Different WHERE clauses = different key (correctness)
92/// - Schema changes = different key (validity)
93///
94/// # Arguments
95///
96/// * `query` - GraphQL query string
97/// * `variables` - Query variables from GraphQL request (optional)
98/// * `where_clause` - WHERE filter from auto-params (optional)
99/// * `schema_version` - Schema hash from `CompiledSchema`
100///
101/// # Returns
102///
103/// A `u64` cache key suitable for use as a hash-map key.
104///
105/// # Security Examples
106///
107/// ```rust
108/// use fraiseql_core::cache::generate_cache_key;
109/// use serde_json::json;
110///
111/// let query = "query getUser($id: ID!) { user(id: $id) { name } }";
112///
113/// // Different users MUST get different cache keys
114/// let alice_key = generate_cache_key(query, &json!({"id": "alice"}), None, "v1");
115/// let bob_key = generate_cache_key(query, &json!({"id": "bob"}), None, "v1");
116/// assert_ne!(alice_key, bob_key, "Security: different variables must produce different keys");
117///
118/// // Same user MUST get same key (determinism)
119/// let alice_key2 = generate_cache_key(query, &json!({"id": "alice"}), None, "v1");
120/// assert_eq!(alice_key, alice_key2, "Determinism: same inputs must produce same key");
121/// ```
122#[must_use]
123pub fn generate_cache_key(
124    query: &str,
125    variables: &JsonValue,
126    where_clause: Option<&WhereClause>,
127    schema_version: &str,
128) -> u64 {
129    let mut h = new_hasher();
130
131    // Domain-separate the four sections with unique tags so that, e.g.,
132    // a query ending with "v1" and an empty schema_version can never
133    // collide with a shorter query and schema_version = "v1".
134    h.write(b"q:");
135    h.write(query.as_bytes());
136
137    h.write(b"\0v:");
138    hash_json_value(&mut h, variables);
139
140    h.write(b"\0w:");
141    if let Some(wc) = where_clause {
142        h.write_u8(1);
143        hash_where_clause(&mut h, wc);
144    } else {
145        h.write_u8(0);
146    }
147
148    h.write(b"\0s:");
149    h.write(schema_version.as_bytes());
150
151    h.finish()
152}
153
154/// Recursively hash a `serde_json::Value` into the given hasher.
155///
156/// Object keys are sorted before hashing so that insertion order does not
157/// affect the output (critical for variable-order independence).
158fn hash_json_value(h: &mut impl Hasher, value: &JsonValue) {
159    // Write a type discriminant so that `null`, `false`, `0`, `""`, `[]`, and `{}`
160    // all produce distinct hashes.
161    match value {
162        JsonValue::Null => h.write_u8(0),
163        JsonValue::Bool(b) => {
164            h.write_u8(1);
165            b.hash(h);
166        },
167        JsonValue::Number(n) => {
168            h.write_u8(2);
169            // Use the canonical string form so that 1.0 and 1 hash identically
170            // when serde represents them the same way.
171            h.write(n.to_string().as_bytes());
172        },
173        JsonValue::String(s) => {
174            h.write_u8(3);
175            h.write(s.as_bytes());
176        },
177        JsonValue::Array(arr) => {
178            h.write_u8(4);
179            h.write_usize(arr.len());
180            for item in arr {
181                hash_json_value(h, item);
182            }
183        },
184        JsonValue::Object(map) => {
185            h.write_u8(5);
186            h.write_usize(map.len());
187            // Sort keys for canonical ordering.
188            let mut keys: Vec<&String> = map.keys().collect();
189            keys.sort_unstable();
190            for key in keys {
191                h.write(key.as_bytes());
192                hash_json_value(h, &map[key]);
193            }
194        },
195    }
196}
197
198/// Hash a `WhereClause` tree structurally.
199///
200/// Uses discriminant tags and recursion so that structurally different clauses
201/// always produce different hash contributions.
202fn hash_where_clause(h: &mut impl Hasher, clause: &WhereClause) {
203    match clause {
204        WhereClause::Field {
205            path,
206            operator,
207            value,
208        } => {
209            h.write_u8(b'F');
210            h.write_usize(path.len());
211            for segment in path {
212                h.write(segment.as_bytes());
213                h.write_u8(0); // separator
214            }
215            hash_where_operator(h, operator);
216            hash_json_value(h, value);
217        },
218        WhereClause::And(clauses) => {
219            h.write_u8(b'A');
220            h.write_usize(clauses.len());
221            for c in clauses {
222                hash_where_clause(h, c);
223            }
224        },
225        WhereClause::Or(clauses) => {
226            h.write_u8(b'O');
227            h.write_usize(clauses.len());
228            for c in clauses {
229                hash_where_clause(h, c);
230            }
231        },
232        WhereClause::Not(inner) => {
233            h.write_u8(b'N');
234            hash_where_clause(h, inner);
235        },
236        // WhereClause is #[non_exhaustive]; unknown variants get a distinct tag
237        // plus their Debug representation as a conservative fallback.
238        _ => {
239            h.write_u8(b'?');
240            h.write(format!("{clause:?}").as_bytes());
241        },
242    }
243}
244
245/// Hash a `WhereOperator` by its `Debug` representation.
246///
247/// `WhereOperator` is `#[non_exhaustive]` with 40+ variants (including
248/// `Extended(ExtendedOperator)`). Using the `Debug` string is stable across
249/// refactors and automatically covers new variants without maintenance.
250/// Hash a `WhereOperator` without allocating.
251///
252/// Uses `std::mem::discriminant` for the variant tag (zero-allocation).
253/// For the `Extended(op)` variant which carries data, also hashes the
254/// Debug representation of the inner operator (rare path, acceptable allocation).
255fn hash_where_operator(h: &mut impl Hasher, op: &WhereOperator) {
256    // discriminant is a fixed-size hashable value — no allocation
257    std::mem::discriminant(op).hash(h);
258
259    // Extended operators carry inner data that affects the hash.
260    // All other variants are fully distinguished by their discriminant.
261    if let WhereOperator::Extended(inner) = op {
262        // Rare path: Extended operators are uncommon. The Debug allocation
263        // here is acceptable because it only triggers for rich-filter queries.
264        let inner_str = format!("{inner:?}");
265        h.write(inner_str.as_bytes());
266    }
267}
268
269/// Extract accessed views from query definition.
270///
271/// We track which database views/tables a query accesses for view-based
272/// cache invalidation. When a mutation modifies a view, we can invalidate
273/// all cached queries that read from that view.
274///
275/// # Current Scope
276///
277/// Currently extracts only the primary SQL source from the query definition.
278/// Does not analyze:
279/// - JOIN clauses (requires compiled SQL)
280/// - Resolver chains (requires runtime context)
281/// - Nested queries (requires query analyzer)
282///
283/// # Future Enhancements
284///
285/// - Extract views from JOIN clauses in compiled SQL
286/// - Extract views from resolver chains
287/// - Support for custom resolver view tracking
288/// - Entity-level tracking (extract IDs from results)
289///
290/// # Arguments
291///
292/// * `query_def` - The compiled query definition from schema
293///
294/// # Returns
295///
296/// List of view/table names accessed by this query
297///
298/// # Examples
299///
300/// ```rust
301/// use fraiseql_core::cache::extract_accessed_views;
302/// use fraiseql_core::schema::QueryDefinition;
303///
304/// let query_def = QueryDefinition::new("users", "User")
305///     .returning_list()
306///     .with_sql_source("v_user");
307///
308/// let views = extract_accessed_views(&query_def);
309/// assert_eq!(views, vec!["v_user"]);
310/// ```
311#[must_use]
312pub fn extract_accessed_views(query_def: &QueryDefinition) -> Vec<String> {
313    let mut views = Vec::new();
314
315    // Add primary SQL source
316    if let Some(sql_source) = &query_def.sql_source {
317        views.push(sql_source.clone());
318    }
319
320    // Add developer-declared secondary views (JOINs, nested queries, etc.)
321    // Required for correct invalidation when a query reads from multiple views.
322    views.extend(query_def.additional_views.iter().cloned());
323
324    views
325}
326
/// Check that cache key generation is deterministic for the given inputs.
///
/// Test-only helper: generates the key twice with no WHERE clause and
/// compares the two results. Identical inputs must always map to the same
/// key, otherwise cache hits cannot work.
///
/// # Arguments
///
/// * `query` - GraphQL query string
/// * `variables` - Query variables
/// * `schema_version` - Schema version hash
///
/// # Returns
///
/// `true` if two sequential key generations produce identical keys
#[cfg(test)]
#[must_use]
pub fn verify_deterministic(query: &str, variables: &JsonValue, schema_version: &str) -> bool {
    let first = generate_cache_key(query, variables, None, schema_version);
    let second = generate_cache_key(query, variables, None, schema_version);
    first == second
}
348
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use indexmap::IndexMap;
    use serde_json::json;

    use super::*;
    use crate::schema::{AutoParams, CursorType};

    /// Build a `QueryDefinition` fixture for the view-extraction tests.
    ///
    /// Only `sql_source` and `additional_views` vary between tests; every
    /// other field gets a fixed placeholder value.
    fn query_def_fixture(sql_source: Option<&str>, additional_views: &[&str]) -> QueryDefinition {
        QueryDefinition {
            name:                "users".to_string(),
            return_type:         "User".to_string(),
            returns_list:        true,
            nullable:            false,
            arguments:           vec![],
            sql_source:          sql_source.map(str::to_string),
            description:         None,
            auto_params:         AutoParams::default(),
            deprecation:         None,
            jsonb_column:        "data".to_string(),
            relay:               false,
            relay_cursor_column: None,
            relay_cursor_type:   CursorType::default(),
            inject_params:       IndexMap::default(),
            cache_ttl_seconds:   None,
            additional_views:    additional_views.iter().map(|v| (*v).to_string()).collect(),
            requires_role:       None,
            rest_path:           None,
            rest_method:         None,
        }
    }

    // ========================================================================
    // Security Tests (CRITICAL)
    // ========================================================================

    #[test]
    fn test_different_variables_produce_different_keys() {
        // SECURITY CRITICAL: Different variables MUST produce different keys
        // to prevent User A from seeing User B's cached data
        let query = "query getUser($id: ID!) { user(id: $id) { name email } }";

        let key_alice = generate_cache_key(query, &json!({"id": "alice"}), None, "v1");
        let key_bob = generate_cache_key(query, &json!({"id": "bob"}), None, "v1");

        assert_ne!(
            key_alice, key_bob,
            "SECURITY: Different variables MUST produce different cache keys"
        );
    }

    #[test]
    fn test_different_variable_values_produce_different_keys() {
        let query = "query getUsers($limit: Int!) { users(limit: $limit) { id } }";

        let key_10 = generate_cache_key(query, &json!({"limit": 10}), None, "v1");
        let key_20 = generate_cache_key(query, &json!({"limit": 20}), None, "v1");

        assert_ne!(
            key_10, key_20,
            "SECURITY: Different variable values MUST produce different keys"
        );
    }

    #[test]
    fn test_empty_vs_non_empty_variables() {
        let query = "query { users { id } }";

        let key_empty = generate_cache_key(query, &json!({}), None, "v1");
        let key_with_vars = generate_cache_key(query, &json!({"limit": 10}), None, "v1");

        assert_ne!(
            key_empty, key_with_vars,
            "Empty variables must produce different key than non-empty"
        );
    }

    #[test]
    fn test_variable_order_independence() {
        // Object keys are sorted before hashing, so the order in which the
        // variables were written must not affect the result. The two inputs
        // below list the same entries in opposite order, at both the top level
        // and inside a nested object.
        let query = "query($a: Int, $b: Int) { users { id } }";

        let key_ab = generate_cache_key(query, &json!({"a": 1, "b": 2}), None, "v1");
        let key_ba = generate_cache_key(query, &json!({"b": 2, "a": 1}), None, "v1");
        assert_eq!(key_ab, key_ba, "Variable order must not affect the key");

        let nested_xy = json!({"filter": {"x": true, "y": "z"}, "limit": 5});
        let nested_yx = json!({"limit": 5, "filter": {"y": "z", "x": true}});
        assert_eq!(
            generate_cache_key(query, &nested_xy, None, "v1"),
            generate_cache_key(query, &nested_yx, None, "v1"),
            "Nested variable order must not affect the key"
        );
    }

    #[test]
    fn test_adjacent_strings_are_delimited() {
        // Moving the boundary between two neighbouring strings must change
        // the key, even when the moved byte equals an internal type tag.
        let query = "query($tags: [String!]) { users { id } }";

        let key1 = generate_cache_key(query, &json!({"tags": ["a\u{3}b", "c"]}), None, "v1");
        let key2 = generate_cache_key(query, &json!({"tags": ["a", "b\u{3}c"]}), None, "v1");

        assert_ne!(key1, key2, "String boundaries must be part of the key");
    }

    // ========================================================================
    // Determinism Tests
    // ========================================================================

    #[test]
    fn test_cache_key_deterministic() {
        // Same inputs must always produce same output
        let query = "query { users { id } }";
        let vars = json!({"limit": 10});

        let key1 = generate_cache_key(query, &vars, None, "v1");
        let key2 = generate_cache_key(query, &vars, None, "v1");

        assert_eq!(key1, key2, "Cache keys must be deterministic");
    }

    #[test]
    fn test_verify_deterministic_helper() {
        assert!(
            verify_deterministic("query { users }", &json!({}), "v1"),
            "Helper should verify determinism"
        );
    }

    // ========================================================================
    // WHERE Clause Tests
    // ========================================================================

    #[test]
    fn test_different_where_clauses_produce_different_keys() {
        let query = "query { users { id } }";

        let where1 = WhereClause::Field {
            path:     vec!["email".to_string()],
            operator: WhereOperator::Eq,
            value:    json!("alice@example.com"),
        };

        let where2 = WhereClause::Field {
            path:     vec!["email".to_string()],
            operator: WhereOperator::Eq,
            value:    json!("bob@example.com"),
        };

        let key1 = generate_cache_key(query, &json!({}), Some(&where1), "v1");
        let key2 = generate_cache_key(query, &json!({}), Some(&where2), "v1");

        assert_ne!(key1, key2, "Different WHERE clauses must produce different keys");
    }

    #[test]
    fn test_different_where_operators_produce_different_keys() {
        let query = "query { users { id } }";

        let where_eq = WhereClause::Field {
            path:     vec!["age".to_string()],
            operator: WhereOperator::Eq,
            value:    json!(30),
        };

        let where_gt = WhereClause::Field {
            path:     vec!["age".to_string()],
            operator: WhereOperator::Gt,
            value:    json!(30),
        };

        let key_eq = generate_cache_key(query, &json!({}), Some(&where_eq), "v1");
        let key_gt = generate_cache_key(query, &json!({}), Some(&where_gt), "v1");

        assert_ne!(key_eq, key_gt, "Different operators must produce different keys");
    }

    #[test]
    fn test_with_and_without_where_clause() {
        let query = "query { users { id } }";

        let where_clause = WhereClause::Field {
            path:     vec!["active".to_string()],
            operator: WhereOperator::Eq,
            value:    json!(true),
        };

        let key_without = generate_cache_key(query, &json!({}), None, "v1");
        let key_with = generate_cache_key(query, &json!({}), Some(&where_clause), "v1");

        assert_ne!(key_without, key_with, "Presence of WHERE clause must change key");
    }

    #[test]
    fn test_complex_where_clause() {
        let query = "query { users { id } }";

        let conditions = || {
            vec![
                WhereClause::Field {
                    path:     vec!["age".to_string()],
                    operator: WhereOperator::Gte,
                    value:    json!(18),
                },
                WhereClause::Field {
                    path:     vec!["active".to_string()],
                    operator: WhereOperator::Eq,
                    value:    json!(true),
                },
            ]
        };

        let where_and = WhereClause::And(conditions());
        let where_or = WhereClause::Or(conditions());

        let key_and = generate_cache_key(query, &json!({}), Some(&where_and), "v1");
        let key_or = generate_cache_key(query, &json!({}), Some(&where_or), "v1");

        // Same children, different combinator: the keys must differ.
        assert_ne!(key_and, key_or, "AND and OR over the same conditions must differ");
    }

    // ========================================================================
    // Schema Version Tests
    // ========================================================================

    #[test]
    fn test_different_schema_versions_produce_different_keys() {
        let query = "query { users { id } }";

        let key_v1 = generate_cache_key(query, &json!({}), None, "v1");
        let key_v2 = generate_cache_key(query, &json!({}), None, "v2");

        assert_ne!(key_v1, key_v2, "Different schema versions must produce different keys");
    }

    #[test]
    fn test_schema_version_invalidation() {
        // When schema changes, all cache keys change (automatic invalidation)
        let query = "query { users { id } }";

        let old_schema = "abc123";
        let new_schema = "def456";

        let key_old = generate_cache_key(query, &json!({}), None, old_schema);
        let key_new = generate_cache_key(query, &json!({}), None, new_schema);

        assert_ne!(key_old, key_new, "Schema changes should invalidate cache");
    }

    // ========================================================================
    // Collision Avoidance Test
    // ========================================================================

    #[test]
    fn test_no_collisions_in_sample() {
        // Generate a sample of cache keys from varied inputs and verify
        // that no two distinct inputs produce the same u64.
        let mut keys = HashSet::new();
        let mut count = 0u32;

        let queries = [
            "query { users { id } }",
            "query { posts { id } }",
            "query { users { id name } }",
            "query getUser($id: ID!) { user(id: $id) { name } }",
            "",
        ];
        let variable_sets: &[JsonValue] = &[
            json!({}),
            json!(null),
            json!({"id": 1}),
            json!({"id": 2}),
            json!({"id": "alice"}),
            json!({"limit": 10, "offset": 0}),
            json!({"filter": {"active": true}}),
        ];
        let schema_versions = ["v1", "v2", "abc123"];

        for query in &queries {
            for vars in variable_sets {
                for sv in &schema_versions {
                    let key = generate_cache_key(query, vars, None, sv);
                    keys.insert(key);
                    count += 1;
                }
            }
        }

        assert_eq!(
            keys.len(),
            count as usize,
            "Collision detected among {count} sample cache keys"
        );
    }

    // ========================================================================
    // Extract Views Tests
    // ========================================================================

    #[test]
    fn test_extract_accessed_views_with_sql_source() {
        let query_def = query_def_fixture(Some("v_user"), &[]);

        let views = extract_accessed_views(&query_def);
        assert_eq!(views, vec!["v_user"]);
    }

    #[test]
    fn test_extract_accessed_views_without_sql_source() {
        // No SQL source (custom resolver)
        let query_def = query_def_fixture(None, &[]);

        let views = extract_accessed_views(&query_def);
        assert_eq!(views, Vec::<String>::new());
    }

    #[test]
    fn test_extract_accessed_views_with_additional_views() {
        let query_def = query_def_fixture(Some("v_user_with_posts"), &["v_post", "v_tag"]);

        // Primary source comes first, then the additional views in order.
        let views = extract_accessed_views(&query_def);
        assert_eq!(views, vec!["v_user_with_posts", "v_post", "v_tag"]);
    }

    // ========================================================================
    // Edge Case Tests
    // ========================================================================

    #[test]
    fn test_empty_query_string() {
        // Should not panic; produces a valid u64.
        let _key = generate_cache_key("", &json!({}), None, "v1");
    }

    #[test]
    fn test_null_variables() {
        // Should not panic; produces a valid u64.
        let _key = generate_cache_key("query { users }", &json!(null), None, "v1");
    }

    #[test]
    fn test_large_variable_object() {
        let large_vars = json!({
            "filter": {
                "age": 30,
                "active": true,
                "tags": ["rust", "graphql", "database"],
                "metadata": {
                    "created_after": "2024-01-01",
                    "updated_before": "2024-12-31"
                }
            }
        });

        // Should not panic; produces a valid u64.
        let _key = generate_cache_key("query { users }", &large_vars, None, "v1");
    }

    #[test]
    fn test_special_characters_in_query() {
        let query = r#"query { user(email: "test@example.com") { name } }"#;
        // Should not panic; produces a valid u64.
        let _key = generate_cache_key(query, &json!({}), None, "v1");
    }
}