fraiseql_core/cache/key.rs
1//! Cache key generation for query results.
2//!
3//! # Security Critical
4//!
5//! This module is **security-critical**. Cache keys MUST include variable values
6//! to prevent data leakage between different users or requests. Incorrect key
7//! generation could allow User A to see User B's cached data.
8//!
9//! # Key Composition
10//!
11//! Cache keys are generated from a single-pass ahash over:
12//! 1. Query string bytes
13//! 2. Recursively hashed variable values (canonical ordering)
14//! 3. WHERE clause structure (hashed structurally, not via serde)
15//! 4. Schema version string
16//!
17//! The hasher uses fixed seeds so that keys are deterministic across restarts.
18//!
19//! # Example
20//!
21//! ```rust
22//! use fraiseql_core::cache::generate_cache_key;
23//! use fraiseql_core::db::{WhereClause, WhereOperator};
24//! use serde_json::json;
25//!
26//! // Two different users querying their own data
27//! let key1 = generate_cache_key(
28//! "query { user(id: $id) { name } }",
29//! &json!({"id": "alice"}),
30//! None,
31//! "v1"
32//! );
33//!
34//! let key2 = generate_cache_key(
35//! "query { user(id: $id) { name } }",
36//! &json!({"id": "bob"}),
37//! None,
38//! "v1"
39//! );
40//!
41//! // Different variables MUST produce different keys (security requirement)
42//! assert_ne!(key1, key2);
43//! ```
44
45use std::hash::{BuildHasher, Hash, Hasher};
46
47use ahash::RandomState;
48use serde_json::Value as JsonValue;
49
50use crate::{
51 db::{WhereOperator, where_clause::WhereClause},
52 schema::QueryDefinition,
53};
54
55// Fixed seeds for deterministic hashing across process restarts.
56// These are arbitrary constants — changing them invalidates all cached entries.
57const SEED_K0: u64 = 0x5241_4953_454F_4E31; // "RAISEON1"
58const SEED_K1: u64 = 0x4652_4149_5345_514C; // "FRAISEQL"
59const SEED_K2: u64 = 0x4341_4348_454B_4559; // "CACHEKEY"
60const SEED_K3: u64 = 0x5632_5F43_4143_4845; // "V2_CACHE"
61
62/// Create a new hasher from the fixed-seed `RandomState`.
63fn new_hasher() -> impl Hasher {
64 RandomState::with_seeds(SEED_K0, SEED_K1, SEED_K2, SEED_K3).build_hasher()
65}
66
67/// Generate cache key for query result.
68///
69/// # Security Critical
70///
71/// **DIFFERENT VARIABLE VALUES MUST PRODUCE DIFFERENT KEYS** to prevent data
72/// leakage between users. This function feeds the full query, variables, WHERE
73/// clause, and schema version into a single-pass ahash for a fast, deterministic
74/// `u64` key.
75///
76/// # Key Composition
77///
78/// The cache key is a single ahash pass over:
79/// ```text
80/// ahash(
81/// query_bytes +
82/// hash(variables) + ← recursive, canonical key ordering
83/// hash(WHERE_clause) + ← structural, not serde-dependent
84/// schema_version_bytes
85/// )
86/// ```
87///
88/// This ensures:
89/// - Same query + variables = same key (cache hit)
90/// - Different variables = different key (security)
91/// - Different WHERE clauses = different key (correctness)
92/// - Schema changes = different key (validity)
93///
94/// # Arguments
95///
96/// * `query` - GraphQL query string
97/// * `variables` - Query variables from GraphQL request (optional)
98/// * `where_clause` - WHERE filter from auto-params (optional)
99/// * `schema_version` - Schema hash from `CompiledSchema`
100///
101/// # Returns
102///
103/// A `u64` cache key suitable for use as a hash-map key.
104///
105/// # Security Examples
106///
107/// ```rust
108/// use fraiseql_core::cache::generate_cache_key;
109/// use serde_json::json;
110///
111/// let query = "query getUser($id: ID!) { user(id: $id) { name } }";
112///
113/// // Different users MUST get different cache keys
114/// let alice_key = generate_cache_key(query, &json!({"id": "alice"}), None, "v1");
115/// let bob_key = generate_cache_key(query, &json!({"id": "bob"}), None, "v1");
116/// assert_ne!(alice_key, bob_key, "Security: different variables must produce different keys");
117///
118/// // Same user MUST get same key (determinism)
119/// let alice_key2 = generate_cache_key(query, &json!({"id": "alice"}), None, "v1");
120/// assert_eq!(alice_key, alice_key2, "Determinism: same inputs must produce same key");
121/// ```
122#[must_use]
123pub fn generate_cache_key(
124 query: &str,
125 variables: &JsonValue,
126 where_clause: Option<&WhereClause>,
127 schema_version: &str,
128) -> u64 {
129 let mut h = new_hasher();
130
131 // Domain-separate the four sections with unique tags so that, e.g.,
132 // a query ending with "v1" and an empty schema_version can never
133 // collide with a shorter query and schema_version = "v1".
134 h.write(b"q:");
135 h.write(query.as_bytes());
136
137 h.write(b"\0v:");
138 hash_json_value(&mut h, variables);
139
140 h.write(b"\0w:");
141 if let Some(wc) = where_clause {
142 h.write_u8(1);
143 hash_where_clause(&mut h, wc);
144 } else {
145 h.write_u8(0);
146 }
147
148 h.write(b"\0s:");
149 h.write(schema_version.as_bytes());
150
151 h.finish()
152}
153
154/// Recursively hash a `serde_json::Value` into the given hasher.
155///
156/// Object keys are sorted before hashing so that insertion order does not
157/// affect the output (critical for variable-order independence).
158fn hash_json_value(h: &mut impl Hasher, value: &JsonValue) {
159 // Write a type discriminant so that `null`, `false`, `0`, `""`, `[]`, and `{}`
160 // all produce distinct hashes.
161 match value {
162 JsonValue::Null => h.write_u8(0),
163 JsonValue::Bool(b) => {
164 h.write_u8(1);
165 b.hash(h);
166 },
167 JsonValue::Number(n) => {
168 h.write_u8(2);
169 // Use the canonical string form so that 1.0 and 1 hash identically
170 // when serde represents them the same way.
171 h.write(n.to_string().as_bytes());
172 },
173 JsonValue::String(s) => {
174 h.write_u8(3);
175 h.write(s.as_bytes());
176 },
177 JsonValue::Array(arr) => {
178 h.write_u8(4);
179 h.write_usize(arr.len());
180 for item in arr {
181 hash_json_value(h, item);
182 }
183 },
184 JsonValue::Object(map) => {
185 h.write_u8(5);
186 h.write_usize(map.len());
187 // Sort keys for canonical ordering.
188 let mut keys: Vec<&String> = map.keys().collect();
189 keys.sort_unstable();
190 for key in keys {
191 h.write(key.as_bytes());
192 hash_json_value(h, &map[key]);
193 }
194 },
195 }
196}
197
198/// Hash a `WhereClause` tree structurally.
199///
200/// Uses discriminant tags and recursion so that structurally different clauses
201/// always produce different hash contributions.
202fn hash_where_clause(h: &mut impl Hasher, clause: &WhereClause) {
203 match clause {
204 WhereClause::Field {
205 path,
206 operator,
207 value,
208 } => {
209 h.write_u8(b'F');
210 h.write_usize(path.len());
211 for segment in path {
212 h.write(segment.as_bytes());
213 h.write_u8(0); // separator
214 }
215 hash_where_operator(h, operator);
216 hash_json_value(h, value);
217 },
218 WhereClause::And(clauses) => {
219 h.write_u8(b'A');
220 h.write_usize(clauses.len());
221 for c in clauses {
222 hash_where_clause(h, c);
223 }
224 },
225 WhereClause::Or(clauses) => {
226 h.write_u8(b'O');
227 h.write_usize(clauses.len());
228 for c in clauses {
229 hash_where_clause(h, c);
230 }
231 },
232 WhereClause::Not(inner) => {
233 h.write_u8(b'N');
234 hash_where_clause(h, inner);
235 },
236 // WhereClause is #[non_exhaustive]; unknown variants get a distinct tag
237 // plus their Debug representation as a conservative fallback.
238 _ => {
239 h.write_u8(b'?');
240 h.write(format!("{clause:?}").as_bytes());
241 },
242 }
243}
244
245/// Hash a `WhereOperator` by its `Debug` representation.
246///
247/// `WhereOperator` is `#[non_exhaustive]` with 40+ variants (including
248/// `Extended(ExtendedOperator)`). Using the `Debug` string is stable across
249/// refactors and automatically covers new variants without maintenance.
250/// Hash a `WhereOperator` without allocating.
251///
252/// Uses `std::mem::discriminant` for the variant tag (zero-allocation).
253/// For the `Extended(op)` variant which carries data, also hashes the
254/// Debug representation of the inner operator (rare path, acceptable allocation).
255fn hash_where_operator(h: &mut impl Hasher, op: &WhereOperator) {
256 // discriminant is a fixed-size hashable value — no allocation
257 std::mem::discriminant(op).hash(h);
258
259 // Extended operators carry inner data that affects the hash.
260 // All other variants are fully distinguished by their discriminant.
261 if let WhereOperator::Extended(inner) = op {
262 // Rare path: Extended operators are uncommon. The Debug allocation
263 // here is acceptable because it only triggers for rich-filter queries.
264 let inner_str = format!("{inner:?}");
265 h.write(inner_str.as_bytes());
266 }
267}
268
269/// Extract accessed views from query definition.
270///
271/// We track which database views/tables a query accesses for view-based
272/// cache invalidation. When a mutation modifies a view, we can invalidate
273/// all cached queries that read from that view.
274///
275/// # Current Scope
276///
277/// Currently extracts only the primary SQL source from the query definition.
278/// Does not analyze:
279/// - JOIN clauses (requires compiled SQL)
280/// - Resolver chains (requires runtime context)
281/// - Nested queries (requires query analyzer)
282///
283/// # Future Enhancements
284///
285/// - Extract views from JOIN clauses in compiled SQL
286/// - Extract views from resolver chains
287/// - Support for custom resolver view tracking
288/// - Entity-level tracking (extract IDs from results)
289///
290/// # Arguments
291///
292/// * `query_def` - The compiled query definition from schema
293///
294/// # Returns
295///
296/// List of view/table names accessed by this query
297///
298/// # Examples
299///
300/// ```rust
301/// use fraiseql_core::cache::extract_accessed_views;
302/// use fraiseql_core::schema::QueryDefinition;
303///
304/// let query_def = QueryDefinition::new("users", "User")
305/// .returning_list()
306/// .with_sql_source("v_user");
307///
308/// let views = extract_accessed_views(&query_def);
309/// assert_eq!(views, vec!["v_user"]);
310/// ```
311#[must_use]
312pub fn extract_accessed_views(query_def: &QueryDefinition) -> Vec<String> {
313 let mut views = Vec::new();
314
315 // Add primary SQL source
316 if let Some(sql_source) = &query_def.sql_source {
317 views.push(sql_source.clone());
318 }
319
320 // Add developer-declared secondary views (JOINs, nested queries, etc.)
321 // Required for correct invalidation when a query reads from multiple views.
322 views.extend(query_def.additional_views.iter().cloned());
323
324 views
325}
326
/// Check that key generation is repeatable for one set of inputs.
///
/// Test-only helper: derives the key twice, both times without a WHERE
/// clause, and compares the two results.
///
/// # Arguments
///
/// * `query` - GraphQL query string
/// * `variables` - Query variables
/// * `schema_version` - Schema version hash
///
/// # Returns
///
/// `true` if two sequential key generations produce identical keys
#[cfg(test)]
#[must_use]
pub fn verify_deterministic(query: &str, variables: &JsonValue, schema_version: &str) -> bool {
    let derive = || generate_cache_key(query, variables, None, schema_version);
    derive() == derive()
}
348
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use indexmap::IndexMap;
    use serde_json::json;

    use super::*;
    use crate::schema::{AutoParams, CursorType};

    /// Build a `QueryDefinition` for the view-extraction tests.
    ///
    /// Only `sql_source` and `additional_views` vary between those tests; all
    /// other fields are fixed placeholder values.
    fn query_def_with_views(
        sql_source: Option<&str>,
        additional_views: &[&str],
    ) -> QueryDefinition {
        QueryDefinition {
            name: "users".to_string(),
            return_type: "User".to_string(),
            returns_list: true,
            nullable: false,
            arguments: vec![],
            sql_source: sql_source.map(str::to_string),
            description: None,
            auto_params: AutoParams::default(),
            deprecation: None,
            jsonb_column: "data".to_string(),
            relay: false,
            relay_cursor_column: None,
            relay_cursor_type: CursorType::default(),
            inject_params: IndexMap::default(),
            cache_ttl_seconds: None,
            additional_views: additional_views.iter().map(|v| (*v).to_string()).collect(),
            requires_role: None,
            rest_path: None,
            rest_method: None,
        }
    }

    // ========================================================================
    // Security Tests (CRITICAL)
    // ========================================================================

    #[test]
    fn test_different_variables_produce_different_keys() {
        // SECURITY CRITICAL: Different variables MUST produce different keys
        // to prevent User A from seeing User B's cached data
        let query = "query getUser($id: ID!) { user(id: $id) { name email } }";

        let key_alice = generate_cache_key(query, &json!({"id": "alice"}), None, "v1");
        let key_bob = generate_cache_key(query, &json!({"id": "bob"}), None, "v1");

        assert_ne!(
            key_alice, key_bob,
            "SECURITY: Different variables MUST produce different cache keys"
        );
    }

    #[test]
    fn test_different_variable_values_produce_different_keys() {
        let query = "query getUsers($limit: Int!) { users(limit: $limit) { id } }";

        let key_10 = generate_cache_key(query, &json!({"limit": 10}), None, "v1");
        let key_20 = generate_cache_key(query, &json!({"limit": 20}), None, "v1");

        assert_ne!(
            key_10, key_20,
            "SECURITY: Different variable values MUST produce different keys"
        );
    }

    #[test]
    fn test_empty_vs_non_empty_variables() {
        let query = "query { users { id } }";

        let key_empty = generate_cache_key(query, &json!({}), None, "v1");
        let key_with_vars = generate_cache_key(query, &json!({"limit": 10}), None, "v1");

        assert_ne!(
            key_empty, key_with_vars,
            "Empty variables must produce different key than non-empty"
        );
    }

    #[test]
    fn test_variable_order_independence() {
        // The two literals list the same entries in opposite order. With
        // serde_json's default sorted map they are already identical once
        // built; with an insertion-ordered map they iterate differently, which
        // is the case the explicit sort in hash_json_value guards against.
        let query = "query($a: Int, $b: Int) { users { id } }";

        let key1 = generate_cache_key(query, &json!({"a": 1, "b": 2}), None, "v1");
        let key2 = generate_cache_key(query, &json!({"b": 2, "a": 1}), None, "v1");

        assert_eq!(key1, key2, "Variable insertion order must not affect the key");
    }

    #[test]
    fn test_adjacent_strings_do_not_run_together() {
        // Moving a character across an element boundary must change the key.
        let query = "query($tags: [String!]) { users { id } }";

        let key1 = generate_cache_key(query, &json!({"tags": ["ab", "c"]}), None, "v1");
        let key2 = generate_cache_key(query, &json!({"tags": ["a", "bc"]}), None, "v1");

        assert_ne!(key1, key2, "Element boundaries must be part of the key");
    }

    // ========================================================================
    // Determinism Tests
    // ========================================================================

    #[test]
    fn test_cache_key_deterministic() {
        // Same inputs must always produce same output
        let query = "query { users { id } }";
        let vars = json!({"limit": 10});

        let key1 = generate_cache_key(query, &vars, None, "v1");
        let key2 = generate_cache_key(query, &vars, None, "v1");

        assert_eq!(key1, key2, "Cache keys must be deterministic");
    }

    #[test]
    fn test_verify_deterministic_helper() {
        assert!(
            verify_deterministic("query { users }", &json!({}), "v1"),
            "Helper should verify determinism"
        );
    }

    // ========================================================================
    // WHERE Clause Tests
    // ========================================================================

    #[test]
    fn test_different_where_clauses_produce_different_keys() {
        let query = "query { users { id } }";

        let where1 = WhereClause::Field {
            path: vec!["email".to_string()],
            operator: WhereOperator::Eq,
            value: json!("alice@example.com"),
        };

        let where2 = WhereClause::Field {
            path: vec!["email".to_string()],
            operator: WhereOperator::Eq,
            value: json!("bob@example.com"),
        };

        let key1 = generate_cache_key(query, &json!({}), Some(&where1), "v1");
        let key2 = generate_cache_key(query, &json!({}), Some(&where2), "v1");

        assert_ne!(key1, key2, "Different WHERE clauses must produce different keys");
    }

    #[test]
    fn test_different_where_operators_produce_different_keys() {
        let query = "query { users { id } }";

        let where_eq = WhereClause::Field {
            path: vec!["age".to_string()],
            operator: WhereOperator::Eq,
            value: json!(30),
        };

        let where_gt = WhereClause::Field {
            path: vec!["age".to_string()],
            operator: WhereOperator::Gt,
            value: json!(30),
        };

        let key_eq = generate_cache_key(query, &json!({}), Some(&where_eq), "v1");
        let key_gt = generate_cache_key(query, &json!({}), Some(&where_gt), "v1");

        assert_ne!(key_eq, key_gt, "Different operators must produce different keys");
    }

    #[test]
    fn test_with_and_without_where_clause() {
        let query = "query { users { id } }";

        let where_clause = WhereClause::Field {
            path: vec!["active".to_string()],
            operator: WhereOperator::Eq,
            value: json!(true),
        };

        let key_without = generate_cache_key(query, &json!({}), None, "v1");
        let key_with = generate_cache_key(query, &json!({}), Some(&where_clause), "v1");

        assert_ne!(key_without, key_with, "Presence of WHERE clause must change key");
    }

    #[test]
    fn test_complex_where_clause() {
        let query = "query { users { id } }";

        // Same two conditions, combined once with AND and once with OR.
        let conditions = || {
            vec![
                WhereClause::Field {
                    path: vec!["age".to_string()],
                    operator: WhereOperator::Gte,
                    value: json!(18),
                },
                WhereClause::Field {
                    path: vec!["active".to_string()],
                    operator: WhereOperator::Eq,
                    value: json!(true),
                },
            ]
        };
        let where_and = WhereClause::And(conditions());
        let where_or = WhereClause::Or(conditions());

        let key_and = generate_cache_key(query, &json!({}), Some(&where_and), "v1");
        let key_or = generate_cache_key(query, &json!({}), Some(&where_or), "v1");

        assert_ne!(key_and, key_or, "AND and OR over the same conditions must differ");
    }

    // ========================================================================
    // Schema Version Tests
    // ========================================================================

    #[test]
    fn test_different_schema_versions_produce_different_keys() {
        let query = "query { users { id } }";

        let key_v1 = generate_cache_key(query, &json!({}), None, "v1");
        let key_v2 = generate_cache_key(query, &json!({}), None, "v2");

        assert_ne!(key_v1, key_v2, "Different schema versions must produce different keys");
    }

    #[test]
    fn test_schema_version_invalidation() {
        // When schema changes, all cache keys change (automatic invalidation)
        let query = "query { users { id } }";

        let old_schema = "abc123";
        let new_schema = "def456";

        let key_old = generate_cache_key(query, &json!({}), None, old_schema);
        let key_new = generate_cache_key(query, &json!({}), None, new_schema);

        assert_ne!(key_old, key_new, "Schema changes should invalidate cache");
    }

    // ========================================================================
    // Collision Avoidance Test
    // ========================================================================

    #[test]
    fn test_no_collisions_in_sample() {
        // Generate a sample of cache keys from varied inputs and verify
        // that no two distinct inputs produce the same u64.
        let mut keys = HashSet::new();
        let mut count = 0u32;

        let queries = [
            "query { users { id } }",
            "query { posts { id } }",
            "query { users { id name } }",
            "query getUser($id: ID!) { user(id: $id) { name } }",
            "",
        ];
        let variable_sets: &[JsonValue] = &[
            json!({}),
            json!(null),
            json!({"id": 1}),
            json!({"id": 2}),
            json!({"id": "alice"}),
            json!({"limit": 10, "offset": 0}),
            json!({"filter": {"active": true}}),
        ];
        let schema_versions = ["v1", "v2", "abc123"];

        for query in &queries {
            for vars in variable_sets {
                for sv in &schema_versions {
                    let key = generate_cache_key(query, vars, None, sv);
                    keys.insert(key);
                    count += 1;
                }
            }
        }

        assert_eq!(
            keys.len(),
            count as usize,
            "Collision detected among {count} sample cache keys"
        );
    }

    // ========================================================================
    // Extract Views Tests
    // ========================================================================

    #[test]
    fn test_extract_accessed_views_with_sql_source() {
        let query_def = query_def_with_views(Some("v_user"), &[]);

        let views = extract_accessed_views(&query_def);
        assert_eq!(views, vec!["v_user"]);
    }

    #[test]
    fn test_extract_accessed_views_without_sql_source() {
        // No SQL source (custom resolver)
        let query_def = query_def_with_views(None, &[]);

        let views = extract_accessed_views(&query_def);
        assert_eq!(views, Vec::<String>::new());
    }

    #[test]
    fn test_extract_accessed_views_with_additional_views() {
        let query_def = query_def_with_views(Some("v_user_with_posts"), &["v_post", "v_tag"]);

        let views = extract_accessed_views(&query_def);
        assert_eq!(views, vec!["v_user_with_posts", "v_post", "v_tag"]);
    }

    // ========================================================================
    // Edge Case Tests
    // ========================================================================

    #[test]
    fn test_empty_query_string() {
        // Should not panic; produces a valid u64.
        let _key = generate_cache_key("", &json!({}), None, "v1");
    }

    #[test]
    fn test_null_variables() {
        // Should not panic; produces a valid u64.
        let _key = generate_cache_key("query { users }", &json!(null), None, "v1");
    }

    #[test]
    fn test_large_variable_object() {
        let large_vars = json!({
            "filter": {
                "age": 30,
                "active": true,
                "tags": ["rust", "graphql", "database"],
                "metadata": {
                    "created_after": "2024-01-01",
                    "updated_before": "2024-12-31"
                }
            }
        });

        // Should not panic; produces a valid u64.
        let _key = generate_cache_key("query { users }", &large_vars, None, "v1");
    }

    #[test]
    fn test_special_characters_in_query() {
        let query = r#"query { user(email: "test@example.com") { name } }"#;
        // Should not panic; produces a valid u64.
        let _key = generate_cache_key(query, &json!({}), None, "v1");
    }
}