Skip to main content

cyrs_db/
inputs.rs

1// Salsa's `#[input]` macro expands into generated getter / setter
2// methods on each input struct.  Those are collectively described by
3// the module-level docs below, not per-method.  The outer
4// `#[allow(missing_docs)]` attributes on each struct don't carry
5// through to the macro-expanded impl blocks, so we lift the
6// exemption to file-scope.  Hand-written items in this file still
7// carry their own docstrings — see `options_digest` below.
8#![allow(missing_docs)]
9
10//! Input queries for the incremental analysis database (spec §11.2).
11//!
12//! ## Design
13//!
14//! Three Salsa `#[input]` structs cover the full input surface:
15//!
16//! | Input struct        | Scope      | Salsa kind   | Purpose                                   |
17//! |---------------------|------------|--------------|-------------------------------------------|
18//! | [`crate::SourceFile`] | per-file | `#[input]`   | raw source text + dialect (from `cy-zx6`) |
19//! | [`FileOptions`]     | per-file   | `#[input]`   | full [`AnalysisOptions`] for this file    |
20//! | [`WorkspaceInputs`] | workspace  | `#[input]`   | workspace-scoped schema (§11.4)           |
21//!
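//! The `options_digest` for a file is a **derived** query (`#[salsa::tracked]`)
//! computed from `FileOptions`.  Changing `dialect`, `warn_shadowing`, a hint
//! name, or the top-level variant of a hint's type yields a different digest
//! (barring 64-bit hash collisions), which invalidates all derived queries
//! keyed on that digest.  Changes confined to the payload of a hint type
//! (e.g. the element type of a `List`) are not reflected — see `type_tag`.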
25//!
26//! ## Stable hashing (`AnalysisOptions::digest`)
27//!
28//! The digest is a 64-bit FNV-1a hash computed over a canonical byte
29//! representation of `AnalysisOptions`.  The canonicalisation rules are:
30//!
31//! 1. `dialect`         — encoded as a single `u8` (0 = `GqlAligned`, 1 = `OpenCypherV9`).
32//! 2. `warn_shadowing`  — encoded as a `u8` (0 = false, 1 = true).
//! 3. `parameter_hints` — stored in a `BTreeMap<SmolStr, Type>` so iteration
//!    is always in lexicographic key order, independent of insertion order.
//!    The entry count is written first (little-endian `u64`); each entry is
//!    then serialised as `len(key) ++ key_bytes ++ type_tag`, where `len(key)`
//!    is also a little-endian `u64`.
36//!
37//! No `std::collections::HashMap` is used anywhere in the hash path (§8
38//! determinism rule).
39
40use std::collections::BTreeMap;
41use std::sync::Arc;
42
43use cyrs_schema::SchemaProvider;
44use cyrs_sema::ty::Type;
45use smol_str::SmolStr;
46
47use crate::{CypherDb, DialectMode};
48
49// ---------------------------------------------------------------------------
50// AnalysisOptions
51// ---------------------------------------------------------------------------
52
/// Full set of analysis options for a single file.
///
/// All fields that affect any derived query MUST appear here so that a
/// digest change automatically invalidates everything downstream.
///
/// `parameter_hints` uses [`BTreeMap`] (not `HashMap`) to guarantee that
/// the digest is independent of insertion order (spec §8 determinism).
///
/// When adding a field, also extend [`AnalysisOptions::digest`]: that method
/// hashes each field explicitly, so a field it does not write cannot
/// influence the digest.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct AnalysisOptions {
    /// Dialect used for gate checks (§9). Defaults to [`DialectMode::GqlAligned`].
    // NOTE(review): the default comes from `DialectMode`'s own `Default` impl
    // (through the derive above), which is defined outside this file —
    // confirm it really is `GqlAligned`.
    pub dialect: DialectMode,
    /// Warn when a variable in an inner scope shadows one in an outer scope.
    pub warn_shadowing: bool,
    /// Caller-supplied type hints for query parameters, keyed by parameter name.
    ///
    /// Stored in a [`BTreeMap`] so digest computation is order-independent.
    pub parameter_hints: BTreeMap<SmolStr, Type>,
}
71
72impl AnalysisOptions {
73    /// Compute a stable, deterministic 64-bit digest of these options.
74    ///
75    /// Uses FNV-1a (64-bit) over a canonical byte encoding.  The encoding
76    /// is deliberately simple and stable across process restarts because:
77    ///
78    /// - `std::collections::hash_map::DefaultHasher` is NOT stable across
79    ///   runs (Rust explicitly reserves the right to change it).
80    /// - No external hashing crate is required: FNV-1a is trivial to inline
81    ///   and has no dependencies.
82    ///
83    /// ## Canonical encoding
84    ///
85    /// ```text
86    /// [dialect_u8] [warn_shadowing_u8]
87    /// [num_hints_le64]
88    /// for each (key, ty) in parameter_hints (BTreeMap order = lex):
89    ///   [key_len_le64] [key_utf8_bytes] [type_tag_u8]
90    /// ```
91    #[must_use]
92    pub fn digest(&self) -> u64 {
93        let mut h = Fnv64::new();
94        // dialect
95        h.write_u8(match self.dialect {
96            DialectMode::GqlAligned => 0,
97            DialectMode::OpenCypherV9 => 1,
98        });
99        // warn_shadowing
100        h.write_u8(u8::from(self.warn_shadowing));
101        // parameter_hints (BTreeMap → lexicographic order)
102        h.write_u64(self.parameter_hints.len() as u64);
103        for (key, ty) in &self.parameter_hints {
104            let kb = key.as_bytes();
105            h.write_u64(kb.len() as u64);
106            h.write_bytes(kb);
107            h.write_u8(type_tag(ty));
108        }
109        h.finish()
110    }
111}
112
/// Map a [`Type`] to a single byte tag for digest purposes.
///
/// Only the top-level variant is encoded; the payloads of `List`, `Map`,
/// `Node`, `Relationship` and `Union` are ignored.  Two hints that differ
/// only in such a payload (for example the inner `Type` of a `List`)
/// therefore get the same tag, and [`AnalysisOptions::digest`] — which hashes
/// nothing else about the type — returns the same value for both.
///
/// NOTE(review): a payload-only change does NOT alter the digest, so anything
/// that relies solely on the digest will not observe it.  That is a potential
/// *missed* invalidation, not merely a spurious re-evaluation.  A full
/// structural encoding was judged unnecessary for the v1 parameter-hint use
/// case; revisit this if hints that differ only below the top level become
/// meaningful.
///
/// The tag values are part of the digest encoding: changing or reusing one
/// changes every digest that includes the affected variant.
fn type_tag(ty: &Type) -> u8 {
    match ty {
        Type::Any => 0,
        Type::Null => 1,
        Type::Bool => 2,
        Type::Int => 3,
        Type::Float => 4,
        Type::Num => 5,
        Type::String => 6,
        Type::Date => 7,
        Type::Datetime => 8,
        // Payload-carrying variants: the payload is deliberately not encoded.
        Type::List(_) => 9,
        Type::Map(_) => 10,
        Type::Node(_) => 11,
        Type::Relationship(_) => 12,
        Type::Path => 13,
        Type::Union(_) => 14,
        Type::Unknown => 15,
    }
}
140
141// ---------------------------------------------------------------------------
142// FNV-1a 64-bit hasher (inline, no external dep)
143// ---------------------------------------------------------------------------
144
/// Running state of a 64-bit FNV-1a hash.
///
/// Each input byte is XOR-ed into the state, which is then multiplied by
/// [`FNV_PRIME`] with wrapping arithmetic.
struct Fnv64(u64);

/// Initial state (FNV-1a 64-bit offset basis).
const FNV_OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
/// Per-byte multiplier (FNV 64-bit prime).
const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

impl Fnv64 {
    /// Start a fresh hash at the offset basis.
    fn new() -> Self {
        Fnv64(FNV_OFFSET)
    }

    /// Absorb a single byte.
    fn write_u8(&mut self, b: u8) {
        self.0 = (self.0 ^ u64::from(b)).wrapping_mul(FNV_PRIME);
    }

    /// Absorb `v` as its eight little-endian bytes.
    fn write_u64(&mut self, v: u64) {
        self.write_bytes(&v.to_le_bytes());
    }

    /// Absorb every byte of `bs`, in order.
    fn write_bytes(&mut self, bs: &[u8]) {
        self.0 = bs.iter().fold(self.0, |state, &byte| {
            (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
        });
    }

    /// Consume the hasher and return the accumulated hash.
    fn finish(self) -> u64 {
        self.0
    }
}
176
177// ---------------------------------------------------------------------------
178// Salsa #[input] — per-file analysis options
179// ---------------------------------------------------------------------------
180
/// Per-file analysis options input.
///
/// This is a separate `#[salsa::input]` from [`SourceFile`](crate::SourceFile)
/// so that changing options for one file does not touch the source revision
/// of any other file.  The `options_digest` derived query reads only this
/// struct.
// This outer `allow` marks the struct itself; per the note at the top of the
// file, it does not reach the macro-generated accessors, which are covered by
// the file-level `#![allow(missing_docs)]` instead.
#[allow(missing_docs)]
#[salsa::input]
pub struct FileOptions {
    /// Full analysis options for this file.
    pub options: AnalysisOptions,
}
192
193// ---------------------------------------------------------------------------
194// Salsa #[input] — workspace-scoped schema
195// ---------------------------------------------------------------------------
196
/// Workspace-scoped schema input (spec §11.4).
///
/// There is exactly one `WorkspaceInputs` per database.  The schema is
/// shared across all files; changing it invalidates all schema-dependent
/// derived queries regardless of which file they belong to.
///
/// `schema` is `None` when no schema is configured (schema-free mode, §7.1).
// NOTE(review): the "exactly one per database" rule is a convention — nothing
// in this file prevents `WorkspaceInputs::new` from being called more than
// once on the same database.  Confirm the owner of the database enforces it.
//
// This outer `allow` marks the struct itself; per the note at the top of the
// file, the macro-generated accessors are covered by the file-level
// `#![allow(missing_docs)]` instead.
#[allow(missing_docs)]
#[salsa::input]
pub struct WorkspaceInputs {
    /// Workspace-scoped schema.  `None` = schema-free analysis.
    pub schema: Option<Arc<dyn SchemaProvider>>,
}
210
211// ---------------------------------------------------------------------------
212// Derived query: options_digest
213// ---------------------------------------------------------------------------
214
215/// Compute the stable digest for a file's [`AnalysisOptions`].
216///
217/// This is a derived (`#[salsa::tracked]`) query, not an input, so Salsa
218/// memoises the result and only re-evaluates it when `FileOptions::options`
219/// changes.  Consumers that only need to know *whether* options changed
220/// (e.g. to decide if a cache entry is still valid) read this digest rather
221/// than comparing `AnalysisOptions` values directly.
222#[salsa::tracked]
223pub fn options_digest(db: &dyn CypherDb, file_opts: FileOptions) -> u64 {
224    file_opts.options(db).digest()
225}
226
227// ---------------------------------------------------------------------------
228// Tests
229// ---------------------------------------------------------------------------
230
231#[cfg(test)]
232mod tests {
233    use super::*;
234    use crate::CypherDatabase;
235    use salsa::Setter as _;
236
237    // -----------------------------------------------------------------------
238    // Digest stability
239    // -----------------------------------------------------------------------
240
241    /// Same options → same digest regardless of [`BTreeMap`] insertion order.
242    #[test]
243    fn digest_is_order_independent() {
244        let mut hints_a = BTreeMap::new();
245        hints_a.insert(SmolStr::new("x"), Type::Int);
246        hints_a.insert(SmolStr::new("y"), Type::String);
247
248        let mut hints_b = BTreeMap::new();
249        // Insert in reverse order — BTreeMap will sort both the same way.
250        hints_b.insert(SmolStr::new("y"), Type::String);
251        hints_b.insert(SmolStr::new("x"), Type::Int);
252
253        let a = AnalysisOptions {
254            dialect: DialectMode::GqlAligned,
255            warn_shadowing: false,
256            parameter_hints: hints_a,
257        };
258        let b = AnalysisOptions {
259            dialect: DialectMode::GqlAligned,
260            warn_shadowing: false,
261            parameter_hints: hints_b,
262        };
263
264        assert_eq!(
265            a.digest(),
266            b.digest(),
267            "digest must be independent of insertion order"
268        );
269    }
270
271    /// Changing `warn_shadowing` changes the digest.
272    #[test]
273    fn digest_changes_on_warn_shadowing_toggle() {
274        let opts_false = AnalysisOptions {
275            warn_shadowing: false,
276            ..Default::default()
277        };
278        let opts_true = AnalysisOptions {
279            warn_shadowing: true,
280            ..Default::default()
281        };
282        assert_ne!(
283            opts_false.digest(),
284            opts_true.digest(),
285            "toggling warn_shadowing must change the digest"
286        );
287    }
288
289    /// Adding a parameter hint changes the digest.
290    #[test]
291    fn digest_changes_on_hint_addition() {
292        let base = AnalysisOptions::default();
293        let mut hints = BTreeMap::new();
294        hints.insert(SmolStr::new("p"), Type::Bool);
295        let with_hint = AnalysisOptions {
296            parameter_hints: hints,
297            ..Default::default()
298        };
299        assert_ne!(
300            base.digest(),
301            with_hint.digest(),
302            "adding a parameter hint must change the digest"
303        );
304    }
305
306    /// Changing a single parameter hint's type changes the digest.
307    #[test]
308    fn digest_changes_on_hint_type_change() {
309        let mut hints_int = BTreeMap::new();
310        hints_int.insert(SmolStr::new("p"), Type::Int);
311        let opts_int = AnalysisOptions {
312            parameter_hints: hints_int,
313            ..Default::default()
314        };
315
316        let mut hints_str = BTreeMap::new();
317        hints_str.insert(SmolStr::new("p"), Type::String);
318        let opts_str = AnalysisOptions {
319            parameter_hints: hints_str,
320            ..Default::default()
321        };
322
323        assert_ne!(
324            opts_int.digest(),
325            opts_str.digest(),
326            "changing a hint type must change the digest"
327        );
328    }
329
330    /// Changing dialect changes the digest.
331    #[test]
332    fn digest_changes_on_dialect_change() {
333        let gql = AnalysisOptions {
334            dialect: DialectMode::GqlAligned,
335            ..Default::default()
336        };
337        let oc = AnalysisOptions {
338            dialect: DialectMode::OpenCypherV9,
339            ..Default::default()
340        };
341        assert_ne!(
342            gql.digest(),
343            oc.digest(),
344            "changing dialect must change the digest"
345        );
346    }
347
348    // -----------------------------------------------------------------------
349    // Salsa invalidation
350    // -----------------------------------------------------------------------
351
352    /// Setting a different dialect on `FileOptions` invalidates `options_digest`.
353    #[test]
354    fn options_digest_invalidates_on_dialect_change() {
355        let mut db = CypherDatabase::new();
356        let file_opts = FileOptions::new(
357            &db,
358            AnalysisOptions {
359                dialect: DialectMode::GqlAligned,
360                ..Default::default()
361            },
362        );
363
364        let d1 = options_digest(&db, file_opts);
365
366        // Mutate dialect → revision bump.
367        file_opts.set_options(&mut db).to(AnalysisOptions {
368            dialect: DialectMode::OpenCypherV9,
369            ..Default::default()
370        });
371
372        let d2 = options_digest(&db, file_opts);
373
374        assert_ne!(d1, d2, "options_digest must change after dialect mutation");
375    }
376
377    /// Two `AnalysisOptions` with identical content (regardless of hint
378    /// insertion order) produce the same digest → derived queries are NOT
379    /// re-evaluated.
380    #[test]
381    fn options_digest_stable_for_equal_options() {
382        let db = CypherDatabase::new();
383
384        let mut hints_a = BTreeMap::new();
385        hints_a.insert(SmolStr::new("x"), Type::Int);
386        hints_a.insert(SmolStr::new("y"), Type::String);
387
388        let mut hints_b = BTreeMap::new();
389        hints_b.insert(SmolStr::new("y"), Type::String); // reversed
390        hints_b.insert(SmolStr::new("x"), Type::Int);
391
392        let file_opts_a = FileOptions::new(
393            &db,
394            AnalysisOptions {
395                parameter_hints: hints_a,
396                ..Default::default()
397            },
398        );
399        let file_opts_b = FileOptions::new(
400            &db,
401            AnalysisOptions {
402                parameter_hints: hints_b,
403                ..Default::default()
404            },
405        );
406
407        let d_a = options_digest(&db, file_opts_a);
408        let d_b = options_digest(&db, file_opts_b);
409
410        assert_eq!(
411            d_a, d_b,
412            "equal options (any insertion order) must produce the same digest"
413        );
414    }
415
416    /// Changing a single parameter hint → digest changes → derived query re-evaluates.
417    #[test]
418    fn options_digest_invalidates_on_hint_change() {
419        let mut db = CypherDatabase::new();
420
421        let mut hints = BTreeMap::new();
422        hints.insert(SmolStr::new("p"), Type::Int);
423
424        let file_opts = FileOptions::new(
425            &db,
426            AnalysisOptions {
427                parameter_hints: hints,
428                ..Default::default()
429            },
430        );
431
432        let d1 = options_digest(&db, file_opts);
433
434        // Change the hint type.
435        let mut new_hints = BTreeMap::new();
436        new_hints.insert(SmolStr::new("p"), Type::String);
437        file_opts.set_options(&mut db).to(AnalysisOptions {
438            parameter_hints: new_hints,
439            ..Default::default()
440        });
441
442        let d2 = options_digest(&db, file_opts);
443
444        assert_ne!(
445            d1, d2,
446            "changing a parameter hint must invalidate options_digest"
447        );
448    }
449
450    // -----------------------------------------------------------------------
451    // WorkspaceInputs
452    // -----------------------------------------------------------------------
453
454    /// [`WorkspaceInputs`] can be constructed with `None` (schema-free mode).
455    #[test]
456    fn workspace_inputs_none_schema() {
457        let db = CypherDatabase::new();
458        let ws = WorkspaceInputs::new(&db, None);
459        assert!(ws.schema(&db).is_none());
460    }
461
462    /// [`WorkspaceInputs`] schema can be updated.
463    #[test]
464    fn workspace_inputs_schema_round_trip() {
465        use cyrs_schema::EmptySchema;
466        let mut db = CypherDatabase::new();
467        let ws = WorkspaceInputs::new(&db, None);
468        assert!(ws.schema(&db).is_none());
469
470        let schema: Arc<dyn SchemaProvider> = Arc::new(EmptySchema);
471        ws.set_schema(&mut db).to(Some(schema.clone()));
472        assert!(ws.schema(&db).is_some());
473    }
474}