Skip to main content

cyrs_db/
workspace.rs

1//! Workspace-level `Database` API — `FileId` model, snapshot semantics,
2//! and the `SchemaProvider` wiring (spec 0001 §11.4, §11.5).
3//!
4//! ## Design
5//!
6//! ```text
7//! Database (owned, wraps CypherDatabase)
//!   ├── FileId map: IndexMap<FileId, FileRecord>  (stable u32 handles)
9//!   ├── WorkspaceInputs                           (workspace-scoped schema)
10//!   └── FileOptions per file                      (per-file analysis options)
11//!
12//! DatabaseSnapshot (Clone of CypherDatabase, Send)
13//!   └── read-only view, carries FileId → SourceFile handles
14//! ```
15//!
16//! ## Concurrency contract (spec §11.5)
17//!
18//! * `Database` is `Send` (not `Sync` — Salsa's `ZalsaLocal` is per-thread).
19//!   Mutation (`&mut self`) serialises writes.
20//! * `DatabaseSnapshot` is obtained via [`Database::snapshot`].  It clones the
21//!   `Arc<Zalsa>` backing store and creates a fresh `ZalsaLocal`, making it
22//!   safe to send to another thread.  The snapshot sees a frozen view of the
23//!   database at the point of the clone; subsequent mutations to the origin
24//!   `Database` are invisible to the snapshot (Salsa's snapshot-isolation
25//!   guarantee).
26//! * **Pattern — one DB + snapshot per request** (spec §11.5):
27//!
28//! ```rust,ignore
29//! // Main thread owns the database.
30//! let mut db = Database::new();
31//! let file = db.open_file(Path::new("q.cyp"), "RETURN 1".into(), DialectMode::GqlAligned);
32//!
33//! // Per-request: take a snapshot, send to worker thread.
34//! let snap = db.snapshot();
35//! let src  = db.source_of(file).unwrap().to_string();
36//! let handle = std::thread::spawn(move || {
//!     let result = snap.parse_cst(file).unwrap();
38//!     result.parse().syntax().to_string()
39//! });
40//! let output = handle.join().unwrap();
41//! assert_eq!(output, src);
42//! ```
43//!
44//! ## `FileId` representation
45//!
46//! `FileId` is a `u32` newtype — simple, stable across process restarts,
47//! and cheap to copy.  Each `Database` maintains a monotonically-increasing
48//! counter; IDs are never reused within a single database instance.
49//!
50//! ## `SchemaProvider` wiring
51//!
52//! The workspace-scoped `SchemaProvider` is stored in a single
53//! [`WorkspaceInputs`] input.  Calling [`Database::set_schema`] bumps the
54//! Salsa revision for that input, which Salsa propagates to every
55//! schema-dependent derived query (e.g. `sema_diagnostics`, `all_diagnostics`)
56//! across **all** files.  The parse cache is unaffected (parse does not read
57//! `WorkspaceInputs`).
58
59use std::path::{Path, PathBuf};
60use std::sync::Arc;
61
62use indexmap::IndexMap;
63
64use cyrs_schema::SchemaProvider;
65use cyrs_syntax::{TextEdit, incremental_reparse};
66
67use crate::inputs::{AnalysisOptions, FileOptions, WorkspaceInputs};
68use crate::options::DatabaseOptions;
69use crate::queries::{
70    AstOutput, DiagnosticsOutput, PlanOutput, ResolvedNamesOutput, all_diagnostics, analyse_file,
71    parse_ast, plan_of, resolved_names, sema_diagnostics,
72};
73use crate::{Analysis, CypherDatabase, DialectMode, ParseOutput, SourceFile, parse_cst};
74
75// ---------------------------------------------------------------------------
76// FileId
77// ---------------------------------------------------------------------------
78
/// Opaque handle naming one file inside a workspace [`Database`] (spec §11.4).
///
/// The incremental database caches per `FileId`.  [`Database::open_file`]
/// hands out a fresh id on every call by bumping a per-database `u32`
/// counter, so ids grow monotonically and are not reused within a single
/// `Database` instance.
///
/// Callers are expected to store and compare ids, but should attach no
/// meaning to the wrapped number.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct FileId(pub u32);

impl std::fmt::Display for FileId {
    /// Renders the id as `FileId(<n>)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(raw) = self;
        write!(f, "FileId({raw})")
    }
}
96
97// ---------------------------------------------------------------------------
98// Error type
99// ---------------------------------------------------------------------------
100
101/// Error returned when a `FileId` is not found in the workspace.
102///
103/// Marked `#[non_exhaustive]` (cy-2i9.1) on the tuple — consumers must
104/// match it with `..` in patterns.  The inner `FileId` remains
105/// accessible via the public `.0` field.
106#[derive(Debug, Clone, PartialEq, Eq)]
107#[non_exhaustive]
108pub struct UnknownFileId(pub FileId);
109
110impl std::fmt::Display for UnknownFileId {
111    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112        write!(f, "unknown FileId: {}", self.0)
113    }
114}
115
116impl std::error::Error for UnknownFileId {}
117
118// ---------------------------------------------------------------------------
119// FileRecord — internal per-file state
120// ---------------------------------------------------------------------------
121
/// Internal per-file record mapping a `FileId` to its Salsa inputs.
///
/// This is the value type of the `Database::files` registry; one record
/// exists per currently-open file.
struct FileRecord {
    /// Salsa input struct carrying `source` + `dialect`.
    source_file: SourceFile,
    /// Salsa input struct carrying per-file `AnalysisOptions`.
    file_opts: FileOptions,
    /// Path associated with the file (used for display / lookup only).
    /// It is copied from the `path` argument of `open_file` and is not the
    /// registry key.
    path: PathBuf,
}
131
/// A freed pair of Salsa input handles retained for reuse by a subsequent
/// [`Database::open_file`] call.
///
/// Salsa 0.26 does not expose a public API for deleting input structs, so
/// once a `SourceFile` / `FileOptions` is allocated its internal slot in
/// the Salsa interner persists for the lifetime of the database.  Without
/// recycling, an LSP-style workload that churns file IDs (open → edit →
/// close) would grow Salsa's input table unboundedly and violate the
/// spec §11.6 steady-state RSS bound.
///
/// To respect the spec bound we pool the handles: `remove_file` pushes
/// the pair into [`Database::free_slots`] and resets the source to empty
/// to free the backing `String`; `open_file` prefers to pop a free slot
/// and reset its fields before allocating a fresh one.  The pool's
/// steady-state size is bounded by the peak number of simultaneously-open
/// files, which is naturally bounded by realistic client behaviour.
struct FreeSlot {
    /// Recycled `SourceFile` input; `remove_file` has already reset its
    /// source text to the empty string.
    source_file: SourceFile,
    /// Recycled `FileOptions` input; it still holds the previous file's
    /// options until `open_file` overwrites them.
    file_opts: FileOptions,
}
152
153// ---------------------------------------------------------------------------
154// DatabaseSnapshot
155// ---------------------------------------------------------------------------
156
/// A read-only, `Send` snapshot of the database at a point in time.
///
/// Obtained via [`Database::snapshot`].  The snapshot shares the
/// `Arc<Zalsa>` backing store with the origin `Database` and sees a
/// frozen view of the Salsa revision at the moment of cloning.  Subsequent
/// mutations to the `Database` are invisible to this snapshot.
///
/// Snapshots are suitable for cross-thread queries: they implement `Send`
/// so they can be shipped to a worker thread for concurrent read queries.
///
/// Every query method takes a [`FileId`] issued by the origin database and
/// returns `Err(UnknownFileId)` when that id was not open at snapshot time.
///
/// ## Example
///
/// ```rust,ignore
/// let snap = db.snapshot();
/// let file = /* FileId from the origin db */;
/// let result = std::thread::spawn(move || {
///     snap.parse_cst(file).unwrap()
/// }).join().unwrap();
/// ```
pub struct DatabaseSnapshot {
    /// Cloned Salsa database (read-only from the snapshot's perspective).
    inner: CypherDatabase,
    /// Snapshot of the file registry at the time of cloning.
    files: Arc<IndexMap<FileId, SourceFile>>,
    /// Snapshot of the workspace inputs handle.
    workspace: Option<WorkspaceInputs>,
    /// Per-file options snapshot.  Built from the same registry as `files`,
    /// so both maps carry the same set of keys.
    file_opts: Arc<IndexMap<FileId, FileOptions>>,
}
186
187impl std::fmt::Debug for DatabaseSnapshot {
188    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
189        f.debug_struct("DatabaseSnapshot")
190            .field("num_files", &self.files.len())
191            .finish_non_exhaustive()
192    }
193}
194
195// Compile-time Send assertion for DatabaseSnapshot.
196// CypherDatabase is Send (salsa #[salsa::db] macro emits `unsafe impl Send`).
197// Arc<IndexMap<…>> is Send when the value types are Send.
198// FileId, SourceFile, FileOptions, WorkspaceInputs are all Send.
199const _: fn() = || {
200    fn check_send<T: Send>() {}
201    check_send::<DatabaseSnapshot>();
202};
203
204impl DatabaseSnapshot {
205    /// Run `parse_cst` on the given file using this snapshot's view.
206    ///
207    /// Returns `Err(UnknownFileId)` if `id` was not open at snapshot time.
208    pub fn parse_cst(&self, id: FileId) -> Result<ParseOutput, UnknownFileId> {
209        let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
210        Ok(parse_cst(&self.inner, sf))
211    }
212
213    /// Run `parse_ast` on the given file using this snapshot's view.
214    pub fn parse_ast(&self, id: FileId) -> Result<AstOutput, UnknownFileId> {
215        let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
216        Ok(parse_ast(&self.inner, sf))
217    }
218
219    /// Run `plan_of` on the given file using this snapshot's view.
220    pub fn plan_of(&self, id: FileId) -> Result<PlanOutput, UnknownFileId> {
221        let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
222        Ok(plan_of(&self.inner, sf))
223    }
224
225    /// Run `sema_diagnostics` on the given file using this snapshot's view.
226    pub fn sema_diagnostics(&self, id: FileId) -> Result<DiagnosticsOutput, UnknownFileId> {
227        let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
228        let fo = self.file_opts.get(&id).copied().ok_or(UnknownFileId(id))?;
229        let ws = self.workspace.ok_or(UnknownFileId(id))?;
230        Ok(sema_diagnostics(&self.inner, sf, fo, ws))
231    }
232
233    /// Run `resolved_names` on the given file using this snapshot's view.
234    pub fn resolved_names(&self, id: FileId) -> Result<ResolvedNamesOutput, UnknownFileId> {
235        let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
236        let fo = self.file_opts.get(&id).copied().ok_or(UnknownFileId(id))?;
237        Ok(resolved_names(&self.inner, sf, fo))
238    }
239
240    /// Run `all_diagnostics` on the given file using this snapshot's view.
241    pub fn all_diagnostics(&self, id: FileId) -> Result<DiagnosticsOutput, UnknownFileId> {
242        let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
243        let fo = self.file_opts.get(&id).copied().ok_or(UnknownFileId(id))?;
244        let ws = self.workspace.ok_or(UnknownFileId(id))?;
245        Ok(all_diagnostics(&self.inner, sf, fo, ws))
246    }
247
248    /// Run the full analysis pipeline on the given file using this snapshot's view.
249    pub fn analyse_file(&self, id: FileId) -> Result<Analysis, UnknownFileId> {
250        let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
251        let fo = self.file_opts.get(&id).copied().ok_or(UnknownFileId(id))?;
252        let ws = self.workspace.ok_or(UnknownFileId(id))?;
253        Ok(analyse_file(&self.inner, sf, fo, ws))
254    }
255}
256
257// ---------------------------------------------------------------------------
258// Database
259// ---------------------------------------------------------------------------
260
/// Workspace-scoped incremental analysis database (spec §11).
///
/// The primary public API for all consumers (`cyrs-lsp`, `cyrs-agent`,
/// `cyrs-cli`, `cyrs-tck`).  Wraps [`CypherDatabase`] with:
///
/// * A `FileId` → `SourceFile` registry so callers use stable `u32` handles
///   instead of raw Salsa input structs.
/// * A single [`WorkspaceInputs`] for the workspace-scoped schema.
/// * Snapshot support via [`Database::snapshot`].
///
/// ## Concurrency contract (spec §11.5)
///
/// `Database` is `Send` but not `Sync`.  All mutating methods take `&mut self`.
/// For concurrent read access from multiple threads, call [`snapshot`] to
/// obtain a [`DatabaseSnapshot`] that implements `Send` and can be given to
/// a worker thread.
///
/// [`snapshot`]: Database::snapshot
pub struct Database {
    /// The wrapped Salsa database that owns all inputs and memoised queries.
    inner: CypherDatabase,
    /// Registry: `FileId` → per-file Salsa inputs.
    files: IndexMap<FileId, FileRecord>,
    /// Pool of Salsa input handles freed by [`remove_file`] and available
    /// for reuse on the next [`open_file`].  See [`FreeSlot`] for rationale
    /// (spec §11.6 steady-state RSS bound).
    ///
    /// [`remove_file`]: Database::remove_file
    /// [`open_file`]: Database::open_file
    free_slots: Vec<FreeSlot>,
    /// The single workspace-scoped input.  `with_options` creates it eagerly
    /// (with no schema), so this is always `Some` after construction; the
    /// internal accessors treat `None` as an invariant violation.
    workspace: Option<WorkspaceInputs>,
    /// Monotonically increasing `FileId` counter.
    next_id: u32,
    /// LRU options captured at construction (immutable).
    // The LRU capacities are applied to Salsa inside `with_options`; the
    // stored copy is not read by any code in this file, hence the allow.
    #[allow(dead_code)]
    options: DatabaseOptions,
}
298
299impl std::fmt::Debug for Database {
300    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
301        f.debug_struct("Database")
302            .field("num_files", &self.files.len())
303            .finish_non_exhaustive()
304    }
305}
306
307impl Default for Database {
308    fn default() -> Self {
309        Self::new()
310    }
311}
312
313impl Database {
314    // -----------------------------------------------------------------------
315    // Construction
316    // -----------------------------------------------------------------------
317
318    /// Create a new, empty workspace database with default LRU caps (256).
319    #[must_use]
320    pub fn new() -> Self {
321        Self::with_options(DatabaseOptions::default())
322    }
323
324    /// Create a new, empty workspace database with the given [`DatabaseOptions`].
325    ///
326    /// LRU capacities in `opts` are applied via Salsa's runtime
327    /// `set_lru_capacity` API immediately after construction.  The options
328    /// are immutable after this point.
329    ///
330    /// # Example
331    ///
332    /// ```rust,ignore
333    /// use cyrs_db::{Database, DatabaseOptions};
334    ///
335    /// let db = Database::with_options(DatabaseOptions {
336    ///     parse_lru: 512,
337    ///     sema_lru: 128,
338    ///     ..DatabaseOptions::default()
339    /// });
340    /// ```
341    #[must_use]
342    pub fn with_options(opts: DatabaseOptions) -> Self {
343        let mut inner = CypherDatabase::new();
344
345        // Apply runtime LRU capacity adjustments.
346        // The compile-time default is 256 (encoded in #[salsa::tracked(lru = 256)]).
347        // We always call set_lru_capacity so that non-default values take effect.
348        crate::set_parse_cst_lru(&mut inner, opts.parse_lru);
349        crate::queries::set_resolved_names_lru(&mut inner, opts.sema_lru);
350        crate::queries::set_sema_diagnostics_lru(&mut inner, opts.sema_lru);
351        crate::queries::set_plan_of_lru(&mut inner, opts.plan_lru);
352
353        let workspace = Some(inner.new_workspace_inputs(None));
354        Self {
355            inner,
356            files: IndexMap::new(),
357            free_slots: Vec::new(),
358            workspace,
359            next_id: 0,
360            options: opts,
361        }
362    }
363
364    // -----------------------------------------------------------------------
365    // Workspace API — spec §11.4
366    // -----------------------------------------------------------------------
367
368    /// Open a file in the workspace.
369    ///
370    /// Returns a stable [`FileId`] that uniquely identifies this file for the
371    /// lifetime of the database.  The `path` is recorded for diagnostics /
372    /// display but is not used as a key; two calls with the same path but
373    /// different sources produce two independent files.
374    ///
375    /// ## Caching
376    ///
377    /// Opening a file creates new Salsa input structs and does not yet run
378    /// any derived query.  All analysis is lazy and memoised.
379    pub fn open_file(&mut self, path: &Path, source: String, dialect: DialectMode) -> FileId {
380        let id = FileId(self.next_id);
381        self.next_id += 1;
382
383        let options = AnalysisOptions {
384            dialect,
385            ..Default::default()
386        };
387
388        let (source_file, file_opts) = if let Some(slot) = self.free_slots.pop() {
389            // Recycle the pooled Salsa handles: reset all fields so the slot
390            // behaves like a freshly-allocated input from the perspective of
391            // derived queries.  Keeps Salsa's input-struct interner bounded
392            // under LSP-style FileId churn (spec §11.6, bead cy-bh5).
393            self.inner.set_source(slot.source_file, source);
394            self.inner.set_dialect(slot.source_file, dialect);
395            self.inner.set_options(slot.file_opts, options);
396            (slot.source_file, slot.file_opts)
397        } else {
398            let source_file = self.inner.new_source_file_with(source, dialect, 0);
399            let file_opts = self.inner.new_file_options(options);
400            (source_file, file_opts)
401        };
402
403        self.files.insert(
404            id,
405            FileRecord {
406                source_file,
407                file_opts,
408                path: path.to_owned(),
409            },
410        );
411
412        id
413    }
414
415    /// Update the source text of an already-open file.
416    ///
417    /// Bumps the Salsa revision for this file's `SourceFile` input, causing
418    /// all derived queries that depend on `source` to be re-evaluated on the
419    /// next access.
420    ///
421    /// Returns `Err(UnknownFileId)` if `id` is not currently open.
422    pub fn update_file(&mut self, id: FileId, new_source: String) -> Result<(), UnknownFileId> {
423        let record = self.files.get(&id).ok_or(UnknownFileId(id))?;
424        let sf = record.source_file;
425        self.inner.set_source(sf, new_source);
426        Ok(())
427    }
428
429    /// Apply a single-range text edit to an already-open file (cy-zv0, spec §11).
430    ///
431    /// This is the incremental-edit entry point that `textDocument/didChange`
432    /// and agent edit flows should prefer over [`update_file`] when only a
433    /// byte range changed. The API is shaped so that a future sub-tree
434    /// reparse (see `cyrs_syntax::edit::incremental_reparse`) can plug in
435    /// underneath without breaking callers.
436    ///
437    /// # Current implementation
438    ///
439    /// Today the underlying [`incremental_reparse`] is a whole-file reparse
440    /// fallback, so the cache-invalidation behaviour is identical to
441    /// `update_file`. The observable difference is:
442    ///
443    /// - Callers pass a [`TextEdit`] value (range + replacement) instead of
444    ///   re-materialising the full source, so they don't pay an extra
445    ///   `String` allocation for the unchanged prefix / suffix.
446    /// - The edit is applied to the current source held by Salsa, not to a
447    ///   caller-managed copy, so there is no opportunity for the caller's
448    ///   local mirror to drift out of sync.
449    ///
450    /// Once the smart path lands (tracked as a follow-up bead to cy-zv0),
451    /// `edit_file` will become strictly sub-linear in file size for small
452    /// edits. Callers do not need to change.
453    ///
454    /// Returns `Err(UnknownFileId)` if `id` is not currently open.
455    ///
456    /// [`update_file`]: Database::update_file
457    pub fn edit_file(&mut self, id: FileId, edit: &TextEdit) -> Result<(), UnknownFileId> {
458        let record = self.files.get(&id).ok_or(UnknownFileId(id))?;
459        let sf = record.source_file;
460
461        // Pull the current tree from Salsa so `incremental_reparse` gets a
462        // real `SyntaxNode` to dispatch on.  Calling parse_cst here warms
463        // the memo and pins the `Arc<Parse>` that the sub-tree splicer
464        // (cy-li5) reuses on the smart path.
465        let parse_out = parse_cst(&self.inner, sf);
466        let old_tree = parse_out.parse().syntax();
467
468        // Dispatch through the edit crate.  On the smart path this returns
469        // a green-spliced `Parse` whose tree shares structure with the old
470        // one; on the fallback path it is a whole-file reparse.  Either
471        // way the resulting source text is canonical.
472        let new_parse = incremental_reparse(&old_tree, edit);
473        let new_source = new_parse.syntax().to_string();
474
475        // cy-li6: publish the freshly-computed `Parse` to Salsa as the
476        // memoised `parse_cst` result for the next revision.  Without this
477        // wiring the bench would re-parse `new_source` from scratch on the
478        // next `analyse_file`, defeating the smart-path savings.
479        let new_parse_out = crate::ParseOutput::new(new_parse);
480        self.inner
481            .set_source_with_parse(sf, new_source, new_parse_out);
482        Ok(())
483    }
484
485    /// Remove a file from the workspace.
486    ///
487    /// After removal, the `FileId` is considered stale.  Any subsequent call
488    /// that takes this `FileId` will return `Err(UnknownFileId)` rather than
489    /// panic.
490    ///
491    /// Returns `Err(UnknownFileId)` if `id` was not open.
492    pub fn remove_file(&mut self, id: FileId) -> Result<(), UnknownFileId> {
493        let record = self.files.swap_remove(&id).ok_or(UnknownFileId(id))?;
494
495        // Release the backing source string immediately so a long-lived pool
496        // entry does not pin a large `String` allocation (spec §11.6).  The
497        // Salsa revision bump here is harmless: no derived query will read
498        // this `SourceFile` until the slot is recycled, at which point
499        // `open_file` sets the new source and bumps the revision again.
500        self.inner.set_source(record.source_file, String::new());
501
502        self.free_slots.push(FreeSlot {
503            source_file: record.source_file,
504            file_opts: record.file_opts,
505        });
506
507        Ok(())
508    }
509
510    // -----------------------------------------------------------------------
511    // Schema API — spec §11.4 workspace-scoped SchemaProvider
512    // -----------------------------------------------------------------------
513
514    /// Set the workspace-scoped schema.
515    ///
516    /// Bumps the Salsa revision on [`WorkspaceInputs`], which cascades to
517    /// every schema-aware derived query (`sema_diagnostics`, `all_diagnostics`)
518    /// across **all** files.  The parse cache is unaffected.
519    ///
520    /// Pass `None` to switch to schema-free analysis mode (§7.1).
521    pub fn set_schema(&mut self, schema: Option<Arc<dyn SchemaProvider>>) {
522        let ws = self.workspace_inputs_mut();
523        self.inner.set_schema(ws, schema);
524    }
525
526    /// Read the current workspace-scoped schema (may be `None`).
527    #[must_use]
528    pub fn schema(&self) -> Option<Arc<dyn SchemaProvider>> {
529        self.workspace.as_ref()?.schema(&self.inner)
530    }
531
532    // -----------------------------------------------------------------------
533    // Snapshot API — spec §11.5
534    // -----------------------------------------------------------------------
535
536    /// Create a [`DatabaseSnapshot`] suitable for cross-thread queries.
537    ///
538    /// The snapshot is a frozen read-only view of the database at this
539    /// instant.  It can be sent to another thread (`Send`) and used to run
540    /// any derived query without `&mut`.  Mutations applied to `self` after
541    /// calling `snapshot()` are **not** visible to the snapshot.
542    ///
543    /// ## Pattern — one DB + snapshot per request
544    ///
545    /// ```rust,ignore
546    /// let snap = db.snapshot();
547    /// std::thread::spawn(move || {
548    ///     let out = snap.parse_cst(file_id).unwrap();
549    ///     // use out …
550    /// });
551    /// ```
552    #[must_use]
553    pub fn snapshot(&self) -> DatabaseSnapshot {
554        // Collect a lightweight view of the file registry (FileId → SourceFile).
555        let files: IndexMap<FileId, SourceFile> = self
556            .files
557            .iter()
558            .map(|(&id, rec)| (id, rec.source_file))
559            .collect();
560
561        let file_opts: IndexMap<FileId, FileOptions> = self
562            .files
563            .iter()
564            .map(|(&id, rec)| (id, rec.file_opts))
565            .collect();
566
567        DatabaseSnapshot {
568            inner: self.inner.clone(),
569            files: Arc::new(files),
570            workspace: self.workspace,
571            file_opts: Arc::new(file_opts),
572        }
573    }
574
575    // -----------------------------------------------------------------------
576    // Query access — delegates to the Salsa derived queries
577    // -----------------------------------------------------------------------
578
579    /// Run `parse_cst` on the given file.
580    pub fn parse_cst(&self, id: FileId) -> Result<ParseOutput, UnknownFileId> {
581        let sf = self.source_file(id)?;
582        Ok(parse_cst(&self.inner, sf))
583    }
584
585    /// Run `parse_ast` on the given file.
586    pub fn parse_ast(&self, id: FileId) -> Result<AstOutput, UnknownFileId> {
587        let sf = self.source_file(id)?;
588        Ok(parse_ast(&self.inner, sf))
589    }
590
591    /// Run `plan_of` on the given file.
592    pub fn plan_of(&self, id: FileId) -> Result<PlanOutput, UnknownFileId> {
593        let sf = self.source_file(id)?;
594        Ok(plan_of(&self.inner, sf))
595    }
596
597    /// Run `sema_diagnostics` on the given file.
598    pub fn sema_diagnostics(&self, id: FileId) -> Result<DiagnosticsOutput, UnknownFileId> {
599        let sf = self.source_file(id)?;
600        let fo = self.file_options(id)?;
601        let ws = self.workspace_inputs()?;
602        Ok(sema_diagnostics(&self.inner, sf, fo, ws))
603    }
604
605    /// Run `resolved_names` on the given file.
606    pub fn resolved_names(&self, id: FileId) -> Result<ResolvedNamesOutput, UnknownFileId> {
607        let sf = self.source_file(id)?;
608        let fo = self.file_options(id)?;
609        Ok(resolved_names(&self.inner, sf, fo))
610    }
611
612    /// Run `all_diagnostics` on the given file.
613    pub fn all_diagnostics(&self, id: FileId) -> Result<DiagnosticsOutput, UnknownFileId> {
614        let sf = self.source_file(id)?;
615        let fo = self.file_options(id)?;
616        let ws = self.workspace_inputs()?;
617        Ok(all_diagnostics(&self.inner, sf, fo, ws))
618    }
619
620    /// Run the full analysis pipeline on the given file.
621    pub fn analyse_file(&self, id: FileId) -> Result<Analysis, UnknownFileId> {
622        let sf = self.source_file(id)?;
623        let fo = self.file_options(id)?;
624        let ws = self.workspace_inputs()?;
625        Ok(analyse_file(&self.inner, sf, fo, ws))
626    }
627
628    /// Return the source text of a file.
629    ///
630    /// Returns `Err(UnknownFileId)` if `id` is not open.
631    pub fn source_of(&self, id: FileId) -> Result<String, UnknownFileId> {
632        let sf = self.source_file(id)?;
633        Ok(sf.source(&self.inner).clone())
634    }
635
636    /// Return the path associated with a file.
637    ///
638    /// Returns `Err(UnknownFileId)` if `id` is not open.
639    pub fn path_of(&self, id: FileId) -> Result<&Path, UnknownFileId> {
640        self.files
641            .get(&id)
642            .map(|r| r.path.as_path())
643            .ok_or(UnknownFileId(id))
644    }
645
646    /// Returns `true` if `id` refers to an open file.
647    #[must_use]
648    pub fn is_open(&self, id: FileId) -> bool {
649        self.files.contains_key(&id)
650    }
651
652    /// Iterate every currently-open [`FileId`] in insertion order.
653    ///
654    /// Needed by cross-file analyses (workspace symbols / references /
655    /// goto-definition, spec §14.2, bead cy-kkw) so the navigation
656    /// engine can walk every member file of the workspace without the
657    /// caller having to track its own file list alongside the
658    /// [`Database`].
659    pub fn file_ids(&self) -> impl Iterator<Item = FileId> + '_ {
660        self.files.keys().copied()
661    }
662
663    // -----------------------------------------------------------------------
664    // Internal helpers
665    // -----------------------------------------------------------------------
666
667    fn source_file(&self, id: FileId) -> Result<SourceFile, UnknownFileId> {
668        self.files
669            .get(&id)
670            .map(|r| r.source_file)
671            .ok_or(UnknownFileId(id))
672    }
673
674    fn file_options(&self, id: FileId) -> Result<FileOptions, UnknownFileId> {
675        self.files
676            .get(&id)
677            .map(|r| r.file_opts)
678            .ok_or(UnknownFileId(id))
679    }
680
681    fn workspace_inputs(&self) -> Result<WorkspaceInputs, UnknownFileId> {
682        // WorkspaceInputs is always present after construction.
683        self.workspace
684            .ok_or_else(|| unreachable!("WorkspaceInputs always initialised in Database::new"))
685    }
686
687    fn workspace_inputs_mut(&mut self) -> WorkspaceInputs {
688        self.workspace
689            .expect("WorkspaceInputs always initialised in Database::new")
690    }
691}
692
693// Compile-time Send assertion.  Database is Send because:
694//   - CypherDatabase: Send (Salsa emits this)
695//   - IndexMap<FileId, FileRecord>: Send (FileRecord fields are Send)
696//   - WorkspaceInputs: Copy + Send
697//   - u32: Send
698const _: fn() = || {
699    fn check_send<T: Send>() {}
700    check_send::<Database>();
701};
702
703// ---------------------------------------------------------------------------
704// Tests
705// ---------------------------------------------------------------------------
706
707#[cfg(test)]
708mod tests {
709    use super::*;
710    use std::sync::Arc;
711
712    // -----------------------------------------------------------------------
713    // Basic open / query / update / remove
714    // -----------------------------------------------------------------------
715
716    #[test]
717    fn open_and_query_single_file() {
718        let mut db = Database::new();
719        let id = db.open_file(
720            Path::new("a.cyp"),
721            "MATCH (n) RETURN n".into(),
722            DialectMode::GqlAligned,
723        );
724
725        let out = db.parse_cst(id).expect("file must be open");
726        assert_eq!(out.parse().syntax().to_string(), "MATCH (n) RETURN n");
727    }
728
729    #[test]
730    fn update_file_invalidates_cache() {
731        let mut db = Database::new();
732        let id = db.open_file(
733            Path::new("a.cyp"),
734            "RETURN 1".into(),
735            DialectMode::GqlAligned,
736        );
737
738        let out1 = db.parse_cst(id).unwrap();
739        assert_eq!(out1.parse().syntax().to_string(), "RETURN 1");
740
741        db.update_file(id, "RETURN 2".into()).unwrap();
742
743        let out2 = db.parse_cst(id).unwrap();
744        assert_eq!(out2.parse().syntax().to_string(), "RETURN 2");
745    }
746
747    // -----------------------------------------------------------------------
748    // edit_file (cy-zv0)
749    // -----------------------------------------------------------------------
750
751    #[test]
752    fn edit_file_applies_edit_and_invalidates_cache() {
753        use cyrs_syntax::{TextEdit, TextRange, TextSize};
754
755        let mut db = Database::new();
756        let id = db.open_file(
757            Path::new("e.cyp"),
758            "RETURN 1".into(),
759            DialectMode::GqlAligned,
760        );
761        let before = db.parse_cst(id).unwrap();
762        assert_eq!(before.parse().syntax().to_string(), "RETURN 1");
763
764        // Replace "1" (byte 7..8) with "42".
765        let edit = TextEdit::replace(TextRange::new(TextSize::new(7), TextSize::new(8)), "42");
766        db.edit_file(id, &edit).unwrap();
767
768        let after = db.parse_cst(id).unwrap();
769        assert_eq!(after.parse().syntax().to_string(), "RETURN 42");
770        assert_eq!(db.source_of(id).unwrap(), "RETURN 42");
771        assert!(
772            !Arc::ptr_eq(&before.0, &after.0),
773            "edit_file must bump the Salsa revision → new Arc"
774        );
775    }
776
777    /// cy-li6: after `edit_file`, the next `parse_cst` query must return the
778    /// same `Arc<Parse>` that `incremental_reparse` produced — i.e. the
779    /// precomputed Parse is published to Salsa instead of being reparsed.
780    #[test]
781    fn edit_file_publishes_precomputed_parse_to_salsa() {
782        use cyrs_syntax::{TextEdit, TextRange, TextSize};
783
784        let mut db = Database::new();
785        let id = db.open_file(
786            Path::new("e.cyp"),
787            "RETURN 1".into(),
788            DialectMode::GqlAligned,
789        );
790        // Warm parse_cst so the SourceFile input exists in Salsa storage.
791        let _ = db.parse_cst(id).unwrap();
792
793        // Apply an edit. The smart path inside `edit_file` will splice a
794        // sub-tree and publish the resulting Parse via the cy-li6
795        // `precomputed_parse` input slot.
796        let edit = TextEdit::replace(TextRange::new(TextSize::new(7), TextSize::new(8)), "42");
797        db.edit_file(id, &edit).unwrap();
798
799        // Read the published hint directly from Salsa.
800        let sf = db.source_file(id).unwrap();
801        let hint = sf
802            .precomputed_parse(&db.inner)
803            .as_ref()
804            .expect("edit_file must publish a precomputed Parse to the SourceFile input")
805            .clone();
806
807        // The next parse_cst call must return that exact Arc — proving
808        // the Salsa-tracked query short-circuited on the hint instead of
809        // re-parsing the source string.
810        let after = db.parse_cst(id).unwrap();
811        assert!(
812            Arc::ptr_eq(&hint.0, &after.0),
813            "parse_cst after edit_file must return the precomputed Parse Arc, \
814             not a freshly re-parsed one"
815        );
816
817        // And a second parse_cst call must hit Salsa's memo (same Arc).
818        let after2 = db.parse_cst(id).unwrap();
819        assert!(
820            Arc::ptr_eq(&after.0, &after2.0),
821            "parse_cst memo must be stable across subsequent queries"
822        );
823
824        // Source text is the post-edit canonical text.
825        assert_eq!(after.parse().syntax().to_string(), "RETURN 42");
826    }
827
828    /// cy-li6: a non-incremental `update_file` must clear any stale
829    /// precomputed Parse so the next `parse_cst` re-parses fresh source.
830    #[test]
831    fn update_file_clears_precomputed_parse_hint() {
832        use cyrs_syntax::{TextEdit, TextRange, TextSize};
833
834        let mut db = Database::new();
835        let id = db.open_file(
836            Path::new("e.cyp"),
837            "RETURN 1".into(),
838            DialectMode::GqlAligned,
839        );
840        let _ = db.parse_cst(id).unwrap();
841
842        // Seed the hint via edit_file.
843        let edit = TextEdit::replace(TextRange::new(TextSize::new(7), TextSize::new(8)), "42");
844        db.edit_file(id, &edit).unwrap();
845
846        let sf = db.source_file(id).unwrap();
847        assert!(
848            sf.precomputed_parse(&db.inner).is_some(),
849            "edit_file must seed the precomputed_parse hint"
850        );
851
852        // Full source replacement must clear the stale hint.
853        db.update_file(id, "RETURN 99".into()).unwrap();
854        assert!(
855            sf.precomputed_parse(&db.inner).is_none(),
856            "update_file must clear the stale precomputed_parse hint"
857        );
858
859        let after = db.parse_cst(id).unwrap();
860        assert_eq!(after.parse().syntax().to_string(), "RETURN 99");
861    }
862
863    #[test]
864    fn edit_file_unknown_fileid() {
865        use cyrs_syntax::{TextEdit, TextSize};
866
867        let mut db = Database::new();
868        let ghost = FileId(999);
869        let edit = TextEdit::insert(TextSize::new(0), "x");
870        assert_eq!(db.edit_file(ghost, &edit), Err(UnknownFileId(ghost)));
871    }
872
873    #[test]
874    fn edit_file_preserves_other_files_cache() {
875        use cyrs_syntax::{TextEdit, TextRange, TextSize};
876
877        let mut db = Database::new();
878        let a = db.open_file(
879            Path::new("a.cyp"),
880            "RETURN 1".into(),
881            DialectMode::GqlAligned,
882        );
883        let b = db.open_file(
884            Path::new("b.cyp"),
885            "RETURN 2".into(),
886            DialectMode::GqlAligned,
887        );
888
889        let oa = db.parse_cst(a).unwrap();
890        let ob = db.parse_cst(b).unwrap();
891
892        // Edit file `a`.
893        let edit = TextEdit::replace(TextRange::new(TextSize::new(7), TextSize::new(8)), "99");
894        db.edit_file(a, &edit).unwrap();
895
896        // File b's cache must survive.
897        let ob2 = db.parse_cst(b).unwrap();
898        assert!(
899            Arc::ptr_eq(&ob.0, &ob2.0),
900            "file b cache must survive edit to file a"
901        );
902
903        // File a reflects the edit.
904        let oa2 = db.parse_cst(a).unwrap();
905        assert!(!Arc::ptr_eq(&oa.0, &oa2.0));
906        assert_eq!(oa2.parse().syntax().to_string(), "RETURN 99");
907    }
908
909    #[test]
910    fn remove_file_stale_returns_error() {
911        let mut db = Database::new();
912        let id = db.open_file(
913            Path::new("a.cyp"),
914            "RETURN 1".into(),
915            DialectMode::GqlAligned,
916        );
917
918        // Confirm it's open.
919        assert!(db.is_open(id));
920
921        // Remove it.
922        db.remove_file(id).expect("remove should succeed");
923
924        // All subsequent queries return an error, not a panic.
925        assert!(!db.is_open(id));
926        assert_eq!(db.parse_cst(id), Err(UnknownFileId(id)));
927        assert_eq!(
928            db.update_file(id, "RETURN 2".into()),
929            Err(UnknownFileId(id))
930        );
931        assert_eq!(db.remove_file(id), Err(UnknownFileId(id)));
932    }
933
934    // -----------------------------------------------------------------------
935    // Three files — independent caching
936    // -----------------------------------------------------------------------
937
938    #[test]
939    fn three_files_independent_caching() {
940        let mut db = Database::new();
941        let a = db.open_file(
942            Path::new("a.cyp"),
943            "RETURN 1".into(),
944            DialectMode::GqlAligned,
945        );
946        let b = db.open_file(
947            Path::new("b.cyp"),
948            "RETURN 2".into(),
949            DialectMode::GqlAligned,
950        );
951        let c = db.open_file(
952            Path::new("c.cyp"),
953            "RETURN 3".into(),
954            DialectMode::GqlAligned,
955        );
956
957        // Query each file.
958        let oa = db.parse_cst(a).unwrap();
959        let ob = db.parse_cst(b).unwrap();
960        let oc = db.parse_cst(c).unwrap();
961
962        assert_eq!(oa.parse().syntax().to_string(), "RETURN 1");
963        assert_eq!(ob.parse().syntax().to_string(), "RETURN 2");
964        assert_eq!(oc.parse().syntax().to_string(), "RETURN 3");
965
966        // Mutate only file `b`.
967        db.update_file(b, "RETURN 99".into()).unwrap();
968
969        // `a` and `c` caches are untouched.
970        let oa2 = db.parse_cst(a).unwrap();
971        let oc2 = db.parse_cst(c).unwrap();
972        assert!(
973            Arc::ptr_eq(&oa.0, &oa2.0),
974            "file a cache must survive update to file b"
975        );
976        assert!(
977            Arc::ptr_eq(&oc.0, &oc2.0),
978            "file c cache must survive update to file b"
979        );
980
981        // `b` is invalidated.
982        let ob2 = db.parse_cst(b).unwrap();
983        assert_eq!(ob2.parse().syntax().to_string(), "RETURN 99");
984        assert!(
985            !Arc::ptr_eq(&ob.0, &ob2.0),
986            "file b cache must be invalidated"
987        );
988    }
989
990    // -----------------------------------------------------------------------
991    // Snapshot + thread: one DB + snapshot per request pattern
992    // -----------------------------------------------------------------------
993
994    #[test]
995    fn snapshot_send_concurrent_query() {
996        let mut db = Database::new();
997        let id = db.open_file(
998            Path::new("q.cyp"),
999            "MATCH (n) RETURN n".into(),
1000            DialectMode::GqlAligned,
1001        );
1002
1003        // Warm the main-thread cache.
1004        let main_out = db.parse_cst(id).unwrap();
1005        let main_text = main_out.parse().syntax().to_string();
1006
1007        // Take a snapshot; ship to worker thread.
1008        let snap = db.snapshot();
1009        let worker_text = std::thread::spawn(move || {
1010            snap.parse_cst(id)
1011                .expect("snapshot must contain the file")
1012                .parse()
1013                .syntax()
1014                .to_string()
1015        })
1016        .join()
1017        .expect("worker thread panicked");
1018
1019        assert_eq!(
1020            worker_text, main_text,
1021            "snapshot result must match main-thread result"
1022        );
1023    }
1024
1025    /// Compile-time assertion: `DatabaseSnapshot: Send`.
1026    #[test]
1027    fn database_snapshot_is_send() {
1028        fn require_send<T: Send>(_: T) {}
1029        let mut db = Database::new();
1030        let _id = db.open_file(
1031            Path::new("a.cyp"),
1032            "RETURN 1".into(),
1033            DialectMode::GqlAligned,
1034        );
1035        let snap = db.snapshot();
1036        require_send(snap);
1037    }
1038
1039    // -----------------------------------------------------------------------
1040    // Schema change: sema cache invalidates, parse cache survives
1041    // -----------------------------------------------------------------------
1042
1043    #[test]
1044    fn schema_change_invalidates_sema_not_parse() {
1045        use cyrs_schema::EmptySchema;
1046
1047        let mut db = Database::new();
1048        let id = db.open_file(
1049            Path::new("s.cyp"),
1050            "MATCH (n:Person) RETURN n".into(),
1051            DialectMode::GqlAligned,
1052        );
1053
1054        // Warm the parse cache.
1055        let cst1 = db.parse_cst(id).unwrap();
1056
1057        // Run sema (no schema yet).
1058        let _sema1 = db.sema_diagnostics(id).unwrap();
1059
1060        // Set a schema — bumps WorkspaceInputs revision.
1061        let schema: Arc<dyn SchemaProvider> = Arc::new(EmptySchema);
1062        db.set_schema(Some(schema));
1063
1064        // Parse cache must survive schema change.
1065        let cst2 = db.parse_cst(id).unwrap();
1066        assert!(
1067            Arc::ptr_eq(&cst1.0, &cst2.0),
1068            "parse_cst Arc must survive schema change"
1069        );
1070
1071        // Sema runs without error under the new schema.
1072        let _sema2 = db.sema_diagnostics(id).unwrap();
1073    }
1074
1075    // -----------------------------------------------------------------------
1076    // source_of / path_of helpers
1077    // -----------------------------------------------------------------------
1078
1079    #[test]
1080    fn source_and_path_accessors() {
1081        let mut db = Database::new();
1082        let p = Path::new("myfile.cyp");
1083        let id = db.open_file(p, "RETURN 42".into(), DialectMode::GqlAligned);
1084
1085        assert_eq!(db.source_of(id).unwrap(), "RETURN 42");
1086        assert_eq!(db.path_of(id).unwrap(), p);
1087    }
1088
1089    // -----------------------------------------------------------------------
1090    // DatabaseOptions / with_options — bead cy-31b
1091    // -----------------------------------------------------------------------
1092
1093    /// `Database::with_options` constructs successfully and the resulting DB
1094    /// operates correctly (`parse_lru` = 2 stress test).
1095    #[test]
1096    fn with_options_parse_lru_2() {
1097        use crate::DatabaseOptions;
1098
1099        // parse_lru = 2 means only 2 parse_cst results are kept.
1100        // This test verifies the API compiles and the DB is functional.
1101        let db = Database::with_options(DatabaseOptions {
1102            parse_lru: 2,
1103            sema_lru: 2,
1104            plan_lru: 2,
1105            formatted_lru: 2,
1106        });
1107
1108        // Verify the DB is usable.
1109        assert_eq!(db.files.len(), 0);
1110    }
1111
1112    /// `Database::with_options` with `parse_lru` = 2: open 3 files, query each.
1113    /// The LRU eviction will drop older cached values, so querying is still
1114    /// correct (Salsa recomputes on cache miss) but the cache is bounded.
1115    #[test]
1116    fn with_options_lru_2_three_files() {
1117        use crate::DatabaseOptions;
1118
1119        let mut db = Database::with_options(DatabaseOptions {
1120            parse_lru: 2,
1121            sema_lru: 2,
1122            plan_lru: 2,
1123            ..DatabaseOptions::default()
1124        });
1125
1126        let a = db.open_file(
1127            Path::new("a.cyp"),
1128            "RETURN 1".into(),
1129            DialectMode::GqlAligned,
1130        );
1131        let b = db.open_file(
1132            Path::new("b.cyp"),
1133            "RETURN 2".into(),
1134            DialectMode::GqlAligned,
1135        );
1136        let c = db.open_file(
1137            Path::new("c.cyp"),
1138            "RETURN 3".into(),
1139            DialectMode::GqlAligned,
1140        );
1141
1142        // All three files parse correctly even with a tiny LRU cap.
1143        assert_eq!(
1144            db.parse_cst(a).unwrap().parse().syntax().to_string(),
1145            "RETURN 1"
1146        );
1147        assert_eq!(
1148            db.parse_cst(b).unwrap().parse().syntax().to_string(),
1149            "RETURN 2"
1150        );
1151        assert_eq!(
1152            db.parse_cst(c).unwrap().parse().syntax().to_string(),
1153            "RETURN 3"
1154        );
1155
1156        // Re-query all after the cap has been exceeded — Salsa recomputes on
1157        // cache miss, so results must still be correct.
1158        assert_eq!(
1159            db.parse_cst(a).unwrap().parse().syntax().to_string(),
1160            "RETURN 1"
1161        );
1162        assert_eq!(
1163            db.parse_cst(b).unwrap().parse().syntax().to_string(),
1164            "RETURN 2"
1165        );
1166        assert_eq!(
1167            db.parse_cst(c).unwrap().parse().syntax().to_string(),
1168            "RETURN 3"
1169        );
1170    }
1171
1172    /// `Database::new()` uses default options (`parse_lru` = 256).
1173    #[test]
1174    fn database_new_uses_default_options() {
1175        use crate::DatabaseOptions;
1176        let default_opts = DatabaseOptions::default();
1177        assert_eq!(default_opts.parse_lru, 256);
1178        assert_eq!(default_opts.sema_lru, 256);
1179        assert_eq!(default_opts.plan_lru, 256);
1180        assert_eq!(default_opts.formatted_lru, 256);
1181
1182        // Verify Database::new() still works.
1183        let db = Database::new();
1184        assert_eq!(db.files.len(), 0);
1185    }
1186
1187    // -----------------------------------------------------------------------
1188    // Unknown FileId accessors
1189    // -----------------------------------------------------------------------
1190
1191    #[test]
1192    fn unknown_fileid_source_of() {
1193        let db = Database::new();
1194        let ghost = FileId(999);
1195        assert_eq!(db.source_of(ghost), Err(UnknownFileId(ghost)));
1196        assert_eq!(db.path_of(ghost), Err(UnknownFileId(ghost)));
1197    }
1198}