cyrs_db/workspace.rs
1//! Workspace-level `Database` API — `FileId` model, snapshot semantics,
2//! and the `SchemaProvider` wiring (spec 0001 §11.4, §11.5).
3//!
4//! ## Design
5//!
6//! ```text
7//! Database (owned, wraps CypherDatabase)
8//! ├── FileId map: BTreeMap<FileId, SourceFile> (stable u32 handles)
9//! ├── WorkspaceInputs (workspace-scoped schema)
10//! └── FileOptions per file (per-file analysis options)
11//!
12//! DatabaseSnapshot (Clone of CypherDatabase, Send)
13//! └── read-only view, carries FileId → SourceFile handles
14//! ```
15//!
16//! ## Concurrency contract (spec §11.5)
17//!
18//! * `Database` is `Send` (not `Sync` — Salsa's `ZalsaLocal` is per-thread).
19//! Mutation (`&mut self`) serialises writes.
20//! * `DatabaseSnapshot` is obtained via [`Database::snapshot`]. It clones the
21//! `Arc<Zalsa>` backing store and creates a fresh `ZalsaLocal`, making it
22//! safe to send to another thread. The snapshot sees a frozen view of the
23//! database at the point of the clone; subsequent mutations to the origin
24//! `Database` are invisible to the snapshot (Salsa's snapshot-isolation
25//! guarantee).
26//! * **Pattern — one DB + snapshot per request** (spec §11.5):
27//!
28//! ```rust,ignore
29//! // Main thread owns the database.
30//! let mut db = Database::new();
31//! let file = db.open_file(Path::new("q.cyp"), "RETURN 1".into(), DialectMode::GqlAligned);
32//!
33//! // Per-request: take a snapshot, send to worker thread.
34//! let snap = db.snapshot();
35//! let src = db.source_of(file).unwrap().to_string();
36//! let handle = std::thread::spawn(move || {
37//! let result = snap.parse_cst(file);
38//! result.parse().syntax().to_string()
39//! });
40//! let output = handle.join().unwrap();
41//! assert_eq!(output, src);
42//! ```
43//!
44//! ## `FileId` representation
45//!
46//! `FileId` is a `u32` newtype — simple, stable across process restarts,
47//! and cheap to copy. Each `Database` maintains a monotonically-increasing
48//! counter; IDs are never reused within a single database instance.
49//!
50//! ## `SchemaProvider` wiring
51//!
52//! The workspace-scoped `SchemaProvider` is stored in a single
53//! [`WorkspaceInputs`] input. Calling [`Database::set_schema`] bumps the
54//! Salsa revision for that input, which Salsa propagates to every
55//! schema-dependent derived query (e.g. `sema_diagnostics`, `all_diagnostics`)
56//! across **all** files. The parse cache is unaffected (parse does not read
57//! `WorkspaceInputs`).
58
59use std::path::{Path, PathBuf};
60use std::sync::Arc;
61
62use indexmap::IndexMap;
63
64use cyrs_schema::SchemaProvider;
65use cyrs_syntax::{TextEdit, incremental_reparse};
66
67use crate::inputs::{AnalysisOptions, FileOptions, WorkspaceInputs};
68use crate::options::DatabaseOptions;
69use crate::queries::{
70 AstOutput, DiagnosticsOutput, PlanOutput, ResolvedNamesOutput, all_diagnostics, analyse_file,
71 parse_ast, plan_of, resolved_names, sema_diagnostics,
72};
73use crate::{Analysis, CypherDatabase, DialectMode, ParseOutput, SourceFile, parse_cst};
74
75// ---------------------------------------------------------------------------
76// FileId
77// ---------------------------------------------------------------------------
78
/// A stable, workspace-scoped file identity (spec §11.4).
///
/// `FileId` is the unit of caching in the incremental database. Every
/// source file opened via [`Database::open_file`] receives a unique
/// `FileId`. IDs are monotonically increasing `u32` values; they are
/// never reused within a single `Database` instance — even when the
/// underlying Salsa input handles are recycled from the free-slot pool.
///
/// `FileId` values are intentionally opaque — callers store them but
/// should not interpret the numeric value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct FileId(pub u32);
90
91impl std::fmt::Display for FileId {
92 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93 write!(f, "FileId({})", self.0)
94 }
95}
96
97// ---------------------------------------------------------------------------
98// Error type
99// ---------------------------------------------------------------------------
100
/// Error returned when a `FileId` is not found in the workspace.
///
/// Marked `#[non_exhaustive]` (cy-2i9.1) on the tuple — downstream
/// consumers must match it with `..` in patterns. The inner `FileId`
/// remains accessible via the public `.0` field.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct UnknownFileId(pub FileId);
109
110impl std::fmt::Display for UnknownFileId {
111 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
112 write!(f, "unknown FileId: {}", self.0)
113 }
114}
115
116impl std::error::Error for UnknownFileId {}
117
118// ---------------------------------------------------------------------------
119// FileRecord — internal per-file state
120// ---------------------------------------------------------------------------
121
/// Internal per-file record mapping a `FileId` to its Salsa inputs.
struct FileRecord {
    /// Salsa input struct carrying `source` + `dialect`.
    source_file: SourceFile,
    /// Salsa input struct carrying per-file `AnalysisOptions`.
    file_opts: FileOptions,
    /// Path associated with the file (used for display / lookup only;
    /// never a uniqueness key — see [`Database::open_file`]).
    path: PathBuf,
}
131
/// A freed pair of Salsa input handles retained for reuse by a subsequent
/// [`Database::open_file`] call.
///
/// Salsa 0.26 does not expose a public API for deleting input structs, so
/// once a `SourceFile` / `FileOptions` is allocated its internal slot in
/// the Salsa interner persists for the lifetime of the database. Without
/// recycling, an LSP-style workload that churns file IDs (open → edit →
/// close) would grow Salsa's input table unboundedly and violate the
/// spec §11.6 steady-state RSS bound.
///
/// To respect the spec bound we pool the handles: `remove_file` pushes
/// the pair into [`Database::free_slots`] and resets the source to empty
/// to free the backing `String`; `open_file` prefers to pop a free slot
/// and reset its fields before allocating a fresh one. The pool's
/// steady-state size is bounded by the peak number of simultaneously-open
/// files, which is naturally bounded by realistic client behaviour.
struct FreeSlot {
    /// Recycled `SourceFile` handle; its source is reset to `""` on free.
    source_file: SourceFile,
    /// Recycled `FileOptions` handle; reset on reuse (not on free).
    file_opts: FileOptions,
}
152
153// ---------------------------------------------------------------------------
154// DatabaseSnapshot
155// ---------------------------------------------------------------------------
156
/// A read-only, `Send` snapshot of the database at a point in time.
///
/// Obtained via [`Database::snapshot`]. The snapshot shares the
/// `Arc<Zalsa>` backing store with the origin `Database` and sees a
/// frozen view of the Salsa revision at the moment of cloning. Subsequent
/// mutations to the `Database` are invisible to this snapshot.
///
/// Snapshots are suitable for cross-thread queries: they implement `Send`
/// so they can be shipped to a worker thread for concurrent read queries.
///
/// ## Example
///
/// ```rust,ignore
/// let snap = db.snapshot();
/// let file = /* FileId from the origin db */;
/// let result = std::thread::spawn(move || {
///     snap.parse_cst(file).unwrap()
/// }).join().unwrap();
/// ```
pub struct DatabaseSnapshot {
    /// Cloned Salsa database (read-only from the snapshot's perspective).
    inner: CypherDatabase,
    /// Snapshot of the file registry at the time of cloning.
    files: Arc<IndexMap<FileId, SourceFile>>,
    /// Snapshot of the workspace inputs handle (always `Some` in practice —
    /// `Database` creates it eagerly at construction).
    workspace: Option<WorkspaceInputs>,
    /// Per-file options snapshot, keyed identically to `files`.
    file_opts: Arc<IndexMap<FileId, FileOptions>>,
}
186
187impl std::fmt::Debug for DatabaseSnapshot {
188 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
189 f.debug_struct("DatabaseSnapshot")
190 .field("num_files", &self.files.len())
191 .finish_non_exhaustive()
192 }
193}
194
195// Compile-time Send assertion for DatabaseSnapshot.
196// CypherDatabase is Send (salsa #[salsa::db] macro emits `unsafe impl Send`).
197// Arc<IndexMap<…>> is Send when the value types are Send.
198// FileId, SourceFile, FileOptions, WorkspaceInputs are all Send.
199const _: fn() = || {
200 fn check_send<T: Send>() {}
201 check_send::<DatabaseSnapshot>();
202};
203
204impl DatabaseSnapshot {
205 /// Run `parse_cst` on the given file using this snapshot's view.
206 ///
207 /// Returns `Err(UnknownFileId)` if `id` was not open at snapshot time.
208 pub fn parse_cst(&self, id: FileId) -> Result<ParseOutput, UnknownFileId> {
209 let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
210 Ok(parse_cst(&self.inner, sf))
211 }
212
213 /// Run `parse_ast` on the given file using this snapshot's view.
214 pub fn parse_ast(&self, id: FileId) -> Result<AstOutput, UnknownFileId> {
215 let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
216 Ok(parse_ast(&self.inner, sf))
217 }
218
219 /// Run `plan_of` on the given file using this snapshot's view.
220 pub fn plan_of(&self, id: FileId) -> Result<PlanOutput, UnknownFileId> {
221 let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
222 Ok(plan_of(&self.inner, sf))
223 }
224
225 /// Run `sema_diagnostics` on the given file using this snapshot's view.
226 pub fn sema_diagnostics(&self, id: FileId) -> Result<DiagnosticsOutput, UnknownFileId> {
227 let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
228 let fo = self.file_opts.get(&id).copied().ok_or(UnknownFileId(id))?;
229 let ws = self.workspace.ok_or(UnknownFileId(id))?;
230 Ok(sema_diagnostics(&self.inner, sf, fo, ws))
231 }
232
233 /// Run `resolved_names` on the given file using this snapshot's view.
234 pub fn resolved_names(&self, id: FileId) -> Result<ResolvedNamesOutput, UnknownFileId> {
235 let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
236 let fo = self.file_opts.get(&id).copied().ok_or(UnknownFileId(id))?;
237 Ok(resolved_names(&self.inner, sf, fo))
238 }
239
240 /// Run `all_diagnostics` on the given file using this snapshot's view.
241 pub fn all_diagnostics(&self, id: FileId) -> Result<DiagnosticsOutput, UnknownFileId> {
242 let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
243 let fo = self.file_opts.get(&id).copied().ok_or(UnknownFileId(id))?;
244 let ws = self.workspace.ok_or(UnknownFileId(id))?;
245 Ok(all_diagnostics(&self.inner, sf, fo, ws))
246 }
247
248 /// Run the full analysis pipeline on the given file using this snapshot's view.
249 pub fn analyse_file(&self, id: FileId) -> Result<Analysis, UnknownFileId> {
250 let sf = self.files.get(&id).copied().ok_or(UnknownFileId(id))?;
251 let fo = self.file_opts.get(&id).copied().ok_or(UnknownFileId(id))?;
252 let ws = self.workspace.ok_or(UnknownFileId(id))?;
253 Ok(analyse_file(&self.inner, sf, fo, ws))
254 }
255}
256
257// ---------------------------------------------------------------------------
258// Database
259// ---------------------------------------------------------------------------
260
/// Workspace-scoped incremental analysis database (spec §11).
///
/// The primary public API for all consumers (`cyrs-lsp`, `cyrs-agent`,
/// `cyrs-cli`, `cyrs-tck`). Wraps [`CypherDatabase`] with:
///
/// * A `FileId` → `SourceFile` registry so callers use stable `u32` handles
///   instead of raw Salsa input structs.
/// * A single [`WorkspaceInputs`] for the workspace-scoped schema.
/// * Snapshot support via [`Database::snapshot`].
///
/// ## Concurrency contract (spec §11.5)
///
/// `Database` is `Send` but not `Sync`. All mutating methods take `&mut self`.
/// For concurrent read access from multiple threads, call [`snapshot`] to
/// obtain a [`DatabaseSnapshot`] that implements `Send` and can be given to
/// a worker thread.
///
/// [`snapshot`]: Database::snapshot
pub struct Database {
    /// The wrapped Salsa database all queries run against.
    inner: CypherDatabase,
    /// Registry: `FileId` → per-file Salsa inputs.
    files: IndexMap<FileId, FileRecord>,
    /// Pool of Salsa input handles freed by [`remove_file`] and available
    /// for reuse on the next [`open_file`]. See [`FreeSlot`] for rationale
    /// (spec §11.6 steady-state RSS bound).
    ///
    /// [`remove_file`]: Database::remove_file
    /// [`open_file`]: Database::open_file
    free_slots: Vec<FreeSlot>,
    /// The single workspace-scoped input; set in `with_options` and always
    /// `Some` thereafter (the `Option` exists only for construction order).
    workspace: Option<WorkspaceInputs>,
    /// Monotonically increasing `FileId` counter; never decremented.
    next_id: u32,
    /// LRU options captured at construction (immutable; currently retained
    /// for reference only, hence the dead_code allowance).
    #[allow(dead_code)]
    options: DatabaseOptions,
}
298
299impl std::fmt::Debug for Database {
300 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
301 f.debug_struct("Database")
302 .field("num_files", &self.files.len())
303 .finish_non_exhaustive()
304 }
305}
306
307impl Default for Database {
308 fn default() -> Self {
309 Self::new()
310 }
311}
312
313impl Database {
314 // -----------------------------------------------------------------------
315 // Construction
316 // -----------------------------------------------------------------------
317
318 /// Create a new, empty workspace database with default LRU caps (256).
319 #[must_use]
320 pub fn new() -> Self {
321 Self::with_options(DatabaseOptions::default())
322 }
323
    /// Create a new, empty workspace database with the given [`DatabaseOptions`].
    ///
    /// LRU capacities in `opts` are applied via Salsa's runtime
    /// `set_lru_capacity` API immediately after construction. The options
    /// are immutable after this point.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// use cyrs_db::{Database, DatabaseOptions};
    ///
    /// let db = Database::with_options(DatabaseOptions {
    ///     parse_lru: 512,
    ///     sema_lru: 128,
    ///     ..DatabaseOptions::default()
    /// });
    /// ```
    #[must_use]
    pub fn with_options(opts: DatabaseOptions) -> Self {
        let mut inner = CypherDatabase::new();

        // Apply runtime LRU capacity adjustments.
        // The compile-time default is 256 (encoded in #[salsa::tracked(lru = 256)]).
        // We always call set_lru_capacity so that non-default values take effect.
        // Note: `sema_lru` governs both name resolution and semantic
        // diagnostics; `plan_lru` is independent.
        crate::set_parse_cst_lru(&mut inner, opts.parse_lru);
        crate::queries::set_resolved_names_lru(&mut inner, opts.sema_lru);
        crate::queries::set_sema_diagnostics_lru(&mut inner, opts.sema_lru);
        crate::queries::set_plan_of_lru(&mut inner, opts.plan_lru);

        // Create the workspace input eagerly (schema = None) so `workspace`
        // is always `Some` for the rest of the database's lifetime.
        let workspace = Some(inner.new_workspace_inputs(None));
        Self {
            inner,
            files: IndexMap::new(),
            free_slots: Vec::new(),
            workspace,
            next_id: 0,
            options: opts,
        }
    }
363
364 // -----------------------------------------------------------------------
365 // Workspace API — spec §11.4
366 // -----------------------------------------------------------------------
367
    /// Open a file in the workspace.
    ///
    /// Returns a stable [`FileId`] that uniquely identifies this file for the
    /// lifetime of the database. The `path` is recorded for diagnostics /
    /// display but is not used as a key; two calls with the same path but
    /// different sources produce two independent files.
    ///
    /// ## Caching
    ///
    /// Opening a file creates (or recycles) Salsa input structs and does not
    /// yet run any derived query. All analysis is lazy and memoised.
    pub fn open_file(&mut self, path: &Path, source: String, dialect: DialectMode) -> FileId {
        // IDs are monotonic and never reused, even when the underlying Salsa
        // handles below are recycled from `free_slots`.
        let id = FileId(self.next_id);
        self.next_id += 1;

        // Per-file options start from defaults; only the dialect is
        // caller-controlled at open time.
        let options = AnalysisOptions {
            dialect,
            ..Default::default()
        };

        let (source_file, file_opts) = if let Some(slot) = self.free_slots.pop() {
            // Recycle the pooled Salsa handles: reset all fields so the slot
            // behaves like a freshly-allocated input from the perspective of
            // derived queries. Keeps Salsa's input-struct interner bounded
            // under LSP-style FileId churn (spec §11.6, bead cy-bh5).
            // NOTE(review): assumes `set_source` also discards any stale
            // `precomputed_parse` hint left on the recycled handle — confirm
            // against the `SourceFile` setter implementation.
            self.inner.set_source(slot.source_file, source);
            self.inner.set_dialect(slot.source_file, dialect);
            self.inner.set_options(slot.file_opts, options);
            (slot.source_file, slot.file_opts)
        } else {
            // Fresh allocation. The trailing `0` is the third `SourceFile`
            // input field of `new_source_file_with` — presumably an initial
            // version/seed; confirm at its definition.
            let source_file = self.inner.new_source_file_with(source, dialect, 0);
            let file_opts = self.inner.new_file_options(options);
            (source_file, file_opts)
        };

        self.files.insert(
            id,
            FileRecord {
                source_file,
                file_opts,
                path: path.to_owned(),
            },
        );

        id
    }
414
    /// Update the source text of an already-open file.
    ///
    /// Bumps the Salsa revision for this file's `SourceFile` input, causing
    /// all derived queries that depend on `source` to be re-evaluated on the
    /// next access.
    ///
    /// NOTE(review): the cy-li6 contract (see
    /// `update_file_clears_precomputed_parse_hint` in the tests) requires a
    /// full-source replacement to clear any stale `precomputed_parse` hint
    /// seeded by [`Database::edit_file`]. This method relies on `set_source`
    /// doing that clearing internally — confirm against the `SourceFile`
    /// setter implementation.
    ///
    /// Returns `Err(UnknownFileId)` if `id` is not currently open.
    pub fn update_file(&mut self, id: FileId, new_source: String) -> Result<(), UnknownFileId> {
        let record = self.files.get(&id).ok_or(UnknownFileId(id))?;
        let sf = record.source_file;
        self.inner.set_source(sf, new_source);
        Ok(())
    }
428
    /// Apply a single-range text edit to an already-open file (cy-zv0, spec §11).
    ///
    /// This is the incremental-edit entry point that `textDocument/didChange`
    /// and agent edit flows should prefer over [`update_file`] when only a
    /// byte range changed. The API is shaped so that a future sub-tree
    /// reparse (see `cyrs_syntax::edit::incremental_reparse`) can plug in
    /// underneath without breaking callers.
    ///
    /// # Current implementation
    ///
    /// Today the underlying [`incremental_reparse`] is a whole-file reparse
    /// fallback, so the cache-invalidation behaviour is identical to
    /// `update_file`. The observable difference is:
    ///
    /// - Callers pass a [`TextEdit`] value (range + replacement) instead of
    ///   re-materialising the full source, so they don't pay an extra
    ///   `String` allocation for the unchanged prefix / suffix.
    /// - The edit is applied to the current source held by Salsa, not to a
    ///   caller-managed copy, so there is no opportunity for the caller's
    ///   local mirror to drift out of sync.
    ///
    /// Once the smart path lands (tracked as a follow-up bead to cy-zv0),
    /// `edit_file` will become strictly sub-linear in file size for small
    /// edits. Callers do not need to change.
    ///
    /// Returns `Err(UnknownFileId)` if `id` is not currently open.
    ///
    /// [`update_file`]: Database::update_file
    pub fn edit_file(&mut self, id: FileId, edit: &TextEdit) -> Result<(), UnknownFileId> {
        let record = self.files.get(&id).ok_or(UnknownFileId(id))?;
        let sf = record.source_file;

        // Pull the current tree from Salsa so `incremental_reparse` gets a
        // real `SyntaxNode` to dispatch on. Calling parse_cst here warms
        // the memo and pins the `Arc<Parse>` that the sub-tree splicer
        // (cy-li5) reuses on the smart path.
        let parse_out = parse_cst(&self.inner, sf);
        let old_tree = parse_out.parse().syntax();

        // Dispatch through the edit crate. On the smart path this returns
        // a green-spliced `Parse` whose tree shares structure with the old
        // one; on the fallback path it is a whole-file reparse. Either
        // way the resulting source text is canonical.
        let new_parse = incremental_reparse(&old_tree, edit);
        // NOTE(review): materialising the full post-edit text here is
        // O(file size), so this entry point is not yet sub-linear even when
        // the splice itself is.
        let new_source = new_parse.syntax().to_string();

        // cy-li6: publish the freshly-computed `Parse` to Salsa as the
        // memoised `parse_cst` result for the next revision. Without this
        // wiring the bench would re-parse `new_source` from scratch on the
        // next `analyse_file`, defeating the smart-path savings.
        let new_parse_out = crate::ParseOutput::new(new_parse);
        self.inner
            .set_source_with_parse(sf, new_source, new_parse_out);
        Ok(())
    }
484
485 /// Remove a file from the workspace.
486 ///
487 /// After removal, the `FileId` is considered stale. Any subsequent call
488 /// that takes this `FileId` will return `Err(UnknownFileId)` rather than
489 /// panic.
490 ///
491 /// Returns `Err(UnknownFileId)` if `id` was not open.
492 pub fn remove_file(&mut self, id: FileId) -> Result<(), UnknownFileId> {
493 let record = self.files.swap_remove(&id).ok_or(UnknownFileId(id))?;
494
495 // Release the backing source string immediately so a long-lived pool
496 // entry does not pin a large `String` allocation (spec §11.6). The
497 // Salsa revision bump here is harmless: no derived query will read
498 // this `SourceFile` until the slot is recycled, at which point
499 // `open_file` sets the new source and bumps the revision again.
500 self.inner.set_source(record.source_file, String::new());
501
502 self.free_slots.push(FreeSlot {
503 source_file: record.source_file,
504 file_opts: record.file_opts,
505 });
506
507 Ok(())
508 }
509
510 // -----------------------------------------------------------------------
511 // Schema API — spec §11.4 workspace-scoped SchemaProvider
512 // -----------------------------------------------------------------------
513
514 /// Set the workspace-scoped schema.
515 ///
516 /// Bumps the Salsa revision on [`WorkspaceInputs`], which cascades to
517 /// every schema-aware derived query (`sema_diagnostics`, `all_diagnostics`)
518 /// across **all** files. The parse cache is unaffected.
519 ///
520 /// Pass `None` to switch to schema-free analysis mode (§7.1).
521 pub fn set_schema(&mut self, schema: Option<Arc<dyn SchemaProvider>>) {
522 let ws = self.workspace_inputs_mut();
523 self.inner.set_schema(ws, schema);
524 }
525
526 /// Read the current workspace-scoped schema (may be `None`).
527 #[must_use]
528 pub fn schema(&self) -> Option<Arc<dyn SchemaProvider>> {
529 self.workspace.as_ref()?.schema(&self.inner)
530 }
531
532 // -----------------------------------------------------------------------
533 // Snapshot API — spec §11.5
534 // -----------------------------------------------------------------------
535
536 /// Create a [`DatabaseSnapshot`] suitable for cross-thread queries.
537 ///
538 /// The snapshot is a frozen read-only view of the database at this
539 /// instant. It can be sent to another thread (`Send`) and used to run
540 /// any derived query without `&mut`. Mutations applied to `self` after
541 /// calling `snapshot()` are **not** visible to the snapshot.
542 ///
543 /// ## Pattern — one DB + snapshot per request
544 ///
545 /// ```rust,ignore
546 /// let snap = db.snapshot();
547 /// std::thread::spawn(move || {
548 /// let out = snap.parse_cst(file_id).unwrap();
549 /// // use out …
550 /// });
551 /// ```
552 #[must_use]
553 pub fn snapshot(&self) -> DatabaseSnapshot {
554 // Collect a lightweight view of the file registry (FileId → SourceFile).
555 let files: IndexMap<FileId, SourceFile> = self
556 .files
557 .iter()
558 .map(|(&id, rec)| (id, rec.source_file))
559 .collect();
560
561 let file_opts: IndexMap<FileId, FileOptions> = self
562 .files
563 .iter()
564 .map(|(&id, rec)| (id, rec.file_opts))
565 .collect();
566
567 DatabaseSnapshot {
568 inner: self.inner.clone(),
569 files: Arc::new(files),
570 workspace: self.workspace,
571 file_opts: Arc::new(file_opts),
572 }
573 }
574
575 // -----------------------------------------------------------------------
576 // Query access — delegates to the Salsa derived queries
577 // -----------------------------------------------------------------------
578
579 /// Run `parse_cst` on the given file.
580 pub fn parse_cst(&self, id: FileId) -> Result<ParseOutput, UnknownFileId> {
581 let sf = self.source_file(id)?;
582 Ok(parse_cst(&self.inner, sf))
583 }
584
585 /// Run `parse_ast` on the given file.
586 pub fn parse_ast(&self, id: FileId) -> Result<AstOutput, UnknownFileId> {
587 let sf = self.source_file(id)?;
588 Ok(parse_ast(&self.inner, sf))
589 }
590
591 /// Run `plan_of` on the given file.
592 pub fn plan_of(&self, id: FileId) -> Result<PlanOutput, UnknownFileId> {
593 let sf = self.source_file(id)?;
594 Ok(plan_of(&self.inner, sf))
595 }
596
597 /// Run `sema_diagnostics` on the given file.
598 pub fn sema_diagnostics(&self, id: FileId) -> Result<DiagnosticsOutput, UnknownFileId> {
599 let sf = self.source_file(id)?;
600 let fo = self.file_options(id)?;
601 let ws = self.workspace_inputs()?;
602 Ok(sema_diagnostics(&self.inner, sf, fo, ws))
603 }
604
605 /// Run `resolved_names` on the given file.
606 pub fn resolved_names(&self, id: FileId) -> Result<ResolvedNamesOutput, UnknownFileId> {
607 let sf = self.source_file(id)?;
608 let fo = self.file_options(id)?;
609 Ok(resolved_names(&self.inner, sf, fo))
610 }
611
612 /// Run `all_diagnostics` on the given file.
613 pub fn all_diagnostics(&self, id: FileId) -> Result<DiagnosticsOutput, UnknownFileId> {
614 let sf = self.source_file(id)?;
615 let fo = self.file_options(id)?;
616 let ws = self.workspace_inputs()?;
617 Ok(all_diagnostics(&self.inner, sf, fo, ws))
618 }
619
620 /// Run the full analysis pipeline on the given file.
621 pub fn analyse_file(&self, id: FileId) -> Result<Analysis, UnknownFileId> {
622 let sf = self.source_file(id)?;
623 let fo = self.file_options(id)?;
624 let ws = self.workspace_inputs()?;
625 Ok(analyse_file(&self.inner, sf, fo, ws))
626 }
627
628 /// Return the source text of a file.
629 ///
630 /// Returns `Err(UnknownFileId)` if `id` is not open.
631 pub fn source_of(&self, id: FileId) -> Result<String, UnknownFileId> {
632 let sf = self.source_file(id)?;
633 Ok(sf.source(&self.inner).clone())
634 }
635
636 /// Return the path associated with a file.
637 ///
638 /// Returns `Err(UnknownFileId)` if `id` is not open.
639 pub fn path_of(&self, id: FileId) -> Result<&Path, UnknownFileId> {
640 self.files
641 .get(&id)
642 .map(|r| r.path.as_path())
643 .ok_or(UnknownFileId(id))
644 }
645
646 /// Returns `true` if `id` refers to an open file.
647 #[must_use]
648 pub fn is_open(&self, id: FileId) -> bool {
649 self.files.contains_key(&id)
650 }
651
652 /// Iterate every currently-open [`FileId`] in insertion order.
653 ///
654 /// Needed by cross-file analyses (workspace symbols / references /
655 /// goto-definition, spec §14.2, bead cy-kkw) so the navigation
656 /// engine can walk every member file of the workspace without the
657 /// caller having to track its own file list alongside the
658 /// [`Database`].
659 pub fn file_ids(&self) -> impl Iterator<Item = FileId> + '_ {
660 self.files.keys().copied()
661 }
662
663 // -----------------------------------------------------------------------
664 // Internal helpers
665 // -----------------------------------------------------------------------
666
667 fn source_file(&self, id: FileId) -> Result<SourceFile, UnknownFileId> {
668 self.files
669 .get(&id)
670 .map(|r| r.source_file)
671 .ok_or(UnknownFileId(id))
672 }
673
674 fn file_options(&self, id: FileId) -> Result<FileOptions, UnknownFileId> {
675 self.files
676 .get(&id)
677 .map(|r| r.file_opts)
678 .ok_or(UnknownFileId(id))
679 }
680
681 fn workspace_inputs(&self) -> Result<WorkspaceInputs, UnknownFileId> {
682 // WorkspaceInputs is always present after construction.
683 self.workspace
684 .ok_or_else(|| unreachable!("WorkspaceInputs always initialised in Database::new"))
685 }
686
687 fn workspace_inputs_mut(&mut self) -> WorkspaceInputs {
688 self.workspace
689 .expect("WorkspaceInputs always initialised in Database::new")
690 }
691}
692
693// Compile-time Send assertion. Database is Send because:
694// - CypherDatabase: Send (Salsa emits this)
695// - IndexMap<FileId, FileRecord>: Send (FileRecord fields are Send)
696// - WorkspaceInputs: Copy + Send
697// - u32: Send
698const _: fn() = || {
699 fn check_send<T: Send>() {}
700 check_send::<Database>();
701};
702
703// ---------------------------------------------------------------------------
704// Tests
705// ---------------------------------------------------------------------------
706
707#[cfg(test)]
708mod tests {
709 use super::*;
710 use std::sync::Arc;
711
712 // -----------------------------------------------------------------------
713 // Basic open / query / update / remove
714 // -----------------------------------------------------------------------
715
716 #[test]
717 fn open_and_query_single_file() {
718 let mut db = Database::new();
719 let id = db.open_file(
720 Path::new("a.cyp"),
721 "MATCH (n) RETURN n".into(),
722 DialectMode::GqlAligned,
723 );
724
725 let out = db.parse_cst(id).expect("file must be open");
726 assert_eq!(out.parse().syntax().to_string(), "MATCH (n) RETURN n");
727 }
728
729 #[test]
730 fn update_file_invalidates_cache() {
731 let mut db = Database::new();
732 let id = db.open_file(
733 Path::new("a.cyp"),
734 "RETURN 1".into(),
735 DialectMode::GqlAligned,
736 );
737
738 let out1 = db.parse_cst(id).unwrap();
739 assert_eq!(out1.parse().syntax().to_string(), "RETURN 1");
740
741 db.update_file(id, "RETURN 2".into()).unwrap();
742
743 let out2 = db.parse_cst(id).unwrap();
744 assert_eq!(out2.parse().syntax().to_string(), "RETURN 2");
745 }
746
747 // -----------------------------------------------------------------------
748 // edit_file (cy-zv0)
749 // -----------------------------------------------------------------------
750
    #[test]
    fn edit_file_applies_edit_and_invalidates_cache() {
        use cyrs_syntax::{TextEdit, TextRange, TextSize};

        let mut db = Database::new();
        let id = db.open_file(
            Path::new("e.cyp"),
            "RETURN 1".into(),
            DialectMode::GqlAligned,
        );
        let before = db.parse_cst(id).unwrap();
        assert_eq!(before.parse().syntax().to_string(), "RETURN 1");

        // Replace "1" (byte 7..8) with "42".
        let edit = TextEdit::replace(TextRange::new(TextSize::new(7), TextSize::new(8)), "42");
        db.edit_file(id, &edit).unwrap();

        // Both the tree text and the Salsa-held source must reflect the edit.
        let after = db.parse_cst(id).unwrap();
        assert_eq!(after.parse().syntax().to_string(), "RETURN 42");
        assert_eq!(db.source_of(id).unwrap(), "RETURN 42");
        // A distinct Arc proves the memoised ParseOutput was replaced —
        // i.e. the revision actually advanced instead of serving the stale
        // memo.
        assert!(
            !Arc::ptr_eq(&before.0, &after.0),
            "edit_file must bump the Salsa revision → new Arc"
        );
    }
776
    /// cy-li6: after `edit_file`, the next `parse_cst` query must return the
    /// same `Arc<Parse>` that `incremental_reparse` produced — i.e. the
    /// precomputed Parse is published to Salsa instead of being reparsed.
    #[test]
    fn edit_file_publishes_precomputed_parse_to_salsa() {
        use cyrs_syntax::{TextEdit, TextRange, TextSize};

        let mut db = Database::new();
        let id = db.open_file(
            Path::new("e.cyp"),
            "RETURN 1".into(),
            DialectMode::GqlAligned,
        );
        // Warm parse_cst so the SourceFile input exists in Salsa storage.
        let _ = db.parse_cst(id).unwrap();

        // Apply an edit ("1" at byte 7..8 → "42"). The smart path inside
        // `edit_file` will splice a sub-tree and publish the resulting
        // Parse via the cy-li6 `precomputed_parse` input slot.
        let edit = TextEdit::replace(TextRange::new(TextSize::new(7), TextSize::new(8)), "42");
        db.edit_file(id, &edit).unwrap();

        // Read the published hint directly from Salsa.
        let sf = db.source_file(id).unwrap();
        let hint = sf
            .precomputed_parse(&db.inner)
            .as_ref()
            .expect("edit_file must publish a precomputed Parse to the SourceFile input")
            .clone();

        // The next parse_cst call must return that exact Arc — proving
        // the Salsa-tracked query short-circuited on the hint instead of
        // re-parsing the source string.
        let after = db.parse_cst(id).unwrap();
        assert!(
            Arc::ptr_eq(&hint.0, &after.0),
            "parse_cst after edit_file must return the precomputed Parse Arc, \
             not a freshly re-parsed one"
        );

        // And a second parse_cst call must hit Salsa's memo (same Arc).
        let after2 = db.parse_cst(id).unwrap();
        assert!(
            Arc::ptr_eq(&after.0, &after2.0),
            "parse_cst memo must be stable across subsequent queries"
        );

        // Source text is the post-edit canonical text.
        assert_eq!(after.parse().syntax().to_string(), "RETURN 42");
    }
827
828 /// cy-li6: a non-incremental `update_file` must clear any stale
829 /// precomputed Parse so the next `parse_cst` re-parses fresh source.
830 #[test]
831 fn update_file_clears_precomputed_parse_hint() {
832 use cyrs_syntax::{TextEdit, TextRange, TextSize};
833
834 let mut db = Database::new();
835 let id = db.open_file(
836 Path::new("e.cyp"),
837 "RETURN 1".into(),
838 DialectMode::GqlAligned,
839 );
840 let _ = db.parse_cst(id).unwrap();
841
842 // Seed the hint via edit_file.
843 let edit = TextEdit::replace(TextRange::new(TextSize::new(7), TextSize::new(8)), "42");
844 db.edit_file(id, &edit).unwrap();
845
846 let sf = db.source_file(id).unwrap();
847 assert!(
848 sf.precomputed_parse(&db.inner).is_some(),
849 "edit_file must seed the precomputed_parse hint"
850 );
851
852 // Full source replacement must clear the stale hint.
853 db.update_file(id, "RETURN 99".into()).unwrap();
854 assert!(
855 sf.precomputed_parse(&db.inner).is_none(),
856 "update_file must clear the stale precomputed_parse hint"
857 );
858
859 let after = db.parse_cst(id).unwrap();
860 assert_eq!(after.parse().syntax().to_string(), "RETURN 99");
861 }
862
863 #[test]
864 fn edit_file_unknown_fileid() {
865 use cyrs_syntax::{TextEdit, TextSize};
866
867 let mut db = Database::new();
868 let ghost = FileId(999);
869 let edit = TextEdit::insert(TextSize::new(0), "x");
870 assert_eq!(db.edit_file(ghost, &edit), Err(UnknownFileId(ghost)));
871 }
872
873 #[test]
874 fn edit_file_preserves_other_files_cache() {
875 use cyrs_syntax::{TextEdit, TextRange, TextSize};
876
877 let mut db = Database::new();
878 let a = db.open_file(
879 Path::new("a.cyp"),
880 "RETURN 1".into(),
881 DialectMode::GqlAligned,
882 );
883 let b = db.open_file(
884 Path::new("b.cyp"),
885 "RETURN 2".into(),
886 DialectMode::GqlAligned,
887 );
888
889 let oa = db.parse_cst(a).unwrap();
890 let ob = db.parse_cst(b).unwrap();
891
892 // Edit file `a`.
893 let edit = TextEdit::replace(TextRange::new(TextSize::new(7), TextSize::new(8)), "99");
894 db.edit_file(a, &edit).unwrap();
895
896 // File b's cache must survive.
897 let ob2 = db.parse_cst(b).unwrap();
898 assert!(
899 Arc::ptr_eq(&ob.0, &ob2.0),
900 "file b cache must survive edit to file a"
901 );
902
903 // File a reflects the edit.
904 let oa2 = db.parse_cst(a).unwrap();
905 assert!(!Arc::ptr_eq(&oa.0, &oa2.0));
906 assert_eq!(oa2.parse().syntax().to_string(), "RETURN 99");
907 }
908
909 #[test]
910 fn remove_file_stale_returns_error() {
911 let mut db = Database::new();
912 let id = db.open_file(
913 Path::new("a.cyp"),
914 "RETURN 1".into(),
915 DialectMode::GqlAligned,
916 );
917
918 // Confirm it's open.
919 assert!(db.is_open(id));
920
921 // Remove it.
922 db.remove_file(id).expect("remove should succeed");
923
924 // All subsequent queries return an error, not a panic.
925 assert!(!db.is_open(id));
926 assert_eq!(db.parse_cst(id), Err(UnknownFileId(id)));
927 assert_eq!(
928 db.update_file(id, "RETURN 2".into()),
929 Err(UnknownFileId(id))
930 );
931 assert_eq!(db.remove_file(id), Err(UnknownFileId(id)));
932 }
933
934 // -----------------------------------------------------------------------
935 // Three files — independent caching
936 // -----------------------------------------------------------------------
937
938 #[test]
939 fn three_files_independent_caching() {
940 let mut db = Database::new();
941 let a = db.open_file(
942 Path::new("a.cyp"),
943 "RETURN 1".into(),
944 DialectMode::GqlAligned,
945 );
946 let b = db.open_file(
947 Path::new("b.cyp"),
948 "RETURN 2".into(),
949 DialectMode::GqlAligned,
950 );
951 let c = db.open_file(
952 Path::new("c.cyp"),
953 "RETURN 3".into(),
954 DialectMode::GqlAligned,
955 );
956
957 // Query each file.
958 let oa = db.parse_cst(a).unwrap();
959 let ob = db.parse_cst(b).unwrap();
960 let oc = db.parse_cst(c).unwrap();
961
962 assert_eq!(oa.parse().syntax().to_string(), "RETURN 1");
963 assert_eq!(ob.parse().syntax().to_string(), "RETURN 2");
964 assert_eq!(oc.parse().syntax().to_string(), "RETURN 3");
965
966 // Mutate only file `b`.
967 db.update_file(b, "RETURN 99".into()).unwrap();
968
969 // `a` and `c` caches are untouched.
970 let oa2 = db.parse_cst(a).unwrap();
971 let oc2 = db.parse_cst(c).unwrap();
972 assert!(
973 Arc::ptr_eq(&oa.0, &oa2.0),
974 "file a cache must survive update to file b"
975 );
976 assert!(
977 Arc::ptr_eq(&oc.0, &oc2.0),
978 "file c cache must survive update to file b"
979 );
980
981 // `b` is invalidated.
982 let ob2 = db.parse_cst(b).unwrap();
983 assert_eq!(ob2.parse().syntax().to_string(), "RETURN 99");
984 assert!(
985 !Arc::ptr_eq(&ob.0, &ob2.0),
986 "file b cache must be invalidated"
987 );
988 }
989
990 // -----------------------------------------------------------------------
991 // Snapshot + thread: one DB + snapshot per request pattern
992 // -----------------------------------------------------------------------
993
994 #[test]
995 fn snapshot_send_concurrent_query() {
996 let mut db = Database::new();
997 let id = db.open_file(
998 Path::new("q.cyp"),
999 "MATCH (n) RETURN n".into(),
1000 DialectMode::GqlAligned,
1001 );
1002
1003 // Warm the main-thread cache.
1004 let main_out = db.parse_cst(id).unwrap();
1005 let main_text = main_out.parse().syntax().to_string();
1006
1007 // Take a snapshot; ship to worker thread.
1008 let snap = db.snapshot();
1009 let worker_text = std::thread::spawn(move || {
1010 snap.parse_cst(id)
1011 .expect("snapshot must contain the file")
1012 .parse()
1013 .syntax()
1014 .to_string()
1015 })
1016 .join()
1017 .expect("worker thread panicked");
1018
1019 assert_eq!(
1020 worker_text, main_text,
1021 "snapshot result must match main-thread result"
1022 );
1023 }
1024
1025 /// Compile-time assertion: `DatabaseSnapshot: Send`.
1026 #[test]
1027 fn database_snapshot_is_send() {
1028 fn require_send<T: Send>(_: T) {}
1029 let mut db = Database::new();
1030 let _id = db.open_file(
1031 Path::new("a.cyp"),
1032 "RETURN 1".into(),
1033 DialectMode::GqlAligned,
1034 );
1035 let snap = db.snapshot();
1036 require_send(snap);
1037 }
1038
1039 // -----------------------------------------------------------------------
1040 // Schema change: sema cache invalidates, parse cache survives
1041 // -----------------------------------------------------------------------
1042
1043 #[test]
1044 fn schema_change_invalidates_sema_not_parse() {
1045 use cyrs_schema::EmptySchema;
1046
1047 let mut db = Database::new();
1048 let id = db.open_file(
1049 Path::new("s.cyp"),
1050 "MATCH (n:Person) RETURN n".into(),
1051 DialectMode::GqlAligned,
1052 );
1053
1054 // Warm the parse cache.
1055 let cst1 = db.parse_cst(id).unwrap();
1056
1057 // Run sema (no schema yet).
1058 let _sema1 = db.sema_diagnostics(id).unwrap();
1059
1060 // Set a schema — bumps WorkspaceInputs revision.
1061 let schema: Arc<dyn SchemaProvider> = Arc::new(EmptySchema);
1062 db.set_schema(Some(schema));
1063
1064 // Parse cache must survive schema change.
1065 let cst2 = db.parse_cst(id).unwrap();
1066 assert!(
1067 Arc::ptr_eq(&cst1.0, &cst2.0),
1068 "parse_cst Arc must survive schema change"
1069 );
1070
1071 // Sema runs without error under the new schema.
1072 let _sema2 = db.sema_diagnostics(id).unwrap();
1073 }
1074
1075 // -----------------------------------------------------------------------
1076 // source_of / path_of helpers
1077 // -----------------------------------------------------------------------
1078
1079 #[test]
1080 fn source_and_path_accessors() {
1081 let mut db = Database::new();
1082 let p = Path::new("myfile.cyp");
1083 let id = db.open_file(p, "RETURN 42".into(), DialectMode::GqlAligned);
1084
1085 assert_eq!(db.source_of(id).unwrap(), "RETURN 42");
1086 assert_eq!(db.path_of(id).unwrap(), p);
1087 }
1088
1089 // -----------------------------------------------------------------------
1090 // DatabaseOptions / with_options — bead cy-31b
1091 // -----------------------------------------------------------------------
1092
1093 /// `Database::with_options` constructs successfully and the resulting DB
1094 /// operates correctly (`parse_lru` = 2 stress test).
1095 #[test]
1096 fn with_options_parse_lru_2() {
1097 use crate::DatabaseOptions;
1098
1099 // parse_lru = 2 means only 2 parse_cst results are kept.
1100 // This test verifies the API compiles and the DB is functional.
1101 let db = Database::with_options(DatabaseOptions {
1102 parse_lru: 2,
1103 sema_lru: 2,
1104 plan_lru: 2,
1105 formatted_lru: 2,
1106 });
1107
1108 // Verify the DB is usable.
1109 assert_eq!(db.files.len(), 0);
1110 }
1111
1112 /// `Database::with_options` with `parse_lru` = 2: open 3 files, query each.
1113 /// The LRU eviction will drop older cached values, so querying is still
1114 /// correct (Salsa recomputes on cache miss) but the cache is bounded.
1115 #[test]
1116 fn with_options_lru_2_three_files() {
1117 use crate::DatabaseOptions;
1118
1119 let mut db = Database::with_options(DatabaseOptions {
1120 parse_lru: 2,
1121 sema_lru: 2,
1122 plan_lru: 2,
1123 ..DatabaseOptions::default()
1124 });
1125
1126 let a = db.open_file(
1127 Path::new("a.cyp"),
1128 "RETURN 1".into(),
1129 DialectMode::GqlAligned,
1130 );
1131 let b = db.open_file(
1132 Path::new("b.cyp"),
1133 "RETURN 2".into(),
1134 DialectMode::GqlAligned,
1135 );
1136 let c = db.open_file(
1137 Path::new("c.cyp"),
1138 "RETURN 3".into(),
1139 DialectMode::GqlAligned,
1140 );
1141
1142 // All three files parse correctly even with a tiny LRU cap.
1143 assert_eq!(
1144 db.parse_cst(a).unwrap().parse().syntax().to_string(),
1145 "RETURN 1"
1146 );
1147 assert_eq!(
1148 db.parse_cst(b).unwrap().parse().syntax().to_string(),
1149 "RETURN 2"
1150 );
1151 assert_eq!(
1152 db.parse_cst(c).unwrap().parse().syntax().to_string(),
1153 "RETURN 3"
1154 );
1155
1156 // Re-query all after the cap has been exceeded — Salsa recomputes on
1157 // cache miss, so results must still be correct.
1158 assert_eq!(
1159 db.parse_cst(a).unwrap().parse().syntax().to_string(),
1160 "RETURN 1"
1161 );
1162 assert_eq!(
1163 db.parse_cst(b).unwrap().parse().syntax().to_string(),
1164 "RETURN 2"
1165 );
1166 assert_eq!(
1167 db.parse_cst(c).unwrap().parse().syntax().to_string(),
1168 "RETURN 3"
1169 );
1170 }
1171
1172 /// `Database::new()` uses default options (`parse_lru` = 256).
1173 #[test]
1174 fn database_new_uses_default_options() {
1175 use crate::DatabaseOptions;
1176 let default_opts = DatabaseOptions::default();
1177 assert_eq!(default_opts.parse_lru, 256);
1178 assert_eq!(default_opts.sema_lru, 256);
1179 assert_eq!(default_opts.plan_lru, 256);
1180 assert_eq!(default_opts.formatted_lru, 256);
1181
1182 // Verify Database::new() still works.
1183 let db = Database::new();
1184 assert_eq!(db.files.len(), 0);
1185 }
1186
1187 // -----------------------------------------------------------------------
1188 // Unknown FileId accessors
1189 // -----------------------------------------------------------------------
1190
1191 #[test]
1192 fn unknown_fileid_source_of() {
1193 let db = Database::new();
1194 let ghost = FileId(999);
1195 assert_eq!(db.source_of(ghost), Err(UnknownFileId(ghost)));
1196 assert_eq!(db.path_of(ghost), Err(UnknownFileId(ghost)));
1197 }
1198}