badness 0.3.0

An LSP, formatter, and linter for LaTeX
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
//! Salsa-backed incremental layer: file text → parse tree.
//!
//! The CST is cached as a `rowan::GreenNode` (Arc-backed, `Send + Sync`) rather
//! than a `SyntaxNode` (which holds non-`Send` cursor state and is neither
//! `Eq` nor `salsa::Update`). Callers materialize a fresh cursor via
//! [`parsed_tree_root`] — a cheap atomic clone — so each consumer gets its own
//! tree without leaking the salsa cell.
//!
//! This began as the Phase 3 foundation (TODO.md) — the salsa harness alone —
//! and now also hosts the per-file semantic-model query and the per-file
//! firewall queries (labels, inclusion edges, citation facts). The cross-file
//! project graph and resolvers that the sibling project `arity` layers on top
//! of this same harness live in [`crate::project`] and consume the queries
//! defined here. Keep this file close to arity's `incremental.rs` so the
//! eventual shared-crate extraction stays a mechanical lift.

use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};

use salsa::Setter;
use smol_str::SmolStr;

use crate::bib::semantic::Model as BibModel;
use crate::bib::syntax::SyntaxNode as BibSyntaxNode;
use crate::file_discovery::file_kind_or_tex;
use crate::parser::parse_with_flavor;
use crate::project::citations::document_cite_names;
use crate::project::labels::{document_label_names, is_document_root};
use crate::project::{
    BibTarget, IncludeEdgeKey, Project, ProjectMember, ResolvedCitations, ResolvedLabels,
    collect_bib_resource_targets, collect_include_edge_keys, resolved_citations, resolved_labels,
};
use crate::semantic::{SemanticModel, SignatureDb, scan_definitions};
use crate::syntax::SyntaxNode;

/// A tracked source buffer: the salsa input that every per-file query in this
/// module takes as its key.
#[salsa::input]
pub struct SourceFile {
    /// The path this file was tracked under. Set once at creation and never
    /// mutated by this module, so queries that read it (for example
    /// [`include_edges`], which resolves against `path.parent()`) are only
    /// invalidated by text edits. In-memory files (see
    /// [`IncrementalDatabase::add_file`]) get a unique synthetic path so they
    /// never collide.
    #[returns(ref)]
    pub path: PathBuf,
    /// The file's current contents. Replaced through the salsa setter by
    /// [`IncrementalDatabase::set_file_text`] and
    /// [`IncrementalDatabase::upsert_file`].
    #[returns(ref)]
    pub text: String,
}

/// Identifies which tracked query executed. Each query body in this module
/// records one of these through [`IncrementalDb::record_query`], so the query
/// log shows what actually re-ran.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum QueryKind {
    /// A file's LaTeX parse tree ([`parsed_document`]).
    ParsedDocument,
    /// A file's per-file label/reference model ([`semantic_model`]).
    SemanticModel,
    /// A file's scanned `\newcommand`/`\newenvironment`/xparse signatures
    /// ([`document_signatures`]).
    DocumentSignatures,
    /// A file's range-free inclusion edges ([`include_edges`]).
    IncludeEdges,
    /// A file's sorted, distinct label-name set ([`file_labels`]) — the firewall
    /// the cross-file label resolver consumes.
    FileLabels,
    /// Whether a file is a document root ([`file_is_document_root`]).
    FileIsDocumentRoot,
    /// The cross-file inclusion graph ([`crate::project::project_graph`]); a
    /// project-level query, not keyed on a single file.
    ProjectGraph,
    /// The cross-file label resolution ([`crate::project::resolved_labels`]); a
    /// project-level query, not keyed on a single file.
    ResolvedLabels,
    /// A `.bib` file's parse tree ([`parsed_bib_document`]).
    ParsedBibDocument,
    /// A `.bib` file's per-file entry / cite-key / `@string` model
    /// ([`bib_semantic_model`]).
    BibSemanticModel,
    /// A `.bib` file's sorted, distinct cite-key set ([`file_cite_names`]) — the
    /// firewall the cross-file citation resolver consumes.
    FileCiteNames,
    /// A `.tex` file's bibliography-resource targets + `\nocite{*}` flag
    /// ([`file_cite_facts`]) — the per-file citation firewall.
    FileCiteFacts,
    /// The cross-file citation resolution ([`crate::project::resolved_citations`]);
    /// a project-level query, not keyed on a single file.
    ResolvedCitations,
}

/// One record in the query log: which query ran and, for per-file queries, on
/// which input.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct QueryLogEntry {
    /// Which tracked query executed.
    pub kind: QueryKind,
    /// The per-file query subject, or `None` for project-level queries — the
    /// [`QueryKind`] variants documented as not keyed on a single file, such as
    /// [`QueryKind::ProjectGraph`]. Every query in this module passes `Some`;
    /// NOTE(review): the project-level queries are recorded outside this file —
    /// confirm they pass `None`.
    pub file: Option<SourceFile>,
}

/// A parser error in plain-data form, copied field-for-field out of the
/// parser's own error type so it can live inside a cached parse result.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseDiagnosticData {
    /// Human-readable description of the problem, as produced by the parser.
    pub message: String,
    /// Start of the offending span, copied from the parser error.
    /// NOTE(review): presumably an offset into the file text — confirm the unit
    /// (bytes vs. chars) against the parser.
    pub start: usize,
    /// End of the offending span, copied from the parser error; same unit as
    /// `start`.
    pub end: usize,
}

/// A cached parse: the green tree plus parse diagnostics, computed once per
/// `(db, file)`.
///
/// The `GreenNode` is not `Eq`/`salsa::Update`, so [`parsed_document`] is
/// `no_eq, unsafe(non_update_types)`: salsa never compares parse outputs and
/// relies purely on input (text) change detection to invalidate. That is sound
/// because the tree is a pure function of the text.
#[derive(Debug, Clone)]
pub struct ParsedDocument {
    /// The green tree of the parse; [`parsed_tree_root`] clones it to build a
    /// fresh `SyntaxNode` root for each consumer.
    pub green: rowan::GreenNode,
    /// The errors the parser reported, in the order it reported them (empty
    /// when the file parses cleanly).
    pub diagnostics: Vec<ParseDiagnosticData>,
}

/// A cached `.bib` parse: the green tree plus parse diagnostics. The bib analog
/// of [`ParsedDocument`], `no_eq, unsafe(non_update_types)` for the identical
/// reason — `rowan::GreenNode` is neither `Eq` nor `salsa::Update`, so
/// [`parsed_bib_document`] relies purely on text-input change detection to
/// invalidate.
#[derive(Debug, Clone)]
pub struct ParsedBibDocument {
    /// The green tree of the `.bib` parse; [`parsed_bib_tree_root`] clones it to
    /// build a fresh bib `SyntaxNode` root for each consumer.
    pub green: rowan::GreenNode,
    /// The errors the bib parser reported, in the order it reported them (empty
    /// when the file parses cleanly).
    pub diagnostics: Vec<ParseDiagnosticData>,
}

/// The database trait every tracked query in this module is written against:
/// a salsa database that can additionally log which queries executed.
#[salsa::db]
pub trait IncrementalDb: salsa::Database {
    /// Record that a tracked query body ran. Each query in this module calls
    /// this first thing, so the log reflects real re-executions and not memo
    /// hits.
    fn record_query(&self, entry: QueryLogEntry);
}

/// Parse `file` as LaTeX and cache the green tree together with the parser's
/// diagnostics.
///
/// `no_eq, unsafe(non_update_types)` because `rowan::GreenNode` is neither `Eq`
/// nor `salsa::Update`; invalidation relies on the text input changing.
#[salsa::tracked(returns(ref), no_eq, unsafe(non_update_types))]
pub fn parsed_document(db: &dyn IncrementalDb, file: SourceFile) -> ParsedDocument {
    let entry = QueryLogEntry {
        kind: QueryKind::ParsedDocument,
        file: Some(file),
    };
    db.record_query(entry);

    // The file extension picks the flavor: a `.sty`/`.cls` is loaded under an
    // implicit `\makeatletter` (`LatexFlavor::Package`), which makes `@` a
    // letter throughout. `file_kind_or_tex` reads only the path name.
    let flavor = file_kind_or_tex(file.path(db)).latex_flavor();
    let source = file.text(db).as_str();
    let parsed = parse_with_flavor(source, flavor);

    // Copy each parser error into the plain-data diagnostic shape.
    let mut diagnostics = Vec::new();
    for error in parsed.errors {
        diagnostics.push(ParseDiagnosticData {
            message: error.message,
            start: error.start,
            end: error.end,
        });
    }

    ParsedDocument {
        green: parsed.green,
        diagnostics,
    }
}

/// Borrow the parser errors recorded in the cached parse of `file`; the slice
/// is empty when the file parsed cleanly.
pub fn parse_diagnostics(db: &dyn IncrementalDb, file: SourceFile) -> &[ParseDiagnosticData] {
    let cached = parsed_document(db, file);
    cached.diagnostics.as_slice()
}

/// Build a fresh `SyntaxNode` root over the cached parse of `file`. Only the
/// green tree is cloned; the parse itself is not repeated.
pub fn parsed_tree_root(db: &dyn IncrementalDb, file: SourceFile) -> SyntaxNode {
    let green = parsed_document(db, file).green.clone();
    SyntaxNode::new_root(green)
}

/// Build the per-file label/reference model from the cached parse tree.
///
/// In contrast to [`parsed_document`], this query is **not** `no_eq`:
/// [`SemanticModel`] is `Eq`, so salsa compares the new output with the old
/// and **backdates** when an edit leaves the model unchanged (a prose edit that
/// touches no `\label`/`\ref`, say). Downstream queries such as
/// [`file_labels`] and [`file_cite_facts`] then need not re-run.
/// (`parsed_document` is `no_eq` only because `GreenNode` is neither `Eq` nor
/// `salsa::Update`, leaving text-input change detection as its sole
/// invalidation signal.)
#[salsa::tracked(returns(ref))]
pub fn semantic_model(db: &dyn IncrementalDb, file: SourceFile) -> SemanticModel {
    let entry = QueryLogEntry {
        kind: QueryKind::SemanticModel,
        file: Some(file),
    };
    db.record_query(entry);

    let root = parsed_tree_root(db, file);
    SemanticModel::build(&root)
}

/// Scan the cached parse tree for user-definition signatures — `\newcommand`,
/// `\newenvironment`, and the xparse `\NewDocument…` family
/// ([`crate::semantic::scan_definitions`]).
///
/// Like [`semantic_model`] (and unlike [`parsed_document`]) this query is
/// **not** `no_eq`: [`SignatureDb`] is `Eq`, so salsa backdates whenever an
/// edit defines no new command/environment (a prose or `\ref` edit, for
/// instance) and consumers stay cached. The language server's completion
/// request is the first consumer; it unions these scanned names with the
/// built-in DB.
#[salsa::tracked(returns(ref))]
pub fn document_signatures(db: &dyn IncrementalDb, file: SourceFile) -> SignatureDb {
    let entry = QueryLogEntry {
        kind: QueryKind::DocumentSignatures,
        file: Some(file),
    };
    db.record_query(entry);

    let root = parsed_tree_root(db, file);
    scan_definitions(&root)
}

/// Collect the file's range-free inclusion edges
/// ([`crate::project::collect_include_edge_keys`]) as a tracked query.
///
/// Relative targets are resolved against the file's own directory
/// (`path.parent()`). The path is an input field set once, so the query
/// re-runs only on a text edit, and it backdates when the edges come out
/// unchanged — the firewall that stops a body edit from rebuilding the
/// cross-file [`crate::project::project_graph`].
#[salsa::tracked(returns(ref))]
pub fn include_edges(db: &dyn IncrementalDb, file: SourceFile) -> Vec<IncludeEdgeKey> {
    let entry = QueryLogEntry {
        kind: QueryKind::IncludeEdges,
        file: Some(file),
    };
    db.record_query(entry);

    let tree = parsed_tree_root(db, file);
    let base_dir = file.path(db).parent();
    collect_include_edge_keys(&tree, base_dir)
}

/// Project [`semantic_model`] down to the file's distinct `\label` names,
/// sorted, with ranges and references stripped.
///
/// This is the per-file firewall consumed by the cross-file
/// [`crate::project::resolved_labels`] resolver (the LaTeX analog of arity's
/// `file_exports`). Because ranges and refs are gone, a prose edit, a `\ref`
/// edit, or a body edit that merely shifts a `\label`'s offset all yield an
/// *equal* `Vec`; salsa backdates and the project-level union is not rebuilt.
/// Unlike [`project_graph`](crate::project::project_graph) the query is **not**
/// `no_eq`: `Vec<SmolStr>` is `Eq`, which is precisely what lets the firewall
/// hold (the same reasoning as for [`semantic_model`]).
#[salsa::tracked(returns(ref))]
pub fn file_labels(db: &dyn IncrementalDb, file: SourceFile) -> Vec<SmolStr> {
    let entry = QueryLogEntry {
        kind: QueryKind::FileLabels,
        file: Some(file),
    };
    db.record_query(entry);

    let model = semantic_model(db, file);
    document_label_names(model)
}

/// Report whether `file` looks like a document *root*, i.e. carries a
/// `\documentclass` or a `\begin{document}`.
///
/// The cross-file `undefined-ref` lint fires only inside a namespace that
/// contains a root, so a bare chapter fragment opened on its own (whose labels
/// live in the main document) is never flagged.
///
/// The result is a cheap `bool` projection of the parse tree and is `Eq` for
/// the same firewall reason as [`file_labels`]: it flips only when a
/// `\documentclass` / `\begin{document}` is added or removed, so ordinary edits
/// backdate.
#[salsa::tracked(returns(ref))]
pub fn file_is_document_root(db: &dyn IncrementalDb, file: SourceFile) -> bool {
    let entry = QueryLogEntry {
        kind: QueryKind::FileIsDocumentRoot,
        file: Some(file),
    };
    db.record_query(entry);

    let root = parsed_tree_root(db, file);
    is_document_root(&root)
}

/// Parse `file` as BibTeX and cache the green tree together with the parser's
/// diagnostics — the bib analog of [`parsed_document`].
///
/// `no_eq, unsafe(non_update_types)` for the same reason as there: `GreenNode`
/// is neither `Eq` nor `salsa::Update`, so salsa never compares parses and
/// leans on text-input change detection. One [`SourceFile`] input can feed both
/// this query and [`parsed_document`]: dispatch is by function, not by path, so
/// whether a buffer is treated as `.bib` is decided by which query the caller
/// runs, not by the input's synthetic extension.
#[salsa::tracked(returns(ref), no_eq, unsafe(non_update_types))]
pub fn parsed_bib_document(db: &dyn IncrementalDb, file: SourceFile) -> ParsedBibDocument {
    let entry = QueryLogEntry {
        kind: QueryKind::ParsedBibDocument,
        file: Some(file),
    };
    db.record_query(entry);

    let source = file.text(db).as_str();
    let parsed = crate::bib::parse(source);

    // Copy each parser error into the plain-data diagnostic shape.
    let mut diagnostics = Vec::new();
    for error in parsed.errors {
        diagnostics.push(ParseDiagnosticData {
            message: error.message,
            start: error.start,
            end: error.end,
        });
    }

    ParsedBibDocument {
        green: parsed.green,
        diagnostics,
    }
}

/// Borrow the parser errors recorded in the cached `.bib` parse of `file`; the
/// slice is empty when the file parsed cleanly.
pub fn bib_parse_diagnostics(db: &dyn IncrementalDb, file: SourceFile) -> &[ParseDiagnosticData] {
    let cached = parsed_bib_document(db, file);
    cached.diagnostics.as_slice()
}

/// Build a fresh bib `SyntaxNode` root over the cached `.bib` parse of `file`.
/// Only the green tree is cloned; the parse itself is not repeated.
pub fn parsed_bib_tree_root(db: &dyn IncrementalDb, file: SourceFile) -> BibSyntaxNode {
    let green = parsed_bib_document(db, file).green.clone();
    BibSyntaxNode::new_root(green)
}

/// Build the per-file bib model (entries, `@string` defs/uses) from the cached
/// `.bib` parse.
///
/// Like [`semantic_model`], and unlike [`parsed_bib_document`], the query is
/// **not** `no_eq`: [`crate::bib::semantic::Model`] is `Eq`, so salsa backdates
/// whenever an edit leaves the model unchanged.
#[salsa::tracked(returns(ref))]
pub fn bib_semantic_model(db: &dyn IncrementalDb, file: SourceFile) -> BibModel {
    let entry = QueryLogEntry {
        kind: QueryKind::BibSemanticModel,
        file: Some(file),
    };
    db.record_query(entry);

    let root = parsed_bib_tree_root(db, file);
    BibModel::build(&root)
}

/// Project [`bib_semantic_model`] down to the `.bib` file's distinct cite keys,
/// sorted, with ranges stripped.
///
/// This is the per-file firewall consumed by the cross-file
/// [`crate::project::resolved_citations`] resolver (the bib analog of
/// [`file_labels`]). With ranges gone, an edit that shifts an `@entry`'s offset,
/// or touches a field but not a key, yields an *equal* `Vec`; salsa backdates
/// and the project-level union is not rebuilt. Like [`file_labels`] the query is
/// **not** `no_eq`: `Vec<SmolStr>` is `Eq`, and that is what lets the firewall
/// hold.
#[salsa::tracked(returns(ref))]
pub fn file_cite_names(db: &dyn IncrementalDb, file: SourceFile) -> Vec<SmolStr> {
    let entry = QueryLogEntry {
        kind: QueryKind::FileCiteNames,
        file: Some(file),
    };
    db.record_query(entry);

    let model = bib_semantic_model(db, file);
    document_cite_names(model)
}

/// A `.tex` file's citation facts: its bibliography-resource targets
/// (`\bibliography`/`\addbibresource`) and whether it carries a `\nocite{*}`
/// wildcard. The per-file firewall feeding [`crate::project::resolved_citations`]
/// on the `.tex` side (the document-root flag reuses [`file_is_document_root`]).
/// Produced by the [`file_cite_facts`] query.
///
/// `Eq` for the same firewall reason as [`file_labels`]: a prose or `\cite` edit
/// changes neither the resource targets nor the wildcard, so it backdates and the
/// cross-file resolution memo holds. Resolves relative targets against the file's
/// own directory (`path.parent()`), like [`include_edges`].
#[derive(Debug, Clone, PartialEq, Eq, salsa::Update)]
pub struct FileCiteFacts {
    /// The bibliography resources the file names, as returned by
    /// [`crate::project::collect_bib_resource_targets`] for the file's parse
    /// tree and parent directory.
    pub bib_targets: Vec<BibTarget>,
    /// Whether the file's semantic model reports a wildcard `\nocite{*}`
    /// (`SemanticModel::has_wildcard_nocite`).
    pub nocite_all: bool,
}

/// Compute the [`FileCiteFacts`] for a `.tex` file: its bibliography-resource
/// targets, resolved against the file's own directory, plus whether its
/// semantic model reports a wildcard `\nocite{*}`.
#[salsa::tracked(returns(ref))]
pub fn file_cite_facts(db: &dyn IncrementalDb, file: SourceFile) -> FileCiteFacts {
    let entry = QueryLogEntry {
        kind: QueryKind::FileCiteFacts,
        file: Some(file),
    };
    db.record_query(entry);

    let tree = parsed_tree_root(db, file);
    // Relative resource targets resolve against the directory holding `file`.
    let bib_targets = collect_bib_resource_targets(&tree, file.path(db).parent());
    let nocite_all = semantic_model(db, file).has_wildcard_nocite();

    FileCiteFacts {
        bib_targets,
        nocite_all,
    }
}

/// The concrete salsa database: the storage itself plus two pieces of shared
/// bookkeeping (the query log and the path → input map). Both are behind `Arc`
/// so that clones of the database observe the same state.
#[salsa::db]
pub struct IncrementalDatabase {
    /// The salsa storage holding inputs and query memos.
    storage: salsa::Storage<Self>,
    /// Every [`QueryLogEntry`] recorded since the log was last cleared; shared
    /// with clones.
    query_log: Arc<Mutex<Vec<QueryLogEntry>>>,
    /// Path → input mapping, so repeated edits to the same path reuse the same
    /// `SourceFile` input (and thus its cached queries) instead of creating a
    /// fresh one each time. Keys are normalized with `normalize_path`. Also the
    /// membership snapshot behind [`IncrementalDatabase::tracked_files`].
    files: Arc<Mutex<HashMap<PathBuf, SourceFile>>>,
}

impl Default for IncrementalDatabase {
    fn default() -> Self {
        Self {
            storage: salsa::Storage::new(None),
            query_log: Arc::new(Mutex::new(Vec::new())),
            files: Arc::new(Mutex::new(HashMap::new())),
        }
    }
}

/// A clone is a second handle onto the *same* salsa storage: a cheap `Arc`-bump
/// of the shared `Zalsa`, together with the shared path→input map and query
/// log. The language server uses this to run read-only queries off the lint
/// thread — the owner mints a short-lived clone, passes it to a worker, and the
/// clone is dropped promptly. Salsa is single-writer: a clone still alive when
/// the owner performs a write blocks that write until the clone drops (and
/// trips `salsa::Cancelled` in any read still in flight). Clones must therefore
/// never be held across a write or parked long-term.
impl Clone for IncrementalDatabase {
    fn clone(&self) -> Self {
        let Self {
            storage,
            query_log,
            files,
        } = self;
        Self {
            storage: storage.clone(),
            query_log: Arc::clone(query_log),
            files: Arc::clone(files),
        }
    }
}

/// Opaque `Debug`: prints only the type name and elides every field.
impl std::fmt::Debug for IncrementalDatabase {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut printer = f.debug_struct("IncrementalDatabase");
        printer.finish_non_exhaustive()
    }
}

/// Lexically normalize `path` for use as a deduplication key: absolutize it
/// (against the current directory, without touching the filesystem) and collapse
/// `.` / `..` segments. Purely textual — no symlink resolution, no existence
/// check — so it is stable for not-yet-saved buffers and never blocks on I/O.
/// `a.tex`, `./a.tex`, and a sibling resolved as `dir/../a.tex` all map to one
/// key, so the language server's `\input`-resolved siblings collapse onto the
/// same input as the buffer the editor opened. Adapted from arity's
/// `normalize_path`.
///
/// A `..` that would climb above the filesystem root is dropped (the parent of
/// the root is the root itself), so `/../a.tex` and `/a.tex` share one key. If
/// `path` cannot be absolutized (e.g. it is empty) it is normalized as given,
/// and leading `..` segments of such a relative path are preserved because
/// there is nothing to cancel them against.
pub(crate) fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;
    let absolute = std::path::absolute(path).unwrap_or_else(|_| path.to_path_buf());
    let mut out = PathBuf::new();
    for component in absolute.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                // Inspect the tail first so the borrow of `out` ends before it
                // is mutated.
                let last = out.components().next_back();
                let after_name = matches!(last, Some(Component::Normal(_)));
                let at_root = matches!(last, Some(Component::RootDir));
                if after_name {
                    // `name/..` cancels out.
                    out.pop();
                } else if !at_root {
                    // Nothing to cancel (empty, prefix-only, or another `..`):
                    // keep the segment. Directly under the root it is dropped.
                    out.push(component.as_os_str());
                }
            }
            other => out.push(other.as_os_str()),
        }
    }
    out
}

/// Monotonic counter minting unique synthetic paths for in-memory documents, so
/// two of them never alias in a path-keyed query. Unique-within-process is
/// sufficient; this sidesteps a `uuid` dependency.
///
/// Process-wide rather than per-database: [`IncrementalDatabase::add_file`]
/// bumps it with `fetch_add`, so every call in the process sees a distinct
/// value, starting from 0.
static MEM_FILE_COUNTER: AtomicU64 = AtomicU64::new(0);

impl IncrementalDatabase {
    /// Register a document that has no on-disk path. Every call mints a new
    /// synthetic `<mem>/{n}.tex` path, so such documents never collide with one
    /// another. Intended for tests and one-shot single-file checks; the LSP/CLI
    /// go through [`upsert_file`](Self::upsert_file) with the real path. The
    /// new input is *not* entered in the path → input map.
    pub fn add_file(&self, text: impl Into<String>) -> SourceFile {
        let serial = MEM_FILE_COUNTER.fetch_add(1, Ordering::Relaxed);
        let synthetic = PathBuf::from(format!("<mem>/{serial}.tex"));
        SourceFile::new(self, synthetic, text.into())
    }

    /// Unconditionally replace the text of `file` through the salsa setter.
    pub fn set_file_text(&mut self, file: SourceFile, text: impl Into<String>) {
        let setter = file.set_text(self);
        setter.to(text.into());
    }

    /// Insert the input for `path`, or update it if one is already tracked, and
    /// return it. This is the hot path for editor buffers: a keystroke rewrites
    /// the text of an existing input, which keeps unchanged downstream queries
    /// cached.
    pub fn upsert_file(&mut self, path: &Path, text: String) -> SourceFile {
        let key = normalize_path(path);
        // The guard is a temporary of this statement, so the map is unlocked
        // again before salsa is touched below.
        let known = self
            .files
            .lock()
            .expect("file cache mutex poisoned")
            .get(&key)
            .copied();

        let Some(file) = known else {
            // First sighting. The normalized key doubles as the input's path so
            // that `\input`/bib resolution (which joins onto
            // `file.path(db).parent()`) lands in the same normalized space as
            // the member set.
            let created = SourceFile::new(self, key.clone(), text);
            self.files
                .lock()
                .expect("file cache mutex poisoned")
                .insert(key, created);
            return created;
        };

        // Writing an input bumps the revision unconditionally and would re-run
        // every downstream query (a sibling file re-read on each keystroke), so
        // only write when the text really differs.
        if file.text(self) != &text {
            file.set_text(self).to(text);
        }
        file
    }

    /// Snapshot every tracked `(normalized path, input)` pair, ordered by path —
    /// the membership list the language server interns a `Project` from.
    pub fn tracked_files(&self) -> Vec<(PathBuf, SourceFile)> {
        let tracked = self.files.lock().expect("file cache mutex poisoned");
        let mut snapshot: Vec<(PathBuf, SourceFile)> = Vec::with_capacity(tracked.len());
        for (path, file) in tracked.iter() {
            snapshot.push((path.clone(), *file));
        }
        // Release the map before sorting; the snapshot is already detached.
        drop(tracked);
        snapshot.sort_by(|(left, _), (right, _)| left.cmp(right));
        snapshot
    }

    /// Look up the `SourceFile` tracked for `path`, if there is one. Read-only:
    /// in contrast to [`upsert_file`](Self::upsert_file) it never inserts, which
    /// makes it safe on a shared clone (the language server's read path uses it
    /// to find the cached parse for the buffer under the cursor).
    pub fn lookup_file(&self, path: &Path) -> Option<SourceFile> {
        let key = normalize_path(path);
        let tracked = self.files.lock().expect("file cache mutex poisoned");
        tracked.get(&key).copied()
    }

    /// Forget `path`, handing back the `SourceFile` it mapped to (if any).
    /// Best-effort eviction for the language server's `didClose`: salsa has no
    /// true input delete, so the input cell and its query memos stay in storage
    /// as unreachable garbage. Dropping the map entry is what makes a later
    /// `didOpen` mint a *fresh* input instead of reusing the closed one.
    ///
    /// Caveat: once removed, the file can no longer be found by path through
    /// this database (e.g. via [`lookup_file`](Self::lookup_file)) until it is
    /// tracked again, even if another open document `\input`s it.
    pub fn remove_file(&mut self, path: &Path) -> Option<SourceFile> {
        let key = normalize_path(path);
        let mut tracked = self.files.lock().expect("file cache mutex poisoned");
        tracked.remove(&key)
    }

    /// Borrow the text currently stored for `file`.
    pub fn file_text(&self, file: SourceFile) -> &str {
        file.text(self)
    }

    /// Borrow the path `file` was tracked under.
    pub fn file_path(&self, file: SourceFile) -> &Path {
        file.path(self)
    }

    /// Parser errors for `file`; empty when it parses cleanly.
    pub fn parse_diagnostics(&self, file: SourceFile) -> &[ParseDiagnosticData] {
        parse_diagnostics(self, file)
    }

    /// A fresh `SyntaxNode` root over the cached parse tree of `file`.
    pub fn parsed_tree(&self, file: SourceFile) -> SyntaxNode {
        parsed_tree_root(self, file)
    }

    /// The range-free inclusion edges of `file`.
    pub fn include_edges(&self, file: SourceFile) -> &[IncludeEdgeKey] {
        include_edges(self, file)
    }

    /// The per-file label/reference model of `file`.
    pub fn semantic_model(&self, file: SourceFile) -> &SemanticModel {
        semantic_model(self, file)
    }

    /// The user-definition signatures scanned from `file`.
    pub fn document_signatures(&self, file: SourceFile) -> &SignatureDb {
        document_signatures(self, file)
    }

    /// The distinct, sorted `\label` names of `file` (the firewall feeding the
    /// cross-file resolver).
    pub fn file_labels(&self, file: SourceFile) -> &[SmolStr] {
        file_labels(self, file)
    }

    /// Whether `file` carries a `\documentclass` / `\begin{document}`.
    pub fn file_is_document_root(&self, file: SourceFile) -> bool {
        let is_root: &bool = file_is_document_root(self, file);
        *is_root
    }

    /// `.bib` parser errors for `file`; empty when it parses cleanly.
    pub fn bib_parse_diagnostics(&self, file: SourceFile) -> &[ParseDiagnosticData] {
        bib_parse_diagnostics(self, file)
    }

    /// A fresh bib `SyntaxNode` root over the cached `.bib` parse tree of `file`.
    pub fn parsed_bib_tree(&self, file: SourceFile) -> BibSyntaxNode {
        parsed_bib_tree_root(self, file)
    }

    /// The per-file bib model of `file` (entries, `@string` defs/uses).
    pub fn bib_semantic_model(&self, file: SourceFile) -> &BibModel {
        bib_semantic_model(self, file)
    }

    /// Empty the query log. The log is shared with clones, so they see it
    /// cleared as well.
    pub fn clear_query_log(&self) {
        let mut log = self.query_log.lock().expect("query log mutex poisoned");
        log.clear();
    }

    /// A copy of every entry recorded since the log was last cleared, in
    /// recording order.
    pub fn query_log(&self) -> Vec<QueryLogEntry> {
        let log = self.query_log.lock().expect("query log mutex poisoned");
        log.to_vec()
    }

    /// Mint a read-only [`Analysis`] snapshot: a short-lived clone of this
    /// database wrapped so that callers can only *read*. Drop it promptly — an
    /// outstanding clone blocks the next write (salsa is single-writer; see the
    /// [`Clone`] impl).
    pub fn snapshot(&self) -> Analysis {
        let handle = self.clone();
        Analysis(handle)
    }
}

/// A read-only handle onto the incremental database, à la rust-analyzer's
/// `Analysis` (vs. its writer `AnalysisHost`). Wraps a short-lived clone of the
/// worker thread's [`IncrementalDatabase`] and exposes *only* read queries, so a
/// read job cannot call `upsert_file` / salsa setters — the single-writer
/// invariant is encoded in the type system rather than left to convention.
///
/// Handed to the language server's read jobs (formatting, the parse-diagnostics
/// read-phase); the `&mut`-capable [`IncrementalDatabase`] stays private to the
/// worker thread.
///
/// Created by [`IncrementalDatabase::snapshot`]. The wrapped database is a
/// private field, so code outside this module can reach it only through the
/// methods on this type.
pub struct Analysis(IncrementalDatabase);

impl Analysis {
    /// The `SourceFile` input currently tracked for `path`, if any.
    pub fn lookup_file(&self, path: &Path) -> Option<SourceFile> {
        self.0.lookup_file(path)
    }

    /// The text currently tracked for `file`.
    pub fn file_text(&self, file: SourceFile) -> &str {
        self.0.file_text(file)
    }

    /// The normalized path `file` is tracked under (its cross-file identity).
    pub fn file_path(&self, file: SourceFile) -> &Path {
        self.0.file_path(file)
    }

    /// Every currently-tracked `(normalized path, input)` pair, sorted by path.
    pub fn tracked_files(&self) -> Vec<(PathBuf, SourceFile)> {
        self.0.tracked_files()
    }

    /// Parse diagnostics for `file` (empty when it parses cleanly).
    pub fn parse_diagnostics(&self, file: SourceFile) -> &[ParseDiagnosticData] {
        self.0.parse_diagnostics(file)
    }

    /// A fresh `SyntaxNode` over the cached parse tree.
    pub fn parsed_tree(&self, file: SourceFile) -> SyntaxNode {
        self.0.parsed_tree(file)
    }

    /// The file's per-file label/reference model (for lint rules).
    pub fn semantic_model(&self, file: SourceFile) -> &SemanticModel {
        self.0.semantic_model(file)
    }

    /// The file's scanned user-definition signatures (for completion).
    pub fn document_signatures(&self, file: SourceFile) -> &SignatureDb {
        self.0.document_signatures(file)
    }

    /// `.bib` parse diagnostics for `file` (empty when it parses cleanly).
    pub fn bib_parse_diagnostics(&self, file: SourceFile) -> &[ParseDiagnosticData] {
        self.0.bib_parse_diagnostics(file)
    }

    /// A fresh bib `SyntaxNode` over the cached `.bib` parse tree.
    pub fn parsed_bib_tree(&self, file: SourceFile) -> BibSyntaxNode {
        self.0.parsed_bib_tree(file)
    }

    /// The file's per-file bib model (entries, `@string` defs/uses).
    pub fn bib_semantic_model(&self, file: SourceFile) -> &BibModel {
        self.0.bib_semantic_model(file)
    }

    /// Intern `members` as a `Project` against this snapshot and resolve its
    /// cross-file label and citation models (the inputs the cross-file lint rules
    /// consume). The returned references borrow the snapshot's salsa storage, so
    /// they live as long as this `Analysis`. Interning takes `&db` and is safe on a
    /// read snapshot.
    pub fn resolve_project(
        &self,
        members: Vec<ProjectMember>,
    ) -> (&ResolvedLabels, &ResolvedCitations) {
        let project = Project::new(&self.0, members);
        (
            resolved_labels(&self.0, project),
            resolved_citations(&self.0, project),
        )
    }
}

// Marks `IncrementalDatabase` as a salsa database. The impl body is empty:
// nothing is overridden here.
#[salsa::db]
impl salsa::Database for IncrementalDatabase {}

#[salsa::db]
impl IncrementalDb for IncrementalDatabase {
    /// Append `entry` to the shared query log. The log sits behind a mutex
    /// because clones of the database write to the same `Vec`.
    fn record_query(&self, entry: QueryLogEntry) {
        let mut log = self.query_log.lock().expect("query log mutex poisoned");
        log.push(entry);
    }
}