badness 0.2.0

An LSP, formatter, and linter for LaTeX
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
//! The cross-file inclusion graph: which files pull in which, assembled from the
//! per-file [`crate::incremental::include_edges`] firewall and wrapped as a
//! tracked salsa query so a body edit doesn't rebuild the whole graph.
//!
//! This is the LaTeX analog of arity's `project/{scope,graph}.rs`. The layering,
//! from the per-file firewall up:
//!
//! - [`crate::incremental::include_edges`] — a per-file projection (range-free
//!   [`IncludeEdgeKey`]s) that stays *equal* across a body edit (salsa backdates).
//! - [`project_graph`] — assembles those into the cross-file [`IncludeGraph`],
//!   keyed on the interned [`Project`] membership snapshot, so an unchanged
//!   project + backdated per-file facts means its memo is reused.
//!
//! [`IncludeGraph::build`] is the **pure** algorithm (no salsa, no disk); the
//! salsa query is a thin wrapper. The pure layer is where the future consumers
//! (label/ref resolution, cross-file `\newcommand` scope) will plug in — like
//! arity, the graph lands "harness + graph only," directly testable, awaiting a
//! consumer to designate the document root.

use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};

use crate::incremental::{IncrementalDb, QueryKind, QueryLogEntry, SourceFile, include_edges};
use crate::project::include::{IncludeEdgeKey, IncludeKind, IncludeTarget};

/// One file's contribution to the inclusion graph: its path and the range-free
/// inclusion edges it declares.
#[derive(Debug, Clone)]
pub struct FileFacts {
    pub path: PathBuf,
    pub include_edges: Vec<IncludeEdgeKey>,
}

/// A resolved inclusion edge: `from` includes `to` (a path within the analyzed
/// member set) via `kind`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedInclude {
    pub from: PathBuf,
    pub to: PathBuf,
    pub kind: IncludeKind,
}

/// An inclusion edge that could not be resolved to an analyzed member: a dynamic
/// target, or a literal path to a file outside the set we were given. Callers
/// must stay conservative about such files (their contents are opaque).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UnresolvedInclude {
    pub from: PathBuf,
    pub target: IncludeTarget,
    pub kind: IncludeKind,
}

/// The inclusion graph over a set of files.
///
/// Holds `HashMap`s, so it is neither `Eq` nor `salsa::Update`; [`project_graph`]
/// is therefore `no_eq` (see its doc). Built by [`IncludeGraph::build`].
#[derive(Debug, Default)]
pub struct IncludeGraph {
    /// Per file: resolved outgoing edges, in source order.
    edges: HashMap<PathBuf, Vec<ResolvedInclude>>,
    /// Reverse map: included file → the files that include it.
    included_by: HashMap<PathBuf, Vec<PathBuf>>,
    /// Edges that resolve to nothing in the analyzed set (dynamic or external).
    unresolved: Vec<UnresolvedInclude>,
    /// Files reachable from the document root via resolved edges (root inclusive).
    /// Empty when [`build`](Self::build) was given no root — the salsa wrapper
    /// passes `None`, deferring "which file is the main document" to a consumer.
    reachable: HashSet<PathBuf>,
    /// Detected inclusion cycles (each a list of paths forming the cycle).
    /// `\input`/`\include` recursion is illegal in TeX; we expose cycles for a
    /// later linter rather than diagnosing here.
    cycles: Vec<Vec<PathBuf>>,
}

impl IncludeGraph {
    /// Assemble the inclusion graph for `files`. `root`, when given, seeds the
    /// reachability set (the main document and everything it transitively
    /// includes). Pure: resolves edge targets against the member set by path and
    /// never touches the disk.
    pub fn build(files: &[FileFacts], root: Option<&Path>) -> Self {
        let members: HashSet<&Path> = files.iter().map(|f| f.path.as_path()).collect();

        let mut edges: HashMap<PathBuf, Vec<ResolvedInclude>> = HashMap::new();
        let mut included_by: HashMap<PathBuf, Vec<PathBuf>> = HashMap::new();
        let mut unresolved: Vec<UnresolvedInclude> = Vec::new();

        for file in files {
            let mut outgoing = Vec::new();
            for edge in &file.include_edges {
                match &edge.target {
                    IncludeTarget::Path(to) if members.contains(to.as_path()) => {
                        outgoing.push(ResolvedInclude {
                            from: file.path.clone(),
                            to: to.clone(),
                            kind: edge.kind,
                        });
                        included_by
                            .entry(to.clone())
                            .or_default()
                            .push(file.path.clone());
                    }
                    // A resolved path to a file we didn't analyze is just as
                    // opaque as a dynamic target.
                    _ => unresolved.push(UnresolvedInclude {
                        from: file.path.clone(),
                        target: edge.target.clone(),
                        kind: edge.kind,
                    }),
                }
            }
            edges.insert(file.path.clone(), outgoing);
        }

        let reachable = match root {
            Some(root) => reachable_from(root, &edges),
            None => HashSet::new(),
        };
        let cycles = detect_cycles(files, &edges);

        Self {
            edges,
            included_by,
            unresolved,
            reachable,
            cycles,
        }
    }

    /// Resolved outgoing edges of `file`, in source order.
    pub fn outgoing(&self, file: &Path) -> &[ResolvedInclude] {
        self.edges.get(file).map_or(&[], Vec::as_slice)
    }

    /// Files that include `file`.
    pub fn included_by(&self, file: &Path) -> &[PathBuf] {
        self.included_by.get(file).map_or(&[], Vec::as_slice)
    }

    /// Whether `file` is reachable from the document root (always `false` when
    /// the graph was built without a root).
    pub fn is_reachable(&self, file: &Path) -> bool {
        self.reachable.contains(file)
    }

    /// Edges that resolve to nothing in the analyzed set.
    pub fn unresolved(&self) -> &[UnresolvedInclude] {
        &self.unresolved
    }

    /// Detected inclusion cycles.
    pub fn cycles(&self) -> &[Vec<PathBuf>] {
        &self.cycles
    }
}

/// The set of paths reachable from `root` (inclusive) over resolved edges.
fn reachable_from(root: &Path, edges: &HashMap<PathBuf, Vec<ResolvedInclude>>) -> HashSet<PathBuf> {
    let mut seen: HashSet<PathBuf> = HashSet::new();
    let mut stack = vec![root.to_path_buf()];
    while let Some(path) = stack.pop() {
        if !seen.insert(path.clone()) {
            continue;
        }
        for edge in edges.get(&path).map_or(&[][..], Vec::as_slice) {
            stack.push(edge.to.clone());
        }
    }
    seen
}

/// Find inclusion cycles via DFS over the resolved-edge digraph. Each cycle is
/// returned once, rotated to start at its lexicographically smallest path so
/// equivalent rotations dedupe.
fn detect_cycles(
    files: &[FileFacts],
    edges: &HashMap<PathBuf, Vec<ResolvedInclude>>,
) -> Vec<Vec<PathBuf>> {
    let mut on_stack: HashSet<PathBuf> = HashSet::new();
    let mut visited: HashSet<PathBuf> = HashSet::new();
    let mut path: Vec<PathBuf> = Vec::new();
    let mut found: HashSet<Vec<PathBuf>> = HashSet::new();

    // Deterministic root order: the members as given (callers sort them).
    for file in files {
        dfs_cycles(
            &file.path,
            edges,
            &mut on_stack,
            &mut visited,
            &mut path,
            &mut found,
        );
    }

    found.into_iter().collect()
}

fn dfs_cycles(
    node: &Path,
    edges: &HashMap<PathBuf, Vec<ResolvedInclude>>,
    on_stack: &mut HashSet<PathBuf>,
    visited: &mut HashSet<PathBuf>,
    path: &mut Vec<PathBuf>,
    found: &mut HashSet<Vec<PathBuf>>,
) {
    if on_stack.contains(node) {
        // Back-edge: the cycle is the path slice from this node's first
        // appearance to the current end.
        if let Some(start) = path.iter().position(|p| p == node) {
            found.insert(normalize_cycle(&path[start..]));
        }
        return;
    }
    if !visited.insert(node.to_path_buf()) {
        return;
    }

    on_stack.insert(node.to_path_buf());
    path.push(node.to_path_buf());
    for edge in edges.get(node).map_or(&[][..], Vec::as_slice) {
        dfs_cycles(&edge.to, edges, on_stack, visited, path, found);
    }
    path.pop();
    on_stack.remove(node);
}

/// Rotate a cycle so it starts at its smallest path, giving every rotation of
/// the same cycle one canonical form.
fn normalize_cycle(cycle: &[PathBuf]) -> Vec<PathBuf> {
    let Some(min_at) = (0..cycle.len()).min_by_key(|&i| &cycle[i]) else {
        return Vec::new();
    };
    cycle[min_at..]
        .iter()
        .chain(&cycle[..min_at])
        .cloned()
        .collect()
}

/// One member of a project: its tracked input and on-disk path. Plain-derived
/// (no `salsa::Update`) so it can key the interned [`Project`]. No `Debug`:
/// `SourceFile` is a salsa input id without a standalone `Debug` impl.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct ProjectMember {
    pub file: SourceFile,
    pub path: PathBuf,
}

/// A project as an interned membership snapshot. Interning dedups by value, so
/// an unchanged membership yields the same id across runs (a body edit doesn't
/// change the set) and the [`project_graph`] memo survives. Callers must sort
/// `members` for a stable, dedup-friendly key.
#[salsa::interned]
pub struct Project<'db> {
    #[returns(ref)]
    pub members: Vec<ProjectMember>,
}

/// The inclusion graph for `project`, built from the per-file firewall query.
///
/// `no_eq` because its output ([`IncludeGraph`]) holds `HashMap`s that aren't
/// `salsa::Update`/`Eq`-comparable here; `unsafe(non_update_types)` asserts it
/// carries no salsa references (the graph is a pure function of the interned
/// membership plus the backdated per-file edges). This costs nothing for the
/// firewall: a body edit leaves the per-file inputs backdated, so this query
/// simply isn't re-executed.
///
/// The root is passed as `None` — no consumer designates the main document yet,
/// so reachability is left to a future caller of [`IncludeGraph::build`]. Edges,
/// reverse map, unresolved targets, and cycles are all order-independent and
/// populated regardless.
#[salsa::tracked(returns(ref), no_eq, unsafe(non_update_types))]
pub fn project_graph<'db>(db: &'db dyn IncrementalDb, project: Project<'db>) -> IncludeGraph {
    db.record_query(QueryLogEntry {
        kind: QueryKind::ProjectGraph,
        file: None,
    });

    let facts: Vec<FileFacts> = project
        .members(db)
        .iter()
        .map(|member| FileFacts {
            path: member.path.clone(),
            include_edges: include_edges(db, member.file).clone(),
        })
        .collect();

    IncludeGraph::build(&facts, None)
}

#[cfg(test)]
mod tests {
    use super::*;

    fn facts(path: &str, edges: &[(IncludeKind, &str)]) -> FileFacts {
        FileFacts {
            path: PathBuf::from(path),
            include_edges: edges
                .iter()
                .map(|(kind, target)| IncludeEdgeKey {
                    kind: *kind,
                    target: IncludeTarget::Path(PathBuf::from(target)),
                })
                .collect(),
        }
    }

    #[test]
    fn resolves_edges_within_the_member_set() {
        let files = vec![
            facts("/p/main.tex", &[(IncludeKind::Input, "/p/a.tex")]),
            facts("/p/a.tex", &[]),
        ];
        let g = IncludeGraph::build(&files, None);
        assert_eq!(
            g.outgoing(Path::new("/p/main.tex")),
            &[ResolvedInclude {
                from: PathBuf::from("/p/main.tex"),
                to: PathBuf::from("/p/a.tex"),
                kind: IncludeKind::Input,
            }]
        );
        assert_eq!(
            g.included_by(Path::new("/p/a.tex")),
            &[PathBuf::from("/p/main.tex")]
        );
        assert!(g.unresolved().is_empty());
    }

    #[test]
    fn target_outside_member_set_is_unresolved() {
        let files = vec![facts(
            "/p/main.tex",
            &[(IncludeKind::Input, "/p/missing.tex")],
        )];
        let g = IncludeGraph::build(&files, None);
        assert!(g.outgoing(Path::new("/p/main.tex")).is_empty());
        assert_eq!(g.unresolved().len(), 1);
        assert_eq!(g.unresolved()[0].from, PathBuf::from("/p/main.tex"));
    }

    #[test]
    fn dynamic_target_is_unresolved() {
        let files = vec![FileFacts {
            path: PathBuf::from("/p/main.tex"),
            include_edges: vec![IncludeEdgeKey {
                kind: IncludeKind::Input,
                target: IncludeTarget::Dynamic,
            }],
        }];
        let g = IncludeGraph::build(&files, None);
        assert_eq!(g.unresolved().len(), 1);
        assert_eq!(g.unresolved()[0].target, IncludeTarget::Dynamic);
    }

    #[test]
    fn reachability_follows_transitive_edges_from_root() {
        let files = vec![
            facts("/p/main.tex", &[(IncludeKind::Input, "/p/a.tex")]),
            facts("/p/a.tex", &[(IncludeKind::Input, "/p/b.tex")]),
            facts("/p/b.tex", &[]),
            facts("/p/orphan.tex", &[]),
        ];
        let g = IncludeGraph::build(&files, Some(Path::new("/p/main.tex")));
        assert!(g.is_reachable(Path::new("/p/main.tex")));
        assert!(g.is_reachable(Path::new("/p/a.tex")));
        assert!(g.is_reachable(Path::new("/p/b.tex")));
        assert!(!g.is_reachable(Path::new("/p/orphan.tex")));
    }

    #[test]
    fn no_root_means_nothing_reachable() {
        let files = vec![facts("/p/main.tex", &[])];
        let g = IncludeGraph::build(&files, None);
        assert!(!g.is_reachable(Path::new("/p/main.tex")));
    }

    #[test]
    fn detects_a_cycle() {
        let files = vec![
            facts("/p/a.tex", &[(IncludeKind::Input, "/p/b.tex")]),
            facts("/p/b.tex", &[(IncludeKind::Input, "/p/a.tex")]),
        ];
        let g = IncludeGraph::build(&files, None);
        assert_eq!(g.cycles().len(), 1);
        assert_eq!(
            g.cycles()[0],
            vec![PathBuf::from("/p/a.tex"), PathBuf::from("/p/b.tex")]
        );
    }

    #[test]
    fn self_inclusion_is_a_cycle() {
        let files = vec![facts("/p/a.tex", &[(IncludeKind::Input, "/p/a.tex")])];
        let g = IncludeGraph::build(&files, None);
        assert_eq!(g.cycles(), &[vec![PathBuf::from("/p/a.tex")]]);
    }

    #[test]
    fn acyclic_graph_has_no_cycles() {
        let files = vec![
            facts("/p/main.tex", &[(IncludeKind::Input, "/p/a.tex")]),
            facts("/p/a.tex", &[]),
        ];
        let g = IncludeGraph::build(&files, None);
        assert!(g.cycles().is_empty());
    }
}