Skip to main content

quarto_error_reporting/
coalesce.rs

1//! Cross-source diagnostic coalescing.
2//!
3//! When a single underlying problem produces a diagnostic on many
4//! pages — for example, one bad `theme:` key in `_quarto.yml`
5//! triggering [`Q-14-1`](../../error_catalog.json) once per rendered
6//! page — the renderer should collapse them into a single emission
7//! that lists the affected pages, rather than printing the same
8//! ariadne block hundreds of times.
9//!
10//! # The primary key is the source location
11//!
12//! Two diagnostics whose `location` resolves to the same source
13//! span in the same file are presumed to be the same error and are
14//! grouped together. We deliberately do **not** include the code or
15//! title in the grouping key — the source location alone is the
16//! relation's primary key (decision recorded in
17//! `claude-notes/plans/2026-05-22-theme-diagnostic-epic.md`).
18//!
19//! If two unrelated checks ever land at the same span this is a
20//! design risk; the v1 cost (one merged emission with a possibly
21//! mixed-content representative) is low. We will widen the key to
22//! `(location, code)` if it turns out to bite.
23//!
24//! # What does not coalesce
25//!
26//! Diagnostics whose `location` is one of:
27//!
28//! - `None`,
29//! - [`SourceInfo::Concat`], or
30//! - [`SourceInfo::FilterProvenance`],
31//!
32//! pass through as singleton groups (one entry each). These shapes
33//! don't reduce to a single contiguous byte range, so we can't form
34//! a stable group key for them. This is the same conservative
35//! contract as [`SourceInfo::resolve_byte_range`].
36//!
37//! [`SourceInfo::Concat`]: quarto_source_map::SourceInfo::Concat
38//! [`SourceInfo::FilterProvenance`]: quarto_source_map::SourceInfo::FilterProvenance
39//! [`SourceInfo::resolve_byte_range`]: quarto_source_map::SourceInfo::resolve_byte_range
40
41use std::collections::HashMap;
42use std::path::PathBuf;
43
44use quarto_source_map::{SourceContext, SourceInfo};
45
46use crate::diagnostic::{DiagnosticMessage, TextRenderOptions};
47
/// One entry from a coalesced render summary: a representative
/// diagnostic together with every file that contributed to its group.
///
/// `affected_files` is in encounter order — the order in which the
/// caller's iterator produced each (path, diagnostic) pair that
/// contributed to this group. Singleton groups (size 1) carry one
/// path; rendered output for them omits the "Affected files:" tail
/// to match the legacy per-page render.
#[derive(Debug, Clone)]
pub struct CoalescedDiagnostic {
    /// The diagnostic rendered on behalf of the whole group (the
    /// first one encountered for the group's location).
    pub representative: DiagnosticMessage,
    /// Source context supplied alongside the representative; passed
    /// to the representative's renderer when producing text.
    pub source_context: Option<SourceContext>,
    /// Paths of every input that landed in this group, in encounter
    /// order. Always holds at least the representative's own path.
    pub affected_files: Vec<PathBuf>,
}
61
/// Maximum number of file names rendered inline in the "Affected
/// files:" tail before switching to "… (and N others)".
///
/// Tunable; v1 sets it small so the typical "hundreds of pages"
/// case stays one line. Paths beyond the cap are counted in the
/// tail but never listed by name.
pub const AFFECTED_FILES_CAP: usize = 3;
68
69impl CoalescedDiagnostic {
70    /// Render the underlying ariadne diagnostic, followed by an
71    /// `Affected files:` tail listing up to [`AFFECTED_FILES_CAP`]
72    /// of the affected paths and a `(and N others)` count for the
73    /// rest. Single-element groups omit the tail.
74    pub fn to_text(&self) -> String {
75        self.to_text_with_options(&TextRenderOptions::default())
76    }
77
78    /// Like [`Self::to_text`] but with explicit render options
79    /// (mostly useful in tests, where hyperlinks are disabled for
80    /// path-independent assertions).
81    pub fn to_text_with_options(&self, opts: &TextRenderOptions) -> String {
82        let body = self
83            .representative
84            .to_text_with_options(self.source_context.as_ref(), opts);
85        if self.affected_files.len() <= 1 {
86            return body;
87        }
88        let tail = render_affected_files_tail(&self.affected_files);
89        format!("{}\n{}", body, tail)
90    }
91}
92
93fn render_affected_files_tail(paths: &[PathBuf]) -> String {
94    let shown = paths
95        .iter()
96        .take(AFFECTED_FILES_CAP)
97        .map(|p| p.display().to_string())
98        .collect::<Vec<_>>()
99        .join(", ");
100    let remaining = paths.len().saturating_sub(AFFECTED_FILES_CAP);
101    if remaining == 0 {
102        format!("Affected files: {}", shown)
103    } else {
104        format!(
105            "Affected files: {} (and {} other{})",
106            shown,
107            remaining,
108            if remaining == 1 { "" } else { "s" },
109        )
110    }
111}
112
/// Canonical, hashable form of a [`SourceInfo`] for grouping.
///
/// Holds the `(file_id, start, end)` tuple produced by
/// [`SourceInfo::resolve_byte_range`]. A `SourceInfo` for which that
/// call yields `None` has no key and therefore cannot be grouped.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct LocationKey {
    /// First component of the resolved byte range (the file id).
    file_id: usize,
    /// Second component of the resolved byte range (start offset).
    start: usize,
    /// Third component of the resolved byte range (end offset).
    end: usize,
}
124
125impl LocationKey {
126    fn from(info: &SourceInfo) -> Option<Self> {
127        let (file_id, start, end) = info.resolve_byte_range()?;
128        Some(LocationKey {
129            file_id,
130            start,
131            end,
132        })
133    }
134}
135
136/// Group the input by source location and return one
137/// [`CoalescedDiagnostic`] per group, in encounter order.
138///
139/// Inputs without a coalescable location (no `location`, or `Concat`
140/// / `FilterProvenance`) pass through as singleton groups in their
141/// original order — they always print exactly once.
142///
143/// The first `(path, diagnostic, source_context)` triple to introduce
144/// a given key becomes the group's representative. Later triples
145/// only contribute to `affected_files`. This matches the principle
146/// that the user sees the first diagnostic they would have seen
147/// before, with extra context appended.
148pub fn coalesce_by_source<I>(input: I) -> Vec<CoalescedDiagnostic>
149where
150    I: IntoIterator<Item = (PathBuf, DiagnosticMessage, Option<SourceContext>)>,
151{
152    let mut groups: Vec<CoalescedDiagnostic> = Vec::new();
153    let mut index: HashMap<LocationKey, usize> = HashMap::new();
154
155    for (path, diagnostic, source_context) in input {
156        let key = diagnostic.location.as_ref().and_then(LocationKey::from);
157        match key {
158            Some(k) => match index.get(&k).copied() {
159                Some(idx) => {
160                    groups[idx].affected_files.push(path);
161                }
162                None => {
163                    let idx = groups.len();
164                    index.insert(k, idx);
165                    groups.push(CoalescedDiagnostic {
166                        representative: diagnostic,
167                        source_context,
168                        affected_files: vec![path],
169                    });
170                }
171            },
172            None => {
173                // Non-coalescable: emit as a singleton group at the
174                // tail. Do not register in the index, so subsequent
175                // identical-but-uncoalescable entries also emit as
176                // singletons.
177                groups.push(CoalescedDiagnostic {
178                    representative: diagnostic,
179                    source_context,
180                    affected_files: vec![path],
181                });
182            }
183        }
184    }
185
186    groups
187}
188
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builder::DiagnosticMessageBuilder;
    use quarto_source_map::{FileId, SourcePiece};
    use std::sync::Arc;

    /// The triple shape consumed by `coalesce_by_source`.
    type Input = (PathBuf, DiagnosticMessage, Option<SourceContext>);

    /// A plain `SourceInfo::Original` covering `start..end` of `file_id`.
    fn original(file_id: usize, start: usize, end: usize) -> SourceInfo {
        SourceInfo::Original {
            file_id: FileId(file_id),
            start_offset: start,
            end_offset: end,
        }
    }

    /// A `Q-14-1` error diagnostic at `loc` with the given title.
    fn diag_at(loc: SourceInfo, title: &str) -> DiagnosticMessage {
        DiagnosticMessageBuilder::error(title)
            .with_code("Q-14-1")
            .with_location(loc)
            .problem("…")
            .build()
    }

    /// Render options with hyperlinks off, so text assertions do not
    /// depend on absolute paths.
    fn plain_text() -> TextRenderOptions {
        TextRenderOptions {
            enable_hyperlinks: false,
        }
    }

    /// One `<name>.qmd` input per name, all titled "T", all pointing
    /// at the same source span, none carrying a source context.
    fn pages_at_same_location(names: &[&str]) -> Vec<Input> {
        let loc = original(1, 100, 110);
        names
            .iter()
            .map(|n| {
                (
                    PathBuf::from(format!("{n}.qmd")),
                    diag_at(loc.clone(), "T"),
                    None,
                )
            })
            .collect()
    }

    #[test]
    fn two_diagnostics_at_the_same_location_collapse() {
        let groups = coalesce_by_source(pages_at_same_location(&["a", "b"]));
        assert_eq!(groups.len(), 1);
        assert_eq!(
            groups[0].affected_files,
            vec![PathBuf::from("a.qmd"), PathBuf::from("b.qmd")]
        );
    }

    #[test]
    fn different_locations_do_not_collapse() {
        let input = vec![
            (
                PathBuf::from("a.qmd"),
                diag_at(original(1, 100, 110), "T"),
                None,
            ),
            (
                PathBuf::from("b.qmd"),
                diag_at(original(1, 200, 210), "T"),
                None,
            ),
        ];
        let groups = coalesce_by_source(input);
        assert_eq!(groups.len(), 2);
    }

    #[test]
    fn different_file_ids_do_not_collapse() {
        let input = vec![
            (
                PathBuf::from("a.qmd"),
                diag_at(original(1, 100, 110), "T"),
                None,
            ),
            (
                PathBuf::from("b.qmd"),
                diag_at(original(2, 100, 110), "T"),
                None,
            ),
        ];
        let groups = coalesce_by_source(input);
        assert_eq!(groups.len(), 2);
    }

    #[test]
    fn substring_keys_on_composed_offsets_not_parent_span() {
        // A Substring is keyed by its own resolved byte range, not by
        // the span of the Original it was cut from. Here the parent
        // covers (1, 100, 200) while the substring is expected to
        // resolve to (1, 100 + 0, 100 + 10) = (1, 100, 110).
        let root = original(1, 100, 200);
        let sub = SourceInfo::Substring {
            parent: Arc::new(root.clone()),
            // Offsets are relative to the parent's text.
            start_offset: 0,
            end_offset: 10,
        };
        let input = vec![
            (PathBuf::from("a.qmd"), diag_at(root, "T"), None),
            (PathBuf::from("b.qmd"), diag_at(sub, "T"), None),
        ];
        let groups = coalesce_by_source(input);
        // Different end offsets ⇒ different keys ⇒ separate groups.
        // This documents the v1 contract: a narrower Substring does
        // not coalesce with a diagnostic on its full parent span.
        assert_eq!(groups.len(), 2);
    }

    #[test]
    fn concat_location_passes_through_as_singleton() {
        let concat = SourceInfo::Concat {
            pieces: vec![SourcePiece {
                source_info: original(1, 0, 10),
                offset_in_concat: 0,
                length: 10,
            }],
        };
        let input = vec![
            (PathBuf::from("a.qmd"), diag_at(concat.clone(), "T"), None),
            (PathBuf::from("b.qmd"), diag_at(concat, "T"), None),
        ];
        let groups = coalesce_by_source(input);
        // Both emitted as singletons because Concat has no
        // coalescable key. Order preserved.
        assert_eq!(groups.len(), 2);
        assert_eq!(groups[0].affected_files, vec![PathBuf::from("a.qmd")]);
        assert_eq!(groups[1].affected_files, vec![PathBuf::from("b.qmd")]);
    }

    #[test]
    fn diagnostics_without_location_pass_through_as_singletons() {
        let d = DiagnosticMessageBuilder::error("no location")
            .problem("…")
            .build();
        let input = vec![
            (PathBuf::from("a.qmd"), d.clone(), None),
            (PathBuf::from("b.qmd"), d, None),
        ];
        let groups = coalesce_by_source(input);
        assert_eq!(groups.len(), 2);
    }

    #[test]
    fn encounter_order_preserved_across_groups() {
        let loc1 = original(1, 100, 110);
        let loc2 = original(1, 200, 210);
        let input = vec![
            (PathBuf::from("a.qmd"), diag_at(loc1.clone(), "T1"), None),
            (PathBuf::from("b.qmd"), diag_at(loc2, "T2"), None),
            (PathBuf::from("c.qmd"), diag_at(loc1, "T1"), None),
        ];
        let groups = coalesce_by_source(input);
        assert_eq!(groups.len(), 2);
        // Group order = order of first occurrence.
        assert_eq!(groups[0].representative.title, "T1");
        assert_eq!(
            groups[0].affected_files,
            vec![PathBuf::from("a.qmd"), PathBuf::from("c.qmd")]
        );
        assert_eq!(groups[1].representative.title, "T2");
        assert_eq!(groups[1].affected_files, vec![PathBuf::from("b.qmd")]);
    }

    #[test]
    fn first_encounter_supplies_representative_and_context() {
        // The representative is the *first* (path, diagnostic) seen
        // for a given key. Later contributions only add to
        // `affected_files`. The same goes for the SourceContext.
        let loc = original(1, 100, 110);
        let mut ctx_first = SourceContext::new();
        ctx_first.add_file_with_id(FileId(1), "first.yml".into(), Some("first".into()));
        let mut ctx_second = SourceContext::new();
        ctx_second.add_file_with_id(FileId(1), "second.yml".into(), Some("second".into()));

        let input = vec![
            (
                PathBuf::from("a.qmd"),
                diag_at(loc.clone(), "first"),
                Some(ctx_first),
            ),
            (
                PathBuf::from("b.qmd"),
                diag_at(loc, "second"),
                Some(ctx_second),
            ),
        ];
        let groups = coalesce_by_source(input);
        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].representative.title, "first");
        // Only presence is checked here; this does not by itself
        // distinguish the first context from the second.
        assert!(groups[0].source_context.is_some());
        // The later triple still contributes its path.
        assert_eq!(
            groups[0].affected_files,
            vec![PathBuf::from("a.qmd"), PathBuf::from("b.qmd")]
        );
    }

    #[test]
    fn singleton_group_omits_affected_files_tail() {
        let groups = coalesce_by_source(pages_at_same_location(&["a"]));
        let text = groups[0].to_text_with_options(&plain_text());
        assert!(
            !text.contains("Affected files:"),
            "singleton groups must not emit the affected-files tail:\n{}",
            text
        );
    }

    #[test]
    fn multi_group_below_cap_lists_all_files() {
        let groups = coalesce_by_source(pages_at_same_location(&["a", "b"]));
        let text = groups[0].to_text_with_options(&plain_text());
        assert!(text.contains("Affected files: a.qmd, b.qmd"), "{}", text);
        assert!(
            !text.contains("other"),
            "no '(and N others)' tail expected for ≤ cap:\n{}",
            text
        );
    }

    #[test]
    fn multi_group_above_cap_truncates_and_counts() {
        // AFFECTED_FILES_CAP=3, so 5 files should produce
        // "a.qmd, b.qmd, c.qmd (and 2 others)".
        let groups = coalesce_by_source(pages_at_same_location(&["a", "b", "c", "d", "e"]));
        let text = groups[0].to_text_with_options(&plain_text());
        assert!(
            text.contains("Affected files: a.qmd, b.qmd, c.qmd (and 2 others)"),
            "{}",
            text,
        );
    }

    #[test]
    fn multi_group_just_above_cap_uses_singular_other() {
        // 4 files at cap=3 ⇒ 1 other (singular).
        let groups = coalesce_by_source(pages_at_same_location(&["a", "b", "c", "d"]));
        let text = groups[0].to_text_with_options(&plain_text());
        assert!(
            text.contains("(and 1 other)"),
            "expected singular 'other' for exactly 1 over cap:\n{}",
            text,
        );
    }
}