Skip to main content

valua_diagnostics/
lib.rs

1use codespan_reporting::term::termcolor::{ColorChoice, StandardStream, WriteColor};
2
3#[cfg(feature = "serde")]
4use serde::Serialize;
5
6// TODO(Phase 5 - Refactor UTF-8 Column Alignment):
7//
8// ARCHITECTURAL DEBT — byte-indexed column tracking
9//
10// `Span::col` is populated by the lexer by counting raw bytes since the last
11// newline, NOT logical Unicode scalar values. For the current codebase this is
12// safe only because every integration-test fixture is 100% ASCII (1 byte per
13// char). The moment a source file contains multi-byte sequences (accented
14// letters, CJK, emoji, etc.) `col` will report a byte offset instead of a
15// visual column, causing every diagnostic that prints "line:col" coordinates
16// to point at the wrong position.
17//
18// Scope of the problem:
19//   • `Span::col` — stored byte column, not char column.
20//   • `Span::Display` — emits `line:col`; col is byte-based.
21//   • The lexer (`valua-lexer`) — increments its column counter with `+=
22//     token_bytes` instead of `+= token.chars().count()`.
23//   • `render_to_writer` below — passes `span.start..span.end` byte ranges to
24//     `codespan_reporting`. That library does re-derive column from the byte
25//     range, so caret rendering may survive for valid UTF-8 boundaries, but
26//     `col` stored in `Span` itself will still be wrong for multi-byte input.
27//
28// Resolution path for Phase 5:
29//   Option A (minimal): remap the lexer to walk chars (`str::chars()`) and
30//     count code-points; update `col` semantics to mean "1-based Unicode scalar
31//     column".
32//   Option B (full): replace the hand-rolled rendering path entirely with
33//     `codespan-reporting`'s own line/column resolution (it already does this
34//     correctly from byte offsets), drop `Span::col` from the public API, and
35//     keep only `start`/`end` byte offsets. `codespan_reporting::files::
36//     SimpleFiles::location()` handles UTF-8 transparently.
37//   Option C (display width): if terminal display-width accuracy matters (e.g.
38//     CJK double-width glyphs), additionally integrate the `unicode-width` crate
39//     to compute display columns separately from scalar counts.
40//
41// Until Phase 5 this is guarded by ASCII-only test fixtures. Do NOT extend test
42// fixtures or production input with non-ASCII source text before this is fixed.
/// Byte-range + line/column position within a source file.
///
/// `start..end` is a half-open byte range into the source text. `line` and
/// `col` are 1-based; `col` is byte-based (see the UTF-8 TODO above).
#[cfg_attr(feature = "serde", derive(Serialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Byte offset of the first character (inclusive).
    pub start: usize,
    /// Byte offset past the last character (exclusive).
    pub end: usize,
    /// 1-based line number.
    pub line: u32,
    /// 1-based column number (byte-based; see UTF-8 TODO above).
    pub col: u32,
}

/// Formats the span as `line:col`.
impl std::fmt::Display for Span {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { line, col, .. } = *self;
        write!(f, "{line}:{col}")
    }
}

impl Span {
    /// Create a new span from raw fields.
    ///
    /// No validation is performed; the values are stored as given.
    #[must_use]
    pub fn new(start: usize, end: usize, line: u32, col: u32) -> Self {
        Self {
            start,
            end,
            line,
            col,
        }
    }

    /// Placeholder span for generated nodes that have no source location.
    ///
    /// All fields are zero. Because real spans use 1-based lines, a real span
    /// never compares equal to the placeholder.
    #[must_use]
    pub fn dummy() -> Self {
        Self::new(0, 0, 0, 0)
    }

    /// Merge two spans into one that covers both.
    ///
    /// The result runs from the smaller `start` to the larger `end` and takes
    /// the smaller `line`. The column comes from whichever span starts first
    /// in the source (smaller `start` offset; `self` wins a tie).
    ///
    /// The [`Span::dummy`] placeholder acts as the identity: merging it with
    /// any span returns that other span unchanged, so a generated node cannot
    /// drag the result back to offset 0 / line 0.
    #[must_use]
    pub fn merge(self, other: Self) -> Self {
        let placeholder = Self::dummy();
        if self == placeholder {
            return other;
        }
        if other == placeholder {
            return self;
        }

        let first = if self.start <= other.start {
            self
        } else {
            other
        };
        Self {
            // `first` is by construction the span with the smaller start.
            start: first.start,
            end: self.end.max(other.end),
            line: self.line.min(other.line),
            col: first.col,
        }
    }
}
105
/// How serious a diagnostic message is.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Severity {
    /// Hard error: transpilation cannot proceed.
    Error,
    /// Non-fatal problem: transpilation may continue.
    Warning,
    /// Informational annotation that accompanies another diagnostic.
    Note,
}
116
117/// A structured diagnostic message with optional error code, fix suggestion,
118/// and secondary source spans.
119#[derive(Debug, Clone)]
120pub struct Diagnostic {
121    pub severity: Severity,
122    pub message: String,
123    pub span: Span,
124    /// Short error code, e.g. `"E0001"`.
125    pub code: Option<&'static str>,
126    /// Human-readable fix hint shown below the message.
127    pub suggestion: Option<String>,
128    /// Informational context note rendered before the suggestion.
129    pub note: Option<String>,
130    /// Additional labeled spans rendered alongside the primary span.
131    /// Each entry is `(span, label_message)`.
132    pub secondary_labels: Vec<(Span, String)>,
133}
134
135impl Diagnostic {
136    /// Build an error-level diagnostic.
137    #[must_use]
138    pub fn error(message: impl Into<String>, span: Span) -> Self {
139        Self {
140            severity: Severity::Error,
141            message: message.into(),
142            span,
143            code: None,
144            suggestion: None,
145            note: None,
146            secondary_labels: Vec::new(),
147        }
148    }
149
150    /// Build a warning-level diagnostic.
151    #[must_use]
152    pub fn warning(message: impl Into<String>, span: Span) -> Self {
153        Self {
154            severity: Severity::Warning,
155            message: message.into(),
156            span,
157            code: None,
158            suggestion: None,
159            note: None,
160            secondary_labels: Vec::new(),
161        }
162    }
163
164    /// Build a note-level diagnostic.
165    #[must_use]
166    pub fn note(message: impl Into<String>, span: Span) -> Self {
167        Self {
168            severity: Severity::Note,
169            message: message.into(),
170            span,
171            code: None,
172            suggestion: None,
173            note: None,
174            secondary_labels: Vec::new(),
175        }
176    }
177
178    /// Attach a short error code.
179    #[must_use]
180    pub fn with_code(mut self, code: &'static str) -> Self {
181        self.code = Some(code);
182        self
183    }
184
185    /// Attach a fix suggestion shown to the user.
186    #[must_use]
187    pub fn with_suggestion(mut self, suggestion: impl Into<String>) -> Self {
188        self.suggestion = Some(suggestion.into());
189        self
190    }
191
192    /// Attach an informational context note shown before the suggestion.
193    #[must_use]
194    pub fn with_note(mut self, note: impl Into<String>) -> Self {
195        self.note = Some(note.into());
196        self
197    }
198
199    /// Attach an additional labeled span shown alongside the primary span.
200    /// Used for two-location diagnostics such as E0301 (declaration + mutation site).
201    #[must_use]
202    pub fn with_secondary_label(mut self, span: Span, message: impl Into<String>) -> Self {
203        self.secondary_labels.push((span, message.into()));
204        self
205    }
206}
207
208/// Trait for sinks that receive and display diagnostics.
209pub trait Reporter {
210    /// Emit a single diagnostic against the provided source text.
211    fn report(&mut self, diagnostic: &Diagnostic, source: &str, filename: &str);
212
213    /// Returns `true` if at least one error has been reported.
214    fn has_errors(&self) -> bool;
215}
216
217// ── Rendering helpers ─────────────────────────────────────────────────────────
218
219fn render_to_writer(
220    writer: &mut dyn WriteColor,
221    diagnostic: &Diagnostic,
222    source: &str,
223    filename: &str,
224) {
225    use codespan_reporting::diagnostic::{Diagnostic as CsDiag, Label, Severity as CsSeverity};
226    use codespan_reporting::files::SimpleFiles;
227    use codespan_reporting::term;
228
229    let mut files: SimpleFiles<&str, &str> = SimpleFiles::new();
230    let file_id = files.add(filename, source);
231
232    let cs_severity = match diagnostic.severity {
233        Severity::Error => CsSeverity::Error,
234        Severity::Warning => CsSeverity::Warning,
235        Severity::Note => CsSeverity::Note,
236    };
237
238    // TODO(Phase 5 - Refactor UTF-8 Column Alignment):
239    // Byte ranges below are fed directly to `codespan_reporting`. That library
240    // derives visual columns internally from byte offsets and will misplace
241    // carets for multi-byte UTF-8 sequences if `start`/`end` do not land on
242    // valid char boundaries (or for wide glyphs). Safe today because all input
243    // is ASCII. Fix by validating char boundaries here, or adopt
244    // `codespan_reporting`'s own location API and drop `Span::col` (see the
245    // full resolution plan above the `Span` declaration).
246    let mut labels = vec![
247        Label::primary(file_id, diagnostic.span.start..diagnostic.span.end)
248            .with_message(&diagnostic.message),
249    ];
250
251    for (span, msg) in &diagnostic.secondary_labels {
252        labels.push(Label::secondary(file_id, span.start..span.end).with_message(msg));
253    }
254
255    let mut cs_diag = CsDiag::new(cs_severity)
256        .with_message(&diagnostic.message)
257        .with_labels(labels);
258
259    if let Some(code) = diagnostic.code {
260        cs_diag = cs_diag.with_code(code);
261    }
262
263    let mut notes: Vec<String> = Vec::new();
264    if let Some(ref note) = diagnostic.note {
265        notes.push(format!("note: {note}"));
266    }
267    if let Some(ref suggestion) = diagnostic.suggestion {
268        notes.push(format!("help: {suggestion}"));
269    }
270    if !notes.is_empty() {
271        cs_diag = cs_diag.with_notes(notes);
272    }
273
274    let config = term::Config::default();
275    if let Err(e) = term::emit(writer, &config, &files, &cs_diag) {
276        eprintln!("valua: failed to render diagnostic: {e}");
277    }
278}
279
280/// Render a diagnostic to a plain string with no ANSI color codes.
281/// Intended for tests that verify visual layout of error output.
282#[must_use]
283pub fn render_diagnostic_to_string(
284    diagnostic: &Diagnostic,
285    source: &str,
286    filename: &str,
287) -> String {
288    use codespan_reporting::term::termcolor::Buffer;
289    let mut buf = Buffer::no_color();
290    render_to_writer(&mut buf, diagnostic, source, filename);
291    String::from_utf8_lossy(buf.as_slice()).into_owned()
292}
293
294/// Writes diagnostics to stderr using `codespan-reporting`.
295pub struct ConsoleReporter {
296    error_count: usize,
297    color: ColorChoice,
298}
299
300impl ConsoleReporter {
301    /// Create a reporter with explicit color control.
302    #[must_use]
303    pub fn new(color: ColorChoice) -> Self {
304        Self {
305            error_count: 0,
306            color,
307        }
308    }
309
310    /// Create a reporter that auto-detects color support on stderr.
311    #[must_use]
312    pub fn stderr() -> Self {
313        Self::new(ColorChoice::Auto)
314    }
315}
316
317impl Reporter for ConsoleReporter {
318    fn report(&mut self, diagnostic: &Diagnostic, source: &str, filename: &str) {
319        if diagnostic.severity == Severity::Error {
320            self.error_count += 1;
321        }
322        let writer = StandardStream::stderr(self.color);
323        let mut lock = writer.lock();
324        render_to_writer(&mut lock, diagnostic, source, filename);
325    }
326
327    fn has_errors(&self) -> bool {
328        self.error_count > 0
329    }
330}
331
332/// A simple in-memory reporter useful for testing.
333#[derive(Debug, Default)]
334pub struct CollectingReporter {
335    pub diagnostics: Vec<Diagnostic>,
336}
337
338impl Reporter for CollectingReporter {
339    fn report(&mut self, diagnostic: &Diagnostic, _source: &str, _filename: &str) {
340        self.diagnostics.push(diagnostic.clone());
341    }
342
343    fn has_errors(&self) -> bool {
344        self.diagnostics
345            .iter()
346            .any(|d| d.severity == Severity::Error)
347    }
348}
349
#[cfg(test)]
mod tests {
    use super::*;

    /// Merging must span from the smallest start to the largest end.
    #[test]
    fn test_span_merge_covers_both_endpoints() {
        let left = Span::new(0, 5, 1, 1);
        let right = Span::new(3, 10, 2, 3);
        let merged = left.merge(right);
        assert_eq!((merged.start, merged.end), (0, 10));
    }

    /// The column follows the span with the smaller start, even when that
    /// span is the argument rather than the receiver.
    #[test]
    fn test_span_merge_col_from_earlier_span() {
        let earlier = Span::new(0, 5, 1, 7);
        let later = Span::new(6, 10, 1, 15);
        let merged = later.merge(earlier);
        assert_eq!(
            merged.col, 7,
            "col should be from whichever span starts first"
        );
    }

    /// `Display` prints `line:col`.
    #[test]
    fn test_span_display() {
        let location = Span::new(0, 5, 3, 7);
        assert_eq!(location.to_string(), "3:7");
    }

    /// Builder methods set the code and suggestion on an error diagnostic.
    #[test]
    fn test_diagnostic_builder() {
        let diag = Diagnostic::error("bad token", Span::new(0, 1, 1, 1))
            .with_code("E0001")
            .with_suggestion("remove it");
        assert_eq!(diag.severity, Severity::Error);
        assert_eq!(diag.code, Some("E0001"));
        assert!(diag.suggestion.is_some());
    }

    /// A secondary label is stored together with its message.
    #[test]
    fn test_diagnostic_secondary_label() {
        let declaration = Span::new(0, 5, 1, 1);
        let mutation = Span::new(10, 15, 3, 1);
        let diag = Diagnostic::error("mutation", mutation)
            .with_secondary_label(declaration, "declared here");
        assert_eq!(diag.secondary_labels.len(), 1);
        assert_eq!(diag.secondary_labels[0].1, "declared here");
    }

    /// Only error-severity diagnostics flip `has_errors`; all are stored.
    #[test]
    fn test_collecting_reporter_tracks_errors() {
        let mut reporter = CollectingReporter::default();
        let at = Span::new(0, 1, 1, 1);
        assert!(!reporter.has_errors());

        reporter.report(&Diagnostic::warning("w", at), "", "f");
        assert!(!reporter.has_errors());

        reporter.report(&Diagnostic::error("e", at), "", "f");
        assert!(reporter.has_errors());
        assert_eq!(reporter.diagnostics.len(), 2);
    }
}