Skip to main content

shape_diagnostics/
lib.rs

//! LLM-Structured Diagnostic Schema (LSDS).
//!
//! Per ADR-006 §9, LSDS is the primary compiler diagnostic format. Renderers
//! (terminal, LSP, MCP) consume LSDS and produce human-readable / machine-
//! readable output. **LSDS is the source of truth** — text strings, LSP
//! `Diagnostic` payloads, and MCP tool responses are all derived from it.
//!
//! # Crate layout
//!
//! - [`Diagnostic`] — the canonical struct. JSON-serializable. Stable across
//!   versions per the ADR.
//! - [`Severity`], [`Location`], [`TypeWitness`], [`SuggestedFix`],
//!   [`ContextWindow`] — sub-structures referenced from `Diagnostic`.
//! - [`render`] — built-in renderers. Currently:
//!   - [`render::terminal`] — human-readable text output.
//!   LSP and MCP renderers are reserved for subsequent Phase 2 sessions.
//!
//! # Stability contract
//!
//! Field names in [`Diagnostic`] (and nested types) are part of the public
//! wire format. They must not be renamed or reordered without bumping the
//! schema version. Add new optional fields only; never remove or rename
//! existing ones.
//!
//! The schema version is exposed as [`SCHEMA_VERSION`].
//!
//! # Cross-references
//!
//! - ADR-006 §9 (`docs/adr/006-value-and-memory-model.md`) — binding spec.
//! - ADR-006 §13.5 success metric — average payload ≤500 cl100k tokens.
//! - `crates/shape-vm/src/mir/analysis.rs` — `BorrowError` /
//!   `BorrowErrorKind` / `BorrowErrorCode`, the source for the B-series
//!   diagnostics.
34
35#![warn(missing_docs)]
36
37use serde::{Deserialize, Serialize};
38
39pub mod render;
40
/// Wire-format schema version of the LSDS payload. Bumped on breaking
/// changes.
///
/// Per the crate-level stability contract, renaming or reordering a field
/// of [`Diagnostic`] (or of a nested type) must be accompanied by a bump of
/// this value.
pub const SCHEMA_VERSION: u32 = 1;
43
44/// Severity of a diagnostic.
45///
46/// Lower-cased in the wire format (`"error"`, `"warning"`, `"info"`,
47/// `"hint"`). Renderers map these to terminal colours, LSP severities, and
48/// MCP severity strings.
49#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
50#[serde(rename_all = "lowercase")]
51pub enum Severity {
52    /// Compilation cannot proceed.
53    Error,
54    /// Compilation proceeds but the user should look.
55    Warning,
56    /// Informational — used for `var` inference inlay-hint suggestions
57    /// (ADR-006 §1.3) and similar non-actionable feedback.
58    Info,
59    /// Hint — soft suggestions, e.g. style nits or refactor proposals
60    /// surfaced by tooling consumers.
61    Hint,
62}
63
64/// Source location of a diagnostic — a 1-based line/column plus an
65/// absolute byte span.
66///
67/// `file` is the canonical path string; absent for synthetic / REPL
68/// diagnostics.
69#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
70pub struct Location {
71    /// File path; absent for synthetic / REPL / in-memory sources.
72    #[serde(skip_serializing_if = "Option::is_none", default)]
73    pub file: Option<String>,
74    /// 1-based line number.
75    pub line: u32,
76    /// 1-based column number.
77    pub col: u32,
78    /// Absolute byte span `[start, end)` into the source buffer.
79    pub span: [u32; 2],
80}
81
82impl Location {
83    /// Construct a `Location` with a 1-based line/column and an absolute
84    /// byte span.
85    pub fn new(file: Option<String>, line: u32, col: u32, span_start: u32, span_end: u32) -> Self {
86        Self {
87            file,
88            line,
89            col,
90            span: [span_start, span_end],
91        }
92    }
93
94    /// Synthetic location with no file and zero positions — used for
95    /// diagnostics not anchored to source (e.g., compiler-internal
96    /// configuration errors).
97    pub fn synthetic() -> Self {
98        Self::new(None, 0, 0, 0, 0)
99    }
100}
101
102/// A type witness — a concrete value that satisfies (`expected`) or
103/// violates (`found`) the type constraint at the diagnostic site, per
104/// ADR-006 §9.3.
105///
106/// `r#type` is the type's surface name (e.g. `"int"`, `"string"`,
107/// `"Array<int>"`). `witness` is an optional concrete example value.
108///
109/// For simple primitive types (`int`, `number`, `bool`, `string`), the
110/// emitter is encouraged to populate `witness`. For recursive / generic /
111/// trait-bounded types, `witness` may be `None`; the surface name alone
112/// communicates the constraint.
113#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
114pub struct TypeWitness {
115    /// Surface name of the type (`"int"`, `"Option<string>"`, ...).
116    #[serde(rename = "type")]
117    pub r#type: String,
118    /// Optional concrete value satisfying or violating the constraint.
119    /// Encoded as a JSON value; LLM consumers parse it directly.
120    #[serde(skip_serializing_if = "Option::is_none", default)]
121    pub witness: Option<serde_json::Value>,
122}
123
124impl TypeWitness {
125    /// Construct a witness from a type name and an optional JSON value.
126    pub fn new(type_name: impl Into<String>, witness: Option<serde_json::Value>) -> Self {
127        Self {
128            r#type: type_name.into(),
129            witness,
130        }
131    }
132
133    /// Construct a witness with only a type name and no concrete value.
134    pub fn type_only(type_name: impl Into<String>) -> Self {
135        Self::new(type_name, None)
136    }
137}
138
139/// A suggested fix — a ranked, optionally-diff-bearing proposal that a
140/// renderer (LSP code action, MCP `apply_fix` tool call) can apply.
141///
142/// `confidence` is in `[0.0, 1.0]`. Phase-2 first-session emitters may
143/// produce empty `fixes` lists; richer fix generation is later-session
144/// scope per the dispatch.
145#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
146pub struct SuggestedFix {
147    /// Short user-facing label (e.g. `"convert string to int"`).
148    pub label: String,
149    /// Optional unified-diff fragment. Renderers that can apply diffs
150    /// (LSP, MCP) consume this directly. May be empty.
151    #[serde(skip_serializing_if = "Option::is_none", default)]
152    pub diff: Option<String>,
153    /// Confidence in the fix, `0.0..=1.0`. Renderers may rank by this.
154    pub confidence: f32,
155}
156
157impl SuggestedFix {
158    /// Construct a suggestion with a label and a confidence.
159    pub fn new(label: impl Into<String>, confidence: f32) -> Self {
160        Self {
161            label: label.into(),
162            diff: None,
163            confidence,
164        }
165    }
166
167    /// Attach a unified-diff fragment.
168    pub fn with_diff(mut self, diff: impl Into<String>) -> Self {
169        self.diff = Some(diff.into());
170        self
171    }
172}
173
174/// A token-budgeted context window — the smallest set of source spans
175/// needed to understand the diagnostic, with a token count.
176///
177/// Per ADR-006 §9.5. LLM consumers use this to bound the source they
178/// must include alongside the diagnostic. `tokens` is an estimate against
179/// the cl100k tokenizer (per ADR-006 §13.5 success metric).
180#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
181pub struct ContextWindow {
182    /// Estimated token count for the included spans (cl100k).
183    pub tokens: u32,
184    /// Spans that comprise the context window.
185    pub spans: Vec<ContextSpan>,
186}
187
188impl ContextWindow {
189    /// Construct an empty context window with a token budget of zero.
190    pub fn empty() -> Self {
191        Self {
192            tokens: 0,
193            spans: Vec::new(),
194        }
195    }
196}
197
198/// A span of source — a file plus an inclusive line range.
199#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
200pub struct ContextSpan {
201    /// File path; absent for synthetic / REPL.
202    #[serde(skip_serializing_if = "Option::is_none", default)]
203    pub file: Option<String>,
204    /// Inclusive 1-based line range `[start, end]`.
205    pub lines: [u32; 2],
206}
207
208/// The canonical LSDS diagnostic.
209///
210/// JSON shape matches ADR-006 §9.2. Field names are part of the public
211/// wire format; see crate-level docs for the stability contract.
212#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
213pub struct Diagnostic {
214    /// Stable diagnostic identifier — e.g. `"B0013"`, `"E0100"`. The
215    /// scheme matches the existing `BorrowErrorCode` (`B`-series for
216    /// borrow / lifetime / aliasing) and `ErrorCode` (`E`-series for
217    /// type, parse, semantic) namespaces.
218    pub diagnostic_id: String,
219    /// Severity bucket.
220    pub severity: Severity,
221    /// Primary source location.
222    pub location: Location,
223    /// Expected type at this site, when applicable. `None` for
224    /// non-type-related diagnostics (e.g. parse errors).
225    #[serde(skip_serializing_if = "Option::is_none", default)]
226    pub expected: Option<TypeWitness>,
227    /// Found type at this site, when applicable.
228    #[serde(skip_serializing_if = "Option::is_none", default)]
229    pub found: Option<TypeWitness>,
230    /// Human-readable message body (does NOT include the
231    /// `[B00XX]` prefix — that's the `diagnostic_id` field's job;
232    /// renderers prepend it on output).
233    pub message: String,
234    /// Ranked suggested fixes; may be empty.
235    #[serde(default)]
236    pub fixes: Vec<SuggestedFix>,
237    /// Token-budgeted context window for LLM consumers.
238    #[serde(skip_serializing_if = "Option::is_none", default)]
239    pub context_window: Option<ContextWindow>,
240    /// Citation pointing at the binding spec section that governs this
241    /// diagnostic. E.g. `"ADR-006-§1.1"` or `"ADR-005-§4"`.
242    #[serde(skip_serializing_if = "Option::is_none", default)]
243    pub rule: Option<String>,
244    /// Auxiliary notes. Each note has its own location (e.g. "borrow
245    /// originates here") so renderers can present them as related-info
246    /// callouts.
247    #[serde(default, skip_serializing_if = "Vec::is_empty")]
248    pub notes: Vec<DiagnosticNote>,
249}
250
251/// Auxiliary note attached to a diagnostic — e.g. "borrow originates
252/// here", "binding declared here". Mirrors the existing `ErrorNote`
253/// structure used by `ShapeError::SemanticError.location.notes`.
254#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
255pub struct DiagnosticNote {
256    /// Note message.
257    pub message: String,
258    /// Location the note refers to; `None` for synthetic notes.
259    #[serde(skip_serializing_if = "Option::is_none", default)]
260    pub location: Option<Location>,
261}
262
263impl DiagnosticNote {
264    /// Construct a note with a message and an optional location.
265    pub fn new(message: impl Into<String>, location: Option<Location>) -> Self {
266        Self {
267            message: message.into(),
268            location,
269        }
270    }
271}
272
273/// Builder for [`Diagnostic`]. Use this rather than struct literal at
274/// emission sites so future schema evolution doesn't ripple through.
275#[derive(Debug)]
276pub struct DiagnosticBuilder {
277    diagnostic_id: String,
278    severity: Severity,
279    location: Location,
280    expected: Option<TypeWitness>,
281    found: Option<TypeWitness>,
282    message: String,
283    fixes: Vec<SuggestedFix>,
284    context_window: Option<ContextWindow>,
285    rule: Option<String>,
286    notes: Vec<DiagnosticNote>,
287}
288
289impl DiagnosticBuilder {
290    /// Start building a diagnostic with the required minimum (id,
291    /// severity, location, message).
292    pub fn new(
293        diagnostic_id: impl Into<String>,
294        severity: Severity,
295        location: Location,
296        message: impl Into<String>,
297    ) -> Self {
298        Self {
299            diagnostic_id: diagnostic_id.into(),
300            severity,
301            location,
302            expected: None,
303            found: None,
304            message: message.into(),
305            fixes: Vec::new(),
306            context_window: None,
307            rule: None,
308            notes: Vec::new(),
309        }
310    }
311
312    /// Attach an `expected` type witness.
313    pub fn expected(mut self, witness: TypeWitness) -> Self {
314        self.expected = Some(witness);
315        self
316    }
317
318    /// Attach a `found` type witness.
319    pub fn found(mut self, witness: TypeWitness) -> Self {
320        self.found = Some(witness);
321        self
322    }
323
324    /// Append a suggested fix.
325    pub fn with_fix(mut self, fix: SuggestedFix) -> Self {
326        self.fixes.push(fix);
327        self
328    }
329
330    /// Attach a context window.
331    pub fn context_window(mut self, window: ContextWindow) -> Self {
332        self.context_window = Some(window);
333        self
334    }
335
336    /// Attach a rule citation (`"ADR-006-§1.1"` etc.).
337    pub fn rule(mut self, rule: impl Into<String>) -> Self {
338        self.rule = Some(rule.into());
339        self
340    }
341
342    /// Append an auxiliary note.
343    pub fn with_note(mut self, note: DiagnosticNote) -> Self {
344        self.notes.push(note);
345        self
346    }
347
348    /// Finalize.
349    pub fn build(self) -> Diagnostic {
350        Diagnostic {
351            diagnostic_id: self.diagnostic_id,
352            severity: self.severity,
353            location: self.location,
354            expected: self.expected,
355            found: self.found,
356            message: self.message,
357            fixes: self.fixes,
358            context_window: self.context_window,
359            rule: self.rule,
360            notes: self.notes,
361        }
362    }
363}
364
/// Wire-format tests: JSON round-trips, omission of absent optional
/// fields, and the lower-cased encoding of [`Severity`].
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Pins the schema version so a bump is a deliberate, reviewed change.
    #[test]
    fn schema_version_is_one() {
        assert_eq!(SCHEMA_VERSION, 1);
    }

    /// A diagnostic with witnesses, a fix, and a rule survives a JSON
    /// round-trip unchanged.
    #[test]
    fn diagnostic_round_trips_through_json() {
        let diag = DiagnosticBuilder::new(
            "B0013",
            Severity::Error,
            Location::new(Some("src/main.shape".into()), 12, 4, 102, 145),
            "expected int, found string",
        )
        .expected(TypeWitness::new("int", Some(json!(42))))
        .found(TypeWitness::new("string", Some(json!("hello"))))
        .with_fix(
            SuggestedFix::new("convert string to int", 0.85)
                .with_diff("let x: int = parse_int(value)?"),
        )
        .rule("ADR-006-§1.1")
        .build();

        let s = serde_json::to_string(&diag).expect("serialize");
        let back: Diagnostic = serde_json::from_str(&s).expect("deserialize");
        assert_eq!(diag, back);
    }

    /// Absent optional fields are left out of the JSON entirely, and the
    /// result still deserializes to an equal value.
    #[test]
    fn omitted_optional_fields_round_trip() {
        let diag = DiagnosticBuilder::new(
            "E0100",
            Severity::Error,
            Location::synthetic(),
            "type mismatch",
        )
        .build();

        let s = serde_json::to_string(&diag).expect("serialize");
        // expected/found/context_window/rule/notes are omitted when
        // absent or empty; `fixes` is the one exception (see below).
        assert!(!s.contains("\"expected\""));
        assert!(!s.contains("\"found\""));
        // `fixes` is `default` (empty Vec) — not skipped, but encoded as `[]`.
        assert!(s.contains("\"fixes\":[]"));
        assert!(!s.contains("\"context_window\""));
        assert!(!s.contains("\"rule\""));
        assert!(!s.contains("\"notes\""));

        let back: Diagnostic = serde_json::from_str(&s).expect("deserialize");
        assert_eq!(diag, back);
    }

    /// Notes and the context window are emitted when present, nested
    /// absent `file` fields are omitted, and the whole value round-trips.
    #[test]
    fn notes_and_context_window_round_trip() {
        let diag = DiagnosticBuilder::new(
            "B0001",
            Severity::Warning,
            Location::new(None, 3, 1, 20, 27),
            "value moved while borrowed",
        )
        .context_window(ContextWindow {
            tokens: 18,
            spans: vec![ContextSpan {
                file: None,
                lines: [1, 3],
            }],
        })
        .with_note(DiagnosticNote::new(
            "borrow originates here",
            Some(Location::new(None, 1, 5, 4, 9)),
        ))
        .with_note(DiagnosticNote::new("synthetic note", None))
        .build();

        let s = serde_json::to_string(&diag).expect("serialize");
        assert!(s.contains("\"context_window\""));
        assert!(s.contains("\"notes\""));
        // No location or span in this diagnostic names a file.
        assert!(!s.contains("\"file\""));

        let back: Diagnostic = serde_json::from_str(&s).expect("deserialize");
        assert_eq!(diag, back);
    }

    /// Each severity serializes to its lower-cased name.
    #[test]
    fn severity_serializes_lowercase() {
        let s = serde_json::to_string(&Severity::Error).unwrap();
        assert_eq!(s, "\"error\"");
        let s = serde_json::to_string(&Severity::Warning).unwrap();
        assert_eq!(s, "\"warning\"");
        let s = serde_json::to_string(&Severity::Info).unwrap();
        assert_eq!(s, "\"info\"");
        let s = serde_json::to_string(&Severity::Hint).unwrap();
        assert_eq!(s, "\"hint\"");
    }

    /// Each lower-cased name deserializes back to its severity.
    #[test]
    fn severity_deserializes_lowercase() {
        let cases = [
            ("\"error\"", Severity::Error),
            ("\"warning\"", Severity::Warning),
            ("\"info\"", Severity::Info),
            ("\"hint\"", Severity::Hint),
        ];
        for (text, expected) in cases {
            let parsed: Severity = serde_json::from_str(text).expect("deserialize");
            assert_eq!(parsed, expected);
        }
    }
}