shape_diagnostics/lib.rs
1//! LLM-Structured Diagnostic Schema (LSDS).
2//!
3//! Per ADR-006 §9, LSDS is the primary compiler diagnostic format. Renderers
4//! (terminal, LSP, MCP) consume LSDS and produce human-readable / machine-
5//! readable output. **LSDS is the source of truth** — text strings, LSP
6//! `Diagnostic` payloads, and MCP tool responses are all derived from it.
7//!
8//! # Crate layout
9//!
10//! - [`Diagnostic`] — the canonical struct. JSON-serializable. Stable across
11//! versions per the ADR.
12//! - [`Severity`], [`Location`], [`TypeWitness`], [`SuggestedFix`],
13//! [`ContextWindow`] — sub-structures referenced from `Diagnostic`.
//! - [`render`] — built-in renderers. Currently:
//!   - [`render::terminal`] — human-readable text output.
//!
//!   LSP and MCP renderers are reserved for subsequent Phase 2 sessions.
17//!
18//! # Stability contract
19//!
20//! Field names in [`Diagnostic`] (and nested types) are part of the public
21//! wire format. They must not be renamed or reordered without bumping the
22//! schema version. Add new optional fields only; never remove or rename
23//! existing ones.
24//!
25//! The schema version is exposed as [`SCHEMA_VERSION`].
26//!
27//! # Cross-references
28//!
29//! - ADR-006 §9 (`docs/adr/006-value-and-memory-model.md`) — binding spec.
30//! - ADR-006 §13.5 success metric — average payload ≤500 cl100k tokens.
31//! - `crates/shape-vm/src/mir/analysis.rs` — `BorrowError` /
32//! `BorrowErrorKind` / `BorrowErrorCode`, the source for the B-series
33//! diagnostics.
34
35#![warn(missing_docs)]
36
37use serde::{Deserialize, Serialize};
38
39pub mod render;
40
/// Version number of the LSDS wire format.
///
/// Incremented whenever the serialized shape changes in a breaking way
/// (see the stability contract in the crate-level docs).
pub const SCHEMA_VERSION: u32 = 1;
43
44/// Severity of a diagnostic.
45///
46/// Lower-cased in the wire format (`"error"`, `"warning"`, `"info"`,
47/// `"hint"`). Renderers map these to terminal colours, LSP severities, and
48/// MCP severity strings.
49#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
50#[serde(rename_all = "lowercase")]
51pub enum Severity {
52 /// Compilation cannot proceed.
53 Error,
54 /// Compilation proceeds but the user should look.
55 Warning,
56 /// Informational — used for `var` inference inlay-hint suggestions
57 /// (ADR-006 §1.3) and similar non-actionable feedback.
58 Info,
59 /// Hint — soft suggestions, e.g. style nits or refactor proposals
60 /// surfaced by tooling consumers.
61 Hint,
62}
63
64/// Source location of a diagnostic — a 1-based line/column plus an
65/// absolute byte span.
66///
67/// `file` is the canonical path string; absent for synthetic / REPL
68/// diagnostics.
69#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
70pub struct Location {
71 /// File path; absent for synthetic / REPL / in-memory sources.
72 #[serde(skip_serializing_if = "Option::is_none", default)]
73 pub file: Option<String>,
74 /// 1-based line number.
75 pub line: u32,
76 /// 1-based column number.
77 pub col: u32,
78 /// Absolute byte span `[start, end)` into the source buffer.
79 pub span: [u32; 2],
80}
81
82impl Location {
83 /// Construct a `Location` with a 1-based line/column and an absolute
84 /// byte span.
85 pub fn new(file: Option<String>, line: u32, col: u32, span_start: u32, span_end: u32) -> Self {
86 Self {
87 file,
88 line,
89 col,
90 span: [span_start, span_end],
91 }
92 }
93
94 /// Synthetic location with no file and zero positions — used for
95 /// diagnostics not anchored to source (e.g., compiler-internal
96 /// configuration errors).
97 pub fn synthetic() -> Self {
98 Self::new(None, 0, 0, 0, 0)
99 }
100}
101
102/// A type witness — a concrete value that satisfies (`expected`) or
103/// violates (`found`) the type constraint at the diagnostic site, per
104/// ADR-006 §9.3.
105///
106/// `r#type` is the type's surface name (e.g. `"int"`, `"string"`,
107/// `"Array<int>"`). `witness` is an optional concrete example value.
108///
109/// For simple primitive types (`int`, `number`, `bool`, `string`), the
110/// emitter is encouraged to populate `witness`. For recursive / generic /
111/// trait-bounded types, `witness` may be `None`; the surface name alone
112/// communicates the constraint.
113#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
114pub struct TypeWitness {
115 /// Surface name of the type (`"int"`, `"Option<string>"`, ...).
116 #[serde(rename = "type")]
117 pub r#type: String,
118 /// Optional concrete value satisfying or violating the constraint.
119 /// Encoded as a JSON value; LLM consumers parse it directly.
120 #[serde(skip_serializing_if = "Option::is_none", default)]
121 pub witness: Option<serde_json::Value>,
122}
123
124impl TypeWitness {
125 /// Construct a witness from a type name and an optional JSON value.
126 pub fn new(type_name: impl Into<String>, witness: Option<serde_json::Value>) -> Self {
127 Self {
128 r#type: type_name.into(),
129 witness,
130 }
131 }
132
133 /// Construct a witness with only a type name and no concrete value.
134 pub fn type_only(type_name: impl Into<String>) -> Self {
135 Self::new(type_name, None)
136 }
137}
138
139/// A suggested fix — a ranked, optionally-diff-bearing proposal that a
140/// renderer (LSP code action, MCP `apply_fix` tool call) can apply.
141///
142/// `confidence` is in `[0.0, 1.0]`. Phase-2 first-session emitters may
143/// produce empty `fixes` lists; richer fix generation is later-session
144/// scope per the dispatch.
145#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
146pub struct SuggestedFix {
147 /// Short user-facing label (e.g. `"convert string to int"`).
148 pub label: String,
149 /// Optional unified-diff fragment. Renderers that can apply diffs
150 /// (LSP, MCP) consume this directly. May be empty.
151 #[serde(skip_serializing_if = "Option::is_none", default)]
152 pub diff: Option<String>,
153 /// Confidence in the fix, `0.0..=1.0`. Renderers may rank by this.
154 pub confidence: f32,
155}
156
157impl SuggestedFix {
158 /// Construct a suggestion with a label and a confidence.
159 pub fn new(label: impl Into<String>, confidence: f32) -> Self {
160 Self {
161 label: label.into(),
162 diff: None,
163 confidence,
164 }
165 }
166
167 /// Attach a unified-diff fragment.
168 pub fn with_diff(mut self, diff: impl Into<String>) -> Self {
169 self.diff = Some(diff.into());
170 self
171 }
172}
173
174/// A token-budgeted context window — the smallest set of source spans
175/// needed to understand the diagnostic, with a token count.
176///
177/// Per ADR-006 §9.5. LLM consumers use this to bound the source they
178/// must include alongside the diagnostic. `tokens` is an estimate against
179/// the cl100k tokenizer (per ADR-006 §13.5 success metric).
180#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
181pub struct ContextWindow {
182 /// Estimated token count for the included spans (cl100k).
183 pub tokens: u32,
184 /// Spans that comprise the context window.
185 pub spans: Vec<ContextSpan>,
186}
187
188impl ContextWindow {
189 /// Construct an empty context window with a token budget of zero.
190 pub fn empty() -> Self {
191 Self {
192 tokens: 0,
193 spans: Vec::new(),
194 }
195 }
196}
197
198/// A span of source — a file plus an inclusive line range.
199#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
200pub struct ContextSpan {
201 /// File path; absent for synthetic / REPL.
202 #[serde(skip_serializing_if = "Option::is_none", default)]
203 pub file: Option<String>,
204 /// Inclusive 1-based line range `[start, end]`.
205 pub lines: [u32; 2],
206}
207
208/// The canonical LSDS diagnostic.
209///
210/// JSON shape matches ADR-006 §9.2. Field names are part of the public
211/// wire format; see crate-level docs for the stability contract.
212#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
213pub struct Diagnostic {
214 /// Stable diagnostic identifier — e.g. `"B0013"`, `"E0100"`. The
215 /// scheme matches the existing `BorrowErrorCode` (`B`-series for
216 /// borrow / lifetime / aliasing) and `ErrorCode` (`E`-series for
217 /// type, parse, semantic) namespaces.
218 pub diagnostic_id: String,
219 /// Severity bucket.
220 pub severity: Severity,
221 /// Primary source location.
222 pub location: Location,
223 /// Expected type at this site, when applicable. `None` for
224 /// non-type-related diagnostics (e.g. parse errors).
225 #[serde(skip_serializing_if = "Option::is_none", default)]
226 pub expected: Option<TypeWitness>,
227 /// Found type at this site, when applicable.
228 #[serde(skip_serializing_if = "Option::is_none", default)]
229 pub found: Option<TypeWitness>,
230 /// Human-readable message body (does NOT include the
231 /// `[B00XX]` prefix — that's the `diagnostic_id` field's job;
232 /// renderers prepend it on output).
233 pub message: String,
234 /// Ranked suggested fixes; may be empty.
235 #[serde(default)]
236 pub fixes: Vec<SuggestedFix>,
237 /// Token-budgeted context window for LLM consumers.
238 #[serde(skip_serializing_if = "Option::is_none", default)]
239 pub context_window: Option<ContextWindow>,
240 /// Citation pointing at the binding spec section that governs this
241 /// diagnostic. E.g. `"ADR-006-§1.1"` or `"ADR-005-§4"`.
242 #[serde(skip_serializing_if = "Option::is_none", default)]
243 pub rule: Option<String>,
244 /// Auxiliary notes. Each note has its own location (e.g. "borrow
245 /// originates here") so renderers can present them as related-info
246 /// callouts.
247 #[serde(default, skip_serializing_if = "Vec::is_empty")]
248 pub notes: Vec<DiagnosticNote>,
249}
250
251/// Auxiliary note attached to a diagnostic — e.g. "borrow originates
252/// here", "binding declared here". Mirrors the existing `ErrorNote`
253/// structure used by `ShapeError::SemanticError.location.notes`.
254#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
255pub struct DiagnosticNote {
256 /// Note message.
257 pub message: String,
258 /// Location the note refers to; `None` for synthetic notes.
259 #[serde(skip_serializing_if = "Option::is_none", default)]
260 pub location: Option<Location>,
261}
262
263impl DiagnosticNote {
264 /// Construct a note with a message and an optional location.
265 pub fn new(message: impl Into<String>, location: Option<Location>) -> Self {
266 Self {
267 message: message.into(),
268 location,
269 }
270 }
271}
272
273/// Builder for [`Diagnostic`]. Use this rather than struct literal at
274/// emission sites so future schema evolution doesn't ripple through.
275#[derive(Debug)]
276pub struct DiagnosticBuilder {
277 diagnostic_id: String,
278 severity: Severity,
279 location: Location,
280 expected: Option<TypeWitness>,
281 found: Option<TypeWitness>,
282 message: String,
283 fixes: Vec<SuggestedFix>,
284 context_window: Option<ContextWindow>,
285 rule: Option<String>,
286 notes: Vec<DiagnosticNote>,
287}
288
289impl DiagnosticBuilder {
290 /// Start building a diagnostic with the required minimum (id,
291 /// severity, location, message).
292 pub fn new(
293 diagnostic_id: impl Into<String>,
294 severity: Severity,
295 location: Location,
296 message: impl Into<String>,
297 ) -> Self {
298 Self {
299 diagnostic_id: diagnostic_id.into(),
300 severity,
301 location,
302 expected: None,
303 found: None,
304 message: message.into(),
305 fixes: Vec::new(),
306 context_window: None,
307 rule: None,
308 notes: Vec::new(),
309 }
310 }
311
312 /// Attach an `expected` type witness.
313 pub fn expected(mut self, witness: TypeWitness) -> Self {
314 self.expected = Some(witness);
315 self
316 }
317
318 /// Attach a `found` type witness.
319 pub fn found(mut self, witness: TypeWitness) -> Self {
320 self.found = Some(witness);
321 self
322 }
323
324 /// Append a suggested fix.
325 pub fn with_fix(mut self, fix: SuggestedFix) -> Self {
326 self.fixes.push(fix);
327 self
328 }
329
330 /// Attach a context window.
331 pub fn context_window(mut self, window: ContextWindow) -> Self {
332 self.context_window = Some(window);
333 self
334 }
335
336 /// Attach a rule citation (`"ADR-006-§1.1"` etc.).
337 pub fn rule(mut self, rule: impl Into<String>) -> Self {
338 self.rule = Some(rule.into());
339 self
340 }
341
342 /// Append an auxiliary note.
343 pub fn with_note(mut self, note: DiagnosticNote) -> Self {
344 self.notes.push(note);
345 self
346 }
347
348 /// Finalize.
349 pub fn build(self) -> Diagnostic {
350 Diagnostic {
351 diagnostic_id: self.diagnostic_id,
352 severity: self.severity,
353 location: self.location,
354 expected: self.expected,
355 found: self.found,
356 message: self.message,
357 fixes: self.fixes,
358 context_window: self.context_window,
359 rule: self.rule,
360 notes: self.notes,
361 }
362 }
363}
364
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn schema_version_is_one() {
        assert_eq!(SCHEMA_VERSION, 1);
    }

    /// A fully populated diagnostic survives serialize → deserialize.
    #[test]
    fn diagnostic_round_trips_through_json() {
        let location = Location::new(Some("src/main.shape".into()), 12, 4, 102, 145);
        let fix = SuggestedFix::new("convert string to int", 0.85)
            .with_diff("let x: int = parse_int(value)?");

        let original = DiagnosticBuilder::new(
            "B0013",
            Severity::Error,
            location,
            "expected int, found string",
        )
        .expected(TypeWitness::new("int", Some(json!(42))))
        .found(TypeWitness::new("string", Some(json!("hello"))))
        .with_fix(fix)
        .rule("ADR-006-§1.1")
        .build();

        let encoded = serde_json::to_string(&original).expect("serialize");
        let decoded: Diagnostic = serde_json::from_str(&encoded).expect("deserialize");
        assert_eq!(original, decoded);
    }

    /// A minimal diagnostic omits its unset optional keys and still
    /// round-trips.
    #[test]
    fn omitted_optional_fields_round_trip() {
        let original = DiagnosticBuilder::new(
            "E0100",
            Severity::Error,
            Location::synthetic(),
            "type mismatch",
        )
        .build();

        let encoded = serde_json::to_string(&original).expect("serialize");

        // Unset expected/found/context_window/rule/notes are skipped
        // entirely.
        let absent_keys = [
            "\"expected\"",
            "\"found\"",
            "\"context_window\"",
            "\"rule\"",
            "\"notes\"",
        ];
        for key in absent_keys.iter() {
            assert!(!encoded.contains(*key));
        }
        // `fixes` is only `default`, not skipped: an empty list is still
        // written out as `[]`.
        assert!(encoded.contains("\"fixes\":[]"));

        let decoded: Diagnostic = serde_json::from_str(&encoded).expect("deserialize");
        assert_eq!(original, decoded);
    }

    /// Every severity serializes as its lower-cased name.
    #[test]
    fn severity_serializes_lowercase() {
        let cases = [
            (Severity::Error, "\"error\""),
            (Severity::Warning, "\"warning\""),
            (Severity::Info, "\"info\""),
            (Severity::Hint, "\"hint\""),
        ];
        for (severity, expected) in cases.iter() {
            let encoded = serde_json::to_string(severity).unwrap();
            assert_eq!(encoded, *expected);
        }
    }
}
432}