facet_kdl/
parser.rs

1//! KDL parser implementation using FormatParser trait.
2//!
3//! KDL documents consist of nodes, where each node has:
4//! - A name (identifier)
5//! - Positional arguments (values after the name)
6//! - Properties (key=value pairs)
7//! - Child nodes (inside braces)
8//!
9//! This maps to the FormatParser model as:
10//! - Node → StructStart(Element) ... StructEnd
11//! - Arguments → FieldKey(Argument) + Scalar
12//! - Properties → FieldKey(Property) + Scalar
13//! - Children → FieldKey(Child) + nested node events
14
15extern crate alloc;
16
17use alloc::borrow::Cow;
18use alloc::string::String;
19use alloc::vec::Vec;
20use core::fmt;
21
22use facet_core::Shape;
23use facet_format::{
24    ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
25    ProbeStream, ScalarValue,
26};
27use miette::{LabeledSpan, NamedSource};
28
29/// KDL parser that converts KDL documents to FormatParser events.
30pub struct KdlParser<'de> {
31    events: Vec<ParseEvent<'de>>,
32    /// Source spans for each event (parallel to events vec).
33    spans: Vec<facet_reflect::Span>,
34    idx: usize,
35    pending_error: Option<KdlError>,
36}
37
38impl<'de> KdlParser<'de> {
39    /// Create a new KDL parser from input string.
40    pub fn new(input: &'de str) -> Self {
41        match build_events(input) {
42            Ok((events, spans)) => Self {
43                events,
44                spans,
45                idx: 0,
46                pending_error: None,
47            },
48            Err(err) => Self {
49                events: Vec::new(),
50                spans: Vec::new(),
51                idx: 0,
52                pending_error: Some(err),
53            },
54        }
55    }
56}
57
58/// Error type for KDL parsing.
59///
60/// This error type preserves the original `kdl::KdlError` for parse errors,
61/// allowing full miette diagnostic information to be displayed including
62/// source spans, labels, and help text.
63#[derive(Debug, Clone)]
64pub enum KdlError {
65    /// Parse error from the kdl crate (preserved for full diagnostics).
66    ParseError(kdl::KdlError),
67    /// Unexpected end of input.
68    UnexpectedEof,
69    /// Invalid KDL structure.
70    InvalidStructure(String),
71    /// Invalid UTF-8 in input.
72    InvalidUtf8(core::str::Utf8Error),
73}
74
75impl fmt::Display for KdlError {
76    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
77        match self {
78            KdlError::ParseError(e) => write!(f, "{}", e),
79            KdlError::UnexpectedEof => write!(f, "Unexpected end of KDL"),
80            KdlError::InvalidStructure(msg) => write!(f, "Invalid KDL structure: {}", msg),
81            KdlError::InvalidUtf8(e) => write!(f, "Invalid UTF-8: {}", e),
82        }
83    }
84}
85
86impl std::error::Error for KdlError {
87    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
88        match self {
89            KdlError::ParseError(e) => Some(e),
90            KdlError::InvalidUtf8(e) => Some(e),
91            _ => None,
92        }
93    }
94}
95
96impl miette::Diagnostic for KdlError {
97    fn code<'a>(&'a self) -> Option<Box<dyn fmt::Display + 'a>> {
98        match self {
99            KdlError::ParseError(e) => e.code(),
100            _ => None,
101        }
102    }
103
104    fn severity(&self) -> Option<miette::Severity> {
105        match self {
106            KdlError::ParseError(e) => e.severity(),
107            _ => Some(miette::Severity::Error),
108        }
109    }
110
111    fn help<'a>(&'a self) -> Option<Box<dyn fmt::Display + 'a>> {
112        match self {
113            KdlError::ParseError(e) => e.help(),
114            _ => None,
115        }
116    }
117
118    fn url<'a>(&'a self) -> Option<Box<dyn fmt::Display + 'a>> {
119        match self {
120            KdlError::ParseError(e) => e.url(),
121            _ => None,
122        }
123    }
124
125    fn source_code(&self) -> Option<&dyn miette::SourceCode> {
126        match self {
127            KdlError::ParseError(e) => e.source_code(),
128            _ => None,
129        }
130    }
131
132    fn labels(&self) -> Option<Box<dyn Iterator<Item = miette::LabeledSpan> + '_>> {
133        match self {
134            KdlError::ParseError(e) => e.labels(),
135            _ => None,
136        }
137    }
138
139    fn related<'a>(&'a self) -> Option<Box<dyn Iterator<Item = &'a dyn miette::Diagnostic> + 'a>> {
140        match self {
141            KdlError::ParseError(e) => e.related(),
142            _ => None,
143        }
144    }
145
146    fn diagnostic_source(&self) -> Option<&dyn miette::Diagnostic> {
147        match self {
148            KdlError::ParseError(e) => e.diagnostic_source(),
149            _ => None,
150        }
151    }
152}
153
154// Type context diagnostic is provided by facet_path::PathDiagnostic
155
156/// A KDL deserialization error with source code context for rich diagnostics.
157///
158/// This wrapper type carries the original input alongside the error, enabling
159/// miette to display the source with highlighted error locations. It also
160/// optionally carries the target Rust type to show what structure was expected.
161#[derive(Debug)]
162pub struct KdlDeserializeError {
163    /// The underlying deserialization error.
164    pub inner: facet_format::DeserializeError<KdlError>,
165    /// The original KDL source input (named for syntax highlighting).
166    pub source_input: NamedSource<String>,
167    /// The target type we were deserializing into (for help messages).
168    target_shape: Option<&'static Shape>,
169    /// Type context diagnostic showing the target Rust type (computed once at construction).
170    type_context: Option<Box<facet_path::pretty::PathDiagnostic>>,
171}
172
173impl KdlDeserializeError {
174    /// Create a new KdlDeserializeError with the target shape for better diagnostics.
175    pub fn new(
176        inner: facet_format::DeserializeError<KdlError>,
177        source_input: String,
178        target_shape: Option<&'static Shape>,
179    ) -> Self {
180        // Compute type context upfront (only for non-parse errors)
181        let type_context = Self::compute_type_context(&inner, target_shape);
182
183        Self {
184            inner,
185            // Name with .kdl extension so miette-arborium can syntax highlight
186            source_input: NamedSource::new("input.kdl", source_input),
187            target_shape,
188            type_context,
189        }
190    }
191
192    /// Compute the type context diagnostic if applicable.
193    fn compute_type_context(
194        inner: &facet_format::DeserializeError<KdlError>,
195        target_shape: Option<&'static Shape>,
196    ) -> Option<Box<facet_path::pretty::PathDiagnostic>> {
197        // Don't show type context for parse errors - syntax errors aren't about types
198        if matches!(
199            inner,
200            facet_format::DeserializeError::Parser(KdlError::ParseError(_))
201        ) {
202            return None;
203        }
204
205        let shape = target_shape?;
206
207        // Get the path from the inner error (if available)
208        let path = inner.path().cloned().unwrap_or_else(facet_path::Path::new);
209
210        // For MissingField errors, extract the field name so we can highlight
211        // the specific missing field in the type definition
212        let leaf_field = match inner {
213            facet_format::DeserializeError::MissingField { field, .. } => Some(*field),
214            _ => None,
215        };
216
217        // Use facet-path's PathDiagnostic to show the type with the error location highlighted
218        Some(Box::new(path.to_diagnostic(
219            shape,
220            alloc::format!("expected type `{}`", shape.type_identifier),
221            None,
222            leaf_field,
223        )))
224    }
225
226    /// Get the inner kdl::KdlError if this is a parse error.
227    fn get_kdl_parse_error(&self) -> Option<&kdl::KdlError> {
228        match &self.inner {
229            facet_format::DeserializeError::Parser(KdlError::ParseError(e)) => Some(e),
230            _ => None,
231        }
232    }
233
234    /// Get the type context diagnostic showing the target Rust type.
235    fn get_type_context(&self) -> Option<&facet_path::pretty::PathDiagnostic> {
236        self.type_context.as_deref()
237    }
238}
239
240impl fmt::Display for KdlDeserializeError {
241    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
242        write!(f, "{}", self.inner)
243    }
244}
245
246impl std::error::Error for KdlDeserializeError {
247    // Don't return inner as source - we're a wrapper providing source code context,
248    // not a cause chain. Returning inner here causes duplicate error messages since
249    // our Display just delegates to inner.
250}
251
252impl miette::Diagnostic for KdlDeserializeError {
253    fn code<'a>(&'a self) -> Option<Box<dyn fmt::Display + 'a>> {
254        self.inner.code()
255    }
256
257    fn severity(&self) -> Option<miette::Severity> {
258        self.inner.severity()
259    }
260
261    fn help<'a>(&'a self) -> Option<Box<dyn fmt::Display + 'a>> {
262        // Check for "expected scalar, got struct" which suggests property vs child mismatch
263        if let facet_format::DeserializeError::ExpectedScalarGotStruct {
264            path: Some(path), ..
265        } = &self.inner
266            && let Some(target_shape) = self.target_shape
267            && let Some(field) = path.resolve_leaf_field(target_shape)
268            && field.get_attr(Some("kdl"), "property").is_some()
269        {
270            return Some(Box::new(alloc::format!(
271                "field `{}` is marked with `#[facet(kdl::property)]`, so use `{}=\"value\"` syntax instead of `{} \"value\"`",
272                field.name,
273                field.name,
274                field.name
275            )));
276        }
277        self.inner.help()
278    }
279
280    fn url<'a>(&'a self) -> Option<Box<dyn fmt::Display + 'a>> {
281        self.inner.url()
282    }
283
284    fn source_code(&self) -> Option<&dyn miette::SourceCode> {
285        // Always use our named source (with .kdl extension for syntax highlighting)
286        Some(&self.source_input)
287    }
288
289    fn labels(&self) -> Option<Box<dyn Iterator<Item = LabeledSpan> + '_>> {
290        // For parse errors, extract labels from the kdl diagnostics
291        // (kdl::KdlError stores its diagnostics in related(), not labels())
292        if let Some(kdl_error) = self.get_kdl_parse_error() {
293            // Collect all labels from all diagnostics
294            let labels: Vec<LabeledSpan> = kdl_error
295                .diagnostics
296                .iter()
297                .filter_map(|diag| diag.labels())
298                .flatten()
299                .collect();
300            if !labels.is_empty() {
301                return Some(Box::new(labels.into_iter()));
302            }
303        }
304        // For other errors, forward to inner
305        self.inner.labels()
306    }
307
308    fn related<'a>(&'a self) -> Option<Box<dyn Iterator<Item = &'a dyn miette::Diagnostic> + 'a>> {
309        // Show the target Rust type as a related diagnostic (when available)
310        // Don't forward to inner.related() - we want all rendering to use our NamedSource
311        self.get_type_context().map(|type_ctx| {
312            Box::new(core::iter::once(type_ctx as &dyn miette::Diagnostic))
313                as Box<dyn Iterator<Item = &'a dyn miette::Diagnostic> + 'a>
314        })
315    }
316
317    fn diagnostic_source(&self) -> Option<&dyn miette::Diagnostic> {
318        // Don't forward to inner - we want all rendering to use our NamedSource
319        // for consistent file names and syntax highlighting
320        None
321    }
322}
323
324impl<'de> FormatParser<'de> for KdlParser<'de> {
325    type Error = KdlError;
326    type Probe<'a>
327        = KdlProbe<'de>
328    where
329        Self: 'a;
330
331    fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
332        if let Some(err) = &self.pending_error {
333            return Err(err.clone());
334        }
335        if self.idx >= self.events.len() {
336            return Ok(None);
337        }
338        let event = self.events[self.idx].clone();
339        self.idx += 1;
340        Ok(Some(event))
341    }
342
343    fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
344        if let Some(err) = &self.pending_error {
345            return Err(err.clone());
346        }
347        Ok(self.events.get(self.idx).cloned())
348    }
349
350    fn skip_value(&mut self) -> Result<(), Self::Error> {
351        let mut depth = 0usize;
352        let mut pending_field_value = false;
353
354        loop {
355            let event = self.next_event()?.ok_or(KdlError::UnexpectedEof)?;
356            match event {
357                ParseEvent::StructStart(_) | ParseEvent::SequenceStart(_) => {
358                    pending_field_value = false;
359                    depth += 1;
360                }
361                ParseEvent::StructEnd | ParseEvent::SequenceEnd => {
362                    if depth == 0 {
363                        break;
364                    } else {
365                        depth -= 1;
366                        if depth == 0 && !pending_field_value {
367                            break;
368                        }
369                    }
370                }
371                ParseEvent::Scalar(_) | ParseEvent::VariantTag(_) => {
372                    if depth == 0 && !pending_field_value {
373                        break;
374                    }
375                    pending_field_value = false;
376                }
377                ParseEvent::FieldKey(_) | ParseEvent::OrderedField => {
378                    pending_field_value = true;
379                }
380            }
381        }
382        Ok(())
383    }
384
385    fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
386        let evidence = self.build_probe();
387        Ok(KdlProbe { evidence, idx: 0 })
388    }
389
390    fn current_span(&self) -> Option<facet_reflect::Span> {
391        // Return the span of the most recently consumed event (idx was incremented after consuming)
392        if self.idx > 0 && self.idx <= self.spans.len() {
393            Some(self.spans[self.idx - 1])
394        } else {
395            None
396        }
397    }
398}
399
400impl<'de> KdlParser<'de> {
401    /// Build field evidence by looking ahead at remaining events.
402    fn build_probe(&self) -> Vec<FieldEvidence<'de>> {
403        let mut evidence = Vec::new();
404
405        if self.idx >= self.events.len() {
406            return evidence;
407        }
408
409        // Check if we're about to read a struct
410        if !matches!(
411            self.events.get(self.idx),
412            Some(ParseEvent::StructStart(ContainerKind::Element))
413        ) {
414            return evidence;
415        }
416
417        // Scan the struct's fields
418        let mut i = self.idx + 1;
419        let mut depth = 0usize;
420
421        while i < self.events.len() {
422            match &self.events[i] {
423                ParseEvent::StructStart(_) | ParseEvent::SequenceStart(_) => {
424                    depth += 1;
425                    i += 1;
426                }
427                ParseEvent::StructEnd | ParseEvent::SequenceEnd => {
428                    if depth == 0 {
429                        break;
430                    }
431                    depth -= 1;
432                    i += 1;
433                }
434                ParseEvent::FieldKey(key) if depth == 0 => {
435                    // Top-level field - check if next is scalar
436                    let scalar_value = if let Some(ParseEvent::Scalar(sv)) = self.events.get(i + 1)
437                    {
438                        Some(sv.clone())
439                    } else {
440                        None
441                    };
442
443                    if let Some(sv) = scalar_value {
444                        evidence.push(FieldEvidence::with_scalar_value(
445                            key.name.clone(),
446                            key.location,
447                            None,
448                            sv,
449                            key.namespace.clone(),
450                        ));
451                    } else {
452                        evidence.push(FieldEvidence::new(
453                            key.name.clone(),
454                            key.location,
455                            None,
456                            key.namespace.clone(),
457                        ));
458                    }
459                    i += 1;
460                }
461                _ => {
462                    i += 1;
463                }
464            }
465        }
466
467        evidence
468    }
469}
470
471/// Probe stream for KDL parser.
472pub struct KdlProbe<'de> {
473    evidence: Vec<FieldEvidence<'de>>,
474    idx: usize,
475}
476
477impl<'de> ProbeStream<'de> for KdlProbe<'de> {
478    type Error = KdlError;
479
480    fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
481        if self.idx >= self.evidence.len() {
482            Ok(None)
483        } else {
484            let ev = self.evidence[self.idx].clone();
485            self.idx += 1;
486            Ok(Some(ev))
487        }
488    }
489}
490
491/// A buffer for events and their corresponding source spans.
492struct EventBuffer<'de> {
493    events: Vec<ParseEvent<'de>>,
494    spans: Vec<facet_reflect::Span>,
495}
496
497impl<'de> EventBuffer<'de> {
498    fn new() -> Self {
499        Self {
500            events: Vec::new(),
501            spans: Vec::new(),
502        }
503    }
504
505    /// Push an event with its source span.
506    fn push(&mut self, event: ParseEvent<'de>, span: miette::SourceSpan) {
507        self.events.push(event);
508        self.spans.push(span.into());
509    }
510
511    fn into_parts(self) -> (Vec<ParseEvent<'de>>, Vec<facet_reflect::Span>) {
512        (self.events, self.spans)
513    }
514}
515
516/// Build ParseEvents from KDL input, along with source spans for each event.
517fn build_events<'de>(
518    input: &str,
519) -> Result<(Vec<ParseEvent<'de>>, Vec<facet_reflect::Span>), KdlError> {
520    let doc: kdl::KdlDocument = input.parse().map_err(KdlError::ParseError)?;
521
522    let mut buf = EventBuffer::new();
523
524    // A KDL document is a sequence of nodes at the root level.
525    // We always wrap root nodes in a document struct so that the schema (Rust types
526    // with kdl::* attributes) determines how the document is interpreted, not the
527    // document structure itself.
528    //
529    // This means:
530    // - `kdl::children` fields will receive root nodes that match via singularization
531    // - `kdl::child` fields will receive a specific named root node
532    // - For a struct that IS the root node, use a wrapper: `config { ... }`
533    let nodes = doc.nodes();
534
535    if nodes.is_empty() {
536        // Empty document - emit empty struct with document span
537        buf.push(ParseEvent::StructStart(ContainerKind::Element), doc.span());
538        buf.push(ParseEvent::StructEnd, doc.span());
539    } else {
540        // Wrap all root nodes in a document struct
541        // Each node becomes a child field
542        buf.push(ParseEvent::StructStart(ContainerKind::Element), doc.span());
543        for node in nodes {
544            let key = FieldKey::new(
545                Cow::Owned(node.name().value().to_string()),
546                FieldLocationHint::Child,
547            );
548            buf.push(ParseEvent::FieldKey(key), node.name().span());
549            emit_node_events(node, &mut buf);
550        }
551        buf.push(ParseEvent::StructEnd, doc.span());
552    }
553
554    Ok(buf.into_parts())
555}
556
557/// Emit ParseEvents for a single KDL node.
558///
559/// Every KDL node is emitted as a struct. The node name is emitted as `_node_name`,
560/// arguments become `_arg` (single) or `_arguments` (sequence for multiple),
561/// properties become named fields with `FieldLocationHint::Property`, and children
562/// become fields with `FieldLocationHint::Child`.
563fn emit_node_events<'de>(node: &kdl::KdlNode, buf: &mut EventBuffer<'de>) {
564    let entries = node.entries();
565    let children = node.children();
566    let node_span = node.span();
567
568    let args: Vec<_> = entries.iter().filter(|e| e.name().is_none()).collect();
569    let props: Vec<_> = entries.iter().filter(|e| e.name().is_some()).collect();
570    let has_children = children.is_some_and(|c| !c.nodes().is_empty());
571
572    // Case 1: Node with no entries and no children → emit empty struct
573    // Still emit node name for kdl::node_name support
574    if args.is_empty() && props.is_empty() && !has_children {
575        buf.push(ParseEvent::StructStart(ContainerKind::Element), node_span);
576        // Emit node name for kdl::node_name fields
577        let node_name_key = FieldKey::new(Cow::Borrowed("_node_name"), FieldLocationHint::Argument);
578        buf.push(ParseEvent::FieldKey(node_name_key), node.name().span());
579        buf.push(
580            ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
581                node.name().value().to_string(),
582            ))),
583            node.name().span(),
584        );
585        buf.push(ParseEvent::StructEnd, node_span);
586        return;
587    }
588
589    // Case 2: Complex node → emit as struct with fields
590    buf.push(ParseEvent::StructStart(ContainerKind::Element), node_span);
591
592    // Emit node name first for kdl::node_name fields
593    let node_name_key = FieldKey::new(Cow::Borrowed("_node_name"), FieldLocationHint::Argument);
594    buf.push(ParseEvent::FieldKey(node_name_key), node.name().span());
595    buf.push(
596        ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
597            node.name().value().to_string(),
598        ))),
599        node.name().span(),
600    );
601
602    // Emit positional arguments
603    // - Single argument: emit as `_arg` scalar
604    // - Multiple arguments: emit as `_arguments` sequence AND as individual `_arg` scalars
605    //   (the sequence is for kdl::arguments, individual is for backwards compat)
606    if !args.is_empty() {
607        // Always emit _arguments as a sequence for kdl::arguments (plural) support
608        let args_key = FieldKey::new(Cow::Borrowed("_arguments"), FieldLocationHint::Argument);
609        // Use span of first argument for the sequence key
610        buf.push(ParseEvent::FieldKey(args_key), args[0].span());
611        buf.push(
612            ParseEvent::SequenceStart(ContainerKind::Element),
613            args[0].span(),
614        );
615        for entry in &args {
616            emit_kdl_value(entry, buf);
617        }
618        // Use span of last argument for sequence end
619        buf.push(
620            ParseEvent::SequenceEnd,
621            args.last().map(|e| e.span()).unwrap_or(node_span),
622        );
623
624        // Also emit individual arguments for kdl::argument (singular) support
625        if args.len() == 1 {
626            let key = FieldKey::new(Cow::Borrowed("_arg"), FieldLocationHint::Argument);
627            buf.push(ParseEvent::FieldKey(key), args[0].span());
628            emit_kdl_value(args[0], buf);
629        } else {
630            for (idx, entry) in args.iter().enumerate() {
631                let key = FieldKey::new(Cow::Owned(idx.to_string()), FieldLocationHint::Argument);
632                buf.push(ParseEvent::FieldKey(key), entry.span());
633                emit_kdl_value(entry, buf);
634            }
635        }
636    }
637
638    // Emit properties
639    for entry in &props {
640        let name = entry.name().unwrap();
641        let key = FieldKey::new(
642            Cow::Owned(name.value().to_string()),
643            FieldLocationHint::Property,
644        );
645        buf.push(ParseEvent::FieldKey(key), name.span());
646        emit_kdl_value(entry, buf);
647    }
648
649    // Emit children - mark them as child nodes
650    if let Some(children_doc) = children {
651        for child in children_doc.nodes() {
652            let key = FieldKey::new(
653                Cow::Owned(child.name().value().to_string()),
654                FieldLocationHint::Child,
655            );
656            buf.push(ParseEvent::FieldKey(key), child.name().span());
657            emit_node_events(child, buf);
658        }
659    }
660
661    buf.push(ParseEvent::StructEnd, node_span);
662}
663
664/// Emit a KDL entry's value as a ParseEvent scalar, with source span.
665fn emit_kdl_value<'de>(entry: &kdl::KdlEntry, buf: &mut EventBuffer<'de>) {
666    let value = entry.value();
667    let span = entry.span();
668    let scalar = match value {
669        kdl::KdlValue::Null => ScalarValue::Null,
670        kdl::KdlValue::Bool(b) => ScalarValue::Bool(*b),
671        kdl::KdlValue::Integer(n) => {
672            // KdlValue::Integer contains an i128 directly
673            let n: i128 = *n;
674            if let Ok(i) = i64::try_from(n) {
675                if i >= 0 {
676                    ScalarValue::U64(i as u64)
677                } else {
678                    ScalarValue::I64(i)
679                }
680            } else if let Ok(u) = u64::try_from(n) {
681                ScalarValue::U64(u)
682            } else {
683                ScalarValue::I128(n)
684            }
685        }
686        kdl::KdlValue::Float(f) => ScalarValue::F64(*f),
687        kdl::KdlValue::String(s) => ScalarValue::Str(Cow::Owned(s.clone())),
688    };
689    buf.push(ParseEvent::Scalar(scalar), span);
690}