facet_yaml/
parser.rs

1//! Streaming YAML parser implementing the FormatParser trait.
2//!
3//! This parser uses saphyr-parser's event-based API and translates YAML events
4//! into the `ParseEvent` format expected by `facet-format`'s deserializer.
5
6extern crate alloc;
7
8use alloc::{
9    borrow::Cow,
10    format,
11    string::{String, ToString},
12    vec::Vec,
13};
14
15use facet_format::{
16    ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
17    ProbeStream, ScalarValue,
18};
19use saphyr_parser::{Event, Parser, ScalarStyle, Span as SaphyrSpan, SpannedEventReceiver};
20
21use crate::error::{SpanExt, YamlError, YamlErrorKind};
22use facet_reflect::Span;
23
24// ============================================================================
25// Event wrapper with owned strings
26// ============================================================================
27
/// A YAML event with owned string data.
///
/// Mirrors the variants of saphyr's borrowed `Event` that the collector keeps,
/// converted to owned data so they can be stored and replayed. Tags reported
/// by saphyr are dropped during conversion; the source span is stored
/// alongside the event in `SpannedEvent`, not here.
#[derive(Debug, Clone)]
#[allow(dead_code)] // Some variants/fields reserved for future anchor/alias support
enum OwnedEvent {
    StreamStart,
    StreamEnd,
    DocumentStart,
    DocumentEnd,
    // Payload is the id forwarded unchanged from saphyr's `Event::Alias`.
    Alias(usize),
    Scalar {
        // Scalar text as delivered by saphyr, copied into an owned `String`.
        value: String,
        // Presentation style reported by saphyr; drives implicit typing in
        // `scalar_to_value`.
        style: ScalarStyle,
        // Anchor id forwarded from saphyr.
        // NOTE(review): presumably 0 means "no anchor" — confirm against saphyr-parser docs.
        anchor: usize,
    },
    SequenceStart {
        // Anchor id forwarded from saphyr (see `Scalar::anchor`).
        anchor: usize,
    },
    SequenceEnd,
    MappingStart {
        // Anchor id forwarded from saphyr (see `Scalar::anchor`).
        anchor: usize,
    },
    MappingEnd,
}
52
/// An [`OwnedEvent`] paired with the saphyr span it was reported at.
#[derive(Debug, Clone)]
struct SpannedEvent {
    // The owned event payload.
    event: OwnedEvent,
    // Source location reported by saphyr for this event; converted to a
    // `facet_reflect::Span` when the event is consumed or reported in an error.
    span: SaphyrSpan,
}
58
59// ============================================================================
60// Event Collector
61// ============================================================================
62
/// Collects all events from the parser upfront.
///
/// This is necessary because saphyr-parser doesn't support seeking/rewinding,
/// but we need to replay events for flatten deserialization (probing scans
/// ahead in the buffered events without consuming them).
struct EventCollector {
    // Events in the order saphyr delivered them.
    events: Vec<SpannedEvent>,
}
69
impl EventCollector {
    /// Create a collector with an empty event buffer.
    fn new() -> Self {
        Self { events: Vec::new() }
    }
}
75
76impl SpannedEventReceiver<'_> for EventCollector {
77    fn on_event(&mut self, event: Event<'_>, span: SaphyrSpan) {
78        let owned = match event {
79            Event::StreamStart => OwnedEvent::StreamStart,
80            Event::StreamEnd => OwnedEvent::StreamEnd,
81            Event::DocumentStart(_) => OwnedEvent::DocumentStart,
82            Event::DocumentEnd => OwnedEvent::DocumentEnd,
83            Event::Alias(id) => OwnedEvent::Alias(id),
84            Event::Scalar(value, style, anchor, _tag) => OwnedEvent::Scalar {
85                value: value.into_owned(),
86                style,
87                anchor,
88            },
89            Event::SequenceStart(anchor, _tag) => OwnedEvent::SequenceStart { anchor },
90            Event::SequenceEnd => OwnedEvent::SequenceEnd,
91            Event::MappingStart(anchor, _tag) => OwnedEvent::MappingStart { anchor },
92            Event::MappingEnd => OwnedEvent::MappingEnd,
93            Event::Nothing => return, // Skip internal events
94        };
95        log::trace!("YAML event: {owned:?}");
96        self.events.push(SpannedEvent { event: owned, span });
97    }
98}
99
100// ============================================================================
101// Parser State
102// ============================================================================
103
/// Context for tracking where we are in the YAML structure.
///
/// One entry is pushed onto the parser's stack per open container; the top
/// entry decides whether the next scalar is reported as a field key or as a
/// value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ContextState {
    /// Inside a mapping, expecting a key or end
    MappingKey,
    /// Inside a mapping, expecting a value
    MappingValue,
    /// Inside a sequence, expecting a value or end
    SequenceValue,
}
114
115// ============================================================================
116// YAML Parser
117// ============================================================================
118
/// Streaming YAML parser backed by `saphyr-parser`.
///
/// This parser translates YAML's event stream into the `ParseEvent` format
/// expected by `facet-format`'s deserializer.
///
/// Although events are handed out one at a time, the whole input is run
/// through saphyr when the parser is constructed and the resulting events are
/// buffered, so that probing can look ahead without consuming anything.
pub struct YamlParser<'de> {
    /// Original input string.
    input: &'de str,
    /// Pre-parsed events from saphyr-parser.
    events: Vec<SpannedEvent>,
    /// Current position in the event stream (index into `events`).
    pos: usize,
    /// Stack tracking nested containers (one entry per open mapping/sequence).
    stack: Vec<ContextState>,
    /// Cached event for peek_event(); handed out by the next `next_event()` call.
    event_peek: Option<ParseEvent<'de>>,
    /// Whether we've consumed the stream/document start events.
    started: bool,
    /// The span of the most recently consumed event (for error reporting).
    last_span: Option<Span>,
}
139
140impl<'de> YamlParser<'de> {
141    /// Create a new YAML parser from a string slice.
142    pub fn new(input: &'de str) -> Result<Self, YamlError> {
143        let mut collector = EventCollector::new();
144        Parser::new_from_str(input)
145            .load(&mut collector, true)
146            .map_err(|e| {
147                YamlError::without_span(YamlErrorKind::Parse(format!("{e}"))).with_source(input)
148            })?;
149
150        Ok(Self {
151            input,
152            events: collector.events,
153            pos: 0,
154            stack: Vec::new(),
155            event_peek: None,
156            started: false,
157            last_span: None,
158        })
159    }
160
    /// Get the original input string.
    ///
    /// This is the same slice that was passed to [`YamlParser::new`].
    pub fn input(&self) -> &'de str {
        self.input
    }
165
166    /// Consume and return the current event.
167    fn next_raw(&mut self) -> Option<SpannedEvent> {
168        if self.pos < self.events.len() {
169            let event = self.events[self.pos].clone();
170            self.last_span = Some(Span::from_saphyr_span(&event.span));
171            self.pos += 1;
172            Some(event)
173        } else {
174            None
175        }
176    }
177
178    /// Skip stream/document start events.
179    fn skip_preamble(&mut self) {
180        while self.pos < self.events.len() {
181            match &self.events[self.pos].event {
182                OwnedEvent::StreamStart | OwnedEvent::DocumentStart => {
183                    self.pos += 1;
184                }
185                _ => break,
186            }
187        }
188        self.started = true;
189    }
190
191    /// Convert a YAML scalar to a ScalarValue.
192    fn scalar_to_value(&self, value: &str, style: ScalarStyle) -> ScalarValue<'de> {
193        // If quoted, always treat as string
194        if matches!(style, ScalarStyle::SingleQuoted | ScalarStyle::DoubleQuoted) {
195            return ScalarValue::Str(Cow::Owned(value.to_string()));
196        }
197
198        // Check for null
199        if is_yaml_null(value) {
200            return ScalarValue::Null;
201        }
202
203        // Check for boolean
204        if let Some(b) = parse_yaml_bool(value) {
205            return ScalarValue::Bool(b);
206        }
207
208        // Check for integer
209        if let Ok(n) = value.parse::<i64>() {
210            return ScalarValue::I64(n);
211        }
212
213        // Check for float
214        if let Ok(f) = value.parse::<f64>() {
215            return ScalarValue::F64(f);
216        }
217
218        // Default to string
219        ScalarValue::Str(Cow::Owned(value.to_string()))
220    }
221
222    /// Produce the next parse event.
223    fn produce_event(&mut self) -> Result<Option<ParseEvent<'de>>, YamlError> {
224        // Skip preamble if we haven't started
225        if !self.started {
226            self.skip_preamble();
227        }
228
229        // Check current context to know what to expect
230        let context = self.stack.last().copied();
231
232        if self.pos >= self.events.len() {
233            // EOF - we're done
234            return Ok(None);
235        }
236
237        // Clone the event to avoid borrow issues
238        let raw_event = self.events[self.pos].clone();
239
240        match (&raw_event.event, context) {
241            // Stream/Document end - skip and continue
242            (OwnedEvent::StreamEnd, _) | (OwnedEvent::DocumentEnd, _) => {
243                self.next_raw();
244                self.produce_event()
245            }
246
247            // Mapping start
248            (OwnedEvent::MappingStart { .. }, _) => {
249                self.next_raw();
250                self.stack.push(ContextState::MappingKey);
251                Ok(Some(ParseEvent::StructStart(ContainerKind::Object)))
252            }
253
254            // Mapping end
255            (OwnedEvent::MappingEnd, _) => {
256                self.next_raw();
257                self.stack.pop();
258                Ok(Some(ParseEvent::StructEnd))
259            }
260
261            // Sequence start
262            (OwnedEvent::SequenceStart { .. }, _) => {
263                self.next_raw();
264                self.stack.push(ContextState::SequenceValue);
265                Ok(Some(ParseEvent::SequenceStart(ContainerKind::Array)))
266            }
267
268            // Sequence end
269            (OwnedEvent::SequenceEnd, _) => {
270                self.next_raw();
271                self.stack.pop();
272                Ok(Some(ParseEvent::SequenceEnd))
273            }
274
275            // Scalar in mapping key position -> emit FieldKey
276            (OwnedEvent::Scalar { value, .. }, Some(ContextState::MappingKey)) => {
277                let key = value.clone();
278                self.next_raw();
279                // Transition to expecting value
280                if let Some(ctx) = self.stack.last_mut() {
281                    *ctx = ContextState::MappingValue;
282                }
283                Ok(Some(ParseEvent::FieldKey(FieldKey::new(
284                    Cow::Owned(key),
285                    FieldLocationHint::KeyValue,
286                ))))
287            }
288
289            // Scalar in mapping value position -> emit Scalar and transition back to key
290            (OwnedEvent::Scalar { value, style, .. }, Some(ContextState::MappingValue)) => {
291                let value = value.clone();
292                let style = *style;
293                self.next_raw();
294                // Transition back to expecting key
295                if let Some(ctx) = self.stack.last_mut() {
296                    *ctx = ContextState::MappingKey;
297                }
298                Ok(Some(ParseEvent::Scalar(
299                    self.scalar_to_value(&value, style),
300                )))
301            }
302
303            // Scalar in sequence -> emit Scalar
304            (OwnedEvent::Scalar { value, style, .. }, Some(ContextState::SequenceValue)) => {
305                let value = value.clone();
306                let style = *style;
307                self.next_raw();
308                Ok(Some(ParseEvent::Scalar(
309                    self.scalar_to_value(&value, style),
310                )))
311            }
312
313            // Scalar at root level (no context) -> emit Scalar
314            (OwnedEvent::Scalar { value, style, .. }, None) => {
315                let value = value.clone();
316                let style = *style;
317                self.next_raw();
318                Ok(Some(ParseEvent::Scalar(
319                    self.scalar_to_value(&value, style),
320                )))
321            }
322
323            // Alias - not fully supported yet
324            (OwnedEvent::Alias(_), _) => {
325                let span = Span::from_saphyr_span(&raw_event.span);
326                Err(YamlError::new(
327                    YamlErrorKind::Unsupported("YAML aliases are not yet supported".into()),
328                    span,
329                )
330                .with_source(self.input))
331            }
332
333            // Unexpected combinations
334            _ => {
335                let span = Span::from_saphyr_span(&raw_event.span);
336                Err(YamlError::new(
337                    YamlErrorKind::UnexpectedEvent {
338                        got: format!("{:?}", raw_event.event),
339                        expected: "valid YAML structure",
340                    },
341                    span,
342                )
343                .with_source(self.input))
344            }
345        }
346    }
347
348    /// Skip the current value (for unknown fields).
349    fn skip_current_value(&mut self) -> Result<(), YamlError> {
350        if self.pos >= self.events.len() {
351            return Ok(());
352        }
353
354        let raw_event = self.events[self.pos].clone();
355
356        match &raw_event.event {
357            OwnedEvent::Scalar { .. } => {
358                self.next_raw();
359                // Update context if in mapping value position
360                if let Some(ctx) = self.stack.last_mut()
361                    && *ctx == ContextState::MappingValue
362                {
363                    *ctx = ContextState::MappingKey;
364                }
365                Ok(())
366            }
367            OwnedEvent::MappingStart { .. } => {
368                self.next_raw();
369                let mut depth = 1;
370                while depth > 0 {
371                    let Some(event) = self.next_raw() else {
372                        return Err(YamlError::without_span(YamlErrorKind::UnexpectedEof {
373                            expected: "mapping end",
374                        })
375                        .with_source(self.input));
376                    };
377                    match &event.event {
378                        OwnedEvent::MappingStart { .. } => depth += 1,
379                        OwnedEvent::MappingEnd => depth -= 1,
380                        OwnedEvent::SequenceStart { .. } => depth += 1,
381                        OwnedEvent::SequenceEnd => depth -= 1,
382                        _ => {}
383                    }
384                }
385                // Update context if in mapping value position
386                if let Some(ctx) = self.stack.last_mut()
387                    && *ctx == ContextState::MappingValue
388                {
389                    *ctx = ContextState::MappingKey;
390                }
391                Ok(())
392            }
393            OwnedEvent::SequenceStart { .. } => {
394                self.next_raw();
395                let mut depth = 1;
396                while depth > 0 {
397                    let Some(event) = self.next_raw() else {
398                        return Err(YamlError::without_span(YamlErrorKind::UnexpectedEof {
399                            expected: "sequence end",
400                        })
401                        .with_source(self.input));
402                    };
403                    match &event.event {
404                        OwnedEvent::MappingStart { .. } => depth += 1,
405                        OwnedEvent::MappingEnd => depth -= 1,
406                        OwnedEvent::SequenceStart { .. } => depth += 1,
407                        OwnedEvent::SequenceEnd => depth -= 1,
408                        _ => {}
409                    }
410                }
411                // Update context if in mapping value position
412                if let Some(ctx) = self.stack.last_mut()
413                    && *ctx == ContextState::MappingValue
414                {
415                    *ctx = ContextState::MappingKey;
416                }
417                Ok(())
418            }
419            _ => {
420                self.next_raw();
421                Ok(())
422            }
423        }
424    }
425
426    /// Build probe evidence by scanning ahead without consuming.
427    fn build_probe(&self) -> Result<Vec<FieldEvidence<'de>>, YamlError> {
428        let mut evidence = Vec::new();
429        let mut pos = self.pos;
430
431        // Skip to MappingStart if we have one peeked
432        if pos < self.events.len()
433            && let OwnedEvent::MappingStart { .. } = &self.events[pos].event
434        {
435            pos += 1;
436        }
437
438        // Scan the mapping for keys
439        let mut depth = 1;
440        while pos < self.events.len() && depth > 0 {
441            let event = &self.events[pos];
442            match &event.event {
443                OwnedEvent::MappingStart { .. } => {
444                    depth += 1;
445                    pos += 1;
446                }
447                OwnedEvent::MappingEnd => {
448                    depth -= 1;
449                    pos += 1;
450                }
451                OwnedEvent::SequenceStart { .. } => {
452                    depth += 1;
453                    pos += 1;
454                }
455                OwnedEvent::SequenceEnd => {
456                    depth -= 1;
457                    pos += 1;
458                }
459                OwnedEvent::Scalar { value, .. } if depth == 1 => {
460                    // This is a key at the top level of the mapping
461                    let key = Cow::Owned(value.clone());
462                    pos += 1;
463
464                    // Look at the value
465                    if pos < self.events.len() {
466                        let value_event = &self.events[pos];
467                        let scalar_value = if let OwnedEvent::Scalar {
468                            value: v, style: s, ..
469                        } = &value_event.event
470                        {
471                            Some(self.scalar_to_value(v, *s))
472                        } else {
473                            None
474                        };
475
476                        if let Some(sv) = scalar_value {
477                            evidence.push(FieldEvidence::with_scalar_value(
478                                key,
479                                FieldLocationHint::KeyValue,
480                                None,
481                                sv,
482                                None,
483                            ));
484                        } else {
485                            evidence.push(FieldEvidence::new(
486                                key,
487                                FieldLocationHint::KeyValue,
488                                None,
489                                None,
490                            ));
491                        }
492
493                        // Skip the value
494                        pos = self.skip_value_from(pos);
495                    }
496                }
497                _ => {
498                    pos += 1;
499                }
500            }
501        }
502
503        Ok(evidence)
504    }
505
506    /// Skip a value starting from `pos`, returning the position after the value.
507    fn skip_value_from(&self, start: usize) -> usize {
508        let mut pos = start;
509        if pos >= self.events.len() {
510            return pos;
511        }
512
513        match &self.events[pos].event {
514            OwnedEvent::Scalar { .. } => pos + 1,
515            OwnedEvent::MappingStart { .. } | OwnedEvent::SequenceStart { .. } => {
516                let mut depth = 1;
517                pos += 1;
518                while pos < self.events.len() && depth > 0 {
519                    match &self.events[pos].event {
520                        OwnedEvent::MappingStart { .. } | OwnedEvent::SequenceStart { .. } => {
521                            depth += 1;
522                        }
523                        OwnedEvent::MappingEnd | OwnedEvent::SequenceEnd => {
524                            depth -= 1;
525                        }
526                        _ => {}
527                    }
528                    pos += 1;
529                }
530                pos
531            }
532            _ => pos + 1,
533        }
534    }
535}
536
537impl<'de> FormatParser<'de> for YamlParser<'de> {
538    type Error = YamlError;
539    type Probe<'a>
540        = YamlProbe<'de>
541    where
542        Self: 'a;
543
544    fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
545        if let Some(event) = self.event_peek.take() {
546            return Ok(Some(event));
547        }
548        self.produce_event()
549    }
550
551    fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
552        if let Some(event) = self.event_peek.clone() {
553            return Ok(Some(event));
554        }
555        let event = self.produce_event()?;
556        if let Some(ref e) = event {
557            self.event_peek = Some(e.clone());
558        }
559        Ok(event)
560    }
561
562    fn skip_value(&mut self) -> Result<(), Self::Error> {
563        debug_assert!(
564            self.event_peek.is_none(),
565            "skip_value called while an event is buffered"
566        );
567        self.skip_current_value()
568    }
569
570    fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
571        let evidence = self.build_probe()?;
572        Ok(YamlProbe { evidence, idx: 0 })
573    }
574
575    fn capture_raw(&mut self) -> Result<Option<&'de str>, Self::Error> {
576        // YAML doesn't support raw capture (unlike JSON with RawJson)
577        self.skip_value()?;
578        Ok(None)
579    }
580
581    fn current_span(&self) -> Option<Span> {
582        self.last_span
583    }
584}
585
/// Probe stream for YAML.
///
/// Holds the field evidence gathered by a look-ahead scan and hands it out
/// one entry at a time.
pub struct YamlProbe<'de> {
    // Evidence collected up front, in document order.
    evidence: Vec<FieldEvidence<'de>>,
    // Index of the next entry to return from `evidence`.
    idx: usize,
}
591
592impl<'de> ProbeStream<'de> for YamlProbe<'de> {
593    type Error = YamlError;
594
595    fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
596        if self.idx >= self.evidence.len() {
597            Ok(None)
598        } else {
599            let ev = self.evidence[self.idx].clone();
600            self.idx += 1;
601            Ok(Some(ev))
602        }
603    }
604}
605
606// ============================================================================
607// YAML-specific helpers
608// ============================================================================
609
/// Check if a YAML value represents null.
///
/// Accepts the empty string, `~`, and — ignoring ASCII case — `null`, `nil`
/// and `none`. The comparison is done in place; no lowercased copy of the
/// scalar is allocated.
fn is_yaml_null(value: &str) -> bool {
    const NULL_WORDS: [&str; 3] = ["null", "nil", "none"];

    value.is_empty()
        || value == "~"
        || NULL_WORDS
            .iter()
            .any(|word| value.eq_ignore_ascii_case(word))
}
617
/// Parse a YAML boolean value.
///
/// Ignoring ASCII case, `true` / `yes` / `on` / `y` yield `Some(true)` and
/// `false` / `no` / `off` / `n` yield `Some(false)`; anything else is `None`.
/// The comparison is done in place; no lowercased copy of the scalar is
/// allocated.
fn parse_yaml_bool(value: &str) -> Option<bool> {
    const TRUE_WORDS: [&str; 4] = ["true", "yes", "on", "y"];
    const FALSE_WORDS: [&str; 4] = ["false", "no", "off", "n"];

    let is_one_of = |words: &[&str]| words.iter().any(|word| value.eq_ignore_ascii_case(word));

    if is_one_of(&TRUE_WORDS) {
        Some(true)
    } else if is_one_of(&FALSE_WORDS) {
        Some(false)
    } else {
        None
    }
}