facet_yaml/
parser.rs

1//! Streaming YAML parser implementing the FormatParser trait.
2//!
3//! This parser uses saphyr-parser's event-based API and translates YAML events
4//! into the `ParseEvent` format expected by `facet-format`'s deserializer.
5
6extern crate alloc;
7
8use alloc::{
9    borrow::Cow,
10    format,
11    string::{String, ToString},
12    vec::Vec,
13};
14
15use facet_format::{
16    ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
17    ProbeStream, ScalarValue,
18};
19use saphyr_parser::{Event, Parser, ScalarStyle, Span as SaphyrSpan, SpannedEventReceiver};
20
21use crate::error::{SpanExt, YamlError, YamlErrorKind};
22use facet_reflect::Span;
23
24// ============================================================================
25// Event wrapper with owned strings
26// ============================================================================
27
28/// A YAML event with owned string data and span information.
29/// We convert from saphyr's borrowed events to owned so we can store them.
30#[derive(Debug, Clone)]
31#[allow(dead_code)] // Some variants/fields reserved for future anchor/alias support
32enum OwnedEvent {
33    StreamStart,
34    StreamEnd,
35    DocumentStart,
36    DocumentEnd,
37    Alias(usize),
38    Scalar {
39        value: String,
40        style: ScalarStyle,
41        anchor: usize,
42    },
43    SequenceStart {
44        anchor: usize,
45    },
46    SequenceEnd,
47    MappingStart {
48        anchor: usize,
49    },
50    MappingEnd,
51}
52
53#[derive(Debug, Clone)]
54struct SpannedEvent {
55    event: OwnedEvent,
56    span: SaphyrSpan,
57}
58
59// ============================================================================
60// Event Collector
61// ============================================================================
62
63/// Collects all events from the parser upfront.
64/// This is necessary because saphyr-parser doesn't support seeking/rewinding,
65/// but we need to replay events for flatten deserialization.
66struct EventCollector {
67    events: Vec<SpannedEvent>,
68}
69
70impl EventCollector {
71    fn new() -> Self {
72        Self { events: Vec::new() }
73    }
74}
75
76impl SpannedEventReceiver<'_> for EventCollector {
77    fn on_event(&mut self, event: Event<'_>, span: SaphyrSpan) {
78        let owned = match event {
79            Event::StreamStart => OwnedEvent::StreamStart,
80            Event::StreamEnd => OwnedEvent::StreamEnd,
81            Event::DocumentStart(_) => OwnedEvent::DocumentStart,
82            Event::DocumentEnd => OwnedEvent::DocumentEnd,
83            Event::Alias(id) => OwnedEvent::Alias(id),
84            Event::Scalar(value, style, anchor, _tag) => OwnedEvent::Scalar {
85                value: value.into_owned(),
86                style,
87                anchor,
88            },
89            Event::SequenceStart(anchor, _tag) => OwnedEvent::SequenceStart { anchor },
90            Event::SequenceEnd => OwnedEvent::SequenceEnd,
91            Event::MappingStart(anchor, _tag) => OwnedEvent::MappingStart { anchor },
92            Event::MappingEnd => OwnedEvent::MappingEnd,
93            Event::Nothing => return, // Skip internal events
94        };
95        log::trace!("YAML event: {owned:?}");
96        self.events.push(SpannedEvent { event: owned, span });
97    }
98}
99
100// ============================================================================
101// Parser State
102// ============================================================================
103
/// What the parser expects next inside the innermost open container.
///
/// One entry is kept per open mapping/sequence on the parser's stack.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ContextState {
    /// In a mapping: the next node is a key (or the mapping ends).
    MappingKey,
    /// In a mapping: a key was just read, the next node is its value.
    MappingValue,
    /// In a sequence: the next node is an element (or the sequence ends).
    SequenceValue,
}
114
115// ============================================================================
116// YAML Parser
117// ============================================================================
118
119/// Streaming YAML parser backed by `saphyr-parser`.
120///
121/// This parser translates YAML's event stream into the `ParseEvent` format
122/// expected by `facet-format`'s deserializer.
123pub struct YamlParser<'de> {
124    /// Original input string.
125    input: &'de str,
126    /// Pre-parsed events from saphyr-parser.
127    events: Vec<SpannedEvent>,
128    /// Current position in the event stream.
129    pos: usize,
130    /// Stack tracking nested containers.
131    stack: Vec<ContextState>,
132    /// Cached event for peek_event().
133    event_peek: Option<ParseEvent<'de>>,
134    /// Whether we've consumed the stream/document start events.
135    started: bool,
136    /// The span of the most recently consumed event (for error reporting).
137    last_span: Option<Span>,
138}
139
140impl<'de> YamlParser<'de> {
141    /// Create a new YAML parser from a string slice.
142    pub fn new(input: &'de str) -> Result<Self, YamlError> {
143        let mut collector = EventCollector::new();
144        Parser::new_from_str(input)
145            .load(&mut collector, true)
146            .map_err(|e| {
147                YamlError::without_span(YamlErrorKind::Parse(format!("{e}"))).with_source(input)
148            })?;
149
150        Ok(Self {
151            input,
152            events: collector.events,
153            pos: 0,
154            stack: Vec::new(),
155            event_peek: None,
156            started: false,
157            last_span: None,
158        })
159    }
160
161    /// Get the original input string.
162    pub fn input(&self) -> &'de str {
163        self.input
164    }
165
166    /// Consume and return the current event.
167    fn next_raw(&mut self) -> Option<SpannedEvent> {
168        if self.pos < self.events.len() {
169            let event = self.events[self.pos].clone();
170            self.last_span = Some(Span::from_saphyr_span(&event.span));
171            self.pos += 1;
172            Some(event)
173        } else {
174            None
175        }
176    }
177
178    /// Skip stream/document start events.
179    fn skip_preamble(&mut self) {
180        while self.pos < self.events.len() {
181            match &self.events[self.pos].event {
182                OwnedEvent::StreamStart | OwnedEvent::DocumentStart => {
183                    self.pos += 1;
184                }
185                _ => break,
186            }
187        }
188        self.started = true;
189    }
190
191    /// Convert a YAML scalar to a ScalarValue.
192    fn scalar_to_value(&self, value: &str, style: ScalarStyle) -> ScalarValue<'de> {
193        // If quoted, always treat as string
194        if matches!(style, ScalarStyle::SingleQuoted | ScalarStyle::DoubleQuoted) {
195            return ScalarValue::Str(Cow::Owned(value.to_string()));
196        }
197
198        // Check for null
199        if is_yaml_null(value) {
200            return ScalarValue::Null;
201        }
202
203        // Check for boolean
204        if let Some(b) = parse_yaml_bool(value) {
205            return ScalarValue::Bool(b);
206        }
207
208        // Check for integer
209        if let Ok(n) = value.parse::<i64>() {
210            return ScalarValue::I64(n);
211        }
212
213        // Check for float
214        if let Ok(f) = value.parse::<f64>() {
215            return ScalarValue::F64(f);
216        }
217
218        // Default to string
219        ScalarValue::Str(Cow::Owned(value.to_string()))
220    }
221
222    /// Produce the next parse event.
223    fn produce_event(&mut self) -> Result<Option<ParseEvent<'de>>, YamlError> {
224        // Skip preamble if we haven't started
225        if !self.started {
226            self.skip_preamble();
227        }
228
229        // Check current context to know what to expect
230        let context = self.stack.last().copied();
231
232        if self.pos >= self.events.len() {
233            // EOF - we're done
234            return Ok(None);
235        }
236
237        // Clone the event to avoid borrow issues
238        let raw_event = self.events[self.pos].clone();
239
240        match (&raw_event.event, context) {
241            // Stream/Document end - skip and continue
242            (OwnedEvent::StreamEnd, _) | (OwnedEvent::DocumentEnd, _) => {
243                self.next_raw();
244                self.produce_event()
245            }
246
247            // Mapping start
248            (OwnedEvent::MappingStart { .. }, _) => {
249                self.next_raw();
250                // If we're in MappingValue context, this nested struct satisfies the value,
251                // so transition parent back to MappingKey before pushing new context
252                if let Some(ctx) = self.stack.last_mut()
253                    && *ctx == ContextState::MappingValue
254                {
255                    *ctx = ContextState::MappingKey;
256                }
257                self.stack.push(ContextState::MappingKey);
258                Ok(Some(ParseEvent::StructStart(ContainerKind::Object)))
259            }
260
261            // Mapping end
262            (OwnedEvent::MappingEnd, _) => {
263                self.next_raw();
264                self.stack.pop();
265                Ok(Some(ParseEvent::StructEnd))
266            }
267
268            // Sequence start
269            (OwnedEvent::SequenceStart { .. }, _) => {
270                self.next_raw();
271                // If we're in MappingValue context, this sequence satisfies the value,
272                // so transition parent back to MappingKey before pushing new context
273                if let Some(ctx) = self.stack.last_mut()
274                    && *ctx == ContextState::MappingValue
275                {
276                    *ctx = ContextState::MappingKey;
277                }
278                self.stack.push(ContextState::SequenceValue);
279                Ok(Some(ParseEvent::SequenceStart(ContainerKind::Array)))
280            }
281
282            // Sequence end
283            (OwnedEvent::SequenceEnd, _) => {
284                self.next_raw();
285                self.stack.pop();
286                Ok(Some(ParseEvent::SequenceEnd))
287            }
288
289            // Scalar in mapping key position -> emit FieldKey
290            (OwnedEvent::Scalar { value, .. }, Some(ContextState::MappingKey)) => {
291                let key = value.clone();
292                self.next_raw();
293                // Transition to expecting value
294                if let Some(ctx) = self.stack.last_mut() {
295                    *ctx = ContextState::MappingValue;
296                }
297                Ok(Some(ParseEvent::FieldKey(FieldKey::new(
298                    Cow::Owned(key),
299                    FieldLocationHint::KeyValue,
300                ))))
301            }
302
303            // Scalar in mapping value position -> emit Scalar and transition back to key
304            (OwnedEvent::Scalar { value, style, .. }, Some(ContextState::MappingValue)) => {
305                let value = value.clone();
306                let style = *style;
307                self.next_raw();
308                // Transition back to expecting key
309                if let Some(ctx) = self.stack.last_mut() {
310                    *ctx = ContextState::MappingKey;
311                }
312                Ok(Some(ParseEvent::Scalar(
313                    self.scalar_to_value(&value, style),
314                )))
315            }
316
317            // Scalar in sequence -> emit Scalar
318            (OwnedEvent::Scalar { value, style, .. }, Some(ContextState::SequenceValue)) => {
319                let value = value.clone();
320                let style = *style;
321                self.next_raw();
322                Ok(Some(ParseEvent::Scalar(
323                    self.scalar_to_value(&value, style),
324                )))
325            }
326
327            // Scalar at root level (no context) -> emit Scalar
328            (OwnedEvent::Scalar { value, style, .. }, None) => {
329                let value = value.clone();
330                let style = *style;
331                self.next_raw();
332                Ok(Some(ParseEvent::Scalar(
333                    self.scalar_to_value(&value, style),
334                )))
335            }
336
337            // Alias - not fully supported yet
338            (OwnedEvent::Alias(_), _) => {
339                let span = Span::from_saphyr_span(&raw_event.span);
340                Err(YamlError::new(
341                    YamlErrorKind::Unsupported("YAML aliases are not yet supported".into()),
342                    span,
343                )
344                .with_source(self.input))
345            }
346
347            // Unexpected combinations
348            _ => {
349                let span = Span::from_saphyr_span(&raw_event.span);
350                Err(YamlError::new(
351                    YamlErrorKind::UnexpectedEvent {
352                        got: format!("{:?}", raw_event.event),
353                        expected: "valid YAML structure",
354                    },
355                    span,
356                )
357                .with_source(self.input))
358            }
359        }
360    }
361
362    /// Skip the current value (for unknown fields).
363    fn skip_current_value(&mut self) -> Result<(), YamlError> {
364        if self.pos >= self.events.len() {
365            return Ok(());
366        }
367
368        let raw_event = self.events[self.pos].clone();
369
370        match &raw_event.event {
371            OwnedEvent::Scalar { .. } => {
372                self.next_raw();
373                // Update context if in mapping value position
374                if let Some(ctx) = self.stack.last_mut()
375                    && *ctx == ContextState::MappingValue
376                {
377                    *ctx = ContextState::MappingKey;
378                }
379                Ok(())
380            }
381            OwnedEvent::MappingStart { .. } => {
382                self.next_raw();
383                let mut depth = 1;
384                while depth > 0 {
385                    let Some(event) = self.next_raw() else {
386                        return Err(YamlError::without_span(YamlErrorKind::UnexpectedEof {
387                            expected: "mapping end",
388                        })
389                        .with_source(self.input));
390                    };
391                    match &event.event {
392                        OwnedEvent::MappingStart { .. } => depth += 1,
393                        OwnedEvent::MappingEnd => depth -= 1,
394                        OwnedEvent::SequenceStart { .. } => depth += 1,
395                        OwnedEvent::SequenceEnd => depth -= 1,
396                        _ => {}
397                    }
398                }
399                // Update context if in mapping value position
400                if let Some(ctx) = self.stack.last_mut()
401                    && *ctx == ContextState::MappingValue
402                {
403                    *ctx = ContextState::MappingKey;
404                }
405                Ok(())
406            }
407            OwnedEvent::SequenceStart { .. } => {
408                self.next_raw();
409                let mut depth = 1;
410                while depth > 0 {
411                    let Some(event) = self.next_raw() else {
412                        return Err(YamlError::without_span(YamlErrorKind::UnexpectedEof {
413                            expected: "sequence end",
414                        })
415                        .with_source(self.input));
416                    };
417                    match &event.event {
418                        OwnedEvent::MappingStart { .. } => depth += 1,
419                        OwnedEvent::MappingEnd => depth -= 1,
420                        OwnedEvent::SequenceStart { .. } => depth += 1,
421                        OwnedEvent::SequenceEnd => depth -= 1,
422                        _ => {}
423                    }
424                }
425                // Update context if in mapping value position
426                if let Some(ctx) = self.stack.last_mut()
427                    && *ctx == ContextState::MappingValue
428                {
429                    *ctx = ContextState::MappingKey;
430                }
431                Ok(())
432            }
433            _ => {
434                self.next_raw();
435                Ok(())
436            }
437        }
438    }
439
440    /// Build probe evidence by scanning ahead without consuming.
441    fn build_probe(&self) -> Result<Vec<FieldEvidence<'de>>, YamlError> {
442        let mut evidence = Vec::new();
443        let mut pos = self.pos;
444
445        // Skip preamble (StreamStart, DocumentStart) if we haven't started yet
446        while pos < self.events.len() {
447            match &self.events[pos].event {
448                OwnedEvent::StreamStart | OwnedEvent::DocumentStart => {
449                    pos += 1;
450                }
451                _ => break,
452            }
453        }
454
455        // Skip MappingStart if we have one
456        if pos < self.events.len()
457            && let OwnedEvent::MappingStart { .. } = &self.events[pos].event
458        {
459            pos += 1;
460        }
461
462        // Scan the mapping for keys
463        let mut depth = 1;
464        while pos < self.events.len() && depth > 0 {
465            let event = &self.events[pos];
466            match &event.event {
467                OwnedEvent::MappingStart { .. } => {
468                    depth += 1;
469                    pos += 1;
470                }
471                OwnedEvent::MappingEnd => {
472                    depth -= 1;
473                    pos += 1;
474                }
475                OwnedEvent::SequenceStart { .. } => {
476                    depth += 1;
477                    pos += 1;
478                }
479                OwnedEvent::SequenceEnd => {
480                    depth -= 1;
481                    pos += 1;
482                }
483                OwnedEvent::Scalar { value, .. } if depth == 1 => {
484                    // This is a key at the top level of the mapping
485                    let key = Cow::Owned(value.clone());
486                    pos += 1;
487
488                    // Look at the value
489                    if pos < self.events.len() {
490                        let value_event = &self.events[pos];
491                        let scalar_value = if let OwnedEvent::Scalar {
492                            value: v, style: s, ..
493                        } = &value_event.event
494                        {
495                            Some(self.scalar_to_value(v, *s))
496                        } else {
497                            None
498                        };
499
500                        if let Some(sv) = scalar_value {
501                            evidence.push(FieldEvidence::with_scalar_value(
502                                key,
503                                FieldLocationHint::KeyValue,
504                                None,
505                                sv,
506                                None,
507                            ));
508                        } else {
509                            evidence.push(FieldEvidence::new(
510                                key,
511                                FieldLocationHint::KeyValue,
512                                None,
513                                None,
514                            ));
515                        }
516
517                        // Skip the value
518                        pos = self.skip_value_from(pos);
519                    }
520                }
521                _ => {
522                    pos += 1;
523                }
524            }
525        }
526
527        Ok(evidence)
528    }
529
530    /// Skip a value starting from `pos`, returning the position after the value.
531    fn skip_value_from(&self, start: usize) -> usize {
532        let mut pos = start;
533        if pos >= self.events.len() {
534            return pos;
535        }
536
537        match &self.events[pos].event {
538            OwnedEvent::Scalar { .. } => pos + 1,
539            OwnedEvent::MappingStart { .. } | OwnedEvent::SequenceStart { .. } => {
540                let mut depth = 1;
541                pos += 1;
542                while pos < self.events.len() && depth > 0 {
543                    match &self.events[pos].event {
544                        OwnedEvent::MappingStart { .. } | OwnedEvent::SequenceStart { .. } => {
545                            depth += 1;
546                        }
547                        OwnedEvent::MappingEnd | OwnedEvent::SequenceEnd => {
548                            depth -= 1;
549                        }
550                        _ => {}
551                    }
552                    pos += 1;
553                }
554                pos
555            }
556            _ => pos + 1,
557        }
558    }
559}
560
561impl<'de> FormatParser<'de> for YamlParser<'de> {
562    type Error = YamlError;
563    type Probe<'a>
564        = YamlProbe<'de>
565    where
566        Self: 'a;
567
568    fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
569        if let Some(event) = self.event_peek.take() {
570            return Ok(Some(event));
571        }
572        self.produce_event()
573    }
574
575    fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
576        if let Some(event) = self.event_peek.clone() {
577            return Ok(Some(event));
578        }
579        let event = self.produce_event()?;
580        if let Some(ref e) = event {
581            self.event_peek = Some(e.clone());
582        }
583        Ok(event)
584    }
585
586    fn skip_value(&mut self) -> Result<(), Self::Error> {
587        debug_assert!(
588            self.event_peek.is_none(),
589            "skip_value called while an event is buffered"
590        );
591        self.skip_current_value()
592    }
593
594    fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
595        let evidence = self.build_probe()?;
596        Ok(YamlProbe { evidence, idx: 0 })
597    }
598
599    fn capture_raw(&mut self) -> Result<Option<&'de str>, Self::Error> {
600        // YAML doesn't support raw capture (unlike JSON with RawJson)
601        self.skip_value()?;
602        Ok(None)
603    }
604
605    fn current_span(&self) -> Option<Span> {
606        self.last_span
607    }
608}
609
610/// Probe stream for YAML.
611pub struct YamlProbe<'de> {
612    evidence: Vec<FieldEvidence<'de>>,
613    idx: usize,
614}
615
616impl<'de> ProbeStream<'de> for YamlProbe<'de> {
617    type Error = YamlError;
618
619    fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
620        if self.idx >= self.evidence.len() {
621            Ok(None)
622        } else {
623            let ev = self.evidence[self.idx].clone();
624            self.idx += 1;
625            Ok(Some(ev))
626        }
627    }
628}
629
630// ============================================================================
631// YAML-specific helpers
632// ============================================================================
633
/// Whether a scalar's text is one of the accepted null spellings.
///
/// Matches `null`, `~`, the empty string, `nil` and `none`, ignoring letter case.
fn is_yaml_null(value: &str) -> bool {
    const NULL_SPELLINGS: &[&str] = &["null", "~", "", "nil", "none"];
    NULL_SPELLINGS
        .iter()
        .any(|spelling| value.eq_ignore_ascii_case(spelling))
}
641
/// Interpret a scalar's text as a boolean, ignoring letter case.
///
/// `true`/`yes`/`on`/`y` give `Some(true)`, `false`/`no`/`off`/`n` give
/// `Some(false)`, anything else gives `None`.
fn parse_yaml_bool(value: &str) -> Option<bool> {
    const TRUTHY: &[&str] = &["true", "yes", "on", "y"];
    const FALSY: &[&str] = &["false", "no", "off", "n"];

    let is_one_of = |spellings: &[&str]| {
        spellings
            .iter()
            .any(|spelling| value.eq_ignore_ascii_case(spelling))
    };

    if is_one_of(TRUTHY) {
        Some(true)
    } else if is_one_of(FALSY) {
        Some(false)
    } else {
        None
    }
}