1use std::borrow::Cow;
4
5use facet_core::Facet;
6use facet_format::{
7 ContainerKind, FieldEvidence, FieldKey, FieldLocationHint, FormatParser, ParseEvent,
8 ProbeStream, ScalarValue, ValueTypeHint,
9};
10use styx_parse::{Lexer, ScalarKind, Span, Token, TokenKind};
11
12use crate::error::{StyxError, StyxErrorKind};
13use crate::trace;
14
/// Streaming parser that turns Styx source text into `ParseEvent`s.
///
/// `Clone` is required so `begin_probe` can scan ahead on a copy of the
/// parser without disturbing the primary event stream.
#[derive(Clone)]
pub struct StyxParser<'de> {
    // Full source text; scalars borrow from it where possible.
    input: &'de str,
    // Token source over `input`.
    lexer: Lexer<'de>,
    // Currently open containers (objects/sequences), innermost last.
    stack: Vec<ContextState>,
    // One-token lookahead buffer filled by `peek_token`.
    peeked_token: Option<Token<'de>>,
    // Events queued to be returned before lexing continues (e.g. the
    // payload event pushed by `parse_tag`, or an event buffered by
    // `peek_event`).
    peeked_events: Vec<ParseEvent<'de>>,
    // Whether the (implicit) root container has been started.
    root_started: bool,
    // Set once parsing is finished; `next_event` then returns `None`.
    complete: bool,
    // Span of the most recently consumed token, used for error reporting.
    current_span: Option<Span>,
    // Key most recently emitted as a `FieldKey`.
    // NOTE(review): assigned in several places but never read anywhere in
    // this file — possibly vestigial; confirm before removing.
    pending_key: Option<Cow<'de, str>>,
    // True when the next event must be the value following a field key.
    expecting_value: bool,
    // True when parsing a standalone expression (`new_expr`) instead of a
    // document with an implicit root object.
    expr_mode: bool,
    // Byte offset where the currently peeked event started; used by
    // `capture_raw` to slice raw source text.
    peek_start_offset: Option<usize>,
    // Doc comments (`///`) collected for the next field key.
    pending_doc: Vec<Cow<'de, str>>,
}
43
/// A container context currently open on the parser stack.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ContextState {
    /// An object; `implicit` marks the synthesized root object that wraps
    /// top-level keys and is closed by EOF rather than a `}`.
    Object { implicit: bool },
    /// A sequence, delimited by `(` … `)`.
    Sequence,
}
51
52impl<'de> StyxParser<'de> {
53 pub fn new(source: &'de str) -> Self {
55 Self {
56 input: source,
57 lexer: Lexer::new(source),
58 stack: Vec::new(),
59 peeked_token: None,
60 peeked_events: Vec::new(),
61 root_started: false,
62 complete: false,
63 current_span: None,
64 pending_key: None,
65 expecting_value: false,
66 expr_mode: false,
67 peek_start_offset: None,
68 pending_doc: Vec::new(),
69 }
70 }
71
72 pub fn new_expr(source: &'de str) -> Self {
77 Self {
78 input: source,
79 lexer: Lexer::new(source),
80 stack: Vec::new(),
81 peeked_token: None,
82 peeked_events: Vec::new(),
83 root_started: false,
84 complete: false,
85 current_span: None,
86 pending_key: None,
87 expecting_value: true, expr_mode: true,
89 peek_start_offset: None,
90 pending_doc: Vec::new(),
91 }
92 }
93
94 fn peek_token(&mut self) -> Option<&Token<'de>> {
96 if self.peeked_token.is_none() {
97 loop {
98 let token = self.lexer.next_token();
99 match token.kind {
101 TokenKind::Whitespace | TokenKind::LineComment => continue,
102 TokenKind::Eof => {
103 self.peeked_token = Some(token);
104 break;
105 }
106 _ => {
107 self.peeked_token = Some(token);
108 break;
109 }
110 }
111 }
112 }
113 self.peeked_token.as_ref()
114 }
115
116 fn next_token(&mut self) -> Token<'de> {
118 if let Some(token) = self.peeked_token.take() {
119 self.current_span = Some(token.span);
120 return token;
121 }
122 loop {
123 let token = self.lexer.next_token();
124 match token.kind {
125 TokenKind::Whitespace | TokenKind::LineComment => continue,
126 _ => {
127 self.current_span = Some(token.span);
128 return token;
129 }
130 }
131 }
132 }
133
134 fn skip_newlines(&mut self) -> bool {
136 let mut found = false;
137 loop {
138 if let Some(token) = self.peek_token()
139 && token.kind == TokenKind::Newline
140 {
141 self.next_token();
142 found = true;
143 continue;
144 }
145 break;
146 }
147 found
148 }
149
150 fn parse_scalar(&self, text: &'de str, kind: ScalarKind) -> ScalarValue<'de> {
152 match kind {
153 ScalarKind::Bare => {
154 if text == "true" {
156 ScalarValue::Bool(true)
157 } else if text == "false" {
158 ScalarValue::Bool(false)
159 } else if text == "null" {
160 ScalarValue::Null
161 } else if let Ok(n) = text.parse::<i64>() {
162 ScalarValue::I64(n)
163 } else if let Ok(n) = text.parse::<u64>() {
164 ScalarValue::U64(n)
165 } else if let Ok(n) = text.parse::<f64>() {
166 ScalarValue::F64(n)
167 } else {
168 ScalarValue::Str(Cow::Borrowed(text))
170 }
171 }
172 ScalarKind::Quoted => {
173 let inner = self.unescape_quoted(text);
175 ScalarValue::Str(inner)
176 }
177 ScalarKind::Raw | ScalarKind::Heredoc => {
178 ScalarValue::Str(Cow::Borrowed(text))
180 }
181 }
182 }
183
    /// Strips surrounding double quotes (when both are present) and
    /// decodes escape sequences. Returns a borrowed slice when the text
    /// contains no escapes, so the common case allocates nothing.
    fn unescape_quoted(&self, text: &'de str) -> Cow<'de, str> {
        // Drop the surrounding quotes if present; otherwise use the text
        // as-is (callers may pass an already-stripped slice).
        let inner = if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
            &text[1..text.len() - 1]
        } else {
            text
        };

        // Fast path: no backslash means nothing to decode.
        if !inner.contains('\\') {
            return Cow::Borrowed(inner);
        }

        let mut result = String::with_capacity(inner.len());
        let mut chars = inner.chars().peekable();

        while let Some(c) = chars.next() {
            if c == '\\' {
                match chars.next() {
                    Some('n') => result.push('\n'),
                    Some('r') => result.push('\r'),
                    Some('t') => result.push('\t'),
                    Some('\\') => result.push('\\'),
                    Some('"') => result.push('"'),
                    Some('u') => {
                        // Unicode escape: `\u{HEX}`.
                        if chars.next() == Some('{') {
                            let mut hex = String::new();
                            while let Some(&c) = chars.peek() {
                                if c == '}' {
                                    chars.next();
                                    break;
                                }
                                hex.push(chars.next().unwrap());
                            }
                            // Invalid hex digits or an invalid code point
                            // are silently dropped from the output.
                            if let Ok(code) = u32::from_str_radix(&hex, 16)
                                && let Some(ch) = char::from_u32(code)
                            {
                                result.push(ch);
                            }
                        }
                        // NOTE(review): when `\u` is not followed by `{`,
                        // the character after `u` has already been consumed
                        // and is discarded along with the escape — confirm
                        // this is the intended handling of malformed `\u`.
                    }
                    // Unknown escapes are preserved verbatim.
                    Some(c) => {
                        result.push('\\');
                        result.push(c);
                    }
                    // A trailing lone backslash is kept.
                    None => {
                        result.push('\\');
                    }
                }
            } else {
                result.push(c);
            }
        }

        Cow::Owned(result)
    }
242
243 fn token_to_scalar_kind(&self, kind: TokenKind) -> ScalarKind {
245 match kind {
246 TokenKind::BareScalar => ScalarKind::Bare,
247 TokenKind::QuotedScalar => ScalarKind::Quoted,
248 TokenKind::RawScalar => ScalarKind::Raw,
249 TokenKind::HeredocStart | TokenKind::HeredocContent | TokenKind::HeredocEnd => {
250 ScalarKind::Heredoc
251 }
252 _ => ScalarKind::Bare,
253 }
254 }
255
    /// Builds a `StyxError` of the given kind, attached to the span of
    /// the most recently consumed token.
    fn error(&self, kind: StyxErrorKind) -> StyxError {
        StyxError::new(kind, self.current_span)
    }
259
260 fn build_probe(&self) -> Result<Vec<FieldEvidence<'de>>, StyxError> {
263 let mut probe_parser = self.clone();
265
266 let mut evidence = Vec::new();
267 let mut depth = 1usize;
268
269 loop {
270 let event = probe_parser.next_event()?;
271 match event {
272 Some(ParseEvent::FieldKey(key)) if depth == 1 => {
273 let name = key.name.unwrap_or(Cow::Borrowed(""));
275 evidence.push(FieldEvidence::new(
276 name,
277 FieldLocationHint::KeyValue,
278 Some(ValueTypeHint::Map),
279 ));
280 probe_parser.skip_value()?;
282 }
283 Some(ParseEvent::FieldKey(_)) => {
284 probe_parser.skip_value()?;
286 }
287 Some(ParseEvent::StructStart(_)) => {
288 depth += 1;
289 }
290 Some(ParseEvent::SequenceStart(_)) => {
291 depth += 1;
292 }
293 Some(ParseEvent::StructEnd) => {
294 depth -= 1;
295 if depth == 0 {
296 break;
297 }
298 }
299 Some(ParseEvent::SequenceEnd) => {
300 depth -= 1;
301 if depth == 0 {
302 break;
303 }
304 }
305 Some(ParseEvent::Scalar(_)) | Some(ParseEvent::VariantTag(_)) => {
306 }
308 Some(ParseEvent::OrderedField) => {
309 }
311 None => break,
312 }
313 }
314
315 Ok(evidence)
316 }
317
    /// Parses a tag after its `@` has been consumed. `at_span_end` is the
    /// byte offset just past the `@`; a tag name is only recognized when a
    /// bare scalar starts exactly there (no intervening whitespace).
    ///
    /// The tag's payload event (unit scalar, struct start, or sequence
    /// start) is pushed onto `peeked_events` so it is delivered by the
    /// next `next_event` call, after the returned `VariantTag`.
    fn parse_tag(&mut self, at_span_end: u32) -> ParseEvent<'de> {
        if let Some(next) = self.peek_token()
            && next.kind == TokenKind::BareScalar
            && next.span.start == at_span_end
        {
            let name_token = self.next_token();
            let tag_name = name_token.text;

            // Check for a payload attached directly to the name (again,
            // adjacency is required: the payload token must start exactly
            // where the name ends).
            if let Some(next) = self.peek_token() {
                if next.kind == TokenKind::At && next.span.start == name_token.span.end {
                    // `@name@` — explicit unit payload.
                    self.next_token();
                    self.peeked_events
                        .push(ParseEvent::Scalar(ScalarValue::Unit));
                    return ParseEvent::VariantTag(Some(tag_name));
                } else if next.kind == TokenKind::LBrace && next.span.start == name_token.span.end {
                    // `@name{…}` — struct payload; open the object context.
                    self.next_token();
                    self.stack.push(ContextState::Object { implicit: false });
                    self.peeked_events
                        .push(ParseEvent::StructStart(ContainerKind::Object));
                    return ParseEvent::VariantTag(Some(tag_name));
                } else if next.kind == TokenKind::LParen && next.span.start == name_token.span.end {
                    // `@name(…)` — sequence payload; open the sequence context.
                    self.next_token();
                    self.stack.push(ContextState::Sequence);
                    self.peeked_events
                        .push(ParseEvent::SequenceStart(ContainerKind::Array));
                    return ParseEvent::VariantTag(Some(tag_name));
                }
            }

            // `@name` with no adjacent payload — unit variant.
            self.peeked_events
                .push(ParseEvent::Scalar(ScalarValue::Unit));
            return ParseEvent::VariantTag(Some(tag_name));
        }

        // Bare `@` with no adjacent name — anonymous tag with unit payload.
        self.peeked_events
            .push(ParseEvent::Scalar(ScalarValue::Unit));
        ParseEvent::VariantTag(None)
    }
366}
367
368impl<'de> FormatParser<'de> for StyxParser<'de> {
369 type Error = StyxError;
370 type Probe<'a>
371 = StyxProbe<'de>
372 where
373 Self: 'a;
374
    /// Produces the next parse event, driving the whole state machine:
    /// queued events first, then value-after-key handling, then general
    /// structural tokens, then context-specific (object key / sequence
    /// element) handling.
    fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
        // Drain any queued events first (tag payloads, peeked events).
        if !self.peeked_events.is_empty() {
            let event = self.peeked_events.remove(0);
            if self.peeked_events.is_empty() {
                // Queue drained: the raw-capture anchor set by
                // `peek_event` is no longer valid.
                self.peek_start_offset = None;
            }
            trace!(?event, "next_event: returning queued event");
            return Ok(Some(event));
        }

        if self.complete {
            trace!("next_event: parsing complete");
            return Ok(None);
        }

        // Newlines are only significant while a key's value is pending
        // (there, a newline means "no value"); otherwise skip them.
        if !self.expecting_value {
            self.skip_newlines();
        }

        // Documents (non-expression mode) are wrapped in an implicit root
        // object that is opened before anything else is parsed.
        if !self.root_started && !self.expr_mode {
            self.root_started = true;
            self.stack.push(ContextState::Object { implicit: true });
            trace!("next_event: emitting root StructStart");
            return Ok(Some(ParseEvent::StructStart(ContainerKind::Object)));
        }
        self.root_started = true;

        // A field key was just emitted: the next event is its value.
        if self.expecting_value {
            self.expecting_value = false;
            trace!("next_event: expecting value after key");

            let token = self.peek_token().cloned();
            if let Some(token) = token {
                match token.kind {
                    // Key with no value before a terminator: the value
                    // defaults to unit. The terminator itself is NOT
                    // consumed; it is handled on the next call.
                    TokenKind::Newline | TokenKind::Eof | TokenKind::RBrace | TokenKind::Comma => {
                        trace!("next_event: no value found, emitting Unit");
                        return Ok(Some(ParseEvent::Scalar(ScalarValue::Unit)));
                    }
                    TokenKind::LBrace => {
                        self.next_token();
                        self.stack.push(ContextState::Object { implicit: false });
                        trace!("next_event: nested object StructStart");
                        return Ok(Some(ParseEvent::StructStart(ContainerKind::Object)));
                    }
                    TokenKind::LParen => {
                        self.next_token();
                        self.stack.push(ContextState::Sequence);
                        trace!("next_event: SequenceStart");
                        return Ok(Some(ParseEvent::SequenceStart(ContainerKind::Array)));
                    }
                    TokenKind::At => {
                        // Tagged value: `@name`, `@name{…}`, `@name(…)`, `@name@`.
                        self.next_token();
                        let event = self.parse_tag(token.span.end);
                        trace!(?event, "next_event: parsed tag");
                        return Ok(Some(event));
                    }
                    TokenKind::BareScalar
                    | TokenKind::QuotedScalar
                    | TokenKind::RawScalar
                    | TokenKind::HeredocStart => {
                        let token = self.next_token();
                        let kind = self.token_to_scalar_kind(token.kind);

                        let text = if token.kind == TokenKind::HeredocStart {
                            // Heredocs span multiple tokens; stitch the
                            // content tokens together until the end marker.
                            let mut content = String::new();
                            loop {
                                let next = self.next_token();
                                match next.kind {
                                    TokenKind::HeredocContent => {
                                        content.push_str(next.text);
                                    }
                                    TokenKind::HeredocEnd => break,
                                    // NOTE(review): any other token also
                                    // terminates the heredoc and is consumed
                                    // — confirm the lexer can't produce one
                                    // mid-heredoc.
                                    _ => break,
                                }
                            }
                            trace!(?content, "next_event: heredoc scalar");
                            return Ok(Some(ParseEvent::Scalar(ScalarValue::Str(Cow::Owned(
                                content,
                            )))));
                        } else {
                            token.text
                        };

                        let scalar = self.parse_scalar(text, kind);
                        trace!(?scalar, "next_event: scalar value");
                        return Ok(Some(ParseEvent::Scalar(scalar)));
                    }
                    // Anything else falls through to general handling below.
                    _ => {}
                }
            }
        }

        // General structural tokens, valid in any context.
        let token = self.peek_token().cloned();
        if let Some(token) = token {
            match token.kind {
                TokenKind::Eof => {
                    // Close containers still open at end of input, one per
                    // call; `complete` is only set once the root object
                    // (the last stack entry) closes or the stack is empty.
                    if let Some(ctx) = self.stack.pop() {
                        match ctx {
                            ContextState::Object { .. } => {
                                if self.stack.is_empty() {
                                    self.complete = true;
                                }
                                trace!("next_event: EOF StructEnd");
                                return Ok(Some(ParseEvent::StructEnd));
                            }
                            ContextState::Sequence => {
                                trace!("next_event: EOF SequenceEnd");
                                return Ok(Some(ParseEvent::SequenceEnd));
                            }
                        }
                    }
                    self.complete = true;
                    return Ok(None);
                }
                TokenKind::RBrace => {
                    self.next_token();
                    match self.stack.pop() {
                        Some(ContextState::Object { implicit: false }) => {
                            trace!("next_event: RBrace StructEnd");
                            return Ok(Some(ParseEvent::StructEnd));
                        }
                        // `}` that doesn't close an explicit `{` (including
                        // one that would close the implicit root) is an error.
                        _ => {
                            return Err(self.error(StyxErrorKind::UnexpectedToken {
                                got: "}".to_string(),
                                expected: "key or value",
                            }));
                        }
                    }
                }
                TokenKind::RParen => {
                    self.next_token();
                    match self.stack.pop() {
                        Some(ContextState::Sequence) => {
                            trace!("next_event: RParen SequenceEnd");
                            return Ok(Some(ParseEvent::SequenceEnd));
                        }
                        _ => {
                            return Err(self.error(StyxErrorKind::UnexpectedToken {
                                got: ")".to_string(),
                                expected: "value",
                            }));
                        }
                    }
                }
                // Separators produce no event; recurse for the next one.
                TokenKind::Comma => {
                    self.next_token();
                    self.skip_newlines();
                    return self.next_event();
                }
                TokenKind::Newline => {
                    self.next_token();
                    return self.next_event();
                }
                TokenKind::DocComment => {
                    // Strip the `///` marker and one leading space, then
                    // stash the text for the next field key.
                    let token = self.next_token();
                    let text = token.text.strip_prefix("///").unwrap_or(token.text);
                    let text = text.strip_prefix(' ').unwrap_or(text);
                    self.pending_doc.push(Cow::Borrowed(text));
                    return self.next_event();
                }
                _ => {}
            }
        }

        // Inside an object: the next token should start a field key.
        if matches!(self.stack.last(), Some(ContextState::Object { .. })) {
            let token = self.peek_token().cloned();
            if let Some(token) = token {
                match token.kind {
                    TokenKind::BareScalar | TokenKind::QuotedScalar => {
                        let key_token = self.next_token();
                        let key = if key_token.kind == TokenKind::QuotedScalar {
                            self.unescape_quoted(key_token.text)
                        } else {
                            Cow::Borrowed(key_token.text)
                        };

                        self.pending_key = Some(key.clone());
                        self.expecting_value = true;

                        // Attach any doc comments collected above this key.
                        let doc = std::mem::take(&mut self.pending_doc);

                        trace!(?key, ?doc, "next_event: FieldKey");
                        return Ok(Some(ParseEvent::FieldKey(FieldKey::with_doc(
                            key,
                            FieldLocationHint::KeyValue,
                            doc,
                        ))));
                    }
                    TokenKind::At => {
                        // Tagged key: `@name value`, or bare `@ value`.
                        let at_token = self.next_token();

                        if let Some(next) = self.peek_token()
                            && next.kind == TokenKind::BareScalar
                            && next.span.start == at_token.span.end
                        {
                            let name_token = self.next_token();
                            let tag_name = name_token.text.to_string();
                            let name_end = name_token.span.end;

                            // A payload attached directly to the tag name
                            // (`@name{…}`, `@name(…)`, `@name@`) is not a
                            // valid object key.
                            let after_info = self.peek_token().map(|t| (t.span.start, t.kind));
                            if let Some((after_start, after_kind)) = after_info
                                && after_start == name_end
                            {
                                match after_kind {
                                    TokenKind::LBrace | TokenKind::LParen | TokenKind::At => {
                                        return Err(self.error(StyxErrorKind::UnexpectedToken {
                                            expected: "simple key",
                                            got: format!(
                                                "complex tagged value @{}{} cannot be used as object key",
                                                tag_name,
                                                match after_kind {
                                                    TokenKind::LBrace => "{...}",
                                                    TokenKind::LParen => "(...)",
                                                    TokenKind::At => "@",
                                                    _ => "",
                                                }
                                            ),
                                        }));
                                    }
                                    _ => {}
                                }
                            }

                            let tag_name_str = name_token.text;
                            self.pending_key = Some(Cow::Owned(format!("@{}", tag_name_str)));
                            self.expecting_value = true;
                            let doc = std::mem::take(&mut self.pending_doc);
                            trace!(tag = tag_name_str, ?doc, "next_event: FieldKey (tagged)");
                            return Ok(Some(ParseEvent::FieldKey(FieldKey::tagged_with_doc(
                                tag_name_str,
                                FieldLocationHint::KeyValue,
                                doc,
                            ))));
                        }

                        // Bare `@` used as a key: a unit key.
                        self.pending_key = Some(Cow::Borrowed("@"));
                        self.expecting_value = true;
                        let doc = std::mem::take(&mut self.pending_doc);
                        trace!(?doc, "next_event: FieldKey (unit)");
                        return Ok(Some(ParseEvent::FieldKey(FieldKey::unit_with_doc(
                            FieldLocationHint::KeyValue,
                            doc,
                        ))));
                    }
                    _ => {}
                }
            }
        }

        // Inside a sequence: the next token should start an element.
        if matches!(self.stack.last(), Some(ContextState::Sequence)) {
            let token = self.peek_token().cloned();
            if let Some(token) = token {
                match token.kind {
                    TokenKind::BareScalar
                    | TokenKind::QuotedScalar
                    | TokenKind::RawScalar
                    | TokenKind::HeredocStart => {
                        // NOTE(review): unlike the key-value path above,
                        // heredoc elements are not stitched from content
                        // tokens here — confirm whether heredocs can occur
                        // inside sequences.
                        let token = self.next_token();
                        let kind = self.token_to_scalar_kind(token.kind);
                        let scalar = self.parse_scalar(token.text, kind);
                        return Ok(Some(ParseEvent::Scalar(scalar)));
                    }
                    TokenKind::LBrace => {
                        self.next_token();
                        self.stack.push(ContextState::Object { implicit: false });
                        return Ok(Some(ParseEvent::StructStart(ContainerKind::Object)));
                    }
                    TokenKind::LParen => {
                        self.next_token();
                        self.stack.push(ContextState::Sequence);
                        return Ok(Some(ParseEvent::SequenceStart(ContainerKind::Array)));
                    }
                    TokenKind::At => {
                        self.next_token();
                        let event = self.parse_tag(token.span.end);
                        return Ok(Some(event));
                    }
                    _ => {}
                }
            }
        }

        // No recognizable token in any context: end of stream.
        Ok(None)
    }
702
703 fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, Self::Error> {
704 if self.peeked_events.is_empty() {
705 self.peek_start_offset = Some(self.lexer.position() as usize);
707 if let Some(event) = self.next_event()? {
708 self.peeked_events.insert(0, event);
710 }
711 }
712 Ok(self.peeked_events.first().cloned())
713 }
714
    /// Consumes and discards one complete value (scalar or container,
    /// including all nested events).
    fn skip_value(&mut self) -> Result<(), Self::Error> {
        // Number of containers entered while skipping.
        let mut depth = 0i32;
        loop {
            let event = self.next_event()?;
            trace!(?event, depth, "skip_value");
            match event {
                Some(ParseEvent::StructStart(_)) | Some(ParseEvent::SequenceStart(_)) => {
                    depth += 1;
                }
                Some(ParseEvent::StructEnd) | Some(ParseEvent::SequenceEnd) => {
                    // NOTE(review): an End at depth 0 belongs to the
                    // *enclosing* container (the value was empty or already
                    // consumed) — it has been consumed here before breaking;
                    // confirm callers rely on that.
                    if depth == 0 {
                        break;
                    }
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                Some(ParseEvent::Scalar(_)) => {
                    // A bare scalar at depth 0 is the whole value.
                    if depth == 0 {
                        break;
                    }
                }
                // A tag is followed by its payload event; keep skipping.
                Some(ParseEvent::VariantTag(_)) => {
                }
                Some(ParseEvent::FieldKey(_)) | Some(ParseEvent::OrderedField) => {
                }
                None => break,
            }
        }
        Ok(())
    }
752
753 fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error> {
754 let evidence = self.build_probe()?;
755 Ok(StyxProbe { evidence, idx: 0 })
756 }
757
758 fn current_span(&self) -> Option<facet_reflect::Span> {
759 self.current_span.map(|s| facet_reflect::Span {
760 offset: s.start as usize,
761 len: (s.end - s.start) as usize,
762 })
763 }
764
    /// Shape used when the driver asks to capture a value as raw Styx
    /// source text (see `capture_raw`).
    fn raw_capture_shape(&self) -> Option<&'static facet_core::Shape> {
        Some(crate::RawStyx::SHAPE)
    }
768
769 fn capture_raw(&mut self) -> Result<Option<&'de str>, Self::Error> {
770 let start_offset = self
772 .peek_start_offset
773 .take()
774 .unwrap_or_else(|| self.lexer.position() as usize);
775
776 self.skip_value()?;
778
779 let end_offset = self.lexer.position() as usize;
780
781 let raw_str = &self.input[start_offset..end_offset];
783
784 let raw_str = raw_str.trim();
786
787 Ok(Some(raw_str))
788 }
789}
790
/// Probe stream returned by `StyxParser::begin_probe`: a pre-collected
/// list of top-level field evidence, replayed one item at a time.
pub struct StyxProbe<'de> {
    // Evidence gathered up front by `build_probe`.
    evidence: Vec<FieldEvidence<'de>>,
    // Index of the next item to yield.
    idx: usize,
}
800
801impl<'de> ProbeStream<'de> for StyxProbe<'de> {
802 type Error = StyxError;
803
804 fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error> {
805 if self.idx >= self.evidence.len() {
806 Ok(None)
807 } else {
808 let ev = self.evidence[self.idx].clone();
809 self.idx += 1;
810 Ok(Some(ev))
811 }
812 }
813}