// styx_parse/lexer.rs

1//! Lexer for Styx - produces lexemes from tokens.
2//!
3//! The Lexer sits between the Tokenizer and Parser:
4//! - Tokenizer → Token (raw: At, BareScalar, LBrace, etc.)
5//! - Lexer → Lexeme (atoms: Scalar, Tag, Unit, structural markers)
6//! - Parser → Events (structure: entries, objects, sequences)
7
8use std::borrow::Cow;
9
10use styx_tokenizer::{Span, Token, TokenKind, Tokenizer};
11
12use crate::events::ScalarKind;
13
/// A lexeme produced by the Lexer from raw tokens.
///
/// Lexemes are the atoms the parser consumes: resolved scalar values,
/// tags, structural markers, and trivia (newlines/comments) that is
/// significant for separator detection and documentation.
#[derive(Debug, Clone, PartialEq)]
pub enum Lexeme<'src> {
    /// A scalar value (bare, quoted, raw, or heredoc)
    Scalar {
        span: Span,
        /// Borrowed from the source where possible; owned when escape
        /// processing or heredoc assembly required allocation.
        value: Cow<'src, str>,
        kind: ScalarKind,
    },

    /// Unit value: standalone `@`
    Unit { span: Span },

    /// A tag: `@name`
    /// The payload (if any) comes as the next lexeme
    Tag {
        span: Span,
        /// Tag name without the leading `@`.
        name: &'src str,
        /// True if an immediate payload follows (no whitespace): `@tag{}`, `@tag()`, `@tag"x"`, `@tag@`
        has_payload: bool,
    },

    /// Start of object `{`
    ObjectStart { span: Span },

    /// End of object `}`
    ObjectEnd { span: Span },

    /// Start of sequence `(`
    SeqStart { span: Span },

    /// End of sequence `)`
    SeqEnd { span: Span },

    /// An attribute key `key>` - value follows as next lexeme(s)
    AttrKey {
        /// Span of the full `key>` including the `>`
        span: Span,
        /// Span of just the key (excluding `>`)
        key_span: Span,
        /// The key text
        key: &'src str,
    },

    /// Comma separator
    Comma { span: Span },

    /// Newline (significant for separator detection)
    Newline { span: Span },

    /// Line comment `// ...`
    Comment { span: Span, text: &'src str },

    /// Doc comment `/// ...`
    DocComment { span: Span, text: &'src str },

    /// End of input
    Eof,

    /// Tokenizer error
    ///
    /// `message` is a static description; `span` covers the offending
    /// token(s).
    Error { span: Span, message: &'static str },
}
76
77impl Lexeme<'_> {
78    /// Get the span of this lexeme.
79    /// Returns a zero-length span at position 0 for Eof.
80    pub fn span(&self) -> Span {
81        match self {
82            Lexeme::Scalar { span, .. }
83            | Lexeme::Unit { span }
84            | Lexeme::Tag { span, .. }
85            | Lexeme::ObjectStart { span }
86            | Lexeme::ObjectEnd { span }
87            | Lexeme::SeqStart { span }
88            | Lexeme::SeqEnd { span }
89            | Lexeme::AttrKey { span, .. }
90            | Lexeme::Comma { span }
91            | Lexeme::Newline { span }
92            | Lexeme::Comment { span, .. }
93            | Lexeme::DocComment { span, .. }
94            | Lexeme::Error { span, .. } => *span,
95            Lexeme::Eof => Span::new(0, 0),
96        }
97    }
98}
99
/// Lexer that produces lexemes from tokens.
///
/// Cloning duplicates the underlying tokenizer state, which allows
/// saving and restoring a lexing position.
#[derive(Clone)]
pub struct Lexer<'src> {
    tokenizer: Tokenizer<'src>,
    /// Peeked token (if any) — one-token lookahead buffer used for
    /// adjacency checks (tag payloads, `key>` attributes).
    peeked: Option<Token<'src>>,
}
107
impl<'src> Lexer<'src> {
    /// Create a new lexer for the given source.
    pub fn new(source: &'src str) -> Self {
        Self {
            tokenizer: Tokenizer::new(source),
            peeked: None,
        }
    }

    /// Peek at the next token without consuming it.
    ///
    /// Buffers one token from the tokenizer in `peeked`; repeated calls
    /// return the same buffered token until `next_token` takes it.
    fn peek_token(&mut self) -> &Token<'src> {
        if self.peeked.is_none() {
            self.peeked = Some(self.tokenizer.next_token());
        }
        self.peeked.as_ref().unwrap()
    }

    /// Consume and return the next token.
    ///
    /// Returns the buffered peeked token if present, otherwise pulls a
    /// fresh token from the tokenizer.
    fn next_token(&mut self) -> Token<'src> {
        self.peeked
            .take()
            .unwrap_or_else(|| self.tokenizer.next_token())
    }

    /// Get the next lexeme.
    ///
    /// Skips horizontal whitespace, then maps the next token — using
    /// one-token lookahead for tag payloads and `key>` attributes — onto
    /// a [`Lexeme`]. Newlines are NOT skipped; they are significant
    /// separators for the parser.
    pub fn next_lexeme(&mut self) -> Lexeme<'src> {
        // Skip whitespace (but not newlines - those are significant)
        loop {
            let tok = self.peek_token();
            if tok.kind == TokenKind::Whitespace {
                self.next_token();
            } else {
                break;
            }
        }

        let tok = self.next_token();

        match tok.kind {
            TokenKind::Eof => Lexeme::Eof,

            TokenKind::LBrace => Lexeme::ObjectStart { span: tok.span },
            TokenKind::RBrace => Lexeme::ObjectEnd { span: tok.span },
            TokenKind::LParen => Lexeme::SeqStart { span: tok.span },
            TokenKind::RParen => Lexeme::SeqEnd { span: tok.span },
            TokenKind::Comma => Lexeme::Comma { span: tok.span },
            TokenKind::Gt => {
                // Standalone `>` (with whitespace before it) - not valid in Styx
                // Attribute syntax requires no space: `key>value`
                // (Adjacent `key>` never reaches here; it is consumed in the
                // BareScalar arm below.)
                Lexeme::Error {
                    span: tok.span,
                    message: "unexpected `>` (attribute syntax requires no spaces: key>value)",
                }
            }
            TokenKind::Newline => Lexeme::Newline { span: tok.span },

            TokenKind::LineComment => Lexeme::Comment {
                span: tok.span,
                text: tok.text,
            },
            TokenKind::DocComment => Lexeme::DocComment {
                span: tok.span,
                text: tok.text,
            },

            TokenKind::At => {
                // Check if followed immediately by a bare scalar (invalid tag like @123)
                let next = self.peek_token();
                if next.span.start == tok.span.end && next.kind == TokenKind::BareScalar {
                    // Consume the adjacent token to include it in the error span
                    let bad_tok = self.next_token();
                    return Lexeme::Error {
                        span: Span::new(tok.span.start, bad_tok.span.end),
                        message: "invalid tag name",
                    };
                }
                // Standalone @ = unit
                Lexeme::Unit { span: tok.span }
            }

            TokenKind::Tag => {
                // Tag token includes the @ and name, e.g. "@foo"
                // Extract the name (skip the @)
                let name = &tok.text[1..];

                // Check if payload follows immediately (no whitespace)
                // Payload can be: { ( " r#" @ or Tag
                let payload_tok = self.peek_token();
                let is_adjacent = payload_tok.span.start == tok.span.end;
                let is_valid_payload = matches!(
                    payload_tok.kind,
                    TokenKind::LBrace
                        | TokenKind::LParen
                        | TokenKind::QuotedScalar
                        | TokenKind::RawScalar
                        | TokenKind::At
                        | TokenKind::Tag
                );

                // If a bare scalar is adjacent (no whitespace), it's an invalid tag name
                // e.g., @org/package where /package is adjacent
                // But structural tokens like ) } , or newlines are fine - they end the tag
                if is_adjacent && !is_valid_payload && payload_tok.kind == TokenKind::BareScalar {
                    // Consume the adjacent token to include it in the error span
                    let bad_tok = self.next_token();
                    return Lexeme::Error {
                        span: Span::new(tok.span.start, bad_tok.span.end),
                        message: "invalid tag name",
                    };
                }

                Lexeme::Tag {
                    span: tok.span,
                    name,
                    has_payload: is_adjacent && is_valid_payload,
                }
            }

            TokenKind::BareScalar => {
                // Check if followed by `>` (attribute syntax)
                let next = self.peek_token();
                let is_attr = next.kind == TokenKind::Gt && next.span.start == tok.span.end;
                // Captured before `next` is consumed; only meaningful when is_attr.
                let gt_end = next.span.end;
                if is_attr {
                    // Attribute: key>
                    self.next_token(); // consume `>`

                    // Check that value follows immediately (no whitespace after `>`)
                    let value_tok = self.peek_token();
                    let gt_span = Span::new(gt_end - 1, gt_end);
                    if value_tok.kind == TokenKind::Newline || value_tok.kind == TokenKind::Eof {
                        return Lexeme::Error {
                            span: gt_span,
                            message: "expected a value",
                        };
                    }
                    if value_tok.kind == TokenKind::Whitespace {
                        // The whitespace is left unconsumed; the next call
                        // skips it and lexes the value normally (recovery).
                        return Lexeme::Error {
                            span: gt_span,
                            message: "whitespace after `>` in attribute (use key>value with no spaces)",
                        };
                    }

                    return Lexeme::AttrKey {
                        span: Span::new(tok.span.start, gt_end),
                        key_span: tok.span,
                        key: tok.text,
                    };
                }

                Lexeme::Scalar {
                    span: tok.span,
                    value: Cow::Borrowed(tok.text),
                    kind: ScalarKind::Bare,
                }
            }

            TokenKind::QuotedScalar => {
                // Process escape sequences
                let inner = &tok.text[1..tok.text.len() - 1]; // strip quotes
                match process_escapes(inner) {
                    Ok(value) => Lexeme::Scalar {
                        span: tok.span,
                        value,
                        kind: ScalarKind::Quoted,
                    },
                    Err(msg) => Lexeme::Error {
                        span: tok.span,
                        message: msg,
                    },
                }
            }

            TokenKind::RawScalar => {
                // r#"..."# - extract content between quotes
                let text = tok.text;
                // Count leading #s after 'r'
                let hash_count = text[1..].chars().take_while(|&c| c == '#').count();
                // Content is between r##" and "##
                let start = 1 + hash_count + 1; // r + hashes + quote
                let end = text.len() - hash_count - 1; // quote + hashes
                let content = &text[start..end];

                Lexeme::Scalar {
                    span: tok.span,
                    value: Cow::Borrowed(content),
                    kind: ScalarKind::Raw,
                }
            }

            TokenKind::HeredocStart => {
                // Collect heredoc content across multiple tokens into one
                // owned scalar spanning start delimiter through end delimiter.
                let start_span = tok.span;
                let mut content = String::new();
                let end_span;
                let mut closing_indent = 0usize;

                loop {
                    // Check for closing indent before consuming content token
                    // (it's set after HeredocContent is produced, before HeredocEnd)
                    if let Some(indent) = self.tokenizer.heredoc_closing_indent() {
                        closing_indent = indent;
                    }

                    let next = self.next_token();
                    match next.kind {
                        TokenKind::HeredocContent => {
                            content.push_str(next.text);
                        }
                        TokenKind::HeredocEnd => {
                            end_span = next.span;
                            break;
                        }
                        TokenKind::Eof => {
                            return Lexeme::Error {
                                span: start_span,
                                message: "unterminated heredoc",
                            };
                        }
                        _ => {
                            return Lexeme::Error {
                                span: next.span,
                                message: "unexpected token in heredoc",
                            };
                        }
                    }
                }

                // Apply dedent if closing delimiter was indented
                if closing_indent > 0 {
                    content = dedent_heredoc(&content, closing_indent);
                }

                Lexeme::Scalar {
                    span: Span::new(start_span.start, end_span.end),
                    value: Cow::Owned(content),
                    kind: ScalarKind::Heredoc,
                }
            }

            TokenKind::HeredocContent | TokenKind::HeredocEnd => {
                // Should not see these outside heredoc context
                Lexeme::Error {
                    span: tok.span,
                    message: "unexpected heredoc token",
                }
            }

            TokenKind::Whitespace => {
                // Should have been skipped above
                unreachable!("whitespace should be skipped")
            }

            TokenKind::Error => Lexeme::Error {
                span: tok.span,
                message: "tokenizer error",
            },
        }
    }
}
368
369impl<'src> Iterator for Lexer<'src> {
370    type Item = Lexeme<'src>;
371
372    fn next(&mut self) -> Option<Self::Item> {
373        let lexeme = self.next_lexeme();
374        if matches!(lexeme, Lexeme::Eof) {
375            None
376        } else {
377            Some(lexeme)
378        }
379    }
380}
381
/// Strip up to `indent_len` whitespace characters from the start of each line.
///
/// Only spaces and tabs count as whitespace; stripping stops early at the
/// first non-whitespace character. Line structure is preserved.
fn dedent_heredoc(content: &str, indent_len: usize) -> String {
    let lines: Vec<&str> = content
        .split('\n')
        .map(|line| {
            // Spaces and tabs are single-byte, so the number of stripped
            // characters doubles as a byte offset into the line. A multi-byte
            // character's first byte is never b' ' or b'\t', so take_while
            // stops safely at any non-ASCII character.
            let strip = line
                .bytes()
                .take(indent_len)
                .take_while(|&b| b == b' ' || b == b'\t')
                .count();
            &line[strip..]
        })
        .collect();
    lines.join("\n")
}
411
/// Process escape sequences in a quoted string.
///
/// Supported escapes: `\\`, `\"`, `\n`, `\r`, `\t`, `\uXXXX` (exactly four
/// hex digits) and `\u{X...}` (hex digits in braces).
///
/// Returns `Cow::Borrowed` when the input contains no backslash (fast path,
/// no allocation), otherwise `Cow::Owned` with the escapes decoded.
///
/// # Errors
///
/// Returns a static message for an unknown escape, a trailing backslash, a
/// malformed unicode escape, or a code point that is not a valid Unicode
/// scalar value (out of range or a surrogate).
fn process_escapes(s: &str) -> Result<Cow<'_, str>, &'static str> {
    // Fast path: no escapes
    if !s.contains('\\') {
        return Ok(Cow::Borrowed(s));
    }

    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        match chars.next() {
            Some('\\') => result.push('\\'),
            Some('"') => result.push('"'),
            Some('n') => result.push('\n'),
            Some('r') => result.push('\r'),
            Some('t') => result.push('\t'),
            Some('u') => {
                // Unicode escape: \uXXXX or \u{X...}
                // Collect the hex digits first, then decode once.
                let mut hex = String::new();
                match chars.peek() {
                    Some('{') => {
                        chars.next(); // consume '{'
                        loop {
                            match chars.next() {
                                Some('}') => break,
                                Some(c) if c.is_ascii_hexdigit() => hex.push(c),
                                // Non-hex char or unterminated brace.
                                _ => return Err("invalid unicode escape"),
                            }
                        }
                    }
                    Some(_) => {
                        // \uXXXX - exactly 4 hex digits
                        for _ in 0..4 {
                            match chars.next() {
                                Some(c) if c.is_ascii_hexdigit() => hex.push(c),
                                _ => return Err("invalid unicode escape"),
                            }
                        }
                    }
                    None => return Err("invalid unicode escape"),
                }
                // Empty `\u{}` fails here too: from_str_radix("") errors.
                result.push(decode_unicode_hex(&hex)?);
            }
            Some(_) => return Err("invalid escape sequence"),
            None => return Err("trailing backslash"),
        }
    }

    Ok(Cow::Owned(result))
}

/// Decode a string of hex digits into a `char`.
///
/// Errors if the digits do not parse as a `u32` (empty or too large) or if
/// the value is not a valid Unicode scalar value (e.g. a surrogate).
fn decode_unicode_hex(hex: &str) -> Result<char, &'static str> {
    let code = u32::from_str_radix(hex, 16).map_err(|_| "invalid unicode escape")?;
    char::from_u32(code).ok_or("invalid unicode code point")
}
476
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_process_escapes_double_backslash() {
        // Input: path\\to\\file (two backslash pairs)
        // Expected: path\to\file (two literal backslashes)
        let result = process_escapes(r"path\\to\\file").unwrap();
        assert_eq!(result, r"path\to\file");
    }

    /// Helper: run the lexer to completion and collect all lexemes
    /// (Eof is not included — the iterator stops there).
    fn lex(source: &str) -> Vec<Lexeme<'_>> {
        Lexer::new(source).collect()
    }

    #[test]
    fn test_unit() {
        let lexemes = lex("@");
        assert!(matches!(&lexemes[0], Lexeme::Unit { .. }));
    }

    #[test]
    fn test_tag_no_payload() {
        let lexemes = lex("@foo");
        assert!(matches!(
            &lexemes[0],
            Lexeme::Tag {
                name: "foo",
                has_payload: false,
                ..
            }
        ));
    }

    #[test]
    fn test_tag_with_object_payload() {
        let lexemes = lex("@tag{}");
        assert!(matches!(
            &lexemes[0],
            Lexeme::Tag {
                name: "tag",
                has_payload: true,
                ..
            }
        ));
        assert!(matches!(&lexemes[1], Lexeme::ObjectStart { .. }));
        assert!(matches!(&lexemes[2], Lexeme::ObjectEnd { .. }));
    }

    #[test]
    fn test_tag_with_space_before_object() {
        // @tag {} - space means NOT a payload
        let lexemes = lex("@tag {}");
        assert!(matches!(
            &lexemes[0],
            Lexeme::Tag {
                name: "tag",
                has_payload: false,
                ..
            }
        ));
    }

    #[test]
    fn test_bare_scalar() {
        let lexemes = lex("hello");
        assert!(matches!(
            &lexemes[0],
            Lexeme::Scalar {
                kind: ScalarKind::Bare,
                ..
            }
        ));
    }

    #[test]
    fn test_quoted_scalar() {
        let lexemes = lex(r#""hello\nworld""#);
        match &lexemes[0] {
            Lexeme::Scalar {
                value,
                kind: ScalarKind::Quoted,
                ..
            } => {
                assert_eq!(value.as_ref(), "hello\nworld");
            }
            other => panic!("expected quoted scalar, got {:?}", other),
        }
    }

    #[test]
    fn test_raw_scalar() {
        let lexemes = lex(r##"r#"hello"#"##);
        match &lexemes[0] {
            Lexeme::Scalar {
                value,
                kind: ScalarKind::Raw,
                ..
            } => {
                assert_eq!(value.as_ref(), "hello");
            }
            other => panic!("expected raw scalar, got {:?}", other),
        }
    }

    #[test]
    fn test_tag_with_quoted_payload() {
        let lexemes = lex(r#"@env"staging""#);
        assert!(matches!(
            &lexemes[0],
            Lexeme::Tag {
                name: "env",
                has_payload: true,
                ..
            }
        ));
        match &lexemes[1] {
            Lexeme::Scalar {
                value,
                kind: ScalarKind::Quoted,
                ..
            } => {
                assert_eq!(value.as_ref(), "staging");
            }
            other => panic!("expected quoted scalar, got {:?}", other),
        }
    }

    #[test]
    fn test_tag_with_sequence_payload() {
        let lexemes = lex("@rgb(255 128 0)");
        assert!(matches!(
            &lexemes[0],
            Lexeme::Tag {
                name: "rgb",
                has_payload: true,
                ..
            }
        ));
        assert!(matches!(&lexemes[1], Lexeme::SeqStart { .. }));
    }

    #[test]
    fn test_tag_with_unit_payload() {
        // @tag@ - tag with explicit unit payload
        let lexemes = lex("@tag@");
        assert!(matches!(
            &lexemes[0],
            Lexeme::Tag {
                name: "tag",
                has_payload: true,
                ..
            }
        ));
        assert!(matches!(&lexemes[1], Lexeme::Unit { .. }));
    }

    #[test]
    fn test_tag_with_raw_payload() {
        // @tagr#"x"# - tag "tag" with raw string payload
        let lexemes = lex(r##"@tagr#"x"#"##);
        assert!(matches!(
            &lexemes[0],
            Lexeme::Tag {
                name: "tag",
                has_payload: true,
                ..
            }
        ));
        match &lexemes[1] {
            Lexeme::Scalar {
                value,
                kind: ScalarKind::Raw,
                ..
            } => {
                assert_eq!(value.as_ref(), "x");
            }
            other => panic!("expected raw scalar, got {:?}", other),
        }
    }

    #[test]
    fn test_tag_with_space_before_sequence() {
        let lexemes = lex("@tag (a b)");
        assert!(matches!(
            &lexemes[0],
            Lexeme::Tag {
                name: "tag",
                has_payload: false,
                ..
            }
        ));
    }

    #[test]
    fn test_tag_with_space_before_quoted() {
        let lexemes = lex(r#"@tag "value""#);
        assert!(matches!(
            &lexemes[0],
            Lexeme::Tag {
                name: "tag",
                has_payload: false,
                ..
            }
        ));
    }

    // NOTE(review): a stale caveat here claimed @tag@ required tokenizer
    // changes; test_tag_with_unit_payload above now exercises that case
    // and passes, so the note was removed.

    #[test]
    fn test_at_followed_by_digit() {
        // @123 is an invalid tag name - the error span includes both @ and 123
        let lexemes = lex("@123");
        assert!(matches!(
            &lexemes[0],
            Lexeme::Error {
                message: "invalid tag name",
                ..
            }
        ));
    }

    #[test]
    fn test_structural() {
        let lexemes = lex("{x 1}");
        assert!(matches!(&lexemes[0], Lexeme::ObjectStart { .. }));
        assert!(matches!(&lexemes[1], Lexeme::Scalar { .. }));
        assert!(matches!(&lexemes[2], Lexeme::Scalar { .. }));
        assert!(matches!(&lexemes[3], Lexeme::ObjectEnd { .. }));
    }

    #[test]
    fn test_sequence() {
        let lexemes = lex("(a b)");
        assert!(matches!(&lexemes[0], Lexeme::SeqStart { .. }));
        assert!(matches!(&lexemes[1], Lexeme::Scalar { .. }));
        assert!(matches!(&lexemes[2], Lexeme::Scalar { .. }));
        assert!(matches!(&lexemes[3], Lexeme::SeqEnd { .. }));
    }

    #[test]
    fn test_newlines_preserved() {
        let lexemes = lex("a\nb");
        assert!(matches!(&lexemes[0], Lexeme::Scalar { .. }));
        assert!(matches!(&lexemes[1], Lexeme::Newline { .. }));
        assert!(matches!(&lexemes[2], Lexeme::Scalar { .. }));
    }

    #[test]
    fn test_unicode_escape_braces() {
        let lexemes = lex(r#""\u{1F600}""#);
        match &lexemes[0] {
            Lexeme::Scalar { value, .. } => {
                assert_eq!(value.as_ref(), "😀");
            }
            other => panic!("expected scalar, got {:?}", other),
        }
    }

    #[test]
    fn test_unicode_escape_4digit() {
        let lexemes = lex(r#""\u0041""#);
        match &lexemes[0] {
            Lexeme::Scalar { value, .. } => {
                assert_eq!(value.as_ref(), "A");
            }
            other => panic!("expected scalar, got {:?}", other),
        }
    }

    #[test]
    fn test_dotted_value_is_scalar() {
        // Dots in bare scalars are just part of the value
        // Parser handles dot-splitting for keys
        let lexemes = lex("a.b.c");
        match &lexemes[0] {
            Lexeme::Scalar {
                value,
                kind: ScalarKind::Bare,
                ..
            } => {
                assert_eq!(value.as_ref(), "a.b.c");
            }
            other => panic!("expected scalar, got {:?}", other),
        }
    }

    #[test]
    fn test_attr_key() {
        let lexemes = lex("name>value");
        assert!(matches!(&lexemes[0], Lexeme::AttrKey { key: "name", .. }));
        assert!(matches!(&lexemes[1], Lexeme::Scalar { .. }));
    }

    #[test]
    fn test_attr_key_with_object() {
        let lexemes = lex("opts>{x 1}");
        assert!(matches!(&lexemes[0], Lexeme::AttrKey { key: "opts", .. }));
        assert!(matches!(&lexemes[1], Lexeme::ObjectStart { .. }));
    }

    #[test]
    fn test_attr_key_with_sequence() {
        let lexemes = lex("tags>(a b)");
        assert!(matches!(&lexemes[0], Lexeme::AttrKey { key: "tags", .. }));
        assert!(matches!(&lexemes[1], Lexeme::SeqStart { .. }));
    }

    #[test]
    fn test_standalone_gt_error() {
        // `x > y` with spaces - the `>` is not attribute syntax
        let lexemes = lex("x > y");
        assert!(matches!(&lexemes[0], Lexeme::Scalar { .. }));
        assert!(matches!(&lexemes[1], Lexeme::Error { .. }));
    }

    #[test]
    fn test_attr_whitespace_after_gt_error() {
        // `name> value` with space after `>` is an error
        let lexemes = lex("name> value");
        assert!(matches!(
            &lexemes[0],
            Lexeme::Error {
                message: "whitespace after `>` in attribute (use key>value with no spaces)",
                ..
            }
        ));
    }
}