Skip to main content

farben_core/
lexer.rs

1//! Tokenizer for farben markup strings.
2//!
3//! Parses bracket-delimited tag syntax (`[bold red]text[/]`) into a flat sequence of
4//! [`Token`] values. Each token is either a [`Token::Tag`] carrying styling information
5//! or a [`Token::Text`] carrying a run of literal characters.
6//!
7//! The main entry point is [`tokenize`]. The lower-level [`parse_tag`] and [`parse_part`]
8//! functions handle individual tag strings and are not part of the public API.
9
10use std::{borrow::Cow, sync::Arc};
11
12use crate::{
13    ansi::{Color, Ground, NamedColor, Style},
14    errors::LexError,
15    registry::search_registry,
16};
17
/// A text emphasis modifier supported by farben markup.
///
/// Every variant is a field-less marker, so the type is `Copy` and can be used as a
/// key in hashed collections. The SGR number quoted on each variant is the ANSI
/// "Select Graphic Rendition" parameter that the variant stands for.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EmphasisType {
    /// Reduced intensity (SGR 2).
    Dim,
    /// Italic text (SGR 3).
    Italic,
    /// Underlined text (SGR 4).
    Underline,
    /// Double-underlined text (SGR 21).
    DoubleUnderline,
    /// Bold text (SGR 1).
    Bold,
    /// Crossed-out text (SGR 9).
    Strikethrough,
    /// Blinking text (SGR 5). Terminal support varies.
    Blink,
    /// Overlined text (SGR 53).
    Overline,
    /// Invisible text (SGR 8). Text is hidden but selectable.
    Invisible,
    /// Reverse video (SGR 7). Swaps foreground and background.
    Reverse,
    /// Rapid blinking (SGR 6). Faster than Blink. Terminal support varies.
    RapidBlink,
}
44
/// The kind of styling operation a tag represents.
///
/// Values are produced by the tag parser in this module and reach callers wrapped in
/// [`Token::Tag`].
#[derive(Debug, PartialEq, Clone)]
pub enum TagType {
    /// Resets all active styles (`[/]`).
    ResetAll,
    /// Resets one specific active style (`[/bold]`, `[/red]`, etc.), then re-applies the rest.
    ///
    /// The boxed value is the tag being reset. The parser in this module only ever
    /// wraps an [`TagType::Emphasis`] or [`TagType::Color`] here; any other target is
    /// rejected as an invalid reset target.
    ResetOne(Box<TagType>),
    /// Applies a text emphasis attribute.
    Emphasis(EmphasisType),
    /// Sets a foreground or background color.
    Color { color: Color, ground: Ground },
    /// A literal prefix string injected before the style sequence by the registry.
    ///
    /// Only emitted when a named registry style carries a prefix; it cannot be written
    /// directly in markup.
    Prefix(String),
}
59
60/// A single unit produced by the tokenizer: either a styling tag or a run of plain text.
61#[derive(Debug, PartialEq)]
62pub enum Token {
63    /// A parsed styling tag.
64    Tag(TagType),
65    /// A run of plain text with no markup.
66    Text(Cow<'static, str>),
67}
68
69impl EmphasisType {
70    /// Parses an emphasis keyword into an `EmphasisType`.
71    ///
72    /// Returns `None` if the string is not a recognized emphasis name.
73    /// Matching is case-sensitive.
74    fn from_str(input: &str) -> Option<Self> {
75        match input {
76            "dim" => Some(Self::Dim),
77            "italic" => Some(Self::Italic),
78            "underline" => Some(Self::Underline),
79            "double-underline" => Some(Self::DoubleUnderline),
80            "bold" => Some(Self::Bold),
81            "strikethrough" => Some(Self::Strikethrough),
82            "blink" => Some(Self::Blink),
83            "overline" => Some(Self::Overline),
84            "invisible" => Some(Self::Invisible),
85            "reverse" => Some(Self::Reverse),
86            "rapid-blink" => Some(Self::RapidBlink),
87            _ => None,
88        }
89    }
90}
91
92/// Expands a [`Style`] from the registry into its equivalent sequence of [`TagType`] values.
93///
94/// A `Prefix` tag is always prepended first, if one is set. A `reset` style short-circuits
95/// after the prefix: no emphasis or color tags are emitted.
96fn style_to_tags(style: Arc<Style>) -> Vec<TagType> {
97    let mut res: Vec<TagType> = Vec::new();
98    let prefix = style.prefix.clone();
99
100    if style.reset {
101        if let Some(p) = prefix {
102            res.push(TagType::Prefix(p));
103        }
104        res.push(TagType::ResetAll);
105        return res;
106    }
107
108    for (enabled, tag) in [
109        (style.bold, TagType::Emphasis(EmphasisType::Bold)),
110        (style.blink, TagType::Emphasis(EmphasisType::Blink)),
111        (style.dim, TagType::Emphasis(EmphasisType::Dim)),
112        (style.italic, TagType::Emphasis(EmphasisType::Italic)),
113        (
114            style.strikethrough,
115            TagType::Emphasis(EmphasisType::Strikethrough),
116        ),
117        (style.underline, TagType::Emphasis(EmphasisType::Underline)),
118        (
119            style.double_underline,
120            TagType::Emphasis(EmphasisType::DoubleUnderline),
121        ),
122        (style.overline, TagType::Emphasis(EmphasisType::Overline)),
123        (style.invisible, TagType::Emphasis(EmphasisType::Invisible)),
124        (style.reverse, TagType::Emphasis(EmphasisType::Reverse)),
125        (
126            style.rapid_blink,
127            TagType::Emphasis(EmphasisType::RapidBlink),
128        ),
129    ] {
130        if enabled {
131            res.push(tag);
132        }
133    }
134
135    if let Some(fg) = style.fg.clone() {
136        res.push(TagType::Color {
137            color: fg,
138            ground: Ground::Foreground,
139        })
140    }
141    if let Some(bg) = style.bg.clone() {
142        res.push(TagType::Color {
143            color: bg,
144            ground: Ground::Background,
145        })
146    }
147
148    if let Some(p) = prefix {
149        res.push(TagType::Prefix(p));
150    }
151
152    res
153}
154
155/// Parses a single whitespace-delimited tag part into a `TagType`.
156///
157/// Recognizes:
158/// - `/` as a reset
159/// - Named colors (`red`, `blue`, etc.)
160/// - Emphasis keywords (`bold`, `italic`, etc.)
161/// - `ansi(N)` for ANSI 256-palette colors
162/// - `rgb(R,G,B)` for true-color values
163/// - A named style from the registry as a fallback
164///
165/// Parts may be prefixed with `bg:` to target the background ground, or `fg:` to
166/// explicitly target the foreground. Unprefixed color parts default to foreground.
167///
168/// # Errors
169///
170/// Returns `LexError::InvalidTag` if the part matches none of the above forms.
171/// Returns `LexError::InvalidValue` if a numeric argument cannot be parsed.
172/// Returns `LexError::InvalidArgumentCount` if `rgb(...)` does not receive exactly three values.
173fn parse_part(part: &str, position: usize) -> Result<Vec<TagType>, LexError> {
174    let (ground, part) = if let Some(rest) = part.strip_prefix("bg:") {
175        (Ground::Background, rest)
176    } else if let Some(rest) = part.strip_prefix("fg:") {
177        (Ground::Foreground, rest)
178    } else {
179        (Ground::Foreground, part)
180    };
181    if let Some(remainder) = part.strip_prefix('/') {
182        if remainder.is_empty() {
183            Ok(vec![TagType::ResetAll])
184        } else {
185            let inner = parse_part(remainder, position + 1)?;
186            match inner.as_slice() {
187                [tag] => match tag {
188                    TagType::ResetAll | TagType::ResetOne(_) | TagType::Prefix(_) => {
189                        Err(LexError::InvalidResetTarget(position))
190                    }
191                    _ => Ok(vec![TagType::ResetOne(Box::new(tag.clone()))]),
192                },
193                _ => Err(LexError::InvalidTag {
194                    tag_content: part.to_string(),
195                    position,
196                }),
197            }
198        }
199    } else if let Some(color) = NamedColor::from_str(part) {
200        Ok(vec![TagType::Color {
201            color: Color::Named(color),
202            ground,
203        }])
204    } else if let Some(emphasis) = EmphasisType::from_str(part) {
205        Ok(vec![TagType::Emphasis(emphasis)])
206    } else if let Some(rest) = part.strip_prefix("ansi(") {
207        if !rest.ends_with(')') {
208            return Err(LexError::UnclosedValue(position));
209        }
210        let ansi_val = &rest[..rest.len() - 1];
211        match ansi_val.trim().parse::<u8>() {
212            Ok(code) => Ok(vec![TagType::Color {
213                color: Color::Ansi256(code),
214                ground,
215            }]),
216            Err(_) => Err(LexError::InvalidValue {
217                value: ansi_val.to_string(),
218                position,
219            }),
220        }
221    } else if let Some(rest) = part.strip_prefix("rgb(") {
222        if !rest.ends_with(')') {
223            return Err(LexError::UnclosedValue(position));
224        }
225        let rgb_val = &rest[..rest.len() - 1];
226        let parts: Result<Vec<u8>, _> =
227            rgb_val.split(',').map(|v| v.trim().parse::<u8>()).collect();
228        match parts {
229            Ok(v) if v.len() == 3 => Ok(vec![TagType::Color {
230                color: Color::Rgb(v[0], v[1], v[2]),
231                ground,
232            }]),
233            Ok(v) => Err(LexError::InvalidArgumentCount {
234                expected: 3,
235                got: v.len(),
236                position,
237            }),
238            Err(_) => Err(LexError::InvalidValue {
239                value: rgb_val.to_string(),
240                position,
241            }),
242        }
243    } else {
244        match search_registry(part) {
245            Ok(style) => Ok(style_to_tags(style)),
246            Err(_) => Err(LexError::InvalidTag {
247                tag_content: part.to_string(),
248                position,
249            }),
250        }
251    }
252}
253
254/// Splits a raw tag string on whitespace and parses each part into a `TagType`.
255///
256/// A tag like `"bold red"` produces two `TagType` values. Whitespace between parts
257/// is consumed and does not appear in the output.
258///
259/// # Errors
260///
261/// Propagates any error from `parse_part`.
262fn parse_tag(raw_tag: &str, tag_start: usize) -> Result<Vec<TagType>, LexError> {
263    let mut result = Vec::new();
264    let mut search_from = 0;
265
266    for part in raw_tag.split_whitespace() {
267        let part_offset = raw_tag[search_from..].find(part).unwrap() + search_from;
268        let abs_position = tag_start + part_offset;
269        result.extend(parse_part(part, abs_position)?);
270        search_from = part_offset + part.len();
271    }
272
273    Ok(result)
274}
275
276/// Tokenizes a farben markup string into a sequence of `Token`s.
277///
278/// Tags are delimited by `[` and `]`. A `[` preceded by `\` is treated as a literal
279/// bracket rather than the start of a tag. Text between tags is emitted as
280/// [`Token::Text`]; tags are parsed and emitted as [`Token::Tag`].
281///
282/// # Errors
283///
284/// Returns `LexError::UnclosedTag` if a `[` has no matching `]`.
285/// Returns any error produced by `parse_tag` for malformed tag contents.
286///
287/// # Example
288///
289/// ```ignore
290/// let tokens = tokenize("[red]hello")?;
291/// // => [Token::Tag(TagType::Color { color: Color::Named(NamedColor::Red), ground: Ground::Foreground }),
292/// //     Token::Text("hello".into())]
293/// ```
294pub fn tokenize(input: impl Into<String>) -> Result<Vec<Token>, LexError> {
295    let input = input.into();
296    let mut tokens: Vec<Token> = Vec::with_capacity(input.len() / 4);
297    let mut pos = 0;
298    loop {
299        let Some(starting) = input[pos..].find('[') else {
300            if pos < input.len() {
301                tokens.push(Token::Text(Cow::Owned(input[pos..].to_string())));
302            }
303            break;
304        };
305        let abs_starting = starting + pos;
306        // wtf does this mean
307        if abs_starting > 0 && input.as_bytes().get(abs_starting.wrapping_sub(1)) == Some(&b'\\') {
308            let before = &input[pos..abs_starting - 1];
309            if !before.is_empty() {
310                tokens.push(Token::Text(Cow::Owned(before.to_string())));
311            }
312            tokens.push(Token::Text(Cow::Borrowed("[")));
313            pos = abs_starting + 1;
314            continue;
315        }
316
317        if abs_starting > 0 && input.as_bytes().get(abs_starting.wrapping_sub(1)) == Some(&b'\x1b')
318        {
319            tokens.push(Token::Text(Cow::Owned(
320                input[pos..abs_starting + 1].to_string(),
321            )));
322            pos = abs_starting + 1;
323            continue;
324        }
325
326        if pos != abs_starting {
327            tokens.push(Token::Text(Cow::Owned(
328                input[pos..abs_starting].to_string(),
329            )));
330        }
331
332        let Some(closing) = input[abs_starting..].find(']') else {
333            return Err(LexError::UnclosedTag(abs_starting));
334        };
335        let abs_closing = closing + abs_starting;
336        let raw_tag = &input[abs_starting + 1..abs_closing];
337        for tag in parse_tag(raw_tag, abs_starting)? {
338            tokens.push(Token::Tag(tag));
339        }
340        pos = abs_closing + 1;
341    }
342    Ok(tokens)
343}
344
#[cfg(test)]
mod tests {
    //! Unit tests for the lexer: keyword lookup, single-part parsing, and tokenizing.

    use super::*;
    use crate::ansi::{Color, Ground, NamedColor};
    use crate::errors::LexError;

    /// Shorthand for a foreground color tag.
    fn fg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Foreground,
        }
    }

    /// Shorthand for a background color tag.
    fn bg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Background,
        }
    }

    // --- EmphasisType::from_str ---

    #[test]
    fn test_emphasis_from_str_all_known() {
        // Every keyword the lexer recognizes, paired with its variant.
        let known = [
            ("dim", EmphasisType::Dim),
            ("italic", EmphasisType::Italic),
            ("underline", EmphasisType::Underline),
            ("double-underline", EmphasisType::DoubleUnderline),
            ("bold", EmphasisType::Bold),
            ("strikethrough", EmphasisType::Strikethrough),
            ("blink", EmphasisType::Blink),
            ("overline", EmphasisType::Overline),
            ("invisible", EmphasisType::Invisible),
            ("reverse", EmphasisType::Reverse),
            ("rapid-blink", EmphasisType::RapidBlink),
        ];
        for (keyword, expected) in known {
            assert_eq!(
                EmphasisType::from_str(keyword),
                Some(expected),
                "keyword {keyword:?}"
            );
        }
    }

    #[test]
    fn test_emphasis_from_str_unknown_returns_none() {
        assert_eq!(EmphasisType::from_str("flash"), None);
    }

    #[test]
    fn test_emphasis_from_str_case_sensitive() {
        assert_eq!(EmphasisType::from_str("Bold"), None);
    }

    // --- parse_part ---

    #[test]
    fn test_parse_part_reset() {
        assert_eq!(parse_part("/", 0).unwrap(), vec![TagType::ResetAll]);
    }

    #[test]
    fn test_parse_part_reset_one() {
        assert_eq!(
            parse_part("/bold", 0).unwrap(),
            vec![TagType::ResetOne(Box::new(TagType::Emphasis(
                EmphasisType::Bold
            )))]
        );
    }

    #[test]
    fn test_parse_part_reset_of_reset_is_invalid() {
        assert!(matches!(
            parse_part("//", 0),
            Err(LexError::InvalidResetTarget(0))
        ));
    }

    #[test]
    fn test_parse_part_named_color_foreground_default() {
        assert_eq!(
            parse_part("red", 0).unwrap(),
            vec![fg(Color::Named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_named_color_explicit_fg() {
        assert_eq!(
            parse_part("fg:red", 0).unwrap(),
            vec![fg(Color::Named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_named_color_bg() {
        assert_eq!(
            parse_part("bg:red", 0).unwrap(),
            vec![bg(Color::Named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_emphasis_bold() {
        assert_eq!(
            parse_part("bold", 0).unwrap(),
            vec![TagType::Emphasis(EmphasisType::Bold)]
        );
    }

    #[test]
    fn test_parse_part_ansi256_valid() {
        assert_eq!(
            parse_part("ansi(200)", 0).unwrap(),
            vec![fg(Color::Ansi256(200))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_bg() {
        assert_eq!(
            parse_part("bg:ansi(200)", 0).unwrap(),
            vec![bg(Color::Ansi256(200))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_with_whitespace() {
        assert_eq!(
            parse_part("ansi( 42 )", 0).unwrap(),
            vec![fg(Color::Ansi256(42))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_invalid_value() {
        assert!(parse_part("ansi(abc)", 0).is_err());
    }

    #[test]
    fn test_parse_part_ansi256_unclosed() {
        assert!(matches!(
            parse_part("ansi(200", 0),
            Err(LexError::UnclosedValue(0))
        ));
    }

    #[test]
    fn test_parse_part_rgb_valid() {
        assert_eq!(
            parse_part("rgb(255,128,0)", 0).unwrap(),
            vec![fg(Color::Rgb(255, 128, 0))]
        );
    }

    #[test]
    fn test_parse_part_rgb_bg() {
        assert_eq!(
            parse_part("bg:rgb(255,128,0)", 0).unwrap(),
            vec![bg(Color::Rgb(255, 128, 0))]
        );
    }

    #[test]
    fn test_parse_part_rgb_with_spaces() {
        assert_eq!(
            parse_part("rgb( 10 , 20 , 30 )", 0).unwrap(),
            vec![fg(Color::Rgb(10, 20, 30))]
        );
    }

    #[test]
    fn test_parse_part_rgb_wrong_arg_count() {
        // Match on the variant so that a different error kind fails the test instead
        // of silently skipping the field assertions.
        match parse_part("rgb(1,2)", 0) {
            Err(LexError::InvalidArgumentCount { expected, got, .. }) => {
                assert_eq!(expected, 3);
                assert_eq!(got, 2);
            }
            other => panic!("expected InvalidArgumentCount, got {other:?}"),
        }
    }

    #[test]
    fn test_parse_part_rgb_invalid_value() {
        assert!(parse_part("rgb(r,g,b)", 0).is_err());
    }

    #[test]
    fn test_parse_part_unknown_tag_returns_error() {
        assert!(parse_part("fuchsia", 0).is_err());
    }

    #[test]
    fn test_parse_part_custom_style_from_registry() {
        crate::registry::insert_style("danger", crate::ansi::Style::parse("[bold red]").unwrap());
        let result = parse_part("danger", 0).unwrap();
        assert_eq!(
            result,
            vec![
                TagType::Emphasis(EmphasisType::Bold),
                fg(Color::Named(NamedColor::Red)),
            ]
        );
    }

    // --- tokenize ---

    #[test]
    fn test_tokenize_plain_text() {
        let tokens = tokenize("hello world").unwrap();
        assert_eq!(tokens, vec![Token::Text("hello world".into())]);
    }

    #[test]
    fn test_tokenize_empty_string() {
        assert!(tokenize("").unwrap().is_empty());
    }

    #[test]
    fn test_tokenize_single_color_tag() {
        let tokens = tokenize("[red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_bg_color_tag() {
        let tokens = tokenize("[bg:red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(bg(Color::Named(NamedColor::Red))),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_fg_and_bg_in_same_bracket() {
        let tokens = tokenize("[fg:white bg:blue]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(fg(Color::Named(NamedColor::White))),
                Token::Tag(bg(Color::Named(NamedColor::Blue))),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_reset_tag() {
        assert_eq!(
            tokenize("[/]").unwrap(),
            vec![Token::Tag(TagType::ResetAll)]
        );
    }

    #[test]
    fn test_tokenize_compound_tag() {
        let tokens = tokenize("[bold red]hi").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Emphasis(EmphasisType::Bold)),
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                Token::Text("hi".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_at_start() {
        let tokens = tokenize("\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![Token::Text("[".into()), Token::Text("not a tag]".into())]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_with_prefix() {
        let tokens = tokenize("before\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Text("[".into()),
                Token::Text("not a tag]".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_raw_ansi_escape_passes_through() {
        // A bracket right after ESC belongs to a raw ANSI sequence, not to a tag.
        let tokens = tokenize("\x1b[31mred").unwrap();
        assert_eq!(
            tokens,
            vec![Token::Text("\x1b[".into()), Token::Text("31mred".into())]
        );
    }

    #[test]
    fn test_tokenize_unclosed_tag_returns_error() {
        assert!(tokenize("[red").is_err());
    }

    #[test]
    fn test_tokenize_invalid_tag_name_returns_error() {
        assert!(tokenize("[fuchsia]").is_err());
    }

    #[test]
    fn test_tokenize_text_before_and_after_tag() {
        let tokens = tokenize("before[red]after").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                Token::Text("after".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_ansi256_tag() {
        let tokens = tokenize("[ansi(1)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(fg(Color::Ansi256(1))));
    }

    #[test]
    fn test_tokenize_rgb_tag() {
        let tokens = tokenize("[rgb(255,0,128)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(fg(Color::Rgb(255, 0, 128))));
    }

    #[test]
    fn test_tokenize_bg_rgb_tag() {
        let tokens = tokenize("[bg:rgb(0,255,0)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(bg(Color::Rgb(0, 255, 0))));
    }
}