Skip to main content

farben_core/
lexer.rs

1//! Tokenizer for farben markup strings.
2//!
3//! Parses bracket-delimited tag syntax (`[bold red]text[/]`) into a flat sequence of
4//! [`Token`] values. Each token is either a [`Token::Tag`] carrying styling information
5//! or a [`Token::Text`] carrying a run of literal characters.
6//!
7//! The main entry point is [`tokenize`]. The lower-level [`parse_tag`] and [`parse_part`]
8//! functions handle individual tag strings and are not part of the public API.
9
10use crate::{
11    ansi::{Color, Ground, NamedColor, Style},
12    errors::LexError,
13    registry::search_registry,
14};
15
/// A text emphasis modifier supported by farben markup.
///
/// Every variant is a payload-free marker, so the type is `Copy` and can be
/// compared, hashed, and stored in sets or used as a map key.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum EmphasisType {
    /// Reduced intensity (SGR 2).
    Dim,
    /// Italic text (SGR 3).
    Italic,
    /// Underlined text (SGR 4).
    Underline,
    /// Bold text (SGR 1).
    Bold,
    /// Crossed-out text (SGR 9).
    Strikethrough,
    /// Blinking text (SGR 5). Terminal support varies.
    Blink,
}
32
33/// The kind of styling operation a tag represents.
34#[derive(Debug, PartialEq, Clone)]
35pub enum TagType {
36    /// Resets all active styles (`[/]`).
37    Reset(Option<Box<TagType>>),
38    /// Applies a text emphasis attribute.
39    Emphasis(EmphasisType),
40    /// Sets a foreground or background color.
41    Color { color: Color, ground: Ground },
42    /// A literal prefix string injected before the style sequence by the registry.
43    Prefix(String),
44}
45
46/// A single unit produced by the tokenizer: either a styling tag or a run of plain text.
47#[derive(Debug, PartialEq)]
48pub enum Token {
49    /// A parsed styling tag.
50    Tag(TagType),
51    /// A run of plain text with no markup.
52    Text(String),
53}
54
55impl EmphasisType {
56    /// Parses an emphasis keyword into an `EmphasisType`.
57    ///
58    /// Returns `None` if the string is not a recognized emphasis name.
59    /// Matching is case-sensitive.
60    fn from_str(input: &str) -> Option<Self> {
61        match input {
62            "dim" => Some(Self::Dim),
63            "italic" => Some(Self::Italic),
64            "underline" => Some(Self::Underline),
65            "bold" => Some(Self::Bold),
66            "strikethrough" => Some(Self::Strikethrough),
67            "blink" => Some(Self::Blink),
68            _ => None,
69        }
70    }
71}
72
73/// Expands a [`Style`] from the registry into its equivalent sequence of [`TagType`] values.
74///
75/// A `Prefix` tag is always prepended first, if one is set. A `reset` style short-circuits
76/// after the prefix: no emphasis or color tags are emitted.
77fn style_to_tags(style: Style) -> Vec<TagType> {
78    let mut res: Vec<TagType> = Vec::new();
79    let prefix = style.prefix;
80
81    if style.reset {
82        if let Some(p) = prefix {
83            res.push(TagType::Prefix(p));
84        }
85        res.push(TagType::Reset(None));
86        return res;
87    }
88
89    for (enabled, tag) in [
90        (style.bold, TagType::Emphasis(EmphasisType::Bold)),
91        (style.blink, TagType::Emphasis(EmphasisType::Blink)),
92        (style.dim, TagType::Emphasis(EmphasisType::Dim)),
93        (style.italic, TagType::Emphasis(EmphasisType::Italic)),
94        (
95            style.strikethrough,
96            TagType::Emphasis(EmphasisType::Strikethrough),
97        ),
98        (style.underline, TagType::Emphasis(EmphasisType::Underline)),
99    ] {
100        if enabled {
101            res.push(tag);
102        }
103    }
104
105    if let Some(fg) = style.fg {
106        res.push(TagType::Color {
107            color: fg,
108            ground: Ground::Foreground,
109        })
110    }
111    if let Some(bg) = style.bg {
112        res.push(TagType::Color {
113            color: bg,
114            ground: Ground::Background,
115        })
116    }
117
118    if let Some(p) = prefix {
119        res.push(TagType::Prefix(p));
120    }
121
122    res
123}
124
125/// Parses a single whitespace-delimited tag part into a `TagType`.
126///
127/// Recognizes:
128/// - `/` as a reset
129/// - Named colors (`red`, `blue`, etc.)
130/// - Emphasis keywords (`bold`, `italic`, etc.)
131/// - `ansi(N)` for ANSI 256-palette colors
132/// - `rgb(R,G,B)` for true-color values
133/// - A named style from the registry as a fallback
134///
135/// Parts may be prefixed with `bg:` to target the background ground, or `fg:` to
136/// explicitly target the foreground. Unprefixed color parts default to foreground.
137///
138/// # Errors
139///
140/// Returns `LexError::InvalidTag` if the part matches none of the above forms.
141/// Returns `LexError::InvalidValue` if a numeric argument cannot be parsed.
142/// Returns `LexError::InvalidArgumentCount` if `rgb(...)` does not receive exactly three values.
143fn parse_part(part: &str) -> Result<Vec<TagType>, LexError> {
144    let (ground, part) = if let Some(rest) = part.strip_prefix("bg:") {
145        (Ground::Background, rest)
146    } else if let Some(rest) = part.strip_prefix("fg:") {
147        (Ground::Foreground, rest)
148    } else {
149        (Ground::Foreground, part)
150    };
151    if part.starts_with("/") {
152        let remainder = &part[1..];
153        if remainder.is_empty() {
154            Ok(vec![TagType::Reset(None)])
155        } else {
156            let inner = parse_part(remainder)?;
157            match inner.as_slice() {
158                [tag] => match tag {
159                    TagType::Reset(_) | TagType::Prefix(_) => {
160                        panic!("invalid reset target: cannot reset a reset or prefix")
161                    }
162                    _ => Ok(vec![TagType::Reset(Some(Box::new(tag.clone())))]),
163                },
164                _ => Err(LexError::InvalidTag(part.to_string())),
165            }
166        }
167    } else if let Some(color) = NamedColor::from_str(part) {
168        Ok(vec![TagType::Color {
169            color: Color::Named(color),
170            ground,
171        }])
172    } else if let Some(emphasis) = EmphasisType::from_str(part) {
173        Ok(vec![TagType::Emphasis(emphasis)])
174    } else if let Some(ansi_val) = part.strip_prefix("ansi(").and_then(|s| s.strip_suffix(")")) {
175        match ansi_val.trim().parse::<u8>() {
176            Ok(code) => Ok(vec![TagType::Color {
177                color: Color::Ansi256(code),
178                ground,
179            }]),
180            Err(_) => Err(LexError::InvalidValue(ansi_val.to_string())),
181        }
182    } else if let Some(rgb_val) = part.strip_prefix("rgb(").and_then(|s| s.strip_suffix(")")) {
183        let parts: Result<Vec<u8>, _> =
184            rgb_val.split(',').map(|v| v.trim().parse::<u8>()).collect();
185        match parts {
186            Ok(v) if v.len() == 3 => Ok(vec![TagType::Color {
187                color: Color::Rgb(v[0], v[1], v[2]),
188                ground,
189            }]),
190            Ok(v) => Err(LexError::InvalidArgumentCount {
191                expected: 3,
192                got: v.len(),
193            }),
194            Err(_) => Err(LexError::InvalidValue(rgb_val.to_string())),
195        }
196    } else {
197        match search_registry(part) {
198            Ok(style) => Ok(style_to_tags(style)),
199            Err(e) => Err(e),
200        }
201    }
202}
203
204/// Splits a raw tag string on whitespace and parses each part into a `TagType`.
205///
206/// A tag like `"bold red"` produces two `TagType` values. Whitespace between parts
207/// is consumed and does not appear in the output.
208///
209/// # Errors
210///
211/// Propagates any error from `parse_part`.
212fn parse_tag(raw_tag: &str) -> Result<Vec<TagType>, LexError> {
213    let nested: Result<Vec<Vec<TagType>>, LexError> =
214        raw_tag.split_whitespace().map(parse_part).collect();
215    Ok(nested?.into_iter().flatten().collect())
216}
217
218/// Tokenizes a farben markup string into a sequence of `Token`s.
219///
220/// Tags are delimited by `[` and `]`. A `[` preceded by `\` is treated as a literal
221/// bracket rather than the start of a tag. Text between tags is emitted as
222/// [`Token::Text`]; tags are parsed and emitted as [`Token::Tag`].
223///
224/// # Errors
225///
226/// Returns `LexError::UnclosedTag` if a `[` has no matching `]`.
227/// Returns any error produced by `parse_tag` for malformed tag contents.
228///
229/// # Example
230///
231/// ```ignore
232/// let tokens = tokenize("[red]hello")?;
233/// // => [Token::Tag(TagType::Color { color: Color::Named(NamedColor::Red), ground: Ground::Foreground }),
234/// //     Token::Text("hello".into())]
235/// ```
236pub fn tokenize(input: impl Into<String>) -> Result<Vec<Token>, LexError> {
237    let mut tokens: Vec<Token> = Vec::new();
238    let input = input.into();
239    let mut pos = 0;
240    loop {
241        let Some(starting) = input[pos..].find('[') else {
242            if pos < input.len() {
243                tokens.push(Token::Text(input[pos..].to_string()));
244            }
245            break;
246        };
247        let abs_starting = starting + pos;
248        // escape logic
249        if abs_starting > 0 && input[abs_starting - 1..abs_starting] == "\\".to_string() {
250            let before = &input[pos..abs_starting - 1];
251            if !before.is_empty() {
252                tokens.push(Token::Text(before.to_string()));
253            }
254            tokens.push(Token::Text(String::from('[')));
255            pos = abs_starting + 1;
256            continue;
257        }
258
259        if pos != abs_starting {
260            tokens.push(Token::Text(input[pos..abs_starting].to_string()));
261        }
262
263        let Some(closing) = input[abs_starting..].find(']') else {
264            return Err(LexError::UnclosedTag);
265        };
266        let abs_closing = closing + abs_starting;
267        let raw_tag = &input[abs_starting + 1..abs_closing];
268        for tag in parse_tag(raw_tag)? {
269            tokens.push(Token::Tag(tag));
270        }
271        pos = abs_closing + 1;
272    }
273    Ok(tokens)
274}
275
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ansi::{Color, Ground, NamedColor};

    // --- helpers ---

    /// Builds the tag expected for `color` applied to the foreground.
    fn fg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Foreground,
        }
    }

    /// Builds the tag expected for `color` applied to the background.
    fn bg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Background,
        }
    }

    /// Shorthand for a named color value.
    fn named(color: NamedColor) -> Color {
        Color::Named(color)
    }

    /// Shorthand for a plain-text token.
    fn text(s: &str) -> Token {
        Token::Text(s.into())
    }

    // --- EmphasisType::from_str ---

    #[test]
    fn test_emphasis_from_str_all_known() {
        assert_eq!(EmphasisType::from_str("dim"), Some(EmphasisType::Dim));
        assert_eq!(EmphasisType::from_str("italic"), Some(EmphasisType::Italic));
        assert_eq!(
            EmphasisType::from_str("underline"),
            Some(EmphasisType::Underline)
        );
        assert_eq!(EmphasisType::from_str("bold"), Some(EmphasisType::Bold));
        assert_eq!(
            EmphasisType::from_str("strikethrough"),
            Some(EmphasisType::Strikethrough)
        );
        assert_eq!(EmphasisType::from_str("blink"), Some(EmphasisType::Blink));
    }

    #[test]
    fn test_emphasis_from_str_unknown_returns_none() {
        assert_eq!(EmphasisType::from_str("flash"), None);
    }

    #[test]
    fn test_emphasis_from_str_case_sensitive() {
        assert_eq!(EmphasisType::from_str("Bold"), None);
    }

    // --- parse_part ---

    #[test]
    fn test_parse_part_reset() {
        assert_eq!(parse_part("/").unwrap(), vec![TagType::Reset(None)]);
    }

    #[test]
    fn test_parse_part_named_color_foreground_default() {
        assert_eq!(
            parse_part("red").unwrap(),
            vec![fg(named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_named_color_explicit_fg() {
        assert_eq!(
            parse_part("fg:red").unwrap(),
            vec![fg(named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_named_color_bg() {
        assert_eq!(
            parse_part("bg:red").unwrap(),
            vec![bg(named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_emphasis_bold() {
        assert_eq!(
            parse_part("bold").unwrap(),
            vec![TagType::Emphasis(EmphasisType::Bold)]
        );
    }

    #[test]
    fn test_parse_part_ansi256_valid() {
        assert_eq!(
            parse_part("ansi(200)").unwrap(),
            vec![fg(Color::Ansi256(200))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_bg() {
        assert_eq!(
            parse_part("bg:ansi(200)").unwrap(),
            vec![bg(Color::Ansi256(200))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_with_whitespace() {
        assert_eq!(
            parse_part("ansi( 42 )").unwrap(),
            vec![fg(Color::Ansi256(42))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_invalid_value() {
        assert!(parse_part("ansi(abc)").is_err());
    }

    #[test]
    fn test_parse_part_rgb_valid() {
        assert_eq!(
            parse_part("rgb(255,128,0)").unwrap(),
            vec![fg(Color::Rgb(255, 128, 0))]
        );
    }

    #[test]
    fn test_parse_part_rgb_bg() {
        assert_eq!(
            parse_part("bg:rgb(255,128,0)").unwrap(),
            vec![bg(Color::Rgb(255, 128, 0))]
        );
    }

    #[test]
    fn test_parse_part_rgb_with_spaces() {
        assert_eq!(
            parse_part("rgb( 10 , 20 , 30 )").unwrap(),
            vec![fg(Color::Rgb(10, 20, 30))]
        );
    }

    #[test]
    fn test_parse_part_rgb_wrong_arg_count() {
        // Match on the exact variant: an `if let` with no `else` would pass silently
        // if a different error kind were returned.
        match parse_part("rgb(1,2)") {
            Err(crate::errors::LexError::InvalidArgumentCount { expected, got }) => {
                assert_eq!(expected, 3);
                assert_eq!(got, 2);
            }
            other => panic!(
                "expected InvalidArgumentCount {{ expected: 3, got: 2 }}, got {:?}",
                other
            ),
        }
    }

    #[test]
    fn test_parse_part_rgb_invalid_value() {
        assert!(parse_part("rgb(r,g,b)").is_err());
    }

    #[test]
    fn test_parse_part_unknown_tag_returns_error() {
        assert!(parse_part("fuchsia").is_err());
    }

    // --- tokenize ---

    #[test]
    fn test_tokenize_plain_text() {
        let tokens = tokenize("hello world").unwrap();
        assert_eq!(tokens, vec![text("hello world")]);
    }

    #[test]
    fn test_tokenize_empty_string() {
        assert!(tokenize("").unwrap().is_empty());
    }

    #[test]
    fn test_tokenize_single_color_tag() {
        let tokens = tokenize("[red]text").unwrap();
        assert_eq!(
            tokens,
            vec![Token::Tag(fg(named(NamedColor::Red))), text("text")]
        );
    }

    #[test]
    fn test_tokenize_bg_color_tag() {
        let tokens = tokenize("[bg:red]text").unwrap();
        assert_eq!(
            tokens,
            vec![Token::Tag(bg(named(NamedColor::Red))), text("text")]
        );
    }

    #[test]
    fn test_tokenize_fg_and_bg_in_same_bracket() {
        let tokens = tokenize("[fg:white bg:blue]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(fg(named(NamedColor::White))),
                Token::Tag(bg(named(NamedColor::Blue))),
                text("text"),
            ]
        );
    }

    #[test]
    fn test_tokenize_reset_tag() {
        assert_eq!(
            tokenize("[/]").unwrap(),
            vec![Token::Tag(TagType::Reset(None))]
        );
    }

    #[test]
    fn test_tokenize_compound_tag() {
        let tokens = tokenize("[bold red]hi").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Emphasis(EmphasisType::Bold)),
                Token::Tag(fg(named(NamedColor::Red))),
                text("hi"),
            ]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_at_start() {
        let tokens = tokenize("\\[not a tag]").unwrap();
        assert_eq!(tokens, vec![text("["), text("not a tag]")]);
    }

    #[test]
    fn test_tokenize_escaped_bracket_with_prefix() {
        let tokens = tokenize("before\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![text("before"), text("["), text("not a tag]")]
        );
    }

    #[test]
    fn test_tokenize_unclosed_tag_returns_error() {
        assert!(tokenize("[red").is_err());
    }

    #[test]
    fn test_tokenize_invalid_tag_name_returns_error() {
        assert!(tokenize("[fuchsia]").is_err());
    }

    #[test]
    fn test_tokenize_text_before_and_after_tag() {
        let tokens = tokenize("before[red]after").unwrap();
        assert_eq!(
            tokens,
            vec![
                text("before"),
                Token::Tag(fg(named(NamedColor::Red))),
                text("after"),
            ]
        );
    }

    #[test]
    fn test_tokenize_ansi256_tag() {
        let tokens = tokenize("[ansi(1)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(fg(Color::Ansi256(1))));
    }

    #[test]
    fn test_tokenize_rgb_tag() {
        let tokens = tokenize("[rgb(255,0,128)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(fg(Color::Rgb(255, 0, 128))));
    }

    #[test]
    fn test_tokenize_bg_rgb_tag() {
        let tokens = tokenize("[bg:rgb(0,255,0)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(bg(Color::Rgb(0, 255, 0))));
    }

    // --- registry fallback ---

    #[test]
    fn test_parse_part_custom_style_from_registry() {
        crate::registry::insert_style("danger", crate::ansi::Style::parse("[bold red]").unwrap());
        let result = parse_part("danger").unwrap();
        assert_eq!(
            result,
            vec![
                TagType::Emphasis(EmphasisType::Bold),
                fg(named(NamedColor::Red)),
            ]
        );
    }
}