Skip to main content

farben_core/
lexer.rs

1//! Tokenizer for farben markup strings.
2//!
3//! Parses bracket-delimited tag syntax (`[bold red]text[/]`) into a flat sequence of
4//! [`Token`] values. Each token is either a [`Token::Tag`] carrying styling information
5//! or a [`Token::Text`] carrying a run of literal characters.
6//!
7//! The main entry point is [`tokenize`]. The lower-level [`parse_tag`] and [`parse_part`]
8//! functions handle individual tag strings and are not part of the public API.
9
10use crate::{
11    ansi::{Color, Ground, NamedColor, Style},
12    errors::LexError,
13    registry::search_registry,
14};
15
/// A text emphasis modifier supported by farben markup.
///
/// Each variant corresponds to exactly one lowercase markup keyword (for example
/// `bold` or `strikethrough`) that may appear inside a bracketed tag.
#[derive(Debug, PartialEq)]
pub enum EmphasisType {
    /// Reduced intensity (SGR 2). Markup keyword: `dim`.
    Dim,
    /// Italic text (SGR 3). Markup keyword: `italic`.
    Italic,
    /// Underlined text (SGR 4). Markup keyword: `underline`.
    Underline,
    /// Bold text (SGR 1). Markup keyword: `bold`.
    Bold,
    /// Crossed-out text (SGR 9). Markup keyword: `strikethrough`.
    Strikethrough,
    /// Blinking text (SGR 5). Terminal support varies. Markup keyword: `blink`.
    Blink,
}
32
/// The kind of styling operation a tag represents.
///
/// A single bracketed tag in the markup may expand to several `TagType` values:
/// one per whitespace-separated part, and possibly more when a part names a
/// registry style.
#[derive(Debug, PartialEq)]
pub enum TagType {
    /// Resets all active styles (`[/]`).
    ///
    /// Also emitted for a registry style whose `reset` flag is set.
    Reset,
    /// Applies a text emphasis attribute.
    Emphasis(EmphasisType),
    /// Sets a foreground or background color.
    ///
    /// `ground` is the foreground unless the markup part carried a `bg:` prefix.
    Color { color: Color, ground: Ground },
    /// A literal prefix string injected before the style sequence by the registry.
    ///
    /// When a registry style has a prefix, this is the first tag produced for it.
    Prefix(String),
}
45
/// A single unit produced by the tokenizer: either a styling tag or a run of plain text.
///
/// Tokens appear in the same order as the corresponding markup in the input string.
#[derive(Debug, PartialEq)]
pub enum Token {
    /// A parsed styling tag.
    ///
    /// One bracketed tag such as `[bold red]` yields one `Tag` token per resulting
    /// [`TagType`], not a single combined token.
    Tag(TagType),
    /// A run of plain text with no markup.
    ///
    /// An escaped bracket (`\[`) is emitted as its own `Text("[")` token rather than
    /// being merged with the surrounding text.
    Text(String),
}
54
55impl EmphasisType {
56    /// Parses an emphasis keyword into an `EmphasisType`.
57    ///
58    /// Returns `None` if the string is not a recognized emphasis name.
59    /// Matching is case-sensitive.
60    fn from_str(input: &str) -> Option<Self> {
61        match input {
62            "dim" => Some(Self::Dim),
63            "italic" => Some(Self::Italic),
64            "underline" => Some(Self::Underline),
65            "bold" => Some(Self::Bold),
66            "strikethrough" => Some(Self::Strikethrough),
67            "blink" => Some(Self::Blink),
68            _ => None,
69        }
70    }
71}
72
73/// Expands a [`Style`] from the registry into its equivalent sequence of [`TagType`] values.
74///
75/// A `Prefix` tag is always prepended first, if one is set. A `reset` style short-circuits
76/// after the prefix: no emphasis or color tags are emitted.
77fn style_to_tags(style: Style) -> Vec<TagType> {
78    let mut res: Vec<TagType> = Vec::new();
79
80    if let Some(prefix) = style.prefix {
81        res.push(TagType::Prefix(prefix));
82    }
83
84    if style.reset {
85        // We don't directly return Reset because someone might put Prefix.
86        res.push(TagType::Reset);
87        return res;
88    }
89
90    for (enabled, tag) in [
91        (style.bold, TagType::Emphasis(EmphasisType::Bold)),
92        (style.blink, TagType::Emphasis(EmphasisType::Blink)),
93        (style.dim, TagType::Emphasis(EmphasisType::Dim)),
94        (style.italic, TagType::Emphasis(EmphasisType::Italic)),
95        (
96            style.strikethrough,
97            TagType::Emphasis(EmphasisType::Strikethrough),
98        ),
99        (style.underline, TagType::Emphasis(EmphasisType::Underline)),
100    ] {
101        if enabled {
102            res.push(tag);
103        }
104    }
105
106    if let Some(fg) = style.fg {
107        res.push(TagType::Color {
108            color: fg,
109            ground: Ground::Foreground,
110        })
111    }
112    if let Some(bg) = style.bg {
113        res.push(TagType::Color {
114            color: bg,
115            ground: Ground::Background,
116        })
117    }
118
119    res
120}
121
122/// Parses a single whitespace-delimited tag part into a `TagType`.
123///
124/// Recognizes:
125/// - `/` as a reset
126/// - Named colors (`red`, `blue`, etc.)
127/// - Emphasis keywords (`bold`, `italic`, etc.)
128/// - `ansi(N)` for ANSI 256-palette colors
129/// - `rgb(R,G,B)` for true-color values
130/// - A named style from the registry as a fallback
131///
132/// Parts may be prefixed with `bg:` to target the background ground, or `fg:` to
133/// explicitly target the foreground. Unprefixed color parts default to foreground.
134///
135/// # Errors
136///
137/// Returns `LexError::InvalidTag` if the part matches none of the above forms.
138/// Returns `LexError::InvalidValue` if a numeric argument cannot be parsed.
139/// Returns `LexError::InvalidArgumentCount` if `rgb(...)` does not receive exactly three values.
140fn parse_part(part: &str) -> Result<Vec<TagType>, LexError> {
141    let (ground, part) = if let Some(rest) = part.strip_prefix("bg:") {
142        (Ground::Background, rest)
143    } else if let Some(rest) = part.strip_prefix("fg:") {
144        (Ground::Foreground, rest)
145    } else {
146        (Ground::Foreground, part)
147    };
148    if part == "/" {
149        Ok(vec![TagType::Reset])
150    } else if let Some(color) = NamedColor::from_str(part) {
151        Ok(vec![TagType::Color {
152            color: Color::Named(color),
153            ground,
154        }])
155    } else if let Some(emphasis) = EmphasisType::from_str(part) {
156        Ok(vec![TagType::Emphasis(emphasis)])
157    } else if let Some(ansi_val) = part.strip_prefix("ansi(").and_then(|s| s.strip_suffix(")")) {
158        match ansi_val.trim().parse::<u8>() {
159            Ok(code) => Ok(vec![TagType::Color {
160                color: Color::Ansi256(code),
161                ground,
162            }]),
163            Err(_) => Err(LexError::InvalidValue(ansi_val.to_string())),
164        }
165    } else if let Some(rgb_val) = part.strip_prefix("rgb(").and_then(|s| s.strip_suffix(")")) {
166        let parts: Result<Vec<u8>, _> =
167            rgb_val.split(',').map(|v| v.trim().parse::<u8>()).collect();
168        match parts {
169            Ok(v) if v.len() == 3 => Ok(vec![TagType::Color {
170                color: Color::Rgb(v[0], v[1], v[2]),
171                ground,
172            }]),
173            Ok(v) => Err(LexError::InvalidArgumentCount {
174                expected: 3,
175                got: v.len(),
176            }),
177            Err(_) => Err(LexError::InvalidValue(rgb_val.to_string())),
178        }
179    } else {
180        match search_registry(part) {
181            Ok(style) => Ok(style_to_tags(style)),
182            Err(e) => Err(e),
183        }
184    }
185}
186
187/// Splits a raw tag string on whitespace and parses each part into a `TagType`.
188///
189/// A tag like `"bold red"` produces two `TagType` values. Whitespace between parts
190/// is consumed and does not appear in the output.
191///
192/// # Errors
193///
194/// Propagates any error from `parse_part`.
195fn parse_tag(raw_tag: &str) -> Result<Vec<TagType>, LexError> {
196    let nested: Result<Vec<Vec<TagType>>, LexError> =
197        raw_tag.split_whitespace().map(parse_part).collect();
198    Ok(nested?.into_iter().flatten().collect())
199}
200
201/// Tokenizes a farben markup string into a sequence of `Token`s.
202///
203/// Tags are delimited by `[` and `]`. A `[` preceded by `\` is treated as a literal
204/// bracket rather than the start of a tag. Text between tags is emitted as
205/// [`Token::Text`]; tags are parsed and emitted as [`Token::Tag`].
206///
207/// # Errors
208///
209/// Returns `LexError::UnclosedTag` if a `[` has no matching `]`.
210/// Returns any error produced by `parse_tag` for malformed tag contents.
211///
212/// # Example
213///
214/// ```ignore
215/// let tokens = tokenize("[red]hello")?;
216/// // => [Token::Tag(TagType::Color { color: Color::Named(NamedColor::Red), ground: Ground::Foreground }),
217/// //     Token::Text("hello".into())]
218/// ```
219pub fn tokenize(input: impl Into<String>) -> Result<Vec<Token>, LexError> {
220    let mut tokens: Vec<Token> = Vec::new();
221    let input = input.into();
222    let mut pos = 0;
223    loop {
224        let Some(starting) = input[pos..].find('[') else {
225            if pos < input.len() {
226                tokens.push(Token::Text(input[pos..].to_string()));
227            }
228            break;
229        };
230        let abs_starting = starting + pos;
231        // escape logic
232        if abs_starting > 0 && input[abs_starting - 1..abs_starting] == "\\".to_string() {
233            let before = &input[pos..abs_starting - 1];
234            if !before.is_empty() {
235                tokens.push(Token::Text(before.to_string()));
236            }
237            tokens.push(Token::Text(String::from('[')));
238            pos = abs_starting + 1;
239            continue;
240        }
241
242        if pos != abs_starting {
243            tokens.push(Token::Text(input[pos..abs_starting].to_string()));
244        }
245
246        let Some(closing) = input[abs_starting..].find(']') else {
247            return Err(LexError::UnclosedTag);
248        };
249        let abs_closing = closing + abs_starting;
250        let raw_tag = &input[abs_starting + 1..abs_closing];
251        for tag in parse_tag(raw_tag)? {
252            tokens.push(Token::Tag(tag));
253        }
254        pos = abs_closing + 1;
255    }
256    Ok(tokens)
257}
258
#[cfg(test)]
mod tests {
    //! Unit tests for the lexer: emphasis keyword lookup, single-part parsing,
    //! full tokenization, and the registry fallback.

    use super::*;
    use crate::ansi::{Color, Ground, NamedColor};

    // --- EmphasisType::from_str ---

    #[test]
    fn test_emphasis_from_str_all_known() {
        assert_eq!(EmphasisType::from_str("dim"), Some(EmphasisType::Dim));
        assert_eq!(EmphasisType::from_str("italic"), Some(EmphasisType::Italic));
        assert_eq!(
            EmphasisType::from_str("underline"),
            Some(EmphasisType::Underline)
        );
        assert_eq!(EmphasisType::from_str("bold"), Some(EmphasisType::Bold));
        assert_eq!(
            EmphasisType::from_str("strikethrough"),
            Some(EmphasisType::Strikethrough)
        );
        assert_eq!(EmphasisType::from_str("blink"), Some(EmphasisType::Blink));
    }

    #[test]
    fn test_emphasis_from_str_unknown_returns_none() {
        assert_eq!(EmphasisType::from_str("flash"), None);
    }

    #[test]
    fn test_emphasis_from_str_case_sensitive() {
        assert_eq!(EmphasisType::from_str("Bold"), None);
    }

    // --- parse_part ---

    #[test]
    fn test_parse_part_reset() {
        assert_eq!(parse_part("/").unwrap(), vec![TagType::Reset]);
    }

    #[test]
    fn test_parse_part_named_color_foreground_default() {
        assert_eq!(
            parse_part("red").unwrap(),
            vec![TagType::Color {
                color: Color::Named(NamedColor::Red),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_named_color_explicit_fg() {
        assert_eq!(
            parse_part("fg:red").unwrap(),
            vec![TagType::Color {
                color: Color::Named(NamedColor::Red),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_named_color_bg() {
        assert_eq!(
            parse_part("bg:red").unwrap(),
            vec![TagType::Color {
                color: Color::Named(NamedColor::Red),
                ground: Ground::Background,
            }]
        );
    }

    #[test]
    fn test_parse_part_emphasis_bold() {
        assert_eq!(
            parse_part("bold").unwrap(),
            vec![TagType::Emphasis(EmphasisType::Bold)]
        );
    }

    #[test]
    fn test_parse_part_ansi256_valid() {
        assert_eq!(
            parse_part("ansi(200)").unwrap(),
            vec![TagType::Color {
                color: Color::Ansi256(200),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_ansi256_bg() {
        assert_eq!(
            parse_part("bg:ansi(200)").unwrap(),
            vec![TagType::Color {
                color: Color::Ansi256(200),
                ground: Ground::Background,
            }]
        );
    }

    #[test]
    fn test_parse_part_ansi256_with_whitespace() {
        assert_eq!(
            parse_part("ansi( 42 )").unwrap(),
            vec![TagType::Color {
                color: Color::Ansi256(42),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_ansi256_invalid_value() {
        assert!(parse_part("ansi(abc)").is_err());
    }

    #[test]
    fn test_parse_part_rgb_valid() {
        assert_eq!(
            parse_part("rgb(255,128,0)").unwrap(),
            vec![TagType::Color {
                color: Color::Rgb(255, 128, 0),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_rgb_bg() {
        assert_eq!(
            parse_part("bg:rgb(255,128,0)").unwrap(),
            vec![TagType::Color {
                color: Color::Rgb(255, 128, 0),
                ground: Ground::Background,
            }]
        );
    }

    #[test]
    fn test_parse_part_rgb_with_spaces() {
        assert_eq!(
            parse_part("rgb( 10 , 20 , 30 )").unwrap(),
            vec![TagType::Color {
                color: Color::Rgb(10, 20, 30),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_rgb_wrong_arg_count() {
        // Match on the exact variant so the test cannot pass vacuously when a
        // different kind of error (or a success) is returned.
        match parse_part("rgb(1,2)") {
            Err(crate::errors::LexError::InvalidArgumentCount { expected, got }) => {
                assert_eq!(expected, 3);
                assert_eq!(got, 2);
            }
            other => panic!("expected InvalidArgumentCount, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_part_rgb_invalid_value() {
        assert!(parse_part("rgb(r,g,b)").is_err());
    }

    #[test]
    fn test_parse_part_unknown_tag_returns_error() {
        assert!(parse_part("fuchsia").is_err());
    }

    // --- tokenize ---

    #[test]
    fn test_tokenize_plain_text() {
        let tokens = tokenize("hello world").unwrap();
        assert_eq!(tokens, vec![Token::Text("hello world".into())]);
    }

    #[test]
    fn test_tokenize_empty_string() {
        assert!(tokenize("").unwrap().is_empty());
    }

    #[test]
    fn test_tokenize_single_color_tag() {
        let tokens = tokenize("[red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                }),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_bg_color_tag() {
        let tokens = tokenize("[bg:red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Background
                }),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_fg_and_bg_in_same_bracket() {
        let tokens = tokenize("[fg:white bg:blue]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::White),
                    ground: Ground::Foreground
                }),
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Blue),
                    ground: Ground::Background
                }),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_reset_tag() {
        assert_eq!(tokenize("[/]").unwrap(), vec![Token::Tag(TagType::Reset)]);
    }

    #[test]
    fn test_tokenize_compound_tag() {
        let tokens = tokenize("[bold red]hi").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Emphasis(EmphasisType::Bold)),
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                }),
                Token::Text("hi".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_at_start() {
        let tokens = tokenize("\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![Token::Text("[".into()), Token::Text("not a tag]".into()),]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_with_prefix() {
        let tokens = tokenize("before\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Text("[".into()),
                Token::Text("not a tag]".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_unclosed_tag_returns_error() {
        assert!(tokenize("[red").is_err());
    }

    #[test]
    fn test_tokenize_invalid_tag_name_returns_error() {
        assert!(tokenize("[fuchsia]").is_err());
    }

    #[test]
    fn test_tokenize_text_before_and_after_tag() {
        let tokens = tokenize("before[red]after").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                }),
                Token::Text("after".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_ansi256_tag() {
        let tokens = tokenize("[ansi(1)]text").unwrap();
        assert_eq!(
            tokens[0],
            Token::Tag(TagType::Color {
                color: Color::Ansi256(1),
                ground: Ground::Foreground,
            })
        );
    }

    #[test]
    fn test_tokenize_rgb_tag() {
        let tokens = tokenize("[rgb(255,0,128)]text").unwrap();
        assert_eq!(
            tokens[0],
            Token::Tag(TagType::Color {
                color: Color::Rgb(255, 0, 128),
                ground: Ground::Foreground,
            })
        );
    }

    #[test]
    fn test_tokenize_bg_rgb_tag() {
        let tokens = tokenize("[bg:rgb(0,255,0)]text").unwrap();
        assert_eq!(
            tokens[0],
            Token::Tag(TagType::Color {
                color: Color::Rgb(0, 255, 0),
                ground: Ground::Background,
            })
        );
    }

    // --- parse_part: registry fallback ---

    #[test]
    fn test_parse_part_custom_style_from_registry() {
        // Registers a style under "danger" before looking it up through parse_part.
        crate::registry::insert_style("danger", crate::ansi::Style::parse("[bold red]").unwrap());
        let result = parse_part("danger").unwrap();
        assert_eq!(
            result,
            vec![
                TagType::Emphasis(EmphasisType::Bold),
                TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                },
            ]
        );
    }
}