Skip to main content

farben_core/
lexer.rs

1//! Tokenizer for farben markup strings.
2//!
3//! Parses bracket-delimited tag syntax (`[bold red]text[/]`) into a flat sequence of
4//! [`Token`] values. Each token is either a [`Token::Tag`] carrying styling information
5//! or a [`Token::Text`] carrying a run of literal characters.
6//!
7//! The main entry point is [`tokenize`]. The lower-level [`parse_tag`] and [`parse_part`]
8//! functions handle individual tag strings and are not part of the public API.
9
10use crate::{
11    ansi::{Color, Ground, NamedColor, Style},
12    errors::LexError,
13    registry::search_registry,
14};
15
/// A text emphasis modifier supported by farben markup.
///
/// Each variant corresponds to one lowercase keyword that may appear inside a tag
/// (for example `[bold]`); see `EmphasisType::from_str` for the keyword mapping.
#[derive(Debug, PartialEq, Clone)]
pub enum EmphasisType {
    /// Reduced intensity (SGR 2). Written as `dim` in markup.
    Dim,
    /// Italic text (SGR 3). Written as `italic` in markup.
    Italic,
    /// Underlined text (SGR 4). Written as `underline` in markup.
    Underline,
    /// Bold text (SGR 1). Written as `bold` in markup.
    Bold,
    /// Crossed-out text (SGR 9). Written as `strikethrough` in markup.
    Strikethrough,
    /// Blinking text (SGR 5). Terminal support varies. Written as `blink` in markup.
    Blink,
}
32
/// The kind of styling operation a tag represents.
#[derive(Debug, PartialEq, Clone)]
pub enum TagType {
    /// A reset.
    ///
    /// `Reset(None)` is produced by a bare `/` part (`[/]`) and resets all active styles.
    /// `Reset(Some(tag))` is produced by `/` followed by another part (for example
    /// `[/bold]`) and carries the parsed emphasis or color tag as its target. The lexer
    /// never places a `Reset` or `Prefix` inside the box.
    Reset(Option<Box<TagType>>),
    /// Applies a text emphasis attribute.
    Emphasis(EmphasisType),
    /// Sets a foreground or background color.
    Color { color: Color, ground: Ground },
    /// A literal prefix string injected before the style sequence by the registry.
    ///
    /// Within this module it is only produced when a named registry style is expanded.
    Prefix(String),
}
45
/// A single unit produced by the tokenizer: either a styling tag or a run of plain text.
///
/// A bracket that contains several parts (for example `[bold red]`) yields one
/// `Token::Tag` per resulting [`TagType`], in source order.
#[derive(Debug, PartialEq)]
pub enum Token {
    /// A parsed styling tag.
    Tag(TagType),
    /// A run of plain text with no markup. An escaped `\[` is emitted as its own
    /// `Text("[")` token rather than being merged into neighbouring text.
    Text(String),
}
54
55impl EmphasisType {
56    /// Parses an emphasis keyword into an `EmphasisType`.
57    ///
58    /// Returns `None` if the string is not a recognized emphasis name.
59    /// Matching is case-sensitive.
60    fn from_str(input: &str) -> Option<Self> {
61        match input {
62            "dim" => Some(Self::Dim),
63            "italic" => Some(Self::Italic),
64            "underline" => Some(Self::Underline),
65            "bold" => Some(Self::Bold),
66            "strikethrough" => Some(Self::Strikethrough),
67            "blink" => Some(Self::Blink),
68            _ => None,
69        }
70    }
71}
72
73/// Expands a [`Style`] from the registry into its equivalent sequence of [`TagType`] values.
74///
75/// A `Prefix` tag is always prepended first, if one is set. A `reset` style short-circuits
76/// after the prefix: no emphasis or color tags are emitted.
77fn style_to_tags(style: Style) -> Vec<TagType> {
78    let mut res: Vec<TagType> = Vec::new();
79    let prefix = style.prefix;
80
81    if style.reset {
82        if let Some(p) = prefix {
83            res.push(TagType::Prefix(p));
84        }
85        res.push(TagType::Reset(None));
86        return res;
87    }
88
89    for (enabled, tag) in [
90        (style.bold, TagType::Emphasis(EmphasisType::Bold)),
91        (style.blink, TagType::Emphasis(EmphasisType::Blink)),
92        (style.dim, TagType::Emphasis(EmphasisType::Dim)),
93        (style.italic, TagType::Emphasis(EmphasisType::Italic)),
94        (
95            style.strikethrough,
96            TagType::Emphasis(EmphasisType::Strikethrough),
97        ),
98        (style.underline, TagType::Emphasis(EmphasisType::Underline)),
99    ] {
100        if enabled {
101            res.push(tag);
102        }
103    }
104
105    if let Some(fg) = style.fg {
106        res.push(TagType::Color {
107            color: fg,
108            ground: Ground::Foreground,
109        })
110    }
111    if let Some(bg) = style.bg {
112        res.push(TagType::Color {
113            color: bg,
114            ground: Ground::Background,
115        })
116    }
117
118    if let Some(p) = prefix {
119        res.push(TagType::Prefix(p));
120    }
121
122    res
123}
124
125/// Parses a single whitespace-delimited tag part into a `TagType`.
126///
127/// Recognizes:
128/// - `/` as a reset
129/// - Named colors (`red`, `blue`, etc.)
130/// - Emphasis keywords (`bold`, `italic`, etc.)
131/// - `ansi(N)` for ANSI 256-palette colors
132/// - `rgb(R,G,B)` for true-color values
133/// - A named style from the registry as a fallback
134///
135/// Parts may be prefixed with `bg:` to target the background ground, or `fg:` to
136/// explicitly target the foreground. Unprefixed color parts default to foreground.
137///
138/// # Errors
139///
140/// Returns `LexError::InvalidTag` if the part matches none of the above forms.
141/// Returns `LexError::InvalidValue` if a numeric argument cannot be parsed.
142/// Returns `LexError::InvalidArgumentCount` if `rgb(...)` does not receive exactly three values.
143fn parse_part(part: &str) -> Result<Vec<TagType>, LexError> {
144    let (ground, part) = if let Some(rest) = part.strip_prefix("bg:") {
145        (Ground::Background, rest)
146    } else if let Some(rest) = part.strip_prefix("fg:") {
147        (Ground::Foreground, rest)
148    } else {
149        (Ground::Foreground, part)
150    };
151    if let Some(remainder) = part.strip_prefix('/') {
152        if remainder.is_empty() {
153            Ok(vec![TagType::Reset(None)])
154        } else {
155            let inner = parse_part(remainder)?;
156            match inner.as_slice() {
157                [tag] => match tag {
158                    TagType::Reset(_) | TagType::Prefix(_) => Err(LexError::InvalidResetTarget),
159                    _ => Ok(vec![TagType::Reset(Some(Box::new(tag.clone())))]),
160                },
161                _ => Err(LexError::InvalidTag(part.to_string())),
162            }
163        }
164    } else if let Some(color) = NamedColor::from_str(part) {
165        Ok(vec![TagType::Color {
166            color: Color::Named(color),
167            ground,
168        }])
169    } else if let Some(emphasis) = EmphasisType::from_str(part) {
170        Ok(vec![TagType::Emphasis(emphasis)])
171    } else if let Some(ansi_val) = part.strip_prefix("ansi(").and_then(|s| s.strip_suffix(")")) {
172        match ansi_val.trim().parse::<u8>() {
173            Ok(code) => Ok(vec![TagType::Color {
174                color: Color::Ansi256(code),
175                ground,
176            }]),
177            Err(_) => Err(LexError::InvalidValue(ansi_val.to_string())),
178        }
179    } else if let Some(rgb_val) = part.strip_prefix("rgb(").and_then(|s| s.strip_suffix(")")) {
180        let parts: Result<Vec<u8>, _> =
181            rgb_val.split(',').map(|v| v.trim().parse::<u8>()).collect();
182        match parts {
183            Ok(v) if v.len() == 3 => Ok(vec![TagType::Color {
184                color: Color::Rgb(v[0], v[1], v[2]),
185                ground,
186            }]),
187            Ok(v) => Err(LexError::InvalidArgumentCount {
188                expected: 3,
189                got: v.len(),
190            }),
191            Err(_) => Err(LexError::InvalidValue(rgb_val.to_string())),
192        }
193    } else {
194        match search_registry(part) {
195            Ok(style) => Ok(style_to_tags(style)),
196            Err(e) => Err(e),
197        }
198    }
199}
200
201/// Splits a raw tag string on whitespace and parses each part into a `TagType`.
202///
203/// A tag like `"bold red"` produces two `TagType` values. Whitespace between parts
204/// is consumed and does not appear in the output.
205///
206/// # Errors
207///
208/// Propagates any error from `parse_part`.
209fn parse_tag(raw_tag: &str) -> Result<Vec<TagType>, LexError> {
210    let nested: Result<Vec<Vec<TagType>>, LexError> =
211        raw_tag.split_whitespace().map(parse_part).collect();
212    Ok(nested?.into_iter().flatten().collect())
213}
214
215/// Tokenizes a farben markup string into a sequence of `Token`s.
216///
217/// Tags are delimited by `[` and `]`. A `[` preceded by `\` is treated as a literal
218/// bracket rather than the start of a tag. Text between tags is emitted as
219/// [`Token::Text`]; tags are parsed and emitted as [`Token::Tag`].
220///
221/// # Errors
222///
223/// Returns `LexError::UnclosedTag` if a `[` has no matching `]`.
224/// Returns any error produced by `parse_tag` for malformed tag contents.
225///
226/// # Example
227///
228/// ```ignore
229/// let tokens = tokenize("[red]hello")?;
230/// // => [Token::Tag(TagType::Color { color: Color::Named(NamedColor::Red), ground: Ground::Foreground }),
231/// //     Token::Text("hello".into())]
232/// ```
233pub fn tokenize(input: impl Into<String>) -> Result<Vec<Token>, LexError> {
234    let mut tokens: Vec<Token> = Vec::new();
235    let input = input.into();
236    let mut pos = 0;
237    loop {
238        let Some(starting) = input[pos..].find('[') else {
239            if pos < input.len() {
240                tokens.push(Token::Text(input[pos..].to_string()));
241            }
242            break;
243        };
244        let abs_starting = starting + pos;
245        // wtf does this mean
246        if abs_starting > 0 && input.as_bytes().get(abs_starting.wrapping_sub(1)) == Some(&b'\\') {
247            let before = &input[pos..abs_starting - 1];
248            if !before.is_empty() {
249                tokens.push(Token::Text(before.to_string()));
250            }
251            tokens.push(Token::Text(String::from('[')));
252            pos = abs_starting + 1;
253            continue;
254        }
255
256        if pos != abs_starting {
257            tokens.push(Token::Text(input[pos..abs_starting].to_string()));
258        }
259
260        let Some(closing) = input[abs_starting..].find(']') else {
261            return Err(LexError::UnclosedTag);
262        };
263        let abs_closing = closing + abs_starting;
264        let raw_tag = &input[abs_starting + 1..abs_closing];
265        for tag in parse_tag(raw_tag)? {
266            tokens.push(Token::Tag(tag));
267        }
268        pos = abs_closing + 1;
269    }
270    Ok(tokens)
271}
272
#[cfg(test)]
mod tests {
    //! Unit tests for the lexer: keyword lookup, single-part parsing and full tokenization.

    use super::*;
    use crate::ansi::{Color, Ground, NamedColor};

    /// Builds the foreground color tag expected for `color`.
    fn fg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Foreground,
        }
    }

    /// Builds the background color tag expected for `color`.
    fn bg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Background,
        }
    }

    // --- EmphasisType::from_str ---

    #[test]
    fn test_emphasis_from_str_all_known() {
        assert_eq!(EmphasisType::from_str("dim"), Some(EmphasisType::Dim));
        assert_eq!(EmphasisType::from_str("italic"), Some(EmphasisType::Italic));
        assert_eq!(
            EmphasisType::from_str("underline"),
            Some(EmphasisType::Underline)
        );
        assert_eq!(EmphasisType::from_str("bold"), Some(EmphasisType::Bold));
        assert_eq!(
            EmphasisType::from_str("strikethrough"),
            Some(EmphasisType::Strikethrough)
        );
        assert_eq!(EmphasisType::from_str("blink"), Some(EmphasisType::Blink));
    }

    #[test]
    fn test_emphasis_from_str_unknown_returns_none() {
        assert_eq!(EmphasisType::from_str("flash"), None);
    }

    #[test]
    fn test_emphasis_from_str_case_sensitive() {
        assert_eq!(EmphasisType::from_str("Bold"), None);
    }

    // --- parse_part ---

    #[test]
    fn test_parse_part_reset() {
        assert_eq!(parse_part("/").unwrap(), vec![TagType::Reset(None)]);
    }

    #[test]
    fn test_parse_part_named_color_foreground_default() {
        assert_eq!(
            parse_part("red").unwrap(),
            vec![fg(Color::Named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_named_color_explicit_fg() {
        assert_eq!(
            parse_part("fg:red").unwrap(),
            vec![fg(Color::Named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_named_color_bg() {
        assert_eq!(
            parse_part("bg:red").unwrap(),
            vec![bg(Color::Named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_emphasis_bold() {
        assert_eq!(
            parse_part("bold").unwrap(),
            vec![TagType::Emphasis(EmphasisType::Bold)]
        );
    }

    #[test]
    fn test_parse_part_ansi256_valid() {
        assert_eq!(
            parse_part("ansi(200)").unwrap(),
            vec![fg(Color::Ansi256(200))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_bg() {
        assert_eq!(
            parse_part("bg:ansi(200)").unwrap(),
            vec![bg(Color::Ansi256(200))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_with_whitespace() {
        assert_eq!(
            parse_part("ansi( 42 )").unwrap(),
            vec![fg(Color::Ansi256(42))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_invalid_value() {
        assert!(parse_part("ansi(abc)").is_err());
    }

    #[test]
    fn test_parse_part_rgb_valid() {
        assert_eq!(
            parse_part("rgb(255,128,0)").unwrap(),
            vec![fg(Color::Rgb(255, 128, 0))]
        );
    }

    #[test]
    fn test_parse_part_rgb_bg() {
        assert_eq!(
            parse_part("bg:rgb(255,128,0)").unwrap(),
            vec![bg(Color::Rgb(255, 128, 0))]
        );
    }

    #[test]
    fn test_parse_part_rgb_with_spaces() {
        assert_eq!(
            parse_part("rgb( 10 , 20 , 30 )").unwrap(),
            vec![fg(Color::Rgb(10, 20, 30))]
        );
    }

    #[test]
    fn test_parse_part_rgb_wrong_arg_count() {
        // Match exhaustively so the test fails if a different error variant (or a
        // success) comes back, instead of silently skipping the assertions.
        match parse_part("rgb(1,2)") {
            Err(crate::errors::LexError::InvalidArgumentCount { expected, got }) => {
                assert_eq!(expected, 3);
                assert_eq!(got, 2);
            }
            other => panic!("expected InvalidArgumentCount, got {other:?}"),
        }
    }

    #[test]
    fn test_parse_part_rgb_invalid_value() {
        assert!(parse_part("rgb(r,g,b)").is_err());
    }

    #[test]
    fn test_parse_part_unknown_tag_returns_error() {
        assert!(parse_part("fuchsia").is_err());
    }

    // --- tokenize ---

    #[test]
    fn test_tokenize_plain_text() {
        let tokens = tokenize("hello world").unwrap();
        assert_eq!(tokens, vec![Token::Text("hello world".into())]);
    }

    #[test]
    fn test_tokenize_empty_string() {
        assert!(tokenize("").unwrap().is_empty());
    }

    #[test]
    fn test_tokenize_single_color_tag() {
        let tokens = tokenize("[red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_bg_color_tag() {
        let tokens = tokenize("[bg:red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(bg(Color::Named(NamedColor::Red))),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_fg_and_bg_in_same_bracket() {
        let tokens = tokenize("[fg:white bg:blue]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(fg(Color::Named(NamedColor::White))),
                Token::Tag(bg(Color::Named(NamedColor::Blue))),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_reset_tag() {
        assert_eq!(
            tokenize("[/]").unwrap(),
            vec![Token::Tag(TagType::Reset(None))]
        );
    }

    #[test]
    fn test_tokenize_compound_tag() {
        let tokens = tokenize("[bold red]hi").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Emphasis(EmphasisType::Bold)),
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                Token::Text("hi".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_at_start() {
        // The escaped bracket is its own text token; the rest follows separately.
        let tokens = tokenize("\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![Token::Text("[".into()), Token::Text("not a tag]".into())]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_with_prefix() {
        let tokens = tokenize("before\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Text("[".into()),
                Token::Text("not a tag]".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_unclosed_tag_returns_error() {
        assert!(tokenize("[red").is_err());
    }

    #[test]
    fn test_tokenize_invalid_tag_name_returns_error() {
        assert!(tokenize("[fuchsia]").is_err());
    }

    #[test]
    fn test_tokenize_text_before_and_after_tag() {
        let tokens = tokenize("before[red]after").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                Token::Text("after".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_ansi256_tag() {
        let tokens = tokenize("[ansi(1)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(fg(Color::Ansi256(1))));
    }

    #[test]
    fn test_tokenize_rgb_tag() {
        let tokens = tokenize("[rgb(255,0,128)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(fg(Color::Rgb(255, 0, 128))));
    }

    #[test]
    fn test_tokenize_bg_rgb_tag() {
        let tokens = tokenize("[bg:rgb(0,255,0)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(bg(Color::Rgb(0, 255, 0))));
    }

    #[test]
    fn test_parse_part_custom_style_from_registry() {
        // Registers a named style, then checks that the registry fallback expands it.
        crate::registry::insert_style("danger", crate::ansi::Style::parse("[bold red]").unwrap());
        let result = parse_part("danger").unwrap();
        assert_eq!(
            result,
            vec![
                TagType::Emphasis(EmphasisType::Bold),
                fg(Color::Named(NamedColor::Red)),
            ]
        );
    }
}