Skip to main content

farben_core/
lexer.rs

1//! Tokenizer for farben markup strings.
2//!
3//! Parses bracket-delimited tag syntax (`[bold red]text[/]`) into a flat sequence of
4//! [`Token`] values. Each token is either a [`Token::Tag`] carrying styling information
5//! or a [`Token::Text`] carrying a run of literal characters.
6//!
7//! The main entry point is [`tokenize`]. The lower-level [`parse_tag`] and [`parse_part`]
8//! functions handle individual tag strings and are not part of the public API.
9
10use crate::{
11    ansi::{Color, Ground, NamedColor, Style},
12    errors::LexError,
13    registry::search_registry,
14};
15
/// A text emphasis modifier supported by farben markup.
///
/// This is a field-less value type, so it is cheap to copy and can be compared
/// with `==`. Each variant corresponds to one markup keyword (see
/// `EmphasisType::from_str`) and one SGR attribute code, noted per variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EmphasisType {
    /// Reduced intensity (SGR 2). Markup keyword: `dim`.
    Dim,
    /// Italic text (SGR 3). Markup keyword: `italic`.
    Italic,
    /// Underlined text (SGR 4). Markup keyword: `underline`.
    Underline,
    /// Bold text (SGR 1). Markup keyword: `bold`.
    Bold,
    /// Crossed-out text (SGR 9). Markup keyword: `strikethrough`.
    Strikethrough,
    /// Blinking text (SGR 5). Terminal support varies. Markup keyword: `blink`.
    Blink,
}
32
/// The kind of styling operation a tag represents.
///
/// A single bracketed tag in the markup may expand to several `TagType` values:
/// one per whitespace-separated part, or several when a part names a registry style.
#[derive(Debug, PartialEq)]
pub enum TagType {
    /// Resets all active styles (`[/]`).
    ///
    /// Also produced when a registry style has its `reset` flag set.
    Reset,
    /// Applies a text emphasis attribute.
    Emphasis(EmphasisType),
    /// Sets a foreground or background color.
    ///
    /// `ground` is chosen by the `bg:` / `fg:` part prefix in the markup and
    /// defaults to the foreground when no prefix is given.
    Color { color: Color, ground: Ground },
    /// A literal prefix string injected before the style sequence by the registry.
    Prefix(String),
}
45
/// A single unit produced by the tokenizer: either a styling tag or a run of plain text.
///
/// `tokenize` returns these in the order they appear in the input.
#[derive(Debug, PartialEq)]
pub enum Token {
    /// A parsed styling tag.
    Tag(TagType),
    /// A run of plain text with no markup.
    ///
    /// An escaped bracket (`\[`) is emitted as its own `Text("[")` token, so adjacent
    /// `Text` tokens can occur and are not merged by the tokenizer.
    Text(String),
}
54
55impl EmphasisType {
56    /// Parses an emphasis keyword into an `EmphasisType`.
57    ///
58    /// Returns `None` if the string is not a recognized emphasis name.
59    /// Matching is case-sensitive.
60    fn from_str(input: &str) -> Option<Self> {
61        match input {
62            "dim" => Some(Self::Dim),
63            "italic" => Some(Self::Italic),
64            "underline" => Some(Self::Underline),
65            "bold" => Some(Self::Bold),
66            "strikethrough" => Some(Self::Strikethrough),
67            "blink" => Some(Self::Blink),
68            _ => None,
69        }
70    }
71}
72
73/// Expands a [`Style`] from the registry into its equivalent sequence of [`TagType`] values.
74///
75/// A `Prefix` tag is always prepended first, if one is set. A `reset` style short-circuits
76/// after the prefix: no emphasis or color tags are emitted.
77fn style_to_tags(style: Style) -> Vec<TagType> {
78    let mut res: Vec<TagType> = Vec::new();
79    let prefix = style.prefix;
80
81    if style.reset {
82        if let Some(p) = prefix {
83            res.push(TagType::Prefix(p));
84        }
85        res.push(TagType::Reset);
86        return res;
87    }
88
89    for (enabled, tag) in [
90        (style.bold, TagType::Emphasis(EmphasisType::Bold)),
91        (style.blink, TagType::Emphasis(EmphasisType::Blink)),
92        (style.dim, TagType::Emphasis(EmphasisType::Dim)),
93        (style.italic, TagType::Emphasis(EmphasisType::Italic)),
94        (
95            style.strikethrough,
96            TagType::Emphasis(EmphasisType::Strikethrough),
97        ),
98        (style.underline, TagType::Emphasis(EmphasisType::Underline)),
99    ] {
100        if enabled {
101            res.push(tag);
102        }
103    }
104
105    if let Some(fg) = style.fg {
106        res.push(TagType::Color {
107            color: fg,
108            ground: Ground::Foreground,
109        })
110    }
111    if let Some(bg) = style.bg {
112        res.push(TagType::Color {
113            color: bg,
114            ground: Ground::Background,
115        })
116    }
117
118    if let Some(p) = prefix {
119        res.push(TagType::Prefix(p));
120    }
121
122    res
123}
124
125/// Parses a single whitespace-delimited tag part into a `TagType`.
126///
127/// Recognizes:
128/// - `/` as a reset
129/// - Named colors (`red`, `blue`, etc.)
130/// - Emphasis keywords (`bold`, `italic`, etc.)
131/// - `ansi(N)` for ANSI 256-palette colors
132/// - `rgb(R,G,B)` for true-color values
133/// - A named style from the registry as a fallback
134///
135/// Parts may be prefixed with `bg:` to target the background ground, or `fg:` to
136/// explicitly target the foreground. Unprefixed color parts default to foreground.
137///
138/// # Errors
139///
140/// Returns `LexError::InvalidTag` if the part matches none of the above forms.
141/// Returns `LexError::InvalidValue` if a numeric argument cannot be parsed.
142/// Returns `LexError::InvalidArgumentCount` if `rgb(...)` does not receive exactly three values.
143fn parse_part(part: &str) -> Result<Vec<TagType>, LexError> {
144    let (ground, part) = if let Some(rest) = part.strip_prefix("bg:") {
145        (Ground::Background, rest)
146    } else if let Some(rest) = part.strip_prefix("fg:") {
147        (Ground::Foreground, rest)
148    } else {
149        (Ground::Foreground, part)
150    };
151    if part == "/" {
152        Ok(vec![TagType::Reset])
153    } else if let Some(color) = NamedColor::from_str(part) {
154        Ok(vec![TagType::Color {
155            color: Color::Named(color),
156            ground,
157        }])
158    } else if let Some(emphasis) = EmphasisType::from_str(part) {
159        Ok(vec![TagType::Emphasis(emphasis)])
160    } else if let Some(ansi_val) = part.strip_prefix("ansi(").and_then(|s| s.strip_suffix(")")) {
161        match ansi_val.trim().parse::<u8>() {
162            Ok(code) => Ok(vec![TagType::Color {
163                color: Color::Ansi256(code),
164                ground,
165            }]),
166            Err(_) => Err(LexError::InvalidValue(ansi_val.to_string())),
167        }
168    } else if let Some(rgb_val) = part.strip_prefix("rgb(").and_then(|s| s.strip_suffix(")")) {
169        let parts: Result<Vec<u8>, _> =
170            rgb_val.split(',').map(|v| v.trim().parse::<u8>()).collect();
171        match parts {
172            Ok(v) if v.len() == 3 => Ok(vec![TagType::Color {
173                color: Color::Rgb(v[0], v[1], v[2]),
174                ground,
175            }]),
176            Ok(v) => Err(LexError::InvalidArgumentCount {
177                expected: 3,
178                got: v.len(),
179            }),
180            Err(_) => Err(LexError::InvalidValue(rgb_val.to_string())),
181        }
182    } else {
183        match search_registry(part) {
184            Ok(style) => Ok(style_to_tags(style)),
185            Err(e) => Err(e),
186        }
187    }
188}
189
190/// Splits a raw tag string on whitespace and parses each part into a `TagType`.
191///
192/// A tag like `"bold red"` produces two `TagType` values. Whitespace between parts
193/// is consumed and does not appear in the output.
194///
195/// # Errors
196///
197/// Propagates any error from `parse_part`.
198fn parse_tag(raw_tag: &str) -> Result<Vec<TagType>, LexError> {
199    let nested: Result<Vec<Vec<TagType>>, LexError> =
200        raw_tag.split_whitespace().map(parse_part).collect();
201    Ok(nested?.into_iter().flatten().collect())
202}
203
204/// Tokenizes a farben markup string into a sequence of `Token`s.
205///
206/// Tags are delimited by `[` and `]`. A `[` preceded by `\` is treated as a literal
207/// bracket rather than the start of a tag. Text between tags is emitted as
208/// [`Token::Text`]; tags are parsed and emitted as [`Token::Tag`].
209///
210/// # Errors
211///
212/// Returns `LexError::UnclosedTag` if a `[` has no matching `]`.
213/// Returns any error produced by `parse_tag` for malformed tag contents.
214///
215/// # Example
216///
217/// ```ignore
218/// let tokens = tokenize("[red]hello")?;
219/// // => [Token::Tag(TagType::Color { color: Color::Named(NamedColor::Red), ground: Ground::Foreground }),
220/// //     Token::Text("hello".into())]
221/// ```
222pub fn tokenize(input: impl Into<String>) -> Result<Vec<Token>, LexError> {
223    let mut tokens: Vec<Token> = Vec::new();
224    let input = input.into();
225    let mut pos = 0;
226    loop {
227        let Some(starting) = input[pos..].find('[') else {
228            if pos < input.len() {
229                tokens.push(Token::Text(input[pos..].to_string()));
230            }
231            break;
232        };
233        let abs_starting = starting + pos;
234        // escape logic
235        if abs_starting > 0 && input[abs_starting - 1..abs_starting] == "\\".to_string() {
236            let before = &input[pos..abs_starting - 1];
237            if !before.is_empty() {
238                tokens.push(Token::Text(before.to_string()));
239            }
240            tokens.push(Token::Text(String::from('[')));
241            pos = abs_starting + 1;
242            continue;
243        }
244
245        if pos != abs_starting {
246            tokens.push(Token::Text(input[pos..abs_starting].to_string()));
247        }
248
249        let Some(closing) = input[abs_starting..].find(']') else {
250            return Err(LexError::UnclosedTag);
251        };
252        let abs_closing = closing + abs_starting;
253        let raw_tag = &input[abs_starting + 1..abs_closing];
254        for tag in parse_tag(raw_tag)? {
255            tokens.push(Token::Tag(tag));
256        }
257        pos = abs_closing + 1;
258    }
259    Ok(tokens)
260}
261
/// Unit tests for the lexer: keyword lookup, single-part parsing, and full tokenization.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ansi::{Color, Ground, NamedColor};

    // --- EmphasisType::from_str ---

    #[test]
    fn test_emphasis_from_str_all_known() {
        assert_eq!(EmphasisType::from_str("dim"), Some(EmphasisType::Dim));
        assert_eq!(EmphasisType::from_str("italic"), Some(EmphasisType::Italic));
        assert_eq!(
            EmphasisType::from_str("underline"),
            Some(EmphasisType::Underline)
        );
        assert_eq!(EmphasisType::from_str("bold"), Some(EmphasisType::Bold));
        assert_eq!(
            EmphasisType::from_str("strikethrough"),
            Some(EmphasisType::Strikethrough)
        );
        assert_eq!(EmphasisType::from_str("blink"), Some(EmphasisType::Blink));
    }

    #[test]
    fn test_emphasis_from_str_unknown_returns_none() {
        assert_eq!(EmphasisType::from_str("flash"), None);
    }

    #[test]
    fn test_emphasis_from_str_case_sensitive() {
        assert_eq!(EmphasisType::from_str("Bold"), None);
    }

    // --- parse_part ---

    #[test]
    fn test_parse_part_reset() {
        assert_eq!(parse_part("/").unwrap(), vec![TagType::Reset]);
    }

    #[test]
    fn test_parse_part_named_color_foreground_default() {
        assert_eq!(
            parse_part("red").unwrap(),
            vec![TagType::Color {
                color: Color::Named(NamedColor::Red),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_named_color_explicit_fg() {
        assert_eq!(
            parse_part("fg:red").unwrap(),
            vec![TagType::Color {
                color: Color::Named(NamedColor::Red),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_named_color_bg() {
        assert_eq!(
            parse_part("bg:red").unwrap(),
            vec![TagType::Color {
                color: Color::Named(NamedColor::Red),
                ground: Ground::Background,
            }]
        );
    }

    #[test]
    fn test_parse_part_emphasis_bold() {
        assert_eq!(
            parse_part("bold").unwrap(),
            vec![TagType::Emphasis(EmphasisType::Bold)]
        );
    }

    #[test]
    fn test_parse_part_ansi256_valid() {
        assert_eq!(
            parse_part("ansi(200)").unwrap(),
            vec![TagType::Color {
                color: Color::Ansi256(200),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_ansi256_bg() {
        assert_eq!(
            parse_part("bg:ansi(200)").unwrap(),
            vec![TagType::Color {
                color: Color::Ansi256(200),
                ground: Ground::Background,
            }]
        );
    }

    #[test]
    fn test_parse_part_ansi256_with_whitespace() {
        assert_eq!(
            parse_part("ansi( 42 )").unwrap(),
            vec![TagType::Color {
                color: Color::Ansi256(42),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_ansi256_invalid_value() {
        assert!(parse_part("ansi(abc)").is_err());
    }

    #[test]
    fn test_parse_part_rgb_valid() {
        assert_eq!(
            parse_part("rgb(255,128,0)").unwrap(),
            vec![TagType::Color {
                color: Color::Rgb(255, 128, 0),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_rgb_bg() {
        assert_eq!(
            parse_part("bg:rgb(255,128,0)").unwrap(),
            vec![TagType::Color {
                color: Color::Rgb(255, 128, 0),
                ground: Ground::Background,
            }]
        );
    }

    #[test]
    fn test_parse_part_rgb_with_spaces() {
        assert_eq!(
            parse_part("rgb( 10 , 20 , 30 )").unwrap(),
            vec![TagType::Color {
                color: Color::Rgb(10, 20, 30),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_rgb_wrong_arg_count() {
        // Match on the exact variant: an `if let` without an else branch would let
        // the test pass silently if a different error variant were returned.
        match parse_part("rgb(1,2)") {
            Err(crate::errors::LexError::InvalidArgumentCount { expected, got }) => {
                assert_eq!(expected, 3);
                assert_eq!(got, 2);
            }
            other => panic!("expected InvalidArgumentCount, got {other:?}"),
        }
    }

    #[test]
    fn test_parse_part_rgb_invalid_value() {
        assert!(parse_part("rgb(r,g,b)").is_err());
    }

    #[test]
    fn test_parse_part_unknown_tag_returns_error() {
        assert!(parse_part("fuchsia").is_err());
    }

    // --- tokenize ---

    #[test]
    fn test_tokenize_plain_text() {
        let tokens = tokenize("hello world").unwrap();
        assert_eq!(tokens, vec![Token::Text("hello world".into())]);
    }

    #[test]
    fn test_tokenize_empty_string() {
        assert!(tokenize("").unwrap().is_empty());
    }

    #[test]
    fn test_tokenize_single_color_tag() {
        let tokens = tokenize("[red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                }),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_bg_color_tag() {
        let tokens = tokenize("[bg:red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Background
                }),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_fg_and_bg_in_same_bracket() {
        let tokens = tokenize("[fg:white bg:blue]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::White),
                    ground: Ground::Foreground
                }),
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Blue),
                    ground: Ground::Background
                }),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_reset_tag() {
        assert_eq!(tokenize("[/]").unwrap(), vec![Token::Tag(TagType::Reset)]);
    }

    #[test]
    fn test_tokenize_compound_tag() {
        let tokens = tokenize("[bold red]hi").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Emphasis(EmphasisType::Bold)),
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                }),
                Token::Text("hi".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_at_start() {
        let tokens = tokenize("\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![Token::Text("[".into()), Token::Text("not a tag]".into()),]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_with_prefix() {
        let tokens = tokenize("before\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Text("[".into()),
                Token::Text("not a tag]".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_unclosed_tag_returns_error() {
        // Pin the variant so an unrelated failure cannot satisfy this test.
        assert!(matches!(
            tokenize("[red"),
            Err(crate::errors::LexError::UnclosedTag)
        ));
    }

    #[test]
    fn test_tokenize_invalid_tag_name_returns_error() {
        assert!(tokenize("[fuchsia]").is_err());
    }

    #[test]
    fn test_tokenize_text_before_and_after_tag() {
        let tokens = tokenize("before[red]after").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                }),
                Token::Text("after".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_ansi256_tag() {
        let tokens = tokenize("[ansi(1)]text").unwrap();
        assert_eq!(
            tokens[0],
            Token::Tag(TagType::Color {
                color: Color::Ansi256(1),
                ground: Ground::Foreground,
            })
        );
    }

    #[test]
    fn test_tokenize_rgb_tag() {
        let tokens = tokenize("[rgb(255,0,128)]text").unwrap();
        assert_eq!(
            tokens[0],
            Token::Tag(TagType::Color {
                color: Color::Rgb(255, 0, 128),
                ground: Ground::Foreground,
            })
        );
    }

    #[test]
    fn test_tokenize_bg_rgb_tag() {
        let tokens = tokenize("[bg:rgb(0,255,0)]text").unwrap();
        assert_eq!(
            tokens[0],
            Token::Tag(TagType::Color {
                color: Color::Rgb(0, 255, 0),
                ground: Ground::Background,
            })
        );
    }

    // --- registry fallback ---

    #[test]
    fn test_parse_part_custom_style_from_registry() {
        crate::registry::insert_style("danger", crate::ansi::Style::parse("[bold red]").unwrap());
        let result = parse_part("danger").unwrap();
        assert_eq!(
            result,
            vec![
                TagType::Emphasis(EmphasisType::Bold),
                TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                },
            ]
        );
    }
}
612                },
613            ]
614        );
615    }
616}