Skip to main content

farben_core/
lexer.rs

1//! Tokenizer for farben markup strings.
2//!
3//! Parses bracket-delimited tag syntax (`[bold red]text[/]`) into a flat sequence of
4//! [`Token`] values. Each token is either a [`Token::Tag`] carrying styling information
5//! or a [`Token::Text`] carrying a run of literal characters.
6//!
7//! The main entry point is [`tokenize`]. The lower-level `parse_tag` and `parse_part`
8//! functions handle individual tag strings and are not part of the public API.
9
10use std::{borrow::Cow, sync::Arc};
11
12use crate::{
13    ansi::{Color, Ground, NamedColor, Style},
14    errors::LexError,
15    registry::search_registry,
16};
17
/// The text-emphasis attributes that farben markup can switch on.
///
/// Every variant's documentation names the SGR parameter it corresponds to.
#[derive(Debug, Clone, PartialEq)]
pub enum EmphasisType {
    /// Faint, lower-intensity text (SGR 2).
    Dim,
    /// Slanted / italic text (SGR 3).
    Italic,
    /// A single line under the text (SGR 4).
    Underline,
    /// Two lines under the text (SGR 21).
    DoubleUnderline,
    /// Increased-intensity (bold) text (SGR 1).
    Bold,
    /// Text with a line struck through it (SGR 9).
    Strikethrough,
    /// Slowly blinking text (SGR 5).
    Blink,
    /// A line drawn above the text (SGR 53).
    Overline,
    /// Hidden text that can still be selected (SGR 8).
    Invisible,
    /// Foreground and background swapped (SGR 7).
    Reverse,
    /// Blinking at a faster rate than [`EmphasisType::Blink`] (SGR 6).
    RapidBlink,
}
44
45/// The kind of styling operation a tag represents.
46#[derive(Debug, PartialEq, Clone)]
47pub enum TagType {
48    /// Resets all active styles (`[/]`).
49    ResetAll,
50    /// Resets one specific active style, then re-applies the rest.
51    /// Example: `[/bold]` resets bold but keeps other active styles.
52    ResetOne(Box<TagType>),
53    /// Applies a text emphasis attribute.
54    Emphasis(EmphasisType),
55    /// Sets a foreground or background color.
56    Color {
57        /// The color to apply.
58        color: Color,
59        /// Whether foreground or background.
60        ground: Ground,
61    },
62    /// A literal prefix string injected before the style sequence by the registry.
63    Prefix(String),
64}
65
66/// A single unit produced by the tokenizer: either a styling tag or a run of plain text.
67#[derive(Debug, PartialEq)]
68pub enum Token {
69    /// A parsed styling tag (color, emphasis, reset).
70    Tag(TagType),
71    /// A run of plain text with no markup.
72    Text(Cow<'static, str>),
73}
74
75impl EmphasisType {
76    /// Parses an emphasis keyword into an `EmphasisType`.
77    ///
78    /// Returns `None` if the string is not a recognized emphasis name.
79    /// Matching is case-sensitive.
80    fn from_str(input: &str) -> Option<Self> {
81        match input {
82            "dim" => Some(Self::Dim),
83            "italic" => Some(Self::Italic),
84            "underline" => Some(Self::Underline),
85            "double-underline" => Some(Self::DoubleUnderline),
86            "bold" => Some(Self::Bold),
87            "strikethrough" => Some(Self::Strikethrough),
88            "blink" => Some(Self::Blink),
89            "overline" => Some(Self::Overline),
90            "invisible" => Some(Self::Invisible),
91            "reverse" => Some(Self::Reverse),
92            "rapid-blink" => Some(Self::RapidBlink),
93            _ => None,
94        }
95    }
96}
97
98/// Expands a [`Style`] from the registry into its equivalent sequence of [`TagType`] values.
99///
100/// A `Prefix` tag is always prepended first, if one is set. A `reset` style short-circuits
101/// after the prefix: no emphasis or color tags are emitted.
102fn style_to_tags(style: Arc<Style>) -> Vec<TagType> {
103    let mut res: Vec<TagType> = Vec::new();
104    let prefix = style.prefix.clone();
105
106    if style.reset {
107        if let Some(p) = prefix {
108            res.push(TagType::Prefix(p));
109        }
110        res.push(TagType::ResetAll);
111        return res;
112    }
113
114    for (enabled, tag) in [
115        (style.bold, TagType::Emphasis(EmphasisType::Bold)),
116        (style.blink, TagType::Emphasis(EmphasisType::Blink)),
117        (style.dim, TagType::Emphasis(EmphasisType::Dim)),
118        (style.italic, TagType::Emphasis(EmphasisType::Italic)),
119        (
120            style.strikethrough,
121            TagType::Emphasis(EmphasisType::Strikethrough),
122        ),
123        (style.underline, TagType::Emphasis(EmphasisType::Underline)),
124        (
125            style.double_underline,
126            TagType::Emphasis(EmphasisType::DoubleUnderline),
127        ),
128        (style.overline, TagType::Emphasis(EmphasisType::Overline)),
129        (style.invisible, TagType::Emphasis(EmphasisType::Invisible)),
130        (style.reverse, TagType::Emphasis(EmphasisType::Reverse)),
131        (
132            style.rapid_blink,
133            TagType::Emphasis(EmphasisType::RapidBlink),
134        ),
135    ] {
136        if enabled {
137            res.push(tag);
138        }
139    }
140
141    if let Some(fg) = style.fg.clone() {
142        res.push(TagType::Color {
143            color: fg,
144            ground: Ground::Foreground,
145        })
146    }
147    if let Some(bg) = style.bg.clone() {
148        res.push(TagType::Color {
149            color: bg,
150            ground: Ground::Background,
151        })
152    }
153
154    if let Some(p) = prefix {
155        res.push(TagType::Prefix(p));
156    }
157
158    res
159}
160
161/// Parses a single whitespace-delimited tag part into a `TagType`.
162///
163/// Recognizes:
164/// - `/` as a reset
165/// - Named colors (`red`, `blue`, etc.)
166/// - Emphasis keywords (`bold`, `italic`, etc.)
167/// - `ansi(N)` for ANSI 256-palette colors
168/// - `rgb(R,G,B)` for true-color values
169/// - A named style from the registry as a fallback
170///
171/// Parts may be prefixed with `bg:` to target the background ground, or `fg:` to
172/// explicitly target the foreground. Unprefixed color parts default to foreground.
173///
174/// # Errors
175///
176/// Returns `LexError::InvalidTag` if the part matches none of the above forms.
177/// Returns `LexError::InvalidValue` if a numeric argument cannot be parsed.
178/// Returns `LexError::InvalidArgumentCount` if `rgb(...)` does not receive exactly three values.
179fn parse_part(part: &str, position: usize) -> Result<Vec<TagType>, LexError> {
180    let (ground, part) = if let Some(rest) = part.strip_prefix("bg:") {
181        (Ground::Background, rest)
182    } else if let Some(rest) = part.strip_prefix("fg:") {
183        (Ground::Foreground, rest)
184    } else {
185        (Ground::Foreground, part)
186    };
187    if let Some(remainder) = part.strip_prefix('/') {
188        if remainder.is_empty() {
189            Ok(vec![TagType::ResetAll])
190        } else {
191            let inner = parse_part(remainder, position + 1)?;
192            match inner.as_slice() {
193                [tag] => match tag {
194                    TagType::ResetAll | TagType::ResetOne(_) | TagType::Prefix(_) => {
195                        Err(LexError::InvalidResetTarget(position))
196                    }
197                    _ => Ok(vec![TagType::ResetOne(Box::new(tag.clone()))]),
198                },
199                _ => Err(LexError::InvalidTag {
200                    tag_content: part.to_string(),
201                    position,
202                }),
203            }
204        }
205    } else if let Some(color) = NamedColor::from_str(part) {
206        Ok(vec![TagType::Color {
207            color: Color::Named(color),
208            ground,
209        }])
210    } else if let Some(emphasis) = EmphasisType::from_str(part) {
211        Ok(vec![TagType::Emphasis(emphasis)])
212    } else if let Some(rest) = part.strip_prefix("ansi(") {
213        if !rest.ends_with(')') {
214            return Err(LexError::UnclosedValue(position));
215        }
216        let ansi_val = &rest[..rest.len() - 1];
217        match ansi_val.trim().parse::<u8>() {
218            Ok(code) => Ok(vec![TagType::Color {
219                color: Color::Ansi256(code),
220                ground,
221            }]),
222            Err(_) => Err(LexError::InvalidValue {
223                value: ansi_val.to_string(),
224                position,
225            }),
226        }
227    } else if let Some(rest) = part.strip_prefix("rgb(") {
228        if !rest.ends_with(')') {
229            return Err(LexError::UnclosedValue(position));
230        }
231        let rgb_val = &rest[..rest.len() - 1];
232        let parts: Result<Vec<u8>, _> =
233            rgb_val.split(',').map(|v| v.trim().parse::<u8>()).collect();
234        match parts {
235            Ok(v) if v.len() == 3 => Ok(vec![TagType::Color {
236                color: Color::Rgb(v[0], v[1], v[2]),
237                ground,
238            }]),
239            Ok(v) => Err(LexError::InvalidArgumentCount {
240                expected: 3,
241                got: v.len(),
242                position,
243            }),
244            Err(_) => Err(LexError::InvalidValue {
245                value: rgb_val.to_string(),
246                position,
247            }),
248        }
249    } else {
250        match search_registry(part) {
251            Ok(style) => Ok(style_to_tags(style)),
252            Err(_) => Err(LexError::InvalidTag {
253                tag_content: part.to_string(),
254                position,
255            }),
256        }
257    }
258}
259
260/// Splits a raw tag string on whitespace and parses each part into a `TagType`.
261///
262/// A tag like `"bold red"` produces two `TagType` values. Whitespace between parts
263/// is consumed and does not appear in the output.
264///
265/// # Errors
266///
267/// Propagates any error from `parse_part`.
268fn parse_tag(raw_tag: &str, tag_start: usize) -> Result<Vec<TagType>, LexError> {
269    let mut result = Vec::new();
270    let mut search_from = 0;
271
272    for part in raw_tag.split_whitespace() {
273        let part_offset = raw_tag[search_from..].find(part).unwrap() + search_from;
274        let abs_position = tag_start + part_offset;
275        result.extend(parse_part(part, abs_position)?);
276        search_from = part_offset + part.len();
277    }
278
279    Ok(result)
280}
281
282/// Tokenizes a farben markup string into a sequence of `Token`s.
283///
284/// Tags are delimited by `[` and `]`. A `[` preceded by `\` is treated as a literal
285/// bracket rather than the start of a tag. Text between tags is emitted as
286/// [`Token::Text`]; tags are parsed and emitted as [`Token::Tag`].
287///
288/// # Errors
289///
290/// Returns `LexError::UnclosedTag` if a `[` has no matching `]`.
291/// Returns any error produced by `parse_tag` for malformed tag contents.
292///
293/// # Example
294///
295/// ```ignore
296/// let tokens = tokenize("[red]hello")?;
297/// // => [Token::Tag(TagType::Color { color: Color::Named(NamedColor::Red), ground: Ground::Foreground }),
298/// //     Token::Text("hello".into())]
299/// ```
300pub fn tokenize(input: impl Into<String>) -> Result<Vec<Token>, LexError> {
301    let input = input.into();
302    let mut tokens: Vec<Token> = Vec::with_capacity(input.len() / 4);
303    let mut pos = 0;
304    loop {
305        let Some(starting) = input[pos..].find('[') else {
306            if pos < input.len() {
307                tokens.push(Token::Text(Cow::Owned(input[pos..].to_string())));
308            }
309            break;
310        };
311        let abs_starting = starting + pos;
312        // wtf does this mean
313        if abs_starting > 0 && input.as_bytes().get(abs_starting.wrapping_sub(1)) == Some(&b'\\') {
314            let before = &input[pos..abs_starting - 1];
315            if !before.is_empty() {
316                tokens.push(Token::Text(Cow::Owned(before.to_string())));
317            }
318            tokens.push(Token::Text(Cow::Borrowed("[")));
319            pos = abs_starting + 1;
320            continue;
321        }
322
323        if abs_starting > 0 && input.as_bytes().get(abs_starting.wrapping_sub(1)) == Some(&b'\x1b')
324        {
325            tokens.push(Token::Text(Cow::Owned(
326                input[pos..abs_starting + 1].to_string(),
327            )));
328            pos = abs_starting + 1;
329            continue;
330        }
331
332        if pos != abs_starting {
333            tokens.push(Token::Text(Cow::Owned(
334                input[pos..abs_starting].to_string(),
335            )));
336        }
337
338        let Some(closing) = input[abs_starting..].find(']') else {
339            return Err(LexError::UnclosedTag(abs_starting));
340        };
341        let abs_closing = closing + abs_starting;
342        let raw_tag = &input[abs_starting + 1..abs_closing];
343        for tag in parse_tag(raw_tag, abs_starting)? {
344            tokens.push(Token::Tag(tag));
345        }
346        pos = abs_closing + 1;
347    }
348    Ok(tokens)
349}
350
#[cfg(test)]
mod tests {
    //! Unit tests for the lexer: keyword lookup, single-part parsing, and tokenizing.

    use super::*;
    use crate::ansi::{Color, Ground, NamedColor};

    // --- helpers ---

    /// Builds a foreground color tag.
    fn fg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Foreground,
        }
    }

    /// Builds a background color tag.
    fn bg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Background,
        }
    }

    /// Builds a text token from a string literal.
    fn text(content: &'static str) -> Token {
        Token::Text(content.into())
    }

    // --- EmphasisType::from_str ---

    #[test]
    fn test_emphasis_from_str_all_known() {
        // Every keyword accepted by `from_str`, including the hyphenated ones.
        let known = [
            ("dim", EmphasisType::Dim),
            ("italic", EmphasisType::Italic),
            ("underline", EmphasisType::Underline),
            ("double-underline", EmphasisType::DoubleUnderline),
            ("bold", EmphasisType::Bold),
            ("strikethrough", EmphasisType::Strikethrough),
            ("blink", EmphasisType::Blink),
            ("overline", EmphasisType::Overline),
            ("invisible", EmphasisType::Invisible),
            ("reverse", EmphasisType::Reverse),
            ("rapid-blink", EmphasisType::RapidBlink),
        ];
        for (keyword, expected) in known {
            assert_eq!(
                EmphasisType::from_str(keyword),
                Some(expected),
                "keyword {:?}",
                keyword
            );
        }
    }

    #[test]
    fn test_emphasis_from_str_unknown_returns_none() {
        assert_eq!(EmphasisType::from_str("flash"), None);
    }

    #[test]
    fn test_emphasis_from_str_case_sensitive() {
        assert_eq!(EmphasisType::from_str("Bold"), None);
    }

    // --- parse_part ---

    #[test]
    fn test_parse_part_reset() {
        assert_eq!(parse_part("/", 0).unwrap(), vec![TagType::ResetAll]);
    }

    #[test]
    fn test_parse_part_reset_one_emphasis() {
        assert_eq!(
            parse_part("/bold", 0).unwrap(),
            vec![TagType::ResetOne(Box::new(TagType::Emphasis(
                EmphasisType::Bold
            )))]
        );
    }

    #[test]
    fn test_parse_part_nested_reset_is_rejected() {
        // `//` asks to reset a reset, which is not a valid target.
        assert!(parse_part("//", 0).is_err());
    }

    #[test]
    fn test_parse_part_named_color_foreground_default() {
        assert_eq!(
            parse_part("red", 0).unwrap(),
            vec![fg(Color::Named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_named_color_explicit_fg() {
        assert_eq!(
            parse_part("fg:red", 0).unwrap(),
            vec![fg(Color::Named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_named_color_bg() {
        assert_eq!(
            parse_part("bg:red", 0).unwrap(),
            vec![bg(Color::Named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_emphasis_bold() {
        assert_eq!(
            parse_part("bold", 0).unwrap(),
            vec![TagType::Emphasis(EmphasisType::Bold)]
        );
    }

    #[test]
    fn test_parse_part_ansi256_valid() {
        assert_eq!(
            parse_part("ansi(200)", 0).unwrap(),
            vec![fg(Color::Ansi256(200))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_bg() {
        assert_eq!(
            parse_part("bg:ansi(200)", 0).unwrap(),
            vec![bg(Color::Ansi256(200))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_with_whitespace() {
        assert_eq!(
            parse_part("ansi( 42 )", 0).unwrap(),
            vec![fg(Color::Ansi256(42))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_invalid_value() {
        assert!(parse_part("ansi(abc)", 0).is_err());
    }

    #[test]
    fn test_parse_part_rgb_valid() {
        assert_eq!(
            parse_part("rgb(255,128,0)", 0).unwrap(),
            vec![fg(Color::Rgb(255, 128, 0))]
        );
    }

    #[test]
    fn test_parse_part_rgb_bg() {
        assert_eq!(
            parse_part("bg:rgb(255,128,0)", 0).unwrap(),
            vec![bg(Color::Rgb(255, 128, 0))]
        );
    }

    #[test]
    fn test_parse_part_rgb_with_spaces() {
        assert_eq!(
            parse_part("rgb( 10 , 20 , 30 )", 0).unwrap(),
            vec![fg(Color::Rgb(10, 20, 30))]
        );
    }

    #[test]
    fn test_parse_part_rgb_wrong_arg_count() {
        // Match on the exact variant and fields so that a different error kind fails
        // the test instead of slipping past an `if let`.
        let result = parse_part("rgb(1,2)", 0);
        assert!(
            matches!(
                result,
                Err(crate::errors::LexError::InvalidArgumentCount {
                    expected: 3,
                    got: 2,
                    ..
                })
            ),
            "expected InvalidArgumentCount {{ expected: 3, got: 2 }}, got {:?}",
            result
        );
    }

    #[test]
    fn test_parse_part_rgb_invalid_value() {
        assert!(parse_part("rgb(r,g,b)", 0).is_err());
    }

    #[test]
    fn test_parse_part_unknown_tag_returns_error() {
        assert!(parse_part("fuchsia", 0).is_err());
    }

    // --- tokenize ---

    #[test]
    fn test_tokenize_plain_text() {
        let tokens = tokenize("hello world").unwrap();
        assert_eq!(tokens, vec![text("hello world")]);
    }

    #[test]
    fn test_tokenize_empty_string() {
        assert!(tokenize("").unwrap().is_empty());
    }

    #[test]
    fn test_tokenize_single_color_tag() {
        let tokens = tokenize("[red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                text("text"),
            ]
        );
    }

    #[test]
    fn test_tokenize_bg_color_tag() {
        let tokens = tokenize("[bg:red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(bg(Color::Named(NamedColor::Red))),
                text("text"),
            ]
        );
    }

    #[test]
    fn test_tokenize_fg_and_bg_in_same_bracket() {
        let tokens = tokenize("[fg:white bg:blue]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(fg(Color::Named(NamedColor::White))),
                Token::Tag(bg(Color::Named(NamedColor::Blue))),
                text("text"),
            ]
        );
    }

    #[test]
    fn test_tokenize_reset_tag() {
        assert_eq!(
            tokenize("[/]").unwrap(),
            vec![Token::Tag(TagType::ResetAll)]
        );
    }

    #[test]
    fn test_tokenize_compound_tag() {
        let tokens = tokenize("[bold red]hi").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Emphasis(EmphasisType::Bold)),
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                text("hi"),
            ]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_at_start() {
        let tokens = tokenize("\\[not a tag]").unwrap();
        assert_eq!(tokens, vec![text("["), text("not a tag]")]);
    }

    #[test]
    fn test_tokenize_escaped_bracket_with_prefix() {
        let tokens = tokenize("before\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![text("before"), text("["), text("not a tag]")]
        );
    }

    #[test]
    fn test_tokenize_unclosed_tag_returns_error() {
        assert!(tokenize("[red").is_err());
    }

    #[test]
    fn test_tokenize_invalid_tag_name_returns_error() {
        assert!(tokenize("[fuchsia]").is_err());
    }

    #[test]
    fn test_tokenize_text_before_and_after_tag() {
        let tokens = tokenize("before[red]after").unwrap();
        assert_eq!(
            tokens,
            vec![
                text("before"),
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                text("after"),
            ]
        );
    }

    #[test]
    fn test_tokenize_ansi256_tag() {
        let tokens = tokenize("[ansi(1)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(fg(Color::Ansi256(1))));
    }

    #[test]
    fn test_tokenize_rgb_tag() {
        let tokens = tokenize("[rgb(255,0,128)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(fg(Color::Rgb(255, 0, 128))));
    }

    #[test]
    fn test_tokenize_bg_rgb_tag() {
        let tokens = tokenize("[bg:rgb(0,255,0)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(bg(Color::Rgb(0, 255, 0))));
    }

    #[test]
    fn test_parse_part_custom_style_from_registry() {
        crate::registry::insert_style("danger", crate::ansi::Style::parse("[bold red]").unwrap());
        let result = parse_part("danger", 0).unwrap();
        assert_eq!(
            result,
            vec![
                TagType::Emphasis(EmphasisType::Bold),
                fg(Color::Named(NamedColor::Red)),
            ]
        );
    }
}