Skip to main content

farben_core/
lexer.rs

//! Tokenizer for farben markup strings.
//!
//! Parses bracket-delimited tag syntax (`[bold red]text[/]`) into a flat sequence of
//! [`Token`] values. Each token is either a [`Token::Tag`] carrying styling information
//! or a [`Token::Text`] carrying a run of literal characters.
//!
//! The main entry point is [`tokenize`]. The lower-level `parse_tag` and `parse_part`
//! functions handle individual tag strings and are not part of the public API.
9
10use std::{borrow::Cow, sync::Arc};
11
12use crate::{
13    ansi::{Color, Ground, NamedColor, Style},
14    errors::LexError,
15    registry::search_registry,
16};
17
/// A text emphasis attribute that farben markup can switch on.
///
/// Every variant maps to a single SGR code, noted on the variant itself.
/// The enum carries no data, so it is `Eq` as well as `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EmphasisType {
    /// Reduced intensity (SGR 2).
    Dim,
    /// Italic text (SGR 3).
    Italic,
    /// Underlined text (SGR 4).
    Underline,
    /// Bold text (SGR 1).
    Bold,
    /// Crossed-out text (SGR 9).
    Strikethrough,
    /// Blinking text (SGR 5). Terminal support varies.
    Blink,
}
34
35/// The kind of styling operation a tag represents.
36#[derive(Debug, PartialEq, Clone)]
37pub enum TagType {
38    /// Resets all active styles (`[/]`).
39    ResetAll,
40    /// Resets one specific active style (`[/bold]`, `[/red]`, etc.), then re-applies the rest.
41    ResetOne(Box<TagType>),
42    /// Applies a text emphasis attribute.
43    Emphasis(EmphasisType),
44    /// Sets a foreground or background color.
45    Color { color: Color, ground: Ground },
46    /// A literal prefix string injected before the style sequence by the registry.
47    Prefix(String),
48}
49
50/// A single unit produced by the tokenizer: either a styling tag or a run of plain text.
51#[derive(Debug, PartialEq)]
52pub enum Token {
53    /// A parsed styling tag.
54    Tag(TagType),
55    /// A run of plain text with no markup.
56    Text(Cow<'static, str>),
57}
58
59impl EmphasisType {
60    /// Parses an emphasis keyword into an `EmphasisType`.
61    ///
62    /// Returns `None` if the string is not a recognized emphasis name.
63    /// Matching is case-sensitive.
64    fn from_str(input: &str) -> Option<Self> {
65        match input {
66            "dim" => Some(Self::Dim),
67            "italic" => Some(Self::Italic),
68            "underline" => Some(Self::Underline),
69            "bold" => Some(Self::Bold),
70            "strikethrough" => Some(Self::Strikethrough),
71            "blink" => Some(Self::Blink),
72            _ => None,
73        }
74    }
75}
76
77/// Expands a [`Style`] from the registry into its equivalent sequence of [`TagType`] values.
78///
79/// A `Prefix` tag is always prepended first, if one is set. A `reset` style short-circuits
80/// after the prefix: no emphasis or color tags are emitted.
81fn style_to_tags(style: Arc<Style>) -> Vec<TagType> {
82    let mut res: Vec<TagType> = Vec::new();
83    let prefix = style.prefix.clone();
84
85    if style.reset {
86        if let Some(p) = prefix {
87            res.push(TagType::Prefix(p));
88        }
89        res.push(TagType::ResetAll);
90        return res;
91    }
92
93    for (enabled, tag) in [
94        (style.bold, TagType::Emphasis(EmphasisType::Bold)),
95        (style.blink, TagType::Emphasis(EmphasisType::Blink)),
96        (style.dim, TagType::Emphasis(EmphasisType::Dim)),
97        (style.italic, TagType::Emphasis(EmphasisType::Italic)),
98        (
99            style.strikethrough,
100            TagType::Emphasis(EmphasisType::Strikethrough),
101        ),
102        (style.underline, TagType::Emphasis(EmphasisType::Underline)),
103    ] {
104        if enabled {
105            res.push(tag);
106        }
107    }
108
109    if let Some(fg) = style.fg.clone() {
110        res.push(TagType::Color {
111            color: fg,
112            ground: Ground::Foreground,
113        })
114    }
115    if let Some(bg) = style.bg.clone() {
116        res.push(TagType::Color {
117            color: bg,
118            ground: Ground::Background,
119        })
120    }
121
122    if let Some(p) = prefix {
123        res.push(TagType::Prefix(p));
124    }
125
126    res
127}
128
129/// Parses a single whitespace-delimited tag part into a `TagType`.
130///
131/// Recognizes:
132/// - `/` as a reset
133/// - Named colors (`red`, `blue`, etc.)
134/// - Emphasis keywords (`bold`, `italic`, etc.)
135/// - `ansi(N)` for ANSI 256-palette colors
136/// - `rgb(R,G,B)` for true-color values
137/// - A named style from the registry as a fallback
138///
139/// Parts may be prefixed with `bg:` to target the background ground, or `fg:` to
140/// explicitly target the foreground. Unprefixed color parts default to foreground.
141///
142/// # Errors
143///
144/// Returns `LexError::InvalidTag` if the part matches none of the above forms.
145/// Returns `LexError::InvalidValue` if a numeric argument cannot be parsed.
146/// Returns `LexError::InvalidArgumentCount` if `rgb(...)` does not receive exactly three values.
147fn parse_part(part: &str, position: usize) -> Result<Vec<TagType>, LexError> {
148    let (ground, part) = if let Some(rest) = part.strip_prefix("bg:") {
149        (Ground::Background, rest)
150    } else if let Some(rest) = part.strip_prefix("fg:") {
151        (Ground::Foreground, rest)
152    } else {
153        (Ground::Foreground, part)
154    };
155    if let Some(remainder) = part.strip_prefix('/') {
156        if remainder.is_empty() {
157            Ok(vec![TagType::ResetAll])
158        } else {
159            let inner = parse_part(remainder, position + 1)?;
160            match inner.as_slice() {
161                [tag] => match tag {
162                    TagType::ResetAll | TagType::ResetOne(_) | TagType::Prefix(_) => {
163                        Err(LexError::InvalidResetTarget(position))
164                    }
165                    _ => Ok(vec![TagType::ResetOne(Box::new(tag.clone()))]),
166                },
167                _ => Err(LexError::InvalidTag {
168                    tag_content: part.to_string(),
169                    position,
170                }),
171            }
172        }
173    } else if let Some(color) = NamedColor::from_str(part) {
174        Ok(vec![TagType::Color {
175            color: Color::Named(color),
176            ground,
177        }])
178    } else if let Some(emphasis) = EmphasisType::from_str(part) {
179        Ok(vec![TagType::Emphasis(emphasis)])
180    } else if let Some(rest) = part.strip_prefix("ansi(") {
181        if !rest.ends_with(')') {
182            return Err(LexError::UnclosedValue(position));
183        }
184        let ansi_val = &rest[..rest.len() - 1];
185        match ansi_val.trim().parse::<u8>() {
186            Ok(code) => Ok(vec![TagType::Color {
187                color: Color::Ansi256(code),
188                ground,
189            }]),
190            Err(_) => Err(LexError::InvalidValue {
191                value: ansi_val.to_string(),
192                position,
193            }),
194        }
195    } else if let Some(rest) = part.strip_prefix("rgb(") {
196        if !rest.ends_with(')') {
197            return Err(LexError::UnclosedValue(position));
198        }
199        let rgb_val = &rest[..rest.len() - 1];
200        let parts: Result<Vec<u8>, _> =
201            rgb_val.split(',').map(|v| v.trim().parse::<u8>()).collect();
202        match parts {
203            Ok(v) if v.len() == 3 => Ok(vec![TagType::Color {
204                color: Color::Rgb(v[0], v[1], v[2]),
205                ground,
206            }]),
207            Ok(v) => Err(LexError::InvalidArgumentCount {
208                expected: 3,
209                got: v.len(),
210                position,
211            }),
212            Err(_) => Err(LexError::InvalidValue {
213                value: rgb_val.to_string(),
214                position,
215            }),
216        }
217    } else {
218        match search_registry(part) {
219            Ok(style) => Ok(style_to_tags(style)),
220            Err(_) => Err(LexError::InvalidTag {
221                tag_content: part.to_string(),
222                position,
223            }),
224        }
225    }
226}
227
228/// Splits a raw tag string on whitespace and parses each part into a `TagType`.
229///
230/// A tag like `"bold red"` produces two `TagType` values. Whitespace between parts
231/// is consumed and does not appear in the output.
232///
233/// # Errors
234///
235/// Propagates any error from `parse_part`.
236fn parse_tag(raw_tag: &str, tag_start: usize) -> Result<Vec<TagType>, LexError> {
237    let mut result = Vec::new();
238    let mut search_from = 0;
239
240    for part in raw_tag.split_whitespace() {
241        let part_offset = raw_tag[search_from..].find(part).unwrap() + search_from;
242        let abs_position = tag_start + part_offset;
243        result.extend(parse_part(part, abs_position)?);
244        search_from = part_offset + part.len();
245    }
246
247    Ok(result)
248}
249
250/// Tokenizes a farben markup string into a sequence of `Token`s.
251///
252/// Tags are delimited by `[` and `]`. A `[` preceded by `\` is treated as a literal
253/// bracket rather than the start of a tag. Text between tags is emitted as
254/// [`Token::Text`]; tags are parsed and emitted as [`Token::Tag`].
255///
256/// # Errors
257///
258/// Returns `LexError::UnclosedTag` if a `[` has no matching `]`.
259/// Returns any error produced by `parse_tag` for malformed tag contents.
260///
261/// # Example
262///
263/// ```ignore
264/// let tokens = tokenize("[red]hello")?;
265/// // => [Token::Tag(TagType::Color { color: Color::Named(NamedColor::Red), ground: Ground::Foreground }),
266/// //     Token::Text("hello".into())]
267/// ```
268pub fn tokenize(input: impl Into<String>) -> Result<Vec<Token>, LexError> {
269    let input = input.into();
270    let mut tokens: Vec<Token> = Vec::with_capacity(input.len() / 4);
271    let mut pos = 0;
272    loop {
273        let Some(starting) = input[pos..].find('[') else {
274            if pos < input.len() {
275                tokens.push(Token::Text(Cow::Owned(input[pos..].to_string())));
276            }
277            break;
278        };
279        let abs_starting = starting + pos;
280        // wtf does this mean
281        if abs_starting > 0 && input.as_bytes().get(abs_starting.wrapping_sub(1)) == Some(&b'\\') {
282            let before = &input[pos..abs_starting - 1];
283            if !before.is_empty() {
284                tokens.push(Token::Text(Cow::Owned(before.to_string())));
285            }
286            tokens.push(Token::Text(Cow::Borrowed("[")));
287            pos = abs_starting + 1;
288            continue;
289        }
290
291        if pos != abs_starting {
292            tokens.push(Token::Text(Cow::Owned(input[pos..abs_starting].to_string())));
293        }
294
295        let Some(closing) = input[abs_starting..].find(']') else {
296            return Err(LexError::UnclosedTag(abs_starting));
297        };
298        let abs_closing = closing + abs_starting;
299        let raw_tag = &input[abs_starting + 1..abs_closing];
300        for tag in parse_tag(raw_tag, abs_starting)? {
301            tokens.push(Token::Tag(tag));
302        }
303        pos = abs_closing + 1;
304    }
305    Ok(tokens)
306}
307
#[cfg(test)]
mod tests {
    //! Unit tests for the lexer.
    //!
    //! Error cases assert the specific `LexError` variant rather than only `is_err()`,
    //! so a failure for the wrong reason does not pass unnoticed.

    use super::*;
    use crate::ansi::{Color, Ground, NamedColor};
    use crate::errors::LexError;

    // --- helpers ---

    /// Foreground color tag for `color`.
    fn fg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Foreground,
        }
    }

    /// Background color tag for `color`.
    fn bg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Background,
        }
    }

    /// The named color red.
    fn red() -> Color {
        Color::Named(NamedColor::Red)
    }

    /// Text token holding `content`.
    fn text(content: &'static str) -> Token {
        Token::Text(content.into())
    }

    // --- EmphasisType::from_str ---

    #[test]
    fn test_emphasis_from_str_all_known() {
        let cases = [
            ("dim", EmphasisType::Dim),
            ("italic", EmphasisType::Italic),
            ("underline", EmphasisType::Underline),
            ("bold", EmphasisType::Bold),
            ("strikethrough", EmphasisType::Strikethrough),
            ("blink", EmphasisType::Blink),
        ];
        for (keyword, expected) in cases {
            assert_eq!(
                EmphasisType::from_str(keyword),
                Some(expected),
                "keyword: {keyword}"
            );
        }
    }

    #[test]
    fn test_emphasis_from_str_unknown_returns_none() {
        assert_eq!(EmphasisType::from_str("flash"), None);
    }

    #[test]
    fn test_emphasis_from_str_case_sensitive() {
        assert_eq!(EmphasisType::from_str("Bold"), None);
    }

    // --- parse_part ---

    #[test]
    fn test_parse_part_reset() {
        assert_eq!(parse_part("/", 0).unwrap(), vec![TagType::ResetAll]);
    }

    #[test]
    fn test_parse_part_reset_one_emphasis() {
        assert_eq!(
            parse_part("/bold", 0).unwrap(),
            vec![TagType::ResetOne(Box::new(TagType::Emphasis(
                EmphasisType::Bold
            )))]
        );
    }

    #[test]
    fn test_parse_part_nested_reset_is_rejected() {
        assert!(matches!(
            parse_part("//", 0),
            Err(LexError::InvalidResetTarget(0))
        ));
    }

    #[test]
    fn test_parse_part_named_color_foreground_default() {
        assert_eq!(parse_part("red", 0).unwrap(), vec![fg(red())]);
    }

    #[test]
    fn test_parse_part_named_color_explicit_fg() {
        assert_eq!(parse_part("fg:red", 0).unwrap(), vec![fg(red())]);
    }

    #[test]
    fn test_parse_part_named_color_bg() {
        assert_eq!(parse_part("bg:red", 0).unwrap(), vec![bg(red())]);
    }

    #[test]
    fn test_parse_part_emphasis_bold() {
        assert_eq!(
            parse_part("bold", 0).unwrap(),
            vec![TagType::Emphasis(EmphasisType::Bold)]
        );
    }

    #[test]
    fn test_parse_part_ansi256_valid() {
        assert_eq!(
            parse_part("ansi(200)", 0).unwrap(),
            vec![fg(Color::Ansi256(200))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_bg() {
        assert_eq!(
            parse_part("bg:ansi(200)", 0).unwrap(),
            vec![bg(Color::Ansi256(200))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_with_whitespace() {
        assert_eq!(
            parse_part("ansi( 42 )", 0).unwrap(),
            vec![fg(Color::Ansi256(42))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_invalid_value() {
        assert!(matches!(
            parse_part("ansi(abc)", 0),
            Err(LexError::InvalidValue { .. })
        ));
    }

    #[test]
    fn test_parse_part_unclosed_value() {
        assert!(matches!(
            parse_part("ansi(1", 0),
            Err(LexError::UnclosedValue(0))
        ));
        assert!(matches!(
            parse_part("rgb(1,2,3", 0),
            Err(LexError::UnclosedValue(0))
        ));
    }

    #[test]
    fn test_parse_part_rgb_valid() {
        assert_eq!(
            parse_part("rgb(255,128,0)", 0).unwrap(),
            vec![fg(Color::Rgb(255, 128, 0))]
        );
    }

    #[test]
    fn test_parse_part_rgb_bg() {
        assert_eq!(
            parse_part("bg:rgb(255,128,0)", 0).unwrap(),
            vec![bg(Color::Rgb(255, 128, 0))]
        );
    }

    #[test]
    fn test_parse_part_rgb_with_spaces() {
        assert_eq!(
            parse_part("rgb( 10 , 20 , 30 )", 0).unwrap(),
            vec![fg(Color::Rgb(10, 20, 30))]
        );
    }

    #[test]
    fn test_parse_part_rgb_wrong_arg_count() {
        // Match exhaustively so that any other outcome fails the test instead of
        // silently skipping the assertions.
        match parse_part("rgb(1,2)", 0) {
            Err(LexError::InvalidArgumentCount { expected, got, .. }) => {
                assert_eq!(expected, 3);
                assert_eq!(got, 2);
            }
            other => panic!("expected InvalidArgumentCount, got {other:?}"),
        }
    }

    #[test]
    fn test_parse_part_rgb_invalid_value() {
        assert!(matches!(
            parse_part("rgb(r,g,b)", 0),
            Err(LexError::InvalidValue { .. })
        ));
    }

    #[test]
    fn test_parse_part_unknown_tag_returns_error() {
        assert!(matches!(
            parse_part("fuchsia", 0),
            Err(LexError::InvalidTag { .. })
        ));
    }

    #[test]
    fn test_parse_part_custom_style_from_registry() {
        crate::registry::insert_style("danger", crate::ansi::Style::parse("[bold red]").unwrap());
        let result = parse_part("danger", 0).unwrap();
        assert_eq!(
            result,
            vec![TagType::Emphasis(EmphasisType::Bold), fg(red())]
        );
    }

    // --- tokenize ---

    #[test]
    fn test_tokenize_plain_text() {
        assert_eq!(tokenize("hello world").unwrap(), vec![text("hello world")]);
    }

    #[test]
    fn test_tokenize_empty_string() {
        assert!(tokenize("").unwrap().is_empty());
    }

    #[test]
    fn test_tokenize_single_color_tag() {
        assert_eq!(
            tokenize("[red]text").unwrap(),
            vec![Token::Tag(fg(red())), text("text")]
        );
    }

    #[test]
    fn test_tokenize_bg_color_tag() {
        assert_eq!(
            tokenize("[bg:red]text").unwrap(),
            vec![Token::Tag(bg(red())), text("text")]
        );
    }

    #[test]
    fn test_tokenize_fg_and_bg_in_same_bracket() {
        assert_eq!(
            tokenize("[fg:white bg:blue]text").unwrap(),
            vec![
                Token::Tag(fg(Color::Named(NamedColor::White))),
                Token::Tag(bg(Color::Named(NamedColor::Blue))),
                text("text"),
            ]
        );
    }

    #[test]
    fn test_tokenize_reset_tag() {
        assert_eq!(
            tokenize("[/]").unwrap(),
            vec![Token::Tag(TagType::ResetAll)]
        );
    }

    #[test]
    fn test_tokenize_compound_tag() {
        assert_eq!(
            tokenize("[bold red]hi").unwrap(),
            vec![
                Token::Tag(TagType::Emphasis(EmphasisType::Bold)),
                Token::Tag(fg(red())),
                text("hi"),
            ]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_at_start() {
        assert_eq!(
            tokenize("\\[not a tag]").unwrap(),
            vec![text("["), text("not a tag]")]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_with_prefix() {
        assert_eq!(
            tokenize("before\\[not a tag]").unwrap(),
            vec![text("before"), text("["), text("not a tag]")]
        );
    }

    #[test]
    fn test_tokenize_unclosed_tag_returns_error() {
        assert!(matches!(tokenize("[red"), Err(LexError::UnclosedTag(0))));
    }

    #[test]
    fn test_tokenize_invalid_tag_name_returns_error() {
        assert!(matches!(
            tokenize("[fuchsia]"),
            Err(LexError::InvalidTag { .. })
        ));
    }

    #[test]
    fn test_tokenize_text_before_and_after_tag() {
        assert_eq!(
            tokenize("before[red]after").unwrap(),
            vec![text("before"), Token::Tag(fg(red())), text("after")]
        );
    }

    #[test]
    fn test_tokenize_ansi256_tag() {
        assert_eq!(
            tokenize("[ansi(1)]text").unwrap(),
            vec![Token::Tag(fg(Color::Ansi256(1))), text("text")]
        );
    }

    #[test]
    fn test_tokenize_rgb_tag() {
        assert_eq!(
            tokenize("[rgb(255,0,128)]text").unwrap(),
            vec![Token::Tag(fg(Color::Rgb(255, 0, 128))), text("text")]
        );
    }

    #[test]
    fn test_tokenize_bg_rgb_tag() {
        assert_eq!(
            tokenize("[bg:rgb(0,255,0)]text").unwrap(),
            vec![Token::Tag(bg(Color::Rgb(0, 255, 0))), text("text")]
        );
    }
}