Skip to main content

farben_core/
lexer.rs

1//! Tokenizer for farben markup strings.
2//!
3//! Parses bracket-delimited tag syntax (`[bold red]text[/]`) into a flat sequence of
4//! [`Token`] values. Each token is either a [`Token::Tag`] carrying styling information
5//! or a [`Token::Text`] carrying a run of literal characters.
6//!
7//! The main entry point is [`tokenize`]. The lower-level [`parse_tag`] and [`parse_part`]
8//! functions handle individual tag strings and are not part of the public API.
9
10use crate::{
11    ansi::{Color, Ground, NamedColor, Style},
12    errors::LexError,
13    registry::search_registry,
14};
15
/// One of the text attributes that farben markup can switch on.
///
/// Every variant corresponds to a single SGR (Select Graphic Rendition) parameter,
/// noted on the variant itself.
#[derive(Clone, Debug, PartialEq)]
pub enum EmphasisType {
    /// Faint / reduced intensity — SGR parameter 2.
    Dim,
    /// Italic — SGR parameter 3.
    Italic,
    /// Underline — SGR parameter 4.
    Underline,
    /// Bold — SGR parameter 1.
    Bold,
    /// Crossed-out text — SGR parameter 9.
    Strikethrough,
    /// Blinking text — SGR parameter 5. Not every terminal honors it.
    Blink,
}
32
33/// The kind of styling operation a tag represents.
34#[derive(Debug, PartialEq, Clone)]
35pub enum TagType {
36    /// Resets all active styles (`[/]`).
37    Reset(Option<Box<TagType>>),
38    /// Applies a text emphasis attribute.
39    Emphasis(EmphasisType),
40    /// Sets a foreground or background color.
41    Color { color: Color, ground: Ground },
42    /// A literal prefix string injected before the style sequence by the registry.
43    Prefix(String),
44}
45
46/// A single unit produced by the tokenizer: either a styling tag or a run of plain text.
47#[derive(Debug, PartialEq)]
48pub enum Token {
49    /// A parsed styling tag.
50    Tag(TagType),
51    /// A run of plain text with no markup.
52    Text(String),
53}
54
55impl EmphasisType {
56    /// Parses an emphasis keyword into an `EmphasisType`.
57    ///
58    /// Returns `None` if the string is not a recognized emphasis name.
59    /// Matching is case-sensitive.
60    fn from_str(input: &str) -> Option<Self> {
61        match input {
62            "dim" => Some(Self::Dim),
63            "italic" => Some(Self::Italic),
64            "underline" => Some(Self::Underline),
65            "bold" => Some(Self::Bold),
66            "strikethrough" => Some(Self::Strikethrough),
67            "blink" => Some(Self::Blink),
68            _ => None,
69        }
70    }
71}
72
73/// Expands a [`Style`] from the registry into its equivalent sequence of [`TagType`] values.
74///
75/// A `Prefix` tag is always prepended first, if one is set. A `reset` style short-circuits
76/// after the prefix: no emphasis or color tags are emitted.
77fn style_to_tags(style: Style) -> Vec<TagType> {
78    let mut res: Vec<TagType> = Vec::new();
79    let prefix = style.prefix;
80
81    if style.reset {
82        if let Some(p) = prefix {
83            res.push(TagType::Prefix(p));
84        }
85        res.push(TagType::Reset(None));
86        return res;
87    }
88
89    for (enabled, tag) in [
90        (style.bold, TagType::Emphasis(EmphasisType::Bold)),
91        (style.blink, TagType::Emphasis(EmphasisType::Blink)),
92        (style.dim, TagType::Emphasis(EmphasisType::Dim)),
93        (style.italic, TagType::Emphasis(EmphasisType::Italic)),
94        (
95            style.strikethrough,
96            TagType::Emphasis(EmphasisType::Strikethrough),
97        ),
98        (style.underline, TagType::Emphasis(EmphasisType::Underline)),
99    ] {
100        if enabled {
101            res.push(tag);
102        }
103    }
104
105    if let Some(fg) = style.fg {
106        res.push(TagType::Color {
107            color: fg,
108            ground: Ground::Foreground,
109        })
110    }
111    if let Some(bg) = style.bg {
112        res.push(TagType::Color {
113            color: bg,
114            ground: Ground::Background,
115        })
116    }
117
118    if let Some(p) = prefix {
119        res.push(TagType::Prefix(p));
120    }
121
122    res
123}
124
125/// Parses a single whitespace-delimited tag part into a `TagType`.
126///
127/// Recognizes:
128/// - `/` as a reset
129/// - Named colors (`red`, `blue`, etc.)
130/// - Emphasis keywords (`bold`, `italic`, etc.)
131/// - `ansi(N)` for ANSI 256-palette colors
132/// - `rgb(R,G,B)` for true-color values
133/// - A named style from the registry as a fallback
134///
135/// Parts may be prefixed with `bg:` to target the background ground, or `fg:` to
136/// explicitly target the foreground. Unprefixed color parts default to foreground.
137///
138/// # Errors
139///
140/// Returns `LexError::InvalidTag` if the part matches none of the above forms.
141/// Returns `LexError::InvalidValue` if a numeric argument cannot be parsed.
142/// Returns `LexError::InvalidArgumentCount` if `rgb(...)` does not receive exactly three values.
143fn parse_part(part: &str, position: usize) -> Result<Vec<TagType>, LexError> {
144    let (ground, part) = if let Some(rest) = part.strip_prefix("bg:") {
145        (Ground::Background, rest)
146    } else if let Some(rest) = part.strip_prefix("fg:") {
147        (Ground::Foreground, rest)
148    } else {
149        (Ground::Foreground, part)
150    };
151    if let Some(remainder) = part.strip_prefix('/') {
152        if remainder.is_empty() {
153            Ok(vec![TagType::Reset(None)])
154        } else {
155            let inner = parse_part(remainder, position + 1)?;
156            match inner.as_slice() {
157                [tag] => match tag {
158                    TagType::Reset(_) | TagType::Prefix(_) => {
159                        Err(LexError::InvalidResetTarget(position))
160                    }
161                    _ => Ok(vec![TagType::Reset(Some(Box::new(tag.clone())))]),
162                },
163                _ => Err(LexError::InvalidTag {
164                    tag_content: part.to_string(),
165                    position,
166                }),
167            }
168        }
169    } else if let Some(color) = NamedColor::from_str(part) {
170        Ok(vec![TagType::Color {
171            color: Color::Named(color),
172            ground,
173        }])
174    } else if let Some(emphasis) = EmphasisType::from_str(part) {
175        Ok(vec![TagType::Emphasis(emphasis)])
176    } else if part.starts_with("ansi(") && !part.ends_with(')') {
177        Err(LexError::UnclosedValue(position))
178    } else if let Some(ansi_val) = part.strip_prefix("ansi(").and_then(|s| s.strip_suffix(")")) {
179        match ansi_val.trim().parse::<u8>() {
180            Ok(code) => Ok(vec![TagType::Color {
181                color: Color::Ansi256(code),
182                ground,
183            }]),
184            Err(_) => Err(LexError::InvalidValue {
185                value: ansi_val.to_string(),
186                position,
187            }),
188        }
189    } else if part.starts_with("rgb(") && !part.ends_with(')') {
190        Err(LexError::UnclosedValue(position))
191    } else if let Some(rgb_val) = part.strip_prefix("rgb(").and_then(|s| s.strip_suffix(")")) {
192        let parts: Result<Vec<u8>, _> =
193            rgb_val.split(',').map(|v| v.trim().parse::<u8>()).collect();
194        match parts {
195            Ok(v) if v.len() == 3 => Ok(vec![TagType::Color {
196                color: Color::Rgb(v[0], v[1], v[2]),
197                ground,
198            }]),
199            Ok(v) => Err(LexError::InvalidArgumentCount {
200                expected: 3,
201                got: v.len(),
202                position,
203            }),
204            Err(_) => Err(LexError::InvalidValue {
205                value: rgb_val.to_string(),
206                position,
207            }),
208        }
209    } else {
210        match search_registry(part) {
211            Ok(style) => Ok(style_to_tags(style)),
212            Err(_) => Err(LexError::InvalidTag {
213                tag_content: part.to_string(),
214                position,
215            }),
216        }
217    }
218}
219
220/// Splits a raw tag string on whitespace and parses each part into a `TagType`.
221///
222/// A tag like `"bold red"` produces two `TagType` values. Whitespace between parts
223/// is consumed and does not appear in the output.
224///
225/// # Errors
226///
227/// Propagates any error from `parse_part`.
228fn parse_tag(raw_tag: &str, tag_start: usize) -> Result<Vec<TagType>, LexError> {
229    let mut result = Vec::new();
230    let mut search_from = 0;
231
232    for part in raw_tag.split_whitespace() {
233        let part_offset = raw_tag[search_from..].find(part).unwrap() + search_from;
234        let abs_position = tag_start + part_offset;
235        result.extend(parse_part(part, abs_position)?);
236        search_from = part_offset + part.len();
237    }
238
239    Ok(result)
240}
241
242/// Tokenizes a farben markup string into a sequence of `Token`s.
243///
244/// Tags are delimited by `[` and `]`. A `[` preceded by `\` is treated as a literal
245/// bracket rather than the start of a tag. Text between tags is emitted as
246/// [`Token::Text`]; tags are parsed and emitted as [`Token::Tag`].
247///
248/// # Errors
249///
250/// Returns `LexError::UnclosedTag` if a `[` has no matching `]`.
251/// Returns any error produced by `parse_tag` for malformed tag contents.
252///
253/// # Example
254///
255/// ```ignore
256/// let tokens = tokenize("[red]hello")?;
257/// // => [Token::Tag(TagType::Color { color: Color::Named(NamedColor::Red), ground: Ground::Foreground }),
258/// //     Token::Text("hello".into())]
259/// ```
260pub fn tokenize(input: impl Into<String>) -> Result<Vec<Token>, LexError> {
261    let mut tokens: Vec<Token> = Vec::new();
262    let input = input.into();
263    let mut pos = 0;
264    loop {
265        let Some(starting) = input[pos..].find('[') else {
266            if pos < input.len() {
267                tokens.push(Token::Text(input[pos..].to_string()));
268            }
269            break;
270        };
271        let abs_starting = starting + pos;
272        // wtf does this mean
273        if abs_starting > 0 && input.as_bytes().get(abs_starting.wrapping_sub(1)) == Some(&b'\\') {
274            let before = &input[pos..abs_starting - 1];
275            if !before.is_empty() {
276                tokens.push(Token::Text(before.to_string()));
277            }
278            tokens.push(Token::Text(String::from('[')));
279            pos = abs_starting + 1;
280            continue;
281        }
282
283        if pos != abs_starting {
284            tokens.push(Token::Text(input[pos..abs_starting].to_string()));
285        }
286
287        let Some(closing) = input[abs_starting..].find(']') else {
288            return Err(LexError::UnclosedTag(abs_starting));
289        };
290        let abs_closing = closing + abs_starting;
291        let raw_tag = &input[abs_starting + 1..abs_closing];
292        for tag in parse_tag(raw_tag, abs_starting)? {
293            tokens.push(Token::Tag(tag));
294        }
295        pos = abs_closing + 1;
296    }
297    Ok(tokens)
298}
299
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ansi::{Color, Ground, NamedColor};

    // --- helpers ---

    /// Builds the foreground color tag expected for `color`.
    fn fg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Foreground,
        }
    }

    /// Builds the background color tag expected for `color`.
    fn bg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Background,
        }
    }

    /// Builds the text token expected for `content`.
    fn text(content: &str) -> Token {
        Token::Text(content.into())
    }

    // --- EmphasisType::from_str ---

    #[test]
    fn test_emphasis_from_str_all_known() {
        assert_eq!(EmphasisType::from_str("dim"), Some(EmphasisType::Dim));
        assert_eq!(EmphasisType::from_str("italic"), Some(EmphasisType::Italic));
        assert_eq!(
            EmphasisType::from_str("underline"),
            Some(EmphasisType::Underline)
        );
        assert_eq!(EmphasisType::from_str("bold"), Some(EmphasisType::Bold));
        assert_eq!(
            EmphasisType::from_str("strikethrough"),
            Some(EmphasisType::Strikethrough)
        );
        assert_eq!(EmphasisType::from_str("blink"), Some(EmphasisType::Blink));
    }

    #[test]
    fn test_emphasis_from_str_unknown_returns_none() {
        assert_eq!(EmphasisType::from_str("flash"), None);
    }

    #[test]
    fn test_emphasis_from_str_case_sensitive() {
        assert_eq!(EmphasisType::from_str("Bold"), None);
    }

    // --- parse_part ---

    #[test]
    fn test_parse_part_reset() {
        assert_eq!(parse_part("/", 0).unwrap(), vec![TagType::Reset(None)]);
    }

    #[test]
    fn test_parse_part_named_color_foreground_default() {
        assert_eq!(
            parse_part("red", 0).unwrap(),
            vec![fg(Color::Named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_named_color_explicit_fg() {
        assert_eq!(
            parse_part("fg:red", 0).unwrap(),
            vec![fg(Color::Named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_named_color_bg() {
        assert_eq!(
            parse_part("bg:red", 0).unwrap(),
            vec![bg(Color::Named(NamedColor::Red))]
        );
    }

    #[test]
    fn test_parse_part_emphasis_bold() {
        assert_eq!(
            parse_part("bold", 0).unwrap(),
            vec![TagType::Emphasis(EmphasisType::Bold)]
        );
    }

    #[test]
    fn test_parse_part_ansi256_valid() {
        assert_eq!(
            parse_part("ansi(200)", 0).unwrap(),
            vec![fg(Color::Ansi256(200))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_bg() {
        assert_eq!(
            parse_part("bg:ansi(200)", 0).unwrap(),
            vec![bg(Color::Ansi256(200))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_with_whitespace() {
        assert_eq!(
            parse_part("ansi( 42 )", 0).unwrap(),
            vec![fg(Color::Ansi256(42))]
        );
    }

    #[test]
    fn test_parse_part_ansi256_invalid_value() {
        assert!(parse_part("ansi(abc)", 0).is_err());
    }

    #[test]
    fn test_parse_part_rgb_valid() {
        assert_eq!(
            parse_part("rgb(255,128,0)", 0).unwrap(),
            vec![fg(Color::Rgb(255, 128, 0))]
        );
    }

    #[test]
    fn test_parse_part_rgb_bg() {
        assert_eq!(
            parse_part("bg:rgb(255,128,0)", 0).unwrap(),
            vec![bg(Color::Rgb(255, 128, 0))]
        );
    }

    #[test]
    fn test_parse_part_rgb_with_spaces() {
        assert_eq!(
            parse_part("rgb( 10 , 20 , 30 )", 0).unwrap(),
            vec![fg(Color::Rgb(10, 20, 30))]
        );
    }

    #[test]
    fn test_parse_part_rgb_wrong_arg_count() {
        // Match on the variant so that any other outcome fails the test instead of
        // slipping through unchecked.
        match parse_part("rgb(1,2)", 0) {
            Err(crate::errors::LexError::InvalidArgumentCount { expected, got, .. }) => {
                assert_eq!(expected, 3);
                assert_eq!(got, 2);
            }
            other => panic!("expected InvalidArgumentCount, got {other:?}"),
        }
    }

    #[test]
    fn test_parse_part_rgb_invalid_value() {
        assert!(parse_part("rgb(r,g,b)", 0).is_err());
    }

    #[test]
    fn test_parse_part_unknown_tag_returns_error() {
        assert!(parse_part("fuchsia", 0).is_err());
    }

    // --- tokenize ---

    #[test]
    fn test_tokenize_plain_text() {
        let tokens = tokenize("hello world").unwrap();
        assert_eq!(tokens, vec![text("hello world")]);
    }

    #[test]
    fn test_tokenize_empty_string() {
        assert!(tokenize("").unwrap().is_empty());
    }

    #[test]
    fn test_tokenize_single_color_tag() {
        let tokens = tokenize("[red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                text("text"),
            ]
        );
    }

    #[test]
    fn test_tokenize_bg_color_tag() {
        let tokens = tokenize("[bg:red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(bg(Color::Named(NamedColor::Red))),
                text("text"),
            ]
        );
    }

    #[test]
    fn test_tokenize_fg_and_bg_in_same_bracket() {
        let tokens = tokenize("[fg:white bg:blue]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(fg(Color::Named(NamedColor::White))),
                Token::Tag(bg(Color::Named(NamedColor::Blue))),
                text("text"),
            ]
        );
    }

    #[test]
    fn test_tokenize_reset_tag() {
        assert_eq!(
            tokenize("[/]").unwrap(),
            vec![Token::Tag(TagType::Reset(None))]
        );
    }

    #[test]
    fn test_tokenize_compound_tag() {
        let tokens = tokenize("[bold red]hi").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Emphasis(EmphasisType::Bold)),
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                text("hi"),
            ]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_at_start() {
        let tokens = tokenize("\\[not a tag]").unwrap();
        assert_eq!(tokens, vec![text("["), text("not a tag]")]);
    }

    #[test]
    fn test_tokenize_escaped_bracket_with_prefix() {
        let tokens = tokenize("before\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![text("before"), text("["), text("not a tag]")]
        );
    }

    #[test]
    fn test_tokenize_unclosed_tag_returns_error() {
        assert!(tokenize("[red").is_err());
    }

    #[test]
    fn test_tokenize_invalid_tag_name_returns_error() {
        assert!(tokenize("[fuchsia]").is_err());
    }

    #[test]
    fn test_tokenize_text_before_and_after_tag() {
        let tokens = tokenize("before[red]after").unwrap();
        assert_eq!(
            tokens,
            vec![
                text("before"),
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                text("after"),
            ]
        );
    }

    #[test]
    fn test_tokenize_ansi256_tag() {
        let tokens = tokenize("[ansi(1)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(fg(Color::Ansi256(1))));
    }

    #[test]
    fn test_tokenize_rgb_tag() {
        let tokens = tokenize("[rgb(255,0,128)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(fg(Color::Rgb(255, 0, 128))));
    }

    #[test]
    fn test_tokenize_bg_rgb_tag() {
        let tokens = tokenize("[bg:rgb(0,255,0)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(bg(Color::Rgb(0, 255, 0))));
    }

    #[test]
    fn test_parse_part_custom_style_from_registry() {
        crate::registry::insert_style("danger", crate::ansi::Style::parse("[bold red]").unwrap());
        let result = parse_part("danger", 0).unwrap();
        assert_eq!(
            result,
            vec![
                TagType::Emphasis(EmphasisType::Bold),
                fg(Color::Named(NamedColor::Red)),
            ]
        );
    }
}