Skip to main content

farben_core/
lexer.rs

1use crate::{
2    ansi::{Color, Ground, NamedColor, Style},
3    errors::LexError,
4    registry::search_registry,
5};
6
/// A text emphasis modifier supported by farben markup.
///
/// Each variant corresponds to one SGR (Select Graphic Rendition) attribute.
/// The type is a plain fieldless enum, so it is cheap to copy, compare, and use
/// as a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EmphasisType {
    /// Reduced intensity (SGR 2).
    Dim,
    /// Italic text (SGR 3).
    Italic,
    /// Underlined text (SGR 4).
    Underline,
    /// Bold text (SGR 1).
    Bold,
    /// Crossed-out text (SGR 9).
    Strikethrough,
    /// Blinking text (SGR 5). Terminal support varies.
    Blink,
}
23
24/// The kind of styling operation a tag represents.
25#[derive(Debug, PartialEq)]
26pub enum TagType {
27    /// Resets all active styles (`[/]`).
28    Reset,
29    /// Applies a text emphasis attribute.
30    Emphasis(EmphasisType),
31    /// Sets a foreground or background color.
32    Color { color: Color, ground: Ground },
33}
34
35/// A single unit produced by the tokenizer: either a styling tag or a run of plain text.
36#[derive(Debug, PartialEq)]
37pub enum Token {
38    /// A parsed styling tag.
39    Tag(TagType),
40    /// A run of plain text with no markup.
41    Text(String),
42}
43
44impl EmphasisType {
45    /// Parses an emphasis keyword into an `EmphasisType`.
46    ///
47    /// Returns `None` if the string is not a recognized emphasis name.
48    /// Matching is case-sensitive.
49    fn from_str(input: &str) -> Option<Self> {
50        match input {
51            "dim" => Some(Self::Dim),
52            "italic" => Some(Self::Italic),
53            "underline" => Some(Self::Underline),
54            "bold" => Some(Self::Bold),
55            "strikethrough" => Some(Self::Strikethrough),
56            "blink" => Some(Self::Blink),
57            _ => None,
58        }
59    }
60}
61
62fn style_to_tags(style: Style) -> Vec<TagType> {
63    let mut res: Vec<TagType> = Vec::new();
64    if style.reset {
65        return vec![TagType::Reset];
66    }
67
68    for (enabled, tag) in [
69        (style.bold, TagType::Emphasis(EmphasisType::Bold)),
70        (style.blink, TagType::Emphasis(EmphasisType::Blink)),
71        (style.dim, TagType::Emphasis(EmphasisType::Dim)),
72        (style.italic, TagType::Emphasis(EmphasisType::Italic)),
73        (
74            style.strikethrough,
75            TagType::Emphasis(EmphasisType::Strikethrough),
76        ),
77        (style.underline, TagType::Emphasis(EmphasisType::Underline)),
78    ] {
79        if enabled {
80            res.push(tag);
81        }
82    }
83
84    if let Some(fg) = style.fg {
85        res.push(TagType::Color {
86            color: fg,
87            ground: Ground::Foreground,
88        })
89    }
90    if let Some(bg) = style.bg {
91        res.push(TagType::Color {
92            color: bg,
93            ground: Ground::Background,
94        })
95    }
96
97    res
98}
99
100/// Parses a single whitespace-delimited tag part into a `TagType`.
101///
102/// Recognizes:
103/// - `/` as a reset
104/// - Named colors (`red`, `blue`, etc.)
105/// - Emphasis keywords (`bold`, `italic`, etc.)
106/// - `ansi(N)` for ANSI 256-palette colors
107/// - `rgb(R,G,B)` for true-color values
108///
109/// # Errors
110///
111/// Returns `LexError::InvalidTag` if the part matches none of the above forms.
112/// Returns `LexError::InvalidValue` if a numeric argument cannot be parsed.
113/// Returns `LexError::InvalidArgumentCount` if `rgb(...)` does not receive exactly three values.
114fn parse_part(part: &str) -> Result<Vec<TagType>, LexError> {
115    let (ground, part) = if let Some(rest) = part.strip_prefix("bg:") {
116        (Ground::Background, rest)
117    } else if let Some(rest) = part.strip_prefix("fg:") {
118        (Ground::Foreground, rest)
119    } else {
120        (Ground::Foreground, part)
121    };
122    if part == "/" {
123        Ok(vec![TagType::Reset])
124    } else if let Some(color) = NamedColor::from_str(part) {
125        Ok(vec![TagType::Color {
126            color: Color::Named(color),
127            ground,
128        }])
129    } else if let Some(emphasis) = EmphasisType::from_str(part) {
130        Ok(vec![TagType::Emphasis(emphasis)])
131    } else if let Some(ansi_val) = part.strip_prefix("ansi(").and_then(|s| s.strip_suffix(")")) {
132        match ansi_val.trim().parse::<u8>() {
133            Ok(code) => Ok(vec![TagType::Color {
134                color: Color::Ansi256(code),
135                ground,
136            }]),
137            Err(_) => Err(LexError::InvalidValue(ansi_val.to_string())),
138        }
139    } else if let Some(rgb_val) = part.strip_prefix("rgb(").and_then(|s| s.strip_suffix(")")) {
140        let parts: Result<Vec<u8>, _> =
141            rgb_val.split(',').map(|v| v.trim().parse::<u8>()).collect();
142        match parts {
143            Ok(v) if v.len() == 3 => Ok(vec![TagType::Color {
144                color: Color::Rgb(v[0], v[1], v[2]),
145                ground,
146            }]),
147            Ok(v) => Err(LexError::InvalidArgumentCount {
148                expected: 3,
149                got: v.len(),
150            }),
151            Err(_) => Err(LexError::InvalidValue(rgb_val.to_string())),
152        }
153    } else {
154        match search_registry(part) {
155            Ok(style) => Ok(style_to_tags(style)),
156            Err(e) => Err(e),
157        }
158    }
159}
160
161/// Splits a raw tag string on whitespace and parses each part into a `TagType`.
162///
163/// A tag like `"bold red"` produces two `TagType` values.
164///
165/// # Errors
166///
167/// Propagates any error from `parse_part`.
168fn parse_tag(raw_tag: &str) -> Result<Vec<TagType>, LexError> {
169    let nested: Result<Vec<Vec<TagType>>, LexError> =
170        raw_tag.split_whitespace().map(parse_part).collect();
171    Ok(nested?.into_iter().flatten().collect())
172}
173
174/// Tokenizes a farben markup string into a sequence of `Token`s.
175///
176/// Tags are delimited by `[` and `]`. A `[` preceded by `\` is treated as a literal
177/// bracket rather than the start of a tag.
178///
179/// # Errors
180///
181/// Returns `LexError::UnclosedTag` if a `[` has no matching `]`.
182/// Returns any error produced by `parse_tag` for malformed tag contents.
183///
184/// # Example
185///
186/// ```ignore
187/// let tokens = tokenize("[red]hello")?;
188/// // => [Token::Tag(TagType::Color(Color::Named(NamedColor::Red))), Token::Text("hello".into())]
189/// ```
190pub fn tokenize(input: impl Into<String>) -> Result<Vec<Token>, LexError> {
191    let mut tokens: Vec<Token> = Vec::new();
192    let input = input.into();
193    let mut pos = 0;
194    loop {
195        let Some(starting) = input[pos..].find('[') else {
196            if pos < input.len() {
197                tokens.push(Token::Text(input[pos..].to_string()));
198            }
199            break;
200        };
201        let abs_starting = starting + pos;
202        // escape logic
203        if abs_starting > 0 && input[abs_starting - 1..abs_starting] == "\\".to_string() {
204            let before = &input[pos..abs_starting - 1];
205            if !before.is_empty() {
206                tokens.push(Token::Text(before.to_string()));
207            }
208            tokens.push(Token::Text(String::from('[')));
209            pos = abs_starting + 1;
210            continue;
211        }
212
213        if pos != abs_starting {
214            tokens.push(Token::Text(input[pos..abs_starting].to_string()));
215        }
216
217        let Some(closing) = input[abs_starting..].find(']') else {
218            return Err(LexError::UnclosedTag);
219        };
220        let abs_closing = closing + abs_starting;
221        let raw_tag = &input[abs_starting + 1..abs_closing];
222        for tag in parse_tag(raw_tag)? {
223            tokens.push(Token::Tag(tag));
224        }
225        pos = abs_closing + 1;
226    }
227    Ok(tokens)
228}
229
/// Unit tests for the lexer: keyword parsing, single tag parts, and full tokenization.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ansi::{Color, Ground, NamedColor};

    // --- EmphasisType::from_str ---

    #[test]
    fn test_emphasis_from_str_all_known() {
        assert_eq!(EmphasisType::from_str("dim"), Some(EmphasisType::Dim));
        assert_eq!(EmphasisType::from_str("italic"), Some(EmphasisType::Italic));
        assert_eq!(
            EmphasisType::from_str("underline"),
            Some(EmphasisType::Underline)
        );
        assert_eq!(EmphasisType::from_str("bold"), Some(EmphasisType::Bold));
        assert_eq!(
            EmphasisType::from_str("strikethrough"),
            Some(EmphasisType::Strikethrough)
        );
        assert_eq!(EmphasisType::from_str("blink"), Some(EmphasisType::Blink));
    }

    #[test]
    fn test_emphasis_from_str_unknown_returns_none() {
        assert_eq!(EmphasisType::from_str("flash"), None);
    }

    #[test]
    fn test_emphasis_from_str_case_sensitive() {
        assert_eq!(EmphasisType::from_str("Bold"), None);
    }

    // --- parse_part ---

    #[test]
    fn test_parse_part_reset() {
        assert_eq!(parse_part("/").unwrap(), vec![TagType::Reset]);
    }

    #[test]
    fn test_parse_part_named_color_foreground_default() {
        assert_eq!(
            parse_part("red").unwrap(),
            vec![TagType::Color {
                color: Color::Named(NamedColor::Red),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_named_color_explicit_fg() {
        assert_eq!(
            parse_part("fg:red").unwrap(),
            vec![TagType::Color {
                color: Color::Named(NamedColor::Red),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_named_color_bg() {
        assert_eq!(
            parse_part("bg:red").unwrap(),
            vec![TagType::Color {
                color: Color::Named(NamedColor::Red),
                ground: Ground::Background,
            }]
        );
    }

    #[test]
    fn test_parse_part_emphasis_bold() {
        assert_eq!(
            parse_part("bold").unwrap(),
            vec![TagType::Emphasis(EmphasisType::Bold)]
        );
    }

    #[test]
    fn test_parse_part_ansi256_valid() {
        assert_eq!(
            parse_part("ansi(200)").unwrap(),
            vec![TagType::Color {
                color: Color::Ansi256(200),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_ansi256_bg() {
        assert_eq!(
            parse_part("bg:ansi(200)").unwrap(),
            vec![TagType::Color {
                color: Color::Ansi256(200),
                ground: Ground::Background,
            }]
        );
    }

    #[test]
    fn test_parse_part_ansi256_with_whitespace() {
        assert_eq!(
            parse_part("ansi( 42 )").unwrap(),
            vec![TagType::Color {
                color: Color::Ansi256(42),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_ansi256_invalid_value() {
        assert!(parse_part("ansi(abc)").is_err());
    }

    #[test]
    fn test_parse_part_rgb_valid() {
        assert_eq!(
            parse_part("rgb(255,128,0)").unwrap(),
            vec![TagType::Color {
                color: Color::Rgb(255, 128, 0),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_rgb_bg() {
        assert_eq!(
            parse_part("bg:rgb(255,128,0)").unwrap(),
            vec![TagType::Color {
                color: Color::Rgb(255, 128, 0),
                ground: Ground::Background,
            }]
        );
    }

    #[test]
    fn test_parse_part_rgb_with_spaces() {
        assert_eq!(
            parse_part("rgb( 10 , 20 , 30 )").unwrap(),
            vec![TagType::Color {
                color: Color::Rgb(10, 20, 30),
                ground: Ground::Foreground,
            }]
        );
    }

    #[test]
    fn test_parse_part_rgb_wrong_arg_count() {
        // Match on the exact variant so the test fails, rather than silently
        // skipping its assertions, if a different error kind is returned.
        match parse_part("rgb(1,2)") {
            Err(crate::errors::LexError::InvalidArgumentCount { expected, got }) => {
                assert_eq!(expected, 3);
                assert_eq!(got, 2);
            }
            other => panic!("expected InvalidArgumentCount, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_part_rgb_invalid_value() {
        assert!(parse_part("rgb(r,g,b)").is_err());
    }

    #[test]
    fn test_parse_part_unknown_tag_returns_error() {
        assert!(parse_part("fuchsia").is_err());
    }

    // --- tokenize ---

    #[test]
    fn test_tokenize_plain_text() {
        let tokens = tokenize("hello world").unwrap();
        assert_eq!(tokens, vec![Token::Text("hello world".into())]);
    }

    #[test]
    fn test_tokenize_empty_string() {
        assert!(tokenize("").unwrap().is_empty());
    }

    #[test]
    fn test_tokenize_single_color_tag() {
        let tokens = tokenize("[red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                }),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_bg_color_tag() {
        let tokens = tokenize("[bg:red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Background
                }),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_fg_and_bg_in_same_bracket() {
        let tokens = tokenize("[fg:white bg:blue]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::White),
                    ground: Ground::Foreground
                }),
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Blue),
                    ground: Ground::Background
                }),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_reset_tag() {
        assert_eq!(tokenize("[/]").unwrap(), vec![Token::Tag(TagType::Reset)]);
    }

    #[test]
    fn test_tokenize_compound_tag() {
        let tokens = tokenize("[bold red]hi").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Emphasis(EmphasisType::Bold)),
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                }),
                Token::Text("hi".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_at_start() {
        let tokens = tokenize("\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![Token::Text("[".into()), Token::Text("not a tag]".into()),]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_with_prefix() {
        let tokens = tokenize("before\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Text("[".into()),
                Token::Text("not a tag]".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_unclosed_tag_returns_error() {
        assert!(tokenize("[red").is_err());
    }

    #[test]
    fn test_tokenize_invalid_tag_name_returns_error() {
        assert!(tokenize("[fuchsia]").is_err());
    }

    #[test]
    fn test_tokenize_text_before_and_after_tag() {
        let tokens = tokenize("before[red]after").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                }),
                Token::Text("after".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_ansi256_tag() {
        let tokens = tokenize("[ansi(1)]text").unwrap();
        assert_eq!(
            tokens[0],
            Token::Tag(TagType::Color {
                color: Color::Ansi256(1),
                ground: Ground::Foreground,
            })
        );
    }

    #[test]
    fn test_tokenize_rgb_tag() {
        let tokens = tokenize("[rgb(255,0,128)]text").unwrap();
        assert_eq!(
            tokens[0],
            Token::Tag(TagType::Color {
                color: Color::Rgb(255, 0, 128),
                ground: Ground::Foreground,
            })
        );
    }

    #[test]
    fn test_tokenize_bg_rgb_tag() {
        let tokens = tokenize("[bg:rgb(0,255,0)]text").unwrap();
        assert_eq!(
            tokens[0],
            Token::Tag(TagType::Color {
                color: Color::Rgb(0, 255, 0),
                ground: Ground::Background,
            })
        );
    }

    // --- registry fallback ---

    #[test]
    fn test_parse_part_custom_style_from_registry() {
        crate::registry::insert_style("danger", crate::ansi::Style::parse("[bold red]").unwrap());
        let result = parse_part("danger").unwrap();
        assert_eq!(
            result,
            vec![
                TagType::Emphasis(EmphasisType::Bold),
                TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                },
            ]
        );
    }
}