// farben_core/lexer.rs

1use crate::{
2    ansi::{Color, Ground, NamedColor},
3    errors::LexError,
4};
5
/// A text emphasis modifier supported by farben markup.
///
/// Each variant corresponds to one SGR (Select Graphic Rendition) attribute.
/// The type is a plain fieldless enum, so it is cheap to copy and can be used
/// as a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EmphasisType {
    /// Reduced intensity (SGR 2).
    Dim,
    /// Italic text (SGR 3).
    Italic,
    /// Underlined text (SGR 4).
    Underline,
    /// Bold text (SGR 1).
    Bold,
    /// Crossed-out text (SGR 9).
    Strikethrough,
    /// Blinking text (SGR 5). Terminal support varies.
    Blink,
}
22
23/// The kind of styling operation a tag represents.
24#[derive(Debug, PartialEq)]
25pub enum TagType {
26    /// Resets all active styles (`[/]`).
27    Reset,
28    /// Applies a text emphasis attribute.
29    Emphasis(EmphasisType),
30    /// Sets a foreground or background color.
31    Color { color: Color, ground: Ground },
32}
33
34/// A single unit produced by the tokenizer: either a styling tag or a run of plain text.
35#[derive(Debug, PartialEq)]
36pub enum Token {
37    /// A parsed styling tag.
38    Tag(TagType),
39    /// A run of plain text with no markup.
40    Text(String),
41}
42
43impl EmphasisType {
44    /// Parses an emphasis keyword into an `EmphasisType`.
45    ///
46    /// Returns `None` if the string is not a recognized emphasis name.
47    /// Matching is case-sensitive.
48    fn from_str(input: &str) -> Option<Self> {
49        match input {
50            "dim" => Some(Self::Dim),
51            "italic" => Some(Self::Italic),
52            "underline" => Some(Self::Underline),
53            "bold" => Some(Self::Bold),
54            "strikethrough" => Some(Self::Strikethrough),
55            "blink" => Some(Self::Blink),
56            _ => None,
57        }
58    }
59}
60
61/// Parses a single whitespace-delimited tag part into a `TagType`.
62///
63/// Recognizes:
64/// - `/` as a reset
65/// - Named colors (`red`, `blue`, etc.)
66/// - Emphasis keywords (`bold`, `italic`, etc.)
67/// - `ansi(N)` for ANSI 256-palette colors
68/// - `rgb(R,G,B)` for true-color values
69///
70/// # Errors
71///
72/// Returns `LexError::InvalidTag` if the part matches none of the above forms.
73/// Returns `LexError::InvalidValue` if a numeric argument cannot be parsed.
74/// Returns `LexError::InvalidArgumentCount` if `rgb(...)` does not receive exactly three values.
75fn parse_part(part: &str) -> Result<TagType, LexError> {
76    let (ground, part) = if let Some(rest) = part.strip_prefix("bg:") {
77        (Ground::Background, rest)
78    } else if let Some(rest) = part.strip_prefix("fg:") {
79        (Ground::Foreground, rest)
80    } else {
81        (Ground::Foreground, part)
82    };
83    if part == "/" {
84        Ok(TagType::Reset)
85    } else if let Some(color) = NamedColor::from_str(part) {
86        Ok(TagType::Color {
87            color: Color::Named(color),
88            ground,
89        })
90    } else if let Some(emphasis) = EmphasisType::from_str(part) {
91        Ok(TagType::Emphasis(emphasis))
92    } else if let Some(ansi_val) = part.strip_prefix("ansi(").and_then(|s| s.strip_suffix(")")) {
93        match ansi_val.trim().parse::<u8>() {
94            Ok(code) => Ok(TagType::Color {
95                color: Color::Ansi256(code),
96                ground,
97            }),
98            Err(_) => Err(LexError::InvalidValue(ansi_val.to_string())),
99        }
100    } else if let Some(rgb_val) = part.strip_prefix("rgb(").and_then(|s| s.strip_suffix(")")) {
101        let parts: Result<Vec<u8>, _> =
102            rgb_val.split(',').map(|v| v.trim().parse::<u8>()).collect();
103        match parts {
104            Ok(v) if v.len() == 3 => Ok(TagType::Color {
105                color: Color::Rgb(v[0], v[1], v[2]),
106                ground,
107            }),
108            Ok(v) => Err(LexError::InvalidArgumentCount {
109                expected: 3,
110                got: v.len(),
111            }),
112            Err(_) => Err(LexError::InvalidValue(rgb_val.to_string())),
113        }
114    } else {
115        Err(LexError::InvalidTag(part.to_string()))
116    }
117}
118
119/// Splits a raw tag string on whitespace and parses each part into a `TagType`.
120///
121/// A tag like `"bold red"` produces two `TagType` values.
122///
123/// # Errors
124///
125/// Propagates any error from `parse_part`.
126fn parse_tag(raw_tag: &str) -> Result<Vec<TagType>, LexError> {
127    raw_tag.split_whitespace().map(parse_part).collect()
128}
129
130/// Tokenizes a farben markup string into a sequence of `Token`s.
131///
132/// Tags are delimited by `[` and `]`. A `[` preceded by `\` is treated as a literal
133/// bracket rather than the start of a tag.
134///
135/// # Errors
136///
137/// Returns `LexError::UnclosedTag` if a `[` has no matching `]`.
138/// Returns any error produced by `parse_tag` for malformed tag contents.
139///
140/// # Example
141///
142/// ```ignore
143/// let tokens = tokenize("[red]hello")?;
144/// // => [Token::Tag(TagType::Color(Color::Named(NamedColor::Red))), Token::Text("hello".into())]
145/// ```
146pub fn tokenize(input: impl Into<String>) -> Result<Vec<Token>, LexError> {
147    let mut tokens: Vec<Token> = Vec::new();
148    let input = input.into();
149    let mut pos = 0;
150    loop {
151        let Some(starting) = input[pos..].find('[') else {
152            if pos < input.len() {
153                tokens.push(Token::Text(input[pos..].to_string()));
154            }
155            break;
156        };
157        let abs_starting = starting + pos;
158        // escape logic
159        if abs_starting > 0 && input[abs_starting - 1..abs_starting] == "\\".to_string() {
160            let before = &input[pos..abs_starting - 1];
161            if !before.is_empty() {
162                tokens.push(Token::Text(before.to_string()));
163            }
164            tokens.push(Token::Text(String::from('[')));
165            pos = abs_starting + 1;
166            continue;
167        }
168
169        if pos != abs_starting {
170            tokens.push(Token::Text(input[pos..abs_starting].to_string()));
171        }
172
173        let Some(closing) = input[abs_starting..].find(']') else {
174            return Err(LexError::UnclosedTag);
175        };
176        let abs_closing = closing + abs_starting;
177        let raw_tag = &input[abs_starting + 1..abs_closing];
178        for tag in parse_tag(raw_tag)? {
179            tokens.push(Token::Tag(tag));
180        }
181        pos = abs_closing + 1;
182    }
183    Ok(tokens)
184}
185
/// Unit tests for the lexer: keyword parsing, single tag parts, and full tokenization.
///
/// Error-path tests assert the exact `LexError` variant rather than only `is_err()`,
/// so a failure for the wrong reason does not pass unnoticed.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ansi::{Color, Ground, NamedColor};
    use crate::errors::LexError;

    // --- EmphasisType::from_str ---

    #[test]
    fn test_emphasis_from_str_all_known() {
        assert_eq!(EmphasisType::from_str("dim"), Some(EmphasisType::Dim));
        assert_eq!(EmphasisType::from_str("italic"), Some(EmphasisType::Italic));
        assert_eq!(
            EmphasisType::from_str("underline"),
            Some(EmphasisType::Underline)
        );
        assert_eq!(EmphasisType::from_str("bold"), Some(EmphasisType::Bold));
        assert_eq!(
            EmphasisType::from_str("strikethrough"),
            Some(EmphasisType::Strikethrough)
        );
        assert_eq!(EmphasisType::from_str("blink"), Some(EmphasisType::Blink));
    }

    #[test]
    fn test_emphasis_from_str_unknown_returns_none() {
        assert_eq!(EmphasisType::from_str("flash"), None);
    }

    #[test]
    fn test_emphasis_from_str_case_sensitive() {
        assert_eq!(EmphasisType::from_str("Bold"), None);
    }

    // --- parse_part ---

    #[test]
    fn test_parse_part_reset() {
        assert_eq!(parse_part("/").unwrap(), TagType::Reset);
    }

    #[test]
    fn test_parse_part_named_color_foreground_default() {
        assert_eq!(
            parse_part("red").unwrap(),
            TagType::Color {
                color: Color::Named(NamedColor::Red),
                ground: Ground::Foreground,
            }
        );
    }

    #[test]
    fn test_parse_part_named_color_explicit_fg() {
        assert_eq!(
            parse_part("fg:red").unwrap(),
            TagType::Color {
                color: Color::Named(NamedColor::Red),
                ground: Ground::Foreground,
            }
        );
    }

    #[test]
    fn test_parse_part_named_color_bg() {
        assert_eq!(
            parse_part("bg:red").unwrap(),
            TagType::Color {
                color: Color::Named(NamedColor::Red),
                ground: Ground::Background,
            }
        );
    }

    #[test]
    fn test_parse_part_emphasis_bold() {
        assert_eq!(
            parse_part("bold").unwrap(),
            TagType::Emphasis(EmphasisType::Bold)
        );
    }

    #[test]
    fn test_parse_part_ansi256_valid() {
        assert_eq!(
            parse_part("ansi(200)").unwrap(),
            TagType::Color {
                color: Color::Ansi256(200),
                ground: Ground::Foreground,
            }
        );
    }

    #[test]
    fn test_parse_part_ansi256_bg() {
        assert_eq!(
            parse_part("bg:ansi(200)").unwrap(),
            TagType::Color {
                color: Color::Ansi256(200),
                ground: Ground::Background,
            }
        );
    }

    #[test]
    fn test_parse_part_ansi256_with_whitespace() {
        assert_eq!(
            parse_part("ansi( 42 )").unwrap(),
            TagType::Color {
                color: Color::Ansi256(42),
                ground: Ground::Foreground,
            }
        );
    }

    #[test]
    fn test_parse_part_ansi256_invalid_value() {
        assert!(matches!(
            parse_part("ansi(abc)"),
            Err(LexError::InvalidValue(_))
        ));
    }

    #[test]
    fn test_parse_part_rgb_valid() {
        assert_eq!(
            parse_part("rgb(255,128,0)").unwrap(),
            TagType::Color {
                color: Color::Rgb(255, 128, 0),
                ground: Ground::Foreground,
            }
        );
    }

    #[test]
    fn test_parse_part_rgb_bg() {
        assert_eq!(
            parse_part("bg:rgb(255,128,0)").unwrap(),
            TagType::Color {
                color: Color::Rgb(255, 128, 0),
                ground: Ground::Background,
            }
        );
    }

    #[test]
    fn test_parse_part_rgb_with_spaces() {
        assert_eq!(
            parse_part("rgb( 10 , 20 , 30 )").unwrap(),
            TagType::Color {
                color: Color::Rgb(10, 20, 30),
                ground: Ground::Foreground,
            }
        );
    }

    #[test]
    fn test_parse_part_rgb_wrong_arg_count() {
        // Match the variant and both fields in one assertion so that a different
        // error kind fails the test instead of skipping the checks.
        assert!(matches!(
            parse_part("rgb(1,2)"),
            Err(LexError::InvalidArgumentCount {
                expected: 3,
                got: 2
            })
        ));
    }

    #[test]
    fn test_parse_part_rgb_invalid_value() {
        assert!(matches!(
            parse_part("rgb(r,g,b)"),
            Err(LexError::InvalidValue(_))
        ));
    }

    #[test]
    fn test_parse_part_unknown_tag_returns_error() {
        assert!(matches!(
            parse_part("fuchsia"),
            Err(LexError::InvalidTag(_))
        ));
    }

    // --- tokenize ---

    #[test]
    fn test_tokenize_plain_text() {
        let tokens = tokenize("hello world").unwrap();
        assert_eq!(tokens, vec![Token::Text("hello world".into())]);
    }

    #[test]
    fn test_tokenize_empty_string() {
        assert!(tokenize("").unwrap().is_empty());
    }

    #[test]
    fn test_tokenize_single_color_tag() {
        let tokens = tokenize("[red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                }),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_bg_color_tag() {
        let tokens = tokenize("[bg:red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Background
                }),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_fg_and_bg_in_same_bracket() {
        let tokens = tokenize("[fg:white bg:blue]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::White),
                    ground: Ground::Foreground
                }),
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Blue),
                    ground: Ground::Background
                }),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_reset_tag() {
        assert_eq!(tokenize("[/]").unwrap(), vec![Token::Tag(TagType::Reset)]);
    }

    #[test]
    fn test_tokenize_compound_tag() {
        let tokens = tokenize("[bold red]hi").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Emphasis(EmphasisType::Bold)),
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                }),
                Token::Text("hi".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_at_start() {
        let tokens = tokenize("\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![Token::Text("[".into()), Token::Text("not a tag]".into()),]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_with_prefix() {
        let tokens = tokenize("before\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Text("[".into()),
                Token::Text("not a tag]".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_unclosed_tag_returns_error() {
        assert!(matches!(tokenize("[red"), Err(LexError::UnclosedTag)));
    }

    #[test]
    fn test_tokenize_invalid_tag_name_returns_error() {
        assert!(matches!(
            tokenize("[fuchsia]"),
            Err(LexError::InvalidTag(_))
        ));
    }

    #[test]
    fn test_tokenize_text_before_and_after_tag() {
        let tokens = tokenize("before[red]after").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Tag(TagType::Color {
                    color: Color::Named(NamedColor::Red),
                    ground: Ground::Foreground
                }),
                Token::Text("after".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_ansi256_tag() {
        let tokens = tokenize("[ansi(1)]text").unwrap();
        assert_eq!(
            tokens[0],
            Token::Tag(TagType::Color {
                color: Color::Ansi256(1),
                ground: Ground::Foreground,
            })
        );
    }

    #[test]
    fn test_tokenize_rgb_tag() {
        let tokens = tokenize("[rgb(255,0,128)]text").unwrap();
        assert_eq!(
            tokens[0],
            Token::Tag(TagType::Color {
                color: Color::Rgb(255, 0, 128),
                ground: Ground::Foreground,
            })
        );
    }

    #[test]
    fn test_tokenize_bg_rgb_tag() {
        let tokens = tokenize("[bg:rgb(0,255,0)]text").unwrap();
        assert_eq!(
            tokens[0],
            Token::Tag(TagType::Color {
                color: Color::Rgb(0, 255, 0),
                ground: Ground::Background,
            })
        );
    }
}