Skip to main content

farben_core/
lexer.rs

1use crate::{
2    ansi::{Color, Ground, NamedColor},
3    errors::LexError,
4};
5
/// A text emphasis modifier supported by farben markup.
///
/// This is a plain field-less enum, so it is `Copy` and can be compared, hashed and
/// stored in sets or used as a map key without cloning ceremony.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EmphasisType {
    /// Selected by the `dim` keyword.
    Dim,
    /// Selected by the `italic` keyword.
    Italic,
    /// Selected by the `underline` keyword.
    Underline,
    /// Selected by the `bold` keyword.
    Bold,
    /// Selected by the `strikethrough` keyword.
    Strikethrough,
    /// Selected by the `blink` keyword.
    Blink,
}
16
17impl EmphasisType {
18    /// Parses an emphasis keyword into an `EmphasisType`.
19    ///
20    /// Returns `None` if the string is not a recognized emphasis name.
21    /// Matching is case-sensitive.
22    fn from_str(input: &str) -> Option<Self> {
23        match input {
24            "dim" => Some(Self::Dim),
25            "italic" => Some(Self::Italic),
26            "underline" => Some(Self::Underline),
27            "bold" => Some(Self::Bold),
28            "strikethrough" => Some(Self::Strikethrough),
29            "blink" => Some(Self::Blink),
30            _ => None,
31        }
32    }
33}
34
35/// The kind of styling operation a tag represents.
36#[derive(Debug, PartialEq)]
37pub enum TagType {
38    /// Resets all active styles (`[/]`).
39    Reset,
40    /// Applies a text emphasis attribute.
41    Emphasis(EmphasisType),
42    /// Sets the foreground color.
43    Color { color: Color, ground: Ground },
44}
45
46/// A single unit produced by the tokenizer: either a styling tag or a run of plain text.
47#[derive(Debug, PartialEq)]
48pub enum Token {
49    Tag(TagType),
50    Text(String),
51}
52
53/// Parses a single whitespace-delimited tag part into a `TagType`.
54///
55/// Recognizes:
56/// - `/` as a reset
57/// - Named colors (`red`, `blue`, etc.)
58/// - Emphasis keywords (`bold`, `italic`, etc.)
59/// - `ansi(N)` for ANSI 256-palette colors
60/// - `rgb(R,G,B)` for true-color values
61///
62/// # Errors
63///
64/// Returns `LexError::InvalidTag` if the part matches none of the above forms.
65/// Returns `LexError::InvalidValue` if a numeric argument cannot be parsed.
66/// Returns `LexError::InvalidArgumentCount` if `rgb(...)` does not receive exactly three values.
67fn parse_part(part: &str) -> Result<TagType, LexError> {
68    let (ground, part) = if let Some(rest) = part.strip_prefix("bg:") {
69        (Ground::Background, rest)
70    } else if let Some(rest) = part.strip_prefix("fg:") {
71        (Ground::Foreground, rest)
72    } else {
73        (Ground::Foreground, part)
74    };
75    if part == "/" {
76        Ok(TagType::Reset)
77    } else if let Some(color) = NamedColor::from_str(part) {
78        Ok(TagType::Color {
79            color: Color::Named(color),
80            ground,
81        })
82    } else if let Some(emphasis) = EmphasisType::from_str(part) {
83        Ok(TagType::Emphasis(emphasis))
84    } else if let Some(ansi_val) = part.strip_prefix("ansi(").and_then(|s| s.strip_suffix(")")) {
85        match ansi_val.trim().parse::<u8>() {
86            Ok(code) => Ok(TagType::Color {
87                color: Color::Ansi256(code),
88                ground,
89            }),
90            Err(_) => Err(LexError::InvalidValue(ansi_val.to_string())),
91        }
92    } else if let Some(rgb_val) = part.strip_prefix("rgb(").and_then(|s| s.strip_suffix(")")) {
93        let parts: Result<Vec<u8>, _> =
94            rgb_val.split(',').map(|v| v.trim().parse::<u8>()).collect();
95        match parts {
96            Ok(v) if v.len() == 3 => Ok(TagType::Color {
97                color: Color::Rgb(v[0], v[1], v[2]),
98                ground,
99            }),
100            Ok(v) => Err(LexError::InvalidArgumentCount {
101                expected: 3,
102                got: v.len(),
103            }),
104            Err(_) => Err(LexError::InvalidValue(rgb_val.to_string())),
105        }
106    } else {
107        Err(LexError::InvalidTag(part.to_string()))
108    }
109}
110
111/// Splits a raw tag string on whitespace and parses each part into a `TagType`.
112///
113/// A tag like `"bold red"` produces two `TagType` values.
114///
115/// # Errors
116///
117/// Propagates any error from `parse_part`.
118fn parse_tag(raw_tag: &str) -> Result<Vec<TagType>, LexError> {
119    raw_tag.split_whitespace().map(parse_part).collect()
120}
121
122/// Tokenizes a farben markup string into a sequence of `Token`s.
123///
124/// Tags are delimited by `[` and `]`. A `[` preceded by `\` is treated as a literal
125/// bracket rather than the start of a tag.
126///
127/// # Errors
128///
129/// Returns `LexError::UnclosedTag` if a `[` has no matching `]`.
130/// Returns any error produced by `parse_tag` for malformed tag contents.
131///
132/// # Example
133///
134/// ```ignore
135/// let tokens = tokenize("[red]hello")?;
136/// // => [Token::Tag(TagType::Color(Color::Named(NamedColor::Red))), Token::Text("hello".into())]
137/// ```
138pub fn tokenize(input: impl Into<String>) -> Result<Vec<Token>, LexError> {
139    let mut tokens: Vec<Token> = Vec::new();
140    let input = input.into();
141    let mut pos = 0;
142    loop {
143        let Some(starting) = input[pos..].find('[') else {
144            if pos < input.len() {
145                tokens.push(Token::Text(input[pos..].to_string()));
146            }
147            break;
148        };
149        let abs_starting = starting + pos;
150        // escape logic
151        if abs_starting > 0 && input[abs_starting - 1..abs_starting] == "\\".to_string() {
152            let before = &input[pos..abs_starting - 1];
153            if !before.is_empty() {
154                tokens.push(Token::Text(before.to_string()));
155            }
156            tokens.push(Token::Text(String::from('[')));
157            pos = abs_starting + 1;
158            continue;
159        }
160
161        if pos != abs_starting {
162            tokens.push(Token::Text(input[pos..abs_starting].to_string()));
163        }
164
165        let Some(closing) = input[abs_starting..].find(']') else {
166            return Err(LexError::UnclosedTag);
167        };
168        let abs_closing = closing + abs_starting;
169        let raw_tag = &input[abs_starting + 1..abs_closing];
170        for tag in parse_tag(raw_tag)? {
171            tokens.push(Token::Tag(tag));
172        }
173        pos = abs_closing + 1;
174    }
175    Ok(tokens)
176}
177
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        ansi::{Color, Ground, NamedColor},
        errors::LexError,
    };

    // --- helpers ---

    /// Builds the tag expected for `color` applied to the foreground.
    fn fg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Foreground,
        }
    }

    /// Builds the tag expected for `color` applied to the background.
    fn bg(color: Color) -> TagType {
        TagType::Color {
            color,
            ground: Ground::Background,
        }
    }

    /// Builds the text token expected for `content`.
    fn text(content: &str) -> Token {
        Token::Text(content.to_string())
    }

    // --- EmphasisType::from_str ---

    #[test]
    fn test_emphasis_from_str_all_known() {
        assert_eq!(EmphasisType::from_str("dim"), Some(EmphasisType::Dim));
        assert_eq!(EmphasisType::from_str("italic"), Some(EmphasisType::Italic));
        assert_eq!(
            EmphasisType::from_str("underline"),
            Some(EmphasisType::Underline)
        );
        assert_eq!(EmphasisType::from_str("bold"), Some(EmphasisType::Bold));
        assert_eq!(
            EmphasisType::from_str("strikethrough"),
            Some(EmphasisType::Strikethrough)
        );
        assert_eq!(EmphasisType::from_str("blink"), Some(EmphasisType::Blink));
    }

    #[test]
    fn test_emphasis_from_str_unknown_returns_none() {
        assert_eq!(EmphasisType::from_str("flash"), None);
    }

    #[test]
    fn test_emphasis_from_str_case_sensitive() {
        assert_eq!(EmphasisType::from_str("Bold"), None);
    }

    // --- parse_part ---

    #[test]
    fn test_parse_part_reset() {
        assert_eq!(parse_part("/").unwrap(), TagType::Reset);
    }

    #[test]
    fn test_parse_part_named_color_foreground_default() {
        assert_eq!(
            parse_part("red").unwrap(),
            fg(Color::Named(NamedColor::Red))
        );
    }

    #[test]
    fn test_parse_part_named_color_explicit_fg() {
        assert_eq!(
            parse_part("fg:red").unwrap(),
            fg(Color::Named(NamedColor::Red))
        );
    }

    #[test]
    fn test_parse_part_named_color_bg() {
        assert_eq!(
            parse_part("bg:red").unwrap(),
            bg(Color::Named(NamedColor::Red))
        );
    }

    #[test]
    fn test_parse_part_emphasis_bold() {
        assert_eq!(
            parse_part("bold").unwrap(),
            TagType::Emphasis(EmphasisType::Bold)
        );
    }

    #[test]
    fn test_parse_part_ansi256_valid() {
        assert_eq!(parse_part("ansi(200)").unwrap(), fg(Color::Ansi256(200)));
    }

    #[test]
    fn test_parse_part_ansi256_bg() {
        assert_eq!(parse_part("bg:ansi(200)").unwrap(), bg(Color::Ansi256(200)));
    }

    #[test]
    fn test_parse_part_ansi256_with_whitespace() {
        assert_eq!(parse_part("ansi( 42 )").unwrap(), fg(Color::Ansi256(42)));
    }

    #[test]
    fn test_parse_part_ansi256_invalid_value() {
        assert!(matches!(
            parse_part("ansi(abc)"),
            Err(LexError::InvalidValue(_))
        ));
    }

    #[test]
    fn test_parse_part_rgb_valid() {
        assert_eq!(
            parse_part("rgb(255,128,0)").unwrap(),
            fg(Color::Rgb(255, 128, 0))
        );
    }

    #[test]
    fn test_parse_part_rgb_bg() {
        assert_eq!(
            parse_part("bg:rgb(255,128,0)").unwrap(),
            bg(Color::Rgb(255, 128, 0))
        );
    }

    #[test]
    fn test_parse_part_rgb_with_spaces() {
        assert_eq!(
            parse_part("rgb( 10 , 20 , 30 )").unwrap(),
            fg(Color::Rgb(10, 20, 30))
        );
    }

    #[test]
    fn test_parse_part_rgb_wrong_arg_count() {
        // Match exhaustively so that a different error variant fails the test instead of
        // silently skipping the assertions.
        match parse_part("rgb(1,2)") {
            Err(LexError::InvalidArgumentCount { expected, got }) => {
                assert_eq!(expected, 3);
                assert_eq!(got, 2);
            }
            other => panic!("expected InvalidArgumentCount, got {other:?}"),
        }
    }

    #[test]
    fn test_parse_part_rgb_invalid_value() {
        assert!(matches!(
            parse_part("rgb(r,g,b)"),
            Err(LexError::InvalidValue(_))
        ));
    }

    #[test]
    fn test_parse_part_unknown_tag_returns_error() {
        assert!(matches!(
            parse_part("fuchsia"),
            Err(LexError::InvalidTag(_))
        ));
    }

    // --- tokenize ---

    #[test]
    fn test_tokenize_plain_text() {
        let tokens = tokenize("hello world").unwrap();
        assert_eq!(tokens, vec![text("hello world")]);
    }

    #[test]
    fn test_tokenize_empty_string() {
        assert!(tokenize("").unwrap().is_empty());
    }

    #[test]
    fn test_tokenize_single_color_tag() {
        let tokens = tokenize("[red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                text("text"),
            ]
        );
    }

    #[test]
    fn test_tokenize_bg_color_tag() {
        let tokens = tokenize("[bg:red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(bg(Color::Named(NamedColor::Red))),
                text("text"),
            ]
        );
    }

    #[test]
    fn test_tokenize_fg_and_bg_in_same_bracket() {
        let tokens = tokenize("[fg:white bg:blue]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(fg(Color::Named(NamedColor::White))),
                Token::Tag(bg(Color::Named(NamedColor::Blue))),
                text("text"),
            ]
        );
    }

    #[test]
    fn test_tokenize_reset_tag() {
        assert_eq!(tokenize("[/]").unwrap(), vec![Token::Tag(TagType::Reset)]);
    }

    #[test]
    fn test_tokenize_compound_tag() {
        let tokens = tokenize("[bold red]hi").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Emphasis(EmphasisType::Bold)),
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                text("hi"),
            ]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_at_start() {
        let tokens = tokenize("\\[not a tag]").unwrap();
        assert_eq!(tokens, vec![text("["), text("not a tag]")]);
    }

    #[test]
    fn test_tokenize_escaped_bracket_with_prefix() {
        let tokens = tokenize("before\\[not a tag]").unwrap();
        assert_eq!(tokens, vec![text("before"), text("["), text("not a tag]")]);
    }

    #[test]
    fn test_tokenize_unclosed_tag_returns_error() {
        assert!(matches!(tokenize("[red"), Err(LexError::UnclosedTag)));
    }

    #[test]
    fn test_tokenize_invalid_tag_name_returns_error() {
        assert!(matches!(
            tokenize("[fuchsia]"),
            Err(LexError::InvalidTag(_))
        ));
    }

    #[test]
    fn test_tokenize_text_before_and_after_tag() {
        let tokens = tokenize("before[red]after").unwrap();
        assert_eq!(
            tokens,
            vec![
                text("before"),
                Token::Tag(fg(Color::Named(NamedColor::Red))),
                text("after"),
            ]
        );
    }

    #[test]
    fn test_tokenize_ansi256_tag() {
        let tokens = tokenize("[ansi(1)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(fg(Color::Ansi256(1))));
    }

    #[test]
    fn test_tokenize_rgb_tag() {
        let tokens = tokenize("[rgb(255,0,128)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(fg(Color::Rgb(255, 0, 128))));
    }

    #[test]
    fn test_tokenize_bg_rgb_tag() {
        let tokens = tokenize("[bg:rgb(0,255,0)]text").unwrap();
        assert_eq!(tokens[0], Token::Tag(bg(Color::Rgb(0, 255, 0))));
    }
}