// farben_core/lexer.rs

1use crate::{
2    ansi::{Color, NamedColor},
3    errors::LexError,
4};
5
/// A text emphasis modifier supported by farben markup.
///
/// Every variant is a field-less unit value, so the type is `Copy` and can be
/// compared, hashed, and passed by value without moving the original.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EmphasisType {
    /// Written as `dim` in markup.
    Dim,
    /// Written as `italic` in markup.
    Italic,
    /// Written as `underline` in markup.
    Underline,
    /// Written as `bold` in markup.
    Bold,
    /// Written as `strikethrough` in markup.
    Strikethrough,
    /// Written as `blink` in markup.
    Blink,
}

impl EmphasisType {
    /// Parses an emphasis keyword into an `EmphasisType`.
    ///
    /// Returns `None` if the string is not a recognized emphasis name.
    /// Matching is exact and case-sensitive; the input is not trimmed, so
    /// `"Bold"` and `" bold"` are both rejected.
    fn from_str(input: &str) -> Option<Self> {
        match input {
            "dim" => Some(Self::Dim),
            "italic" => Some(Self::Italic),
            "underline" => Some(Self::Underline),
            "bold" => Some(Self::Bold),
            "strikethrough" => Some(Self::Strikethrough),
            "blink" => Some(Self::Blink),
            _ => None,
        }
    }
}
34
35/// The kind of styling operation a tag represents.
36#[derive(Debug, PartialEq)]
37pub enum TagType {
38    /// Resets all active styles (`[/]`).
39    Reset,
40    /// Applies a text emphasis attribute.
41    Emphasis(EmphasisType),
42    /// Sets the foreground color.
43    Color(Color),
44}
45
46/// A single unit produced by the tokenizer: either a styling tag or a run of plain text.
47#[derive(Debug, PartialEq)]
48pub enum Token {
49    Tag(TagType),
50    Text(String),
51}
52
53/// Parses a single whitespace-delimited tag part into a `TagType`.
54///
55/// Recognizes:
56/// - `/` as a reset
57/// - Named colors (`red`, `blue`, etc.)
58/// - Emphasis keywords (`bold`, `italic`, etc.)
59/// - `ansi(N)` for ANSI 256-palette colors
60/// - `rgb(R,G,B)` for true-color values
61///
62/// # Errors
63///
64/// Returns `LexError::InvalidTag` if the part matches none of the above forms.
65/// Returns `LexError::InvalidValue` if a numeric argument cannot be parsed.
66/// Returns `LexError::InvalidArgumentCount` if `rgb(...)` does not receive exactly three values.
67fn parse_part(part: &str) -> Result<TagType, LexError> {
68    if part == "/" {
69        Ok(TagType::Reset)
70    } else if let Some(color) = NamedColor::from_str(part) {
71        Ok(TagType::Color(Color::Named(color)))
72    } else if let Some(emphasis) = EmphasisType::from_str(part) {
73        Ok(TagType::Emphasis(emphasis))
74    } else if let Some(ansi_val) = part.strip_prefix("ansi(").and_then(|s| s.strip_suffix(")")) {
75        match ansi_val.trim().parse::<u8>() {
76            Ok(code) => Ok(TagType::Color(Color::Ansi256(code))),
77            Err(_) => Err(LexError::InvalidValue(ansi_val.to_string())),
78        }
79    } else if let Some(rgb_val) = part.strip_prefix("rgb(").and_then(|s| s.strip_suffix(")")) {
80        let parts: Result<Vec<u8>, _> =
81            rgb_val.split(',').map(|v| v.trim().parse::<u8>()).collect();
82        match parts {
83            Ok(v) if v.len() == 3 => Ok(TagType::Color(Color::Rgb(v[0], v[1], v[2]))),
84            Ok(v) => Err(LexError::InvalidArgumentCount {
85                expected: 3,
86                got: v.len(),
87            }),
88            Err(_) => Err(LexError::InvalidValue(rgb_val.to_string())),
89        }
90    } else {
91        Err(LexError::InvalidTag(part.to_string()))
92    }
93}
94
95/// Splits a raw tag string on whitespace and parses each part into a `TagType`.
96///
97/// A tag like `"bold red"` produces two `TagType` values.
98///
99/// # Errors
100///
101/// Propagates any error from `parse_part`.
102fn parse_tag(raw_tag: &str) -> Result<Vec<TagType>, LexError> {
103    raw_tag.split_whitespace().map(parse_part).collect()
104}
105
106/// Tokenizes a farben markup string into a sequence of `Token`s.
107///
108/// Tags are delimited by `[` and `]`. A `[` preceded by `\` is treated as a literal
109/// bracket rather than the start of a tag.
110///
111/// # Errors
112///
113/// Returns `LexError::UnclosedTag` if a `[` has no matching `]`.
114/// Returns any error produced by `parse_tag` for malformed tag contents.
115///
116/// # Example
117///
118/// ```ignore
119/// let tokens = tokenize("[red]hello")?;
120/// // => [Token::Tag(TagType::Color(Color::Named(NamedColor::Red))), Token::Text("hello".into())]
121/// ```
122pub fn tokenize(input: impl Into<String>) -> Result<Vec<Token>, LexError> {
123    let mut tokens: Vec<Token> = Vec::new();
124    let input = input.into();
125    let mut pos = 0;
126    loop {
127        let Some(starting) = input[pos..].find('[') else {
128            if pos < input.len() {
129                tokens.push(Token::Text(input[pos..].to_string()));
130            }
131            break;
132        };
133        let abs_starting = starting + pos;
134        // escape logic
135        if abs_starting > 0 && input[abs_starting - 1..abs_starting] == "\\".to_string() {
136            let before = &input[pos..abs_starting - 1];
137            if !before.is_empty() {
138                tokens.push(Token::Text(before.to_string()));
139            }
140            tokens.push(Token::Text(String::from('[')));
141            pos = abs_starting + 1;
142            continue;
143        }
144
145        if pos != abs_starting {
146            tokens.push(Token::Text(input[pos..abs_starting].to_string()));
147        }
148
149        let Some(closing) = input[abs_starting..].find(']') else {
150            return Err(LexError::UnclosedTag);
151        };
152        let abs_closing = closing + abs_starting;
153        let raw_tag = &input[abs_starting + 1..abs_closing];
154        for tag in parse_tag(raw_tag)? {
155            tokens.push(Token::Tag(tag));
156        }
157        pos = abs_closing + 1;
158    }
159    Ok(tokens)
160}
161
/// Unit tests for the lexer.
///
/// Error cases assert the exact `LexError` variant, so a test cannot pass
/// because some unrelated error happened to be returned.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ansi::{Color, NamedColor};
    use crate::errors::LexError;

    // --- EmphasisType::from_str ---

    #[test]
    fn test_emphasis_from_str_all_known() {
        assert_eq!(EmphasisType::from_str("dim"), Some(EmphasisType::Dim));
        assert_eq!(EmphasisType::from_str("italic"), Some(EmphasisType::Italic));
        assert_eq!(
            EmphasisType::from_str("underline"),
            Some(EmphasisType::Underline)
        );
        assert_eq!(EmphasisType::from_str("bold"), Some(EmphasisType::Bold));
        assert_eq!(
            EmphasisType::from_str("strikethrough"),
            Some(EmphasisType::Strikethrough)
        );
        assert_eq!(EmphasisType::from_str("blink"), Some(EmphasisType::Blink));
    }

    #[test]
    fn test_emphasis_from_str_unknown_returns_none() {
        assert_eq!(EmphasisType::from_str("flash"), None);
    }

    #[test]
    fn test_emphasis_from_str_case_sensitive() {
        assert_eq!(EmphasisType::from_str("Bold"), None);
    }

    // --- parse_part ---

    #[test]
    fn test_parse_part_reset() {
        assert_eq!(parse_part("/").unwrap(), TagType::Reset);
    }

    #[test]
    fn test_parse_part_named_color() {
        assert_eq!(
            parse_part("red").unwrap(),
            TagType::Color(Color::Named(NamedColor::Red))
        );
    }

    #[test]
    fn test_parse_part_emphasis_bold() {
        assert_eq!(
            parse_part("bold").unwrap(),
            TagType::Emphasis(EmphasisType::Bold)
        );
    }

    #[test]
    fn test_parse_part_ansi256_valid() {
        assert_eq!(
            parse_part("ansi(200)").unwrap(),
            TagType::Color(Color::Ansi256(200))
        );
    }

    #[test]
    fn test_parse_part_ansi256_with_whitespace() {
        assert_eq!(
            parse_part("ansi( 42 )").unwrap(),
            TagType::Color(Color::Ansi256(42))
        );
    }

    #[test]
    fn test_parse_part_ansi256_invalid_value() {
        assert!(matches!(
            parse_part("ansi(abc)"),
            Err(LexError::InvalidValue(_))
        ));
    }

    #[test]
    fn test_parse_part_rgb_valid() {
        assert_eq!(
            parse_part("rgb(255,128,0)").unwrap(),
            TagType::Color(Color::Rgb(255, 128, 0))
        );
    }

    #[test]
    fn test_parse_part_rgb_with_spaces() {
        assert_eq!(
            parse_part("rgb( 10 , 20 , 30 )").unwrap(),
            TagType::Color(Color::Rgb(10, 20, 30))
        );
    }

    #[test]
    fn test_parse_part_rgb_wrong_arg_count() {
        // Match on the exact variant and fields: an `if let` here would
        // silently pass whenever a different error variant came back.
        assert!(matches!(
            parse_part("rgb(1,2)"),
            Err(LexError::InvalidArgumentCount {
                expected: 3,
                got: 2
            })
        ));
    }

    #[test]
    fn test_parse_part_rgb_invalid_value() {
        assert!(matches!(
            parse_part("rgb(r,g,b)"),
            Err(LexError::InvalidValue(_))
        ));
    }

    #[test]
    fn test_parse_part_unknown_tag_returns_error() {
        assert!(matches!(
            parse_part("fuchsia"),
            Err(LexError::InvalidTag(_))
        ));
    }

    // --- tokenize ---

    #[test]
    fn test_tokenize_plain_text() {
        let tokens = tokenize("hello world").unwrap();
        assert_eq!(tokens, vec![Token::Text("hello world".into())]);
    }

    #[test]
    fn test_tokenize_empty_string() {
        assert!(tokenize("").unwrap().is_empty());
    }

    #[test]
    fn test_tokenize_single_color_tag() {
        let tokens = tokenize("[red]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color(Color::Named(NamedColor::Red))),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_reset_tag() {
        assert_eq!(tokenize("[/]").unwrap(), vec![Token::Tag(TagType::Reset)]);
    }

    #[test]
    fn test_tokenize_compound_tag() {
        let tokens = tokenize("[bold red]hi").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Emphasis(EmphasisType::Bold)),
                Token::Tag(TagType::Color(Color::Named(NamedColor::Red))),
                Token::Text("hi".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_at_start() {
        let tokens = tokenize("\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![Token::Text("[".into()), Token::Text("not a tag]".into())]
        );
    }

    #[test]
    fn test_tokenize_escaped_bracket_with_prefix() {
        let tokens = tokenize("before\\[not a tag]").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Text("[".into()),
                Token::Text("not a tag]".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_unclosed_tag_returns_error() {
        assert!(matches!(tokenize("[red"), Err(LexError::UnclosedTag)));
    }

    #[test]
    fn test_tokenize_invalid_tag_name_returns_error() {
        assert!(matches!(
            tokenize("[fuchsia]"),
            Err(LexError::InvalidTag(_))
        ));
    }

    #[test]
    fn test_tokenize_text_before_and_after_tag() {
        let tokens = tokenize("before[red]after").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Text("before".into()),
                Token::Tag(TagType::Color(Color::Named(NamedColor::Red))),
                Token::Text("after".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_ansi256_tag() {
        let tokens = tokenize("[ansi(1)]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color(Color::Ansi256(1))),
                Token::Text("text".into()),
            ]
        );
    }

    #[test]
    fn test_tokenize_rgb_tag() {
        let tokens = tokenize("[rgb(255,0,128)]text").unwrap();
        assert_eq!(
            tokens,
            vec![
                Token::Tag(TagType::Color(Color::Rgb(255, 0, 128))),
                Token::Text("text".into()),
            ]
        );
    }
}
395
// Skipped (side effects): none. All functions in lexer.rs are pure.