cyndikator_dispatch/dispatch/token/mod.rs

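//! Lexer for dispatch expressions: splits raw input into comments, strings,
//! identifiers, regex literals, grouping symbols, and whitespace.
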
use super::ParseError;

use nom::{
    branch::alt,
    bytes::complete::{escaped, is_a, is_not, tag},
    character::complete::{char, multispace1, one_of},
    combinator::{all_consuming, opt},
    multi::many0,
    IResult,
};

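/// A single lexical token, borrowing its content from the input it was
/// produced from: a `#` comment, a quoted string (double quotes mark it as
/// interpolated), an identifier, a `/.../` regex with optional flags, a
/// `{`/`(` opening or `}`/`)` closing symbol, or a run of whitespace.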
#[derive(Debug)]
pub(crate) enum Token<'input> {
    Comment {
        content: &'input str,
    },

    Str {
        content: &'input str,
        interpolated: bool,
    },

    Ident {
        content: &'input str,
    },

    Regex {
        content: &'input str,
        flags: &'input str,
    },

    Begin {
        sym: char,
    },

    End {
        sym: char,
    },

    Space,
}

impl<'a> Token<'a> {
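    /// Splits the entire input into a flat token stream, returning
    /// [`ParseError::Tokenize`] if any part of the input does not match one
    /// of the tokenizer rules. A rough sketch with hypothetical rule text:
    ///
    /// ```ignore
    /// let tokens = Token::tokenize("action { notify \"hello\" }")?;
    /// // yields Ident, Space, Begin, Space, Ident, Space, Str, Space, End
    /// ```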
    pub(crate) fn tokenize(input: &'a str) -> Result<Vec<Token<'a>>, ParseError> {
        all_consuming(many0(alt((
            parse_comment,
            parse_str,
            parse_ident,
            parse_context,
            parse_space,
            parse_regex,
        ))))(input)
        .map(|(_, tokens)| tokens)
        .map_err(|_| ParseError::Tokenize)
    }

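    /// Tokenizes the input and then drops tokens the parser does not care
    /// about (whitespace and comments), keeping the remaining tokens in order.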
    pub(crate) fn tokenize_significant(input: &'a str) -> Result<Vec<Token<'a>>, ParseError> {
        let mut tokens = Token::tokenize(input)?;

        tokens.retain(|token| token.is_significant());

        Ok(tokens)
    }

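    /// `false` for [`Token::Space`] and [`Token::Comment`], `true` for every
    /// other variant.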
    pub(crate) fn is_significant(&self) -> bool {
        !matches!(self, Token::Space | Token::Comment { .. })
    }
}

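/// Matches a `#` comment; `content` is everything after the `#` up to (but
/// not including) the line terminator.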
fn parse_comment(input: &str) -> IResult<&str, Token> {
    let (input, _) = tag("#")(input)?;
    let (input, content) = is_not("\r\n")(input)?;

    Ok((input, Token::Comment { content }))
}

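/// Matches a quoted string and strips the surrounding quotes. Double-quoted
/// strings are flagged as interpolated; single-quoted strings are not.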
fn parse_str(input: &str) -> IResult<&str, Token> {
    let (input, ch) = one_of("\"'")(input)?;

    let (input, content) = match ch {
        '\'' => {
            let (input, content) = is_not("'")(input)?;
            let (input, _) = tag("'")(input)?;

            (input, content)
        }

        '"' => {
            let (input, content) = is_not("\"")(input)?;
            let (input, _) = tag("\"")(input)?;

            (input, content)
        }

        _ => unreachable!(),
    };

    let interpolated = ch == '"';

    Ok((
        input,
        Token::Str {
            content,
            interpolated,
        },
    ))
}

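/// Matches an identifier: one or more ASCII letters or underscores.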
fn parse_ident(input: &str) -> IResult<&str, Token> {
    let (input, content) = is_a("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")(input)?;

    Ok((input, Token::Ident { content }))
}

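/// Matches a grouping symbol: `{` and `(` begin a context, `}` and `)` end one.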
fn parse_context(input: &str) -> IResult<&str, Token> {
    let (input, sym) = one_of("{}()")(input)?;

    match sym {
        '{' | '(' => Ok((input, Token::Begin { sym })),
        '}' | ')' => Ok((input, Token::End { sym })),

        _ => unreachable!(),
    }
}

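/// Collapses a run of one or more whitespace characters into a single
/// [`Token::Space`].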
fn parse_space(input: &str) -> IResult<&str, Token> {
    let (input, _) = multispace1(input)?;

    Ok((input, Token::Space))
}

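/// Matches a `/.../` regex literal with backslash escape sequences, followed
/// by an optional flag group (currently only `i`).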
fn parse_regex(input: &str) -> IResult<&str, Token> {
    let (input, _) = char('/')(input)?;

    let (input, content) = escaped(
        is_not("/\\"),
        '\\',
        one_of("/npbdwsDWSBAZ\\*+?()|[]{}^$aftrvuU"),
    )(input)?;

    let (input, _) = char('/')(input)?;

    let (input, flags) = opt(is_a("i"))(input)?;
    let flags = flags.unwrap_or_default();

    Ok((input, Token::Regex { content, flags }))
}