// cyndikator_dispatch/dispatch/token/mod.rs
use super::ParseError;
2
3use nom::{
4 branch::alt,
5 bytes::complete::{escaped, is_a, is_not, tag},
6 character::complete::{char, multispace1, one_of},
7 combinator::{all_consuming, opt},
8 multi::many0,
9 IResult,
10};
11
/// A lexical token produced by [`Token::tokenize`].
///
/// Every variant borrows its text from the original input string, hence
/// the `'input` lifetime. All fields are `Copy` (`&str`, `char`, `bool`),
/// so the enum derives `Copy`/`Clone`, and `PartialEq`/`Eq` to allow
/// direct comparison in the parser and in tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Token<'input> {
    /// A `#` comment running to end of line; `content` excludes the `#`.
    Comment {
        content: &'input str,
    },

    /// A quoted string literal. `interpolated` is `true` for
    /// double-quoted strings, `false` for single-quoted ones.
    Str {
        content: &'input str,
        interpolated: bool,
    },

    /// A bare identifier: ASCII letters and underscores.
    Ident {
        content: &'input str,
    },

    /// A `/`-delimited regex literal plus its (possibly empty) flags.
    Regex {
        content: &'input str,
        flags: &'input str,
    },

    /// An opening delimiter: `{` or `(`.
    Begin {
        sym: char,
    },

    /// A closing delimiter: `}` or `)`.
    End {
        sym: char,
    },

    /// One or more whitespace characters.
    Space,
}
42
43impl<'a> Token<'a> {
44 pub(crate) fn tokenize(input: &'a str) -> Result<Vec<Token<'a>>, ParseError> {
45 all_consuming(many0(alt((
46 parse_comment,
47 parse_str,
48 parse_ident,
49 parse_context,
50 parse_space,
51 parse_regex,
52 ))))(input)
53 .or(Err(ParseError::Tokenize))
54 .map(|s| s.1)
55 }
56
57 pub(crate) fn tokenize_significant(input: &'a str) -> Result<Vec<Token<'a>>, ParseError> {
58 let mut tokens = Token::tokenize(input)?;
59
60 let mut i = 0;
61 while i < tokens.len() {
62 if !tokens[i].is_significant() {
63 tokens.remove(i);
64 } else {
65 i += 1;
66 }
67 }
68
69 Ok(tokens)
70 }
71
72 pub(crate) fn is_significant(&self) -> bool {
73 !matches!(self, Token::Space | Token::Comment { .. })
74 }
75}
76
77fn parse_comment(input: &str) -> IResult<&str, Token> {
78 let (input, _) = tag("#")(input)?;
79 let (input, content) = is_not("\r\n")(input)?;
80
81 Ok((input, Token::Comment { content }))
82}
83
84fn parse_str(input: &str) -> IResult<&str, Token> {
85 let (input, ch) = one_of("\"'")(input)?;
86
87 let (input, content) = match ch {
88 '\'' => {
89 let (input, content) = is_not("'")(input)?;
90 let (input, _) = tag("'")(input)?;
91
92 (input, content)
93 }
94
95 '"' => {
96 let (input, content) = is_not("\"")(input)?;
97 let (input, _) = tag("\"")(input)?;
98
99 (input, content)
100 }
101
102 _ => unreachable!(),
103 };
104
105 let interpolated = ch == '"';
106
107 Ok((
108 input,
109 Token::Str {
110 content,
111 interpolated,
112 },
113 ))
114}
115
116fn parse_ident(input: &str) -> IResult<&str, Token> {
117 let (input, content) = is_a("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_")(input)?;
118
119 Ok((input, Token::Ident { content }))
120}
121
122fn parse_context(input: &str) -> IResult<&str, Token> {
123 let (input, sym) = one_of("{}()")(input)?;
124
125 match sym {
126 '{' => Ok((input, Token::Begin { sym })),
127 '}' => Ok((input, Token::End { sym })),
128 '(' => Ok((input, Token::Begin { sym })),
129 ')' => Ok((input, Token::End { sym })),
130
131 _ => unreachable!(),
132 }
133}
134
135fn parse_space(input: &str) -> IResult<&str, Token> {
136 let (input, _) = multispace1(input)?;
137
138 Ok((input, Token::Space))
139}
140
/// Parse a `/`-delimited regex literal such as `/foo\/bar/i`.
///
/// `content` is the raw text between the delimiters, escapes included
/// (no unescaping is performed here). `flags` holds the optional
/// trailing flag characters and is empty when absent.
fn parse_regex(input: &str) -> IResult<&str, Token> {
    let (input, _) = char('/')(input)?;

    // `escaped` alternates runs of ordinary characters (anything other
    // than `/` and `\`) with backslash escapes whose following character
    // must be one of the listed metacharacters / class letters.
    // NOTE(review): `escaped` needs a non-empty body here, so the empty
    // regex `//` does not tokenize — confirm that is intended.
    let (input, content) = escaped(
        is_not("/\\"),
        '\\',
        one_of("/npbdwsDWSBAZ\\*+?()|[]{}^$aftrvuU"),
    )(input)?;

    let (input, _) = char('/')(input)?;

    // `i` is the only recognized flag; absent flags become "".
    let (input, flags) = opt(is_a("i"))(input)?;
    let flags = flags.unwrap_or_default();

    Ok((input, Token::Regex { content, flags }))
}