Skip to main content

revolt_parser/
lib.rs

1use std::collections::{HashSet, VecDeque};
2
3use logos::Logos;
4
5#[derive(Debug, Clone, Logos, PartialEq)]
6#[logos(skip "\n")]
7#[logos(subpattern id="[0123456789ABCDEFGHJKMNPQRSTVWXYZ]{26}")]
8pub enum MessageToken<'a> {
9    #[token("\\")]
10    Escape,
11    #[regex("```[^`\n]*", |_| 3)]
12    #[regex("``", |_| 2)]
13    #[regex("`", |_| 1)]
14    CodeblockMarker(usize),
15    #[regex("<@(?&id)>", |lex| &lex.slice()[2..lex.slice().len() - 1])]
16    UserMention(&'a str),
17    #[regex("<%(?&id)>", |lex| &lex.slice()[2..lex.slice().len() - 1],)]
18    RoleMention(&'a str),
19    #[regex("<#(?&id)>", |lex| &lex.slice()[2..lex.slice().len() - 1],)]
20    ChannelMention(&'a str),
21    #[regex(":(?&id):", |lex| &lex.slice()[1..lex.slice().len() - 1],)]
22    Emoji(&'a str),
23    #[token("@everyone")]
24    MentionEveryone,
25    #[token("@online")]
26    MentionOnline,
27}
28
/// Everything [`parse_message`] found in a message, with duplicates collapsed.
///
/// The default value has every set empty and both flags `false`.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct MessageResults {
    /// Ids taken from `UserMention` tokens.
    pub user_mentions: HashSet<String>,
    /// Ids taken from `RoleMention` tokens.
    pub role_mentions: HashSet<String>,
    /// Ids taken from `ChannelMention` tokens.
    pub channel_mentions: HashSet<String>,
    /// Ids taken from `Emoji` tokens.
    pub emojis: HashSet<String>,
    /// `true` once a `MentionEveryone` token has been seen.
    pub mentions_everyone: bool,
    /// `true` once a `MentionOnline` token has been seen.
    pub mentions_online: bool,
}
38
39struct MessageParserIterator<'a, I> {
40    inner: I,
41    temp: VecDeque<MessageToken<'a>>,
42}
43
44impl<'a, I: Iterator<Item = MessageToken<'a>>> Iterator for MessageParserIterator<'a, I> {
45    type Item = MessageToken<'a>;
46
47    fn next(&mut self) -> Option<Self::Item> {
48        if !self.temp.is_empty() {
49            self.temp.pop_front()
50        } else {
51            let token = self.inner.next();
52
53            if token == Some(MessageToken::Escape) {
54                self.inner.next();
55
56                token
57            } else if let Some(MessageToken::CodeblockMarker(ty)) = token {
58                loop {
59                    let next_token = self.inner.next();
60
61                    if next_token == Some(MessageToken::CodeblockMarker(ty)) {
62                        self.temp.clear();
63                        self.temp.push_back(MessageToken::CodeblockMarker(ty));
64                        break next_token;
65                    } else if let Some(token) = next_token {
66                        self.temp.push_back(token);
67                    } else {
68                        break Some(MessageToken::CodeblockMarker(ty));
69                    }
70                }
71            } else {
72                token
73            }
74        }
75    }
76}
77
78pub fn parse_message_iter(text: &str) -> impl Iterator<Item = MessageToken<'_>> + '_ {
79    MessageParserIterator {
80        inner: MessageToken::lexer(text).flatten(),
81        temp: VecDeque::new(),
82    }
83}
84
85pub fn parse_message(text: &str) -> MessageResults {
86    let mut results = MessageResults::default();
87
88    for token in parse_message_iter(text) {
89        match token {
90            MessageToken::Escape => {}
91            MessageToken::CodeblockMarker(_) => {}
92            MessageToken::UserMention(id) => {
93                results.user_mentions.insert(id.to_string());
94            }
95            MessageToken::RoleMention(id) => {
96                results.role_mentions.insert(id.to_string());
97            }
98            MessageToken::ChannelMention(id) => {
99                results.channel_mentions.insert(id.to_string());
100            }
101            MessageToken::Emoji(id) => {
102                results.emojis.insert(id.to_string());
103            }
104            MessageToken::MentionEveryone => results.mentions_everyone = true,
105            MessageToken::MentionOnline => results.mentions_online = true,
106        };
107    }
108
109    results
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    // Plain text produces no tokens at all.
    #[test]
    fn test_no_nodes() {
        let output = parse_message_iter("Hello everyone").collect::<Vec<_>>();

        assert_eq!(output.len(), 0);
    }

    #[test]
    fn test_simple_user_mention() {
        let output = parse_message_iter("Hello <@01FD58YK5W7QRV5H3D64KTQYX3>.").collect::<Vec<_>>();

        assert_eq!(output.len(), 1);
        assert_eq!(
            output[0],
            MessageToken::UserMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
    }

    #[test]
    fn test_simple_role_mention() {
        let output = parse_message_iter("Hello <%01FD58YK5W7QRV5H3D64KTQYX3>.").collect::<Vec<_>>();

        assert_eq!(output.len(), 1);
        assert_eq!(
            output[0],
            MessageToken::RoleMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
    }

    #[test]
    fn test_mention_everyone() {
        let output = parse_message_iter("Hello @everyone.").collect::<Vec<_>>();

        assert_eq!(output.len(), 1);
        assert_eq!(output[0], MessageToken::MentionEveryone);
    }

    #[test]
    fn test_mention_online() {
        let output = parse_message_iter("Hello @online.").collect::<Vec<_>>();

        assert_eq!(output.len(), 1);
        assert_eq!(output[0], MessageToken::MentionOnline);
    }

    #[test]
    fn test_everything() {
        let output = parse_message_iter("Hello <@01FD58YK5W7QRV5H3D64KTQYX3>, <%01FD58YK5W7QRV5H3D64KTQYX3>, <#01FD58YK5W7QRV5H3D64KTQYX3> @everyone and @online. :01FD58YK5W7QRV5H3D64KTQYX3:").collect::<Vec<_>>();

        assert_eq!(output.len(), 6);
        assert_eq!(
            output[0],
            MessageToken::UserMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
        assert_eq!(
            output[1],
            MessageToken::RoleMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
        assert_eq!(
            output[2],
            MessageToken::ChannelMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
        assert_eq!(output[3], MessageToken::MentionEveryone);
        assert_eq!(output[4], MessageToken::MentionOnline);
        assert_eq!(output[5], MessageToken::Emoji("01FD58YK5W7QRV5H3D64KTQYX3"));
    }

    #[test]
    fn test_everything_no_spaces() {
        let output = parse_message_iter(
            "<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3><#01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online:01FD58YK5W7QRV5H3D64KTQYX3:",
        )
        .collect::<Vec<_>>();

        assert_eq!(output.len(), 6);
        assert_eq!(
            output[0],
            MessageToken::UserMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
        assert_eq!(
            output[1],
            MessageToken::RoleMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
        assert_eq!(
            output[2],
            MessageToken::ChannelMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
        assert_eq!(output[3], MessageToken::MentionEveryone);
        assert_eq!(output[4], MessageToken::MentionOnline);
        assert_eq!(output[5], MessageToken::Emoji("01FD58YK5W7QRV5H3D64KTQYX3"));
    }

    #[test]
    fn test_codeblock_no_mentions() {
        let output = parse_message_iter(
            "```\n<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3><#01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online:01FD58YK5W7QRV5H3D64KTQYX3:\n```",
        )
        .collect::<Vec<_>>();

        assert_eq!(output.len(), 2);
        assert_eq!(output[0], MessageToken::CodeblockMarker(3));
        assert_eq!(output[1], MessageToken::CodeblockMarker(3));
    }

    #[test]
    fn test_uncontained_codeblock_should_mention() {
        let output = parse_message_iter(
            "```\n<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3><#01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online:01FD58YK5W7QRV5H3D64KTQYX3:",
        )
        .collect::<Vec<_>>();

        assert_eq!(output.len(), 7);
        assert_eq!(output[0], MessageToken::CodeblockMarker(3));
        assert_eq!(
            output[1],
            MessageToken::UserMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
        assert_eq!(
            output[2],
            MessageToken::RoleMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
        assert_eq!(
            output[3],
            MessageToken::ChannelMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
        assert_eq!(output[4], MessageToken::MentionEveryone);
        assert_eq!(output[5], MessageToken::MentionOnline);
        assert_eq!(output[6], MessageToken::Emoji("01FD58YK5W7QRV5H3D64KTQYX3"));
    }

    #[test]
    fn test_inline_codeblock_no_mentions() {
        let output = parse_message_iter(
            "`<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3><#01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online:01FD58YK5W7QRV5H3D64KTQYX3:`",
        )
        .collect::<Vec<_>>();

        assert_eq!(output.len(), 2);
        assert_eq!(output[0], MessageToken::CodeblockMarker(1));
        // Check the closing marker too (index 0 used to be asserted twice).
        assert_eq!(output[1], MessageToken::CodeblockMarker(1));
    }

    #[test]
    fn test_uncontained_inline_codeblock_should_mention() {
        let output = parse_message_iter(
            "`<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3><#01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online:01FD58YK5W7QRV5H3D64KTQYX3:",
        )
        .collect::<Vec<_>>();

        assert_eq!(output.len(), 7);
        assert_eq!(output[0], MessageToken::CodeblockMarker(1));
        assert_eq!(
            output[1],
            MessageToken::UserMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
        assert_eq!(
            output[2],
            MessageToken::RoleMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
        assert_eq!(
            output[3],
            MessageToken::ChannelMention("01FD58YK5W7QRV5H3D64KTQYX3")
        );
        assert_eq!(output[4], MessageToken::MentionEveryone);
        assert_eq!(output[5], MessageToken::MentionOnline);
        assert_eq!(output[6], MessageToken::Emoji("01FD58YK5W7QRV5H3D64KTQYX3"));
    }

    #[test]
    fn test_codeblock_with_language_no_mentions() {
        let output = parse_message_iter("```rust\n<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3><#01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online:01FD58YK5W7QRV5H3D64KTQYX3:```").collect::<Vec<_>>();

        assert_eq!(output.len(), 2);
        assert_eq!(output[0], MessageToken::CodeblockMarker(3));
        assert_eq!(output[1], MessageToken::CodeblockMarker(3));
    }

    #[test]
    fn test_double_inline_codeblock() {
        let output = parse_message_iter("``this should not ping @everyone``").collect::<Vec<_>>();

        assert_eq!(output.len(), 2);
        assert_eq!(output[0], MessageToken::CodeblockMarker(2));
        assert_eq!(output[1], MessageToken::CodeblockMarker(2));
    }

    #[test]
    fn test_double_inline_codeblock_with_backticks_inside() {
        let output =
            parse_message_iter("``this `should` not `ping` @everyone``").collect::<Vec<_>>();

        assert_eq!(output.len(), 2);
        assert_eq!(output[0], MessageToken::CodeblockMarker(2));
        assert_eq!(output[1], MessageToken::CodeblockMarker(2));
    }

    #[test]
    fn test_in_middle() {
        let output = parse_message_iter("i am not pinging `@everyone`.").collect::<Vec<_>>();

        assert_eq!(output.len(), 2);
        assert_eq!(output[0], MessageToken::CodeblockMarker(1));
        assert_eq!(output[1], MessageToken::CodeblockMarker(1));
    }

    // The backslash hides the opening backtick, so the mention is live and
    // the remaining backtick is an unclosed marker.
    #[test]
    fn test_escaped_codeblock() {
        let output =
            parse_message_iter("i am ~~not~~ pinging \\`@everyone` ok.").collect::<Vec<_>>();

        assert_eq!(output.len(), 3);
        assert_eq!(output[0], MessageToken::Escape);
        assert_eq!(output[1], MessageToken::MentionEveryone);
        assert_eq!(output[2], MessageToken::CodeblockMarker(1));
    }

    #[test]
    fn test_escape_mention() {
        let output = parse_message_iter("i wont ping \\@everyone").collect::<Vec<_>>();

        assert_eq!(output.len(), 1);
        assert_eq!(output[0], MessageToken::Escape);
    }

    // `parse_message` folds the token stream into de-duplicated sets and flags.
    #[test]
    fn test_parse_message_collects_results() {
        let results = parse_message(
            "<@01FD58YK5W7QRV5H3D64KTQYX3> <@01FD58YK5W7QRV5H3D64KTQYX3> @everyone",
        );

        assert_eq!(results.user_mentions.len(), 1);
        assert!(results
            .user_mentions
            .contains("01FD58YK5W7QRV5H3D64KTQYX3"));
        assert!(results.role_mentions.is_empty());
        assert!(results.channel_mentions.is_empty());
        assert!(results.emojis.is_empty());
        assert!(results.mentions_everyone);
        assert!(!results.mentions_online);
    }

    #[test]
    fn test_parse_message_ignores_codeblock_contents() {
        let results = parse_message("`@everyone <@01FD58YK5W7QRV5H3D64KTQYX3>`");

        assert_eq!(results, MessageResults::default());
    }
}