revolt_parser/
lib.rs

1use std::collections::{HashSet, VecDeque};
2
3use logos::Logos;
4
5#[derive(Debug, Clone, Logos, PartialEq)]
6#[logos(skip "\n")]
7#[logos(subpattern id="[0123456789ABCDEFGHJKMNPQRSTVWXYZ]{26}")]
8pub enum MessageToken<'a> {
9    #[token("\\")]
10    Escape,
11    #[regex("```[^`\n]*", |_| 3)]
12    #[regex("``", |_| 2)]
13    #[regex("`", |_| 1)]
14    CodeblockMarker(usize),
15    #[regex("<@(?&id)>", |lex| &lex.slice()[2..lex.slice().len() - 1])]
16    UserMention(&'a str),
17    #[regex("<%(?&id)>", |lex| &lex.slice()[2..lex.slice().len() - 1],)]
18    RoleMention(&'a str),
19    #[token("@everyone")]
20    MentionEveryone,
21    #[token("@online")]
22    MentionOnline
23}
24
/// Everything a message mentions.
///
/// The default value represents a message that mentions nothing: both sets
/// are empty and both flags are `false`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct MessageResults {
    /// Ids taken from user-mention tokens, de-duplicated.
    pub user_mentions: HashSet<String>,
    /// Ids taken from role-mention tokens, de-duplicated.
    pub role_mentions: HashSet<String>,
    /// `true` once an `@everyone` mention has been recorded.
    pub mentions_everyone: bool,
    /// `true` once an `@online` mention has been recorded.
    pub mentions_online: bool,
}
32
33struct MessageParserIterator<'a, I> {
34    inner: I,
35    temp: VecDeque<MessageToken<'a>>
36}
37
38impl<'a, I: Iterator<Item = MessageToken<'a>>> Iterator for MessageParserIterator<'a, I> {
39    type Item = MessageToken<'a>;
40
41    fn next(&mut self) -> Option<Self::Item> {
42        if !self.temp.is_empty() {
43            self.temp.pop_front()
44        } else {
45            let token = self.inner.next();
46
47            if token == Some(MessageToken::Escape) {
48                self.inner.next();
49
50                token
51            } else if let Some(MessageToken::CodeblockMarker(ty)) = token {
52                loop {
53                    let next_token = self.inner.next();
54
55                    if next_token == Some(MessageToken::CodeblockMarker(ty)) {
56                        self.temp.clear();
57                        self.temp.push_back(MessageToken::CodeblockMarker(ty));
58                        break next_token
59                    } else if let Some(token) = next_token {
60                        self.temp.push_back(token);
61                    } else {
62                        break Some(MessageToken::CodeblockMarker(ty))
63                    }
64                }
65            } else {
66                token
67            }
68        }
69    }
70}
71
72pub fn parse_message_iter(text: &str) -> impl Iterator<Item = MessageToken> + '_ {
73    MessageParserIterator {
74        inner: MessageToken::lexer(text).flatten(),
75        temp: VecDeque::new()
76    }
77}
78
79pub fn parse_message(text: &str) -> MessageResults {
80    let mut results = MessageResults::default();
81
82    for token in parse_message_iter(text) {
83        match token {
84            MessageToken::Escape => {}
85            MessageToken::CodeblockMarker(_) => {},
86            MessageToken::UserMention(id) => { results.user_mentions.insert(id.to_string()); },
87            MessageToken::RoleMention(id) => { results.role_mentions.insert(id.to_string()); },
88            MessageToken::MentionEveryone => results.mentions_everyone = true,
89            MessageToken::MentionOnline => results.mentions_online = true,
90        };
91    };
92
93    results
94}
95
#[cfg(test)]
mod tests {
    use super::*;

    /// Id embedded in every user/role mention fixture below.
    const ID: &str = "01FD58YK5W7QRV5H3D64KTQYX3";

    /// Runs the whole token pipeline over `text` and collects its output.
    fn tokens(text: &str) -> Vec<MessageToken<'_>> {
        parse_message_iter(text).collect()
    }

    /// A user mention, a role mention, `@everyone` and `@online`, in that order.
    fn all_mentions() -> Vec<MessageToken<'static>> {
        vec![
            MessageToken::UserMention(ID),
            MessageToken::RoleMention(ID),
            MessageToken::MentionEveryone,
            MessageToken::MentionOnline,
        ]
    }

    /// `marker` followed by [`all_mentions`]: what an unterminated block yields.
    fn marker_then_all_mentions(marker: usize) -> Vec<MessageToken<'static>> {
        let mut expected = vec![MessageToken::CodeblockMarker(marker)];
        expected.extend(all_mentions());
        expected
    }

    /// An opening and a closing marker of the same width and nothing else.
    fn marker_pair(marker: usize) -> Vec<MessageToken<'static>> {
        vec![
            MessageToken::CodeblockMarker(marker),
            MessageToken::CodeblockMarker(marker),
        ]
    }

    #[test]
    fn test_no_nodes() {
        assert!(tokens("Hello everyone").is_empty());
    }

    #[test]
    fn test_simple_user_mention() {
        assert_eq!(
            tokens("Hello <@01FD58YK5W7QRV5H3D64KTQYX3>."),
            vec![MessageToken::UserMention(ID)]
        );
    }

    #[test]
    fn test_simple_role_mention() {
        assert_eq!(
            tokens("Hello <%01FD58YK5W7QRV5H3D64KTQYX3>."),
            vec![MessageToken::RoleMention(ID)]
        );
    }

    #[test]
    fn test_mention_everyone() {
        assert_eq!(tokens("Hello @everyone."), vec![MessageToken::MentionEveryone]);
    }

    #[test]
    fn test_mention_online() {
        assert_eq!(tokens("Hello @online."), vec![MessageToken::MentionOnline]);
    }

    #[test]
    fn test_everything() {
        assert_eq!(
            tokens("Hello <@01FD58YK5W7QRV5H3D64KTQYX3>, <%01FD58YK5W7QRV5H3D64KTQYX3>, @everyone and @online."),
            all_mentions()
        );
    }

    #[test]
    fn test_everything_no_spaces() {
        assert_eq!(
            tokens("<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online"),
            all_mentions()
        );
    }

    #[test]
    fn test_codeblock_no_mentions() {
        assert_eq!(
            tokens("```\n<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online\n```"),
            marker_pair(3)
        );
    }

    #[test]
    fn test_uncontained_codeblock_should_mention() {
        assert_eq!(
            tokens("```\n<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online"),
            marker_then_all_mentions(3)
        );
    }

    #[test]
    fn test_inline_codeblock_no_mentions() {
        // Comparing the whole vector checks the closing marker too; the
        // previous version asserted the first element twice.
        assert_eq!(
            tokens("`<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online`"),
            marker_pair(1)
        );
    }

    #[test]
    fn test_uncontained_inline_codeblock_should_mention() {
        assert_eq!(
            tokens("`<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online"),
            marker_then_all_mentions(1)
        );
    }

    #[test]
    fn test_codeblock_with_language_no_mentions() {
        assert_eq!(
            tokens("```rust\n<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online```"),
            marker_pair(3)
        );
    }

    #[test]
    fn test_double_inline_codeblock() {
        assert_eq!(tokens("``this should not ping @everyone``"), marker_pair(2));
    }

    #[test]
    fn test_double_inline_codeblock_with_backticks_inside() {
        assert_eq!(
            tokens("``this `should` not `ping` @everyone``"),
            marker_pair(2)
        );
    }

    #[test]
    fn test_in_middle() {
        assert_eq!(tokens("i am not pinging `@everyone`."), marker_pair(1));
    }

    #[test]
    fn test_escaped_codeblock() {
        // The escape cancels the opening backtick, so the mention is live and
        // the remaining backtick is an unterminated marker.
        assert_eq!(
            tokens("i am ~~not~~ pinging \\`@everyone` ok."),
            vec![
                MessageToken::Escape,
                MessageToken::MentionEveryone,
                MessageToken::CodeblockMarker(1),
            ]
        );
    }

    #[test]
    fn test_escape_mention() {
        assert_eq!(tokens("i wont ping \\@everyone"), vec![MessageToken::Escape]);
    }

    #[test]
    fn test_parse_message_collects_mentions() {
        let results = parse_message(
            "<@01FD58YK5W7QRV5H3D64KTQYX3> <%01FD58YK5W7QRV5H3D64KTQYX3> @everyone `@online`",
        );

        assert_eq!(results.user_mentions.len(), 1);
        assert!(results.user_mentions.contains(ID));
        assert_eq!(results.role_mentions.len(), 1);
        assert!(results.role_mentions.contains(ID));
        assert!(results.mentions_everyone);
        // `@online` sits inside inline code and must not count.
        assert!(!results.mentions_online);
    }
}