revolt_parser/
lib.rs

1use std::collections::{HashSet, VecDeque};
2
3use logos::Logos;
4
5#[derive(Debug, Clone, Logos, PartialEq)]
6#[logos(skip "\n")]
7pub enum MessageToken {
8    #[token("\\")]
9    Escape,
10    #[regex("(```[^`\n]*)|(``)|`", |lex| lex.slice().to_owned().chars().filter(|&c| c == '`').count())]
11    CodeblockMarker(usize),
12    #[regex("<@[0123456789ABCDEFGHJKMNPQRSTVWXYZ]{26}>", |lex| lex.slice()[2..lex.slice().len() - 1].to_owned())]
13    UserMention(String),
14    #[regex("<%[0123456789ABCDEFGHJKMNPQRSTVWXYZ]{26}>", |lex| lex.slice()[2..lex.slice().len() - 1].to_owned())]
15    RoleMention(String),
16    #[token("@everyone")]
17    MentionEveryone,
18    #[token("@online")]
19    MentionOnline
20}
21
/// Mention information gathered from one message.
///
/// `Default` yields empty sets and `false` flags. The type is `Eq` as well as
/// `PartialEq`, since every field has a total equality.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct MessageResults {
    /// Identifiers carried by user-mention tokens, de-duplicated.
    pub user_mentions: HashSet<String>,
    /// Identifiers carried by role-mention tokens, de-duplicated.
    pub role_mentions: HashSet<String>,
    /// Set when an `@everyone` token was seen.
    pub mentions_everyone: bool,
    /// Set when an `@online` token was seen.
    pub mentions_online: bool,
}
29
30struct MessageParserIterator<I> {
31    inner: I,
32    temp: VecDeque<MessageToken>
33}
34
35impl<I: Iterator<Item = MessageToken>> Iterator for MessageParserIterator<I> {
36    type Item = MessageToken;
37
38    fn next(&mut self) -> Option<Self::Item> {
39        if !self.temp.is_empty() {
40            self.temp.pop_front()
41        } else {
42            let token = self.inner.next();
43
44            if token == Some(MessageToken::Escape) {
45                self.inner.next();
46
47                token
48            } else if let Some(MessageToken::CodeblockMarker(ty)) = token {
49                loop {
50                    let next_token = self.inner.next();
51
52                    if next_token == Some(MessageToken::CodeblockMarker(ty)) {
53                        self.temp.clear();
54                        self.temp.push_back(MessageToken::CodeblockMarker(ty));
55                        break next_token
56                    } else if let Some(token) = next_token {
57                        self.temp.push_back(token);
58                    } else {
59                        break Some(MessageToken::CodeblockMarker(ty))
60                    }
61                }
62            } else {
63                token
64            }
65        }
66    }
67}
68
69pub fn parse_message_iter(text: &str) -> impl Iterator<Item = MessageToken> + '_ {
70    MessageParserIterator {
71        inner: MessageToken::lexer(text).flatten(),
72        temp: VecDeque::new()
73    }
74}
75
76pub fn parse_message(text: &str) -> MessageResults {
77    let mut results = MessageResults::default();
78
79    for token in parse_message_iter(text) {
80        match token {
81            MessageToken::Escape => {}
82            MessageToken::CodeblockMarker(_) => {},
83            MessageToken::UserMention(id) => { results.user_mentions.insert(id); },
84            MessageToken::RoleMention(id) => { results.role_mentions.insert(id); },
85            MessageToken::MentionEveryone => results.mentions_everyone = true,
86            MessageToken::MentionOnline => results.mentions_online = true,
87        };
88    };
89
90    results
91}
92
#[cfg(test)]
mod tests {
    use super::*;

    /// Identifier embedded in every user/role mention fixture below.
    const ID: &str = "01FD58YK5W7QRV5H3D64KTQYX3";

    /// Runs the full parser over `text` and collects the emitted tokens.
    fn tokens(text: &str) -> Vec<MessageToken> {
        parse_message_iter(text).collect()
    }

    /// Expected token for a user mention of [`ID`].
    fn user() -> MessageToken {
        MessageToken::UserMention(ID.to_string())
    }

    /// Expected token for a role mention of [`ID`].
    fn role() -> MessageToken {
        MessageToken::RoleMention(ID.to_string())
    }

    /// Code-block marker token with `n` backticks.
    fn fence(n: usize) -> MessageToken {
        MessageToken::CodeblockMarker(n)
    }

    /// The four mention tokens in the order the fixtures write them.
    fn every_mention() -> Vec<MessageToken> {
        vec![
            user(),
            role(),
            MessageToken::MentionEveryone,
            MessageToken::MentionOnline,
        ]
    }

    #[test]
    fn test_no_nodes() {
        assert!(tokens("Hello everyone").is_empty());
    }

    #[test]
    fn test_simple_user_mention() {
        assert_eq!(tokens("Hello <@01FD58YK5W7QRV5H3D64KTQYX3>."), vec![user()]);
    }

    #[test]
    fn test_simple_role_mention() {
        assert_eq!(tokens("Hello <%01FD58YK5W7QRV5H3D64KTQYX3>."), vec![role()]);
    }

    #[test]
    fn test_mention_everyone() {
        assert_eq!(tokens("Hello @everyone."), vec![MessageToken::MentionEveryone]);
    }

    #[test]
    fn test_mention_online() {
        assert_eq!(tokens("Hello @online."), vec![MessageToken::MentionOnline]);
    }

    #[test]
    fn test_everything() {
        let output = tokens("Hello <@01FD58YK5W7QRV5H3D64KTQYX3>, <%01FD58YK5W7QRV5H3D64KTQYX3>, @everyone and @online.");

        assert_eq!(output, every_mention());
    }

    #[test]
    fn test_everything_no_spaces() {
        let output = tokens("<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online");

        assert_eq!(output, every_mention());
    }

    #[test]
    fn test_codeblock_no_mentions() {
        let output = tokens("```\n<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online\n```");

        assert_eq!(output, vec![fence(3), fence(3)]);
    }

    #[test]
    fn test_uncontained_codeblock_should_mention() {
        let output = tokens("```\n<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online");

        let mut expected = vec![fence(3)];
        expected.extend(every_mention());
        assert_eq!(output, expected);
    }

    #[test]
    fn test_inline_codeblock_no_mentions() {
        let output = tokens("`<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online`");

        // Both the opening and the closing marker are checked; the second
        // assertion used to re-check index 0.
        assert_eq!(output.len(), 2);
        assert_eq!(output[0], fence(1));
        assert_eq!(output[1], fence(1));
    }

    #[test]
    fn test_uncontained_inline_codeblock_should_mention() {
        let output = tokens("`<@01FD58YK5W7QRV5H3D64KTQYX3><%01FD58YK5W7QRV5H3D64KTQYX3>@everyone@online");

        let mut expected = vec![fence(1)];
        expected.extend(every_mention());
        assert_eq!(output, expected);
    }

    #[test]
    fn test_double_inline_codeblock() {
        let output = tokens("``this should not ping @everyone``");

        assert_eq!(output, vec![fence(2), fence(2)]);
    }

    #[test]
    fn test_double_inline_codeblock_with_backticks_inside() {
        let output = tokens("``this `should` not `ping` @everyone``");

        assert_eq!(output, vec![fence(2), fence(2)]);
    }

    #[test]
    fn test_in_middle() {
        let output = tokens("i am not pinging `@everyone`.");

        assert_eq!(output, vec![fence(1), fence(1)]);
    }

    #[test]
    fn test_escaped_codeblock() {
        let output = tokens("i am ~~not~~ pinging \\`@everyone` ok.");

        assert_eq!(
            output,
            vec![MessageToken::Escape, MessageToken::MentionEveryone, fence(1)]
        );
    }

    #[test]
    fn test_escape_mention() {
        assert_eq!(tokens("i wont ping \\@everyone"), vec![MessageToken::Escape]);
    }

    #[test]
    fn test_parse_message_collects_mentions() {
        let results = parse_message(
            "<@01FD58YK5W7QRV5H3D64KTQYX3> <@01FD58YK5W7QRV5H3D64KTQYX3> <%01FD58YK5W7QRV5H3D64KTQYX3> @everyone `@online`",
        );

        // The repeated user mention is stored once.
        assert_eq!(results.user_mentions.len(), 1);
        assert!(results.user_mentions.contains(ID));
        assert_eq!(results.role_mentions.len(), 1);
        assert!(results.role_mentions.contains(ID));
        assert!(results.mentions_everyone);
        // `@online` sits inside a closed inline code span.
        assert!(!results.mentions_online);
    }
}