nntp_proxy/command/
classifier.rs

//! Command classification logic for NNTP commands
2
// Keyword spelling tables for allocation-free keyword matching.
// Each table lists one command keyword as UPPERCASE, lowercase and Titlecase.

/// Shape shared by every keyword table: a static list of byte-string spellings.
type CaseTable = &'static [&'static [u8]];

// Article retrieval keywords
const ARTICLE_CASES: CaseTable = &[b"ARTICLE", b"article", b"Article"];
const BODY_CASES: CaseTable = &[b"BODY", b"body", b"Body"];
const HEAD_CASES: CaseTable = &[b"HEAD", b"head", b"Head"];
const STAT_CASES: CaseTable = &[b"STAT", b"stat", b"Stat"];

// Group selection and authentication keywords
const GROUP_CASES: CaseTable = &[b"GROUP", b"group", b"Group"];
const AUTHINFO_CASES: CaseTable = &[b"AUTHINFO", b"authinfo", b"Authinfo"];

// Informational / session keywords
const LIST_CASES: CaseTable = &[b"LIST", b"list", b"List"];
const DATE_CASES: CaseTable = &[b"DATE", b"date", b"Date"];
const CAPABILITIES_CASES: CaseTable = &[b"CAPABILITIES", b"capabilities", b"Capabilities"];
const MODE_CASES: CaseTable = &[b"MODE", b"mode", b"Mode"];
const HELP_CASES: CaseTable = &[b"HELP", b"help", b"Help"];
const QUIT_CASES: CaseTable = &[b"QUIT", b"quit", b"Quit"];

// Overview / header retrieval keywords
const XOVER_CASES: CaseTable = &[b"XOVER", b"xover", b"Xover"];
const OVER_CASES: CaseTable = &[b"OVER", b"over", b"Over"];
const XHDR_CASES: CaseTable = &[b"XHDR", b"xhdr", b"Xhdr"];
const HDR_CASES: CaseTable = &[b"HDR", b"hdr", b"Hdr"];

// Article-pointer navigation keywords
const NEXT_CASES: CaseTable = &[b"NEXT", b"next", b"Next"];
const LAST_CASES: CaseTable = &[b"LAST", b"last", b"Last"];
const LISTGROUP_CASES: CaseTable = &[b"LISTGROUP", b"listgroup", b"Listgroup"];

// Posting / feed keywords
const POST_CASES: CaseTable = &[b"POST", b"post", b"Post"];
const IHAVE_CASES: CaseTable = &[b"IHAVE", b"ihave", b"Ihave"];
const NEWGROUPS_CASES: CaseTable = &[b"NEWGROUPS", b"newgroups", b"Newgroups"];
const NEWNEWS_CASES: CaseTable = &[b"NEWNEWS", b"newnews", b"Newnews"];
28
/// Check whether `cmd` spells any of the keywords in `cases`.
///
/// The comparison is ASCII case-insensitive, so every mixed-case spelling of a
/// listed keyword (e.g. `GrOuP`) matches, not only the spellings that appear
/// literally in the table. NNTP keywords are case-insensitive (RFC 3977 §3.1).
///
/// No allocation is performed: each candidate is compared byte by byte.
/// Returns `false` for an empty `cases` table.
#[inline]
fn matches_any(cmd: &[u8], cases: &[&[u8]]) -> bool {
    cases.iter().any(|case| case.eq_ignore_ascii_case(cmd))
}
34
/// NNTP command classification for different handling strategies.
///
/// The enum carries no data, so it is cheap to copy, compare and use as a
/// hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NntpCommand {
    /// `AUTHINFO USER` authentication command - intercepted locally
    AuthUser,
    /// `AUTHINFO PASS` authentication command - intercepted locally
    AuthPass,
    /// Stateful commands that require GROUP context - REJECTED in stateless mode
    Stateful,
    /// Commands that cannot work with per-command routing - REJECTED in per-command routing mode
    NonRoutable,
    /// Stateless commands that can be safely proxied without state
    Stateless,
    /// Article retrieval by message-ID (stateless) - can be proxied
    ArticleByMessageId,
}
50
51impl NntpCommand {
52    /// Classify an NNTP command based on its content using fast byte-level parsing
53    /// Ordered by frequency (most common first) for optimal short-circuit performance
54    ///
55    /// Performance: Zero allocations - uses direct byte slice comparison with hardcoded
56    /// case permutations instead of case conversion.
57    #[inline]
58    pub fn classify(command: &str) -> Self {
59        let trimmed = command.trim();
60        let bytes = trimmed.as_bytes();
61
62        // Fast path: find space to separate command from arguments
63        let cmd_end = memchr::memchr(b' ', bytes).unwrap_or(bytes.len());
64        let cmd = &bytes[..cmd_end];
65
66        // Helper: Check if arguments start with '<' (message-ID indicator)
67        #[inline]
68        fn is_message_id_arg(bytes: &[u8], cmd_end: usize) -> bool {
69            if cmd_end >= bytes.len() {
70                return false;
71            }
72            let args = &bytes[cmd_end + 1..];
73            // Fast skip whitespace using memchr - find first non-whitespace
74            let first_non_ws = args.iter().position(|&b| !b.is_ascii_whitespace());
75
76            if let Some(pos) = first_non_ws {
77                args[pos] == b'<'
78            } else {
79                false
80            }
81        }
82
83        // Ordered by frequency: ARTICLE/BODY/HEAD/STAT are 70%+ of traffic
84        // Article retrieval commands (MOST FREQUENT ~70% of traffic)
85        if matches_any(cmd, ARTICLE_CASES)
86            || matches_any(cmd, BODY_CASES)
87            || matches_any(cmd, HEAD_CASES)
88            || matches_any(cmd, STAT_CASES)
89        {
90            return if is_message_id_arg(bytes, cmd_end) {
91                Self::ArticleByMessageId
92            } else {
93                Self::Stateful
94            };
95        }
96
97        // GROUP - common for switching groups (~10% of traffic)
98        if matches_any(cmd, GROUP_CASES) {
99            return Self::Stateful;
100        }
101
102        // Authentication - once per connection but checked early
103        if matches_any(cmd, AUTHINFO_CASES) {
104            if cmd_end + 1 < bytes.len() {
105                let args = &bytes[cmd_end + 1..];
106                if args.len() >= 4 {
107                    match &args[..4] {
108                        b"USER" | b"user" | b"User" => return Self::AuthUser,
109                        b"PASS" | b"pass" | b"Pass" => return Self::AuthPass,
110                        _ => {}
111                    }
112                }
113            }
114            return Self::Stateless;
115        }
116
117        // Stateless commands (moderately common ~5-10%)
118        if matches_any(cmd, LIST_CASES)
119            || matches_any(cmd, DATE_CASES)
120            || matches_any(cmd, CAPABILITIES_CASES)
121            || matches_any(cmd, MODE_CASES)
122            || matches_any(cmd, HELP_CASES)
123            || matches_any(cmd, QUIT_CASES)
124        {
125            return Self::Stateless;
126        }
127
128        // Header retrieval commands (~5%)
129        if matches_any(cmd, XOVER_CASES)
130            || matches_any(cmd, OVER_CASES)
131            || matches_any(cmd, XHDR_CASES)
132            || matches_any(cmd, HDR_CASES)
133        {
134            return Self::Stateful;
135        }
136
137        // Other stateful commands (rare)
138        if matches_any(cmd, NEXT_CASES)
139            || matches_any(cmd, LAST_CASES)
140            || matches_any(cmd, LISTGROUP_CASES)
141        {
142            return Self::Stateful;
143        }
144
145        // Non-routable commands (very rare in typical usage)
146        if matches_any(cmd, POST_CASES)
147            || matches_any(cmd, IHAVE_CASES)
148            || matches_any(cmd, NEWGROUPS_CASES)
149            || matches_any(cmd, NEWNEWS_CASES)
150        {
151            return Self::NonRoutable;
152        }
153
154        // Unknown commands - treat as stateless (forward and let backend decide)
155        Self::Stateless
156    }
157}
158
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every command line in `lines` classifies as `expected`.
    fn assert_all(expected: &NntpCommand, lines: &[&str]) {
        for &line in lines {
            assert_eq!(
                &NntpCommand::classify(line),
                expected,
                "command line: {:?}",
                line
            );
        }
    }

    #[test]
    fn test_nntp_command_classification() {
        // Authentication commands
        assert_all(&NntpCommand::AuthUser, &["AUTHINFO USER testuser"]);
        assert_all(&NntpCommand::AuthPass, &["AUTHINFO PASS testpass"]);
        assert_all(&NntpCommand::AuthUser, &["  AUTHINFO USER  whitespace  "]);

        // Stateful commands (should be rejected)
        assert_all(
            &NntpCommand::Stateful,
            &[
                "GROUP alt.test",
                "NEXT",
                "LAST",
                "LISTGROUP alt.test",
                "ARTICLE 12345",
                "ARTICLE",
                "HEAD 67890",
                "STAT",
                "XOVER 1-100",
            ],
        );

        // Article retrieval by message-ID (stateless - allowed)
        assert_all(
            &NntpCommand::ArticleByMessageId,
            &[
                "ARTICLE <message@example.com>",
                "BODY <test@server.org>",
                "HEAD <another@example.net>",
                "STAT <id@host.com>",
            ],
        );

        // Stateless commands (allowed), including an unknown keyword
        assert_all(
            &NntpCommand::Stateless,
            &[
                "HELP",
                "LIST",
                "DATE",
                "CAPABILITIES",
                "QUIT",
                "LIST ACTIVE",
                "UNKNOWN COMMAND",
            ],
        );
    }

    #[test]
    fn test_case_insensitivity() {
        // Commands should be case-insensitive per NNTP spec
        assert_all(&NntpCommand::Stateless, &["list", "LiSt", "QUIT", "quit"]);
        assert_all(
            &NntpCommand::Stateful,
            &["group alt.test", "GROUP alt.test"],
        );
    }

    #[test]
    fn test_empty_and_whitespace_commands() {
        // Empty input, spaces only, then tabs mixed with spaces
        assert_all(&NntpCommand::Stateless, &["", "   ", "\t\t  "]);
    }

    #[test]
    fn test_malformed_authinfo_commands() {
        // AUTHINFO with no subcommand, then with an unknown subcommand
        assert_all(&NntpCommand::Stateless, &["AUTHINFO", "AUTHINFO INVALID"]);

        // AUTHINFO USER without username
        assert_all(&NntpCommand::AuthUser, &["AUTHINFO USER"]);

        // AUTHINFO PASS without password
        assert_all(&NntpCommand::AuthPass, &["AUTHINFO PASS"]);
    }

    #[test]
    fn test_article_commands_with_various_message_ids() {
        assert_all(
            &NntpCommand::ArticleByMessageId,
            &[
                // Standard message-ID
                "ARTICLE <test@example.com>",
                // Message-ID with complex domain
                "ARTICLE <msg.123@news.example.co.uk>",
                // Message-ID with special characters
                "ARTICLE <user+tag@domain.com>",
                // BODY / HEAD / STAT with message-ID
                "BODY <test@test.com>",
                "HEAD <id@host>",
                "STAT <msg@server>",
            ],
        );
    }

    #[test]
    fn test_article_commands_without_message_id() {
        assert_all(
            &NntpCommand::Stateful,
            &[
                // ARTICLE with number (stateful - requires GROUP context)
                "ARTICLE 12345",
                // ARTICLE without argument (stateful - uses current article)
                "ARTICLE",
                // BODY / HEAD with number
                "BODY 999",
                "HEAD 123",
            ],
        );
    }

    #[test]
    fn test_special_characters_in_commands() {
        assert_all(
            &NntpCommand::Stateless,
            &[
                // Command with newlines
                "LIST\r\n",
                // Command with extra whitespace
                "  LIST   ACTIVE  ",
                // Command with tabs
                "LIST\tACTIVE",
            ],
        );
    }

    #[test]
    fn test_very_long_commands() {
        // Very long command line
        let long_command = format!("LIST {}", "A".repeat(1000));
        assert_all(&NntpCommand::Stateless, &[long_command.as_str()]);

        // Very long GROUP name
        let long_group = format!("GROUP {}", "alt.".repeat(100));
        assert_all(&NntpCommand::Stateful, &[long_group.as_str()]);

        // Very long message-ID
        let long_msgid = format!("ARTICLE <{}@example.com>", "x".repeat(500));
        assert_all(&NntpCommand::ArticleByMessageId, &[long_msgid.as_str()]);
    }

    #[test]
    fn test_list_command_variations() {
        assert_all(
            &NntpCommand::Stateless,
            &["LIST", "LIST ACTIVE", "LIST NEWSGROUPS", "LIST OVERVIEW.FMT"],
        );
    }

    #[test]
    fn test_boundary_conditions() {
        assert_all(
            &NntpCommand::Stateless,
            &[
                // Single character command
                "X",
                // Command that looks like message-ID but isn't
                "NOTARTICLE <test@example.com>",
            ],
        );

        // Message-ID without angle brackets (not valid, treated as number)
        assert_all(&NntpCommand::Stateful, &["ARTICLE test@example.com"]);
    }

    #[test]
    fn test_non_routable_commands() {
        // None of these can be routed per-command
        assert_all(
            &NntpCommand::NonRoutable,
            &[
                "POST",
                "IHAVE <test@example.com>",
                "NEWGROUPS 20240101 000000 GMT",
                "NEWNEWS * 20240101 000000 GMT",
            ],
        );
    }

    #[test]
    fn test_non_routable_case_insensitive() {
        assert_all(
            &NntpCommand::NonRoutable,
            &["post", "Post", "IHAVE <msg>", "ihave <msg>"],
        );
    }
}