nntp_proxy/protocol/
response.rs

1//! NNTP Response Parsing and Handling
2//!
3//! This module implements efficient parsing of NNTP server responses according to
4//! [RFC 3977](https://datatracker.ietf.org/doc/html/rfc3977) with optimizations
5//! for high-throughput proxy use.
6//!
7//! # NNTP Protocol References
8//!
//! - **[RFC 3977 §3.2]** - Response format and status codes
//! - **[RFC 3977 §3.1.1]** - Multi-line data blocks
//! - **[RFC 5536 §3.1.3]** - Message-ID format specification
//!
//! [RFC 3977 §3.2]: https://datatracker.ietf.org/doc/html/rfc3977#section-3.2
//! [RFC 3977 §3.1.1]: https://datatracker.ietf.org/doc/html/rfc3977#section-3.1.1
//! [RFC 5536 §3.1.3]: https://datatracker.ietf.org/doc/html/rfc5536#section-3.1.3
16//!
17//! # Response Format
18//!
19//! Per [RFC 3977 §3.2](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2):
20//! ```text
21//! response     = status-line [CRLF multiline-data]
22//! status-line  = status-code SP status-text CRLF
23//! status-code  = 3DIGIT
24//! ```
25//!
26//! # Multiline Responses
27//!
//! Per [RFC 3977 §3.1.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.1.1):
29//! ```text
30//! Multiline responses end with a line containing a single period:
31//! CRLF "." CRLF
32//! ```
33
34use crate::types::MessageId;
35
36/// Categorized NNTP response code for type-safe handling
37///
38/// This enum categorizes NNTP response codes based on their semantics and
39/// handling requirements per [RFC 3977 §3.2](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2).
40///
41/// # Response Code Ranges
42///
43/// Per [RFC 3977 §3.2.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2.1):
44/// - **1xx**: Informational (multiline data follows)
45/// - **2xx**: Success (may be multiline)
46/// - **3xx**: Success so far, further input expected
47/// - **4xx**: Temporary failure
48/// - **5xx**: Permanent failure
49#[derive(Debug, Clone, Copy, PartialEq, Eq)]
50pub enum ResponseCode {
51    /// Server greeting - [RFC 3977 §5.1](https://datatracker.ietf.org/doc/html/rfc3977#section-5.1)
52    /// - 200: Posting allowed
53    /// - 201: No posting allowed
54    Greeting(u16),
55
56    /// Disconnect/goodbye - [RFC 3977 §5.4](https://datatracker.ietf.org/doc/html/rfc3977#section-5.4)
57    /// - 205: Connection closing
58    Disconnect,
59
60    /// Authentication required - [RFC 4643 §2.3](https://datatracker.ietf.org/doc/html/rfc4643#section-2.3)
61    /// - 381: Password required
62    /// - 480: Authentication required
63    AuthRequired(u16),
64
65    /// Authentication successful - [RFC 4643 §2.5.1](https://datatracker.ietf.org/doc/html/rfc4643#section-2.5.1)
66    /// - 281: Authentication accepted
67    AuthSuccess,
68
69    /// Multiline data response
70    /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1):
71    /// - All 1xx codes (100-199)
72    /// - Specific 2xx codes: 215, 220, 221, 222, 224, 225, 230, 231, 282
73    MultilineData(u16),
74
75    /// Single-line response (everything else)
76    SingleLine(u16),
77
78    /// Invalid or unparseable response
79    Invalid,
80}
81
82impl ResponseCode {
83    /// Parse response data into a categorized response code
84    ///
85    /// Per [RFC 3977 §3.2](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2),
86    /// responses start with a 3-digit status code.
87    ///
88    /// **Optimization**: Direct byte-to-digit conversion avoids UTF-8 overhead.
89    #[inline]
90    pub fn parse(data: &[u8]) -> Self {
91        let code = match NntpResponse::parse_status_code(data) {
92            Some(c) => c,
93            None => return Self::Invalid,
94        };
95
96        match code {
97            // [RFC 3977 §5.1](https://datatracker.ietf.org/doc/html/rfc3977#section-5.1)
98            200 | 201 => Self::Greeting(code),
99
100            // [RFC 3977 §5.4](https://datatracker.ietf.org/doc/html/rfc3977#section-5.4)
101            205 => Self::Disconnect,
102
103            // [RFC 4643 §2.5.1](https://datatracker.ietf.org/doc/html/rfc4643#section-2.5.1)
104            281 => Self::AuthSuccess,
105
106            // [RFC 4643 §2.3](https://datatracker.ietf.org/doc/html/rfc4643#section-2.3)
107            381 | 480 => Self::AuthRequired(code),
108
109            // Multiline responses per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1)
110            // All 1xx are informational multiline
111            100..=199 => Self::MultilineData(code),
112            // Specific 2xx multiline responses
113            215 | 220 | 221 | 222 | 224 | 225 | 230 | 231 | 282 => Self::MultilineData(code),
114
115            // Everything else is a single-line response
116            _ => Self::SingleLine(code),
117        }
118    }
119
120    /// Check if this response type is multiline
121    ///
122    /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1),
123    /// multiline responses require special handling with terminator detection.
124    #[inline]
125    pub fn is_multiline(&self) -> bool {
126        matches!(self, Self::MultilineData(_))
127    }
128
129    /// Get the numeric status code if available
130    #[inline]
131    pub fn status_code(&self) -> Option<u16> {
132        match self {
133            Self::Greeting(c)
134            | Self::AuthRequired(c)
135            | Self::MultilineData(c)
136            | Self::SingleLine(c) => Some(*c),
137            Self::Disconnect => Some(205),
138            Self::AuthSuccess => Some(281),
139            Self::Invalid => None,
140        }
141    }
142
143    /// Check if this is a success response (2xx or 3xx)
144    ///
145    /// Per [RFC 3977 §3.2.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2.1):
146    /// - 2xx: Success
147    /// - 3xx: Success so far, send more input
148    #[inline]
149    pub fn is_success(&self) -> bool {
150        self.status_code()
151            .is_some_and(|code| (200..400).contains(&code))
152    }
153}
154
/// Represents a parsed NNTP response
///
/// Plain data holder: the status code, whether a multiline data block
/// follows, and the raw bytes of the response.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NntpResponse {
    /// Status code (e.g., 200, 381, 500)
    pub status_code: u16,
    /// Whether this is a multiline response
    pub is_multiline: bool,
    /// Complete response data including status line
    pub data: Vec<u8>,
}
165
166impl NntpResponse {
167    /// Parse a status code from response data
168    ///
169    /// Per [RFC 3977 §3.2](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2),
170    /// responses begin with a 3-digit status code (ASCII digits '0'-'9').
171    ///
172    /// **Optimization**: Direct byte-to-digit conversion without UTF-8 validation.
173    /// Status codes are guaranteed to be ASCII digits per the RFC.
174    #[inline]
175    pub fn parse_status_code(data: &[u8]) -> Option<u16> {
176        if data.len() < 3 {
177            return None;
178        }
179
180        // Fast path: Direct ASCII digit conversion without UTF-8 overhead
181        // Per RFC 3977, status codes are exactly 3 ASCII digits
182        let d0 = data[0].wrapping_sub(b'0');
183        let d1 = data[1].wrapping_sub(b'0');
184        let d2 = data[2].wrapping_sub(b'0');
185
186        // Validate all three are digits (0-9)
187        if d0 > 9 || d1 > 9 || d2 > 9 {
188            return None;
189        }
190
191        // Combine into u16: d0*100 + d1*10 + d2
192        Some((d0 as u16) * 100 + (d1 as u16) * 10 + (d2 as u16))
193    }
194
195    /// Check if a response indicates a multiline response
196    ///
197    /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1),
198    /// certain status codes indicate multiline data follows.
199    ///
200    /// # Multiline Response Codes
201    /// - **1xx**: All informational responses (100-199)
202    /// - **2xx**: Specific codes - 215, 220, 221, 222, 224, 225, 230, 231, 282
203    #[inline]
204    pub fn is_multiline_response(status_code: u16) -> bool {
205        match status_code {
206            100..=199 => true, // All 1xx are multiline
207            215 | 220 | 221 | 222 | 224 | 225 | 230 | 231 | 282 => true, // Specific 2xx codes
208            _ => false,
209        }
210    }
211
212    /// Check if data ends with the NNTP multiline terminator
213    ///
214    /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1):
215    /// ```text
216    /// Multiline blocks are terminated by a line containing only a period:
217    /// CRLF "." CRLF
218    /// Which appears in the data stream as: \r\n.\r\n
219    /// ```
220    ///
221    /// **Optimization**: Single suffix check, no scanning.
222    #[inline]
223    pub fn has_terminator_at_end(data: &[u8]) -> bool {
224        let n = data.len();
225        // Only check for proper RFC 3977 terminator: \r\n.\r\n
226        n >= 5 && data[n - 5..n] == *b"\r\n.\r\n"
227    }
228
229    /// Find the position of the NNTP multiline terminator in data
230    ///
231    /// Returns the position AFTER the terminator (exclusive end), or None if not found.
232    /// This handles the case where extra data appears after the terminator in the same chunk.
233    ///
234    /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1),
235    /// the terminator is exactly "\r\n.\r\n" (CRLF, dot, CRLF).
236    ///
237    /// **Optimization**: Uses `memchr::memchr_iter()` to find '\r' bytes (SIMD-accelerated),
238    /// then validates the full 5-byte pattern. This eliminates the need to create new slices
239    /// on each iteration (which the manual loop approach requires with `&data[pos..]`).
240    ///
241    /// Benchmarks show this is **72% faster for small responses** (37ns → 13ns) and
242    /// **64% faster for medium responses** (109ns → 40ns) compared to the manual loop
243    /// that creates a new slice on each iteration.
244    #[inline]
245    pub fn find_terminator_end(data: &[u8]) -> Option<usize> {
246        let n = data.len();
247        if n < 5 {
248            return None;
249        }
250
251        // Use memchr::Memchr iterator to avoid repeated slice creation
252        for r_pos in memchr::memchr_iter(b'\r', data) {
253            // Not enough space for full terminator
254            if r_pos + 5 > n {
255                return None;
256            }
257
258            // Check for full terminator pattern
259            if &data[r_pos..r_pos + 5] == b"\r\n.\r\n" {
260                return Some(r_pos + 5);
261            }
262        }
263
264        None
265    }
266
267    /// Check if a terminator spans across a boundary between tail and current chunk
268    ///
269    /// This handles the case where a multiline terminator is split across two read chunks.
270    /// For example: previous chunk ends with "\r\n." and current starts with "\r\n"
271    ///
272    /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1),
273    /// the terminator is exactly "\r\n.\r\n" (CRLF, dot, CRLF).
274    #[inline]
275    pub fn has_spanning_terminator(
276        tail: &[u8],
277        tail_len: usize,
278        current: &[u8],
279        current_len: usize,
280    ) -> bool {
281        // Only check if we have a tail and current chunk is small enough for spanning
282        if tail_len < 1 || !(1..=4).contains(&current_len) {
283            return false;
284        }
285
286        // Check all possible split positions of the 5-byte terminator "\r\n.\r\n"
287        // Split after byte 1: tail ends with "\r", current starts with "\n.\r\n"
288        if tail_len >= 1
289            && current_len >= 4
290            && tail[tail_len - 1] == b'\r'
291            && current[..4] == *b"\n.\r\n"
292        {
293            return true;
294        }
295        // Split after byte 2: tail ends with "\r\n", current starts with ".\r\n"
296        if tail_len >= 2
297            && current_len >= 3
298            && tail[tail_len - 2..tail_len] == *b"\r\n"
299            && current[..3] == *b".\r\n"
300        {
301            return true;
302        }
303        // Split after byte 3: tail ends with "\r\n.", current starts with "\r\n"
304        if tail_len >= 3
305            && current_len >= 2
306            && tail[tail_len - 3..tail_len] == *b"\r\n."
307            && current[..2] == *b"\r\n"
308        {
309            return true;
310        }
311        // Split after byte 4: tail ends with "\r\n.\r", current starts with "\n"
312        if tail_len >= 4
313            && current_len >= 1
314            && tail[tail_len - 4..tail_len] == *b"\r\n.\r"
315            && current[0] == b'\n'
316        {
317            return true;
318        }
319
320        false
321    }
322
323    /// Check if response is a disconnect/goodbye (205)
324    ///
325    /// Per [RFC 3977 §5.4](https://datatracker.ietf.org/doc/html/rfc3977#section-5.4),
326    /// code 205 indicates "Connection closing" / "Goodbye".
327    ///
328    /// **Optimization**: Direct byte prefix check, no parsing.
329    #[inline]
330    pub fn is_disconnect(data: &[u8]) -> bool {
331        data.len() >= 3 && data.starts_with(b"205")
332    }
333
334    /// Extract message-ID from command arguments using fast byte searching
335    ///
336    /// Per [RFC 5536 §3.1.3](https://datatracker.ietf.org/doc/html/rfc5536#section-3.1.3),
337    /// message-IDs have the format `<local-part@domain>`.
338    ///
339    /// Examples:
340    /// - `<article123@news.example.com>`
341    /// - `<20231014.123456@server.domain>`
342    ///
343    /// **Optimization**: Uses `memchr` for fast '<' and '>' detection.
344    #[inline]
345    pub fn extract_message_id(command: &str) -> Option<MessageId<'_>> {
346        let trimmed = command.trim();
347        let bytes = trimmed.as_bytes();
348
349        // Find opening '<' using fast memchr
350        let start = memchr::memchr(b'<', bytes)?;
351
352        // Find closing '>' after the '<'
353        // Since end is relative to &bytes[start..], the actual position is start + end
354        let end = memchr::memchr(b'>', &bytes[start + 1..])?;
355        let msgid_end = start + end + 2; // +1 for the slice offset, +1 to include '>'
356
357        // Safety: Message-IDs are ASCII, so no need for is_char_boundary checks
358        // We already know msgid_end is valid since memchr found '>' at that position
359        MessageId::from_borrowed(&trimmed[start..msgid_end]).ok()
360    }
361
362    /// Validate message-ID format according to [RFC 5536 §3.1.3](https://datatracker.ietf.org/doc/html/rfc5536#section-3.1.3)
363    ///
364    /// A valid message-ID must:
365    /// - Start with '<' and end with '>'
366    /// - Contain exactly one '@' character
367    /// - Have content before and after the '@' (local-part and domain)
368    ///
369    /// Per [RFC 5536 §3.1.3](https://datatracker.ietf.org/doc/html/rfc5536#section-3.1.3):
370    /// ```text
371    /// msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS]
372    /// ```
373    ///
374    /// **Note**: This is a basic validation - full RFC 5536 validation is more complex.
375    #[inline]
376    pub fn validate_message_id(msgid: &str) -> bool {
377        let trimmed = msgid.trim();
378
379        // Must start with < and end with >
380        if !trimmed.starts_with('<') || !trimmed.ends_with('>') {
381            return false;
382        }
383
384        // Extract content between < and >
385        let content = &trimmed[1..trimmed.len() - 1];
386
387        // Must contain exactly one @
388        let at_count = content.bytes().filter(|&b| b == b'@').count();
389        if at_count != 1 {
390            return false;
391        }
392
393        // Must have content before and after @
394        if let Some(at_pos) = content.find('@') {
395            at_pos > 0 && at_pos < content.len() - 1
396        } else {
397            false
398        }
399    }
400
401    /// Check if data contains the end-of-multiline marker (legacy method)
402    #[inline]
403    #[allow(dead_code)]
404    pub fn has_multiline_terminator(data: &[u8]) -> bool {
405        // NNTP multiline responses end with "\r\n.\r\n"
406        if data.len() < 5 {
407            return false;
408        }
409
410        // Look for the terminator at the end
411        data.ends_with(b"\r\n.\r\n") || data.ends_with(b"\n.\r\n")
412    }
413}
414
415/// Response parser for NNTP protocol
416pub struct ResponseParser;
417
418impl ResponseParser {
419    /// Check if a response starts with a success code (2xx or 3xx)
420    #[inline]
421    #[allow(dead_code)]
422    pub fn is_success_response(data: &[u8]) -> bool {
423        ResponseCode::parse(data).is_success()
424    }
425
426    /// Check if response is a greeting (200 or 201)
427    #[inline]
428    #[allow(dead_code)]
429    pub fn is_greeting(data: &[u8]) -> bool {
430        matches!(ResponseCode::parse(data), ResponseCode::Greeting(_))
431    }
432
433    /// Check if response indicates authentication is required (381 or 480)
434    #[inline]
435    #[allow(dead_code)]
436    pub fn is_auth_required(data: &[u8]) -> bool {
437        matches!(ResponseCode::parse(data), ResponseCode::AuthRequired(_))
438    }
439
440    /// Check if response indicates successful authentication (281)
441    #[inline]
442    #[allow(dead_code)]
443    pub fn is_auth_success(data: &[u8]) -> bool {
444        matches!(ResponseCode::parse(data), ResponseCode::AuthSuccess)
445    }
446
447    /// Check if response has a specific status code
448    ///
449    /// This is useful for checking specific response codes like 111 (DATE response),
450    /// or any other specific code that doesn't have a dedicated helper.
451    #[inline]
452    pub fn is_response_code(data: &[u8], code: u16) -> bool {
453        NntpResponse::parse_status_code(data) == Some(code)
454    }
455}
456
457#[cfg(test)]
458mod tests;