nntp_proxy/protocol/
response.rs

1//! NNTP Response Parsing and Handling
2//!
3//! This module implements efficient parsing of NNTP server responses according to
4//! [RFC 3977](https://datatracker.ietf.org/doc/html/rfc3977) with optimizations
5//! for high-throughput proxy use.
6//!
7//! # NNTP Protocol References
8//!
9//! - **[RFC 3977 §3.2]** - Response format and status codes
10//! - **[RFC 3977 §3.4.1]** - Multiline data blocks
11//! - **[RFC 5536 §3.1.3]** - Message-ID format specification
12//!
13//! [RFC 3977 §3.2]: https://datatracker.ietf.org/doc/html/rfc3977#section-3.2
14//! [RFC 3977 §3.4.1]: https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1
15//! [RFC 5536 §3.1.3]: https://datatracker.ietf.org/doc/html/rfc5536#section-3.1.3
16//!
17//! # Response Format
18//!
19//! Per [RFC 3977 §3.2](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2):
20//! ```text
21//! response     = status-line [CRLF multiline-data]
22//! status-line  = status-code SP status-text CRLF
23//! status-code  = 3DIGIT
24//! ```
25//!
26//! # Multiline Responses
27//!
28//! Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1):
29//! ```text
30//! Multiline responses end with a line containing a single period:
31//! CRLF "." CRLF
32//! ```
33
34use crate::types::MessageId;
35
/// Semantic category of an NNTP status code
///
/// Every response starts with a three-digit code
/// ([RFC 3977 §3.2](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2)).
/// This enum groups those codes by how they need to be handled, so that
/// callers can `match` on a category instead of comparing raw numbers.
///
/// # Status Code Classes
///
/// From [RFC 3977 §3.2.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2.1):
/// - **1xx**: Informational
/// - **2xx**: Command completed successfully
/// - **3xx**: Success so far, the server expects more input
/// - **4xx**: Temporary failure
/// - **5xx**: Permanent failure
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ResponseCode {
    /// Initial server greeting ([RFC 3977 §5.1](https://datatracker.ietf.org/doc/html/rfc3977#section-5.1))
    /// - 200: Service available, posting allowed
    /// - 201: Service available, posting prohibited
    Greeting(u16),

    /// Server is closing the connection ([RFC 3977 §5.4](https://datatracker.ietf.org/doc/html/rfc3977#section-5.4))
    /// - 205: Connection closing
    Disconnect,

    /// Credentials are needed ([RFC 4643 §2.3](https://datatracker.ietf.org/doc/html/rfc4643#section-2.3))
    /// - 381: Password required
    /// - 480: Authentication required
    AuthRequired(u16),

    /// Credentials were accepted ([RFC 4643 §2.5.1](https://datatracker.ietf.org/doc/html/rfc4643#section-2.5.1))
    /// - 281: Authentication accepted
    AuthSuccess,

    /// Status line is followed by a dot-terminated data block
    /// ([RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1))
    /// - Any code in 100-199
    /// - 2xx codes 215, 220, 221, 222, 224, 225, 230, 231, 282
    MultilineData(u16),

    /// Any other well-formed code; the response is the status line alone
    SingleLine(u16),

    /// The data did not begin with a parseable status code
    Invalid,
}
81
82impl ResponseCode {
83    /// Parse response data into a categorized response code
84    ///
85    /// Per [RFC 3977 §3.2](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2),
86    /// responses start with a 3-digit status code.
87    ///
88    /// **Optimization**: Direct byte-to-digit conversion avoids UTF-8 overhead.
89    #[inline]
90    pub fn parse(data: &[u8]) -> Self {
91        let code = match NntpResponse::parse_status_code(data) {
92            Some(c) => c,
93            None => return Self::Invalid,
94        };
95
96        match code {
97            // [RFC 3977 §5.1](https://datatracker.ietf.org/doc/html/rfc3977#section-5.1)
98            200 | 201 => Self::Greeting(code),
99
100            // [RFC 3977 §5.4](https://datatracker.ietf.org/doc/html/rfc3977#section-5.4)
101            205 => Self::Disconnect,
102
103            // [RFC 4643 §2.5.1](https://datatracker.ietf.org/doc/html/rfc4643#section-2.5.1)
104            281 => Self::AuthSuccess,
105
106            // [RFC 4643 §2.3](https://datatracker.ietf.org/doc/html/rfc4643#section-2.3)
107            381 | 480 => Self::AuthRequired(code),
108
109            // Multiline responses per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1)
110            // All 1xx are informational multiline
111            100..=199 => Self::MultilineData(code),
112            // Specific 2xx multiline responses
113            215 | 220 | 221 | 222 | 224 | 225 | 230 | 231 | 282 => Self::MultilineData(code),
114
115            // Everything else is a single-line response
116            _ => Self::SingleLine(code),
117        }
118    }
119
120    /// Check if this response type is multiline
121    ///
122    /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1),
123    /// multiline responses require special handling with terminator detection.
124    #[inline]
125    pub fn is_multiline(&self) -> bool {
126        matches!(self, Self::MultilineData(_))
127    }
128
129    /// Get the numeric status code if available
130    #[inline]
131    pub fn status_code(&self) -> Option<u16> {
132        match self {
133            Self::Greeting(c)
134            | Self::AuthRequired(c)
135            | Self::MultilineData(c)
136            | Self::SingleLine(c) => Some(*c),
137            Self::Disconnect => Some(205),
138            Self::AuthSuccess => Some(281),
139            Self::Invalid => None,
140        }
141    }
142
143    /// Check if this is a success response (2xx or 3xx)
144    ///
145    /// Per [RFC 3977 §3.2.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2.1):
146    /// - 2xx: Success
147    /// - 3xx: Success so far, send more input
148    #[inline]
149    pub fn is_success(&self) -> bool {
150        self.status_code()
151            .is_some_and(|code| (200..400).contains(&code))
152    }
153}
154
/// An NNTP server response in owned form
#[derive(Clone, Debug, PartialEq)]
pub struct NntpResponse {
    /// Three-digit status code from the status line (e.g., 200, 381, 500)
    pub status_code: u16,
    /// `true` when the response carries a multiline data block
    pub is_multiline: bool,
    /// Raw bytes of the entire response, status line included
    pub data: Vec<u8>,
}
165
166impl NntpResponse {
167    /// Parse a status code from response data
168    ///
169    /// Per [RFC 3977 §3.2](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2),
170    /// responses begin with a 3-digit status code (ASCII digits '0'-'9').
171    ///
172    /// **Optimization**: Direct byte-to-digit conversion without UTF-8 validation.
173    /// Status codes are guaranteed to be ASCII digits per the RFC.
174    #[inline]
175    pub fn parse_status_code(data: &[u8]) -> Option<u16> {
176        if data.len() < 3 {
177            return None;
178        }
179
180        // Fast path: Direct ASCII digit conversion without UTF-8 overhead
181        // Per RFC 3977, status codes are exactly 3 ASCII digits
182        let d0 = data[0].wrapping_sub(b'0');
183        let d1 = data[1].wrapping_sub(b'0');
184        let d2 = data[2].wrapping_sub(b'0');
185
186        // Validate all three are digits (0-9)
187        if d0 > 9 || d1 > 9 || d2 > 9 {
188            return None;
189        }
190
191        // Combine into u16: d0*100 + d1*10 + d2
192        Some((d0 as u16) * 100 + (d1 as u16) * 10 + (d2 as u16))
193    }
194
195    /// Check if a response indicates a multiline response
196    ///
197    /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1),
198    /// certain status codes indicate multiline data follows.
199    ///
200    /// # Multiline Response Codes
201    /// - **1xx**: All informational responses (100-199)
202    /// - **2xx**: Specific codes - 215, 220, 221, 222, 224, 225, 230, 231, 282
203    #[inline]
204    pub fn is_multiline_response(status_code: u16) -> bool {
205        match status_code {
206            100..=199 => true, // All 1xx are multiline
207            215 | 220 | 221 | 222 | 224 | 225 | 230 | 231 | 282 => true, // Specific 2xx codes
208            _ => false,
209        }
210    }
211
212    /// Check if data ends with the NNTP multiline terminator
213    ///
214    /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1):
215    /// ```text
216    /// Multiline blocks are terminated by a line containing only a period:
217    /// CRLF "." CRLF
218    /// Which appears in the data stream as: \r\n.\r\n
219    /// ```
220    ///
221    /// **Optimization**: Single suffix check, no scanning.
222    #[inline]
223    pub fn has_terminator_at_end(data: &[u8]) -> bool {
224        let n = data.len();
225        // Only check for proper RFC 3977 terminator: \r\n.\r\n
226        n >= 5 && data[n - 5..n] == *b"\r\n.\r\n"
227    }
228
229    /// Find the position of the NNTP multiline terminator in data
230    ///
231    /// Returns the position AFTER the terminator (exclusive end), or None if not found.
232    /// This handles the case where extra data appears after the terminator in the same chunk.
233    ///
234    /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1),
235    /// the terminator is exactly "\r\n.\r\n" (CRLF, dot, CRLF).
236    ///
237    /// **Optimization**: Uses `memchr::memchr_iter()` to find '\r' bytes (SIMD-accelerated),
238    /// then validates the full 5-byte pattern. This eliminates the need to create new slices
239    /// on each iteration (which the manual loop approach requires with `&data[pos..]`).
240    ///
241    /// Benchmarks show this is **72% faster for small responses** (37ns → 13ns) and
242    /// **64% faster for medium responses** (109ns → 40ns) compared to the manual loop
243    /// that creates a new slice on each iteration.
244    #[inline]
245    pub fn find_terminator_end(data: &[u8]) -> Option<usize> {
246        let n = data.len();
247        if n < 5 {
248            return None;
249        }
250
251        // Use memchr::Memchr iterator to avoid repeated slice creation
252        for r_pos in memchr::memchr_iter(b'\r', data) {
253            // Not enough space for full terminator
254            if r_pos + 5 > n {
255                return None;
256            }
257
258            // Check for full terminator pattern
259            if &data[r_pos..r_pos + 5] == b"\r\n.\r\n" {
260                return Some(r_pos + 5);
261            }
262        }
263
264        None
265    }
266
267    /// Check if a terminator spans across a boundary between tail and current chunk
268    ///
269    /// This handles the case where a multiline terminator is split across two read chunks.
270    /// For example: previous chunk ends with "\r\n." and current starts with "\r\n"
271    ///
272    /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1),
273    /// the terminator is exactly "\r\n.\r\n" (CRLF, dot, CRLF).
274    #[inline]
275    pub fn has_spanning_terminator(
276        tail: &[u8],
277        tail_len: usize,
278        current: &[u8],
279        current_len: usize,
280    ) -> bool {
281        // Only check if we have a tail and current chunk is small enough for spanning
282        if tail_len < 2 || !(1..=4).contains(&current_len) {
283            return false;
284        }
285
286        // Build combined view: tail + start of current chunk
287        let mut check_buf = [0u8; 9]; // max: 4 tail + 5 current bytes
288        check_buf[..tail_len].copy_from_slice(&tail[..tail_len]);
289        let curr_copy = current_len.min(5);
290        check_buf[tail_len..tail_len + curr_copy].copy_from_slice(&current[..curr_copy]);
291        let total = tail_len + curr_copy;
292
293        // RFC 3977 requires exactly \r\n.\r\n (5 bytes)
294        total >= 5 && check_buf[total - 5..total] == *b"\r\n.\r\n"
295    }
296
297    /// Check if response is a disconnect/goodbye (205)
298    ///
299    /// Per [RFC 3977 §5.4](https://datatracker.ietf.org/doc/html/rfc3977#section-5.4),
300    /// code 205 indicates "Connection closing" / "Goodbye".
301    ///
302    /// **Optimization**: Direct byte prefix check, no parsing.
303    #[inline]
304    pub fn is_disconnect(data: &[u8]) -> bool {
305        data.len() >= 3 && data.starts_with(b"205")
306    }
307
308    /// Extract message-ID from command arguments using fast byte searching
309    ///
310    /// Per [RFC 5536 §3.1.3](https://datatracker.ietf.org/doc/html/rfc5536#section-3.1.3),
311    /// message-IDs have the format `<local-part@domain>`.
312    ///
313    /// Examples:
314    /// - `<article123@news.example.com>`
315    /// - `<20231014.123456@server.domain>`
316    ///
317    /// **Optimization**: Uses `memchr` for fast '<' and '>' detection.
318    #[inline]
319    pub fn extract_message_id(command: &str) -> Option<MessageId<'_>> {
320        let trimmed = command.trim();
321        let bytes = trimmed.as_bytes();
322
323        // Find opening '<' using fast memchr
324        let start = memchr::memchr(b'<', bytes)?;
325
326        // Find closing '>' after the '<'
327        // Since end is relative to &bytes[start..], the actual position is start + end
328        let end = memchr::memchr(b'>', &bytes[start + 1..])?;
329        let msgid_end = start + end + 2; // +1 for the slice offset, +1 to include '>'
330
331        // Safety: Message-IDs are ASCII, so no need for is_char_boundary checks
332        // We already know msgid_end is valid since memchr found '>' at that position
333        MessageId::from_borrowed(&trimmed[start..msgid_end]).ok()
334    }
335
336    /// Validate message-ID format according to [RFC 5536 §3.1.3](https://datatracker.ietf.org/doc/html/rfc5536#section-3.1.3)
337    ///
338    /// A valid message-ID must:
339    /// - Start with '<' and end with '>'
340    /// - Contain exactly one '@' character
341    /// - Have content before and after the '@' (local-part and domain)
342    ///
343    /// Per [RFC 5536 §3.1.3](https://datatracker.ietf.org/doc/html/rfc5536#section-3.1.3):
344    /// ```text
345    /// msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS]
346    /// ```
347    ///
348    /// **Note**: This is a basic validation - full RFC 5536 validation is more complex.
349    #[inline]
350    pub fn validate_message_id(msgid: &str) -> bool {
351        let trimmed = msgid.trim();
352
353        // Must start with < and end with >
354        if !trimmed.starts_with('<') || !trimmed.ends_with('>') {
355            return false;
356        }
357
358        // Extract content between < and >
359        let content = &trimmed[1..trimmed.len() - 1];
360
361        // Must contain exactly one @
362        let at_count = content.bytes().filter(|&b| b == b'@').count();
363        if at_count != 1 {
364            return false;
365        }
366
367        // Must have content before and after @
368        if let Some(at_pos) = content.find('@') {
369            at_pos > 0 && at_pos < content.len() - 1
370        } else {
371            false
372        }
373    }
374
375    /// Check if data contains the end-of-multiline marker (legacy method)
376    #[inline]
377    #[allow(dead_code)]
378    pub fn has_multiline_terminator(data: &[u8]) -> bool {
379        // NNTP multiline responses end with "\r\n.\r\n"
380        if data.len() < 5 {
381            return false;
382        }
383
384        // Look for the terminator at the end
385        data.ends_with(b"\r\n.\r\n") || data.ends_with(b"\n.\r\n")
386    }
387}
388
389/// Response parser for NNTP protocol
390pub struct ResponseParser;
391
392impl ResponseParser {
393    /// Check if a response starts with a success code (2xx or 3xx)
394    #[inline]
395    #[allow(dead_code)]
396    pub fn is_success_response(data: &[u8]) -> bool {
397        ResponseCode::parse(data).is_success()
398    }
399
400    /// Check if response is a greeting (200 or 201)
401    #[inline]
402    #[allow(dead_code)]
403    pub fn is_greeting(data: &[u8]) -> bool {
404        matches!(ResponseCode::parse(data), ResponseCode::Greeting(_))
405    }
406
407    /// Check if response indicates authentication is required (381 or 480)
408    #[inline]
409    #[allow(dead_code)]
410    pub fn is_auth_required(data: &[u8]) -> bool {
411        matches!(ResponseCode::parse(data), ResponseCode::AuthRequired(_))
412    }
413
414    /// Check if response indicates successful authentication (281)
415    #[inline]
416    #[allow(dead_code)]
417    pub fn is_auth_success(data: &[u8]) -> bool {
418        matches!(ResponseCode::parse(data), ResponseCode::AuthSuccess)
419    }
420
421    /// Check if response has a specific status code
422    ///
423    /// This is useful for checking specific response codes like 111 (DATE response),
424    /// or any other specific code that doesn't have a dedicated helper.
425    #[inline]
426    pub fn is_response_code(data: &[u8], code: u16) -> bool {
427        NntpResponse::parse_status_code(data) == Some(code)
428    }
429}
430
431#[cfg(test)]
432mod tests;