nntp_proxy/protocol/response.rs
1//! NNTP Response Parsing and Handling
2//!
3//! This module implements efficient parsing of NNTP server responses according to
4//! [RFC 3977](https://datatracker.ietf.org/doc/html/rfc3977) with optimizations
5//! for high-throughput proxy use.
6//!
7//! # NNTP Protocol References
8//!
9//! - **[RFC 3977 §3.2]** - Response format and status codes
10//! - **[RFC 3977 §3.4.1]** - Multiline data blocks
11//! - **[RFC 5536 §3.1.3]** - Message-ID format specification
12//!
13//! [RFC 3977 §3.2]: https://datatracker.ietf.org/doc/html/rfc3977#section-3.2
14//! [RFC 3977 §3.4.1]: https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1
15//! [RFC 5536 §3.1.3]: https://datatracker.ietf.org/doc/html/rfc5536#section-3.1.3
16//!
17//! # Response Format
18//!
19//! Per [RFC 3977 §3.2](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2):
20//! ```text
//! response = status-line [multiline-data]
22//! status-line = status-code SP status-text CRLF
23//! status-code = 3DIGIT
24//! ```
25//!
26//! # Multiline Responses
27//!
28//! Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1):
29//! ```text
30//! Multiline responses end with a line containing a single period:
31//! CRLF "." CRLF
32//! ```
33
34use crate::types::MessageId;
35
/// Categorized NNTP response code for type-safe handling.
///
/// Groups the numeric status code of a response by how it has to be handled
/// (greeting, authentication, multiline payload, ...), following
/// [RFC 3977 §3.2](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2).
/// Values are produced by [`ResponseCode::parse`].
///
/// # Response Code Ranges
///
/// Per [RFC 3977 §3.2.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2.1):
/// - **1xx**: Informational (categorized as multiline by this module)
/// - **2xx**: Success (may be multiline)
/// - **3xx**: Success so far, further input expected
/// - **4xx**: Temporary failure
/// - **5xx**: Permanent failure
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResponseCode {
    /// Server greeting - [RFC 3977 §5.1](https://datatracker.ietf.org/doc/html/rfc3977#section-5.1)
    /// - 200: Posting allowed
    /// - 201: No posting allowed
    ///
    /// Carries the numeric code that was received.
    Greeting(u16),

    /// Disconnect/goodbye - [RFC 3977 §5.4](https://datatracker.ietf.org/doc/html/rfc3977#section-5.4)
    /// - 205: Connection closing
    Disconnect,

    /// Authentication required - [RFC 4643 §2.3](https://datatracker.ietf.org/doc/html/rfc4643#section-2.3)
    /// - 381: Password required
    /// - 480: Authentication required
    ///
    /// Carries the numeric code that was received.
    AuthRequired(u16),

    /// Authentication successful - [RFC 4643 §2.5.1](https://datatracker.ietf.org/doc/html/rfc4643#section-2.5.1)
    /// - 281: Authentication accepted
    AuthSuccess,

    /// Multiline data response
    /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1):
    /// - All 1xx codes (100-199)
    /// - Specific 2xx codes: 215, 220, 221, 222, 224, 225, 230, 231, 282
    ///
    /// NOTE(review): RFC 3977 §7.1 defines 111 (DATE) as a single-line
    /// response, yet it falls into this category because every 1xx code does;
    /// confirm that callers special-case it.
    MultilineData(u16),

    /// Single-line response (every valid code not covered by another variant)
    SingleLine(u16),

    /// Invalid or unparseable response (no 3-digit status code at the start)
    Invalid,
}
81
82impl ResponseCode {
83 /// Parse response data into a categorized response code
84 ///
85 /// Per [RFC 3977 §3.2](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2),
86 /// responses start with a 3-digit status code.
87 ///
88 /// **Optimization**: Direct byte-to-digit conversion avoids UTF-8 overhead.
89 #[inline]
90 pub fn parse(data: &[u8]) -> Self {
91 let code = match NntpResponse::parse_status_code(data) {
92 Some(c) => c,
93 None => return Self::Invalid,
94 };
95
96 match code {
97 // [RFC 3977 §5.1](https://datatracker.ietf.org/doc/html/rfc3977#section-5.1)
98 200 | 201 => Self::Greeting(code),
99
100 // [RFC 3977 §5.4](https://datatracker.ietf.org/doc/html/rfc3977#section-5.4)
101 205 => Self::Disconnect,
102
103 // [RFC 4643 §2.5.1](https://datatracker.ietf.org/doc/html/rfc4643#section-2.5.1)
104 281 => Self::AuthSuccess,
105
106 // [RFC 4643 §2.3](https://datatracker.ietf.org/doc/html/rfc4643#section-2.3)
107 381 | 480 => Self::AuthRequired(code),
108
109 // Multiline responses per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1)
110 // All 1xx are informational multiline
111 100..=199 => Self::MultilineData(code),
112 // Specific 2xx multiline responses
113 215 | 220 | 221 | 222 | 224 | 225 | 230 | 231 | 282 => Self::MultilineData(code),
114
115 // Everything else is a single-line response
116 _ => Self::SingleLine(code),
117 }
118 }
119
120 /// Check if this response type is multiline
121 ///
122 /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1),
123 /// multiline responses require special handling with terminator detection.
124 #[inline]
125 pub fn is_multiline(&self) -> bool {
126 matches!(self, Self::MultilineData(_))
127 }
128
129 /// Get the numeric status code if available
130 #[inline]
131 pub fn status_code(&self) -> Option<u16> {
132 match self {
133 Self::Greeting(c)
134 | Self::AuthRequired(c)
135 | Self::MultilineData(c)
136 | Self::SingleLine(c) => Some(*c),
137 Self::Disconnect => Some(205),
138 Self::AuthSuccess => Some(281),
139 Self::Invalid => None,
140 }
141 }
142
143 /// Check if this is a success response (2xx or 3xx)
144 ///
145 /// Per [RFC 3977 §3.2.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2.1):
146 /// - 2xx: Success
147 /// - 3xx: Success so far, send more input
148 #[inline]
149 pub fn is_success(&self) -> bool {
150 self.status_code()
151 .is_some_and(|code| (200..400).contains(&code))
152 }
153}
154
/// Represents a parsed NNTP response.
///
/// Plain data holder for a response's status code, its multiline flag and
/// the raw bytes it was parsed from.
#[derive(Debug, Clone, PartialEq)]
pub struct NntpResponse {
    /// Status code (e.g., 200, 381, 500)
    pub status_code: u16,
    /// Whether this is a multiline response
    pub is_multiline: bool,
    /// Complete response data including status line
    pub data: Vec<u8>,
}
165
166impl NntpResponse {
/// Read the 3-digit status code at the start of `data`.
///
/// Per [RFC 3977 §3.2](https://datatracker.ietf.org/doc/html/rfc3977#section-3.2),
/// responses begin with a 3-digit status code (ASCII digits '0'-'9').
///
/// Returns `None` when `data` is shorter than three bytes or when any of the
/// first three bytes is not an ASCII digit. Bytes after the third are not
/// inspected, and the numeric range is not restricted (`b"000"` yields `0`).
///
/// **Optimization**: works on raw bytes, so no UTF-8 validation or string
/// parsing is involved.
#[inline]
pub fn parse_status_code(data: &[u8]) -> Option<u16> {
    // The slice pattern doubles as the "at least three bytes" check.
    let &[hundreds, tens, ones, ..] = data else {
        return None;
    };

    let all_digits = hundreds.is_ascii_digit() && tens.is_ascii_digit() && ones.is_ascii_digit();
    if !all_digits {
        return None;
    }

    // Safe to subtract: each byte was just verified to be in b'0'..=b'9'.
    let value = |byte: u8| u16::from(byte - b'0');
    Some(value(hundreds) * 100 + value(tens) * 10 + value(ones))
}
194
/// Report whether a status code announces a multiline data block.
///
/// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1),
/// certain status codes indicate multiline data follows.
///
/// # Multiline Response Codes
/// - **1xx**: All informational responses (100-199)
/// - **2xx**: Specific codes - 215, 220, 221, 222, 224, 225, 230, 231, 282
///
/// NOTE(review): RFC 3977 §7.1 defines 111 (DATE) as a single-line response,
/// but it is reported as multiline here because the whole 1xx range is;
/// confirm that callers handle 111 separately. 211 is reported as
/// single-line, which matches GROUP but not LISTGROUP.
#[inline]
pub fn is_multiline_response(status_code: u16) -> bool {
    matches!(
        status_code,
        100..=199 | 215 | 220 | 221 | 222 | 224 | 225 | 230 | 231 | 282
    )
}
211
/// Report whether `data` ends with the NNTP multiline terminator.
///
/// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1):
/// ```text
/// Multiline blocks are terminated by a line containing only a period:
///   CRLF "." CRLF
/// Which appears in the data stream as: \r\n.\r\n
/// ```
///
/// Only the strict five-byte form is accepted; a bare-LF variant such as
/// `\n.\r\n` does not count.
///
/// **Optimization**: Single suffix check, no scanning.
#[inline]
pub fn has_terminator_at_end(data: &[u8]) -> bool {
    // `ends_with` already answers `false` for inputs shorter than the suffix.
    data.ends_with(b"\r\n.\r\n")
}
228
229 /// Find the position of the NNTP multiline terminator in data
230 ///
231 /// Returns the position AFTER the terminator (exclusive end), or None if not found.
232 /// This handles the case where extra data appears after the terminator in the same chunk.
233 ///
234 /// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1),
235 /// the terminator is exactly "\r\n.\r\n" (CRLF, dot, CRLF).
236 ///
237 /// **Optimization**: Uses `memchr::memchr_iter()` to find '\r' bytes (SIMD-accelerated),
238 /// then validates the full 5-byte pattern. This eliminates the need to create new slices
239 /// on each iteration (which the manual loop approach requires with `&data[pos..]`).
240 ///
241 /// Benchmarks show this is **72% faster for small responses** (37ns → 13ns) and
242 /// **64% faster for medium responses** (109ns → 40ns) compared to the manual loop
243 /// that creates a new slice on each iteration.
244 #[inline]
245 pub fn find_terminator_end(data: &[u8]) -> Option<usize> {
246 let n = data.len();
247 if n < 5 {
248 return None;
249 }
250
251 // Use memchr::Memchr iterator to avoid repeated slice creation
252 for r_pos in memchr::memchr_iter(b'\r', data) {
253 // Not enough space for full terminator
254 if r_pos + 5 > n {
255 return None;
256 }
257
258 // Check for full terminator pattern
259 if &data[r_pos..r_pos + 5] == b"\r\n.\r\n" {
260 return Some(r_pos + 5);
261 }
262 }
263
264 None
265 }
266
/// Check if a terminator spans the boundary between `tail` and `current`.
///
/// This handles the case where a multiline terminator is split across two
/// read chunks, for example when the previous chunk ends with "\r\n." and
/// the current one is exactly "\r\n".
///
/// Per [RFC 3977 §3.4.1](https://datatracker.ietf.org/doc/html/rfc3977#section-3.4.1),
/// the terminator is exactly "\r\n.\r\n" (CRLF, dot, CRLF).
///
/// # Arguments
/// - `tail` / `tail_len`: buffer holding the end of the previously received
///   data; only its first `tail_len` bytes are considered.
/// - `current` / `current_len`: the newly received chunk; only its first
///   `current_len` bytes are considered.
///
/// # Returns
/// `true` only when the considered part of `current` is 1 to 4 bytes long,
/// is exactly the final bytes of the terminator, and the considered part of
/// `tail` ends with the terminator bytes that precede them. A `current` of
/// five or more bytes contains any trailing terminator in full, which is
/// what [`NntpResponse::has_terminator_at_end`] detects.
///
/// Never panics: a declared length larger than its slice yields `false`,
/// and `tail` may be of any length (only its last bytes are compared).
#[inline]
pub fn has_spanning_terminator(
    tail: &[u8],
    tail_len: usize,
    current: &[u8],
    current_len: usize,
) -> bool {
    const TERMINATOR: &[u8; 5] = b"\r\n.\r\n";

    // Inconsistent lengths are reported as "no match" instead of panicking.
    let (Some(tail), Some(current)) = (tail.get(..tail_len), current.get(..current_len))
    else {
        return false;
    };

    // At least one terminator byte must come from each side of the boundary.
    if !(1..TERMINATOR.len()).contains(&current.len()) {
        return false;
    }

    // `current` must be the terminator's ending; `tail` must end with the
    // part in front of it. No scratch buffer is needed, so nothing can
    // overflow, and a 1-byte tail ("\r" + "\n.\r\n") is matched as well.
    let (in_tail, in_current) = TERMINATOR.split_at(TERMINATOR.len() - current.len());
    tail.ends_with(in_tail) && current == in_current
}
296
/// Report whether the response is a disconnect/goodbye (205).
///
/// Per [RFC 3977 §5.4](https://datatracker.ietf.org/doc/html/rfc3977#section-5.4),
/// code 205 indicates "Connection closing" / "Goodbye".
///
/// Only the first three bytes are compared; whatever follows them is
/// ignored.
///
/// **Optimization**: Direct byte prefix check, no parsing.
#[inline]
pub fn is_disconnect(data: &[u8]) -> bool {
    matches!(data, [b'2', b'0', b'5', ..])
}
307
308 /// Extract message-ID from command arguments using fast byte searching
309 ///
310 /// Per [RFC 5536 §3.1.3](https://datatracker.ietf.org/doc/html/rfc5536#section-3.1.3),
311 /// message-IDs have the format `<local-part@domain>`.
312 ///
313 /// Examples:
314 /// - `<article123@news.example.com>`
315 /// - `<20231014.123456@server.domain>`
316 ///
317 /// **Optimization**: Uses `memchr` for fast '<' and '>' detection.
318 #[inline]
319 pub fn extract_message_id(command: &str) -> Option<MessageId<'_>> {
320 let trimmed = command.trim();
321 let bytes = trimmed.as_bytes();
322
323 // Find opening '<' using fast memchr
324 let start = memchr::memchr(b'<', bytes)?;
325
326 // Find closing '>' after the '<'
327 // Since end is relative to &bytes[start..], the actual position is start + end
328 let end = memchr::memchr(b'>', &bytes[start + 1..])?;
329 let msgid_end = start + end + 2; // +1 for the slice offset, +1 to include '>'
330
331 // Safety: Message-IDs are ASCII, so no need for is_char_boundary checks
332 // We already know msgid_end is valid since memchr found '>' at that position
333 MessageId::from_borrowed(&trimmed[start..msgid_end]).ok()
334 }
335
/// Validate message-ID format according to [RFC 5536 §3.1.3](https://datatracker.ietf.org/doc/html/rfc5536#section-3.1.3)
///
/// After trimming surrounding whitespace, a valid message-ID must:
/// - Start with '<' and end with '>'
/// - Contain exactly one '@' character between the brackets
/// - Have content before and after the '@' (local-part and domain)
///
/// Per [RFC 5536 §3.1.3](https://datatracker.ietf.org/doc/html/rfc5536#section-3.1.3):
/// ```text
/// msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS]
/// ```
///
/// **Note**: This is a basic validation - full RFC 5536 validation is more complex.
#[inline]
pub fn validate_message_id(msgid: &str) -> bool {
    // Peel off the angle brackets; a lone "<" fails at the suffix step.
    let Some(inner) = msgid
        .trim()
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'))
    else {
        return false;
    };

    // Splitting at the first '@' gives both sides; a second '@' would end up
    // in the domain part.
    match inner.split_once('@') {
        Some((local, domain)) => {
            !local.is_empty() && !domain.is_empty() && !domain.contains('@')
        }
        None => false,
    }
}
374
/// Check if data ends with the end-of-multiline marker (legacy method).
///
/// More lenient than the strict RFC 3977 check: the marker line may be
/// preceded by a bare LF, i.e. both "\r\n.\r\n" and "\n.\r\n" endings are
/// accepted. Inputs shorter than five bytes are always rejected.
#[inline]
#[allow(dead_code)]
pub fn has_multiline_terminator(data: &[u8]) -> bool {
    // Anything ending in "\r\n.\r\n" also ends in "\n.\r\n", so the shorter
    // suffix covers both accepted forms.
    data.len() >= 5 && data.ends_with(b"\n.\r\n")
}
387}
388
/// Response parser for NNTP protocol
///
/// Stateless namespace type: every helper takes the raw response bytes and
/// classifies them via [`ResponseCode::parse`] or
/// [`NntpResponse::parse_status_code`].
pub struct ResponseParser;

impl ResponseParser {
    /// Check if a response starts with a success code (2xx or 3xx)
    ///
    /// Returns `false` when `data` does not start with a 3-digit status code.
    #[inline]
    #[allow(dead_code)]
    pub fn is_success_response(data: &[u8]) -> bool {
        ResponseCode::parse(data).is_success()
    }

    /// Check if response is a greeting (200 or 201)
    #[inline]
    #[allow(dead_code)]
    pub fn is_greeting(data: &[u8]) -> bool {
        matches!(ResponseCode::parse(data), ResponseCode::Greeting(_))
    }

    /// Check if response indicates authentication is required (381 or 480)
    #[inline]
    #[allow(dead_code)]
    pub fn is_auth_required(data: &[u8]) -> bool {
        matches!(ResponseCode::parse(data), ResponseCode::AuthRequired(_))
    }

    /// Check if response indicates successful authentication (281)
    #[inline]
    #[allow(dead_code)]
    pub fn is_auth_success(data: &[u8]) -> bool {
        matches!(ResponseCode::parse(data), ResponseCode::AuthSuccess)
    }

    /// Check if response has a specific status code
    ///
    /// This is useful for checking specific response codes like 111 (DATE response),
    /// or any other specific code that doesn't have a dedicated helper.
    ///
    /// Returns `false` when `data` does not start with a 3-digit status code.
    #[inline]
    pub fn is_response_code(data: &[u8], code: u16) -> bool {
        NntpResponse::parse_status_code(data) == Some(code)
    }
}
430
431#[cfg(test)]
432mod tests;