Skip to main content

netstring_parser/
lib.rs

1#![doc = include_str!("../README.md")]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![deny(
4    nonstandard_style,
5    rust_2018_idioms,
6    rustdoc::broken_intra_doc_links,
7    rustdoc::private_intra_doc_links
8)]
9#![forbid(non_ascii_idents, unsafe_code)]
10#![warn(
11    deprecated_in_future,
12    missing_copy_implementations,
13    missing_debug_implementations,
14    missing_docs,
15    unreachable_pub,
16    unused_import_braces,
17    unused_labels,
18    unused_lifetimes,
19    unused_qualifications,
20    unused_results
21)]
22#![allow(clippy::uninlined_format_args)]
23use std::{ops::Deref, str::Utf8Error};
24
25use thiserror::Error;
26
27/// A parser for **netstrings** (length-prefixed strings of the form `len:data,`).
28///
29/// This parser maintains an internal buffer of received bytes. You can append
30/// data to the buffer, parse complete netstrings, and discard processed data.
31#[derive(Debug)]
32pub struct NetstringParser {
33    buf: Vec<u8>,
34    len: usize,
35}
36
37impl NetstringParser {
38    /// Creates a new parser with a buffer of the given size.
39    pub fn new(buf_size: usize) -> Self {
40        Self {
41            buf: vec![0; buf_size],
42            len: 0,
43        }
44    }
45
46    /// Returns a mutable slice of the unused portion of the internal buffer.
47    ///
48    /// You can write data directly into this slice. After writing, you **must**
49    /// call [`advance`] with the number of bytes actually written
50    /// to update the parser's internal length.
51    ///
52    /// # Example
53    /// ```rust,ignore
54    /// let mut parser = NetstringParser::new(1024);
55    /// let buf = parser.available_buffer();
56    /// let bytes_written = some_io_read(buf); // hypothetical function
57    /// parser.advance(bytes_written);
58    /// ```
59    ///
60    /// [`advance`]: Self::advance
61    pub fn available_buffer(&mut self) -> &mut [u8] {
62        &mut self.buf[self.len..]
63    }
64
65    /// Advances the internal buffer position by `count` bytes.
66    ///
67    /// This method **must** be called after writing to the slice returned by
68    /// [`available_buffer`] to update the parser state.
69    ///
70    /// [`available_buffer`]: Self::available_buffer
71    pub fn advance(&mut self, count: usize) {
72        self.len += count;
73    }
74
75    /// Writes data into the parser's internal buffer.
76    ///
77    /// # Note
78    /// In most cases, you should prefer using [`available_buffer`] to get a mutable slice
79    /// and [`advance`] to indicate how many bytes were written. This avoids unnecessary
80    /// copying with the typical I/O methods.
81    ///
82    /// [`available_buffer`]: Self::available_buffer
83    /// [`advance`]: Self::advance
84    pub fn write(&mut self, data: &[u8]) -> Result<(), WriteError> {
85        let remaining = self.buf.len() - self.len;
86        if data.len() <= remaining {
87            self.buf[self.len..self.len + data.len()].copy_from_slice(data);
88            self.len += data.len();
89            Ok(())
90        } else {
91            Err(WriteError::BufferTooSmall)
92        }
93    }
94
95    /// Returns true if the internal buffer is full.
96    pub fn is_buffer_full(&self) -> bool {
97        self.len >= self.buf.len()
98    }
99
100    /// Returns true if the internal buffer is empty.
101    pub fn is_buffer_empty(&self) -> bool {
102        self.len == 0
103    }
104
105    /// Attempts to parse the next complete netstring from the buffer.
106    ///
107    /// Returns `Ok(Some(Netstring))` if a full netstring is available, `Ok(None)` if
108    /// more data is needed, or an error if the data is malformed.
109    pub fn parse_next<'a>(&'a mut self) -> Result<Option<Netstring<'a>>, NetstringError> {
110        match parse_length(&self.buf[..self.len])? {
111            None => Ok(None),
112            Some((len, rest)) => {
113                if rest.len() < len + 1 {
114                    return Ok(None); // need more data
115                }
116                if rest[len] != b',' {
117                    return Err(NetstringError::MissingComma);
118                }
119                let offset = self.len - rest.len();
120                Ok(Some(Netstring {
121                    parser: self,
122                    offset,
123                    length: len,
124                }))
125            }
126        }
127    }
128
129    /// Clears the parser, discarding all buffered data.
130    pub fn clear(&mut self) {
131        self.len = 0;
132    }
133
134    /// Discards the first `count` bytes from the buffer.
135    ///
136    /// Internal helper used by [`Netstring`] when a netstring is dropped.
137    fn discard(&mut self, count: usize) {
138        self.buf.copy_within(count..self.len, 0);
139        self.len = self.len.saturating_sub(count);
140    }
141}
142
143/// This error is returned by `Netstring::parse_next`
144#[derive(Debug, Error, Copy, Clone)]
145pub enum NetstringError {
146    /// The parsed string will be longer than the available buffer.
147    #[error("String too long")]
148    StringTooLong,
149    /// The given data is invalid.
150    #[error("Invalid data")]
151    InvalidData,
152    /// No colon found within the first 20 characters.
153    #[error("No colon found")]
154    NoColonFound,
155    /// Missing comma at end of string
156    #[error("Missing comma")]
157    MissingComma,
158    /// The length is not a decimal number
159    #[error("Invalid length")]
160    InvalidLength,
161}
162
163/// This error is returned by `NetstringParser::write`.
164#[derive(Debug, Error, Copy, Clone)]
165pub enum WriteError {
166    /// Buffer is too small for the data that is to be written
167    #[error("Buffer too small")]
168    BufferTooSmall,
169}
170
171/// A parsed netstring slice.
172///
173/// Automatically discards the underlying bytes when dropped.
174pub struct Netstring<'a> {
175    parser: &'a mut NetstringParser,
176    offset: usize,
177    length: usize,
178}
179
180impl Netstring<'_> {
181    /// Converts the netstring which consists of a slice of bytes
182    /// to a string slice.
183    pub fn to_str(&self) -> Result<&str, Utf8Error> {
184        std::str::from_utf8(self)
185    }
186    /// Get netstring as byte slice.
187    pub fn as_bytes(&self) -> &[u8] {
188        self
189    }
190}
191
192impl<'a> std::fmt::Debug for Netstring<'a> {
193    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
194        f.debug_tuple("Netstring").field(&self.as_bytes()).finish()
195    }
196}
197
198impl<'a> std::fmt::Display for Netstring<'a> {
199    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
200        match self.to_str() {
201            Ok(s) => f.write_str(s),
202            Err(_) => write!(f, "<invalid utf-8: {:?}>", self.as_bytes()),
203        }
204    }
205}
206
207impl<'a> Deref for Netstring<'a> {
208    type Target = [u8];
209    fn deref(&self) -> &Self::Target {
210        &self.parser.buf[self.offset..self.offset + self.length]
211    }
212}
213
214impl<'a> Drop for Netstring<'a> {
215    fn drop(&mut self) {
216        // Consume the netstring including the trailing comma
217        self.parser.discard(self.offset + self.length + 1);
218    }
219}
220
221fn parse_length(input: &[u8]) -> Result<Option<(usize, &[u8])>, NetstringError> {
222    let Some(colon_pos) = input.iter().position(|&b| b == b':') else {
223        if input.len() > 20 {
224            // It is safe to assume that if within the first 20 characters
225            // no `:` appeared that the message is invalid. This would fit
226            // message lengths up to 2^64 characters which is an unrealistic
227            // length for a netstring anyways.
228            return Err(NetstringError::NoColonFound);
229        }
230        return Ok(None);
231    };
232    let len = &input[..colon_pos];
233    let rest = &input[colon_pos + 1..];
234    let Ok(len) = std::str::from_utf8(len) else {
235        return Err(NetstringError::InvalidLength);
236    };
237    let Ok(len) = len.parse::<usize>() else {
238        return Err(NetstringError::InvalidLength);
239    };
240    Ok(Some((len, rest)))
241}