tokio_scgi/
server.rs

1#![deny(warnings)]
2
3use bytes::{Buf, BufMut, BytesMut};
4use std::{io, mem};
5use tokio_util::codec::{Decoder, Encoder};
6
/// Terminator byte searched for at the end of each header key and header value.
const NUL: u8 = 0;
/// Upper bound, in bytes, for any single header name or value (32 KiB). This is deliberately far
/// above the 4k-8k limit that most web servers enforce.
const MAX_HEADER_STRING_BYTES: usize = 32 << 10;
/// Upper bound, in bytes, for the combined header content (256 KiB). This is deliberately far
/// above the 4k-8k limit that most web servers enforce.
const MAX_HEADER_BYTES: usize = 256 << 10;
14
15/// A parsed SCGI request header with key/value header data, and/or bytes from the raw request body.
16#[derive(Clone, Debug, Eq, PartialEq)]
17pub enum SCGIRequest {
18    /// The Vec contains the headers. The BytesMut optionally contains raw byte data from
19    /// the request body, which may be followed by additional `BodyFragment`s in later calls.
20    /// The `Content-Length` header, required by SCGI, can be used to detect whether to wait for
21    /// additional `BodyFragment`s.
22    Request(Vec<(String, String)>, BytesMut),
23
24    /// Additional body fragment(s), used for streaming fragmented request body data. These should
25    /// only be relevant in cases where the leading `Request` value doesn't contain all of the body.
26    BodyFragment(BytesMut),
27}
28
/// Parser position within a single SCGI request.
#[derive(Debug, Clone, PartialEq, Eq)]
enum CodecState {
    /// Reading the decimal length prefix of the header netstring.
    /// => HeaderKey when ':' is encountered and the declared size is > 0.
    /// => ContentSeparator when ':' is encountered and the declared size is 0.
    HeaderSize,

    /// Reading the name of a header.
    /// => HeaderValue when NUL is encountered.
    HeaderKey,

    /// Reading the value of a header.
    /// => HeaderKey when NUL is encountered and declared header bytes remain.
    /// => ContentSeparator when NUL is encountered and no declared header bytes remain.
    HeaderValue,

    /// Expecting the ',' that separates the headers from the content.
    /// => Content when ',' is encountered.
    ContentSeparator,

    /// Passing payload content through to the caller as it arrives.
    Content,
}
53
54/// A `Codec` implementation that parses SCGI requests for SCGI servers like backend services.
55/// The Decoder parses and returns `SCGIRequest` objects containing header/body request data from an
56/// SCGI client such as a frontend web server. The Encoder passes through the raw response to be sent
57/// back to the SCGI client.
58#[derive(Clone, Debug, Eq, PartialEq)]
59pub struct SCGICodec {
60    /// Decoder state. See `CodecState` for transition info.
61    decoder_state: CodecState,
62
63    /// The amount of unconsumed header remaining. There should be a ',' at this index.
64    header_remaining: usize,
65
66    /// The accumulated header_key, assigned when exiting HeaderKey state and cleared/consumed when
67    /// leaving HeaderValue state
68    header_key: String,
69
70    /// The accumulated headers, populated when leaving HeaderValue states and forwarded to caller
71    /// when entering Content state from last HeaderValue state. Intentionally using a `Vec` to
72    /// preserve ordering.
73    headers: Vec<(String, String)>,
74
75    /// Pointer to index where searches should begin for a character in the provided buffer. Must be
76    /// reset to 0 after consuming from the buffer.
77    next_search_index: usize,
78}
79
/// Builds an `Err(io::Error)` of kind `InvalidData` whose message is produced by `format!` from
/// the macro arguments (a format string followed by its parameters).
macro_rules! io_err {
    // At least one token (the format string) is required. The matcher and the transcriber use
    // the same `+` repetition so they cannot disagree, and the absolute `::std` paths keep the
    // macro independent of whatever the call site has imported.
    ($($arg:tt)+) => {
        Err(::std::io::Error::new(
            ::std::io::ErrorKind::InvalidData,
            format!($($arg)+),
        ))
    };
}
84
85impl SCGICodec {
86    /// Returns a client `SCGICodec` for accepting and parsing SCGI-format requests by SCGI servers
87    /// like backend services.
88    pub fn new() -> SCGICodec {
89        SCGICodec {
90            decoder_state: CodecState::HeaderSize,
91            header_remaining: 0,
92            header_key: String::new(),
93            headers: Vec::new(),
94            next_search_index: 0,
95        }
96    }
97
98    /// Loops and consumes all available headers in the buffer, returning a `SCGIRequest::Headers`
99    /// result if complete headers were available, or `None` if the end of the headers wasn't yet
100    /// reachable in the buffer.
101    fn consume_headers(&mut self, buf: &mut BytesMut) -> Result<Option<SCGIRequest>, io::Error> {
102        loop {
103            match self.decoder_state {
104                CodecState::ContentSeparator => {
105                    // Just consume the ',' that should be present, or complain if it isn't found
106                    if buf.len() == 0 {
107                        return Ok(None);
108                    } else if buf[0] == b',' {
109                        // Cut the ',' from the buffer, return headers and switch to content mode
110                        buf.advance(1);
111                        self.next_search_index = 0;
112                        self.decoder_state = CodecState::Content;
113                        return Ok(Some(SCGIRequest::Request(
114                            mem::replace(&mut self.headers, Vec::new()),
115                            // Include any remaining body content in this output as well.
116                            // In most cases this should effectively conclude the request.
117                            buf.split_to(buf.len()),
118                        )));
119                    } else {
120                        // Should always have the comma, missing it implies corrupt input.
121                        return io_err!("Missing ',' separating headers from content");
122                    }
123                }
124                CodecState::HeaderKey | CodecState::HeaderValue => {
125                    if let Some(end_offset) =
126                        buf[self.next_search_index..].iter().position(|b| *b == NUL)
127                    {
128                        // Consume string and trailing NUL from buffer:
129                        let bytes_with_nul = buf.split_to(self.next_search_index + end_offset + 1);
130                        self.next_search_index = 0;
131                        self.header_remaining -= bytes_with_nul.len();
132                        // Found NUL for end of a header string, consume
133                        match self.decoder_state {
134                            CodecState::HeaderKey => {
135                                // Store the header key and enter header value state.
136                                match consume_header_string(bytes_with_nul) {
137                                    Ok(key) => self.header_key = key,
138                                    Err(e) => return io_err!("Failed to parse header key: {}", e),
139                                }
140                                self.decoder_state = CodecState::HeaderValue;
141                            }
142                            CodecState::HeaderValue => {
143                                // Store the header key+value entry and enter header key OR content state.
144                                match consume_header_string(bytes_with_nul) {
145                                    Ok(val) => self.headers.push((
146                                        mem::replace(&mut self.header_key, String::new()),
147                                        val,
148                                    )),
149                                    Err(e) => {
150                                        return io_err!(
151                                            "Failed to parse value for header {}: {}",
152                                            self.header_key,
153                                            e
154                                        )
155                                    }
156                                };
157                                if self.header_remaining > 0 {
158                                    // Still in headers, set up search for next key
159                                    self.decoder_state = CodecState::HeaderKey;
160                                } else {
161                                    // Reached end of headers, but consume separator ',' before returning
162                                    self.decoder_state = CodecState::ContentSeparator;
163                                }
164                            }
165                            _ => panic!("Unexpected state {:?}", self.decoder_state),
166                        }
167                    } else {
168                        // No NUL available yet, try again
169                        self.next_search_index = buf.len();
170                        if self.next_search_index > MAX_HEADER_STRING_BYTES {
171                            // This string is getting to be way too long. Bad data? Give up.
172                            return io_err!(
173                                "Header key or value size exceeds maximum {} bytes",
174                                MAX_HEADER_STRING_BYTES
175                            );
176                        }
177                        return Ok(None);
178                    }
179                }
180                CodecState::HeaderSize | CodecState::Content => {
181                    panic!("Unexpected state {:?}", self.decoder_state);
182                }
183            }
184        }
185    }
186}
187
188/// Decodes SCGI-format requests, while forwarding through any content payload
189impl Decoder for SCGICodec {
190    type Item = SCGIRequest;
191    type Error = io::Error;
192
193    fn decode(&mut self, buf: &mut BytesMut) -> Result<Option<SCGIRequest>, io::Error> {
194        match self.decoder_state {
195            CodecState::HeaderSize => {
196                // Search for ':' which follows the header size int
197                if let Some(end_offset) = buf[self.next_search_index..]
198                    .iter()
199                    .position(|b| *b == b':')
200                {
201                    // Consume size string and trailing ':' from start of buffer
202                    // Store the header size and enter header key state
203                    let size_with_colon = buf.split_to(self.next_search_index + end_offset + 1);
204                    // Always ensure next_search_index is updated, even if there's an error.
205                    // This avoids index bounds errors in future passes.
206                    self.next_search_index = 0;
207                    self.header_remaining = consume_header_size(size_with_colon)?;
208                    if self.header_remaining > MAX_HEADER_BYTES {
209                        // This declared size is way too long. Bad data? Give up. We just want to
210                        // avoid accumulating too much data on the header `Vec`. When we've consumed
211                        // all `header_remaining` bytes we will switch to content forwarding mode.
212                        return io_err!("Header size exceeds maximum {} bytes", MAX_HEADER_BYTES);
213                    }
214                    if self.header_remaining > 0 {
215                        // Start consuming header(s)
216                        self.decoder_state = CodecState::HeaderKey;
217                        self.consume_headers(buf)
218                    } else {
219                        // No headers, skip straight to content separator.
220                        // According to the scgi spec this shouldn't happen but let's allow it.
221                        self.decoder_state = CodecState::ContentSeparator;
222                        // Handles consuming the content separator (and emitting the empty headers)
223                        // internally.
224                        self.consume_headers(buf)
225                    }
226                } else {
227                    // No ':' yet, try again
228                    self.next_search_index = buf.len();
229                    Ok(None)
230                }
231            }
232            CodecState::HeaderKey | CodecState::HeaderValue | CodecState::ContentSeparator => {
233                // Resumable internal loop to consume all available headers in buffer
234                self.consume_headers(buf)
235            }
236            CodecState::Content => {
237                // Consume and forward whatever was received
238                if buf.is_empty() {
239                    Ok(None)
240                } else {
241                    Ok(Some(SCGIRequest::BodyFragment(buf.split_to(buf.len()))))
242                }
243            }
244        }
245    }
246}
247
248fn consume_header_size(bytes_with_colon: BytesMut) -> Result<usize, io::Error> {
249    if bytes_with_colon.len() == 1 {
250        // Got an empty size value, i.e. ':' with no preceding integers.
251        // The header size value cannot be empty, must at least provide a '0:'.
252        return io_err!("Header size cannot be an empty string");
253    } else if bytes_with_colon.len() > 2 && bytes_with_colon[0] == b'0' {
254        // Size cannot start with a '0' unless it's literally '0:' for empty headers
255        return io_err!("Header size cannot be a non-zero value with a leading '0'");
256    }
257    // Omit trailing ':' to parse buffer:
258    let size_str = String::from_utf8(bytes_with_colon[..bytes_with_colon.len() - 1].to_vec())
259        .or_else(|_| io_err!("Header size is not a UTF-8 string"))?;
260    size_str
261        .parse()
262        .or_else(|size_str| io_err!("Header size is not an integer: '{}'", size_str))
263}
264
265fn consume_header_string(bytes_with_nul: BytesMut) -> Result<String, io::Error> {
266    // Omit trailing NUL to parse buffer as string.
267    String::from_utf8(bytes_with_nul[..bytes_with_nul.len() - 1].to_vec())
268        .or_else(|_| io_err!("Header key or value is not a UTF-8 string"))
269}
270
271/// Forwards a raw response to an SCGI request back to the client.
272impl Encoder<Vec<u8>> for SCGICodec {
273    type Error = io::Error;
274
275    fn encode(&mut self, data: Vec<u8>, buf: &mut BytesMut) -> Result<(), io::Error> {
276        // Forward content (HTTP response, typically?) as-is
277        buf.reserve(data.len());
278        buf.put_slice(data.as_slice());
279        Ok(())
280    }
281}