tokio_scgi/server.rs
1#![deny(warnings)]
2
3use bytes::{Buf, BufMut, BytesMut};
4use std::{io, mem};
5use tokio_util::codec::{Decoder, Encoder};
6
/// Terminator byte that ends every header key and every header value.
const NUL: u8 = 0x00;
/// Upper bound, in bytes, on a single header name or value (32 KiB). This is far above the
/// 4k-8k limit that most web servers enforce.
const MAX_HEADER_STRING_BYTES: usize = 32 << 10;
/// Upper bound, in bytes, on the declared size of all header content (256 KiB). This is far
/// above the 4k-8k limit that most web servers enforce.
const MAX_HEADER_BYTES: usize = 256 << 10;
14
15/// A parsed SCGI request header with key/value header data, and/or bytes from the raw request body.
16#[derive(Clone, Debug, Eq, PartialEq)]
17pub enum SCGIRequest {
18 /// The Vec contains the headers. The BytesMut optionally contains raw byte data from
19 /// the request body, which may be followed by additional `BodyFragment`s in later calls.
20 /// The `Content-Length` header, required by SCGI, can be used to detect whether to wait for
21 /// additional `BodyFragment`s.
22 Request(Vec<(String, String)>, BytesMut),
23
24 /// Additional body fragment(s), used for streaming fragmented request body data. These should
25 /// only be relevant in cases where the leading `Request` value doesn't contain all of the body.
26 BodyFragment(BytesMut),
27}
28
/// Decoder position within an SCGI request. Each variant lists the transitions out of it.
#[derive(Debug, Clone, PartialEq, Eq)]
enum CodecState {
    /// Reading the decimal netstring length that prefixes the headers.
    /// => HeaderKey on ':' when the declared size is greater than zero.
    /// => ContentSeparator on ':' when the declared size is zero.
    HeaderSize,

    /// Reading a NUL-terminated header key.
    /// => HeaderValue on NUL.
    HeaderKey,

    /// Reading a NUL-terminated header value.
    /// => HeaderKey on NUL while declared header bytes remain.
    /// => ContentSeparator on NUL once all declared header bytes are consumed.
    HeaderValue,

    /// Expecting the single ',' that separates the headers from the body.
    /// => Content on ','.
    ContentSeparator,

    /// Passing body bytes through unmodified. The body length is not tracked here; callers
    /// can compare against the `CONTENT_LENGTH` header.
    Content,
}
53
54/// A `Codec` implementation that parses SCGI requests for SCGI servers like backend services.
55/// The Decoder parses and returns `SCGIRequest` objects containing header/body request data from an
56/// SCGI client such as a frontend web server. The Encoder passes through the raw response to be sent
57/// back to the SCGI client.
58#[derive(Clone, Debug, Eq, PartialEq)]
59pub struct SCGICodec {
60 /// Decoder state. See `CodecState` for transition info.
61 decoder_state: CodecState,
62
63 /// The amount of unconsumed header remaining. There should be a ',' at this index.
64 header_remaining: usize,
65
66 /// The accumulated header_key, assigned when exiting HeaderKey state and cleared/consumed when
67 /// leaving HeaderValue state
68 header_key: String,
69
70 /// The accumulated headers, populated when leaving HeaderValue states and forwarded to caller
71 /// when entering Content state from last HeaderValue state. Intentionally using a `Vec` to
72 /// preserve ordering.
73 headers: Vec<(String, String)>,
74
75 /// Pointer to index where searches should begin for a character in the provided buffer. Must be
76 /// reset to 0 after consuming from the buffer.
77 next_search_index: usize,
78}
79
/// Builds an `Err(io::Error)` of kind `InvalidData` whose message is produced by `format!`.
///
/// Takes the same arguments as `format!` (a format string, optionally followed by values).
/// At least one token is required, matching what `format!` itself accepts. Standard library
/// paths are fully qualified so the macro does not depend on what the call site has imported.
macro_rules! io_err {
    ($($arg:tt)+) => (
        Err(::std::io::Error::new(
            ::std::io::ErrorKind::InvalidData,
            format!($($arg)+),
        ))
    )
}
84
85impl SCGICodec {
86 /// Returns a client `SCGICodec` for accepting and parsing SCGI-format requests by SCGI servers
87 /// like backend services.
88 pub fn new() -> SCGICodec {
89 SCGICodec {
90 decoder_state: CodecState::HeaderSize,
91 header_remaining: 0,
92 header_key: String::new(),
93 headers: Vec::new(),
94 next_search_index: 0,
95 }
96 }
97
98 /// Loops and consumes all available headers in the buffer, returning a `SCGIRequest::Headers`
99 /// result if complete headers were available, or `None` if the end of the headers wasn't yet
100 /// reachable in the buffer.
101 fn consume_headers(&mut self, buf: &mut BytesMut) -> Result<Option<SCGIRequest>, io::Error> {
102 loop {
103 match self.decoder_state {
104 CodecState::ContentSeparator => {
105 // Just consume the ',' that should be present, or complain if it isn't found
106 if buf.len() == 0 {
107 return Ok(None);
108 } else if buf[0] == b',' {
109 // Cut the ',' from the buffer, return headers and switch to content mode
110 buf.advance(1);
111 self.next_search_index = 0;
112 self.decoder_state = CodecState::Content;
113 return Ok(Some(SCGIRequest::Request(
114 mem::replace(&mut self.headers, Vec::new()),
115 // Include any remaining body content in this output as well.
116 // In most cases this should effectively conclude the request.
117 buf.split_to(buf.len()),
118 )));
119 } else {
120 // Should always have the comma, missing it implies corrupt input.
121 return io_err!("Missing ',' separating headers from content");
122 }
123 }
124 CodecState::HeaderKey | CodecState::HeaderValue => {
125 if let Some(end_offset) =
126 buf[self.next_search_index..].iter().position(|b| *b == NUL)
127 {
128 // Consume string and trailing NUL from buffer:
129 let bytes_with_nul = buf.split_to(self.next_search_index + end_offset + 1);
130 self.next_search_index = 0;
131 self.header_remaining -= bytes_with_nul.len();
132 // Found NUL for end of a header string, consume
133 match self.decoder_state {
134 CodecState::HeaderKey => {
135 // Store the header key and enter header value state.
136 match consume_header_string(bytes_with_nul) {
137 Ok(key) => self.header_key = key,
138 Err(e) => return io_err!("Failed to parse header key: {}", e),
139 }
140 self.decoder_state = CodecState::HeaderValue;
141 }
142 CodecState::HeaderValue => {
143 // Store the header key+value entry and enter header key OR content state.
144 match consume_header_string(bytes_with_nul) {
145 Ok(val) => self.headers.push((
146 mem::replace(&mut self.header_key, String::new()),
147 val,
148 )),
149 Err(e) => {
150 return io_err!(
151 "Failed to parse value for header {}: {}",
152 self.header_key,
153 e
154 )
155 }
156 };
157 if self.header_remaining > 0 {
158 // Still in headers, set up search for next key
159 self.decoder_state = CodecState::HeaderKey;
160 } else {
161 // Reached end of headers, but consume separator ',' before returning
162 self.decoder_state = CodecState::ContentSeparator;
163 }
164 }
165 _ => panic!("Unexpected state {:?}", self.decoder_state),
166 }
167 } else {
168 // No NUL available yet, try again
169 self.next_search_index = buf.len();
170 if self.next_search_index > MAX_HEADER_STRING_BYTES {
171 // This string is getting to be way too long. Bad data? Give up.
172 return io_err!(
173 "Header key or value size exceeds maximum {} bytes",
174 MAX_HEADER_STRING_BYTES
175 );
176 }
177 return Ok(None);
178 }
179 }
180 CodecState::HeaderSize | CodecState::Content => {
181 panic!("Unexpected state {:?}", self.decoder_state);
182 }
183 }
184 }
185 }
186}
187
188/// Decodes SCGI-format requests, while forwarding through any content payload
189impl Decoder for SCGICodec {
190 type Item = SCGIRequest;
191 type Error = io::Error;
192
193 fn decode(&mut self, buf: &mut BytesMut) -> Result<Option<SCGIRequest>, io::Error> {
194 match self.decoder_state {
195 CodecState::HeaderSize => {
196 // Search for ':' which follows the header size int
197 if let Some(end_offset) = buf[self.next_search_index..]
198 .iter()
199 .position(|b| *b == b':')
200 {
201 // Consume size string and trailing ':' from start of buffer
202 // Store the header size and enter header key state
203 let size_with_colon = buf.split_to(self.next_search_index + end_offset + 1);
204 // Always ensure next_search_index is updated, even if there's an error.
205 // This avoids index bounds errors in future passes.
206 self.next_search_index = 0;
207 self.header_remaining = consume_header_size(size_with_colon)?;
208 if self.header_remaining > MAX_HEADER_BYTES {
209 // This declared size is way too long. Bad data? Give up. We just want to
210 // avoid accumulating too much data on the header `Vec`. When we've consumed
211 // all `header_remaining` bytes we will switch to content forwarding mode.
212 return io_err!("Header size exceeds maximum {} bytes", MAX_HEADER_BYTES);
213 }
214 if self.header_remaining > 0 {
215 // Start consuming header(s)
216 self.decoder_state = CodecState::HeaderKey;
217 self.consume_headers(buf)
218 } else {
219 // No headers, skip straight to content separator.
220 // According to the scgi spec this shouldn't happen but let's allow it.
221 self.decoder_state = CodecState::ContentSeparator;
222 // Handles consuming the content separator (and emitting the empty headers)
223 // internally.
224 self.consume_headers(buf)
225 }
226 } else {
227 // No ':' yet, try again
228 self.next_search_index = buf.len();
229 Ok(None)
230 }
231 }
232 CodecState::HeaderKey | CodecState::HeaderValue | CodecState::ContentSeparator => {
233 // Resumable internal loop to consume all available headers in buffer
234 self.consume_headers(buf)
235 }
236 CodecState::Content => {
237 // Consume and forward whatever was received
238 if buf.is_empty() {
239 Ok(None)
240 } else {
241 Ok(Some(SCGIRequest::BodyFragment(buf.split_to(buf.len()))))
242 }
243 }
244 }
245 }
246}
247
248fn consume_header_size(bytes_with_colon: BytesMut) -> Result<usize, io::Error> {
249 if bytes_with_colon.len() == 1 {
250 // Got an empty size value, i.e. ':' with no preceding integers.
251 // The header size value cannot be empty, must at least provide a '0:'.
252 return io_err!("Header size cannot be an empty string");
253 } else if bytes_with_colon.len() > 2 && bytes_with_colon[0] == b'0' {
254 // Size cannot start with a '0' unless it's literally '0:' for empty headers
255 return io_err!("Header size cannot be a non-zero value with a leading '0'");
256 }
257 // Omit trailing ':' to parse buffer:
258 let size_str = String::from_utf8(bytes_with_colon[..bytes_with_colon.len() - 1].to_vec())
259 .or_else(|_| io_err!("Header size is not a UTF-8 string"))?;
260 size_str
261 .parse()
262 .or_else(|size_str| io_err!("Header size is not an integer: '{}'", size_str))
263}
264
265fn consume_header_string(bytes_with_nul: BytesMut) -> Result<String, io::Error> {
266 // Omit trailing NUL to parse buffer as string.
267 String::from_utf8(bytes_with_nul[..bytes_with_nul.len() - 1].to_vec())
268 .or_else(|_| io_err!("Header key or value is not a UTF-8 string"))
269}
270
271/// Forwards a raw response to an SCGI request back to the client.
272impl Encoder<Vec<u8>> for SCGICodec {
273 type Error = io::Error;
274
275 fn encode(&mut self, data: Vec<u8>, buf: &mut BytesMut) -> Result<(), io::Error> {
276 // Forward content (HTTP response, typically?) as-is
277 buf.reserve(data.len());
278 buf.put_slice(data.as_slice());
279 Ok(())
280 }
281}