untrusted/untrusted.rs
1// Copyright 2015-2016 Brian Smith.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15//! untrusted.rs: Safe, fast, zero-panic, zero-crashing, zero-allocation
16//! parsing of untrusted inputs in Rust.
17//!
18//! <code>git clone https://github.com/briansmith/untrusted</code>
19//!
20//! untrusted.rs goes beyond Rust's normal safety guarantees by also
21//! guaranteeing that parsing will be panic-free, as long as
22//! `untrusted::Input::as_slice_less_safe()` is not used. It avoids copying
23//! data and heap allocation and strives to prevent common pitfalls such as
24//! accidentally parsing input bytes multiple times. In order to meet these
25//! goals, untrusted.rs is limited in functionality such that it works best for
26//! input languages with a small fixed amount of lookahead such as ASN.1, TLS,
27//! TCP/IP, and many other networking, IPC, and related protocols. Languages
28//! that require more lookahead and/or backtracking require some significant
29//! contortions to parse using this framework. It would not be realistic to use
30//! it for parsing programming language code, for example.
31//!
32//! The overall pattern for using untrusted.rs is:
33//!
34//! 1. Write a recursive-descent-style parser for the input language, where the
35//! input data is given as a `&mut untrusted::Reader` parameter to each
36//! function. Each function should have a return type of `Result<V, E>` for
37//! some value type `V` and some error type `E`, either or both of which may
38//! be `()`. Functions for parsing the lowest-level language constructs
39//! should be defined. Those lowest-level functions will parse their inputs
40//! using `Reader::read_byte()`, `Reader::peek()`, and similar functions.
41//! Higher-level language constructs are then parsed by calling the
42//! lower-level functions in sequence.
43//!
44//! 2. Wrap the top-most functions of your recursive-descent parser in
45//! functions that take their input data as an `untrusted::Input`. The
46//! wrapper functions should call the `Input`'s `read_all` (or a variant
47//! thereof) method. The wrapper functions are the only ones that should be
48//! exposed outside the parser's module.
49//!
50//! 3. After receiving the input data to parse, wrap it in an `untrusted::Input`
51//! using `untrusted::Input::from()` as early as possible. Pass the
52//! `untrusted::Input` to the wrapper functions when they need to be parsed.
53//!
54//! In general parsers built using `untrusted::Reader` do not need to explicitly
55//! check for end-of-input unless they are parsing optional constructs, because
56//! `Reader::read_byte()` will return `Err(EndOfInput)` on end-of-input.
57//! Similarly, parsers using `untrusted::Reader` generally don't need to check
58//! for extra junk at the end of the input as long as the parser's API uses the
59//! pattern described above, as `read_all` and its variants automatically check
60//! for trailing junk. `Reader::skip_to_end()` must be used when any remaining
61//! unread input should be ignored without triggering an error.
62//!
63//! untrusted.rs works best when all processing of the input data is done
64//! through the `untrusted::Input` and `untrusted::Reader` types. In
65//! particular, avoid trying to parse input data using functions that take
66//! byte slices. However, when you need to access a part of the input data as
67//! a slice to use a function that isn't written using untrusted.rs,
68//! `Input::as_slice_less_safe()` can be used.
69//!
70//! It is recommended to use `use untrusted;` and then `untrusted::Input`,
71//! `untrusted::Reader`, etc., instead of using `use untrusted::*`. Qualifying
72//! the names with `untrusted` helps remind the reader of the code that it is
73//! dealing with *untrusted* input.
74//!
75//! # Examples
76//!
77//! [*ring*](https://github.com/briansmith/ring)'s parser for the subset of
78//! ASN.1 DER it needs to understand,
79//! [`ring::der`](https://github.com/briansmith/ring/blob/master/src/der.rs),
80//! is built on top of untrusted.rs. *ring* also uses untrusted.rs to parse ECC
81//! public keys, RSA PKCS#1 1.5 padding, and for all other parsing it does.
82//!
83//! All of [webpki](https://github.com/briansmith/webpki)'s parsing of X.509
84//! certificates (also ASN.1 DER) is done using untrusted.rs.
85
86#![doc(html_root_url = "https://briansmith.org/rustdoc/")]
87// `#[derive(...)]` uses `#[allow(unused_qualifications)]` internally.
88#![deny(unused_qualifications)]
89#![forbid(
90 anonymous_parameters,
91 box_pointers,
92 legacy_directory_ownership,
93 missing_docs,
94 trivial_casts,
95 trivial_numeric_casts,
96 unsafe_code,
97 unstable_features,
98 unused_extern_crates,
99 unused_import_braces,
100 unused_results,
101 variant_size_differences,
102 warnings
103)]
104#![no_std]
105
106/// A wrapper around `&'a [u8]` that helps in writing panic-free code.
107///
108/// No methods of `Input` will ever panic.
109#[derive(Clone, Copy, Debug, Eq)]
110pub struct Input<'a> {
111 value: no_panic::Slice<'a>,
112}
113
114impl<'a> Input<'a> {
115 /// Construct a new `Input` for the given input `bytes`.
116 pub const fn from(bytes: &'a [u8]) -> Self {
117 // This limit is important for avoiding integer overflow. In particular,
118 // `Reader` assumes that an `i + 1 > i` if `input.value.get(i)` does
119 // not return `None`. According to the Rust language reference, the
120 // maximum object size is `core::isize::MAX`, and in practice it is
121 // impossible to create an object of size `core::usize::MAX` or larger.
122 Self {
123 value: no_panic::Slice::new(bytes),
124 }
125 }
126
127 /// Returns `true` if the input is empty and false otherwise.
128 #[inline]
129 pub fn is_empty(&self) -> bool { self.value.is_empty() }
130
131 /// Returns the length of the `Input`.
132 #[inline]
133 pub fn len(&self) -> usize { self.value.len() }
134
135 /// Calls `read` with the given input as a `Reader`, ensuring that `read`
136 /// consumed the entire input. If `read` does not consume the entire input,
137 /// `incomplete_read` is returned.
138 pub fn read_all<F, R, E>(&self, incomplete_read: E, read: F) -> Result<R, E>
139 where
140 F: FnOnce(&mut Reader<'a>) -> Result<R, E>,
141 {
142 let mut input = Reader::new(*self);
143 let result = read(&mut input)?;
144 if input.at_end() {
145 Ok(result)
146 } else {
147 Err(incomplete_read)
148 }
149 }
150
151 /// Access the input as a slice so it can be processed by functions that
152 /// are not written using the Input/Reader framework.
153 #[inline]
154 pub fn as_slice_less_safe(&self) -> &'a [u8] { self.value.as_slice_less_safe() }
155}
156
157impl<'a> From<&'a [u8]> for Input<'a> {
158 #[inline]
159 fn from(value: &'a [u8]) -> Self { Self { value: no_panic::Slice::new(value)} }
160}
161
162// #[derive(PartialEq)] would result in lifetime bounds that are
163// unnecessarily restrictive; see
164// https://github.com/rust-lang/rust/issues/26925.
165impl PartialEq<Input<'_>> for Input<'_> {
166 #[inline]
167 fn eq(&self, other: &Input) -> bool {
168 self.as_slice_less_safe() == other.as_slice_less_safe()
169 }
170}
171
172impl PartialEq<[u8]> for Input<'_> {
173 #[inline]
174 fn eq(&self, other: &[u8]) -> bool { self.as_slice_less_safe() == other }
175}
176
177impl PartialEq<Input<'_>> for [u8] {
178 #[inline]
179 fn eq(&self, other: &Input) -> bool { other.as_slice_less_safe() == self }
180}
181
182/// Calls `read` with the given input as a `Reader`, ensuring that `read`
183/// consumed the entire input. When `input` is `None`, `read` will be
184/// called with `None`.
185pub fn read_all_optional<'a, F, R, E>(
186 input: Option<Input<'a>>, incomplete_read: E, read: F,
187) -> Result<R, E>
188where
189 F: FnOnce(Option<&mut Reader<'a>>) -> Result<R, E>,
190{
191 match input {
192 Some(input) => {
193 let mut input = Reader::new(input);
194 let result = read(Some(&mut input))?;
195 if input.at_end() {
196 Ok(result)
197 } else {
198 Err(incomplete_read)
199 }
200 },
201 None => read(None),
202 }
203}
204
205/// A read-only, forward-only* cursor into the data in an `Input`.
206///
207/// Using `Reader` to parse input helps to ensure that no byte of the input
208/// will be accidentally processed more than once. Using `Reader` in
209/// conjunction with `read_all` and `read_all_optional` helps ensure that no
210/// byte of the input is accidentally left unprocessed. The methods of `Reader`
211/// never panic, so `Reader` also assists the writing of panic-free code.
212///
213/// \* `Reader` is not strictly forward-only because of the method
214/// `get_input_between_marks`, which is provided mainly to support calculating
215/// digests over parsed data.
216#[derive(Debug)]
217pub struct Reader<'a> {
218 input: no_panic::Slice<'a>,
219 i: usize,
220}
221
/// An index into the already-parsed input of a `Reader`.
///
/// A `Mark` is obtained from `Reader::mark()` and is consumed by
/// `Reader::get_input_between_marks()`.
#[derive(Debug)]
pub struct Mark {
    /// The reader position that was current when the mark was taken.
    i: usize,
}
226
227impl<'a> Reader<'a> {
228 /// Construct a new Reader for the given input. Use `read_all` or
229 /// `read_all_optional` instead of `Reader::new` whenever possible.
230 #[inline]
231 pub fn new(input: Input<'a>) -> Self {
232 Self {
233 input: input.value,
234 i: 0,
235 }
236 }
237
238 /// Returns `true` if the reader is at the end of the input, and `false`
239 /// otherwise.
240 #[inline]
241 pub fn at_end(&self) -> bool { self.i == self.input.len() }
242
243 /// Returns an `Input` for already-parsed input that has had its boundaries
244 /// marked using `mark`.
245 #[inline]
246 pub fn get_input_between_marks(
247 &self, mark1: Mark, mark2: Mark,
248 ) -> Result<Input<'a>, EndOfInput> {
249 self.input
250 .subslice(mark1.i..mark2.i)
251 .map(|subslice| Input { value: subslice })
252 .ok_or(EndOfInput)
253 }
254
255 /// Return the current position of the `Reader` for future use in a call
256 /// to `get_input_between_marks`.
257 #[inline]
258 pub fn mark(&self) -> Mark { Mark { i: self.i } }
259
260 /// Returns `true` if there is at least one more byte in the input and that
261 /// byte is equal to `b`, and false otherwise.
262 #[inline]
263 pub fn peek(&self, b: u8) -> bool {
264 match self.input.get(self.i) {
265 Some(actual_b) => b == *actual_b,
266 None => false,
267 }
268 }
269
270 /// Reads the next input byte.
271 ///
272 /// Returns `Ok(b)` where `b` is the next input byte, or `Err(EndOfInput)`
273 /// if the `Reader` is at the end of the input.
274 #[inline]
275 pub fn read_byte(&mut self) -> Result<u8, EndOfInput> {
276 match self.input.get(self.i) {
277 Some(b) => {
278 self.i += 1; // safe from overflow; see Input::from().
279 Ok(*b)
280 },
281 None => Err(EndOfInput),
282 }
283 }
284
285 /// Skips `num_bytes` of the input, returning the skipped input as an
286 /// `Input`.
287 ///
288 /// Returns `Ok(i)` if there are at least `num_bytes` of input remaining,
289 /// and `Err(EndOfInput)` otherwise.
290 #[inline]
291 pub fn read_bytes(&mut self, num_bytes: usize) -> Result<Input<'a>, EndOfInput> {
292 let new_i = self.i.checked_add(num_bytes).ok_or(EndOfInput)?;
293 let ret = self
294 .input
295 .subslice(self.i..new_i)
296 .map(|subslice| Input { value: subslice })
297 .ok_or(EndOfInput)?;
298 self.i = new_i;
299 Ok(ret)
300 }
301
302 /// Skips the reader to the end of the input, returning the skipped input
303 /// as an `Input`.
304 #[inline]
305 pub fn read_bytes_to_end(&mut self) -> Input<'a> {
306 let to_skip = self.input.len() - self.i;
307 self.read_bytes(to_skip).unwrap()
308 }
309
310 /// Calls `read()` with the given input as a `Reader`. On success, returns a
311 /// pair `(bytes_read, r)` where `bytes_read` is what `read()` consumed and
312 /// `r` is `read()`'s return value.
313 pub fn read_partial<F, R, E>(&mut self, read: F) -> Result<(Input<'a>, R), E>
314 where
315 F: FnOnce(&mut Reader<'a>) -> Result<R, E>,
316 {
317 let start = self.i;
318 let r = read(self)?;
319 let bytes_read = Input {
320 value: self.input.subslice(start..self.i).unwrap()
321 };
322 Ok((bytes_read, r))
323 }
324
325 /// Skips `num_bytes` of the input.
326 ///
327 /// Returns `Ok(i)` if there are at least `num_bytes` of input remaining,
328 /// and `Err(EndOfInput)` otherwise.
329 #[inline]
330 pub fn skip(&mut self, num_bytes: usize) -> Result<(), EndOfInput> {
331 self.read_bytes(num_bytes).map(|_| ())
332 }
333
334 /// Skips the reader to the end of the input.
335 #[inline]
336 pub fn skip_to_end(&mut self) -> () { let _ = self.read_bytes_to_end(); }
337}
338
/// The error returned when the end of the input is reached before an
/// operation could be completed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EndOfInput;
343
/// Internal helper module: a byte-slice wrapper whose whole API is
/// panic-free, so that code built on it cannot index out of bounds.
mod no_panic {
    /// A wrapper around a slice that exposes no functions that can panic.
    #[derive(Clone, Copy, Debug, Eq, PartialEq)]
    pub struct Slice<'a> {
        bytes: &'a [u8],
    }

    impl<'a> Slice<'a> {
        /// Wraps `bytes` without copying them.
        #[inline]
        pub const fn new(bytes: &'a [u8]) -> Self { Self { bytes } }

        /// Returns the byte at index `i`, or `None` if `i` is out of bounds.
        #[inline]
        pub fn get(&self, i: usize) -> Option<&u8> { self.bytes.get(i) }

        /// Returns the sub-slice covering the range `r`, or `None` if `r` is
        /// not a valid range within this slice (its end is past the end of
        /// the slice, or its start is greater than its end).
        // `core` is always in the extern prelude, so the path below resolves
        // without a `use core;` import.
        #[inline]
        pub fn subslice(&self, r: core::ops::Range<usize>) -> Option<Self> {
            self.bytes.get(r).map(|bytes| Self { bytes })
        }

        /// Returns `true` if the slice contains no bytes.
        #[inline]
        pub fn is_empty(&self) -> bool { self.bytes.is_empty() }

        /// Returns the number of bytes in the slice.
        #[inline]
        pub fn len(&self) -> usize { self.bytes.len() }

        /// Returns the wrapped bytes as a plain slice, on which panicking
        /// operations such as indexing are available again.
        #[inline]
        pub fn as_slice_less_safe(&self) -> &'a [u8] { self.bytes }
    }
} // mod no_panic
375} // mod no_panic