http_auth/parser.rs
1// Copyright (C) 2021 Scott Lamb <slamb@slamb.org>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Parses as in [RFC 7235](https://datatracker.ietf.org/doc/html/rfc7235).
5//!
6//! Most callers don't need to directly parse; see [`crate::PasswordClient`] instead.
7
// Challenge parsing is implemented as a hand-written state machine.
9// Nice qualities: predictable performance (no backtracking), low dependencies.
10//
11// The implementation is *not* a straightforward translation of the ABNF
12// grammar, so we verify correctness via a fuzz tester that compares with a
13// nom-based parser. See `fuzz/fuzz_targets/parse_challenges.rs`.
14
15use std::{fmt::Display, ops::Range};
16
17use crate::{ChallengeRef, ParamValue};
18
19use crate::{char_classes, C_ESCAPABLE, C_OWS, C_QDTEXT, C_TCHAR};
20
/// Forwards its arguments to `log::trace!` when the `trace` cargo feature is
/// enabled; without the feature the whole invocation is compiled out.
macro_rules! trace {
    ($($tokens:tt)+) => {
        #[cfg(feature = "trace")]
        log::trace!($($tokens)+)
    };
}
25
/// Parses a list of challenges as in [RFC
/// 7235](https://datatracker.ietf.org/doc/html/rfc7235) `Proxy-Authenticate`
/// or `WWW-Authenticate` header values.
///
/// Most callers don't need to directly parse; see [`crate::PasswordClient`] instead.
///
/// This is an iterator that parses lazily, returning each challenge as soon as
/// its end has been found. (Due to the grammar's ambiguous use of commas to
/// separate both challenges and parameters, a challenge's end is found after
/// parsing the *following* challenge's scheme name.) On encountering a syntax
/// error, it yields `Some(Err(_))` and fuses: all subsequent calls to
/// [`Iterator::next`] will return `None`.
///
/// See also the [`crate::parse_challenges`] convenience wrapper.
///
/// ## Example
///
/// ```rust
/// use http_auth::{parser::ChallengeParser, ChallengeRef, ParamValue};
/// let challenges = "UnsupportedSchemeA, Basic realm=\"foo\", error error";
/// let mut parser = ChallengeParser::new(challenges);
/// let c = parser.next().unwrap().unwrap();
/// assert_eq!(c, ChallengeRef {
///     scheme: "UnsupportedSchemeA",
///     params: vec![],
/// });
/// let c = parser.next().unwrap().unwrap();
/// assert_eq!(c, ChallengeRef {
///     scheme: "Basic",
///     params: vec![("realm", ParamValue::try_from_escaped("foo").unwrap())],
/// });
/// let c = parser.next().unwrap().unwrap_err();
/// ```
///
/// ## Implementation notes
///
/// This rigorously matches the official ABNF grammar except as follows:
///
/// *   Doesn't allow non-ASCII characters. [RFC 7235 Appendix
///     B](https://datatracker.ietf.org/doc/html/rfc7235#appendix-B) references
///     the `quoted-string` rule from [RFC 7230 section
///     3.2.6](https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.6),
///     which allows these via `obs-text`, but the meaning is ill-defined in
///     the context of RFC 7235.
/// *   Doesn't allow `token68`, which as far as I know has never been and will
///     never be used in a `challenge`:
///     *   [RFC 2617](https://datatracker.ietf.org/doc/html/rfc2617) never
///         allowed `token68` for challenges.
///     *   [RFC 7235 Appendix
///         A](https://datatracker.ietf.org/doc/html/rfc7235#appendix-A) says
///         `token68` "was added for consistency with legacy authentication
///         schemes such as `Basic`", but `Basic` only uses `token68` in
///         `credential`, not `challenge`.
///     *   [RFC 7235 section
///         5.1.2](https://datatracker.ietf.org/doc/html/rfc7235#section-5.1.2)
///         says "new schemes ought to use the `auth-param` syntax instead
///         [of `token68`], because otherwise future extensions will be
///         impossible."
///     *   No scheme in the [registry](https://www.iana.org/assignments/http-authschemes/http-authschemes.xhtml)
///         uses `token68` challenges as of 2021-10-19.
pub struct ChallengeParser<'i> {
    /// The complete header value being parsed; yielded challenges borrow from it.
    input: &'i str,
    /// Byte offset into `input` of the next byte to examine.
    pos: usize,
    /// Current state of the parsing state machine.
    state: State<'i>,
}
91
92impl<'i> ChallengeParser<'i> {
93 pub fn new(input: &'i str) -> Self {
94 ChallengeParser {
95 input,
96 pos: 0,
97 state: State::PreToken {
98 challenge: None,
99 next: Possibilities(P_SCHEME),
100 },
101 }
102 }
103}
104
/// Describes a parse error and where in the input it occurs.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Error<'i> {
    /// The complete input that was being parsed.
    input: &'i str,
    /// Byte offset into `input` at which the error was detected.
    pos: usize,
    /// Static description of what went wrong.
    error: &'static str,
}
112
113impl<'i> Error<'i> {
114 fn invalid_byte(input: &'i str, pos: usize) -> Self {
115 Self {
116 input,
117 pos,
118 error: "invalid byte",
119 }
120 }
121}
122
123impl<'i> Display for Error<'i> {
124 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125 write!(
126 f,
127 "{} at byte {}: {:?}",
128 self.error,
129 self.pos,
130 format_args!(
131 "{}(HERE-->){}",
132 &self.input[..self.pos],
133 &self.input[self.pos..]
134 ),
135 )
136 }
137}
138
// Marker impl relying on the trait's default methods; `Error` wraps no
// underlying source error.
impl<'i> std::error::Error for Error<'i> {}
140
/// A set of zero or more `P_*` values indicating possibilities for the current
/// and/or upcoming tokens.
///
/// Represented as a bitmask in a `u8`: members are combined with `|` and
/// tested with `&` against the `P_*` constants.
#[derive(Copy, Clone, PartialEq, Eq)]
struct Possibilities(u8);
145
// Members of a `Possibilities` set. Each occupies its own bit so that any
// combination can be OR-ed together.
const P_SCHEME: u8 = 0b00_0001;
const P_PARAM_KEY: u8 = 0b00_0010;
const P_EOF: u8 = 0b00_0100;
const P_WHITESPACE: u8 = 0b00_1000;
const P_COMMA_PARAM_KEY: u8 = 0b01_0000; // a comma, then a param_key.
const P_COMMA_EOF: u8 = 0b10_0000; // a comma, then eof.
152
153impl std::fmt::Debug for Possibilities {
154 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
155 let mut l = f.debug_set();
156 if (self.0 & P_SCHEME) != 0 {
157 l.entry(&"scheme");
158 }
159 if (self.0 & P_PARAM_KEY) != 0 {
160 l.entry(&"param_key");
161 }
162 if (self.0 & P_EOF) != 0 {
163 l.entry(&"eof");
164 }
165 if (self.0 & P_WHITESPACE) != 0 {
166 l.entry(&"whitespace");
167 }
168 if (self.0 & P_COMMA_PARAM_KEY) != 0 {
169 l.entry(&"comma_param_key");
170 }
171 if (self.0 & P_COMMA_EOF) != 0 {
172 l.entry(&"comma_eof");
173 }
174 l.finish()
175 }
176}
177
/// State of the challenge parser between input bytes.
enum State<'i> {
    /// Terminal state: once here, `next` returns `None`. Also the placeholder
    /// left in `self.state` while a byte is being processed, so an early
    /// return with an error leaves the parser in this state.
    Done,

    /// Consuming OWS and commas, then advancing to `Token`.
    PreToken {
        /// The challenge being built, if any.
        challenge: Option<ChallengeRef<'i>>,
        /// What is allowed to come next.
        next: Possibilities,
    },

    /// Parsing a scheme/parameter key, or the whitespace immediately following it.
    Token {
        /// Current `challenge`, if any. If none, this token must be a scheme.
        challenge: Option<ChallengeRef<'i>>,
        /// Byte range of the token within the input; excludes any whitespace
        /// consumed after it.
        token_pos: Range<usize>,
        cur: Possibilities, // subset of P_SCHEME|P_PARAM_KEY
    },

    /// Transitioned from `Token` on `=` after a parameter key.
    /// Stays here while consuming whitespace (BWS) before the value.
    PostEquals {
        /// The challenge the parameter belongs to.
        challenge: ChallengeRef<'i>,
        /// Byte range of the parameter key within the input.
        key_pos: Range<usize>,
    },

    /// Transitioned from `PostEquals` on initial `C_TCHAR`.
    ParamUnquotedValue {
        /// The challenge the parameter belongs to.
        challenge: ChallengeRef<'i>,
        /// Byte range of the parameter key within the input.
        key_pos: Range<usize>,
        /// Byte offset of the first byte of the value.
        value_start: usize,
    },

    /// Transitioned from `PostEquals` on initial `"`.
    ParamQuotedValue {
        /// The challenge the parameter belongs to.
        challenge: ChallengeRef<'i>,
        /// Byte range of the parameter key within the input.
        key_pos: Range<usize>,
        /// Byte offset of the first byte after the opening quote.
        value_start: usize,
        /// Number of backslash escapes completed so far within the value.
        escapes: usize,
        /// True if the previous byte was a backslash that has not yet been
        /// paired with the byte it escapes.
        in_backslash: bool,
    },
}
218
impl<'i> Iterator for ChallengeParser<'i> {
    type Item = Result<ChallengeRef<'i>, Error<'i>>;

    /// Advances the state machine until a challenge is complete, a syntax
    /// error is found, or the input ends.
    ///
    /// Returns `Some(Ok(_))` for each completed challenge, `Some(Err(_))` once
    /// for the first syntax error, and `None` thereafter or when the input has
    /// been fully consumed.
    fn next(&mut self) -> Option<Self::Item> {
        // Process one byte per iteration. The state is moved out (leaving
        // `Done` behind) so each arm can consume its fields; every arm that
        // wants to continue must store a new state. Arms that return an error
        // leave `Done` in place, which is what fuses the iterator.
        while self.pos < self.input.len() {
            let b = self.input.as_bytes()[self.pos];
            let classes = char_classes(b);
            match std::mem::replace(&mut self.state, State::Done) {
                State::Done => return None,
                State::PreToken { challenge, next } => {
                    trace!(
                        "PreToken({:?}) pos={} b={:?}",
                        next,
                        self.pos,
                        char::from(b)
                    );
                    if (classes & C_OWS) != 0 && (next.0 & P_WHITESPACE) != 0 {
                        // Whitespace, where permitted, is skipped; clearing
                        // `P_EOF` means the input may not end right after it.
                        self.state = State::PreToken {
                            challenge,
                            next: Possibilities(next.0 & !P_EOF),
                        }
                    } else if b == b',' {
                        // After a comma, whitespace and a new scheme are always
                        // allowed. A parameter key and/or EOF become allowed
                        // only if their "comma, then ..." variants were.
                        let next = Possibilities(
                            next.0
                                | P_WHITESPACE
                                | P_SCHEME
                                | if (next.0 & P_COMMA_PARAM_KEY) != 0 {
                                    P_PARAM_KEY
                                } else {
                                    0
                                }
                                | if (next.0 & P_COMMA_EOF) != 0 {
                                    P_EOF
                                } else {
                                    0
                                },
                        );
                        self.state = State::PreToken { challenge, next }
                    } else if (classes & C_TCHAR) != 0 {
                        // First byte of a token. Whether it is a scheme or a
                        // parameter key may not be known yet, so carry along
                        // whichever of the two are currently possible.
                        self.state = State::Token {
                            challenge,
                            token_pos: self.pos..self.pos + 1,
                            cur: Possibilities(next.0 & (P_SCHEME | P_PARAM_KEY)),
                        }
                    } else {
                        return Some(Err(Error::invalid_byte(self.input, self.pos)));
                    }
                }
                State::Token {
                    challenge,
                    token_pos,
                    cur,
                } => {
                    trace!(
                        "Token({:?}, {:?}) pos={} b={:?}, cur challenge = {:#?}",
                        token_pos,
                        cur,
                        self.pos,
                        char::from(b),
                        challenge
                    );
                    if (classes & C_TCHAR) != 0 {
                        if token_pos.end == self.pos {
                            // Contiguous with the token so far: extend it.
                            self.state = State::Token {
                                challenge,
                                token_pos: token_pos.start..self.pos + 1,
                                cur,
                            };
                        } else {
                            // Ending a scheme, starting a parameter key without an intermediate comma.
                            // The whitespace between must be exactly one space.
                            // NOTE(review): RFC 7235 section 2.1 writes this separator as
                            // `1*SP`; only a single SP is accepted here. Confirm this is
                            // intentional before relaxing it.
                            if (cur.0 & P_SCHEME) == 0
                                || &self.input[token_pos.end..self.pos] != " "
                            {
                                return Some(Err(Error::invalid_byte(self.input, self.pos)));
                            }
                            self.state = State::Token {
                                challenge: Some(ChallengeRef::new(&self.input[token_pos])),
                                token_pos: self.pos..self.pos + 1,
                                cur: Possibilities(P_PARAM_KEY),
                            };
                            // A new scheme means the previous challenge (if any) is
                            // complete. `pos` is advanced by hand because returning
                            // skips the increment at the bottom of the loop.
                            if let Some(c) = challenge {
                                self.pos += 1;
                                return Some(Ok(c));
                            }
                        }
                    } else {
                        match b {
                            // A comma directly after a possible scheme: the token
                            // was a scheme, starting a new parameterless challenge.
                            b',' if (cur.0 & P_SCHEME) != 0 => {
                                self.state = State::PreToken {
                                    challenge: Some(ChallengeRef::new(&self.input[token_pos])),
                                    next: Possibilities(
                                        P_SCHEME | P_WHITESPACE | P_EOF | P_COMMA_EOF,
                                    ),
                                };
                                // As above: yield the challenge that just ended and
                                // advance `pos` manually.
                                if let Some(c) = challenge {
                                    self.pos += 1;
                                    return Some(Ok(c));
                                }
                            }
                            // `=` after a possible parameter key: the token was a
                            // key, which needs an existing challenge to attach to.
                            b'=' if (cur.0 & P_PARAM_KEY) != 0 => match challenge {
                                Some(challenge) => {
                                    self.state = State::PostEquals {
                                        challenge,
                                        key_pos: token_pos,
                                    }
                                }
                                None => {
                                    return Some(Err(Error {
                                        input: self.input,
                                        pos: self.pos,
                                        error: "= without existing challenge",
                                    }));
                                }
                            },

                            // Whitespace after the token: stay in this state but
                            // leave `token_pos` alone, so a later byte can tell that
                            // a gap followed the token.
                            b' ' | b'\t' => {
                                self.state = State::Token {
                                    challenge,
                                    token_pos,
                                    cur,
                                }
                            }

                            _ => return Some(Err(Error::invalid_byte(self.input, self.pos))),
                        }
                    }
                }
                State::PostEquals { challenge, key_pos } => {
                    trace!("PostEquals pos={} b={:?}", self.pos, char::from(b));
                    if (classes & C_OWS) != 0 {
                        // Note this doesn't advance key_pos.end, so in the token68 case, another
                        // `=` will not be allowed.
                        self.state = State::PostEquals { challenge, key_pos };
                    } else if b == b'"' {
                        // Quoted value; its content starts after the opening quote.
                        self.state = State::ParamQuotedValue {
                            challenge,
                            key_pos,
                            value_start: self.pos + 1,
                            escapes: 0,
                            in_backslash: false,
                        };
                    } else if (classes & C_TCHAR) != 0 {
                        // Unquoted (token) value starting at this byte.
                        self.state = State::ParamUnquotedValue {
                            challenge,
                            key_pos,
                            value_start: self.pos,
                        };
                    } else {
                        return Some(Err(Error::invalid_byte(self.input, self.pos)));
                    }
                }
                State::ParamUnquotedValue {
                    mut challenge,
                    key_pos,
                    value_start,
                } => {
                    trace!("ParamUnquotedValue pos={} b={:?}", self.pos, char::from(b));
                    if (classes & C_TCHAR) != 0 {
                        // Still inside the value.
                        self.state = State::ParamUnquotedValue {
                            challenge,
                            key_pos,
                            value_start,
                        };
                    } else if (classes & C_OWS) != 0 {
                        // Whitespace ends the value; record the parameter. Only more
                        // whitespace or a comma may follow.
                        challenge.params.push((
                            &self.input[key_pos],
                            ParamValue {
                                escapes: 0,
                                escaped: &self.input[value_start..self.pos],
                            },
                        ));
                        self.state = State::PreToken {
                            challenge: Some(challenge),
                            next: Possibilities(P_WHITESPACE | P_COMMA_PARAM_KEY | P_COMMA_EOF),
                        };
                    } else if b == b',' {
                        // A comma ends the value; record the parameter. The comma is
                        // consumed here, so the next state starts with the
                        // post-comma possibilities already enabled.
                        challenge.params.push((
                            &self.input[key_pos],
                            ParamValue {
                                escapes: 0,
                                escaped: &self.input[value_start..self.pos],
                            },
                        ));
                        self.state = State::PreToken {
                            challenge: Some(challenge),
                            next: Possibilities(
                                P_WHITESPACE
                                    | P_PARAM_KEY
                                    | P_SCHEME
                                    | P_EOF
                                    | P_COMMA_PARAM_KEY
                                    | P_COMMA_EOF,
                            ),
                        };
                    } else {
                        return Some(Err(Error::invalid_byte(self.input, self.pos)));
                    }
                }
                State::ParamQuotedValue {
                    mut challenge,
                    key_pos,
                    value_start,
                    escapes,
                    in_backslash,
                } => {
                    trace!("ParamQuotedValue pos={} b={:?}", self.pos, char::from(b));
                    if in_backslash {
                        // This byte is the target of a backslash escape.
                        if (classes & C_ESCAPABLE) == 0 {
                            return Some(Err(Error::invalid_byte(self.input, self.pos)));
                        }
                        self.state = State::ParamQuotedValue {
                            challenge,
                            key_pos,
                            value_start,
                            escapes: escapes + 1,
                            in_backslash: false,
                        };
                    } else if b == b'\\' {
                        // Start of an escape; the next byte is validated above.
                        self.state = State::ParamQuotedValue {
                            challenge,
                            key_pos,
                            value_start,
                            escapes,
                            in_backslash: true,
                        };
                    } else if b == b'"' {
                        // Closing quote: record the parameter. The stored slice is
                        // the still-escaped text between the quotes.
                        challenge.params.push((
                            &self.input[key_pos],
                            ParamValue {
                                escapes,
                                escaped: &self.input[value_start..self.pos],
                            },
                        ));
                        self.state = State::PreToken {
                            challenge: Some(challenge),
                            next: Possibilities(
                                P_WHITESPACE | P_EOF | P_COMMA_PARAM_KEY | P_COMMA_EOF,
                            ),
                        };
                    } else if (classes & C_QDTEXT) != 0 {
                        // Ordinary byte within the quoted string.
                        self.state = State::ParamQuotedValue {
                            challenge,
                            key_pos,
                            value_start,
                            escapes,
                            in_backslash,
                        };
                    } else {
                        return Some(Err(Error::invalid_byte(self.input, self.pos)));
                    }
                }
            };
            self.pos += 1;
        }

        // The input is exhausted. Whether that is acceptable, and what is
        // still to be yielded, depends on the state reached.
        match std::mem::replace(&mut self.state, State::Done) {
            State::Done => {}
            State::PreToken {
                challenge, next, ..
            } => {
                trace!("eof, PreToken({:?})", next);
                if (next.0 & P_EOF) == 0 {
                    return Some(Err(Error {
                        input: self.input,
                        pos: self.input.len(),
                        error: "unexpected EOF",
                    }));
                }
                if let Some(challenge) = challenge {
                    return Some(Ok(challenge));
                }
            }
            State::Token {
                challenge,
                token_pos,
                cur,
            } => {
                trace!("eof, Token({:?})", cur);
                // The trailing token is only valid as a scheme; a parameter key
                // would still need its `=` and value.
                if (cur.0 & P_SCHEME) == 0 {
                    return Some(Err(Error {
                        input: self.input,
                        pos: self.input.len(),
                        error: "unexpected EOF expecting =",
                    }));
                }
                // Nothing, or exactly one space, may follow the final scheme.
                if token_pos.end != self.input.len() && &self.input[token_pos.end..] != " " {
                    return Some(Err(Error {
                        input: self.input,
                        pos: self.input.len(),
                        error: "EOF after whitespace",
                    }));
                }
                if let Some(challenge) = challenge {
                    // Two challenges remain: yield the earlier one now and put the
                    // state back (without it) so the next call yields the
                    // trailing scheme-only challenge.
                    self.state = State::Token {
                        challenge: None,
                        token_pos,
                        cur,
                    };
                    return Some(Ok(challenge));
                }
                return Some(Ok(ChallengeRef::new(&self.input[token_pos])));
            }
            State::PostEquals { .. } => {
                trace!("eof, PostEquals");
                return Some(Err(Error {
                    input: self.input,
                    pos: self.input.len(),
                    error: "unexpected EOF expecting param value",
                }));
            }
            State::ParamUnquotedValue {
                mut challenge,
                key_pos,
                value_start,
            } => {
                trace!("eof, ParamUnquotedValue");
                // An unquoted value may run to the end of the input.
                challenge.params.push((
                    &self.input[key_pos],
                    ParamValue {
                        escapes: 0,
                        escaped: &self.input[value_start..],
                    },
                ));
                return Some(Ok(challenge));
            }
            State::ParamQuotedValue { .. } => {
                trace!("eof, ParamQuotedValue");
                return Some(Err(Error {
                    input: self.input,
                    pos: self.input.len(),
                    error: "unexpected EOF in quoted param value",
                }));
            }
        }
        None
    }
}
556
// `next` leaves the parser in `State::Done` after yielding an error or
// returning `None`, and `Done` only ever produces `None`.
impl std::iter::FusedIterator for ChallengeParser<'_> {}
558
#[cfg(test)]
mod tests {
    use crate::{ChallengeRef, ParamValue};

    // Only a pair of smoke tests live here. The fuzz testing is far more comprehensive.

    /// Parses the two-challenge example header from RFC 7235 section 4.1.
    #[test]
    fn multi_challenge() {
        // https://datatracker.ietf.org/doc/html/rfc7235#section-4.1
        let header =
            r#"Newauth realm="apps", type=1, title="Login to \"apps\"", Basic realm="simple""#;
        let parsed = crate::parse_challenges(header).unwrap();

        let newauth = ChallengeRef {
            scheme: "Newauth",
            params: vec![
                ("realm", ParamValue::new(0, "apps")),
                ("type", ParamValue::new(0, "1")),
                ("title", ParamValue::new(2, r#"Login to \"apps\""#)),
            ],
        };
        let basic = ChallengeRef {
            scheme: "Basic",
            params: vec![("realm", ParamValue::new(0, "simple"))],
        };
        assert_eq!(&parsed[..], &[newauth, basic]);
    }

    /// Inputs that contain no challenge at all are rejected.
    #[test]
    fn empty() {
        for input in ["", ","].iter().copied() {
            crate::parse_challenges(input).unwrap_err();
        }
    }
}
595}