http_auth/
parser.rs

1// Copyright (C) 2021 Scott Lamb <slamb@slamb.org>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Parses as in [RFC 7235](https://datatracker.ietf.org/doc/html/rfc7235).
5//!
6//! Most callers don't need to directly parse; see [`crate::PasswordClient`] instead.
7
// Challenge parsing is implemented as a hand-written state machine.
9// Nice qualities: predictable performance (no backtracking), low dependencies.
10//
11// The implementation is *not* a straightforward translation of the ABNF
12// grammar, so we verify correctness via a fuzz tester that compares with a
13// nom-based parser. See `fuzz/fuzz_targets/parse_challenges.rs`.
14
15use std::{fmt::Display, ops::Range};
16
17use crate::{ChallengeRef, ParamValue};
18
19use crate::{char_classes, C_ESCAPABLE, C_OWS, C_QDTEXT, C_TCHAR};
20
/// Forwards its arguments to `log::trace!` when the `trace` cargo feature is
/// enabled; otherwise the whole invocation is compiled out.
macro_rules! trace {
    ($($tokens:tt)+) => {
        #[cfg(feature = "trace")]
        log::trace!($($tokens)+)
    };
}
25
26/// Parses a list of challenges as in [RFC
27/// 7235](https://datatracker.ietf.org/doc/html/rfc7235) `Proxy-Authenticate`
28/// or `WWW-Authenticate` header values.
29///
30/// Most callers don't need to directly parse; see [`crate::PasswordClient`] instead.
31///
32/// This is an iterator that parses lazily, returning each challenge as soon as
33/// its end has been found. (Due to the grammar's ambiguous use of commas to
34/// separate both challenges and parameters, a challenge's end is found after
35/// parsing the *following* challenge's scheme name.) On encountering a syntax
36/// error, it yields `Some(Err(_))` and fuses: all subsequent calls to
37/// [`Iterator::next`] will return `None`.
38///
39/// See also the [`crate::parse_challenges`] convenience wrapper.
40///
41/// ## Example
42///
43/// ```rust
44/// use http_auth::{parser::ChallengeParser, ChallengeRef, ParamValue};
45/// let challenges = "UnsupportedSchemeA, Basic realm=\"foo\", error error";
46/// let mut parser = ChallengeParser::new(challenges);
47/// let c = parser.next().unwrap().unwrap();
48/// assert_eq!(c, ChallengeRef {
49///     scheme: "UnsupportedSchemeA",
50///     params: vec![],
51/// });
52/// let c = parser.next().unwrap().unwrap();
53/// assert_eq!(c, ChallengeRef {
54///     scheme: "Basic",
55///     params: vec![("realm", ParamValue::try_from_escaped("foo").unwrap())],
56/// });
57/// let c = parser.next().unwrap().unwrap_err();
58/// ```
59///
60/// ## Implementation notes
61///
62/// This rigorously matches the official ABNF grammar except as follows:
63///
64/// *   Doesn't allow non-ASCII characters. [RFC 7235 Appendix
65///     B](https://datatracker.ietf.org/doc/html/rfc7235#appendix-B) references
66///     the `quoted-string` rule from [RFC 7230 section
67///     3.2.6](https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.6),
68///     which allows these via `obs-text`, but the meaning is ill-defined in
69///     the context of RFC 7235.
70/// *   Doesn't allow `token68`, which as far as I know has never been and will
71///     never be used in a `challenge`:
72///     *   [RFC 2617](https://datatracker.ietf.org/doc/html/rfc2617) never
73///         allowed `token68` for challenges.
74///     *   [RFC 7235 Appendix
75///         A](https://datatracker.ietf.org/doc/html/rfc7235#appendix-A) says
76///         `token68` "was added for consistency with legacy authentication
77///         schemes such as `Basic`", but `Basic` only uses `token68` in
78///         `credential`, not `challenge`.
79///     *   [RFC 7235 section
80///         5.1.2](https://datatracker.ietf.org/doc/html/rfc7235#section-5.1.2)
81///         says "new schemes ought to use the `auth-param` syntax instead
82///         [of `token68`], because otherwise future extensions will be
83///         impossible."
84///     *   No scheme in the [registry](https://www.iana.org/assignments/http-authschemes/http-authschemes.xhtml)
85///         uses `token68` challenges as of 2021-10-19.
pub struct ChallengeParser<'i> {
    /// The complete header value being parsed.
    input: &'i str,
    /// Byte index into `input` of the next byte to examine.
    pos: usize,
    /// Current state of the parsing state machine, including any challenge
    /// that has been started but not yet yielded.
    state: State<'i>,
}
91
92impl<'i> ChallengeParser<'i> {
93    pub fn new(input: &'i str) -> Self {
94        ChallengeParser {
95            input,
96            pos: 0,
97            state: State::PreToken {
98                challenge: None,
99                next: Possibilities(P_SCHEME),
100            },
101        }
102    }
103}
104
/// A parse failure, together with the position in the input where it was detected.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Error<'i> {
    /// The complete input that was being parsed.
    input: &'i str,
    /// Byte offset into `input` at which the problem was detected.
    pos: usize,
    /// Short static description of the problem.
    error: &'static str,
}

impl<'i> Error<'i> {
    /// Builds the error reported when the byte at `pos` is not acceptable.
    fn invalid_byte(input: &'i str, pos: usize) -> Self {
        Error {
            input,
            pos,
            error: "invalid byte",
        }
    }
}

impl<'i> Display for Error<'i> {
    /// Writes the description, the byte offset, and the input with a
    /// `(HERE-->)` marker inserted at the offending position.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (before, after) = self.input.split_at(self.pos);
        write!(
            f,
            "{} at byte {}: {}(HERE-->){}",
            self.error, self.pos, before, after
        )
    }
}

impl<'i> std::error::Error for Error<'i> {}
140
/// A bit set of `P_*` flags describing what the current and/or upcoming
/// tokens are allowed to be. May be empty.
#[derive(Copy, Clone, PartialEq, Eq)]
struct Possibilities(u8);

const P_SCHEME: u8 = 1 << 0;
const P_PARAM_KEY: u8 = 1 << 1;
const P_EOF: u8 = 1 << 2;
const P_WHITESPACE: u8 = 1 << 3;
const P_COMMA_PARAM_KEY: u8 = 1 << 4; // a comma, then a param_key.
const P_COMMA_EOF: u8 = 1 << 5; // a comma, then eof.

impl std::fmt::Debug for Possibilities {
    /// Formats the set as the names of the flags that are present, in
    /// ascending bit order.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const NAMES: [(u8, &str); 6] = [
            (P_SCHEME, "scheme"),
            (P_PARAM_KEY, "param_key"),
            (P_EOF, "eof"),
            (P_WHITESPACE, "whitespace"),
            (P_COMMA_PARAM_KEY, "comma_param_key"),
            (P_COMMA_EOF, "comma_eof"),
        ];
        let present = NAMES
            .iter()
            .filter(|&&(bit, _)| (self.0 & bit) != 0)
            .map(|&(_, name)| name);
        f.debug_set().entries(present).finish()
    }
}
177
/// States of the challenge-parsing state machine.
enum State<'i> {
    /// Nothing more to yield: input was exhausted or an error was returned.
    Done,

    /// Consuming OWS and commas, then advancing to `Token`.
    PreToken {
        /// Challenge started so far, if any; not yet yielded.
        challenge: Option<ChallengeRef<'i>>,
        /// What is allowed to appear next.
        next: Possibilities,
    },

    /// Parsing a scheme/parameter key, or the whitespace immediately following it.
    Token {
        /// Current `challenge`, if any. If none, this token must be a scheme.
        challenge: Option<ChallengeRef<'i>>,
        /// Byte range of the token within the input. The end is not advanced
        /// over whitespace that follows the token.
        token_pos: Range<usize>,
        cur: Possibilities, // subset of P_SCHEME|P_PARAM_KEY
    },

    /// Entered from `Token` on the `=` that follows a parameter key.
    /// Remains in this state while consuming whitespace before the value.
    PostEquals {
        /// Challenge the parameter belongs to.
        challenge: ChallengeRef<'i>,
        /// Byte range of the parameter key within the input.
        key_pos: Range<usize>,
    },

    /// Entered from `PostEquals` on an initial `C_TCHAR`.
    ParamUnquotedValue {
        /// Challenge the parameter belongs to.
        challenge: ChallengeRef<'i>,
        /// Byte range of the parameter key within the input.
        key_pos: Range<usize>,
        /// Byte index of the first byte of the value.
        value_start: usize,
    },

    /// Entered from `PostEquals` on an initial `"`.
    ParamQuotedValue {
        /// Challenge the parameter belongs to.
        challenge: ChallengeRef<'i>,
        /// Byte range of the parameter key within the input.
        key_pos: Range<usize>,
        /// Byte index just past the opening `"`.
        value_start: usize,
        /// Number of backslash escapes seen so far in the value.
        escapes: usize,
        /// True when the previous byte was a backslash starting an escape.
        in_backslash: bool,
    },
}
218
impl<'i> Iterator for ChallengeParser<'i> {
    type Item = Result<ChallengeRef<'i>, Error<'i>>;

    /// Advances the state machine until a complete challenge, an error, or the
    /// end of the input is reached.
    ///
    /// The loop examines one input byte per iteration. On each iteration the
    /// current state is moved out of `self.state` and replaced with
    /// `State::Done`; every non-error branch stores a new state. An early
    /// `return` with an error therefore leaves the parser in `State::Done`,
    /// so all later calls return `None`.
    fn next(&mut self) -> Option<Self::Item> {
        while self.pos < self.input.len() {
            let b = self.input.as_bytes()[self.pos];
            let classes = char_classes(b);
            match std::mem::replace(&mut self.state, State::Done) {
                State::Done => return None,
                State::PreToken { challenge, next } => {
                    trace!(
                        "PreToken({:?}) pos={} b={:?}",
                        next,
                        self.pos,
                        char::from(b)
                    );
                    if (classes & C_OWS) != 0 && (next.0 & P_WHITESPACE) != 0 {
                        // Whitespace is consumed, but end of input is no longer acceptable
                        // directly after it.
                        self.state = State::PreToken {
                            challenge,
                            next: Possibilities(next.0 & !P_EOF),
                        }
                    } else if b == b',' {
                        // After a comma, whitespace or a new scheme may follow. The
                        // "comma, then X" possibilities are promoted to plain X.
                        let next = Possibilities(
                            next.0
                                | P_WHITESPACE
                                | P_SCHEME
                                | if (next.0 & P_COMMA_PARAM_KEY) != 0 {
                                    P_PARAM_KEY
                                } else {
                                    0
                                }
                                | if (next.0 & P_COMMA_EOF) != 0 {
                                    P_EOF
                                } else {
                                    0
                                },
                        );
                        self.state = State::PreToken { challenge, next }
                    } else if (classes & C_TCHAR) != 0 {
                        // First byte of a token. Whether it is a scheme or a parameter key
                        // may still be undecided; `cur` keeps whichever are still allowed.
                        self.state = State::Token {
                            challenge,
                            token_pos: self.pos..self.pos + 1,
                            cur: Possibilities(next.0 & (P_SCHEME | P_PARAM_KEY)),
                        }
                    } else {
                        return Some(Err(Error::invalid_byte(self.input, self.pos)));
                    }
                }
                State::Token {
                    challenge,
                    token_pos,
                    cur,
                } => {
                    trace!(
                        "Token({:?}, {:?}) pos={} b={:?}, cur challenge = {:#?}",
                        token_pos,
                        cur,
                        self.pos,
                        char::from(b),
                        challenge
                    );
                    if (classes & C_TCHAR) != 0 {
                        if token_pos.end == self.pos {
                            // Still inside the same token: extend it by one byte.
                            self.state = State::Token {
                                challenge,
                                token_pos: token_pos.start..self.pos + 1,
                                cur,
                            };
                        } else {
                            // Ending a scheme, starting a parameter key without an intermediate comma.
                            // The whitespace between must be exactly one space.
                            if (cur.0 & P_SCHEME) == 0
                                || &self.input[token_pos.end..self.pos] != " "
                            {
                                return Some(Err(Error::invalid_byte(self.input, self.pos)));
                            }
                            // The finished token was a scheme, so it begins a new challenge;
                            // the current byte begins that challenge's first parameter key.
                            self.state = State::Token {
                                challenge: Some(ChallengeRef::new(&self.input[token_pos])),
                                token_pos: self.pos..self.pos + 1,
                                cur: Possibilities(P_PARAM_KEY),
                            };
                            if let Some(c) = challenge {
                                // The previous challenge is now known to be complete. This
                                // early return skips the increment at the bottom of the loop,
                                // so step past the current byte here.
                                self.pos += 1;
                                return Some(Ok(c));
                            }
                        }
                    } else {
                        match b {
                            b',' if (cur.0 & P_SCHEME) != 0 => {
                                // A token directly followed by a comma is a scheme with no
                                // parameters; it begins a new challenge.
                                self.state = State::PreToken {
                                    challenge: Some(ChallengeRef::new(&self.input[token_pos])),
                                    next: Possibilities(
                                        P_SCHEME | P_WHITESPACE | P_EOF | P_COMMA_EOF,
                                    ),
                                };
                                if let Some(c) = challenge {
                                    // Yield the previous challenge; advance manually because
                                    // the loop's increment is skipped by the return.
                                    self.pos += 1;
                                    return Some(Ok(c));
                                }
                            }
                            b'=' if (cur.0 & P_PARAM_KEY) != 0 => match challenge {
                                Some(challenge) => {
                                    // The token was a parameter key of the current challenge.
                                    self.state = State::PostEquals {
                                        challenge,
                                        key_pos: token_pos,
                                    }
                                }
                                None => {
                                    return Some(Err(Error {
                                        input: self.input,
                                        pos: self.pos,
                                        error: "= without existing challenge",
                                    }));
                                }
                            },

                            b' ' | b'\t' => {
                                // Whitespace after the token: stay in this state without
                                // extending `token_pos`, so a later byte can tell that the
                                // token itself has ended.
                                self.state = State::Token {
                                    challenge,
                                    token_pos,
                                    cur,
                                }
                            }

                            _ => return Some(Err(Error::invalid_byte(self.input, self.pos))),
                        }
                    }
                }
                State::PostEquals { challenge, key_pos } => {
                    trace!("PostEquals pos={} b={:?}", self.pos, char::from(b));
                    if (classes & C_OWS) != 0 {
                        // Note this doesn't advance key_pos.end, so in the token68 case, another
                        // `=` will not be allowed.
                        self.state = State::PostEquals { challenge, key_pos };
                    } else if b == b'"' {
                        // Quoted value: its content starts after the opening quote.
                        self.state = State::ParamQuotedValue {
                            challenge,
                            key_pos,
                            value_start: self.pos + 1,
                            escapes: 0,
                            in_backslash: false,
                        };
                    } else if (classes & C_TCHAR) != 0 {
                        // Unquoted (token) value: the current byte is its first byte.
                        self.state = State::ParamUnquotedValue {
                            challenge,
                            key_pos,
                            value_start: self.pos,
                        };
                    } else {
                        return Some(Err(Error::invalid_byte(self.input, self.pos)));
                    }
                }
                State::ParamUnquotedValue {
                    mut challenge,
                    key_pos,
                    value_start,
                } => {
                    trace!("ParamUnquotedValue pos={} b={:?}", self.pos, char::from(b));
                    if (classes & C_TCHAR) != 0 {
                        // Still inside the value.
                        self.state = State::ParamUnquotedValue {
                            challenge,
                            key_pos,
                            value_start,
                        };
                    } else if (classes & C_OWS) != 0 {
                        // Whitespace ends the value. A comma is then required before
                        // another parameter or the end of input.
                        challenge.params.push((
                            &self.input[key_pos],
                            ParamValue {
                                escapes: 0,
                                escaped: &self.input[value_start..self.pos],
                            },
                        ));
                        self.state = State::PreToken {
                            challenge: Some(challenge),
                            next: Possibilities(P_WHITESPACE | P_COMMA_PARAM_KEY | P_COMMA_EOF),
                        };
                    } else if b == b',' {
                        // A comma ends the value. Since the comma has already been seen,
                        // a parameter key, a new scheme, or end of input may follow directly.
                        challenge.params.push((
                            &self.input[key_pos],
                            ParamValue {
                                escapes: 0,
                                escaped: &self.input[value_start..self.pos],
                            },
                        ));
                        self.state = State::PreToken {
                            challenge: Some(challenge),
                            next: Possibilities(
                                P_WHITESPACE
                                    | P_PARAM_KEY
                                    | P_SCHEME
                                    | P_EOF
                                    | P_COMMA_PARAM_KEY
                                    | P_COMMA_EOF,
                            ),
                        };
                    } else {
                        return Some(Err(Error::invalid_byte(self.input, self.pos)));
                    }
                }
                State::ParamQuotedValue {
                    mut challenge,
                    key_pos,
                    value_start,
                    escapes,
                    in_backslash,
                } => {
                    trace!("ParamQuotedValue pos={} b={:?}", self.pos, char::from(b));
                    if in_backslash {
                        // The byte after a backslash must be escapable; count the escape.
                        if (classes & C_ESCAPABLE) == 0 {
                            return Some(Err(Error::invalid_byte(self.input, self.pos)));
                        }
                        self.state = State::ParamQuotedValue {
                            challenge,
                            key_pos,
                            value_start,
                            escapes: escapes + 1,
                            in_backslash: false,
                        };
                    } else if b == b'\\' {
                        // Start of an escape; the next byte is checked on the next iteration.
                        self.state = State::ParamQuotedValue {
                            challenge,
                            key_pos,
                            value_start,
                            escapes,
                            in_backslash: true,
                        };
                    } else if b == b'"' {
                        // Closing quote: record the value in its still-escaped form, along
                        // with the number of escapes it contains.
                        challenge.params.push((
                            &self.input[key_pos],
                            ParamValue {
                                escapes,
                                escaped: &self.input[value_start..self.pos],
                            },
                        ));
                        self.state = State::PreToken {
                            challenge: Some(challenge),
                            next: Possibilities(
                                P_WHITESPACE | P_EOF | P_COMMA_PARAM_KEY | P_COMMA_EOF,
                            ),
                        };
                    } else if (classes & C_QDTEXT) != 0 {
                        // Ordinary quoted-string content.
                        self.state = State::ParamQuotedValue {
                            challenge,
                            key_pos,
                            value_start,
                            escapes,
                            in_backslash,
                        };
                    } else {
                        return Some(Err(Error::invalid_byte(self.input, self.pos)));
                    }
                }
            };
            self.pos += 1;
        }

        // End of input: decide based on the state the last byte left us in. The state is
        // replaced with `State::Done`, so later calls return `None` unless a branch below
        // explicitly stores another state.
        match std::mem::replace(&mut self.state, State::Done) {
            State::Done => {}
            State::PreToken {
                challenge, next, ..
            } => {
                trace!("eof, PreToken({:?})", next);
                if (next.0 & P_EOF) == 0 {
                    return Some(Err(Error {
                        input: self.input,
                        pos: self.input.len(),
                        error: "unexpected EOF",
                    }));
                }
                if let Some(challenge) = challenge {
                    return Some(Ok(challenge));
                }
            }
            State::Token {
                challenge,
                token_pos,
                cur,
            } => {
                trace!("eof, Token({:?})", cur);
                // A token at end of input is only valid as a parameterless scheme.
                if (cur.0 & P_SCHEME) == 0 {
                    return Some(Err(Error {
                        input: self.input,
                        pos: self.input.len(),
                        error: "unexpected EOF expecting =",
                    }));
                }
                // Anything after the token must be exactly one space.
                if token_pos.end != self.input.len() && &self.input[token_pos.end..] != " " {
                    return Some(Err(Error {
                        input: self.input,
                        pos: self.input.len(),
                        error: "EOF after whitespace",
                    }));
                }
                if let Some(challenge) = challenge {
                    // Two challenges are pending: the previous one and the trailing scheme.
                    // Yield the previous one now and keep the token so that the next call
                    // lands here again and yields the scheme-only challenge.
                    self.state = State::Token {
                        challenge: None,
                        token_pos,
                        cur,
                    };
                    return Some(Ok(challenge));
                }
                return Some(Ok(ChallengeRef::new(&self.input[token_pos])));
            }
            State::PostEquals { .. } => {
                trace!("eof, PostEquals");
                return Some(Err(Error {
                    input: self.input,
                    pos: self.input.len(),
                    error: "unexpected EOF expecting param value",
                }));
            }
            State::ParamUnquotedValue {
                mut challenge,
                key_pos,
                value_start,
            } => {
                trace!("eof, ParamUnquotedValue");
                // The unquoted value runs to the end of the input.
                challenge.params.push((
                    &self.input[key_pos],
                    ParamValue {
                        escapes: 0,
                        escaped: &self.input[value_start..],
                    },
                ));
                return Some(Ok(challenge));
            }
            State::ParamQuotedValue { .. } => {
                trace!("eof, ParamQuotedValue");
                return Some(Err(Error {
                    input: self.input,
                    pos: self.input.len(),
                    error: "unexpected EOF in quoted param value",
                }));
            }
        }
        None
    }
}
556
// Valid because `next` leaves the parser in `State::Done` once it has reported an error or
// exhausted the input, and `State::Done` always produces `None`.
impl std::iter::FusedIterator for ChallengeParser<'_> {}
558
#[cfg(test)]
mod tests {
    use crate::{ChallengeRef, ParamValue};

    // Only a few sanity checks live here; the fuzz target covers the parser far more thoroughly.

    /// Two challenges in one header value, including a quoted value with escapes.
    #[test]
    fn multi_challenge() {
        // Example from https://datatracker.ietf.org/doc/html/rfc7235#section-4.1
        let input =
            r#"Newauth realm="apps", type=1, title="Login to \"apps\"", Basic realm="simple""#;
        let parsed = crate::parse_challenges(input).unwrap();

        let newauth = ChallengeRef {
            scheme: "Newauth",
            params: vec![
                ("realm", ParamValue::new(0, "apps")),
                ("type", ParamValue::new(0, "1")),
                ("title", ParamValue::new(2, r#"Login to \"apps\""#)),
            ],
        };
        let basic = ChallengeRef {
            scheme: "Basic",
            params: vec![("realm", ParamValue::new(0, "simple"))],
        };
        let expected = [newauth, basic];
        assert_eq!(&parsed[..], &expected[..]);
    }

    /// Inputs that contain no challenge at all must be rejected.
    #[test]
    fn empty() {
        for &input in &["", ","] {
            crate::parse_challenges(input).unwrap_err();
        }
    }
}