1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
use lut::{Table, Access};
use lookup_tables::{
    MediaTypeChars,
    ObsQText, QText,
    ObsQTextWs, QTextWs,
    Ws
};
use qs::error::CoreError;
use qs::spec::{
    PartialCodePoint,
    ParsingImpl,
    State
};

mod other;
pub use self::other::*;
mod http;
pub use self::http::*;
mod mime;
pub use self::mime::*;

/// Extension trait on top of `ParsingImpl` for parsing MediaTypes in the context of Mime.
pub trait MimeParsingExt: ParsingImpl {
    /// `true` if utf8 (non-us-ascii input) is allowed
    const ALLOW_UTF8: bool;
    /// `true` if the `obs-` part of the grammar is supported
    const OBS: bool;

    /// Creates the custom state for a given `FWSState`, paired with `emit`.
    ///
    /// # Example
    ///
    /// ```ignore
    /// fn custom_state(state: FWSState, emit: bool) -> (State<Self>, bool) {
    ///     (State::Custom(MyCustomType(state)), emit)
    /// }
    /// ```
    ///
    fn custom_state(state: FWSState, emit: bool) -> (State<Self>, bool);

    /// Default handling of the normal state of the `State` automaton.
    ///
    /// The input byte is classified as follows:
    ///
    /// 1. if it is qtext or white space with respect to `Self::OBS`, or if
    ///    `Self::ALLOW_UTF8` is set and the byte is `> 0x7f`, the result is
    ///    `Ok((State::Normal, true))`
    /// 2. if it is `'\r'` the result is
    ///    `Ok(Self::custom_state(FWSState::HitCr, false))`
    /// 3. anything else yields `Err(CoreError::InvalidChar)`
    ///
    /// Note that with `Self::ALLOW_UTF8` every byte `> 0x7f` is accepted as part of a
    /// non-us-ascii utf8 sequence. This function does **not** validate that the input
    /// is well-formed utf8, so when it is run over bytes which are not already known
    /// to be a valid utf8 string the caller still has to check that they are utf8
    /// and not e.g. latin1.
    fn handle_normal_state(bch: PartialCodePoint) -> Result<(State<Self>, bool), CoreError> {
        let byte = bch.as_u8();
        let index = byte as usize;

        // `Self::OBS` is an associated constant, so only one arm survives compilation
        let in_table = match Self::OBS {
            true => MediaTypeChars::check_at(index, ObsQTextWs),
            false => MediaTypeChars::check_at(index, QTextWs),
        };

        if in_table || (Self::ALLOW_UTF8 && byte > 0x7f) {
            return Ok((State::Normal, true));
        }

        match byte {
            // a lone `'\r'` can only be the start of a FWS
            b'\r' => Ok(Self::custom_state(FWSState::HitCr, false)),
            _ => Err(CoreError::InvalidChar),
        }
    }
}

/// An enum representing the state of a quoted string parser for specifications with FWS.
///
/// FWS is folding white space as it can appear in media-types in the mime
/// specification: a `"\r\n"` sequence followed by either `' '` or `'\t'`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FWSState {
    /// entered once the leading `'\r'` was seen
    HitCr,
    /// entered once the complete `"\r\n"` was seen
    HitNl,
    /// entered after `"\r\n "` or `"\r\n\t"`, optionally followed by more ws chars
    HadFws,
}

impl FWSState {

    /// Default implementation for handling `FWSState` state transitions.
    ///
    /// Handles the state transition with respect to a given `MimeParsingExt`
    /// implementation:
    ///
    /// - `HitCr`: only `'\n'` is accepted, moving on to `HitNl`.
    /// - `HitNl`: only `' '` or `'\t'` is accepted. With `Impl::OBS` the parser
    ///   returns to `State::Normal`, otherwise it moves on to `HadFws`.
    /// - `HadFws`: qtext (or, with `Impl::ALLOW_UTF8`, any byte `> 0x7f`) returns
    ///   to `State::Normal`, further white space stays in `HadFws`, `'"'` yields
    ///   `State::End` and `'\\'` yields `State::QPStart`.
    ///
    /// As `HadFws` does not accept the start of another FWS, this makes sure
    /// that between two FWS there has to be at least one non ws character (at
    /// least in the non `obs-` grammar).
    ///
    /// The returned tuple is the next state paired with the flag passed on to
    /// (or in the same position as the flag of) `Impl::custom_state`.
    ///
    /// # Errors
    ///
    /// Returns `CoreError::InvalidChar` if `bch` is not acceptable in the
    /// current state.
    pub fn advance<Impl: MimeParsingExt>(self, bch: PartialCodePoint)
                                         -> Result<(State<Impl>, bool), CoreError>
    {
        use self::FWSState::*;
        let iu8 = bch.as_u8();
        match self {
            HitCr => {
                if iu8 == b'\n' {
                    Ok(Impl::custom_state(FWSState::HitNl, false))
                } else {
                    Err(CoreError::InvalidChar)
                }
            },
            HitNl => {
                if iu8 == b' ' || iu8 == b'\t' {
                    if Impl::OBS {
                        Ok((State::Normal, true))
                    } else {
                        // the new grammar does not allow ws-only lines, the `obs-` one does
                        Ok(Impl::custom_state(FWSState::HadFws, true))
                    }
                } else {
                    Err(CoreError::InvalidChar)
                }
            },
            HadFws => {
                let lres = MediaTypeChars::lookup(iu8 as usize);
                // QText will be zero-sized so default etc. will be optimized away
                //
                // The table has to be chosen the same way as in
                // `MimeParsingExt::handle_normal_state`: the `obs-` table is only
                // used if `Impl::OBS` is set, the strict table otherwise.
                let is_qtext = if Impl::OBS {
                    // we really should not ever end up in this branch as this state is
                    // meant to be used with non `obs-` grammar, but then it can be used
                    // differently too, and the if gets optimized away by the compiler
                    // so it should be fine
                    ObsQText.check(lres)
                } else {
                    QText.check(lres)
                };
                if is_qtext || (Impl::ALLOW_UTF8 && iu8 > 0x7f) {
                    Ok((State::Normal, true))
                } else if Ws.check(lres) {
                    // trailing ws after the FWS, stay in this state
                    Ok(Impl::custom_state(FWSState::HadFws, true))
                } else if iu8 == b'"' {
                    Ok((State::End, false))
                } else if iu8 == b'\\' {
                    Ok((State::QPStart, false))
                } else {
                    Err(CoreError::InvalidChar)
                }
            }
        }
    }
}