//! Helpers for parsing quoted strings in media types, including handling of
//! MIME folding white space (FWS) inside quoted strings.
use lut::{Table, Access};
use lookup_tables::{
MediaTypeChars,
ObsQText, QText,
ObsQTextWs, QTextWs,
Ws
};
use qs::error::CoreError;
use qs::spec::{
PartialCodePoint,
ParsingImpl,
State
};
mod other;
pub use self::other::*;
mod http;
pub use self::http::*;
mod mime;
pub use self::mime::*;
/// Extension trait for implementing media-type parsing in the context of MIME.
///
/// An implementor selects the grammar variant through the associated constants
/// and provides a way to wrap a [`FWSState`] into its own parser state.
pub trait MimeParsingExt: ParsingImpl {
    /// `true` if utf8 is allowed.
    const ALLOW_UTF8: bool;

    /// `true` if the `obs-` part of the grammar is supported.
    const OBS: bool;

    /// Creates the custom state based on the given `FWSState` and `emit` flag.
    ///
    /// # Example
    ///
    /// ```ignore
    /// fn custom_state(state: FWSState, emit: bool) -> (State<Self>, bool) {
    ///     (State::Custom(MyCustomType(state)), emit)
    /// }
    /// ```
    ///
    fn custom_state(state: FWSState, emit: bool) -> (State<Self>, bool);

    /// Default implementation for handling the normal state of the `State` automaton.
    ///
    /// The checks are applied in this order:
    ///
    /// 1. If the input is qtext or white space (with respect to `Self::OBS`), or
    ///    `Self::ALLOW_UTF8` is set and the byte is above `0x7f`,
    ///    `Ok((State::Normal, true))` is returned.
    /// 2. If the input is `'\r'`, `Ok(Self::custom_state(FWSState::HitCr, false))`
    ///    is returned.
    /// 3. Anything else yields `Err(CoreError::InvalidChar)`.
    ///
    /// Note that with `Self::ALLOW_UTF8` set to true any `bch.as_u8() > 0x7f` is treated
    /// as non-us-ascii utf8. This state machine does **not** validate that the input is
    /// valid utf8, so if it is used on a byte sequence which is not known to be a valid
    /// utf8 string it is still necessary to validate that it is utf8 and not e.g. latin1.
    fn handle_normal_state(bch: PartialCodePoint) -> Result<(State<Self>, bool), CoreError> {
        let byte = bch.as_u8();
        let index = byte as usize;

        // The two tables are distinct types, so the lookup cannot be shared.
        let qtext_or_ws = if Self::OBS {
            MediaTypeChars::check_at(index, ObsQTextWs)
        } else {
            MediaTypeChars::check_at(index, QTextWs)
        };
        let accepted_non_ascii = Self::ALLOW_UTF8 && byte > 0x7f;

        if qtext_or_ws || accepted_non_ascii {
            return Ok((State::Normal, true));
        }

        match byte {
            // a lone `'\r'` may start a FWS, the following states verify that
            b'\r' => Ok(Self::custom_state(FWSState::HitCr, false)),
            _ => Err(CoreError::InvalidChar),
        }
    }
}
/// An enum to represent the state in a quoted string parser for specifications with FWS.
///
/// FWS is folding white space, it can appear in media-types in the mime specification.
/// A FWS is a `"\r\n"` sequence followed by either `' '` or `'\t'`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum FWSState {
/// the state after the first `'\r'`
HitCr,
/// the state after `"\r\n"`
HitNl,
/// the state after `"\r\n "` or `"\r\n\t"` + any number of ws chars
HadFws
}
impl FWSState {
    /// Default implementation for handling `FWSState` state transitions.
    ///
    /// Handles the state transition with respect to a given `MimeParsingExt`
    /// implementation. It assures that after a `\r` only `\n` can follow and that
    /// after a `\n` either `' '` or `'\t'` has to follow. Lastly it makes sure that
    /// between two FWS there has to be at least one non ws character (at least in
    /// the non `obs-` grammar).
    ///
    /// Transitions by current state:
    ///
    /// - `HitCr`: `'\n'` moves to `HitNl`.
    /// - `HitNl`: `' '` or `'\t'` moves to `State::Normal` if `Impl::OBS` is set,
    ///   else to `HadFws`.
    /// - `HadFws`: qtext (or a byte above `0x7f` if `Impl::ALLOW_UTF8` is set) moves
    ///   to `State::Normal`, white space stays in `HadFws`, `'"'` moves to
    ///   `State::End` and `'\\'` moves to `State::QPStart`.
    ///
    /// # Errors
    ///
    /// Returns `CoreError::InvalidChar` for any input not listed above.
    pub fn advance<Impl: MimeParsingExt>(self, bch: PartialCodePoint)
        -> Result<(State<Impl>, bool), CoreError>
    {
        use self::FWSState::*;

        let iu8 = bch.as_u8();
        match self {
            HitCr => {
                if iu8 == b'\n' {
                    Ok(Impl::custom_state(FWSState::HitNl, false))
                } else {
                    Err(CoreError::InvalidChar)
                }
            },
            HitNl => {
                if iu8 == b' ' || iu8 == b'\t' {
                    if Impl::OBS {
                        Ok((State::Normal, true))
                    } else {
                        // the new grammar does not allow ws-only lines, the `obs-` one does
                        Ok(Impl::custom_state(FWSState::HadFws, true))
                    }
                } else {
                    Err(CoreError::InvalidChar)
                }
            },
            HadFws => {
                let lres = MediaTypeChars::lookup(iu8 as usize);
                // `Impl::OBS` is an associated constant, so the branch can be
                // resolved at compile time.
                let is_qtext = if Impl::OBS {
                    // The transitions above only enter `HadFws` for the non `obs-`
                    // grammar, but an implementation may use this state differently,
                    // so the `obs-` table is honoured here. This matches the table
                    // choice in `MimeParsingExt::handle_normal_state`.
                    ObsQText.check(lres)
                } else {
                    QText.check(lres)
                };
                if is_qtext || (Impl::ALLOW_UTF8 && iu8 > 0x7f) {
                    Ok((State::Normal, true))
                } else if Ws.check(lres) {
                    Ok(Impl::custom_state(FWSState::HadFws, true))
                } else if iu8 == b'"' {
                    Ok((State::End, false))
                } else if iu8 == b'\\' {
                    Ok((State::QPStart, false))
                } else {
                    Err(CoreError::InvalidChar)
                }
            }
        }
    }
}