1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338
//! Generic base module. //! //! This module defines a generic interface, namely //! [`Base`](trait.Base.html), and an optimized implementation, namely //! [`Opt`](struct.Opt.html), for positional numerical systems with //! radix ranging from 2 to 64 (powers of two only). Other base //! constraints are described in the [`Base`](trait.Base.html) //! interface. use std::marker::PhantomData; /// Generic interface. /// /// A base implementation needs to define at least its padding `pad` /// and its symbol-to-value function `val`. Its power of two `bit` and /// its value-to-symbol function `sym` are uniquely defined from the /// `pad` and `val` functions. However, for performance reasons, all /// functions may be defined directly. /// /// # Vocabulary /// /// We call _ascii_ a 7-bit code represented as a `u8`. In other /// words, any `u8` value with most significant bit cleared (or /// equivalently, any `u8` value between 0 and 127 inclusive) is an /// ascii, and reciprocally. /// /// We call _value_ a _b_-bit code represented as a `u8`, where _b_ is /// the power of two of the base, _i.e._ 4 for `base16` and 6 for /// `base64` for instance. The values for `base64` are thus any `u8` /// value from 0 to 63, and similarly the values for `base16` are any /// `u8` value from 0 to 15. More generally, values are any `u8` value /// between 0 and 2^b - 1 inclusive. Each symbol is uniquely /// associated to a value. /// /// We call _symbol_ the symbols represented as ascii (as such, only /// ascii symbols are allowed). For instance, in `base64`, the symbols /// are the ascii from `A` to `Z`, the ascii from `a` to `z`, the /// ascii from `0` to `9`, the ascii `+`, and the ascii `/` in value /// order. And the `base16` symbols are the ascii from `0` to `9` and /// the ascii from `A` to `F`. /// /// We call _padding_ the padding represented as ascii (as such, only /// ascii padding is allowed). For instance, the ascii `=` is used as /// the padding for `base64` and `base16`. /// /// # Constraints /// /// The base interface comes with invariants which must be satisfied /// by all implementations. Although it cannot be checked, all /// implementations must be deterministic, _i.e._ they never return /// two different outputs for the same input and they never panic /// (unless specified otherwise). The other constraints are described /// in the [`ValidError`](enum.ValidError.html) enum and may be /// checked by the [`valid`](fn.valid.html) function. Implementations /// should also be pure. pub trait Base { /// Returns the padding. fn pad(&self) -> u8; /// Returns the value of a symbol. /// /// This function defines what the symbols are, to which value /// they are associated, and the base size: /// /// - A symbol is an input that does not return `None`. /// - The value of a symbol is its associated output. /// - The base size is the number of symbols. /// /// In other words, when `val(s)` returns: /// /// - `Some(v)`: `s` is a symbol with value `v`. /// - `None`: `s` is not a symbol. fn val(&self, x: u8) -> Option<u8>; /// Returns the power of two of the base. fn bit(&self) -> usize { let mut n = 0; for s in 0..128u8 { if self.val(s).is_some() { n += 1; } } let mut b = 0; while n > 1 { n /= 2; b += 1; } return b; } /// Returns the symbol of a value. /// /// This function must only be called on values. /// /// # Panics /// /// This function may panic when input is not a value. fn sym(&self, x: u8) -> u8 { for s in 0..128u8 { match self.val(s) { Some(v) if v == x => return s, _ => (), } } unreachable!(); } } /// Returns the bit-mask of a base. /// /// The bit-mask of a base is the set of bits used by values. In other /// words, the bit-mask is `(1 << base.bit()) - 1`. pub fn mask<B: Base>(base: &B) -> u8 { (1 << base.bit()) - 1 } /// Returns the period length of a base. /// /// The period length of a base is the number of significant bits /// after which the encoding or decoding mechanism loops. In other /// words, the period length is the least common multiple of 8 and /// `base.bit()`. pub fn len<B: Base>(base: &B) -> usize { match base.bit() { 1 | 2 | 4 => 8, 3 | 6 => 24, 5 => 40, _ => unreachable!(), } } /// Returns the encoding length of a base. /// /// The encoding length of a base is the number of ascii it takes /// before encoding loops. In other words, the encoding length is /// `len(base) / 8`. pub fn enc<B: Base>(base: &B) -> usize { len(base) / 8 } /// Returns the decoding length of a base. /// /// The decoding length of a base is the number of symbols it takes /// before decoding loops. In other words, the decoding length is /// `len(base) / base.bit()`. pub fn dec<B: Base>(base: &B) -> usize { len(base) / base.bit() } /// Optimized implementation. /// /// This implementation uses static arrays for constant-time lookup. /// It also uses a phantom type to enable static dispatch on demand. pub struct Opt<T> { /// Symbol to value association. /// /// This array must have size 256 and defines `val(s)` as /// `Some(val[s])` if `val[s] < 128` and `None` otherwise. pub val: &'static [u8], /// Value to symbol association. /// /// This array must have size `1 << b` and defines `sym(v)` as /// `sym[v]`. pub sym: &'static [u8], /// The power of two of the base. /// /// This value defines `bit()` as `bit`. pub bit: u8, /// The padding. /// /// This value defines `pad()` as `pad`. pub pad: u8, pub _phantom: PhantomData<T>, } impl<T> Base for Opt<T> { fn bit(&self) -> usize { self.bit as usize } fn pad(&self) -> u8 { self.pad } fn val(&self, x: u8) -> Option<u8> { let v = self.val[x as usize]; if v < 128 { Some(v) } else { None } } fn sym(&self, x: u8) -> u8 { self.sym[x as usize] } } /// Specification implementation. /// /// This implementation uses an array of inclusive ranges to easily /// describe symbol to value assocation. It is not meant for /// performance but for specification using the /// [`equal`](fn.equal.html) function. pub struct Spec { /// Symbol to value association. /// /// This array defines inclusive ranges of symbols in value /// order. These ranges must not overlap. For instance, for /// `&[(b'0', b'9'), (b'A', b'F')]`, `b'0'` has value 0, /// `b'8'` has value 8, and `b'E'` has value 14. pub val: &'static [(u8, u8)], /// The padding. pub pad: u8, } impl Base for Spec { fn pad(&self) -> u8 { self.pad } fn val(&self, x: u8) -> Option<u8> { let mut t = 0; for &(l, u) in self.val { if l <= x && x <= u { return Some(t + x - l); } t += u - l + 1; } None } } /// Validity errors. /// /// This enum defines the invariants of the [`Base`](trait.Base.html) /// trait and is returned by the [`valid`](fn.valid.html) function /// when a constraint check fails. #[derive(Clone,Copy,Debug,PartialEq,Eq)] pub enum ValidError { /// The base must be a power of two between 2 and 64 /// inclusive. /// /// The check `1 <= bit() && bit() <= 6` failed. BadBit, /// The padding must be an ascii. /// /// The check `pad() < 128` failed. PadNotAscii, /// The padding must not be a symbol. /// /// The check `val(pad()) == None` failed. PadSymbol, /// Symbols must be ascii. /// /// The check `val(s) == None || s < 128` failed. In other /// words, `s` is a symbol and `s` is not ascii. SymNotAscii(u8), /// Symbols must be mapped to values. /// /// The check that if `val(s) == Some(v)` then `v < 1 << /// bit()` failed. In other words, `s` is associated to `v`, /// but `v` is not a value. NotValue(u8), /// Symbols must be uniquely mapped. /// /// The check that if `val(s) == Some(v)` then `sym(v) == s` /// failed. In other words, `s` has value `v` but `v` is not /// associated to symbol `s`. The [`val`](trait.Base.html) /// function must be injective on symbols. This is checked using /// [`sym`](trait.Base.html) because it is the inverse of /// [`val`](trait.Base.html) on symbols. NotInj(u8), /// Symbols must be mapped to all values. /// /// The check `card(val) == 1 << bit()` failed. The `card` /// function returns the number of inputs for which its argument /// does not return `None`. In other words, the number of symbols /// is not equal to the number of values. The /// [`val`](trait.Base.html) function must be surjective on /// symbols to values. NotSurj, } /// Checks whether a base is valid. /// /// This function checks whether a base satisfies the /// [`Base`](trait.Base.html) constraints, given that the /// implementation is deterministic. pub fn valid<B: Base>(base: &B) -> Result<(), ValidError> { use self::ValidError::*; check!(BadBit, 1 <= base.bit() && base.bit() <= 6); check!(PadNotAscii, base.pad() < 128); check!(PadSymbol, base.val(base.pad()) == None); let mut card = 0u8; for s in 0..128u8 { if let Some(v) = base.val(s) { check!(NotValue(s), v < 1 << base.bit()); check!(NotInj(s), base.sym(v) == s); card += 1; } let x = s + 128; check!(SymNotAscii(x), base.val(x) == None); } check!(NotSurj, card == 1 << base.bit()); Ok(()) } /// Equality errors. #[derive(Clone,Copy,Debug,PartialEq,Eq)] pub enum EqualError { /// The two bases differ on a symbol or its associated value. Symbol(u8), /// The two bases differ on the padding. Padding, } /// Checks whether two bases are equal. /// /// This function checks whether the symbols, their associated value, /// and the padding of two bases are equal. This is enough if both /// bases are valid. pub fn equal<B1: Base, B2: Base>(b1: &B1, b2: &B2) -> Result<(), EqualError> { use self::EqualError::*; check!(Padding, b1.pad() == b2.pad()); for s in 0..128u8 { check!(Symbol(s), b1.val(s) == b2.val(s)); } Ok(()) }