1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
//! Generic base module.
//!
//! This module defines a generic interface, namely
//! [`Base`](trait.Base.html), and an optimized implementation, namely
//! [`Opt`](struct.Opt.html), for positional numerical systems with
//! radix ranging from 2 to 64 (powers of two only). Other base
//! constraints are described in the [`Base`](trait.Base.html)
//! interface.

use std::marker::PhantomData;

/// Generic interface.
///
/// A base implementation needs to define at least its padding `pad`
/// and its symbol-to-value function `val`. Its power of two `bit` and
/// its value-to-symbol function `sym` are uniquely defined from the
/// `pad` and `val` functions. However, for performance reasons, all
/// functions may be defined directly.
///
/// # Vocabulary
///
/// We call _ascii_ a 7-bit code represented as a `u8`. In other
/// words, any `u8` value with most significant bit cleared (or
/// equivalently, any `u8` value between 0 and 127 inclusive) is an
/// ascii, and reciprocally.
///
/// We call _value_ a _b_-bit code represented as a `u8`, where _b_ is
/// the power of two of the base, _i.e._ 4 for `base16` and 6 for
/// `base64` for instance. The values for `base64` are thus any `u8`
/// value from 0 to 63, and similarly the values for `base16` are any
/// `u8` value from 0 to 15. More generally, values are any `u8` value
/// between 0 and 2^b - 1 inclusive. Each symbol is uniquely
/// associated to a value.
///
/// We call _symbol_ the symbols represented as ascii (as such, only
/// ascii symbols are allowed). For instance, in `base64`, the symbols
/// are the ascii from `A` to `Z`, the ascii from `a` to `z`, the
/// ascii from `0` to `9`, the ascii `+`, and the ascii `/` in value
/// order. And the `base16` symbols are the ascii from `0` to `9` and
/// the ascii from `A` to `F`.
///
/// We call _padding_ the padding represented as ascii (as such, only
/// ascii padding is allowed). For instance, the ascii `=` is used as
/// the padding for `base64` and `base16`.
///
/// # Constraints
///
/// The base interface comes with invariants which must be satisfied
/// by all implementations. Although it cannot be checked, all
/// implementations must be deterministic, _i.e._ they never return
/// two different outputs for the same input and they never panic
/// (unless specified otherwise). The other constraints are described
/// in the [`ValidError`](enum.ValidError.html) enum and may be
/// checked by the [`valid`](fn.valid.html) function. Implementations
/// should also be pure.
pub trait Base {
    /// Returns the padding.
    fn pad(&self) -> u8;

    /// Returns the value of a symbol.
    ///
    /// This function defines what the symbols are, to which value
    /// they are associated, and the base size:
    ///
    /// - A symbol is an input that does not return `None`.
    /// - The value of a symbol is its associated output.
    /// - The base size is the number of symbols.
    ///
    /// In other words, when `val(s)` returns:
    ///
    /// - `Some(v)`: `s` is a symbol with value `v`.
    /// - `None`: `s` is not a symbol.
    fn val(&self, x: u8) -> Option<u8>;

    /// Returns the power of two of the base.
    fn bit(&self) -> usize {
        let mut n = 0;
        for s in 0..128u8 {
            if self.val(s).is_some() {
                n += 1;
            }
        }
        let mut b = 0;
        while n > 1 {
            n /= 2;
            b += 1;
        }
        return b;
    }

    /// Returns the symbol of a value.
    ///
    /// This function must only be called on values.
    ///
    /// # Panics
    ///
    /// This function may panic when input is not a value.
    fn sym(&self, x: u8) -> u8 {
        for s in 0..128u8 {
            match self.val(s) {
                Some(v) if v == x => return s,
                _ => (),
            }
        }
        unreachable!();
    }
}

/// Returns the bit-mask of a base.
///
/// The bit-mask of a base is the set of bits used by values. In other
/// words, the bit-mask is `(1 << base.bit()) - 1`.
pub fn mask<B: Base>(base: &B) -> u8 {
    (1 << base.bit()) - 1
}

/// Returns the period length of a base.
///
/// The period length of a base is the number of significant bits
/// after which the encoding or decoding mechanism loops. In other
/// words, the period length is the least common multiple of 8 and
/// `base.bit()`.
pub fn len<B: Base>(base: &B) -> usize {
    match base.bit() {
        1 | 2 | 4 => 8,
        3 | 6 => 24,
        5 => 40,
        _ => unreachable!(),
    }
}

/// Returns the encoding length of a base.
///
/// The encoding length of a base is the number of ascii it takes
/// before encoding loops. In other words, the encoding length is
/// `len(base) / 8`.
pub fn enc<B: Base>(base: &B) -> usize {
    len(base) / 8
}

/// Returns the decoding length of a base.
///
/// The decoding length of a base is the number of symbols it takes
/// before decoding loops. In other words, the decoding length is
/// `len(base) / base.bit()`.
pub fn dec<B: Base>(base: &B) -> usize {
    len(base) / base.bit()
}

/// Optimized implementation.
///
/// This implementation uses static arrays for constant-time lookup.
/// It also uses a phantom type to enable static dispatch on demand.
pub struct Opt<T> {
    /// Symbol to value association.
    ///
    /// This array must have size 256 and defines `val(s)` as
    /// `Some(val[s])` if `val[s] < 128` and `None` otherwise.
    pub val: &'static [u8],

    /// Value to symbol association.
    ///
    /// This array must have size `1 << b` and defines `sym(v)` as
    /// `sym[v]`.
    pub sym: &'static [u8],

    /// The power of two of the base.
    ///
    /// This value defines `bit()` as `bit`.
    pub bit: u8,

    /// The padding.
    ///
    /// This value defines `pad()` as `pad`.
    pub pad: u8,

    pub _phantom: PhantomData<T>,
}

impl<T> Base for Opt<T> {
    fn bit(&self) -> usize {
        self.bit as usize
    }

    fn pad(&self) -> u8 {
        self.pad
    }

    fn val(&self, x: u8) -> Option<u8> {
        let v = self.val[x as usize];
        if v < 128 { Some(v) } else { None }
    }

    fn sym(&self, x: u8) -> u8 {
        self.sym[x as usize]
    }
}

/// Specification implementation.
///
/// This implementation uses an array of inclusive ranges to easily
/// describe symbol to value assocation. It is not meant for
/// performance but for specification using the
/// [`equal`](fn.equal.html) function.
pub struct Spec {
    /// Symbol to value association.
    ///
    /// This array defines inclusive ranges of symbols in value
    /// order. These ranges must not overlap. For instance, for
    /// `&[(b'0', b'9'), (b'A', b'F')]`, `b'0'` has value 0,
    /// `b'8'` has value 8, and `b'E'` has value 14.
    pub val: &'static [(u8, u8)],

    /// The padding.
    pub pad: u8,
}

impl Base for Spec {
    fn pad(&self) -> u8 {
        self.pad
    }

    fn val(&self, x: u8) -> Option<u8> {
        let mut t = 0;
        for &(l, u) in self.val {
            if l <= x && x <= u {
                return Some(t + x - l);
            }
            t += u - l + 1;
        }
        None
    }
}

/// Validity errors.
///
/// This enum defines the invariants of the [`Base`](trait.Base.html)
/// trait and is returned by the [`valid`](fn.valid.html) function
/// when a constraint check fails.
#[derive(Clone,Copy,Debug,PartialEq,Eq)]
pub enum ValidError {
    /// The base must be a power of two between 2 and 64
    /// inclusive.
    ///
    /// The check `1 <= bit() && bit() <= 6` failed.
    BadBit,

    /// The padding must be an ascii.
    ///
    /// The check `pad() < 128` failed.
    PadNotAscii,

    /// The padding must not be a symbol.
    ///
    /// The check `val(pad()) == None` failed.
    PadSymbol,

    /// Symbols must be ascii.
    ///
    /// The check `val(s) == None || s < 128` failed. In other
    /// words, `s` is a symbol and `s` is not ascii.
    SymNotAscii(u8),

    /// Symbols must be mapped to values.
    ///
    /// The check that if `val(s) == Some(v)` then `v < 1 <<
    /// bit()` failed. In other words, `s` is associated to `v`,
    /// but `v` is not a value.
    NotValue(u8),

    /// Symbols must be uniquely mapped.
    ///
    /// The check that if `val(s) == Some(v)` then `sym(v) == s`
    /// failed. In other words, `s` has value `v` but `v` is not
    /// associated to symbol `s`. The [`val`](trait.Base.html)
    /// function must be injective on symbols. This is checked using
    /// [`sym`](trait.Base.html) because it is the inverse of
    /// [`val`](trait.Base.html) on symbols.
    NotInj(u8),

    /// Symbols must be mapped to all values.
    ///
    /// The check `card(val) == 1 << bit()` failed. The `card`
    /// function returns the number of inputs for which its argument
    /// does not return `None`. In other words, the number of symbols
    /// is not equal to the number of values. The
    /// [`val`](trait.Base.html) function must be surjective on
    /// symbols to values.
    NotSurj,
}

/// Checks whether a base is valid.
///
/// This function checks whether a base satisfies the
/// [`Base`](trait.Base.html) constraints, given that the
/// implementation is deterministic.
pub fn valid<B: Base>(base: &B) -> Result<(), ValidError> {
    use self::ValidError::*;
    check!(BadBit, 1 <= base.bit() && base.bit() <= 6);
    check!(PadNotAscii, base.pad() < 128);
    check!(PadSymbol, base.val(base.pad()) == None);
    let mut card = 0u8;
    for s in 0..128u8 {
        if let Some(v) = base.val(s) {
            check!(NotValue(s), v < 1 << base.bit());
            check!(NotInj(s), base.sym(v) == s);
            card += 1;
        }
        let x = s + 128;
        check!(SymNotAscii(x), base.val(x) == None);
    }
    check!(NotSurj, card == 1 << base.bit());
    Ok(())
}

/// Equality errors.
#[derive(Clone,Copy,Debug,PartialEq,Eq)]
pub enum EqualError {
    /// The two bases differ on a symbol or its associated value.
    Symbol(u8),

    /// The two bases differ on the padding.
    Padding,
}

/// Checks whether two bases are equal.
///
/// This function checks whether the symbols, their associated value,
/// and the padding of two bases are equal. This is enough if both
/// bases are valid.
pub fn equal<B1: Base, B2: Base>(b1: &B1, b2: &B2) -> Result<(), EqualError> {
    use self::EqualError::*;
    check!(Padding, b1.pad() == b2.pad());
    for s in 0..128u8 {
        check!(Symbol(s), b1.val(s) == b2.val(s));
    }
    Ok(())
}