1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
//! caBLE Base10 encoder.
//!
//! QR codes store arbitrary binary data very inefficiently, but they have
//! alternate modes (such as numeric and alphanumeric) which can store suitably
//! encoded data more efficiently.
//!
//! While [RFC 9285] presents an encoding for efficiently encoding binary data
//! in QR's alphanumeric mode, there are additional issues:
//!
//! * caBLE pairing codes must be valid URLs (for mobile intent handling)
//!
//! * QR's alphanumeric mode does not allow all [URL-safe characters][url-chars],
//!   reducing efficiency
//!
//! * QR's alphanumeric mode allows [non-URL-safe characters][url-chars],
//!   reducing efficiency
//!
//! As a result, caBLE uses a novel Base10 encoding for the payload, which
//! achieves comparable density (in QR code bits), though with longer URLs.
//!
//! In the absence of a publicly-published caBLE specification, this is a port of
//! [Chromium's `BytesToDigits` and `DigitsToBytes` functions][crbase10].
//!
//! [crbase10]: https://source.chromium.org/chromium/chromium/src/+/main:device/fido/cable/v2_handshake.cc;l=471-568;drc=6767131b3528fefd866f604b32ebbb278c35d395
//! [RFC 9285]: https://www.rfc-editor.org/rfc/rfc9285.html
//! [url-chars]: https://www.rfc-editor.org/rfc/rfc3986.html#section-2.3

use std::fmt::Write;
/// Size of a chunk of data in its original (binary) form, in bytes.
///
/// `encode` zero-extends each chunk to 8 bytes and reads it as one
/// little-endian `u64`, so a chunk must be shorter than 8 bytes.
const CHUNK_SIZE: usize = 7;

/// Size of a full chunk of data in its encoded form, in decimal digits.
///
/// A full `CHUNK_SIZE`-byte chunk holds values up to
/// 2^56 - 1 = 72057594037927935, which is 17 decimal digits long.
const CHUNK_DIGITS: usize = 17;

/// Converts a byte slice into its caBLE Base10 (decimal digit) representation.
///
/// The input is processed in groups of up to `CHUNK_SIZE` bytes. Each group is
/// read as a little-endian unsigned integer and appended to the output as a
/// zero-padded decimal number. The padded width depends only on the number of
/// bytes in the group (a full group takes `CHUNK_DIGITS` digits), so the
/// length of the output is determined by the length of the input alone.
///
/// An empty input produces an empty string.
///
/// Mirrors Chromium's `BytesToDigits`.
pub fn encode(i: &[u8]) -> String {
    let mut digits = String::new();

    for group in i.chunks(CHUNK_SIZE) {
        // Number of decimal digits used for a group of this many bytes.
        let width = match group.len() {
            1 => 3,
            2 => 5,
            3 => 8,
            4 => 10,
            5 => 13,
            6 => 15,
            CHUNK_SIZE => CHUNK_DIGITS,
            // `chunks` yields between 1 and CHUNK_SIZE bytes, so this arm is
            // not expected to be taken.
            _ => 0,
        };

        // Little-endian: the last byte of the group is the most significant,
        // so accumulate from the back of the group towards the front.
        let value = group
            .iter()
            .rev()
            .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte));

        // Appending to a `String` cannot fail, so the result is ignored.
        let _ = write!(digits, "{:0width$}", value, width = width);
    }

    digits
}

/// Reasons why a string could not be decoded from Base10 format.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodeError {
    /// The input value contained non-ASCII-digit characters.
    ContainsNonDigitChars,
    /// The input value was not a valid length.
    InvalidLength,
    /// The input value contained a value which was out of range.
    OutOfRange,
}

impl std::fmt::Display for DecodeError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::ContainsNonDigitChars => "input contains characters other than ASCII digits",
            Self::InvalidLength => "input is not a valid length",
            Self::OutOfRange => "input contains a value which is out of range",
        })
    }
}

// Lets callers box this as `dyn Error` or convert it with `?` into generic
// error types.
impl std::error::Error for DecodeError {}

/// Decodes Base10 formatted data into binary form.
///
/// The input is split into groups of up to `CHUNK_DIGITS` digits. Each group
/// is read as a decimal number and written out as little-endian bytes; the
/// number of bytes is determined by the number of digits in the group (a full
/// group of `CHUNK_DIGITS` digits becomes `CHUNK_SIZE` bytes). An empty input
/// decodes to an empty `Vec`.
///
/// See Chromium's `DigitsToBytes`.
///
/// # Errors
///
/// * [`DecodeError::ContainsNonDigitChars`] if `i` contains anything other
///   than the ASCII digits `0`-`9`. This is checked before anything else.
/// * [`DecodeError::InvalidLength`] if the final group of digits has a length
///   which does not correspond to a whole number of bytes.
/// * [`DecodeError::OutOfRange`] if the value of a group does not fit in the
///   number of bytes implied by its length.
pub fn decode(i: &str) -> Result<Vec<u8>, DecodeError> {
    let digits = i.as_bytes();

    // Validate the whole input up front. Every byte of a non-ASCII character's
    // UTF-8 encoding is >= 0x80, so checking bytes rejects exactly the same
    // inputs as checking `char`s would.
    if !digits.iter().all(u8::is_ascii_digit) {
        return Err(DecodeError::ContainsNonDigitChars);
    }

    // From here on every byte is an ASCII digit, so the input can be handled
    // as bytes without any UTF-8 concerns (and without `unsafe`).
    let chunk_count = (digits.len() + CHUNK_DIGITS - 1) / CHUNK_DIGITS;
    let mut o = Vec::with_capacity(chunk_count * CHUNK_SIZE);

    for group in digits.chunks(CHUNK_DIGITS) {
        // Number of bytes which encode to this many digits.
        let w = match group.len() {
            CHUNK_DIGITS => CHUNK_SIZE,
            15 => 6,
            13 => 5,
            10 => 4,
            8 => 3,
            5 => 2,
            3 => 1,
            _ => return Err(DecodeError::InvalidLength),
        };

        // A group has at most 17 digits, so its value is at most 10^17 - 1,
        // which fits in a u64: this accumulation cannot overflow.
        let d = group
            .iter()
            .fold(0u64, |acc, &digit| acc * 10 + u64::from(digit - b'0'));

        // Reject values which need more than `w` bytes to represent.
        if d >> (w * 8) != 0 {
            return Err(DecodeError::OutOfRange);
        }

        o.extend_from_slice(&d.to_le_bytes()[..w]);
    }

    Ok(o)
}

#[cfg(test)]
mod test {
    use super::*;

    /// Asserts that decoding `i` fails with exactly the error `e`.
    fn decoder_err_test(i: &str, e: DecodeError) {
        assert_eq!(Err(e), decode(i), "decode({:?})", i);
    }

    /// Malformed inputs must be rejected with the matching error.
    #[test]
    fn invalid_decode() {
        use DecodeError::*;
        // Non-digit characters
        decoder_err_test("abc", ContainsNonDigitChars);
        decoder_err_test("abc1234", ContainsNonDigitChars);

        // Full-width digits (U+FF11..U+FF13), which are not ASCII
        decoder_err_test("\u{ff11}\u{ff12}\u{ff13}", ContainsNonDigitChars);

        // Digits with umlauts (decomposed combining diacriticals on digits)
        decoder_err_test("1\u{308}2\u{308}3\u{308}", ContainsNonDigitChars);

        // Incorrect lengths
        decoder_err_test("1", InvalidLength);
        decoder_err_test("12", InvalidLength);
        decoder_err_test("1234", InvalidLength);
        decoder_err_test("123456789012345678", InvalidLength);

        // Valid length, but the value does not fit in the number of bytes
        // which that length decodes to
        decoder_err_test("999", OutOfRange);
        decoder_err_test("99999999999999999", OutOfRange);
    }

    /// All-zero inputs pin the byte-count <-> digit-count mapping.
    #[test]
    fn decoding_zero() {
        // (number of bytes, number of digits)
        let lengths = [
            (0, 0),
            (1, 3),
            (2, 5),
            (3, 8),
            (4, 10),
            (5, 13),
            (6, 15),
            (7, 17),
            (8, 20),
        ];
        for (bl, dl) in lengths {
            let bytes = vec![0; bl];
            let digits = "0".repeat(dl);

            assert_eq!(encode(bytes.as_slice()), digits);
            assert_eq!(decode(&digits), Ok(bytes));
        }
    }

    /// Every prefix of the byte sequence 0x00..=0xff must round-trip.
    #[test]
    fn encoding_survives_roundtrips() {
        // Inclusive range, so that 0xff is part of the test data.
        let i: Vec<u8> = (0..=255).collect();

        // Inclusive upper bound, so that the complete buffer is tested too.
        for len in 0..=i.len() {
            let i = &i[0..len];
            assert_eq!(decode(&encode(i)), Ok(i.to_vec()), "length = {}", len);
        }
    }

    /// Guards against accidental changes to the wire format.
    #[test]
    fn encoding_should_not_change() {
        let i: [u8; 3] = [0x61, 0x62, 0xff];
        assert_eq!(encode(&i), "16736865");
        assert_eq!(decode("16736865").expect("unexpected error"), i);
    }
}