Skip to main content

bunk/
decode.rs

1use thiserror::Error;
2use crate::*;
3
4/// Error type for decoding data. 
5#[derive(Error, Clone, Copy, Debug, Hash, PartialEq, Eq)]
6pub enum InvalidData {
7    /// A syllable not present in the lookup table was found. 
8    #[error("Unrecognized syllable")]
9    Syllable, 
10
11    /// The number of syllables was not enough to contain the checksum. Returned only when a checksum is
12    /// used; empty strings are otherwise allowed. 
13    #[error("Encoded data was too short")]
14    TooShort, 
15
16    /// The checksum didn't match that of the decoded data. Returned only when a checksum is used. 
17    #[error("Data integrity check failed")]
18    Checksum, 
19}
20
21/// Result of decoding data. 
22pub type Result<T> = std::result::Result<T, InvalidData>;
23
24/// Decodes a string using the default [checksum settings](Checksum). 
25/// 
26/// Use this only if the default checksum setting was used when encoding the string (e.g., if
27/// [`encode`](encode()) was used). All other [encoding settings](crate::Settings) are ignored when
28/// decoding. 
29/// 
30/// # Examples
31/// 
32/// ```
33/// let encoded = bunk::encode(b"aftersun");
34/// let decoded = bunk::decode(encoded)?;
35/// 
36/// assert_eq!(decoded, b"aftersun");
37/// # Ok::<(), bunk::InvalidData>(())
38/// ```
39pub fn decode(string: impl AsRef<str>) -> Result<Vec<u8>> {
40    decode_with_settings(string, Checksum::default())
41}
42
43/// Decodes a string using given [checksum settings](Checksum). 
44/// 
45/// The checksum setting must match the one used when the string was encoded. All other
46/// [encoding settings](crate::Settings) are ignored when decoding. 
47/// 
48/// # Examples
49/// 
50/// Disabled checksum: 
51/// ```
52/// use bunk::{Checksum, Settings};
53/// 
54/// let settings = Settings {
55///     checksum: Checksum::Disabled, 
56///     ..Default::default()
57/// };
58/// let encoded = bunk::encode_with_settings(b"aftersun", settings);
59/// let decoded = bunk::decode_with_settings(encoded, settings.checksum)?;
60/// 
61/// assert_eq!(decoded, b"aftersun");
62/// # Ok::<(), bunk::InvalidData>(())
63/// ```
64pub fn decode_with_settings(string: impl AsRef<str>, checksum: Checksum) -> Result<Vec<u8>> {
65    // factored out non-generic code to reduce code size
66    decode_mono(string.as_ref(), checksum)
67}
68
69/// Monomorphised decode implementation. 
70#[inline(never)]
71fn decode_mono(mut string: &str, checksum: Checksum) -> Result<Vec<u8>> {
72    let mut buffer = Vec::with_capacity(string.len() / 2);
73
74    // decode raw bytes from string. the bytes are still run-encoded and may have a checksum at the end
75    while !string.is_empty() {
76        // find the longest valid syllable at the beginning of the string
77        let (index, length) = syllables::longest_prefix_of(string)
78            .ok_or(InvalidData::Syllable)?;
79
80        // the index of the syllable is its payload
81        buffer.push(index);
82
83        // gobble until start of next syllable
84        string = &string[length..];
85        string = string
86            .find(char::is_alphabetic)
87            .map(|index| string.split_at(index))
88            .map(|(_, next)| next)
89            .unwrap_or("");
90    }
91
92    // compute the number of bytes constituting the payload vs checksum
93    let payload_len = buffer
94        .len()
95        .checked_sub(checksum.len())
96        .ok_or(InvalidData::TooShort)?;
97
98    // decode the payload bytes and compute their hash
99    let mut hash = Fnv1a::new();
100
101    for (i, byte) in buffer.iter_mut().enumerate().take(payload_len) {
102        *byte = running_code(*byte, i);
103        hash.update(*byte);
104    }
105
106    // remove checksum from the end and check whether it matches hash
107    let checksum_match = buffer
108        .drain(payload_len..)
109        .zip(hash.bytes())
110        .all(|(a, b)| a == b);
111
112    // if so, return the fully decoded payload bytes
113    checksum_match
114        .then_some(buffer)
115        .ok_or(InvalidData::Checksum)
116}
117
#[cfg(test)]
mod tests {
    use crate::*;

    /// Unusual but valid inputs must decode without error when no checksum is expected.
    #[test]
    fn outliers() {
        let test = |input| {
            decode_with_settings(input, Checksum::Disabled).unwrap();
        };
        test("uuuuuuuuuuu");
        test("u  u  u  u  u  u  u  u  u  u  u  ");
        // Non-alphabetic separators, including emoji, are skipped between syllables. The emoji
        // are written as escapes (U+1F600 grinning face, U+1F440 eyes) so that a re-encoding of
        // this file cannot turn them into alphabetic mojibake.
        test("sive123sive@tive  \u{1F600}\u{1F600}\u{1F600}\u{1F600} son\u{1F440}");
    }

    /// Inputs that do not start with a known syllable are rejected.
    #[test]
    fn syllable_err() {
        let test = |input| {
            let result = decode_with_settings(input, Checksum::Disabled);
            assert_eq!(result, Err(InvalidData::Syllable));
        };
        // A lone emoji (U+1F600) is not a syllable.
        test("\u{1F600}");
        test("b");
        test("siv");
        test("faevlesa");
    }

    /// Inputs with fewer syllables than the checksum length are rejected as too short.
    #[test]
    fn too_short_err() {
        let test = |input, checksum| {
            let result = decode_with_settings(input, checksum);
            assert_eq!(result, Err(InvalidData::TooShort));
        };
        test("",     Checksum::Length1);
        test("sive", Checksum::Length2);
        test("uu",   Checksum::Length3);
    }
}
154}