bunk/decode.rs
1use thiserror::Error;
2use crate::*;
3
4/// Error type for decoding data.
5#[derive(Error, Clone, Copy, Debug, Hash, PartialEq, Eq)]
6pub enum InvalidData {
7 /// A syllable not present in the lookup table was found.
8 #[error("Unrecognized syllable")]
9 Syllable,
10
11 /// The number of syllables was not enough to contain the checksum. Returned only when a checksum is
12 /// used; empty strings are otherwise allowed.
13 #[error("Encoded data was too short")]
14 TooShort,
15
16 /// The checksum didn't match that of the decoded data. Returned only when a checksum is used.
17 #[error("Data integrity check failed")]
18 Checksum,
19}
20
/// Result of decoding data: the decoded value on success, or the [`InvalidData`] reason the input
/// was rejected.
pub type Result<T> = std::result::Result<T, InvalidData>;
23
24/// Decodes a string using the default [checksum settings](Checksum).
25///
26/// Use this only if the default checksum setting was used when encoding the string (e.g., if
27/// [`encode`](encode()) was used). All other [encoding settings](crate::Settings) are ignored when
28/// decoding.
29///
30/// # Examples
31///
32/// ```
33/// let encoded = bunk::encode(b"aftersun");
34/// let decoded = bunk::decode(encoded)?;
35///
36/// assert_eq!(decoded, b"aftersun");
37/// # Ok::<(), bunk::InvalidData>(())
38/// ```
39pub fn decode(string: impl AsRef<str>) -> Result<Vec<u8>> {
40 decode_with_settings(string, Checksum::default())
41}
42
43/// Decodes a string using given [checksum settings](Checksum).
44///
45/// The checksum setting must match the one used when the string was encoded. All other
46/// [encoding settings](crate::Settings) are ignored when decoding.
47///
48/// # Examples
49///
50/// Disabled checksum:
51/// ```
52/// use bunk::{Checksum, Settings};
53///
54/// let settings = Settings {
55/// checksum: Checksum::Disabled,
56/// ..Default::default()
57/// };
58/// let encoded = bunk::encode_with_settings(b"aftersun", settings);
59/// let decoded = bunk::decode_with_settings(encoded, settings.checksum)?;
60///
61/// assert_eq!(decoded, b"aftersun");
62/// # Ok::<(), bunk::InvalidData>(())
63/// ```
64pub fn decode_with_settings(string: impl AsRef<str>, checksum: Checksum) -> Result<Vec<u8>> {
65 // factored out non-generic code to reduce code size
66 decode_mono(string.as_ref(), checksum)
67}
68
69/// Monomorphised decode implementation.
70#[inline(never)]
71fn decode_mono(mut string: &str, checksum: Checksum) -> Result<Vec<u8>> {
72 let mut buffer = Vec::with_capacity(string.len() / 2);
73
74 // decode raw bytes from string. the bytes are still run-encoded and may have a checksum at the end
75 while !string.is_empty() {
76 // find the longest valid syllable at the beginning of the string
77 let (index, length) = syllables::longest_prefix_of(string)
78 .ok_or(InvalidData::Syllable)?;
79
80 // the index of the syllable is its payload
81 buffer.push(index);
82
83 // gobble until start of next syllable
84 string = &string[length..];
85 string = string
86 .find(char::is_alphabetic)
87 .map(|index| string.split_at(index))
88 .map(|(_, next)| next)
89 .unwrap_or("");
90 }
91
92 // compute the number of bytes constituting the payload vs checksum
93 let payload_len = buffer
94 .len()
95 .checked_sub(checksum.len())
96 .ok_or(InvalidData::TooShort)?;
97
98 // decode the payload bytes and compute their hash
99 let mut hash = Fnv1a::new();
100
101 for (i, byte) in buffer.iter_mut().enumerate().take(payload_len) {
102 *byte = running_code(*byte, i);
103 hash.update(*byte);
104 }
105
106 // remove checksum from the end and check whether it matches hash
107 let checksum_match = buffer
108 .drain(payload_len..)
109 .zip(hash.bytes())
110 .all(|(a, b)| a == b);
111
112 // if so, return the fully decoded payload bytes
113 checksum_match
114 .then_some(buffer)
115 .ok_or(InvalidData::Checksum)
116}
117
#[cfg(test)]
mod tests {
    use crate::*;

    /// Unusual inputs that must still decode successfully when no checksum is expected.
    #[test]
    fn outliers() {
        let inputs = [
            "uuuuuuuuuuu",
            "u u u u u u u u u u u ",
            "sive123sive@tive ππππ sonπ",
        ];

        for &input in &inputs {
            decode_with_settings(input, Checksum::Disabled).unwrap();
        }
    }

    /// Inputs containing something that is not a known syllable are rejected.
    #[test]
    fn syllable_err() {
        let inputs = ["π", "b", "siv", "faevlesa"];

        for &input in &inputs {
            let result = decode_with_settings(input, Checksum::Disabled);
            assert_eq!(result, Err(InvalidData::Syllable));
        }
    }

    /// Inputs with fewer syllables than the checksum needs are rejected.
    #[test]
    fn too_short_err() {
        let cases = vec![
            ("", Checksum::Length1),
            ("sive", Checksum::Length2),
            ("uu", Checksum::Length3),
        ];

        for (input, checksum) in cases {
            let result = decode_with_settings(input, checksum);
            assert_eq!(result, Err(InvalidData::TooShort));
        }
    }
}
154}