foundation_ur/bytewords/
mod.rs

1// SPDX-FileCopyrightText: © 2023 Foundation Devices, Inc. <hello@foundationdevices.com>
2// SPDX-FileCopyrightText: © 2020 Dominik Spicher <dominikspicher@gmail.com>
3// SPDX-License-Identifier: MIT
4
5//! # Bytewords
6//!
7//! Encode and decode byte payloads according to the [bytewords] scheme.
8//!
//! [bytewords]: https://github.com/BlockchainCommons/Research/blob/master/papers/bcr-2020-012-bytewords.md
10
11pub mod constants;
12pub mod minicbor;
13
14use core::fmt;
15
16use crate::{
17    bytewords::constants::{MINIMALS, MINIMAL_IDXS, WORDS, WORD_IDXS},
18    CRC32,
19};
20
21use itertools::Either;
22
/// Selects how the individual words of a `bytewords` string are spelled and
/// joined together.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Style {
    /// Four-letter words with a single space between them.
    Standard,
    /// Four-letter words with a single dash between them.
    Uri,
    /// Two-letter words written back to back, without any separator.
    Minimal,
}

impl Style {
    /// Returns the string placed between two consecutive words.
    ///
    /// # Panics
    ///
    /// Panics for [`Style::Minimal`], which has no separator; callers must
    /// handle that style before asking for one.
    const fn separator_str(self) -> &'static str {
        match self {
            Self::Standard => " ",
            Self::Uri => "-",
            Self::Minimal => panic!("minimal style does not use separators"),
        }
    }
}
44
/// The different errors that can be returned when decoding.
#[derive(Debug, PartialEq, Eq)]
pub enum DecodeError {
    /// A word is not part of the word list. Usually indicates a wrong
    /// encoding [`Style`] was passed.
    InvalidWord {
        /// Index of the offending word within the payload, counted in words
        /// from the start of the string. `None` when the invalid word was
        /// found among the trailing checksum words.
        position: Option<usize>,
    },
    /// The CRC32 checksum doesn't validate.
    InvalidChecksum {
        /// The expected checksum from the last bytes of the bytewords string.
        expected: [u8; 4],
        /// The calculated checksum from the payload bytes of the bytewords string.
        calculated: [u8; 4],
    },
    /// The CRC32 checksum is not present.
    ChecksumNotPresent,
    /// Invalid bytewords string length.
    InvalidLength,
    /// The bytewords string contains non-ASCII characters.
    NonAscii,
    /// Not enough space to decode the bytewords into.
    NotEnoughSpace {
        /// Available space to decode the bytewords.
        available: usize,
        /// Needed space to decode the bytewords.
        needed: usize,
    },
}

impl fmt::Display for DecodeError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            DecodeError::InvalidWord {
                position: Some(position),
            } => write!(f, "Invalid word found at position {position}"),
            DecodeError::InvalidWord { position: None } => write!(f, "Invalid word found"),
            DecodeError::InvalidChecksum {
                expected,
                calculated,
            } => write!(
                f,
                // Both checksums are shown as big-endian `u32` values.
                "Expected checksum '{}' is different than the calculated '{}'",
                u32::from_be_bytes(*expected),
                u32::from_be_bytes(*calculated)
            ),
            DecodeError::ChecksumNotPresent => write!(f, "Checksum is not present"),
            DecodeError::InvalidLength => write!(f, "Invalid length"),
            DecodeError::NonAscii => {
                write!(f, "Bytewords string contains non-ASCII characters")
            }
            DecodeError::NotEnoughSpace { needed, available } => {
                write!(f, "Not enough space to decode the bytewords, needed {needed} but only {available} bytes available")
            }
        }
    }
}

// Mirrors the `EncodeError` implementation so both error types can be used
// as `dyn std::error::Error` when the standard library is available.
#[cfg(feature = "std")]
impl std::error::Error for DecodeError {}
102
/// Failures that can occur while encoding into a caller-provided buffer.
#[derive(Debug, PartialEq, Eq)]
pub enum EncodeError {
    /// The output buffer is too small to hold the encoded bytewords.
    NotEnoughSpace,
}

impl fmt::Display for EncodeError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::NotEnoughSpace => "Not enough space to encode the bytewords into",
        };

        f.write_str(message)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for EncodeError {}
122
/// Turns a `bytewords` string back into the bytes it encodes.
///
/// The last four words of the string are the checksum of the payload; they
/// are verified and are not part of the returned bytes.
///
/// # Examples
///
/// ```
/// # use foundation_ur::bytewords::{decode, Style};
/// assert_eq!(
///     decode("able tied also webs lung", Style::Standard).unwrap(),
///     vec![0]
/// );
/// assert_eq!(
///     decode("able-tied-also-webs-lung", Style::Uri).unwrap(),
///     vec![0]
/// );
/// // Notice how the minimal encoding consists of the start and end letters of the bytewords
/// assert_eq!(decode("aetdaowslg", Style::Minimal).unwrap(), vec![0]);
/// ```
///
/// # Errors
///
/// Fails when the string holds a word that is not in the word list, does not
/// match the given `style`, lacks the checksum, or carries a checksum that
/// differs from the one calculated over the payload.
#[cfg(feature = "alloc")]
pub fn decode(encoded: &str, style: Style) -> Result<alloc::vec::Vec<u8>, DecodeError> {
    let (words, expected) = decoder(encoded, style)?;

    // Stop at the first word that has no entry in the lookup table.
    let mut payload = alloc::vec::Vec::new();
    for (position, word) in words.enumerate() {
        match word {
            Some(byte) => payload.push(byte),
            None => {
                return Err(DecodeError::InvalidWord {
                    position: Some(position),
                })
            }
        }
    }

    let calculated = CRC32.checksum(&payload).to_be_bytes();
    if calculated == expected {
        Ok(payload)
    } else {
        Err(DecodeError::InvalidChecksum {
            expected,
            calculated,
        })
    }
}
165
166/// Try to validate a bytewords encoded string and calculate its length in
167/// bytes.
168pub fn validate(encoded: &str, style: Style) -> Result<usize, DecodeError> {
169    let (bytes, expected_checksum) = decoder(encoded, style)?;
170    let mut digest = CRC32.digest();
171
172    let mut n = 0;
173    for maybe_byte in bytes {
174        digest.update(&[maybe_byte.ok_or(DecodeError::InvalidWord { position: Some(n) })?]);
175        n += 1;
176    }
177
178    let calculated_checksum = digest.finalize().to_be_bytes();
179    if calculated_checksum != expected_checksum {
180        return Err(DecodeError::InvalidChecksum {
181            expected: expected_checksum,
182            calculated: calculated_checksum,
183        });
184    }
185
186    Ok(n)
187}
188
189/// Decodes a `bytewords`-encoded string back into a byte payload onto an
190/// existing slice. The encoding must contain a four-byte checksum.
191///
192/// If the return value of this method is `Ok(n)`, then `n` is the number of
193/// bytes written into `result`.
194///
195/// # Errors
196///
197/// This function returns an error if the `bytewords`-encoded string is larger
198/// than `result`.
199// FIXME: Refactor the loop to solve this:
200#[allow(clippy::while_let_on_iterator)]
201pub fn decode_to_slice(
202    encoded: &str,
203    result: &mut [u8],
204    style: Style,
205) -> Result<usize, DecodeError> {
206    let (mut bytes, expected_checksum) = decoder(encoded, style)?;
207
208    let mut n = 0;
209    while let Some(maybe_byte) = bytes.next() {
210        if n >= result.len() {
211            while let Some(maybe_byte) = bytes.next() {
212                maybe_byte.ok_or(DecodeError::InvalidWord { position: Some(n) })?;
213
214                n += 1;
215            }
216
217            return Err(DecodeError::NotEnoughSpace {
218                available: result.len(),
219                needed: n,
220            });
221        }
222
223        result[n] = maybe_byte.ok_or(DecodeError::InvalidWord { position: Some(n) })?;
224        n += 1;
225    }
226
227    let calculated_checksum = CRC32.checksum(&result[..n]).to_be_bytes();
228    if calculated_checksum != expected_checksum {
229        return Err(DecodeError::InvalidChecksum {
230            expected: expected_checksum,
231            calculated: calculated_checksum,
232        });
233    }
234
235    Ok(n)
236}
237
238fn decoder(
239    encoded: &str,
240    style: Style,
241) -> Result<(impl Iterator<Item = Option<u8>> + '_, [u8; 4]), DecodeError> {
242    if !encoded.is_ascii() {
243        return Err(DecodeError::NonAscii);
244    }
245
246    if encoded.is_empty() {
247        return Err(DecodeError::ChecksumNotPresent);
248    }
249
250    let (keys, indexes) = match style {
251        Style::Standard => (Either::Left(encoded.split(' ')), &WORD_IDXS),
252        Style::Uri => (Either::Left(encoded.split('-')), &WORD_IDXS),
253        Style::Minimal => {
254            if encoded.len() % 2 != 0 {
255                return Err(DecodeError::InvalidLength);
256            }
257
258            let keys = Either::Right(
259                (0..encoded.len())
260                    .step_by(2)
261                    .map(|idx| &encoded[idx..idx + 2]),
262            );
263
264            (keys, &MINIMAL_IDXS)
265        }
266    };
267
268    let mut bytes = keys.map(|k| indexes.get(k).copied());
269
270    // Consume checksum bytes before anything else.
271    let mut checksum = [0u8; 4];
272    for b in checksum.iter_mut().rev() {
273        match bytes.next_back() {
274            Some(Some(byte)) => *b = byte,
275            Some(None) => return Err(DecodeError::InvalidWord { position: None }),
276            None => return Err(DecodeError::ChecksumNotPresent),
277        }
278    }
279
280    Ok((bytes, checksum))
281}
282
283fn encoder<'a>(
284    data: &'a [u8],
285    checksum: &'a [u8],
286    style: Style,
287) -> impl Iterator<Item = &'static str> + 'a {
288    let table = match style {
289        Style::Standard | Style::Uri => &WORDS,
290        Style::Minimal => &MINIMALS,
291    };
292
293    data.iter()
294        .chain(checksum.iter())
295        .map(|&b| table[b as usize])
296}
297
/// Builds the `bytewords` string for a byte payload in the given `style`.
///
/// The checksum of the payload is appended as the last four words.
///
/// # Examples
///
/// ```
/// # use foundation_ur::bytewords::{encode, Style};
/// assert_eq!(encode(&[0], Style::Standard), "able tied also webs lung");
/// assert_eq!(encode(&[0], Style::Uri), "able-tied-also-webs-lung");
/// // Notice how the minimal encoding consists of the start and end letters of the bytewords
/// assert_eq!(encode(&[0], Style::Minimal), "aetdaowslg");
/// ```
#[must_use]
#[cfg(feature = "alloc")]
pub fn encode(data: &[u8], style: Style) -> alloc::string::String {
    // Naming the trait by path works with and without `std`, so no
    // feature-gated import is needed.
    alloc::string::ToString::to_string(&Bytewords(data, style))
}
317
318/// Encodes a byte payload into a `bytewords` encoded string on an existing slice.
319///
320/// The return value of this method is `n` and is the number of bytes written
321/// into `result`.
322pub fn encode_to_slice(data: &[u8], result: &mut [u8], style: Style) -> Result<usize, EncodeError> {
323    let checksum = CRC32.checksum(data).to_be_bytes();
324
325    let mut encoder = encoder(data, &checksum, style).map(|w| w.as_bytes());
326    let mut n = 0;
327
328    if style == Style::Minimal {
329        for word in encoder {
330            debug_assert!(word.len() == 2);
331
332            if n >= result.len() {
333                return Err(EncodeError::NotEnoughSpace);
334            }
335            result[n..n + 2].copy_from_slice(word);
336            n += 2;
337        }
338
339        Ok(n)
340    } else {
341        let separator = match style {
342            Style::Standard => b' ',
343            Style::Uri => b'-',
344            _ => unreachable!(),
345        };
346
347        if let Some(first_word) = encoder.next() {
348            debug_assert!(first_word.len() == 4);
349
350            result[0..4].copy_from_slice(first_word);
351            n += 4;
352        } else {
353            return Ok(n);
354        }
355
356        for word in encoder {
357            debug_assert!(word.len() == 4);
358
359            if n + 5 >= result.len() {
360                return Err(EncodeError::NotEnoughSpace);
361            }
362
363            result[n] = separator;
364            result[n + 1..n + 5].copy_from_slice(word);
365            n += 5;
366        }
367
368        Ok(n)
369    }
370}
371
372/// Structure to format bytewords using [`Display`](fmt::Display).
373///
374/// The implementation does not allocate and writes bytewords
375/// directly to the formatter.
376///
377/// # Examples
378///
379/// Printing bytewords to stdout:
380///
381/// ```
382/// # use foundation_ur::bytewords::{Bytewords, Style};
383/// let data = b"bytewords encodable message :)";
384/// println!("{}", Bytewords(data, Style::Minimal));
385/// ```
386pub struct Bytewords<'a>(pub &'a [u8], pub Style);
387
388impl<'a> fmt::Display for Bytewords<'a> {
389    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
390        let &Bytewords(data, style) = self;
391        let checksum = CRC32.checksum(data).to_be_bytes();
392
393        let mut encoder = encoder(data, &checksum, style);
394        if style == Style::Minimal {
395            for word in encoder {
396                write!(f, "{word}")?;
397            }
398        } else {
399            if let Some(first_word) = encoder.next() {
400                write!(f, "{first_word}")?;
401            } else {
402                return Ok(());
403            }
404
405            let separator = style.separator_str();
406            for word in encoder {
407                write!(f, "{separator}{word}")?;
408            }
409        }
410
411        Ok(())
412    }
413}
414
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    use super::*;

    /// Round-trips a short payload through every style and checks each way
    /// decoding can fail.
    #[test]
    fn test_bytewords() {
        let input = [0u8, 1, 2, 128, 255];
        let encodings = [
            (
                Style::Standard,
                "able acid also lava zoom jade need echo taxi",
            ),
            (Style::Uri, "able-acid-also-lava-zoom-jade-need-echo-taxi"),
            (Style::Minimal, "aeadaolazmjendeoti"),
        ];

        for &(style, text) in &encodings {
            assert_eq!(encode(&input, style), text);
        }

        decode("lpayaacfaddscypyuesfqzhdgeetldfzhywslusacppddspsdwgefyrdlsfzaadrdtlrdatlbbgyfyuydygrwewyjyolvtsphhmkgowdamvowfmhbnwkimrndepebtwnrpwzintihgsffznyvshftyqzoylftybykthlgerdolbwfpzoltghrd", Style::Minimal).unwrap();

        for &(style, text) in &encodings {
            assert_eq!(decode(text, style).unwrap(), input);
        }

        // empty payload is allowed
        let empty = encode(&[], Style::Minimal);
        decode(&empty, Style::Minimal).unwrap();

        // bad checksum
        let corrupted = [
            (
                Style::Standard,
                "able acid also lava zero jade need echo wolf",
            ),
            (Style::Uri, "able-acid-also-lava-zero-jade-need-echo-wolf"),
            (Style::Minimal, "aeadaolazojendeowf"),
        ];
        for &(style, text) in &corrupted {
            assert_eq!(
                decode(text, style),
                Err(DecodeError::InvalidChecksum {
                    expected: [107, 155, 51, 243],
                    calculated: [108, 246, 247, 201]
                })
            );
        }

        // too short
        for &text in &["wolf", ""] {
            assert_eq!(
                decode(text, Style::Standard),
                Err(DecodeError::ChecksumNotPresent)
            );
        }

        // invalid length
        assert_eq!(
            decode("aea", Style::Minimal),
            Err(DecodeError::InvalidLength)
        );

        // non ASCII
        for &style in &[Style::Standard, Style::Uri, Style::Minimal] {
            assert_eq!(decode("₿", style), Err(DecodeError::NonAscii));
        }
    }

    /// Checks a 100-byte payload against its known standard and minimal
    /// encodings, in both directions.
    #[test]
    fn test_encoding() {
        let input: [u8; 100] = [
            245, 215, 20, 198, 241, 235, 69, 59, 209, 205, 165, 18, 150, 158, 116, 135, 229, 212,
            19, 159, 17, 37, 239, 240, 253, 11, 109, 191, 37, 242, 38, 120, 223, 41, 156, 189, 242,
            254, 147, 204, 66, 163, 216, 175, 191, 72, 169, 54, 32, 60, 144, 230, 210, 137, 184,
            197, 33, 113, 88, 14, 157, 31, 177, 46, 1, 115, 205, 69, 225, 150, 65, 235, 58, 144,
            65, 240, 133, 69, 113, 247, 63, 53, 242, 165, 160, 144, 26, 13, 79, 237, 133, 71, 82,
            69, 254, 165, 138, 41, 85, 24,
        ];

        let encoded = "yank toys bulb skew when warm free fair tent swan \
                       open brag mint noon jury list view tiny brew note \
                       body data webs what zinc bald join runs data whiz \
                       days keys user diet news ruby whiz zone menu surf \
                       flew omit trip pose runs fund part even crux fern \
                       math visa tied loud redo silk curl jugs hard beta \
                       next cost puma drum acid junk swan free very mint \
                       flap warm fact math flap what limp free jugs yell \
                       fish epic whiz open numb math city belt glow wave \
                       limp fuel grim free zone open love diet gyro cats \
                       fizz holy city puff";

        let encoded_minimal = "yktsbbswwnwmfefrttsnonbgmtnnjyltvwtybwne\
                               bydawswtzcbdjnrsdawzdsksurdtnsrywzzemusf\
                               fwottppersfdptencxfnmhvatdldroskcljshdba\
                               ntctpadmadjksnfevymtfpwmftmhfpwtlpfejsyl\
                               fhecwzonnbmhcybtgwwelpflgmfezeonledtgocs\
                               fzhycypf";

        let pairs = [
            (Style::Standard, encoded),
            (Style::Minimal, encoded_minimal),
        ];

        for &(style, text) in &pairs {
            assert_eq!(decode(text, style).unwrap(), &input[..]);
        }
        for &(style, text) in &pairs {
            assert_eq!(encode(&input, style), text);
        }
    }
}