Skip to main content

mnumonic/
lib.rs

1/// Return the list of words used for doing conversion. The words are
2/// read from a source file at compile time. The word list must have
3/// exactly 256 words, all in lowercase, and sorted alphabetically so
4/// that the list can be binary-searched. Care should be taken to avoid
5/// words that are commonly mis-spelled or that may have multiple valid
6/// spellings.
7fn words() -> Vec<&'static str> {
8    #[cfg(feature = "en")]
9    let list = include_str!("../words/en.txt");
10
11    let words : Vec<&'static str> = list.lines().collect();
12    assert_eq!(words.len(), 256);
13    // assert!(words.is_sorted()); // nightly only for now, see https://github.com/rust-lang/rust/issues/53485
14    for word in &words {
15        assert!(word.len() < 7);
16    }
17    words
18}
19
20/// Encode an array of bytes into a corresponding vector of human readable words.
21/// The output vector will be the same length as the input vector.
22pub fn encode_bytes(bytes: &[u8]) -> Vec<&'static str> {
23    let words = words();
24    let mut result = Vec::new();
25    for b in bytes {
26        result.push(words[*b as usize]);
27    }
28    result
29}
30
31/// Encode a u32 into a vector of words. The output vector will have at least one
32/// entry, and no more than four entries; each entry is a single human-language word.
33/// Each byte of the u32 is converted to a word, with leading zero bytes dropped.
34pub fn encode_u32(num: u32) -> Vec<&'static str> {
35    let mut bytes = vec![
36        ((num >> 24) & 0xFF) as u8,
37        ((num >> 16) & 0xFF) as u8,
38        ((num >> 8) & 0xFF) as u8,
39        (num & 0xFF) as u8,
40    ];
41    // Drop leading zeros
42    while bytes.len() > 1 {
43        if bytes[0] == 0 {
44            bytes.remove(0);
45        } else {
46            break;
47        }
48    }
49    encode_bytes(&bytes)
50}
51
52/// Same as encode_u32, except this also joins the individual words into a
53/// phrase. The words are joined using an ascii space character as delimiter.
54pub fn encode_u32_joined(num: u32) -> String {
55    encode_u32(num).join(" ")
56}
57
58/// Decode a vector of words (as produced by `encode_bytes`) back into the
59/// vector of bytes that produced them. In case of error (e.g. the provided
60/// vector has an invalid word), the index of the invalid entry is returned
61/// as an `Err`. Note that the words are lowercased per Unicode rules before
62/// decoding. This allows for scenarios where the human transcribing the
63/// words mixes up the case of the words, or some other system's autocorrect
64/// mutates the case.
65pub fn decode_bytes(encoded_words: &[&str]) -> Result<Vec<u8>, usize> {
66    let mut result = Vec::new();
67    let words = words();
68    for (i, encoded) in encoded_words.iter().enumerate() {
69        let ix = words.binary_search(&encoded.to_lowercase().as_str()).map_err(|_| i)?;
70        result.push(ix as u8);
71    }
72    Ok(result)
73}
74
75/// Decode a vector of words (as produced by `encode_u32`) back into the
76/// u32 that produced them. If the provided word vector has more than 4
77/// words, then an `Err(4)` is produced; if any the words are invalid, then
78/// an `Err` is produced with the index of the bad entry in the input vector.
79pub fn decode_u32(encoded_words: &[&str]) -> Result<u32, usize> {
80    let bytes = decode_bytes(encoded_words)?;
81    if bytes.len() > 4 {
82        return Err(4);
83    }
84    let mut result : u32 = 0;
85    for b in bytes {
86        result = (result << 8) | (b as u32 & 0xFF);
87    }
88    Ok(result)
89}
90
91/// Decode a phrase (as produced by `encode_u32_joined`). Basically the
92/// same as `decode_u32` except it splits the phrase on whitespace to generate
93/// the words to decode.
94pub fn decode_u32_joined(joined: &str) -> Result<u32, usize> {
95    decode_u32(&joined.split_whitespace().collect::<Vec<&str>>())
96}
97
#[cfg(test)]
mod tests {
    use super::*;

    /// Known-answer checks for the encoders, including the empty input and
    /// the dropping of leading zero bytes.
    #[test]
    fn test_encode() {
        assert_eq!(encode_bytes(&[]), Vec::<&str>::new());
        assert_eq!(encode_u32(0xDEADBEEF).join(" "), "table potato school true".to_string());
        assert_eq!(encode_u32(1234).join(" "), "ant stamp".to_string());
        assert_eq!(encode_u32(5).join(" "), "apple".to_string());
        assert_eq!(encode_u32(0).join(" "), "able".to_string());
    }

    /// Known-answer checks for the decoders, including mixed-case input.
    #[test]
    fn test_decode() {
        assert_eq!(decode_bytes(&[]).unwrap(), vec![]);
        assert_eq!(decode_bytes(&["taBLE", "potato", "school", "TRUE"]).unwrap(), vec![0xDE, 0xAD, 0xBE, 0xEF]);
        assert_eq!(decode_u32(&["ant", "stamp"]).unwrap(), 1234);
        assert_eq!(decode_u32_joined("table potato school true").unwrap(), 0xDEADBEEF);
    }

    /// Error reporting: unknown words yield their index, too many words yield 4.
    #[test]
    fn test_decode_failure() {
        assert_eq!(decode_bytes(&["nonsense"]), Err(0));
        // The reported index is the position of the bad word, not always 0.
        assert_eq!(decode_bytes(&["table", "nonsense"]), Err(1));
        assert_eq!(decode_u32(&["table", "potato", "school", "true", "true"]), Err(4));
    }

    /// Encoding followed by decoding must give back the original value.
    #[test]
    fn test_round_trip() {
        // Every byte value must survive; this also exercises the binary
        // search against every entry of the word list.
        let all: Vec<u8> = (0..=255).collect();
        assert_eq!(decode_bytes(&encode_bytes(&all)), Ok(all));

        for &num in &[0u32, 1, 255, 256, 1234, 0x00FF_0000, 0xDEAD_BEEF, 0xFFFF_FFFF] {
            assert_eq!(decode_u32(&encode_u32(num)), Ok(num));
            assert_eq!(decode_u32_joined(&encode_u32_joined(num)), Ok(num));
        }
    }
}
124}