1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#![forbid(unsafe_code)]
#![deny(missing_docs)]
#![deny(unused_must_use)]
#![deny(unused_mut)]

//! mnemonic-16bit is a mnemonic library that will take any binary data and convert it into a
//! phrase which is more human friendly. Each word of the phrase maps to 16 bits, where the first
//! 10 bits are represented by one word from the seed15 dictionary, and the remaining 6 bits are
//! represented by a number between 0 and 63. If the number is '64', that signifies that the word
//! only represents 1 byte instead of 2. Only the final word of a phrase may use the numerical
//! suffix 64.
//!
//! ```
//! use mnemonic_16bit::{binary_to_phrase, phrase_to_binary};
//!
//! fn main() {
//!     let my_data = [0u8; 2];
//!     let phrase = binary_to_phrase(&my_data); // "abbey0"
//!     let data = phrase_to_binary(&phrase).unwrap();
//!     assert!(data[..] == my_data[..]);
//! }
//! ```

use anyhow::{bail, Context, Error, Result};
use dictionary_1024::{word_at_index, index_of_word};

/// binary_to_phrase encodes arbitrary bytes as a mnemonic phrase.
///
/// The input is consumed two bytes at a time. The upper 10 bits of each pair select a dictionary
/// word and the lower 6 bits are appended to that word as a decimal number between 0 and 63. If
/// the input has an odd number of bytes, the trailing byte selects a dictionary word by its own
/// value and that word is given the suffix "64". Words are separated by single spaces, and empty
/// input produces an empty phrase.
pub fn binary_to_phrase(data: &[u8]) -> String {
    let mut words: Vec<String> = Vec::new();

    // Split the input into complete byte pairs plus an optional trailing byte.
    let pairs = data.chunks_exact(2);
    let leftover = pairs.remainder();

    // Every complete pair becomes one word followed by a 6 bit number.
    for pair in pairs {
        let high = pair[0] as usize;
        let low = pair[1] as usize;

        // The whole first byte and the top two bits of the second byte form the word index.
        let word = word_at_index(high * 4 + low / 64);

        // The bottom six bits of the second byte form the numerical suffix.
        words.push(format!("{}{}", word, low % 64));
    }

    // A lone trailing byte is looked up directly and flagged with the reserved suffix.
    if let [last] = leftover {
        let word = word_at_index(*last as usize);
        words.push(format!("{}64", word));
    }

    words.join(" ")
}

/// phrase_to_binary is the inverse of binary_to_phrase, it will take a mnonmic-16bit phrase and
/// parse it into a set of bytes.
pub fn phrase_to_binary(phrase: &str) -> Result<Vec<u8>, Error> {
    if phrase == "" {
        return Ok(vec![0u8; 0]);
    }

    // Parse the words one at a time.
    let mut finalized = false;
    let mut result: Vec<u8> = Vec::new();
    let words = phrase.split(" ");
    for word in words {
        if finalized {
            bail!("only the last word may contain the number '64'");
        }

        // Make sure there are only numeric characters at the end of the string.
        let mut digits = 0;
        for c in word.chars() {
            if digits > 0 && !c.is_ascii_digit() {
                bail!("number must appear as suffix only");
            }
            if digits > 1 {
                bail!("number must be at most 2 digits");
            }
            if c.is_ascii_digit() {
                digits += 1;
            }
        }
        if digits == 0 {
            bail!("word must have a numerical suffix");
        }

        // We have validated the word, now we need to parse the bytes. We start with the numerical
        // suffix because that indicates whether we are pulling 8 bits from the word or 10.
        let numerical_suffix;
        if digits == 1 {
            numerical_suffix = &word[word.len()-1..];
        } else {
            numerical_suffix = &word[word.len()-2..];
        }

        // Parse the rest of the data based on whether the final digit is 64 or less.
        if numerical_suffix == "64" {
            finalized = true;
            let word_index = index_of_word(word).context(format!("invalid word {} in phrase", word))?;
            if word_index > 255 {
                bail!("final word is invalid, needs to be among the first 255 words in the dictionary");
            }
            result.push(word_index as u8);
        } else {
            let mut bits = index_of_word(word).context(format!("invalid word {} in phrase", word))? as u16;
            bits *= 64;
            let numerical_bits: u16 = numerical_suffix.parse().unwrap();
            if numerical_bits > 64 {
                bail!("numerical suffix must have a value [0, 64]");
            }
            bits += numerical_bits;
            result.push((bits / 256) as u8);
            result.push((bits % 256) as u8);
        }
    }

    Ok(result)
}

#[cfg(test)]
mod tests {
    use super::*;
    use userspace_rng::Csprng;
    use rand_core::RngCore;

    // Encode the data as a phrase, decode the phrase again, and require that the decoded bytes
    // match the bytes that were encoded.
    fn assert_round_trip(data: &[u8]) {
        let phrase = binary_to_phrase(data);
        let decoded = phrase_to_binary(&phrase).unwrap();
        assert!(data[..] == decoded[..]);
    }

    #[test]
    // Round trip a wide range of byte arrays through the phrase encoding and check that every one
    // of them comes back unchanged.
    fn check_seed_phrases() {
        // The empty array.
        assert_round_trip(&[0u8; 0]);

        // Every possible single byte value.
        for byte in 0..=255u8 {
            assert_round_trip(&[byte; 1]);
        }

        // All-zero arrays of every length from 0 to 255.
        for len in 0..=255usize {
            assert_round_trip(&vec![0u8; len]);
        }

        // Random contents for every length from 0 to 255, 8 variations of each length.
        let mut rng = Csprng {};
        for _ in 0..8 {
            for len in 0..=255usize {
                let mut random = vec![0u8; len];
                rng.fill_bytes(&mut random);
                assert_round_trip(&random);
            }
        }

        // Every possible pair of byte values.
        for first in 0..=255u8 {
            for second in 0..=255u8 {
                let pair: [u8; 2] = [first, second];
                assert_round_trip(&pair);
            }
        }
    }

    #[test]
    // Check that a variety of invalid phrases are rejected.
    fn check_bad_phrases() {
        let bad_phrases = [
            "a",
            "a64",
            "abbey",
            "abbey65",
            "yacht64",
            "sugar21 ab55 mob32",
            "sugar21 toffee mob32",
        ];
        for bad_phrase in bad_phrases.iter() {
            phrase_to_binary(bad_phrase).unwrap_err();
        }

        // This one should be accepted even though the words have been truncated.
        phrase_to_binary("sug21 tof21 mob32").unwrap();
    }
}