base_d/encoders/algorithms/
word_alternating.rs

1//! Alternating word-based encoding for PGP biometric word lists.
2//!
3//! Unlike standard word encoding which uses radix conversion, this encoder provides
4//! a direct 1:1 mapping where each byte is encoded as a single word, with the
5//! dictionary selection alternating based on byte position.
6//!
7//! This is specifically designed for PGP biometric word lists where:
8//! - Each byte (0-255) maps to exactly one word
9//! - Even byte positions use one dictionary (e.g., "even" words)
10//! - Odd byte positions use another dictionary (e.g., "odd" words)
11//!
12//! # Example
13//!
14//! ```
15//! use base_d::{WordDictionary, AlternatingWordDictionary, word_alternating};
16//!
17//! // Create dictionaries with 256 words each
18//! let even_words: Vec<String> = (0..256).map(|i| format!("even{}", i)).collect();
19//! let odd_words: Vec<String> = (0..256).map(|i| format!("odd{}", i)).collect();
20//!
21//! let even = WordDictionary::builder()
22//!     .words(even_words)
23//!     .build()
24//!     .unwrap();
25//!
26//! let odd = WordDictionary::builder()
27//!     .words(odd_words)
28//!     .build()
29//!     .unwrap();
30//!
31//! let dict = AlternatingWordDictionary::new(
32//!     vec![even, odd],
33//!     "-".to_string(),
34//! );
35//!
36//! let data = vec![0x42, 0xAB];
37//! let encoded = word_alternating::encode(&data, &dict).unwrap();
38//! // "even66-odd171" (0x42 = 66, 0xAB = 171)
39//!
40//! let decoded = word_alternating::decode(&encoded, &dict).unwrap();
41//! assert_eq!(decoded, data);
42//! ```
43
44use super::errors::DecodeError;
45use crate::core::alternating_dictionary::AlternatingWordDictionary;
46
47/// Encodes binary data using alternating word dictionaries.
48///
49/// Each byte is encoded as a single word, with the dictionary selection
50/// alternating based on byte position.
51///
52/// # Parameters
53///
54/// - `data`: The binary data to encode
55/// - `dictionary`: The alternating word dictionary to use
56///
57/// # Returns
58///
59/// A string with words joined by the dictionary's delimiter, or an error
60/// if any byte cannot be encoded (e.g., byte value exceeds dictionary size).
61///
62/// # Errors
63///
64/// Returns `DecodeError::InvalidCharacter` if a byte value exceeds the
65/// dictionary size at that position.
66///
67/// # Example
68///
69/// ```
70/// use base_d::{WordDictionary, AlternatingWordDictionary, word_alternating};
71///
72/// let even_words: Vec<String> = (0..256).map(|i| format!("e{}", i)).collect();
73/// let odd_words: Vec<String> = (0..256).map(|i| format!("o{}", i)).collect();
74///
75/// let even = WordDictionary::builder().words(even_words).build().unwrap();
76/// let odd = WordDictionary::builder().words(odd_words).build().unwrap();
77///
78/// let dict = AlternatingWordDictionary::new(vec![even, odd], " ".to_string());
79///
80/// let data = vec![0x00, 0x01, 0x02];
81/// let encoded = word_alternating::encode(&data, &dict).unwrap();
82/// assert_eq!(encoded, "e0 o1 e2");
83/// ```
84pub fn encode(data: &[u8], dictionary: &AlternatingWordDictionary) -> Result<String, DecodeError> {
85    if data.is_empty() {
86        return Ok(String::new());
87    }
88
89    let mut words: Vec<&str> = Vec::with_capacity(data.len());
90
91    for (pos, &byte) in data.iter().enumerate() {
92        let word =
93            dictionary
94                .encode_byte(byte, pos)
95                .ok_or_else(|| DecodeError::InvalidCharacter {
96                    char: byte as char,
97                    position: pos,
98                    input: format!("byte {} at position {}", byte, pos),
99                    valid_chars: "bytes 0-255".to_string(),
100                })?;
101        words.push(word);
102    }
103
104    Ok(words.join(dictionary.delimiter()))
105}
106
107/// Decodes an alternating word sequence back to binary data.
108///
109/// Splits the input on the dictionary's delimiter and decodes each word
110/// using the appropriate dictionary for that position.
111///
112/// # Parameters
113///
114/// - `encoded`: The encoded word sequence
115/// - `dictionary`: The alternating word dictionary to use
116///
117/// # Returns
118///
119/// The decoded binary data, or a DecodeError if decoding fails.
120///
121/// # Errors
122///
123/// Returns `DecodeError::InvalidCharacter` if:
124/// - A word is not found in the appropriate dictionary for its position
125///
126/// # Example
127///
128/// ```
129/// use base_d::{WordDictionary, AlternatingWordDictionary, word_alternating};
130///
131/// let even_words: Vec<String> = (0..256).map(|i| format!("e{}", i)).collect();
132/// let odd_words: Vec<String> = (0..256).map(|i| format!("o{}", i)).collect();
133///
134/// let even = WordDictionary::builder().words(even_words).build().unwrap();
135/// let odd = WordDictionary::builder().words(odd_words).build().unwrap();
136///
137/// let dict = AlternatingWordDictionary::new(vec![even, odd], " ".to_string());
138///
139/// let encoded = "e0 o1 e2";
140/// let decoded = word_alternating::decode(encoded, &dict).unwrap();
141/// assert_eq!(decoded, vec![0x00, 0x01, 0x02]);
142/// ```
143pub fn decode(
144    encoded: &str,
145    dictionary: &AlternatingWordDictionary,
146) -> Result<Vec<u8>, DecodeError> {
147    if encoded.is_empty() {
148        return Ok(Vec::new());
149    }
150
151    let delimiter = dictionary.delimiter();
152    let words: Vec<&str> = if delimiter.is_empty() {
153        vec![encoded]
154    } else {
155        encoded.split(delimiter).collect()
156    };
157
158    let mut result = Vec::with_capacity(words.len());
159
160    for (pos, word) in words.iter().enumerate() {
161        let byte =
162            dictionary
163                .decode_word(word.trim(), pos)
164                .ok_or_else(|| DecodeError::InvalidWord {
165                    word: word.to_string(),
166                    position: pos,
167                    input: encoded.to_string(),
168                })?;
169        result.push(byte);
170    }
171
172    Ok(result)
173}
174
#[cfg(test)]
mod tests {
    use super::*;
    use crate::WordDictionary;

    /// Returns the words `"{prefix}{i}"` for every `i` in `start..256`.
    fn numbered_words(prefix: &str, start: usize) -> Vec<String> {
        (start..256).map(|i| format!("{}{}", prefix, i)).collect()
    }

    /// Builds a two-list alternating dictionary (even list first, odd list
    /// second) that joins words with `delimiter`.
    fn alternating(
        even_words: Vec<String>,
        odd_words: Vec<String>,
        delimiter: &str,
    ) -> AlternatingWordDictionary {
        let even = WordDictionary::builder().words(even_words).build().unwrap();
        let odd = WordDictionary::builder().words(odd_words).build().unwrap();
        AlternatingWordDictionary::new(vec![even, odd], delimiter.to_string())
    }

    /// Encodes `data`, decodes the result again and checks that the bytes
    /// survive the round trip unchanged.
    fn assert_roundtrip(dict: &AlternatingWordDictionary, data: &[u8]) {
        let encoded = encode(data, dict).unwrap();
        let decoded = decode(&encoded, dict).unwrap();
        assert_eq!(decoded, data);
    }

    /// Dictionary whose lists are `even0..even255` and `odd0..odd255`,
    /// joined with `-`, so that expected words can be derived from byte values.
    fn create_full_dictionaries() -> AlternatingWordDictionary {
        alternating(numbered_words("even", 0), numbered_words("odd", 0), "-")
    }

    /// Dictionary whose first four entries per list are real words (for
    /// readable assertions); the remaining entries are numbered filler up
    /// to 256 words per list.
    fn create_small_dictionaries() -> AlternatingWordDictionary {
        let mut even_words: Vec<String> = ["aardvark", "absurd", "accrue", "acme"]
            .iter()
            .map(|w| w.to_string())
            .collect();
        let even_filler_start = even_words.len();
        even_words.extend(numbered_words("even", even_filler_start));

        let mut odd_words: Vec<String> = ["adroitness", "adviser", "aftermath", "aggregate"]
            .iter()
            .map(|w| w.to_string())
            .collect();
        let odd_filler_start = odd_words.len();
        odd_words.extend(numbered_words("odd", odd_filler_start));

        alternating(even_words, odd_words, "-")
    }

    #[test]
    fn test_encode_empty() {
        let encoded = encode(&[], &create_full_dictionaries()).unwrap();
        assert_eq!(encoded, "");
    }

    #[test]
    fn test_encode_single_byte() {
        // 0x42 == 66, and position 0 uses the even list.
        let encoded = encode(&[0x42], &create_full_dictionaries()).unwrap();
        assert_eq!(encoded, "even66");
    }

    #[test]
    fn test_encode_two_bytes() {
        // 0x42 == 66 (even position), 0xAB == 171 (odd position).
        let encoded = encode(&[0x42, 0xAB], &create_full_dictionaries()).unwrap();
        assert_eq!(encoded, "even66-odd171");
    }

    #[test]
    fn test_encode_decode_roundtrip() {
        assert_roundtrip(&create_full_dictionaries(), &[0x00, 0x01, 0x42, 0xAB, 0xFF]);
    }

    #[test]
    fn test_decode_empty() {
        let decoded = decode("", &create_full_dictionaries()).unwrap();
        assert_eq!(decoded, Vec::<u8>::new());
    }

    #[test]
    fn test_decode_single_word() {
        let decoded = decode("even66", &create_full_dictionaries()).unwrap();
        assert_eq!(decoded, vec![0x42]);
    }

    #[test]
    fn test_decode_multiple_words() {
        let decoded = decode("even66-odd171", &create_full_dictionaries()).unwrap();
        assert_eq!(decoded, vec![0x42, 0xAB]);
    }

    #[test]
    fn test_decode_case_insensitive() {
        let dict = create_small_dictionaries();
        let expected = vec![0, 1];
        let encoded = encode(&expected, &dict).unwrap();

        // The same bytes must come back whatever the letter case of the input.
        let from_upper = decode(&encoded.to_uppercase(), &dict).unwrap();
        let from_lower = decode(&encoded.to_lowercase(), &dict).unwrap();
        assert_eq!(from_upper, expected);
        assert_eq!(from_lower, expected);
    }

    #[test]
    fn test_decode_unknown_word() {
        let outcome = decode("even0-unknown-even2", &create_full_dictionaries());
        assert!(outcome.is_err());
        assert!(matches!(outcome, Err(DecodeError::InvalidWord { .. })));
    }

    #[test]
    fn test_decode_wrong_dictionary_for_position() {
        // "adroitness" belongs to the odd list, but position 0 is an even position.
        let outcome = decode("adroitness-absurd", &create_small_dictionaries());
        assert!(outcome.is_err());
    }

    #[test]
    fn test_alternating_pattern() {
        let dict = create_small_dictionaries();
        let data = vec![0, 1, 2, 3];

        // Expected picks:
        //   position 0 (even list), byte 0 -> aardvark
        //   position 1 (odd list),  byte 1 -> adviser
        //   position 2 (even list), byte 2 -> accrue
        //   position 3 (odd list),  byte 3 -> aggregate
        let encoded = encode(&data, &dict).unwrap();
        assert_eq!(encoded, "aardvark-adviser-accrue-aggregate");

        assert_eq!(decode(&encoded, &dict).unwrap(), data);
    }

    #[test]
    fn test_custom_delimiter() {
        let dict = alternating(numbered_words("e", 0), numbered_words("o", 0), " ");
        let data = vec![0, 1, 2];

        let encoded = encode(&data, &dict).unwrap();
        assert_eq!(encoded, "e0 o1 e2");

        assert_eq!(decode(&encoded, &dict).unwrap(), data);
    }

    #[test]
    fn test_whitespace_handling() {
        // Whitespace around individual words is ignored when decoding.
        let decoded = decode("  aardvark  -  adviser  ", &create_small_dictionaries()).unwrap();
        assert_eq!(decoded, vec![0, 1]);
    }

    #[test]
    fn test_encode_all_bytes() {
        // Every byte value once, so both lists are exercised across their range.
        let every_byte: Vec<u8> = (0..=255).collect();
        assert_roundtrip(&create_full_dictionaries(), &every_byte);
    }

    #[test]
    fn test_pgp_wordlists_roundtrip() {
        use crate::wordlists;

        // Alternating dictionary backed by the bundled PGP word lists.
        let dict = AlternatingWordDictionary::new(
            vec![wordlists::pgp_even(), wordlists::pgp_odd()],
            "-".to_string(),
        );

        // All byte values 0-255 must survive a round trip.
        let every_byte: Vec<u8> = (0..=255).collect();
        assert_roundtrip(&dict, &every_byte);

        // A short fixed pattern as an additional spot check.
        assert_roundtrip(&dict, &[0x42, 0xAB, 0xCD, 0xEF]);
    }

    #[test]
    fn test_pgp_wordlists_have_256_words() {
        use crate::wordlists;

        // Each list must cover the full byte range.
        assert_eq!(wordlists::pgp_even().base(), 256);
        assert_eq!(wordlists::pgp_odd().base(), 256);
    }
}