use super::WordlistSubset;
/// Iterator adaptor that turns a stream of characters spelling wordlist words
/// into the bytes those words stand for.
///
/// The decoder keeps a shrinking set of candidate wordlist subsets while it
/// consumes the characters of one word; once a single word remains and has
/// been matched in full, its byte is emitted and the state starts over.
#[derive(Clone, Debug)]
pub struct EffDecode<I: Iterator> {
/// Source of input characters.
iter: I,
/// Wordlist subsets that can still match the characters consumed for the
/// current word; re-initialised from `super::WL_EFF_DECODE` after every
/// decoded byte.
candidate_wl_subsets_remaining: Vec<WordlistSubset<'static>>,
/// Value of `curr_match_len` after the previously processed character; used
/// as the offset into candidate words at which the next character is compared.
prev_match_len: usize,
/// Number of (lowercased) characters consumed so far for the current word.
curr_match_len: usize,
}
/// Streaming decoder: pulls characters from the wrapped iterator and yields one
/// byte for every complete wordlist word it recognises.
impl<I> Iterator for EffDecode<I>
where
    I: Iterator<Item = Result<char, std::io::Error>>,
{
    type Item = Result<u8, std::io::Error>;

    /// Consumes input characters until one word has been matched in full and
    /// returns the byte associated with it.
    ///
    /// Spaces, `\n` and `\r` are skipped wherever they occur (also inside a
    /// word) and every other character is lowercased before matching.
    ///
    /// Returns `None` once the input is exhausted on a word boundary.
    ///
    /// # Errors
    ///
    /// * Any error produced by the underlying iterator is passed through.
    /// * `InvalidData` when the characters consumed so far cannot match any
    ///   word of the wordlist.
    /// * `InvalidData` when the input ends in the middle of a word; the
    ///   decoder state is reset afterwards, so the error is reported once.
    fn next(&mut self) -> Option<Self::Item> {
        for next_char in self.iter.by_ref() {
            let input_char = match next_char {
                Ok(c) => c,
                Err(err) => return Some(Err(err)),
            };
            // Separators carry no information: words may be split or joined freely.
            if matches!(input_char, ' ' | '\n' | '\r') {
                continue;
            }
            let word_chars: Vec<char> = input_char.to_lowercase().collect();

            self.curr_match_len += word_chars.len();
            let prev_match_len = self.prev_match_len;
            let curr_match_len = self.curr_match_len;

            // Drop subsets whose words are shorter than what has been consumed.
            // NOTE(review): relies on the subsets being ordered by ascending
            // `word_len` (required by `partition_point`) -- confirm against the
            // definition of `WL_EFF_DECODE`.
            let first_subset_remaining = self
                .candidate_wl_subsets_remaining
                .partition_point(|wl| wl.word_len < curr_match_len);
            self.candidate_wl_subsets_remaining
                .drain(..first_subset_remaining);

            // Narrow every remaining subset to the contiguous run of words that
            // continue with the new character(s).
            // NOTE(review): assumes each subset's `words` are sorted and contain
            // at least `word_len` characters, otherwise the `unwrap` below panics.
            for subset in &mut self.candidate_wl_subsets_remaining {
                for (i, &lower_char) in word_chars.iter().enumerate() {
                    let subset_words_idx_low = subset.words.partition_point(|entry| {
                        entry.word.chars().nth(prev_match_len + i).unwrap() < lower_char
                    });
                    subset.words = &subset.words[subset_words_idx_low..];
                }
                let subset_words_idx_high = subset.words.partition_point(|entry| {
                    let word_remainder_to_match = &entry.word[prev_match_len..];
                    word_remainder_to_match.starts_with(&*word_chars)
                });
                subset.words = &subset.words[..subset_words_idx_high];
            }

            // Filter in place instead of cloning the whole vector.
            self.candidate_wl_subsets_remaining
                .retain(|wl| !wl.words.is_empty());
            self.prev_match_len = curr_match_len;

            if self.candidate_wl_subsets_remaining.is_empty() {
                return Some(Err(std::io::Error::from(std::io::ErrorKind::InvalidData)));
            }

            // A byte is produced only when exactly one word of exactly one subset
            // is left and all of its characters have been consumed.
            let decoded_byte = match self.candidate_wl_subsets_remaining.as_slice() {
                [only] if only.words.len() == 1 && only.word_len == curr_match_len => {
                    Some(only.words[0].byte)
                }
                _ => None,
            };
            if let Some(ret_byte) = decoded_byte {
                self.candidate_wl_subsets_remaining = super::WL_EFF_DECODE.to_vec();
                self.prev_match_len = 0;
                self.curr_match_len = 0;
                return Some(Ok(ret_byte));
            }
        }

        // Input exhausted. Characters consumed since the last decoded byte mean
        // the stream was cut off inside a word: report it instead of silently
        // dropping the partial word, then reset so the next call returns `None`.
        if self.curr_match_len > 0 {
            self.candidate_wl_subsets_remaining = super::WL_EFF_DECODE.to_vec();
            self.prev_match_len = 0;
            self.curr_match_len = 0;
            return Some(Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "input ended in the middle of a word",
            )));
        }
        None
    }
}
/// Blanket `crate::Decode` implementation: any iterator over `Result<char, E>`
/// can be wrapped into an [`EffDecode`] by calling `decode`.
///
/// Note that the `Iterator` implementation of [`EffDecode`] in this file is
/// only provided when the error type is `std::io::Error`.
impl<I, E> crate::Decode<I, EffDecode<I>> for I
where
    I: Iterator<Item = Result<char, E>>,
{
    /// Wraps `self` in a decoder positioned at the start of a word, with the
    /// complete wordlist as the initial candidate set.
    fn decode(self) -> EffDecode<I> {
        let candidate_wl_subsets_remaining = super::WL_EFF_DECODE.to_vec();
        EffDecode {
            iter: self,
            candidate_wl_subsets_remaining,
            prev_match_len: 0,
            curr_match_len: 0,
        }
    }
}
#[cfg(test)]
mod test_cases_decode {
    use super::super::Decode;
    use super::EffDecode;
    use std::fs::File;
    use std::io::{BufReader, Cursor};
    use std::path::Path;
    use test_case::test_case;
    use utf8_chars::BufReadCharsExt;

    // Three repetitions of the same word must decode to three 0x05 bytes no
    // matter how the words are spaced, wrapped or capitalised.
    #[test_case("acuteness acuteness acuteness "; "words spaced")]
    #[test_case("acute ness a cute ness acuten ess "; "words extra space")]
    #[test_case("acutenessacutenessacuteness"; "words mushed")]
    #[test_case("acuteness acuteness \nacuteness "; "words spaced wrapped")]
    #[test_case("acutenessacut\nenessacuteness"; "words mushed wrapped")]
    #[test_case("ACUTENESS ACUTENESS ACUTENESS "; "words spaced uppercase")]
    #[test_case("Acuteness ACUTEness acuteNESS "; "words spaced mixed-case")]
    fn test_positive_eff_decoder_0x05_0x05_0x05(words: &str) {
        let mut reader = Cursor::new(words);
        let outcome: Result<Vec<u8>, _> =
            Decode::<_, EffDecode<_>>::decode(reader.chars()).collect();
        assert_eq!(outcome.unwrap(), &[0x05u8; 3]);
    }

    // Each encoded sample file must decode back to the original key file.
    #[test_case("id_ed25519.txt")]
    #[test_case("id_ed25519-fold_w_78.txt")]
    #[test_case("id_ed25519-fold_w_78_s.txt")]
    #[test_case("id_ed25519-fold_w_78_s-trimmed.txt")]
    fn test_positive_eff_decoder_sample_data_file_id_ed25519<P: AsRef<Path>>(fpath_encoded: P) {
        let expected_bytes = std::fs::read("sample_data/original/id_ed25519").unwrap();

        let encoded_path = Path::new("sample_data/encoded/eff").join(fpath_encoded);
        let mut reader = BufReader::new(File::open(encoded_path).unwrap());
        let outcome: Result<Vec<u8>, _> =
            Decode::<_, EffDecode<_>>::decode(reader.chars()).collect();

        assert_eq!(outcome.unwrap(), expected_bytes);
    }

    // Corrupted sample files must be rejected with `InvalidData`.
    #[test_case("id_ed25519-fold_w_78_s.txt")]
    #[test_case("id_ed25519-fold_w_78_s-trimmed.txt")]
    fn test_negative_eff_decoder_sample_data_file_id_ed25519<P: AsRef<Path>>(fpath_encoded: P) {
        let corrupted_path = Path::new("sample_data/encoded_corrupted/eff").join(fpath_encoded);
        let mut reader = BufReader::new(File::open(corrupted_path).unwrap());
        let outcome: Result<Vec<u8>, _> =
            Decode::<_, EffDecode<_>>::decode(reader.chars()).collect();

        let decode_error = outcome.unwrap_err();
        assert_eq!(decode_error.kind(), std::io::ErrorKind::InvalidData);
    }
}