use crate::codecs::balkanoid::Reduction;
use crate::succinct::{CowStr, Errorlike};
use bincode::config;
pub use bincode::error::{DecodeError, EncodeError};
use std::cmp::Ordering;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::io;
use std::io::{Read, Write};
/// Error reported when a [`WordVec`] cannot accept another word.
pub type OverflowError = Errorlike<CowStr>;
/// A list of words that can be encoded to and decoded from a binary image.
///
/// Words added through [`WordVec::push`] are kept ordered by `comparator`
/// (length first, then lexicographically), are never duplicated, and are
/// limited to `u8::MAX` entries.
#[derive(Default, Debug, bincode::Encode, bincode::Decode, PartialEq, Eq)]
pub struct WordVec(Vec<String>);
impl WordVec {
    /// Builds a vector by pushing every item of `words` in iteration order.
    ///
    /// # Errors
    /// Returns an [`OverflowError`] as soon as one of the pushes fails.
    pub fn new(
        words: impl IntoIterator<Item = impl Into<String>>,
    ) -> Result<WordVec, OverflowError> {
        let mut vec = WordVec::default();
        for item in words {
            vec.push(item)?;
        }
        Ok(vec)
    }

    /// Inserts `word` at its sorted position, as determined by `comparator`.
    ///
    /// Pushing a word that is already stored is a no-op and always succeeds,
    /// even when the vector is full, because nothing has to be inserted.
    ///
    /// # Errors
    /// Returns an [`OverflowError`] when `word` is new and the vector already
    /// holds `u8::MAX` words.
    pub fn push(&mut self, word: impl Into<String>) -> Result<(), OverflowError> {
        let word = word.into();
        // Search before checking capacity: a duplicate needs no room, so it
        // must not be rejected just because the vector happens to be full.
        let position = self
            .0
            .binary_search_by(|existing| comparator(existing, &word));
        match position {
            Ok(_) => Ok(()),
            Err(_) if self.0.len() == u8::MAX as usize => {
                Err(OverflowError::borrowed("too many words in vector"))
            }
            Err(position) => {
                self.0.insert(position, word);
                Ok(())
            }
        }
    }
}
impl AsRef<Vec<String>> for WordVec {
fn as_ref(&self) -> &Vec<String> {
&self.0
}
}
impl From<WordVec> for Vec<String> {
fn from(vec: WordVec) -> Self {
vec.0
}
}
/// A dictionary that groups words by the fingerprint of their reduction.
#[derive(Default, Debug, bincode::Encode, bincode::Decode, PartialEq, Eq)]
pub struct Dict {
// Keyed by fingerprint; each value holds the words stored under that fingerprint.
entries: HashMap<String, WordVec>,
}
/// Error reported by `Dict::resolve` when a fingerprint is known but no word
/// exists at the requested position.
pub type WordResolveError = Errorlike<CowStr>;
/// Orders two words by length in bytes, breaking ties with the regular
/// lexicographic string ordering.
fn comparator(lhs: &String, rhs: &String) -> Ordering {
    match lhs.len().cmp(&rhs.len()) {
        // Same length: fall back to the lexicographic comparison.
        Ordering::Equal => lhs.cmp(rhs),
        by_length => by_length,
    }
}
impl From<HashMap<String, WordVec>> for Dict {
fn from(entries: HashMap<String, WordVec>) -> Self {
Self { entries }
}
}
/// Failure modes of `Dict::read_from_text_file`.
#[derive(Debug)]
pub enum ReadFromTextFileError {
/// Reading from the underlying source failed.
Io(io::Error),
/// A word vector overflowed while the dictionary was being populated.
DictOverflow(OverflowError),
}
impl From<io::Error> for ReadFromTextFileError {
fn from(error: io::Error) -> Self {
Self::Io(error)
}
}
impl From<OverflowError> for ReadFromTextFileError {
fn from(error: OverflowError) -> Self {
Self::DictOverflow(error)
}
}
impl Dict {
    /// Adds every word of `words` to the dictionary under its fingerprint.
    ///
    /// A word is skipped when its reduction is rejected by
    /// `take_if_lowercase` or when the resulting fingerprint is empty.
    ///
    /// # Errors
    /// Returns an [`OverflowError`] when the word vector of a fingerprint
    /// refuses the word; words processed before the failure stay stored.
    pub fn populate(
        &mut self,
        words: impl IntoIterator<Item = String>,
    ) -> Result<(), OverflowError> {
        for word in words {
            let fingerprint = match Reduction::from(word.as_str()).take_if_lowercase() {
                Some(Reduction { fingerprint, .. }) if !fingerprint.is_empty() => fingerprint,
                _ => continue,
            };
            // Creates the word vector on first use of a fingerprint.
            self.entries.entry(fingerprint).or_default().push(word)?;
        }
        Ok(())
    }

    /// Returns the total number of words stored across all fingerprints.
    pub fn count(&self) -> usize {
        self.entries.values().map(|words| words.0.len()).sum()
    }

    /// Looks up the word stored at `position` under `fingerprint`.
    ///
    /// Returns `Ok(None)` when the fingerprint is unknown.
    ///
    /// # Errors
    /// Returns a [`WordResolveError`] when the fingerprint is known but has
    /// no word at `position`.
    pub(crate) fn resolve(
        &self,
        fingerprint: &str,
        position: u8,
    ) -> Result<Option<&String>, WordResolveError> {
        let words = match self.entries.get(fingerprint) {
            Some(words) => words,
            None => return Ok(None),
        };
        words
            .0
            .get(usize::from(position))
            .map(Some)
            .ok_or_else(|| {
                Errorlike::owned(format!(
                    "no dictionary word at position {position} for fingerprint '{fingerprint}'"
                ))
            })
    }

    /// Returns the index of `word` among the words stored under
    /// `fingerprint`, or `None` when the fingerprint or the word is absent.
    ///
    /// # Panics
    /// Panics if the index of the matching word does not fit in a `u8`.
    pub(crate) fn position(&self, fingerprint: &str, word: &str) -> Option<u8> {
        let words = &self.entries.get(fingerprint)?.0;
        let index = words.iter().position(|existing| existing == word)?;
        Some(u8::try_from(index).unwrap())
    }

    /// Tells whether any words are registered under `fingerprint`'s key.
    pub(crate) fn contains_fingerprint(&self, fingerprint: &str) -> bool {
        self.entries.contains_key(fingerprint)
    }

    /// Encodes the dictionary into `w` using bincode's standard
    /// configuration and returns bincode's result (the number of bytes
    /// written on success).
    pub fn write_to_binary_image(&self, w: &mut impl Write) -> Result<usize, EncodeError> {
        let cfg = config::standard();
        bincode::encode_into_std_write(self, w, cfg)
    }

    /// Decodes a dictionary from `r` using bincode's standard configuration.
    pub fn read_from_binary_image(r: &mut impl Read) -> Result<Dict, DecodeError> {
        let cfg = config::standard();
        bincode::decode_from_std_read(r, cfg)
    }

    /// Reads all of `r` as text and builds a dictionary from the
    /// whitespace-separated words found on each line.
    ///
    /// # Errors
    /// Returns [`ReadFromTextFileError::Io`] when reading fails and
    /// [`ReadFromTextFileError::DictOverflow`] when populating overflows.
    pub fn read_from_text_file(r: &mut impl Read) -> Result<Dict, ReadFromTextFileError> {
        let mut text = String::new();
        r.read_to_string(&mut text)?;
        let mut dict = Dict::default();
        text.lines().try_for_each(|line| {
            dict.populate(line.split_whitespace().map(ToOwned::to_owned))
        })?;
        Ok(dict)
    }
}
// Test module; compiled only for test builds and visible within
// `crate::codecs::balkanoid`.
#[cfg(test)]
pub(in crate::codecs::balkanoid) mod tests;