use super::string_parts;
use crate::tokens::{Token, Tokenizer};
use anyhow::Result;
use std::convert::{From, Into};
use std::fmt;
use std::hash::Hash;
/// Newtype wrapper around an owned `String` representing a single token.
/// Derives the full comparison/hash set so `Word` can be used as a map key
/// and sorted; `Default` yields the empty word.
#[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Word(String);
impl From<String> for Word {
fn from(s: String) -> Self {
Self(s)
}
}
impl Into<String> for Word {
fn into(self) -> String {
self.0
}
}
impl fmt::Display for Word {
    /// Formats the inner string. `Formatter::pad` is what `str`'s own
    /// `Display` impl uses, so width/fill/alignment flags behave identically
    /// to delegating via `self.0.fmt(f)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(&self.0)
    }
}
impl Token for Word {
    type Tokenizer = WordTokenizer;
    type Packer = WordPacker;

    /// Size of this token in bits: eight per UTF-8 byte of the inner string
    /// (`String::len` counts bytes, not chars).
    fn bit_count(&self) -> usize {
        8 * self.0.len()
    }
}
/// Unit-struct tokenizer that splits a byte stream into `Word` tokens.
pub struct WordTokenizer;
impl Tokenizer for WordTokenizer {
    type T = Word;
    // The iterator GAT ignores `R`: `WordIter` presumably consumes the reader
    // inside `new` rather than holding it — TODO confirm against
    // `string_parts::StringPartsIter`.
    type Iter<R: std::io::Read> = WordIter;
    fn tokenize<R: std::io::Read>(r: R) -> Result<Self::Iter<R>> {
        // `WordIter::new` already returns `Result`, so it is passed through
        // without wrapping.
        WordIter::new(r)
    }
}
/// Iterator yielding `Word` tokens, backed by the shared string-parts splitter.
pub type WordIter = string_parts::StringPartsIter<Word>;
/// Packer writing `Word` tokens back out, backed by the shared string-parts packer.
pub type WordPacker = string_parts::StringPartsPacker<Word>;
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokens::TokenPacker;
    use std::io::Cursor;

    const TEXT: &str = "
Ah! well a-day! what evil looks
Had I from old and young!
Instead of the cross, the Albatross
About my neck was hung.
";

    /// Tokenizing TEXT and packing the tokens back must reproduce TEXT
    /// byte-for-byte.
    #[test]
    fn roundtrip() {
        let mut input = Cursor::new(TEXT);
        let tokens = WordTokenizer::tokenize(&mut input)
            .unwrap()
            .map(|item| item.unwrap_or_else(|e| panic!("{}", e)));

        let mut packed: Cursor<Vec<u8>> = Cursor::new(vec![]);
        WordPacker::pack(tokens, &mut packed).unwrap();

        let roundtripped = std::str::from_utf8(&packed.get_ref()[..]).unwrap();
        assert_eq!(roundtripped, TEXT);
    }
}