use super::string_parts;
use crate::tokens::{Token, Tokenizer};
use anyhow::Result;
use std::convert::{From, Into};
use std::fmt;
use std::hash::Hash;
/// Token type that wraps a single owned `String`.
///
/// Equality, ordering, hashing, cloning and the default value are all derived,
/// so they defer to the wrapped string (the default is the empty string).
// NOTE(review): presumably each value holds one grapheme cluster; the actual
// segmentation is done in `string_parts`, which is not visible here — confirm.
#[derive(Debug, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Grapheme(String);
impl From<String> for Grapheme {
fn from(s: String) -> Self {
Self(s)
}
}
impl Into<String> for Grapheme {
fn into(self) -> String {
self.0
}
}
/// Displays a `Grapheme` exactly as its wrapped string would be displayed.
impl std::fmt::Display for Grapheme {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Fully-qualified call: it does not depend on `Display` being imported
        // as a trait, and stays unambiguous if `Debug` is ever brought into
        // scope. Formatter flags (width, alignment, ...) are forwarded as-is.
        fmt::Display::fmt(&self.0, f)
    }
}
/// Makes `Grapheme` usable as a `Token`, pairing it with `GraphemeTokenizer`
/// and `GraphemePacker`.
impl Token for Grapheme {
    type Tokenizer = GraphemeTokenizer;
    type Packer = GraphemePacker;

    /// Returns the size of the wrapped string in bits: its UTF-8 byte length
    /// multiplied by eight.
    fn bit_count(&self) -> usize {
        const BITS_PER_BYTE: usize = 8;
        let byte_len = self.0.len();
        byte_len * BITS_PER_BYTE
    }
}
/// Field-less type that carries the `Tokenizer` implementation whose token
/// type is `Grapheme`.
pub struct GraphemeTokenizer;
/// `Tokenizer` implementation with `Grapheme` as the token type and
/// `GraphemeIter` as the iterator type for every reader.
impl Tokenizer for GraphemeTokenizer {
    type T = Grapheme;
    type Iter<R: std::io::Read> = GraphemeIter;

    /// Passes `reader` to `GraphemeIter::new` and returns that call's result
    /// unchanged, including any error it reports.
    fn tokenize<R>(reader: R) -> Result<Self::Iter<R>>
    where
        R: std::io::Read,
    {
        GraphemeIter::new(reader)
    }
}
/// `string_parts::StringPartsIter` instantiated with `Grapheme`; this is the
/// iterator type returned by `GraphemeTokenizer::tokenize`.
pub type GraphemeIter = string_parts::StringPartsIter<Grapheme>;
/// `string_parts::StringPartsPacker` instantiated with `Grapheme`; this is the
/// type named as `Grapheme`'s `Token::Packer`.
pub type GraphemePacker = string_parts::StringPartsPacker<Grapheme>;
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokens::TokenPacker;
    use std::io::Cursor;

    /// Sample text pushed through the tokenizer and then the packer.
    const TEXT: &str = "
Ah! well a-day! what evil looks
Had I from old and young!
Instead of the cross, the Albatross
About my neck was hung.
";

    /// Tokenizing `TEXT` and packing the resulting tokens must reproduce
    /// `TEXT` exactly.
    #[test]
    fn roundtrip() {
        let mut source = Cursor::new(TEXT);
        let tokens = GraphemeTokenizer::tokenize(&mut source)
            .unwrap()
            .map(|token| token.unwrap());

        let mut sink: Cursor<Vec<u8>> = Cursor::new(Vec::new());
        GraphemePacker::pack(tokens, &mut sink).unwrap();

        let packed = std::str::from_utf8(sink.get_ref()).unwrap();
        assert_eq!(packed, TEXT);
    }
}