use crate::{NUMBER_MAP, TAIL_NUMBER_MAP};
use std::io::{self, Read, Write};
use unicode_segmentation::UnicodeSegmentation;
/// Decodes a string of graphemes back into the bytes it encodes.
///
/// The input is consumed one grapheme at a time:
///
/// * A grapheme found in `NUMBER_MAP` contributes 11 bits, unless it is the
///   last grapheme of the input, in which case it contributes
///   `11 - (11 * k % 8)` bits, `k` being the number of graphemes consumed so
///   far (itself included).
/// * A grapheme found only in `TAIL_NUMBER_MAP` contributes exactly the bits
///   still missing from the byte currently being assembled.
///
/// Returns `None` when a grapheme is in neither map, or when the value mapped
/// to a grapheme does not fit into the number of bits it is allowed to
/// contribute. This function never panics on malformed input.
pub fn decode(string: impl AsRef<str>) -> Option<Vec<u8>> {
    let mut ret = vec![];
    // Number of low bits of `stage` that are buffered but not yet emitted.
    let mut remaining = 0u8;
    let mut stage = 0x00u32;
    let mut chars = string.as_ref().graphemes(false).peekable();
    // Running value of `11 * graphemes_consumed % 8`.
    let mut residue = 0u8;
    while let Some(c) = chars.next() {
        residue = (residue + 11) % 8;
        let (n_new_bits, new_bits) = match NUMBER_MAP.get(c) {
            Some(&bits) => {
                let width: u8 = if chars.peek().is_none() {
                    11 - residue
                } else {
                    11
                };
                // A value wider than `width` would spill into bits that are
                // already buffered in `stage`; treat it as invalid input.
                if u32::from(bits) >= (1u32 << width) {
                    return None;
                }
                (width, bits)
            }
            None => match TAIL_NUMBER_MAP.get(c) {
                Some(index) => {
                    // `remaining` is always below 8 here, so `need` is 1..=8.
                    let need = 8 - remaining;
                    if u32::from(*index) >= (1u32 << need) {
                        return None;
                    }
                    (need, *index)
                }
                None => return None,
            },
        };
        remaining += n_new_bits;
        stage = (stage << n_new_bits) | u32::from(new_bits);
        // Emit every complete byte, most significant bits first.
        while remaining >= 8 {
            remaining -= 8;
            let byte = u8::try_from(stage >> remaining).ok()?;
            ret.push(byte);
            stage &= (1 << remaining) - 1;
        }
    }
    // Bits can only be left over when a tail grapheme appeared before the end
    // of the input; one more byte is derived from them.
    if remaining > 0 {
        let byte = u8::try_from(stage >> (8 - remaining)).ok()?;
        ret.push(byte);
    }
    Some(ret)
}
/// Pulls grapheme clusters, one at a time, out of a byte-oriented reader.
struct GraphemeReader<'a, R>
where
    R: Read,
{
    /// Underlying byte source, borrowed for the lifetime of this value.
    reader: &'a mut R,
    /// Bytes already read from `reader` that have not been handed out yet.
    buffer: Vec<u8>,
}
impl<'a, R: Read> GraphemeReader<'a, R> {
    /// Creates a reader over `reader` with an empty internal buffer.
    pub fn new(reader: &'a mut R) -> Self {
        Self {
            reader,
            buffer: Vec::new(),
        }
    }

    /// Returns the next grapheme cluster from the underlying reader.
    ///
    /// Bytes are pulled in small chunks until the buffer holds at least two
    /// grapheme clusters, which proves that the first one is complete. At end
    /// of input whatever is left in the buffer is returned as the final
    /// grapheme. `Ok(None)` signals that the input is exhausted.
    ///
    /// # Errors
    ///
    /// * Any I/O error reported by the underlying reader, except
    ///   `ErrorKind::Interrupted`, which is retried.
    /// * `ErrorKind::InvalidData` as soon as the buffered bytes contain a
    ///   sequence that can never become valid UTF-8, or when the input ends in
    ///   the middle of a UTF-8 sequence. The buffered bytes are discarded.
    pub fn read_next_grapheme(&mut self) -> io::Result<Option<String>> {
        let mut chunk = [0u8; 4];
        loop {
            match self.get_grapheme() {
                Ok(Some(grapheme)) => {
                    // Drop the consumed bytes in place, keeping the allocation.
                    self.buffer.drain(..grapheme.len());
                    return Ok(Some(grapheme));
                }
                Ok(None) => {}
                Err(err) => {
                    self.buffer.clear();
                    return Err(err);
                }
            }
            match self.reader.read(&mut chunk) {
                Ok(0) => break,
                Ok(chunk_size) => self.buffer.extend_from_slice(&chunk[..chunk_size]),
                // An interrupted read is not a failure; simply try again.
                Err(err) if err.kind() == io::ErrorKind::Interrupted => {}
                Err(err) => return Err(err),
            }
        }
        if self.buffer.is_empty() {
            return Ok(None);
        }
        // End of input: the leftover bytes form the last grapheme. Taking the
        // buffer leaves it empty whether or not the conversion succeeds.
        String::from_utf8(std::mem::take(&mut self.buffer))
            .map(Some)
            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "Invalid input data"))
    }

    /// Returns the first buffered grapheme if it is known to be complete.
    ///
    /// `Ok(None)` means more input is needed: the buffer ends inside a UTF-8
    /// sequence, or it holds fewer than two grapheme clusters. An error is
    /// returned when the buffer contains bytes that no further input can turn
    /// into valid UTF-8.
    fn get_grapheme(&self) -> io::Result<Option<String>> {
        let text = match std::str::from_utf8(&self.buffer) {
            Ok(text) => text,
            // Truncated trailing sequence: the next read may complete it.
            Err(err) if err.error_len().is_none() => return Ok(None),
            Err(_) => {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    "Invalid input data",
                ))
            }
        };
        let mut clusters = text.graphemes(true);
        let Some(first) = clusters.next() else {
            return Ok(None);
        };
        // Only a following cluster proves that `first` will not grow further.
        Ok(clusters.next().map(|_| first.to_string()))
    }
}
/// Iterates over the graphemes of the underlying reader, stopping once
/// `read_next_grapheme` reports that the input is exhausted.
impl<'a, R: Read> Iterator for GraphemeReader<'a, R> {
    type Item = io::Result<String>;

    /// Yields the next grapheme or the error raised while fetching it.
    fn next(&mut self) -> Option<Self::Item> {
        match self.read_next_grapheme() {
            Ok(Some(grapheme)) => Some(Ok(grapheme)),
            Ok(None) => None,
            Err(err) => Some(Err(err)),
        }
    }
}
/// Streaming variant of the decoder: reads graphemes from `reader` and writes
/// the decoded bytes to `writer` as soon as they are complete.
///
/// A grapheme found in `NUMBER_MAP` contributes 11 bits, unless it is the last
/// grapheme of the input, in which case it contributes `11 - (11 * k % 8)`
/// bits, `k` being the number of graphemes consumed so far. A grapheme found
/// only in `TAIL_NUMBER_MAP` contributes exactly the bits still missing from
/// the byte currently being assembled.
///
/// Bytes are written one at a time with `write_all`.
///
/// # Errors
///
/// * Any error produced while reading graphemes from `reader` or while
///   writing to `writer`.
/// * `ErrorKind::InvalidData` when a grapheme is in neither map, or when the
///   value mapped to a grapheme does not fit into the number of bits it is
///   allowed to contribute. Malformed input never causes a panic.
// NOTE(review): the lint is presumably silenced because the enclosing module's
// name is repeated in the function name; confirm against the module layout.
#[allow(clippy::module_name_repetitions)]
pub fn decode_stream<R: Read, W: Write>(reader: &mut R, writer: &mut W) -> io::Result<()> {
    // Every malformed-input condition is reported with the same error.
    let invalid = || io::Error::new(io::ErrorKind::InvalidData, "Invalid input data");
    // Number of low bits of `stage` that are buffered but not yet written.
    let mut remaining = 0u8;
    let mut stage = 0x00u32;
    let mut chars = GraphemeReader::new(reader).peekable();
    // Running value of `11 * graphemes_consumed % 8`.
    let mut residue = 0u8;
    while let Some(c) = chars.next() {
        let c = c?;
        residue = (residue + 11) % 8;
        let (n_new_bits, new_bits) = match NUMBER_MAP.get(&c) {
            Some(&bits) => {
                let width: u8 = if chars.peek().is_none() {
                    11 - residue
                } else {
                    11
                };
                // A value wider than `width` would spill into bits that are
                // already buffered in `stage`; treat it as invalid input.
                if u32::from(bits) >= (1u32 << width) {
                    return Err(invalid());
                }
                (width, bits)
            }
            None => match TAIL_NUMBER_MAP.get(&c) {
                Some(index) => {
                    // `remaining` is always below 8 here, so `need` is 1..=8.
                    let need = 8 - remaining;
                    if u32::from(*index) >= (1u32 << need) {
                        return Err(invalid());
                    }
                    (need, *index)
                }
                None => return Err(invalid()),
            },
        };
        remaining += n_new_bits;
        stage = (stage << n_new_bits) | u32::from(new_bits);
        // Write out every complete byte, most significant bits first.
        while remaining >= 8 {
            remaining -= 8;
            let byte = u8::try_from(stage >> remaining).map_err(|_| invalid())?;
            writer.write_all(&[byte])?;
            stage &= (1 << remaining) - 1;
        }
    }
    // Bits can only be left over when a tail grapheme appeared before the end
    // of the input; one more byte is derived from them.
    if remaining > 0 {
        let byte = u8::try_from(stage >> (8 - remaining)).map_err(|_| invalid())?;
        writer.write_all(&[byte])?;
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::EMOJI_MAP;
    use std::io::Cursor;

    /// Streaming every value of `EMOJI_MAP` through `GraphemeReader` must
    /// yield exactly the graphemes obtained by segmenting the same text in
    /// memory: same items, same order, same count.
    #[test]
    fn test_read_next_grapheme() {
        let data = EMOJI_MAP.values().copied().collect::<Vec<&str>>().join("");
        let mut cursor = Cursor::new(data.as_bytes());
        // Collect both sides in full: zipping would stop at the shorter
        // sequence and hide a reader that ends too early.
        let actual = GraphemeReader::new(&mut cursor)
            .collect::<std::io::Result<Vec<String>>>()
            .expect("reading graphemes from an in-memory cursor must not fail");
        let expected = data.graphemes(true).collect::<Vec<&str>>();
        assert_eq!(actual, expected);
    }
}