use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use std::io::{self, Read, Write};
use thiserror::Error;
#[derive(Error, Debug)]
pub enum Error {
#[error("IO error: {0:?}")]
Io(#[from] io::Error),
#[error("Invalid dictionary reference offset: {0}")]
InvalidDictionaryReferenceOffset(u16),
#[error("Invalid dictionary reference length: {0}")]
InvalidDictionaryReferenceLength(u8),
}
pub type Result<T> = std::result::Result<T, Error>;
const INITIAL_DICTIONARY: &[u8] = b"{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript \\fdecor MS Sans SerifSymbolArialTimes New RomanCourier{\\colortbl\\red0\\green0\\blue0\r\n\\par \\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx";
pub struct TokenDictionary {
buffer: [u8; 4096],
size: usize,
read_offset: usize,
write_offset: usize,
}
impl TokenDictionary {
pub fn read_reference(&mut self, offset: DictionaryReference) -> Option<Vec<u8>> {
let (offset, length) = (offset.offset() as usize, offset.length() as usize);
if offset == self.write_offset {
return None;
}
let mut result = Vec::with_capacity(length);
self.read_offset = offset;
for _ in 0..length {
let byte = self.buffer[self.read_offset];
result.push(byte);
self.read_offset = (self.read_offset + 1) % self.buffer.len();
self.write_byte(byte);
}
Some(result)
}
pub fn write_byte(&mut self, byte: u8) {
self.buffer[self.write_offset] = byte;
self.size = self.buffer.len().min(self.size + 1);
self.write_offset = (self.write_offset + 1) % self.buffer.len();
}
pub fn find_longest_match(&mut self, rtf: &[u8]) -> Result<Option<DictionaryReference>> {
let final_offset = self.write_offset % self.buffer.len();
let mut match_offset = if self.size != self.buffer.len() {
0
} else {
((self.write_offset + 1) % self.buffer.len()) as u16
};
let mut best_match: Option<DictionaryMatch> = None;
loop {
let best_match_length = best_match.map(|m| m.length).unwrap_or_default();
best_match = self
.try_match(rtf, match_offset, best_match_length)?
.or(best_match);
match_offset = (match_offset + 1) % self.buffer.len() as u16;
if match_offset as usize == final_offset {
break;
}
if let Some(best_match) = best_match {
if best_match.length == 17 {
break;
}
}
}
if best_match.map(|m| m.length).unwrap_or_default() == 0 {
self.write_byte(rtf[0]);
}
Ok(best_match.and_then(|m| DictionaryReference::try_from(m).ok()))
}
fn try_match(
&mut self,
rtf: &[u8],
match_offset: u16,
best_match_length: u8,
) -> Result<Option<DictionaryMatch>> {
let max_length = rtf.len().min(17);
let mut match_length = 0_u8;
let mut dictionary_offset = match_offset as usize;
while (match_length as usize) < max_length {
let byte = rtf[match_length as usize];
if self.buffer[dictionary_offset] != byte {
break;
}
match_length += 1;
if match_length > best_match_length {
self.write_byte(byte);
}
dictionary_offset = (dictionary_offset + 1) % self.buffer.len();
}
if match_length > best_match_length {
Ok(Some(DictionaryMatch {
offset: match_offset,
length: match_length,
}))
} else {
Ok(None)
}
}
pub fn final_reference(self) -> DictionaryReference {
DictionaryReference::new(self.write_offset as u16, 0)
}
}
impl Default for TokenDictionary {
fn default() -> Self {
let mut buffer = [0; 4096];
buffer[..INITIAL_DICTIONARY.len()].copy_from_slice(INITIAL_DICTIONARY);
Self {
buffer,
size: INITIAL_DICTIONARY.len(),
read_offset: 0,
write_offset: INITIAL_DICTIONARY.len(),
}
}
}
#[derive(Clone, Copy)]
struct DictionaryMatch {
offset: u16,
length: u8,
}
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct DictionaryReference(u16);
impl DictionaryReference {
fn new(offset: u16, length_minus_2: u8) -> Self {
let value = u16::from(length_minus_2) | (offset << 4);
Self(value)
}
pub fn offset(&self) -> u16 {
(self.0 & 0xFFF0) >> 4
}
pub fn length(&self) -> u8 {
(self.0 & 0x0F) as u8 + 2
}
pub fn read(f: &mut dyn Read) -> io::Result<Self> {
Ok(Self(f.read_u16::<BigEndian>()?))
}
pub fn write(&self, f: &mut dyn Write) -> io::Result<()> {
f.write_u16::<BigEndian>(self.0)
}
}
impl TryFrom<DictionaryMatch> for DictionaryReference {
type Error = Error;
fn try_from(value: DictionaryMatch) -> Result<Self> {
if value.offset > 0x0FFF {
return Err(Error::InvalidDictionaryReferenceOffset(value.offset));
}
if !(2..=0x11).contains(&value.length) {
return Err(Error::InvalidDictionaryReferenceLength(value.length));
}
Ok(Self::new(value.offset, value.length - 2))
}
}