#[derive(PartialEq)]
pub struct LZ78entry<T> {
index: Option<usize>,
next_char: T,
}
pub type LZ78tuple<T> = (Option<usize>, T);
impl<T> From<LZ78tuple<T>> for LZ78entry<T> {
fn from(tuple: LZ78tuple<T>) -> Self {
LZ78entry {
index: tuple.0,
next_char: tuple.1,
}
}
}
impl<T> Into<LZ78tuple<T>> for LZ78entry<T> {
fn into(self) -> LZ78tuple<T> {
(self.index, self.next_char)
}
}
#[cfg(feature = "serde")]
mod lz78_serde {
use super::*;
use serde::{Deserialize, Serialize};
impl Serialize for LZ78entry<u8> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
let tuple = (self.index, self.next_char);
tuple.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for LZ78entry<u8> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
Ok(LZ78entry::from(LZ78tuple::deserialize(deserializer)?))
}
}
}
impl<T: Clone> LZ78entry<T> {
fn resolve(&self, dictionary: &Vec<Vec<T>>) -> Vec<T> {
let mut res = if let Some(index) = self.index {
let target = &dictionary[index];
target.clone()
} else {
Vec::with_capacity(1)
};
res.push(self.next_char.clone());
return res;
}
}
pub fn lz78_encode<T: Clone + PartialEq>(
input: &[T],
lookahead_max: usize,
max_dictionary_size: usize,
) -> Vec<LZ78entry<T>> {
let mut output = Vec::new();
let mut dictionary: Vec<Vec<T>> = Vec::with_capacity(max_dictionary_size);
let mut i = 0;
while i < input.len() {
let mut longest_prefix: Option<usize> = None;
for (idx, entry) in dictionary.iter().enumerate() {
let entry_len = entry.len();
if entry_len > lookahead_max
|| i + entry_len + 1 > input.len()
|| input[i..i + entry_len] != *entry
{
continue;
}
if let Some(longest) = &mut longest_prefix {
if entry_len > dictionary[*longest].len() {
*longest = idx;
}
} else {
longest_prefix = Some(idx);
}
}
let new_entry = if let Some(idx) = longest_prefix {
i += dictionary[idx].len() + 1;
LZ78entry {
index: Some(idx),
next_char: input[i - 1].clone(),
}
} else {
i += 1;
LZ78entry {
index: None,
next_char: input[i - 1].clone(),
}
};
let new_dict_entry = new_entry.resolve(&dictionary);
if dictionary.len() == max_dictionary_size {
*dictionary.get_mut(0).unwrap() = new_dict_entry;
} else {
dictionary.push(new_dict_entry);
}
output.push(new_entry);
}
return output;
}
pub fn lz78_decode<T: Clone + PartialEq>(
input: &[LZ78entry<T>],
max_dictionary_size: usize,
) -> Vec<T> {
let mut output = Vec::new();
let mut dictionary: Vec<Vec<T>> = Vec::with_capacity(input.len());
for entry in input {
let resolved = entry.resolve(&dictionary);
for el in &resolved {
output.push(el.clone());
}
if dictionary.len() == max_dictionary_size {
*dictionary.get_mut(0).unwrap() = resolved.clone();
} else {
dictionary.push(resolved.clone());
}
}
return output;
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_resolve() {
let other: Vec<char> = "test".chars().collect();
let dictionary: Vec<Vec<char>> = vec![vec!['t'], vec!['t', 'e'], vec!['t', 'e', 's']];
let target = LZ78entry {
index: Some(2),
next_char: 't',
};
assert_eq!(target.resolve(&dictionary), other);
}
#[test]
fn test_lz78_encode_decode() {
let input = b"TAMTARAMTAMTAMRAMTAT";
let encoded = lz78_encode(input, 4, 4);
assert!(encoded.len() < input.len());
let decoded = lz78_decode(&encoded, 4);
assert_eq!(input, decoded.as_slice());
}
}