use crate::{
io::{reader::BinaryReader, writer::BinaryWriter},
search::tst::TST,
};
const R: usize = 1 << 7;
const L: usize = 1 << 12;
const W: usize = 12;
pub fn compress(txt: &str) -> Vec<u8> {
let mut st = TST::default();
for i in 0..R {
st.put(&char::from(i as u8).to_string(), i);
}
let mut code = R + 1;
let mut writer = BinaryWriter::new(txt.len() / 2);
let mut input = txt;
while !input.is_empty() {
let s = st.longest_prefix_of(input);
writer.write(W, *st.get_val(&s).unwrap());
let t = s.len();
if t < input.len() && code < L {
st.put(&input[..=t], code);
code += 1;
}
input = &input[t..];
}
writer.write(W, R);
writer.close()
}
pub fn expand(data: Vec<u8>) -> String {
let mut st = [""; L].map(|f| f.to_string());
(0..R).for_each(|i| {
let key = char::from(i as u8).to_string();
st[i] = key;
});
let mut reader = BinaryReader::new(data);
let mut txt = String::default();
let mut s = "".to_string();
let mut code = R + 1;
loop {
let codeword = reader.read(W);
if codeword == R {
break;
} else {
if code < L {
let suffix = if code == codeword { &s } else { &st[codeword] }.as_bytes()[0];
let suffix = char::from(suffix);
s.push(suffix);
if s.len() > 1 {
st[code] = s.clone();
code += 1;
}
s = st[codeword].to_string();
}
txt.push_str(&st[codeword]);
}
}
txt
}
#[cfg(test)]
mod test {
use crate::string::lzw::{compress, expand};
#[test]
fn test() {
let txt = r#"abcdefgabcdefg1287abcdefgabcdefg128799abcde
fgabcdefg1287abcdefgabcdefg1287abcdefgabcdefg128
cdefgabcdefg128799abcdefgabcdefg1287abcdefgabcdefg1287abcdefgabcdefg128
cdefgabcdefg128799abcdefgabcdefg1287abcdefgabcdefg1287abcdefgabcdefg128
aaaaaa799abcdefgabcdefg128799abcdefgabcdefg1287ab
cdefgabcdefg128799abcdefgabcdefg1287aaaaaaaaaaaaaaaaaaaaaa9"#
.to_string();
let compress = compress(&txt);
let origin_len = txt.as_bytes().len();
let compress_len = compress.len();
let rate = compress_len * 100 / origin_len;
println!(
"len txt :{},compress:{},rate:{}",
origin_len, compress_len, rate
);
let decode: String = expand(compress);
assert_eq!(decode, txt);
}
}