1use std::fs::File;
2use std::io;
3use std::io::BufRead;
4use std::path::Path;
5
6use crate::core::codec::{Decoder, Encoder};
7use crate::core::symbol_table::{SymbolTable, SymbolTableBuilder};
8use crate::core::take_sample;
9
10pub mod core;
11mod util;
12
13pub fn build_table_by_sampling(strings: &Vec<String>) -> Box<dyn SymbolTable> {
30 let sample = take_sample(&strings);
31 SymbolTableBuilder::build_from_samples(&sample)
32}
33
34pub fn encode_all_strings(strings: &Vec<String>) -> (Box<dyn SymbolTable>, Vec<Vec<u8>>) {
37 let symbol_table = build_table_by_sampling(strings);
38 let encoder = Encoder::from_table(&symbol_table);
39 let mut encodings = Vec::with_capacity(strings.len());
40 for str in strings {
41 encodings.push(encoder.encode_str(str));
42 }
43 (symbol_table, encodings)
44}
45
46pub fn encode_string(str: &str, including_table: bool) -> (Box<dyn SymbolTable>, Vec<u8>) {
61 let symbol_table = SymbolTableBuilder::build_from(str);
62 let encoder = Encoder::from_table(&symbol_table);
63 let encoding = encoder.encode(str, including_table);
64 (symbol_table, encoding)
65}
66
67pub fn decode_string(table: &Box<dyn SymbolTable>, encoding: &Vec<u8>) -> String {
69 Decoder::from_table(table).decode(encoding)
70}
71
72pub fn decode_all_strings(table: &Box<dyn SymbolTable>, encodings: &Vec<Vec<u8>>) -> Vec<String> {
74 let mut strings = Vec::with_capacity(encodings.len());
75 let decoder = Decoder::from_table(table);
76 for encoding in encodings {
77 strings.push(decoder.decode(encoding))
78 }
79 strings
80}
81
82pub fn encode_all_strings_from_file<P: AsRef<Path>>(filename: P) -> io::Result<(Box<dyn SymbolTable>, Vec<Vec<u8>>)> {
83 let strings = read_string_lines(filename)?;
84 Ok(encode_all_strings(&strings))
85}
86
/// Reads the file at `filename` and returns its lines as owned strings.
///
/// Lines are split with `BufRead::lines`, so the returned strings do not
/// include the trailing line terminator.
///
/// # Errors
///
/// Returns the underlying `io::Error` if the file cannot be opened, or if any
/// line cannot be read (for example because it is not valid UTF-8). A failed
/// read is reported to the caller rather than causing a panic.
pub fn read_string_lines<P>(filename: P) -> io::Result<Vec<String>>
where
    P: AsRef<Path>,
{
    let file = File::open(filename)?;
    // Collecting into `io::Result<Vec<_>>` stops at the first failed read and
    // hands that error back, instead of unwrapping each line.
    io::BufReader::new(file).lines().collect()
}
98
#[cfg(test)]
mod test {
    use crate::{decode_all_strings, encode_all_strings, read_string_lines};

    /// Round-trip check: encodes up to the first 1000 lines of the test data
    /// file and asserts that decoding yields each original string again.
    #[test]
    pub fn test_codec() {
        let mut inputs = read_string_lines("assets/test_data/c_name").unwrap();
        inputs.truncate(1000);

        let (table, encodings) = encode_all_strings(&inputs);
        let decoded = decode_all_strings(&table, &encodings);

        // Index into `decoded` so a missing entry still fails the test.
        for (idx, expected) in inputs.iter().enumerate() {
            assert_eq!(*expected, decoded[idx]);
        }
    }
}
115