//! Dictionary-based compression facade.
//!
//! Re-exports the public surface of the four submodules so callers can use
//! `crate::<item>` directly instead of reaching into `compress`/`store`/
//! `train`/`types`.

pub mod compress;
pub mod store;
pub mod train;
pub mod types;
// Codec entry points: raw (de)compression against an explicit dictionary.
pub use compress::{
compress, compression_ratio, decompress, decompress_with_dict_data, is_beneficial,
};
// Dictionary registry: global store, lookup by dataset/path pattern, stats,
// and the `*_auto` convenience wrappers that pick a dictionary for you.
pub use store::{
GlobalStats, auto_train, auto_train_default, clear_all, compress_auto, decompress_auto,
find_dict_for_path, get_all_stats, get_dict, get_global_stats, get_stats, is_dict_compressed,
list_dicts, next_dict_id, register_dict, remove_dict,
};
// Dictionary training from sample corpora.
pub use train::{train, train_dictionary, train_dictionary_default};
// Core data types, constants, and the error/result aliases.
pub use types::{
CompressOp, CompressedHeader, CompressionDict, DEFAULT_DICT_SIZE, DICT_MAGIC, DictError,
DictResult, DictStats, MAX_DICT_SIZE, MAX_MATCH_LEN, MIN_DICT_SIZE, MIN_MATCH_LEN, OP_DICT_REF,
OP_LITERAL, SubstringEntry, TrainingOptions,
};
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    /// Smoke test: the constants and types re-exported at the crate root
    /// are reachable without naming their defining submodule.
    #[test]
    fn test_exports_accessible() {
        let _ = DICT_MAGIC;
        let _ = DEFAULT_DICT_SIZE;
        let _ = TrainingOptions::default();
    }

    /// End-to-end: train on header-like samples, register the dictionary,
    /// and verify compress/decompress round-trips unseen data.
    #[test]
    fn test_full_workflow() {
        let samples: &[&[u8]] = &[
            b"header: value\ncontent: data\n",
            b"header: other\ncontent: more\n",
            b"header: test\ncontent: info\n",
        ];
        // Offset past `next_dict_id` so tests sharing the global registry
        // do not collide on the same dictionary id.
        let id = next_dict_id() + 1000;
        let dict = train(
            id,
            "http_headers",
            "*.http",
            "test_pool",
            samples,
            &TrainingOptions::default().with_size(1024),
            12345,
        )
        .unwrap();
        register_dict(dict.clone());
        let data = b"header: new\ncontent: fresh\n";
        let compressed = compress(data, &dict).unwrap();
        let decompressed = decompress(&compressed, &dict).unwrap();
        assert_eq!(decompressed, data);
    }

    /// Round-trip JSON-shaped input through a dictionary trained on
    /// similar JSON samples.
    #[test]
    fn test_json_compression() {
        let samples: &[&[u8]] = &[
            br#"{"type":"user","id":1,"name":"Alice"}"#,
            br#"{"type":"user","id":2,"name":"Bob"}"#,
            br#"{"type":"user","id":3,"name":"Charlie"}"#,
            br#"{"type":"product","id":100,"name":"Widget"}"#,
        ];
        let id = next_dict_id() + 2000;
        let dict = train(
            id,
            "json_dict",
            "*.json",
            "json_pool",
            samples,
            &TrainingOptions::default().with_size(512),
            0,
        )
        .unwrap();
        register_dict(dict.clone());
        let data = br#"{"type":"user","id":99,"name":"Dave"}"#;
        let compressed = compress(data, &dict).unwrap();
        let decompressed = decompress(&compressed, &dict).unwrap();
        assert_eq!(decompressed, data);
    }

    /// Round-trip log-line input using default training options.
    #[test]
    fn test_log_compression() {
        let samples: &[&[u8]] = &[
            b"2025-01-01 10:00:00 INFO Starting application\n",
            b"2025-01-01 10:00:01 DEBUG Loading configuration\n",
            b"2025-01-01 10:00:02 INFO Configuration loaded\n",
            b"2025-01-01 10:00:03 WARN Low memory warning\n",
        ];
        let id = next_dict_id() + 3000;
        let dict = train(
            id,
            "log_dict",
            "*.log",
            "log_pool",
            samples,
            &TrainingOptions::default(),
            0,
        )
        .unwrap();
        register_dict(dict.clone());
        let data = b"2025-01-01 10:00:04 INFO Processing complete\n";
        let compressed = compress(data, &dict).unwrap();
        let decompressed = decompress(&compressed, &dict).unwrap();
        assert_eq!(decompressed, data);
    }

    /// The `auto_*` convenience path: train + register in one call, then
    /// compress/decompress by dataset name and file path alone.
    #[test]
    fn test_auto_workflow() {
        let samples: &[&[u8]] = &[
            b"config: value1\noption: true\n",
            b"config: value2\noption: false\n",
        ];
        // `auto_train` registers the dictionary itself; the returned copy
        // is not needed here (underscore avoids an unused-variable warning).
        let _dict = auto_train(
            "auto_config",
            "unique_auto_ds",
            "*.conf",
            samples,
            &TrainingOptions::default().with_size(512),
            0,
        )
        .unwrap();
        let data = b"config: value3\noption: true\n";
        let compressed = compress_auto("unique_auto_ds", "app.conf", data).unwrap();
        let decompressed = decompress_auto(&compressed).unwrap();
        assert_eq!(decompressed, data);
    }

    /// `find_dict_for_path` selects the dictionary whose glob pattern
    /// matches the path, and returns `None` when nothing matches.
    #[test]
    fn test_dictionary_pattern_matching() {
        let id1 = next_dict_id() + 4000;
        let dict1 = CompressionDict::new(id1, "json", vec![1, 2], "*.json", "pattern_ds", 0);
        register_dict(dict1);
        let id2 = next_dict_id() + 4001;
        let dict2 = CompressionDict::new(id2, "xml", vec![3, 4], "*.xml", "pattern_ds", 0);
        register_dict(dict2);
        let found_json = find_dict_for_path("pattern_ds", "data.json");
        assert!(found_json.is_some());
        assert_eq!(found_json.unwrap().id, id1);
        let found_xml = find_dict_for_path("pattern_ds", "data.xml");
        assert!(found_xml.is_some());
        assert_eq!(found_xml.unwrap().id, id2);
        let not_found = find_dict_for_path("pattern_ds", "data.txt");
        assert!(not_found.is_none());
    }

    /// Global statistics reflect at least the dictionary registered here.
    /// (Bounds are `>=` because other tests share the global registry.)
    #[test]
    fn test_statistics_tracking() {
        let id = next_dict_id() + 5000;
        let dict = CompressionDict::new(
            id,
            "stats_dict",
            b"common data".to_vec(),
            "*",
            "stats_track_ds",
            0,
        );
        register_dict(dict.clone());
        let data1 = b"common data is here";
        let data2 = b"more common data";
        let _ = compress(data1, &dict);
        let _ = compress(data2, &dict);
        let global = get_global_stats();
        assert!(global.dict_count >= 1);
        assert!(global.dict_bytes > 0);
    }

    /// `compression_ratio` yields a positive ratio for data that overlaps
    /// the dictionary contents.
    #[test]
    fn test_compression_ratio_check() {
        let dict = CompressionDict::new(
            next_dict_id() + 6000,
            "ratio_dict",
            b"repeated pattern here is a long string".to_vec(),
            "*",
            "ratio_ds",
            0,
        );
        let data = b"repeated pattern here is a long string and repeated pattern here again";
        let ratio = compression_ratio(data, &dict);
        assert!(ratio > 0.0);
    }

    /// Header serialization round-trips, and the encoded form starts with
    /// the `DICT_MAGIC` bytes.
    #[test]
    fn test_header_format() {
        let header = CompressedHeader::new(12345, 1000, 500, 0xABCD);
        let bytes = header.to_bytes();
        assert_eq!(&bytes[0..4], &DICT_MAGIC);
        let parsed = CompressedHeader::from_bytes(&bytes).unwrap();
        assert_eq!(parsed.dict_id, 12345);
        assert_eq!(parsed.original_size, 1000);
        assert_eq!(parsed.compressed_size, 500);
        assert_eq!(parsed.checksum, 0xABCD);
    }
}