1#![forbid(unsafe_code)]
2
3#[cfg(feature = "dict_builder")]
4pub mod cover;
5#[cfg(feature = "dict_builder")]
6pub mod fastcover;
7#[cfg(feature = "dict_builder")]
8pub mod finalize;
9
10#[cfg(feature = "alloc")]
11use alloc::vec::Vec;
12
13use crate::bitstream::reader::BitReader;
14use crate::error::DecompressError;
15use crate::fse::table_builder::{build_decode_table, parse_fse_table_description};
16use crate::fse::{FseDecodeEntry, LL_MAX_SYMBOL, ML_MAX_SYMBOL, OF_MAX_SYMBOL};
17use crate::huffman::HuffmanDecodeEntry;
18use crate::huffman::weights::{build_huffman_decode_table, parse_huffman_weights};
19
20pub const DICT_MAGIC: u32 = 0xEC30A437;
21
22#[cfg(feature = "alloc")]
27pub struct Dictionary {
28 id: u32,
29 content: Vec<u8>,
30 huf_table: Option<(Vec<HuffmanDecodeEntry>, u8)>,
31 of_table: Option<(Vec<FseDecodeEntry>, u8)>,
32 ml_table: Option<(Vec<FseDecodeEntry>, u8)>,
33 ll_table: Option<(Vec<FseDecodeEntry>, u8)>,
34 rep_offsets: [u32; 3],
35}
36
37#[cfg(feature = "alloc")]
38impl Dictionary {
39 pub fn from_bytes(data: &[u8]) -> Result<Self, DecompressError> {
41 if data.len() < 8 {
42 return Err(DecompressError::InvalidDictionary);
43 }
44
45 let magic = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
46 if magic != DICT_MAGIC {
47 return Err(DecompressError::InvalidDictionary);
48 }
49
50 let id = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
51 let mut pos = 8;
52
53 let huf_table = parse_dict_huffman(&data[pos..])?;
54 pos += huf_table.1;
55 let huf_decode = if huf_table.0.is_some() {
56 huf_table.0
57 } else {
58 None
59 };
60
61 let (of_table, of_consumed) = parse_dict_fse(&data[pos..], OF_MAX_SYMBOL)?;
62 pos += of_consumed;
63
64 let (ml_table, ml_consumed) = parse_dict_fse(&data[pos..], ML_MAX_SYMBOL)?;
65 pos += ml_consumed;
66
67 let (ll_table, ll_consumed) = parse_dict_fse(&data[pos..], LL_MAX_SYMBOL)?;
68 pos += ll_consumed;
69
70 if pos + 12 > data.len() {
71 return Err(DecompressError::InvalidDictionary);
72 }
73
74 let rep1 = u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]);
75 let rep2 = u32::from_le_bytes([data[pos + 4], data[pos + 5], data[pos + 6], data[pos + 7]]);
76 let rep3 =
77 u32::from_le_bytes([data[pos + 8], data[pos + 9], data[pos + 10], data[pos + 11]]);
78 pos += 12;
79
80 if rep1 == 0 || rep2 == 0 || rep3 == 0 {
81 return Err(DecompressError::InvalidDictionary);
82 }
83
84 let content = data[pos..].to_vec();
85
86 Ok(Self {
87 id,
88 content,
89 huf_table: huf_decode,
90 of_table,
91 ml_table,
92 ll_table,
93 rep_offsets: [rep1, rep2, rep3],
94 })
95 }
96
97 pub fn id(&self) -> u32 {
99 self.id
100 }
101
102 pub fn content(&self) -> &[u8] {
104 &self.content
105 }
106
107 pub fn rep_offsets(&self) -> &[u32; 3] {
109 &self.rep_offsets
110 }
111
112 pub fn huf_table(&self) -> Option<(&[HuffmanDecodeEntry], u8)> {
113 self.huf_table.as_ref().map(|(t, l)| (t.as_slice(), *l))
114 }
115
116 pub fn of_table(&self) -> Option<(&[FseDecodeEntry], u8)> {
117 self.of_table.as_ref().map(|(t, l)| (t.as_slice(), *l))
118 }
119
120 pub fn ml_table(&self) -> Option<(&[FseDecodeEntry], u8)> {
121 self.ml_table.as_ref().map(|(t, l)| (t.as_slice(), *l))
122 }
123
124 pub fn ll_table(&self) -> Option<(&[FseDecodeEntry], u8)> {
125 self.ll_table.as_ref().map(|(t, l)| (t.as_slice(), *l))
126 }
127}
128
129#[cfg(feature = "alloc")]
130fn parse_dict_huffman(
131 data: &[u8],
132) -> Result<(Option<(Vec<HuffmanDecodeEntry>, u8)>, usize), DecompressError> {
133 if data.is_empty() {
134 return Err(DecompressError::InvalidDictionary);
135 }
136
137 let (weights, consumed) = parse_huffman_weights(data)?;
138 if weights.is_empty() {
139 return Ok((None, consumed));
140 }
141 let (table, table_log) = build_huffman_decode_table(&weights)?;
142 Ok((Some((table, table_log)), consumed))
143}
144
145#[cfg(feature = "alloc")]
146fn parse_dict_fse(
147 data: &[u8],
148 max_symbol: u8,
149) -> Result<(Option<(Vec<FseDecodeEntry>, u8)>, usize), DecompressError> {
150 if data.is_empty() {
151 return Err(DecompressError::InvalidDictionary);
152 }
153
154 let mut reader = BitReader::new(data);
155 let (distribution, accuracy_log) = parse_fse_table_description(&mut reader, max_symbol)?;
156 let consumed = reader.bytes_consumed();
157 let table = build_decode_table(&distribution, accuracy_log)
158 .map_err(|_| DecompressError::InvalidDictionary)?;
159 Ok((Some((table, accuracy_log)), consumed))
160}
161
162#[cfg(feature = "dict_builder")]
163pub fn train_dict_fastcover(
165 samples: &[&[u8]],
166 dict_size: usize,
167 params: fastcover::FastCoverParams,
168) -> Dictionary {
169 let content = fastcover::select_segments(samples, dict_size, ¶ms);
170 let dict_bytes = finalize::finalize_dictionary(&content, samples, dict_size);
171 Dictionary::from_bytes(&dict_bytes).expect("finalized dictionary must be valid")
172}