1#![forbid(unsafe_code)]
2
// Dictionary-building submodules. All three are compiled only when the
// `dict_builder` feature is enabled.
#[cfg(feature = "dict_builder")]
pub mod cover;
// Supplies `FastCoverParams` and `select_segments`, both used by
// `train_dict_fastcover` in this file.
#[cfg(feature = "dict_builder")]
pub mod fastcover;
// Supplies `finalize_dictionary`, used by `train_dict_fastcover` in this file.
#[cfg(feature = "dict_builder")]
pub mod finalize;
9
10#[cfg(feature = "alloc")]
11use alloc::vec::Vec;
12
13use crate::bitstream::reader::BitReader;
14use crate::error::DecompressError;
15use crate::fse::table_builder::{build_decode_table, parse_fse_table_description};
16use crate::fse::{FseDecodeEntry, LL_MAX_SYMBOL, ML_MAX_SYMBOL, OF_MAX_SYMBOL};
17use crate::huffman::HuffmanDecodeEntry;
18use crate::huffman::weights::{build_huffman_decode_table, parse_huffman_weights};
19
/// Magic number that must open a serialized dictionary.
///
/// `Dictionary::from_bytes` reads the first four bytes of its input as a
/// little-endian `u32` and rejects the input unless it equals this value.
pub const DICT_MAGIC: u32 = 0xEC30_A437;
21
/// A parsed dictionary: identifier, pre-built entropy decode tables, initial
/// repeat offsets, and the raw content bytes.
///
/// Instances are produced by `Dictionary::from_bytes`; the fields are private
/// and exposed through read-only accessors.
#[cfg(feature = "alloc")]
#[derive(Clone)]
pub struct Dictionary {
    /// Dictionary identifier, read from bytes 4..8 (little-endian).
    /// `from_bytes` rejects an identifier of zero.
    id: u32,
    /// Every byte of the input that follows the three repeat offsets.
    content: Vec<u8>,
    /// Huffman decode table paired with its table log, or `None` when the
    /// parsed weight list was empty.
    huf_table: Option<(Vec<HuffmanDecodeEntry>, u8)>,
    /// FSE decode table (parsed with `OF_MAX_SYMBOL`) paired with its accuracy log.
    of_table: Option<(Vec<FseDecodeEntry>, u8)>,
    /// FSE decode table (parsed with `ML_MAX_SYMBOL`) paired with its accuracy log.
    ml_table: Option<(Vec<FseDecodeEntry>, u8)>,
    /// FSE decode table (parsed with `LL_MAX_SYMBOL`) paired with its accuracy log.
    ll_table: Option<(Vec<FseDecodeEntry>, u8)>,
    /// Three little-endian `u32` values stored right after the entropy tables.
    /// `from_bytes` rejects any value of zero.
    rep_offsets: [u32; 3],
}
37
#[cfg(feature = "alloc")]
impl Dictionary {
    /// Parses a serialized dictionary.
    ///
    /// The input is read in this order:
    ///
    /// 1. 4-byte little-endian magic, which must equal [`DICT_MAGIC`];
    /// 2. 4-byte little-endian dictionary id, which must be non-zero;
    /// 3. Huffman weights (via `parse_dict_huffman`);
    /// 4. three FSE table descriptions, parsed with `OF_MAX_SYMBOL`,
    ///    `ML_MAX_SYMBOL` and `LL_MAX_SYMBOL` in that order;
    /// 5. three 4-byte little-endian repeat offsets, each non-zero;
    /// 6. the dictionary content (all remaining bytes, possibly none).
    ///
    /// # Errors
    ///
    /// Returns [`DecompressError::InvalidDictionary`] when the input is shorter
    /// than 8 bytes, the magic does not match, the id is zero, the input ends
    /// before the repeat offsets are complete, any repeat offset is zero, or an
    /// entropy parser reports having consumed more bytes than it was handed.
    /// Errors raised by the entropy-table parsers are propagated unchanged.
    pub fn from_bytes(data: &[u8]) -> Result<Self, DecompressError> {
        /// Magic (4 bytes) followed by dictionary id (4 bytes).
        const HEADER_LEN: usize = 8;
        /// Three little-endian `u32` repeat offsets.
        const REP_OFFSETS_LEN: usize = 12;

        /// Drops the first `used` bytes, failing instead of panicking when a
        /// parser claims to have consumed more than `bytes` holds.
        fn skip(bytes: &[u8], used: usize) -> Result<&[u8], DecompressError> {
            bytes.get(used..).ok_or(DecompressError::InvalidDictionary)
        }

        if data.len() < HEADER_LEN {
            return Err(DecompressError::InvalidDictionary);
        }

        let magic = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
        if magic != DICT_MAGIC {
            return Err(DecompressError::InvalidDictionary);
        }

        let id = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
        if id == 0 {
            return Err(DecompressError::InvalidDictionary);
        }

        // The length check above guarantees this slice is in bounds.
        let rest = &data[HEADER_LEN..];

        // Entropy tables follow back to back; each parser reports how many
        // bytes it consumed so the next one can start right after it.
        let (huf_table, used) = parse_dict_huffman(rest)?;
        let rest = skip(rest, used)?;

        let (of_table, used) = parse_dict_fse(rest, OF_MAX_SYMBOL)?;
        let rest = skip(rest, used)?;

        let (ml_table, used) = parse_dict_fse(rest, ML_MAX_SYMBOL)?;
        let rest = skip(rest, used)?;

        let (ll_table, used) = parse_dict_fse(rest, LL_MAX_SYMBOL)?;
        let rest = skip(rest, used)?;

        if rest.len() < REP_OFFSETS_LEN {
            return Err(DecompressError::InvalidDictionary);
        }
        let (rep_bytes, content) = rest.split_at(REP_OFFSETS_LEN);

        let mut rep_offsets = [0u32; 3];
        for (slot, chunk) in rep_offsets.iter_mut().zip(rep_bytes.chunks_exact(4)) {
            *slot = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
        }

        // NOTE(review): the Zstandard format specification also requires each
        // repeat offset to be <= the dictionary content size. That bound is not
        // enforced here; confirm whether it should be before tightening.
        if rep_offsets.contains(&0) {
            return Err(DecompressError::InvalidDictionary);
        }

        Ok(Self {
            id,
            content: content.to_vec(),
            huf_table,
            of_table,
            ml_table,
            ll_table,
            rep_offsets,
        })
    }

    /// Returns the dictionary identifier (never zero for a parsed dictionary).
    pub fn id(&self) -> u32 {
        self.id
    }

    /// Returns the raw dictionary content, i.e. the bytes that followed the
    /// repeat offsets in the serialized form.
    pub fn content(&self) -> &[u8] {
        &self.content
    }

    /// Returns the three repeat offsets stored in the dictionary.
    pub fn rep_offsets(&self) -> &[u32; 3] {
        &self.rep_offsets
    }

    /// Returns the Huffman decode table and its table log, or `None` when the
    /// dictionary's weight list was empty.
    pub fn huf_table(&self) -> Option<(&[HuffmanDecodeEntry], u8)> {
        self.huf_table.as_ref().map(|(t, l)| (t.as_slice(), *l))
    }

    /// Returns the FSE decode table parsed with `OF_MAX_SYMBOL` and its
    /// accuracy log.
    pub fn of_table(&self) -> Option<(&[FseDecodeEntry], u8)> {
        self.of_table.as_ref().map(|(t, l)| (t.as_slice(), *l))
    }

    /// Returns the FSE decode table parsed with `ML_MAX_SYMBOL` and its
    /// accuracy log.
    pub fn ml_table(&self) -> Option<(&[FseDecodeEntry], u8)> {
        self.ml_table.as_ref().map(|(t, l)| (t.as_slice(), *l))
    }

    /// Returns the FSE decode table parsed with `LL_MAX_SYMBOL` and its
    /// accuracy log.
    pub fn ll_table(&self) -> Option<(&[FseDecodeEntry], u8)> {
        self.ll_table.as_ref().map(|(t, l)| (t.as_slice(), *l))
    }
}
136
/// Reads the Huffman weight section at the start of `data` and builds the
/// matching decode table.
///
/// On success returns the optional `(table, table_log)` pair together with the
/// number of bytes `parse_huffman_weights` reported as consumed. The table is
/// `None` when the parsed weight list is empty.
///
/// # Errors
///
/// Returns `DecompressError::InvalidDictionary` for empty input; failures from
/// weight parsing or table construction are propagated with `?`.
#[cfg(feature = "alloc")]
#[allow(clippy::type_complexity)]
fn parse_dict_huffman(
    data: &[u8],
) -> Result<(Option<(Vec<HuffmanDecodeEntry>, u8)>, usize), DecompressError> {
    if data.is_empty() {
        return Err(DecompressError::InvalidDictionary);
    }

    let (weights, consumed) = parse_huffman_weights(data)?;

    // An empty weight list means there is no table to build.
    let decoder = if weights.is_empty() {
        None
    } else {
        let (entries, log) = build_huffman_decode_table(&weights)?;
        Some((entries, log))
    };

    Ok((decoder, consumed))
}
153
/// Reads one FSE table description from the start of `data` and builds its
/// decode table.
///
/// `max_symbol` is forwarded to `parse_fse_table_description`. On success the
/// result holds the `(table, accuracy_log)` pair (always `Some`) and the byte
/// count reported by the bit reader after the description was parsed.
///
/// # Errors
///
/// Returns `DecompressError::InvalidDictionary` for empty input or when the
/// decode table cannot be built; description-parsing failures are propagated
/// with `?`.
#[cfg(feature = "alloc")]
#[allow(clippy::type_complexity)]
fn parse_dict_fse(
    data: &[u8],
    max_symbol: u8,
) -> Result<(Option<(Vec<FseDecodeEntry>, u8)>, usize), DecompressError> {
    if data.is_empty() {
        return Err(DecompressError::InvalidDictionary);
    }

    let mut bits = BitReader::new(data);
    let (norm_counts, table_log) = parse_fse_table_description(&mut bits, max_symbol)?;
    // Capture the byte count before building the table, as the description
    // is the only part read from the input.
    let used = bits.bytes_consumed();

    match build_decode_table(&norm_counts, table_log) {
        Ok(entries) => Ok((Some((entries, table_log)), used)),
        // Any table-construction failure is reported as a bad dictionary.
        Err(_) => Err(DecompressError::InvalidDictionary),
    }
}
171
/// Builds a [`Dictionary`] from `samples` using the `fastcover` module.
///
/// The work happens in three steps: `fastcover::select_segments` picks the
/// dictionary content from `samples` for the requested `dict_size` and
/// `params`; `finalize::finalize_dictionary` turns that content into
/// serialized dictionary bytes; the bytes are then parsed with
/// [`Dictionary::from_bytes`].
///
/// # Panics
///
/// Panics if the bytes produced by `finalize::finalize_dictionary` are
/// rejected by [`Dictionary::from_bytes`].
#[cfg(feature = "dict_builder")]
pub fn train_dict_fastcover(
    samples: &[&[u8]],
    dict_size: usize,
    params: fastcover::FastCoverParams,
) -> Dictionary {
    let content = fastcover::select_segments(samples, dict_size, &params);
    let dict_bytes = finalize::finalize_dictionary(&content, samples, dict_size);
    Dictionary::from_bytes(&dict_bytes).expect("finalized dictionary must be valid")
}
182}