Skip to main content

zstd_nostd/
common.rs

1//! Common constants, error types, and predefined tables for zstd
2
3#[cfg(feature = "alloc")]
4extern crate alloc;
5
/// Magic number identifying a zstd frame.
pub const ZSTD_MAGIC: u32 = 0xFD2F_B528;

/// Lower bound of the skippable-frame magic number range.
pub const SKIPPABLE_MAGIC_LOW: u32 = 0x184D_2A50;
/// Upper bound of the skippable-frame magic number range.
pub const SKIPPABLE_MAGIC_HIGH: u32 = 0x184D_2A5F;

/// Maximum block size in bytes (128 KiB).
pub const MAX_BLOCK_SIZE: usize = 128 * 1024;

/// Minimum match length.
pub const MIN_MATCH: usize = 3;

/// Maximum window log.
pub const MAX_WINDOW_LOG: u32 = 30;
21
/// Block type, as carried by the 2-bit field of a block header.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum BlockType {
    /// Field value 0.
    Raw = 0,
    /// Field value 1.
    Rle = 1,
    /// Field value 2.
    Compressed = 2,
    /// Field value 3.
    Reserved = 3,
}

impl BlockType {
    /// Decode a block-type field value.
    ///
    /// 0, 1 and 2 select `Raw`, `Rle` and `Compressed`. Every other value
    /// (3, and anything outside the 2-bit range) yields `Reserved`.
    pub fn from_u8(v: u8) -> Self {
        // Indexed by field value; anything past the table is `Reserved`.
        const KNOWN: [BlockType; 3] = [BlockType::Raw, BlockType::Rle, BlockType::Compressed];
        KNOWN
            .get(usize::from(v))
            .copied()
            .unwrap_or(BlockType::Reserved)
    }
}
41
/// Literals section type.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum LiteralsType {
    /// Field value 0.
    Raw = 0,
    /// Field value 1.
    Rle = 1,
    /// Field value 2.
    Compressed = 2,
    /// Field value 3.
    Treeless = 3,
}

impl LiteralsType {
    /// Decode a literals-type field value.
    ///
    /// 0, 1 and 2 select `Raw`, `Rle` and `Compressed`. Every other value
    /// yields `Treeless`, including values above 3, so callers should pass an
    /// already-masked 2-bit value.
    pub fn from_u8(v: u8) -> Self {
        // Indexed by field value; anything past the table is `Treeless`.
        const KNOWN: [LiteralsType; 3] = [
            LiteralsType::Raw,
            LiteralsType::Rle,
            LiteralsType::Compressed,
        ];
        KNOWN
            .get(usize::from(v))
            .copied()
            .unwrap_or(LiteralsType::Treeless)
    }
}
61
/// Sequence compression mode.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SeqMode {
    /// Field value 0.
    Predefined = 0,
    /// Field value 1.
    Rle = 1,
    /// Field value 2.
    FseCompressed = 2,
    /// Field value 3.
    Repeat = 3,
}

impl SeqMode {
    /// Decode a sequence-mode field value.
    ///
    /// 0, 1 and 2 select `Predefined`, `Rle` and `FseCompressed`. Every other
    /// value yields `Repeat`, including values above 3, so callers should pass
    /// an already-masked 2-bit value.
    pub fn from_u8(v: u8) -> Self {
        // Indexed by field value; anything past the table is `Repeat`.
        const KNOWN: [SeqMode; 3] = [SeqMode::Predefined, SeqMode::Rle, SeqMode::FseCompressed];
        KNOWN
            .get(usize::from(v))
            .copied()
            .unwrap_or(SeqMode::Repeat)
    }
}
81
/// Failure kinds reported by zstd operations.
///
/// The exact conditions that produce each variant are decided by the code
/// that returns them, which is not part of this module. The notes below only
/// restate what the variant names convey.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ZstdError {
    /// The data being processed is corrupt.
    CorruptData,
    /// The window is too large.
    WindowTooLarge,
    /// A dictionary is involved that is not supported.
    UnsupportedDictionary,
    /// A checksum did not match.
    ChecksumMismatch,
    /// The output is too large.
    OutputTooLarge,
}
91
/// Literal length baselines, indexed by literal length code (0-35).
/// Per RFC 8878 Table 15
///
/// A decoded literal length is `LL_BASELINES[code]` plus the value of
/// `LL_EXTRA_BITS[code]` extra bits.
pub const LL_BASELINES: [u32; 36] = [
    // Codes 0-15: the code is the length itself.
    0, 1, 2, 3, 4, 5, 6, 7,
    8, 9, 10, 11, 12, 13, 14, 15,
    // Codes 16-23.
    16, 18, 20, 22, 24, 28, 32, 40,
    // Codes 24-35.
    48, 64, 128, 256, 512, 1024,
    2048, 4096, 8192, 16384, 32768, 65536,
];

/// Number of extra bits for each literal length code (0-35).
pub const LL_EXTRA_BITS: [u8; 36] = [
    // Codes 0-15: no extra bits.
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // Codes 16-23.
    1, 1, 1, 1, 2, 2, 3, 3,
    // Codes 24-35.
    4, 6, 7, 8, 9, 10,
    11, 12, 13, 14, 15, 16,
];
105
/// Match length baselines, indexed by match length code (0-52).
/// Per RFC 8878 and reference zstd implementation
///
/// A decoded match length is `ML_BASELINES[code]` plus the value of
/// `ML_EXTRA_BITS[code]` extra bits.
pub const ML_BASELINES: [u32; 53] = [
    // Codes 0-31: match lengths 3-34, one code per length.
    3, 4, 5, 6, 7, 8, 9, 10,
    11, 12, 13, 14, 15, 16, 17, 18,
    19, 20, 21, 22, 23, 24, 25, 26,
    27, 28, 29, 30, 31, 32, 33, 34,
    // Codes 32-42.
    35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99,
    // Codes 43-52.
    131, 259, 515, 1027, 2051, 4099, 8195, 16387, 32771, 65539,
];

/// Number of extra bits for each match length code (0-52).
///
/// Note: after code 42 (5 bits) the count jumps to 7 and then grows by one
/// per code.
pub const ML_EXTRA_BITS: [u8; 53] = [
    // Codes 0-31: no extra bits.
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // Codes 32-42.
    1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5,
    // Codes 43-52.
    7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
];
122
// Offset codes need no lookup table: code N uses N extra bits,
// and the baseline for offset code N is (1 << N).
125
/// Predefined FSE distribution for literal lengths, one entry per literal
/// length code (0-35).
///
/// A `-1` entry is RFC 8878's "less than 1" probability marker. Counting each
/// `-1` as one slot, the entries total `1 << LL_DEFAULT_AL`.
pub const LL_DEFAULT_DIST: [i16; 36] = [
    4, 3, 2, 2, 2, 2, 2, 2, // codes 0-7
    2, 2, 2, 2, 2, 1, 1, 1, // codes 8-15
    2, 2, 2, 2, 2, 2, 2, 2, // codes 16-23
    2, 3, 2, 1, 1, 1, 1, 1, // codes 24-31
    -1, -1, -1, -1, // codes 32-35
];
/// Accuracy log of the predefined literal length distribution.
pub const LL_DEFAULT_AL: u8 = 6;
133
/// Predefined FSE distribution for match lengths, one entry per match length
/// code (0-52).
///
/// A `-1` entry is RFC 8878's "less than 1" probability marker. Counting each
/// `-1` as one slot, the entries total `1 << ML_DEFAULT_AL`.
pub const ML_DEFAULT_DIST: [i16; 53] = [
    1, 4, 3, 2, 2, 2, 2, 2, // codes 0-7
    2, 1, 1, 1, 1, 1, 1, 1, // codes 8-15
    1, 1, 1, 1, 1, 1, 1, 1, // codes 16-23
    1, 1, 1, 1, 1, 1, 1, 1, // codes 24-31
    1, 1, 1, 1, 1, 1, 1, 1, // codes 32-39
    1, 1, 1, 1, 1, 1, -1, -1, // codes 40-47
    -1, -1, -1, -1, -1, // codes 48-52
];
/// Accuracy log of the predefined match length distribution.
pub const ML_DEFAULT_AL: u8 = 6;
142
/// Predefined FSE distribution for offsets, one entry per offset code (0-28).
///
/// A `-1` entry is RFC 8878's "less than 1" probability marker. Counting each
/// `-1` as one slot, the entries total `1 << OF_DEFAULT_AL`.
pub const OF_DEFAULT_DIST: [i16; 29] = [
    1, 1, 1, 1, 1, 1, 2, 2, // codes 0-7
    2, 1, 1, 1, 1, 1, 1, 1, // codes 8-15
    1, 1, 1, 1, 1, 1, 1, 1, // codes 16-23
    -1, -1, -1, -1, -1, // codes 24-28
];
/// Accuracy log of the predefined offset distribution.
pub const OF_DEFAULT_AL: u8 = 5;
149
/// Highest literal length symbol (code) value; the codes run 0-35.
pub const LL_MAX_SYMBOL: usize = 35;
/// Max accuracy log for literal lengths.
pub const LL_MAX_AL: u8 = 9;

/// Highest match length symbol (code) value; the codes run 0-52.
pub const ML_MAX_SYMBOL: usize = 52;
/// Max accuracy log for match lengths.
pub const ML_MAX_AL: u8 = 9;

/// Highest offset symbol (code) value.
pub const OF_MAX_SYMBOL: usize = 31;
/// Max accuracy log for offsets.
pub const OF_MAX_AL: u8 = 8;
164
165/// Get literal length code for a given literal length value
166pub fn ll_code(ll: u32) -> u8 {
167    if ll <= 15 {
168        return ll as u8;
169    }
170    // Search backward through baselines to find the right code
171    let mut code = 35u8;
172    while code > 16 {
173        if ll >= LL_BASELINES[code as usize] {
174            return code;
175        }
176        code -= 1;
177    }
178    16
179}
180
181/// Get match length code for a given match length value (ml >= 3)
182pub fn ml_code(ml: u32) -> u8 {
183    // ML_BASELINES[0] = 3, so code 0 = match length 3
184    if ml <= 34 {
185        // Codes 0-31: match lengths 3-34 (direct mapping)
186        return (ml - 3) as u8;
187    }
188    // Search backward through baselines for codes 32-52
189    let mut code = 52u8;
190    while code > 32 {
191        if ml >= ML_BASELINES[code as usize] {
192            return code;
193        }
194        code -= 1;
195    }
196    32
197}
198
/// Map an offset value to its offset code.
///
/// The code is the index of the highest set bit of `offset`, i.e.
/// floor(log2(offset)). An offset of 0 has no set bit and maps to code 0.
pub fn of_code(offset: u32) -> u8 {
    match offset {
        0 => 0,
        // For non-zero input `leading_zeros()` is at most 31, so this cannot underflow.
        nonzero => (31 - nonzero.leading_zeros()) as u8,
    }
}
204
/// Read a little-endian `u32` from `data` starting at byte `offset`.
///
/// Returns 0 when fewer than 4 bytes are available at `offset`. A stored
/// value of zero cannot be told apart from an out-of-range read, so callers
/// that care must check the bounds themselves.
///
/// The range is resolved with checked slicing rather than `offset + 4`, so an
/// `offset` close to `usize::MAX` yields 0 instead of overflowing.
#[inline]
pub fn read_le32(data: &[u8], offset: usize) -> u32 {
    match data.get(offset..).and_then(|tail| tail.get(..4)) {
        Some(&[b0, b1, b2, b3]) => u32::from_le_bytes([b0, b1, b2, b3]),
        _ => 0,
    }
}
211
/// Read a little-endian `u64` from `data` starting at byte `offset`.
///
/// Returns 0 when fewer than 8 bytes are available at `offset`. A stored
/// value of zero cannot be told apart from an out-of-range read, so callers
/// that care must check the bounds themselves.
///
/// The range is resolved with checked slicing rather than `offset + 8`, so an
/// `offset` close to `usize::MAX` yields 0 instead of overflowing.
#[inline]
pub fn read_le64(data: &[u8], offset: usize) -> u64 {
    match data.get(offset..).and_then(|tail| tail.get(..8)) {
        Some(&[b0, b1, b2, b3, b4, b5, b6, b7]) => {
            u64::from_le_bytes([b0, b1, b2, b3, b4, b5, b6, b7])
        }
        _ => 0,
    }
}