Skip to main content

rust_zstd/
constants.rs

//! Zstandard format constants, tables, and symbol coding definitions.
//!
//! Ported from the zstd C library by Meta Platforms, Inc.
//! Original sources: `lib/common/zstd_internal.h`,
//! `lib/decompress/zstd_decompress_internal.h`, and `lib/compress/zstd_compress_internal.h`.
//! Licensed under BSD and GPLv2 (dual license). See LICENSE-ZSTD.

/// Magic number that identifies a Zstandard frame.
pub const ZSTD_MAGIC: u32 = 0xFD2F_B528;
/// Base-2 logarithm of the largest block size.
pub const ZSTD_BLOCKSIZELOG_MAX: u32 = 17;
/// Largest block size in bytes (128 KiB), derived from [`ZSTD_BLOCKSIZELOG_MAX`].
pub const ZSTD_BLOCKSIZE_MAX: usize = 1 << ZSTD_BLOCKSIZELOG_MAX;
/// Largest supported base-2 logarithm of the window size.
pub const ZSTD_WINDOWLOG_MAX: u32 = 31;
/// Shortest match length; match-length bases in this module start at this value.
pub const ZSTD_MINMATCH: usize = 3;

// Block types (value 3 is reserved by the format and has no constant here).
/// Block whose content is stored uncompressed.
pub const BLOCK_TYPE_RAW: u8 = 0;
/// Block made of a single byte repeated for the whole block size.
pub const BLOCK_TYPE_RLE: u8 = 1;
/// Block holding Zstandard-compressed data (literals section + sequences section).
pub const BLOCK_TYPE_COMPRESSED: u8 = 2;

// Literal block types
/// Literals stored uncompressed.
pub const LIT_TYPE_RAW: u8 = 0;
/// Literals consisting of a single repeated byte.
pub const LIT_TYPE_RLE: u8 = 1;
/// Huffman-compressed literals that carry their own tree description.
pub const LIT_TYPE_COMPRESSED: u8 = 2;
/// Huffman-compressed literals that reuse the previously transmitted tree.
pub const LIT_TYPE_TREELESS: u8 = 3;

// Sequence encoding modes
/// Use the predefined (default) FSE distribution for this symbol type.
pub const SEQ_MODE_PREDEFINED: u8 = 0;
/// The table is a single symbol, given as one byte.
pub const SEQ_MODE_RLE: u8 = 1;
/// An FSE distribution description follows in the stream.
pub const SEQ_MODE_FSE: u8 = 2;
/// Reuse the table from the previous compressed block.
pub const SEQ_MODE_REPEAT: u8 = 3;

// Maximum symbol values
/// Largest literal-length code; the literal-length tables have `MAX_LL + 1` entries.
pub const MAX_LL: usize = 35;
/// Largest match-length code; the match-length tables have `MAX_ML + 1` entries.
pub const MAX_ML: usize = 52;
/// Largest offset code; [`OF_BITS`] has `MAX_OFF + 1` entries.
pub const MAX_OFF: usize = 31;
/// Largest offset code covered by the predefined offset distribution
/// ([`OF_DEFAULT_NORM`] has `DEFAULT_MAX_OFF + 1` entries).
pub const DEFAULT_MAX_OFF: usize = 28;

// FSE table log sizes
/// Maximum FSE table log for literal-length codes.
pub const LL_FSE_LOG: u32 = 9;
/// Maximum FSE table log for match-length codes.
pub const ML_FSE_LOG: u32 = 9;
/// Maximum FSE table log for offset codes.
pub const OFF_FSE_LOG: u32 = 8;

// Default norm log
/// Table log of the predefined literal-length distribution ([`LL_DEFAULT_NORM`] sums to `1 << 6`).
pub const LL_DEFAULT_NORM_LOG: u32 = 6;
/// Table log of the predefined match-length distribution ([`ML_DEFAULT_NORM`] sums to `1 << 6`).
pub const ML_DEFAULT_NORM_LOG: u32 = 6;
/// Table log of the predefined offset distribution ([`OF_DEFAULT_NORM`] sums to `1 << 5`).
pub const OF_DEFAULT_NORM_LOG: u32 = 5;

/// Extra bits for each literal length code.
pub const LL_BITS: [u8; MAX_LL + 1] = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11,
    12, 13, 14, 15, 16,
];

/// Base value for each literal length code.
///
/// Each base is the previous base plus `1 << LL_BITS[previous code]`.
pub const LL_BASE: [u32; MAX_LL + 1] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, 48, 64,
    0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000,
];

/// Extra bits for each match length code.
pub const ML_BITS: [u8; MAX_ML + 1] = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
];

/// Base value for each match length code.
///
/// Each base is the previous base plus `1 << ML_BITS[previous code]`; the first is 3.
pub const ML_BASE: [u32; MAX_ML + 1] = [
    3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
    28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203,
    0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003,
];

/// Extra bits for each offset code: offset code `c` carries exactly `c` extra bits,
/// i.e. `OF_BITS[c] == c`.
pub const OF_BITS: [u8; MAX_OFF + 1] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
    26, 27, 28, 29, 30, 31,
];

/// Default normalized count for literal length FSE table.
///
/// A count of `-1` marks a "less than 1" probability symbol.
pub const LL_DEFAULT_NORM: [i16; MAX_LL + 1] = [
    4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
    -1, -1, -1, -1,
];

/// Default normalized count for match length FSE table.
///
/// A count of `-1` marks a "less than 1" probability symbol.
pub const ML_DEFAULT_NORM: [i16; MAX_ML + 1] = [
    1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1,
];

/// Default normalized count for offset FSE table.
///
/// Covers offset codes `0..=DEFAULT_MAX_OFF` only. A count of `-1` marks a
/// "less than 1" probability symbol.
pub const OF_DEFAULT_NORM: [i16; DEFAULT_MAX_OFF + 1] = [
    1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
];

/// Lookup table for literal length code (values 0..=63).
const LL_CODE_TABLE: [u8; 64] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
    20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
];

/// Lookup table for match length code (values 0..=127, where value = matchLength - 3).
const ML_CODE_TABLE: [u8; 128] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
    26, 27, 28, 29, 30, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 38,
    38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
    40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42,
    42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
    42, 42, 42, 42, 42, 42,
];

/// Convert a literal length value to its code.
///
/// Ported from `ZSTD_LLcode()` in `zstd_compress_internal.h`. Lengths up to 63 are
/// looked up in a table; longer lengths map to `highbit32(litlen) + 19`.
///
/// Lengths of `1 << 17` and above produce codes greater than 35; no range check is
/// performed here, so callers are expected to pass in-range lengths.
pub fn ll_code(litlen: u32) -> u8 {
    const LL_DELTA: u32 = 19;
    if litlen <= 63 {
        LL_CODE_TABLE[litlen as usize]
    } else {
        // highbit32 is at most 31, so the sum is at most 50 and the cast cannot truncate.
        (highbit32(litlen) + LL_DELTA) as u8
    }
}

/// Convert a match length base (matchLength - 3) to its code.
///
/// Ported from `ZSTD_MLcode()` in `zstd_compress_internal.h`. Bases up to 127 are
/// looked up in a table; larger bases map to `highbit32(ml_base) + 36`.
///
/// Bases of `1 << 17` and above produce codes greater than 52; no range check is
/// performed here, so callers are expected to pass in-range values.
pub fn ml_code(ml_base: u32) -> u8 {
    const ML_DELTA: u32 = 36;
    if ml_base <= 127 {
        ML_CODE_TABLE[ml_base as usize]
    } else {
        // highbit32 is at most 31, so the sum is at most 67 and the cast cannot truncate.
        (highbit32(ml_base) + ML_DELTA) as u8
    }
}

/// Convert an offset value to its code: the 0-indexed position of its highest set bit.
///
/// Inputs 0 and 1 both yield code 0; the result never exceeds 31.
pub fn off_code(offset: u32) -> u8 {
    highbit32(offset) as u8
}

/// Highest set bit (0-indexed). Returns 0 for input 0 or 1.
fn highbit32(v: u32) -> u32 {
    if v <= 1 {
        // Guard first: for v == 0, `leading_zeros()` is 32 and `31 - 32` would underflow.
        0
    } else {
        31 - v.leading_zeros()
    }
}