1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
[package]
name = "compcol"
version = "0.6.0"
edition = "2024"
# Edition 2024 stabilised in Rust 1.85. Code uses `let chains` (Rust 1.88)
# inside the brotli encoder and the io adapters, so 1.88 is the real floor.
rust-version = "1.88"
description = "A no_std collection of compression algorithms behind a uniform streaming trait, gated per-algorithm by Cargo features."
license = "MIT"
repository = "https://github.com/KarpelesLab/compcol"
documentation = "https://docs.rs/compcol"
readme = "README.md"
# Keywords are free-form search terms; categories must be registered
# crates.io slugs (hence `no_std` above versus `no-std` below).
keywords = ["compression", "no_std", "embedded"]
categories = ["compression", "no-std"]
# Tell docs.rs to build the doc set with every feature on. Without this
# the rendered docs only show the default-feature surface (rle/deflate/
# zlib/gzip/factory) — most of the crate's public API would be
# invisible to anyone reading docs.rs.
[package.metadata.docs.rs]
all-features = true
# Sets the `docsrs` cfg for the documentation build.
rustdoc-args = ["--cfg", "docsrs"]
[features]
default = ["alloc", "rle", "deflate", "zlib", "gzip", "factory"]
# Convenience meta-feature: every feature compiled in. Equivalent to
# `cargo build --all-features` at the CLI level; useful in downstream
# Cargo.toml entries (`features = ["all"]`) instead of spelling out the
# full list of feature names by hand. Each name is listed exactly once;
# keep this list in sync with the feature definitions in this table.
all = [
    "alloc", "std", "tokio", "factory",
    "rle", "rle90", "deflate", "deflate64", "zlib", "gzip",
    "lzma", "xz", "lzma2",
    "zstd", "brotli", "lz4", "lz5", "snappy", "lzw", "lzss", "bzip2",
    "lzo", "lzx", "amiga_lzx", "quantum", "lzfse", "adc",
    "ppmd",
    "xpress_huffman",
    "lznt1",
    "xpress",
    "packbits",
    "zip_implode",
    "lzs",
    "lzham",
    "sit13",
    "lzah",
    "arsenic",
    "rar1", "rar2", "rar3", "rar5",
    "zip_shrink",
    "zip_reduce",
    "lha",
    "bcj", "bcj2", "delta",
    "arc_crunch", "arc_squeeze", "arc_squash",
]
# Enables `alloc`-backed conveniences (e.g. the `factory` module, the
# `compcol::vec` one-shot helpers). Pulled in automatically by features
# that require heap allocation.
alloc = []
# Enables the `compcol::io` adapters that wrap `std::io::{Read, Write}`
# around any `Encoder`/`Decoder`, plus `From<Error> for std::io::Error`
# and `impl std::error::Error for Error`. Pulls in `alloc`.
std = ["alloc"]
# Runtime by-name lookup that returns boxed trait objects. Requires `alloc`.
factory = ["alloc"]
# Run-length encoding.
rle = []
# RLE90 — the 0x90/DLE run-length variant (ARC method 3, StuffIt method 1).
rle90 = ["alloc"]
# RFC 1951 raw deflate. Requires `alloc` for the 32 KiB sliding window and
# the per-block symbol/bit buffers.
deflate = ["alloc"]
# PKWARE deflate64 (zip method 9). Same framing as deflate but a 64 KiB
# window and an extended length/distance alphabet. Standalone module —
# does not depend on the `deflate` feature.
deflate64 = ["alloc"]
# RFC 1950 zlib (deflate + Adler-32 + 2-byte header / 4-byte trailer).
zlib = ["deflate"]
# RFC 1952 gzip (deflate + CRC-32 + 10-byte header / 8-byte trailer).
gzip = ["deflate"]
# LZMA (Lempel–Ziv–Markov chain). Range-coded, depends on alloc for the
# probability tables and the LZ window.
lzma = ["alloc"]
# xz container (RFC-style stream/block headers + check codes; the inner
# LZMA2 chunk codec is inlined inside `src/xz/`).
xz = ["lzma"]
# Raw LZMA2 chunk stream (7-Zip coder id 21) — the dict-reset control bytes
# + LZMA chunks, without the `.xz` container. Decode-only entry point that
# reuses the same LZMA2 chunk codec the `xz` feature uses. The 7z coder
# property is a 1-byte dictionary-size code. The encoder is an
# `Error::Unsupported` stub (produce LZMA2 via the `xz` encoder).
lzma2 = ["alloc", "lzma"]
# Zstandard (RFC 8478).
zstd = ["alloc"]
# Brotli (RFC 7932). Carries a 170 KiB built-in static dictionary when fully
# implemented; expect a fat .rlib once it is.
brotli = ["alloc"]
# LZ4 block format.
lz4 = ["alloc"]
# LZ5 / Lizard frame format (Yann Collet & Przemyslaw Skibinski's modern
# LZ4 descendant). Decoder handles the LZ4-codeword path with all
# sub-streams stored raw (compression levels 10..=19 without Huffman);
# LIZv1 mode and Huffman-coded streams are rejected as `Unsupported`.
# Encoder emits store-only frames (round-trips through reference CLI but
# achieves no compression).
lz5 = ["alloc"]
# Google Snappy.
snappy = ["alloc"]
# Lempel–Ziv–Welch (Unix compress / GIF).
lzw = ["alloc"]
# LZSS (Storer–Szymanski). Okumura's reference layout: 4 KiB ring buffer
# initialized to 0x20, 12-bit position + 4-bit length, 8 tokens per flag
# group. Plus a 4-byte LE uncompressed-length header so streams self-
# delimit.
lzss = ["alloc"]
# bzip2 (block-sorted: RLE-1 → BWT → MTF → RLE-2 → Huffman). Encoder
# and decoder both implemented; the encoder uses a naive O(n² log n)
# suffix-array build (rather than SA-IS) for the BWT.
bzip2 = ["alloc"]
# LZO (Lempel–Ziv–Oberhumer), LZO1X-1 variant.
lzo = ["alloc"]
# LZX (Microsoft CAB / WIM compression).
lzx = ["alloc"]
# Amiga LZX (the original 1995 Forbes LZX as used in Amiga `.lzx` archives).
# Block-level identical to MS-CAB LZX (which is what `lzx` provides) but with
# a fixed 64 KiB window and no 32 KiB chunking or x86 E8-filter framing.
amiga_lzx = ["alloc"]
# Quantum (Stac, old CAB format). Decoder-only target.
quantum = ["alloc"]
# LZFSE (Apple's LZ77 + Finite State Entropy). Decoder-only target — the
# encoder permanently returns `Error::Unsupported`. Handles `bvx-` and
# `bvxn` (LZVN) blocks; `bvx2` blocks return Unsupported in this build.
lzfse = ["alloc"]
# ADC (Apple Data Compression) — the LZSS-like codec used in Apple DMG
# disk images and HFS+ compressed resource forks.
adc = ["alloc"]
# PPMd (Dmitry Shkarin's PPMII variant H, used in 7z/RAR/ZIP method 98).
# Decoder-only target — the encoder permanently returns
# `Error::Unsupported` (the PPM model with information-inheritance updates
# is out of scope for this build).
ppmd = ["alloc"]
# XPress Huffman ([MS-XCA] §2.1): LZ77 + canonical Huffman used by WIM
# resources and NTFS CompactOS file compression. Encoder and decoder
# both implemented.
xpress_huffman = ["alloc"]
# LZNT1 — NTFS native file compression. 4 KiB chunked LZ77, no entropy
# coding. Per Microsoft MS-XCA section 2.5.
lznt1 = ["alloc"]
# Microsoft Xpress (Plain LZ77 variant from MS-XCA). Byte-aligned
# LZ77 with no entropy coding; encoder and decoder both implemented.
xpress = ["alloc"]
# Apple PackBits — tag-byte RLE used in TIFF, PSD, BMP, and macOS
# metadata. Source-byte oriented; no header/trailer. Encoder and
# decoder both implemented.
packbits = ["alloc"]
# PKZIP Implode (ZIP method 6, PKZIP 1.x). Decoder-only target — the
# encoder permanently returns `Error::Unsupported` (Shannon–Fano tree
# construction from frequencies is intricate and there is no modern
# need for an encoder). Supports both 4 KiB / 8 KiB dictionaries and
# both 2-tree / 3-tree modes.
zip_implode = ["alloc"]
# Stac LZS (RFC 1974) — the LZ77 variant used by PPP Stac LZS, MPPE,
# IPComp, and Cisco hardware compression cards. Bit-packed (MSB-first)
# with a 2 KiB window. Encoder and decoder both implemented.
lzs = ["alloc"]
# LZHAM (Rich Geldreich's LZMA-class codec, MIT-licensed). This build
# ships the `LZH0` container header parser only — the inner arithmetic-
# coded bitstream is not documented outside the reference C++ source
# and is permanently `Error::Unsupported` in this build. The encoder
# is also permanently `Error::Unsupported`.
lzham = ["alloc"]
# StuffIt compression method 13 ("LZ + Huffman"). This build ships the
# well-defined, unit-tested building blocks (MSB-first bit reader,
# Kraft-validated canonical Huffman decoder, bounds-checked LZSS window)
# but returns `Error::Unsupported` for the payload decode: the format is
# proprietary/undocumented (only an LGPL reverse-engineering exists, which
# this MIT crate must not copy) and there are no public fixtures, so a
# decoder could be neither derived nor validated. The encoder is also
# permanently `Error::Unsupported`.
sit13 = ["alloc"]
# StuffIt classic compression method 5 ("LZAH"): LZSS over a pre-seeded
# 4 KiB sliding window with a single adaptive (sibling-property) Huffman
# tree for literal/length tokens and a static canonical prefix code for the
# high offset bits, MSB-first. Decode-only target — the encoder permanently
# returns `Error::Unsupported` (no StuffIt encoder exists). The uncompressed
# length is out of band (the archive entry header), supplied via
# `DecoderConfig::with_len`.
lzah = ["alloc"]
# StuffIt 5 "Arsenic" (compression method 15). BWT-based: carry-less range
# decoder → un-MTF/un-RLE → inverse BWT → optional de-randomization → final
# RLE → CRC-32 check. Decoder-only (self-terminating stream; no out-of-band
# length needed); the encoder permanently returns `Error::Unsupported`.
arsenic = ["alloc"]
# RAR decoders (rar1, rar2, rar3, rar5). Decoder-only — RARLAB's unRAR
# license explicitly forbids re-creating the compression algorithm, so
# the encoders permanently return `Error::Unsupported`.
rar1 = ["alloc"]
rar2 = ["alloc"]
rar3 = ["alloc"]
rar5 = ["alloc"]
# ZIP method 1 (Shrink): PKZIP 1.x dynamic LZW with partial-clear marker.
# Decoder-only — the encoder permanently returns `Error::Unsupported`.
zip_shrink = ["alloc"]
# PKZip Reduce (methods 2..5). Decoder-only: the encoder permanently
# returns `Error::Unsupported`.
zip_reduce = ["alloc"]
# LHA / LZH compression methods (-lh1-/-lh4-/-lh5-/-lh6-/-lh7-): LZSS
# sliding-dictionary back-references with Huffman-coded literal/length
# and position codes. lh4/lh5/lh6/lh7 share the static-Huffman block
# structure (Okumura's public-domain ar002 layout) and differ only in
# dictionary size and number of position bits; lh1 uses the classic
# adaptive-Huffman LZHUF scheme. These are the raw method payloads (no
# LHA container header). Decoder for all five plus a static-Huffman
# lh5/lh6/lh7 encoder (clean-room, round-trip validated).
lha = ["alloc"]
# BCJ branch-converter filters (x86/ARM/ARMT/ARM64/PPC/SPARC/IA-64/RISC-V),
# from the public-domain LZMA SDK lineage. Reversible filters, not
# compressors: forward rewrites relative branch operands to absolute form,
# inverse restores them. Encoder + decoder both implemented.
bcj = ["alloc"]
# BCJ2 — the 4-stream x86 branch filter from the public-domain LZMA SDK
# (7-Zip filter id 0303011B), distinct from the single-stream BCJ above.
# Decode recombines four input streams (main + call + jump + a range-coded
# control stream) into the filtered output. Exposes a dedicated function
# API (`compcol::bcj2::decode`) since the 4-input shape does not fit the
# single-input `Decoder` trait. Encoder also provided for round-trip tests.
bcj2 = ["alloc"]
# Delta filter — byte-wise delta with a configurable distance (1..=256).
# Reversible filter (xz/LZMA SDK lineage). Encoder + decoder both implemented.
delta = ["alloc"]
# ARC "Crunch" (ARC method 8): dynamic LZW (a `compress`-style variant) with
# a one-byte maxbits header and a block-mode CLEAR code. Encoder and decoder
# both implemented and validated by round-trip.
arc_crunch = ["alloc"]
# ARC "Squeeze" (ARC method 4 / CP-M `.SQ`): a `0x90`-flag run-length
# pre-pass followed by static Huffman coding with the tree serialised in the
# stream header. Encoder and decoder both implemented and validated by
# round-trip.
arc_squeeze = ["alloc"]
# ARC "Squashed" (ARC method 9 / PKARC-PKPAK): fixed 13-bit LZW with a
# block-mode CLEAR code and no header byte (no RLE pre-pass). Encoder and
# decoder both implemented and validated by round-trip.
arc_squash = ["alloc"]
# `compcol::tokio_io` — async mirrors of compcol::io for the tokio
# runtime. Pulls the tokio dependency for its AsyncRead/AsyncWrite
# trait definitions; the rest of the crate stays dep-free.
tokio = ["std", "dep:tokio"]
# NOTE(review): the table header and the key name are missing on the two
# lines below, so this section cannot be parsed as written. "forbid" is a
# lint level, so this is presumably `[lints.rust]` with
# `unsafe_code = "forbid"` — confirm against the original manifest.
[]
= "forbid"
[dependencies]
# Optional, pulled in only by the `tokio` feature (via `dep:tokio`). The
# library needs just the AsyncRead/AsyncWrite trait definitions, so
# default features are switched off and no tokio features are enabled
# here; `io-util` (the AsyncReadExt/AsyncWriteExt helpers) is only turned
# on for the tests, in the dev-dependencies entry.
tokio = { version = "1", default-features = false, optional = true }
[dev-dependencies]
# Test-only: the async tests need a runtime + the AsyncReadExt /
# AsyncWriteExt convenience methods that the library itself doesn't
# use. Production consumers can pick their own tokio feature set.
tokio = { version = "1", default-features = false, features = ["rt", "macros", "io-util"] }
# Command-line binary; only built when the `factory` feature is enabled.
[[bin]]
name = "compcol"
path = "src/bin/compcol.rs"
required-features = ["factory"]
# Benchmark example; only built when the `factory` feature is enabled.
[[example]]
name = "bench"
path = "examples/bench.rs"
required-features = ["factory"]