1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
// znippy-cli/src/main.rs
use anyhow::Result;
use clap::{Parser, Subcommand};
use std::path::PathBuf;
use znippy_common::{VerifyReport, list_archive_contents, verify_archive_integrity};
use znippy_common::plugin::PluginRegistry;
use znippy_common::plugins::wasm_loader::WasmPlugin;
use znippy_compress::compress_dir;
use znippy_decompress::{decompress_archive, decompress_archive_filtered};
pub mod handlers;
// Top-level command-line definition for the `znippy` binary, parsed with
// clap's derive API. The program name and the short `about` text are set by
// the `#[command(...)]` attributes; `long_about = None` leaves the long help
// description unset.
#[derive(Parser)]
#[command(name = "znippy")]
#[command(about = "Znippy: fast archive format with per-file compression", long_about = None)]
struct Cli {
// The subcommand selected on the command line; `run` dispatches on it.
#[command(subcommand)]
command: Commands,
}
// Subcommands of the `znippy` CLI. clap derives the argument parser from this
// enum and uses the `///` doc comments below as `--help` text, so they are
// user-visible strings rather than plain comments. Fields marked
// `#[arg(short, long)]` get both a short and a long flag derived from the field
// name; fields without an `#[arg]` attribute are positional.
// NOTE(review): the `β` glyph in two help texts below looks like a mis-encoded
// dash; confirm against a clean copy of the file before changing it.
#[derive(Subcommand)]
enum Commands {
/// Compress a directory into a .znippy archive
Compress {
// Directory to compress (`-i` / `--input`).
#[arg(short, long)]
input: PathBuf,
// Archive path handed to `compress_dir` as the output (`-o` / `--output`).
#[arg(short, long)]
output: PathBuf,
// Boolean switch forwarded unchanged to `compress_dir`; its exact meaning is
// defined there, not in this file.
#[arg(long)]
no_skip: bool,
/// Package handler to use: a name or alias (`rust`/`cargo`, `python`, `maven`).
/// One archive carries one package type.
#[arg(long, default_value = "rust")]
format: String,
/// Where the metadata index is written: `arrow-ipc` (default β inline in
/// the .znippy container) or `iceberg` (a real Iceberg table in
/// --warehouse; blobs stay in the .znippy sidecar). The iceberg backend
/// requires the CLI to be built with `--features iceberg`.
#[arg(long, default_value = "arrow-ipc")]
meta_format: String,
/// Warehouse directory for `--meta-format iceberg`. Required for, and
/// only used by, the iceberg backend.
#[arg(long)]
warehouse: Option<PathBuf>,
/// Path to a .wasm plugin for metadata extraction (overrides --format).
#[arg(long)]
plugin: Option<PathBuf>,
/// DenseUnion type_id for the WASM plugin given via --plugin.
#[arg(long, default_value_t = 1)]
plugin_type_id: i8,
},
/// Decompress a .znippy archive
Decompress {
// Archive to read (`-i` / `--input`).
#[arg(short, long)]
input: PathBuf,
// Destination passed to the decompressor (`-o` / `--output`).
#[arg(short, long)]
output: PathBuf,
/// Selective extract: only files of this package type (a handler
/// name/alias, e.g. `maven`/`rust`/`python`). Omit to extract all types.
// Exposed on the command line as `--type`, not `--pkg-type`.
#[arg(long = "type")]
pkg_type: Option<String>,
/// Selective extract: only files from this repo. Omit to extract all repos.
#[arg(long)]
repo: Option<String>,
},
/// List contents of a .znippy archive
List {
// Archive to inspect (`-i` / `--input`).
#[arg(short, long)]
input: PathBuf,
},
/// Random-access read of one file by its relative path (O(log n)/O(key) via
/// the lookup sub-index + trie). Writes to --output, or stdout if omitted.
Get {
// Archive to read from (`-i` / `--input`).
#[arg(short, long)]
input: PathBuf,
/// Relative path of the file inside the archive.
#[arg(short, long)]
path: String,
/// Destination file. When omitted, the bytes are written to stdout.
#[arg(short, long)]
output: Option<PathBuf>,
},
/// Verify archive integrity (checksum)
Verify {
// Archive to verify (`-i` / `--input`).
#[arg(short, long)]
input: PathBuf,
},
/// Seal a dynamic, iceberg-backed archive into a static, immutable native
/// `.znippy` (inline Arrow-IPC sub-indexes + lookup + trie + footer).
///
/// Reads the archive metadata from the skade-iceberg `--warehouse` and
/// writes it as the v0.7 inline container, REUSING the blob bytes already in
/// the `--input` `.znippy` sidecar (no recompress, content-addressed). The
/// sealed artifact opens with the ordinary reader β the warehouse is no
/// longer needed to read it. Requires `--features iceberg`.
Seal {
/// The `.znippy` blob sidecar written when the archive was compressed
/// with `--meta-format iceberg` (pure blobs, no footer).
#[arg(short, long)]
input: PathBuf,
/// The skade-iceberg warehouse holding the archive metadata tables.
// Mandatory here (plain `PathBuf`), unlike the optional `--warehouse` of `compress`.
#[arg(long)]
warehouse: PathBuf,
/// Iceberg namespace of the archive (its file stem). Defaults to the
/// `--input` file stem, matching how `compress` derives it.
#[arg(long)]
namespace: Option<String>,
/// Destination for the sealed native `.znippy`.
#[arg(short, long)]
output: PathBuf,
},
/// List the available package handlers (the compiled-in register).
Handlers,
/// Run a handler-specific subcommand, e.g. `znippy run rust coords foo.crate`.
Run {
// The three fields below carry no `#[arg]` attribute, so they are positional
// and are read in declaration order.
/// Handler name/alias to dispatch to.
format: String,
/// Subcommand advertised by the handler's meta().
cmd: String,
/// Arguments passed to the subcommand.
args: Vec<String>,
},
}
/// Translate `--meta-format` / `--warehouse` into an optional metadata-sink factory.
///
/// * `"arrow-ipc"` returns `Ok(None)`; the caller passes that `None` on to
///   `compress_dir` unchanged.
/// * `"iceberg"`, when the CLI is built with the `iceberg` feature, returns a
///   factory that constructs a `znippy_iceberg::IcebergSink` from the warehouse
///   directory and a namespace. The namespace is the file stem of `output`, or
///   `"znippy"` when `output` has no stem. A one-line notice naming the
///   namespace and warehouse is printed to stdout.
///
/// # Errors
///
/// * `"iceberg"` without a `warehouse` (feature enabled).
/// * `"iceberg"` when the `iceberg` feature is not compiled in.
/// * Any other `meta_format` value.
fn build_meta_sink(
    meta_format: &str,
    warehouse: Option<PathBuf>,
    output: &std::path::Path,
) -> Result<Option<znippy_common::MetaSinkFactory>> {
    match meta_format {
        "iceberg" => {
            #[cfg(feature = "iceberg")]
            {
                let warehouse_dir = match warehouse {
                    Some(dir) => dir,
                    None => {
                        anyhow::bail!("--warehouse <DIR> is required for --meta-format iceberg")
                    }
                };
                // The binding must stay named `namespace`: the format string
                // below captures it by name.
                let namespace = match output.file_stem() {
                    Some(stem) => stem.to_string_lossy().into_owned(),
                    None => String::from("znippy"),
                };
                println!(
                    "π§ Metadata β Iceberg table (namespace `{namespace}`) in {}",
                    warehouse_dir.display()
                );
                // The closure takes ownership of both values and hands them to
                // the sink when it is invoked.
                Ok(Some(Box::new(move |_file, _off| {
                    Box::new(znippy_iceberg::IcebergSink::new(warehouse_dir, namespace))
                        as Box<dyn znippy_common::ArchiveMetaSink>
                })))
            }
            #[cfg(not(feature = "iceberg"))]
            {
                // Touch the arguments this configuration does not need so the
                // build stays free of unused-variable warnings.
                let _ = (warehouse, output);
                anyhow::bail!(
                    "iceberg metadata backend not compiled in; rebuild znippy-cli with `--features iceberg`"
                )
            }
        }
        "arrow-ipc" => Ok(None),
        other => anyhow::bail!("unknown --meta-format '{other}' (expected arrow-ipc|iceberg)"),
    }
}
/// Parse the command line and execute the selected subcommand.
///
/// Initialises `env_logger`, parses `Cli` from the process arguments and
/// dispatches on `Commands`:
///
/// * `compress` builds a `PluginRegistry` from either the `--plugin` WASM file
///   or the handler named by `--format`, resolves the metadata sink with
///   `build_meta_sink`, calls `compress_dir` and prints the report.
/// * `decompress` extracts everything, or only the entries matching
///   `--type` / `--repo` when at least one of them is given, then prints the report.
/// * `list` prints the archive contents via `list_archive_contents`.
/// * `get` fetches one file and writes it to `--output`, or to stdout when no
///   output path is given.
/// * `verify` checks archive integrity and prints the report.
/// * `seal` converts an iceberg-backed archive into a native `.znippy`
///   (only when built with the `iceberg` feature).
/// * `handlers` prints the handler catalog.
/// * `run` forwards a handler-specific subcommand to the named handler.
///
/// # Errors
///
/// Propagates every error from the operation that was invoked (handler lookup,
/// plugin loading, compression, decompression, verification, sealing, file or
/// stdout writes). `seal` always fails when the `iceberg` feature is not
/// compiled in.
pub fn run() -> Result<()> {
    env_logger::init();
    let cli = Cli::parse();

    // NOTE(review): several status strings below still contain mis-encoded
    // emoji (they show up as `π…` / `β`). Their original code points cannot be
    // recovered from the surviving bytes, so they are left untouched here;
    // restore them from a clean copy of the file.
    match cli.command {
        Commands::Compress {
            input,
            output,
            no_skip,
            format,
            meta_format,
            warehouse,
            plugin,
            plugin_type_id,
        } => {
            // A WASM plugin, when given, takes precedence over `--format`.
            let registry = match plugin {
                Some(wasm_path) => {
                    let wp = WasmPlugin::load(
                        &wasm_path.to_string_lossy(),
                        "wasm-plugin",
                        plugin_type_id,
                    )?;
                    PluginRegistry::with_plugin(Box::new(wp))
                }
                None => {
                    let handler = handlers::find_handler(&format)?;
                    println!("π Handler: {} (type_id {})", handler.meta().name, handler.type_id());
                    PluginRegistry::with_plugin(handler)
                }
            };
            let sink_factory = build_meta_sink(&meta_format, warehouse, &output)?;
            let report =
                compress_dir(&input, &output, no_skip, Some(&registry), None, sink_factory)?;

            println!("\n✅ Komprimering klar:");
            println!("π Totalt antal filer: {}", report.total_files);
            println!("π Totalt antal chunks: {}", report.chunks);
            println!("π Totalt antal kataloger: {}", report.total_dirs);
            println!("π¦ Filer komprimerade: {}", report.compressed_files);
            println!(
                "π Filer ej komprimerade: {}",
                report.uncompressed_files
            );
            println!("π₯ Totalt inlästa bytes: {}", report.total_bytes_in);
            println!("π€ Totalt skrivna bytes: {}", report.total_bytes_out);
            println!("π Bytes som komprimerades: {}", report.compressed_bytes);
            println!(
                "π Bytes ej komprimerade: {}",
                report.uncompressed_bytes
            );
            println!(
                "π Komprimeringsgrad: {:.2}%",
                report.compression_ratio
            );
        }
        Commands::Decompress { input, output, pkg_type, repo } => {
            // Resolve the handler name/alias given via `--type` to its type id.
            let filter = znippy_common::IndexFilter {
                pkg_type: match &pkg_type {
                    Some(name) => Some(handlers::find_handler(name)?.type_id()),
                    None => None,
                },
                // Cloned because `repo` is still needed for the status line below.
                repo: repo.clone(),
            };
            let report: VerifyReport = if filter.is_empty() {
                decompress_archive(&input, &output)?
            } else {
                println!(
                    "π Selective extract: type={} repo={}",
                    pkg_type.as_deref().unwrap_or("*"),
                    repo.as_deref().unwrap_or("*"),
                );
                decompress_archive_filtered(&input, &output, &filter)?
            };

            println!("\n✅ Dekomprimering och verifiering klar:");
            println!("π Totala filer: {}", report.total_files);
            println!("π Verifierade filer: {}", report.verified_files);
            println!("π₯ chunks: {}", report.chunks);
            println!("β Korrupta filer: {}", report.corrupt_files);
            println!("π₯ Totala bytes: {}", report.total_bytes);
            println!("π€ Verifierade bytes: {}", report.verified_bytes);
            println!("⚠️ Korrupta bytes: {}", report.corrupt_bytes);
        }
        Commands::List { input } => {
            list_archive_contents(&input)?;
        }
        Commands::Get { input, path, output } => {
            let data = znippy_common::get_file(&input, &path)?;
            match output {
                Some(dest) => {
                    std::fs::write(&dest, &data)?;
                    // Status goes to stderr so it never mixes with payload bytes.
                    eprintln!("π€ {} ({} bytes) β {}", path, data.len(), dest.display());
                }
                None => {
                    use std::io::Write;
                    // Lock once and flush explicitly: stdout is buffered, and a
                    // flush that only happens at process exit would silently
                    // drop any write error (closed pipe, full disk, ...).
                    let stdout = std::io::stdout();
                    let mut out = stdout.lock();
                    out.write_all(&data)?;
                    out.flush()?;
                }
            }
        }
        Commands::Verify { input } => {
            let report: VerifyReport = verify_archive_integrity(&input)?;
            println!("\nπ Verifiering klar:");
            println!("π Totala filer: {}", report.total_files);
            println!("π Verifierade filer: {}", report.verified_files);
            println!("β Korrupta filer: {}", report.corrupt_files);
            println!("π₯ Totala bytes: {}", report.total_bytes);
            println!("π€ Verifierade bytes: {}", report.verified_bytes);
            println!("⚠️ Korrupta bytes: {}", report.corrupt_bytes);
        }
        Commands::Seal { input, warehouse, namespace, output } => {
            #[cfg(feature = "iceberg")]
            {
                // Default namespace: the input file stem, falling back to
                // "znippy" when the path has no stem.
                let ns = namespace.unwrap_or_else(|| {
                    input
                        .file_stem()
                        .map(|s| s.to_string_lossy().to_string())
                        .unwrap_or_else(|| "znippy".to_string())
                });
                println!(
                    "π§βπ¦ Sealing iceberg archive (namespace `{ns}`) in {} β {}",
                    warehouse.display(),
                    output.display()
                );
                let report = znippy_iceberg::seal(&input, &warehouse, &ns, &output)?;

                println!("\n✅ Sealed (static native .znippy):");
                println!("π Filer: {}", report.files);
                println!("π§± Chunk-rader: {}", report.rows);
                println!(
                    "π€ Blob-bytes återanvända: {} (ingen omkomprimering)",
                    report.blob_bytes_copied
                );
                println!("π¦ Sealad total storlek: {}", report.sealed_total_bytes);
                // Whatever the sealed file holds beyond the copied blobs is
                // reported as metadata tail + footer.
                println!(
                    "π Metadata-svans + footer: {} bytes",
                    report.sealed_total_bytes - report.blob_bytes_copied
                );
            }
            #[cfg(not(feature = "iceberg"))]
            {
                // Touch the otherwise-unused fields so this configuration
                // builds without unused-variable warnings.
                let _ = (input, warehouse, namespace, output);
                anyhow::bail!(
                    "iceberg backend not compiled in; rebuild znippy-cli with `--features iceberg`"
                );
            }
        }
        Commands::Handlers => {
            handlers::print_catalog();
        }
        Commands::Run { format, cmd, args } => {
            let handler = handlers::find_handler(&format)?;
            handler.run_command(&cmd, &args)?;
        }
    }
    Ok(())
}