1use anyhow::Result;
11use clap::{Parser, Subcommand};
12use std::path::PathBuf;
13
14pub mod commands;
15pub mod utils;
16
17#[derive(Parser)]
19#[command(name = "embeddenator")]
20#[command(version = env!("CARGO_PKG_VERSION"))]
21#[command(about = "Holographic computing substrate using sparse ternary VSA")]
22#[command(
23 long_about = "Embeddenator - A production-grade holographic computing substrate using Vector Symbolic Architecture (VSA)\n\n\
24 Embeddenator encodes entire filesystems into holographic 'engrams' using sparse ternary vectors,\n\
25 enabling bit-perfect reconstruction and algebraic operations on data.\n\n\
26 Key Features:\n\
27 • 100% bit-perfect reconstruction of all files\n\
28 • Holographic superposition of multiple data sources\n\
29 • Algebraic operations (bundle, bind) on engrams\n\
30 • Hierarchical chunked encoding for TB-scale data\n\
31 • Multi-architecture support (amd64/arm64)\n\n\
32 Examples:\n\
33 embeddenator ingest -i ./mydata -e data.engram -m data.json -v\n\
34 embeddenator extract -e data.engram -m data.json -o ./restored -v\n\
35 embeddenator query -e data.engram -q ./testfile.txt -v"
36)]
37#[command(author = "Tyler Zervas <tz-dev@vectorweight.com>")]
38pub struct Cli {
39 #[command(subcommand)]
40 pub command: Commands,
41}
42
43#[derive(Subcommand)]
44pub enum Commands {
45 #[command(
47 long_about = "Ingest files and directories into a holographic engram\n\n\
48 This command recursively processes all files in the input directory, chunks them,\n\
49 and encodes them into a holographic VSA engram. The result is a single .engram file\n\
50 containing the superposition of all data, plus a manifest tracking file metadata.\n\n\
51 The engram uses sparse ternary vectors to create a holographic representation where:\n\
52 • All files are superimposed in a single root vector\n\
53 • Each chunk is bound to a unique position vector\n\
54 • Reconstruction is bit-perfect for all file types\n\n\
55 Example:\n\
56 embeddenator ingest -i ./myproject -e project.engram -m project.json -v\n\
57 embeddenator ingest --input ~/Documents --engram docs.engram --verbose"
58 )]
59 Ingest {
60 #[arg(
62 short,
63 long,
64 value_name = "PATH",
65 help_heading = "Required",
66 num_args = 1..,
67 action = clap::ArgAction::Append
68 )]
69 input: Vec<PathBuf>,
70
71 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
73 engram: PathBuf,
74
75 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
77 manifest: PathBuf,
78
79 #[arg(short, long)]
81 verbose: bool,
82 },
83
84 #[command(
86 long_about = "Extract and reconstruct files from a holographic engram\n\n\
87 This command performs bit-perfect reconstruction of all files from an engram.\n\
88 It uses the manifest to locate chunks in the codebook and algebraically unbinds\n\
89 them from the holographic root vector to recover the original data.\n\n\
90 The extraction process:\n\
91 • Loads the engram and manifest files\n\
92 • Reconstructs the directory structure\n\
93 • Unbinds and decodes each chunk using VSA operations\n\
94 • Writes bit-perfect copies of all original files\n\n\
95 Example:\n\
96 embeddenator extract -e project.engram -m project.json -o ./restored -v\n\
97 embeddenator extract --engram backup.engram --output-dir ~/restored"
98 )]
99 Extract {
100 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
102 engram: PathBuf,
103
104 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
106 manifest: PathBuf,
107
108 #[arg(short, long, value_name = "DIR", help_heading = "Required")]
110 output_dir: PathBuf,
111
112 #[arg(short, long)]
114 verbose: bool,
115 },
116
117 #[command(
119 long_about = "Query cosine similarity between a file and engram contents\n\n\
120 This command computes the similarity between a query file and the data encoded\n\
121 in an engram using VSA cosine similarity. This enables holographic search and\n\
122 content-based retrieval without full extraction.\n\n\
123 Similarity interpretation:\n\
124 • >0.75: Strong match, likely contains similar content\n\
125 • 0.3-0.75: Moderate similarity, some shared patterns\n\
126 • <0.3: Low similarity, likely unrelated content\n\n\
127 Example:\n\
128 embeddenator query -e archive.engram -q search.txt -v\n\
129 embeddenator query --engram data.engram --query pattern.bin"
130 )]
131 Query {
132 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
134 engram: PathBuf,
135
136 #[arg(short, long, value_name = "FILE", help_heading = "Required")]
138 query: PathBuf,
139
140 #[arg(long, value_name = "FILE")]
142 hierarchical_manifest: Option<PathBuf>,
143
144 #[arg(long, value_name = "DIR")]
146 sub_engrams_dir: Option<PathBuf>,
147
148 #[arg(long, default_value_t = 10, value_name = "K")]
150 k: usize,
151
152 #[arg(short, long)]
154 verbose: bool,
155 },
156
157 #[command(long_about = "Query cosine similarity using a literal text string\n\n\
159 This is a convenience wrapper that encodes the provided text as bytes into a VSA query vector\n\
160 and runs the same retrieval path as `query`.")]
161 QueryText {
162 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
164 engram: PathBuf,
165
166 #[arg(long, value_name = "TEXT", help_heading = "Required")]
168 text: String,
169
170 #[arg(long, value_name = "FILE")]
172 hierarchical_manifest: Option<PathBuf>,
173
174 #[arg(long, value_name = "DIR")]
176 sub_engrams_dir: Option<PathBuf>,
177
178 #[arg(long, default_value_t = 10, value_name = "K")]
180 k: usize,
181
182 #[arg(short, long)]
184 verbose: bool,
185 },
186
187 #[command(
189 long_about = "Build hierarchical retrieval artifacts from an existing engram+manifest\n\n\
190 This command produces a hierarchical manifest JSON and a directory of sub-engrams\n\
191 suitable for store-backed selective unfolding (DirectorySubEngramStore)."
192 )]
193 BundleHier {
194 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
196 engram: PathBuf,
197
198 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
200 manifest: PathBuf,
201
202 #[arg(long, default_value = "hier.json", value_name = "FILE")]
204 out_hierarchical_manifest: PathBuf,
205
206 #[arg(long, default_value = "sub_engrams", value_name = "DIR")]
208 out_sub_engrams_dir: PathBuf,
209
210 #[arg(long, default_value_t = 500, value_name = "N")]
212 max_level_sparsity: usize,
213
214 #[arg(long, value_name = "N")]
216 max_chunks_per_node: Option<usize>,
217
218 #[arg(long, default_value_t = false)]
220 embed_sub_engrams: bool,
221
222 #[arg(short, long)]
224 verbose: bool,
225 },
226
227 #[cfg(feature = "fuse")]
229 #[command(long_about = "Mount an engram as a FUSE filesystem\n\n\
230 This command mounts an engram at the specified mountpoint, making all files\n\
231 accessible through the standard filesystem interface. Files are decoded\n\
232 on-demand from the holographic representation.\n\n\
233 Requirements:\n\
234 • FUSE kernel module must be loaded (modprobe fuse)\n\
235 • libfuse3-dev installed on the system\n\
236 • Build with: cargo build --features fuse\n\n\
237 To unmount:\n\
238 fusermount -u /path/to/mountpoint\n\n\
239 Example:\n\
240 embeddenator mount -e project.engram -m project.json /mnt/engram\n\
241 embeddenator mount --engram backup.engram --mountpoint ~/mnt --allow-other")]
242 Mount {
243 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
245 engram: PathBuf,
246
247 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
249 manifest: PathBuf,
250
251 #[arg(value_name = "MOUNTPOINT", help_heading = "Required")]
253 mountpoint: PathBuf,
254
255 #[arg(long)]
257 allow_other: bool,
258
259 #[arg(short, long)]
261 foreground: bool,
262
263 #[arg(short, long)]
265 verbose: bool,
266 },
267
268 #[command(long_about = "Perform incremental updates to an existing engram\n\n\
270 This command enables efficient updates to engrams without full re-ingestion.\n\
271 Use subcommands to add, remove, or modify files, or to compact the engram.\n\n\
272 Subcommands:\n\
273 • add - Add a new file to the engram\n\
274 • remove - Mark a file as deleted\n\
275 • modify - Update an existing file\n\
276 • compact - Rebuild engram without deleted files\n\n\
277 Examples:\n\
278 embeddenator update add -e data.engram -m data.json -f new.txt\n\
279 embeddenator update remove -e data.engram -m data.json -p old.txt\n\
280 embeddenator update modify -e data.engram -m data.json -f changed.txt\n\
281 embeddenator update compact -e data.engram -m data.json")]
282 #[command(subcommand)]
283 Update(UpdateCommands),
284}
285
286#[derive(Subcommand)]
287pub enum UpdateCommands {
288 #[command(
290 long_about = "Add a new file to an existing engram without full re-ingestion\n\n\
291 This operation bundles the new file's chunks with the existing root vector\n\
292 using VSA's associative bundle operation. Much faster than full re-ingestion.\n\n\
293 Example:\n\
294 embeddenator update add -e data.engram -m data.json -f new_file.txt"
295 )]
296 Add {
297 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
299 engram: PathBuf,
300
301 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
303 manifest: PathBuf,
304
305 #[arg(short, long, value_name = "FILE", help_heading = "Required")]
307 file: PathBuf,
308
309 #[arg(short = 'p', long, value_name = "PATH")]
311 logical_path: Option<String>,
312
313 #[arg(short, long)]
315 verbose: bool,
316 },
317
318 #[command(long_about = "Mark a file as deleted in the engram manifest\n\n\
320 This operation marks the file as deleted without modifying the root vector,\n\
321 since VSA bundling has no clean inverse. Use 'compact' to truly remove chunks.\n\n\
322 Example:\n\
323 embeddenator update remove -e data.engram -m data.json -p old_file.txt")]
324 Remove {
325 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
327 engram: PathBuf,
328
329 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
331 manifest: PathBuf,
332
333 #[arg(short = 'p', long, value_name = "PATH", help_heading = "Required")]
335 path: String,
336
337 #[arg(short, long)]
339 verbose: bool,
340 },
341
342 #[command(long_about = "Update an existing file's content in the engram\n\n\
344 This operation marks the old version as deleted and adds the new version.\n\
345 Use 'compact' periodically to clean up old chunks.\n\n\
346 Example:\n\
347 embeddenator update modify -e data.engram -m data.json -f updated.txt")]
348 Modify {
349 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
351 engram: PathBuf,
352
353 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
355 manifest: PathBuf,
356
357 #[arg(short, long, value_name = "FILE", help_heading = "Required")]
359 file: PathBuf,
360
361 #[arg(short = 'p', long, value_name = "PATH")]
363 logical_path: Option<String>,
364
365 #[arg(short, long)]
367 verbose: bool,
368 },
369
370 #[command(
372 long_about = "Rebuild engram from scratch, excluding deleted files\n\n\
373 This operation recreates the engram with only active files, reclaiming space\n\
374 from deleted chunks. Expensive but necessary after many updates.\n\n\
375 Example:\n\
376 embeddenator update compact -e data.engram -m data.json -v"
377 )]
378 Compact {
379 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
381 engram: PathBuf,
382
383 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
385 manifest: PathBuf,
386
387 #[arg(short, long)]
389 verbose: bool,
390 },
391}
392
393pub fn run() -> Result<()> {
395 let cli = Cli::parse();
396
397 match cli.command {
398 Commands::Ingest {
399 input,
400 engram,
401 manifest,
402 verbose,
403 } => commands::handle_ingest(input, engram, manifest, verbose),
404
405 Commands::Extract {
406 engram,
407 manifest,
408 output_dir,
409 verbose,
410 } => commands::handle_extract(engram, manifest, output_dir, verbose),
411
412 Commands::Query {
413 engram,
414 query,
415 hierarchical_manifest,
416 sub_engrams_dir,
417 k,
418 verbose,
419 } => commands::handle_query(
420 engram,
421 query,
422 hierarchical_manifest,
423 sub_engrams_dir,
424 k,
425 verbose,
426 ),
427
428 Commands::QueryText {
429 engram,
430 text,
431 hierarchical_manifest,
432 sub_engrams_dir,
433 k,
434 verbose,
435 } => commands::handle_query_text(
436 engram,
437 text,
438 hierarchical_manifest,
439 sub_engrams_dir,
440 k,
441 verbose,
442 ),
443
444 Commands::BundleHier {
445 engram,
446 manifest,
447 out_hierarchical_manifest,
448 out_sub_engrams_dir,
449 max_level_sparsity,
450 max_chunks_per_node,
451 embed_sub_engrams,
452 verbose,
453 } => commands::handle_bundle_hier(
454 engram,
455 manifest,
456 out_hierarchical_manifest,
457 out_sub_engrams_dir,
458 max_level_sparsity,
459 max_chunks_per_node,
460 embed_sub_engrams,
461 verbose,
462 ),
463
464 #[cfg(feature = "fuse")]
465 Commands::Mount {
466 engram,
467 manifest,
468 mountpoint,
469 allow_other,
470 foreground,
471 verbose,
472 } => commands::handle_mount(
473 engram,
474 manifest,
475 mountpoint,
476 allow_other,
477 foreground,
478 verbose,
479 ),
480
481 Commands::Update(update_cmd) => match update_cmd {
482 UpdateCommands::Add {
483 engram,
484 manifest,
485 file,
486 logical_path,
487 verbose,
488 } => commands::handle_update_add(engram, manifest, file, logical_path, verbose),
489
490 UpdateCommands::Remove {
491 engram,
492 manifest,
493 path,
494 verbose,
495 } => commands::handle_update_remove(engram, manifest, path, verbose),
496
497 UpdateCommands::Modify {
498 engram,
499 manifest,
500 file,
501 logical_path,
502 verbose,
503 } => commands::handle_update_modify(engram, manifest, file, logical_path, verbose),
504
505 UpdateCommands::Compact {
506 engram,
507 manifest,
508 verbose,
509 } => commands::handle_update_compact(engram, manifest, verbose),
510 },
511 }
512}