1use anyhow::Result;
11use clap::{Parser, Subcommand};
12use std::path::PathBuf;
13
14pub mod commands;
15pub mod utils;
16
17#[derive(Parser)]
19#[command(name = "embeddenator")]
20#[command(version = env!("CARGO_PKG_VERSION"))]
21#[command(about = "Holographic computing substrate using sparse ternary VSA")]
22#[command(
23 long_about = "Embeddenator - A production-grade holographic computing substrate using Vector Symbolic Architecture (VSA)\n\n\
24 Embeddenator encodes entire filesystems into holographic 'engrams' using sparse ternary vectors,\n\
25 enabling bit-perfect reconstruction and algebraic operations on data.\n\n\
26 Key Features:\n\
27 • 100% bit-perfect reconstruction of all files\n\
28 • Holographic superposition of multiple data sources\n\
29 • Algebraic operations (bundle, bind) on engrams\n\
30 • Hierarchical chunked encoding for TB-scale data\n\
31 • Multi-architecture support (amd64/arm64)\n\n\
32 Examples:\n\
33 embeddenator ingest -i ./mydata -e data.engram -m data.json -v\n\
34 embeddenator extract -e data.engram -m data.json -o ./restored -v\n\
35 embeddenator query -e data.engram -q ./testfile.txt -v"
36)]
37#[command(author = "Tyler Zervas <tz-dev@vectorweight.com>")]
38pub struct Cli {
39 #[command(subcommand)]
40 pub command: Commands,
41}
42
43#[derive(Subcommand)]
44pub enum Commands {
45 #[command(
47 long_about = "Ingest files and directories into a holographic engram\n\n\
48 This command recursively processes all files in the input directory, chunks them,\n\
49 and encodes them into a holographic VSA engram. The result is a single .engram file\n\
50 containing the superposition of all data, plus a manifest tracking file metadata.\n\n\
51 The engram uses sparse ternary vectors to create a holographic representation where:\n\
52 • All files are superimposed in a single root vector\n\
53 • Each chunk is bound to a unique position vector\n\
54 • Reconstruction is bit-perfect for all file types\n\n\
55 Example:\n\
56 embeddenator ingest -i ./myproject -e project.engram -m project.json -v\n\
57 embeddenator ingest --input ~/Documents --engram docs.engram --verbose"
58 )]
59 Ingest {
60 #[arg(
62 short,
63 long,
64 value_name = "PATH",
65 help_heading = "Required",
66 num_args = 1..,
67 action = clap::ArgAction::Append
68 )]
69 input: Vec<PathBuf>,
70
71 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
73 engram: PathBuf,
74
75 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
77 manifest: PathBuf,
78
79 #[arg(short, long)]
81 verbose: bool,
82 },
83
84 #[command(
86 long_about = "Extract and reconstruct files from a holographic engram\n\n\
87 This command performs bit-perfect reconstruction of all files from an engram.\n\
88 It uses the manifest to locate chunks in the codebook and algebraically unbinds\n\
89 them from the holographic root vector to recover the original data.\n\n\
90 The extraction process:\n\
91 • Loads the engram and manifest files\n\
92 • Reconstructs the directory structure\n\
93 • Unbinds and decodes each chunk using VSA operations\n\
94 • Writes bit-perfect copies of all original files\n\n\
95 Example:\n\
96 embeddenator extract -e project.engram -m project.json -o ./restored -v\n\
97 embeddenator extract --engram backup.engram --output-dir ~/restored"
98 )]
99 Extract {
100 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
102 engram: PathBuf,
103
104 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
106 manifest: PathBuf,
107
108 #[arg(short, long, value_name = "DIR", help_heading = "Required")]
110 output_dir: PathBuf,
111
112 #[arg(short, long)]
114 verbose: bool,
115 },
116
117 #[command(
119 long_about = "Query cosine similarity between a file and engram contents\n\n\
120 This command computes the similarity between a query file and the data encoded\n\
121 in an engram using VSA cosine similarity. This enables holographic search and\n\
122 content-based retrieval without full extraction.\n\n\
123 Similarity interpretation:\n\
124 • >0.75: Strong match, likely contains similar content\n\
125 • 0.3-0.75: Moderate similarity, some shared patterns\n\
126 • <0.3: Low similarity, likely unrelated content\n\n\
127 Example:\n\
128 embeddenator query -e archive.engram -q search.txt -v\n\
129 embeddenator query --engram data.engram --query pattern.bin"
130 )]
131 Query {
132 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
134 engram: PathBuf,
135
136 #[arg(short, long, value_name = "FILE", help_heading = "Required")]
138 query: PathBuf,
139
140 #[arg(long, value_name = "FILE")]
142 hierarchical_manifest: Option<PathBuf>,
143
144 #[arg(long, value_name = "DIR")]
146 sub_engrams_dir: Option<PathBuf>,
147
148 #[arg(long, default_value_t = 10, value_name = "K")]
150 k: usize,
151
152 #[arg(short, long)]
154 verbose: bool,
155 },
156
157 #[command(
159 long_about = "Query cosine similarity using a literal text string\n\n\
160 This is a convenience wrapper that encodes the provided text as bytes into a VSA query vector\n\
161 and runs the same retrieval path as `query`."
162 )]
163 QueryText {
164 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
166 engram: PathBuf,
167
168 #[arg(long, value_name = "TEXT", help_heading = "Required")]
170 text: String,
171
172 #[arg(long, value_name = "FILE")]
174 hierarchical_manifest: Option<PathBuf>,
175
176 #[arg(long, value_name = "DIR")]
178 sub_engrams_dir: Option<PathBuf>,
179
180 #[arg(long, default_value_t = 10, value_name = "K")]
182 k: usize,
183
184 #[arg(short, long)]
186 verbose: bool,
187 },
188
189 #[command(
191 long_about = "Build hierarchical retrieval artifacts from an existing engram+manifest\n\n\
192 This command produces a hierarchical manifest JSON and a directory of sub-engrams\n\
193 suitable for store-backed selective unfolding (DirectorySubEngramStore)."
194 )]
195 BundleHier {
196 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
198 engram: PathBuf,
199
200 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
202 manifest: PathBuf,
203
204 #[arg(long, default_value = "hier.json", value_name = "FILE")]
206 out_hierarchical_manifest: PathBuf,
207
208 #[arg(long, default_value = "sub_engrams", value_name = "DIR")]
210 out_sub_engrams_dir: PathBuf,
211
212 #[arg(long, default_value_t = 500, value_name = "N")]
214 max_level_sparsity: usize,
215
216 #[arg(long, value_name = "N")]
218 max_chunks_per_node: Option<usize>,
219
220 #[arg(long, default_value_t = false)]
222 embed_sub_engrams: bool,
223
224 #[arg(short, long)]
226 verbose: bool,
227 },
228
229 #[cfg(feature = "fuse")]
231 #[command(
232 long_about = "Mount an engram as a FUSE filesystem\n\n\
233 This command mounts an engram at the specified mountpoint, making all files\n\
234 accessible through the standard filesystem interface. Files are decoded\n\
235 on-demand from the holographic representation.\n\n\
236 Requirements:\n\
237 • FUSE kernel module must be loaded (modprobe fuse)\n\
238 • libfuse3-dev installed on the system\n\
239 • Build with: cargo build --features fuse\n\n\
240 To unmount:\n\
241 fusermount -u /path/to/mountpoint\n\n\
242 Example:\n\
243 embeddenator mount -e project.engram -m project.json /mnt/engram\n\
244 embeddenator mount --engram backup.engram --mountpoint ~/mnt --allow-other"
245 )]
246 Mount {
247 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
249 engram: PathBuf,
250
251 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
253 manifest: PathBuf,
254
255 #[arg(value_name = "MOUNTPOINT", help_heading = "Required")]
257 mountpoint: PathBuf,
258
259 #[arg(long)]
261 allow_other: bool,
262
263 #[arg(short, long)]
265 foreground: bool,
266
267 #[arg(short, long)]
269 verbose: bool,
270 },
271
272 #[command(
274 long_about = "Perform incremental updates to an existing engram\n\n\
275 This command enables efficient updates to engrams without full re-ingestion.\n\
276 Use subcommands to add, remove, or modify files, or to compact the engram.\n\n\
277 Subcommands:\n\
278 • add - Add a new file to the engram\n\
279 • remove - Mark a file as deleted\n\
280 • modify - Update an existing file\n\
281 • compact - Rebuild engram without deleted files\n\n\
282 Examples:\n\
283 embeddenator update add -e data.engram -m data.json -f new.txt\n\
284 embeddenator update remove -e data.engram -m data.json -p old.txt\n\
285 embeddenator update modify -e data.engram -m data.json -f changed.txt\n\
286 embeddenator update compact -e data.engram -m data.json"
287 )]
288 #[command(subcommand)]
289 Update(UpdateCommands),
290}
291
292#[derive(Subcommand)]
293pub enum UpdateCommands {
294 #[command(
296 long_about = "Add a new file to an existing engram without full re-ingestion\n\n\
297 This operation bundles the new file's chunks with the existing root vector\n\
298 using VSA's associative bundle operation. Much faster than full re-ingestion.\n\n\
299 Example:\n\
300 embeddenator update add -e data.engram -m data.json -f new_file.txt"
301 )]
302 Add {
303 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
305 engram: PathBuf,
306
307 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
309 manifest: PathBuf,
310
311 #[arg(short, long, value_name = "FILE", help_heading = "Required")]
313 file: PathBuf,
314
315 #[arg(short = 'p', long, value_name = "PATH")]
317 logical_path: Option<String>,
318
319 #[arg(short, long)]
321 verbose: bool,
322 },
323
324 #[command(
326 long_about = "Mark a file as deleted in the engram manifest\n\n\
327 This operation marks the file as deleted without modifying the root vector,\n\
328 since VSA bundling has no clean inverse. Use 'compact' to truly remove chunks.\n\n\
329 Example:\n\
330 embeddenator update remove -e data.engram -m data.json -p old_file.txt"
331 )]
332 Remove {
333 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
335 engram: PathBuf,
336
337 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
339 manifest: PathBuf,
340
341 #[arg(short = 'p', long, value_name = "PATH", help_heading = "Required")]
343 path: String,
344
345 #[arg(short, long)]
347 verbose: bool,
348 },
349
350 #[command(
352 long_about = "Update an existing file's content in the engram\n\n\
353 This operation marks the old version as deleted and adds the new version.\n\
354 Use 'compact' periodically to clean up old chunks.\n\n\
355 Example:\n\
356 embeddenator update modify -e data.engram -m data.json -f updated.txt"
357 )]
358 Modify {
359 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
361 engram: PathBuf,
362
363 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
365 manifest: PathBuf,
366
367 #[arg(short, long, value_name = "FILE", help_heading = "Required")]
369 file: PathBuf,
370
371 #[arg(short = 'p', long, value_name = "PATH")]
373 logical_path: Option<String>,
374
375 #[arg(short, long)]
377 verbose: bool,
378 },
379
380 #[command(
382 long_about = "Rebuild engram from scratch, excluding deleted files\n\n\
383 This operation recreates the engram with only active files, reclaiming space\n\
384 from deleted chunks. Expensive but necessary after many updates.\n\n\
385 Example:\n\
386 embeddenator update compact -e data.engram -m data.json -v"
387 )]
388 Compact {
389 #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
391 engram: PathBuf,
392
393 #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
395 manifest: PathBuf,
396
397 #[arg(short, long)]
399 verbose: bool,
400 },
401}
402
403pub fn run() -> Result<()> {
405 let cli = Cli::parse();
406
407 match cli.command {
408 Commands::Ingest {
409 input,
410 engram,
411 manifest,
412 verbose,
413 } => commands::handle_ingest(input, engram, manifest, verbose),
414
415 Commands::Extract {
416 engram,
417 manifest,
418 output_dir,
419 verbose,
420 } => commands::handle_extract(engram, manifest, output_dir, verbose),
421
422 Commands::Query {
423 engram,
424 query,
425 hierarchical_manifest,
426 sub_engrams_dir,
427 k,
428 verbose,
429 } => commands::handle_query(
430 engram,
431 query,
432 hierarchical_manifest,
433 sub_engrams_dir,
434 k,
435 verbose,
436 ),
437
438 Commands::QueryText {
439 engram,
440 text,
441 hierarchical_manifest,
442 sub_engrams_dir,
443 k,
444 verbose,
445 } => commands::handle_query_text(
446 engram,
447 text,
448 hierarchical_manifest,
449 sub_engrams_dir,
450 k,
451 verbose,
452 ),
453
454 Commands::BundleHier {
455 engram,
456 manifest,
457 out_hierarchical_manifest,
458 out_sub_engrams_dir,
459 max_level_sparsity,
460 max_chunks_per_node,
461 embed_sub_engrams,
462 verbose,
463 } => commands::handle_bundle_hier(
464 engram,
465 manifest,
466 out_hierarchical_manifest,
467 out_sub_engrams_dir,
468 max_level_sparsity,
469 max_chunks_per_node,
470 embed_sub_engrams,
471 verbose,
472 ),
473
474 #[cfg(feature = "fuse")]
475 Commands::Mount {
476 engram,
477 manifest,
478 mountpoint,
479 allow_other,
480 foreground,
481 verbose,
482 } => commands::handle_mount(
483 engram,
484 manifest,
485 mountpoint,
486 allow_other,
487 foreground,
488 verbose,
489 ),
490
491 Commands::Update(update_cmd) => match update_cmd {
492 UpdateCommands::Add {
493 engram,
494 manifest,
495 file,
496 logical_path,
497 verbose,
498 } => commands::handle_update_add(engram, manifest, file, logical_path, verbose),
499
500 UpdateCommands::Remove {
501 engram,
502 manifest,
503 path,
504 verbose,
505 } => commands::handle_update_remove(engram, manifest, path, verbose),
506
507 UpdateCommands::Modify {
508 engram,
509 manifest,
510 file,
511 logical_path,
512 verbose,
513 } => commands::handle_update_modify(engram, manifest, file, logical_path, verbose),
514
515 UpdateCommands::Compact {
516 engram,
517 manifest,
518 verbose,
519 } => commands::handle_update_compact(engram, manifest, verbose),
520 },
521 }
522}