Skip to main content

embeddenator_cli/
lib.rs

//! Embeddenator CLI - Command-line interface for the holographic computing substrate
//!
//! This library provides a modular CLI for Embeddenator operations including:
//! - Ingesting files/directories into engrams
//! - Extracting files from engrams
//! - Querying similarity
//! - Mounting engrams as FUSE filesystems (requires `fuse` feature)
//! - Incremental update operations
9
10use anyhow::Result;
11use clap::{Parser, Subcommand};
12use std::path::PathBuf;
13
14pub mod commands;
15pub mod utils;
16
17/// Embeddenator CLI main structure
18#[derive(Parser)]
19#[command(name = "embeddenator")]
20#[command(version = env!("CARGO_PKG_VERSION"))]
21#[command(about = "Holographic computing substrate using sparse ternary VSA")]
22#[command(
23    long_about = "Embeddenator - A production-grade holographic computing substrate using Vector Symbolic Architecture (VSA)\n\n\
24    Embeddenator encodes entire filesystems into holographic 'engrams' using sparse ternary vectors,\n\
25    enabling bit-perfect reconstruction and algebraic operations on data.\n\n\
26    Key Features:\n\
27    • 100% bit-perfect reconstruction of all files\n\
28    • Holographic superposition of multiple data sources\n\
29    • Algebraic operations (bundle, bind) on engrams\n\
30    • Hierarchical chunked encoding for TB-scale data\n\
31    • Multi-architecture support (amd64/arm64)\n\n\
32    Examples:\n\
33      embeddenator ingest -i ./mydata -e data.engram -m data.json -v\n\
34      embeddenator extract -e data.engram -m data.json -o ./restored -v\n\
35      embeddenator query -e data.engram -q ./testfile.txt -v"
36)]
37#[command(author = "Tyler Zervas <tz-dev@vectorweight.com>")]
38pub struct Cli {
39    #[command(subcommand)]
40    pub command: Commands,
41}
42
43#[derive(Subcommand)]
44pub enum Commands {
45    /// Ingest files/directories into a holographic engram
46    #[command(
47        long_about = "Ingest files and directories into a holographic engram\n\n\
48        This command recursively processes all files in the input directory, chunks them,\n\
49        and encodes them into a holographic VSA engram. The result is a single .engram file\n\
50        containing the superposition of all data, plus a manifest tracking file metadata.\n\n\
51        The engram uses sparse ternary vectors to create a holographic representation where:\n\
52        • All files are superimposed in a single root vector\n\
53        • Each chunk is bound to a unique position vector\n\
54        • Reconstruction is bit-perfect for all file types\n\n\
55        Example:\n\
56          embeddenator ingest -i ./myproject -e project.engram -m project.json -v\n\
57          embeddenator ingest --input ~/Documents --engram docs.engram --verbose"
58    )]
59    Ingest {
60        /// Input path(s) to ingest (directory or file). Can be provided multiple times.
61        #[arg(
62            short,
63            long,
64            value_name = "PATH",
65            help_heading = "Required",
66            num_args = 1..,
67            action = clap::ArgAction::Append
68        )]
69        input: Vec<PathBuf>,
70
71        /// Output engram file containing holographic encoding
72        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
73        engram: PathBuf,
74
75        /// Output manifest file containing file metadata and chunk mappings
76        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
77        manifest: PathBuf,
78
79        /// Enable verbose output showing ingestion progress and statistics
80        #[arg(short, long)]
81        verbose: bool,
82    },
83
84    /// Extract and reconstruct files from a holographic engram
85    #[command(
86        long_about = "Extract and reconstruct files from a holographic engram\n\n\
87        This command performs bit-perfect reconstruction of all files from an engram.\n\
88        It uses the manifest to locate chunks in the codebook and algebraically unbinds\n\
89        them from the holographic root vector to recover the original data.\n\n\
90        The extraction process:\n\
91        • Loads the engram and manifest files\n\
92        • Reconstructs the directory structure\n\
93        • Unbinds and decodes each chunk using VSA operations\n\
94        • Writes bit-perfect copies of all original files\n\n\
95        Example:\n\
96          embeddenator extract -e project.engram -m project.json -o ./restored -v\n\
97          embeddenator extract --engram backup.engram --output-dir ~/restored"
98    )]
99    Extract {
100        /// Input engram file to extract from
101        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
102        engram: PathBuf,
103
104        /// Input manifest file with metadata and chunk mappings
105        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
106        manifest: PathBuf,
107
108        /// Output directory where files will be reconstructed
109        #[arg(short, long, value_name = "DIR", help_heading = "Required")]
110        output_dir: PathBuf,
111
112        /// Enable verbose output showing extraction progress
113        #[arg(short, long)]
114        verbose: bool,
115    },
116
117    /// Query similarity between a file and engram contents
118    #[command(
119        long_about = "Query cosine similarity between a file and engram contents\n\n\
120        This command computes the similarity between a query file and the data encoded\n\
121        in an engram using VSA cosine similarity. This enables holographic search and\n\
122        content-based retrieval without full extraction.\n\n\
123        Similarity interpretation:\n\
124        • >0.75: Strong match, likely contains similar content\n\
125        • 0.3-0.75: Moderate similarity, some shared patterns\n\
126        • <0.3: Low similarity, likely unrelated content\n\n\
127        Example:\n\
128          embeddenator query -e archive.engram -q search.txt -v\n\
129          embeddenator query --engram data.engram --query pattern.bin"
130    )]
131    Query {
132        /// Engram file to query
133        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
134        engram: PathBuf,
135
136        /// Query file to search for
137        #[arg(short, long, value_name = "FILE", help_heading = "Required")]
138        query: PathBuf,
139
140        /// Optional hierarchical manifest (enables selective unfolding search)
141        #[arg(long, value_name = "FILE")]
142        hierarchical_manifest: Option<PathBuf>,
143
144        /// Directory containing bincode-serialized sub-engrams (used with --hierarchical-manifest)
145        #[arg(long, value_name = "DIR")]
146        sub_engrams_dir: Option<PathBuf>,
147
148        /// Top-k results to print for codebook/hierarchical search
149        #[arg(long, default_value_t = 10, value_name = "K")]
150        k: usize,
151
152        /// Enable verbose output showing similarity scores and details
153        #[arg(short, long)]
154        verbose: bool,
155    },
156
157    /// Query similarity using a literal text string (basic inference-to-vector)
158    #[command(long_about = "Query cosine similarity using a literal text string\n\n\
159        This is a convenience wrapper that encodes the provided text as bytes into a VSA query vector\n\
160        and runs the same retrieval path as `query`.")]
161    QueryText {
162        /// Engram file to query
163        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
164        engram: PathBuf,
165
166        /// Text to encode and search for
167        #[arg(long, value_name = "TEXT", help_heading = "Required")]
168        text: String,
169
170        /// Optional hierarchical manifest (enables selective unfolding search)
171        #[arg(long, value_name = "FILE")]
172        hierarchical_manifest: Option<PathBuf>,
173
174        /// Directory containing bincode-serialized sub-engrams (used with --hierarchical-manifest)
175        #[arg(long, value_name = "DIR")]
176        sub_engrams_dir: Option<PathBuf>,
177
178        /// Top-k results to print for codebook/hierarchical search
179        #[arg(long, default_value_t = 10, value_name = "K")]
180        k: usize,
181
182        /// Enable verbose output showing similarity scores and details
183        #[arg(short, long)]
184        verbose: bool,
185    },
186
187    /// Build hierarchical retrieval artifacts (manifest + sub-engrams store)
188    #[command(
189        long_about = "Build hierarchical retrieval artifacts from an existing engram+manifest\n\n\
190        This command produces a hierarchical manifest JSON and a directory of sub-engrams\n\
191        suitable for store-backed selective unfolding (DirectorySubEngramStore)."
192    )]
193    BundleHier {
194        /// Input engram file
195        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
196        engram: PathBuf,
197
198        /// Input manifest file
199        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
200        manifest: PathBuf,
201
202        /// Output hierarchical manifest JSON
203        #[arg(long, default_value = "hier.json", value_name = "FILE")]
204        out_hierarchical_manifest: PathBuf,
205
206        /// Output directory to write bincode sub-engrams
207        #[arg(long, default_value = "sub_engrams", value_name = "DIR")]
208        out_sub_engrams_dir: PathBuf,
209
210        /// Maximum sparsity per level bundle
211        #[arg(long, default_value_t = 500, value_name = "N")]
212        max_level_sparsity: usize,
213
214        /// Optional cap on chunk IDs per node (enables deterministic sharding when exceeded)
215        #[arg(long, value_name = "N")]
216        max_chunks_per_node: Option<usize>,
217
218        /// Embed sub-engrams in the manifest JSON (in addition to writing the directory)
219        #[arg(long, default_value_t = false)]
220        embed_sub_engrams: bool,
221
222        /// Enable verbose output
223        #[arg(short, long)]
224        verbose: bool,
225    },
226
227    /// Mount an engram as a FUSE filesystem (requires --features fuse)
228    #[cfg(feature = "fuse")]
229    #[command(long_about = "Mount an engram as a FUSE filesystem\n\n\
230        This command mounts an engram at the specified mountpoint, making all files\n\
231        accessible through the standard filesystem interface. Files are decoded\n\
232        on-demand from the holographic representation.\n\n\
233        Requirements:\n\
234        • FUSE kernel module must be loaded (modprobe fuse)\n\
235        • libfuse3-dev installed on the system\n\
236        • Build with: cargo build --features fuse\n\n\
237        To unmount:\n\
238          fusermount -u /path/to/mountpoint\n\n\
239        Example:\n\
240          embeddenator mount -e project.engram -m project.json /mnt/engram\n\
241          embeddenator mount --engram backup.engram --mountpoint ~/mnt --allow-other")]
242    Mount {
243        /// Engram file to mount
244        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
245        engram: PathBuf,
246
247        /// Manifest file with metadata and chunk mappings
248        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
249        manifest: PathBuf,
250
251        /// Mountpoint directory (must exist and be empty)
252        #[arg(value_name = "MOUNTPOINT", help_heading = "Required")]
253        mountpoint: PathBuf,
254
255        /// Allow other users to access the mount
256        #[arg(long)]
257        allow_other: bool,
258
259        /// Run in foreground (don't daemonize)
260        #[arg(short, long)]
261        foreground: bool,
262
263        /// Enable verbose output
264        #[arg(short, long)]
265        verbose: bool,
266    },
267
268    /// Incremental update operations (add/remove/modify files)
269    #[command(long_about = "Perform incremental updates to an existing engram\n\n\
270        This command enables efficient updates to engrams without full re-ingestion.\n\
271        Use subcommands to add, remove, or modify files, or to compact the engram.\n\n\
272        Subcommands:\n\
273        • add     - Add a new file to the engram\n\
274        • remove  - Mark a file as deleted\n\
275        • modify  - Update an existing file\n\
276        • compact - Rebuild engram without deleted files\n\n\
277        Examples:\n\
278          embeddenator update add -e data.engram -m data.json -f new.txt\n\
279          embeddenator update remove -e data.engram -m data.json -p old.txt\n\
280          embeddenator update modify -e data.engram -m data.json -f changed.txt\n\
281          embeddenator update compact -e data.engram -m data.json")]
282    #[command(subcommand)]
283    Update(UpdateCommands),
284}
285
286#[derive(Subcommand)]
287pub enum UpdateCommands {
288    /// Add a new file to an existing engram
289    #[command(
290        long_about = "Add a new file to an existing engram without full re-ingestion\n\n\
291        This operation bundles the new file's chunks with the existing root vector\n\
292        using VSA's associative bundle operation. Much faster than full re-ingestion.\n\n\
293        Example:\n\
294          embeddenator update add -e data.engram -m data.json -f new_file.txt"
295    )]
296    Add {
297        /// Engram file to update
298        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
299        engram: PathBuf,
300
301        /// Manifest file to update
302        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
303        manifest: PathBuf,
304
305        /// File to add to the engram
306        #[arg(short, long, value_name = "FILE", help_heading = "Required")]
307        file: PathBuf,
308
309        /// Logical path in engram (defaults to filename)
310        #[arg(short = 'p', long, value_name = "PATH")]
311        logical_path: Option<String>,
312
313        /// Enable verbose output
314        #[arg(short, long)]
315        verbose: bool,
316    },
317
318    /// Remove a file from the engram (mark as deleted)
319    #[command(long_about = "Mark a file as deleted in the engram manifest\n\n\
320        This operation marks the file as deleted without modifying the root vector,\n\
321        since VSA bundling has no clean inverse. Use 'compact' to truly remove chunks.\n\n\
322        Example:\n\
323          embeddenator update remove -e data.engram -m data.json -p old_file.txt")]
324    Remove {
325        /// Engram file to update
326        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
327        engram: PathBuf,
328
329        /// Manifest file to update
330        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
331        manifest: PathBuf,
332
333        /// Logical path of file to remove
334        #[arg(short = 'p', long, value_name = "PATH", help_heading = "Required")]
335        path: String,
336
337        /// Enable verbose output
338        #[arg(short, long)]
339        verbose: bool,
340    },
341
342    /// Modify an existing file in the engram
343    #[command(long_about = "Update an existing file's content in the engram\n\n\
344        This operation marks the old version as deleted and adds the new version.\n\
345        Use 'compact' periodically to clean up old chunks.\n\n\
346        Example:\n\
347          embeddenator update modify -e data.engram -m data.json -f updated.txt")]
348    Modify {
349        /// Engram file to update
350        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
351        engram: PathBuf,
352
353        /// Manifest file to update
354        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
355        manifest: PathBuf,
356
357        /// File with new content
358        #[arg(short, long, value_name = "FILE", help_heading = "Required")]
359        file: PathBuf,
360
361        /// Logical path in engram (defaults to filename)
362        #[arg(short = 'p', long, value_name = "PATH")]
363        logical_path: Option<String>,
364
365        /// Enable verbose output
366        #[arg(short, long)]
367        verbose: bool,
368    },
369
370    /// Compact engram by rebuilding without deleted files
371    #[command(
372        long_about = "Rebuild engram from scratch, excluding deleted files\n\n\
373        This operation recreates the engram with only active files, reclaiming space\n\
374        from deleted chunks. Expensive but necessary after many updates.\n\n\
375        Example:\n\
376          embeddenator update compact -e data.engram -m data.json -v"
377    )]
378    Compact {
379        /// Engram file to compact
380        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
381        engram: PathBuf,
382
383        /// Manifest file to update
384        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
385        manifest: PathBuf,
386
387        /// Enable verbose output
388        #[arg(short, long)]
389        verbose: bool,
390    },
391}
392
393/// Main entry point for the CLI
394pub fn run() -> Result<()> {
395    let cli = Cli::parse();
396
397    match cli.command {
398        Commands::Ingest {
399            input,
400            engram,
401            manifest,
402            verbose,
403        } => commands::handle_ingest(input, engram, manifest, verbose),
404
405        Commands::Extract {
406            engram,
407            manifest,
408            output_dir,
409            verbose,
410        } => commands::handle_extract(engram, manifest, output_dir, verbose),
411
412        Commands::Query {
413            engram,
414            query,
415            hierarchical_manifest,
416            sub_engrams_dir,
417            k,
418            verbose,
419        } => commands::handle_query(
420            engram,
421            query,
422            hierarchical_manifest,
423            sub_engrams_dir,
424            k,
425            verbose,
426        ),
427
428        Commands::QueryText {
429            engram,
430            text,
431            hierarchical_manifest,
432            sub_engrams_dir,
433            k,
434            verbose,
435        } => commands::handle_query_text(
436            engram,
437            text,
438            hierarchical_manifest,
439            sub_engrams_dir,
440            k,
441            verbose,
442        ),
443
444        Commands::BundleHier {
445            engram,
446            manifest,
447            out_hierarchical_manifest,
448            out_sub_engrams_dir,
449            max_level_sparsity,
450            max_chunks_per_node,
451            embed_sub_engrams,
452            verbose,
453        } => commands::handle_bundle_hier(
454            engram,
455            manifest,
456            out_hierarchical_manifest,
457            out_sub_engrams_dir,
458            max_level_sparsity,
459            max_chunks_per_node,
460            embed_sub_engrams,
461            verbose,
462        ),
463
464        #[cfg(feature = "fuse")]
465        Commands::Mount {
466            engram,
467            manifest,
468            mountpoint,
469            allow_other,
470            foreground,
471            verbose,
472        } => commands::handle_mount(
473            engram,
474            manifest,
475            mountpoint,
476            allow_other,
477            foreground,
478            verbose,
479        ),
480
481        Commands::Update(update_cmd) => match update_cmd {
482            UpdateCommands::Add {
483                engram,
484                manifest,
485                file,
486                logical_path,
487                verbose,
488            } => commands::handle_update_add(engram, manifest, file, logical_path, verbose),
489
490            UpdateCommands::Remove {
491                engram,
492                manifest,
493                path,
494                verbose,
495            } => commands::handle_update_remove(engram, manifest, path, verbose),
496
497            UpdateCommands::Modify {
498                engram,
499                manifest,
500                file,
501                logical_path,
502                verbose,
503            } => commands::handle_update_modify(engram, manifest, file, logical_path, verbose),
504
505            UpdateCommands::Compact {
506                engram,
507                manifest,
508                verbose,
509            } => commands::handle_update_compact(engram, manifest, verbose),
510        },
511    }
512}