embeddenator_cli/
lib.rs

//! Embeddenator CLI - Command-line interface for the Embeddenator holographic computing substrate
//!
//! This library provides a modular CLI for Embeddenator operations including:
//! - Ingesting files/directories into engrams
//! - Extracting files from engrams
//! - Querying similarity (by file or by literal text)
//! - Building hierarchical retrieval artifacts (manifest + sub-engrams)
//! - Mounting engrams as FUSE filesystems (requires `fuse` feature)
//! - Incremental update operations (add/remove/modify/compact)
9
10use anyhow::Result;
11use clap::{Parser, Subcommand};
12use std::path::PathBuf;
13
14pub mod commands;
15pub mod utils;
16
17/// Embeddenator CLI main structure
18#[derive(Parser)]
19#[command(name = "embeddenator")]
20#[command(version = env!("CARGO_PKG_VERSION"))]
21#[command(about = "Holographic computing substrate using sparse ternary VSA")]
22#[command(
23    long_about = "Embeddenator - A production-grade holographic computing substrate using Vector Symbolic Architecture (VSA)\n\n\
24    Embeddenator encodes entire filesystems into holographic 'engrams' using sparse ternary vectors,\n\
25    enabling bit-perfect reconstruction and algebraic operations on data.\n\n\
26    Key Features:\n\
27    • 100% bit-perfect reconstruction of all files\n\
28    • Holographic superposition of multiple data sources\n\
29    • Algebraic operations (bundle, bind) on engrams\n\
30    • Hierarchical chunked encoding for TB-scale data\n\
31    • Multi-architecture support (amd64/arm64)\n\n\
32    Examples:\n\
33      embeddenator ingest -i ./mydata -e data.engram -m data.json -v\n\
34      embeddenator extract -e data.engram -m data.json -o ./restored -v\n\
35      embeddenator query -e data.engram -q ./testfile.txt -v"
36)]
37#[command(author = "Tyler Zervas <tz-dev@vectorweight.com>")]
38pub struct Cli {
39    #[command(subcommand)]
40    pub command: Commands,
41}
42
43#[derive(Subcommand)]
44pub enum Commands {
45    /// Ingest files/directories into a holographic engram
46    #[command(
47        long_about = "Ingest files and directories into a holographic engram\n\n\
48        This command recursively processes all files in the input directory, chunks them,\n\
49        and encodes them into a holographic VSA engram. The result is a single .engram file\n\
50        containing the superposition of all data, plus a manifest tracking file metadata.\n\n\
51        The engram uses sparse ternary vectors to create a holographic representation where:\n\
52        • All files are superimposed in a single root vector\n\
53        • Each chunk is bound to a unique position vector\n\
54        • Reconstruction is bit-perfect for all file types\n\n\
55        Example:\n\
56          embeddenator ingest -i ./myproject -e project.engram -m project.json -v\n\
57          embeddenator ingest --input ~/Documents --engram docs.engram --verbose"
58    )]
59    Ingest {
60        /// Input path(s) to ingest (directory or file). Can be provided multiple times.
61        #[arg(
62            short,
63            long,
64            value_name = "PATH",
65            help_heading = "Required",
66            num_args = 1..,
67            action = clap::ArgAction::Append
68        )]
69        input: Vec<PathBuf>,
70
71        /// Output engram file containing holographic encoding
72        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
73        engram: PathBuf,
74
75        /// Output manifest file containing file metadata and chunk mappings
76        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
77        manifest: PathBuf,
78
79        /// Enable verbose output showing ingestion progress and statistics
80        #[arg(short, long)]
81        verbose: bool,
82    },
83
84    /// Extract and reconstruct files from a holographic engram
85    #[command(
86        long_about = "Extract and reconstruct files from a holographic engram\n\n\
87        This command performs bit-perfect reconstruction of all files from an engram.\n\
88        It uses the manifest to locate chunks in the codebook and algebraically unbinds\n\
89        them from the holographic root vector to recover the original data.\n\n\
90        The extraction process:\n\
91        • Loads the engram and manifest files\n\
92        • Reconstructs the directory structure\n\
93        • Unbinds and decodes each chunk using VSA operations\n\
94        • Writes bit-perfect copies of all original files\n\n\
95        Example:\n\
96          embeddenator extract -e project.engram -m project.json -o ./restored -v\n\
97          embeddenator extract --engram backup.engram --output-dir ~/restored"
98    )]
99    Extract {
100        /// Input engram file to extract from
101        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
102        engram: PathBuf,
103
104        /// Input manifest file with metadata and chunk mappings
105        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
106        manifest: PathBuf,
107
108        /// Output directory where files will be reconstructed
109        #[arg(short, long, value_name = "DIR", help_heading = "Required")]
110        output_dir: PathBuf,
111
112        /// Enable verbose output showing extraction progress
113        #[arg(short, long)]
114        verbose: bool,
115    },
116
117    /// Query similarity between a file and engram contents
118    #[command(
119        long_about = "Query cosine similarity between a file and engram contents\n\n\
120        This command computes the similarity between a query file and the data encoded\n\
121        in an engram using VSA cosine similarity. This enables holographic search and\n\
122        content-based retrieval without full extraction.\n\n\
123        Similarity interpretation:\n\
124        • >0.75: Strong match, likely contains similar content\n\
125        • 0.3-0.75: Moderate similarity, some shared patterns\n\
126        • <0.3: Low similarity, likely unrelated content\n\n\
127        Example:\n\
128          embeddenator query -e archive.engram -q search.txt -v\n\
129          embeddenator query --engram data.engram --query pattern.bin"
130    )]
131    Query {
132        /// Engram file to query
133        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
134        engram: PathBuf,
135
136        /// Query file to search for
137        #[arg(short, long, value_name = "FILE", help_heading = "Required")]
138        query: PathBuf,
139
140        /// Optional hierarchical manifest (enables selective unfolding search)
141        #[arg(long, value_name = "FILE")]
142        hierarchical_manifest: Option<PathBuf>,
143
144        /// Directory containing bincode-serialized sub-engrams (used with --hierarchical-manifest)
145        #[arg(long, value_name = "DIR")]
146        sub_engrams_dir: Option<PathBuf>,
147
148        /// Top-k results to print for codebook/hierarchical search
149        #[arg(long, default_value_t = 10, value_name = "K")]
150        k: usize,
151
152        /// Enable verbose output showing similarity scores and details
153        #[arg(short, long)]
154        verbose: bool,
155    },
156
157    /// Query similarity using a literal text string (basic inference-to-vector)
158    #[command(
159        long_about = "Query cosine similarity using a literal text string\n\n\
160        This is a convenience wrapper that encodes the provided text as bytes into a VSA query vector\n\
161        and runs the same retrieval path as `query`."
162    )]
163    QueryText {
164        /// Engram file to query
165        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
166        engram: PathBuf,
167
168        /// Text to encode and search for
169        #[arg(long, value_name = "TEXT", help_heading = "Required")]
170        text: String,
171
172        /// Optional hierarchical manifest (enables selective unfolding search)
173        #[arg(long, value_name = "FILE")]
174        hierarchical_manifest: Option<PathBuf>,
175
176        /// Directory containing bincode-serialized sub-engrams (used with --hierarchical-manifest)
177        #[arg(long, value_name = "DIR")]
178        sub_engrams_dir: Option<PathBuf>,
179
180        /// Top-k results to print for codebook/hierarchical search
181        #[arg(long, default_value_t = 10, value_name = "K")]
182        k: usize,
183
184        /// Enable verbose output showing similarity scores and details
185        #[arg(short, long)]
186        verbose: bool,
187    },
188
189    /// Build hierarchical retrieval artifacts (manifest + sub-engrams store)
190    #[command(
191        long_about = "Build hierarchical retrieval artifacts from an existing engram+manifest\n\n\
192        This command produces a hierarchical manifest JSON and a directory of sub-engrams\n\
193        suitable for store-backed selective unfolding (DirectorySubEngramStore)."
194    )]
195    BundleHier {
196        /// Input engram file
197        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
198        engram: PathBuf,
199
200        /// Input manifest file
201        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
202        manifest: PathBuf,
203
204        /// Output hierarchical manifest JSON
205        #[arg(long, default_value = "hier.json", value_name = "FILE")]
206        out_hierarchical_manifest: PathBuf,
207
208        /// Output directory to write bincode sub-engrams
209        #[arg(long, default_value = "sub_engrams", value_name = "DIR")]
210        out_sub_engrams_dir: PathBuf,
211
212        /// Maximum sparsity per level bundle
213        #[arg(long, default_value_t = 500, value_name = "N")]
214        max_level_sparsity: usize,
215
216        /// Optional cap on chunk IDs per node (enables deterministic sharding when exceeded)
217        #[arg(long, value_name = "N")]
218        max_chunks_per_node: Option<usize>,
219
220        /// Embed sub-engrams in the manifest JSON (in addition to writing the directory)
221        #[arg(long, default_value_t = false)]
222        embed_sub_engrams: bool,
223
224        /// Enable verbose output
225        #[arg(short, long)]
226        verbose: bool,
227    },
228
229    /// Mount an engram as a FUSE filesystem (requires --features fuse)
230    #[cfg(feature = "fuse")]
231    #[command(
232        long_about = "Mount an engram as a FUSE filesystem\n\n\
233        This command mounts an engram at the specified mountpoint, making all files\n\
234        accessible through the standard filesystem interface. Files are decoded\n\
235        on-demand from the holographic representation.\n\n\
236        Requirements:\n\
237        • FUSE kernel module must be loaded (modprobe fuse)\n\
238        • libfuse3-dev installed on the system\n\
239        • Build with: cargo build --features fuse\n\n\
240        To unmount:\n\
241          fusermount -u /path/to/mountpoint\n\n\
242        Example:\n\
243          embeddenator mount -e project.engram -m project.json /mnt/engram\n\
244          embeddenator mount --engram backup.engram --mountpoint ~/mnt --allow-other"
245    )]
246    Mount {
247        /// Engram file to mount
248        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
249        engram: PathBuf,
250
251        /// Manifest file with metadata and chunk mappings
252        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
253        manifest: PathBuf,
254
255        /// Mountpoint directory (must exist and be empty)
256        #[arg(value_name = "MOUNTPOINT", help_heading = "Required")]
257        mountpoint: PathBuf,
258
259        /// Allow other users to access the mount
260        #[arg(long)]
261        allow_other: bool,
262
263        /// Run in foreground (don't daemonize)
264        #[arg(short, long)]
265        foreground: bool,
266
267        /// Enable verbose output
268        #[arg(short, long)]
269        verbose: bool,
270    },
271
272    /// Incremental update operations (add/remove/modify files)
273    #[command(
274        long_about = "Perform incremental updates to an existing engram\n\n\
275        This command enables efficient updates to engrams without full re-ingestion.\n\
276        Use subcommands to add, remove, or modify files, or to compact the engram.\n\n\
277        Subcommands:\n\
278        • add     - Add a new file to the engram\n\
279        • remove  - Mark a file as deleted\n\
280        • modify  - Update an existing file\n\
281        • compact - Rebuild engram without deleted files\n\n\
282        Examples:\n\
283          embeddenator update add -e data.engram -m data.json -f new.txt\n\
284          embeddenator update remove -e data.engram -m data.json -p old.txt\n\
285          embeddenator update modify -e data.engram -m data.json -f changed.txt\n\
286          embeddenator update compact -e data.engram -m data.json"
287    )]
288    #[command(subcommand)]
289    Update(UpdateCommands),
290}
291
292#[derive(Subcommand)]
293pub enum UpdateCommands {
294    /// Add a new file to an existing engram
295    #[command(
296        long_about = "Add a new file to an existing engram without full re-ingestion\n\n\
297        This operation bundles the new file's chunks with the existing root vector\n\
298        using VSA's associative bundle operation. Much faster than full re-ingestion.\n\n\
299        Example:\n\
300          embeddenator update add -e data.engram -m data.json -f new_file.txt"
301    )]
302    Add {
303        /// Engram file to update
304        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
305        engram: PathBuf,
306
307        /// Manifest file to update
308        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
309        manifest: PathBuf,
310
311        /// File to add to the engram
312        #[arg(short, long, value_name = "FILE", help_heading = "Required")]
313        file: PathBuf,
314
315        /// Logical path in engram (defaults to filename)
316        #[arg(short = 'p', long, value_name = "PATH")]
317        logical_path: Option<String>,
318
319        /// Enable verbose output
320        #[arg(short, long)]
321        verbose: bool,
322    },
323
324    /// Remove a file from the engram (mark as deleted)
325    #[command(
326        long_about = "Mark a file as deleted in the engram manifest\n\n\
327        This operation marks the file as deleted without modifying the root vector,\n\
328        since VSA bundling has no clean inverse. Use 'compact' to truly remove chunks.\n\n\
329        Example:\n\
330          embeddenator update remove -e data.engram -m data.json -p old_file.txt"
331    )]
332    Remove {
333        /// Engram file to update
334        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
335        engram: PathBuf,
336
337        /// Manifest file to update
338        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
339        manifest: PathBuf,
340
341        /// Logical path of file to remove
342        #[arg(short = 'p', long, value_name = "PATH", help_heading = "Required")]
343        path: String,
344
345        /// Enable verbose output
346        #[arg(short, long)]
347        verbose: bool,
348    },
349
350    /// Modify an existing file in the engram
351    #[command(
352        long_about = "Update an existing file's content in the engram\n\n\
353        This operation marks the old version as deleted and adds the new version.\n\
354        Use 'compact' periodically to clean up old chunks.\n\n\
355        Example:\n\
356          embeddenator update modify -e data.engram -m data.json -f updated.txt"
357    )]
358    Modify {
359        /// Engram file to update
360        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
361        engram: PathBuf,
362
363        /// Manifest file to update
364        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
365        manifest: PathBuf,
366
367        /// File with new content
368        #[arg(short, long, value_name = "FILE", help_heading = "Required")]
369        file: PathBuf,
370
371        /// Logical path in engram (defaults to filename)
372        #[arg(short = 'p', long, value_name = "PATH")]
373        logical_path: Option<String>,
374
375        /// Enable verbose output
376        #[arg(short, long)]
377        verbose: bool,
378    },
379
380    /// Compact engram by rebuilding without deleted files
381    #[command(
382        long_about = "Rebuild engram from scratch, excluding deleted files\n\n\
383        This operation recreates the engram with only active files, reclaiming space\n\
384        from deleted chunks. Expensive but necessary after many updates.\n\n\
385        Example:\n\
386          embeddenator update compact -e data.engram -m data.json -v"
387    )]
388    Compact {
389        /// Engram file to compact
390        #[arg(short, long, default_value = "root.engram", value_name = "FILE")]
391        engram: PathBuf,
392
393        /// Manifest file to update
394        #[arg(short, long, default_value = "manifest.json", value_name = "FILE")]
395        manifest: PathBuf,
396
397        /// Enable verbose output
398        #[arg(short, long)]
399        verbose: bool,
400    },
401}
402
403/// Main entry point for the CLI
404pub fn run() -> Result<()> {
405    let cli = Cli::parse();
406
407    match cli.command {
408        Commands::Ingest {
409            input,
410            engram,
411            manifest,
412            verbose,
413        } => commands::handle_ingest(input, engram, manifest, verbose),
414
415        Commands::Extract {
416            engram,
417            manifest,
418            output_dir,
419            verbose,
420        } => commands::handle_extract(engram, manifest, output_dir, verbose),
421
422        Commands::Query {
423            engram,
424            query,
425            hierarchical_manifest,
426            sub_engrams_dir,
427            k,
428            verbose,
429        } => commands::handle_query(
430            engram,
431            query,
432            hierarchical_manifest,
433            sub_engrams_dir,
434            k,
435            verbose,
436        ),
437
438        Commands::QueryText {
439            engram,
440            text,
441            hierarchical_manifest,
442            sub_engrams_dir,
443            k,
444            verbose,
445        } => commands::handle_query_text(
446            engram,
447            text,
448            hierarchical_manifest,
449            sub_engrams_dir,
450            k,
451            verbose,
452        ),
453
454        Commands::BundleHier {
455            engram,
456            manifest,
457            out_hierarchical_manifest,
458            out_sub_engrams_dir,
459            max_level_sparsity,
460            max_chunks_per_node,
461            embed_sub_engrams,
462            verbose,
463        } => commands::handle_bundle_hier(
464            engram,
465            manifest,
466            out_hierarchical_manifest,
467            out_sub_engrams_dir,
468            max_level_sparsity,
469            max_chunks_per_node,
470            embed_sub_engrams,
471            verbose,
472        ),
473
474        #[cfg(feature = "fuse")]
475        Commands::Mount {
476            engram,
477            manifest,
478            mountpoint,
479            allow_other,
480            foreground,
481            verbose,
482        } => commands::handle_mount(
483            engram,
484            manifest,
485            mountpoint,
486            allow_other,
487            foreground,
488            verbose,
489        ),
490
491        Commands::Update(update_cmd) => match update_cmd {
492            UpdateCommands::Add {
493                engram,
494                manifest,
495                file,
496                logical_path,
497                verbose,
498            } => commands::handle_update_add(engram, manifest, file, logical_path, verbose),
499
500            UpdateCommands::Remove {
501                engram,
502                manifest,
503                path,
504                verbose,
505            } => commands::handle_update_remove(engram, manifest, path, verbose),
506
507            UpdateCommands::Modify {
508                engram,
509                manifest,
510                file,
511                logical_path,
512                verbose,
513            } => commands::handle_update_modify(engram, manifest, file, logical_path, verbose),
514
515            UpdateCommands::Compact {
516                engram,
517                manifest,
518                verbose,
519            } => commands::handle_update_compact(engram, manifest, verbose),
520        },
521    }
522}