Skip to main content

embeddenator_cli/commands/
ingest.rs

1//! Ingest command implementation
2
3use anyhow::Result;
4use embeddenator_fs::embrfs::EmbrFS;
5use embeddenator_vsa::ReversibleVSAConfig;
6use std::collections::HashMap;
7use std::env;
8use std::path::PathBuf;
9
10use crate::utils::logical_path_for_file_input;
11
12pub fn handle_ingest(
13    input: Vec<PathBuf>,
14    engram: PathBuf,
15    manifest: PathBuf,
16    verbose: bool,
17) -> Result<()> {
18    if verbose {
19        println!(
20            "Embeddenator v{} - Holographic Ingestion",
21            env!("CARGO_PKG_VERSION")
22        );
23        println!("=====================================");
24    }
25
26    // Use holographic mode for ~94% encoding accuracy and <10% storage overhead
27    // (vs legacy mode's ~10% accuracy and 200%+ overhead)
28    let mut fs = EmbrFS::new_holographic();
29    let config = ReversibleVSAConfig::default();
30
31    // Backward-compatible behavior: a single directory input ingests with paths
32    // relative to that directory (no namespacing).
33    if input.len() == 1 && input[0].is_dir() {
34        fs.ingest_directory(&input[0], verbose, &config)?;
35    } else {
36        let cwd = env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
37
38        // Ensure deterministic and collision-resistant namespacing for multiple directory roots.
39        let mut dir_prefix_counts: HashMap<String, usize> = HashMap::new();
40
41        for p in &input {
42            if !p.exists() {
43                anyhow::bail!("Input path does not exist: {}", p.display());
44            }
45
46            if p.is_dir() {
47                let base = p
48                    .file_name()
49                    .and_then(|s| s.to_str())
50                    .filter(|s| !s.is_empty())
51                    .unwrap_or("input")
52                    .to_string();
53                let count = dir_prefix_counts.entry(base.clone()).or_insert(0);
54                *count += 1;
55                let prefix = if *count == 1 {
56                    base
57                } else {
58                    format!("{}_{}", base, count)
59                };
60
61                fs.ingest_directory_with_prefix(p, Some(&prefix), verbose, &config)?;
62            } else {
63                let logical = logical_path_for_file_input(p, &cwd);
64                fs.ingest_file(p, logical, verbose, &config)?;
65            }
66        }
67    }
68
69    fs.save_engram(&engram)?;
70    fs.save_manifest(&manifest)?;
71
72    if verbose {
73        let stats = fs.correction_stats();
74        println!("\nIngestion complete!");
75        println!("  Engram: {}", engram.display());
76        println!("  Manifest: {}", manifest.display());
77        println!("  Files: {}", fs.manifest.files.len());
78        println!("  Total chunks: {}", fs.manifest.total_chunks);
79        println!(
80            "  Encoding: {}",
81            if fs.is_holographic() {
82                "holographic (~94% accuracy)"
83            } else {
84                "legacy (~10% accuracy)"
85            }
86        );
87        println!(
88            "  Perfect chunks: {}/{} ({:.1}%)",
89            stats.perfect_chunks,
90            stats.total_chunks,
91            stats.perfect_ratio * 100.0
92        );
93        println!(
94            "  Correction overhead: {:.2}%",
95            stats.correction_ratio * 100.0
96        );
97    }
98
99    Ok(())
100}