Skip to main content

memvid_cli/commands/
enrich.rs

1//! Enrichment command handler for extracting memory cards from frames.
2//!
3//! The `enrich` command runs enrichment engines over MV2 frames to extract
4//! structured memory cards (facts, preferences, events, etc.).
5
6use std::path::PathBuf;
7
8#[cfg(feature = "llama-cpp")]
9use anyhow::bail;
10use anyhow::Result;
11use clap::{Args, ValueEnum};
12use memvid_core::{EnrichmentEngine, Memvid, RulesEngine};
13use serde::Serialize;
14
15#[cfg(feature = "llama-cpp")]
16use crate::commands::{default_enrichment_model, get_installed_model_path, LlmModel};
17use crate::config::CliConfig;
18#[cfg(feature = "candle-llm")]
19use crate::enrich::CandlePhiEngine;
20#[cfg(feature = "llama-cpp")]
21use crate::enrich::LlmEngine;
22use crate::enrich::{
23    ClaudeEngine, GeminiEngine, GroqEngine, MistralEngine, OpenAiEngine, XaiEngine,
24};
25
26/// Engine type for enrichment
27#[derive(Debug, Clone, Copy, ValueEnum, Default)]
28pub enum EnrichEngine {
29    /// Rules-based extraction using regex patterns (fast, no models)
30    #[default]
31    Rules,
32    /// LLM-based extraction with Phi-3.5 Mini via llama.cpp (requires model installation)
33    #[cfg(feature = "llama-cpp")]
34    Llm,
35    /// Candle-based Phi-3 extraction (downloads from Hugging Face)
36    #[cfg(feature = "candle-llm")]
37    Candle,
38    /// OpenAI API-based extraction with GPT-4o-mini (requires OPENAI_API_KEY)
39    Openai,
40    /// Claude (Anthropic) API-based extraction with Claude 3.5 Haiku (requires ANTHROPIC_API_KEY)
41    Claude,
42    /// Gemini (Google) API-based extraction with Gemini 2.0 Flash (requires GOOGLE_API_KEY or GEMINI_API_KEY)
43    Gemini,
44    /// xAI API-based extraction with Grok-2 (requires XAI_API_KEY)
45    Xai,
46    /// Groq API-based extraction with Llama 3.3 70B (requires GROQ_API_KEY)
47    Groq,
48    /// Mistral API-based extraction with Mistral Large (requires MISTRAL_API_KEY)
49    Mistral,
50}
51
52/// Arguments for the `enrich` subcommand
53#[derive(Args)]
54pub struct EnrichArgs {
55    /// Path to the `.mv2` file
56    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
57    pub file: PathBuf,
58
59    /// Enrichment engine to use
60    #[arg(long, value_enum, default_value_t = EnrichEngine::Rules)]
61    pub engine: EnrichEngine,
62
63    /// Only process frames that haven't been enriched yet (default)
64    #[arg(long, default_value_t = true)]
65    pub incremental: bool,
66
67    /// Re-enrich all frames, ignoring previous enrichment records
68    #[arg(long, conflicts_with = "incremental")]
69    pub force: bool,
70
71    /// Output results as JSON
72    #[arg(long)]
73    pub json: bool,
74
75    /// Show extracted memory cards
76    #[arg(long)]
77    pub verbose: bool,
78
79    /// Number of parallel workers for API calls (default: 20)
80    #[arg(long, default_value_t = 20)]
81    pub workers: usize,
82
83    /// Number of frames to batch per API call (default: 10)
84    #[arg(long, default_value_t = 10)]
85    pub batch_size: usize,
86}
87
88/// Result of enrichment for JSON output
89#[derive(Debug, Serialize)]
90pub struct EnrichResult {
91    pub engine: String,
92    pub version: String,
93    pub frames_processed: usize,
94    pub cards_extracted: usize,
95    pub total_cards: usize,
96    pub total_entities: usize,
97}
98
99/// Handle the `enrich` command
100#[allow(unused_variables)]
101pub fn handle_enrich(config: &CliConfig, args: EnrichArgs) -> Result<()> {
102    let mut mem = Memvid::open(&args.file)?;
103
104    // Get initial stats
105    let initial_stats = mem.memories_stats();
106
107    // If force mode, clear existing memories first
108    if args.force {
109        mem.clear_memories();
110    }
111
112    // Run the selected engine
113    let (engine_kind, engine_version, frames, cards) = match args.engine {
114        EnrichEngine::Rules => {
115            let engine = RulesEngine::new();
116            let kind = engine.kind().to_string();
117            let version = engine.version().to_string();
118            let (frames, cards) = mem.run_enrichment(&engine)?;
119            (kind, version, frames, cards)
120        }
121        #[cfg(feature = "llama-cpp")]
122        EnrichEngine::Llm => {
123            // Check if model is installed
124            let model = default_enrichment_model();
125            let model_path = match get_installed_model_path(config, model) {
126                Some(path) => path,
127                None => {
128                    bail!(
129                        "LLM model not installed. Run `memvid models install {}` first.",
130                        match model {
131                            LlmModel::Phi35Mini => "phi-3.5-mini",
132                            LlmModel::Phi35MiniQ8 => "phi-3.5-mini-q8",
133                        }
134                    );
135                }
136            };
137
138            // Create and initialize the LLM engine
139            let mut engine = LlmEngine::new(model_path);
140            eprintln!("Loading LLM model...");
141            engine.init()?;
142
143            let kind = engine.kind().to_string();
144            let version = engine.version().to_string();
145            let (frames, cards) = mem.run_enrichment(&engine)?;
146            (kind, version, frames, cards)
147        }
148        #[cfg(feature = "candle-llm")]
149        EnrichEngine::Candle => {
150            // Create and initialize the Candle Phi-3 engine
151            // Uses Q4 quantized GGUF (~2.4GB) stored in ~/.memvid/models/llm/phi-3-mini-q4/
152            eprintln!("Loading Phi-3-mini Q4 model via Candle (first run downloads ~2.4GB to ~/.memvid/models/llm/)...");
153            let mut engine = CandlePhiEngine::from_memvid_models(config.models_dir.clone());
154            engine.init()?;
155
156            let kind = engine.kind().to_string();
157            let version = engine.version().to_string();
158            let (frames, cards) = mem.run_enrichment(&engine)?;
159            (kind, version, frames, cards)
160        }
161        EnrichEngine::Openai => {
162            // Create and initialize the OpenAI engine with parallel batch support
163            eprintln!("Using OpenAI GPT-4o-mini for enrichment (parallel mode, {} workers, batch size {})...", args.workers, args.batch_size);
164            let mut engine = OpenAiEngine::new()
165                .with_parallelism(args.workers)
166                .with_batch_size(args.batch_size);
167            engine.init()?;
168
169            let kind = engine.kind().to_string();
170            let version = engine.version().to_string();
171
172            // Use parallel batch processing for OpenAI
173            let (frames, cards) = run_openai_parallel(&mut mem, &engine, args.workers)?;
174            (kind, version, frames, cards)
175        }
176        EnrichEngine::Claude => {
177            // Create and initialize the Claude engine with parallel support
178            eprintln!(
179                "Using Claude Haiku 4.5 for enrichment (parallel mode, {} workers)...",
180                args.workers
181            );
182            let mut engine = ClaudeEngine::new().with_parallelism(args.workers);
183            engine.init()?;
184
185            let kind = engine.kind().to_string();
186            let version = engine.version().to_string();
187
188            // Use parallel batch processing for Claude
189            let (frames, cards) = run_claude_parallel(&mut mem, &engine, args.workers)?;
190            (kind, version, frames, cards)
191        }
192        EnrichEngine::Gemini => {
193            // Create and initialize the Gemini engine with parallel support
194            eprintln!(
195                "Using Gemini 2.5 Flash for enrichment (parallel mode, {} workers)...",
196                args.workers
197            );
198            let mut engine = GeminiEngine::new().with_parallelism(args.workers);
199            engine.init()?;
200
201            let kind = engine.kind().to_string();
202            let version = engine.version().to_string();
203
204            // Use parallel batch processing for Gemini
205            let (frames, cards) = run_gemini_parallel(&mut mem, &engine, args.workers)?;
206            (kind, version, frames, cards)
207        }
208        EnrichEngine::Xai => {
209            // Create and initialize the xAI engine with parallel support
210            eprintln!(
211                "Using xAI Grok 4 Fast for enrichment (parallel mode, {} workers)...",
212                args.workers
213            );
214            let mut engine = XaiEngine::new().with_parallelism(args.workers);
215            engine.init()?;
216
217            let kind = engine.kind().to_string();
218            let version = engine.version().to_string();
219
220            // Use parallel batch processing for xAI
221            let (frames, cards) = run_xai_parallel(&mut mem, &engine, args.workers)?;
222            (kind, version, frames, cards)
223        }
224        EnrichEngine::Groq => {
225            // Create and initialize the Groq engine with parallel support
226            eprintln!(
227                "Using Groq Llama 3.3 70B for enrichment (parallel mode, {} workers)...",
228                args.workers
229            );
230            let mut engine = GroqEngine::new().with_parallelism(args.workers);
231            engine.init()?;
232
233            let kind = engine.kind().to_string();
234            let version = engine.version().to_string();
235
236            // Use parallel batch processing for Groq
237            let (frames, cards) = run_groq_parallel(&mut mem, &engine, args.workers)?;
238            (kind, version, frames, cards)
239        }
240        EnrichEngine::Mistral => {
241            // Create and initialize the Mistral engine with parallel support
242            eprintln!(
243                "Using Mistral Large for enrichment (parallel mode, {} workers)...",
244                args.workers
245            );
246            let mut engine = MistralEngine::new().with_parallelism(args.workers);
247            engine.init()?;
248
249            let kind = engine.kind().to_string();
250            let version = engine.version().to_string();
251
252            // Use parallel batch processing for Mistral
253            let (frames, cards) = run_mistral_parallel(&mut mem, &engine, args.workers)?;
254            (kind, version, frames, cards)
255        }
256    };
257
258    // Commit changes
259    mem.commit()?;
260
261    // Get final stats
262    let final_stats = mem.memories_stats();
263
264    if args.json {
265        let result = EnrichResult {
266            engine: engine_kind,
267            version: engine_version,
268            frames_processed: frames,
269            cards_extracted: cards,
270            total_cards: final_stats.card_count,
271            total_entities: final_stats.entity_count,
272        };
273        println!("{}", serde_json::to_string_pretty(&result)?);
274    } else {
275        println!("Enrichment complete:");
276        println!("  Engine: {} v{}", engine_kind, engine_version);
277        println!("  Frames processed: {}", frames);
278        println!("  Cards extracted: {}", cards);
279        println!(
280            "  Total cards: {} (+{})",
281            final_stats.card_count,
282            final_stats
283                .card_count
284                .saturating_sub(initial_stats.card_count)
285        );
286        println!("  Entities: {}", final_stats.entity_count);
287
288        if args.verbose && cards > 0 {
289            println!("\nExtracted memory cards:");
290            for entity in mem.memory_entities() {
291                println!("  {}:", entity);
292                for card in mem.get_entity_memories(&entity) {
293                    println!("    - {}: {} = \"{}\"", card.kind, card.slot, card.value);
294                }
295            }
296        }
297    }
298
299    Ok(())
300}
301
302/// Run OpenAI enrichment with parallel batch processing.
303///
304/// This gathers all unenriched frames, sends them to OpenAI in parallel,
305/// and stores the resulting memory cards.
306fn run_openai_parallel(
307    mem: &mut memvid_core::Memvid,
308    engine: &OpenAiEngine,
309    workers: usize,
310) -> Result<(usize, usize)> {
311    use memvid_core::enrich::EnrichmentContext;
312    use memvid_core::EnrichmentEngine;
313
314    let kind = engine.kind();
315    let version = engine.version();
316
317    // Get all unenriched frames
318    let unenriched = mem.get_unenriched_frames(kind, version);
319    let total_frames = unenriched.len();
320
321    if total_frames == 0 {
322        eprintln!("No unenriched frames found.");
323        return Ok((0, 0));
324    }
325
326    eprintln!(
327        "Gathering {} frames for parallel enrichment...",
328        total_frames
329    );
330
331    // Build enrichment contexts for all frames
332    let mut contexts = Vec::with_capacity(total_frames);
333    for frame_id in &unenriched {
334        let frame = match mem.frame_by_id(*frame_id) {
335            Ok(f) => f,
336            Err(_) => continue,
337        };
338
339        // Get full frame content (not truncated preview)
340        let text = match mem.frame_text_by_id(*frame_id) {
341            Ok(t) => t,
342            Err(_) => continue,
343        };
344
345        let uri = frame
346            .uri
347            .clone()
348            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
349        let metadata_json = frame
350            .metadata
351            .as_ref()
352            .and_then(|m| serde_json::to_string(m).ok());
353
354        let ctx = EnrichmentContext::new(
355            *frame_id,
356            uri,
357            text,
358            frame.title.clone(),
359            frame.timestamp,
360            metadata_json,
361        );
362
363        contexts.push(ctx);
364    }
365
366    eprintln!(
367        "Starting parallel enrichment of {} frames with {} workers...",
368        contexts.len(),
369        workers
370    );
371
372    // Run parallel batch enrichment
373    let results = engine.enrich_batch(contexts)?;
374
375    // Store results back to MV2
376    let mut total_cards = 0;
377    for (frame_id, cards) in results {
378        let card_count = cards.len();
379
380        // Store cards
381        let card_ids = if !cards.is_empty() {
382            mem.put_memory_cards(cards)?
383        } else {
384            Vec::new()
385        };
386
387        // Record enrichment
388        mem.record_enrichment(frame_id, kind, version, card_ids)?;
389
390        total_cards += card_count;
391    }
392
393    Ok((total_frames, total_cards))
394}
395
396/// Run Claude enrichment with parallel batch processing.
397fn run_claude_parallel(
398    mem: &mut memvid_core::Memvid,
399    engine: &ClaudeEngine,
400    workers: usize,
401) -> Result<(usize, usize)> {
402    use memvid_core::enrich::EnrichmentContext;
403    use memvid_core::EnrichmentEngine;
404
405    let kind = engine.kind();
406    let version = engine.version();
407
408    let unenriched = mem.get_unenriched_frames(kind, version);
409    let total_frames = unenriched.len();
410
411    if total_frames == 0 {
412        eprintln!("No unenriched frames found.");
413        return Ok((0, 0));
414    }
415
416    eprintln!(
417        "Gathering {} frames for parallel enrichment...",
418        total_frames
419    );
420
421    let mut contexts = Vec::with_capacity(total_frames);
422    for frame_id in &unenriched {
423        let frame = match mem.frame_by_id(*frame_id) {
424            Ok(f) => f,
425            Err(_) => continue,
426        };
427
428        let text = match mem.frame_text_by_id(*frame_id) {
429            Ok(t) => t,
430            Err(_) => continue,
431        };
432
433        let uri = frame
434            .uri
435            .clone()
436            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
437        let metadata_json = frame
438            .metadata
439            .as_ref()
440            .and_then(|m| serde_json::to_string(m).ok());
441
442        let ctx = EnrichmentContext::new(
443            *frame_id,
444            uri,
445            text,
446            frame.title.clone(),
447            frame.timestamp,
448            metadata_json,
449        );
450
451        contexts.push(ctx);
452    }
453
454    eprintln!(
455        "Starting parallel enrichment of {} frames with {} workers...",
456        contexts.len(),
457        workers
458    );
459
460    let results = engine.enrich_batch(contexts)?;
461
462    let mut total_cards = 0;
463    for (frame_id, cards) in results {
464        let card_count = cards.len();
465
466        let card_ids = if !cards.is_empty() {
467            mem.put_memory_cards(cards)?
468        } else {
469            Vec::new()
470        };
471
472        mem.record_enrichment(frame_id, kind, version, card_ids)?;
473
474        total_cards += card_count;
475    }
476
477    Ok((total_frames, total_cards))
478}
479
480/// Run Gemini enrichment with parallel batch processing.
481fn run_gemini_parallel(
482    mem: &mut memvid_core::Memvid,
483    engine: &GeminiEngine,
484    workers: usize,
485) -> Result<(usize, usize)> {
486    use memvid_core::enrich::EnrichmentContext;
487    use memvid_core::EnrichmentEngine;
488
489    let kind = engine.kind();
490    let version = engine.version();
491
492    let unenriched = mem.get_unenriched_frames(kind, version);
493    let total_frames = unenriched.len();
494
495    if total_frames == 0 {
496        eprintln!("No unenriched frames found.");
497        return Ok((0, 0));
498    }
499
500    eprintln!(
501        "Gathering {} frames for parallel enrichment...",
502        total_frames
503    );
504
505    let mut contexts = Vec::with_capacity(total_frames);
506    for frame_id in &unenriched {
507        let frame = match mem.frame_by_id(*frame_id) {
508            Ok(f) => f,
509            Err(_) => continue,
510        };
511
512        let text = match mem.frame_text_by_id(*frame_id) {
513            Ok(t) => t,
514            Err(_) => continue,
515        };
516
517        let uri = frame
518            .uri
519            .clone()
520            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
521        let metadata_json = frame
522            .metadata
523            .as_ref()
524            .and_then(|m| serde_json::to_string(m).ok());
525
526        let ctx = EnrichmentContext::new(
527            *frame_id,
528            uri,
529            text,
530            frame.title.clone(),
531            frame.timestamp,
532            metadata_json,
533        );
534
535        contexts.push(ctx);
536    }
537
538    eprintln!(
539        "Starting parallel enrichment of {} frames with {} workers...",
540        contexts.len(),
541        workers
542    );
543
544    let results = engine.enrich_batch(contexts)?;
545
546    let mut total_cards = 0;
547    for (frame_id, cards) in results {
548        let card_count = cards.len();
549
550        let card_ids = if !cards.is_empty() {
551            mem.put_memory_cards(cards)?
552        } else {
553            Vec::new()
554        };
555
556        mem.record_enrichment(frame_id, kind, version, card_ids)?;
557
558        total_cards += card_count;
559    }
560
561    Ok((total_frames, total_cards))
562}
563
564/// Run xAI enrichment with parallel batch processing.
565fn run_xai_parallel(
566    mem: &mut memvid_core::Memvid,
567    engine: &XaiEngine,
568    workers: usize,
569) -> Result<(usize, usize)> {
570    use memvid_core::enrich::EnrichmentContext;
571    use memvid_core::EnrichmentEngine;
572
573    let kind = engine.kind();
574    let version = engine.version();
575
576    let unenriched = mem.get_unenriched_frames(kind, version);
577    let total_frames = unenriched.len();
578
579    if total_frames == 0 {
580        eprintln!("No unenriched frames found.");
581        return Ok((0, 0));
582    }
583
584    eprintln!(
585        "Gathering {} frames for parallel enrichment...",
586        total_frames
587    );
588
589    let mut contexts = Vec::with_capacity(total_frames);
590    for frame_id in &unenriched {
591        let frame = match mem.frame_by_id(*frame_id) {
592            Ok(f) => f,
593            Err(_) => continue,
594        };
595
596        let text = match mem.frame_text_by_id(*frame_id) {
597            Ok(t) => t,
598            Err(_) => continue,
599        };
600
601        let uri = frame
602            .uri
603            .clone()
604            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
605        let metadata_json = frame
606            .metadata
607            .as_ref()
608            .and_then(|m| serde_json::to_string(m).ok());
609
610        let ctx = EnrichmentContext::new(
611            *frame_id,
612            uri,
613            text,
614            frame.title.clone(),
615            frame.timestamp,
616            metadata_json,
617        );
618
619        contexts.push(ctx);
620    }
621
622    eprintln!(
623        "Starting parallel enrichment of {} frames with {} workers...",
624        contexts.len(),
625        workers
626    );
627
628    let results = engine.enrich_batch(contexts)?;
629
630    let mut total_cards = 0;
631    for (frame_id, cards) in results {
632        let card_count = cards.len();
633
634        let card_ids = if !cards.is_empty() {
635            mem.put_memory_cards(cards)?
636        } else {
637            Vec::new()
638        };
639
640        mem.record_enrichment(frame_id, kind, version, card_ids)?;
641
642        total_cards += card_count;
643    }
644
645    Ok((total_frames, total_cards))
646}
647
648/// Run Groq enrichment with parallel batch processing.
649fn run_groq_parallel(
650    mem: &mut memvid_core::Memvid,
651    engine: &GroqEngine,
652    workers: usize,
653) -> Result<(usize, usize)> {
654    use memvid_core::enrich::EnrichmentContext;
655    use memvid_core::EnrichmentEngine;
656
657    let kind = engine.kind();
658    let version = engine.version();
659
660    let unenriched = mem.get_unenriched_frames(kind, version);
661    let total_frames = unenriched.len();
662
663    if total_frames == 0 {
664        eprintln!("No unenriched frames found.");
665        return Ok((0, 0));
666    }
667
668    eprintln!(
669        "Gathering {} frames for parallel enrichment...",
670        total_frames
671    );
672
673    let mut contexts = Vec::with_capacity(total_frames);
674    for frame_id in &unenriched {
675        let frame = match mem.frame_by_id(*frame_id) {
676            Ok(f) => f,
677            Err(_) => continue,
678        };
679
680        let text = match mem.frame_text_by_id(*frame_id) {
681            Ok(t) => t,
682            Err(_) => continue,
683        };
684
685        let uri = frame
686            .uri
687            .clone()
688            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
689        let metadata_json = frame
690            .metadata
691            .as_ref()
692            .and_then(|m| serde_json::to_string(m).ok());
693
694        let ctx = EnrichmentContext::new(
695            *frame_id,
696            uri,
697            text,
698            frame.title.clone(),
699            frame.timestamp,
700            metadata_json,
701        );
702
703        contexts.push(ctx);
704    }
705
706    eprintln!(
707        "Starting parallel enrichment of {} frames with {} workers...",
708        contexts.len(),
709        workers
710    );
711
712    let results = engine.enrich_batch(contexts)?;
713
714    let mut total_cards = 0;
715    for (frame_id, cards) in results {
716        let card_count = cards.len();
717
718        let card_ids = if !cards.is_empty() {
719            mem.put_memory_cards(cards)?
720        } else {
721            Vec::new()
722        };
723
724        mem.record_enrichment(frame_id, kind, version, card_ids)?;
725
726        total_cards += card_count;
727    }
728
729    Ok((total_frames, total_cards))
730}
731
732/// Run Mistral enrichment with parallel batch processing.
733fn run_mistral_parallel(
734    mem: &mut memvid_core::Memvid,
735    engine: &MistralEngine,
736    workers: usize,
737) -> Result<(usize, usize)> {
738    use memvid_core::enrich::EnrichmentContext;
739    use memvid_core::EnrichmentEngine;
740
741    let kind = engine.kind();
742    let version = engine.version();
743
744    let unenriched = mem.get_unenriched_frames(kind, version);
745    let total_frames = unenriched.len();
746
747    if total_frames == 0 {
748        eprintln!("No unenriched frames found.");
749        return Ok((0, 0));
750    }
751
752    eprintln!(
753        "Gathering {} frames for parallel enrichment...",
754        total_frames
755    );
756
757    let mut contexts = Vec::with_capacity(total_frames);
758    for frame_id in &unenriched {
759        let frame = match mem.frame_by_id(*frame_id) {
760            Ok(f) => f,
761            Err(_) => continue,
762        };
763
764        let text = match mem.frame_text_by_id(*frame_id) {
765            Ok(t) => t,
766            Err(_) => continue,
767        };
768
769        let uri = frame
770            .uri
771            .clone()
772            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
773        let metadata_json = frame
774            .metadata
775            .as_ref()
776            .and_then(|m| serde_json::to_string(m).ok());
777
778        let ctx = EnrichmentContext::new(
779            *frame_id,
780            uri,
781            text,
782            frame.title.clone(),
783            frame.timestamp,
784            metadata_json,
785        );
786
787        contexts.push(ctx);
788    }
789
790    eprintln!(
791        "Starting parallel enrichment of {} frames with {} workers...",
792        contexts.len(),
793        workers
794    );
795
796    let results = engine.enrich_batch(contexts)?;
797
798    let mut total_cards = 0;
799    for (frame_id, cards) in results {
800        let card_count = cards.len();
801
802        let card_ids = if !cards.is_empty() {
803            mem.put_memory_cards(cards)?
804        } else {
805            Vec::new()
806        };
807
808        mem.record_enrichment(frame_id, kind, version, card_ids)?;
809
810        total_cards += card_count;
811    }
812
813    Ok((total_frames, total_cards))
814}
815
816/// Handle the `memories` subcommand (view memory cards)
817#[derive(Args)]
818pub struct MemoriesArgs {
819    /// Path to the `.mv2` file
820    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
821    pub file: PathBuf,
822
823    /// Filter by entity
824    #[arg(long)]
825    pub entity: Option<String>,
826
827    /// Filter by slot
828    #[arg(long)]
829    pub slot: Option<String>,
830
831    /// Output as JSON
832    #[arg(long)]
833    pub json: bool,
834}
835
836/// Memory card output for JSON serialization
837#[derive(Debug, Serialize)]
838pub struct MemoryOutput {
839    pub id: u64,
840    pub kind: String,
841    pub entity: String,
842    pub slot: String,
843    pub value: String,
844    pub polarity: Option<String>,
845    pub document_date: Option<i64>,
846    pub source_frame_id: u64,
847}
848
849pub fn handle_memories(_config: &CliConfig, args: MemoriesArgs) -> Result<()> {
850    let mem = Memvid::open(&args.file)?;
851
852    let stats = mem.memories_stats();
853
854    if args.json {
855        let mut cards: Vec<MemoryOutput> = Vec::new();
856
857        if let Some(entity) = &args.entity {
858            if let Some(slot) = &args.slot {
859                // Specific entity:slot
860                if let Some(card) = mem.get_current_memory(entity, slot) {
861                    cards.push(card_to_output(card));
862                }
863            } else {
864                // All cards for entity
865                for card in mem.get_entity_memories(entity) {
866                    cards.push(card_to_output(card));
867                }
868            }
869        } else {
870            // All entities
871            for entity in mem.memory_entities() {
872                for card in mem.get_entity_memories(&entity) {
873                    cards.push(card_to_output(card));
874                }
875            }
876        }
877
878        println!("{}", serde_json::to_string_pretty(&cards)?);
879    } else {
880        println!(
881            "Memory cards: {} total, {} entities",
882            stats.card_count, stats.entity_count
883        );
884        println!();
885
886        if let Some(entity) = &args.entity {
887            if let Some(slot) = &args.slot {
888                // Specific entity:slot
889                if let Some(card) = mem.get_current_memory(entity, slot) {
890                    println!("{}:{} = \"{}\"", entity, slot, card.value);
891                } else {
892                    println!("No memory found for {}:{}", entity, slot);
893                }
894            } else {
895                // All cards for entity
896                println!("{}:", entity);
897                for card in mem.get_entity_memories(entity) {
898                    println!("  {}: {} = \"{}\"", card.kind, card.slot, card.value);
899                }
900            }
901        } else {
902            // All entities
903            for entity in mem.memory_entities() {
904                println!("{}:", entity);
905                for card in mem.get_entity_memories(&entity) {
906                    let polarity = card
907                        .polarity
908                        .as_ref()
909                        .map(|p| format!(" [{}]", p))
910                        .unwrap_or_default();
911                    println!(
912                        "  {}: {} = \"{}\"{}",
913                        card.kind, card.slot, card.value, polarity
914                    );
915                }
916                println!();
917            }
918        }
919    }
920
921    Ok(())
922}
923
924fn card_to_output(card: &memvid_core::MemoryCard) -> MemoryOutput {
925    MemoryOutput {
926        id: card.id,
927        kind: card.kind.to_string(),
928        entity: card.entity.clone(),
929        slot: card.slot.clone(),
930        value: card.value.clone(),
931        polarity: card.polarity.as_ref().map(|p| p.to_string()),
932        document_date: card.document_date,
933        source_frame_id: card.source_frame_id,
934    }
935}
936
937// ============================================================================
938// State Command - Entity-Centric Current State Queries
939// ============================================================================
940
941/// Handle the `state` subcommand (query current entity state)
942#[derive(Args)]
943pub struct StateArgs {
944    /// Path to the `.mv2` file
945    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
946    pub file: PathBuf,
947
948    /// Entity to query (required)
949    #[arg(long, short = 'e')]
950    pub entity: String,
951
952    /// Specific slot to query (optional, omit for full entity profile)
953    #[arg(long, short = 's')]
954    pub slot: Option<String>,
955
956    /// Query state at a specific point in time (Unix timestamp)
957    #[arg(long)]
958    pub at_time: Option<i64>,
959
960    /// Output as JSON
961    #[arg(long)]
962    pub json: bool,
963}
964
965/// State output for JSON serialization
966#[derive(Debug, Serialize)]
967pub struct StateOutput {
968    pub entity: String,
969    #[serde(skip_serializing_if = "Option::is_none")]
970    pub slot: Option<String>,
971    #[serde(skip_serializing_if = "Option::is_none")]
972    pub at_time: Option<i64>,
973    pub state: StateValue,
974}
975
/// The state value - either a single slot or full profile
///
/// Because of `#[serde(untagged)]` no variant name is written: `Single`
/// serializes as a JSON object and `Profile` as a JSON array.
#[derive(Debug, Serialize)]
#[serde(untagged)]
pub enum StateValue {
    /// Single slot value
    Single {
        /// Value stored on the card.
        value: String,
        /// Card kind rendered with `to_string()`.
        kind: String,
        /// Card polarity rendered with `to_string()`; serialized as `null`
        /// when absent (no `skip_serializing_if` here).
        polarity: Option<String>,
        /// Frame the card was extracted from.
        source_frame_id: u64,
        /// Document date copied from the card; `null` when absent.
        document_date: Option<i64>,
    },
    /// Full entity profile
    Profile(Vec<SlotState>),
}
991
/// One row of a full entity profile: the card `handle_state` selected for a
/// slot, flattened into strings and plain numbers for JSON output.
#[derive(Debug, Serialize)]
pub struct SlotState {
    /// Slot name; profiles are sorted by this field before printing.
    pub slot: String,
    /// Value stored on the selected card.
    pub value: String,
    /// Card kind rendered with `to_string()`.
    pub kind: String,
    /// Card polarity rendered with `to_string()`; `null` in JSON when absent.
    pub polarity: Option<String>,
    /// Frame the selected card was extracted from.
    pub source_frame_id: u64,
    /// Document date copied from the card; `null` in JSON when absent.
    pub document_date: Option<i64>,
}
1001
1002pub fn handle_state(_config: &CliConfig, args: StateArgs) -> Result<()> {
1003    let mem = Memvid::open(&args.file)?;
1004
1005    let entity = args.entity.to_lowercase(); // Normalize entity name
1006
1007    if let Some(slot) = &args.slot {
1008        // O(1) lookup for specific entity:slot
1009        let card = if let Some(ts) = args.at_time {
1010            mem.get_memory_at_time(&entity, slot, ts)
1011        } else {
1012            mem.get_current_memory(&entity, slot)
1013        };
1014
1015        if args.json {
1016            if let Some(card) = card {
1017                let output = StateOutput {
1018                    entity: entity.clone(),
1019                    slot: Some(slot.clone()),
1020                    at_time: args.at_time,
1021                    state: StateValue::Single {
1022                        value: card.value.clone(),
1023                        kind: card.kind.to_string(),
1024                        polarity: card.polarity.as_ref().map(|p| p.to_string()),
1025                        source_frame_id: card.source_frame_id,
1026                        document_date: card.document_date,
1027                    },
1028                };
1029                println!("{}", serde_json::to_string_pretty(&output)?);
1030            } else {
1031                println!("null");
1032            }
1033        } else {
1034            if let Some(card) = card {
1035                let time_info = if let Some(ts) = args.at_time {
1036                    format!(" (at {})", format_timestamp(ts))
1037                } else {
1038                    String::new()
1039                };
1040                let polarity = card
1041                    .polarity
1042                    .as_ref()
1043                    .map(|p| format!(" [{}]", p))
1044                    .unwrap_or_default();
1045                println!(
1046                    "{}:{} = \"{}\"{}{}",
1047                    entity, slot, card.value, polarity, time_info
1048                );
1049                println!("  kind: {}", card.kind);
1050                println!("  source: frame {}", card.source_frame_id);
1051                if let Some(date) = card.document_date {
1052                    println!("  date: {}", format_timestamp(date));
1053                }
1054            } else {
1055                let time_info = if let Some(ts) = args.at_time {
1056                    format!(" at {}", format_timestamp(ts))
1057                } else {
1058                    String::new()
1059                };
1060                println!("No value for {}:{}{}", entity, slot, time_info);
1061            }
1062        }
1063    } else {
1064        // Get full entity profile
1065        let cards = mem.get_entity_memories(&entity);
1066
1067        if cards.is_empty() {
1068            if args.json {
1069                println!("null");
1070            } else {
1071                println!("No state found for entity: {}", entity);
1072            }
1073            return Ok(());
1074        }
1075
1076        // Group by slot and get current value for each
1077        let mut slots: std::collections::HashMap<String, &memvid_core::MemoryCard> =
1078            std::collections::HashMap::new();
1079
1080        for card in &cards {
1081            // Keep the most recent card for each slot
1082            let dominated = slots
1083                .get(&card.slot)
1084                .map(|existing| card.effective_timestamp() > existing.effective_timestamp())
1085                .unwrap_or(true);
1086
1087            if dominated && !card.is_retracted() {
1088                slots.insert(card.slot.clone(), card);
1089            }
1090        }
1091
1092        if args.json {
1093            let mut profile: Vec<SlotState> = slots
1094                .values()
1095                .map(|card| SlotState {
1096                    slot: card.slot.clone(),
1097                    value: card.value.clone(),
1098                    kind: card.kind.to_string(),
1099                    polarity: card.polarity.as_ref().map(|p| p.to_string()),
1100                    source_frame_id: card.source_frame_id,
1101                    document_date: card.document_date,
1102                })
1103                .collect();
1104            profile.sort_by(|a, b| a.slot.cmp(&b.slot));
1105
1106            let output = StateOutput {
1107                entity: entity.clone(),
1108                slot: None,
1109                at_time: args.at_time,
1110                state: StateValue::Profile(profile),
1111            };
1112            println!("{}", serde_json::to_string_pretty(&output)?);
1113        } else {
1114            println!("{}:", entity);
1115            let mut sorted_slots: Vec<_> = slots.into_iter().collect();
1116            sorted_slots.sort_by(|a, b| a.0.cmp(&b.0));
1117
1118            for (slot, card) in sorted_slots {
1119                let polarity = card
1120                    .polarity
1121                    .as_ref()
1122                    .map(|p| format!(" [{}]", p))
1123                    .unwrap_or_default();
1124                println!(
1125                    "  {}: \"{}\"{}  ({})",
1126                    slot, card.value, polarity, card.kind
1127                );
1128            }
1129        }
1130    }
1131
1132    Ok(())
1133}
1134
1135fn format_timestamp(ts: i64) -> String {
1136    use std::time::{Duration, UNIX_EPOCH};
1137    let datetime = UNIX_EPOCH + Duration::from_secs(ts as u64);
1138    let datetime: chrono::DateTime<chrono::Utc> = datetime.into();
1139    datetime.format("%Y-%m-%d %H:%M:%S UTC").to_string()
1140}
1141
/// Arguments for the `facts` (entity audit) command.
// The `///` lines here are left untouched: clap's derive macros reuse doc
// comments as `--help` text. Extra notes therefore use plain `//` comments.
#[derive(Debug, Args)]
pub struct FactsArgs {
    /// Path to the `.mv2` file
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,

    /// Filter by entity (optional)
    // Lowercased by `handle_facts` before lookup.
    #[arg(long, short = 'e')]
    pub entity: Option<String>,

    /// Filter by predicate/slot (optional)
    // Compared with the card slot ignoring ASCII case.
    #[arg(long, short = 'p')]
    pub predicate: Option<String>,

    /// Filter by value (optional)
    // Case-insensitive substring match against the card value.
    #[arg(long, short = 'v')]
    pub value: Option<String>,

    /// Show full history including superseded values
    // NOTE(review): `handle_facts` never reads this flag; every matching card
    // is listed regardless. Confirm whether a "current only" mode is intended.
    #[arg(long)]
    pub history: bool,

    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
1169
/// Audit log entry for JSON output.
///
/// One entry is produced per memory card that passes the `facts` filters.
#[derive(Debug, Serialize)]
pub struct AuditLogEntry {
    /// Frame the card was extracted from.
    pub frame_id: u64,
    /// The card's document date, falling back to its creation time; entries
    /// are sorted ascending on this value.
    pub timestamp: Option<i64>,
    /// Entity as stored on the card.
    pub entity: String,
    /// Slot (predicate) as stored on the card.
    pub slot: String,
    /// Value as stored on the card.
    pub value: String,
    /// The card's version relation, via `as_str()`; upper-cased in text output.
    pub relation: String,
    /// Card kind rendered with `to_string()`.
    pub kind: String,
    /// Card polarity rendered with `to_string()`; omitted when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub polarity: Option<String>,
    /// Confidence copied from the card; omitted when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f32>,
    /// Name of the engine recorded on the card.
    pub engine: String,
    /// Currently always `None`: `handle_facts` does not yet track superseded
    /// frame IDs, so the key never appears in the JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub supersedes: Option<u64>,
}
1188
/// Audit output for JSON serialization.
///
/// Top-level object printed by `handle_facts` when `--json` is set.
#[derive(Debug, Serialize)]
pub struct AuditLogOutput {
    /// Number of items in `entries`.
    pub total: usize,
    /// The `--entity` filter as typed by the user (not lowercased); omitted
    /// when no filter was given.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub entity_filter: Option<String>,
    /// The `--predicate` filter as typed; omitted when no filter was given.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub predicate_filter: Option<String>,
    /// The `--value` filter as typed; omitted when no filter was given.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub value_filter: Option<String>,
    /// Matching entries, oldest first.
    pub entries: Vec<AuditLogEntry>,
}
1201
/// Format a Unix timestamp as ISO 8601 (without chrono).
///
/// `ts` is interpreted as seconds since 1970-01-01T00:00:00Z. The result has
/// the form `YYYY-MM-DDTHH:MM:SSZ` in the proleptic Gregorian calendar.
///
/// Negative timestamps are formatted as the corresponding pre-1970 instant
/// (for example `-1` is `1969-12-31T23:59:59Z`). The conversion is pure
/// integer arithmetic, so it cannot panic and runs in constant time for any
/// `i64` input.
fn format_audit_timestamp(ts: i64) -> String {
    const SECS_PER_DAY: i64 = 86_400;
    // Days from 0000-03-01 to 1970-01-01; shifting the epoch to March 1st puts
    // the leap day at the end of the (shifted) year.
    const DAYS_TO_UNIX_EPOCH: i64 = 719_468;
    // Number of days in one 400-year Gregorian cycle.
    const DAYS_PER_ERA: i64 = 146_097;

    // Euclidean division keeps the time-of-day non-negative for ts < 0.
    let days = ts.div_euclid(SECS_PER_DAY);
    let secs_of_day = ts.rem_euclid(SECS_PER_DAY);

    let hours = secs_of_day / 3_600;
    let minutes = (secs_of_day % 3_600) / 60;
    let seconds = secs_of_day % 60;

    // Days -> civil date (Howard Hinnant's `civil_from_days` algorithm).
    let shifted = days + DAYS_TO_UNIX_EPOCH;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA); // [0, 146096]
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365; // [0, 399]
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // [0, 365]
    let month_index = (5 * day_of_year + 2) / 153; // 0 = March ... 11 = February
    let day = day_of_year - (153 * month_index + 2) / 5 + 1; // [1, 31]
    let month = if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    };
    // January and February belong to the next civil year of the shifted calendar.
    let year = year_of_era + era * 400 + i64::from(month <= 2);

    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
        year, month, day, hours, minutes, seconds
    )
}
1257
1258pub fn handle_facts(_config: &CliConfig, args: FactsArgs) -> Result<()> {
1259    let mem = Memvid::open(&args.file)?;
1260
1261    // Collect all memory cards matching the filters
1262    let mut entries: Vec<AuditLogEntry> = Vec::new();
1263
1264    // Get entities to iterate
1265    let entities: Vec<String> = if let Some(entity) = &args.entity {
1266        vec![entity.to_lowercase()]
1267    } else {
1268        mem.memory_entities()
1269    };
1270
1271    for entity in entities {
1272        let cards = mem.get_entity_memories(&entity);
1273
1274        for card in cards {
1275            // Filter by predicate
1276            if let Some(pred) = &args.predicate {
1277                if !card.slot.eq_ignore_ascii_case(pred) {
1278                    continue;
1279                }
1280            }
1281
1282            // Filter by value
1283            if let Some(val) = &args.value {
1284                if !card.value.to_lowercase().contains(&val.to_lowercase()) {
1285                    continue;
1286                }
1287            }
1288
1289            entries.push(AuditLogEntry {
1290                frame_id: card.source_frame_id,
1291                timestamp: card.document_date.or(Some(card.created_at)),
1292                entity: card.entity.clone(),
1293                slot: card.slot.clone(),
1294                value: card.value.clone(),
1295                relation: card.version_relation.as_str().to_string(),
1296                kind: card.kind.to_string(),
1297                polarity: card.polarity.as_ref().map(|p| p.to_string()),
1298                confidence: card.confidence,
1299                engine: card.engine.clone(),
1300                supersedes: None, // TODO: track superseded frame IDs
1301            });
1302        }
1303    }
1304
1305    // Sort by timestamp (oldest first for audit trail)
1306    entries.sort_by(|a, b| {
1307        let ts_a = a.timestamp.unwrap_or(0);
1308        let ts_b = b.timestamp.unwrap_or(0);
1309        ts_a.cmp(&ts_b)
1310    });
1311
1312    if args.json {
1313        let output = AuditLogOutput {
1314            total: entries.len(),
1315            entity_filter: args.entity.clone(),
1316            predicate_filter: args.predicate.clone(),
1317            value_filter: args.value.clone(),
1318            entries,
1319        };
1320        println!("{}", serde_json::to_string_pretty(&output)?);
1321    } else {
1322        if entries.is_empty() {
1323            println!("No matching facts found.");
1324            return Ok(());
1325        }
1326
1327        println!("Audit Trail ({} entries):", entries.len());
1328        println!();
1329
1330        for entry in entries {
1331            let ts_str = entry
1332                .timestamp
1333                .map(format_audit_timestamp)
1334                .unwrap_or_else(|| "unknown".to_string());
1335
1336            let polarity_suffix = entry
1337                .polarity
1338                .as_ref()
1339                .map(|p| format!(" [{}]", p))
1340                .unwrap_or_default();
1341
1342            let conf = entry
1343                .confidence
1344                .map(|c| format!(" (conf: {:.2})", c))
1345                .unwrap_or_default();
1346
1347            let _polarity_prefix = if entry.polarity.is_some() {
1348                if entry.polarity.as_deref() == Some("negative") {
1349                    "-"
1350                } else if entry.polarity.as_deref() == Some("positive") {
1351                    "+"
1352                } else {
1353                    ""
1354                }
1355            } else {
1356                ""
1357            };
1358
1359            println!(
1360                "Frame {} ({}): {} {}:{}=\"{}\"{}  [{}]{}",
1361                entry.frame_id,
1362                ts_str,
1363                entry.relation.to_uppercase(),
1364                entry.entity,
1365                entry.slot,
1366                entry.value,
1367                polarity_suffix,
1368                entry.engine,
1369                conf,
1370            );
1371        }
1372    }
1373
1374    Ok(())
1375}
1376
1377// ============================================================================
1378// Export Command - Export facts to standard formats (N-Triples, JSON, CSV)
1379// ============================================================================
1380
/// Export format for the `export` command.
// The `///` lines on the variants are left untouched: clap's `ValueEnum`
// derive reuses them as help text for the possible values.
#[derive(Debug, Clone, Copy, ValueEnum, Default)]
pub enum ExportFormat {
    /// N-Triples RDF format (.nt)
    // One `<subject> <predicate> "object" .` line per triplet.
    #[default]
    Ntriples,
    /// JSON format with full metadata
    // Pretty-printed array of `ExportEntry` values.
    Json,
    /// CSV format for spreadsheet import
    // Header row first; provenance columns appear only with `--with-provenance`.
    Csv,
}
1392
/// Arguments for the `export` subcommand.
// The `///` lines here are left untouched: clap's derive macros reuse doc
// comments as `--help` text. Extra notes therefore use plain `//` comments.
#[derive(Debug, Args)]
pub struct ExportArgs {
    /// Path to the `.mv2` file
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,

    /// Export format
    #[arg(long, short = 'f', value_enum, default_value_t = ExportFormat::Ntriples)]
    pub format: ExportFormat,

    /// Filter by entity (optional)
    // Lowercased by `handle_export` before lookup.
    #[arg(long, short = 'e')]
    pub entity: Option<String>,

    /// Filter by predicate/slot (optional)
    // Compared with the card slot ignoring ASCII case.
    #[arg(long, short = 'p')]
    pub predicate: Option<String>,

    /// Base URI for N-Triples output (default: mv2://entity/)
    // Inserted verbatim in front of the normalized subject, and in front of
    // `pred/<predicate>` for predicates; it is not validated or escaped.
    #[arg(long, default_value = "mv2://entity/")]
    pub base_uri: String,

    /// Include provenance metadata in output
    // Affects the JSON and CSV formats only; the N-Triples writer emits the
    // bare subject/predicate/object triple either way.
    #[arg(long)]
    pub with_provenance: bool,
}
1420
/// Export entry for JSON output.
///
/// Also serves as the in-memory triplet that the N-Triples and CSV writers
/// read from. The four optional fields are filled only when
/// `--with-provenance` is set and are omitted from the JSON otherwise.
#[derive(Debug, Serialize)]
pub struct ExportEntry {
    /// Entity of the card.
    pub subject: String,
    /// Slot of the card.
    pub predicate: String,
    /// Value of the card.
    pub object: String,
    /// Frame the card was extracted from.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_frame_id: Option<u64>,
    /// The card's document date, falling back to its creation time.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub timestamp: Option<i64>,
    /// Name of the engine recorded on the card.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub engine: Option<String>,
    /// Confidence copied from the card (may be absent even with provenance).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f32>,
}
1436
/// Escape a string for use inside an N-Triples quoted literal.
///
/// Backslash, double quote, line feed, carriage return and tab are replaced
/// by their two-character escape sequences; every other character is copied
/// through unchanged.
fn escape_ntriples(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        // Look up the escape sequence, if this character needs one.
        let replacement = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        match replacement {
            Some(sequence) => escaped.push_str(sequence),
            None => escaped.push(ch),
        }
    }
    escaped
}
1452
/// Escape a string for CSV format.
///
/// A field containing a comma, a double quote, a line feed or a carriage
/// return is wrapped in double quotes, with embedded quotes doubled (RFC 4180
/// style). Any other field is returned unchanged.
fn escape_csv(s: &str) -> String {
    // A bare '\r' must trigger quoting too: many CSV readers treat it as a
    // record separator, so leaving it unquoted would split the row.
    let needs_quoting = s.contains(|c: char| matches!(c, ',' | '"' | '\n' | '\r'));

    if needs_quoting {
        format!("\"{}\"", s.replace('"', "\"\""))
    } else {
        s.to_string()
    }
}
1461
/// Normalize an entity name into a valid URI component.
///
/// Every character that would either act as a URI delimiter (space, `/`,
/// `:`, `#`, `?`, `&`) or is not allowed inside an N-Triples `<...>` IRI
/// (`<`, `>`, `"`, `{`, `}`, `|`, `^`, `` ` ``, `\` and control characters)
/// is replaced by `_`. All other characters, including non-ASCII ones, are
/// kept as-is.
fn normalize_uri_component(s: &str) -> String {
    s.chars()
        .map(|c| match c {
            // Delimiters that would change how the URI is parsed.
            ' ' | '/' | ':' | '#' | '?' | '&' => '_',
            // Characters that would terminate or invalidate the enclosing
            // `<...>` IRI in the N-Triples output.
            '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\\' => '_',
            // Tabs, newlines and other control characters.
            c if c.is_control() => '_',
            other => other,
        })
        .collect()
}
1471
1472pub fn handle_export(_config: &CliConfig, args: ExportArgs) -> Result<()> {
1473    let mem = Memvid::open(&args.file)?;
1474
1475    // Collect all memory cards matching the filters
1476    let entities: Vec<String> = if let Some(entity) = &args.entity {
1477        vec![entity.to_lowercase()]
1478    } else {
1479        mem.memory_entities()
1480    };
1481
1482    // Build list of triplets
1483    let mut triplets: Vec<ExportEntry> = Vec::new();
1484
1485    for entity in entities {
1486        let cards = mem.get_entity_memories(&entity);
1487
1488        for card in cards {
1489            // Filter by predicate
1490            if let Some(pred) = &args.predicate {
1491                if !card.slot.eq_ignore_ascii_case(pred) {
1492                    continue;
1493                }
1494            }
1495
1496            // Skip retracted cards
1497            if card.is_retracted() {
1498                continue;
1499            }
1500
1501            triplets.push(ExportEntry {
1502                subject: card.entity.clone(),
1503                predicate: card.slot.clone(),
1504                object: card.value.clone(),
1505                source_frame_id: if args.with_provenance {
1506                    Some(card.source_frame_id)
1507                } else {
1508                    None
1509                },
1510                timestamp: if args.with_provenance {
1511                    card.document_date.or(Some(card.created_at))
1512                } else {
1513                    None
1514                },
1515                engine: if args.with_provenance {
1516                    Some(card.engine.clone())
1517                } else {
1518                    None
1519                },
1520                confidence: if args.with_provenance {
1521                    card.confidence
1522                } else {
1523                    None
1524                },
1525            });
1526        }
1527    }
1528
1529    match args.format {
1530        ExportFormat::Ntriples => {
1531            // Output N-Triples format
1532            // Format: <subject> <predicate> "object" .
1533            for t in &triplets {
1534                let subject_uri =
1535                    format!("<{}{}>", args.base_uri, normalize_uri_component(&t.subject));
1536                let predicate_uri = format!(
1537                    "<{}pred/{}>",
1538                    args.base_uri,
1539                    normalize_uri_component(&t.predicate)
1540                );
1541                let object_literal = format!("\"{}\"", escape_ntriples(&t.object));
1542
1543                println!("{} {} {} .", subject_uri, predicate_uri, object_literal);
1544            }
1545        }
1546        ExportFormat::Json => {
1547            // Output JSON format
1548            println!("{}", serde_json::to_string_pretty(&triplets)?);
1549        }
1550        ExportFormat::Csv => {
1551            // Output CSV format
1552            if args.with_provenance {
1553                println!("subject,predicate,object,source_frame_id,timestamp,engine,confidence");
1554            } else {
1555                println!("subject,predicate,object");
1556            }
1557
1558            for t in &triplets {
1559                if args.with_provenance {
1560                    println!(
1561                        "{},{},{},{},{},{},{}",
1562                        escape_csv(&t.subject),
1563                        escape_csv(&t.predicate),
1564                        escape_csv(&t.object),
1565                        t.source_frame_id
1566                            .map(|id| id.to_string())
1567                            .unwrap_or_default(),
1568                        t.timestamp.map(|ts| ts.to_string()).unwrap_or_default(),
1569                        t.engine.as_deref().map(escape_csv).unwrap_or_default(),
1570                        t.confidence
1571                            .map(|c| format!("{:.2}", c))
1572                            .unwrap_or_default(),
1573                    );
1574                } else {
1575                    println!(
1576                        "{},{},{}",
1577                        escape_csv(&t.subject),
1578                        escape_csv(&t.predicate),
1579                        escape_csv(&t.object),
1580                    );
1581                }
1582            }
1583        }
1584    }
1585
1586    // Output count to stderr so it doesn't pollute stdout piping
1587    eprintln!("Exported {} triplets", triplets.len());
1588
1589    Ok(())
1590}
1591
1592// ============================================================================
1593// Schema Command - Infer and manage predicate schemas
1594// ============================================================================
1595
/// Arguments for the `schema` subcommand.
// Thin wrapper: the only field is the nested subcommand, which
// `handle_schema` dispatches on.
#[derive(Debug, Args)]
pub struct SchemaArgs {
    #[command(subcommand)]
    pub command: SchemaCommand,
}
1602
/// Schema subcommands.
// The `///` lines on the variants are left untouched: clap's `Subcommand`
// derive reuses them as help text.
#[derive(Debug, clap::Subcommand)]
pub enum SchemaCommand {
    /// Infer schemas from existing memory cards
    // Handled by `handle_schema_infer`.
    Infer(SchemaInferArgs),
    /// List registered schemas (built-in + custom)
    // Handled by `handle_schema_list`.
    List(SchemaListArgs),
}
1611
/// Arguments for `schema infer`.
// The `///` lines here are left untouched: clap's derive macros reuse doc
// comments as `--help` text. Extra notes therefore use plain `//` comments.
#[derive(Debug, Args)]
pub struct SchemaInferArgs {
    /// Path to the `.mv2` file
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,

    /// Register inferred schemas to the memory file
    // When set, `handle_schema_infer` registers the schemas and commits the
    // file before printing the summary.
    #[arg(long)]
    pub register: bool,

    /// Overwrite existing schemas when registering
    // `requires = "register"` makes clap reject `--overwrite` on its own.
    #[arg(long, requires = "register")]
    pub overwrite: bool,

    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
1631
/// Arguments for `schema list`.
// The `///` lines here are left untouched: clap's derive macros reuse doc
// comments as `--help` text. Extra notes therefore use plain `//` comments.
#[derive(Debug, Args)]
pub struct SchemaListArgs {
    /// Path to the `.mv2` file (optional, shows built-in schemas if omitted)
    // Without a file, `handle_schema_list` falls back to a fresh
    // `memvid_core::SchemaRegistry::new()`.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: Option<PathBuf>,

    /// Output as JSON
    #[arg(long)]
    pub json: bool,

    /// Show only built-in schemas
    // Filters on each schema's `builtin` flag; also switches the table title.
    #[arg(long)]
    pub builtin_only: bool,
}
1647
/// Schema list entry for JSON output.
///
/// Flattened copy of one registry schema as built by `handle_schema_list`;
/// the same entries also feed the text table.
#[derive(Debug, Serialize)]
pub struct SchemaListEntry {
    /// Schema identifier; entries are sorted by this field.
    pub id: String,
    /// Schema name.
    pub name: String,
    /// Optional description; omitted from the JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Textual description of the schema's value range.
    pub value_type: String,
    /// Either `"single"` or `"multiple"`.
    pub cardinality: String,
    /// Domain entries rendered via `as_str()`; an empty list is shown as `*`
    /// in the text table.
    pub domain: Vec<String>,
    /// Whether the schema is flagged as built-in.
    pub builtin: bool,
    /// Optional inverse, copied from the schema; omitted from the JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub inverse: Option<String>,
}
1662
1663pub fn handle_schema(_config: &CliConfig, args: SchemaArgs) -> Result<()> {
1664    match args.command {
1665        SchemaCommand::Infer(infer_args) => handle_schema_infer(_config, infer_args),
1666        SchemaCommand::List(list_args) => handle_schema_list(_config, list_args),
1667    }
1668}
1669
1670fn handle_schema_infer(_config: &CliConfig, args: SchemaInferArgs) -> Result<()> {
1671    let mut mem = Memvid::open(&args.file)?;
1672
1673    // Get schema summary (which includes inference)
1674    let summary = mem.schema_summary();
1675
1676    if summary.is_empty() {
1677        if args.json {
1678            println!("[]");
1679        } else {
1680            println!("No predicates found in memory.");
1681        }
1682        return Ok(());
1683    }
1684
1685    if args.register {
1686        let count = mem.register_inferred_schemas(args.overwrite);
1687        mem.commit()?;
1688        eprintln!("Registered {} inferred schemas", count);
1689    }
1690
1691    if args.json {
1692        println!("{}", serde_json::to_string_pretty(&summary)?);
1693    } else {
1694        println!("Inferred Schemas ({} predicates):", summary.len());
1695        println!();
1696        println!(
1697            "{:<20} {:<12} {:<10} {:<8} {:<8} {:<8} {}",
1698            "PREDICATE", "TYPE", "CARDINAL", "ENTITIES", "VALUES", "UNIQUE", "BUILTIN"
1699        );
1700        println!("{}", "-".repeat(80));
1701
1702        for entry in &summary {
1703            let cardinality = match entry.cardinality {
1704                memvid_core::Cardinality::Single => "single",
1705                memvid_core::Cardinality::Multiple => "multiple",
1706            };
1707            let builtin = if entry.is_builtin { "yes" } else { "-" };
1708
1709            println!(
1710                "{:<20} {:<12} {:<10} {:<8} {:<8} {:<8} {}",
1711                truncate(&entry.predicate, 20),
1712                truncate(&entry.inferred_type, 12),
1713                cardinality,
1714                entry.entity_count,
1715                entry.value_count,
1716                entry.unique_values,
1717                builtin
1718            );
1719        }
1720    }
1721
1722    Ok(())
1723}
1724
1725fn handle_schema_list(_config: &CliConfig, args: SchemaListArgs) -> Result<()> {
1726    // If file is provided, open it to get custom schemas; otherwise use default registry
1727    let registry = if let Some(ref path) = args.file {
1728        let mem = Memvid::open(path)?;
1729        mem.schema_registry().clone()
1730    } else {
1731        memvid_core::SchemaRegistry::new()
1732    };
1733
1734    let mut entries: Vec<SchemaListEntry> = registry
1735        .all()
1736        .filter(|s| !args.builtin_only || s.builtin)
1737        .map(|schema| SchemaListEntry {
1738            id: schema.id.clone(),
1739            name: schema.name.clone(),
1740            description: schema.description.clone(),
1741            value_type: schema.range.description(),
1742            cardinality: match schema.cardinality {
1743                memvid_core::Cardinality::Single => "single".to_string(),
1744                memvid_core::Cardinality::Multiple => "multiple".to_string(),
1745            },
1746            domain: schema
1747                .domain
1748                .iter()
1749                .map(|k| k.as_str().to_string())
1750                .collect(),
1751            builtin: schema.builtin,
1752            inverse: schema.inverse.clone(),
1753        })
1754        .collect();
1755
1756    entries.sort_by(|a, b| a.id.cmp(&b.id));
1757
1758    if entries.is_empty() {
1759        if args.json {
1760            println!("[]");
1761        } else {
1762            println!("No schemas found.");
1763        }
1764        return Ok(());
1765    }
1766
1767    if args.json {
1768        println!("{}", serde_json::to_string_pretty(&entries)?);
1769    } else {
1770        let title = if args.builtin_only {
1771            "Built-in Schemas"
1772        } else {
1773            "Registered Schemas"
1774        };
1775        println!("{} ({} total):", title, entries.len());
1776        println!();
1777        println!(
1778            "{:<20} {:<15} {:<12} {:<10} {}",
1779            "ID", "NAME", "TYPE", "CARDINAL", "DOMAIN"
1780        );
1781        println!("{}", "-".repeat(70));
1782
1783        for entry in &entries {
1784            let domain = if entry.domain.is_empty() {
1785                "*".to_string()
1786            } else {
1787                entry.domain.join(", ")
1788            };
1789            let cardinality = if entry.cardinality == "multiple" {
1790                "multiple"
1791            } else {
1792                "single"
1793            };
1794
1795            println!(
1796                "{:<20} {:<15} {:<12} {:<10} {}",
1797                truncate(&entry.id, 20),
1798                truncate(&entry.name, 15),
1799                truncate(&entry.value_type, 12),
1800                cardinality,
1801                truncate(&domain, 20)
1802            );
1803        }
1804    }
1805
1806    Ok(())
1807}
1808
/// Truncate a string to max length, adding "..." if needed.
///
/// Length is measured in characters (Unicode scalar values), matching how
/// the `{:<N}` padding in the callers counts width, so multi-byte text is
/// never cut in the middle of a character. A string of at most `max_len`
/// characters is returned unchanged; otherwise the first `max_len - 3`
/// characters are kept and `...` is appended. For `max_len < 3` the result is
/// just `...`.
fn truncate(s: &str, max_len: usize) -> String {
    if s.chars().count() <= max_len {
        return s.to_string();
    }

    // Reserve three characters for the ellipsis.
    let keep = max_len.saturating_sub(3);
    let mut shortened: String = s.chars().take(keep).collect();
    shortened.push_str("...");
    shortened
}