memvid_cli/commands/
enrich.rs

1//! Enrichment command handler for extracting memory cards from frames.
2//!
3//! The `enrich` command runs enrichment engines over MV2 frames to extract
4//! structured memory cards (facts, preferences, events, etc.).
5
6use std::path::PathBuf;
7
8#[cfg(feature = "llama-cpp")]
9use anyhow::bail;
10use anyhow::Result;
11use clap::{Args, ValueEnum};
12use memvid_core::{EnrichmentEngine, Memvid, RulesEngine};
13use serde::Serialize;
14
15#[cfg(feature = "llama-cpp")]
16use crate::commands::{default_enrichment_model, get_installed_model_path, LlmModel};
17use crate::config::CliConfig;
18#[cfg(feature = "candle-llm")]
19use crate::enrich::CandlePhiEngine;
20#[cfg(feature = "llama-cpp")]
21use crate::enrich::LlmEngine;
22use crate::enrich::{ClaudeEngine, GeminiEngine, GroqEngine, MistralEngine, OpenAiEngine, XaiEngine};
23
/// Engine type for enrichment
// Chosen on the command line via `--engine`. `Rules` is the default, both
// through `#[default]` here and through `default_value_t` on
// `EnrichArgs::engine`.
//
// The `///` text on each variant is picked up by clap's `ValueEnum` derive as
// the description of that possible value, so it is user-facing help text.
//
// NOTE(review): several model names below differ from the banners printed by
// `handle_enrich` ("Claude Haiku 4.5", "Gemini 2.5 Flash", "xAI Grok 4 Fast").
// Confirm which names are current and align the two places.
#[derive(Debug, Clone, Copy, ValueEnum, Default)]
pub enum EnrichEngine {
    /// Rules-based extraction using regex patterns (fast, no models)
    #[default]
    Rules,
    /// LLM-based extraction with Phi-3.5 Mini via llama.cpp (requires model installation)
    // Only compiled in when the `llama-cpp` feature is enabled.
    #[cfg(feature = "llama-cpp")]
    Llm,
    /// Candle-based Phi-3 extraction (downloads from Hugging Face)
    // Only compiled in when the `candle-llm` feature is enabled.
    #[cfg(feature = "candle-llm")]
    Candle,
    /// OpenAI API-based extraction with GPT-4o-mini (requires OPENAI_API_KEY)
    Openai,
    /// Claude (Anthropic) API-based extraction with Claude 3.5 Haiku (requires ANTHROPIC_API_KEY)
    Claude,
    /// Gemini (Google) API-based extraction with Gemini 2.0 Flash (requires GOOGLE_API_KEY or GEMINI_API_KEY)
    Gemini,
    /// xAI API-based extraction with Grok-2 (requires XAI_API_KEY)
    Xai,
    /// Groq API-based extraction with Llama 3.3 70B (requires GROQ_API_KEY)
    Groq,
    /// Mistral API-based extraction with Mistral Large (requires MISTRAL_API_KEY)
    Mistral,
}
49
/// Arguments for the `enrich` subcommand
// The `///` comments on the fields below are used by clap's derive as the
// help text for each argument, so they are user-facing strings.
#[derive(Args)]
pub struct EnrichArgs {
    /// Path to the `.mv2` file
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,

    /// Enrichment engine to use
    #[arg(long, value_enum, default_value_t = EnrichEngine::Rules)]
    pub engine: EnrichEngine,

    /// Only process frames that haven't been enriched yet (default)
    // NOTE(review): `handle_enrich` never reads this field in the code visible
    // here; it defaults to `true` and appears to exist only as the
    // counterpart that `--force` conflicts with. Confirm that is intended.
    #[arg(long, default_value_t = true)]
    pub incremental: bool,

    /// Re-enrich all frames, ignoring previous enrichment records
    // When set, `handle_enrich` calls `clear_memories()` before running the engine.
    #[arg(long, conflicts_with = "incremental")]
    pub force: bool,

    /// Output results as JSON
    #[arg(long)]
    pub json: bool,

    /// Show extracted memory cards
    // Only affects the plain-text output path of `handle_enrich`.
    #[arg(long)]
    pub verbose: bool,

    /// Number of parallel workers for API calls (default: 20)
    // Forwarded to `with_parallelism` on the API engines (OpenAI, Claude,
    // Gemini, xAI, Groq, Mistral); the Rules/Llm/Candle arms do not read it.
    #[arg(long, default_value_t = 20)]
    pub workers: usize,

    /// Number of frames to batch per API call (default: 10)
    // Currently only forwarded to `OpenAiEngine::with_batch_size`; the other
    // engine arms in `handle_enrich` do not read it.
    #[arg(long, default_value_t = 10)]
    pub batch_size: usize,
}
85
/// Result of enrichment for JSON output
///
/// Built by `handle_enrich` after the commit and printed with
/// `serde_json::to_string_pretty` when `--json` is given.
#[derive(Debug, Serialize)]
pub struct EnrichResult {
    /// Engine identifier, taken from `engine.kind()`.
    pub engine: String,
    /// Engine version string, taken from `engine.version()`.
    pub version: String,
    /// Frame count reported by the enrichment run.
    pub frames_processed: usize,
    /// Number of memory cards reported by the enrichment run.
    pub cards_extracted: usize,
    /// `card_count` from the memory stats read after the commit.
    pub total_cards: usize,
    /// `entity_count` from the memory stats read after the commit.
    pub total_entities: usize,
}
96
97/// Handle the `enrich` command
98#[allow(unused_variables)]
99pub fn handle_enrich(config: &CliConfig, args: EnrichArgs) -> Result<()> {
100    let mut mem = Memvid::open(&args.file)?;
101
102    // Get initial stats
103    let initial_stats = mem.memories_stats();
104
105    // If force mode, clear existing memories first
106    if args.force {
107        mem.clear_memories();
108    }
109
110    // Run the selected engine
111    let (engine_kind, engine_version, frames, cards) = match args.engine {
112        EnrichEngine::Rules => {
113            let engine = RulesEngine::new();
114            let kind = engine.kind().to_string();
115            let version = engine.version().to_string();
116            let (frames, cards) = mem.run_enrichment(&engine)?;
117            (kind, version, frames, cards)
118        }
119        #[cfg(feature = "llama-cpp")]
120        EnrichEngine::Llm => {
121            // Check if model is installed
122            let model = default_enrichment_model();
123            let model_path = match get_installed_model_path(config, model) {
124                Some(path) => path,
125                None => {
126                    bail!(
127                        "LLM model not installed. Run `memvid models install {}` first.",
128                        match model {
129                            LlmModel::Phi35Mini => "phi-3.5-mini",
130                            LlmModel::Phi35MiniQ8 => "phi-3.5-mini-q8",
131                        }
132                    );
133                }
134            };
135
136            // Create and initialize the LLM engine
137            let mut engine = LlmEngine::new(model_path);
138            eprintln!("Loading LLM model...");
139            engine.init()?;
140
141            let kind = engine.kind().to_string();
142            let version = engine.version().to_string();
143            let (frames, cards) = mem.run_enrichment(&engine)?;
144            (kind, version, frames, cards)
145        }
146        #[cfg(feature = "candle-llm")]
147        EnrichEngine::Candle => {
148            // Create and initialize the Candle Phi-3 engine
149            // Uses Q4 quantized GGUF (~2.4GB) stored in ~/.memvid/models/llm/phi-3-mini-q4/
150            eprintln!("Loading Phi-3-mini Q4 model via Candle (first run downloads ~2.4GB to ~/.memvid/models/llm/)...");
151            let mut engine = CandlePhiEngine::from_memvid_models(config.models_dir.clone());
152            engine.init()?;
153
154            let kind = engine.kind().to_string();
155            let version = engine.version().to_string();
156            let (frames, cards) = mem.run_enrichment(&engine)?;
157            (kind, version, frames, cards)
158        }
159        EnrichEngine::Openai => {
160            // Create and initialize the OpenAI engine with parallel batch support
161            eprintln!("Using OpenAI GPT-4o-mini for enrichment (parallel mode, {} workers, batch size {})...", args.workers, args.batch_size);
162            let mut engine = OpenAiEngine::new().with_parallelism(args.workers).with_batch_size(args.batch_size);
163            engine.init()?;
164
165            let kind = engine.kind().to_string();
166            let version = engine.version().to_string();
167
168            // Use parallel batch processing for OpenAI
169            let (frames, cards) = run_openai_parallel(&mut mem, &engine, args.workers)?;
170            (kind, version, frames, cards)
171        }
172        EnrichEngine::Claude => {
173            // Create and initialize the Claude engine with parallel support
174            eprintln!("Using Claude Haiku 4.5 for enrichment (parallel mode, {} workers)...", args.workers);
175            let mut engine = ClaudeEngine::new().with_parallelism(args.workers);
176            engine.init()?;
177
178            let kind = engine.kind().to_string();
179            let version = engine.version().to_string();
180
181            // Use parallel batch processing for Claude
182            let (frames, cards) = run_claude_parallel(&mut mem, &engine, args.workers)?;
183            (kind, version, frames, cards)
184        }
185        EnrichEngine::Gemini => {
186            // Create and initialize the Gemini engine with parallel support
187            eprintln!("Using Gemini 2.5 Flash for enrichment (parallel mode, {} workers)...", args.workers);
188            let mut engine = GeminiEngine::new().with_parallelism(args.workers);
189            engine.init()?;
190
191            let kind = engine.kind().to_string();
192            let version = engine.version().to_string();
193
194            // Use parallel batch processing for Gemini
195            let (frames, cards) = run_gemini_parallel(&mut mem, &engine, args.workers)?;
196            (kind, version, frames, cards)
197        }
198        EnrichEngine::Xai => {
199            // Create and initialize the xAI engine with parallel support
200            eprintln!("Using xAI Grok 4 Fast for enrichment (parallel mode, {} workers)...", args.workers);
201            let mut engine = XaiEngine::new().with_parallelism(args.workers);
202            engine.init()?;
203
204            let kind = engine.kind().to_string();
205            let version = engine.version().to_string();
206
207            // Use parallel batch processing for xAI
208            let (frames, cards) = run_xai_parallel(&mut mem, &engine, args.workers)?;
209            (kind, version, frames, cards)
210        }
211        EnrichEngine::Groq => {
212            // Create and initialize the Groq engine with parallel support
213            eprintln!("Using Groq Llama 3.3 70B for enrichment (parallel mode, {} workers)...", args.workers);
214            let mut engine = GroqEngine::new().with_parallelism(args.workers);
215            engine.init()?;
216
217            let kind = engine.kind().to_string();
218            let version = engine.version().to_string();
219
220            // Use parallel batch processing for Groq
221            let (frames, cards) = run_groq_parallel(&mut mem, &engine, args.workers)?;
222            (kind, version, frames, cards)
223        }
224        EnrichEngine::Mistral => {
225            // Create and initialize the Mistral engine with parallel support
226            eprintln!("Using Mistral Large for enrichment (parallel mode, {} workers)...", args.workers);
227            let mut engine = MistralEngine::new().with_parallelism(args.workers);
228            engine.init()?;
229
230            let kind = engine.kind().to_string();
231            let version = engine.version().to_string();
232
233            // Use parallel batch processing for Mistral
234            let (frames, cards) = run_mistral_parallel(&mut mem, &engine, args.workers)?;
235            (kind, version, frames, cards)
236        }
237    };
238
239    // Commit changes
240    mem.commit()?;
241
242    // Get final stats
243    let final_stats = mem.memories_stats();
244
245    if args.json {
246        let result = EnrichResult {
247            engine: engine_kind,
248            version: engine_version,
249            frames_processed: frames,
250            cards_extracted: cards,
251            total_cards: final_stats.card_count,
252            total_entities: final_stats.entity_count,
253        };
254        println!("{}", serde_json::to_string_pretty(&result)?);
255    } else {
256        println!("Enrichment complete:");
257        println!("  Engine: {} v{}", engine_kind, engine_version);
258        println!("  Frames processed: {}", frames);
259        println!("  Cards extracted: {}", cards);
260        println!(
261            "  Total cards: {} (+{})",
262            final_stats.card_count,
263            final_stats
264                .card_count
265                .saturating_sub(initial_stats.card_count)
266        );
267        println!("  Entities: {}", final_stats.entity_count);
268
269        if args.verbose && cards > 0 {
270            println!("\nExtracted memory cards:");
271            for entity in mem.memory_entities() {
272                println!("  {}:", entity);
273                for card in mem.get_entity_memories(&entity) {
274                    println!("    - {}: {} = \"{}\"", card.kind, card.slot, card.value);
275                }
276            }
277        }
278    }
279
280    Ok(())
281}
282
283/// Run OpenAI enrichment with parallel batch processing.
284///
285/// This gathers all unenriched frames, sends them to OpenAI in parallel,
286/// and stores the resulting memory cards.
287fn run_openai_parallel(
288    mem: &mut memvid_core::Memvid,
289    engine: &OpenAiEngine,
290    workers: usize,
291) -> Result<(usize, usize)> {
292    use memvid_core::enrich::EnrichmentContext;
293    use memvid_core::EnrichmentEngine;
294
295    let kind = engine.kind();
296    let version = engine.version();
297
298    // Get all unenriched frames
299    let unenriched = mem.get_unenriched_frames(kind, version);
300    let total_frames = unenriched.len();
301
302    if total_frames == 0 {
303        eprintln!("No unenriched frames found.");
304        return Ok((0, 0));
305    }
306
307    eprintln!(
308        "Gathering {} frames for parallel enrichment...",
309        total_frames
310    );
311
312    // Build enrichment contexts for all frames
313    let mut contexts = Vec::with_capacity(total_frames);
314    for frame_id in &unenriched {
315        let frame = match mem.frame_by_id(*frame_id) {
316            Ok(f) => f,
317            Err(_) => continue,
318        };
319
320        // Get full frame content (not truncated preview)
321        let text = match mem.frame_text_by_id(*frame_id) {
322            Ok(t) => t,
323            Err(_) => continue,
324        };
325
326        let uri = frame
327            .uri
328            .clone()
329            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
330        let metadata_json = frame
331            .metadata
332            .as_ref()
333            .and_then(|m| serde_json::to_string(m).ok());
334
335        let ctx = EnrichmentContext::new(
336            *frame_id,
337            uri,
338            text,
339            frame.title.clone(),
340            frame.timestamp,
341            metadata_json,
342        );
343
344        contexts.push(ctx);
345    }
346
347    eprintln!(
348        "Starting parallel enrichment of {} frames with {} workers...",
349        contexts.len(),
350        workers
351    );
352
353    // Run parallel batch enrichment
354    let results = engine.enrich_batch(contexts)?;
355
356    // Store results back to MV2
357    let mut total_cards = 0;
358    for (frame_id, cards) in results {
359        let card_count = cards.len();
360
361        // Store cards
362        let card_ids = if !cards.is_empty() {
363            mem.put_memory_cards(cards)?
364        } else {
365            Vec::new()
366        };
367
368        // Record enrichment
369        mem.record_enrichment(frame_id, kind, version, card_ids)?;
370
371        total_cards += card_count;
372    }
373
374    Ok((total_frames, total_cards))
375}
376
377/// Run Claude enrichment with parallel batch processing.
378fn run_claude_parallel(
379    mem: &mut memvid_core::Memvid,
380    engine: &ClaudeEngine,
381    workers: usize,
382) -> Result<(usize, usize)> {
383    use memvid_core::enrich::EnrichmentContext;
384    use memvid_core::EnrichmentEngine;
385
386    let kind = engine.kind();
387    let version = engine.version();
388
389    let unenriched = mem.get_unenriched_frames(kind, version);
390    let total_frames = unenriched.len();
391
392    if total_frames == 0 {
393        eprintln!("No unenriched frames found.");
394        return Ok((0, 0));
395    }
396
397    eprintln!(
398        "Gathering {} frames for parallel enrichment...",
399        total_frames
400    );
401
402    let mut contexts = Vec::with_capacity(total_frames);
403    for frame_id in &unenriched {
404        let frame = match mem.frame_by_id(*frame_id) {
405            Ok(f) => f,
406            Err(_) => continue,
407        };
408
409        let text = match mem.frame_text_by_id(*frame_id) {
410            Ok(t) => t,
411            Err(_) => continue,
412        };
413
414        let uri = frame
415            .uri
416            .clone()
417            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
418        let metadata_json = frame
419            .metadata
420            .as_ref()
421            .and_then(|m| serde_json::to_string(m).ok());
422
423        let ctx = EnrichmentContext::new(
424            *frame_id,
425            uri,
426            text,
427            frame.title.clone(),
428            frame.timestamp,
429            metadata_json,
430        );
431
432        contexts.push(ctx);
433    }
434
435    eprintln!(
436        "Starting parallel enrichment of {} frames with {} workers...",
437        contexts.len(),
438        workers
439    );
440
441    let results = engine.enrich_batch(contexts)?;
442
443    let mut total_cards = 0;
444    for (frame_id, cards) in results {
445        let card_count = cards.len();
446
447        let card_ids = if !cards.is_empty() {
448            mem.put_memory_cards(cards)?
449        } else {
450            Vec::new()
451        };
452
453        mem.record_enrichment(frame_id, kind, version, card_ids)?;
454
455        total_cards += card_count;
456    }
457
458    Ok((total_frames, total_cards))
459}
460
461/// Run Gemini enrichment with parallel batch processing.
462fn run_gemini_parallel(
463    mem: &mut memvid_core::Memvid,
464    engine: &GeminiEngine,
465    workers: usize,
466) -> Result<(usize, usize)> {
467    use memvid_core::enrich::EnrichmentContext;
468    use memvid_core::EnrichmentEngine;
469
470    let kind = engine.kind();
471    let version = engine.version();
472
473    let unenriched = mem.get_unenriched_frames(kind, version);
474    let total_frames = unenriched.len();
475
476    if total_frames == 0 {
477        eprintln!("No unenriched frames found.");
478        return Ok((0, 0));
479    }
480
481    eprintln!(
482        "Gathering {} frames for parallel enrichment...",
483        total_frames
484    );
485
486    let mut contexts = Vec::with_capacity(total_frames);
487    for frame_id in &unenriched {
488        let frame = match mem.frame_by_id(*frame_id) {
489            Ok(f) => f,
490            Err(_) => continue,
491        };
492
493        let text = match mem.frame_text_by_id(*frame_id) {
494            Ok(t) => t,
495            Err(_) => continue,
496        };
497
498        let uri = frame
499            .uri
500            .clone()
501            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
502        let metadata_json = frame
503            .metadata
504            .as_ref()
505            .and_then(|m| serde_json::to_string(m).ok());
506
507        let ctx = EnrichmentContext::new(
508            *frame_id,
509            uri,
510            text,
511            frame.title.clone(),
512            frame.timestamp,
513            metadata_json,
514        );
515
516        contexts.push(ctx);
517    }
518
519    eprintln!(
520        "Starting parallel enrichment of {} frames with {} workers...",
521        contexts.len(),
522        workers
523    );
524
525    let results = engine.enrich_batch(contexts)?;
526
527    let mut total_cards = 0;
528    for (frame_id, cards) in results {
529        let card_count = cards.len();
530
531        let card_ids = if !cards.is_empty() {
532            mem.put_memory_cards(cards)?
533        } else {
534            Vec::new()
535        };
536
537        mem.record_enrichment(frame_id, kind, version, card_ids)?;
538
539        total_cards += card_count;
540    }
541
542    Ok((total_frames, total_cards))
543}
544
545/// Run xAI enrichment with parallel batch processing.
546fn run_xai_parallel(
547    mem: &mut memvid_core::Memvid,
548    engine: &XaiEngine,
549    workers: usize,
550) -> Result<(usize, usize)> {
551    use memvid_core::enrich::EnrichmentContext;
552    use memvid_core::EnrichmentEngine;
553
554    let kind = engine.kind();
555    let version = engine.version();
556
557    let unenriched = mem.get_unenriched_frames(kind, version);
558    let total_frames = unenriched.len();
559
560    if total_frames == 0 {
561        eprintln!("No unenriched frames found.");
562        return Ok((0, 0));
563    }
564
565    eprintln!(
566        "Gathering {} frames for parallel enrichment...",
567        total_frames
568    );
569
570    let mut contexts = Vec::with_capacity(total_frames);
571    for frame_id in &unenriched {
572        let frame = match mem.frame_by_id(*frame_id) {
573            Ok(f) => f,
574            Err(_) => continue,
575        };
576
577        let text = match mem.frame_text_by_id(*frame_id) {
578            Ok(t) => t,
579            Err(_) => continue,
580        };
581
582        let uri = frame
583            .uri
584            .clone()
585            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
586        let metadata_json = frame
587            .metadata
588            .as_ref()
589            .and_then(|m| serde_json::to_string(m).ok());
590
591        let ctx = EnrichmentContext::new(
592            *frame_id,
593            uri,
594            text,
595            frame.title.clone(),
596            frame.timestamp,
597            metadata_json,
598        );
599
600        contexts.push(ctx);
601    }
602
603    eprintln!(
604        "Starting parallel enrichment of {} frames with {} workers...",
605        contexts.len(),
606        workers
607    );
608
609    let results = engine.enrich_batch(contexts)?;
610
611    let mut total_cards = 0;
612    for (frame_id, cards) in results {
613        let card_count = cards.len();
614
615        let card_ids = if !cards.is_empty() {
616            mem.put_memory_cards(cards)?
617        } else {
618            Vec::new()
619        };
620
621        mem.record_enrichment(frame_id, kind, version, card_ids)?;
622
623        total_cards += card_count;
624    }
625
626    Ok((total_frames, total_cards))
627}
628
629/// Run Groq enrichment with parallel batch processing.
630fn run_groq_parallel(
631    mem: &mut memvid_core::Memvid,
632    engine: &GroqEngine,
633    workers: usize,
634) -> Result<(usize, usize)> {
635    use memvid_core::enrich::EnrichmentContext;
636    use memvid_core::EnrichmentEngine;
637
638    let kind = engine.kind();
639    let version = engine.version();
640
641    let unenriched = mem.get_unenriched_frames(kind, version);
642    let total_frames = unenriched.len();
643
644    if total_frames == 0 {
645        eprintln!("No unenriched frames found.");
646        return Ok((0, 0));
647    }
648
649    eprintln!(
650        "Gathering {} frames for parallel enrichment...",
651        total_frames
652    );
653
654    let mut contexts = Vec::with_capacity(total_frames);
655    for frame_id in &unenriched {
656        let frame = match mem.frame_by_id(*frame_id) {
657            Ok(f) => f,
658            Err(_) => continue,
659        };
660
661        let text = match mem.frame_text_by_id(*frame_id) {
662            Ok(t) => t,
663            Err(_) => continue,
664        };
665
666        let uri = frame
667            .uri
668            .clone()
669            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
670        let metadata_json = frame
671            .metadata
672            .as_ref()
673            .and_then(|m| serde_json::to_string(m).ok());
674
675        let ctx = EnrichmentContext::new(
676            *frame_id,
677            uri,
678            text,
679            frame.title.clone(),
680            frame.timestamp,
681            metadata_json,
682        );
683
684        contexts.push(ctx);
685    }
686
687    eprintln!(
688        "Starting parallel enrichment of {} frames with {} workers...",
689        contexts.len(),
690        workers
691    );
692
693    let results = engine.enrich_batch(contexts)?;
694
695    let mut total_cards = 0;
696    for (frame_id, cards) in results {
697        let card_count = cards.len();
698
699        let card_ids = if !cards.is_empty() {
700            mem.put_memory_cards(cards)?
701        } else {
702            Vec::new()
703        };
704
705        mem.record_enrichment(frame_id, kind, version, card_ids)?;
706
707        total_cards += card_count;
708    }
709
710    Ok((total_frames, total_cards))
711}
712
713/// Run Mistral enrichment with parallel batch processing.
714fn run_mistral_parallel(
715    mem: &mut memvid_core::Memvid,
716    engine: &MistralEngine,
717    workers: usize,
718) -> Result<(usize, usize)> {
719    use memvid_core::enrich::EnrichmentContext;
720    use memvid_core::EnrichmentEngine;
721
722    let kind = engine.kind();
723    let version = engine.version();
724
725    let unenriched = mem.get_unenriched_frames(kind, version);
726    let total_frames = unenriched.len();
727
728    if total_frames == 0 {
729        eprintln!("No unenriched frames found.");
730        return Ok((0, 0));
731    }
732
733    eprintln!(
734        "Gathering {} frames for parallel enrichment...",
735        total_frames
736    );
737
738    let mut contexts = Vec::with_capacity(total_frames);
739    for frame_id in &unenriched {
740        let frame = match mem.frame_by_id(*frame_id) {
741            Ok(f) => f,
742            Err(_) => continue,
743        };
744
745        let text = match mem.frame_text_by_id(*frame_id) {
746            Ok(t) => t,
747            Err(_) => continue,
748        };
749
750        let uri = frame
751            .uri
752            .clone()
753            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
754        let metadata_json = frame
755            .metadata
756            .as_ref()
757            .and_then(|m| serde_json::to_string(m).ok());
758
759        let ctx = EnrichmentContext::new(
760            *frame_id,
761            uri,
762            text,
763            frame.title.clone(),
764            frame.timestamp,
765            metadata_json,
766        );
767
768        contexts.push(ctx);
769    }
770
771    eprintln!(
772        "Starting parallel enrichment of {} frames with {} workers...",
773        contexts.len(),
774        workers
775    );
776
777    let results = engine.enrich_batch(contexts)?;
778
779    let mut total_cards = 0;
780    for (frame_id, cards) in results {
781        let card_count = cards.len();
782
783        let card_ids = if !cards.is_empty() {
784            mem.put_memory_cards(cards)?
785        } else {
786            Vec::new()
787        };
788
789        mem.record_enrichment(frame_id, kind, version, card_ids)?;
790
791        total_cards += card_count;
792    }
793
794    Ok((total_frames, total_cards))
795}
796
/// Handle the `memories` subcommand (view memory cards)
// This struct holds the parsed arguments; the handler itself is
// `handle_memories`. The `///` comments here are left as-is because clap's
// derive may surface them as help text.
#[derive(Args)]
pub struct MemoriesArgs {
    /// Path to the `.mv2` file
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,

    /// Filter by entity
    // Passed to the lookup calls unchanged (no case normalisation here).
    #[arg(long)]
    pub entity: Option<String>,

    /// Filter by slot
    // Only consulted when `entity` is also given; `handle_memories` ignores
    // it otherwise.
    #[arg(long)]
    pub slot: Option<String>,

    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
816
/// Memory card output for JSON serialization
///
/// Produced by `card_to_output`, which copies each field from a
/// `memvid_core::MemoryCard`.
#[derive(Debug, Serialize)]
pub struct MemoryOutput {
    /// Card identifier, copied from `card.id`.
    pub id: u64,
    /// String form of the card's kind (`card.kind.to_string()`).
    pub kind: String,
    /// Entity the card belongs to.
    pub entity: String,
    /// Slot name within the entity.
    pub slot: String,
    /// Stored value for the entity/slot pair.
    pub value: String,
    /// String form of the card's polarity, when the card has one.
    pub polarity: Option<String>,
    /// Copied from `card.document_date`.
    // NOTE(review): presumably a Unix timestamp — confirm against `memvid_core`.
    pub document_date: Option<i64>,
    /// Identifier of the frame the card was extracted from.
    pub source_frame_id: u64,
}
829
830pub fn handle_memories(_config: &CliConfig, args: MemoriesArgs) -> Result<()> {
831    let mem = Memvid::open(&args.file)?;
832
833    let stats = mem.memories_stats();
834
835    if args.json {
836        let mut cards: Vec<MemoryOutput> = Vec::new();
837
838        if let Some(entity) = &args.entity {
839            if let Some(slot) = &args.slot {
840                // Specific entity:slot
841                if let Some(card) = mem.get_current_memory(entity, slot) {
842                    cards.push(card_to_output(card));
843                }
844            } else {
845                // All cards for entity
846                for card in mem.get_entity_memories(entity) {
847                    cards.push(card_to_output(card));
848                }
849            }
850        } else {
851            // All entities
852            for entity in mem.memory_entities() {
853                for card in mem.get_entity_memories(&entity) {
854                    cards.push(card_to_output(card));
855                }
856            }
857        }
858
859        println!("{}", serde_json::to_string_pretty(&cards)?);
860    } else {
861        println!(
862            "Memory cards: {} total, {} entities",
863            stats.card_count, stats.entity_count
864        );
865        println!();
866
867        if let Some(entity) = &args.entity {
868            if let Some(slot) = &args.slot {
869                // Specific entity:slot
870                if let Some(card) = mem.get_current_memory(entity, slot) {
871                    println!("{}:{} = \"{}\"", entity, slot, card.value);
872                } else {
873                    println!("No memory found for {}:{}", entity, slot);
874                }
875            } else {
876                // All cards for entity
877                println!("{}:", entity);
878                for card in mem.get_entity_memories(entity) {
879                    println!("  {}: {} = \"{}\"", card.kind, card.slot, card.value);
880                }
881            }
882        } else {
883            // All entities
884            for entity in mem.memory_entities() {
885                println!("{}:", entity);
886                for card in mem.get_entity_memories(&entity) {
887                    let polarity = card
888                        .polarity
889                        .as_ref()
890                        .map(|p| format!(" [{}]", p))
891                        .unwrap_or_default();
892                    println!(
893                        "  {}: {} = \"{}\"{}",
894                        card.kind, card.slot, card.value, polarity
895                    );
896                }
897                println!();
898            }
899        }
900    }
901
902    Ok(())
903}
904
905fn card_to_output(card: &memvid_core::MemoryCard) -> MemoryOutput {
906    MemoryOutput {
907        id: card.id,
908        kind: card.kind.to_string(),
909        entity: card.entity.clone(),
910        slot: card.slot.clone(),
911        value: card.value.clone(),
912        polarity: card.polarity.as_ref().map(|p| p.to_string()),
913        document_date: card.document_date,
914        source_frame_id: card.source_frame_id,
915    }
916}
917
918// ============================================================================
919// State Command - Entity-Centric Current State Queries
920// ============================================================================
921
/// Handle the `state` subcommand (query current entity state)
// This struct holds the parsed arguments; the handler itself is
// `handle_state`. The `///` comments here are left as-is because clap's
// derive may surface them as help text.
#[derive(Args)]
pub struct StateArgs {
    /// Path to the `.mv2` file
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,

    /// Entity to query (required)
    // `handle_state` lowercases this value before any lookup.
    #[arg(long, short = 'e')]
    pub entity: String,

    /// Specific slot to query (optional, omit for full entity profile)
    #[arg(long, short = 's')]
    pub slot: Option<String>,

    /// Query state at a specific point in time (Unix timestamp)
    // When set together with `slot`, `handle_state` uses
    // `get_memory_at_time` instead of `get_current_memory`.
    #[arg(long)]
    pub at_time: Option<i64>,

    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
945
/// State output for JSON serialization
///
/// `slot` and `at_time` are left out of the JSON entirely when they are
/// `None` (see the `skip_serializing_if` attributes).
#[derive(Debug, Serialize)]
pub struct StateOutput {
    /// Entity name as used for the lookup (`handle_state` lowercases it).
    pub entity: String,
    /// Slot that was queried, if a single slot was requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub slot: Option<String>,
    /// The `--at-time` value the query was made with, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub at_time: Option<i64>,
    /// The resolved state: a single slot value or a full profile.
    pub state: StateValue,
}
956
/// The state value - either a single slot or full profile
///
/// Marked `#[serde(untagged)]`, so the variant name does not appear in the
/// serialized output: `Single` serializes as an object of its fields and
/// `Profile` as an array of [`SlotState`] objects.
#[derive(Debug, Serialize)]
#[serde(untagged)]
pub enum StateValue {
    /// Single slot value
    Single {
        /// Stored value of the slot.
        value: String,
        /// String form of the card's kind.
        kind: String,
        /// String form of the card's polarity, when present.
        polarity: Option<String>,
        /// Identifier of the frame the card came from.
        source_frame_id: u64,
        /// Copied from the card's `document_date`.
        document_date: Option<i64>,
    },
    /// Full entity profile
    Profile(Vec<SlotState>),
}
972
/// One slot entry inside a [`StateValue::Profile`].
///
/// Carries the same fields as `StateValue::Single` plus the slot name.
#[derive(Debug, Serialize)]
pub struct SlotState {
    /// Name of the slot.
    pub slot: String,
    /// Stored value of the slot.
    pub value: String,
    /// String form of the card's kind.
    pub kind: String,
    /// String form of the card's polarity, when present.
    pub polarity: Option<String>,
    /// Identifier of the frame the card came from.
    pub source_frame_id: u64,
    /// Document date associated with the card, if any.
    pub document_date: Option<i64>,
}
982
983pub fn handle_state(_config: &CliConfig, args: StateArgs) -> Result<()> {
984    let mem = Memvid::open(&args.file)?;
985
986    let entity = args.entity.to_lowercase(); // Normalize entity name
987
988    if let Some(slot) = &args.slot {
989        // O(1) lookup for specific entity:slot
990        let card = if let Some(ts) = args.at_time {
991            mem.get_memory_at_time(&entity, slot, ts)
992        } else {
993            mem.get_current_memory(&entity, slot)
994        };
995
996        if args.json {
997            if let Some(card) = card {
998                let output = StateOutput {
999                    entity: entity.clone(),
1000                    slot: Some(slot.clone()),
1001                    at_time: args.at_time,
1002                    state: StateValue::Single {
1003                        value: card.value.clone(),
1004                        kind: card.kind.to_string(),
1005                        polarity: card.polarity.as_ref().map(|p| p.to_string()),
1006                        source_frame_id: card.source_frame_id,
1007                        document_date: card.document_date,
1008                    },
1009                };
1010                println!("{}", serde_json::to_string_pretty(&output)?);
1011            } else {
1012                println!("null");
1013            }
1014        } else {
1015            if let Some(card) = card {
1016                let time_info = if let Some(ts) = args.at_time {
1017                    format!(" (at {})", format_timestamp(ts))
1018                } else {
1019                    String::new()
1020                };
1021                let polarity = card
1022                    .polarity
1023                    .as_ref()
1024                    .map(|p| format!(" [{}]", p))
1025                    .unwrap_or_default();
1026                println!(
1027                    "{}:{} = \"{}\"{}{}",
1028                    entity, slot, card.value, polarity, time_info
1029                );
1030                println!("  kind: {}", card.kind);
1031                println!("  source: frame {}", card.source_frame_id);
1032                if let Some(date) = card.document_date {
1033                    println!("  date: {}", format_timestamp(date));
1034                }
1035            } else {
1036                let time_info = if let Some(ts) = args.at_time {
1037                    format!(" at {}", format_timestamp(ts))
1038                } else {
1039                    String::new()
1040                };
1041                println!("No value for {}:{}{}", entity, slot, time_info);
1042            }
1043        }
1044    } else {
1045        // Get full entity profile
1046        let cards = mem.get_entity_memories(&entity);
1047
1048        if cards.is_empty() {
1049            if args.json {
1050                println!("null");
1051            } else {
1052                println!("No state found for entity: {}", entity);
1053            }
1054            return Ok(());
1055        }
1056
1057        // Group by slot and get current value for each
1058        let mut slots: std::collections::HashMap<String, &memvid_core::MemoryCard> =
1059            std::collections::HashMap::new();
1060
1061        for card in &cards {
1062            // Keep the most recent card for each slot
1063            let dominated = slots
1064                .get(&card.slot)
1065                .map(|existing| {
1066                    card.effective_timestamp() > existing.effective_timestamp()
1067                })
1068                .unwrap_or(true);
1069
1070            if dominated && !card.is_retracted() {
1071                slots.insert(card.slot.clone(), card);
1072            }
1073        }
1074
1075        if args.json {
1076            let mut profile: Vec<SlotState> = slots
1077                .values()
1078                .map(|card| SlotState {
1079                    slot: card.slot.clone(),
1080                    value: card.value.clone(),
1081                    kind: card.kind.to_string(),
1082                    polarity: card.polarity.as_ref().map(|p| p.to_string()),
1083                    source_frame_id: card.source_frame_id,
1084                    document_date: card.document_date,
1085                })
1086                .collect();
1087            profile.sort_by(|a, b| a.slot.cmp(&b.slot));
1088
1089            let output = StateOutput {
1090                entity: entity.clone(),
1091                slot: None,
1092                at_time: args.at_time,
1093                state: StateValue::Profile(profile),
1094            };
1095            println!("{}", serde_json::to_string_pretty(&output)?);
1096        } else {
1097            println!("{}:", entity);
1098            let mut sorted_slots: Vec<_> = slots.into_iter().collect();
1099            sorted_slots.sort_by(|a, b| a.0.cmp(&b.0));
1100
1101            for (slot, card) in sorted_slots {
1102                let polarity = card
1103                    .polarity
1104                    .as_ref()
1105                    .map(|p| format!(" [{}]", p))
1106                    .unwrap_or_default();
1107                println!("  {}: \"{}\"{}  ({})", slot, card.value, polarity, card.kind);
1108            }
1109        }
1110    }
1111
1112    Ok(())
1113}
1114
1115fn format_timestamp(ts: i64) -> String {
1116    use std::time::{Duration, UNIX_EPOCH};
1117    let datetime = UNIX_EPOCH + Duration::from_secs(ts as u64);
1118    let datetime: chrono::DateTime<chrono::Utc> = datetime.into();
1119    datetime.format("%Y-%m-%d %H:%M:%S UTC").to_string()
1120}
1121
/// Arguments for the `facts` (entity audit) command.
// NOTE(review): `history` is declared here, but `handle_facts` in this file never
// reads it; every matching card is listed whether or not the flag is passed.
#[derive(Debug, Args)]
pub struct FactsArgs {
    /// Path to the `.mv2` file
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,

    /// Filter by entity (optional)
    // Lowercased by the handler before lookup; when absent all entities are scanned.
    #[arg(long, short = 'e')]
    pub entity: Option<String>,

    /// Filter by predicate/slot (optional)
    // Compared against the card slot ignoring ASCII case.
    #[arg(long, short = 'p')]
    pub predicate: Option<String>,

    /// Filter by value (optional)
    // Case-insensitive substring match against the card value.
    #[arg(long, short = 'v')]
    pub value: Option<String>,

    /// Show full history including superseded values
    #[arg(long)]
    pub history: bool,

    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
1149
/// Audit log entry for JSON output.
///
/// One entry is built per memory card by `handle_facts`; the same struct also
/// feeds the plain-text listing.
#[derive(Debug, Serialize)]
pub struct AuditLogEntry {
    /// Id of the frame the card was extracted from.
    pub frame_id: u64,
    /// The card's document date, falling back to its creation time.
    pub timestamp: Option<i64>,
    /// Entity the fact is about.
    pub entity: String,
    /// Slot (predicate) name.
    pub slot: String,
    /// Stored value.
    pub value: String,
    /// String form of the card's version relation.
    pub relation: String,
    /// Card kind, rendered with `to_string()`.
    pub kind: String,
    /// Polarity rendered with `to_string()`; omitted from JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub polarity: Option<String>,
    /// Confidence copied from the card; omitted from JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f32>,
    /// Name of the engine recorded on the card.
    pub engine: String,
    /// Currently always `None` in `handle_facts` (tracking is a TODO there), so it
    /// never appears in the JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub supersedes: Option<u64>,
}
1168
/// Audit output for JSON serialization.
///
/// Top-level object printed by `handle_facts` when `--json` is set. Filters that
/// were not supplied are omitted from the JSON.
#[derive(Debug, Serialize)]
pub struct AuditLogOutput {
    /// Number of entries in `entries`.
    pub total: usize,
    /// The `--entity` argument exactly as given on the command line.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub entity_filter: Option<String>,
    /// The `--predicate` argument exactly as given on the command line.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub predicate_filter: Option<String>,
    /// The `--value` argument exactly as given on the command line.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub value_filter: Option<String>,
    /// Matching entries, sorted by timestamp in ascending order.
    pub entries: Vec<AuditLogEntry>,
}
1181
/// Format a Unix timestamp as ISO 8601 (without chrono).
///
/// `ts` is whole seconds relative to 1970-01-01T00:00:00Z and may be negative; the
/// result is `YYYY-MM-DDTHH:MM:SSZ` in the proleptic Gregorian calendar. Years
/// outside `0000..=9999` are printed with as many digits (and a sign) as needed.
///
/// The date is computed in constant time with the days-to-civil-date conversion on
/// 400-year eras, using Euclidean division so instants before the epoch land on the
/// correct earlier day instead of being mirrored into the future.
fn format_audit_timestamp(ts: i64) -> String {
    const SECS_PER_DAY: i64 = 86_400;
    const DAYS_PER_ERA: i64 = 146_097; // days in one 400-year Gregorian cycle
    const EPOCH_SHIFT: i64 = 719_468; // days from 0000-03-01 to 1970-01-01

    // Euclidean split keeps the time-of-day in 0..86400 for negative inputs.
    let days = ts.div_euclid(SECS_PER_DAY);
    let secs_of_day = ts.rem_euclid(SECS_PER_DAY);

    let hours = secs_of_day / 3_600;
    let minutes = (secs_of_day % 3_600) / 60;
    let seconds = secs_of_day % 60;

    // Work in a calendar whose year starts on 1 March, so the leap day is last.
    let shifted = days + EPOCH_SHIFT;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA); // 0..=146_096
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365; // 0..=399
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // 0..=365
    let shifted_month = (5 * day_of_year + 2) / 153; // 0 = March .. 11 = February

    let day = day_of_year - (153 * shifted_month + 2) / 5 + 1; // 1..=31
    let month = if shifted_month < 10 {
        shifted_month + 3
    } else {
        shifted_month - 9
    }; // 1..=12
    // January and February belong to the following civil year.
    let year = year_of_era + era * 400 + if month <= 2 { 1 } else { 0 };

    format!(
        "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
        year, month, day, hours, minutes, seconds
    )
}
1237
1238pub fn handle_facts(_config: &CliConfig, args: FactsArgs) -> Result<()> {
1239    let mem = Memvid::open(&args.file)?;
1240
1241    // Collect all memory cards matching the filters
1242    let mut entries: Vec<AuditLogEntry> = Vec::new();
1243
1244    // Get entities to iterate
1245    let entities: Vec<String> = if let Some(entity) = &args.entity {
1246        vec![entity.to_lowercase()]
1247    } else {
1248        mem.memory_entities()
1249    };
1250
1251    for entity in entities {
1252        let cards = mem.get_entity_memories(&entity);
1253
1254        for card in cards {
1255            // Filter by predicate
1256            if let Some(pred) = &args.predicate {
1257                if !card.slot.eq_ignore_ascii_case(pred) {
1258                    continue;
1259                }
1260            }
1261
1262            // Filter by value
1263            if let Some(val) = &args.value {
1264                if !card.value.to_lowercase().contains(&val.to_lowercase()) {
1265                    continue;
1266                }
1267            }
1268
1269            entries.push(AuditLogEntry {
1270                frame_id: card.source_frame_id,
1271                timestamp: card.document_date.or(Some(card.created_at)),
1272                entity: card.entity.clone(),
1273                slot: card.slot.clone(),
1274                value: card.value.clone(),
1275                relation: card.version_relation.as_str().to_string(),
1276                kind: card.kind.to_string(),
1277                polarity: card.polarity.as_ref().map(|p| p.to_string()),
1278                confidence: card.confidence,
1279                engine: card.engine.clone(),
1280                supersedes: None, // TODO: track superseded frame IDs
1281            });
1282        }
1283    }
1284
1285    // Sort by timestamp (oldest first for audit trail)
1286    entries.sort_by(|a, b| {
1287        let ts_a = a.timestamp.unwrap_or(0);
1288        let ts_b = b.timestamp.unwrap_or(0);
1289        ts_a.cmp(&ts_b)
1290    });
1291
1292    if args.json {
1293        let output = AuditLogOutput {
1294            total: entries.len(),
1295            entity_filter: args.entity.clone(),
1296            predicate_filter: args.predicate.clone(),
1297            value_filter: args.value.clone(),
1298            entries,
1299        };
1300        println!("{}", serde_json::to_string_pretty(&output)?);
1301    } else {
1302        if entries.is_empty() {
1303            println!("No matching facts found.");
1304            return Ok(());
1305        }
1306
1307        println!("Audit Trail ({} entries):", entries.len());
1308        println!();
1309
1310        for entry in entries {
1311            let ts_str = entry
1312                .timestamp
1313                .map(format_audit_timestamp)
1314                .unwrap_or_else(|| "unknown".to_string());
1315
1316            let polarity_suffix = entry
1317                .polarity
1318                .as_ref()
1319                .map(|p| format!(" [{}]", p))
1320                .unwrap_or_default();
1321
1322            let conf = entry
1323                .confidence
1324                .map(|c| format!(" (conf: {:.2})", c))
1325                .unwrap_or_default();
1326
1327            let _polarity_prefix = if entry.polarity.is_some() {
1328                if entry.polarity.as_deref() == Some("negative") {
1329                    "-"
1330                } else if entry.polarity.as_deref() == Some("positive") {
1331                    "+"
1332                } else {
1333                    ""
1334                }
1335            } else {
1336                ""
1337            };
1338
1339            println!(
1340                "Frame {} ({}): {} {}:{}=\"{}\"{}  [{}]{}",
1341                entry.frame_id,
1342                ts_str,
1343                entry.relation.to_uppercase(),
1344                entry.entity,
1345                entry.slot,
1346                entry.value,
1347                polarity_suffix,
1348                entry.engine,
1349                conf,
1350            );
1351        }
1352    }
1353
1354    Ok(())
1355}
1356
1357// ============================================================================
1358// Export Command - Export facts to standard formats (N-Triples, JSON, CSV)
1359// ============================================================================
1360
/// Export format for the `export` command.
// Selected through `ExportArgs::format`; `handle_export` matches on the variant to
// decide how the collected triplets are written to stdout.
#[derive(Debug, Clone, Copy, ValueEnum, Default)]
pub enum ExportFormat {
    /// N-Triples RDF format (.nt)
    // One `<subject> <predicate> "object" .` line per triplet.
    #[default]
    Ntriples,
    /// JSON format with full metadata
    // The whole triplet list, pretty-printed as one JSON array.
    Json,
    /// CSV format for spreadsheet import
    // A header row followed by one row per triplet.
    Csv,
}
1372
/// Arguments for the `export` subcommand.
#[derive(Debug, Args)]
pub struct ExportArgs {
    /// Path to the `.mv2` file
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,

    /// Export format
    #[arg(long, short = 'f', value_enum, default_value_t = ExportFormat::Ntriples)]
    pub format: ExportFormat,

    /// Filter by entity (optional)
    // Lowercased by the handler before lookup; when absent every entity is exported.
    #[arg(long, short = 'e')]
    pub entity: Option<String>,

    /// Filter by predicate/slot (optional)
    // Compared against the card slot ignoring ASCII case.
    #[arg(long, short = 'p')]
    pub predicate: Option<String>,

    /// Base URI for N-Triples output (default: mv2://entity/)
    // Subjects are written as `<{base_uri}{entity}>` and predicates as
    // `<{base_uri}pred/{slot}>`; the value is inserted verbatim.
    #[arg(long, default_value = "mv2://entity/")]
    pub base_uri: String,

    /// Include provenance metadata in output
    // Adds source frame id, timestamp, engine and confidence to the JSON and CSV
    // outputs; the N-Triples writer does not emit these fields.
    #[arg(long)]
    pub with_provenance: bool,
}
1400
/// Export entry for JSON output.
///
/// The same struct is the intermediate record for the N-Triples and CSV writers in
/// `handle_export`. The four optional fields are provenance metadata: they are
/// populated only when `--with-provenance` is set and are omitted from JSON when
/// `None`.
#[derive(Debug, Serialize)]
pub struct ExportEntry {
    /// Entity the fact is about.
    pub subject: String,
    /// Slot name of the card.
    pub predicate: String,
    /// Value of the card.
    pub object: String,
    /// Id of the frame the card was extracted from.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_frame_id: Option<u64>,
    /// The card's document date, falling back to its creation time.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub timestamp: Option<i64>,
    /// Name of the engine recorded on the card.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub engine: Option<String>,
    /// Confidence copied from the card, if it has one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f32>,
}
1416
/// Escape a string so it can be placed between the quotes of an N-Triples literal.
///
/// Backslash, double quote, line feed, carriage return and tab are replaced by
/// their two-character backslash escapes; every other character is copied as is.
fn escape_ntriples(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            let sequence = match ch {
                '\\' => Some("\\\\"),
                '"' => Some("\\\""),
                '\n' => Some("\\n"),
                '\r' => Some("\\r"),
                '\t' => Some("\\t"),
                _ => None,
            };
            match sequence {
                Some(seq) => escaped.push_str(seq),
                None => escaped.push(ch),
            }
            escaped
        })
}
1432
/// Escape a string for CSV format.
///
/// A field containing a comma, a double quote, a line feed or a carriage return is
/// wrapped in double quotes, with embedded quotes doubled (RFC 4180). Any other
/// field is returned unchanged.
fn escape_csv(s: &str) -> String {
    // A bare `\r` is a line break to many CSV readers, so it must force quoting too.
    let needs_quoting = s.contains(|c: char| matches!(c, ',' | '"' | '\n' | '\r'));

    if needs_quoting {
        format!("\"{}\"", s.replace('"', "\"\""))
    } else {
        s.to_string()
    }
}
1441
/// Normalize an entity name into a valid URI component.
///
/// Every character that would either act as a URI delimiter (space, `/`, `:`, `#`,
/// `?`, `&`) or is not allowed inside an N-Triples `<...>` IRI reference (`<`, `>`,
/// `"`, `{`, `}`, `|`, `^`, backtick, backslash and all characters up to U+0020) is
/// replaced by `_`. All other characters, including non-ASCII ones, are kept.
fn normalize_uri_component(s: &str) -> String {
    s.chars()
        .map(|ch| match ch {
            // Delimiters that would change the structure of the resulting URI.
            '/' | ':' | '#' | '?' | '&' => '_',
            // Characters the N-Triples IRIREF production forbids.
            '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\\' => '_',
            // Space and control characters (U+0000..=U+0020).
            c if c <= ' ' => '_',
            c => c,
        })
        .collect()
}
1451
1452pub fn handle_export(_config: &CliConfig, args: ExportArgs) -> Result<()> {
1453    let mem = Memvid::open(&args.file)?;
1454
1455    // Collect all memory cards matching the filters
1456    let entities: Vec<String> = if let Some(entity) = &args.entity {
1457        vec![entity.to_lowercase()]
1458    } else {
1459        mem.memory_entities()
1460    };
1461
1462    // Build list of triplets
1463    let mut triplets: Vec<ExportEntry> = Vec::new();
1464
1465    for entity in entities {
1466        let cards = mem.get_entity_memories(&entity);
1467
1468        for card in cards {
1469            // Filter by predicate
1470            if let Some(pred) = &args.predicate {
1471                if !card.slot.eq_ignore_ascii_case(pred) {
1472                    continue;
1473                }
1474            }
1475
1476            // Skip retracted cards
1477            if card.is_retracted() {
1478                continue;
1479            }
1480
1481            triplets.push(ExportEntry {
1482                subject: card.entity.clone(),
1483                predicate: card.slot.clone(),
1484                object: card.value.clone(),
1485                source_frame_id: if args.with_provenance {
1486                    Some(card.source_frame_id)
1487                } else {
1488                    None
1489                },
1490                timestamp: if args.with_provenance {
1491                    card.document_date.or(Some(card.created_at))
1492                } else {
1493                    None
1494                },
1495                engine: if args.with_provenance {
1496                    Some(card.engine.clone())
1497                } else {
1498                    None
1499                },
1500                confidence: if args.with_provenance {
1501                    card.confidence
1502                } else {
1503                    None
1504                },
1505            });
1506        }
1507    }
1508
1509    match args.format {
1510        ExportFormat::Ntriples => {
1511            // Output N-Triples format
1512            // Format: <subject> <predicate> "object" .
1513            for t in &triplets {
1514                let subject_uri = format!(
1515                    "<{}{}>",
1516                    args.base_uri,
1517                    normalize_uri_component(&t.subject)
1518                );
1519                let predicate_uri = format!(
1520                    "<{}pred/{}>",
1521                    args.base_uri,
1522                    normalize_uri_component(&t.predicate)
1523                );
1524                let object_literal = format!("\"{}\"", escape_ntriples(&t.object));
1525
1526                println!("{} {} {} .", subject_uri, predicate_uri, object_literal);
1527            }
1528        }
1529        ExportFormat::Json => {
1530            // Output JSON format
1531            println!("{}", serde_json::to_string_pretty(&triplets)?);
1532        }
1533        ExportFormat::Csv => {
1534            // Output CSV format
1535            if args.with_provenance {
1536                println!("subject,predicate,object,source_frame_id,timestamp,engine,confidence");
1537            } else {
1538                println!("subject,predicate,object");
1539            }
1540
1541            for t in &triplets {
1542                if args.with_provenance {
1543                    println!(
1544                        "{},{},{},{},{},{},{}",
1545                        escape_csv(&t.subject),
1546                        escape_csv(&t.predicate),
1547                        escape_csv(&t.object),
1548                        t.source_frame_id.map(|id| id.to_string()).unwrap_or_default(),
1549                        t.timestamp.map(|ts| ts.to_string()).unwrap_or_default(),
1550                        t.engine.as_deref().map(escape_csv).unwrap_or_default(),
1551                        t.confidence.map(|c| format!("{:.2}", c)).unwrap_or_default(),
1552                    );
1553                } else {
1554                    println!(
1555                        "{},{},{}",
1556                        escape_csv(&t.subject),
1557                        escape_csv(&t.predicate),
1558                        escape_csv(&t.object),
1559                    );
1560                }
1561            }
1562        }
1563    }
1564
1565    // Output count to stderr so it doesn't pollute stdout piping
1566    eprintln!("Exported {} triplets", triplets.len());
1567
1568    Ok(())
1569}
1570
1571// ============================================================================
1572// Schema Command - Infer and manage predicate schemas
1573// ============================================================================
1574
/// Arguments for the `schema` subcommand.
// Thin wrapper: `schema` has no options of its own and only selects a nested
// subcommand, which `handle_schema` dispatches on.
#[derive(Debug, Args)]
pub struct SchemaArgs {
    // The nested subcommand chosen on the command line.
    #[command(subcommand)]
    pub command: SchemaCommand,
}
1581
/// Schema subcommands.
#[derive(Debug, clap::Subcommand)]
pub enum SchemaCommand {
    /// Infer schemas from existing memory cards
    // Handled by `handle_schema_infer`.
    Infer(SchemaInferArgs),
    /// List registered schemas (built-in + custom)
    // Handled by `handle_schema_list`.
    List(SchemaListArgs),
}
1590
/// Arguments for `schema infer`.
#[derive(Debug, Args)]
pub struct SchemaInferArgs {
    /// Path to the `.mv2` file
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,

    /// Register inferred schemas to the memory file
    // When set, the handler registers the inferred schemas and commits the file
    // before printing the summary.
    #[arg(long)]
    pub register: bool,

    /// Overwrite existing schemas when registering
    // Only accepted together with `--register` (enforced by `requires`).
    #[arg(long, requires = "register")]
    pub overwrite: bool,

    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
1610
/// Arguments for `schema list`.
#[derive(Debug, Args)]
pub struct SchemaListArgs {
    /// Path to the `.mv2` file (optional, shows built-in schemas if omitted)
    // When omitted, the handler lists a freshly constructed `SchemaRegistry`
    // instead of the registry stored in a file.
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: Option<PathBuf>,

    /// Output as JSON
    #[arg(long)]
    pub json: bool,

    /// Show only built-in schemas
    // Drops every schema whose `builtin` flag is false.
    #[arg(long)]
    pub builtin_only: bool,
}
1626
/// Schema list entry for JSON output.
///
/// Built from one registry schema by `handle_schema_list`; the same struct also
/// feeds the text table. Optional fields are omitted from JSON when `None`.
#[derive(Debug, Serialize)]
pub struct SchemaListEntry {
    /// Schema identifier; the listing is sorted by this field.
    pub id: String,
    /// Schema name.
    pub name: String,
    /// Optional description copied from the schema.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Textual description of the schema's value range.
    pub value_type: String,
    /// Either `"single"` or `"multiple"`.
    pub cardinality: String,
    /// String names of the schema's domain; shown as `*` in the text table when empty.
    pub domain: Vec<String>,
    /// Whether the schema is flagged as built-in.
    pub builtin: bool,
    /// Optional inverse copied from the schema.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub inverse: Option<String>,
}
1641
1642pub fn handle_schema(_config: &CliConfig, args: SchemaArgs) -> Result<()> {
1643    match args.command {
1644        SchemaCommand::Infer(infer_args) => handle_schema_infer(_config, infer_args),
1645        SchemaCommand::List(list_args) => handle_schema_list(_config, list_args),
1646    }
1647}
1648
1649fn handle_schema_infer(_config: &CliConfig, args: SchemaInferArgs) -> Result<()> {
1650    let mut mem = Memvid::open(&args.file)?;
1651
1652    // Get schema summary (which includes inference)
1653    let summary = mem.schema_summary();
1654
1655    if summary.is_empty() {
1656        if args.json {
1657            println!("[]");
1658        } else {
1659            println!("No predicates found in memory.");
1660        }
1661        return Ok(());
1662    }
1663
1664    if args.register {
1665        let count = mem.register_inferred_schemas(args.overwrite);
1666        mem.commit()?;
1667        eprintln!("Registered {} inferred schemas", count);
1668    }
1669
1670    if args.json {
1671        println!("{}", serde_json::to_string_pretty(&summary)?);
1672    } else {
1673        println!("Inferred Schemas ({} predicates):", summary.len());
1674        println!();
1675        println!(
1676            "{:<20} {:<12} {:<10} {:<8} {:<8} {:<8} {}",
1677            "PREDICATE", "TYPE", "CARDINAL", "ENTITIES", "VALUES", "UNIQUE", "BUILTIN"
1678        );
1679        println!("{}", "-".repeat(80));
1680
1681        for entry in &summary {
1682            let cardinality = match entry.cardinality {
1683                memvid_core::Cardinality::Single => "single",
1684                memvid_core::Cardinality::Multiple => "multiple",
1685            };
1686            let builtin = if entry.is_builtin { "yes" } else { "-" };
1687
1688            println!(
1689                "{:<20} {:<12} {:<10} {:<8} {:<8} {:<8} {}",
1690                truncate(&entry.predicate, 20),
1691                truncate(&entry.inferred_type, 12),
1692                cardinality,
1693                entry.entity_count,
1694                entry.value_count,
1695                entry.unique_values,
1696                builtin
1697            );
1698        }
1699    }
1700
1701    Ok(())
1702}
1703
1704fn handle_schema_list(_config: &CliConfig, args: SchemaListArgs) -> Result<()> {
1705    // If file is provided, open it to get custom schemas; otherwise use default registry
1706    let registry = if let Some(ref path) = args.file {
1707        let mem = Memvid::open(path)?;
1708        mem.schema_registry().clone()
1709    } else {
1710        memvid_core::SchemaRegistry::new()
1711    };
1712
1713    let mut entries: Vec<SchemaListEntry> = registry
1714        .all()
1715        .filter(|s| !args.builtin_only || s.builtin)
1716        .map(|schema| SchemaListEntry {
1717            id: schema.id.clone(),
1718            name: schema.name.clone(),
1719            description: schema.description.clone(),
1720            value_type: schema.range.description(),
1721            cardinality: match schema.cardinality {
1722                memvid_core::Cardinality::Single => "single".to_string(),
1723                memvid_core::Cardinality::Multiple => "multiple".to_string(),
1724            },
1725            domain: schema.domain.iter().map(|k| k.as_str().to_string()).collect(),
1726            builtin: schema.builtin,
1727            inverse: schema.inverse.clone(),
1728        })
1729        .collect();
1730
1731    entries.sort_by(|a, b| a.id.cmp(&b.id));
1732
1733    if entries.is_empty() {
1734        if args.json {
1735            println!("[]");
1736        } else {
1737            println!("No schemas found.");
1738        }
1739        return Ok(());
1740    }
1741
1742    if args.json {
1743        println!("{}", serde_json::to_string_pretty(&entries)?);
1744    } else {
1745        let title = if args.builtin_only {
1746            "Built-in Schemas"
1747        } else {
1748            "Registered Schemas"
1749        };
1750        println!("{} ({} total):", title, entries.len());
1751        println!();
1752        println!(
1753            "{:<20} {:<15} {:<12} {:<10} {}",
1754            "ID", "NAME", "TYPE", "CARDINAL", "DOMAIN"
1755        );
1756        println!("{}", "-".repeat(70));
1757
1758        for entry in &entries {
1759            let domain = if entry.domain.is_empty() {
1760                "*".to_string()
1761            } else {
1762                entry.domain.join(", ")
1763            };
1764            let cardinality = if entry.cardinality == "multiple" {
1765                "multiple"
1766            } else {
1767                "single"
1768            };
1769
1770            println!(
1771                "{:<20} {:<15} {:<12} {:<10} {}",
1772                truncate(&entry.id, 20),
1773                truncate(&entry.name, 15),
1774                truncate(&entry.value_type, 12),
1775                cardinality,
1776                truncate(&domain, 20)
1777            );
1778        }
1779    }
1780
1781    Ok(())
1782}
1783
/// Truncate a string to max length, adding "..." if needed.
///
/// Length is measured in characters, not bytes, so text containing multi-byte
/// UTF-8 characters is cut on a character boundary instead of panicking. A string
/// of at most `max_len` characters is returned unchanged; a longer one is cut to
/// `max_len - 3` characters (saturating at zero) followed by `...`.
fn truncate(s: &str, max_len: usize) -> String {
    if s.chars().count() <= max_len {
        return s.to_string();
    }

    let kept: String = s.chars().take(max_len.saturating_sub(3)).collect();
    format!("{}...", kept)
}