memvid_cli/commands/
enrich.rs

1//! Enrichment command handler for extracting memory cards from frames.
2//!
3//! The `enrich` command runs enrichment engines over MV2 frames to extract
4//! structured memory cards (facts, preferences, events, etc.).
5
6use std::path::PathBuf;
7
8#[cfg(feature = "llama-cpp")]
9use anyhow::bail;
10use anyhow::Result;
11use clap::{Args, ValueEnum};
12use memvid_core::{EnrichmentEngine, Memvid, RulesEngine};
13use serde::Serialize;
14
15#[cfg(feature = "llama-cpp")]
16use crate::commands::{default_enrichment_model, get_installed_model_path, LlmModel};
17use crate::config::CliConfig;
18#[cfg(feature = "candle-llm")]
19use crate::enrich::CandlePhiEngine;
20#[cfg(feature = "llama-cpp")]
21use crate::enrich::LlmEngine;
22use crate::enrich::{ClaudeEngine, GeminiEngine, GroqEngine, MistralEngine, OpenAiEngine, XaiEngine};
23
// Backend selector for `handle_enrich`; it is the value type of the `--engine`
// option declared on `EnrichArgs`.
// NOTE: clap's `ValueEnum` derive uses the `///` comment on each variant as the
// help text of that possible value, so those comments are user-facing strings.
24/// Engine type for enrichment
25#[derive(Debug, Clone, Copy, ValueEnum, Default)]
26pub enum EnrichEngine {
27    /// Rules-based extraction using regex patterns (fast, no models)
    // Default variant (`#[default]`), matching the `default_value_t` of `--engine`.
28    #[default]
29    Rules,
30    /// LLM-based extraction with Phi-3.5 Mini via llama.cpp (requires model installation)
    // Only compiled in when the `llama-cpp` feature is enabled.
31    #[cfg(feature = "llama-cpp")]
32    Llm,
33    /// Candle-based Phi-3 extraction (downloads from Hugging Face)
    // Only compiled in when the `candle-llm` feature is enabled.
34    #[cfg(feature = "candle-llm")]
35    Candle,
    // The remaining variants are hosted-API engines; `handle_enrich` routes each
    // of them through its own `run_*_parallel` helper.
36    /// OpenAI API-based extraction with GPT-4o-mini (requires OPENAI_API_KEY)
37    Openai,
38    /// Claude (Anthropic) API-based extraction with Claude 3.5 Haiku (requires ANTHROPIC_API_KEY)
39    Claude,
40    /// Gemini (Google) API-based extraction with Gemini 2.0 Flash (requires GOOGLE_API_KEY or GEMINI_API_KEY)
41    Gemini,
42    /// xAI API-based extraction with Grok-2 (requires XAI_API_KEY)
43    Xai,
44    /// Groq API-based extraction with Llama 3.3 70B (requires GROQ_API_KEY)
45    Groq,
46    /// Mistral API-based extraction with Mistral Large (requires MISTRAL_API_KEY)
47    Mistral,
48}
49
// Parsed command-line arguments consumed by `handle_enrich`.
// The `///` comments below double as clap help text; keep them user-oriented.
50/// Arguments for the `enrich` subcommand
51#[derive(Args)]
52pub struct EnrichArgs {
53    /// Path to the `.mv2` file
    // Positional argument; opened with `Memvid::open` in `handle_enrich`.
54    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
55    pub file: PathBuf,
56
57    /// Enrichment engine to use
58    #[arg(long, value_enum, default_value_t = EnrichEngine::Rules)]
59    pub engine: EnrichEngine,
60
61    /// Only process frames that haven't been enriched yet (default)
    // NOTE(review): `handle_enrich` never reads this field; it appears to exist
    // only as the documented counterpart of `--force` — confirm before relying on it.
62    #[arg(long, default_value_t = true)]
63    pub incremental: bool,
64
65    /// Re-enrich all frames, ignoring previous enrichment records
    // When set, `handle_enrich` calls `clear_memories()` before running the engine.
66    #[arg(long, conflicts_with = "incremental")]
67    pub force: bool,
68
69    /// Output results as JSON
    // Selects the `EnrichResult` JSON summary instead of the plain-text report.
70    #[arg(long)]
71    pub json: bool,
72
73    /// Show extracted memory cards
    // Only affects the plain-text report, and only when at least one card was extracted.
74    #[arg(long)]
75    pub verbose: bool,
76}
77
78/// Result of enrichment for JSON output
///
/// Built by `handle_enrich` when `--json` is given and printed with
/// `serde_json::to_string_pretty`; field names are the JSON keys.
79#[derive(Debug, Serialize)]
80pub struct EnrichResult {
    /// Engine identifier, i.e. the string form of the engine's `kind()`.
81    pub engine: String,
    /// Engine version, i.e. the string form of the engine's `version()`.
82    pub version: String,
    /// Frame count reported by the enrichment run.
83    pub frames_processed: usize,
    /// Card count reported by the enrichment run.
84    pub cards_extracted: usize,
    /// `card_count` from `memories_stats()` after the commit.
85    pub total_cards: usize,
    /// `entity_count` from `memories_stats()` after the commit.
86    pub total_entities: usize,
87}
88
89/// Handle the `enrich` command
90#[allow(unused_variables)]
91pub fn handle_enrich(config: &CliConfig, args: EnrichArgs) -> Result<()> {
92    let mut mem = Memvid::open(&args.file)?;
93
94    // Get initial stats
95    let initial_stats = mem.memories_stats();
96
97    // If force mode, clear existing memories first
98    if args.force {
99        mem.clear_memories();
100    }
101
102    // Run the selected engine
103    let (engine_kind, engine_version, frames, cards) = match args.engine {
104        EnrichEngine::Rules => {
105            let engine = RulesEngine::new();
106            let kind = engine.kind().to_string();
107            let version = engine.version().to_string();
108            let (frames, cards) = mem.run_enrichment(&engine)?;
109            (kind, version, frames, cards)
110        }
111        #[cfg(feature = "llama-cpp")]
112        EnrichEngine::Llm => {
113            // Check if model is installed
114            let model = default_enrichment_model();
115            let model_path = match get_installed_model_path(config, model) {
116                Some(path) => path,
117                None => {
118                    bail!(
119                        "LLM model not installed. Run `memvid models install {}` first.",
120                        match model {
121                            LlmModel::Phi35Mini => "phi-3.5-mini",
122                            LlmModel::Phi35MiniQ8 => "phi-3.5-mini-q8",
123                        }
124                    );
125                }
126            };
127
128            // Create and initialize the LLM engine
129            let mut engine = LlmEngine::new(model_path);
130            eprintln!("Loading LLM model...");
131            engine.init()?;
132
133            let kind = engine.kind().to_string();
134            let version = engine.version().to_string();
135            let (frames, cards) = mem.run_enrichment(&engine)?;
136            (kind, version, frames, cards)
137        }
138        #[cfg(feature = "candle-llm")]
139        EnrichEngine::Candle => {
140            // Create and initialize the Candle Phi-3 engine
141            // Uses Q4 quantized GGUF (~2.4GB) stored in ~/.memvid/models/llm/phi-3-mini-q4/
142            eprintln!("Loading Phi-3-mini Q4 model via Candle (first run downloads ~2.4GB to ~/.memvid/models/llm/)...");
143            let mut engine = CandlePhiEngine::from_memvid_models(config.models_dir.clone());
144            engine.init()?;
145
146            let kind = engine.kind().to_string();
147            let version = engine.version().to_string();
148            let (frames, cards) = mem.run_enrichment(&engine)?;
149            (kind, version, frames, cards)
150        }
151        EnrichEngine::Openai => {
152            // Create and initialize the OpenAI engine with parallel batch support
153            eprintln!("Using OpenAI GPT-4o-mini for enrichment (parallel mode)...");
154            let mut engine = OpenAiEngine::new();
155            engine.init()?;
156
157            let kind = engine.kind().to_string();
158            let version = engine.version().to_string();
159
160            // Use parallel batch processing for OpenAI
161            let (frames, cards) = run_openai_parallel(&mut mem, &engine)?;
162            (kind, version, frames, cards)
163        }
164        EnrichEngine::Claude => {
165            // Create and initialize the Claude engine with parallel batch support
166            eprintln!("Using Claude 3.5 Haiku for enrichment (parallel mode)...");
167            let mut engine = ClaudeEngine::new();
168            engine.init()?;
169
170            let kind = engine.kind().to_string();
171            let version = engine.version().to_string();
172
173            // Use parallel batch processing for Claude
174            let (frames, cards) = run_claude_parallel(&mut mem, &engine)?;
175            (kind, version, frames, cards)
176        }
177        EnrichEngine::Gemini => {
178            // Create and initialize the Gemini engine with parallel batch support
179            eprintln!("Using Gemini 2.0 Flash for enrichment (parallel mode)...");
180            let mut engine = GeminiEngine::new();
181            engine.init()?;
182
183            let kind = engine.kind().to_string();
184            let version = engine.version().to_string();
185
186            // Use parallel batch processing for Gemini
187            let (frames, cards) = run_gemini_parallel(&mut mem, &engine)?;
188            (kind, version, frames, cards)
189        }
190        EnrichEngine::Xai => {
191            // Create and initialize the xAI engine with parallel batch support
192            eprintln!("Using xAI Grok-2 for enrichment (parallel mode)...");
193            let mut engine = XaiEngine::new();
194            engine.init()?;
195
196            let kind = engine.kind().to_string();
197            let version = engine.version().to_string();
198
199            // Use parallel batch processing for xAI
200            let (frames, cards) = run_xai_parallel(&mut mem, &engine)?;
201            (kind, version, frames, cards)
202        }
203        EnrichEngine::Groq => {
204            // Create and initialize the Groq engine with parallel batch support
205            eprintln!("Using Groq Llama 3.3 70B for enrichment (parallel mode)...");
206            let mut engine = GroqEngine::new();
207            engine.init()?;
208
209            let kind = engine.kind().to_string();
210            let version = engine.version().to_string();
211
212            // Use parallel batch processing for Groq
213            let (frames, cards) = run_groq_parallel(&mut mem, &engine)?;
214            (kind, version, frames, cards)
215        }
216        EnrichEngine::Mistral => {
217            // Create and initialize the Mistral engine with parallel batch support
218            eprintln!("Using Mistral Large for enrichment (parallel mode)...");
219            let mut engine = MistralEngine::new();
220            engine.init()?;
221
222            let kind = engine.kind().to_string();
223            let version = engine.version().to_string();
224
225            // Use parallel batch processing for Mistral
226            let (frames, cards) = run_mistral_parallel(&mut mem, &engine)?;
227            (kind, version, frames, cards)
228        }
229    };
230
231    // Commit changes
232    mem.commit()?;
233
234    // Get final stats
235    let final_stats = mem.memories_stats();
236
237    if args.json {
238        let result = EnrichResult {
239            engine: engine_kind,
240            version: engine_version,
241            frames_processed: frames,
242            cards_extracted: cards,
243            total_cards: final_stats.card_count,
244            total_entities: final_stats.entity_count,
245        };
246        println!("{}", serde_json::to_string_pretty(&result)?);
247    } else {
248        println!("Enrichment complete:");
249        println!("  Engine: {} v{}", engine_kind, engine_version);
250        println!("  Frames processed: {}", frames);
251        println!("  Cards extracted: {}", cards);
252        println!(
253            "  Total cards: {} (+{})",
254            final_stats.card_count,
255            final_stats
256                .card_count
257                .saturating_sub(initial_stats.card_count)
258        );
259        println!("  Entities: {}", final_stats.entity_count);
260
261        if args.verbose && cards > 0 {
262            println!("\nExtracted memory cards:");
263            for entity in mem.memory_entities() {
264                println!("  {}:", entity);
265                for card in mem.get_entity_memories(&entity) {
266                    println!("    - {}: {} = \"{}\"", card.kind, card.slot, card.value);
267                }
268            }
269        }
270    }
271
272    Ok(())
273}
274
275/// Run OpenAI enrichment with parallel batch processing.
276///
277/// This gathers all unenriched frames, sends them to OpenAI in parallel (20 concurrent requests),
278/// and stores the resulting memory cards. This is ~20x faster than sequential processing.
279fn run_openai_parallel(
280    mem: &mut memvid_core::Memvid,
281    engine: &OpenAiEngine,
282) -> Result<(usize, usize)> {
283    use memvid_core::enrich::EnrichmentContext;
284    use memvid_core::EnrichmentEngine;
285
286    let kind = engine.kind();
287    let version = engine.version();
288
289    // Get all unenriched frames
290    let unenriched = mem.get_unenriched_frames(kind, version);
291    let total_frames = unenriched.len();
292
293    if total_frames == 0 {
294        eprintln!("No unenriched frames found.");
295        return Ok((0, 0));
296    }
297
298    eprintln!(
299        "Gathering {} frames for parallel enrichment...",
300        total_frames
301    );
302
303    // Build enrichment contexts for all frames
304    let mut contexts = Vec::with_capacity(total_frames);
305    for frame_id in &unenriched {
306        let frame = match mem.frame_by_id(*frame_id) {
307            Ok(f) => f,
308            Err(_) => continue,
309        };
310
311        // Get full frame content (not truncated preview)
312        let text = match mem.frame_text_by_id(*frame_id) {
313            Ok(t) => t,
314            Err(_) => continue,
315        };
316
317        let uri = frame
318            .uri
319            .clone()
320            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
321        let metadata_json = frame
322            .metadata
323            .as_ref()
324            .and_then(|m| serde_json::to_string(m).ok());
325
326        let ctx = EnrichmentContext::new(
327            *frame_id,
328            uri,
329            text,
330            frame.title.clone(),
331            frame.timestamp,
332            metadata_json,
333        );
334
335        contexts.push(ctx);
336    }
337
338    eprintln!(
339        "Starting parallel enrichment of {} frames with 20 workers...",
340        contexts.len()
341    );
342
343    // Run parallel batch enrichment
344    let results = engine.enrich_batch(contexts)?;
345
346    // Store results back to MV2
347    let mut total_cards = 0;
348    for (frame_id, cards) in results {
349        let card_count = cards.len();
350
351        // Store cards
352        let card_ids = if !cards.is_empty() {
353            mem.put_memory_cards(cards)?
354        } else {
355            Vec::new()
356        };
357
358        // Record enrichment
359        mem.record_enrichment(frame_id, kind, version, card_ids)?;
360
361        total_cards += card_count;
362    }
363
364    Ok((total_frames, total_cards))
365}
366
367/// Run Claude enrichment with parallel batch processing.
368fn run_claude_parallel(
369    mem: &mut memvid_core::Memvid,
370    engine: &ClaudeEngine,
371) -> Result<(usize, usize)> {
372    use memvid_core::enrich::EnrichmentContext;
373    use memvid_core::EnrichmentEngine;
374
375    let kind = engine.kind();
376    let version = engine.version();
377
378    let unenriched = mem.get_unenriched_frames(kind, version);
379    let total_frames = unenriched.len();
380
381    if total_frames == 0 {
382        eprintln!("No unenriched frames found.");
383        return Ok((0, 0));
384    }
385
386    eprintln!(
387        "Gathering {} frames for parallel enrichment...",
388        total_frames
389    );
390
391    let mut contexts = Vec::with_capacity(total_frames);
392    for frame_id in &unenriched {
393        let frame = match mem.frame_by_id(*frame_id) {
394            Ok(f) => f,
395            Err(_) => continue,
396        };
397
398        let text = match mem.frame_text_by_id(*frame_id) {
399            Ok(t) => t,
400            Err(_) => continue,
401        };
402
403        let uri = frame
404            .uri
405            .clone()
406            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
407        let metadata_json = frame
408            .metadata
409            .as_ref()
410            .and_then(|m| serde_json::to_string(m).ok());
411
412        let ctx = EnrichmentContext::new(
413            *frame_id,
414            uri,
415            text,
416            frame.title.clone(),
417            frame.timestamp,
418            metadata_json,
419        );
420
421        contexts.push(ctx);
422    }
423
424    eprintln!(
425        "Starting parallel enrichment of {} frames with 20 workers...",
426        contexts.len()
427    );
428
429    let results = engine.enrich_batch(contexts)?;
430
431    let mut total_cards = 0;
432    for (frame_id, cards) in results {
433        let card_count = cards.len();
434
435        let card_ids = if !cards.is_empty() {
436            mem.put_memory_cards(cards)?
437        } else {
438            Vec::new()
439        };
440
441        mem.record_enrichment(frame_id, kind, version, card_ids)?;
442
443        total_cards += card_count;
444    }
445
446    Ok((total_frames, total_cards))
447}
448
449/// Run Gemini enrichment with parallel batch processing.
450fn run_gemini_parallel(
451    mem: &mut memvid_core::Memvid,
452    engine: &GeminiEngine,
453) -> Result<(usize, usize)> {
454    use memvid_core::enrich::EnrichmentContext;
455    use memvid_core::EnrichmentEngine;
456
457    let kind = engine.kind();
458    let version = engine.version();
459
460    let unenriched = mem.get_unenriched_frames(kind, version);
461    let total_frames = unenriched.len();
462
463    if total_frames == 0 {
464        eprintln!("No unenriched frames found.");
465        return Ok((0, 0));
466    }
467
468    eprintln!(
469        "Gathering {} frames for parallel enrichment...",
470        total_frames
471    );
472
473    let mut contexts = Vec::with_capacity(total_frames);
474    for frame_id in &unenriched {
475        let frame = match mem.frame_by_id(*frame_id) {
476            Ok(f) => f,
477            Err(_) => continue,
478        };
479
480        let text = match mem.frame_text_by_id(*frame_id) {
481            Ok(t) => t,
482            Err(_) => continue,
483        };
484
485        let uri = frame
486            .uri
487            .clone()
488            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
489        let metadata_json = frame
490            .metadata
491            .as_ref()
492            .and_then(|m| serde_json::to_string(m).ok());
493
494        let ctx = EnrichmentContext::new(
495            *frame_id,
496            uri,
497            text,
498            frame.title.clone(),
499            frame.timestamp,
500            metadata_json,
501        );
502
503        contexts.push(ctx);
504    }
505
506    eprintln!(
507        "Starting parallel enrichment of {} frames with 20 workers...",
508        contexts.len()
509    );
510
511    let results = engine.enrich_batch(contexts)?;
512
513    let mut total_cards = 0;
514    for (frame_id, cards) in results {
515        let card_count = cards.len();
516
517        let card_ids = if !cards.is_empty() {
518            mem.put_memory_cards(cards)?
519        } else {
520            Vec::new()
521        };
522
523        mem.record_enrichment(frame_id, kind, version, card_ids)?;
524
525        total_cards += card_count;
526    }
527
528    Ok((total_frames, total_cards))
529}
530
531/// Run xAI enrichment with parallel batch processing.
532fn run_xai_parallel(
533    mem: &mut memvid_core::Memvid,
534    engine: &XaiEngine,
535) -> Result<(usize, usize)> {
536    use memvid_core::enrich::EnrichmentContext;
537    use memvid_core::EnrichmentEngine;
538
539    let kind = engine.kind();
540    let version = engine.version();
541
542    let unenriched = mem.get_unenriched_frames(kind, version);
543    let total_frames = unenriched.len();
544
545    if total_frames == 0 {
546        eprintln!("No unenriched frames found.");
547        return Ok((0, 0));
548    }
549
550    eprintln!(
551        "Gathering {} frames for parallel enrichment...",
552        total_frames
553    );
554
555    let mut contexts = Vec::with_capacity(total_frames);
556    for frame_id in &unenriched {
557        let frame = match mem.frame_by_id(*frame_id) {
558            Ok(f) => f,
559            Err(_) => continue,
560        };
561
562        let text = match mem.frame_text_by_id(*frame_id) {
563            Ok(t) => t,
564            Err(_) => continue,
565        };
566
567        let uri = frame
568            .uri
569            .clone()
570            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
571        let metadata_json = frame
572            .metadata
573            .as_ref()
574            .and_then(|m| serde_json::to_string(m).ok());
575
576        let ctx = EnrichmentContext::new(
577            *frame_id,
578            uri,
579            text,
580            frame.title.clone(),
581            frame.timestamp,
582            metadata_json,
583        );
584
585        contexts.push(ctx);
586    }
587
588    eprintln!(
589        "Starting parallel enrichment of {} frames with 20 workers...",
590        contexts.len()
591    );
592
593    let results = engine.enrich_batch(contexts)?;
594
595    let mut total_cards = 0;
596    for (frame_id, cards) in results {
597        let card_count = cards.len();
598
599        let card_ids = if !cards.is_empty() {
600            mem.put_memory_cards(cards)?
601        } else {
602            Vec::new()
603        };
604
605        mem.record_enrichment(frame_id, kind, version, card_ids)?;
606
607        total_cards += card_count;
608    }
609
610    Ok((total_frames, total_cards))
611}
612
613/// Run Groq enrichment with parallel batch processing.
614fn run_groq_parallel(
615    mem: &mut memvid_core::Memvid,
616    engine: &GroqEngine,
617) -> Result<(usize, usize)> {
618    use memvid_core::enrich::EnrichmentContext;
619    use memvid_core::EnrichmentEngine;
620
621    let kind = engine.kind();
622    let version = engine.version();
623
624    let unenriched = mem.get_unenriched_frames(kind, version);
625    let total_frames = unenriched.len();
626
627    if total_frames == 0 {
628        eprintln!("No unenriched frames found.");
629        return Ok((0, 0));
630    }
631
632    eprintln!(
633        "Gathering {} frames for parallel enrichment...",
634        total_frames
635    );
636
637    let mut contexts = Vec::with_capacity(total_frames);
638    for frame_id in &unenriched {
639        let frame = match mem.frame_by_id(*frame_id) {
640            Ok(f) => f,
641            Err(_) => continue,
642        };
643
644        let text = match mem.frame_text_by_id(*frame_id) {
645            Ok(t) => t,
646            Err(_) => continue,
647        };
648
649        let uri = frame
650            .uri
651            .clone()
652            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
653        let metadata_json = frame
654            .metadata
655            .as_ref()
656            .and_then(|m| serde_json::to_string(m).ok());
657
658        let ctx = EnrichmentContext::new(
659            *frame_id,
660            uri,
661            text,
662            frame.title.clone(),
663            frame.timestamp,
664            metadata_json,
665        );
666
667        contexts.push(ctx);
668    }
669
670    eprintln!(
671        "Starting parallel enrichment of {} frames with 20 workers...",
672        contexts.len()
673    );
674
675    let results = engine.enrich_batch(contexts)?;
676
677    let mut total_cards = 0;
678    for (frame_id, cards) in results {
679        let card_count = cards.len();
680
681        let card_ids = if !cards.is_empty() {
682            mem.put_memory_cards(cards)?
683        } else {
684            Vec::new()
685        };
686
687        mem.record_enrichment(frame_id, kind, version, card_ids)?;
688
689        total_cards += card_count;
690    }
691
692    Ok((total_frames, total_cards))
693}
694
695/// Run Mistral enrichment with parallel batch processing.
696fn run_mistral_parallel(
697    mem: &mut memvid_core::Memvid,
698    engine: &MistralEngine,
699) -> Result<(usize, usize)> {
700    use memvid_core::enrich::EnrichmentContext;
701    use memvid_core::EnrichmentEngine;
702
703    let kind = engine.kind();
704    let version = engine.version();
705
706    let unenriched = mem.get_unenriched_frames(kind, version);
707    let total_frames = unenriched.len();
708
709    if total_frames == 0 {
710        eprintln!("No unenriched frames found.");
711        return Ok((0, 0));
712    }
713
714    eprintln!(
715        "Gathering {} frames for parallel enrichment...",
716        total_frames
717    );
718
719    let mut contexts = Vec::with_capacity(total_frames);
720    for frame_id in &unenriched {
721        let frame = match mem.frame_by_id(*frame_id) {
722            Ok(f) => f,
723            Err(_) => continue,
724        };
725
726        let text = match mem.frame_text_by_id(*frame_id) {
727            Ok(t) => t,
728            Err(_) => continue,
729        };
730
731        let uri = frame
732            .uri
733            .clone()
734            .unwrap_or_else(|| format!("mv2://frame/{}", frame_id));
735        let metadata_json = frame
736            .metadata
737            .as_ref()
738            .and_then(|m| serde_json::to_string(m).ok());
739
740        let ctx = EnrichmentContext::new(
741            *frame_id,
742            uri,
743            text,
744            frame.title.clone(),
745            frame.timestamp,
746            metadata_json,
747        );
748
749        contexts.push(ctx);
750    }
751
752    eprintln!(
753        "Starting parallel enrichment of {} frames with 20 workers...",
754        contexts.len()
755    );
756
757    let results = engine.enrich_batch(contexts)?;
758
759    let mut total_cards = 0;
760    for (frame_id, cards) in results {
761        let card_count = cards.len();
762
763        let card_ids = if !cards.is_empty() {
764            mem.put_memory_cards(cards)?
765        } else {
766            Vec::new()
767        };
768
769        mem.record_enrichment(frame_id, kind, version, card_ids)?;
770
771        total_cards += card_count;
772    }
773
774    Ok((total_frames, total_cards))
775}
776
// Parsed command-line arguments consumed by `handle_memories`.
// NOTE(review): the `///` line below describes the subcommand rather than this
// struct; clap may surface it as help text, so its wording is left untouched.
777/// Handle the `memories` subcommand (view memory cards)
778#[derive(Args)]
779pub struct MemoriesArgs {
780    /// Path to the `.mv2` file
781    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
782    pub file: PathBuf,
783
784    /// Filter by entity
    // NOTE(review): passed to the lookups exactly as typed; `handle_state`
    // lowercases its entity first — confirm whether this difference is intended.
785    #[arg(long)]
786    pub entity: Option<String>,
787
788    /// Filter by slot
    // Only honoured together with `--entity`; `handle_memories` ignores it when
    // no entity is given.
789    #[arg(long)]
790    pub slot: Option<String>,
791
792    /// Output as JSON
    // Prints the selected cards as a JSON array of `MemoryOutput` values.
793    #[arg(long)]
794    pub json: bool,
795}
796
797/// Memory card output for JSON serialization
///
/// Produced from a `memvid_core::MemoryCard` by `card_to_output`; field names
/// are the JSON keys.
798#[derive(Debug, Serialize)]
799pub struct MemoryOutput {
    /// The card's `id`.
800    pub id: u64,
    /// String form (`to_string()`) of the card's `kind`.
801    pub kind: String,
    /// Entity the card belongs to.
802    pub entity: String,
    /// Slot name within the entity.
803    pub slot: String,
    /// Stored value for the slot.
804    pub value: String,
    /// String form of the card's polarity, when it has one.
805    pub polarity: Option<String>,
    /// The card's `document_date`, copied as-is (presumably a Unix timestamp — confirm).
806    pub document_date: Option<i64>,
    /// The card's `source_frame_id`.
807    pub source_frame_id: u64,
808}
809
810pub fn handle_memories(_config: &CliConfig, args: MemoriesArgs) -> Result<()> {
811    let mem = Memvid::open(&args.file)?;
812
813    let stats = mem.memories_stats();
814
815    if args.json {
816        let mut cards: Vec<MemoryOutput> = Vec::new();
817
818        if let Some(entity) = &args.entity {
819            if let Some(slot) = &args.slot {
820                // Specific entity:slot
821                if let Some(card) = mem.get_current_memory(entity, slot) {
822                    cards.push(card_to_output(card));
823                }
824            } else {
825                // All cards for entity
826                for card in mem.get_entity_memories(entity) {
827                    cards.push(card_to_output(card));
828                }
829            }
830        } else {
831            // All entities
832            for entity in mem.memory_entities() {
833                for card in mem.get_entity_memories(&entity) {
834                    cards.push(card_to_output(card));
835                }
836            }
837        }
838
839        println!("{}", serde_json::to_string_pretty(&cards)?);
840    } else {
841        println!(
842            "Memory cards: {} total, {} entities",
843            stats.card_count, stats.entity_count
844        );
845        println!();
846
847        if let Some(entity) = &args.entity {
848            if let Some(slot) = &args.slot {
849                // Specific entity:slot
850                if let Some(card) = mem.get_current_memory(entity, slot) {
851                    println!("{}:{} = \"{}\"", entity, slot, card.value);
852                } else {
853                    println!("No memory found for {}:{}", entity, slot);
854                }
855            } else {
856                // All cards for entity
857                println!("{}:", entity);
858                for card in mem.get_entity_memories(entity) {
859                    println!("  {}: {} = \"{}\"", card.kind, card.slot, card.value);
860                }
861            }
862        } else {
863            // All entities
864            for entity in mem.memory_entities() {
865                println!("{}:", entity);
866                for card in mem.get_entity_memories(&entity) {
867                    let polarity = card
868                        .polarity
869                        .as_ref()
870                        .map(|p| format!(" [{}]", p))
871                        .unwrap_or_default();
872                    println!(
873                        "  {}: {} = \"{}\"{}",
874                        card.kind, card.slot, card.value, polarity
875                    );
876                }
877                println!();
878            }
879        }
880    }
881
882    Ok(())
883}
884
885fn card_to_output(card: &memvid_core::MemoryCard) -> MemoryOutput {
886    MemoryOutput {
887        id: card.id,
888        kind: card.kind.to_string(),
889        entity: card.entity.clone(),
890        slot: card.slot.clone(),
891        value: card.value.clone(),
892        polarity: card.polarity.as_ref().map(|p| p.to_string()),
893        document_date: card.document_date,
894        source_frame_id: card.source_frame_id,
895    }
896}
897
898// ============================================================================
899// State Command - Entity-Centric Current State Queries
900// ============================================================================
901
// Parsed command-line arguments consumed by `handle_state`.
// NOTE(review): the `///` line below describes the subcommand rather than this
// struct; clap may surface it as help text, so its wording is left untouched.
902/// Handle the `state` subcommand (query current entity state)
903#[derive(Args)]
904pub struct StateArgs {
905    /// Path to the `.mv2` file
906    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
907    pub file: PathBuf,
908
909    /// Entity to query (required)
    // `handle_state` lowercases this value before any lookup.
910    #[arg(long, short = 'e')]
911    pub entity: String,
912
913    /// Specific slot to query (optional, omit for full entity profile)
914    #[arg(long, short = 's')]
915    pub slot: Option<String>,
916
917    /// Query state at a specific point in time (Unix timestamp)
    // In the single-slot path, `handle_state` calls `get_memory_at_time` when
    // this is set and `get_current_memory` otherwise.
918    #[arg(long)]
919    pub at_time: Option<i64>,
920
921    /// Output as JSON
922    #[arg(long)]
923    pub json: bool,
924}
925
926/// State output for JSON serialization
///
/// Envelope printed by `handle_state` in `--json` mode; field names are the
/// JSON keys.
927#[derive(Debug, Serialize)]
928pub struct StateOutput {
    /// Entity name as used for the lookup (`handle_state` lowercases it first).
929    pub entity: String,
    /// Queried slot; left out of the JSON entirely when `None`.
930    #[serde(skip_serializing_if = "Option::is_none")]
931    pub slot: Option<String>,
    /// The `--at-time` value, if one was given; left out of the JSON when `None`.
932    #[serde(skip_serializing_if = "Option::is_none")]
933    pub at_time: Option<i64>,
    /// The resolved state: a single slot value or a full profile.
934    pub state: StateValue,
935}
936
937/// The state value - either a single slot or full profile
///
/// Marked `#[serde(untagged)]`, so the JSON carries no variant name: `Single`
/// serialises as an object with the fields below, `Profile` as an array of
/// `SlotState` objects.
938#[derive(Debug, Serialize)]
939#[serde(untagged)]
940pub enum StateValue {
941    /// Single slot value
    // Filled from one memory card in the single-slot path of `handle_state`.
942    Single {
        // The card's value.
943        value: String,
        // String form of the card's kind.
944        kind: String,
        // String form of the card's polarity, when it has one.
945        polarity: Option<String>,
        // The card's `source_frame_id`.
946        source_frame_id: u64,
        // The card's `document_date`, copied as-is.
947        document_date: Option<i64>,
948    },
949    /// Full entity profile
950    Profile(Vec<SlotState>),
951}
952
/// One entry of `StateValue::Profile`.
///
/// Carries the same fields as `StateValue::Single` plus the slot name.
/// NOTE(review): the code that fills this struct is outside the visible part of
/// the file; the field notes below assume it mirrors the single-slot path.
953#[derive(Debug, Serialize)]
954pub struct SlotState {
    /// Slot name.
955    pub slot: String,
    /// Value stored for the slot.
956    pub value: String,
    /// Card kind as a string.
957    pub kind: String,
    /// Card polarity as a string, when present.
958    pub polarity: Option<String>,
    /// Source frame id of the card.
959    pub source_frame_id: u64,
    /// Document date of the card, when present.
960    pub document_date: Option<i64>,
961}
962
963pub fn handle_state(_config: &CliConfig, args: StateArgs) -> Result<()> {
964    let mem = Memvid::open(&args.file)?;
965
966    let entity = args.entity.to_lowercase(); // Normalize entity name
967
968    if let Some(slot) = &args.slot {
969        // O(1) lookup for specific entity:slot
970        let card = if let Some(ts) = args.at_time {
971            mem.get_memory_at_time(&entity, slot, ts)
972        } else {
973            mem.get_current_memory(&entity, slot)
974        };
975
976        if args.json {
977            if let Some(card) = card {
978                let output = StateOutput {
979                    entity: entity.clone(),
980                    slot: Some(slot.clone()),
981                    at_time: args.at_time,
982                    state: StateValue::Single {
983                        value: card.value.clone(),
984                        kind: card.kind.to_string(),
985                        polarity: card.polarity.as_ref().map(|p| p.to_string()),
986                        source_frame_id: card.source_frame_id,
987                        document_date: card.document_date,
988                    },
989                };
990                println!("{}", serde_json::to_string_pretty(&output)?);
991            } else {
992                println!("null");
993            }
994        } else {
995            if let Some(card) = card {
996                let time_info = if let Some(ts) = args.at_time {
997                    format!(" (at {})", format_timestamp(ts))
998                } else {
999                    String::new()
1000                };
1001                let polarity = card
1002                    .polarity
1003                    .as_ref()
1004                    .map(|p| format!(" [{}]", p))
1005                    .unwrap_or_default();
1006                println!(
1007                    "{}:{} = \"{}\"{}{}",
1008                    entity, slot, card.value, polarity, time_info
1009                );
1010                println!("  kind: {}", card.kind);
1011                println!("  source: frame {}", card.source_frame_id);
1012                if let Some(date) = card.document_date {
1013                    println!("  date: {}", format_timestamp(date));
1014                }
1015            } else {
1016                let time_info = if let Some(ts) = args.at_time {
1017                    format!(" at {}", format_timestamp(ts))
1018                } else {
1019                    String::new()
1020                };
1021                println!("No value for {}:{}{}", entity, slot, time_info);
1022            }
1023        }
1024    } else {
1025        // Get full entity profile
1026        let cards = mem.get_entity_memories(&entity);
1027
1028        if cards.is_empty() {
1029            if args.json {
1030                println!("null");
1031            } else {
1032                println!("No state found for entity: {}", entity);
1033            }
1034            return Ok(());
1035        }
1036
1037        // Group by slot and get current value for each
1038        let mut slots: std::collections::HashMap<String, &memvid_core::MemoryCard> =
1039            std::collections::HashMap::new();
1040
1041        for card in &cards {
1042            // Keep the most recent card for each slot
1043            let dominated = slots
1044                .get(&card.slot)
1045                .map(|existing| {
1046                    card.effective_timestamp() > existing.effective_timestamp()
1047                })
1048                .unwrap_or(true);
1049
1050            if dominated && !card.is_retracted() {
1051                slots.insert(card.slot.clone(), card);
1052            }
1053        }
1054
1055        if args.json {
1056            let mut profile: Vec<SlotState> = slots
1057                .values()
1058                .map(|card| SlotState {
1059                    slot: card.slot.clone(),
1060                    value: card.value.clone(),
1061                    kind: card.kind.to_string(),
1062                    polarity: card.polarity.as_ref().map(|p| p.to_string()),
1063                    source_frame_id: card.source_frame_id,
1064                    document_date: card.document_date,
1065                })
1066                .collect();
1067            profile.sort_by(|a, b| a.slot.cmp(&b.slot));
1068
1069            let output = StateOutput {
1070                entity: entity.clone(),
1071                slot: None,
1072                at_time: args.at_time,
1073                state: StateValue::Profile(profile),
1074            };
1075            println!("{}", serde_json::to_string_pretty(&output)?);
1076        } else {
1077            println!("{}:", entity);
1078            let mut sorted_slots: Vec<_> = slots.into_iter().collect();
1079            sorted_slots.sort_by(|a, b| a.0.cmp(&b.0));
1080
1081            for (slot, card) in sorted_slots {
1082                let polarity = card
1083                    .polarity
1084                    .as_ref()
1085                    .map(|p| format!(" [{}]", p))
1086                    .unwrap_or_default();
1087                println!("  {}: \"{}\"{}  ({})", slot, card.value, polarity, card.kind);
1088            }
1089        }
1090    }
1091
1092    Ok(())
1093}
1094
1095fn format_timestamp(ts: i64) -> String {
1096    use std::time::{Duration, UNIX_EPOCH};
1097    let datetime = UNIX_EPOCH + Duration::from_secs(ts as u64);
1098    let datetime: chrono::DateTime<chrono::Utc> = datetime.into();
1099    datetime.format("%Y-%m-%d %H:%M:%S UTC").to_string()
1100}
1101
/// Arguments for the `facts` (entity audit) command.
// NOTE: clap's derive macros use the `///` comments in this struct as
// `--help` text, so rewording them changes CLI output. Maintainer notes are
// therefore written as plain `//` comments.
#[derive(Debug, Args)]
pub struct FactsArgs {
    /// Path to the `.mv2` file
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,

    // `handle_facts` lowercases this value before looking the entity up.
    /// Filter by entity (optional)
    #[arg(long, short = 'e')]
    pub entity: Option<String>,

    // Compared against each card's slot with `eq_ignore_ascii_case`.
    /// Filter by predicate/slot (optional)
    #[arg(long, short = 'p')]
    pub predicate: Option<String>,

    // Case-insensitive substring match against each card's value.
    /// Filter by value (optional)
    #[arg(long, short = 'v')]
    pub value: Option<String>,

    // NOTE(review): `handle_facts` in this file never reads this flag; every
    // matching card is listed whether or not it is set.
    /// Show full history including superseded values
    #[arg(long)]
    pub history: bool,

    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
1129
/// Audit log entry for JSON output.
///
/// One entry is produced per memory card that passes the `facts` filters.
#[derive(Debug, Serialize)]
pub struct AuditLogEntry {
    /// Frame the card was extracted from (`source_frame_id`).
    pub frame_id: u64,
    /// The card's document date, falling back to its creation time.
    pub timestamp: Option<i64>,
    /// Entity the card belongs to.
    pub entity: String,
    /// Slot (predicate) name.
    pub slot: String,
    /// Stored value.
    pub value: String,
    /// The card's version relation, as returned by its `as_str()`.
    pub relation: String,
    /// The card's kind, rendered with `to_string()`.
    pub kind: String,
    /// Polarity rendered with `to_string()`; omitted from the JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub polarity: Option<String>,
    /// Extraction confidence; omitted from the JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f32>,
    /// Name of the engine recorded on the card.
    pub engine: String,
    /// Superseded frame ID. `handle_facts` currently always sets this to
    /// `None` (tracking is a TODO there), so it never appears in the output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub supersedes: Option<u64>,
}
1148
/// Audit output for JSON serialization.
///
/// Top-level object printed by `handle_facts` when `--json` is set.
#[derive(Debug, Serialize)]
pub struct AuditLogOutput {
    /// Number of entries in `entries`.
    pub total: usize,
    /// The `--entity` filter exactly as given on the command line (not
    /// lowercased); omitted from the JSON when no filter was supplied.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub entity_filter: Option<String>,
    /// The `--predicate` filter as given; omitted when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub predicate_filter: Option<String>,
    /// The `--value` filter as given; omitted when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub value_filter: Option<String>,
    /// Matching entries, sorted by timestamp (oldest first).
    pub entries: Vec<AuditLogEntry>,
}
1161
/// Format a Unix timestamp (seconds, UTC) as ISO 8601 (without chrono).
///
/// Produces `YYYY-MM-DDTHH:MM:SSZ` on the proleptic Gregorian calendar.
/// Pre-epoch (negative) timestamps yield the correct earlier date rather than
/// being mirrored to the positive side, and the conversion is closed-form, so
/// it neither loops per year nor overflows for any `i64` input. Years with
/// more than four digits are printed in full; years before 0 get a leading
/// `-`.
fn format_audit_timestamp(ts: i64) -> String {
    const SECS_PER_DAY: i64 = 86_400;
    // Days from 0000-03-01 to 1970-01-01.
    const EPOCH_SHIFT_DAYS: i64 = 719_468;
    // Days in one 400-year Gregorian cycle.
    const DAYS_PER_ERA: i64 = 146_097;

    // Euclidean division keeps the time of day in 0..86_400 even when `ts`
    // is negative.
    let days = ts.div_euclid(SECS_PER_DAY);
    let secs_of_day = ts.rem_euclid(SECS_PER_DAY);
    let hours = secs_of_day / 3600;
    let minutes = (secs_of_day % 3600) / 60;
    let seconds = secs_of_day % 60;

    // Days-to-civil conversion. Years are counted from 1 March so that the
    // leap day is the last day of the year and month lengths follow a fixed
    // pattern.
    let shifted = days + EPOCH_SHIFT_DAYS;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA); // 0..=146_096
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365; // 0..=399
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // 0..=365
    let month_index = (5 * day_of_year + 2) / 153; // 0 = March .. 11 = February
    let day = day_of_year - (153 * month_index + 2) / 5 + 1; // 1..=31
    let month = if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    };
    // January and February belong to the following civil year.
    let year = year_of_era + era * 400 + i64::from(month <= 2);

    let sign = if year < 0 { "-" } else { "" };
    format!(
        "{}{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
        sign,
        year.unsigned_abs(),
        month,
        day,
        hours,
        minutes,
        seconds
    )
}
1217
1218pub fn handle_facts(_config: &CliConfig, args: FactsArgs) -> Result<()> {
1219    let mem = Memvid::open(&args.file)?;
1220
1221    // Collect all memory cards matching the filters
1222    let mut entries: Vec<AuditLogEntry> = Vec::new();
1223
1224    // Get entities to iterate
1225    let entities: Vec<String> = if let Some(entity) = &args.entity {
1226        vec![entity.to_lowercase()]
1227    } else {
1228        mem.memory_entities()
1229    };
1230
1231    for entity in entities {
1232        let cards = mem.get_entity_memories(&entity);
1233
1234        for card in cards {
1235            // Filter by predicate
1236            if let Some(pred) = &args.predicate {
1237                if !card.slot.eq_ignore_ascii_case(pred) {
1238                    continue;
1239                }
1240            }
1241
1242            // Filter by value
1243            if let Some(val) = &args.value {
1244                if !card.value.to_lowercase().contains(&val.to_lowercase()) {
1245                    continue;
1246                }
1247            }
1248
1249            entries.push(AuditLogEntry {
1250                frame_id: card.source_frame_id,
1251                timestamp: card.document_date.or(Some(card.created_at)),
1252                entity: card.entity.clone(),
1253                slot: card.slot.clone(),
1254                value: card.value.clone(),
1255                relation: card.version_relation.as_str().to_string(),
1256                kind: card.kind.to_string(),
1257                polarity: card.polarity.as_ref().map(|p| p.to_string()),
1258                confidence: card.confidence,
1259                engine: card.engine.clone(),
1260                supersedes: None, // TODO: track superseded frame IDs
1261            });
1262        }
1263    }
1264
1265    // Sort by timestamp (oldest first for audit trail)
1266    entries.sort_by(|a, b| {
1267        let ts_a = a.timestamp.unwrap_or(0);
1268        let ts_b = b.timestamp.unwrap_or(0);
1269        ts_a.cmp(&ts_b)
1270    });
1271
1272    if args.json {
1273        let output = AuditLogOutput {
1274            total: entries.len(),
1275            entity_filter: args.entity.clone(),
1276            predicate_filter: args.predicate.clone(),
1277            value_filter: args.value.clone(),
1278            entries,
1279        };
1280        println!("{}", serde_json::to_string_pretty(&output)?);
1281    } else {
1282        if entries.is_empty() {
1283            println!("No matching facts found.");
1284            return Ok(());
1285        }
1286
1287        println!("Audit Trail ({} entries):", entries.len());
1288        println!();
1289
1290        for entry in entries {
1291            let ts_str = entry
1292                .timestamp
1293                .map(format_audit_timestamp)
1294                .unwrap_or_else(|| "unknown".to_string());
1295
1296            let polarity_suffix = entry
1297                .polarity
1298                .as_ref()
1299                .map(|p| format!(" [{}]", p))
1300                .unwrap_or_default();
1301
1302            let conf = entry
1303                .confidence
1304                .map(|c| format!(" (conf: {:.2})", c))
1305                .unwrap_or_default();
1306
1307            let polarity_prefix = if entry.polarity.is_some() {
1308                if entry.polarity.as_deref() == Some("negative") {
1309                    "-"
1310                } else if entry.polarity.as_deref() == Some("positive") {
1311                    "+"
1312                } else {
1313                    ""
1314                }
1315            } else {
1316                ""
1317            };
1318
1319            println!(
1320                "Frame {} ({}): {} {}:{}=\"{}\"{}  [{}]{}",
1321                entry.frame_id,
1322                ts_str,
1323                entry.relation.to_uppercase(),
1324                entry.entity,
1325                entry.slot,
1326                entry.value,
1327                polarity_suffix,
1328                entry.engine,
1329                conf,
1330            );
1331        }
1332    }
1333
1334    Ok(())
1335}
1336
1337// ============================================================================
1338// Export Command - Export facts to standard formats (N-Triples, JSON, CSV)
1339// ============================================================================
1340
/// Export format for the `export` command.
// Selected with `--format`/`-f` on `ExportArgs`. `Ntriples` is the default
// twice over: via `#[default]` here and via `default_value_t` on the argument.
// NOTE: with `ValueEnum`, the `///` comments on the variants are used by clap
// as help text for the possible values, so they are left untouched.
#[derive(Debug, Clone, Copy, ValueEnum, Default)]
pub enum ExportFormat {
    /// N-Triples RDF format (.nt)
    #[default]
    Ntriples,
    /// JSON format with full metadata
    Json,
    /// CSV format for spreadsheet import
    Csv,
}
1352
/// Arguments for the `export` subcommand.
// NOTE: clap's derive macros use the `///` comments in this struct as
// `--help` text, so maintainer notes are written as plain `//` comments.
#[derive(Debug, Args)]
pub struct ExportArgs {
    /// Path to the `.mv2` file
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,

    /// Export format
    #[arg(long, short = 'f', value_enum, default_value_t = ExportFormat::Ntriples)]
    pub format: ExportFormat,

    // `handle_export` lowercases this value before looking the entity up.
    /// Filter by entity (optional)
    #[arg(long, short = 'e')]
    pub entity: Option<String>,

    // Compared against each card's slot with `eq_ignore_ascii_case`.
    /// Filter by predicate/slot (optional)
    #[arg(long, short = 'p')]
    pub predicate: Option<String>,

    // `handle_export` interpolates this verbatim between `<` and `>` for both
    // subject IRIs (`<base><entity>`) and predicate IRIs
    // (`<base>pred/<slot>`); it is not validated or escaped.
    /// Base URI for N-Triples output (default: mv2://entity/)
    #[arg(long, default_value = "mv2://entity/")]
    pub base_uri: String,

    // Adds source_frame_id, timestamp, engine and confidence to JSON and CSV
    // output. The N-Triples branch of `handle_export` does not print them.
    /// Include provenance metadata in output
    #[arg(long)]
    pub with_provenance: bool,
}
1380
/// Export entry for JSON output.
///
/// One subject/predicate/object triple per exported memory card. The four
/// optional provenance fields are only filled in by `handle_export` when
/// `--with-provenance` is set, and are omitted from the JSON when `None`.
#[derive(Debug, Serialize)]
pub struct ExportEntry {
    /// The card's entity.
    pub subject: String,
    /// The card's slot.
    pub predicate: String,
    /// The card's value.
    pub object: String,
    /// Frame the card was extracted from.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_frame_id: Option<u64>,
    /// The card's document date, falling back to its creation time.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub timestamp: Option<i64>,
    /// Name of the engine recorded on the card.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub engine: Option<String>,
    /// Extraction confidence, if the card has one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f32>,
}
1396
/// Escape a string so it can be placed inside an N-Triples quoted literal.
///
/// Backslash, double quote, line feed, carriage return and tab are replaced
/// by their two-character backslash escapes; every other character is copied
/// through unchanged.
fn escape_ntriples(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            let replacement = match ch {
                '\\' => Some("\\\\"),
                '"' => Some("\\\""),
                '\n' => Some("\\n"),
                '\r' => Some("\\r"),
                '\t' => Some("\\t"),
                _ => None,
            };
            match replacement {
                Some(sequence) => escaped.push_str(sequence),
                None => escaped.push(ch),
            }
            escaped
        })
}
1412
/// Escape a string for CSV format.
///
/// A field containing a comma, a double quote, a line feed or a carriage
/// return is wrapped in double quotes, with embedded quotes doubled. Any
/// other field is returned unchanged. Carriage returns are included because a
/// bare `\r` in an unquoted field is read as a record separator by many CSV
/// parsers.
fn escape_csv(s: &str) -> String {
    let needs_quoting = s.contains(|c: char| matches!(c, ',' | '"' | '\n' | '\r'));
    if needs_quoting {
        format!("\"{}\"", s.replace('"', "\"\""))
    } else {
        s.to_string()
    }
}
1421
/// Normalize an entity name into a valid URI component.
///
/// Every character that would either change the structure of the URI
/// (space, `/`, `:`, `#`, `?`, `&`) or is not allowed inside an N-Triples
/// `<...>` IRI reference (`<`, `>`, `"`, `{`, `}`, `|`, `^`, backtick,
/// backslash, and control characters up to U+0020) is replaced by `_`.
/// Without the second group, a name containing e.g. `>` or a newline would
/// terminate the IRI early and corrupt the exported line. All other
/// characters, including non-ASCII ones, are kept.
fn normalize_uri_component(s: &str) -> String {
    s.chars()
        .map(|c| match c {
            // URI delimiters that would alter how the component is parsed.
            ' ' | '/' | ':' | '#' | '?' | '&' => '_',
            // Characters the N-Triples IRIREF production forbids.
            '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\\' => '_',
            c if c <= '\u{20}' => '_',
            c => c,
        })
        .collect()
}
1431
1432pub fn handle_export(_config: &CliConfig, args: ExportArgs) -> Result<()> {
1433    let mem = Memvid::open(&args.file)?;
1434
1435    // Collect all memory cards matching the filters
1436    let entities: Vec<String> = if let Some(entity) = &args.entity {
1437        vec![entity.to_lowercase()]
1438    } else {
1439        mem.memory_entities()
1440    };
1441
1442    // Build list of triplets
1443    let mut triplets: Vec<ExportEntry> = Vec::new();
1444
1445    for entity in entities {
1446        let cards = mem.get_entity_memories(&entity);
1447
1448        for card in cards {
1449            // Filter by predicate
1450            if let Some(pred) = &args.predicate {
1451                if !card.slot.eq_ignore_ascii_case(pred) {
1452                    continue;
1453                }
1454            }
1455
1456            // Skip retracted cards
1457            if card.is_retracted() {
1458                continue;
1459            }
1460
1461            triplets.push(ExportEntry {
1462                subject: card.entity.clone(),
1463                predicate: card.slot.clone(),
1464                object: card.value.clone(),
1465                source_frame_id: if args.with_provenance {
1466                    Some(card.source_frame_id)
1467                } else {
1468                    None
1469                },
1470                timestamp: if args.with_provenance {
1471                    card.document_date.or(Some(card.created_at))
1472                } else {
1473                    None
1474                },
1475                engine: if args.with_provenance {
1476                    Some(card.engine.clone())
1477                } else {
1478                    None
1479                },
1480                confidence: if args.with_provenance {
1481                    card.confidence
1482                } else {
1483                    None
1484                },
1485            });
1486        }
1487    }
1488
1489    match args.format {
1490        ExportFormat::Ntriples => {
1491            // Output N-Triples format
1492            // Format: <subject> <predicate> "object" .
1493            for t in &triplets {
1494                let subject_uri = format!(
1495                    "<{}{}>",
1496                    args.base_uri,
1497                    normalize_uri_component(&t.subject)
1498                );
1499                let predicate_uri = format!(
1500                    "<{}pred/{}>",
1501                    args.base_uri,
1502                    normalize_uri_component(&t.predicate)
1503                );
1504                let object_literal = format!("\"{}\"", escape_ntriples(&t.object));
1505
1506                println!("{} {} {} .", subject_uri, predicate_uri, object_literal);
1507            }
1508        }
1509        ExportFormat::Json => {
1510            // Output JSON format
1511            println!("{}", serde_json::to_string_pretty(&triplets)?);
1512        }
1513        ExportFormat::Csv => {
1514            // Output CSV format
1515            if args.with_provenance {
1516                println!("subject,predicate,object,source_frame_id,timestamp,engine,confidence");
1517            } else {
1518                println!("subject,predicate,object");
1519            }
1520
1521            for t in &triplets {
1522                if args.with_provenance {
1523                    println!(
1524                        "{},{},{},{},{},{},{}",
1525                        escape_csv(&t.subject),
1526                        escape_csv(&t.predicate),
1527                        escape_csv(&t.object),
1528                        t.source_frame_id.map(|id| id.to_string()).unwrap_or_default(),
1529                        t.timestamp.map(|ts| ts.to_string()).unwrap_or_default(),
1530                        t.engine.as_deref().map(escape_csv).unwrap_or_default(),
1531                        t.confidence.map(|c| format!("{:.2}", c)).unwrap_or_default(),
1532                    );
1533                } else {
1534                    println!(
1535                        "{},{},{}",
1536                        escape_csv(&t.subject),
1537                        escape_csv(&t.predicate),
1538                        escape_csv(&t.object),
1539                    );
1540                }
1541            }
1542        }
1543    }
1544
1545    // Output count to stderr so it doesn't pollute stdout piping
1546    eprintln!("Exported {} triplets", triplets.len());
1547
1548    Ok(())
1549}
1550
1551// ============================================================================
1552// Schema Command - Infer and manage predicate schemas
1553// ============================================================================
1554
/// Arguments for the `schema` subcommand.
// Thin wrapper: its only content is the nested subcommand, which
// `handle_schema` dispatches on.
#[derive(Debug, Args)]
pub struct SchemaArgs {
    #[command(subcommand)]
    pub command: SchemaCommand,
}
1561
/// Schema subcommands.
// Each variant carries its own argument struct. `handle_schema` routes
// `Infer` to `handle_schema_infer` and `List` to `handle_schema_list`.
// NOTE: the `///` comments on the variants are picked up by clap as the
// subcommands' help text, so they are left untouched.
#[derive(Debug, clap::Subcommand)]
pub enum SchemaCommand {
    /// Infer schemas from existing memory cards
    Infer(SchemaInferArgs),
    /// List registered schemas (built-in + custom)
    List(SchemaListArgs),
}
1570
/// Arguments for `schema infer`.
// NOTE: clap's derive macros use the `///` comments in this struct as
// `--help` text, so maintainer notes are written as plain `//` comments.
#[derive(Debug, Args)]
pub struct SchemaInferArgs {
    /// Path to the `.mv2` file
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: PathBuf,

    // When set, `handle_schema_infer` registers the inferred schemas and
    // commits the memory file before printing the summary.
    /// Register inferred schemas to the memory file
    #[arg(long)]
    pub register: bool,

    // `requires = "register"`: clap rejects `--overwrite` unless `--register`
    // is also given.
    /// Overwrite existing schemas when registering
    #[arg(long, requires = "register")]
    pub overwrite: bool,

    /// Output as JSON
    #[arg(long)]
    pub json: bool,
}
1590
/// Arguments for `schema list`.
// NOTE: clap's derive macros use the `///` comments in this struct as
// `--help` text, so maintainer notes are written as plain `//` comments.
#[derive(Debug, Args)]
pub struct SchemaListArgs {
    // Optional positional argument. When omitted, `handle_schema_list` builds
    // a fresh `memvid_core::SchemaRegistry::new()` instead of opening a file.
    /// Path to the `.mv2` file (optional, shows built-in schemas if omitted)
    #[arg(value_name = "FILE", value_parser = clap::value_parser!(PathBuf))]
    pub file: Option<PathBuf>,

    /// Output as JSON
    #[arg(long)]
    pub json: bool,

    // Keeps only registry entries whose `builtin` flag is true.
    /// Show only built-in schemas
    #[arg(long)]
    pub builtin_only: bool,
}
1606
/// Schema list entry for JSON output.
///
/// Flattened, string-based view of one registry schema, built by
/// `handle_schema_list`.
#[derive(Debug, Serialize)]
pub struct SchemaListEntry {
    /// Schema identifier; the list is sorted by this field.
    pub id: String,
    /// Schema name.
    pub name: String,
    /// Optional description; omitted from the JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Text returned by the schema range's `description()`.
    pub value_type: String,
    /// Either `"single"` or `"multiple"`.
    pub cardinality: String,
    /// String form of each kind in the schema's domain; the text table shows
    /// `*` when this is empty.
    pub domain: Vec<String>,
    /// The schema's `builtin` flag.
    pub builtin: bool,
    /// The schema's inverse, if any; omitted from the JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub inverse: Option<String>,
}
1621
1622pub fn handle_schema(_config: &CliConfig, args: SchemaArgs) -> Result<()> {
1623    match args.command {
1624        SchemaCommand::Infer(infer_args) => handle_schema_infer(_config, infer_args),
1625        SchemaCommand::List(list_args) => handle_schema_list(_config, list_args),
1626    }
1627}
1628
1629fn handle_schema_infer(_config: &CliConfig, args: SchemaInferArgs) -> Result<()> {
1630    let mut mem = Memvid::open(&args.file)?;
1631
1632    // Get schema summary (which includes inference)
1633    let summary = mem.schema_summary();
1634
1635    if summary.is_empty() {
1636        if args.json {
1637            println!("[]");
1638        } else {
1639            println!("No predicates found in memory.");
1640        }
1641        return Ok(());
1642    }
1643
1644    if args.register {
1645        let count = mem.register_inferred_schemas(args.overwrite);
1646        mem.commit()?;
1647        eprintln!("Registered {} inferred schemas", count);
1648    }
1649
1650    if args.json {
1651        println!("{}", serde_json::to_string_pretty(&summary)?);
1652    } else {
1653        println!("Inferred Schemas ({} predicates):", summary.len());
1654        println!();
1655        println!(
1656            "{:<20} {:<12} {:<10} {:<8} {:<8} {:<8} {}",
1657            "PREDICATE", "TYPE", "CARDINAL", "ENTITIES", "VALUES", "UNIQUE", "BUILTIN"
1658        );
1659        println!("{}", "-".repeat(80));
1660
1661        for entry in &summary {
1662            let cardinality = match entry.cardinality {
1663                memvid_core::Cardinality::Single => "single",
1664                memvid_core::Cardinality::Multiple => "multiple",
1665            };
1666            let builtin = if entry.is_builtin { "yes" } else { "-" };
1667
1668            println!(
1669                "{:<20} {:<12} {:<10} {:<8} {:<8} {:<8} {}",
1670                truncate(&entry.predicate, 20),
1671                truncate(&entry.inferred_type, 12),
1672                cardinality,
1673                entry.entity_count,
1674                entry.value_count,
1675                entry.unique_values,
1676                builtin
1677            );
1678        }
1679    }
1680
1681    Ok(())
1682}
1683
1684fn handle_schema_list(_config: &CliConfig, args: SchemaListArgs) -> Result<()> {
1685    // If file is provided, open it to get custom schemas; otherwise use default registry
1686    let registry = if let Some(ref path) = args.file {
1687        let mem = Memvid::open(path)?;
1688        mem.schema_registry().clone()
1689    } else {
1690        memvid_core::SchemaRegistry::new()
1691    };
1692
1693    let mut entries: Vec<SchemaListEntry> = registry
1694        .all()
1695        .filter(|s| !args.builtin_only || s.builtin)
1696        .map(|schema| SchemaListEntry {
1697            id: schema.id.clone(),
1698            name: schema.name.clone(),
1699            description: schema.description.clone(),
1700            value_type: schema.range.description(),
1701            cardinality: match schema.cardinality {
1702                memvid_core::Cardinality::Single => "single".to_string(),
1703                memvid_core::Cardinality::Multiple => "multiple".to_string(),
1704            },
1705            domain: schema.domain.iter().map(|k| k.as_str().to_string()).collect(),
1706            builtin: schema.builtin,
1707            inverse: schema.inverse.clone(),
1708        })
1709        .collect();
1710
1711    entries.sort_by(|a, b| a.id.cmp(&b.id));
1712
1713    if entries.is_empty() {
1714        if args.json {
1715            println!("[]");
1716        } else {
1717            println!("No schemas found.");
1718        }
1719        return Ok(());
1720    }
1721
1722    if args.json {
1723        println!("{}", serde_json::to_string_pretty(&entries)?);
1724    } else {
1725        let title = if args.builtin_only {
1726            "Built-in Schemas"
1727        } else {
1728            "Registered Schemas"
1729        };
1730        println!("{} ({} total):", title, entries.len());
1731        println!();
1732        println!(
1733            "{:<20} {:<15} {:<12} {:<10} {}",
1734            "ID", "NAME", "TYPE", "CARDINAL", "DOMAIN"
1735        );
1736        println!("{}", "-".repeat(70));
1737
1738        for entry in &entries {
1739            let domain = if entry.domain.is_empty() {
1740                "*".to_string()
1741            } else {
1742                entry.domain.join(", ")
1743            };
1744            let cardinality = if entry.cardinality == "multiple" {
1745                "multiple"
1746            } else {
1747                "single"
1748            };
1749
1750            println!(
1751                "{:<20} {:<15} {:<12} {:<10} {}",
1752                truncate(&entry.id, 20),
1753                truncate(&entry.name, 15),
1754                truncate(&entry.value_type, 12),
1755                cardinality,
1756                truncate(&domain, 20)
1757            );
1758        }
1759    }
1760
1761    Ok(())
1762}
1763
/// Truncate a string to at most `max_len` characters, adding "..." if needed.
///
/// Length is measured in `char`s rather than bytes, so strings containing
/// multi-byte UTF-8 characters are cut on character boundaries instead of
/// panicking. When the string is too long, the last three kept positions are
/// replaced by `...`. If `max_len` is smaller than the ellipsis itself, the
/// string is simply cut to `max_len` characters so the result never exceeds
/// the requested width.
fn truncate(s: &str, max_len: usize) -> String {
    const ELLIPSIS: &str = "...";

    // `nth(max_len)` is `None` exactly when the string has at most `max_len`
    // characters, and it stops scanning as soon as the answer is known.
    if s.chars().nth(max_len).is_none() {
        return s.to_string();
    }

    if max_len < ELLIPSIS.len() {
        return s.chars().take(max_len).collect();
    }

    let mut shortened: String = s.chars().take(max_len - ELLIPSIS.len()).collect();
    shortened.push_str(ELLIPSIS);
    shortened
}