Skip to main content

rlm_rs/cli/
commands.rs

1//! CLI command implementations.
2//!
3//! Contains the business logic for each CLI command.
4
5// Allow style choices for clarity
6#![allow(clippy::format_push_string)]
7#![allow(clippy::uninlined_format_args)]
8#![allow(clippy::too_many_lines)]
9#![allow(clippy::option_if_let_else)]
10#![allow(clippy::manual_div_ceil)]
11#![allow(clippy::redundant_closure_for_method_calls)]
12#![allow(clippy::if_not_else)]
13
14use crate::chunking::{ChunkerMetadata, create_chunker};
15use crate::cli::output::{
16    GrepMatch, OutputFormat, format_buffer, format_buffer_list, format_chunk_indices,
17    format_grep_matches, format_peek, format_status, format_write_chunks_result,
18};
19use crate::cli::parser::{ChunkCommands, Cli, Commands};
20use crate::core::{Buffer, Context, ContextValue};
21use crate::embedding::create_embedder;
22use crate::error::{CommandError, Result, StorageError};
23use crate::io::{read_file, write_file};
24use crate::search::{SearchConfig, SearchResult, embed_buffer_chunks, hybrid_search};
25use crate::storage::{SqliteStorage, Storage};
26use regex::RegexBuilder;
27use std::fmt::Write as FmtWrite;
28use std::io::{self, Read, Write as IoWrite};
29
/// Executes the CLI command.
///
/// Dispatches the parsed command variant to its `cmd_*` implementation,
/// threading the resolved database path and output format through to each
/// handler. This function contains no business logic of its own.
///
/// # Arguments
///
/// * `cli` - Parsed CLI arguments.
///
/// # Returns
///
/// Result with output string on success.
///
/// # Errors
///
/// Returns an error if the command fails to execute.
#[allow(clippy::too_many_lines)]
pub fn execute(cli: &Cli) -> Result<String> {
    // Shared inputs: every handler receives the same db path and format.
    let format = OutputFormat::parse(&cli.format);
    let db_path = cli.get_db_path();

    match &cli.command {
        Commands::Init { force } => cmd_init(&db_path, *force, format),
        Commands::Status => cmd_status(&db_path, format),
        Commands::Reset { yes } => cmd_reset(&db_path, *yes, format),
        Commands::Load {
            file,
            name,
            chunker,
            chunk_size,
            overlap,
        } => cmd_load(
            &db_path,
            file,
            name.as_deref(),
            chunker,
            *chunk_size,
            *overlap,
            format,
        ),
        Commands::ListBuffers => cmd_list_buffers(&db_path, format),
        Commands::ShowBuffer { buffer, chunks } => {
            cmd_show_buffer(&db_path, buffer, *chunks, format)
        }
        Commands::DeleteBuffer { buffer, yes } => cmd_delete_buffer(&db_path, buffer, *yes, format),
        Commands::Peek { buffer, start, end } => cmd_peek(&db_path, buffer, *start, *end, format),
        Commands::Grep {
            buffer,
            pattern,
            max_matches,
            window,
            ignore_case,
        } => cmd_grep(
            &db_path,
            buffer,
            pattern,
            *max_matches,
            *window,
            *ignore_case,
            format,
        ),
        Commands::ChunkIndices {
            buffer,
            chunk_size,
            overlap,
        } => cmd_chunk_indices(&db_path, buffer, *chunk_size, *overlap, format),
        Commands::WriteChunks {
            buffer,
            out_dir,
            chunk_size,
            overlap,
            prefix,
        } => cmd_write_chunks(
            &db_path,
            buffer,
            out_dir,
            *chunk_size,
            *overlap,
            prefix,
            format,
        ),
        Commands::AddBuffer { name, content } => {
            cmd_add_buffer(&db_path, name, content.as_deref(), format)
        }
        Commands::UpdateBuffer {
            buffer,
            content,
            embed,
            strategy,
            chunk_size,
            overlap,
        } => cmd_update_buffer(
            &db_path,
            buffer,
            content.as_deref(),
            *embed,
            strategy,
            *chunk_size,
            *overlap,
            format,
        ),
        Commands::ExportBuffers { output, pretty } => {
            cmd_export_buffers(&db_path, output.as_deref(), *pretty, format)
        }
        Commands::Variable {
            name,
            value,
            delete,
        } => cmd_variable(&db_path, name, value.as_deref(), *delete, format),
        Commands::Global {
            name,
            value,
            delete,
        } => cmd_global(&db_path, name, value.as_deref(), *delete, format),
        Commands::Search {
            query,
            top_k,
            threshold,
            mode,
            rrf_k,
            buffer,
            preview,
            preview_len,
        } => cmd_search(
            &db_path,
            query,
            *top_k,
            *threshold,
            mode,
            *rrf_k,
            buffer.as_deref(),
            *preview,
            *preview_len,
            format,
        ),
        Commands::Aggregate {
            buffer,
            min_relevance,
            group_by,
            sort_by,
            output_buffer,
        } => cmd_aggregate(
            &db_path,
            buffer.as_deref(),
            min_relevance,
            group_by,
            sort_by,
            output_buffer.as_deref(),
            format,
        ),
        Commands::Dispatch {
            buffer,
            batch_size,
            workers,
            query,
            mode,
            threshold,
        } => cmd_dispatch(
            &db_path,
            buffer,
            *batch_size,
            *workers,
            query.as_deref(),
            mode,
            *threshold,
            format,
        ),
        // Chunk-level operations live under a nested subcommand enum.
        Commands::Chunk(chunk_cmd) => match chunk_cmd {
            ChunkCommands::Get { id, metadata } => cmd_chunk_get(&db_path, *id, *metadata, format),
            ChunkCommands::List {
                buffer,
                preview,
                preview_len,
            } => cmd_chunk_list(&db_path, buffer, *preview, *preview_len, format),
            ChunkCommands::Embed { buffer, force } => {
                cmd_chunk_embed(&db_path, buffer, *force, format)
            }
            ChunkCommands::Status => cmd_chunk_status(&db_path, format),
        },
    }
}
208
209/// Opens storage and ensures it's initialized.
210fn open_storage(db_path: &std::path::Path) -> Result<SqliteStorage> {
211    let storage = SqliteStorage::open(db_path)?;
212
213    if !storage.is_initialized()? {
214        return Err(StorageError::NotInitialized.into());
215    }
216
217    Ok(storage)
218}
219
220/// Resolves a buffer identifier (ID or name) to a buffer.
221fn resolve_buffer(storage: &SqliteStorage, identifier: &str) -> Result<Buffer> {
222    // Try as ID first
223    if let Ok(id) = identifier.parse::<i64>()
224        && let Some(buffer) = storage.get_buffer(id)?
225    {
226        return Ok(buffer);
227    }
228
229    // Try as name
230    if let Some(buffer) = storage.get_buffer_by_name(identifier)? {
231        return Ok(buffer);
232    }
233
234    Err(StorageError::BufferNotFound {
235        identifier: identifier.to_string(),
236    }
237    .into())
238}
239
240// ==================== Command Implementations ====================
241
242fn cmd_init(db_path: &std::path::Path, force: bool, _format: OutputFormat) -> Result<String> {
243    // Check if already exists
244    if db_path.exists() && !force {
245        return Err(CommandError::ExecutionFailed(
246            "Database already exists. Use --force to reinitialize.".to_string(),
247        )
248        .into());
249    }
250
251    // Create parent directory if needed
252    if let Some(parent) = db_path.parent()
253        && !parent.exists()
254    {
255        std::fs::create_dir_all(parent).map_err(|e| {
256            CommandError::ExecutionFailed(format!("Failed to create directory: {e}"))
257        })?;
258    }
259
260    // If force, delete existing
261    if force && db_path.exists() {
262        std::fs::remove_file(db_path).map_err(|e| {
263            CommandError::ExecutionFailed(format!("Failed to remove existing database: {e}"))
264        })?;
265    }
266
267    let mut storage = SqliteStorage::open(db_path)?;
268    storage.init()?;
269
270    // Initialize empty context
271    let context = Context::new();
272    storage.save_context(&context)?;
273
274    Ok(format!(
275        "Initialized RLM database at: {}\n",
276        db_path.display()
277    ))
278}
279
280fn cmd_status(db_path: &std::path::Path, format: OutputFormat) -> Result<String> {
281    let storage = open_storage(db_path)?;
282    let stats = storage.stats()?;
283    Ok(format_status(&stats, format))
284}
285
286fn cmd_reset(db_path: &std::path::Path, yes: bool, _format: OutputFormat) -> Result<String> {
287    if !yes {
288        // In a real implementation, we'd prompt the user
289        // For now, require --yes flag
290        return Err(CommandError::ExecutionFailed(
291            "Use --yes to confirm reset. This will delete all data.".to_string(),
292        )
293        .into());
294    }
295
296    let mut storage = open_storage(db_path)?;
297    storage.reset()?;
298
299    // Reinitialize with empty context
300    let context = Context::new();
301    storage.save_context(&context)?;
302
303    Ok("RLM state reset successfully.\n".to_string())
304}
305
306fn cmd_load(
307    db_path: &std::path::Path,
308    file: &std::path::Path,
309    name: Option<&str>,
310    chunker_name: &str,
311    chunk_size: usize,
312    overlap: usize,
313    format: OutputFormat,
314) -> Result<String> {
315    let mut storage = open_storage(db_path)?;
316
317    // Read file content
318    let content = read_file(file)?;
319
320    // Create buffer
321    let buffer_name = name
322        .map(String::from)
323        .or_else(|| file.file_name().and_then(|n| n.to_str()).map(String::from));
324
325    let mut buffer = Buffer::from_file(file.to_path_buf(), content.clone());
326    buffer.name = buffer_name;
327    buffer.compute_hash();
328
329    // Add buffer to storage
330    let buffer_id = storage.add_buffer(&buffer)?;
331
332    // Chunk the content
333    let chunker = create_chunker(chunker_name)?;
334    let meta = ChunkerMetadata::with_size_and_overlap(chunk_size, overlap);
335    let chunks = chunker.chunk(buffer_id, &content, Some(&meta))?;
336
337    // Store chunks
338    storage.add_chunks(buffer_id, &chunks)?;
339
340    // Generate embeddings for semantic search (automatic during load)
341    let embedder = create_embedder()?;
342    let embedded_count = embed_buffer_chunks(&mut storage, embedder.as_ref(), buffer_id)?;
343
344    // Update buffer with chunk count
345    let mut updated_buffer =
346        storage
347            .get_buffer(buffer_id)?
348            .ok_or_else(|| StorageError::BufferNotFound {
349                identifier: buffer_id.to_string(),
350            })?;
351    updated_buffer.set_chunk_count(chunks.len());
352    storage.update_buffer(&updated_buffer)?;
353
354    // Update context
355    if let Some(mut context) = storage.load_context()? {
356        context.add_buffer(buffer_id);
357        storage.save_context(&context)?;
358    }
359
360    match format {
361        OutputFormat::Text => Ok(format!(
362            "Loaded buffer {} (ID: {}) with {} chunks ({} embedded) from {}\n",
363            updated_buffer.name.as_deref().unwrap_or("unnamed"),
364            buffer_id,
365            chunks.len(),
366            embedded_count,
367            file.display()
368        )),
369        OutputFormat::Json | OutputFormat::Ndjson => {
370            let result = serde_json::json!({
371                "buffer_id": buffer_id,
372                "name": updated_buffer.name,
373                "chunk_count": chunks.len(),
374                "embedded_count": embedded_count,
375                "size": content.len(),
376                "source": file.to_string_lossy()
377            });
378            Ok(serde_json::to_string_pretty(&result).unwrap_or_default())
379        }
380    }
381}
382
383fn cmd_list_buffers(db_path: &std::path::Path, format: OutputFormat) -> Result<String> {
384    let storage = open_storage(db_path)?;
385    let buffers = storage.list_buffers()?;
386    Ok(format_buffer_list(&buffers, format))
387}
388
389fn cmd_show_buffer(
390    db_path: &std::path::Path,
391    identifier: &str,
392    show_chunks: bool,
393    format: OutputFormat,
394) -> Result<String> {
395    let storage = open_storage(db_path)?;
396    let buffer = resolve_buffer(&storage, identifier)?;
397
398    let chunks = if show_chunks {
399        Some(storage.get_chunks(buffer.id.unwrap_or(0))?)
400    } else {
401        None
402    };
403
404    Ok(format_buffer(&buffer, chunks.as_deref(), format))
405}
406
407fn cmd_delete_buffer(
408    db_path: &std::path::Path,
409    identifier: &str,
410    yes: bool,
411    _format: OutputFormat,
412) -> Result<String> {
413    if !yes {
414        return Err(
415            CommandError::ExecutionFailed("Use --yes to confirm deletion.".to_string()).into(),
416        );
417    }
418
419    let mut storage = open_storage(db_path)?;
420    let buffer = resolve_buffer(&storage, identifier)?;
421    let buffer_id = buffer.id.unwrap_or(0);
422    let buffer_name = buffer.name.unwrap_or_else(|| format!("{buffer_id}"));
423
424    storage.delete_buffer(buffer_id)?;
425
426    // Update context
427    if let Some(mut context) = storage.load_context()? {
428        context.remove_buffer(buffer_id);
429        storage.save_context(&context)?;
430    }
431
432    Ok(format!("Deleted buffer: {buffer_name}\n"))
433}
434
435fn cmd_peek(
436    db_path: &std::path::Path,
437    identifier: &str,
438    start: usize,
439    end: Option<usize>,
440    format: OutputFormat,
441) -> Result<String> {
442    let storage = open_storage(db_path)?;
443    let buffer = resolve_buffer(&storage, identifier)?;
444
445    let end = end.unwrap_or(start + 3000).min(buffer.content.len());
446    let start = start.min(buffer.content.len());
447
448    let content = buffer.slice(start, end).unwrap_or("");
449    Ok(format_peek(content, start, end, format))
450}
451
452fn cmd_grep(
453    db_path: &std::path::Path,
454    identifier: &str,
455    pattern: &str,
456    max_matches: usize,
457    window: usize,
458    ignore_case: bool,
459    format: OutputFormat,
460) -> Result<String> {
461    let storage = open_storage(db_path)?;
462    let buffer = resolve_buffer(&storage, identifier)?;
463
464    let regex = RegexBuilder::new(pattern)
465        .case_insensitive(ignore_case)
466        .build()
467        .map_err(|e| CommandError::InvalidArgument(format!("Invalid regex: {e}")))?;
468
469    let mut matches = Vec::new();
470    for m in regex.find_iter(&buffer.content) {
471        if matches.len() >= max_matches {
472            break;
473        }
474
475        let start = m.start().saturating_sub(window);
476        let end = (m.end() + window).min(buffer.content.len());
477
478        // Find valid UTF-8 boundaries
479        let start = crate::io::find_char_boundary(&buffer.content, start);
480        let end = crate::io::find_char_boundary(&buffer.content, end);
481
482        matches.push(GrepMatch {
483            offset: m.start(),
484            matched: m.as_str().to_string(),
485            snippet: buffer.content[start..end].to_string(),
486        });
487    }
488
489    Ok(format_grep_matches(&matches, pattern, format))
490}
491
492fn cmd_chunk_indices(
493    db_path: &std::path::Path,
494    identifier: &str,
495    chunk_size: usize,
496    overlap: usize,
497    format: OutputFormat,
498) -> Result<String> {
499    let storage = open_storage(db_path)?;
500    let buffer = resolve_buffer(&storage, identifier)?;
501
502    let content_len = buffer.content.len();
503    let mut indices = Vec::new();
504
505    if chunk_size == 0 || overlap >= chunk_size {
506        return Err(
507            CommandError::InvalidArgument("Invalid chunk_size or overlap".to_string()).into(),
508        );
509    }
510
511    let step = chunk_size - overlap;
512    let mut start = 0;
513
514    while start < content_len {
515        let end = (start + chunk_size).min(content_len);
516        indices.push((start, end));
517        if end >= content_len {
518            break;
519        }
520        start += step;
521    }
522
523    Ok(format_chunk_indices(&indices, format))
524}
525
526fn cmd_write_chunks(
527    db_path: &std::path::Path,
528    identifier: &str,
529    out_dir: &std::path::Path,
530    chunk_size: usize,
531    overlap: usize,
532    prefix: &str,
533    format: OutputFormat,
534) -> Result<String> {
535    let mut storage = open_storage(db_path)?;
536    let buffer = resolve_buffer(&storage, identifier)?;
537    let buffer_id = buffer.id.unwrap_or(0);
538
539    // Create chunker and chunk the content
540    let chunker = create_chunker("semantic")?;
541    let meta = ChunkerMetadata::with_size_and_overlap(chunk_size, overlap);
542    let chunks = chunker.chunk(buffer_id, &buffer.content, Some(&meta))?;
543
544    // Store chunks in SQLite
545    storage.add_chunks(buffer_id, &chunks)?;
546
547    // Update buffer with chunk count
548    let mut updated_buffer =
549        storage
550            .get_buffer(buffer_id)?
551            .ok_or_else(|| StorageError::BufferNotFound {
552                identifier: buffer_id.to_string(),
553            })?;
554    updated_buffer.set_chunk_count(chunks.len());
555    storage.update_buffer(&updated_buffer)?;
556
557    // Write chunks to files
558    let chunks_iter = chunks
559        .iter()
560        .enumerate()
561        .map(|(i, c)| (i, c.content.as_str()));
562    let paths = crate::io::reader::write_chunks(out_dir, chunks_iter, prefix)?;
563
564    Ok(format_write_chunks_result(&paths, format))
565}
566
567fn cmd_add_buffer(
568    db_path: &std::path::Path,
569    name: &str,
570    content: Option<&str>,
571    format: OutputFormat,
572) -> Result<String> {
573    let mut storage = open_storage(db_path)?;
574
575    // Read content from stdin if not provided
576    let content = if let Some(c) = content {
577        c.to_string()
578    } else {
579        let mut buffer = String::new();
580        io::stdin().read_to_string(&mut buffer).map_err(|e| {
581            CommandError::ExecutionFailed(format!("Failed to read from stdin: {e}"))
582        })?;
583        buffer
584    };
585
586    let buffer = Buffer::from_named(name.to_string(), content.clone());
587    let buffer_id = storage.add_buffer(&buffer)?;
588
589    // Update context
590    if let Some(mut context) = storage.load_context()? {
591        context.add_buffer(buffer_id);
592        storage.save_context(&context)?;
593    }
594
595    match format {
596        OutputFormat::Text => Ok(format!(
597            "Added buffer '{}' (ID: {}, {} bytes)\n",
598            name,
599            buffer_id,
600            content.len()
601        )),
602        OutputFormat::Json | OutputFormat::Ndjson => {
603            let result = serde_json::json!({
604                "buffer_id": buffer_id,
605                "name": name,
606                "size": content.len()
607            });
608            Ok(serde_json::to_string_pretty(&result).unwrap_or_default())
609        }
610    }
611}
612
/// Replaces a buffer's content, re-chunks it, and optionally re-embeds it.
///
/// New content comes from `content` or, when absent, from stdin. All
/// existing chunks for the buffer are deleted (cascading to embeddings)
/// before the new content is chunked with `strategy`.
#[allow(clippy::too_many_arguments, clippy::redundant_clone)]
fn cmd_update_buffer(
    db_path: &std::path::Path,
    identifier: &str,
    content: Option<&str>,
    embed: bool,
    strategy: &str,
    chunk_size: usize,
    overlap: usize,
    format: OutputFormat,
) -> Result<String> {
    let mut storage = open_storage(db_path)?;
    let buffer = resolve_buffer(&storage, identifier)?;
    let buffer_id = buffer
        .id
        .ok_or_else(|| CommandError::ExecutionFailed("Buffer has no ID".to_string()))?;
    // Display name for messages; falls back to the numeric ID.
    let buffer_name = buffer.name.clone().unwrap_or_else(|| buffer_id.to_string());

    // Read content from stdin if not provided
    let new_content = if let Some(c) = content {
        c.to_string()
    } else {
        let mut buf = String::new();
        io::stdin().read_to_string(&mut buf).map_err(|e| {
            CommandError::ExecutionFailed(format!("Failed to read from stdin: {e}"))
        })?;
        buf
    };

    let content_size = new_content.len();

    // Get old chunk count for comparison
    let old_chunk_count = storage.chunk_count(buffer_id)?;

    // Delete existing chunks (this cascades to embeddings)
    storage.delete_chunks(buffer_id)?;

    // Update buffer content, keeping name/source/metadata unchanged.
    let updated_buffer = Buffer {
        id: Some(buffer_id),
        name: buffer.name.clone(),
        content: new_content.clone(),
        source: buffer.source.clone(),
        metadata: buffer.metadata.clone(),
    };
    storage.update_buffer(&updated_buffer)?;

    // Re-chunk the content with the requested strategy.
    let chunker = create_chunker(strategy)?;
    let meta = ChunkerMetadata::with_size_and_overlap(chunk_size, overlap);
    let chunks = chunker.chunk(buffer_id, &new_content, Some(&meta))?;
    let new_chunk_count = chunks.len();
    storage.add_chunks(buffer_id, &chunks)?;

    // Optionally embed the new chunks (force=false — presumably skips chunks
    // that already have embeddings; confirm against embed_buffer_chunks_incremental).
    let embed_result = if embed {
        let embedder = create_embedder()?;
        let result = crate::search::embed_buffer_chunks_incremental(
            &mut storage,
            embedder.as_ref(),
            buffer_id,
            false,
        )?;
        Some(result)
    } else {
        None
    };

    match format {
        OutputFormat::Text => {
            let mut output = String::new();
            output.push_str(&format!(
                "Updated buffer '{}' ({} bytes)\n",
                buffer_name, content_size
            ));
            output.push_str(&format!(
                "Chunks: {} -> {} (using {} strategy)\n",
                old_chunk_count, new_chunk_count, strategy
            ));
            if let Some(ref result) = embed_result {
                output.push_str(&format!(
                    "Embedded {} chunks using model '{}'\n",
                    result.embedded_count, result.model_name
                ));
            }
            Ok(output)
        }
        OutputFormat::Json | OutputFormat::Ndjson => {
            let json = serde_json::json!({
                "buffer_id": buffer_id,
                "buffer_name": buffer_name,
                "content_size": content_size,
                "old_chunk_count": old_chunk_count,
                "new_chunk_count": new_chunk_count,
                "strategy": strategy,
                // null when --embed was not requested
                "embedded": embed_result.as_ref().map(|r| serde_json::json!({
                    "count": r.embedded_count,
                    "model": r.model_name
                }))
            });
            Ok(serde_json::to_string_pretty(&json).unwrap_or_default())
        }
    }
}
717
/// Analyst finding from a subagent.
///
/// Deserialized from the JSON array consumed by `cmd_aggregate`; the
/// `#[serde(default)]` attributes let partially-filled findings parse.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
struct AnalystFinding {
    // ID of the chunk the finding refers to.
    chunk_id: i64,
    // Relevance label; expected values are "high"/"medium"/"low"/"none"
    // (see `relevance_order`), anything else ranks last.
    relevance: String,
    // Individual finding strings extracted from the chunk.
    #[serde(default)]
    findings: Vec<String>,
    // Optional one-line summary of the chunk.
    #[serde(default)]
    summary: Option<String>,
    // Suggested follow-up items.
    #[serde(default)]
    follow_up: Vec<String>,
}
730
/// Maps a relevance label (case-insensitive) to a sort key:
/// high < medium < low < none < anything-else.
fn relevance_order(relevance: &str) -> u8 {
    const ORDER: [&str; 4] = ["high", "medium", "low", "none"];
    let needle = relevance.to_lowercase();
    ORDER
        .iter()
        .position(|&level| level == needle)
        .map_or(4, |rank| rank as u8)
}
741
/// Returns true when `relevance` is at least as strong as `min_relevance`.
///
/// Labels rank high < medium < low < none, case-insensitively; unknown
/// labels rank last and therefore never beat a known minimum.
fn meets_relevance_threshold(relevance: &str, min_relevance: &str) -> bool {
    // Local copy of the relevance ranking so this predicate is self-contained.
    fn rank(label: &str) -> u8 {
        match label.to_lowercase().as_str() {
            "high" => 0,
            "medium" => 1,
            "low" => 2,
            "none" => 3,
            _ => 4,
        }
    }

    rank(relevance) <= rank(min_relevance)
}
746
/// Aggregates analyst findings produced by subagents.
///
/// Reads a JSON array of `AnalystFinding` from a buffer (or stdin), filters
/// by minimum relevance, sorts and groups the survivors, deduplicates the
/// individual finding strings, and optionally stores the sorted set in a
/// new buffer named `output_buffer`.
fn cmd_aggregate(
    db_path: &std::path::Path,
    buffer: Option<&str>,
    min_relevance: &str,
    group_by: &str,
    sort_by: &str,
    output_buffer: Option<&str>,
    format: OutputFormat,
) -> Result<String> {
    let mut storage = open_storage(db_path)?;

    // Read findings from buffer or stdin
    let input = if let Some(buffer_name) = buffer {
        let buf = resolve_buffer(&storage, buffer_name)?;
        buf.content
    } else {
        let mut buf = String::new();
        io::stdin().read_to_string(&mut buf).map_err(|e| {
            CommandError::ExecutionFailed(format!("Failed to read from stdin: {e}"))
        })?;
        buf
    };

    // Parse findings
    let findings: Vec<AnalystFinding> = serde_json::from_str(&input)
        .map_err(|e| CommandError::ExecutionFailed(format!("Invalid JSON input: {e}")))?;

    // Filter by relevance
    let filtered: Vec<_> = findings
        .into_iter()
        .filter(|f| meets_relevance_threshold(&f.relevance, min_relevance))
        .collect();

    // Sort findings; unrecognized sort keys leave input order untouched.
    let mut sorted = filtered;
    match sort_by {
        "relevance" => sorted.sort_by_key(|f| relevance_order(&f.relevance)),
        "chunk_id" => sorted.sort_by_key(|f| f.chunk_id),
        // Reverse so the findings-richest entries come first.
        "findings_count" => sorted.sort_by_key(|f| std::cmp::Reverse(f.findings.len())),
        _ => {}
    }

    // Group findings; BTreeMap keeps group keys in deterministic order.
    let grouped: std::collections::BTreeMap<String, Vec<&AnalystFinding>> = match group_by {
        "relevance" => {
            let mut map = std::collections::BTreeMap::new();
            for f in &sorted {
                map.entry(f.relevance.clone())
                    .or_insert_with(Vec::new)
                    .push(f);
            }
            map
        }
        "chunk_id" => {
            let mut map = std::collections::BTreeMap::new();
            for f in &sorted {
                map.entry(f.chunk_id.to_string())
                    .or_insert_with(Vec::new)
                    .push(f);
            }
            map
        }
        // Any other value: a single "all" group.
        _ => {
            let mut map = std::collections::BTreeMap::new();
            map.insert("all".to_string(), sorted.iter().collect());
            map
        }
    };

    // Collect all unique findings (deduplicated, first occurrence wins)
    let mut all_findings: Vec<&str> = Vec::new();
    for f in &sorted {
        for finding in &f.findings {
            if !all_findings.contains(&finding.as_str()) {
                all_findings.push(finding);
            }
        }
    }

    // Build summary stats (relevance counts are case-sensitive here,
    // unlike the ranking helpers — NOTE(review): confirm intended)
    let total_findings = sorted.len();
    let high_count = sorted.iter().filter(|f| f.relevance == "high").count();
    let medium_count = sorted.iter().filter(|f| f.relevance == "medium").count();
    let low_count = sorted.iter().filter(|f| f.relevance == "low").count();
    let unique_findings_count = all_findings.len();

    // Store in output buffer if requested
    if let Some(out_name) = output_buffer {
        let output_content = serde_json::to_string_pretty(&sorted).unwrap_or_default();
        let out_buffer = Buffer::from_named(out_name.to_string(), output_content);
        storage.add_buffer(&out_buffer)?;
    }

    match format {
        OutputFormat::Text => {
            let mut output = String::new();
            output.push_str(&format!("Aggregated {} analyst findings\n", total_findings));
            output.push_str(&format!(
                "Relevance: {} high, {} medium, {} low\n",
                high_count, medium_count, low_count
            ));
            output.push_str(&format!("Unique findings: {}\n\n", unique_findings_count));

            for (group, items) in &grouped {
                output.push_str(&format!("## {} ({} chunks)\n", group, items.len()));
                for f in items {
                    output.push_str(&format!("  Chunk {}: ", f.chunk_id));
                    // Prefer the summary line; fall back to the first finding.
                    if let Some(ref summary) = f.summary {
                        output.push_str(&truncate_str(summary, 80));
                    } else if !f.findings.is_empty() {
                        output.push_str(&truncate_str(&f.findings[0], 80));
                    }
                    output.push('\n');
                }
                output.push('\n');
            }

            if output_buffer.is_some() {
                output.push_str(&format!(
                    "Results stored in buffer '{}'\n",
                    output_buffer.unwrap_or("")
                ));
            }

            Ok(output)
        }
        OutputFormat::Json | OutputFormat::Ndjson => {
            let json = serde_json::json!({
                "summary": {
                    "total_findings": total_findings,
                    "high_relevance": high_count,
                    "medium_relevance": medium_count,
                    "low_relevance": low_count,
                    "unique_findings": unique_findings_count
                },
                "grouped": grouped,
                "findings": sorted,
                "all_findings_deduplicated": all_findings,
                "output_buffer": output_buffer
            });
            Ok(serde_json::to_string_pretty(&json).unwrap_or_default())
        }
    }
}
891
892fn cmd_export_buffers(
893    db_path: &std::path::Path,
894    output: Option<&std::path::Path>,
895    _pretty: bool,
896    _format: OutputFormat,
897) -> Result<String> {
898    let storage = open_storage(db_path)?;
899    let content = storage.export_buffers()?;
900
901    if let Some(path) = output {
902        write_file(path, &content)?;
903        Ok(format!("Exported buffers to: {}\n", path.display()))
904    } else {
905        // Write to stdout
906        let stdout = io::stdout();
907        let mut handle = stdout.lock();
908        handle.write_all(content.as_bytes()).map_err(|e| {
909            CommandError::ExecutionFailed(format!("Failed to write to stdout: {e}"))
910        })?;
911        Ok(String::new()) // Content already written
912    }
913}
914
915fn cmd_variable(
916    db_path: &std::path::Path,
917    name: &str,
918    value: Option<&str>,
919    delete: bool,
920    format: OutputFormat,
921) -> Result<String> {
922    let mut storage = open_storage(db_path)?;
923    let mut context = storage.load_context()?.unwrap_or_else(Context::new);
924
925    if delete {
926        context.remove_variable(name);
927        storage.save_context(&context)?;
928        return Ok(format!("Deleted variable: {name}\n"));
929    }
930
931    if let Some(v) = value {
932        context.set_variable(name.to_string(), ContextValue::String(v.to_string()));
933        storage.save_context(&context)?;
934        Ok(format!("Set variable: {name} = {v}\n"))
935    } else {
936        context.get_variable(name).map_or_else(
937            || Ok(format!("Variable '{name}' not found\n")),
938            |v| match format {
939                OutputFormat::Text => Ok(format!("{name} = {v:?}\n")),
940                OutputFormat::Json | OutputFormat::Ndjson => {
941                    Ok(serde_json::to_string_pretty(v).unwrap_or_default())
942                }
943            },
944        )
945    }
946}
947
948fn cmd_global(
949    db_path: &std::path::Path,
950    name: &str,
951    value: Option<&str>,
952    delete: bool,
953    format: OutputFormat,
954) -> Result<String> {
955    let mut storage = open_storage(db_path)?;
956    let mut context = storage.load_context()?.unwrap_or_else(Context::new);
957
958    if delete {
959        context.remove_global(name);
960        storage.save_context(&context)?;
961        return Ok(format!("Deleted global: {name}\n"));
962    }
963
964    if let Some(v) = value {
965        context.set_global(name.to_string(), ContextValue::String(v.to_string()));
966        storage.save_context(&context)?;
967        Ok(format!("Set global: {name} = {v}\n"))
968    } else {
969        context.get_global(name).map_or_else(
970            || Ok(format!("Global '{name}' not found\n")),
971            |v| match format {
972                OutputFormat::Text => Ok(format!("{name} = {v:?}\n")),
973                OutputFormat::Json | OutputFormat::Ndjson => {
974                    Ok(serde_json::to_string_pretty(v).unwrap_or_default())
975                }
976            },
977        )
978    }
979}
980
981// ==================== Dispatch Command ====================
982
983#[allow(clippy::too_many_arguments)]
984fn cmd_dispatch(
985    db_path: &std::path::Path,
986    identifier: &str,
987    batch_size: usize,
988    workers: Option<usize>,
989    query: Option<&str>,
990    mode: &str,
991    threshold: f32,
992    format: OutputFormat,
993) -> Result<String> {
994    let storage = open_storage(db_path)?;
995    let buffer = resolve_buffer(&storage, identifier)?;
996    let buffer_id = buffer.id.unwrap_or(0);
997    let buffer_name = buffer.name.unwrap_or_else(|| buffer_id.to_string());
998
999    // Get all chunks for this buffer
1000    let chunks = storage.get_chunks(buffer_id)?;
1001
1002    if chunks.is_empty() {
1003        return Ok(format!("No chunks found in buffer '{}'\n", buffer_name));
1004    }
1005
1006    // Get chunk IDs, optionally filtered by search query
1007    let chunk_ids: Vec<i64> = if let Some(query_str) = query {
1008        // Filter chunks by search relevance
1009        let embedder = create_embedder()?;
1010
1011        let (use_semantic, use_bm25) = match mode.to_lowercase().as_str() {
1012            "semantic" => (true, false),
1013            "bm25" => (false, true),
1014            _ => (true, true),
1015        };
1016
1017        let config = SearchConfig::new()
1018            .with_top_k(chunks.len()) // Get all matches
1019            .with_threshold(threshold)
1020            .with_semantic(use_semantic)
1021            .with_bm25(use_bm25);
1022
1023        let results = hybrid_search(&storage, embedder.as_ref(), query_str, &config)?;
1024
1025        // Filter to only chunks from this buffer
1026        let buffer_chunk_ids: std::collections::HashSet<i64> =
1027            chunks.iter().filter_map(|c| c.id).collect();
1028
1029        results
1030            .into_iter()
1031            .filter(|r| buffer_chunk_ids.contains(&r.chunk_id))
1032            .map(|r| r.chunk_id)
1033            .collect()
1034    } else {
1035        chunks.iter().filter_map(|c| c.id).collect()
1036    };
1037
1038    if chunk_ids.is_empty() {
1039        return Ok(format!(
1040            "No matching chunks found in buffer '{}' for query\n",
1041            buffer_name
1042        ));
1043    }
1044
1045    // Calculate batch assignments
1046    let effective_batch_size = if let Some(num_workers) = workers {
1047        // Divide chunks evenly among workers
1048        (chunk_ids.len() + num_workers - 1) / num_workers
1049    } else {
1050        batch_size
1051    };
1052
1053    // Create batches
1054    let batches: Vec<Vec<i64>> = chunk_ids
1055        .chunks(effective_batch_size)
1056        .map(|chunk| chunk.to_vec())
1057        .collect();
1058
1059    match format {
1060        OutputFormat::Text => {
1061            let mut output = String::new();
1062            let _ = writeln!(
1063                output,
1064                "Dispatch plan for buffer '{}' ({} chunks -> {} batches):\n",
1065                buffer_name,
1066                chunk_ids.len(),
1067                batches.len()
1068            );
1069
1070            for (i, batch) in batches.iter().enumerate() {
1071                let _ = writeln!(
1072                    output,
1073                    "Batch {}: {} chunks (IDs: {})",
1074                    i,
1075                    batch.len(),
1076                    batch
1077                        .iter()
1078                        .take(5)
1079                        .map(|id| id.to_string())
1080                        .collect::<Vec<_>>()
1081                        .join(", ")
1082                        + if batch.len() > 5 { ", ..." } else { "" }
1083                );
1084            }
1085
1086            output
1087                .push_str("\nUsage: Feed each batch to a subagent with 'rlm-cli chunk get <id>'\n");
1088            Ok(output)
1089        }
1090        OutputFormat::Json | OutputFormat::Ndjson => {
1091            let json = serde_json::json!({
1092                "buffer_id": buffer_id,
1093                "buffer_name": buffer_name,
1094                "total_chunks": chunk_ids.len(),
1095                "batch_count": batches.len(),
1096                "batch_size": effective_batch_size,
1097                "query_filter": query,
1098                "batches": batches.iter().enumerate().map(|(i, batch)| {
1099                    serde_json::json!({
1100                        "batch_index": i,
1101                        "chunk_count": batch.len(),
1102                        "chunk_ids": batch
1103                    })
1104                }).collect::<Vec<_>>()
1105            });
1106            Ok(serde_json::to_string_pretty(&json).unwrap_or_default())
1107        }
1108    }
1109}
1110
1111// ==================== Search Commands ====================
1112
1113#[allow(clippy::too_many_arguments)]
1114fn cmd_search(
1115    db_path: &std::path::Path,
1116    query: &str,
1117    top_k: usize,
1118    threshold: f32,
1119    mode: &str,
1120    rrf_k: u32,
1121    buffer_filter: Option<&str>,
1122    preview: bool,
1123    preview_len: usize,
1124    format: OutputFormat,
1125) -> Result<String> {
1126    let storage = open_storage(db_path)?;
1127    let embedder = create_embedder()?;
1128
1129    // Determine search mode
1130    let (use_semantic, use_bm25) = match mode.to_lowercase().as_str() {
1131        "semantic" => (true, false),
1132        "bm25" => (false, true),
1133        _ => (true, true), // hybrid is default
1134    };
1135
1136    let config = SearchConfig::new()
1137        .with_top_k(top_k)
1138        .with_threshold(threshold)
1139        .with_rrf_k(rrf_k)
1140        .with_semantic(use_semantic)
1141        .with_bm25(use_bm25);
1142
1143    // If buffer filter is specified, validate it exists
1144    let buffer_id = if let Some(identifier) = buffer_filter {
1145        let buffer = resolve_buffer(&storage, identifier)?;
1146        buffer.id
1147    } else {
1148        None
1149    };
1150
1151    let results = hybrid_search(&storage, embedder.as_ref(), query, &config)?;
1152
1153    // Filter by buffer if specified
1154    let mut results: Vec<SearchResult> = if let Some(bid) = buffer_id {
1155        let buffer_chunks: std::collections::HashSet<i64> = storage
1156            .get_chunks(bid)?
1157            .iter()
1158            .filter_map(|c| c.id)
1159            .collect();
1160        results
1161            .into_iter()
1162            .filter(|r| buffer_chunks.contains(&r.chunk_id))
1163            .collect()
1164    } else {
1165        results
1166    };
1167
1168    // Populate content previews if requested
1169    if preview {
1170        crate::search::populate_previews(&storage, &mut results, preview_len)?;
1171    }
1172
1173    Ok(format_search_results(&results, query, mode, format))
1174}
1175
/// Formats a score for display, using scientific notation for very small values.
fn format_score(score: f64) -> String {
    match score {
        // Exact zero (covers -0.0 too) prints as a bare "0".
        s if s == 0.0 => String::from("0"),
        // Tiny magnitudes would render as "0.0000"; use 2-digit scientific.
        s if s.abs() < 0.0001 => format!("{s:.2e}"),
        s => format!("{s:.4}"),
    }
}
1186
1187fn format_search_results(
1188    results: &[SearchResult],
1189    query: &str,
1190    mode: &str,
1191    format: OutputFormat,
1192) -> String {
1193    match format {
1194        OutputFormat::Text => {
1195            if results.is_empty() {
1196                return format!("No results found for query: \"{query}\"\n");
1197            }
1198
1199            let mut output = String::new();
1200            let _ = writeln!(
1201                output,
1202                "Search results for \"{query}\" ({mode} mode, {} results):\n",
1203                results.len()
1204            );
1205            let _ = writeln!(
1206                output,
1207                "{:<10} {:<12} {:<12} {:<12}",
1208                "Chunk ID", "Score", "Semantic", "BM25"
1209            );
1210            output.push_str(&"-".repeat(50));
1211            output.push('\n');
1212
1213            for result in results {
1214                let semantic = result
1215                    .semantic_score
1216                    .map_or_else(|| "-".to_string(), |s| format_score(f64::from(s)));
1217                let bm25 = result
1218                    .bm25_score
1219                    .map_or_else(|| "-".to_string(), format_score);
1220
1221                let _ = writeln!(
1222                    output,
1223                    "{:<10} {:<12.4} {:<12} {:<12}",
1224                    result.chunk_id, result.score, semantic, bm25
1225                );
1226
1227                // Show content preview if available
1228                if let Some(ref preview) = result.content_preview {
1229                    let _ = writeln!(output, "  Preview: {preview}");
1230                }
1231            }
1232
1233            output.push_str("\nUse 'rlm-cli chunk get <id>' to retrieve chunk content.\n");
1234            output
1235        }
1236        OutputFormat::Json | OutputFormat::Ndjson => {
1237            let json = serde_json::json!({
1238                "query": query,
1239                "mode": mode,
1240                "count": results.len(),
1241                "results": results.iter().map(|r| {
1242                    let mut obj = serde_json::json!({
1243                        "chunk_id": r.chunk_id,
1244                        "buffer_id": r.buffer_id,
1245                        "index": r.index,
1246                        "score": r.score,
1247                        "semantic_score": r.semantic_score,
1248                        "bm25_score": r.bm25_score
1249                    });
1250                    if let Some(ref preview) = r.content_preview {
1251                        obj["content_preview"] = serde_json::json!(preview);
1252                    }
1253                    obj
1254                }).collect::<Vec<_>>()
1255            });
1256            serde_json::to_string_pretty(&json).unwrap_or_default()
1257        }
1258    }
1259}
1260
1261// ==================== Chunk Commands ====================
1262
1263fn cmd_chunk_get(
1264    db_path: &std::path::Path,
1265    chunk_id: i64,
1266    include_metadata: bool,
1267    format: OutputFormat,
1268) -> Result<String> {
1269    let storage = open_storage(db_path)?;
1270
1271    let chunk = storage
1272        .get_chunk(chunk_id)?
1273        .ok_or(StorageError::ChunkNotFound { id: chunk_id })?;
1274
1275    match format {
1276        OutputFormat::Text => {
1277            if include_metadata {
1278                let mut output = String::new();
1279                let _ = writeln!(output, "Chunk ID: {}", chunk.id.unwrap_or(0));
1280                let _ = writeln!(output, "Buffer ID: {}", chunk.buffer_id);
1281                let _ = writeln!(output, "Index: {}", chunk.index);
1282                let _ = writeln!(
1283                    output,
1284                    "Byte range: {}..{}",
1285                    chunk.byte_range.start, chunk.byte_range.end
1286                );
1287                let _ = writeln!(output, "Size: {} bytes", chunk.size());
1288                output.push_str("---\n");
1289                output.push_str(&chunk.content);
1290                if !chunk.content.ends_with('\n') {
1291                    output.push('\n');
1292                }
1293                Ok(output)
1294            } else {
1295                // Plain content output for pass-by-reference use case
1296                Ok(chunk.content)
1297            }
1298        }
1299        OutputFormat::Json | OutputFormat::Ndjson => {
1300            let json = serde_json::json!({
1301                "chunk_id": chunk.id,
1302                "buffer_id": chunk.buffer_id,
1303                "index": chunk.index,
1304                "byte_range": {
1305                    "start": chunk.byte_range.start,
1306                    "end": chunk.byte_range.end
1307                },
1308                "size": chunk.size(),
1309                "content": chunk.content
1310            });
1311            Ok(serde_json::to_string_pretty(&json).unwrap_or_default())
1312        }
1313    }
1314}
1315
1316fn cmd_chunk_list(
1317    db_path: &std::path::Path,
1318    identifier: &str,
1319    show_preview: bool,
1320    preview_len: usize,
1321    format: OutputFormat,
1322) -> Result<String> {
1323    let storage = open_storage(db_path)?;
1324    let buffer = resolve_buffer(&storage, identifier)?;
1325    let buffer_id = buffer.id.unwrap_or(0);
1326
1327    let chunks = storage.get_chunks(buffer_id)?;
1328
1329    match format {
1330        OutputFormat::Text => {
1331            if chunks.is_empty() {
1332                return Ok(format!(
1333                    "No chunks found for buffer: {}\n",
1334                    buffer.name.as_deref().unwrap_or(&buffer_id.to_string())
1335                ));
1336            }
1337
1338            let mut output = String::new();
1339            let _ = writeln!(
1340                output,
1341                "Chunks for buffer '{}' ({} chunks):\n",
1342                buffer.name.as_deref().unwrap_or(&buffer_id.to_string()),
1343                chunks.len()
1344            );
1345
1346            if show_preview {
1347                let _ = writeln!(
1348                    output,
1349                    "{:<8} {:<6} {:<12} {:<12} Preview",
1350                    "ID", "Index", "Start", "Size"
1351                );
1352                output.push_str(&"-".repeat(70));
1353                output.push('\n');
1354
1355                for chunk in &chunks {
1356                    let preview: String = chunk
1357                        .content
1358                        .chars()
1359                        .take(preview_len)
1360                        .map(|c| if c == '\n' { ' ' } else { c })
1361                        .collect();
1362                    let preview = if chunk.content.len() > preview_len {
1363                        format!("{preview}...")
1364                    } else {
1365                        preview
1366                    };
1367
1368                    let _ = writeln!(
1369                        output,
1370                        "{:<8} {:<6} {:<12} {:<12} {}",
1371                        chunk.id.unwrap_or(0),
1372                        chunk.index,
1373                        chunk.byte_range.start,
1374                        chunk.size(),
1375                        preview
1376                    );
1377                }
1378            } else {
1379                let _ = writeln!(
1380                    output,
1381                    "{:<8} {:<6} {:<12} {:<12}",
1382                    "ID", "Index", "Start", "Size"
1383                );
1384                output.push_str(&"-".repeat(40));
1385                output.push('\n');
1386
1387                for chunk in &chunks {
1388                    let _ = writeln!(
1389                        output,
1390                        "{:<8} {:<6} {:<12} {:<12}",
1391                        chunk.id.unwrap_or(0),
1392                        chunk.index,
1393                        chunk.byte_range.start,
1394                        chunk.size()
1395                    );
1396                }
1397            }
1398
1399            Ok(output)
1400        }
1401        OutputFormat::Json | OutputFormat::Ndjson => {
1402            let json = serde_json::json!({
1403                "buffer_id": buffer_id,
1404                "buffer_name": buffer.name,
1405                "chunk_count": chunks.len(),
1406                "chunks": chunks.iter().map(|c| {
1407                    let mut obj = serde_json::json!({
1408                        "id": c.id,
1409                        "index": c.index,
1410                        "byte_range": {
1411                            "start": c.byte_range.start,
1412                            "end": c.byte_range.end
1413                        },
1414                        "size": c.size()
1415                    });
1416                    if show_preview {
1417                        let preview: String = c.content.chars().take(preview_len).collect();
1418                        obj["preview"] = serde_json::Value::String(preview);
1419                    }
1420                    obj
1421                }).collect::<Vec<_>>()
1422            });
1423            Ok(serde_json::to_string_pretty(&json).unwrap_or_default())
1424        }
1425    }
1426}
1427
1428fn cmd_chunk_embed(
1429    db_path: &std::path::Path,
1430    identifier: &str,
1431    force: bool,
1432    format: OutputFormat,
1433) -> Result<String> {
1434    let mut storage = open_storage(db_path)?;
1435    let buffer = resolve_buffer(&storage, identifier)?;
1436    let buffer_id = buffer.id.unwrap_or(0);
1437    let buffer_name = buffer.name.unwrap_or_else(|| buffer_id.to_string());
1438
1439    let embedder = create_embedder()?;
1440
1441    // Use incremental embedding (force_reembed = force flag)
1442    let result = crate::search::embed_buffer_chunks_incremental(
1443        &mut storage,
1444        embedder.as_ref(),
1445        buffer_id,
1446        force,
1447    )?;
1448
1449    // Check for model version mismatch warning
1450    let model_warning = if !force {
1451        if let Some(existing_model) =
1452            crate::search::check_model_mismatch(&storage, buffer_id, &result.model_name)?
1453        {
1454            Some(format!(
1455                "Warning: Some embeddings use model '{existing_model}', current model is '{}'. \
1456                 Use --force to regenerate with the new model.",
1457                result.model_name
1458            ))
1459        } else {
1460            None
1461        }
1462    } else {
1463        None
1464    };
1465
1466    match format {
1467        OutputFormat::Text => {
1468            let mut output = String::new();
1469            if let Some(warning) = &model_warning {
1470                output.push_str(warning);
1471                output.push('\n');
1472            }
1473
1474            if !result.had_changes() {
1475                output.push_str(&format!(
1476                    "Buffer '{buffer_name}' already fully embedded ({} chunks). Use --force to re-embed.\n",
1477                    result.total_chunks
1478                ));
1479            } else {
1480                if result.embedded_count > 0 {
1481                    output.push_str(&format!(
1482                        "Embedded {} new chunks in buffer '{buffer_name}' using model '{}'.\n",
1483                        result.embedded_count, result.model_name
1484                    ));
1485                }
1486                if result.replaced_count > 0 {
1487                    output.push_str(&format!(
1488                        "Re-embedded {} chunks with updated model.\n",
1489                        result.replaced_count
1490                    ));
1491                }
1492                if result.skipped_count > 0 {
1493                    output.push_str(&format!(
1494                        "Skipped {} chunks (already embedded with current model).\n",
1495                        result.skipped_count
1496                    ));
1497                }
1498            }
1499            Ok(output)
1500        }
1501        OutputFormat::Json | OutputFormat::Ndjson => {
1502            let json = serde_json::json!({
1503                "buffer_id": buffer_id,
1504                "buffer_name": buffer_name,
1505                "embedded_count": result.embedded_count,
1506                "replaced_count": result.replaced_count,
1507                "skipped_count": result.skipped_count,
1508                "total_chunks": result.total_chunks,
1509                "model": result.model_name,
1510                "had_changes": result.had_changes(),
1511                "completion_percentage": result.completion_percentage(),
1512                "model_warning": model_warning
1513            });
1514            Ok(serde_json::to_string_pretty(&json).unwrap_or_default())
1515        }
1516    }
1517}
1518
1519fn cmd_chunk_status(db_path: &std::path::Path, format: OutputFormat) -> Result<String> {
1520    let storage = open_storage(db_path)?;
1521    let buffers = storage.list_buffers()?;
1522
1523    let mut buffer_stats: Vec<(String, i64, usize, usize)> = Vec::new();
1524
1525    for buffer in &buffers {
1526        let buffer_id = buffer.id.unwrap_or(0);
1527        let buffer_name = buffer.name.clone().unwrap_or_else(|| buffer_id.to_string());
1528        let chunks = storage.get_chunks(buffer_id)?;
1529        let chunk_count = chunks.len();
1530
1531        let mut embedded_count = 0;
1532        for chunk in &chunks {
1533            if let Some(cid) = chunk.id
1534                && storage.has_embedding(cid)?
1535            {
1536                embedded_count += 1;
1537            }
1538        }
1539
1540        buffer_stats.push((buffer_name, buffer_id, chunk_count, embedded_count));
1541    }
1542
1543    let total_chunks: usize = buffer_stats.iter().map(|(_, _, c, _)| c).sum();
1544    let total_embedded: usize = buffer_stats.iter().map(|(_, _, _, e)| e).sum();
1545
1546    match format {
1547        OutputFormat::Text => {
1548            let mut output = String::new();
1549            output.push_str("Embedding Status\n");
1550            output.push_str("================\n\n");
1551            let _ = writeln!(
1552                output,
1553                "Total: {total_embedded}/{total_chunks} chunks embedded\n"
1554            );
1555
1556            if !buffer_stats.is_empty() {
1557                let _ = writeln!(
1558                    output,
1559                    "{:<6} {:<20} {:<10} {:<10} Status",
1560                    "ID", "Name", "Chunks", "Embedded"
1561                );
1562                output.push_str(&"-".repeat(60));
1563                output.push('\n');
1564
1565                for (name, id, chunks, embedded) in &buffer_stats {
1566                    let status = if *embedded == *chunks {
1567                        "✓ complete"
1568                    } else if *embedded > 0 {
1569                        "◐ partial"
1570                    } else {
1571                        "○ none"
1572                    };
1573
1574                    let _ = writeln!(
1575                        output,
1576                        "{:<6} {:<20} {:<10} {:<10} {}",
1577                        id,
1578                        truncate_str(name, 20),
1579                        chunks,
1580                        embedded,
1581                        status
1582                    );
1583                }
1584            }
1585
1586            Ok(output)
1587        }
1588        OutputFormat::Json | OutputFormat::Ndjson => {
1589            let json = serde_json::json!({
1590                "total_chunks": total_chunks,
1591                "total_embedded": total_embedded,
1592                "buffers": buffer_stats.iter().map(|(name, id, chunks, embedded)| {
1593                    serde_json::json!({
1594                        "buffer_id": id,
1595                        "name": name,
1596                        "chunk_count": chunks,
1597                        "embedded_count": embedded,
1598                        "fully_embedded": chunks == embedded
1599                    })
1600                }).collect::<Vec<_>>()
1601            });
1602            Ok(serde_json::to_string_pretty(&json).unwrap_or_default())
1603        }
1604    }
1605}
1606
/// Truncates a string to at most `max_len` characters, appending "..." when
/// content was dropped (unless `max_len <= 3`, where there is no room).
///
/// Operates on `char` boundaries instead of raw byte indices, so multi-byte
/// UTF-8 input can never cause a slice panic (the previous byte-indexed
/// version panicked on inputs like `truncate_str("日本語", 4)`).
fn truncate_str(s: &str, max_len: usize) -> String {
    if s.chars().count() <= max_len {
        // Fits entirely — return unchanged.
        s.to_string()
    } else if max_len <= 3 {
        // No room for an ellipsis; hard-truncate on a char boundary.
        s.chars().take(max_len).collect()
    } else {
        // Reserve three characters for the ellipsis.
        let head: String = s.chars().take(max_len - 3).collect();
        format!("{head}...")
    }
}
1617
#[cfg(test)]
mod tests {
    // Unit tests for the CLI command implementations. Each test runs against
    // a throwaway SQLite database created inside a temp directory.
    use super::*;
    use tempfile::TempDir;

    /// Creates a fresh temp directory and a database path inside it.
    /// The returned `TempDir` must stay alive for the path to remain valid.
    fn setup() -> (TempDir, std::path::PathBuf) {
        let temp_dir = TempDir::new().unwrap();
        let db_path = temp_dir.path().join("test.db");
        (temp_dir, db_path)
    }

    // `init` should create the database file on disk.
    #[test]
    fn test_cmd_init() {
        let (_temp_dir, db_path) = setup();
        let result = cmd_init(&db_path, false, OutputFormat::Text);
        assert!(result.is_ok());
        assert!(db_path.exists());
    }

    // Re-initializing an existing database must fail unless forced.
    #[test]
    fn test_cmd_init_already_exists() {
        let (_temp_dir, db_path) = setup();

        // First init
        cmd_init(&db_path, false, OutputFormat::Text).unwrap();

        // Second init should fail without force
        let result = cmd_init(&db_path, false, OutputFormat::Text);
        assert!(result.is_err());

        // With force should succeed
        let result = cmd_init(&db_path, true, OutputFormat::Text);
        assert!(result.is_ok());
    }

    // `status` on a fresh database should mention the buffers section.
    #[test]
    fn test_cmd_status() {
        let (_temp_dir, db_path) = setup();
        cmd_init(&db_path, false, OutputFormat::Text).unwrap();

        let result = cmd_status(&db_path, OutputFormat::Text);
        assert!(result.is_ok());
        assert!(result.unwrap().contains("Buffers"));
    }

    // `reset` is destructive, so it requires explicit confirmation.
    #[test]
    fn test_cmd_reset() {
        let (_temp_dir, db_path) = setup();
        cmd_init(&db_path, false, OutputFormat::Text).unwrap();

        // Without --yes should fail
        let result = cmd_reset(&db_path, false, OutputFormat::Text);
        assert!(result.is_err());

        // With --yes should succeed
        let result = cmd_reset(&db_path, true, OutputFormat::Text);
        assert!(result.is_ok());
    }

    // Adding a buffer should succeed and echo the buffer name back.
    #[test]
    fn test_cmd_add_buffer() {
        let (_temp_dir, db_path) = setup();
        cmd_init(&db_path, false, OutputFormat::Text).unwrap();

        let result = cmd_add_buffer(
            &db_path,
            "test-buffer",
            Some("Hello, world!"),
            OutputFormat::Text,
        );
        assert!(result.is_ok());
        assert!(result.unwrap().contains("test-buffer"));
    }

    // Listing buffers: empty message first, then the added buffer appears.
    #[test]
    fn test_cmd_list_buffers() {
        let (_temp_dir, db_path) = setup();
        cmd_init(&db_path, false, OutputFormat::Text).unwrap();

        // Empty list
        let result = cmd_list_buffers(&db_path, OutputFormat::Text);
        assert!(result.is_ok());
        assert!(result.unwrap().contains("No buffers"));

        // Add a buffer
        cmd_add_buffer(&db_path, "test", Some("content"), OutputFormat::Text).unwrap();

        let result = cmd_list_buffers(&db_path, OutputFormat::Text);
        assert!(result.is_ok());
        assert!(result.unwrap().contains("test"));
    }

    // Full variable lifecycle: set, read back, delete.
    #[test]
    fn test_cmd_variable() {
        let (_temp_dir, db_path) = setup();
        cmd_init(&db_path, false, OutputFormat::Text).unwrap();

        // Set variable
        let result = cmd_variable(&db_path, "key", Some("value"), false, OutputFormat::Text);
        assert!(result.is_ok());

        // Get variable
        let result = cmd_variable(&db_path, "key", None, false, OutputFormat::Text);
        assert!(result.is_ok());
        assert!(result.unwrap().contains("value"));

        // Delete variable
        let result = cmd_variable(&db_path, "key", None, true, OutputFormat::Text);
        assert!(result.is_ok());
    }

    #[test]
    fn test_truncate_str_short() {
        // String shorter than max_len should be returned as-is
        let result = truncate_str("hello", 10);
        assert_eq!(result, "hello");
    }

    #[test]
    fn test_truncate_str_exact() {
        // String exactly at max_len should be returned as-is
        let result = truncate_str("hello", 5);
        assert_eq!(result, "hello");
    }

    #[test]
    fn test_truncate_str_long() {
        // String longer than max_len should be truncated with ...
        let result = truncate_str("hello world", 8);
        assert_eq!(result, "hello...");
    }

    #[test]
    fn test_truncate_str_very_short_max() {
        // max_len <= 3 should just truncate without ellipsis
        let result = truncate_str("hello", 3);
        assert_eq!(result, "hel");
    }

    #[test]
    fn test_truncate_str_edge_case() {
        // max_len of 4 should show 1 char + ...
        let result = truncate_str("hello", 4);
        assert_eq!(result, "h...");
    }
}