1pub mod args;
2
3use std::path::Path;
4
5use kbolt_core::engine::Engine;
6use kbolt_core::Result;
7use kbolt_types::{
8 ActiveSpaceSource, AddCollectionRequest, AddCollectionResult, AddScheduleRequest,
9 DoctorCheckStatus, DoctorReport, DoctorSetupStatus, EvalImportReport, EvalRunReport,
10 GetRequest, InitialIndexingBlock, InitialIndexingOutcome, KboltError, LocalAction, LocalReport,
11 Locator, ModelInfo, MultiGetRequest, OmitReason, RemoveScheduleRequest, ScheduleAddResponse,
12 ScheduleBackend, ScheduleInterval, ScheduleIntervalUnit, ScheduleRunResult, ScheduleScope,
13 ScheduleState, ScheduleStatusResponse, ScheduleTrigger, ScheduleWeekday, SearchMode,
14 SearchPipeline, SearchPipelineNotice, SearchPipelineStep, SearchPipelineUnavailableReason,
15 SearchRequest, UpdateDecision, UpdateDecisionKind, UpdateOptions, UpdateReport,
16};
17
18pub struct CliAdapter {
19 pub engine: Engine,
20}
21
/// Borrowed bundle of the arguments accepted by [`CliAdapter::search`].
pub struct CliSearchOptions<'a> {
    /// Space forwarded to the search request; `None` leaves the choice to the engine.
    pub space: Option<&'a str>,
    /// Query text.
    pub query: &'a str,
    /// Collections forwarded to the search request.
    pub collections: &'a [String],
    /// Result limit forwarded to the search request.
    pub limit: usize,
    /// Score threshold forwarded to the search request.
    pub min_score: f32,
    /// Selects deep mode. Mutually exclusive with `keyword` and `semantic`.
    pub deep: bool,
    /// Selects keyword mode. Mutually exclusive with `deep` and `semantic`.
    pub keyword: bool,
    /// Selects semantic mode. Mutually exclusive with `deep` and `keyword`.
    pub semantic: bool,
    /// Opt-in reranking flag; only consulted when no explicit mode is selected.
    pub rerank: bool,
    /// Opt-out reranking flag; only consulted in deep mode.
    pub no_rerank: bool,
    /// Debug flag forwarded to the search request.
    pub debug: bool,
}
35
36impl CliAdapter {
37 pub fn new(engine: Engine) -> Self {
38 Self { engine }
39 }
40
41 pub fn space_add(
42 &mut self,
43 name: &str,
44 description: Option<&str>,
45 strict: bool,
46 dirs: &[std::path::PathBuf],
47 ) -> Result<String> {
48 if strict {
49 use std::collections::HashSet;
50
51 let mut validation_errors = Vec::new();
52 let mut derived_names = HashSet::new();
53 for dir in dirs {
54 if !dir.is_absolute() || !dir.is_dir() {
55 validation_errors.push(format!("- {} -> invalid path", dir.display()));
56 continue;
57 }
58
59 let collection_name = dir.file_name().and_then(|item| item.to_str());
60 match collection_name {
61 Some(name) => {
62 if !derived_names.insert(name.to_string()) {
63 validation_errors.push(format!(
64 "- {} -> duplicate derived collection name '{name}'",
65 dir.display()
66 ));
67 }
68 }
69 None => validation_errors.push(format!(
70 "- {} -> cannot derive collection name from path",
71 dir.display()
72 )),
73 }
74 }
75
76 if !validation_errors.is_empty() {
77 let mut lines = Vec::new();
78 lines.push("strict mode aborted: one or more directories are invalid".to_string());
79 lines.extend(validation_errors);
80 return Err(kbolt_types::KboltError::InvalidInput(lines.join("\n")).into());
81 }
82 }
83
84 let added = self.engine.add_space(name, description)?;
85 let description = added.description.unwrap_or_default();
86 let suffix = if description.is_empty() {
87 String::new()
88 } else {
89 format!(" - {description}")
90 };
91
92 if dirs.is_empty() {
93 return Ok(format!("space added: {}{suffix}", added.name));
94 }
95
96 let mut successes = Vec::new();
97 let mut failures = Vec::new();
98 for dir in dirs {
99 let collection_name = dir
100 .file_name()
101 .and_then(|item| item.to_str())
102 .map(ToString::to_string);
103
104 let result = self.engine.add_collection(AddCollectionRequest {
105 path: dir.clone(),
106 space: Some(name.to_string()),
107 name: collection_name,
108 description: None,
109 extensions: None,
110 no_index: true,
111 });
112
113 match result {
114 Ok(info) => successes.push(format!(
115 "- {} -> {}/{}",
116 dir.display(),
117 info.collection.space,
118 info.collection.name
119 )),
120 Err(err) => {
121 if strict {
122 let rollback_result = self.engine.remove_space(name);
123 return match rollback_result {
124 Ok(()) => Err(err),
125 Err(rollback_err) => Err(kbolt_types::KboltError::Internal(format!(
126 "strict mode rollback failed: add error: {err}; rollback error: {rollback_err}"
127 ))
128 .into()),
129 };
130 }
131 failures.push(format!("- {} -> {}", dir.display(), err));
132 }
133 }
134 }
135
136 let mut lines = Vec::new();
137 lines.push(format!("space added: {}{suffix}", added.name));
138 lines.push(format!("collections registered: {}", successes.len()));
139 lines.extend(successes);
140 if !failures.is_empty() {
141 lines.push(format!("collections failed: {}", failures.len()));
142 lines.extend(failures);
143 }
144 lines.push(format!(
145 "note: collections were registered without indexing; run `kbolt --space {} update` to index them",
146 added.name
147 ));
148
149 Ok(lines.join("\n"))
150 }
151
152 pub fn space_describe(&self, name: &str, text: &str) -> Result<String> {
153 self.engine.describe_space(name, text)?;
154 Ok(format!("space description updated: {name}"))
155 }
156
157 pub fn space_rename(&mut self, old: &str, new: &str) -> Result<String> {
158 self.engine.rename_space(old, new)?;
159 Ok(format!("space renamed: {old} -> {new}"))
160 }
161
162 pub fn space_remove(&mut self, name: &str) -> Result<String> {
163 self.engine.remove_space(name)?;
164 if name == "default" {
165 return Ok("default space cleared".to_string());
166 }
167 Ok(format!("space removed: {name}"))
168 }
169
170 pub fn space_default(&mut self, name: Option<&str>) -> Result<String> {
171 if let Some(space_name) = name {
172 let updated = self.engine.set_default_space(Some(space_name))?;
173 let value = updated.unwrap_or_default();
174 return Ok(format!("default space: {value}"));
175 }
176
177 let current = self.engine.config().default_space.as_deref();
178 let output = match current {
179 Some(value) => format!("default space: {value}"),
180 None => "default space: none".to_string(),
181 };
182 Ok(output)
183 }
184
185 pub fn space_current(&self, explicit: Option<&str>) -> Result<String> {
186 let active = self.engine.current_space(explicit)?;
187 let output = match active {
188 Some(active) => {
189 let source = match active.source {
190 ActiveSpaceSource::Flag => "flag",
191 ActiveSpaceSource::EnvVar => "env",
192 ActiveSpaceSource::ConfigDefault => "default",
193 };
194 format!("active space: {} ({source})", active.name)
195 }
196 None => "active space: none".to_string(),
197 };
198 Ok(output)
199 }
200
201 pub fn space_list(&self) -> Result<String> {
202 let spaces = self.engine.list_spaces()?;
203 let mut lines = Vec::with_capacity(spaces.len() + 1);
204 lines.push("spaces:".to_string());
205 for space in spaces {
206 let description = space.description.unwrap_or_default();
207 let suffix = if description.is_empty() {
208 String::new()
209 } else {
210 format!(" - {description}")
211 };
212 lines.push(format!(
213 "- {} (collections: {}, documents: {}, chunks: {}){}",
214 space.name, space.collection_count, space.document_count, space.chunk_count, suffix
215 ));
216 }
217 Ok(lines.join("\n"))
218 }
219
220 pub fn space_info(&self, name: &str) -> Result<String> {
221 let space = self.engine.space_info(name)?;
222 let description = space.description.unwrap_or_default();
223 let description_line = if description.is_empty() {
224 "description:".to_string()
225 } else {
226 format!("description: {description}")
227 };
228
229 Ok(format!(
230 "name: {}\n{description_line}\ncollections: {}\ndocuments: {}\nchunks: {}\ncreated: {}",
231 space.name,
232 space.collection_count,
233 space.document_count,
234 space.chunk_count,
235 space.created
236 ))
237 }
238
239 pub fn collection_list(&self, space: Option<&str>) -> Result<String> {
240 let collections = self.engine.list_collections(space)?;
241 let mut lines = Vec::with_capacity(collections.len() + 1);
242 lines.push("collections:".to_string());
243 if collections.is_empty() {
244 lines.push("- none".to_string());
245 return Ok(lines.join("\n"));
246 }
247
248 for collection in collections {
249 lines.push(format!(
250 "- {}/{} ({})",
251 collection.space,
252 collection.name,
253 collection.path.display()
254 ));
255 }
256 Ok(lines.join("\n"))
257 }
258
259 pub fn collection_add(
260 &self,
261 space: Option<&str>,
262 path: &std::path::Path,
263 name: Option<&str>,
264 description: Option<&str>,
265 extensions: Option<&[String]>,
266 no_index: bool,
267 ) -> Result<String> {
268 let added = self.engine.add_collection(AddCollectionRequest {
269 path: path.to_path_buf(),
270 space: space.map(ToString::to_string),
271 name: name.map(ToString::to_string),
272 description: description.map(ToString::to_string),
273 extensions: extensions.map(|items| items.to_vec()),
274 no_index,
275 })?;
276
277 Ok(format_collection_add_result(&added))
278 }
279
280 pub fn collection_info(&self, space: Option<&str>, name: &str) -> Result<String> {
281 let collection = self.engine.collection_info(space, name)?;
282 let description = collection.description.unwrap_or_default();
283 let extensions = collection
284 .extensions
285 .map(|items| items.join(","))
286 .unwrap_or_default();
287 let description_line = if description.is_empty() {
288 "description:".to_string()
289 } else {
290 format!("description: {description}")
291 };
292 let extensions_line = if extensions.is_empty() {
293 "extensions:".to_string()
294 } else {
295 format!("extensions: {extensions}")
296 };
297
298 Ok(format!(
299 "name: {}\nspace: {}\npath: {}\n{description_line}\n{extensions_line}\ndocuments: {}\nactive_documents: {}\nchunks: {}\nembedded_chunks: {}\ncreated: {}\nupdated: {}",
300 collection.name,
301 collection.space,
302 collection.path.display(),
303 collection.document_count,
304 collection.active_document_count,
305 collection.chunk_count,
306 collection.embedded_chunk_count,
307 collection.created,
308 collection.updated
309 ))
310 }
311
312 pub fn collection_describe(
313 &self,
314 space: Option<&str>,
315 name: &str,
316 text: &str,
317 ) -> Result<String> {
318 self.engine.describe_collection(space, name, text)?;
319 Ok(format!("collection description updated: {name}"))
320 }
321
322 pub fn collection_rename(&self, space: Option<&str>, old: &str, new: &str) -> Result<String> {
323 self.engine.rename_collection(space, old, new)?;
324 Ok(format!("collection renamed: {old} -> {new}"))
325 }
326
327 pub fn collection_remove(&self, space: Option<&str>, name: &str) -> Result<String> {
328 self.engine.remove_collection(space, name)?;
329 Ok(format!("collection removed: {name}"))
330 }
331
332 pub fn ignore_show(&self, space: Option<&str>, collection: &str) -> Result<String> {
333 let (resolved_space, content) = self.engine.read_collection_ignore(space, collection)?;
334 if let Some(content) = content {
335 return Ok(format!(
336 "ignore patterns for {resolved_space}/{collection}:\n{content}"
337 ));
338 }
339 Ok(format!(
340 "no ignore patterns configured for {resolved_space}/{collection}"
341 ))
342 }
343
344 pub fn ignore_add(
345 &self,
346 space: Option<&str>,
347 collection: &str,
348 pattern: &str,
349 ) -> Result<String> {
350 let (resolved_space, normalized_pattern) = self
351 .engine
352 .add_collection_ignore_pattern(space, collection, pattern)?;
353 Ok(format!(
354 "ignore pattern added for {resolved_space}/{collection}: {normalized_pattern}"
355 ))
356 }
357
358 pub fn ignore_remove(
359 &self,
360 space: Option<&str>,
361 collection: &str,
362 pattern: &str,
363 ) -> Result<String> {
364 let (resolved_space, removed_count) = self
365 .engine
366 .remove_collection_ignore_pattern(space, collection, pattern)?;
367 if removed_count == 0 {
368 return Ok(format!(
369 "ignore pattern not found for {resolved_space}/{collection}: {pattern}"
370 ));
371 }
372
373 Ok(format!(
374 "ignore pattern removed for {resolved_space}/{collection}: {pattern} ({removed_count} match(es))"
375 ))
376 }
377
378 pub fn ignore_list(&self, space: Option<&str>) -> Result<String> {
379 let entries = self.engine.list_collection_ignores(space)?;
380 let mut lines = Vec::new();
381 lines.push("ignore patterns:".to_string());
382 if entries.is_empty() {
383 lines.push("- none".to_string());
384 return Ok(lines.join("\n"));
385 }
386
387 let mut current_space: Option<String> = None;
388 for entry in entries {
389 if current_space.as_deref() != Some(entry.space.as_str()) {
390 lines.push(format!("{}:", entry.space));
391 current_space = Some(entry.space.clone());
392 }
393 lines.push(format!(
394 "- {} (patterns: {})",
395 entry.collection, entry.pattern_count
396 ));
397 }
398
399 Ok(lines.join("\n"))
400 }
401
402 pub fn ignore_edit(&self, space: Option<&str>, collection: &str) -> Result<String> {
403 let (resolved_space, path) = self
404 .engine
405 .prepare_collection_ignore_edit(space, collection)?;
406 let editor_command = resolve_editor_command()?;
407
408 let mut process = std::process::Command::new(&editor_command[0]);
409 if editor_command.len() > 1 {
410 process.args(&editor_command[1..]);
411 }
412 process.arg(&path);
413
414 let status = process.status().map_err(|err| {
415 KboltError::Internal(format!(
416 "failed to launch editor '{}': {err}",
417 editor_command[0]
418 ))
419 })?;
420 if !status.success() {
421 return Err(
422 KboltError::Internal(format!("editor exited with status: {status}")).into(),
423 );
424 }
425
426 Ok(format!(
427 "ignore patterns updated for {resolved_space}/{collection}: {}",
428 path.display()
429 ))
430 }
431
432 pub fn models_list(&self) -> Result<String> {
433 let status = self.engine.model_status()?;
434 let mut lines = Vec::new();
435 lines.push("models:".to_string());
436
437 for (label, info) in [
438 ("embedder", &status.embedder),
439 ("reranker", &status.reranker),
440 ("expander", &status.expander),
441 ] {
442 lines.push(format!("- {label}: {}", format_model_binding_summary(info)));
443 }
444
445 Ok(lines.join("\n"))
446 }
447
448 pub fn eval_run(&self, eval_file: Option<&Path>) -> Result<String> {
449 let report = self.engine.run_eval(eval_file)?;
450 Ok(format_eval_run_report(&report))
451 }
452
453 pub fn search(&self, options: CliSearchOptions<'_>) -> Result<String> {
454 let CliSearchOptions {
455 space,
456 query,
457 collections,
458 limit,
459 min_score,
460 deep,
461 keyword,
462 semantic,
463 rerank,
464 no_rerank,
465 debug,
466 } = options;
467 let mode_flags = deep as u8 + keyword as u8 + semantic as u8;
468 if mode_flags > 1 {
469 return Err(KboltError::InvalidInput(
470 "only one of --deep, --keyword, or --semantic can be set".to_string(),
471 )
472 .into());
473 }
474
475 let mode = if deep {
476 SearchMode::Deep
477 } else if keyword {
478 SearchMode::Keyword
479 } else if semantic {
480 SearchMode::Semantic
481 } else {
482 SearchMode::Auto
483 };
484 let effective_no_rerank = resolve_no_rerank_for_mode(mode.clone(), rerank, no_rerank);
485
486 let response = self.engine.search(SearchRequest {
487 query: query.to_string(),
488 mode,
489 space: space.map(ToString::to_string),
490 collections: collections.to_vec(),
491 limit,
492 min_score,
493 no_rerank: effective_no_rerank,
494 debug,
495 })?;
496
497 let mut lines = Vec::new();
498 lines.push(format!("query: {}", response.query));
499 lines.push(format!(
500 "requested_mode: {}",
501 format_search_mode(&response.requested_mode)
502 ));
503 lines.push(format!(
504 "effective_mode: {}",
505 format_search_mode(&response.effective_mode)
506 ));
507 lines.push(format!(
508 "pipeline: {}",
509 format_search_pipeline(&response.pipeline)
510 ));
511 for notice in &response.pipeline.notices {
512 lines.push(format!("note: {}", format_search_pipeline_notice(notice)));
513 }
514 lines.push(format!("results: {}", response.results.len()));
515 for (index, item) in response.results.iter().enumerate() {
516 lines.push(format!(
517 "{}. {} {} score={:.3}",
518 index + 1,
519 item.docid,
520 item.path,
521 item.score
522 ));
523 lines.push(format!("title: {}", item.title));
524 lines.push(format!(
525 "space: {} | collection: {}",
526 item.space, item.collection
527 ));
528 if let Some(heading) = &item.heading {
529 lines.push(format!("heading: {heading}"));
530 }
531 lines.push(format!("text: {}", item.text));
532 if let Some(signals) = &item.signals {
533 lines.push(format!(
534 "signals: bm25={:?} dense={:?} fusion={:.3} reranker={:?}",
535 signals.bm25, signals.dense, signals.fusion, signals.reranker
536 ));
537 }
538 }
539 if let Some(hint) = response.staleness_hint {
540 lines.push(hint);
541 }
542 lines.push(format!("elapsed_ms: {}", response.elapsed_ms));
543 Ok(lines.join("\n"))
544 }
545
546 pub fn update(
547 &self,
548 space: Option<&str>,
549 collections: &[String],
550 no_embed: bool,
551 dry_run: bool,
552 verbose: bool,
553 ) -> Result<String> {
554 let report = self.engine.update(UpdateOptions {
555 space: space.map(ToString::to_string),
556 collections: collections.to_vec(),
557 no_embed,
558 dry_run,
559 verbose,
560 })?;
561
562 Ok(format_update_report(&report, verbose))
563 }
564
565 pub fn schedule_add(&self, req: AddScheduleRequest) -> Result<String> {
566 let response = self.engine.add_schedule(req)?;
567 Ok(format_schedule_add_response(&response))
568 }
569
570 pub fn schedule_status(&self) -> Result<String> {
571 let response = self.engine.schedule_status()?;
572 Ok(format_schedule_status_response(&response))
573 }
574
575 pub fn schedule_remove(&self, req: RemoveScheduleRequest) -> Result<String> {
576 let response = self.engine.remove_schedule(req)?;
577 Ok(format_schedule_remove_response(&response))
578 }
579
580 pub fn status(&self, space: Option<&str>) -> Result<String> {
581 let status = self.engine.status(space)?;
582 let mut lines = Vec::new();
583 lines.push("spaces:".to_string());
584 if status.spaces.is_empty() {
585 lines.push("- none".to_string());
586 } else {
587 for space in status.spaces {
588 let collection_count = space.collections.len();
589 let description = space.description.unwrap_or_default();
590 let description_suffix = if description.is_empty() {
591 String::new()
592 } else {
593 format!(" - {description}")
594 };
595 let last_updated = space
596 .last_updated
597 .as_deref()
598 .map(|value| format!(", last_updated: {value}"))
599 .unwrap_or_default();
600 lines.push(format!(
601 "- {} (collections: {}{}){}",
602 space.name, collection_count, last_updated, description_suffix
603 ));
604
605 for collection in space.collections {
606 lines.push(format!(
607 " - {} ({}) documents: {}, active: {}, chunks: {}, embedded: {}, last_updated: {}",
608 collection.name,
609 collection.path.display(),
610 collection.documents,
611 collection.active_documents,
612 collection.chunks,
613 collection.embedded_chunks,
614 collection.last_updated
615 ));
616 }
617 }
618 }
619
620 lines.push(format!("total_documents: {}", status.total_documents));
621 lines.push(format!("total_chunks: {}", status.total_chunks));
622 lines.push(format!("total_embedded: {}", status.total_embedded));
623 lines.push(format!("sqlite_bytes: {}", status.disk_usage.sqlite_bytes));
624 lines.push(format!(
625 "tantivy_bytes: {}",
626 status.disk_usage.tantivy_bytes
627 ));
628 lines.push(format!(
629 "usearch_bytes: {}",
630 status.disk_usage.usearch_bytes
631 ));
632 lines.push(format!("models_bytes: {}", status.disk_usage.models_bytes));
633 lines.push(format!("total_bytes: {}", status.disk_usage.total_bytes));
634 lines.push(format!(
635 "model_embedder: {}",
636 format_model_binding_summary(&status.models.embedder)
637 ));
638 lines.push(format!(
639 "model_reranker: {}",
640 format_model_binding_summary(&status.models.reranker)
641 ));
642 lines.push(format!(
643 "model_expander: {}",
644 format_model_binding_summary(&status.models.expander)
645 ));
646 lines.push(format!("cache_dir: {}", status.cache_dir.display()));
647 lines.push(format!("config_dir: {}", status.config_dir.display()));
648
649 Ok(lines.join("\n"))
650 }
651
652 pub fn ls(
653 &self,
654 space: Option<&str>,
655 collection: &str,
656 prefix: Option<&str>,
657 all: bool,
658 ) -> Result<String> {
659 let mut files = self.engine.list_files(space, collection, prefix)?;
660 if !all {
661 files.retain(|file| file.active);
662 }
663
664 let mut lines = Vec::new();
665 lines.push("files:".to_string());
666 if files.is_empty() {
667 lines.push("- none".to_string());
668 return Ok(lines.join("\n"));
669 }
670
671 for file in files {
672 if all {
673 lines.push(format!(
674 "- {} | {} | {} | active: {}",
675 file.path, file.title, file.docid, file.active
676 ));
677 } else {
678 lines.push(format!("- {} | {} | {}", file.path, file.title, file.docid));
679 }
680 }
681
682 Ok(lines.join("\n"))
683 }
684
685 pub fn get(
686 &self,
687 space: Option<&str>,
688 identifier: &str,
689 offset: Option<usize>,
690 limit: Option<usize>,
691 ) -> Result<String> {
692 let locator = Locator::parse(identifier);
693
694 let document = self.engine.get_document(GetRequest {
695 locator,
696 space: space.map(ToString::to_string),
697 offset,
698 limit,
699 })?;
700
701 Ok(format!(
702 "docid: {}\npath: {}\ntitle: {}\nspace: {}\ncollection: {}\nstale: {}\ntotal_lines: {}\nreturned_lines: {}\ncontent:\n{}",
703 document.docid,
704 document.path,
705 document.title,
706 document.space,
707 document.collection,
708 document.stale,
709 document.total_lines,
710 document.returned_lines,
711 document.content
712 ))
713 }
714
715 pub fn multi_get(
716 &self,
717 space: Option<&str>,
718 locators: &[String],
719 max_files: usize,
720 max_bytes: usize,
721 ) -> Result<String> {
722 let locators = locators
723 .iter()
724 .map(|item| Locator::parse(item))
725 .collect::<Vec<_>>();
726
727 let response = self.engine.multi_get(MultiGetRequest {
728 locators,
729 space: space.map(ToString::to_string),
730 max_files,
731 max_bytes,
732 })?;
733
734 let mut lines = Vec::new();
735 lines.push(format!("documents: {}", response.documents.len()));
736 for document in response.documents {
737 lines.push(format!(
738 "--- {} {} (stale: {}, lines: {}/{}) ---",
739 document.docid,
740 document.path,
741 document.stale,
742 document.returned_lines,
743 document.total_lines
744 ));
745 lines.push(document.content);
746 }
747
748 lines.push(format!("omitted: {}", response.omitted.len()));
749 for omitted in response.omitted {
750 let reason = match omitted.reason {
751 OmitReason::MaxFiles => "max_files",
752 OmitReason::MaxBytes => "max_bytes",
753 };
754 lines.push(format!(
755 "- {} {} ({} bytes, reason: {reason})",
756 omitted.docid, omitted.path, omitted.size_bytes
757 ));
758 }
759 lines.push(format!("resolved_count: {}", response.resolved_count));
760 if !response.warnings.is_empty() {
761 lines.push(format!("warnings: {}", response.warnings.len()));
762 for warning in response.warnings {
763 lines.push(format!("- {warning}"));
764 }
765 }
766 Ok(lines.join("\n"))
767 }
768}
769
770pub fn format_doctor_report(report: &DoctorReport) -> String {
771 let mut lines = Vec::new();
772 lines.push(format!(
773 "setup: {}",
774 format_doctor_setup_status(report.setup_status)
775 ));
776 lines.push(format!("ready: {}", report.ready));
777 if let Some(path) = report.config_file.as_ref() {
778 lines.push(format!("config_file: {}", path.display()));
779 }
780 if let Some(path) = report.config_dir.as_ref() {
781 lines.push(format!("config_dir: {}", path.display()));
782 }
783 if let Some(path) = report.cache_dir.as_ref() {
784 lines.push(format!("cache_dir: {}", path.display()));
785 }
786 lines.push("checks:".to_string());
787 for check in &report.checks {
788 lines.push(format!(
789 "- [{}] {} {} ({}ms): {}",
790 format_doctor_check_status(check.status),
791 check.scope,
792 check.id,
793 check.elapsed_ms,
794 check.message
795 ));
796 if let Some(fix) = check.fix.as_deref() {
797 lines.push(format!(" fix: {fix}"));
798 }
799 }
800 lines.join("\n")
801}
802
803pub fn format_local_report(report: &LocalReport) -> String {
804 let mut lines = Vec::new();
805 lines.push(format!("action: {}", format_local_action(report.action)));
806 lines.push(format!("ready: {}", report.ready));
807 lines.push(format!("config_file: {}", report.config_file.display()));
808 lines.push(format!("cache_dir: {}", report.cache_dir.display()));
809 if let Some(path) = report.llama_server_path.as_ref() {
810 lines.push(format!("llama_server: {}", path.display()));
811 } else {
812 lines.push("llama_server: missing".to_string());
813 }
814 if !report.notes.is_empty() {
815 lines.push("notes:".to_string());
816 for note in &report.notes {
817 lines.push(format!("- {note}"));
818 }
819 }
820 lines.push("services:".to_string());
821 for service in &report.services {
822 lines.push(format!(
823 "- {}: {} | configured={} | enabled={} | managed={} | running={} | ready={} | model={} | endpoint={} | model_path={} | pid={} | pid_file={} | log_file={}",
824 service.name,
825 service.provider,
826 service.configured,
827 service.enabled,
828 service.managed,
829 service.running,
830 service.ready,
831 service.model,
832 service.endpoint,
833 service.model_path.display(),
834 service
835 .pid
836 .map(|value| value.to_string())
837 .unwrap_or_else(|| "none".to_string()),
838 service.pid_file.display(),
839 service.log_file.display()
840 ));
841 if let Some(issue) = service.issue.as_deref() {
842 lines.push(format!(" issue: {issue}"));
843 }
844 }
845 lines.join("\n")
846}
847
848fn format_doctor_setup_status(status: DoctorSetupStatus) -> &'static str {
849 match status {
850 DoctorSetupStatus::ConfigMissing => "config_missing",
851 DoctorSetupStatus::ConfigInvalid => "config_invalid",
852 DoctorSetupStatus::NotConfigured => "not_configured",
853 DoctorSetupStatus::Configured => "configured",
854 }
855}
856
857fn format_local_action(action: LocalAction) -> &'static str {
858 match action {
859 LocalAction::Setup => "setup",
860 LocalAction::Start => "start",
861 LocalAction::Stop => "stop",
862 LocalAction::Status => "status",
863 LocalAction::EnableDeep => "enable_deep",
864 }
865}
866
867fn format_doctor_check_status(status: DoctorCheckStatus) -> &'static str {
868 match status {
869 DoctorCheckStatus::Pass => "PASS",
870 DoctorCheckStatus::Warn => "WARN",
871 DoctorCheckStatus::Fail => "FAIL",
872 }
873}
874
875fn format_search_mode(mode: &SearchMode) -> &'static str {
876 match mode {
877 SearchMode::Auto => "auto",
878 SearchMode::Deep => "deep",
879 SearchMode::Keyword => "keyword",
880 SearchMode::Semantic => "semantic",
881 }
882}
883
884fn format_model_binding_summary(info: &ModelInfo) -> String {
885 let mut parts = vec![if !info.configured {
886 "unconfigured".to_string()
887 } else if info.ready {
888 "ready".to_string()
889 } else {
890 "not_ready".to_string()
891 }];
892
893 if let Some(profile) = info.profile.as_deref() {
894 parts.push(format!("profile={profile}"));
895 }
896 if let Some(kind) = info.kind.as_deref() {
897 parts.push(format!("kind={kind}"));
898 }
899 if let Some(operation) = info.operation.as_deref() {
900 parts.push(format!("operation={operation}"));
901 }
902 if let Some(model) = info.model.as_deref() {
903 parts.push(format!("model={model}"));
904 }
905 if let Some(endpoint) = info.endpoint.as_deref() {
906 parts.push(format!("endpoint={endpoint}"));
907 }
908 if let Some(issue) = info.issue.as_deref() {
909 parts.push(format!("issue={issue}"));
910 }
911
912 parts.join(" | ")
913}
914
915fn format_search_pipeline(pipeline: &SearchPipeline) -> String {
916 let mut parts = Vec::new();
917 if pipeline.expansion {
918 parts.push("expansion");
919 }
920 if pipeline.keyword {
921 parts.push("keyword");
922 }
923 if pipeline.dense {
924 parts.push("dense");
925 }
926 if pipeline.rerank {
927 parts.push("rerank");
928 }
929
930 if parts.is_empty() {
931 "none".to_string()
932 } else {
933 parts.join(" + ")
934 }
935}
936
937fn format_search_pipeline_notice(notice: &SearchPipelineNotice) -> String {
938 let step = match notice.step {
939 SearchPipelineStep::Dense => "dense retrieval",
940 SearchPipelineStep::Rerank => "rerank",
941 };
942 let reason = match notice.reason {
943 SearchPipelineUnavailableReason::NotConfigured => "not configured",
944 SearchPipelineUnavailableReason::ModelNotAvailable => "required provider is not ready",
945 };
946 format!("{step} unavailable: {reason}")
947}
948
949fn format_update_report(report: &UpdateReport, verbose: bool) -> String {
950 let mut lines = Vec::new();
951 if verbose {
952 for decision in &report.decisions {
953 lines.push(format_update_decision(decision));
954 }
955
956 for error in unreported_update_errors(report) {
957 lines.push(format!("error: {}: {}", error.path, error.error));
958 }
959 }
960
961 lines.push(format!("scanned_docs: {}", report.scanned_docs));
962 lines.push(format!("skipped_mtime_docs: {}", report.skipped_mtime_docs));
963 lines.push(format!("skipped_hash_docs: {}", report.skipped_hash_docs));
964 lines.push(format!("added_docs: {}", report.added_docs));
965 lines.push(format!("updated_docs: {}", report.updated_docs));
966 lines.push(format!("failed_docs: {}", report.failed_docs));
967 lines.push(format!("deactivated_docs: {}", report.deactivated_docs));
968 lines.push(format!("reactivated_docs: {}", report.reactivated_docs));
969 lines.push(format!("reaped_docs: {}", report.reaped_docs));
970 lines.push(format!("embedded_chunks: {}", report.embedded_chunks));
971 lines.push(format!("errors: {}", report.errors.len()));
972 lines.push(format!("elapsed_ms: {}", report.elapsed_ms));
973 lines.join("\n")
974}
975
976fn format_collection_add_result(result: &AddCollectionResult) -> String {
977 let collection = &result.collection;
978 let locator = format!("{}/{}", collection.space, collection.name);
979
980 match &result.initial_indexing {
981 InitialIndexingOutcome::Skipped => {
982 format!("collection added without indexing: {locator}")
983 }
984 InitialIndexingOutcome::Indexed(report) => {
985 format_collection_add_indexing_report(collection, &locator, report)
986 }
987 InitialIndexingOutcome::Blocked(block) => {
988 format_collection_add_block(collection, &locator, block)
989 }
990 }
991}
992
993fn format_collection_add_indexing_report(
994 collection: &kbolt_types::CollectionInfo,
995 locator: &str,
996 report: &UpdateReport,
997) -> String {
998 let mut lines = Vec::new();
999 if report.failed_docs == 0 {
1000 lines.push(format!("collection added and indexed: {locator}"));
1001 } else {
1002 lines.push(format!("collection added: {locator}"));
1003 lines.push("initial indexing incomplete".to_string());
1004 }
1005
1006 lines.push(format!("scanned_docs: {}", report.scanned_docs));
1007 lines.push(format!("added_docs: {}", report.added_docs));
1008 lines.push(format!("updated_docs: {}", report.updated_docs));
1009 lines.push(format!("failed_docs: {}", report.failed_docs));
1010
1011 if report.failed_docs > 0 {
1012 lines.push(format!(
1013 "rerun: kbolt --space {} update --collection {}",
1014 collection.space, collection.name
1015 ));
1016 }
1017
1018 lines.join("\n")
1019}
1020
1021fn format_collection_add_block(
1022 collection: &kbolt_types::CollectionInfo,
1023 locator: &str,
1024 block: &InitialIndexingBlock,
1025) -> String {
1026 let mut lines = Vec::new();
1027 lines.push(format!("collection added: {locator}"));
1028
1029 match block {
1030 InitialIndexingBlock::SpaceDenseRepairRequired { space, reason } => {
1031 lines.push(format!(
1032 "initial indexing blocked by space-level dense integrity issue in '{space}'"
1033 ));
1034 lines.push(format!("reason: {reason}"));
1035 lines.push(format!("run: kbolt --space {space} update"));
1036 }
1037 InitialIndexingBlock::ModelNotAvailable { name } => {
1038 lines.push(format!(
1039 "initial indexing blocked: model '{name}' is not available"
1040 ));
1041 lines.push("run: kbolt setup local".to_string());
1042 lines.push("or configure [roles.embedder] in index.toml".to_string());
1043 lines.push(format!(
1044 "then run: kbolt --space {} update --collection {}",
1045 collection.space, collection.name
1046 ));
1047 }
1048 }
1049
1050 lines.join("\n")
1051}
1052
1053fn format_update_decision(decision: &UpdateDecision) -> String {
1054 let locator = format!(
1055 "{}/{}/{}",
1056 decision.space, decision.collection, decision.path
1057 );
1058 match decision.detail.as_deref() {
1059 Some(detail) => format!(
1060 "{locator}: {} ({detail})",
1061 format_update_decision_kind(&decision.kind)
1062 ),
1063 None => format!("{locator}: {}", format_update_decision_kind(&decision.kind)),
1064 }
1065}
1066
1067fn format_update_decision_kind(kind: &UpdateDecisionKind) -> &'static str {
1068 match kind {
1069 UpdateDecisionKind::New => "new",
1070 UpdateDecisionKind::Changed => "changed",
1071 UpdateDecisionKind::SkippedMtime => "skipped_mtime",
1072 UpdateDecisionKind::SkippedHash => "skipped_hash",
1073 UpdateDecisionKind::Ignored => "ignored",
1074 UpdateDecisionKind::Unsupported => "unsupported",
1075 UpdateDecisionKind::ReadFailed => "read_failed",
1076 UpdateDecisionKind::ExtractFailed => "extract_failed",
1077 UpdateDecisionKind::Reactivated => "reactivated",
1078 UpdateDecisionKind::Deactivated => "deactivated",
1079 }
1080}
1081
1082fn unreported_update_errors(report: &UpdateReport) -> Vec<&kbolt_types::FileError> {
1083 report
1084 .errors
1085 .iter()
1086 .filter(|error| {
1087 !report.decisions.iter().any(|decision| {
1088 matches!(
1089 decision.kind,
1090 UpdateDecisionKind::ReadFailed | UpdateDecisionKind::ExtractFailed
1091 ) && std::path::Path::new(&error.path)
1092 .ends_with(std::path::Path::new(&decision.path))
1093 })
1094 })
1095 .collect()
1096}
1097
1098pub fn resolve_no_rerank_for_mode(mode: SearchMode, rerank: bool, no_rerank: bool) -> bool {
1099 match mode {
1100 SearchMode::Auto => !rerank,
1101 SearchMode::Deep => no_rerank,
1102 SearchMode::Keyword | SearchMode::Semantic => true,
1103 }
1104}
1105
1106fn resolve_editor_command() -> Result<Vec<String>> {
1107 let raw = std::env::var("VISUAL")
1108 .ok()
1109 .filter(|value| !value.trim().is_empty())
1110 .or_else(|| {
1111 std::env::var("EDITOR")
1112 .ok()
1113 .filter(|value| !value.trim().is_empty())
1114 })
1115 .unwrap_or_else(|| "vi".to_string());
1116
1117 parse_editor_command(&raw)
1118}
1119
1120fn parse_editor_command(raw: &str) -> Result<Vec<String>> {
1121 let args = shell_words::split(raw).map_err(|err| {
1122 KboltError::InvalidInput(format!("invalid editor command '{raw}': {err}"))
1123 })?;
1124 if args.is_empty() {
1125 return Err(KboltError::InvalidInput("editor command cannot be empty".to_string()).into());
1126 }
1127 Ok(args)
1128}
1129
1130fn format_schedule_add_response(response: &ScheduleAddResponse) -> String {
1131 format!(
1132 "schedule added: {}\ntrigger: {}\nscope: {}\nbackend: {}",
1133 response.schedule.id,
1134 format_schedule_trigger(&response.schedule.trigger),
1135 format_schedule_scope(&response.schedule.scope),
1136 format_schedule_backend(response.backend),
1137 )
1138}
1139
1140fn format_schedule_status_response(response: &ScheduleStatusResponse) -> String {
1141 let mut lines = Vec::new();
1142 lines.push("schedules:".to_string());
1143 if response.schedules.is_empty() {
1144 lines.push("- none".to_string());
1145 } else {
1146 for entry in &response.schedules {
1147 lines.push(format!(
1148 "- {} | {} | {} | {} | {}",
1149 entry.schedule.id,
1150 format_schedule_trigger(&entry.schedule.trigger),
1151 format_schedule_scope(&entry.schedule.scope),
1152 format_schedule_backend(entry.backend),
1153 format_schedule_state(entry.state),
1154 ));
1155 lines.push(format!(
1156 " last_started: {}",
1157 entry.run_state.last_started.as_deref().unwrap_or("never")
1158 ));
1159 lines.push(format!(
1160 " last_finished: {}",
1161 entry.run_state.last_finished.as_deref().unwrap_or("never")
1162 ));
1163 lines.push(format!(
1164 " last_result: {}",
1165 format_schedule_run_result(entry.run_state.last_result)
1166 ));
1167 if let Some(error) = entry.run_state.last_error.as_deref() {
1168 lines.push(format!(" last_error: {error}"));
1169 }
1170 }
1171 }
1172
1173 lines.push("orphans:".to_string());
1174 if response.orphans.is_empty() {
1175 lines.push("- none".to_string());
1176 } else {
1177 for orphan in &response.orphans {
1178 lines.push(format!(
1179 "- {} ({})",
1180 orphan.id,
1181 format_schedule_backend(orphan.backend)
1182 ));
1183 }
1184 }
1185
1186 lines.join("\n")
1187}
1188
1189fn format_schedule_remove_response(response: &kbolt_types::ScheduleRemoveResponse) -> String {
1190 if response.removed_ids.is_empty() {
1191 return "removed schedules: none".to_string();
1192 }
1193
1194 format!("removed schedules: {}", response.removed_ids.join(", "))
1195}
1196
1197fn format_schedule_trigger(trigger: &ScheduleTrigger) -> String {
1198 match trigger {
1199 ScheduleTrigger::Every { interval } => format_schedule_interval(interval),
1200 ScheduleTrigger::Daily { time } => format!("daily at {}", format_schedule_time(time)),
1201 ScheduleTrigger::Weekly { weekdays, time } => format!(
1202 "{} at {}",
1203 format_schedule_weekdays(weekdays),
1204 format_schedule_time(time)
1205 ),
1206 }
1207}
1208
1209fn format_schedule_interval(interval: &ScheduleInterval) -> String {
1210 let suffix = match interval.unit {
1211 ScheduleIntervalUnit::Minutes => "m",
1212 ScheduleIntervalUnit::Hours => "h",
1213 };
1214 format!("every {}{suffix}", interval.value)
1215}
1216
1217fn format_schedule_scope(scope: &ScheduleScope) -> String {
1218 match scope {
1219 ScheduleScope::All => "all spaces".to_string(),
1220 ScheduleScope::Space { space } => format!("space {space}"),
1221 ScheduleScope::Collections { space, collections } => collections
1222 .iter()
1223 .map(|collection| format!("{space}/{collection}"))
1224 .collect::<Vec<_>>()
1225 .join(", "),
1226 }
1227}
1228
1229fn format_schedule_backend(backend: ScheduleBackend) -> &'static str {
1230 match backend {
1231 ScheduleBackend::Launchd => "launchd",
1232 ScheduleBackend::SystemdUser => "systemd-user",
1233 }
1234}
1235
1236fn format_schedule_state(state: ScheduleState) -> &'static str {
1237 match state {
1238 ScheduleState::Installed => "installed",
1239 ScheduleState::Drifted => "drifted",
1240 ScheduleState::TargetMissing => "target_missing",
1241 }
1242}
1243
1244fn format_schedule_run_result(result: Option<ScheduleRunResult>) -> &'static str {
1245 match result {
1246 Some(ScheduleRunResult::Success) => "success",
1247 Some(ScheduleRunResult::SkippedLock) => "skipped_lock",
1248 Some(ScheduleRunResult::Failed) => "failed",
1249 None => "never",
1250 }
1251}
1252
1253fn format_schedule_weekdays(weekdays: &[ScheduleWeekday]) -> String {
1254 weekdays
1255 .iter()
1256 .map(|weekday| match weekday {
1257 ScheduleWeekday::Mon => "mon",
1258 ScheduleWeekday::Tue => "tue",
1259 ScheduleWeekday::Wed => "wed",
1260 ScheduleWeekday::Thu => "thu",
1261 ScheduleWeekday::Fri => "fri",
1262 ScheduleWeekday::Sat => "sat",
1263 ScheduleWeekday::Sun => "sun",
1264 })
1265 .collect::<Vec<_>>()
1266 .join(",")
1267}
1268
/// Converts a 24-hour `HH:MM` time into a 12-hour clock label such as
/// `3:00 PM`.
///
/// Midnight (`00:MM`) renders as `12:MM AM` and noon (`12:MM`) as `12:MM PM`.
/// Input that is not a valid 24-hour time (no `:` separator, a non-numeric
/// part, an hour above 23, or a minute above 59) is returned unchanged.
fn format_schedule_time(time: &str) -> String {
    let Some((hour, minute)) = time.split_once(':') else {
        return time.to_string();
    };
    let (Ok(hour), Ok(minute)) = (hour.parse::<u32>(), minute.parse::<u32>()) else {
        return time.to_string();
    };
    // Out-of-range values would otherwise render nonsense such as "87:00 PM"
    // or "10:75 AM"; treat them like any other unparseable input.
    if hour > 23 || minute > 59 {
        return time.to_string();
    }

    let meridiem = if hour < 12 { "AM" } else { "PM" };
    // 0 and 12 both display as 12 on a 12-hour clock.
    let hour = match hour % 12 {
        0 => 12,
        other => other,
    };

    format!("{hour}:{minute:02} {meridiem}")
}
1289
1290fn format_eval_run_report(report: &EvalRunReport) -> String {
1291 let mut lines = vec!["eval:".to_string()];
1292 for mode in &report.modes {
1293 lines.push(format!(
1294 "- {}: ndcg@10 {:.3}, recall@10 {:.3}, mrr@10 {:.3}, p50 {}ms, p95 {}ms",
1295 format_eval_mode_label(&mode.mode, mode.no_rerank),
1296 mode.ndcg_at_10,
1297 mode.recall_at_10,
1298 mode.mrr_at_10,
1299 mode.latency_p50_ms,
1300 mode.latency_p95_ms
1301 ));
1302 }
1303 for failure in &report.failed_modes {
1304 lines.push(format!(
1305 "- {}: failed ({})",
1306 format_eval_mode_label(&failure.mode, failure.no_rerank),
1307 failure.error
1308 ));
1309 }
1310
1311 let findings = report
1312 .modes
1313 .iter()
1314 .flat_map(|mode| {
1315 mode.queries.iter().filter_map(|query| {
1316 let perfect_recall = query.matched_paths.len() == relevant_judgment_count(query);
1317 let perfect_rank = query.first_relevant_rank == Some(1);
1318 if perfect_recall && perfect_rank {
1319 return None;
1320 }
1321
1322 Some(format!(
1323 "- [{}] {} | first relevant: {} | expected: {} | returned: {}",
1324 format_eval_mode_label(&mode.mode, mode.no_rerank),
1325 query.query,
1326 query
1327 .first_relevant_rank
1328 .map(|rank| rank.to_string())
1329 .unwrap_or_else(|| "none".to_string()),
1330 format_eval_judgments(&query.judgments),
1331 if query.returned_paths.is_empty() {
1332 "none".to_string()
1333 } else {
1334 query.returned_paths.join(", ")
1335 }
1336 ))
1337 })
1338 })
1339 .collect::<Vec<_>>();
1340
1341 if findings.is_empty() {
1342 lines.push("queries needing attention: none".to_string());
1343 } else {
1344 lines.push("queries needing attention:".to_string());
1345 lines.extend(findings);
1346 }
1347
1348 lines.join("\n")
1349}
1350
1351pub fn format_eval_import_report(report: &EvalImportReport) -> String {
1352 [
1353 format!("imported benchmark: {}", report.dataset),
1354 format!("source: {}", report.source),
1355 format!("output: {}", report.output_dir),
1356 format!("corpus_dir: {}", report.corpus_dir),
1357 format!("manifest: {}", report.manifest_path),
1358 format!("documents: {}", report.document_count),
1359 format!("queries: {}", report.query_count),
1360 format!("judgments: {}", report.judgment_count),
1361 "next:".to_string(),
1362 format!(
1363 "- create the benchmark space if needed: kbolt space add {}",
1364 report.default_space
1365 ),
1366 format!(
1367 "- register the corpus: kbolt --space {} collection add {} --name {} --no-index",
1368 report.default_space, report.corpus_dir, report.collection
1369 ),
1370 format!(
1371 "- index it: kbolt --space {} update --collection {}",
1372 report.default_space, report.collection
1373 ),
1374 format!("- run eval: kbolt eval run --file {}", report.manifest_path),
1375 ]
1376 .join("\n")
1377}
1378
1379fn relevant_judgment_count(query: &kbolt_types::EvalQueryReport) -> usize {
1380 query
1381 .judgments
1382 .iter()
1383 .filter(|judgment| judgment.relevance > 0)
1384 .count()
1385}
1386
1387fn format_eval_judgments(judgments: &[kbolt_types::EvalJudgment]) -> String {
1388 judgments
1389 .iter()
1390 .map(|judgment| format!("{}(rel={})", judgment.path, judgment.relevance))
1391 .collect::<Vec<_>>()
1392 .join(", ")
1393}
1394
1395fn format_eval_mode_label(mode: &SearchMode, no_rerank: bool) -> &'static str {
1396 match (mode, no_rerank) {
1397 (SearchMode::Keyword, _) => "keyword",
1398 (SearchMode::Auto, true) => "auto",
1399 (SearchMode::Auto, false) => "auto+rerank",
1400 (SearchMode::Semantic, _) => "semantic",
1401 (SearchMode::Deep, true) => "deep-norerank",
1402 (SearchMode::Deep, false) => "deep",
1403 }
1404}
1405
#[cfg(test)]
mod tests {
    use std::ffi::OsString;
    use std::sync::{Mutex, OnceLock};
    use std::{
        fs,
        path::{Path, PathBuf},
    };

    use tempfile::tempdir;

    use super::{
        format_collection_add_result, format_doctor_report, format_eval_import_report,
        format_eval_run_report, format_local_report, format_schedule_add_response,
        format_schedule_status_response, format_schedule_time, parse_editor_command,
        resolve_editor_command, resolve_no_rerank_for_mode, CliAdapter, CliSearchOptions,
    };
    use kbolt_core::engine::Engine;
    use kbolt_types::{
        AddCollectionRequest, AddCollectionResult, CollectionInfo, DoctorCheck, DoctorCheckStatus,
        DoctorReport, DoctorSetupStatus, EvalImportReport, EvalJudgment, EvalModeReport,
        EvalQueryReport, EvalRunReport, InitialIndexingBlock, InitialIndexingOutcome, LocalAction,
        LocalReport, ScheduleAddResponse, ScheduleBackend, ScheduleDefinition, ScheduleInterval,
        ScheduleIntervalUnit, ScheduleOrphan, ScheduleRunResult, ScheduleRunState, ScheduleScope,
        ScheduleState, ScheduleStatusEntry, ScheduleStatusResponse, ScheduleTrigger,
        ScheduleWeekday, SearchMode, UpdateReport,
    };

    /// Snapshot of the environment variables the tests mutate; the captured
    /// values are written back (or the variables removed) when it is dropped.
    struct EnvRestore {
        home: Option<OsString>,
        config_home: Option<OsString>,
        cache_home: Option<OsString>,
        visual: Option<OsString>,
        editor: Option<OsString>,
    }

    impl EnvRestore {
        /// Records the current values of the tracked variables.
        fn capture() -> Self {
            Self {
                home: std::env::var_os("HOME"),
                config_home: std::env::var_os("XDG_CONFIG_HOME"),
                cache_home: std::env::var_os("XDG_CACHE_HOME"),
                visual: std::env::var_os("VISUAL"),
                editor: std::env::var_os("EDITOR"),
            }
        }

        /// Resets `key` to its captured value, removing it if it was unset.
        fn restore_var(key: &str, saved: Option<&OsString>) {
            match saved {
                Some(value) => std::env::set_var(key, value),
                None => std::env::remove_var(key),
            }
        }
    }

    impl Drop for EnvRestore {
        fn drop(&mut self) {
            Self::restore_var("HOME", self.home.as_ref());
            Self::restore_var("XDG_CONFIG_HOME", self.config_home.as_ref());
            Self::restore_var("XDG_CACHE_HOME", self.cache_home.as_ref());
            Self::restore_var("VISUAL", self.visual.as_ref());
            Self::restore_var("EDITOR", self.editor.as_ref());
        }
    }

    /// Runs `run` with `HOME`, `XDG_CONFIG_HOME` and `XDG_CACHE_HOME` pointing
    /// into a fresh temporary directory.
    ///
    /// Calls are serialized through a process-wide mutex because the
    /// environment is shared by all test threads. The previous values of the
    /// tracked variables are restored afterwards, including when `run` panics.
    fn with_isolated_xdg_dirs<T>(run: impl FnOnce() -> T) -> T {
        static ENV_LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        let lock = ENV_LOCK.get_or_init(|| Mutex::new(()));
        // A panicking test poisons the mutex. The guarded data is `()` and
        // `EnvRestore` has already reset the environment during unwinding, so
        // recover the guard instead of failing every later test.
        let _guard = lock.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
        let _restore = EnvRestore::capture();

        let root = tempdir().expect("create temp root");
        std::env::set_var("HOME", root.path());
        std::env::set_var("XDG_CONFIG_HOME", root.path().join("config-home"));
        std::env::set_var("XDG_CACHE_HOME", root.path().join("cache-home"));

        // Locals drop in reverse order: temp dir, then env restore, then lock.
        run()
    }

    /// Creates `root/name` (and any missing parents) and returns its path.
    fn new_collection_dir(root: &Path, name: &str) -> PathBuf {
        let path = root.join(name);
        fs::create_dir_all(&path).expect("create collection directory");
        path
    }

    /// `$VISUAL` wins over `$EDITOR`, and `vi` is used when neither is set.
    #[test]
    fn editor_command_resolution_prefers_visual_then_editor_then_vi() {
        with_isolated_xdg_dirs(|| {
            std::env::set_var("VISUAL", "nvim -f");
            std::env::set_var("EDITOR", "vim");
            let from_visual = resolve_editor_command().expect("resolve visual");
            assert_eq!(from_visual, vec!["nvim".to_string(), "-f".to_string()]);

            std::env::remove_var("VISUAL");
            let from_editor = resolve_editor_command().expect("resolve editor");
            assert_eq!(from_editor, vec!["vim".to_string()]);

            std::env::remove_var("EDITOR");
            let fallback = resolve_editor_command().expect("resolve fallback");
            assert_eq!(fallback, vec!["vi".to_string()]);
        });
    }

    /// An unterminated quote is reported as an invalid editor command.
    #[test]
    fn parse_editor_command_rejects_invalid_shell_words() {
        let err = parse_editor_command("'").expect_err("invalid shell words should fail");
        assert!(
            err.to_string().contains("invalid editor command"),
            "unexpected error: {err}"
        );
    }

    /// A command that splits into zero words is rejected.
    #[test]
    fn parse_editor_command_rejects_empty_command() {
        let err = parse_editor_command("").expect_err("empty command should fail");
        assert!(
            err.to_string().contains("editor command cannot be empty"),
            "unexpected error: {err}"
        );
    }

    /// Valid 24-hour times are shown on a 12-hour clock; text without a `:`
    /// separator is passed through unchanged.
    #[test]
    fn format_schedule_time_renders_twelve_hour_clock() {
        assert_eq!(format_schedule_time("00:05"), "12:05 AM");
        assert_eq!(format_schedule_time("09:30"), "9:30 AM");
        assert_eq!(format_schedule_time("12:00"), "12:00 PM");
        assert_eq!(format_schedule_time("15:00"), "3:00 PM");
        assert_eq!(format_schedule_time("not-a-time"), "not-a-time");
    }

    /// The eval report lists per-mode metrics, failed modes, and only the
    /// queries that missed a relevant document or did not rank one first.
    #[test]
    fn eval_run_report_formats_summary_and_attention_queries() {
        let output = format_eval_run_report(&EvalRunReport {
            total_cases: 1,
            modes: vec![
                EvalModeReport {
                    mode: SearchMode::Keyword,
                    no_rerank: true,
                    ndcg_at_10: 1.0,
                    recall_at_10: 1.0,
                    mrr_at_10: 1.0,
                    latency_p50_ms: 2,
                    latency_p95_ms: 3,
                    queries: vec![EvalQueryReport {
                        query: "trait object generic".to_string(),
                        space: Some("default".to_string()),
                        collections: vec!["rust".to_string()],
                        judgments: vec![EvalJudgment {
                            path: "rust/guides/traits.md".to_string(),
                            relevance: 1,
                        }],
                        returned_paths: vec!["rust/guides/traits.md".to_string()],
                        matched_paths: vec!["rust/guides/traits.md".to_string()],
                        first_relevant_rank: Some(1),
                        elapsed_ms: 2,
                    }],
                },
                EvalModeReport {
                    mode: SearchMode::Deep,
                    no_rerank: false,
                    ndcg_at_10: 0.0,
                    recall_at_10: 0.0,
                    mrr_at_10: 0.0,
                    latency_p50_ms: 8,
                    latency_p95_ms: 12,
                    queries: vec![EvalQueryReport {
                        query: "trait object generic".to_string(),
                        space: Some("default".to_string()),
                        collections: vec!["rust".to_string()],
                        judgments: vec![EvalJudgment {
                            path: "rust/guides/traits.md".to_string(),
                            relevance: 1,
                        }],
                        returned_paths: vec!["rust/overview.md".to_string()],
                        matched_paths: vec![],
                        first_relevant_rank: None,
                        elapsed_ms: 8,
                    }],
                },
            ],
            failed_modes: vec![kbolt_types::EvalModeFailure {
                mode: SearchMode::Semantic,
                no_rerank: true,
                error: "model not available".to_string(),
            }],
        });

        assert!(output
            .contains("- keyword: ndcg@10 1.000, recall@10 1.000, mrr@10 1.000, p50 2ms, p95 3ms"));
        assert!(output
            .contains("- deep: ndcg@10 0.000, recall@10 0.000, mrr@10 0.000, p50 8ms, p95 12ms"));
        assert!(output.contains("- semantic: failed (model not available)"));
        assert!(output.contains("queries needing attention:"));
        assert!(output.contains("[deep] trait object generic | first relevant: none"));
    }

    /// The import report echoes the counts and suggests the follow-up commands.
    #[test]
    fn eval_import_report_formats_next_steps() {
        let output = format_eval_import_report(&EvalImportReport {
            dataset: "scifact".to_string(),
            source: "/tmp/scifact-source".to_string(),
            output_dir: "/tmp/scifact-bench".to_string(),
            corpus_dir: "/tmp/scifact-bench/corpus".to_string(),
            manifest_path: "/tmp/scifact-bench/eval.toml".to_string(),
            default_space: "bench".to_string(),
            collection: "scifact".to_string(),
            document_count: 2,
            query_count: 2,
            judgment_count: 3,
        });

        assert!(output.contains("imported benchmark: scifact"));
        assert!(output.contains("documents: 2"));
        assert!(output.contains("queries: 2"));
        assert!(output.contains("judgments: 3"));
        assert!(output.contains("kbolt space add bench"));
        assert!(output.contains("kbolt eval run --file /tmp/scifact-bench/eval.toml"));
    }

    /// With a fresh, empty configuration every model role is unconfigured.
    #[test]
    fn models_list_reports_role_binding_readiness() {
        with_isolated_xdg_dirs(|| {
            let engine = Engine::new(None).expect("create engine");
            let adapter = CliAdapter::new(engine);

            let output = adapter.models_list().expect("list models");
            assert!(output.contains("models:"), "unexpected output: {output}");
            assert!(
                output.contains("- embedder: unconfigured"),
                "unexpected output: {output}"
            );
            assert!(
                output.contains("- reranker: unconfigured"),
                "unexpected output: {output}"
            );
            assert!(
                output.contains("- expander: unconfigured"),
                "unexpected output: {output}"
            );
        });
    }

    /// The doctor report shows setup status, readiness, each check, and fixes.
    #[test]
    fn doctor_report_formats_status_checks_and_fixes() {
        let output = format_doctor_report(&DoctorReport {
            setup_status: DoctorSetupStatus::Configured,
            config_file: Some(PathBuf::from("/tmp/kbolt/index.toml")),
            config_dir: Some(PathBuf::from("/tmp/kbolt")),
            cache_dir: Some(PathBuf::from("/tmp/cache/kbolt")),
            ready: false,
            checks: vec![
                DoctorCheck {
                    id: "config.file_parses".to_string(),
                    scope: "config".to_string(),
                    status: DoctorCheckStatus::Pass,
                    elapsed_ms: 2,
                    message: "ok".to_string(),
                    fix: None,
                },
                DoctorCheck {
                    id: "roles.embedder.reachable".to_string(),
                    scope: "roles.embedder".to_string(),
                    status: DoctorCheckStatus::Fail,
                    elapsed_ms: 17,
                    message: "llama_cpp_server endpoint is unreachable".to_string(),
                    fix: Some("Start the embedding server.".to_string()),
                },
            ],
        });

        assert!(output.contains("setup: configured"));
        assert!(output.contains("ready: false"));
        assert!(output.contains("config_file: /tmp/kbolt/index.toml"));
        assert!(
            output.contains("- [PASS] config config.file_parses (2ms): ok"),
            "unexpected output: {output}"
        );
        assert!(
            output.contains(
                "- [FAIL] roles.embedder roles.embedder.reachable (17ms): llama_cpp_server endpoint is unreachable"
            ),
            "unexpected output: {output}"
        );
        assert!(output.contains(" fix: Start the embedding server."));
    }

    /// The local report shows the action, server path, notes and service state.
    #[test]
    fn local_report_formats_service_state_and_notes() {
        let output = format_local_report(&LocalReport {
            action: LocalAction::Setup,
            config_file: PathBuf::from("/tmp/kbolt/index.toml"),
            cache_dir: PathBuf::from("/tmp/cache/kbolt"),
            llama_server_path: Some(PathBuf::from("/opt/homebrew/bin/llama-server")),
            ready: false,
            notes: vec!["started embedder on http://127.0.0.1:8101".to_string()],
            services: vec![kbolt_types::LocalServiceReport {
                name: "embedder".to_string(),
                provider: "kbolt_local_embed".to_string(),
                enabled: true,
                configured: true,
                managed: true,
                running: true,
                ready: false,
                model: "embeddinggemma".to_string(),
                model_path: PathBuf::from("/tmp/cache/kbolt/models/embedder/model.gguf"),
                endpoint: "http://127.0.0.1:8101".to_string(),
                port: 8101,
                pid: Some(42),
                pid_file: PathBuf::from("/tmp/cache/kbolt/run/embedder.pid"),
                log_file: PathBuf::from("/tmp/cache/kbolt/logs/embedder.log"),
                issue: Some("service is not ready".to_string()),
            }],
        });

        assert!(output.contains("action: setup"));
        assert!(output.contains("llama_server: /opt/homebrew/bin/llama-server"));
        assert!(output.contains("- started embedder on http://127.0.0.1:8101"));
        assert!(output.contains("configured=true"));
        assert!(output.contains("issue: service is not ready"));
    }

    /// Passing more than one of the mode flags is rejected.
    #[test]
    fn search_rejects_conflicting_mode_flags() {
        with_isolated_xdg_dirs(|| {
            let adapter = CliAdapter::new(Engine::new(None).expect("create engine"));

            let err = adapter
                .search(CliSearchOptions {
                    space: None,
                    query: "alpha",
                    collections: &[],
                    limit: 10,
                    min_score: 0.0,
                    deep: true,
                    keyword: true,
                    semantic: false,
                    rerank: false,
                    no_rerank: false,
                    debug: false,
                })
                .expect_err("conflicting search flags should fail");
            assert!(
                err.to_string()
                    .contains("only one of --deep, --keyword, or --semantic"),
                "unexpected error: {err}"
            );
        });
    }

    /// Reranking is opt-in for auto, opt-out for deep, and always off for
    /// keyword and semantic.
    #[test]
    fn resolve_no_rerank_for_mode_matches_cli_contract() {
        assert!(resolve_no_rerank_for_mode(SearchMode::Auto, false, false));
        assert!(!resolve_no_rerank_for_mode(SearchMode::Auto, true, false));
        assert!(!resolve_no_rerank_for_mode(SearchMode::Deep, false, false));
        assert!(resolve_no_rerank_for_mode(SearchMode::Deep, false, true));
        assert!(resolve_no_rerank_for_mode(SearchMode::Keyword, true, false));
        assert!(resolve_no_rerank_for_mode(
            SearchMode::Semantic,
            true,
            false
        ));
    }

    /// An auto search without a configured embedder reports the keyword
    /// fallback in its requested/effective mode, pipeline and note lines.
    #[test]
    fn search_reports_requested_and_effective_mode_for_auto_keyword_fallback() {
        with_isolated_xdg_dirs(|| {
            let root = tempdir().expect("create collection root");
            let engine = Engine::new(None).expect("create engine");
            engine.add_space("work", None).expect("add work");

            let work_path = new_collection_dir(root.path(), "work-api");
            engine
                .add_collection(AddCollectionRequest {
                    path: work_path.clone(),
                    space: Some("work".to_string()),
                    name: Some("api".to_string()),
                    description: None,
                    extensions: None,
                    no_index: true,
                })
                .expect("add collection");
            fs::write(work_path.join("a.md"), "fallback token\n").expect("write file");

            let adapter = CliAdapter::new(engine);
            adapter
                .update(Some("work"), &["api".to_string()], true, false, false)
                .expect("run update");

            let output = adapter
                .search(CliSearchOptions {
                    space: Some("work"),
                    query: "fallback",
                    collections: &["api".to_string()],
                    limit: 5,
                    min_score: 0.0,
                    deep: false,
                    keyword: false,
                    semantic: false,
                    rerank: false,
                    no_rerank: false,
                    debug: false,
                })
                .expect("run auto search");

            assert!(
                output.contains("requested_mode: auto"),
                "unexpected output: {output}"
            );
            assert!(
                output.contains("effective_mode: keyword"),
                "unexpected output: {output}"
            );
            assert!(
                output.contains("pipeline: keyword"),
                "unexpected output: {output}"
            );
            assert!(
                output.contains("note: dense retrieval unavailable: not configured"),
                "unexpected output: {output}"
            );
        });
    }

    /// Verbose update output lists the per-file decisions before the summary.
    #[test]
    fn update_verbose_reports_buffered_decisions_before_summary() {
        with_isolated_xdg_dirs(|| {
            let root = tempdir().expect("create collection root");
            let engine = Engine::new(None).expect("create engine");
            engine.add_space("work", None).expect("add work");

            let collection_path = new_collection_dir(root.path(), "work-api");
            engine
                .add_collection(AddCollectionRequest {
                    path: collection_path.clone(),
                    space: Some("work".to_string()),
                    name: Some("api".to_string()),
                    description: None,
                    extensions: Some(vec!["rs".to_string()]),
                    no_index: true,
                })
                .expect("add collection");
            let adapter = CliAdapter::new(engine);

            fs::create_dir_all(collection_path.join("src")).expect("create src dir");
            fs::write(collection_path.join("src/lib.rs"), "fn alpha() {}\n")
                .expect("write valid file");
            // Invalid UTF-8 so that extraction of this file fails.
            fs::write(collection_path.join("src/bad.rs"), [0xff, 0xfe, 0xfd])
                .expect("write invalid file");

            let output = adapter
                .update(Some("work"), &["api".to_string()], true, false, true)
                .expect("run verbose update");

            let summary_index = output
                .lines()
                .position(|line| line.starts_with("scanned_docs: "))
                .expect("expected summary output");
            assert!(summary_index > 0, "unexpected output: {output}");
            assert!(
                output
                    .lines()
                    .next()
                    .unwrap_or_default()
                    .starts_with("work/api/"),
                "unexpected output: {output}"
            );
            assert!(
                output.contains("work/api/src/lib.rs: new"),
                "unexpected output: {output}"
            );
            assert!(
                output.contains("work/api/src/bad.rs: extract_failed (extract failed:"),
                "unexpected output: {output}"
            );
        });
    }

    /// A skipped initial index produces the single-line "without indexing" message.
    #[test]
    fn collection_add_result_formats_no_index_message() {
        let output = format_collection_add_result(&AddCollectionResult {
            collection: CollectionInfo {
                name: "api".to_string(),
                space: "work".to_string(),
                path: PathBuf::from("/tmp/work-api"),
                description: None,
                extensions: None,
                document_count: 0,
                active_document_count: 0,
                chunk_count: 0,
                embedded_chunk_count: 0,
                created: "2026-03-31T00:00:00Z".to_string(),
                updated: "2026-03-31T00:00:00Z".to_string(),
            },
            initial_indexing: InitialIndexingOutcome::Skipped,
        });

        assert_eq!(output, "collection added without indexing: work/api");
    }

    /// An initial index with failed documents is flagged as incomplete and
    /// includes the rerun command.
    #[test]
    fn collection_add_result_formats_incomplete_initial_indexing() {
        let output = format_collection_add_result(&AddCollectionResult {
            collection: CollectionInfo {
                name: "api".to_string(),
                space: "work".to_string(),
                path: PathBuf::from("/tmp/work-api"),
                description: None,
                extensions: None,
                document_count: 3,
                active_document_count: 3,
                chunk_count: 3,
                embedded_chunk_count: 2,
                created: "2026-03-31T00:00:00Z".to_string(),
                updated: "2026-03-31T00:00:00Z".to_string(),
            },
            initial_indexing: InitialIndexingOutcome::Indexed(UpdateReport {
                scanned_docs: 3,
                skipped_mtime_docs: 0,
                skipped_hash_docs: 0,
                added_docs: 2,
                updated_docs: 0,
                failed_docs: 1,
                deactivated_docs: 0,
                reactivated_docs: 0,
                reaped_docs: 0,
                embedded_chunks: 2,
                decisions: Vec::new(),
                errors: Vec::new(),
                elapsed_ms: 5,
            }),
        });

        assert!(output.contains("collection added: work/api"));
        assert!(output.contains("initial indexing incomplete"));
        assert!(output.contains("scanned_docs: 3"));
        assert!(output.contains("added_docs: 2"));
        assert!(output.contains("failed_docs: 1"));
        assert!(output.contains("rerun: kbolt --space work update --collection api"));
    }

    /// A missing model blocks initial indexing and the output lists the steps
    /// needed to resume.
    #[test]
    fn collection_add_result_formats_model_block_with_resume_steps() {
        let output = format_collection_add_result(&AddCollectionResult {
            collection: CollectionInfo {
                name: "api".to_string(),
                space: "work".to_string(),
                path: PathBuf::from("/tmp/work-api"),
                description: None,
                extensions: None,
                document_count: 0,
                active_document_count: 0,
                chunk_count: 0,
                embedded_chunk_count: 0,
                created: "2026-03-31T00:00:00Z".to_string(),
                updated: "2026-03-31T00:00:00Z".to_string(),
            },
            initial_indexing: InitialIndexingOutcome::Blocked(
                InitialIndexingBlock::ModelNotAvailable {
                    name: "embed-model".to_string(),
                },
            ),
        });

        assert!(output.contains("collection added: work/api"));
        assert!(output.contains("initial indexing blocked: model 'embed-model' is not available"));
        assert!(output.contains("run: kbolt setup local"));
        assert!(output.contains("configure [roles.embedder] in index.toml"));
        assert!(output.contains("then run: kbolt --space work update --collection api"));
    }

    /// Adding a space with directories registers them as collections and
    /// tells the user how to index them.
    #[test]
    fn space_add_with_directories_reports_registration_without_indexing() {
        with_isolated_xdg_dirs(|| {
            let root = tempdir().expect("create collection root");
            let engine = Engine::new(None).expect("create engine");
            let mut adapter = CliAdapter::new(engine);

            let work_path = new_collection_dir(root.path(), "work-api");
            let notes_path = new_collection_dir(root.path(), "work-notes");

            let output = adapter
                .space_add("work", Some("work docs"), false, &[work_path, notes_path])
                .expect("add space with directories");

            assert!(output.contains("space added: work - work docs"));
            assert!(output.contains("collections registered: 2"));
            assert!(output.contains("run `kbolt --space work update` to index them"));
        });
    }

    /// The add confirmation renders id, trigger, scope and backend on four lines.
    #[test]
    fn format_schedule_add_response_renders_trigger_scope_and_backend() {
        let output = format_schedule_add_response(&ScheduleAddResponse {
            schedule: ScheduleDefinition {
                id: "s1".to_string(),
                trigger: ScheduleTrigger::Every {
                    interval: ScheduleInterval {
                        value: 30,
                        unit: ScheduleIntervalUnit::Minutes,
                    },
                },
                scope: ScheduleScope::All,
            },
            backend: ScheduleBackend::Launchd,
        });

        assert_eq!(
            output,
            "schedule added: s1\ntrigger: every 30m\nscope: all spaces\nbackend: launchd"
        );
    }

    /// The status listing renders each schedule with its run state, followed
    /// by the orphans.
    #[test]
    fn format_schedule_status_response_renders_entries_and_orphans() {
        let output = format_schedule_status_response(&ScheduleStatusResponse {
            schedules: vec![ScheduleStatusEntry {
                schedule: ScheduleDefinition {
                    id: "s2".to_string(),
                    trigger: ScheduleTrigger::Weekly {
                        weekdays: vec![ScheduleWeekday::Mon, ScheduleWeekday::Fri],
                        time: "15:00".to_string(),
                    },
                    scope: ScheduleScope::Collections {
                        space: "work".to_string(),
                        collections: vec!["api".to_string(), "docs".to_string()],
                    },
                },
                backend: ScheduleBackend::Launchd,
                state: ScheduleState::Drifted,
                run_state: ScheduleRunState {
                    last_started: Some("2026-03-07T20:00:00Z".to_string()),
                    last_finished: Some("2026-03-07T20:00:05Z".to_string()),
                    last_result: Some(ScheduleRunResult::SkippedLock),
                    last_error: None,
                },
            }],
            orphans: vec![ScheduleOrphan {
                id: "s9".to_string(),
                backend: ScheduleBackend::Launchd,
            }],
        });

        assert!(output.contains(
            "schedules:\n- s2 | mon,fri at 3:00 PM | work/api, work/docs | launchd | drifted"
        ));
        assert!(output.contains("last_result: skipped_lock"));
        assert!(output.contains("orphans:\n- s9 (launchd)"));
    }
}