1use crate::cli::{GetArgs, SaveArgs, TagCommands, UpdateArgs};
4use crate::config::{default_actor, resolve_db_path, resolve_session_or_suggest};
5use crate::embeddings::{
6 create_embedding_provider, is_embeddings_enabled, prepare_item_text, BoxedProvider,
7 EmbeddingProvider, Model2VecProvider, SearchMode,
8};
9use crate::error::{Error, Result};
10use crate::storage::{SemanticSearchResult, SqliteStorage};
11use serde::Serialize;
12use std::collections::HashMap;
13use std::path::PathBuf;
14use std::sync::OnceLock;
15use tracing::{debug, info, trace, warn};
16
17static FAST_PROVIDER: OnceLock<Option<Model2VecProvider>> = OnceLock::new();
22
23fn get_fast_provider() -> Option<&'static Model2VecProvider> {
25 FAST_PROVIDER
26 .get_or_init(|| {
27 if !is_embeddings_enabled() {
28 debug!("Fast embedding provider skipped: embeddings disabled");
29 return None;
30 }
31 let provider = Model2VecProvider::try_new();
32 if provider.is_some() {
33 debug!("Fast embedding provider initialized (Model2Vec)");
34 } else {
35 warn!("Fast embedding provider failed to initialize");
36 }
37 provider
38 })
39 .as_ref()
40}
41
42fn store_fast_embedding(
47 storage: &mut SqliteStorage,
48 item_id: &str,
49 key: &str,
50 value: &str,
51 category: Option<&str>,
52) {
53 let Some(provider) = get_fast_provider() else {
55 trace!(key, "Skipping fast embedding: provider unavailable");
56 return;
57 };
58
59 let text = prepare_item_text(key, value, category);
61
62 let embedding = {
65 let rt = match tokio::runtime::Runtime::new() {
66 Ok(rt) => rt,
67 Err(e) => {
68 warn!(key, error = %e, "Failed to create tokio runtime for fast embedding");
69 return;
70 }
71 };
72 match rt.block_on(provider.generate_embedding(&text)) {
73 Ok(emb) => emb,
74 Err(e) => {
75 warn!(key, error = %e, "Fast embedding generation failed");
76 return;
77 }
78 }
79 };
80
81 let chunk_id = format!("fast_{}_{}", item_id, 0);
83 let model = provider.info().model;
84
85 match storage.store_fast_embedding_chunk(&chunk_id, item_id, 0, &text, &embedding, &model) {
87 Ok(_) => debug!(key, dim = embedding.len(), "Fast embedding stored"),
88 Err(e) => warn!(key, error = %e, "Failed to store fast embedding"),
89 }
90}
91
/// JSON payload emitted by `execute_save` when `--json` is requested.
#[derive(Serialize)]
struct SaveOutput {
    key: String,
    category: String,
    priority: String,
    session_id: String,
}
100
/// JSON payload for the non-semantic `execute_get` path.
#[derive(Serialize)]
struct GetOutput {
    items: Vec<crate::storage::ContextItem>,
    count: usize,
}
107
/// JSON payload for semantic search results (and nearest-miss suggestions).
#[derive(Serialize)]
struct SemanticSearchOutput {
    items: Vec<SemanticSearchItem>,
    /// Number of real matches; 0 when `items` are only suggestions.
    count: usize,
    query: String,
    threshold: f32,
    /// Always `true`; distinguishes this shape from `GetOutput`.
    semantic: bool,
    /// Which fallback stage produced the results, when not stage 1.
    #[serde(skip_serializing_if = "Option::is_none")]
    strategy: Option<String>,
}
119
/// One semantic search hit as serialized to JSON.
#[derive(Serialize)]
struct SemanticSearchItem {
    key: String,
    value: String,
    category: String,
    priority: String,
    /// Cosine-style similarity, or the RRF score on decomposed-query paths.
    similarity: f32,
    /// The matched chunk's text (may differ from the full `value`).
    chunk_text: String,
}
130
/// JSON payload emitted by `execute_delete` when `--json` is requested.
#[derive(Serialize)]
struct DeleteOutput {
    key: String,
    deleted: bool,
}
137
/// Save a context item into the session store.
///
/// Resolves the database path and session, persists the item, then
/// best-effort writes a fast (Model2Vec) embedding and kicks off the
/// background quality embedder. Output shape depends on silent/json flags.
///
/// # Errors
/// `Error::NotInitialized` when no database exists, plus any storage or
/// serialization errors.
pub fn execute_save(
    args: &SaveArgs,
    db_path: Option<&PathBuf>,
    actor: Option<&str>,
    session_id: Option<&str>,
    json: bool,
) -> Result<()> {
    // The store must already exist; `save` never creates it implicitly.
    let db_path = resolve_db_path(db_path.map(|p| p.as_path()))
        .ok_or(Error::NotInitialized)?;

    if !db_path.exists() {
        return Err(Error::NotInitialized);
    }

    let mut storage = SqliteStorage::open(&db_path)?;
    let actor = actor.map(ToString::to_string).unwrap_or_else(default_actor);

    let resolved_session_id = resolve_session_or_suggest(session_id, &storage)?;
    debug!(session = %resolved_session_id, key = %args.key, category = %args.category, "Saving context item");

    // Short random id; UUID text is ASCII, so slicing 12 bytes is safe.
    let id = format!("item_{}", &uuid::Uuid::new_v4().to_string()[..12]);

    storage.save_context_item(
        &id,
        &resolved_session_id,
        &args.key,
        &args.value,
        Some(&args.category),
        Some(&args.priority),
        &actor,
    )?;

    // Re-read the id by key: saving an existing key may have updated an
    // existing row whose id differs from the fresh one generated above.
    // NOTE(review): presumes upsert semantics in save_context_item — confirm.
    let actual_id = storage
        .get_item_id_by_key(&resolved_session_id, &args.key)?
        .unwrap_or(id);

    // Synchronous, best-effort fast-tier embedding (failures only logged).
    store_fast_embedding(
        &mut storage,
        &actual_id,
        &args.key,
        &args.value,
        Some(&args.category),
    );

    // Quality-tier embeddings are generated asynchronously.
    super::embeddings::spawn_background_embedder();

    // Silent mode prints only the key, for scripting.
    if crate::is_silent() {
        println!("{}", args.key);
        return Ok(());
    }

    if json {
        let output = SaveOutput {
            key: args.key.clone(),
            category: args.category.clone(),
            priority: args.priority.clone(),
            session_id: resolved_session_id.clone(),
        };
        println!("{}", serde_json::to_string(&output)?);
    } else {
        println!("Saved: {} [{}]", args.key, args.category);
    }

    Ok(())
}
212
213pub fn execute_get(
224 args: &GetArgs,
225 db_path: Option<&PathBuf>,
226 session_id: Option<&str>,
227 json: bool,
228) -> Result<()> {
229 let db_path = resolve_db_path(db_path.map(|p| p.as_path()))
230 .ok_or(Error::NotInitialized)?;
231
232 if !db_path.exists() {
233 return Err(Error::NotInitialized);
234 }
235
236 let use_semantic = args.query.is_some() && is_embeddings_enabled();
238 debug!(
239 query = args.query.as_deref().unwrap_or("(none)"),
240 use_semantic,
241 embeddings_enabled = is_embeddings_enabled(),
242 "Search mode selection"
243 );
244
245 if use_semantic {
246 let rt = tokio::runtime::Runtime::new()
248 .map_err(|e| Error::Other(format!("Failed to create async runtime: {e}")))?;
249
250 return rt.block_on(execute_semantic_search(args, &db_path, session_id, json));
251 }
252
253 let storage = SqliteStorage::open(&db_path)?;
255
256 #[allow(clippy::cast_possible_truncation)]
258 let fetch_limit = ((args.limit + args.offset.unwrap_or(0)) * 2).min(1000) as u32;
259
260 let items = if args.search_all_sessions {
262 storage.get_all_context_items(
264 args.category.as_deref(),
265 args.priority.as_deref(),
266 Some(fetch_limit),
267 )?
268 } else {
269 let resolved_session_id = resolve_session_or_suggest(session_id, &storage)?;
271
272 storage.get_context_items(
273 &resolved_session_id,
274 args.category.as_deref(),
275 args.priority.as_deref(),
276 Some(fetch_limit),
277 )?
278 };
279
280 let items: Vec<_> = if let Some(ref key) = args.key {
282 items.into_iter().filter(|i| i.key == *key).collect()
283 } else if let Some(ref query) = args.query {
284 let q = query.to_lowercase();
286 items
287 .into_iter()
288 .filter(|i| {
289 i.key.to_lowercase().contains(&q) || i.value.to_lowercase().contains(&q)
290 })
291 .collect()
292 } else {
293 items
294 };
295
296 let items: Vec<_> = items
298 .into_iter()
299 .skip(args.offset.unwrap_or(0))
300 .take(args.limit)
301 .collect();
302
303 if crate::is_csv() {
304 println!("key,category,priority,value");
305 for item in &items {
306 let val = crate::csv_escape(&item.value);
307 println!("{},{},{},{}", item.key, item.category, item.priority, val);
308 }
309 } else if json {
310 let output = GetOutput {
311 count: items.len(),
312 items,
313 };
314 println!("{}", serde_json::to_string(&output)?);
315 } else if items.is_empty() {
316 println!("No context items found.");
317 } else {
318 println!("Context items ({} found):", items.len());
319 println!();
320 for item in &items {
321 let priority_icon = match item.priority.as_str() {
322 "high" => "!",
323 "low" => "-",
324 _ => " ",
325 };
326 println!("[{}] {} ({})", priority_icon, item.key, item.category);
327 let display_value = if item.value.len() > 100 {
329 format!("{}...", &item.value[..100])
330 } else {
331 item.value.clone()
332 };
333 println!(" {display_value}");
334 println!();
335 }
336 }
337
338 Ok(())
339}
340
/// Multi-stage semantic search over context items.
///
/// Stages, tried in order until one yields results:
/// 1. Adaptive-threshold search on the full query (session-scoped).
/// 2. Query decomposition + RRF fusion of the sub-query result lists.
/// 3. Stage 1 (and 3b: stage 2) widened to all sessions.
/// 4. Nothing matched anywhere: print the 5 nearest misses as suggestions.
///
/// # Errors
/// `Error::InvalidArgument` when no query is present, `Error::Embedding`
/// when no provider can be constructed, plus storage/serialization errors.
async fn execute_semantic_search(
    args: &GetArgs,
    db_path: &std::path::Path,
    session_id: Option<&str>,
    json: bool,
) -> Result<()> {
    let query = args.query.as_ref().ok_or_else(|| {
        Error::InvalidArgument("Query is required for semantic search".to_string())
    })?;

    // An explicit user threshold bypasses the adaptive cutoff in stages 1/3.
    let explicit_threshold = args.threshold.map(|t| t as f32);
    let search_mode = args.search_mode.unwrap_or_default();

    let storage = SqliteStorage::open(db_path)?;

    // `None` means search across all sessions.
    let session_filter = if args.search_all_sessions {
        None
    } else {
        Some(resolve_session_or_suggest(session_id, &storage)?)
    };

    let query_text = prepare_item_text("query", query, None);
    info!(query, ?search_mode, session = session_filter.as_deref().unwrap_or("all"), "Starting semantic search");

    // Pick a provider and embed the full query once up front; stage 2 reuses
    // the same provider for the sub-queries.
    let (query_embedding, provider) = match search_mode {
        SearchMode::Fast => {
            debug!("Using fast provider (Model2Vec)");
            let p = Model2VecProvider::try_new().ok_or_else(|| {
                Error::Embedding("Model2Vec not available for fast search".to_string())
            })?;
            let emb = p.generate_embedding(&query_text).await?;
            (emb, SmartProvider::Fast(p))
        }
        SearchMode::Quality | SearchMode::Tiered => {
            debug!("Using quality provider (Ollama/HuggingFace)");
            let p = create_embedding_provider()
                .await
                .ok_or_else(|| Error::Embedding("No quality embedding provider available".to_string()))?;
            let emb = p.generate_embedding(&query_text).await?;
            (emb, SmartProvider::Quality(p))
        }
    };

    // Which storage-side index to query; Tiered searches the quality tier here.
    let search_fn = match search_mode {
        SearchMode::Fast => SearchFn::Fast,
        SearchMode::Quality | SearchMode::Tiered => SearchFn::Quality,
    };

    debug!("Stage 1: adaptive threshold search");
    let results = smart_search_adaptive(
        &storage,
        &search_fn,
        &query_embedding,
        session_filter.as_deref(),
        args.limit,
        explicit_threshold,
    )?;

    if !results.is_empty() {
        info!(count = results.len(), "Stage 1 matched");
        return output_semantic_results(&results, query, explicit_threshold.unwrap_or(0.0), json, None);
    }
    debug!("Stage 1: no results");

    // Stage 2 only helps for multi-word queries; a single word doesn't decompose.
    let sub_queries = decompose_query(query);
    debug!(sub_query_count = sub_queries.len(), ?sub_queries, "Stage 2: decomposition");
    if sub_queries.len() > 1 {
        let results = smart_search_rrf(
            &provider,
            &storage,
            &search_fn,
            &sub_queries,
            session_filter.as_deref(),
            args.limit,
        )
        .await?;

        if !results.is_empty() {
            info!(count = results.len(), "Stage 2 matched (decomposed query)");
            return output_semantic_results(&results, query, 0.0, json, Some("decomposed query"));
        }
        debug!("Stage 2: no results from RRF");
    }

    // Stage 3 widens scope; skipped when we already searched all sessions.
    if session_filter.is_some() {
        debug!("Stage 3: expanding scope to all sessions");
        let results = smart_search_adaptive(
            &storage,
            &search_fn,
            &query_embedding,
            None,
            args.limit,
            explicit_threshold,
        )?;

        if !results.is_empty() {
            info!(count = results.len(), "Stage 3 matched (all sessions, adaptive)");
            return output_semantic_results(
                &results, query, explicit_threshold.unwrap_or(0.0), json,
                Some("expanded to all sessions"),
            );
        }

        if sub_queries.len() > 1 {
            debug!("Stage 3b: all sessions + decomposition");
            let results = smart_search_rrf(
                &provider,
                &storage,
                &search_fn,
                &sub_queries,
                None,
                args.limit,
            )
            .await?;

            if !results.is_empty() {
                info!(count = results.len(), "Stage 3b matched (all sessions + decomposed)");
                return output_semantic_results(
                    &results, query, 0.0, json,
                    Some("expanded to all sessions + decomposed"),
                );
            }
        }
    }

    // Stage 4: nothing cleared a threshold — surface the nearest misses so
    // the user can refine the query.
    debug!("Stage 4: all stages exhausted, fetching nearest misses");
    let all_results = match search_fn {
        SearchFn::Fast => storage.search_fast_tier(&query_embedding, None, 5, 0.0)?,
        SearchFn::Quality => storage.semantic_search(&query_embedding, None, 5, 0.0)?,
    };

    if all_results.is_empty() {
        output_semantic_results(&[], query, 0.0, json, None)
    } else {
        output_suggestions(&all_results, query, json)
    }
}
499
/// Which storage-side vector index a search call should hit.
enum SearchFn {
    /// Fast-tier (Model2Vec) chunk index.
    Fast,
    /// Quality-tier embedding index.
    Quality,
}
505
/// Embedding provider selected at query time; unifies the concrete fast
/// provider and the boxed quality provider behind a single call site.
enum SmartProvider {
    Fast(Model2VecProvider),
    Quality(BoxedProvider),
}
511
impl SmartProvider {
    /// Embed `text` with whichever provider variant is held.
    async fn generate_embedding(&self, text: &str) -> Result<Vec<f32>> {
        match self {
            SmartProvider::Fast(p) => p.generate_embedding(text).await,
            SmartProvider::Quality(p) => p.generate_embedding(text).await,
        }
    }
}
520
/// Single-embedding search with an adaptive similarity cutoff.
///
/// With an explicit threshold, the storage search is delegated directly.
/// Otherwise candidates are over-fetched at threshold 0 and a cutoff of
/// `max(top_score * 0.6, 0.25)` is applied, so the bar scales with the best
/// available match instead of being fixed.
///
/// NOTE(review): assumes the storage layer returns results sorted by
/// descending similarity (`all[0]` is treated as the top score) — confirm.
///
/// # Errors
/// Propagates storage errors.
fn smart_search_adaptive(
    storage: &SqliteStorage,
    search_fn: &SearchFn,
    query_embedding: &[f32],
    session_id: Option<&str>,
    limit: usize,
    explicit_threshold: Option<f32>,
) -> Result<Vec<SemanticSearchResult>> {
    if let Some(t) = explicit_threshold {
        trace!(threshold = t, "Using explicit threshold");
        return match search_fn {
            SearchFn::Fast => storage.search_fast_tier(query_embedding, session_id, limit, t),
            SearchFn::Quality => storage.semantic_search(query_embedding, session_id, limit, t),
        };
    }

    // Over-fetch 3x so the post-hoc threshold still leaves a full page.
    let all = match search_fn {
        SearchFn::Fast => storage.search_fast_tier(query_embedding, session_id, limit * 3, 0.0)?,
        SearchFn::Quality => storage.semantic_search(query_embedding, session_id, limit * 3, 0.0)?,
    };

    if all.is_empty() {
        trace!("Adaptive search: corpus empty");
        return Ok(vec![]);
    }

    let top_score = all[0].similarity;
    // Cutoff tracks the best hit but never drops below the 0.25 floor.
    let adaptive_threshold = (top_score * 0.6).max(0.25);
    let filtered_count = all.iter().filter(|r| r.similarity >= adaptive_threshold).count();
    debug!(
        top_score,
        adaptive_threshold,
        candidates = all.len(),
        above_threshold = filtered_count,
        "Adaptive threshold computed"
    );

    Ok(all
        .into_iter()
        .filter(|r| r.similarity >= adaptive_threshold)
        .take(limit)
        .collect())
}
570
571async fn smart_search_rrf(
576 provider: &SmartProvider,
577 storage: &SqliteStorage,
578 search_fn: &SearchFn,
579 sub_queries: &[String],
580 session_id: Option<&str>,
581 limit: usize,
582) -> Result<Vec<SemanticSearchResult>> {
583 let k = 60.0_f32; let mut all_result_sets = Vec::new();
587 for sq in sub_queries {
588 let text = prepare_item_text("query", sq, None);
589 let emb = provider.generate_embedding(&text).await?;
590 let results = match search_fn {
591 SearchFn::Fast => storage.search_fast_tier(&emb, session_id, 20, 0.2)?,
592 SearchFn::Quality => storage.semantic_search(&emb, session_id, 20, 0.2)?,
593 };
594 trace!(sub_query = sq, hits = results.len(), "Sub-query results");
595 all_result_sets.push(results);
596 }
597
598 let mut scores: HashMap<String, (f32, SemanticSearchResult)> = HashMap::new();
600 for results in &all_result_sets {
601 for (rank, result) in results.iter().enumerate() {
602 let rrf_score = 1.0 / (k + rank as f32 + 1.0);
603 scores
604 .entry(result.item_id.clone())
605 .and_modify(|(score, _)| *score += rrf_score)
606 .or_insert((rrf_score, result.clone()));
607 }
608 }
609
610 let mut fused: Vec<_> = scores.into_values().collect();
611 fused.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
612 debug!(
613 unique_items = fused.len(),
614 top_rrf_score = fused.first().map(|(s, _)| *s).unwrap_or(0.0),
615 "RRF fusion complete"
616 );
617
618 Ok(fused
619 .into_iter()
620 .take(limit)
621 .map(|(rrf_score, mut r)| {
622 r.similarity = rrf_score;
623 r
624 })
625 .collect())
626}
627
/// Split a multi-word query into sub-queries: every word longer than two
/// bytes, plus every adjacent pair of such words.
///
/// Queries that yield zero or one usable word are returned verbatim as a
/// single-element vector.
fn decompose_query(query: &str) -> Vec<String> {
    let words: Vec<&str> = query.split_whitespace().filter(|w| w.len() > 2).collect();

    if words.len() <= 1 {
        return vec![query.to_string()];
    }

    // Single words first, then the sliding-window bigrams.
    words
        .iter()
        .map(|w| (*w).to_string())
        .chain(words.windows(2).map(|pair| format!("{} {}", pair[0], pair[1])))
        .collect()
}
653
654fn output_semantic_results(
656 results: &[SemanticSearchResult],
657 query: &str,
658 threshold: f32,
659 json: bool,
660 strategy: Option<&str>,
661) -> Result<()> {
662 if json {
663 let items: Vec<SemanticSearchItem> = results
664 .iter()
665 .map(|r| SemanticSearchItem {
666 key: r.key.clone(),
667 value: r.value.clone(),
668 category: r.category.clone(),
669 priority: r.priority.clone(),
670 similarity: r.similarity,
671 chunk_text: r.chunk_text.clone(),
672 })
673 .collect();
674
675 let output = SemanticSearchOutput {
676 count: items.len(),
677 items,
678 query: query.to_string(),
679 threshold,
680 semantic: true,
681 strategy: strategy.map(String::from),
682 };
683 println!("{}", serde_json::to_string(&output)?);
684 } else if results.is_empty() {
685 println!("No matching items found.");
686 println!();
687 println!("Tips:");
688 println!(" - Try a simpler query (single keywords work best)");
689 println!(" - Ensure items have been backfilled: sc embeddings backfill");
690 } else {
691 let strategy_note = strategy
692 .map(|s| format!(", strategy: {s}"))
693 .unwrap_or_default();
694 println!(
695 "Semantic search results ({} found{}):",
696 results.len(),
697 strategy_note
698 );
699 println!();
700 for (i, result) in results.iter().enumerate() {
701 let priority_icon = match result.priority.as_str() {
702 "high" => "!",
703 "low" => "-",
704 _ => " ",
705 };
706 println!(
707 "{}. [{:.0}%] [{}] {} ({})",
708 i + 1,
709 result.similarity * 100.0,
710 priority_icon,
711 result.key,
712 result.category
713 );
714 let display_text = if result.chunk_text.len() > 100 {
715 format!("{}...", &result.chunk_text[..100])
716 } else {
717 result.chunk_text.clone()
718 };
719 println!(" {display_text}");
720 println!();
721 }
722 }
723
724 Ok(())
725}
726
/// Print nearest-miss suggestions when no stage produced a real match.
///
/// # Errors
/// Returns serialization errors from the JSON path.
fn output_suggestions(
    results: &[SemanticSearchResult],
    query: &str,
    json: bool,
) -> Result<()> {
    if json {
        let items: Vec<SemanticSearchItem> = results
            .iter()
            .map(|r| SemanticSearchItem {
                key: r.key.clone(),
                value: r.value.clone(),
                category: r.category.clone(),
                priority: r.priority.clone(),
                similarity: r.similarity,
                chunk_text: r.chunk_text.clone(),
            })
            .collect();

        let output = SemanticSearchOutput {
            // Deliberately 0: these items are suggestions, not matches.
            count: 0,
            items,
            query: query.to_string(),
            threshold: 0.0,
            semantic: true,
            strategy: Some("suggestions (nearest misses)".to_string()),
        };
        println!("{}", serde_json::to_string(&output)?);
    } else {
        println!("No strong matches found. Nearest items in corpus:");
        println!();
        for (i, result) in results.iter().enumerate() {
            println!(
                " {}. [{:.0}%] {} ({})",
                i + 1,
                result.similarity * 100.0,
                result.key,
                result.category
            );
        }
        println!();
        println!("Tips:");
        println!(" - Try simpler keywords: single terms work best");
        println!(" - Try --search-all-sessions to search all sessions");
    }

    Ok(())
}
775
776pub fn execute_delete(
778 key: &str,
779 db_path: Option<&PathBuf>,
780 actor: Option<&str>,
781 session_id: Option<&str>,
782 json: bool,
783) -> Result<()> {
784 let db_path = resolve_db_path(db_path.map(|p| p.as_path()))
785 .ok_or(Error::NotInitialized)?;
786
787 if !db_path.exists() {
788 return Err(Error::NotInitialized);
789 }
790
791 let mut storage = SqliteStorage::open(&db_path)?;
792 let actor = actor.map(ToString::to_string).unwrap_or_else(default_actor);
793
794 let resolved_session_id = resolve_session_or_suggest(session_id, &storage)?;
796
797 storage.delete_context_item(&resolved_session_id, key, &actor)?;
798
799 if json {
800 let output = DeleteOutput {
801 key: key.to_string(),
802 deleted: true,
803 };
804 println!("{}", serde_json::to_string(&output)?);
805 } else {
806 println!("Deleted: {key}");
807 }
808
809 Ok(())
810}
811
/// JSON payload emitted by `execute_update` when `--json` is requested.
#[derive(Serialize)]
struct UpdateOutput {
    key: String,
    updated: bool,
}
818
/// Update fields of an existing context item in place; the key is immutable.
///
/// At least one of `--value`, `--category`, `--priority`, `--channel` must
/// be supplied.
///
/// # Errors
/// `Error::NotInitialized` without a database, `Error::Config` when no
/// updatable field was provided, plus storage/serialization errors.
/// NOTE(review): `Error::Config` here is inconsistent with the
/// `Error::InvalidArgument` used for bad arguments elsewhere in this file;
/// consider unifying (a behavior change, so not done in this pass).
pub fn execute_update(
    args: &UpdateArgs,
    db_path: Option<&PathBuf>,
    actor: Option<&str>,
    session_id: Option<&str>,
    json: bool,
) -> Result<()> {
    let db_path = resolve_db_path(db_path.map(|p| p.as_path()))
        .ok_or(Error::NotInitialized)?;

    if !db_path.exists() {
        return Err(Error::NotInitialized);
    }

    // Validate arguments before opening storage.
    if args.value.is_none()
        && args.category.is_none()
        && args.priority.is_none()
        && args.channel.is_none()
    {
        return Err(Error::Config(
            "At least one of --value, --category, --priority, or --channel must be provided"
                .to_string(),
        ));
    }

    let mut storage = SqliteStorage::open(&db_path)?;
    let actor = actor.map(ToString::to_string).unwrap_or_else(default_actor);

    let resolved_session_id = resolve_session_or_suggest(session_id, &storage)?;

    // `None` fields are left unchanged by the storage layer.
    storage.update_context_item(
        &resolved_session_id,
        &args.key,
        args.value.as_deref(),
        args.category.as_deref(),
        args.priority.as_deref(),
        args.channel.as_deref(),
        &actor,
    )?;

    if json {
        let output = UpdateOutput {
            key: args.key.clone(),
            updated: true,
        };
        println!("{}", serde_json::to_string(&output)?);
    } else {
        println!("Updated: {}", args.key);
    }

    Ok(())
}
874
/// JSON payload emitted by `execute_tag` when `--json` is requested.
#[derive(Serialize)]
struct TagOutput {
    key: String,
    /// "add" or "remove".
    action: String,
    tags: Vec<String>,
}
882
883pub fn execute_tag(
885 command: &TagCommands,
886 db_path: Option<&PathBuf>,
887 actor: Option<&str>,
888 session_id: Option<&str>,
889 json: bool,
890) -> Result<()> {
891 let db_path = resolve_db_path(db_path.map(|p| p.as_path()))
892 .ok_or(Error::NotInitialized)?;
893
894 if !db_path.exists() {
895 return Err(Error::NotInitialized);
896 }
897
898 let mut storage = SqliteStorage::open(&db_path)?;
899 let actor = actor.map(ToString::to_string).unwrap_or_else(default_actor);
900
901 let resolved_session_id = resolve_session_or_suggest(session_id, &storage)?;
903
904 match command {
905 TagCommands::Add { key, tags } => {
906 storage.add_tags_to_item(&resolved_session_id, key, tags, &actor)?;
907
908 if json {
909 let output = TagOutput {
910 key: key.clone(),
911 action: "add".to_string(),
912 tags: tags.clone(),
913 };
914 println!("{}", serde_json::to_string(&output)?);
915 } else {
916 println!("Added tags to {}: {}", key, tags.join(", "));
917 }
918 }
919 TagCommands::Remove { key, tags } => {
920 storage.remove_tags_from_item(&resolved_session_id, key, tags, &actor)?;
921
922 if json {
923 let output = TagOutput {
924 key: key.clone(),
925 action: "remove".to_string(),
926 tags: tags.clone(),
927 };
928 println!("{}", serde_json::to_string(&output)?);
929 } else {
930 println!("Removed tags from {}: {}", key, tags.join(", "));
931 }
932 }
933 }
934
935 Ok(())
936}
937
938#[cfg(test)]
939mod tests {
940 use super::*;
941
942 #[test]
945 fn test_decompose_single_word() {
946 let result = decompose_query("authentication");
948 assert_eq!(result, vec!["authentication"]);
949 }
950
951 #[test]
952 fn test_decompose_short_words_filtered() {
953 let result = decompose_query("is it ok");
955 assert_eq!(result, vec!["is it ok"]);
957 }
958
959 #[test]
960 fn test_decompose_multi_word() {
961 let result = decompose_query("ABG revenue impact metrics");
962 assert!(result.contains(&"ABG".to_string()));
964 assert!(result.contains(&"revenue".to_string()));
965 assert!(result.contains(&"impact".to_string()));
966 assert!(result.contains(&"metrics".to_string()));
967 assert!(result.contains(&"ABG revenue".to_string()));
969 assert!(result.contains(&"revenue impact".to_string()));
970 assert!(result.contains(&"impact metrics".to_string()));
971 assert_eq!(result.len(), 7);
973 }
974
975 #[test]
976 fn test_decompose_filters_short_words_in_multi() {
977 let result = decompose_query("how to fix a bug");
979 assert!(result.contains(&"how".to_string()));
981 assert!(result.contains(&"fix".to_string()));
982 assert!(result.contains(&"bug".to_string()));
983 assert!(!result.iter().any(|s| s == "to"));
985 assert!(!result.iter().any(|s| s == "a"));
986 assert!(result.contains(&"how fix".to_string()));
988 assert!(result.contains(&"fix bug".to_string()));
989 assert_eq!(result.len(), 5);
990 }
991
992 #[test]
993 fn test_decompose_two_words() {
994 let result = decompose_query("retainer pricing");
995 assert_eq!(result, vec!["retainer", "pricing", "retainer pricing"]);
997 }
998
999 #[test]
1000 fn test_decompose_empty() {
1001 let result = decompose_query("");
1002 assert_eq!(result, vec![""]);
1003 }
1004
1005 #[test]
1006 fn test_decompose_whitespace_only() {
1007 let result = decompose_query(" ");
1008 assert_eq!(result, vec![" "]);
1010 }
1011
1012 #[test]
1016 fn test_rrf_scoring_formula() {
1017 let k = 60.0_f32;
1019
1020 let rank_0 = 1.0 / (k + 0.0 + 1.0);
1022 assert!((rank_0 - 0.01639).abs() < 0.001);
1023
1024 let rank_1 = 1.0 / (k + 1.0 + 1.0);
1026 assert!(rank_0 > rank_1);
1027
1028 let rank_19 = 1.0 / (k + 19.0 + 1.0);
1030 assert!((rank_19 - 0.0125).abs() < 0.001);
1031 }
1032
1033 #[test]
1034 fn test_rrf_fusion_logic() {
1035 let k = 60.0_f32;
1037 let mut scores: HashMap<String, f32> = HashMap::new();
1038
1039 for (rank, item) in ["item-A", "item-B", "item-C"].iter().enumerate() {
1041 let rrf_score = 1.0 / (k + rank as f32 + 1.0);
1042 *scores.entry(item.to_string()).or_default() += rrf_score;
1043 }
1044
1045 for (rank, item) in ["item-B", "item-D", "item-A"].iter().enumerate() {
1047 let rrf_score = 1.0 / (k + rank as f32 + 1.0);
1048 *scores.entry(item.to_string()).or_default() += rrf_score;
1049 }
1050
1051 let mut sorted: Vec<_> = scores.into_iter().collect();
1054 sorted.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
1055
1056 assert_eq!(sorted[0].0, "item-B");
1057 assert_eq!(sorted[1].0, "item-A");
1058 assert_eq!(sorted[2].0, "item-D");
1059 assert_eq!(sorted[3].0, "item-C");
1060 }
1061
1062 #[test]
1063 fn test_rrf_single_result_set() {
1064 let k = 60.0_f32;
1066 let mut scores: Vec<(String, f32)> = Vec::new();
1067
1068 for (rank, item) in ["a", "b", "c"].iter().enumerate() {
1069 let rrf_score = 1.0 / (k + rank as f32 + 1.0);
1070 scores.push((item.to_string(), rrf_score));
1071 }
1072
1073 assert!(scores[0].1 > scores[1].1);
1074 assert!(scores[1].1 > scores[2].1);
1075 }
1076
1077 #[test]
1080 fn test_adaptive_threshold_formula() {
1081 assert_eq!((0.9_f32 * 0.6).max(0.25), 0.54); assert_eq!((0.5_f32 * 0.6).max(0.25), 0.3); assert_eq!((0.3_f32 * 0.6).max(0.25), 0.25); assert_eq!((0.1_f32 * 0.6).max(0.25), 0.25); }
1087}