// vectorizer_sdk/models/hybrid_search.rs

//! Hybrid search models for combining dense and sparse vectors.
2
3use std::collections::HashMap;
4use serde::{Deserialize, Serialize};
5
6/// Sparse vector representation
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct SparseVector {
9    /// Non-zero indices
10    pub indices: Vec<usize>,
11    /// Values at corresponding indices
12    pub values: Vec<f32>,
13}
14
15impl SparseVector {
16    /// Create a new sparse vector
17    pub fn new(indices: Vec<usize>, values: Vec<f32>) -> Result<Self, String> {
18        if indices.len() != values.len() {
19            return Err("Indices and values must have the same length".to_string());
20        }
21        if indices.is_empty() {
22            return Err("Sparse vector cannot be empty".to_string());
23        }
24        for &idx in &indices {
25            if idx == usize::MAX {
26                return Err("Indices must be valid".to_string());
27            }
28        }
29        for &val in &values {
30            if val.is_nan() || val.is_infinite() {
31                return Err("Values must be finite numbers".to_string());
32            }
33        }
34        Ok(Self { indices, values })
35    }
36}
37
38/// Hybrid search request
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct HybridSearchRequest {
41    /// Collection name
42    pub collection: String,
43    /// Text query for dense vector search
44    pub query: String,
45    /// Optional sparse vector query
46    pub query_sparse: Option<SparseVector>,
47    /// Alpha parameter for blending (0.0-1.0)
48    #[serde(default = "default_alpha")]
49    pub alpha: f32,
50    /// Scoring algorithm
51    #[serde(default = "default_algorithm")]
52    pub algorithm: HybridScoringAlgorithm,
53    /// Number of dense results to retrieve
54    #[serde(default = "default_dense_k")]
55    pub dense_k: usize,
56    /// Number of sparse results to retrieve
57    #[serde(default = "default_sparse_k")]
58    pub sparse_k: usize,
59    /// Final number of results to return
60    #[serde(default = "default_final_k")]
61    pub final_k: usize,
62}
63
/// Serde fallback for `HybridSearchRequest::alpha` when the field is absent.
fn default_alpha() -> f32 {
    0.7
}
67
68fn default_algorithm() -> HybridScoringAlgorithm {
69    HybridScoringAlgorithm::ReciprocalRankFusion
70}
71
/// Serde fallback for `HybridSearchRequest::dense_k` when the field is absent.
fn default_dense_k() -> usize {
    20
}
75
/// Serde fallback for `HybridSearchRequest::sparse_k` when the field is absent.
fn default_sparse_k() -> usize {
    20
}
79
/// Serde fallback for `HybridSearchRequest::final_k` when the field is absent.
fn default_final_k() -> usize {
    10
}
83
84/// Hybrid scoring algorithm
85#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
86#[serde(rename_all = "lowercase")]
87pub enum HybridScoringAlgorithm {
88    /// Reciprocal Rank Fusion
89    #[serde(rename = "rrf")]
90    ReciprocalRankFusion,
91    /// Weighted Combination
92    #[serde(rename = "weighted")]
93    WeightedCombination,
94    /// Alpha Blending
95    #[serde(rename = "alpha")]
96    AlphaBlending,
97}
98
99/// Hybrid search result
100#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct HybridSearchResult {
102    /// Result ID
103    pub id: String,
104    /// Similarity score
105    pub score: f32,
106    /// Optional vector data
107    pub vector: Option<Vec<f32>>,
108    /// Optional payload data
109    pub payload: Option<HashMap<String, serde_json::Value>>,
110}
111
112/// Hybrid search response
113#[derive(Debug, Clone, Serialize, Deserialize)]
114pub struct HybridSearchResponse {
115    /// Search results
116    pub results: Vec<HybridSearchResult>,
117    /// Query text
118    pub query: String,
119    /// Optional sparse query
120    pub query_sparse: Option<SparseVectorResponse>,
121    /// Alpha parameter used
122    pub alpha: f32,
123    /// Algorithm used
124    pub algorithm: String,
125    /// Duration in milliseconds
126    pub duration_ms: Option<u64>,
127}
128
129/// Sparse vector response format
130#[derive(Debug, Clone, Serialize, Deserialize)]
131pub struct SparseVectorResponse {
132    /// Indices
133    pub indices: Vec<usize>,
134    /// Values
135    pub values: Vec<f32>,
136}
137