vectorizer_sdk/models/
hybrid_search.rs

1//! Hybrid search models for combining dense and sparse vectors
2
3use std::collections::HashMap;
4
5use serde::{Deserialize, Serialize};
6
7/// Sparse vector representation
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct SparseVector {
10    /// Non-zero indices
11    pub indices: Vec<usize>,
12    /// Values at corresponding indices
13    pub values: Vec<f32>,
14}
15
16impl SparseVector {
17    /// Create a new sparse vector
18    pub fn new(indices: Vec<usize>, values: Vec<f32>) -> Result<Self, String> {
19        if indices.len() != values.len() {
20            return Err("Indices and values must have the same length".to_string());
21        }
22        if indices.is_empty() {
23            return Err("Sparse vector cannot be empty".to_string());
24        }
25        for &idx in &indices {
26            if idx == usize::MAX {
27                return Err("Indices must be valid".to_string());
28            }
29        }
30        for &val in &values {
31            if val.is_nan() || val.is_infinite() {
32                return Err("Values must be finite numbers".to_string());
33            }
34        }
35        Ok(Self { indices, values })
36    }
37}
38
39/// Hybrid search request
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct HybridSearchRequest {
42    /// Collection name
43    pub collection: String,
44    /// Text query for dense vector search
45    pub query: String,
46    /// Optional sparse vector query
47    pub query_sparse: Option<SparseVector>,
48    /// Alpha parameter for blending (0.0-1.0)
49    #[serde(default = "default_alpha")]
50    pub alpha: f32,
51    /// Scoring algorithm
52    #[serde(default = "default_algorithm")]
53    pub algorithm: HybridScoringAlgorithm,
54    /// Number of dense results to retrieve
55    #[serde(default = "default_dense_k")]
56    pub dense_k: usize,
57    /// Number of sparse results to retrieve
58    #[serde(default = "default_sparse_k")]
59    pub sparse_k: usize,
60    /// Final number of results to return
61    #[serde(default = "default_final_k")]
62    pub final_k: usize,
63}
64
/// Serde fallback for `HybridSearchRequest::alpha`: returns `0.7`.
fn default_alpha() -> f32 {
    const FALLBACK_ALPHA: f32 = 0.7;
    FALLBACK_ALPHA
}
68
69fn default_algorithm() -> HybridScoringAlgorithm {
70    HybridScoringAlgorithm::ReciprocalRankFusion
71}
72
/// Serde fallback for `HybridSearchRequest::dense_k`: returns `20`.
fn default_dense_k() -> usize {
    const FALLBACK_DENSE_K: usize = 20;
    FALLBACK_DENSE_K
}
76
/// Serde fallback for `HybridSearchRequest::sparse_k`: returns `20`.
fn default_sparse_k() -> usize {
    const FALLBACK_SPARSE_K: usize = 20;
    FALLBACK_SPARSE_K
}
80
/// Serde fallback for `HybridSearchRequest::final_k`: returns `10`.
fn default_final_k() -> usize {
    const FALLBACK_FINAL_K: usize = 10;
    FALLBACK_FINAL_K
}
84
85/// Hybrid scoring algorithm
86#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
87#[serde(rename_all = "lowercase")]
88pub enum HybridScoringAlgorithm {
89    /// Reciprocal Rank Fusion
90    #[serde(rename = "rrf")]
91    ReciprocalRankFusion,
92    /// Weighted Combination
93    #[serde(rename = "weighted")]
94    WeightedCombination,
95    /// Alpha Blending
96    #[serde(rename = "alpha")]
97    AlphaBlending,
98}
99
100/// Hybrid search result
101#[derive(Debug, Clone, Serialize, Deserialize)]
102pub struct HybridSearchResult {
103    /// Result ID
104    pub id: String,
105    /// Similarity score
106    pub score: f32,
107    /// Optional vector data
108    pub vector: Option<Vec<f32>>,
109    /// Optional payload data
110    pub payload: Option<HashMap<String, serde_json::Value>>,
111}
112
113/// Hybrid search response
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct HybridSearchResponse {
116    /// Search results
117    pub results: Vec<HybridSearchResult>,
118    /// Query text
119    pub query: String,
120    /// Optional sparse query
121    pub query_sparse: Option<SparseVectorResponse>,
122    /// Alpha parameter used
123    pub alpha: f32,
124    /// Algorithm used
125    pub algorithm: String,
126    /// Duration in milliseconds
127    pub duration_ms: Option<u64>,
128}
129
130/// Sparse vector response format
131#[derive(Debug, Clone, Serialize, Deserialize)]
132pub struct SparseVectorResponse {
133    /// Indices
134    pub indices: Vec<usize>,
135    /// Values
136    pub values: Vec<f32>,
137}