swiftide_core/search_strategies/
hybrid_search.rs

1use derive_builder::Builder;
2
3use crate::{indexing::EmbeddedField, querying};
4
5use super::{DEFAULT_TOP_K, DEFAULT_TOP_N, SearchFilter};
6
7/// A hybrid search strategy that combines a similarity search with a
8/// keyword search / sparse search.
9///
10/// Defaults to a a maximum of 10 documents and `EmbeddedField::Combined` for the field(s).
11#[derive(Debug, Clone, Builder)]
12#[builder(setter(into))]
13pub struct HybridSearch<FILTER: SearchFilter = ()> {
14    /// Maximum number of documents to return
15    #[builder(default)]
16    top_k: u64,
17    /// Maximum number of documents to return per query
18    #[builder(default)]
19    top_n: u64,
20
21    /// The field to use for the dense vector
22    #[builder(default)]
23    dense_vector_field: EmbeddedField,
24
25    /// The field to use for the sparse vector
26    /// TODO: I.e. lancedb does not use sparse embeddings for hybrid search
27    #[builder(default)]
28    sparse_vector_field: EmbeddedField,
29
30    #[builder(default)]
31    filter: Option<FILTER>,
32}
33
34impl<FILTER: SearchFilter> querying::SearchStrategy for HybridSearch<FILTER> {}
35
36impl<FILTER: SearchFilter> Default for HybridSearch<FILTER> {
37    fn default() -> Self {
38        Self {
39            top_k: DEFAULT_TOP_K,
40            top_n: DEFAULT_TOP_N,
41            dense_vector_field: EmbeddedField::Combined,
42            sparse_vector_field: EmbeddedField::Combined,
43            filter: None,
44        }
45    }
46}
47
48impl<FILTER: SearchFilter> HybridSearch<FILTER> {
49    /// Creates a new hybrid search strategy that uses the provided filter
50    pub fn from_filter(filter: FILTER) -> Self {
51        Self {
52            filter: Some(filter),
53            ..Default::default()
54        }
55    }
56
57    pub fn with_filter<NEWFILTER: SearchFilter>(
58        self,
59        filter: NEWFILTER,
60    ) -> HybridSearch<NEWFILTER> {
61        HybridSearch {
62            top_k: self.top_k,
63            top_n: self.top_n,
64            dense_vector_field: self.dense_vector_field,
65            sparse_vector_field: self.sparse_vector_field,
66            filter: Some(filter),
67        }
68    }
69
70    /// Sets the maximum amount of total documents retrieved
71    pub fn with_top_k(&mut self, top_k: u64) -> &mut Self {
72        self.top_k = top_k;
73        self
74    }
75    /// Returns the maximum amount of total documents to be retrieved
76    pub fn top_k(&self) -> u64 {
77        self.top_k
78    }
79    /// Sets the maximum amount of documents to be retrieved
80    /// per individual query
81    pub fn with_top_n(&mut self, top_n: u64) -> &mut Self {
82        self.top_n = top_n;
83        self
84    }
85    /// Returns the maximum amount of documents per query
86    pub fn top_n(&self) -> u64 {
87        self.top_n
88    }
89    /// Sets the vector field for the dense vector
90    ///
91    /// Defaults to `EmbeddedField::Combined`
92    pub fn with_dense_vector_field(
93        &mut self,
94        dense_vector_field: impl Into<EmbeddedField>,
95    ) -> &mut Self {
96        self.dense_vector_field = dense_vector_field.into();
97        self
98    }
99
100    /// Returns the field for the dense vector
101    pub fn dense_vector_field(&self) -> &EmbeddedField {
102        &self.dense_vector_field
103    }
104    /// Sets the vector field for the sparse vector (if applicable)
105    ///
106    /// Defaults to `EmbeddedField::Combined`
107    pub fn with_sparse_vector_field(
108        &mut self,
109        sparse_vector_field: impl Into<EmbeddedField>,
110    ) -> &mut Self {
111        self.sparse_vector_field = sparse_vector_field.into();
112        self
113    }
114
115    /// Returns the field for the dense vector
116    pub fn sparse_vector_field(&self) -> &EmbeddedField {
117        &self.sparse_vector_field
118    }
119
120    pub fn filter(&self) -> Option<&FILTER> {
121        self.filter.as_ref()
122    }
123}