swiftide_core/search_strategies/
hybrid_search.rs

1use derive_builder::Builder;
2
3use crate::{indexing::EmbeddedField, querying};
4
5use super::{DEFAULT_TOP_K, DEFAULT_TOP_N};
6
7/// A hybrid search strategy that combines a similarity search with a
8/// keyword search / sparse search.
9///
10/// Defaults to a a maximum of 10 documents and `EmbeddedField::Combined` for the field(s).
11#[derive(Debug, Clone, Builder)]
12#[builder(setter(into))]
13pub struct HybridSearch {
14    /// Maximum number of documents to return
15    #[builder(default)]
16    top_k: u64,
17    /// Maximum number of documents to return per query
18    #[builder(default)]
19    top_n: u64,
20
21    /// The field to use for the dense vector
22    #[builder(default)]
23    dense_vector_field: EmbeddedField,
24
25    /// The field to use for the sparse vector
26    /// TODO: I.e. lancedb does not use sparse embeddings for hybrid search
27    #[builder(default)]
28    sparse_vector_field: EmbeddedField,
29}
30
31impl querying::SearchStrategy for HybridSearch {}
32
33impl Default for HybridSearch {
34    fn default() -> Self {
35        Self {
36            top_k: DEFAULT_TOP_K,
37            top_n: DEFAULT_TOP_N,
38            dense_vector_field: EmbeddedField::Combined,
39            sparse_vector_field: EmbeddedField::Combined,
40        }
41    }
42}
43
44impl HybridSearch {
45    /// Sets the maximum amount of total documents retrieved
46    pub fn with_top_k(&mut self, top_k: u64) -> &mut Self {
47        self.top_k = top_k;
48        self
49    }
50    /// Returns the maximum amount of total documents to be retrieved
51    pub fn top_k(&self) -> u64 {
52        self.top_k
53    }
54    /// Sets the maximum amount of documents to be retrieved
55    /// per individual query
56    pub fn with_top_n(&mut self, top_n: u64) -> &mut Self {
57        self.top_n = top_n;
58        self
59    }
60    /// Returns the maximum amount of documents per query
61    pub fn top_n(&self) -> u64 {
62        self.top_n
63    }
64    /// Sets the vector field for the dense vector
65    ///
66    /// Defaults to `EmbeddedField::Combined`
67    pub fn with_dense_vector_field(
68        &mut self,
69        dense_vector_field: impl Into<EmbeddedField>,
70    ) -> &mut Self {
71        self.dense_vector_field = dense_vector_field.into();
72        self
73    }
74
75    /// Returns the field for the dense vector
76    pub fn dense_vector_field(&self) -> &EmbeddedField {
77        &self.dense_vector_field
78    }
79    /// Sets the vector field for the sparse vector (if applicable)
80    ///
81    /// Defaults to `EmbeddedField::Combined`
82    pub fn with_sparse_vector_field(
83        &mut self,
84        sparse_vector_field: impl Into<EmbeddedField>,
85    ) -> &mut Self {
86        self.sparse_vector_field = sparse_vector_field.into();
87        self
88    }
89
90    /// Returns the field for the dense vector
91    pub fn sparse_vector_field(&self) -> &EmbeddedField {
92        &self.sparse_vector_field
93    }
94}