// swiftide_core/search_strategies/hybrid_search.rs

use derive_builder::Builder;

use crate::{indexing::EmbeddedField, querying};

use super::{DEFAULT_TOP_K, DEFAULT_TOP_N};

/// A hybrid search strategy that combines a similarity search with a
/// keyword search / sparse search.
///
/// Defaults to a maximum of `DEFAULT_TOP_K` documents in total,
/// `DEFAULT_TOP_N` documents per individual query, and
/// `EmbeddedField::Combined` for the field(s).
///
/// The builder defaults are spelled out explicitly on every field: a bare
/// `#[builder(default)]` would fall back to the field type's own `Default`
/// (i.e. `0` for the `u64` limits) instead of the values used by this type's
/// `Default` implementation.
#[derive(Debug, Clone, Builder)]
#[builder(setter(into))]
pub struct HybridSearch {
    /// Maximum number of documents to return
    #[builder(default = "DEFAULT_TOP_K")]
    top_k: u64,
    /// Maximum number of documents to return per query
    #[builder(default = "DEFAULT_TOP_N")]
    top_n: u64,

    /// The field to use for the dense vector
    #[builder(default = "EmbeddedField::Combined")]
    dense_vector_field: EmbeddedField,

    /// The field to use for the sparse vector
    /// TODO: I.e. lancedb does not use sparse embeddings for hybrid search
    #[builder(default = "EmbeddedField::Combined")]
    sparse_vector_field: EmbeddedField,
}

// Registers `HybridSearch` as a `querying::SearchStrategy`. The impl body is
// empty, so no trait items are defined or overridden here.
impl querying::SearchStrategy for HybridSearch {}

impl Default for HybridSearch {
    /// Creates a strategy limited to `DEFAULT_TOP_K` documents in total and
    /// `DEFAULT_TOP_N` documents per query, using `EmbeddedField::Combined`
    /// for both the dense and the sparse vector field.
    fn default() -> Self {
        let top_k = DEFAULT_TOP_K;
        let top_n = DEFAULT_TOP_N;
        let dense_vector_field = EmbeddedField::Combined;
        let sparse_vector_field = EmbeddedField::Combined;

        Self {
            top_k,
            top_n,
            dense_vector_field,
            sparse_vector_field,
        }
    }
}

impl HybridSearch {
    /// Sets the maximum number of documents retrieved in total
    ///
    /// Returns `&mut Self` so calls can be chained.
    pub fn with_top_k(&mut self, top_k: u64) -> &mut Self {
        self.top_k = top_k;
        self
    }

    /// Returns the maximum number of documents to be retrieved in total
    #[must_use]
    pub fn top_k(&self) -> u64 {
        self.top_k
    }

    /// Sets the maximum number of documents to be retrieved
    /// per individual query
    ///
    /// Returns `&mut Self` so calls can be chained.
    pub fn with_top_n(&mut self, top_n: u64) -> &mut Self {
        self.top_n = top_n;
        self
    }

    /// Returns the maximum number of documents per individual query
    #[must_use]
    pub fn top_n(&self) -> u64 {
        self.top_n
    }

    /// Sets the vector field for the dense vector
    ///
    /// Accepts anything convertible into an `EmbeddedField`.
    /// Defaults to `EmbeddedField::Combined`
    pub fn with_dense_vector_field(
        &mut self,
        dense_vector_field: impl Into<EmbeddedField>,
    ) -> &mut Self {
        self.dense_vector_field = dense_vector_field.into();
        self
    }

    /// Returns the field for the dense vector
    #[must_use]
    pub fn dense_vector_field(&self) -> &EmbeddedField {
        &self.dense_vector_field
    }

    /// Sets the vector field for the sparse vector (if applicable)
    ///
    /// Accepts anything convertible into an `EmbeddedField`.
    /// Defaults to `EmbeddedField::Combined`
    pub fn with_sparse_vector_field(
        &mut self,
        sparse_vector_field: impl Into<EmbeddedField>,
    ) -> &mut Self {
        self.sparse_vector_field = sparse_vector_field.into();
        self
    }

    /// Returns the field for the sparse vector
    #[must_use]
    pub fn sparse_vector_field(&self) -> &EmbeddedField {
        &self.sparse_vector_field
    }
}