swiftide_core/search_strategies/hybrid_search.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
use derive_builder::Builder;
use crate::{indexing::EmbeddedField, querying};
use super::{DEFAULT_TOP_K, DEFAULT_TOP_N};
/// A hybrid search strategy that combines a similarity search with a
/// keyword search / sparse search.
///
/// Defaults to a a maximum of 10 documents and `EmbeddedField::Combined` for the field(s).
#[derive(Debug, Clone, Builder)]
#[builder(setter(into))]
pub struct HybridSearch {
/// Maximum number of documents to return
#[builder(default)]
top_k: u64,
/// Maximum number of documents to return per query
#[builder(default)]
top_n: u64,
/// The field to use for the dense vector
#[builder(default)]
dense_vector_field: EmbeddedField,
/// The field to use for the sparse vector
/// TODO: I.e. lancedb does not use sparse embeddings for hybrid search
#[builder(default)]
sparse_vector_field: EmbeddedField,
}
impl querying::SearchStrategy for HybridSearch {}
impl Default for HybridSearch {
fn default() -> Self {
Self {
top_k: DEFAULT_TOP_K,
top_n: DEFAULT_TOP_N,
dense_vector_field: EmbeddedField::Combined,
sparse_vector_field: EmbeddedField::Combined,
}
}
}
impl HybridSearch {
/// Sets the maximum amount of total documents retrieved
pub fn with_top_k(&mut self, top_k: u64) -> &mut Self {
self.top_k = top_k;
self
}
/// Returns the maximum amount of total documents to be retrieved
pub fn top_k(&self) -> u64 {
self.top_k
}
/// Sets the maximum amount of documents to be retrieved
/// per individual query
pub fn with_top_n(&mut self, top_n: u64) -> &mut Self {
self.top_n = top_n;
self
}
/// Returns the maximum amount of documents per query
pub fn top_n(&self) -> u64 {
self.top_n
}
/// Sets the vector field for the dense vector
///
/// Defaults to `EmbeddedField::Combined`
pub fn with_dense_vector_field(
&mut self,
dense_vector_field: impl Into<EmbeddedField>,
) -> &mut Self {
self.dense_vector_field = dense_vector_field.into();
self
}
/// Returns the field for the dense vector
pub fn dense_vector_field(&self) -> &EmbeddedField {
&self.dense_vector_field
}
/// Sets the vector field for the sparse vector (if applicable)
///
/// Defaults to `EmbeddedField::Combined`
pub fn with_sparse_vector_field(
&mut self,
sparse_vector_field: impl Into<EmbeddedField>,
) -> &mut Self {
self.sparse_vector_field = sparse_vector_field.into();
self
}
/// Returns the field for the dense vector
pub fn sparse_vector_field(&self) -> &EmbeddedField {
&self.sparse_vector_field
}
}