pub struct Bm25VectorizerBuilder<TokenIndexer, Tokenizer> { /* private fields */ }
Expand description
Builder for creating and configuring a Bm25Vectorizer.
It supports fitting on a corpus to automatically compute the average document length, and validates all parameters before building.
§Type Parameters
- TokenIndexer: Implementation of the Bm25TokenIndexer trait
- Tokenizer: Implementation of the Bm25Tokenizer trait
§Examples
Basic usage with manual avgdl:
use bm25_vectorizer::{Bm25VectorizerBuilder, MockWhitespaceTokenizer, MockHashTokenIndexer};
let vectorizer = Bm25VectorizerBuilder::new()
.tokenizer(MockWhitespaceTokenizer)
.token_indexer(MockHashTokenIndexer)
.k1(1.2)
.b(0.75)
.avgdl(10.0)
.build()?;
Usage with corpus fitting:
use bm25_vectorizer::{Bm25VectorizerBuilder, MockWhitespaceTokenizer, MockHashTokenIndexer};
let corpus = vec!["hello world", "world of rust", "hello rust programming"];
let vectorizer = Bm25VectorizerBuilder::new()
.tokenizer(MockWhitespaceTokenizer)
.token_indexer(MockHashTokenIndexer)
.k1(1.2)
.b(0.75)
.fit(&corpus)? // Automatically computes avgdl
.build()?;
Implementations§
Source§impl<TokenIndexer, Tokenizer> Bm25VectorizerBuilder<TokenIndexer, Tokenizer>
impl<TokenIndexer, Tokenizer> Bm25VectorizerBuilder<TokenIndexer, Tokenizer>
pub fn new() -> Self
pub fn k1(self, k1: f32) -> Self
pub fn b(self, b: f32) -> Self
pub fn delta(self, delta: f32) -> Self
pub fn avgdl(self, avgdl: f32) -> Self
pub fn tokenizer(self, tokenizer: Tokenizer) -> Self
pub fn token_indexer(self, token_indexer: TokenIndexer) -> Self
pub fn fit(self, corpus: &[&str]) -> Result<Self, Bm25VectorizerError> where
Tokenizer: Bm25Tokenizer + Sync,
pub fn fit_iter<I, S>(self, corpus: I) -> Result<Self, Bm25VectorizerError>
pub fn build( self, ) -> Result<Bm25Vectorizer<TokenIndexer, Tokenizer>, Bm25VectorizerError>
Auto Trait Implementations§
impl<TokenIndexer, Tokenizer> Freeze for Bm25VectorizerBuilder<TokenIndexer, Tokenizer>
impl<TokenIndexer, Tokenizer> RefUnwindSafe for Bm25VectorizerBuilder<TokenIndexer, Tokenizer> where
Tokenizer: RefUnwindSafe,
TokenIndexer: RefUnwindSafe,
impl<TokenIndexer, Tokenizer> Send for Bm25VectorizerBuilder<TokenIndexer, Tokenizer>
impl<TokenIndexer, Tokenizer> Sync for Bm25VectorizerBuilder<TokenIndexer, Tokenizer>
impl<TokenIndexer, Tokenizer> Unpin for Bm25VectorizerBuilder<TokenIndexer, Tokenizer>
impl<TokenIndexer, Tokenizer> UnwindSafe for Bm25VectorizerBuilder<TokenIndexer, Tokenizer> where
Tokenizer: UnwindSafe,
TokenIndexer: UnwindSafe,
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more