pub struct BatchProcessor { /* private fields */ }
Expand description
Processes multiple documents through an NlpPipeline in parallel using
the scirs2-core parallel abstractions.
§Example
use scirs2_text::pipeline::{BatchProcessor, PipelineBuilder, PipelineStep};
let pipeline = PipelineBuilder::new()
.add_step(PipelineStep::Tokenize)
.add_step(PipelineStep::Lowercase)
.add_step(PipelineStep::RemoveStopwords)
.build();
let docs = vec![
"The quick brown fox",
"A lazy dog sleeps",
"Hello world",
];
let processor = BatchProcessor::new(pipeline);
let results = processor.process_batch(&docs).unwrap();
assert_eq!(results.len(), 3);
Implementations§
Source§impl BatchProcessor
impl BatchProcessor
Source pub fn new(pipeline: NlpPipeline) -> Self
pub fn new(pipeline: NlpPipeline) -> Self
Create a new BatchProcessor wrapping the given pipeline.
Source pub fn with_parallel_threshold(self, threshold: usize) -> Self
pub fn with_parallel_threshold(self, threshold: usize) -> Self
Set the minimum number of documents required before parallel processing is used. Defaults to 32.
Source pub fn process_batch(&self, documents: &[&str]) -> Result<Vec<Vec<String>>>
pub fn process_batch(&self, documents: &[&str]) -> Result<Vec<Vec<String>>>
Process a slice of text documents and return one token list per document.
Source pub fn process_batch_tolerant(
&self,
documents: &[&str],
) -> Vec<Result<Vec<String>, TextError>>
pub fn process_batch_tolerant( &self, documents: &[&str], ) -> Vec<Result<Vec<String>, TextError>>
Process documents and return a Result for each one, so per-document errors are reported instead of short-circuiting on the first failure.
Source pub fn pipeline(&self) -> &NlpPipeline
pub fn pipeline(&self) -> &NlpPipeline
Return a reference to the inner pipeline.
Auto Trait Implementations§
impl Freeze for BatchProcessor
impl !RefUnwindSafe for BatchProcessor
impl Send for BatchProcessor
impl Sync for BatchProcessor
impl Unpin for BatchProcessor
impl UnsafeUnpin for BatchProcessor
impl !UnwindSafe for BatchProcessor
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<SS, SP> SupersetOf<SS> for SP where
SS: SubsetOf<SP>,
impl<SS, SP> SupersetOf<SS> for SP where
SS: SubsetOf<SP>,
Source§fn to_subset(&self) -> Option<SS>
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its superset. Read more
Source§fn is_in_subset(&self) -> bool
fn is_in_subset(&self) -> bool
Checks if self is actually part of its subset T (and can be converted to it).
Source§fn to_subset_unchecked(&self) -> SS
fn to_subset_unchecked(&self) -> SS
Use with care! Same as self.to_subset but without any property checks. Always succeeds.
Source§fn from_subset(element: &SS) -> SP
fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.