pub struct PreprocessingUtils;
Expand description
Preprocessing utilities for common operations
Implementations§
Source§impl PreprocessingUtils
impl PreprocessingUtils
Sourcepub fn classification_pipeline() -> TextPreprocessingPipeline
pub fn classification_pipeline() -> TextPreprocessingPipeline
Create a basic preprocessing pipeline for classification tasks
Sourcepub fn language_modeling_pipeline() -> TextPreprocessingPipeline
pub fn language_modeling_pipeline() -> TextPreprocessingPipeline
Create a preprocessing pipeline for language modeling
Sourcepub fn translation_pipeline() -> TextPreprocessingPipeline
pub fn translation_pipeline() -> TextPreprocessingPipeline
Create a preprocessing pipeline for machine translation
Sourcepub fn filter_texts(
texts: &[String],
min_length: Option<usize>,
max_length: Option<usize>,
allowed_chars: Option<&str>,
) -> Vec<String>
pub fn filter_texts( texts: &[String], min_length: Option<usize>, max_length: Option<usize>, allowed_chars: Option<&str>, ) -> Vec<String>
Validate and filter texts based on criteria
Sourcepub fn compute_batch_stats(texts: &[String]) -> PreprocessingStats
pub fn compute_batch_stats(texts: &[String]) -> PreprocessingStats
Compute batch statistics for analyzing preprocessing effects
Auto Trait Implementations§
impl Freeze for PreprocessingUtils
impl RefUnwindSafe for PreprocessingUtils
impl Send for PreprocessingUtils
impl Sync for PreprocessingUtils
impl Unpin for PreprocessingUtils
impl UnsafeUnpin for PreprocessingUtils
impl UnwindSafe for PreprocessingUtils
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<T> PolicyExt for T where
T: ?Sized,
impl<T> PolicyExt for T where
T: ?Sized,
Source§impl<SS, SP> SupersetOf<SS> for SP where
SS: SubsetOf<SP>,
impl<SS, SP> SupersetOf<SS> for SP where
SS: SubsetOf<SP>,
Source§fn to_subset(&self) -> Option<SS>
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its superset. Read more
Source§fn is_in_subset(&self) -> bool
fn is_in_subset(&self) -> bool
Checks if self is actually part of its subset T (and can be converted to it).
Source§fn to_subset_unchecked(&self) -> SS
fn to_subset_unchecked(&self) -> SS
Use with care! Same as self.to_subset but without any property checks. Always succeeds.
Source§fn from_subset(element: &SS) -> SP
fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.