pub struct TransformerPreprocessor { /* private fields */ }Expand description
Text preprocessor for transformer models
Implementations§
Source§impl TransformerPreprocessor
impl TransformerPreprocessor
pub fn new(config: TransformerConfig) -> Self
Sourcepub fn preprocess_text(&self, text: &str) -> String
pub fn preprocess_text(&self, text: &str) -> String
Main preprocessing function that routes to appropriate domain-specific methods
Sourcepub fn preprocess_scientific_text(&self, text: &str) -> String
pub fn preprocess_scientific_text(&self, text: &str) -> String
Preprocessing for scientific text (SciBERT)
Sourcepub fn preprocess_biomedical_text(&self, text: &str) -> String
pub fn preprocess_biomedical_text(&self, text: &str) -> String
Preprocessing for biomedical text (BioBERT)
Sourcepub fn preprocess_code_text(&self, text: &str) -> String
pub fn preprocess_code_text(&self, text: &str) -> String
Preprocessing for code text (CodeBERT)
Sourcepub fn preprocess_legal_text(&self, text: &str) -> String
pub fn preprocess_legal_text(&self, text: &str) -> String
Preprocessing for legal text (LegalBERT)
Sourcepub fn preprocess_news_text(&self, text: &str) -> String
pub fn preprocess_news_text(&self, text: &str) -> String
Preprocessing for news text (NewsBERT)
Preprocessing for social media text (SocialMediaBERT)
Sourcepub fn expand_camel_case(&self, text: &str) -> String
pub fn expand_camel_case(&self, text: &str) -> String
Expand camelCase to separate words
Sourcepub fn max_sequence_length(&self) -> usize
pub fn max_sequence_length(&self) -> usize
Get maximum sequence length
Sourcepub fn needs_truncation(&self, text: &str) -> bool
pub fn needs_truncation(&self, text: &str) -> bool
Check if text should be truncated
Sourcepub fn truncate_text(&self, text: &str) -> String
pub fn truncate_text(&self, text: &str) -> String
Truncate text to maximum sequence length
Trait Implementations§
Source§impl Clone for TransformerPreprocessor
impl Clone for TransformerPreprocessor
Source§fn clone(&self) -> TransformerPreprocessor
fn clone(&self) -> TransformerPreprocessor
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from
source. Read moreAuto Trait Implementations§
impl Freeze for TransformerPreprocessor
impl RefUnwindSafe for TransformerPreprocessor
impl Send for TransformerPreprocessor
impl Sync for TransformerPreprocessor
impl Unpin for TransformerPreprocessor
impl UnwindSafe for TransformerPreprocessor
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CheckedAs for T
impl<T> CheckedAs for T
Source§fn checked_as<Dst>(self) -> Option<Dst>where
T: CheckedCast<Dst>,
fn checked_as<Dst>(self) -> Option<Dst>where
T: CheckedCast<Dst>,
Casts the value.
Source§impl<Src, Dst> CheckedCastFrom<Src> for Dstwhere
Src: CheckedCast<Dst>,
impl<Src, Dst> CheckedCastFrom<Src> for Dstwhere
Src: CheckedCast<Dst>,
Source§fn checked_cast_from(src: Src) -> Option<Dst>
fn checked_cast_from(src: Src) -> Option<Dst>
Casts the value.
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read moreSource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts
self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read moreSource§impl<T> OverflowingAs for T
impl<T> OverflowingAs for T
Source§fn overflowing_as<Dst>(self) -> (Dst, bool)where
T: OverflowingCast<Dst>,
fn overflowing_as<Dst>(self) -> (Dst, bool)where
T: OverflowingCast<Dst>,
Casts the value.
Source§impl<Src, Dst> OverflowingCastFrom<Src> for Dstwhere
Src: OverflowingCast<Dst>,
impl<Src, Dst> OverflowingCastFrom<Src> for Dstwhere
Src: OverflowingCast<Dst>,
Source§fn overflowing_cast_from(src: Src) -> (Dst, bool)
fn overflowing_cast_from(src: Src) -> (Dst, bool)
Casts the value.
Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<T> PolicyExt for Twhere
T: ?Sized,
impl<T> PolicyExt for Twhere
T: ?Sized,
Source§impl<T> SaturatingAs for T
impl<T> SaturatingAs for T
Source§fn saturating_as<Dst>(self) -> Dstwhere
T: SaturatingCast<Dst>,
fn saturating_as<Dst>(self) -> Dstwhere
T: SaturatingCast<Dst>,
Casts the value.
Source§impl<Src, Dst> SaturatingCastFrom<Src> for Dstwhere
Src: SaturatingCast<Dst>,
impl<Src, Dst> SaturatingCastFrom<Src> for Dstwhere
Src: SaturatingCast<Dst>,
Source§fn saturating_cast_from(src: Src) -> Dst
fn saturating_cast_from(src: Src) -> Dst
Casts the value.
Source§impl<T> StrictAs for T
impl<T> StrictAs for T
Source§fn strict_as<Dst>(self) -> Dstwhere
T: StrictCast<Dst>,
fn strict_as<Dst>(self) -> Dstwhere
T: StrictCast<Dst>,
Casts the value.
Source§impl<Src, Dst> StrictCastFrom<Src> for Dstwhere
Src: StrictCast<Dst>,
impl<Src, Dst> StrictCastFrom<Src> for Dstwhere
Src: StrictCast<Dst>,
Source§fn strict_cast_from(src: Src) -> Dst
fn strict_cast_from(src: Src) -> Dst
Casts the value.
Source§impl<SS, SP> SupersetOf<SS> for SPwhere
SS: SubsetOf<SP>,
impl<SS, SP> SupersetOf<SS> for SPwhere
SS: SubsetOf<SP>,
Source§fn to_subset(&self) -> Option<SS>
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct
self from the equivalent element of its
superset. Read moreSource§fn is_in_subset(&self) -> bool
fn is_in_subset(&self) -> bool
Checks if
self is actually part of its subset T (and can be converted to it).Source§fn to_subset_unchecked(&self) -> SS
fn to_subset_unchecked(&self) -> SS
Use with care! Same as
self.to_subset but without any property checks. Always succeeds.Source§fn from_subset(element: &SS) -> SP
fn from_subset(element: &SS) -> SP
The inclusion map: converts
self to the equivalent element of its superset.Source§impl<T> UnwrappedAs for T
impl<T> UnwrappedAs for T
Source§fn unwrapped_as<Dst>(self) -> Dstwhere
T: UnwrappedCast<Dst>,
fn unwrapped_as<Dst>(self) -> Dstwhere
T: UnwrappedCast<Dst>,
Casts the value.
Source§impl<Src, Dst> UnwrappedCastFrom<Src> for Dstwhere
Src: UnwrappedCast<Dst>,
impl<Src, Dst> UnwrappedCastFrom<Src> for Dstwhere
Src: UnwrappedCast<Dst>,
Source§fn unwrapped_cast_from(src: Src) -> Dst
fn unwrapped_cast_from(src: Src) -> Dst
Casts the value.
Source§impl<T> WithSubscriber for T
impl<T> WithSubscriber for T
Source§fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
Source§fn with_current_subscriber(self) -> WithDispatch<Self>
fn with_current_subscriber(self) -> WithDispatch<Self>
Source§impl<T> WrappingAs for T
impl<T> WrappingAs for T
Source§fn wrapping_as<Dst>(self) -> Dstwhere
T: WrappingCast<Dst>,
fn wrapping_as<Dst>(self) -> Dstwhere
T: WrappingCast<Dst>,
Casts the value.
Source§impl<Src, Dst> WrappingCastFrom<Src> for Dstwhere
Src: WrappingCast<Dst>,
impl<Src, Dst> WrappingCastFrom<Src> for Dstwhere
Src: WrappingCast<Dst>,
Source§fn wrapping_cast_from(src: Src) -> Dst
fn wrapping_cast_from(src: Src) -> Dst
Casts the value.