Struct Tokenizer

Source

pub struct Tokenizer { /* private fields */ }

Expand description

Text tokenizer for splitting text into tokens

Implementations§

Source §

impl Tokenizer

Source

pub fn new() -> LangExtractResult<Self>

Create a new tokenizer

Source

pub fn tokenize(&self, text: &str) -> LangExtractResult<TokenizedText>

Tokenize text into tokens

Source

pub fn tokens_text( &self, tokenized_text: &TokenizedText, token_interval: &TokenInterval, ) -> LangExtractResult<String>

Reconstruct text from a token interval

Source

pub fn is_end_of_sentence_token( &self, text: &str, tokens: &[Token], current_idx: usize, ) -> bool

Check if a punctuation token ends a sentence

Source

pub fn is_sentence_break_after_newline( &self, text: &str, tokens: &[Token], current_idx: usize, ) -> bool

Check if there’s a sentence break after a newline

Source

pub fn find_sentence_range( &self, text: &str, tokens: &[Token], start_token_index: usize, ) -> LangExtractResult<TokenInterval>

Find sentence range starting from a given token index

Trait Implementations§

Source §

impl Default for Tokenizer

Source §

fn default() -> Self

Returns the “default value” for a type. Read more

Auto Trait Implementations§

§

impl UnwindSafe for Tokenizer

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T> Instrument for T

Source §

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

Source §

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> PolicyExt for T
where T: ?Sized,

Source §

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more

Source §

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more

Source §

impl<T, U> TryFrom for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto for T
where U: TryFrom<T>,

Source §

type Error = >::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, >::Error>

Performs the conversion.

Source §

impl<T, U> TryInto for T
where U: TryFrom<T>,

Source §

type Error = >::Error

The type returned in the event of a conversion error.

Source §

fn try_into<'async_trait>( self, ) -> Pin<Box<dyn Future<Output = Result<U, >::Error>> + 'async_trait>>
where T: 'async_trait,

Performs the conversion.

Source §

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source §

fn vzip(self) -> V

Source §

impl<T> WithSubscriber for T

Source §

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

Tokenizer

Struct Tokenizer Copy item path

Implementations§

impl Tokenizer

pub fn new() -> LangExtractResult<Self>

pub fn tokenize(&self, text: &str) -> LangExtractResult<TokenizedText>

pub fn tokens_text( &self, tokenized_text: &TokenizedText, token_interval: &TokenInterval, ) -> LangExtractResult<String>

pub fn is_end_of_sentence_token( &self, text: &str, tokens: &[Token], current_idx: usize, ) -> bool

pub fn is_sentence_break_after_newline( &self, text: &str, tokens: &[Token], current_idx: usize, ) -> bool

pub fn find_sentence_range( &self, text: &str, tokens: &[Token], start_token_index: usize, ) -> LangExtractResult<TokenInterval>

Trait Implementations§

impl Default for Tokenizer

fn default() -> Self

Auto Trait Implementations§

impl Freeze for Tokenizer

impl RefUnwindSafe for Tokenizer

impl Send for Tokenizer

impl Sync for Tokenizer

impl Unpin for Tokenizer

impl UnwindSafe for Tokenizer

Blanket Implementations§

impl<T> Any for Twhere T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for Twhere T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for Twhere T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for Twhere U: From<T>,

fn into(self) -> U

impl<T> PolicyExt for Twhere T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>where T: Policy<B, E>, P: Policy<B, E>,

impl<T, U> TryFrom<U> for Twhere U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for Twhere U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<T, U> TryInto<U> for Twhere U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into<'async_trait>( self, ) -> Pin<Box<dyn Future<Output = Result<U, <U as TryFrom<T>>::Error>> + 'async_trait>>where T: 'async_trait,

impl<V, T> VZip<V> for Twhere V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

impl<T> ErasedDestructor for Twhere T: 'static,

Struct Tokenizer

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

fn try_into<'async_trait>( self, ) -> Pin<Box<dyn Future<Output = Result<U, <U as TryFrom<T>>::Error>> + 'async_trait>>
where T: 'async_trait,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> ErasedDestructor for T
where T: 'static,