pub struct AlignmentEngine { /* private fields */ }
Expand description
Token-to-word alignment engine
Implementations§
Source§impl AlignmentEngine
impl AlignmentEngine
pub fn new(config: AlignmentConfig) -> Self
Source§pub fn extract_words(&mut self, text: &str) -> Vec<Word>
pub fn extract_words(&mut self, text: &str) -> Vec<Word>
Extract words from text with their positions
Source§pub fn align_tokens_to_words(
&mut self,
text: &str,
token_offsets: &[(usize, usize)],
special_tokens_mask: Option<&[u8]>,
) -> Result<Vec<TokenAlignment>>
pub fn align_tokens_to_words( &mut self, text: &str, token_offsets: &[(usize, usize)], special_tokens_mask: Option<&[u8]>, ) -> Result<Vec<TokenAlignment>>
Align tokens to words
Source§pub fn extract_spans(
&mut self,
text: &str,
alignments: &[TokenAlignment],
spans: &[(usize, usize)],
) -> Result<Vec<AlignedSpan>>
pub fn extract_spans( &mut self, text: &str, alignments: &[TokenAlignment], spans: &[(usize, usize)], ) -> Result<Vec<AlignedSpan>>
Extract spans with word-level alignment
Source§pub fn get_word_boundaries_for_token(
&self,
alignments: &[TokenAlignment],
token_index: usize,
) -> Option<(usize, usize)>
pub fn get_word_boundaries_for_token( &self, alignments: &[TokenAlignment], token_index: usize, ) -> Option<(usize, usize)>
Get word boundaries for a specific token
Source§pub fn tokens_form_complete_word(
&self,
alignments: &[TokenAlignment],
token_indices: &[usize],
) -> bool
pub fn tokens_form_complete_word( &self, alignments: &[TokenAlignment], token_indices: &[usize], ) -> bool
Check if tokens form a complete word
Source§pub fn preserve_entities(
&mut self,
text: &str,
alignments: &[TokenAlignment],
entities: &[(usize, usize, String)],
) -> Result<Vec<AlignedSpan>>
pub fn preserve_entities( &mut self, text: &str, alignments: &[TokenAlignment], entities: &[(usize, usize, String)], ) -> Result<Vec<AlignedSpan>>
Preserve entity boundaries during alignment
Source§impl AlignmentEngine
Utility functions for common alignment tasks
impl AlignmentEngine
Utility functions for common alignment tasks
Source§pub fn get_tokens_for_word(
&self,
alignments: &[TokenAlignment],
word_index: usize,
) -> Vec<usize>
pub fn get_tokens_for_word( &self, alignments: &[TokenAlignment], word_index: usize, ) -> Vec<usize>
Get all tokens that belong to a specific word
Source§pub fn get_word_for_token(
&self,
alignments: &[TokenAlignment],
token_index: usize,
) -> Option<usize>
pub fn get_word_for_token( &self, alignments: &[TokenAlignment], token_index: usize, ) -> Option<usize>
Get the word index for a token
Source§pub fn token_starts_word(
&self,
alignments: &[TokenAlignment],
token_index: usize,
) -> bool
pub fn token_starts_word( &self, alignments: &[TokenAlignment], token_index: usize, ) -> bool
Check if a token starts a word
Source§pub fn token_ends_word(
&self,
alignments: &[TokenAlignment],
token_index: usize,
) -> bool
pub fn token_ends_word( &self, alignments: &[TokenAlignment], token_index: usize, ) -> bool
Check if a token ends a word
Source§pub fn get_alignment_stats(
&self,
alignments: &[TokenAlignment],
) -> AlignmentStats
pub fn get_alignment_stats( &self, alignments: &[TokenAlignment], ) -> AlignmentStats
Get statistics about the alignment
Trait Implementations§
Source§impl Clone for AlignmentEngine
impl Clone for AlignmentEngine
Source§fn clone(&self) -> AlignmentEngine
fn clone(&self) -> AlignmentEngine
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Auto Trait Implementations§
impl Freeze for AlignmentEngine
impl RefUnwindSafe for AlignmentEngine
impl Send for AlignmentEngine
impl Sync for AlignmentEngine
impl Unpin for AlignmentEngine
impl UnsafeUnpin for AlignmentEngine
impl UnwindSafe for AlignmentEngine
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more