pub struct Document { /* private fields */ }
Expand description
A document containing some amount of lexed and parsed English text.
Implementations§
Source§impl Document
impl Document
Sourcepub fn token_indices_intersecting(&self, span: Span<char>) -> Vec<usize>
pub fn token_indices_intersecting(&self, span: Span<char>) -> Vec<usize>
Locate all the tokens that intersect a provided span.
Desperately needs optimization.
Sourcepub fn fat_tokens_intersecting(&self, span: Span<char>) -> Vec<FatToken>
pub fn fat_tokens_intersecting(&self, span: Span<char>) -> Vec<FatToken>
Locate all the tokens that intersect a provided span and convert them to FatTokens.
Desperately needs optimization.
Sourcepub fn new(
text: &str,
parser: &impl Parser,
dictionary: &impl Dictionary,
) -> Self
pub fn new( text: &str, parser: &impl Parser, dictionary: &impl Dictionary, ) -> Self
Lexes and parses text to produce a document using a provided language parser and dictionary.
Sourcepub fn new_curated(text: &str, parser: &impl Parser) -> Self
pub fn new_curated(text: &str, parser: &impl Parser) -> Self
Lexes and parses text to produce a document using a provided language parser and the included curated dictionary.
Sourcepub fn new_from_vec(
source: Lrc<Vec<char>>,
parser: &impl Parser,
dictionary: &impl Dictionary,
) -> Self
pub fn new_from_vec( source: Lrc<Vec<char>>, parser: &impl Parser, dictionary: &impl Dictionary, ) -> Self
Lexes and parses text to produce a document using a provided language parser and dictionary.
Sourcepub fn new_plain_english_curated(text: &str) -> Self
pub fn new_plain_english_curated(text: &str) -> Self
Parse text to produce a document using the built-in PlainEnglish parser and curated dictionary.
Sourcepub fn new_plain_english(text: &str, dictionary: &impl Dictionary) -> Self
pub fn new_plain_english(text: &str, dictionary: &impl Dictionary) -> Self
Parse text to produce a document using the built-in PlainEnglish parser and a provided dictionary.
Sourcepub fn new_markdown_curated(
text: &str,
markdown_options: MarkdownOptions,
) -> Self
pub fn new_markdown_curated( text: &str, markdown_options: MarkdownOptions, ) -> Self
Parse text to produce a document using the built-in Markdown parser and curated dictionary.
Sourcepub fn new_markdown_default_curated(text: &str) -> Self
pub fn new_markdown_default_curated(text: &str) -> Self
Parse text to produce a document using the built-in Markdown parser and curated dictionary with the default Markdown configuration.
Sourcepub fn new_markdown(
text: &str,
markdown_options: MarkdownOptions,
dictionary: &impl Dictionary,
) -> Self
pub fn new_markdown( text: &str, markdown_options: MarkdownOptions, dictionary: &impl Dictionary, ) -> Self
Parse text to produce a document using the built-in Markdown parser and a provided dictionary.
Sourcepub fn new_markdown_default(text: &str, dictionary: &impl Dictionary) -> Self
pub fn new_markdown_default(text: &str, dictionary: &impl Dictionary) -> Self
Parse text to produce a document using the built-in Markdown parser and a provided dictionary with the default Markdown configuration.
pub fn get_token_at_char_index(&self, char_index: usize) -> Option<&Token>
Sourcepub fn get_token(&self, index: usize) -> Option<&Token>
pub fn get_token(&self, index: usize) -> Option<&Token>
Defensively attempt to grab a specific token.
Sourcepub fn get_token_offset(&self, base: usize, offset: isize) -> Option<&Token>
pub fn get_token_offset(&self, base: usize, offset: isize) -> Option<&Token>
Get a token at a signed offset from a base index, or None if out of bounds.
Sourcepub fn tokens(&self) -> impl Iterator<Item = &Token> + '_
pub fn tokens(&self) -> impl Iterator<Item = &Token> + '_
Get an iterator over all the tokens contained in the document.
pub fn iter_nominal_phrases(&self) -> impl Iterator<Item = &[Token]>
Sourcepub fn fat_tokens(&self) -> impl Iterator<Item = FatToken> + '_
pub fn fat_tokens(&self) -> impl Iterator<Item = FatToken> + '_
Get an iterator over all the tokens contained in the document, converted to FatTokens.
Sourcepub fn get_next_word_from_offset(
&self,
base: usize,
offset: isize,
) -> Option<&Token>
pub fn get_next_word_from_offset( &self, base: usize, offset: isize, ) -> Option<&Token>
Get the next or previous word token relative to a base index, if separated by whitespace. Returns None if the next/previous token is not a word or does not exist.
Sourcepub fn fat_string_tokens(&self) -> impl Iterator<Item = FatStringToken> + '_
pub fn fat_string_tokens(&self) -> impl Iterator<Item = FatStringToken> + '_
Get an iterator over all the tokens contained in the document, as FatStringTokens.
pub fn get_span_content(&self, span: &Span<char>) -> &[char]
pub fn get_span_content_str(&self, span: &Span<char>) -> String
pub fn get_full_string(&self) -> String
pub fn get_full_content(&self) -> &[char]
pub fn get_source(&self) -> &[char]
pub fn get_tokens(&self) -> &[Token]
Trait Implementations§
Source§impl TokenStringExt for Document
impl TokenStringExt for Document
fn first_adjective(&self) -> Option<&Token>
fn last_adjective(&self) -> Option<&Token>
fn last_adjective_index(&self) -> Option<usize>
fn iter_adjective_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_adjectives(&self) -> impl Iterator<Item = &Token> + '_
fn first_apostrophe(&self) -> Option<&Token>
fn last_apostrophe(&self) -> Option<&Token>
fn last_apostrophe_index(&self) -> Option<usize>
fn iter_apostrophe_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_apostrophes(&self) -> impl Iterator<Item = &Token> + '_
fn first_at(&self) -> Option<&Token>
fn last_at(&self) -> Option<&Token>
fn last_at_index(&self) -> Option<usize>
fn iter_at_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_ats(&self) -> impl Iterator<Item = &Token> + '_
fn first_chunk_terminator(&self) -> Option<&Token>
fn last_chunk_terminator(&self) -> Option<&Token>
fn last_chunk_terminator_index(&self) -> Option<usize>
fn iter_chunk_terminator_indices( &self, ) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_chunk_terminators(&self) -> impl Iterator<Item = &Token> + '_
fn first_comma(&self) -> Option<&Token>
fn last_comma(&self) -> Option<&Token>
fn last_comma_index(&self) -> Option<usize>
fn iter_comma_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_commas(&self) -> impl Iterator<Item = &Token> + '_
fn first_conjunction(&self) -> Option<&Token>
fn last_conjunction(&self) -> Option<&Token>
fn last_conjunction_index(&self) -> Option<usize>
fn iter_conjunction_indices( &self, ) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_conjunctions(&self) -> impl Iterator<Item = &Token> + '_
fn first_currency(&self) -> Option<&Token>
fn last_currency(&self) -> Option<&Token>
fn last_currency_index(&self) -> Option<usize>
fn iter_currency_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_currencys(&self) -> impl Iterator<Item = &Token> + '_
fn first_ellipsis(&self) -> Option<&Token>
fn last_ellipsis(&self) -> Option<&Token>
fn last_ellipsis_index(&self) -> Option<usize>
fn iter_ellipsis_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_ellipsiss(&self) -> impl Iterator<Item = &Token> + '_
fn first_hostname(&self) -> Option<&Token>
fn last_hostname(&self) -> Option<&Token>
fn last_hostname_index(&self) -> Option<usize>
fn iter_hostname_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_hostnames(&self) -> impl Iterator<Item = &Token> + '_
fn first_likely_homograph(&self) -> Option<&Token>
fn last_likely_homograph(&self) -> Option<&Token>
fn last_likely_homograph_index(&self) -> Option<usize>
fn iter_likely_homograph_indices( &self, ) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_likely_homographs(&self) -> impl Iterator<Item = &Token> + '_
fn first_noun(&self) -> Option<&Token>
fn last_noun(&self) -> Option<&Token>
fn last_noun_index(&self) -> Option<usize>
fn iter_noun_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_nouns(&self) -> impl Iterator<Item = &Token> + '_
fn first_number(&self) -> Option<&Token>
fn last_number(&self) -> Option<&Token>
fn last_number_index(&self) -> Option<usize>
fn iter_number_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_numbers(&self) -> impl Iterator<Item = &Token> + '_
fn first_paragraph_break(&self) -> Option<&Token>
fn last_paragraph_break(&self) -> Option<&Token>
fn last_paragraph_break_index(&self) -> Option<usize>
fn iter_paragraph_break_indices( &self, ) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_paragraph_breaks(&self) -> impl Iterator<Item = &Token> + '_
fn first_pipe(&self) -> Option<&Token>
fn last_pipe(&self) -> Option<&Token>
fn last_pipe_index(&self) -> Option<usize>
fn iter_pipe_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_pipes(&self) -> impl Iterator<Item = &Token> + '_
fn first_preposition(&self) -> Option<&Token>
fn last_preposition(&self) -> Option<&Token>
fn last_preposition_index(&self) -> Option<usize>
fn iter_preposition_indices( &self, ) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_prepositions(&self) -> impl Iterator<Item = &Token> + '_
fn first_punctuation(&self) -> Option<&Token>
fn last_punctuation(&self) -> Option<&Token>
fn last_punctuation_index(&self) -> Option<usize>
fn iter_punctuation_indices( &self, ) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_punctuations(&self) -> impl Iterator<Item = &Token> + '_
fn first_quote(&self) -> Option<&Token>
fn last_quote(&self) -> Option<&Token>
fn last_quote_index(&self) -> Option<usize>
fn iter_quote_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_quotes(&self) -> impl Iterator<Item = &Token> + '_
fn first_sentence_terminator(&self) -> Option<&Token>
fn last_sentence_terminator(&self) -> Option<&Token>
fn last_sentence_terminator_index(&self) -> Option<usize>
fn iter_sentence_terminator_indices( &self, ) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_sentence_terminators(&self) -> impl Iterator<Item = &Token> + '_
fn first_space(&self) -> Option<&Token>
fn last_space(&self) -> Option<&Token>
fn last_space_index(&self) -> Option<usize>
fn iter_space_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_spaces(&self) -> impl Iterator<Item = &Token> + '_
fn first_unlintable(&self) -> Option<&Token>
fn last_unlintable(&self) -> Option<&Token>
fn last_unlintable_index(&self) -> Option<usize>
fn iter_unlintable_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_unlintables(&self) -> impl Iterator<Item = &Token> + '_
fn first_verb(&self) -> Option<&Token>
fn last_verb(&self) -> Option<&Token>
fn last_verb_index(&self) -> Option<usize>
fn iter_verb_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_verbs(&self) -> impl Iterator<Item = &Token> + '_
fn first_word(&self) -> Option<&Token>
fn last_word(&self) -> Option<&Token>
fn last_word_index(&self) -> Option<usize>
fn iter_word_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_words(&self) -> impl Iterator<Item = &Token> + '_
fn first_word_like(&self) -> Option<&Token>
fn last_word_like(&self) -> Option<&Token>
fn last_word_like_index(&self) -> Option<usize>
fn iter_word_like_indices(&self) -> impl DoubleEndedIterator<Item = usize> + '_
fn iter_word_likes(&self) -> impl Iterator<Item = &Token> + '_
fn first_sentence_word(&self) -> Option<&Token>
fn first_non_whitespace(&self) -> Option<&Token>
Source§fn span(&self) -> Option<Span<char>>
fn span(&self) -> Option<Span<char>>
fn iter_linking_verb_indices(&self) -> impl Iterator<Item = usize> + '_
fn iter_linking_verbs(&self) -> impl Iterator<Item = &Token> + '_
Source§fn iter_paragraphs(&self) -> impl Iterator<Item = &[Token]> + '_
fn iter_paragraphs(&self) -> impl Iterator<Item = &[Token]> + '_
Source§fn iter_sentences(&self) -> impl Iterator<Item = &[Token]> + '_
fn iter_sentences(&self) -> impl Iterator<Item = &[Token]> + '_
Source§fn iter_sentences_mut(&mut self) -> impl Iterator<Item = &mut [Token]> + '_
fn iter_sentences_mut(&mut self) -> impl Iterator<Item = &mut [Token]> + '_
Auto Trait Implementations§
impl Freeze for Document
impl RefUnwindSafe for Document
impl !Send for Document
impl !Sync for Document
impl Unpin for Document
impl UnwindSafe for Document
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more