Struct nlprule::tokenizer::Tokenizer [−][src]
The complete Tokenizer doing tagging, chunking and disambiguation.
Implementations
impl Tokenizer
[src]
pub fn new<P: AsRef<Path>>(p: P) -> Result<Self, Error>
[src]
Creates a new tokenizer from a path to a binary.
Errors
- If the file cannot be opened.
- If the file content cannot be deserialized to a rules set.
pub fn from_reader<R: Read>(reader: R) -> Result<Self, Error>
[src]
Creates a new tokenizer from a reader.
pub fn rules(&self) -> &[DisambiguationRule]
[src]
Gets all disambiguation rules in the order they are applied.
pub fn tagger(&self) -> &Arc<Tagger>
[src]
Gets the lexical tagger.
pub fn chunker(&self) -> &Option<Chunker>
[src]
Gets the chunker if one exists.
pub fn disambiguate<'t>(
&'t self,
sentence: IncompleteSentence<'t>
) -> IncompleteSentence<'t>
[src]
&'t self,
sentence: IncompleteSentence<'t>
) -> IncompleteSentence<'t>
Applies rule-based disambiguation to the tokens. This does not change the number of tokens, but can change their content arbitrarily.
pub fn sentencize<'t>(&'t self, text: &'t str) -> IncompleteSentenceIter<'t>
Notable traits for IncompleteSentenceIter<'t>
impl<'t> Iterator for IncompleteSentenceIter<'t> type Item = IncompleteSentence<'t>;
[src]
Notable traits for IncompleteSentenceIter<'t>
impl<'t> Iterator for IncompleteSentenceIter<'t> type Item = IncompleteSentence<'t>;
Splits the text into sentences and tokenizes each sentence.
pub fn pipe<'t>(&'t self, text: &'t str) -> SentenceIter<'t>
Notable traits for SentenceIter<'t>
impl<'t> Iterator for SentenceIter<'t> type Item = Sentence<'t>;
[src]
Notable traits for SentenceIter<'t>
impl<'t> Iterator for SentenceIter<'t> type Item = Sentence<'t>;
Applies the entire tokenization pipeline including sentencization, tagging, chunking and disambiguation.
Trait Implementations
impl Default for Tokenizer
[src]
impl<'de> Deserialize<'de> for Tokenizer
[src]
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
[src]
__D: Deserializer<'de>,
impl Serialize for Tokenizer
[src]
Auto Trait Implementations
impl !RefUnwindSafe for Tokenizer
impl Send for Tokenizer
impl Sync for Tokenizer
impl Unpin for Tokenizer
impl !UnwindSafe for Tokenizer
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized,
[src]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized,
[src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src]
T: ?Sized,
pub fn borrow_mut(&mut self) -> &mut T
[src]
impl<T> DeserializeOwned for T where
T: for<'de> Deserialize<'de>,
[src]
T: for<'de> Deserialize<'de>,
impl<T> From<T> for T
[src]
impl<T, U> Into<U> for T where
U: From<T>,
[src]
U: From<T>,
impl<T> Pointable for T
pub const ALIGN: usize
type Init = T
The type for initializers.
pub unsafe fn init(init: <T as Pointable>::Init) -> usize
pub unsafe fn deref<'a>(ptr: usize) -> &'a T
pub unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T
pub unsafe fn drop(ptr: usize)
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,