Struct nlprule::tokenizer::tag::Tagger [−][src]
The lexical tagger.
Implementations
impl Tagger
[src]
pub fn from_dumps<S1: AsRef<Path>, S2: AsRef<Path>, S3: AsRef<str>>(
paths: &[S1],
remove_paths: &[S2],
extra_tags: &[S3],
common_words: &HashSet<String>
) -> Result<Self>
[src]
paths: &[S1],
remove_paths: &[S2],
extra_tags: &[S3],
common_words: &HashSet<String>
) -> Result<Self>
Creates a tagger from raw files.
Arguments
paths
: Paths to files where each line contains the word, lemma and tag, respectively, separated by tabs, to be added to the tagger.
remove_paths
: Paths to files where each line contains the word, lemma and tag, respectively, separated by tabs, to be removed from the tagger if present in the files from `paths`.
pub fn id_tag<'a>(&self, tag: &'a str) -> PosId<'a>
[src]
pub fn id_word<'t>(&'t self, text: Cow<'t, str>) -> WordId<'t>
[src]
pub fn get_tags(
&self,
word: &str,
add_lower: bool,
use_compound_split_heuristic: bool
) -> Vec<WordData<'_>>
[src]
&self,
word: &str,
add_lower: bool,
use_compound_split_heuristic: bool
) -> Vec<WordData<'_>>
Get the tags and lemmas (as WordData) for the given word.
Arguments
word
: The word to look up data for.
add_lower
: Whether to add data for the lowercase variant of the word.
use_compound_split_heuristic
: Whether to use a heuristic to split compound words. If true, will attempt to find tags for words which are unknown and longer than some cutoff by looking up tags for substrings from left to right until tags are found or a minimum length is reached.
pub fn get_group_members(&self, lemma: &str) -> Vec<&str>
[src]
Get the words with the same lemma as the given lemma.
Trait Implementations
impl Clone for Tagger
[src]
impl Default for Tagger
[src]
impl<'de> Deserialize<'de> for Tagger
[src]
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
[src]
__D: Deserializer<'de>,
impl Serialize for Tagger
[src]
Auto Trait Implementations
impl !RefUnwindSafe for Tagger
[src]
impl Send for Tagger
[src]
impl Sync for Tagger
[src]
impl Unpin for Tagger
[src]
impl UnwindSafe for Tagger
[src]
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized,
[src]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized,
[src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src]
T: ?Sized,
pub fn borrow_mut(&mut self) -> &mut T
[src]
impl<T> DeserializeOwned for T where
T: for<'de> Deserialize<'de>,
[src]
T: for<'de> Deserialize<'de>,
impl<T> From<T> for T
[src]
impl<T, U> Into<U> for T where
U: From<T>,
[src]
U: From<T>,
impl<T> Pointable for T
pub const ALIGN: usize
type Init = T
The type for initializers.
pub unsafe fn init(init: <T as Pointable>::Init) -> usize
pub unsafe fn deref<'a>(ptr: usize) -> &'a T
pub unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T
pub unsafe fn drop(ptr: usize)
impl<T> ToOwned for T where
T: Clone,
[src]
T: Clone,
type Owned = T
The resulting type after obtaining ownership.
pub fn to_owned(&self) -> T
[src]
pub fn clone_into(&self, target: &mut T)
[src]
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,