[−][src]Struct punkt::params::Standard
Standard settings for all tokenizers and trainers.
Trait Implementations
impl DefinesSentenceEndings for Standard
[src]
const SENTENCE_ENDINGS: &'static Set<char>
[src]
The set of characters that constitute a sentence ending.
fn is_sentence_ending(c: &char) -> bool
[src]
Checks if a character is a sentence ending.
impl DefinesInternalPunctuation for Standard
[src]
const INTERNAL_PUNCTUATION: &'static Set<char>
[src]
The set of legal punctuation characters that can occur within a word.
fn is_internal_punctuation(c: &char) -> bool
[src]
Checks if a character is a legal punctuation character that can occur within a word. Read more
impl DefinesNonWordCharacters for Standard
[src]
const NONWORD_CHARS: &'static Set<char>
[src]
The set of characters that can not occur inside of a word.
fn is_nonword_char(c: &char) -> bool
[src]
Checks if a character is one that can not occur inside of a word.
impl DefinesPunctuation for Standard
[src]
const PUNCTUATION: &'static Set<char>
[src]
The set of legal punctuation marks.
fn is_punctuation(c: &char) -> bool
[src]
Checks if a character is a legal punctuation mark.
impl DefinesNonPrefixCharacters for Standard
[src]
const NONPREFIX_CHARS: &'static Set<char>
[src]
The set of characters that can not start a word.
fn is_nonprefix_char(c: &char) -> bool
[src]
Checks if a character is one that can not start a word.
impl TrainerParameters for Standard
[src]
const ABBREV_LOWER_BOUND: f64
[src]
Lower bound score for a token to be considered an abbreviation.
const ABBREV_UPPER_BOUND: f64
[src]
Upper bound score for a token to be considered an abbreviation.
const IGNORE_ABBREV_PENALTY: bool
[src]
Disables the abbreviation penalty which exponentially penalizes occurrences of words without a trailing period. Read more
const COLLOCATION_LOWER_BOUND: f64
[src]
Lower bound score for two tokens to be considered a collocation.
const SENTENCE_STARTER_LOWER_BOUND: f64
[src]
Lower bound score for a token to be considered a sentence starter.
const INCLUDE_ALL_COLLOCATIONS: bool
[src]
Include all pairs where the first token ends with a period.
const INCLUDE_ABBREV_COLLOCATIONS: bool
[src]
Include all pairs where the first token is an abbreviation. Overridden by include_all_collocations. Read more
const COLLOCATION_FREQUENCY_LOWER_BOUND: f64
[src]
Minimum number of times a bigram appears in order to be considered a collocation. Read more
Auto Trait Implementations
Blanket Implementations
impl<T> From<T> for T
[src]
impl<T, U> Into<U> for T where
U: From<T>,
[src]
U: From<T>,
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src]
U: Into<T>,
type Error = !
(try_from)
The type returned in the event of a conversion error.
fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T> Borrow<T> for T where
T: ?Sized,
[src]
T: ?Sized,
impl<T> Any for T where
T: 'static + ?Sized,
[src]
T: 'static + ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src]
T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,