[][src]Struct punkt::params::Standard

pub struct Standard;

Standard settings for all tokenizers and trainers.

Trait Implementations

impl DefinesSentenceEndings for Standard[src]

const SENTENCE_ENDINGS: &'static Set<char>[src]

The set of characters that constitute a sentence ending.

fn is_sentence_ending(c: &char) -> bool[src]

Checks if a character is a sentence ending.

impl DefinesInternalPunctuation for Standard[src]

const INTERNAL_PUNCTUATION: &'static Set<char>[src]

The set of legal punctuation characters that can occur within a word.

fn is_internal_punctuation(c: &char) -> bool[src]

Checks if a character is a legal punctuation character that can occur within a word. Read more

impl DefinesNonWordCharacters for Standard[src]

const NONWORD_CHARS: &'static Set<char>[src]

The set of characters that can not occur inside of a word.

fn is_nonword_char(c: &char) -> bool[src]

Checks if a character is one that can not occur inside of a word.

impl DefinesPunctuation for Standard[src]

const PUNCTUATION: &'static Set<char>[src]

The set of legal punctuation marks.

fn is_punctuation(c: &char) -> bool[src]

Checks if a character is a legal punctuation mark.

impl DefinesNonPrefixCharacters for Standard[src]

const NONPREFIX_CHARS: &'static Set<char>[src]

The set of characters that can not start a word.

fn is_nonprefix_char(c: &char) -> bool[src]

Checks if a character is one that can not start a word.

impl TrainerParameters for Standard[src]

const ABBREV_LOWER_BOUND: f64[src]

Lower bound score for a token to be considered an abbreviation.

const ABBREV_UPPER_BOUND: f64[src]

Upper bound score for a token to be considered an abbreviation.

const IGNORE_ABBREV_PENALTY: bool[src]

Disables the abbreviation penalty which exponentially penalizes occurrences of words without a trailing period. Read more

const COLLOCATION_LOWER_BOUND: f64[src]

Lower bound score for two tokens to be considered a collocation.

const SENTENCE_STARTER_LOWER_BOUND: f64[src]

Lower bound score for a token to be considered a sentence starter.

const INCLUDE_ALL_COLLOCATIONS: bool[src]

Include all pairs where the first token ends with a period.

const INCLUDE_ABBREV_COLLOCATIONS: bool[src]

Include all pairs where the first token is an abbreviation. Overridden by INCLUDE_ALL_COLLOCATIONS. Read more

const COLLOCATION_FREQUENCY_LOWER_BOUND: f64[src]

Minimum number of times a bigram appears in order to be considered a collocation. Read more

Auto Trait Implementations

impl Send for Standard

impl Sync for Standard

Blanket Implementations

impl<T> From<T> for T[src]

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = !

🔬 This is a nightly-only experimental API. (try_from)

The type returned in the event of a conversion error.

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

impl<T> Any for T where
    T: 'static + ?Sized
[src]

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

🔬 This is a nightly-only experimental API. (try_from)

The type returned in the event of a conversion error.