pub struct NormalizerSpec {
pub name: Option<String>,
pub precompiled_charsmap: Option<Vec<u8>>,
pub add_dummy_prefix: Option<bool>,
pub remove_extra_whitespaces: Option<bool>,
pub escape_whitespaces: Option<bool>,
pub normalization_rule_tsv: Option<String>,
}Expand description
NormalizerSpec encodes various parameters for string normalization.
Fields§
§name: Option<String>
Name of the normalization rule.
precompiled_charsmap: Option<Vec<u8>>
Pre-compiled normalization rule created by the Builder::GetPrecompiledCharsMap() or Builder::CompileCharsMap() method. Usually this field is set by the Builder::GetNormalizerSpec() method.
add_dummy_prefix: Option<bool>
Adds a dummy whitespace at the beginning of the text in order to treat “world” in “world” and “hello world” in the same way.
remove_extra_whitespaces: Option<bool>
Removes leading, trailing, and duplicate internal whitespace.
escape_whitespaces: Option<bool>
Replaces whitespace with a meta symbol. This field must be true to train a SentencePiece model.
normalization_rule_tsv: Option<String>
Custom normalization rule file in TSV format. See
https://github.com/google/sentencepiece/blob/master/doc/normalization.md
This field is only used in the SentencePieceTrainer::Train() method, which
compiles the rule into the binary rule stored in precompiled_charsmap.
Implementations§
Source§impl NormalizerSpec
impl NormalizerSpec
Sourcepub fn precompiled_charsmap(&self) -> &[u8] ⓘ
pub fn precompiled_charsmap(&self) -> &[u8] ⓘ
Returns the value of precompiled_charsmap, or the default value if precompiled_charsmap is unset.
Sourcepub fn add_dummy_prefix(&self) -> bool
pub fn add_dummy_prefix(&self) -> bool
Returns the value of add_dummy_prefix, or the default value if add_dummy_prefix is unset.
Sourcepub fn remove_extra_whitespaces(&self) -> bool
pub fn remove_extra_whitespaces(&self) -> bool
Returns the value of remove_extra_whitespaces, or the default value if remove_extra_whitespaces is unset.
Sourcepub fn escape_whitespaces(&self) -> bool
pub fn escape_whitespaces(&self) -> bool
Returns the value of escape_whitespaces, or the default value if escape_whitespaces is unset.
Sourcepub fn normalization_rule_tsv(&self) -> &str
pub fn normalization_rule_tsv(&self) -> &str
Returns the value of normalization_rule_tsv, or the default value if normalization_rule_tsv is unset.
Trait Implementations§
Source§impl Clone for NormalizerSpec
impl Clone for NormalizerSpec
Source§fn clone(&self) -> NormalizerSpec
fn clone(&self) -> NormalizerSpec
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for NormalizerSpec
impl Debug for NormalizerSpec
Source§impl Default for NormalizerSpec
impl Default for NormalizerSpec
Source§impl Message for NormalizerSpec
impl Message for NormalizerSpec
Source§fn encoded_len(&self) -> usize
fn encoded_len(&self) -> usize
Source§fn encode(&self, buf: &mut impl BufMut) -> Result<(), EncodeError>where
Self: Sized,
fn encode(&self, buf: &mut impl BufMut) -> Result<(), EncodeError>where
Self: Sized,
Source§fn encode_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
fn encode_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
Source§fn encode_length_delimited(
&self,
buf: &mut impl BufMut,
) -> Result<(), EncodeError>where
Self: Sized,
fn encode_length_delimited(
&self,
buf: &mut impl BufMut,
) -> Result<(), EncodeError>where
Self: Sized,
Source§fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
fn encode_length_delimited_to_vec(&self) -> Vec<u8> ⓘwhere
Self: Sized,
Source§fn decode(buf: impl Buf) -> Result<Self, DecodeError>where
Self: Default,
fn decode(buf: impl Buf) -> Result<Self, DecodeError>where
Self: Default,
Source§fn decode_length_delimited(buf: impl Buf) -> Result<Self, DecodeError>where
Self: Default,
fn decode_length_delimited(buf: impl Buf) -> Result<Self, DecodeError>where
Self: Default,
Source§fn merge(&mut self, buf: impl Buf) -> Result<(), DecodeError>where
Self: Sized,
fn merge(&mut self, buf: impl Buf) -> Result<(), DecodeError>where
Self: Sized,
Decodes an instance of the message from a buffer, and merges it into self. Read more
Source§fn merge_length_delimited(&mut self, buf: impl Buf) -> Result<(), DecodeError>where
Self: Sized,
fn merge_length_delimited(&mut self, buf: impl Buf) -> Result<(), DecodeError>where
Self: Sized,
Decodes a length-delimited instance of the message from a buffer, and merges it into self.