[−][src]Trait tokenizers::tokenizer::PostProcessor
A PostProcessor has the responsibility to post-process an encoded output of the Tokenizer.
It adds any special tokens that a language model would require.
Required methods
fn added_tokens(&self, is_pair: bool) -> usize
Returns the number of tokens that will be added during the processing step
fn process(
&self,
encoding: Encoding,
pair_encoding: Option<Encoding>,
add_special_tokens: bool
) -> Result<Encoding>
Processes both encodings and returns a new merged one.
Methods
impl dyn PostProcessor
[src]
pub fn default_process(
encoding: Encoding,
pair_encoding: Option<Encoding>,
_add_special_tokens: bool
) -> Result<Encoding>
[src]
Implementors
impl PostProcessor for ByteLevel
[src]
As a PostProcessor, ByteLevel is in charge of trimming the offsets if necessary.
fn added_tokens(&self, _is_pair: bool) -> usize
[src]
fn process(
&self,
encoding: Encoding,
pair_encoding: Option<Encoding>,
_add_special_tokens: bool
) -> Result<Encoding>
[src]
impl PostProcessor for BertProcessing
[src]
fn added_tokens(&self, is_pair: bool) -> usize
[src]
fn process(
&self,
encoding: Encoding,
pair_encoding: Option<Encoding>,
add_special_tokens: bool
) -> Result<Encoding>
[src]