Trait tokenizers::tokenizer::PostProcessor

source ·

pub trait PostProcessor {
    // Required methods
    fn added_tokens(&self, is_pair: bool) -> usize;
    fn process_encodings(
        &self,
        encodings: Vec<Encoding>,
        add_special_tokens: bool
    ) -> Result<Vec<Encoding>>;

    // Provided method
    fn process(
        &self,
        encoding: Encoding,
        pair_encoding: Option<Encoding>,
        add_special_tokens: bool
    ) -> Result<Encoding> { ... }
}

Expand description

A PostProcessor is responsible for post-processing the encoded output of the Tokenizer. It adds any special tokens that a language model would require.

Required Methods§

source

fn added_tokens(&self, is_pair: bool) -> usize

Returns the number of tokens that will be added during the processing step

source

fn process_encodings( &self, encodings: Vec<Encoding>, add_special_tokens: bool ) -> Result<Vec<Encoding>>

Processes any number of encodings and returns a series of encodings (possibly merging them).

Provided Methods§

source

fn process( &self, encoding: Encoding, pair_encoding: Option<Encoding>, add_special_tokens: bool ) -> Result<Encoding>

Processes both encodings and returns a new merged one.

Implementations§

source §

impl dyn PostProcessor

source

pub fn default_process( encodings: Vec<Encoding>, _add_special_tokens: bool ) -> Result<Vec<Encoding>>

Implementors§

source §

impl PostProcessor for PostProcessorWrapper

source §

impl PostProcessor for ByteLevel

As a PostProcessor, ByteLevel is in charge of trimming the offsets if necessary.

source §

Trait tokenizers::tokenizer::PostProcessor

Required Methods§

fn added_tokens(&self, is_pair: bool) -> usize

fn process_encodings( &self, encodings: Vec<Encoding>, add_special_tokens: bool ) -> Result<Vec<Encoding>>

Provided Methods§

fn process( &self, encoding: Encoding, pair_encoding: Option<Encoding>, add_special_tokens: bool ) -> Result<Encoding>

Implementations§

impl dyn PostProcessor

pub fn default_process( encodings: Vec<Encoding>, _add_special_tokens: bool ) -> Result<Vec<Encoding>>

Implementors§

impl PostProcessor for PostProcessorWrapper

impl PostProcessor for ByteLevel

impl PostProcessor for BertProcessing

impl PostProcessor for RobertaProcessing

impl PostProcessor for Sequence

impl PostProcessor for TemplateProcessing

Trait tokenizers::tokenizer::PostProcessor