pub struct SentencePieceProcessor { /* private fields */ }
Expand description

Sentence piece tokenizer.

Instances of SentencePieceProcessor can be used to tokenize a sentence using a sentencepiece model.

Implementations§

source§

impl SentencePieceProcessor

source

pub fn from_serialized_proto(data: &[u8]) -> Result<Self, SentencePieceError>

source

pub fn to_serialized_proto(&self) -> Vec<u8>

Serialize the model to protobuf.

source

pub fn open(path: impl AsRef<Path>) -> Result<Self, SentencePieceError>

Open a sentencepiece model.

source

pub fn bos_id(&self) -> Option<u32>

source

pub fn decode_piece_ids( &self, pieces: &[u32] ) -> Result<String, SentencePieceError>

Decode a sentence from piece identifiers.

source

pub fn decode_pieces( &self, pieces: &[impl AsRef<str>] ) -> Result<String, SentencePieceError>

source

pub fn encode( &self, sentence: &str ) -> Result<Vec<PieceWithId>, SentencePieceError>

Encode a sentence as sentence pieces and their identifiers.

source

pub fn eos_id(&self) -> Option<u32>

source

pub fn is_empty(&self) -> bool

source

pub fn len(&self) -> usize

source

pub fn pad_id(&self) -> Option<u32>

source

pub fn piece_to_id(&self, piece: &str) -> Result<Option<u32>, NulError>

Get the identifier of a sentence piece.

source

pub fn sample_encode( &self, sentence: &str, n_best: usize, alpha: f32 ) -> Result<Vec<PieceWithId>, SentencePieceError>

Encode a sentence using sampling (subword regularization).

Sample from the n_best segmentations, where alpha controls the smoothness of the distribution.

This method panics when n_best > 512 or when alpha is not a (normal) positive floating point number.

source

pub fn unk_id(&self) -> u32

Trait Implementations§

source§

impl Debug for SentencePieceProcessor

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl Drop for SentencePieceProcessor

source§

fn drop(&mut self)

Executes the destructor for this type. Read more
source§

impl Send for SentencePieceProcessor

source§

impl Sync for SentencePieceProcessor

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.