pub struct BPE {
pub dropout: Option<f32>,
pub unk_token: Option<String>,
pub continuing_subword_prefix: Option<String>,
pub end_of_word_suffix: Option<String>,
pub fuse_unk: bool,
pub byte_fallback: bool,
pub ignore_merges: bool,
/* private fields */
}
A Byte Pair Encoding model.
Fields§
dropout: Option<f32> — Dropout probability for merges. The default is 0.0, meaning no dropout. At 1.0, tokenization will perform no merges, so the result will just be characters.
unk_token: Option<String> — The unknown token to be used when we encounter an unknown character.
continuing_subword_prefix: Option<String> — An optional prefix to use on any subword that exists only behind another one.
end_of_word_suffix: Option<String> — An optional suffix to characterize an end-of-word subword.
fuse_unk: bool — Whether multiple consecutive unk tokens get fused into one.
byte_fallback: bool — Byte fallback, as in SentencePiece: instead of emitting UNK, uses a byte token of the form "<0x00>"
for each byte in the unk token.
ignore_merges: bool — Whether or not to directly output words if they are part of the vocab.
Implementations§
Source§impl BPE
impl BPE
Sourcepub fn builder() -> BpeBuilder
pub fn builder() -> BpeBuilder
Initialize a BpeBuilder.
Sourcepub fn new(vocab: Vocab, merges: Merges) -> Self
pub fn new(vocab: Vocab, merges: Merges) -> Self
Create a new BPE model with the given vocab and merges.
Sourcepub fn from_file(vocab: &str, merges: &str) -> BpeBuilder
pub fn from_file(vocab: &str, merges: &str) -> BpeBuilder
Initialize a BpeBuilder model from vocab and merges files
Sourcepub fn read_file(vocab: &str, merges: &str) -> Result<(Vocab, Merges)>
pub fn read_file(vocab: &str, merges: &str) -> Result<(Vocab, Merges)>
Read the given files to extract the vocab and merges
Sourcepub fn clear_cache(&self)
pub fn clear_cache(&self)
Reset the cache.
Sourcepub fn resize_cache(&mut self, capacity: usize)
pub fn resize_cache(&mut self, capacity: usize)
Resize the cache
pub fn get_vocab(&self) -> Vocab
pub fn get_unk_token(&self) -> &Option<String>
pub fn get_continuing_subword_prefix(&self) -> &Option<String>
Trait Implementations§
Source§impl<'de> Deserialize<'de> for BPE
impl<'de> Deserialize<'de> for BPE
Source§fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>where
D: Deserializer<'de>,
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>where
D: Deserializer<'de>,
Source§impl From<BPE> for ModelWrapper
impl From<BPE> for ModelWrapper
Source§impl Model for BPE
impl Model for BPE
type Trainer = BpeTrainer
Source§fn get_vocab(&self) -> HashMap<String, u32>
fn get_vocab(&self) -> HashMap<String, u32>
Source§fn get_vocab_size(&self) -> usize
fn get_vocab_size(&self) -> usize
Source§fn tokenize(&self, sequence: &str) -> Result<Vec<Token>>
fn tokenize(&self, sequence: &str) -> Result<Vec<Token>>
Tokenize the given sequence into multiple underlying Token. The offsets on the Token
are expected to be relative to the given sequence.
Source§fn save(&self, folder: &Path, name: Option<&str>) -> Result<Vec<PathBuf>>
fn save(&self, folder: &Path, name: Option<&str>) -> Result<Vec<PathBuf>>
Save the current Model in the given folder, using the given prefix for the various
files that need to be saved.
Source§fn get_trainer(&self) -> BpeTrainer
fn get_trainer(&self) -> BpeTrainer
impl StructuralPartialEq for BPE
Auto Trait Implementations§
impl !Freeze for BPE
impl RefUnwindSafe for BPE
impl Send for BPE
impl Sync for BPE
impl Unpin for BPE
impl UnwindSafe for BPE
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more