pub struct WordPieceTokenizer { /* private fields */ }
Expand description
A subword tokenizer that runs the WordPiece tokenization algorithm.
§Example
use bert_tokenizer::{Tokenizer, Vocab, WordPieceTokenizer};
let mut vocab = Vocab::new();
vocab.insert("hello".to_string(), 0);
vocab.insert("world".to_string(), 1);
vocab.insert("!".to_string(), 2);
vocab.insert("##!".to_string(), 3);
vocab.insert("##world".to_string(), 4);
vocab.insert("##hello".to_string(), 5);
let tokenizer = WordPieceTokenizer::new(vocab).build();
let tokens = tokenizer.tokenize("hello world!");
assert_eq!(tokens, vec!["hello", "world", "##!"]);
Implementations§
Source§impl WordPieceTokenizer
impl WordPieceTokenizer
pub fn new(vocab: Vocab) -> WordPieceTokenizerBuilder
Trait Implementations§
Auto Trait Implementations§
impl Freeze for WordPieceTokenizer
impl RefUnwindSafe for WordPieceTokenizer
impl Send for WordPieceTokenizer
impl Sync for WordPieceTokenizer
impl Unpin for WordPieceTokenizer
impl UnwindSafe for WordPieceTokenizer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more