Struct syntaxdot_tokenizers::BertTokenizer
BERT word piece tokenizer.
This tokenizer splits CoNLL-X tokens into word pieces. For example, a sentence such as:
Veruntreute die AWO Spendengeld ?
could be split (depending on the vocabulary) into the following word pieces:
Ver ##unt ##reute die A ##W ##O Spenden ##geld [UNK]
Then the vocabulary index of each such piece is returned.
The unknown token (here [UNK]) can be specified while
constructing a tokenizer.
Implementations
impl BertTokenizer[src]
pub fn new(word_pieces: WordPieces, unknown_piece: impl Into<String>) -> Self[src]
Construct a tokenizer from word pieces and the unknown piece.
pub fn open<P>(
    model_path: P,
    unknown_piece: impl Into<String>
) -> Result<Self, TokenizerError> where
    P: AsRef<Path>,
pub fn read<R>(
    buf_read: R,
    unknown_piece: impl Into<String>
) -> Result<BertTokenizer, TokenizerError> where
    R: BufRead,
Trait Implementations
impl Tokenize for BertTokenizer[src]
pub fn tokenize(&self, sentence: Sentence) -> SentenceWithPieces[src]
Auto Trait Implementations
impl RefUnwindSafe for BertTokenizer
impl Send for BertTokenizer
impl Sync for BertTokenizer
impl Unpin for BertTokenizer
impl UnwindSafe for BertTokenizer
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized, [src]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized, [src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized, [src]
T: ?Sized,
pub fn borrow_mut(&mut self) -> &mut T[src]
impl<T> From<T> for T[src]
impl<T, U> Into<U> for T where
U: From<T>, [src]
U: From<T>,
impl<T, U> TryFrom<U> for T where
U: Into<T>, [src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>, [src]
U: TryFrom<T>,