Struct syntaxdot_tokenizers::XlmRobertaTokenizer
Tokenizer for XLM-RoBERTa models.
XLM-RoBERTa uses the SentencePiece tokenizer. However, we cannot use it in the intended way: we would have to detokenize sentences, and it is not guaranteed that each token has a unique piece, which is required for sequence labeling. Instead, we use the tokenizer as a subword tokenizer.
Implementations
impl XlmRobertaTokenizer[src]
pub fn new(spp: SentencePieceProcessor) -> Self[src]
pub fn open<P>(model: P) -> Result<Self, TokenizerError> where
P: AsRef<Path>, [src]
P: AsRef<Path>,
Trait Implementations
impl From<SentencePieceProcessor> for XlmRobertaTokenizer[src]
pub fn from(spp: SentencePieceProcessor) -> Self[src]
impl Tokenize for XlmRobertaTokenizer[src]
pub fn tokenize(&self, sentence: Sentence) -> SentenceWithPieces[src]
Auto Trait Implementations
impl RefUnwindSafe for XlmRobertaTokenizer
impl Send for XlmRobertaTokenizer
impl Sync for XlmRobertaTokenizer
impl Unpin for XlmRobertaTokenizer
impl UnwindSafe for XlmRobertaTokenizer
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized, [src]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized, [src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized, [src]
T: ?Sized,
pub fn borrow_mut(&mut self) -> &mut T[src]
impl<T> From<T> for T[src]
impl<T, U> Into<U> for T where
U: From<T>, [src]
U: From<T>,
impl<T, U> TryFrom<U> for T where
U: Into<T>, [src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
pub fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>, [src]
U: TryFrom<T>,