pub struct CodeTokenizer { /* private fields */ }
Expand description
Code tokenizer implementation
Implementations§
Source§impl CodeTokenizer
impl CodeTokenizer
Sourcepub fn new(config: CodeTokenizerConfig) -> Self
pub fn new(config: CodeTokenizerConfig) -> Self
Create a new code tokenizer
Sourcepub fn for_language(language: Language) -> Self
pub fn for_language(language: Language) -> Self
Create tokenizer for a specific language
Sourcepub fn tokenize_code(&self, code: &str) -> Result<Vec<CodeToken>>
pub fn tokenize_code(&self, code: &str) -> Result<Vec<CodeToken>>
Tokenize code into structured tokens
Sourcepub fn vocab_size(&self) -> usize
pub fn vocab_size(&self) -> usize
Get vocabulary size
Sourcepub fn token_to_id(&self, token: &str) -> Option<u32>
pub fn token_to_id(&self, token: &str) -> Option<u32>
Get token ID
Sourcepub fn id_to_token(&self, id: u32) -> Option<String>
pub fn id_to_token(&self, id: u32) -> Option<String>
Get token from ID
Trait Implementations§
Source§impl Tokenizer for CodeTokenizer
impl Tokenizer for CodeTokenizer
Source§fn encode(&self, text: &str) -> Result<TokenizedInput>
fn encode(&self, text: &str) -> Result<TokenizedInput>
Encodes a single text string into tokens. Read more
Source§fn encode_pair(&self, text_a: &str, text_b: &str) -> Result<TokenizedInput>
fn encode_pair(&self, text_a: &str, text_b: &str) -> Result<TokenizedInput>
Encodes a pair of texts for sequence-pair tasks. Read more
Source§fn vocab_size(&self) -> usize
fn vocab_size(&self) -> usize
Returns the size of the tokenizer’s vocabulary. Read more
Source§fn get_vocab(&self) -> HashMap<String, u32>
fn get_vocab(&self) -> HashMap<String, u32>
Returns a copy of the vocabulary as a mapping from tokens to IDs. Read more
Auto Trait Implementations§
impl Freeze for CodeTokenizer
impl RefUnwindSafe for CodeTokenizer
impl Send for CodeTokenizer
impl Sync for CodeTokenizer
impl Unpin for CodeTokenizer
impl UnsafeUnpin for CodeTokenizer
impl UnwindSafe for CodeTokenizer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more