pub struct Qwen2BpeTokenizer { /* private fields */ }
Expand description
Qwen2-specific BPE tokenizer with chat template support.
Extends the base BPE tokenizer with Qwen2’s special tokens and chat formatting conventions.
§Example
use aprender::text::bpe::Qwen2BpeTokenizer;
let tokenizer = Qwen2BpeTokenizer::new();
// Check special tokens
assert!(tokenizer.is_eos(151645)); // <|im_end|>
// Format a chat message
let formatted = tokenizer.format_chat("user", "Hello, world!");
assert!(formatted.contains("<|im_start|>user"));
Implementations§
Source§impl Qwen2BpeTokenizer
impl Qwen2BpeTokenizer
Sourcepub const IM_START_ID: u32 = 151644
pub const IM_START_ID: u32 = 151644
Special token: <|im_start|>
Sourcepub const ENDOFTEXT_ID: u32 = 151643
pub const ENDOFTEXT_ID: u32 = 151643
Special token: <|endoftext|>
Sourcepub fn vocab_size(&self) -> usize
pub fn vocab_size(&self) -> usize
Get vocabulary size.
Sourcepub fn format_chat(&self, role: &str, content: &str) -> String
pub fn format_chat(&self, role: &str, content: &str) -> String
Format a chat message with Qwen2 template.
Format: <|im_start|>role\ncontent<|im_end|>\n (where role and content are the two arguments)
Sourcepub fn format_conversation(&self, messages: &[(&str, &str)]) -> String
pub fn format_conversation(&self, messages: &[(&str, &str)]) -> String
Sourcepub fn im_start_id(&self) -> u32
pub fn im_start_id(&self) -> u32
Get the im_start token ID.
Trait Implementations§
Source§impl Clone for Qwen2BpeTokenizer
impl Clone for Qwen2BpeTokenizer
Source§fn clone(&self) -> Qwen2BpeTokenizer
fn clone(&self) -> Qwen2BpeTokenizer
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for Qwen2BpeTokenizer
impl Debug for Qwen2BpeTokenizer
Auto Trait Implementations§
impl Freeze for Qwen2BpeTokenizer
impl RefUnwindSafe for Qwen2BpeTokenizer
impl Send for Qwen2BpeTokenizer
impl Sync for Qwen2BpeTokenizer
impl Unpin for Qwen2BpeTokenizer
impl UnsafeUnpin for Qwen2BpeTokenizer
impl UnwindSafe for Qwen2BpeTokenizer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T
where
T: Clone,
impl<T> CloneToUninit for T
where
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more