1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
//! # Vocabulary IO
//!
//! ## Loading A Vocab
//!
//! ```rust,no_run
//! use std::sync::Arc;
//!
//! use wordchipper::{
//!     Tokenizer,
//!     TokenizerOptions,
//!     UnifiedTokenVocab,
//!     pretrained::openai::OA_O200K_BASE_PATTERN,
//!     spanners::TextSpanningConfig,
//!     vocab::io::load_base64_unified_vocab_path,
//! };
//!
//! fn example() -> wordchipper::WCResult<Arc<Tokenizer<u32>>> {
//!     let vocab: Arc<UnifiedTokenVocab<u32>> =
//!         load_base64_unified_vocab_path(
//!             "vocab.tiktoken",
//!             TextSpanningConfig::from_pattern(OA_O200K_BASE_PATTERN),
//!         )
//!         .expect("failed to load vocab")
//!         .into();
//!
//!     let tokenizer: Arc<Tokenizer<u32>> =
//!         TokenizerOptions::default().with_parallel(true).build(vocab);
//!
//!     Ok(tokenizer)
//! }
//! ```
pub use *;
pub use *;