tiktoken_rs/singleton.rs
1use lazy_static::lazy_static;
2
3use crate::vendor_tiktoken::CoreBPE;
4
5use crate::{cl100k_base, o200k_base, o200k_harmony, p50k_base, p50k_edit, r50k_base};
6
7/// Returns a singleton instance of the r50k_base tokenizer. (also known as `gpt2`)
8/// Use for GPT-3 models like `davinci`
9///
10/// This function will only initialize the tokenizer once, and then return a reference the tokenizer
11pub fn r50k_base_singleton() -> &'static CoreBPE {
12 lazy_static! {
13 static ref R50K_BASE: CoreBPE = r50k_base().unwrap();
14 }
15 &R50K_BASE
16}
17
18/// Returns a singleton instance of the p50k_base tokenizer.
19/// Use for Code models, `text-davinci-002`, `text-davinci-003`
20///
21/// This function will only initialize the tokenizer once, and then return a reference the tokenizer.
22pub fn p50k_base_singleton() -> &'static CoreBPE {
23 lazy_static! {
24 static ref P50K_BASE: CoreBPE = p50k_base().unwrap();
25 }
26 &P50K_BASE
27}
28
29/// Returns a singleton instance of the p50k_edit tokenizer.
30/// Use for edit models like `text-davinci-edit-001`, `code-davinci-edit-001`
31///
32/// This function will only initialize the tokenizer once, and then return a reference the tokenizer.
33pub fn p50k_edit_singleton() -> &'static CoreBPE {
34 lazy_static! {
35 static ref P50K_EDIT: CoreBPE = p50k_edit().unwrap();
36 }
37 &P50K_EDIT
38}
39
40/// Returns a singleton instance of the cl100k_base tokenizer.
41/// Use for ChatGPT models, `text-embedding-ada-002`
42///
43/// This function will only initialize the tokenizer once, and then return a reference the tokenizer
44pub fn cl100k_base_singleton() -> &'static CoreBPE {
45 lazy_static! {
46 static ref CL100K_BASE: CoreBPE = cl100k_base().unwrap();
47 }
48 &CL100K_BASE
49}
50
51/// Returns a singleton instance of the o200k_base tokenizer.
52/// Use for GPT-5, GPT-4.1, GPT-4o, and other `o` series models like `o1`, `o3`, and `o4`.
53///
54/// This function will only initialize the tokenizer once, and then return a reference the tokenizer
55pub fn o200k_base_singleton() -> &'static CoreBPE {
56 lazy_static! {
57 static ref O200K_BASE: CoreBPE = o200k_base().unwrap();
58 }
59 &O200K_BASE
60}
61
62/// Returns a singleton instance of the o200k_harmony tokenizer.
63/// Use for gpt-oss models like `gpt-oss-20b`, `gpt-oss-120b`.
64///
65/// This function will only initialize the tokenizer once, and then return a reference the tokenizer
66pub fn o200k_harmony_singleton() -> &'static CoreBPE {
67 lazy_static! {
68 static ref O200K_HARMONY: CoreBPE = o200k_harmony().unwrap();
69 }
70 &O200K_HARMONY
71}