1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
// auto.rs
//
// Copyright (c) 2023-2024 Junpei Kawamoto
//
// This software is released under the MIT License.
//
// http://opensource.org/licenses/mit-license.php
//! This module provides a tokenizer that automatically determines the appropriate tokenizer.
//!
//! It is inspired by the [`Auto Classes`](https://huggingface.co/docs/transformers/model_doc/auto)
//! feature provided by
//! [Hugging Face's Transformers](https://huggingface.co/docs/transformers/),
//! which simplifies the selection process by automatically choosing the correct tokenizer for a
//! given model.
//!
//! ## Example
//! This tokenizer is particularly useful when working with different types of models where
//! it is not feasible to manually specify the tokenizer each time. It is ideal for scenarios
//! where ease of use and flexibility matter more than absolute peak performance.
//!
//! ```no_run
//! # use anyhow::Result;
//! use ct2rs::tokenizers::auto::Tokenizer as AutoTokenizer;
//! use ct2rs::Tokenizer;
//!
//! # fn main() -> Result<()> {
//! let model_path = "path/to/your/model";
//! let auto_tokenizer = AutoTokenizer::new(model_path)?;
//! let tokenized_output = auto_tokenizer.encode("Example text to tokenize.")?;
//! println!("Tokenized output: {:?}", tokenized_output);
//! # Ok(())
//! # }
//! ```
//!
//! ## Note
//! If you are integrating this module into performance-sensitive applications, it is recommended
//! to evaluate the overhead introduced by dynamic dispatch and consider using a specific
//! tokenizer directly where possible.
use Path;
use sentencepiece;
use ;
use ;
/// A tokenizer that automatically determines the appropriate tokenizer.