// oxibonsai_tokenizer/error.rs
use thiserror::Error;

5#[derive(Debug, Error, Clone, PartialEq, Eq)]
11#[non_exhaustive]
12pub enum TokenizerError {
13 #[error("unknown token: {0:?}")]
15 UnknownToken(String),
16
17 #[error("invalid vocabulary: {0}")]
19 InvalidVocab(String),
20
21 #[error("encode failed: {0}")]
23 EncodeFailed(String),
24
25 #[error("decode failed: {0}")]
27 DecodeFailed(String),
28
29 #[error("invalid JSON: {0}")]
31 InvalidJson(String),
32
33 #[error("HF tokenizer format error: {0}")]
38 HfFormat(String),
39
40 #[error("incomplete UTF-8 sequence at end of stream")]
46 IncompleteUtf8,
47
48 #[error("template render failed: {0}")]
50 TemplateRender(String),
51
52 #[error("I/O error: {0}")]
58 Io(String),
59}
61impl From<std::io::Error> for TokenizerError {
62 fn from(err: std::io::Error) -> Self {
63 Self::Io(err.to_string())
64 }
65}
67pub type TokenizerResult<T> = Result<T, TokenizerError>;
#[cfg(test)]
mod tests {
    use super::*;

    // `thiserror` derives `Display` from the `#[error(...)]` attributes;
    // these tests pin the user-visible message content of each variant.

    #[test]
    fn display_unknown_token() {
        let e = TokenizerError::UnknownToken("foo".to_owned());
        let s = format!("{e}");
        assert!(s.contains("foo"));
    }

    #[test]
    fn display_hf_format() {
        let e = TokenizerError::HfFormat("bad merges".to_owned());
        let s = format!("{e}");
        assert!(s.contains("bad merges"));
        assert!(s.contains("HF"));
    }

    #[test]
    fn display_incomplete_utf8() {
        let e = TokenizerError::IncompleteUtf8;
        let s = format!("{e}");
        assert!(s.to_ascii_lowercase().contains("utf-8"));
    }

    #[test]
    fn display_template_render() {
        let e = TokenizerError::TemplateRender("no such var".to_owned());
        let s = format!("{e}");
        assert!(s.contains("no such var"));
    }

    // The `From<std::io::Error>` impl must carry the original message
    // through to the `Io` variant's payload.
    #[test]
    fn io_error_conversion_preserves_message() {
        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "missing");
        let tok_err: TokenizerError = io_err.into();
        match tok_err {
            TokenizerError::Io(msg) => assert!(msg.contains("missing")),
            other => panic!("expected Io variant, got {other:?}"),
        }
    }

    #[test]
    fn tokenizer_error_is_clone() {
        let e = TokenizerError::InvalidVocab("oops".to_owned());
        let c = e.clone();
        assert_eq!(e, c);
    }

    #[test]
    fn tokenizer_error_equality() {
        let a = TokenizerError::EncodeFailed("x".to_owned());
        let b = TokenizerError::EncodeFailed("x".to_owned());
        let c = TokenizerError::EncodeFailed("y".to_owned());
        assert_eq!(a, b);
        assert_ne!(a, c);
    }
}