use serde::{de, Deserialize, Serialize, Serializer};

/// Analyze API response: either the standard list of tokens or the detailed
/// per-step breakdown returned when `explain` is requested.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum AnalyzeResponse {
    /// Standard response containing the analyzed tokens.
    #[serde(rename = "tokens")]
    Standard(Vec<Token>),

    /// Detailed response describing each step of the analysis chain.
    #[serde(rename = "detail")]
    Explained(ExplainedResponse),
}

/// A single token produced by the analyzer.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct Token {
    /// The token text.
    pub token: String,

    /// Start offset of the token in the original text.
    pub start_offset: u32,

    /// End offset of the token in the original text.
    pub end_offset: u32,

    /// Token type, e.g. `<ALPHANUM>`.
    #[serde(rename = "type")]
    pub ty: TokenType,

    /// Position of the token within the token stream.
    pub position: u32,

    /// Term bytes, if token attributes are included in the response.
    pub bytes: Option<String>,

    /// Whether the token is marked as a keyword, if reported.
    pub keyword: Option<bool>,

    /// Number of positions the token spans, if reported.
    pub position_length: Option<u32>,

    /// Term frequency, if reported.
    pub term_frequency: Option<u32>,
}

/// Detailed analysis breakdown returned in the `detail` field.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct ExplainedResponse {
    custom_analyzer: bool,

    analyzer: Option<AnalysisObject>,

    #[serde(default, rename = "charfilters")]
    char_filters: Vec<CharFilter>,

    tokenizer: Option<AnalysisObject>,

    #[serde(default, rename = "tokenfilters")]
    token_filters: Vec<AnalysisObject>,
}

/// Output of a single analysis step (analyzer, tokenizer or token filter).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct AnalysisObject {
    name: String,
    tokens: Vec<Token>,
}

/// Output of a character filter step.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct CharFilter {
    name: String,
    filtered_text: Vec<String>,
}

/// Token types emitted by analyzers and tokenizers.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum TokenType {
    #[default]
    Alphanum,

    Synonym,

    Word,

    Hangul,

    Num,

    Email,

    Apostrophe,

    Double,

    Katakana,

    Acronym,

    Gram,

    Fingerprint,

    Shingle,

    /// Any token type not covered by the variants above.
    Other(String),
}

impl Serialize for TokenType {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        match self {
            Self::Alphanum => "<ALPHANUM>",
            Self::Synonym => "SYNONYM",
            Self::Word => "word",
            Self::Hangul => "<HANGUL>",
            Self::Num => "<NUM>",
            Self::Email => "<EMAIL>",
            Self::Apostrophe => "<APOSTROPHE>",
            Self::Double => "<DOUBLE>",
            Self::Katakana => "<KATAKANA>",
            Self::Acronym => "<ACRONYM>",
            Self::Gram => "gram",
            Self::Fingerprint => "fingerprint",
            Self::Shingle => "shingle",
            Self::Other(other) => other,
        }
        .serialize(serializer)
    }
}

impl<'de> Deserialize<'de> for TokenType {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: de::Deserializer<'de>,
    {
        Ok(match String::deserialize(deserializer)?.as_str() {
            "<ALPHANUM>" => Self::Alphanum,
            "SYNONYM" => Self::Synonym,
            "word" => Self::Word,
            "<HANGUL>" => Self::Hangul,
            "<NUM>" => Self::Num,
            "<EMAIL>" => Self::Email,
            "<APOSTROPHE>" => Self::Apostrophe,
            "<DOUBLE>" => Self::Double,
            "<KATAKANA>" => Self::Katakana,
            "<ACRONYM>" => Self::Acronym,
            "gram" => Self::Gram,
            "fingerprint" => Self::Fingerprint,
            "shingle" => Self::Shingle,
            other => Self::Other(other.to_string()),
        })
    }
}

#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    #[test]
    fn deserialize_standard() {
        let json_response = json!({
            "tokens": [
                {
                    "token": "test1",
                    "start_offset": 0,
                    "end_offset": 6,
                    "type": "<ALPHANUM>",
                    "position": 0
                },
                {
                    "token": "test2",
                    "start_offset": 7,
                    "end_offset": 11,
                    "type": "<ALPHANUM>",
                    "position": 1
                }
            ]
        });

        let token_1 = Token {
            token: "test1".to_string(),
            start_offset: 0,
            end_offset: 6,
            ty: TokenType::Alphanum,
            position: 0,
            bytes: None,
            keyword: None,
            position_length: None,
            term_frequency: None,
        };
        let token_2 = Token {
            token: "test2".to_string(),
            start_offset: 7,
            end_offset: 11,
            ty: TokenType::Alphanum,
            position: 1,
            bytes: None,
            keyword: None,
            position_length: None,
            term_frequency: None,
        };

        let expected = AnalyzeResponse::Standard(vec![token_1, token_2]);
        let result: AnalyzeResponse = serde_json::from_value(json_response).unwrap();

        assert_eq!(expected, result);
    }

    #[test]
    fn deserialize_explained() {
        let json_response = json!({
            "detail": {
                "custom_analyzer": true,
                "charfilters": [
                    {
                        "name": "html_strip",
                        "filtered_text": [
                            "test"
                        ]
                    }
                ],
                "tokenizer": {
                    "name": "lowercase",
                    "tokens": [
                        {
                            "token": "test",
                            "start_offset": 0,
                            "end_offset": 6,
                            "type": "SYNONYM",
                            "position": 0
                        }
                    ]
                },
                "tokenfilters": [
                    {
                        "name": "__anonymous__stop",
                        "tokens": [
                            {
                                "token": "test",
                                "start_offset": 0,
                                "end_offset": 6,
                                "type": "SYNONYM",
                                "position": 0
                            }
                        ]
                    }
                ]
            }
        });

        let token = Token {
            token: "test".to_string(),
            start_offset: 0,
            end_offset: 6,
            ty: TokenType::Synonym,
            position: 0,
            bytes: None,
            keyword: None,
            position_length: None,
            term_frequency: None,
        };

        let expected = AnalyzeResponse::Explained(ExplainedResponse {
            custom_analyzer: true,
            analyzer: None,
            char_filters: vec![CharFilter {
                name: "html_strip".to_string(),
                filtered_text: vec!["test".to_string()],
            }],
            tokenizer: Some(AnalysisObject {
                name: "lowercase".to_string(),
                tokens: vec![token.clone()],
            }),
            token_filters: vec![AnalysisObject {
                name: "__anonymous__stop".to_string(),
                tokens: vec![token],
            }],
        });

        let result: AnalyzeResponse = serde_json::from_value(json_response).unwrap();

        assert_eq!(expected, result);
    }
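
    // Added round-trip check for the hand-written TokenType (de)serializers:
    // known strings map to their variants, and anything else falls back to
    // `TokenType::Other`, carrying the original string. The `<IDEOGRAPHIC>`
    // value below is just an illustrative string that is not in the known list.
    #[test]
    fn token_type_round_trip() {
        assert_eq!(
            serde_json::to_value(TokenType::Alphanum).unwrap(),
            json!("<ALPHANUM>")
        );
        assert_eq!(
            serde_json::from_value::<TokenType>(json!("<ALPHANUM>")).unwrap(),
            TokenType::Alphanum
        );

        // Unknown token types are preserved verbatim in the catch-all variant.
        assert_eq!(
            serde_json::from_value::<TokenType>(json!("<IDEOGRAPHIC>")).unwrap(),
            TokenType::Other("<IDEOGRAPHIC>".to_string())
        );
        assert_eq!(
            serde_json::to_value(TokenType::Other("<IDEOGRAPHIC>".to_string())).unwrap(),
            json!("<IDEOGRAPHIC>")
        );
    }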
}