1use serde::{de, Deserialize, Serialize, Serializer};
2
3#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
5pub enum AnalyzeResponse {
6 #[serde(rename = "tokens")]
8 Standard(Vec<Token>),
9
10 #[serde(rename = "detail")]
12 Explained(ExplainedResponse),
13}
14
15#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
17pub struct Token {
18 pub token: String,
20
21 pub start_offset: u32,
23
24 pub end_offset: u32,
26
27 #[serde(rename = "type")]
29 pub ty: TokenType,
30
31 pub position: u32,
33
34 pub bytes: Option<String>,
36
37 pub keyword: Option<bool>,
39
40 pub position_length: Option<u32>,
42
43 pub term_frequency: Option<u32>,
45}
46
47#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
49pub struct ExplainedResponse {
50 custom_analyzer: bool,
51
52 analyzer: Option<AnalysisObject>,
53
54 #[serde(default, rename = "charfilters")]
55 char_filters: Vec<CharFilter>,
56
57 tokenizer: Option<AnalysisObject>,
58
59 #[serde(default, rename = "tokenfilters")]
60 token_filters: Vec<AnalysisObject>,
61}
62
63#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
65pub struct AnalysisObject {
66 name: String,
67 tokens: Vec<Token>,
68}
69
70#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
72pub struct CharFilter {
73 name: String,
74 filtered_text: Vec<String>,
75}
76
/// Kind of a token, exchanged on the wire as a plain string.
///
/// Strings that match none of the known variants are kept verbatim in
/// [`TokenType::Other`]. The default value is [`TokenType::Alphanum`].
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum TokenType {
    /// Wire form `<ALPHANUM>`.
    #[default]
    Alphanum,

    /// Wire form `SYNONYM`.
    Synonym,

    /// Wire form `word`.
    Word,

    /// Wire form `<HANGUL>`.
    Hangul,

    /// Wire form `<NUM>`.
    Num,

    /// Wire form `<EMAIL>`.
    Email,

    /// Wire form `<APOSTROPHE>`.
    Apostrophe,

    /// Wire form `<DOUBLE>`.
    Double,

    /// Wire form `<KATAKANA>`.
    Katakana,

    /// Wire form `<ACRONYM>`.
    Acronym,

    /// Wire form `gram`.
    Gram,

    /// Wire form `fingerprint`.
    Fingerprint,

    /// Wire form `shingle`.
    Shingle,

    /// Any other type string, stored exactly as received.
    Other(String),
}
132
133impl Serialize for TokenType {
134 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
135 where
136 S: Serializer,
137 {
138 match self {
139 Self::Alphanum => "<ALPHANUM>",
140 Self::Synonym => "SYNONYM",
141 Self::Word => "word",
142 Self::Hangul => "<HANGUL>",
143 Self::Num => "<NUM>",
144 Self::Email => "<EMAIL>",
145 Self::Apostrophe => "<APOSTROPHE>",
146 Self::Double => "<DOUBLE>",
147 Self::Katakana => "<KATAKANA>",
148 Self::Acronym => "<ACRONYM>",
149 Self::Gram => "gram",
150 Self::Fingerprint => "fingerprint",
151 Self::Shingle => "shingle",
152 Self::Other(other) => other,
153 }
154 .serialize(serializer)
155 }
156}
157
158impl<'de> Deserialize<'de> for TokenType {
159 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
160 where
161 D: de::Deserializer<'de>,
162 {
163 Ok(match String::deserialize(deserializer)?.as_str() {
164 "<ALPHANUM>" => Self::Alphanum,
165 "SYNONYM" => Self::Synonym,
166 "word" => Self::Word,
167 "<HANGUL>" => Self::Hangul,
168 "<NUM>" => Self::Num,
169 "<EMAIL>" => Self::Email,
170 "<APOSTROPHE>" => Self::Apostrophe,
171 "<DOUBLE>" => Self::Double,
172 "<KATAKANA>" => Self::Katakana,
173 "<ACRONYM>" => Self::Acronym,
174 "gram" => Self::Gram,
175 "fingerprint" => Self::Fingerprint,
176 "shingle" => Self::Shingle,
177 other => Self::Other(other.to_string()),
178 })
179 }
180}
181
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// Builds a token with the mandatory fields set and every optional
    /// attribute left as `None`.
    fn plain_token(
        text: &str,
        start_offset: u32,
        end_offset: u32,
        ty: TokenType,
        position: u32,
    ) -> Token {
        Token {
            token: text.to_string(),
            start_offset,
            end_offset,
            ty,
            position,
            bytes: None,
            keyword: None,
            position_length: None,
            term_frequency: None,
        }
    }

    /// A `tokens` payload decodes into `AnalyzeResponse::Standard`.
    #[test]
    fn deserialize_standard() {
        let payload = json!({
            "tokens": [
                {
                    "token": "test1",
                    "start_offset": 0,
                    "end_offset": 6,
                    "type": "<ALPHANUM>",
                    "position": 0
                },
                {
                    "token": "test2",
                    "start_offset": 7,
                    "end_offset": 11,
                    "type": "<ALPHANUM>",
                    "position": 1
                }
            ]
        });

        let decoded = serde_json::from_value::<AnalyzeResponse>(payload).unwrap();

        let wanted = AnalyzeResponse::Standard(vec![
            plain_token("test1", 0, 6, TokenType::Alphanum, 0),
            plain_token("test2", 7, 11, TokenType::Alphanum, 1),
        ]);

        assert_eq!(wanted, decoded);
    }

    /// A `detail` payload decodes into `AnalyzeResponse::Explained`; the
    /// absent `analyzer` key becomes `None`.
    #[test]
    fn deserialize_explained() {
        let payload = json!({
            "detail": {
                "custom_analyzer": true,
                "charfilters": [
                    {
                        "name": "html_strip",
                        "filtered_text": [
                            "test"
                        ]
                    }
                ],
                "tokenizer": {
                    "name": "lowercase",
                    "tokens": [
                        {
                            "token": "test",
                            "start_offset": 0,
                            "end_offset": 6,
                            "type": "SYNONYM",
                            "position": 0
                        }
                    ]
                },
                "tokenfilters": [
                    {
                        "name": "__anonymous__stop",
                        "tokens": [
                            {
                                "token": "test",
                                "start_offset": 0,
                                "end_offset": 6,
                                "type": "SYNONYM",
                                "position": 0
                            }
                        ]
                    }
                ]
            }
        });

        let decoded = serde_json::from_value::<AnalyzeResponse>(payload).unwrap();

        let synonym = plain_token("test", 0, 6, TokenType::Synonym, 0);
        let html_strip = CharFilter {
            name: "html_strip".to_string(),
            filtered_text: vec!["test".to_string()],
        };
        let lowercase = AnalysisObject {
            name: "lowercase".to_string(),
            tokens: vec![synonym.clone()],
        };
        let stop_filter = AnalysisObject {
            name: "__anonymous__stop".to_string(),
            tokens: vec![synonym],
        };

        let wanted = AnalyzeResponse::Explained(ExplainedResponse {
            custom_analyzer: true,
            analyzer: None,
            char_filters: vec![html_strip],
            tokenizer: Some(lowercase),
            token_filters: vec![stop_filter],
        });

        assert_eq!(wanted, decoded);
    }
}