// elasticsearch_dsl/analyze/response.rs
use serde::{de, Deserialize, Serialize, Serializer};

/// Response of the analyze API
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum AnalyzeResponse {
    /// Flat list of generated tokens, returned when `explain` is not enabled
    #[serde(rename = "tokens")]
    Standard(Vec<Token>),

    /// Detailed breakdown per analysis component, returned when `explain` is enabled
    #[serde(rename = "detail")]
    Explained(ExplainedResponse),
}

/// A single token emitted by the analyzer
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct Token {
    /// The characters of the token
    pub token: String,

    /// Start offset of the token in the original text
    pub start_offset: u32,

    /// End offset of the token in the original text
    pub end_offset: u32,

    /// Type of the token, e.g. `<ALPHANUM>` or `SYNONYM`
    #[serde(rename = "type")]
    pub ty: TokenType,

    /// Position of the token within the token stream
    pub position: u32,

    /// Term bytes of the token
    pub bytes: Option<String>,

    /// Whether the token is marked as a keyword
    pub keyword: Option<bool>,

    /// Number of positions the token spans
    pub position_length: Option<u32>,

    /// Term frequency of the token
    pub term_frequency: Option<u32>,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct ExplainedResponse {
    custom_analyzer: bool,

    analyzer: Option<AnalysisObject>,

    #[serde(default, rename = "charfilters")]
    char_filters: Vec<CharFilter>,

    tokenizer: Option<AnalysisObject>,

    #[serde(default, rename = "tokenfilters")]
    token_filters: Vec<AnalysisObject>,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct AnalysisObject {
    name: String,
    tokens: Vec<Token>,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct CharFilter {
    name: String,
    filtered_text: Vec<String>,
}

/// Type of a token produced by analysis
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    /// Serialized as `<ALPHANUM>`
    Alphanum,

    /// Serialized as `SYNONYM`
    Synonym,

    /// Serialized as `word`
    Word,

    /// Serialized as `<HANGUL>`
    Hangul,

    /// Serialized as `<NUM>`
    Num,

    /// Serialized as `<EMAIL>`
    Email,

    /// Serialized as `<APOSTROPHE>`
    Apostrophe,

    /// Serialized as `<DOUBLE>`
    Double,

    /// Serialized as `<KATAKANA>`
    Katakana,

    /// Serialized as `<ACRONYM>`
    Acronym,

    /// Serialized as `gram`
    Gram,

    /// Serialized as `fingerprint`
    Fingerprint,

    /// Serialized as `shingle`
    Shingle,

    /// Any other token type, preserved as the raw string
    Other(String),
}

impl Default for TokenType {
    fn default() -> Self {
        Self::Alphanum
    }
}

impl Serialize for TokenType {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        match self {
            Self::Alphanum => "<ALPHANUM>",
            Self::Synonym => "SYNONYM",
            Self::Word => "word",
            Self::Hangul => "<HANGUL>",
            Self::Num => "<NUM>",
            Self::Email => "<EMAIL>",
            Self::Apostrophe => "<APOSTROPHE>",
            Self::Double => "<DOUBLE>",
            Self::Katakana => "<KATAKANA>",
            Self::Acronym => "<ACRONYM>",
            Self::Gram => "gram",
            Self::Fingerprint => "fingerprint",
            Self::Shingle => "shingle",
            Self::Other(other) => other,
        }
        .serialize(serializer)
    }
}

impl<'de> Deserialize<'de> for TokenType {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: de::Deserializer<'de>,
    {
        Ok(match String::deserialize(deserializer)?.as_str() {
            "<ALPHANUM>" => Self::Alphanum,
            "SYNONYM" => Self::Synonym,
            "word" => Self::Word,
            "<HANGUL>" => Self::Hangul,
            "<NUM>" => Self::Num,
            "<EMAIL>" => Self::Email,
            "<APOSTROPHE>" => Self::Apostrophe,
            "<DOUBLE>" => Self::Double,
            "<KATAKANA>" => Self::Katakana,
            "<ACRONYM>" => Self::Acronym,
            "gram" => Self::Gram,
            "fingerprint" => Self::Fingerprint,
            "shingle" => Self::Shingle,
            other => Self::Other(other.to_string()),
        })
    }
}

#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    #[test]
    fn deserialize_standard() {
        let json_response = json!({
            "tokens": [
                {
                    "token": "test1",
                    "start_offset": 0,
                    "end_offset": 6,
                    "type": "<ALPHANUM>",
                    "position": 0
                },
                {
                    "token": "test2",
                    "start_offset": 7,
                    "end_offset": 11,
                    "type": "<ALPHANUM>",
                    "position": 1
                }
            ]
        });

        let token_1 = Token {
            token: "test1".to_string(),
            start_offset: 0,
            end_offset: 6,
            ty: TokenType::Alphanum,
            position: 0,
            bytes: None,
            keyword: None,
            position_length: None,
            term_frequency: None,
        };
        let token_2 = Token {
            token: "test2".to_string(),
            start_offset: 7,
            end_offset: 11,
            ty: TokenType::Alphanum,
            position: 1,
            bytes: None,
            keyword: None,
            position_length: None,
            term_frequency: None,
        };

        let expected = AnalyzeResponse::Standard(vec![token_1, token_2]);
        let result: AnalyzeResponse = serde_json::from_value(json_response).unwrap();

        assert_eq!(expected, result);
    }

    #[test]
    fn deserialize_explained() {
        let json_response = json!({
            "detail": {
                "custom_analyzer": true,
                "charfilters": [
                    {
                        "name": "html_strip",
                        "filtered_text": [
                            "test"
                        ]
                    }
                ],
                "tokenizer": {
                    "name": "lowercase",
                    "tokens": [
                        {
                            "token": "test",
                            "start_offset": 0,
                            "end_offset": 6,
                            "type": "SYNONYM",
                            "position": 0
                        }
                    ]
                },
                "tokenfilters": [
                    {
                        "name": "__anonymous__stop",
                        "tokens": [
                            {
                                "token": "test",
                                "start_offset": 0,
                                "end_offset": 6,
                                "type": "SYNONYM",
                                "position": 0
                            }
                        ]
                    }
                ]
            }
        });

        let token = Token {
            token: "test".to_string(),
            start_offset: 0,
            end_offset: 6,
            ty: TokenType::Synonym,
            position: 0,
            bytes: None,
            keyword: None,
            position_length: None,
            term_frequency: None,
        };

        let expected = AnalyzeResponse::Explained(ExplainedResponse {
            custom_analyzer: true,
            analyzer: None,
            char_filters: vec![CharFilter {
                name: "html_strip".to_string(),
                filtered_text: vec!["test".to_string()],
            }],
            tokenizer: Some(AnalysisObject {
                name: "lowercase".to_string(),
                tokens: vec![token.clone()],
            }),
            token_filters: vec![AnalysisObject {
                name: "__anonymous__stop".to_string(),
                tokens: vec![token],
            }],
        });

        let result: AnalyzeResponse = serde_json::from_value(json_response).unwrap();

        assert_eq!(expected, result);
    }
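
    // Supplementary check, not part of the original test suite: a minimal
    // round-trip sketch for the hand-written `TokenType` serde impls above,
    // assuming nothing beyond those impls. Known variants should serialize to
    // their Elasticsearch string forms, and unrecognised strings should fall
    // back to `TokenType::Other`. The test name and cases are illustrative.
    #[test]
    fn serialize_deserialize_token_type_roundtrip() {
        // The serialized form is the raw Elasticsearch type string.
        assert_eq!(
            serde_json::to_value(TokenType::Alphanum).unwrap(),
            json!("<ALPHANUM>")
        );

        let types = vec![
            TokenType::Alphanum,
            TokenType::Synonym,
            TokenType::Word,
            TokenType::Other("<CUSTOM>".to_string()),
        ];

        for ty in types {
            // Serialize to a JSON value, then read it back into a TokenType.
            let json = serde_json::to_value(&ty).unwrap();
            let round_tripped: TokenType = serde_json::from_value(json).unwrap();
            assert_eq!(ty, round_tripped);
        }
    }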
}