elasticsearch_dsl/analyze/response.rs

use serde::{de, Deserialize, Serialize, Serializer};

/// Elasticsearch analyze API response
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum AnalyzeResponse {
    /// Standard response, when `explain` value is `false`
    #[serde(rename = "tokens")]
    Standard(Vec<Token>),

    /// Explained response, when `explain` value is `true`
    #[serde(rename = "detail")]
    Explained(ExplainedResponse),
}
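
// Note (added for clarity, not part of the original source): with serde's
// default externally tagged enum representation, the `rename` values above
// double as the top-level response keys, so a body like `{"tokens": [...]}`
// deserializes into `AnalyzeResponse::Standard` and `{"detail": {...}}` into
// `AnalyzeResponse::Explained`.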

/// Token extracted from the text by the tokenizer
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct Token {
    /// The characters of the current token
    pub token: String,

    /// The start offset of the current token
    pub start_offset: u32,

    /// The end offset of the current token
    pub end_offset: u32,

    /// The type of the current token
    #[serde(rename = "type")]
    pub ty: TokenType,

    /// The position of the current token
    pub position: u32,

    /// The bytes representation of the current token
    pub bytes: Option<String>,

    /// Whether or not the current token is marked as a keyword
    pub keyword: Option<bool>,

    /// The position length of the current token
    pub position_length: Option<u32>,

    /// Term frequency of the token in the analyzed text
    pub term_frequency: Option<u32>,
}

/// Explained response structure
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct ExplainedResponse {
    custom_analyzer: bool,

    analyzer: Option<AnalysisObject>,

    #[serde(default, rename = "charfilters")]
    char_filters: Vec<CharFilter>,

    tokenizer: Option<AnalysisObject>,

    #[serde(default, rename = "tokenfilters")]
    token_filters: Vec<AnalysisObject>,
}

/// Structure for analyzer, tokenizer and token filters
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct AnalysisObject {
    name: String,
    tokens: Vec<Token>,
}

/// Structure for char filters
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct CharFilter {
    name: String,
    filtered_text: Vec<String>,
}

/// Type of token
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum TokenType {
    /// Alphanumeric token
    #[default]
    Alphanum,

    /// Synonym token
    Synonym,

    /// Word token
    Word,

    /// Hangul (Korean alphabet) token
    Hangul,

    /// Numeric token
    Num,

    /// Email token
    Email,

    /// Words with apostrophe token
    Apostrophe,

    /// CJK (Chinese, Japanese, and Korean) token
    Double,

    /// Normalized CJK (Chinese, Japanese, and Korean) token.
    /// Normalizes width differences in CJK characters as follows:
    /// - Folds full-width ASCII character variants into the equivalent basic Latin characters
    /// - Folds half-width Katakana character variants into the equivalent Kana characters
    Katakana,

    /// Acronym token
    Acronym,

    /// Gram token
    Gram,

    /// Fingerprint token
    Fingerprint,

    /// Shingle token
    Shingle,

    /// Other token
    Other(String),
}

impl Serialize for TokenType {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        match self {
            Self::Alphanum => "<ALPHANUM>",
            Self::Synonym => "SYNONYM",
            Self::Word => "word",
            Self::Hangul => "<HANGUL>",
            Self::Num => "<NUM>",
            Self::Email => "<EMAIL>",
            Self::Apostrophe => "<APOSTROPHE>",
            Self::Double => "<DOUBLE>",
            Self::Katakana => "<KATAKANA>",
            Self::Acronym => "<ACRONYM>",
            Self::Gram => "gram",
            Self::Fingerprint => "fingerprint",
            Self::Shingle => "shingle",
            Self::Other(other) => other,
        }
        .serialize(serializer)
    }
}

impl<'de> Deserialize<'de> for TokenType {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: de::Deserializer<'de>,
    {
        Ok(match String::deserialize(deserializer)?.as_str() {
            "<ALPHANUM>" => Self::Alphanum,
            "SYNONYM" => Self::Synonym,
            "word" => Self::Word,
            "<HANGUL>" => Self::Hangul,
            "<NUM>" => Self::Num,
            "<EMAIL>" => Self::Email,
            "<APOSTROPHE>" => Self::Apostrophe,
            "<DOUBLE>" => Self::Double,
            "<KATAKANA>" => Self::Katakana,
            "<ACRONYM>" => Self::Acronym,
            "gram" => Self::Gram,
            "fingerprint" => Self::Fingerprint,
            "shingle" => Self::Shingle,
            other => Self::Other(other.to_string()),
        })
    }
}

#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    #[test]
    fn deserialize_standard() {
        let json_response = json!({
            "tokens": [
                {
                    "token": "test1",
                    "start_offset": 0,
                    "end_offset": 6,
                    "type": "<ALPHANUM>",
                    "position": 0
                },
                {
                    "token": "test2",
                    "start_offset": 7,
                    "end_offset": 11,
                    "type": "<ALPHANUM>",
                    "position": 1
                }
            ]
        });

        let token_1 = Token {
            token: "test1".to_string(),
            start_offset: 0,
            end_offset: 6,
            ty: TokenType::Alphanum,
            position: 0,
            bytes: None,
            keyword: None,
            position_length: None,
            term_frequency: None,
        };
        let token_2 = Token {
            token: "test2".to_string(),
            start_offset: 7,
            end_offset: 11,
            ty: TokenType::Alphanum,
            position: 1,
            bytes: None,
            keyword: None,
            position_length: None,
            term_frequency: None,
        };

        let expected = AnalyzeResponse::Standard(vec![token_1, token_2]);
        let result: AnalyzeResponse = serde_json::from_value(json_response).unwrap();

        assert_eq!(expected, result);
    }

    #[test]
    fn deserialize_explained() {
        let json_response = json!({
            "detail": {
                "custom_analyzer": true,
                "charfilters": [
                    {
                        "name": "html_strip",
                        "filtered_text": [
                            "test"
                        ]
                    }
                ],
                "tokenizer": {
                    "name": "lowercase",
                    "tokens": [
                        {
                            "token": "test",
                            "start_offset": 0,
                            "end_offset": 6,
                            "type": "SYNONYM",
                            "position": 0
                        }
                    ]
                },
                "tokenfilters": [
                    {
                        "name": "__anonymous__stop",
                        "tokens": [
                            {
                                "token": "test",
                                "start_offset": 0,
                                "end_offset": 6,
                                "type": "SYNONYM",
                                "position": 0
                            }
                        ]
                    }
                ]
            }
        });

        let token = Token {
            token: "test".to_string(),
            start_offset: 0,
            end_offset: 6,
            ty: TokenType::Synonym,
            position: 0,
            bytes: None,
            keyword: None,
            position_length: None,
            term_frequency: None,
        };

        let expected = AnalyzeResponse::Explained(ExplainedResponse {
            custom_analyzer: true,
            analyzer: None,
            char_filters: vec![CharFilter {
                name: "html_strip".to_string(),
                filtered_text: vec!["test".to_string()],
            }],
            tokenizer: Some(AnalysisObject {
                name: "lowercase".to_string(),
                tokens: vec![token.clone()],
            }),
            token_filters: vec![AnalysisObject {
                name: "__anonymous__stop".to_string(),
                tokens: vec![token],
            }],
        });

        let result: AnalyzeResponse = serde_json::from_value(json_response).unwrap();

        assert_eq!(expected, result);
    }
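
    // Illustrative sketch added for clarity (not part of the original suite):
    // exercises the custom `Serialize`/`Deserialize` impls above, showing that
    // known token types round-trip through their wire strings and that unknown
    // strings fall back to `TokenType::Other`. The string "my_custom_type" is
    // a made-up example value.
    #[test]
    fn token_type_round_trip() {
        let alphanum: TokenType = serde_json::from_value(json!("<ALPHANUM>")).unwrap();
        assert_eq!(alphanum, TokenType::Alphanum);
        assert_eq!(serde_json::to_value(&alphanum).unwrap(), json!("<ALPHANUM>"));

        let custom: TokenType = serde_json::from_value(json!("my_custom_type")).unwrap();
        assert_eq!(custom, TokenType::Other("my_custom_type".to_string()));
        assert_eq!(serde_json::to_value(&custom).unwrap(), json!("my_custom_type"));
    }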
}