elasticsearch_dsl/analyze/request.rs

use crate::util::*;
use serde::ser::{Serialize, SerializeStruct, Serializer};

/// Performs analysis on a text string and returns the resulting tokens.
///
/// A basic `analyze` request:
/// ```
/// # use elasticsearch_dsl::analyze::*;
/// # let query = Analyze::new("test this text");
/// ```
/// To `analyze` with a custom analyzer:
/// ```
/// # use elasticsearch_dsl::analyze::*;
/// # use serde_json::json;
/// let custom_analyzer = CustomAnalyzer::new("whitespace")
///    .filter([
///        StringOrObject::String("lowercase".to_string()),
///        StringOrObject::Object(json!({"type": "stop", "stopwords": ["a", "is", "this"]})),
///    ]);
/// let test = Analyze::new(["test this text", "and this one please"])
///    .analyzer(custom_analyzer)
///    .explain(true)
///    .attributes(["attributes"]);
/// ```
/// To `analyze` with a custom normalizer:
/// ```
/// # use elasticsearch_dsl::analyze::*;
/// # use serde_json::json;
/// let custom_normalizer = CustomNormalizer::new()
///    .char_filter([
///        json!({ "type": "mapping", "mappings": ["٠ => 0", "١ => 1", "٢ => 2"] }),
///    ])
///    .filter(["snowball"]);
/// let test = Analyze::new(["test this text", "and this one please"])
///    .analyzer(custom_normalizer)
///    .explain(true)
///    .attributes(["attributes"]);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Default)]
pub struct Analyze {
    text: StringOrVecString,

    #[serde(skip_serializing_if = "ShouldSkip::should_skip", flatten)]
    analysis: Option<Analysis>,

    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
    attributes: Vec<String>,

    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
    explain: Option<bool>,
}

/// Structure of a custom analyzer.
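///
/// A minimal construction sketch using the builder methods below:
/// ```
/// # use elasticsearch_dsl::analyze::*;
/// let analyzer = CustomAnalyzer::new("standard")
///     .char_filter(["html_strip"])
///     .filter(["lowercase"]);
/// ```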
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Default)]
pub struct CustomAnalyzer {
    tokenizer: String,

    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
    char_filter: Vec<StringOrObject>,

    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
    filter: Vec<StringOrObject>,
}

/// Structure of a custom normalizer.
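///
/// A minimal construction sketch using the builder methods below:
/// ```
/// # use elasticsearch_dsl::analyze::*;
/// let normalizer = CustomNormalizer::new()
///     .filter(["lowercase", "asciifolding"]);
/// ```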
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Default)]
pub struct CustomNormalizer {
    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
    char_filter: Vec<StringOrObject>,

    #[serde(skip_serializing_if = "ShouldSkip::should_skip")]
    filter: Vec<StringOrObject>,
}

/// Analysis types
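///
/// A sketch of the helper constructors defined on this enum:
/// ```
/// # use elasticsearch_dsl::analyze::*;
/// let _ = Analysis::analyzer("standard");
/// let _ = Analysis::normalizer("asciifolding");
/// let _ = Analysis::field("title");
/// ```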
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Analysis {
    /// The name of the analyzer that should be applied to the provided text.
    /// This could be a `built-in analyzer`, or an analyzer that’s been configured in the index.
    /// If this parameter is not specified, the analyze API uses the analyzer defined in the field’s mapping.
    /// If no field is specified, the analyze API uses the default analyzer for the index.
    /// If no index is specified, or the index does not have a default analyzer, the analyze API uses the `standard analyzer`.
    ///
    /// <https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-analyzers.html>
    BuiltInAnalyzer(String),

    /// Custom analyzer that should be applied to the provided text.
    CustomAnalyzer(CustomAnalyzer),

    /// The name of a built-in normalizer to use to convert text into a single token.
    ///
    /// <https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-normalizers.html>
    BuiltInNormalizer(String),

    /// The custom normalizer to use to convert text into a single token.
    CustomNormalizer(CustomNormalizer),

    /// Field used to derive the analyzer. To use this parameter, you must specify an index.
    /// If specified, the analyzer parameter overrides this value.
    /// If no field is specified, the analyze API uses the default analyzer for the index.
    /// If no index is specified or the index does not have a default analyzer, the analyze API uses the `standard analyzer`.
    Field(String),
}

/// A filter, given either by name (built-in) or as a custom definition object.
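///
/// Both forms serialize untagged, so either shape can appear in a filter array (sketch):
/// ```
/// # use elasticsearch_dsl::analyze::*;
/// # use serde_json::json;
/// let by_name = StringOrObject::from("lowercase");
/// let by_definition = StringOrObject::from(json!({"type": "stop", "stopwords": ["a"]}));
/// ```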
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(untagged)]
pub enum StringOrObject {
    /// Built-in filters
    String(String),

    /// Custom filters
    Object(serde_json::Value),
}

/// Type for the text field. Text can be a single string or an array of strings.
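///
/// Conversions are provided for single strings and for collections of strings (sketch):
/// ```
/// # use elasticsearch_dsl::analyze::*;
/// let single = StringOrVecString::from("one text to analyze");
/// let multiple = StringOrVecString::from(["first text", "second text"]);
/// ```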
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(untagged)]
pub enum StringOrVecString {
    /// One text input to analyze
    String(String),

    /// Multiple text inputs to analyze
    VecString(Vec<String>),
}

impl Analyze {
    /// Creates an instance of [Analyze]
    ///
    /// - `text` - Text to analyze. If an array of strings is provided, it is analyzed as a multi-value field.
    pub fn new<S>(text: S) -> Self
    where
        S: Into<StringOrVecString>,
    {
        Self {
            text: text.into(),
            analysis: None,
            attributes: vec![],
            explain: None,
        }
    }

    /// Specifies the analysis to apply: a built-in analyzer, a custom analyzer,
    /// a built-in normalizer, a custom normalizer, or a field.
    pub fn analyzer<S>(mut self, analyzer: S) -> Self
    where
        S: Into<Analysis>,
    {
        self.analysis = Some(analyzer.into());
        self
    }

    /// Array of token attributes used to filter the output of the explain parameter.
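    ///
    /// For example, combined with [Analyze::explain] (the attribute name here is illustrative):
    /// ```
    /// # use elasticsearch_dsl::analyze::*;
    /// let query = Analyze::new("test this text")
    ///     .explain(true)
    ///     .attributes(["keyword"]);
    /// ```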
    pub fn attributes<I>(mut self, attributes: I) -> Self
    where
        I: IntoIterator,
        I::Item: ToString,
    {
        self.attributes
            .extend(attributes.into_iter().map(|x| x.to_string()));
        self
    }

    /// If `true`, the response includes token attributes and additional details.
    /// Defaults to `false`. This parameter is experimental.
    pub fn explain(mut self, explain: bool) -> Self {
        self.explain = Some(explain);
        self
    }
}

impl CustomNormalizer {
    /// Creates an instance of [CustomNormalizer]
    pub fn new() -> Self {
        Default::default()
    }

    /// Array of character filters used to preprocess characters before the token filters.
    /// See `Character filters reference` for a list of character filters.
    ///
    /// <https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-charfilters.html>
    pub fn char_filter<I>(mut self, char_filter: I) -> Self
    where
        I: IntoIterator,
        I::Item: Into<StringOrObject>,
    {
        self.char_filter
            .extend(char_filter.into_iter().map(Into::into));
        self
    }

    /// Array of token filters to apply after the character filters.
    /// See `Token filter reference` for a list of token filters.
    ///
    /// <https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-tokenfilters.html>
    pub fn filter<I>(mut self, filter: I) -> Self
    where
        I: IntoIterator,
        I::Item: Into<StringOrObject>,
    {
        self.filter.extend(filter.into_iter().map(Into::into));
        self
    }
}

impl CustomAnalyzer {
    /// Creates an instance of [CustomAnalyzer] and sets the tokenizer.
    /// The tokenizer converts text into tokens. See `Tokenizer reference` for a list of tokenizers.
    ///
    /// <https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-tokenizers.html>
    pub fn new<S>(tokenizer: S) -> Self
    where
        S: ToString,
    {
        Self {
            tokenizer: tokenizer.to_string(),
            char_filter: vec![],
            filter: vec![],
        }
    }

    /// Array of character filters used to preprocess characters before the tokenizer.
    /// See `Character filters reference` for a list of character filters.
    ///
    /// <https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-charfilters.html>
    pub fn char_filter<I>(mut self, char_filter: I) -> Self
    where
        I: IntoIterator,
        I::Item: Into<StringOrObject>,
    {
        self.char_filter
            .extend(char_filter.into_iter().map(Into::into));
        self
    }

    /// Array of token filters to apply after the tokenizer.
    /// See `Token filter reference` for a list of token filters.
    ///
    /// <https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-tokenfilters.html>
    pub fn filter<I>(mut self, filter: I) -> Self
    where
        I: IntoIterator,
        I::Item: Into<StringOrObject>,
    {
        self.filter.extend(filter.into_iter().map(Into::into));
        self
    }
}

impl Analysis {
    /// Creates an instance of [`Analysis::Field`]
    pub fn field<S>(value: S) -> Self
    where
        S: ToString,
    {
        Self::Field(value.to_string())
    }

    /// Creates an instance of [`Analysis::BuiltInAnalyzer`]
    pub fn analyzer<S>(value: S) -> Self
    where
        S: ToString,
    {
        Self::BuiltInAnalyzer(value.to_string())
    }

    /// Creates an instance of [`Analysis::BuiltInNormalizer`]
    pub fn normalizer<S>(value: S) -> Self
    where
        S: ToString,
    {
        Self::BuiltInNormalizer(value.to_string())
    }
}

impl<'a> From<&'a str> for StringOrObject {
    fn from(value: &'a str) -> Self {
        Self::String(value.to_owned())
    }
}

impl From<String> for StringOrObject {
    fn from(value: String) -> Self {
        Self::String(value)
    }
}

impl From<serde_json::Value> for StringOrObject {
    fn from(value: serde_json::Value) -> Self {
        Self::Object(value)
    }
}

impl From<CustomAnalyzer> for Analysis {
    fn from(value: CustomAnalyzer) -> Self {
        Self::CustomAnalyzer(value)
    }
}

impl From<CustomNormalizer> for Analysis {
    fn from(value: CustomNormalizer) -> Self {
        Self::CustomNormalizer(value)
    }
}

impl From<String> for StringOrVecString {
    fn from(value: String) -> Self {
        Self::String(value)
    }
}

impl From<&str> for StringOrVecString {
    fn from(value: &str) -> Self {
        Self::String(value.into())
    }
}

impl From<Vec<&str>> for StringOrVecString {
    fn from(value: Vec<&str>) -> Self {
        Self::VecString(value.into_iter().map(Into::into).collect())
    }
}

impl<const N: usize> From<[&str; N]> for StringOrVecString {
    fn from(value: [&str; N]) -> Self {
        Self::VecString(value.iter().map(ToString::to_string).collect())
    }
}

impl<'a> From<&'a [&str]> for StringOrVecString {
    fn from(value: &'a [&str]) -> Self {
        Self::VecString(value.iter().map(ToString::to_string).collect())
    }
}

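// Built-in analyzers, normalizers, and fields serialize as single-key objects
// ({"analyzer": ...}, {"normalizer": ...}, {"field": ...}) so that the
// #[serde(flatten)] attribute on `Analyze::analysis` merges the chosen key
// into the request body; custom definitions delegate to their own
// `Serialize` implementations.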
impl Serialize for Analysis {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        match self {
            Analysis::BuiltInAnalyzer(name) => {
                let mut state = serializer.serialize_struct("analysis_analyzer", 1)?;
                state.serialize_field("analyzer", name)?;
                state.end()
            }
            Analysis::CustomAnalyzer(analyzer) => analyzer.serialize(serializer),
            Analysis::BuiltInNormalizer(name) => {
                let mut state = serializer.serialize_struct("analysis_normalizer", 1)?;
                state.serialize_field("normalizer", name)?;
                state.end()
            }
            Analysis::CustomNormalizer(normalizer) => normalizer.serialize(serializer),
            Analysis::Field(name) => {
                let mut state = serializer.serialize_struct("analysis_field", 1)?;
                state.serialize_field("field", name)?;
                state.end()
            }
        }
    }
}

impl Default for StringOrVecString {
    fn default() -> Self {
        Self::String(Default::default())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn serialization() {
        assert_serialize(
            Analyze::new("analyze these pants"),
            json!({
                "text": "analyze these pants"
            }),
        );

        assert_serialize(
            Analyze::new("analyze these pants").analyzer(Analysis::analyzer("test_default")),
            json!({
                "text": "analyze these pants",
                "analyzer": "test_default"
            }),
        );

        assert_serialize(
            Analyze::new(["here is one to test", "and here is another one"])
                .analyzer(
                    CustomAnalyzer::new("lowercase")
                        .char_filter(["html_strip", "test_strip"])
                        .filter([json!({"type": "stop", "stopwords": ["a", "is", "this"]})]),
                )
                .attributes(["score", "keyword"])
                .explain(true),
            json!({
                "attributes": [
                    "score",
                    "keyword"
                ],
                "char_filter": [
                    "html_strip",
                    "test_strip"
                ],
                "filter": [{"type": "stop", "stopwords": ["a", "is", "this"]}],
                "tokenizer": "lowercase",
                "explain": true,
                "text": ["here is one to test", "and here is another one"]
            }),
        );

        assert_serialize(
            Analyze::new("analyze these pants").analyzer(Analysis::normalizer("asciifolding")),
            json!({
                "text": "analyze these pants",
                "normalizer": "asciifolding"
            }),
        );

        assert_serialize(
            Analyze::new(["here is one to test", "and here is another one"])
                .analyzer(
                    CustomNormalizer::new()
                        .char_filter(["html_strip", "test_strip"])
                        .filter([json!({"type": "stop", "stopwords": ["a", "is", "this"]})]),
                )
                .attributes(["score", "keyword"])
                .explain(true),
            json!({
                "attributes": [
                    "score",
                    "keyword"
                ],
                "char_filter": [
                    "html_strip",
                    "test_strip"
                ],
                "filter": [{"type": "stop", "stopwords": ["a", "is", "this"]}],
                "explain": true,
                "text": ["here is one to test", "and here is another one"]
            }),
        );

        assert_serialize(
            Analyze::new("analyze these pants").analyzer(Analysis::field("title")),
            json!({
                "text": "analyze these pants",
                "field": "title"
            }),
        );
    }
}