// opensearch_dsl/analyze/request.rs

1use serde::ser::{Serialize, SerializeStruct, Serializer};
2
3use crate::util::*;
4
5/// Performs analysis on a text string and returns the resulting tokens.
6/// The basic `analyze`:
7/// ```
8/// # use opensearch_dsl::analyze::*;
9/// # let query = Analyze::new("test this text");
10/// ```
11/// To `analyze` with custom analyzer:
12/// ```
13/// # use opensearch_dsl::analyze::*;
14/// # use serde_json::json;
15/// let custom_analyzer = CustomAnalyzer::new("whitespace")
16///    .filter([
17///        StringOrObject::String("lowercase".to_string()),
18///        StringOrObject::Object(json!({"type": "stop", "stopwords": ["a", "is", "this"]})),
19///    ]);
20/// let test = Analyze::new(["test this text", "and this one please"])
21///    .analyzer(custom_analyzer)
22///    .explain(true)
23///    .attributes(["attributes"]);
24/// ```
25/// To `analyze` custom normalizer:
26/// ```
27/// # use opensearch_dsl::analyze::*;
28/// # use serde_json::json;
29/// let custom_normalizer = CustomNormalizer::new()
30///    .char_filter([
31///        json!({ "type": "mapping", "mappings": ["٠ => 0", "١ => 1", "٢ => 2"] }),
32///    ])
33///    .filter(["snowball"]);
34/// let test = Analyze::new(["test this text", "and this one please"])
35///    .analyzer(custom_normalizer)
36///    .explain(true)
37///    .attributes(["attributes"]);
38/// ```
39#[derive(Debug, Clone, PartialEq, Eq, Serialize, Default)]
40pub struct Analyze {
41    text: StringOrVecString,
42
43    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip", flatten)]
44    analysis: Option<Analysis>,
45
46    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
47    attributes: Vec<String>,
48
49    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
50    explain: Option<bool>,
51}
52
53/// Structure of custom analyzer.
54#[derive(Debug, Clone, PartialEq, Eq, Serialize, Default)]
55pub struct CustomAnalyzer {
56    tokenizer: String,
57
58    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
59    char_filter: Vec<StringOrObject>,
60
61    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
62    filter: Vec<StringOrObject>,
63}
64
65/// Structure of custom normalizer
66#[derive(Debug, Clone, PartialEq, Eq, Serialize, Default)]
67pub struct CustomNormalizer {
68    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
69    char_filter: Vec<StringOrObject>,
70
71    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
72    filter: Vec<StringOrObject>,
73}
74
75/// Analysis types
76#[derive(Debug, Clone, PartialEq, Eq)]
77pub enum Analysis {
78    /// The name of the analyzer that should be applied to the provided text.
79    /// This could be a `built-in analyzer`, or an analyzer that’s been configured
80    /// in the index. If this parameter is not specified, the analyze API uses
81    /// the analyzer defined in the field’s mapping. If no field is specified,
82    /// the analyze API uses the default analyzer for the index. If no index is
83    /// specified, or the index does not have a default analyzer, the analyze API
84    /// uses the `standard analyzer`.
85    ///
86    /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-analyzers.html>
87    BuiltInAnalyzer(String),
88
89    /// Custom analyzer that should be applied to the provided text.
90    CustomAnalyzer(CustomAnalyzer),
91
92    /// The name of built-in normalizer to use to convert text into a single
93    /// token.
94    ///
95    /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-normalizers.html>
96    BuiltInNormalizer(String),
97
98    /// The custom normalizer to use to convert text into a single token.
99    CustomNormalizer(CustomNormalizer),
100
101    /// Field used to derive the analyzer. To use this parameter, you must specify
102    /// an index. If specified, the analyzer parameter overrides this value.
103    /// If no field is specified, the analyze API uses the default analyzer for
104    /// the index. If no index is specified or the index does not have a default
105    /// analyzer, the analyze API uses the `standard analyzer`.
106    Field(String),
107}
108
109/// Structure of filters
110#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
111#[serde(untagged)]
112pub enum StringOrObject {
113    /// Built-in filters
114    String(String),
115
116    /// Custom filters
117    Object(serde_json::Value),
118}
119
120/// Type for text field. Text can be string or array of strings
121#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
122#[serde(untagged)]
123pub enum StringOrVecString {
124    /// One text input to analyze
125    String(String),
126
127    /// Multiple text inputs to analyze
128    VecString(Vec<String>),
129}
130
131impl Analyze {
132    /// Creates an instance of [Analyze]
133    ///
134    /// - `text` - Text to analyze. If an array of strings is provided, it is
135    ///   analyzed as a multi-value field.
136    pub fn new<S>(text: S) -> Self
137    where
138        S: Into<StringOrVecString>,
139    {
140        Self {
141            text: text.into(),
142            analysis: None,
143            attributes: vec![],
144            explain: None,
145        }
146    }
147
148    /// Specify an analyzer, either it's built-in analyzer, custom analyzer,
149    /// built-in normalizer, custom normalizer or field
150    pub fn analyzer<S>(mut self, analyzer: S) -> Self
151    where
152        S: Into<Analysis>,
153    {
154        self.analysis = Some(analyzer.into());
155        self
156    }
157
158    /// Array of token attributes used to filter the output of the explain
159    /// parameter.
160    pub fn attributes<I>(mut self, attributes: I) -> Self
161    where
162        I: IntoIterator,
163        I::Item: ToString,
164    {
165        self.attributes
166            .extend(attributes.into_iter().map(|x| x.to_string()));
167        self
168    }
169
170    /// If `true`, the response includes token attributes and additional details.
171    /// Defaults to `false`. `experimental`
172    pub fn explain(mut self, explain: bool) -> Self {
173        self.explain = Some(explain);
174        self
175    }
176}
177
178impl CustomNormalizer {
179    /// Create instance of custom normalizer
180    pub fn new() -> Self {
181        Default::default()
182    }
183
184    /// Array of character filters used to preprocess characters before the
185    /// tokenizer. See `Character filters reference` for a list of character
186    /// filters.
187    ///
188    /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-charfilters.html>
189    pub fn char_filter<I>(mut self, char_filter: I) -> Self
190    where
191        I: IntoIterator,
192        I::Item: Into<StringOrObject>,
193    {
194        self.char_filter
195            .extend(char_filter.into_iter().map(Into::into));
196        self
197    }
198
199    /// Array of token filters used to apply after the tokenizer.
200    /// See `Token filter reference` for a list of token filters.
201    ///
202    /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-tokenfilters.html>
203    pub fn filter<I>(mut self, filter: I) -> Self
204    where
205        I: IntoIterator,
206        I::Item: Into<StringOrObject>,
207    {
208        self.filter.extend(filter.into_iter().map(Into::into));
209        self
210    }
211}
212
213impl CustomAnalyzer {
214    /// Create instance of custom analyzer and sets tokenizer
215    /// Tokenizer to use to convert text into tokens. See `Tokenizer reference`
216    /// for a list of tokenizers.
217    ///
218    /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-tokenizers.html>
219    pub fn new<S>(tokenizer: S) -> Self
220    where
221        S: ToString,
222    {
223        Self {
224            tokenizer: tokenizer.to_string(),
225            char_filter: vec![],
226            filter: vec![],
227        }
228    }
229
230    /// Array of character filters used to preprocess characters before the
231    /// tokenizer. See `Character filters reference` for a list of character
232    /// filters.
233    ///
234    /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-charfilters.html>
235    pub fn char_filter<I>(mut self, char_filter: I) -> Self
236    where
237        I: IntoIterator,
238        I::Item: Into<StringOrObject>,
239    {
240        self.char_filter
241            .extend(char_filter.into_iter().map(Into::into));
242        self
243    }
244
245    /// Array of token filters used to apply after the tokenizer.
246    /// See `Token filter reference` for a list of token filters.
247    ///
248    /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-tokenfilters.html>
249    pub fn filter<I>(mut self, filter: I) -> Self
250    where
251        I: IntoIterator,
252        I::Item: Into<StringOrObject>,
253    {
254        self.filter.extend(filter.into_iter().map(Into::into));
255        self
256    }
257}
258
259impl Analysis {
260    /// Creates an instance of [`Analysis::Field`]
261    pub fn field<S>(value: S) -> Self
262    where
263        S: ToString,
264    {
265        Self::Field(value.to_string())
266    }
267
268    /// Creates an instance of [`Analysis::BuiltInAnalyzer`]
269    pub fn analyzer<S>(value: S) -> Self
270    where
271        S: ToString,
272    {
273        Self::BuiltInAnalyzer(value.to_string())
274    }
275
276    /// Creates an instance of [`Analysis::BuiltInNormalizer`]
277    pub fn normalizer<S>(value: S) -> Self
278    where
279        S: ToString,
280    {
281        Self::BuiltInNormalizer(value.to_string())
282    }
283}
284
285impl<'a> From<&'a str> for StringOrObject {
286    fn from(value: &'a str) -> Self {
287        Self::String(value.to_owned())
288    }
289}
290
291impl From<String> for StringOrObject {
292    fn from(value: String) -> Self {
293        Self::String(value)
294    }
295}
296
297impl From<serde_json::Value> for StringOrObject {
298    fn from(value: serde_json::Value) -> Self {
299        Self::Object(value)
300    }
301}
302
303impl From<CustomAnalyzer> for Analysis {
304    fn from(value: CustomAnalyzer) -> Self {
305        Self::CustomAnalyzer(value)
306    }
307}
308
309impl From<CustomNormalizer> for Analysis {
310    fn from(value: CustomNormalizer) -> Self {
311        Self::CustomNormalizer(value)
312    }
313}
314
315impl From<String> for StringOrVecString {
316    fn from(value: String) -> Self {
317        Self::String(value)
318    }
319}
320
321impl From<&str> for StringOrVecString {
322    fn from(value: &str) -> Self {
323        Self::String(value.into())
324    }
325}
326
327impl From<Vec<&str>> for StringOrVecString {
328    fn from(value: Vec<&str>) -> Self {
329        Self::VecString(value.into_iter().map(Into::into).collect())
330    }
331}
332
333impl<const N: usize> From<[&str; N]> for StringOrVecString {
334    fn from(value: [&str; N]) -> Self {
335        Self::VecString(value.iter().map(ToString::to_string).collect())
336    }
337}
338
339impl<'a> From<&'a [&str]> for StringOrVecString {
340    fn from(value: &'a [&str]) -> Self {
341        Self::VecString(value.iter().map(ToString::to_string).collect())
342    }
343}
344
345impl Serialize for Analysis {
346    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
347    where
348        S: Serializer,
349    {
350        match self {
351            Analysis::BuiltInAnalyzer(name) => {
352                let mut state = serializer.serialize_struct("analysis_analyzer", 1)?;
353                state.serialize_field("analyzer", name)?;
354                state.end()
355            }
356            Analysis::CustomAnalyzer(analyzer) => analyzer.serialize(serializer),
357            Analysis::BuiltInNormalizer(name) => {
358                let mut state = serializer.serialize_struct("analysis_normalizer", 1)?;
359                state.serialize_field("normalizer", name)?;
360                state.end()
361            }
362            Analysis::CustomNormalizer(normalizer) => normalizer.serialize(serializer),
363            Analysis::Field(name) => {
364                let mut state = serializer.serialize_struct("analysis_field", 1)?;
365                state.serialize_field("field", name)?;
366                state.end()
367            }
368        }
369    }
370}
371
372impl Default for StringOrVecString {
373    fn default() -> Self {
374        Self::String(Default::default())
375    }
376}
377
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the JSON produced for a request without analysis settings and
    /// for a request using each [`Analysis`] variant.
    #[test]
    fn serialization() {
        // Shared fixtures for the multi-text cases.
        let texts = ["here is one to test", "and here is another one"];
        let stop_filter = || json!({"type": "stop", "stopwords": ["a", "is", "this"]});

        // Text only: nothing but `text` is emitted.
        let text_only = Analyze::new("analyze these pants");
        assert_serialize(
            text_only,
            json!({
                "text": "analyze these pants"
            }),
        );

        // Built-in analyzer referenced by name.
        let built_in_analyzer =
            Analyze::new("analyze these pants").analyzer(Analysis::analyzer("test_default"));
        assert_serialize(
            built_in_analyzer,
            json!({
                "text": "analyze these pants",
                "analyzer": "test_default"
            }),
        );

        // Custom analyzer: its fields are flattened into the request body.
        let custom_analyzer = CustomAnalyzer::new("lowercase")
            .char_filter(["html_strip", "test_strip"])
            .filter([stop_filter()]);
        let with_custom_analyzer = Analyze::new(texts)
            .analyzer(custom_analyzer)
            .attributes(["score", "keyword"])
            .explain(true);
        assert_serialize(
            with_custom_analyzer,
            json!({
                "attributes": [
                    "score",
                    "keyword"
                ],
                "char_filter": [
                    "html_strip",
                    "test_strip"
                ],
                "filter" : [{"type": "stop", "stopwords": ["a", "is", "this"]}],
                "tokenizer": "lowercase",
                "explain": true,
                "text": ["here is one to test", "and here is another one"]
            }),
        );

        // Built-in normalizer referenced by name.
        let built_in_normalizer =
            Analyze::new("analyze these pants").analyzer(Analysis::normalizer("asciifolding"));
        assert_serialize(
            built_in_normalizer,
            json!({
                "text": "analyze these pants",
                "normalizer": "asciifolding"
            }),
        );

        // Custom normalizer: same as the custom analyzer but without a tokenizer.
        let custom_normalizer = CustomNormalizer::new()
            .char_filter(["html_strip", "test_strip"])
            .filter([stop_filter()]);
        let with_custom_normalizer = Analyze::new(texts)
            .analyzer(custom_normalizer)
            .attributes(["score", "keyword"])
            .explain(true);
        assert_serialize(
            with_custom_normalizer,
            json!({
                "attributes": [
                    "score",
                    "keyword"
                ],
                "char_filter": [
                    "html_strip",
                    "test_strip"
                ],
                "filter" : [{"type": "stop", "stopwords": ["a", "is", "this"]}],
                "explain": true,
                "text": ["here is one to test", "and here is another one"]
            }),
        );

        // Analyzer derived from a field.
        let by_field = Analyze::new("analyze these pants").analyzer(Analysis::field("title"));
        assert_serialize(
            by_field,
            json!({
                "text": "analyze these pants",
                "field": "title"
            }),
        );
    }
}