opensearch_dsl/analyze/request.rs

use serde::ser::{SerializeStruct, Serializer};
use serde::{Deserialize, Serialize};

use crate::util::*;

/// Performs analysis on a text string and returns the resulting tokens.
///
/// The basic `analyze`:
/// ```
/// # use opensearch_dsl::analyze::*;
/// let query = Analyze::new("test this text");
/// ```
/// To `analyze` with a custom analyzer:
/// ```
/// # use opensearch_dsl::analyze::*;
/// # use serde_json::json;
/// let custom_analyzer = CustomAnalyzer::new("whitespace")
///    .filter([
///        StringOrObject::String("lowercase".to_string()),
///        StringOrObject::Object(json!({"type": "stop", "stopwords": ["a", "is", "this"]})),
///    ]);
/// let test = Analyze::new(["test this text", "and this one please"])
///    .analyzer(custom_analyzer)
///    .explain(true)
///    .attributes(["attributes"]);
/// ```
/// To `analyze` with a custom normalizer:
/// ```
/// # use opensearch_dsl::analyze::*;
/// # use serde_json::json;
/// let custom_normalizer = CustomNormalizer::new()
///    .char_filter([
///        json!({ "type": "mapping", "mappings": ["٠ => 0", "١ => 1", "٢ => 2"] }),
///    ])
///    .filter(["snowball"]);
/// let test = Analyze::new(["test this text", "and this one please"])
///    .analyzer(custom_normalizer)
///    .explain(true)
///    .attributes(["attributes"]);
/// ```
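/// To `analyze` with a built-in analyzer, a built-in normalizer, or a field's
/// analyzer, a small sketch using the [Analysis] constructors defined below:
/// ```
/// # use opensearch_dsl::analyze::*;
/// let by_name = Analyze::new("test this text").analyzer(Analysis::analyzer("standard"));
/// let by_field = Analyze::new("test this text").analyzer(Analysis::field("title"));
/// ```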
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Default)]
pub struct Analyze {
  text: StringOrVecString,

  #[serde(default, skip_serializing_if = "ShouldSkip::should_skip", flatten)]
  analysis: Option<Analysis>,

  #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
  attributes: Vec<String>,

  #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
  explain: Option<bool>,
}

/// Structure of custom analyzer.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Default)]
pub struct CustomAnalyzer {
  tokenizer: String,

  #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
  char_filter: Vec<StringOrObject>,

  #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
  filter: Vec<StringOrObject>,
}

/// Structure of custom normalizer.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Default)]
pub struct CustomNormalizer {
  #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
  char_filter: Vec<StringOrObject>,

  #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
  filter: Vec<StringOrObject>,
}

/// Analysis types.
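///
/// Usually constructed through the helper constructors below or the `From`
/// conversions rather than by naming variants directly. A small sketch:
///
/// ```
/// # use opensearch_dsl::analyze::*;
/// let built_in = Analysis::analyzer("standard");
/// let normalizer = Analysis::normalizer("asciifolding");
/// let field = Analysis::field("title");
/// ```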
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Analysis {
  /// The name of the analyzer that should be applied to the provided text.
  /// This could be a `built-in analyzer`, or an analyzer that’s been configured
  /// in the index. If this parameter is not specified, the analyze API uses
  /// the analyzer defined in the field’s mapping. If no field is specified,
  /// the analyze API uses the default analyzer for the index. If no index is
  /// specified, or the index does not have a default analyzer, the analyze API
  /// uses the `standard analyzer`.
  ///
  /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-analyzers.html>
  BuiltInAnalyzer(String),

  /// Custom analyzer that should be applied to the provided text.
  CustomAnalyzer(CustomAnalyzer),

  /// The name of the built-in normalizer to use to convert text into a
  /// single token.
  ///
  /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-normalizers.html>
  BuiltInNormalizer(String),

  /// The custom normalizer to use to convert text into a single token.
  CustomNormalizer(CustomNormalizer),

  /// Field used to derive the analyzer. To use this parameter, you must specify
  /// an index. If specified, the analyzer parameter overrides this value.
  /// If no field is specified, the analyze API uses the default analyzer for
  /// the index. If no index is specified or the index does not have a default
  /// analyzer, the analyze API uses the `standard analyzer`.
  Field(String),
}

/// Structure of filters.
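///
/// Built-in filters are given by name, custom filters as JSON objects; both
/// convert via the `From` impls below:
///
/// ```
/// # use opensearch_dsl::analyze::*;
/// # use serde_json::json;
/// let built_in = StringOrObject::from("lowercase");
/// let custom = StringOrObject::from(json!({"type": "stop", "stopwords": ["a"]}));
/// ```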
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
#[serde(untagged)]
pub enum StringOrObject {
  /// Built-in filters
  String(String),

  /// Custom filters
  Object(serde_json::Value),
}

/// Type for the text field. Text can be a string or an array of strings.
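///
/// Converts from single strings and collections of strings, e.g.:
///
/// ```
/// # use opensearch_dsl::analyze::*;
/// let single = StringOrVecString::from("analyze this");
/// let multiple = StringOrVecString::from(["analyze this", "and this"]);
/// ```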
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
#[serde(untagged)]
pub enum StringOrVecString {
  /// One text input to analyze
  String(String),

  /// Multiple text inputs to analyze
  VecString(Vec<String>),
}

impl Analyze {
  /// Creates an instance of [Analyze]
  ///
  /// - `text` - Text to analyze. If an array of strings is provided, it is
  ///   analyzed as a multi-value field.
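  ///
  /// ```
  /// # use opensearch_dsl::analyze::*;
  /// let single = Analyze::new("analyze this text");
  /// let multi = Analyze::new(["analyze this text", "and this text"]);
  /// ```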
  pub fn new<S>(text: S) -> Self
  where
    S: Into<StringOrVecString>, {
    Self {
      text: text.into(),
      analysis: None,
      attributes: vec![],
      explain: None,
    }
  }

  /// Specify an analyzer: either a built-in analyzer, a custom analyzer, a
  /// built-in normalizer, a custom normalizer, or a field.
  pub fn analyzer<S>(mut self, analyzer: S) -> Self
  where
    S: Into<Analysis>, {
    self.analysis = Some(analyzer.into());
    self
  }

  /// Array of token attributes used to filter the output of the explain
  /// parameter.
  pub fn attributes<I>(mut self, attributes: I) -> Self
  where
    I: IntoIterator,
    I::Item: ToString, {
    self.attributes.extend(attributes.into_iter().map(|x| x.to_string()));
    self
  }

  /// If `true`, the response includes token attributes and additional
  /// details. Defaults to `false`. (experimental)
  pub fn explain(mut self, explain: bool) -> Self {
    self.explain = Some(explain);
    self
  }
}

impl CustomNormalizer {
  /// Creates an instance of custom normalizer
  pub fn new() -> Self {
    Default::default()
  }

  /// Array of character filters used to preprocess characters before the
  /// tokenizer. See `Character filters reference` for a list of character
  /// filters.
  ///
  /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-charfilters.html>
  pub fn char_filter<I>(mut self, char_filter: I) -> Self
  where
    I: IntoIterator,
    I::Item: Into<StringOrObject>, {
    self.char_filter.extend(char_filter.into_iter().map(Into::into));
    self
  }

  /// Array of token filters to apply after the tokenizer.
  /// See `Token filter reference` for a list of token filters.
  ///
  /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-tokenfilters.html>
  pub fn filter<I>(mut self, filter: I) -> Self
  where
    I: IntoIterator,
    I::Item: Into<StringOrObject>, {
    self.filter.extend(filter.into_iter().map(Into::into));
    self
  }
}

impl CustomAnalyzer {
  /// Creates an instance of custom analyzer and sets the tokenizer to use to
  /// convert text into tokens. See `Tokenizer reference` for a list of
  /// tokenizers.
  ///
  /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-tokenizers.html>
  pub fn new<S>(tokenizer: S) -> Self
  where
    S: ToString, {
    Self {
      tokenizer: tokenizer.to_string(),
      char_filter: vec![],
      filter: vec![],
    }
  }

  /// Array of character filters used to preprocess characters before the
  /// tokenizer. See `Character filters reference` for a list of character
  /// filters.
  ///
  /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-charfilters.html>
  pub fn char_filter<I>(mut self, char_filter: I) -> Self
  where
    I: IntoIterator,
    I::Item: Into<StringOrObject>, {
    self.char_filter.extend(char_filter.into_iter().map(Into::into));
    self
  }

  /// Array of token filters to apply after the tokenizer.
  /// See `Token filter reference` for a list of token filters.
  ///
  /// <https://www.elastic.co/guide/en/opensearch/reference/current/analysis-tokenfilters.html>
  pub fn filter<I>(mut self, filter: I) -> Self
  where
    I: IntoIterator,
    I::Item: Into<StringOrObject>, {
    self.filter.extend(filter.into_iter().map(Into::into));
    self
  }
}

impl Analysis {
  /// Creates an instance of [`Analysis::Field`]
  pub fn field<S>(value: S) -> Self
  where
    S: ToString, {
    Self::Field(value.to_string())
  }

  /// Creates an instance of [`Analysis::BuiltInAnalyzer`]
  pub fn analyzer<S>(value: S) -> Self
  where
    S: ToString, {
    Self::BuiltInAnalyzer(value.to_string())
  }

  /// Creates an instance of [`Analysis::BuiltInNormalizer`]
  pub fn normalizer<S>(value: S) -> Self
  where
    S: ToString, {
    Self::BuiltInNormalizer(value.to_string())
  }
}

impl<'a> From<&'a str> for StringOrObject {
  fn from(value: &'a str) -> Self {
    Self::String(value.to_owned())
  }
}

impl From<String> for StringOrObject {
  fn from(value: String) -> Self {
    Self::String(value)
  }
}

impl From<serde_json::Value> for StringOrObject {
  fn from(value: serde_json::Value) -> Self {
    Self::Object(value)
  }
}

impl From<CustomAnalyzer> for Analysis {
  fn from(value: CustomAnalyzer) -> Self {
    Self::CustomAnalyzer(value)
  }
}

impl From<CustomNormalizer> for Analysis {
  fn from(value: CustomNormalizer) -> Self {
    Self::CustomNormalizer(value)
  }
}

impl From<String> for StringOrVecString {
  fn from(value: String) -> Self {
    Self::String(value)
  }
}

impl From<&str> for StringOrVecString {
  fn from(value: &str) -> Self {
    Self::String(value.into())
  }
}

impl From<Vec<&str>> for StringOrVecString {
  fn from(value: Vec<&str>) -> Self {
    Self::VecString(value.into_iter().map(Into::into).collect())
  }
}

impl<const N: usize> From<[&str; N]> for StringOrVecString {
  fn from(value: [&str; N]) -> Self {
    Self::VecString(value.iter().map(ToString::to_string).collect())
  }
}

impl<'a> From<&'a [&str]> for StringOrVecString {
  fn from(value: &'a [&str]) -> Self {
    Self::VecString(value.iter().map(ToString::to_string).collect())
  }
}

impl Serialize for Analysis {
  fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
  where
    S: Serializer, {
    match self {
      Analysis::BuiltInAnalyzer(name) => {
        let mut state = serializer.serialize_struct("analysis_analyzer", 1)?;
        state.serialize_field("analyzer", name)?;
        state.end()
      }
      Analysis::CustomAnalyzer(analyzer) => analyzer.serialize(serializer),
      Analysis::BuiltInNormalizer(name) => {
        let mut state = serializer.serialize_struct("analysis_normalizer", 1)?;
        state.serialize_field("normalizer", name)?;
        state.end()
      }
      Analysis::CustomNormalizer(normalizer) => normalizer.serialize(serializer),
      Analysis::Field(name) => {
        let mut state = serializer.serialize_struct("analysis_field", 1)?;
        state.serialize_field("field", name)?;
        state.end()
      }
    }
  }
}

impl Default for StringOrVecString {
  fn default() -> Self {
    Self::String(Default::default())
  }
}

#[cfg(test)]
mod tests {
  use serde_json::json;

  use super::*;

  #[test]
  fn serialization() {
    assert_serialize(
      Analyze::new("analyze these pants"),
      json!({
          "text": "analyze these pants"
      }),
    );

    assert_serialize(
      Analyze::new("analyze these pants").analyzer(Analysis::analyzer("test_default")),
      json!({
          "text": "analyze these pants",
          "analyzer": "test_default"
      }),
    );

    assert_serialize(
      Analyze::new(["here is one to test", "and here is another one"])
        .analyzer(
          CustomAnalyzer::new("lowercase")
            .char_filter(["html_strip", "test_strip"])
            .filter([json!({"type": "stop", "stopwords": ["a", "is", "this"]})]),
        )
        .attributes(["score", "keyword"])
        .explain(true),
      json!({
          "attributes": [
              "score",
              "keyword"
          ],
          "char_filter": [
              "html_strip",
              "test_strip"
          ],
          "filter" : [{"type": "stop", "stopwords": ["a", "is", "this"]}],
          "tokenizer": "lowercase",
          "explain": true,
          "text": ["here is one to test", "and here is another one"]
      }),
    );

    assert_serialize(
      Analyze::new("analyze these pants").analyzer(Analysis::normalizer("asciifolding")),
      json!({
          "text": "analyze these pants",
          "normalizer": "asciifolding"
      }),
    );

    assert_serialize(
      Analyze::new(["here is one to test", "and here is another one"])
        .analyzer(
          CustomNormalizer::new()
            .char_filter(["html_strip", "test_strip"])
            .filter([json!({"type": "stop", "stopwords": ["a", "is", "this"]})]),
        )
        .attributes(["score", "keyword"])
        .explain(true),
      json!({
          "attributes": [
              "score",
              "keyword"
          ],
          "char_filter": [
              "html_strip",
              "test_strip"
          ],
          "filter" : [{"type": "stop", "stopwords": ["a", "is", "this"]}],
          "explain": true,
          "text": ["here is one to test", "and here is another one"]
      }),
    );

    assert_serialize(
      Analyze::new("analyze these pants").analyzer(Analysis::field("title")),
      json!({
          "text": "analyze these pants",
          "field": "title"
      }),
    );
  }
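
  // A couple of extra sketches, not part of the original suite: they exercise
  // the untagged enums and the `From` conversions directly, assuming
  // `assert_serialize` accepts any `Serialize` value, as the tests above
  // suggest. The expected JSON follows from the `#[serde(untagged)]` derives.
  #[test]
  fn untagged_serialization() {
    assert_serialize(StringOrObject::from("lowercase"), json!("lowercase"));
    assert_serialize(
      StringOrObject::from(json!({"type": "stop"})),
      json!({"type": "stop"}),
    );
    assert_serialize(Analyze::new(vec!["a", "b"]), json!({ "text": ["a", "b"] }));
  }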
}