arangors/
analyzer.rs

1use serde::{Deserialize, Serialize};
2use typed_builder::TypedBuilder;
3
4#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
5#[serde(rename_all = "lowercase")]
6pub enum AnalyzerFeature {
7    Frequency,
8    Norm,
9    Position,
10}
11
12#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
13#[serde(rename_all = "lowercase")]
14pub enum AnalyzerCase {
15    Lower,
16    None,
17    Upper,
18}
19
20#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
21#[serde(rename_all = "lowercase")]
22pub enum NgramStreamType {
23    Binary,
24    Utf8,
25}
26
27#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
28#[serde(rename_all = "lowercase")]
29pub enum GeoJsonType {
30    Shape,
31    Centroid,
32    Point,
33}
34
35#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)]
36#[builder(doc)]
37pub struct DelimiterAnalyzerProperties {
38    /// The value will be used as delimiter to split text into tokens as
39    /// specified in RFC 4180, without starting new records on newlines.
40    #[serde(skip_serializing_if = "Option::is_none")]
41    #[builder(default, setter(strip_option))]
42    pub delimiter: Option<String>,
43}
44
45#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)]
46#[builder(doc)]
47pub struct StemAnalyzerProperties {
48    /// Format: `language[_COUNTRY][.encoding][@variant]`
49    pub locale: String,
50}
51
52#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)]
53#[builder(doc)]
54pub struct NormAnalyzerProperties {
55    /// Format: `language[_COUNTRY][.encoding][@variant]`
56    pub locale: String,
57
58    /// Case conversion.  Default: `"lower"`
59    #[serde(skip_serializing_if = "Option::is_none")]
60    #[builder(default, setter(strip_option))]
61    pub case: Option<AnalyzerCase>,
62
63    /// Preserve accents in returned words.  Default: `false`
64    #[serde(skip_serializing_if = "Option::is_none")]
65    #[builder(default, setter(strip_option))]
66    pub accent: Option<bool>,
67}
68
69#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)]
70#[builder(doc)]
71#[serde(rename_all = "camelCase")]
72pub struct NgramAnalyzerProperties {
73    /// Minimum n-gram length.
74    pub min: u16,
75
76    /// Maximum n-gram length.
77    pub max: u16,
78
79    /// Output the original value as well.
80    pub preserve_original: bool,
81
82    /// Type of the input stream.
83    #[serde(skip_serializing_if = "Option::is_none")]
84    #[builder(default, setter(strip_option))]
85    pub stream_type: Option<NgramStreamType>,
86}
87
88#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)]
89#[builder(doc)]
90#[serde(rename_all = "camelCase")]
91pub struct TextAnalyzerProperties {
92    /// Format: `language[_COUNTRY][.encoding][@variant]`
93    pub locale: String,
94
95    #[serde(skip_serializing_if = "Option::is_none")]
96    #[builder(default, setter(strip_option))]
97    pub case: Option<AnalyzerCase>,
98
99    #[serde(skip_serializing_if = "Option::is_none")]
100    #[builder(default, setter(strip_option))]
101    pub accent: Option<bool>,
102
103    /// Words to omit from result.
104    /// Defaults to the words loaded from the file at `stopwordsPath`.
105    #[serde(skip_serializing_if = "Option::is_none")]
106    #[builder(default, setter(strip_option))]
107    pub stopwords: Option<Vec<String>>,
108
109    /// Path with a `language` sub-directory containing files with words to
110    /// omit.
111    ///
112    /// Defaults to the path specified in the server-side environment variable
113    /// IRESEARCH_TEXT_STOPWORD_PATH` or the current working directory of the
114    /// ArangoDB process.
115    #[serde(skip_serializing_if = "Option::is_none")]
116    #[builder(default, setter(strip_option))]
117    pub stopwords_path: Option<Vec<String>>,
118
119    /// Apply stemming on returned words.
120    /// Default: `true`
121    #[serde(skip_serializing_if = "Option::is_none")]
122    #[builder(default, setter(strip_option))]
123    pub stemming: Option<bool>,
124}
125
126#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)]
127#[builder(doc)]
128pub struct GeoJsonAnalyzerProperties {
129    /// Whether to index all GeoJSON geometry types, just the centroid, or just
130    /// points
131    #[serde(skip_serializing_if = "Option::is_none")]
132    #[builder(default, setter(strip_option))]
133    pub r#type: Option<GeoJsonType>,
134    // Skip the options as they "generally should remain unchanged"
135}
136
137#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)]
138#[builder(doc)]
139#[serde(rename_all = "camelCase")]
140pub struct PipelineAnalyzerProperties {
141    pub pipeline: Vec<PipelineAnalyzers>,
142}
143#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)]
144#[builder(doc)]
145#[serde(rename_all = "camelCase")]
146pub struct StopwordsAnalyzerProperties {
147    #[serde(skip_serializing_if = "Option::is_none")]
148    #[builder(default, setter(strip_option))]
149    pub hex: Option<bool>,
150    pub stopwords: Vec<String>,
151}
152
153#[derive(Debug, Serialize, Deserialize, PartialEq)]
154#[serde(rename_all = "camelCase", tag = "type")]
155pub enum AnalyzerInfo {
156    /// The `identity` Analyzer does not take additional properties.
157    Identity {
158        name: String,
159
160        #[serde(skip_serializing_if = "Option::is_none")]
161        features: Option<Vec<AnalyzerFeature>>,
162    },
163    Delimiter {
164        name: String,
165
166        #[serde(skip_serializing_if = "Option::is_none")]
167        features: Option<Vec<AnalyzerFeature>>,
168
169        #[serde(skip_serializing_if = "Option::is_none")]
170        properties: Option<DelimiterAnalyzerProperties>,
171    },
172
173    Stem {
174        name: String,
175
176        #[serde(skip_serializing_if = "Option::is_none")]
177        features: Option<Vec<AnalyzerFeature>>,
178
179        #[serde(skip_serializing_if = "Option::is_none")]
180        properties: Option<StemAnalyzerProperties>,
181    },
182
183    Norm {
184        name: String,
185
186        #[serde(skip_serializing_if = "Option::is_none")]
187        features: Option<Vec<AnalyzerFeature>>,
188
189        #[serde(skip_serializing_if = "Option::is_none")]
190        properties: Option<NormAnalyzerProperties>,
191    },
192
193    Ngram {
194        name: String,
195
196        #[serde(skip_serializing_if = "Option::is_none")]
197        features: Option<Vec<AnalyzerFeature>>,
198
199        #[serde(skip_serializing_if = "Option::is_none")]
200        properties: Option<NgramAnalyzerProperties>,
201    },
202
203    Text {
204        name: String,
205
206        #[serde(skip_serializing_if = "Option::is_none")]
207        features: Option<Vec<AnalyzerFeature>>,
208
209        #[serde(skip_serializing_if = "Option::is_none")]
210        properties: Option<TextAnalyzerProperties>,
211    },
212
213    Geojson {
214        name: String,
215
216        #[serde(skip_serializing_if = "Option::is_none")]
217        features: Option<Vec<AnalyzerFeature>>,
218
219        #[serde(skip_serializing_if = "Option::is_none")]
220        properties: Option<GeoJsonAnalyzerProperties>,
221    },
222    Stopwords {
223        name: String,
224        properties: StopwordsAnalyzerProperties,
225        #[serde(skip_serializing_if = "Option::is_none")]
226        features: Option<Vec<AnalyzerFeature>>,
227    },
228    Pipeline {
229        name: String,
230        properties: PipelineAnalyzerProperties,
231    },
232}
233
234#[derive(Clone, Debug, Serialize, Deserialize)]
235pub struct AnalyzerDescription {
236    pub name: String,
237}
238
// These are the exact same analyzer types, but customized to be used in a
// pipeline analyzer. Since `name` is not required for each sub-analyzer of a
// pipeline analyzer, the `name` field is removed.
242#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
243#[serde(rename_all = "camelCase", tag = "type")]
244pub enum PipelineAnalyzers {
245    /// The `identity` Analyzer does not take additional properties.
246    Identity {
247        #[serde(skip_serializing_if = "Option::is_none")]
248        features: Option<Vec<AnalyzerFeature>>,
249    },
250    Delimiter {
251        #[serde(skip_serializing_if = "Option::is_none")]
252        features: Option<Vec<AnalyzerFeature>>,
253
254        #[serde(skip_serializing_if = "Option::is_none")]
255        properties: Option<DelimiterAnalyzerProperties>,
256    },
257
258    Stem {
259        #[serde(skip_serializing_if = "Option::is_none")]
260        features: Option<Vec<AnalyzerFeature>>,
261
262        #[serde(skip_serializing_if = "Option::is_none")]
263        properties: Option<StemAnalyzerProperties>,
264    },
265
266    Norm {
267        #[serde(skip_serializing_if = "Option::is_none")]
268        features: Option<Vec<AnalyzerFeature>>,
269
270        #[serde(skip_serializing_if = "Option::is_none")]
271        properties: Option<NormAnalyzerProperties>,
272    },
273
274    Ngram {
275        #[serde(skip_serializing_if = "Option::is_none")]
276        features: Option<Vec<AnalyzerFeature>>,
277
278        #[serde(skip_serializing_if = "Option::is_none")]
279        properties: Option<NgramAnalyzerProperties>,
280    },
281
282    Text {
283        #[serde(skip_serializing_if = "Option::is_none")]
284        features: Option<Vec<AnalyzerFeature>>,
285
286        #[serde(skip_serializing_if = "Option::is_none")]
287        properties: Option<TextAnalyzerProperties>,
288    },
289
290    Geojson {
291        #[serde(skip_serializing_if = "Option::is_none")]
292        features: Option<Vec<AnalyzerFeature>>,
293
294        #[serde(skip_serializing_if = "Option::is_none")]
295        properties: Option<GeoJsonAnalyzerProperties>,
296    },
297    Stopwords {
298        properties: StopwordsAnalyzerProperties,
299        #[serde(skip_serializing_if = "Option::is_none")]
300        features: Option<Vec<AnalyzerFeature>>,
301    },
302}