1use serde_json::Value;
10use std::collections::HashMap;
11
12use crate::analysis::tokenizer::TokenChar;
13use crate::analysis::{
14 Analyzer, AnalyzerRegistry, AsciiFoldingFilter, CharFilter, EdgeNGramTokenFilter,
15 EdgeNGramTokenizer, HtmlStripCharFilter, KeywordTokenizer, LetterTokenizer, LowercaseFilter,
16 MappingCharFilter, NGramTokenFilter, NGramTokenizer, PathHierarchyTokenizer,
17 PatternReplaceCharFilter, PatternTokenizer, ShingleFilter, StandardTokenizer, StemmerFilter,
18 StopFilter, SynonymFilter, TokenFilter, Tokenizer, WhitespaceTokenizer,
19};
20
21#[derive(Clone, Debug, Default)]
25pub struct AnalysisConfig {
26 pub char_filters: HashMap<String, Value>,
27 pub tokenizers: HashMap<String, Value>,
28 pub filters: HashMap<String, Value>,
29 pub analyzers: HashMap<String, Value>,
30}
31
32impl AnalysisConfig {
33 pub fn from_json(analysis: &Value) -> Result<Self, String> {
35 let obj = analysis.as_object().ok_or("analysis must be an object")?;
36
37 let parse_section = |key: &str| -> HashMap<String, Value> {
38 obj.get(key)
39 .and_then(|v| v.as_object())
40 .map(|m| m.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
41 .unwrap_or_default()
42 };
43
44 Ok(Self {
45 char_filters: parse_section("char_filter"),
46 tokenizers: parse_section("tokenizer"),
47 filters: parse_section("filter"),
48 analyzers: parse_section("analyzer"),
49 })
50 }
51
52 pub fn to_json(&self) -> Value {
54 let mut obj = serde_json::Map::new();
55 if !self.char_filters.is_empty() {
56 obj.insert(
57 "char_filter".to_string(),
58 Value::Object(
59 self.char_filters
60 .iter()
61 .map(|(k, v)| (k.clone(), v.clone()))
62 .collect(),
63 ),
64 );
65 }
66 if !self.tokenizers.is_empty() {
67 obj.insert(
68 "tokenizer".to_string(),
69 Value::Object(
70 self.tokenizers
71 .iter()
72 .map(|(k, v)| (k.clone(), v.clone()))
73 .collect(),
74 ),
75 );
76 }
77 if !self.filters.is_empty() {
78 obj.insert(
79 "filter".to_string(),
80 Value::Object(
81 self.filters
82 .iter()
83 .map(|(k, v)| (k.clone(), v.clone()))
84 .collect(),
85 ),
86 );
87 }
88 if !self.analyzers.is_empty() {
89 obj.insert(
90 "analyzer".to_string(),
91 Value::Object(
92 self.analyzers
93 .iter()
94 .map(|(k, v)| (k.clone(), v.clone()))
95 .collect(),
96 ),
97 );
98 }
99 Value::Object(obj)
100 }
101
102 pub fn build_registry(&self) -> Result<AnalyzerRegistry, String> {
107 let mut registry = AnalyzerRegistry::new();
108
109 for (name, config) in &self.analyzers {
110 let analyzer = self.build_analyzer(name, config)?;
111 registry.register(analyzer);
112 }
113
114 Ok(registry)
115 }
116
117 fn build_analyzer(&self, name: &str, config: &Value) -> Result<Analyzer, String> {
118 let obj = config
119 .as_object()
120 .ok_or_else(|| format!("analyzer '{name}' must be an object"))?;
121
122 let tokenizer_name = obj
124 .get("tokenizer")
125 .and_then(|v| v.as_str())
126 .ok_or_else(|| format!("analyzer '{name}' requires a 'tokenizer' field"))?;
127
128 let tokenizer = self.build_tokenizer(tokenizer_name)?;
129
130 let char_filter_names: Vec<&str> = obj
132 .get("char_filter")
133 .and_then(|v| v.as_array())
134 .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
135 .unwrap_or_default();
136
137 let mut char_filters: Vec<Box<dyn CharFilter>> = Vec::new();
138 for cf_name in &char_filter_names {
139 char_filters.push(self.build_char_filter(cf_name)?);
140 }
141
142 let filter_names: Vec<&str> = obj
144 .get("filter")
145 .and_then(|v| v.as_array())
146 .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
147 .unwrap_or_default();
148
149 let mut filters: Vec<Box<dyn TokenFilter>> = Vec::new();
150 for f_name in &filter_names {
151 filters.push(self.build_token_filter(f_name)?);
152 }
153
154 Ok(Analyzer::from_boxed(name, char_filters, tokenizer, filters))
155 }
156
157 fn build_tokenizer(&self, name: &str) -> Result<Box<dyn Tokenizer>, String> {
158 if let Some(config) = self.tokenizers.get(name) {
160 return self.build_custom_tokenizer(name, config);
161 }
162
163 match name {
165 "standard" => Ok(Box::new(StandardTokenizer)),
166 "whitespace" => Ok(Box::new(WhitespaceTokenizer)),
167 "letter" => Ok(Box::new(LetterTokenizer)),
168 "keyword" => Ok(Box::new(KeywordTokenizer)),
169 _ => Err(format!("unknown tokenizer: '{name}'")),
170 }
171 }
172
173 fn build_custom_tokenizer(
174 &self,
175 name: &str,
176 config: &Value,
177 ) -> Result<Box<dyn Tokenizer>, String> {
178 let obj = config
179 .as_object()
180 .ok_or_else(|| format!("tokenizer '{name}' must be an object"))?;
181 let typ = obj.get("type").and_then(|v| v.as_str()).unwrap_or(name);
182
183 match typ {
184 "standard" => Ok(Box::new(StandardTokenizer)),
185 "whitespace" => Ok(Box::new(WhitespaceTokenizer)),
186 "letter" => Ok(Box::new(LetterTokenizer)),
187 "keyword" => Ok(Box::new(KeywordTokenizer)),
188 "ngram" => {
189 let min_gram = obj.get("min_gram").and_then(|v| v.as_u64()).unwrap_or(1) as usize;
190 let max_gram = obj.get("max_gram").and_then(|v| v.as_u64()).unwrap_or(2) as usize;
191 if min_gram > max_gram {
192 return Err(format!(
193 "tokenizer '{name}': min_gram ({min_gram}) > max_gram ({max_gram})"
194 ));
195 }
196 let token_chars = parse_token_chars(obj)?;
197 Ok(Box::new(NGramTokenizer::new(
198 min_gram,
199 max_gram,
200 token_chars,
201 )))
202 }
203 "edge_ngram" => {
204 let min_gram = obj.get("min_gram").and_then(|v| v.as_u64()).unwrap_or(1) as usize;
205 let max_gram = obj.get("max_gram").and_then(|v| v.as_u64()).unwrap_or(2) as usize;
206 if min_gram > max_gram {
207 return Err(format!(
208 "tokenizer '{name}': min_gram ({min_gram}) > max_gram ({max_gram})"
209 ));
210 }
211 let token_chars = parse_token_chars(obj)?;
212 Ok(Box::new(EdgeNGramTokenizer::new(
213 min_gram,
214 max_gram,
215 token_chars,
216 )))
217 }
218 "pattern" => {
219 let pattern = obj
220 .get("pattern")
221 .and_then(|v| v.as_str())
222 .unwrap_or(r"\W+");
223 let tok = PatternTokenizer::new(pattern)
224 .map_err(|e| format!("tokenizer '{name}': invalid pattern: {e}"))?;
225 Ok(Box::new(tok))
226 }
227 "path_hierarchy" => {
228 let separator = obj
229 .get("separator")
230 .and_then(|v| v.as_str())
231 .and_then(|s| s.chars().next())
232 .unwrap_or('/');
233 let replacement = obj
234 .get("replacement")
235 .and_then(|v| v.as_str())
236 .and_then(|s| s.chars().next());
237 Ok(Box::new(PathHierarchyTokenizer::new(
238 separator,
239 replacement,
240 )))
241 }
242 _ => Err(format!("unknown tokenizer type: '{typ}'")),
243 }
244 }
245
246 fn build_char_filter(&self, name: &str) -> Result<Box<dyn CharFilter>, String> {
247 if let Some(config) = self.char_filters.get(name) {
249 return self.build_custom_char_filter(name, config);
250 }
251
252 match name {
254 "html_strip" => Ok(Box::new(HtmlStripCharFilter)),
255 _ => Err(format!("unknown char_filter: '{name}'")),
256 }
257 }
258
259 fn build_custom_char_filter(
260 &self,
261 name: &str,
262 config: &Value,
263 ) -> Result<Box<dyn CharFilter>, String> {
264 let obj = config
265 .as_object()
266 .ok_or_else(|| format!("char_filter '{name}' must be an object"))?;
267 let typ = obj.get("type").and_then(|v| v.as_str()).unwrap_or(name);
268
269 match typ {
270 "html_strip" => Ok(Box::new(HtmlStripCharFilter)),
271 "mapping" => {
272 let mappings = obj
273 .get("mappings")
274 .and_then(|v| v.as_array())
275 .map(|arr| {
276 arr.iter()
277 .filter_map(|v| v.as_str())
278 .filter_map(|s| {
279 let parts: Vec<&str> = s.splitn(2, "=>").collect();
280 if parts.len() == 2 {
281 Some((parts[0].trim().to_string(), parts[1].trim().to_string()))
282 } else {
283 None
284 }
285 })
286 .collect()
287 })
288 .unwrap_or_default();
289 Ok(Box::new(MappingCharFilter::new(mappings)))
290 }
291 "pattern_replace" => {
292 let pattern = obj
293 .get("pattern")
294 .and_then(|v| v.as_str())
295 .ok_or_else(|| format!("char_filter '{name}': 'pattern' is required"))?;
296 let replacement = obj
297 .get("replacement")
298 .and_then(|v| v.as_str())
299 .unwrap_or("");
300 let cf = PatternReplaceCharFilter::new(pattern, replacement)
301 .map_err(|e| format!("char_filter '{name}': invalid pattern: {e}"))?;
302 Ok(Box::new(cf))
303 }
304 _ => Err(format!("unknown char_filter type: '{typ}'")),
305 }
306 }
307
308 fn build_token_filter(&self, name: &str) -> Result<Box<dyn TokenFilter>, String> {
309 if let Some(config) = self.filters.get(name) {
311 return self.build_custom_token_filter(name, config);
312 }
313
314 match name {
316 "lowercase" => Ok(Box::new(LowercaseFilter)),
317 "stop" => Ok(Box::new(StopFilter::english())),
318 "asciifolding" => Ok(Box::new(AsciiFoldingFilter::new(false))),
319 _ => Err(format!("unknown filter: '{name}'")),
320 }
321 }
322
323 fn build_custom_token_filter(
324 &self,
325 name: &str,
326 config: &Value,
327 ) -> Result<Box<dyn TokenFilter>, String> {
328 let obj = config
329 .as_object()
330 .ok_or_else(|| format!("filter '{name}' must be an object"))?;
331 let typ = obj.get("type").and_then(|v| v.as_str()).unwrap_or(name);
332
333 match typ {
334 "lowercase" => Ok(Box::new(LowercaseFilter)),
335 "stop" => {
336 let stopwords = obj
337 .get("stopwords")
338 .and_then(|v| v.as_array())
339 .map(|arr| {
340 arr.iter()
341 .filter_map(|v| v.as_str().map(String::from))
342 .collect::<Vec<_>>()
343 })
344 .map(|words| StopFilter::new(words))
345 .unwrap_or_else(StopFilter::english);
346 Ok(Box::new(stopwords))
347 }
348 "stemmer" => {
349 let lang = obj
350 .get("language")
351 .and_then(|v| v.as_str())
352 .unwrap_or("english");
353 let algorithm = parse_stemmer_language(lang)?;
354 Ok(Box::new(StemmerFilter::new(algorithm)))
355 }
356 "asciifolding" => {
357 let preserve = obj
358 .get("preserve_original")
359 .and_then(|v| v.as_bool())
360 .unwrap_or(false);
361 Ok(Box::new(AsciiFoldingFilter::new(preserve)))
362 }
363 "ngram" => {
364 let min_gram = obj.get("min_gram").and_then(|v| v.as_u64()).unwrap_or(1) as usize;
365 let max_gram = obj.get("max_gram").and_then(|v| v.as_u64()).unwrap_or(2) as usize;
366 Ok(Box::new(NGramTokenFilter::new(min_gram, max_gram)))
367 }
368 "edge_ngram" => {
369 let min_gram = obj.get("min_gram").and_then(|v| v.as_u64()).unwrap_or(1) as usize;
370 let max_gram = obj.get("max_gram").and_then(|v| v.as_u64()).unwrap_or(2) as usize;
371 let preserve = obj
372 .get("preserve_original")
373 .and_then(|v| v.as_bool())
374 .unwrap_or(false);
375 Ok(Box::new(EdgeNGramTokenFilter::new(
376 min_gram, max_gram, preserve,
377 )))
378 }
379 "synonym" => {
380 let expand = obj.get("expand").and_then(|v| v.as_bool()).unwrap_or(true);
381 let synonyms: Vec<String> = obj
382 .get("synonyms")
383 .and_then(|v| v.as_array())
384 .map(|arr| {
385 arr.iter()
386 .filter_map(|v| v.as_str().map(String::from))
387 .collect()
388 })
389 .unwrap_or_default();
390 Ok(Box::new(SynonymFilter::new(&synonyms, expand)))
391 }
392 "shingle" => {
393 let min_size = obj
394 .get("min_shingle_size")
395 .and_then(|v| v.as_u64())
396 .unwrap_or(2) as usize;
397 let max_size = obj
398 .get("max_shingle_size")
399 .and_then(|v| v.as_u64())
400 .unwrap_or(2) as usize;
401 let output_unigrams = obj
402 .get("output_unigrams")
403 .and_then(|v| v.as_bool())
404 .unwrap_or(true);
405 Ok(Box::new(ShingleFilter::new(
406 min_size,
407 max_size,
408 output_unigrams,
409 )))
410 }
411 _ => Err(format!("unknown filter type: '{typ}'")),
412 }
413 }
414}
415
416fn parse_token_chars(obj: &serde_json::Map<String, Value>) -> Result<Vec<TokenChar>, String> {
417 obj.get("token_chars")
418 .and_then(|v| v.as_array())
419 .map(|arr| {
420 arr.iter()
421 .filter_map(|v| v.as_str())
422 .map(|s| {
423 TokenChar::from_str(s)
424 .ok_or_else(|| format!("unknown token_chars value: '{s}'"))
425 })
426 .collect::<Result<Vec<_>, _>>()
427 })
428 .unwrap_or(Ok(Vec::new()))
429}
430
431fn parse_stemmer_language(lang: &str) -> Result<rust_stemmers::Algorithm, String> {
432 match lang.to_lowercase().as_str() {
433 "english" => Ok(rust_stemmers::Algorithm::English),
434 "french" => Ok(rust_stemmers::Algorithm::French),
435 "german" => Ok(rust_stemmers::Algorithm::German),
436 "spanish" => Ok(rust_stemmers::Algorithm::Spanish),
437 "italian" => Ok(rust_stemmers::Algorithm::Italian),
438 "portuguese" => Ok(rust_stemmers::Algorithm::Portuguese),
439 "dutch" => Ok(rust_stemmers::Algorithm::Dutch),
440 "swedish" => Ok(rust_stemmers::Algorithm::Swedish),
441 "norwegian" => Ok(rust_stemmers::Algorithm::Norwegian),
442 "danish" => Ok(rust_stemmers::Algorithm::Danish),
443 "finnish" => Ok(rust_stemmers::Algorithm::Finnish),
444 "hungarian" => Ok(rust_stemmers::Algorithm::Hungarian),
445 "romanian" => Ok(rust_stemmers::Algorithm::Romanian),
446 "russian" => Ok(rust_stemmers::Algorithm::Russian),
447 "turkish" => Ok(rust_stemmers::Algorithm::Turkish),
448 "arabic" => Ok(rust_stemmers::Algorithm::Arabic),
449 "greek" => Ok(rust_stemmers::Algorithm::Greek),
450 _ => Err(format!("unsupported stemmer language: '{lang}'")),
451 }
452}
453
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Parses `analysis` and builds its registry, panicking on any error.
    fn registry_for(analysis: &Value) -> AnalyzerRegistry {
        AnalysisConfig::from_json(analysis)
            .unwrap()
            .build_registry()
            .unwrap()
    }

    /// Runs `text` through the named analyzer and returns the token texts.
    fn analyze(analysis: &Value, analyzer_name: &str, text: &str) -> Vec<String> {
        let registry = registry_for(analysis);
        let analyzer = registry.try_get(analyzer_name).unwrap();
        let tokens = analyzer.analyze(text);
        tokens
            .iter()
            .map(|token| token.text.as_str().to_owned())
            .collect()
    }

    /// Parses `analysis` and returns the error produced by `build_registry`.
    fn build_error(analysis: &Value) -> String {
        AnalysisConfig::from_json(analysis)
            .unwrap()
            .build_registry()
            .unwrap_err()
    }

    /// Reports whether `expected` is one of the emitted token texts.
    fn has_token(tokens: &[String], expected: &str) -> bool {
        tokens.iter().any(|token| token == expected)
    }

    #[test]
    fn empty_config() {
        let registry = AnalysisConfig::default().build_registry().unwrap();
        assert!(registry.try_get("standard").is_some());
    }

    #[test]
    fn custom_analyzer_basic() {
        let analysis = json!({
            "analyzer": {
                "my_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": ["lowercase"]
                }
            }
        });
        assert_eq!(
            analyze(&analysis, "my_analyzer", "Hello World"),
            ["hello", "world"]
        );
    }

    #[test]
    fn custom_edge_ngram_analyzer() {
        let analysis = json!({
            "tokenizer": {
                "my_tok": {
                    "type": "edge_ngram",
                    "min_gram": 2,
                    "max_gram": 5,
                    "token_chars": ["letter"]
                }
            },
            "analyzer": {
                "autocomplete": {
                    "tokenizer": "my_tok",
                    "filter": ["lowercase"]
                }
            }
        });
        assert_eq!(
            analyze(&analysis, "autocomplete", "Quick"),
            ["qu", "qui", "quic", "quick"]
        );
    }

    #[test]
    fn custom_char_filter_html() {
        let analysis = json!({
            "analyzer": {
                "html_analyzer": {
                    "char_filter": ["html_strip"],
                    "tokenizer": "standard",
                    "filter": ["lowercase"]
                }
            }
        });
        assert_eq!(
            analyze(&analysis, "html_analyzer", "<p>Hello <b>World</b></p>"),
            ["hello", "world"]
        );
    }

    #[test]
    fn custom_synonym_filter() {
        let analysis = json!({
            "filter": {
                "my_synonyms": {
                    "type": "synonym",
                    "synonyms": ["quick, fast, speedy"]
                }
            },
            "analyzer": {
                "syn_analyzer": {
                    "tokenizer": "standard",
                    "filter": ["lowercase", "my_synonyms"]
                }
            }
        });
        let texts = analyze(&analysis, "syn_analyzer", "Quick fox");
        assert!(has_token(&texts, "quick"));
        assert!(has_token(&texts, "fast"));
        assert!(has_token(&texts, "fox"));
    }

    #[test]
    fn custom_asciifolding() {
        let analysis = json!({
            "analyzer": {
                "folding": {
                    "tokenizer": "standard",
                    "filter": ["lowercase", "asciifolding"]
                }
            }
        });
        assert_eq!(
            analyze(&analysis, "folding", "Café résumé"),
            ["cafe", "resume"]
        );
    }

    #[test]
    fn missing_tokenizer_error() {
        let analysis = json!({
            "analyzer": {
                "bad": {
                    "tokenizer": "nonexistent",
                    "filter": ["lowercase"]
                }
            }
        });
        let err = build_error(&analysis);
        assert!(err.contains("unknown tokenizer"), "got: {err}");
    }

    #[test]
    fn missing_filter_error() {
        let analysis = json!({
            "analyzer": {
                "bad": {
                    "tokenizer": "standard",
                    "filter": ["nonexistent"]
                }
            }
        });
        let err = build_error(&analysis);
        assert!(err.contains("unknown filter"), "got: {err}");
    }

    #[test]
    fn missing_tokenizer_field_error() {
        let analysis = json!({
            "analyzer": {
                "bad": {
                    "filter": ["lowercase"]
                }
            }
        });
        let err = build_error(&analysis);
        assert!(err.contains("requires a 'tokenizer'"), "got: {err}");
    }

    #[test]
    fn invalid_ngram_params() {
        let analysis = json!({
            "tokenizer": {
                "bad_tok": {
                    "type": "ngram",
                    "min_gram": 5,
                    "max_gram": 2
                }
            },
            "analyzer": {
                "bad": {
                    "tokenizer": "bad_tok"
                }
            }
        });
        let err = build_error(&analysis);
        assert!(err.contains("min_gram"), "got: {err}");
    }

    #[test]
    fn round_trip_serialization() {
        let analysis = json!({
            "tokenizer": {
                "my_tok": {
                    "type": "edge_ngram",
                    "min_gram": 2,
                    "max_gram": 10
                }
            },
            "analyzer": {
                "my_analyzer": {
                    "tokenizer": "my_tok",
                    "filter": ["lowercase"]
                }
            }
        });
        let serialized = AnalysisConfig::from_json(&analysis).unwrap().to_json();

        // The serialized form must parse again and still build the analyzer.
        let registry = registry_for(&serialized);
        assert!(registry.try_get("my_analyzer").is_some());
    }

    #[test]
    fn custom_mapping_char_filter() {
        let analysis = json!({
            "char_filter": {
                "emoticons": {
                    "type": "mapping",
                    "mappings": [":) => happy", ":( => sad"]
                }
            },
            "analyzer": {
                "emo": {
                    "char_filter": ["emoticons"],
                    "tokenizer": "standard",
                    "filter": ["lowercase"]
                }
            }
        });
        let texts = analyze(&analysis, "emo", "I am :) today");
        assert!(has_token(&texts, "happy"));
    }

    #[test]
    fn custom_stop_words() {
        let analysis = json!({
            "filter": {
                "my_stop": {
                    "type": "stop",
                    "stopwords": ["hello", "world"]
                }
            },
            "analyzer": {
                "custom_stop": {
                    "tokenizer": "standard",
                    "filter": ["lowercase", "my_stop"]
                }
            }
        });
        assert_eq!(analyze(&analysis, "custom_stop", "Hello World Foo"), ["foo"]);
    }

    #[test]
    fn pattern_tokenizer_config() {
        let analysis = json!({
            "tokenizer": {
                "comma_tok": {
                    "type": "pattern",
                    "pattern": ","
                }
            },
            "analyzer": {
                "csv": {
                    "tokenizer": "comma_tok"
                }
            }
        });
        assert_eq!(analyze(&analysis, "csv", "a,b,c"), ["a", "b", "c"]);
    }
}