1use std::collections::BTreeMap;
4use std::collections::HashSet;
5
6use serde_json::{Number, Value};
7
8use crate::functions::{Function, number_value};
9use crate::interpreter::SearchResult;
10use crate::registry::register_if_enabled;
11use crate::{Context, Runtime, arg, defn};
12
// Average adult silent-reading speed; denominator for the
// reading_time / reading_time_seconds estimates below.
const WORDS_PER_MINUTE: f64 = 200.0;
15
16defn!(WordCountFn, vec![arg!(string)], None);
21
22impl Function for WordCountFn {
23 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
24 self.signature.validate(args, ctx)?;
25 let s = args[0].as_str().unwrap();
26 let count = s.split_whitespace().count();
27 Ok(Value::Number(Number::from(count)))
28 }
29}
30
31defn!(CharCountFn, vec![arg!(string)], None);
36
37impl Function for CharCountFn {
38 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
39 self.signature.validate(args, ctx)?;
40 let s = args[0].as_str().unwrap();
41 let count = s.chars().filter(|c| !c.is_whitespace()).count();
42 Ok(Value::Number(Number::from(count)))
43 }
44}
45
46defn!(SentenceCountFn, vec![arg!(string)], None);
51
52impl Function for SentenceCountFn {
53 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
54 self.signature.validate(args, ctx)?;
55 let s = args[0].as_str().unwrap();
56
57 if s.trim().is_empty() {
58 return Ok(Value::Number(Number::from(0)));
59 }
60
61 let count = s
63 .chars()
64 .filter(|c| *c == '.' || *c == '!' || *c == '?')
65 .count();
66
67 let count = if count == 0 && !s.trim().is_empty() {
69 1
70 } else {
71 count
72 };
73
74 Ok(Value::Number(Number::from(count)))
75 }
76}
77
78defn!(ParagraphCountFn, vec![arg!(string)], None);
83
84impl Function for ParagraphCountFn {
85 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
86 self.signature.validate(args, ctx)?;
87 let s = args[0].as_str().unwrap();
88
89 let count = s.split("\n\n").filter(|p| !p.trim().is_empty()).count();
91
92 Ok(Value::Number(Number::from(count)))
93 }
94}
95
96defn!(ReadingTimeFn, vec![arg!(string)], None);
101
102impl Function for ReadingTimeFn {
103 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
104 self.signature.validate(args, ctx)?;
105 let s = args[0].as_str().unwrap();
106 let word_count = s.split_whitespace().count() as f64;
107 let minutes = (word_count / WORDS_PER_MINUTE).ceil();
108 Ok(number_value(minutes))
109 }
110}
111
112defn!(ReadingTimeSecondsFn, vec![arg!(string)], None);
117
118impl Function for ReadingTimeSecondsFn {
119 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
120 self.signature.validate(args, ctx)?;
121 let s = args[0].as_str().unwrap();
122 let word_count = s.split_whitespace().count() as f64;
123 let seconds = (word_count / WORDS_PER_MINUTE) * 60.0;
124 Ok(number_value(seconds.ceil()))
125 }
126}
127
128defn!(CharFrequenciesFn, vec![arg!(string)], None);
133
134impl Function for CharFrequenciesFn {
135 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
136 self.signature.validate(args, ctx)?;
137 let s = args[0].as_str().unwrap();
138
139 let mut freq: BTreeMap<char, usize> = BTreeMap::new();
140 for c in s.chars() {
141 if !c.is_whitespace() {
142 *freq.entry(c).or_insert(0) += 1;
143 }
144 }
145
146 let obj: serde_json::Map<String, Value> = freq
147 .into_iter()
148 .map(|(k, v)| (k.to_string(), Value::Number(Number::from(v))))
149 .collect();
150
151 Ok(Value::Object(obj))
152 }
153}
154
155defn!(WordFrequenciesFn, vec![arg!(string)], None);
160
161impl Function for WordFrequenciesFn {
162 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
163 self.signature.validate(args, ctx)?;
164 let s = args[0].as_str().unwrap();
165
166 let mut freq: BTreeMap<String, usize> = BTreeMap::new();
167 for word in s.split_whitespace() {
168 let normalized: String = word
170 .chars()
171 .filter(|c| c.is_alphanumeric())
172 .collect::<String>()
173 .to_lowercase();
174
175 if !normalized.is_empty() {
176 *freq.entry(normalized).or_insert(0) += 1;
177 }
178 }
179
180 let obj: serde_json::Map<String, Value> = freq
181 .into_iter()
182 .map(|(k, v)| (k, Value::Number(Number::from(v))))
183 .collect();
184
185 Ok(Value::Object(obj))
186 }
187}
188
189defn!(
195 NgramsFn,
196 vec![arg!(string), arg!(number)],
197 Some(arg!(string))
198);
199
200impl Function for NgramsFn {
201 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
202 self.signature.validate(args, ctx)?;
203 let s = args[0].as_str().unwrap();
204 let n = args[1].as_f64().unwrap() as usize;
205
206 let ngram_type = if args.len() > 2 {
208 args[2].as_str().unwrap_or("word")
209 } else {
210 "word"
211 };
212
213 if n == 0 {
214 return Ok(Value::Array(vec![]));
215 }
216
217 let result = match ngram_type {
218 "char" => {
219 let chars: Vec<char> = s.chars().collect();
221 if chars.len() < n {
222 vec![]
223 } else {
224 chars
225 .windows(n)
226 .map(|w| Value::String(w.iter().collect()))
227 .collect()
228 }
229 }
230 _ => {
231 let words: Vec<&str> = s.split_whitespace().collect();
233 if words.len() < n {
234 vec![]
235 } else {
236 words
237 .windows(n)
238 .map(|w| {
239 let arr: Vec<Value> = w
240 .iter()
241 .map(|word| Value::String(word.to_string()))
242 .collect();
243 Value::Array(arr)
244 })
245 .collect()
246 }
247 }
248 };
249
250 Ok(Value::Array(result))
251 }
252}
253
254defn!(BigramsFn, vec![arg!(string)], None);
260
261impl Function for BigramsFn {
262 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
263 self.signature.validate(args, ctx)?;
264 let s = args[0].as_str().unwrap();
265
266 let words: Vec<&str> = s.split_whitespace().collect();
267 if words.len() < 2 {
268 return Ok(Value::Array(vec![]));
269 }
270
271 let result: Vec<Value> = words
272 .windows(2)
273 .map(|w| {
274 let arr: Vec<Value> = w
275 .iter()
276 .map(|word| Value::String(word.to_string()))
277 .collect();
278 Value::Array(arr)
279 })
280 .collect();
281
282 Ok(Value::Array(result))
283 }
284}
285
286defn!(TrigramsFn, vec![arg!(string)], None);
292
293impl Function for TrigramsFn {
294 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
295 self.signature.validate(args, ctx)?;
296 let s = args[0].as_str().unwrap();
297
298 let words: Vec<&str> = s.split_whitespace().collect();
299 if words.len() < 3 {
300 return Ok(Value::Array(vec![]));
301 }
302
303 let result: Vec<Value> = words
304 .windows(3)
305 .map(|w| {
306 let arr: Vec<Value> = w
307 .iter()
308 .map(|word| Value::String(word.to_string()))
309 .collect();
310 Value::Array(arr)
311 })
312 .collect();
313
314 Ok(Value::Array(result))
315 }
316}
317
318defn!(TokensFn, vec![arg!(string)], None);
324
325impl Function for TokensFn {
326 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
327 self.signature.validate(args, ctx)?;
328 let s = args[0].as_str().unwrap();
329
330 let tokens: Vec<Value> = s
331 .split_whitespace()
332 .filter_map(|word| {
333 let normalized: String = word
334 .chars()
335 .filter(|c| c.is_alphanumeric())
336 .collect::<String>()
337 .to_lowercase();
338
339 if normalized.is_empty() {
340 None
341 } else {
342 Some(Value::String(normalized))
343 }
344 })
345 .collect();
346
347 Ok(Value::Array(tokens))
348 }
349}
350
351defn!(TokenizeFn, vec![arg!(string)], Some(arg!(object)));
359
360impl Function for TokenizeFn {
361 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
362 self.signature.validate(args, ctx)?;
363 let s = args[0].as_str().unwrap();
364
365 let (case_mode, strip_punctuation) = if args.len() > 1 {
367 if let Some(opts) = args[1].as_object() {
368 let case_mode = opts.get("case").and_then(|v| v.as_str()).unwrap_or("lower");
369
370 let punctuation = opts
371 .get("punctuation")
372 .and_then(|v| v.as_str())
373 .unwrap_or("strip");
374
375 (case_mode.to_string(), punctuation != "keep")
376 } else {
377 ("lower".to_string(), true)
378 }
379 } else {
380 ("lower".to_string(), true)
381 };
382
383 let tokens: Vec<Value> = s
384 .split_whitespace()
385 .filter_map(|word| {
386 let processed: String = if strip_punctuation {
387 word.chars().filter(|c| c.is_alphanumeric()).collect()
388 } else {
389 word.to_string()
390 };
391
392 if processed.is_empty() {
393 return None;
394 }
395
396 let final_token = match case_mode.as_str() {
397 "upper" => processed.to_uppercase(),
398 "preserve" => processed,
399 _ => processed.to_lowercase(), };
401
402 Some(Value::String(final_token))
403 })
404 .collect();
405
406 Ok(Value::Array(tokens))
407 }
408}
409
410defn!(StemFn, vec![arg!(string)], Some(arg!(string)));
416
417impl Function for StemFn {
418 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
419 use rust_stemmers::{Algorithm, Stemmer};
420
421 self.signature.validate(args, ctx)?;
422 let word = args[0].as_str().unwrap();
423
424 let lang = if args.len() > 1 {
425 args[1].as_str().map(|s| s.to_string())
426 } else {
427 None
428 };
429
430 let algorithm = match lang.as_deref() {
431 Some("ar" | "arabic") => Algorithm::Arabic,
432 Some("da" | "danish") => Algorithm::Danish,
433 Some("nl" | "dutch") => Algorithm::Dutch,
434 Some("fi" | "finnish") => Algorithm::Finnish,
435 Some("fr" | "french") => Algorithm::French,
436 Some("de" | "german") => Algorithm::German,
437 Some("el" | "greek") => Algorithm::Greek,
438 Some("hu" | "hungarian") => Algorithm::Hungarian,
439 Some("it" | "italian") => Algorithm::Italian,
440 Some("no" | "norwegian") => Algorithm::Norwegian,
441 Some("pt" | "portuguese") => Algorithm::Portuguese,
442 Some("ro" | "romanian") => Algorithm::Romanian,
443 Some("ru" | "russian") => Algorithm::Russian,
444 Some("es" | "spanish") => Algorithm::Spanish,
445 Some("sv" | "swedish") => Algorithm::Swedish,
446 Some("ta" | "tamil") => Algorithm::Tamil,
447 Some("tr" | "turkish") => Algorithm::Turkish,
448 _ => Algorithm::English, };
450
451 let stemmer = Stemmer::create(algorithm);
452 let stemmed = stemmer.stem(word).to_string();
453
454 Ok(Value::String(stemmed))
455 }
456}
457
458defn!(StemsFn, vec![arg!(array)], Some(arg!(string)));
464
465impl Function for StemsFn {
466 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
467 use rust_stemmers::{Algorithm, Stemmer};
468
469 self.signature.validate(args, ctx)?;
470 let tokens = args[0].as_array().unwrap();
471
472 let lang = if args.len() > 1 {
473 args[1].as_str().map(|s| s.to_string())
474 } else {
475 None
476 };
477
478 let algorithm = match lang.as_deref() {
479 Some("ar" | "arabic") => Algorithm::Arabic,
480 Some("da" | "danish") => Algorithm::Danish,
481 Some("nl" | "dutch") => Algorithm::Dutch,
482 Some("fi" | "finnish") => Algorithm::Finnish,
483 Some("fr" | "french") => Algorithm::French,
484 Some("de" | "german") => Algorithm::German,
485 Some("el" | "greek") => Algorithm::Greek,
486 Some("hu" | "hungarian") => Algorithm::Hungarian,
487 Some("it" | "italian") => Algorithm::Italian,
488 Some("no" | "norwegian") => Algorithm::Norwegian,
489 Some("pt" | "portuguese") => Algorithm::Portuguese,
490 Some("ro" | "romanian") => Algorithm::Romanian,
491 Some("ru" | "russian") => Algorithm::Russian,
492 Some("es" | "spanish") => Algorithm::Spanish,
493 Some("sv" | "swedish") => Algorithm::Swedish,
494 Some("ta" | "tamil") => Algorithm::Tamil,
495 Some("tr" | "turkish") => Algorithm::Turkish,
496 _ => Algorithm::English,
497 };
498
499 let stemmer = Stemmer::create(algorithm);
500
501 let result: Vec<Value> = tokens
502 .iter()
503 .filter_map(|t| {
504 t.as_str()
505 .map(|s| Value::String(stemmer.stem(s).to_string()))
506 })
507 .collect();
508
509 Ok(Value::Array(result))
510 }
511}
512
513defn!(StopwordsFn, vec![], Some(arg!(string)));
519
520impl Function for StopwordsFn {
521 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
522 use stop_words::{LANGUAGE, get};
523
524 self.signature.validate(args, ctx)?;
525
526 let lang = if !args.is_empty() {
527 args[0].as_str().map(|s| s.to_string())
528 } else {
529 None
530 };
531
532 let language = match lang.as_deref() {
533 Some("ar" | "arabic") => LANGUAGE::Arabic,
534 Some("bg" | "bulgarian") => LANGUAGE::Bulgarian,
535 Some("ca" | "catalan") => LANGUAGE::Catalan,
536 Some("cs" | "czech") => LANGUAGE::Czech,
537 Some("da" | "danish") => LANGUAGE::Danish,
538 Some("nl" | "dutch") => LANGUAGE::Dutch,
539 Some("fi" | "finnish") => LANGUAGE::Finnish,
540 Some("fr" | "french") => LANGUAGE::French,
541 Some("de" | "german") => LANGUAGE::German,
542 Some("he" | "hebrew") => LANGUAGE::Hebrew,
543 Some("hi" | "hindi") => LANGUAGE::Hindi,
544 Some("hu" | "hungarian") => LANGUAGE::Hungarian,
545 Some("id" | "indonesian") => LANGUAGE::Indonesian,
546 Some("it" | "italian") => LANGUAGE::Italian,
547 Some("ja" | "japanese") => LANGUAGE::Japanese,
548 Some("ko" | "korean") => LANGUAGE::Korean,
549 Some("lv" | "latvian") => LANGUAGE::Latvian,
550 Some("no" | "norwegian") => LANGUAGE::Norwegian,
551 Some("fa" | "persian") => LANGUAGE::Persian,
552 Some("pl" | "polish") => LANGUAGE::Polish,
553 Some("pt" | "portuguese") => LANGUAGE::Portuguese,
554 Some("ro" | "romanian") => LANGUAGE::Romanian,
555 Some("ru" | "russian") => LANGUAGE::Russian,
556 Some("sk" | "slovak") => LANGUAGE::Slovak,
557 Some("es" | "spanish") => LANGUAGE::Spanish,
558 Some("sv" | "swedish") => LANGUAGE::Swedish,
559 Some("th" | "thai") => LANGUAGE::Thai,
560 Some("tr" | "turkish") => LANGUAGE::Turkish,
561 Some("uk" | "ukrainian") => LANGUAGE::Ukrainian,
562 Some("vi" | "vietnamese") => LANGUAGE::Vietnamese,
563 Some("zh" | "chinese") => LANGUAGE::Chinese,
564 _ => LANGUAGE::English,
565 };
566
567 let words = get(language);
568 let result: Vec<Value> = words.iter().map(|w| Value::String(w.to_string())).collect();
569
570 Ok(Value::Array(result))
571 }
572}
573
574defn!(RemoveStopwordsFn, vec![arg!(array)], Some(arg!(string)));
580
581impl Function for RemoveStopwordsFn {
582 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
583 use stop_words::{LANGUAGE, get};
584
585 self.signature.validate(args, ctx)?;
586 let tokens = args[0].as_array().unwrap();
587
588 let lang = if args.len() > 1 {
589 args[1].as_str().map(|s| s.to_string())
590 } else {
591 None
592 };
593
594 let language = match lang.as_deref() {
595 Some("ar" | "arabic") => LANGUAGE::Arabic,
596 Some("bg" | "bulgarian") => LANGUAGE::Bulgarian,
597 Some("ca" | "catalan") => LANGUAGE::Catalan,
598 Some("cs" | "czech") => LANGUAGE::Czech,
599 Some("da" | "danish") => LANGUAGE::Danish,
600 Some("nl" | "dutch") => LANGUAGE::Dutch,
601 Some("fi" | "finnish") => LANGUAGE::Finnish,
602 Some("fr" | "french") => LANGUAGE::French,
603 Some("de" | "german") => LANGUAGE::German,
604 Some("he" | "hebrew") => LANGUAGE::Hebrew,
605 Some("hi" | "hindi") => LANGUAGE::Hindi,
606 Some("hu" | "hungarian") => LANGUAGE::Hungarian,
607 Some("id" | "indonesian") => LANGUAGE::Indonesian,
608 Some("it" | "italian") => LANGUAGE::Italian,
609 Some("ja" | "japanese") => LANGUAGE::Japanese,
610 Some("ko" | "korean") => LANGUAGE::Korean,
611 Some("lv" | "latvian") => LANGUAGE::Latvian,
612 Some("no" | "norwegian") => LANGUAGE::Norwegian,
613 Some("fa" | "persian") => LANGUAGE::Persian,
614 Some("pl" | "polish") => LANGUAGE::Polish,
615 Some("pt" | "portuguese") => LANGUAGE::Portuguese,
616 Some("ro" | "romanian") => LANGUAGE::Romanian,
617 Some("ru" | "russian") => LANGUAGE::Russian,
618 Some("sk" | "slovak") => LANGUAGE::Slovak,
619 Some("es" | "spanish") => LANGUAGE::Spanish,
620 Some("sv" | "swedish") => LANGUAGE::Swedish,
621 Some("th" | "thai") => LANGUAGE::Thai,
622 Some("tr" | "turkish") => LANGUAGE::Turkish,
623 Some("uk" | "ukrainian") => LANGUAGE::Ukrainian,
624 Some("vi" | "vietnamese") => LANGUAGE::Vietnamese,
625 Some("zh" | "chinese") => LANGUAGE::Chinese,
626 _ => LANGUAGE::English,
627 };
628
629 let stopwords = get(language);
630 let stopwords_set: HashSet<String> = stopwords.iter().map(|s| s.to_string()).collect();
631
632 let result: Vec<Value> = tokens
633 .iter()
634 .filter_map(|t| {
635 t.as_str().and_then(|s| {
636 if stopwords_set.contains(&s.to_lowercase()) {
637 None
638 } else {
639 Some(Value::String(s.to_string()))
640 }
641 })
642 })
643 .collect();
644
645 Ok(Value::Array(result))
646 }
647}
648
649defn!(IsStopwordFn, vec![arg!(string)], Some(arg!(string)));
655
656impl Function for IsStopwordFn {
657 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
658 use stop_words::{LANGUAGE, get};
659
660 self.signature.validate(args, ctx)?;
661 let word = args[0].as_str().unwrap();
662
663 let lang = if args.len() > 1 {
664 args[1].as_str().map(|s| s.to_string())
665 } else {
666 None
667 };
668
669 let language = match lang.as_deref() {
670 Some("ar" | "arabic") => LANGUAGE::Arabic,
671 Some("bg" | "bulgarian") => LANGUAGE::Bulgarian,
672 Some("ca" | "catalan") => LANGUAGE::Catalan,
673 Some("cs" | "czech") => LANGUAGE::Czech,
674 Some("da" | "danish") => LANGUAGE::Danish,
675 Some("nl" | "dutch") => LANGUAGE::Dutch,
676 Some("fi" | "finnish") => LANGUAGE::Finnish,
677 Some("fr" | "french") => LANGUAGE::French,
678 Some("de" | "german") => LANGUAGE::German,
679 Some("he" | "hebrew") => LANGUAGE::Hebrew,
680 Some("hi" | "hindi") => LANGUAGE::Hindi,
681 Some("hu" | "hungarian") => LANGUAGE::Hungarian,
682 Some("id" | "indonesian") => LANGUAGE::Indonesian,
683 Some("it" | "italian") => LANGUAGE::Italian,
684 Some("ja" | "japanese") => LANGUAGE::Japanese,
685 Some("ko" | "korean") => LANGUAGE::Korean,
686 Some("lv" | "latvian") => LANGUAGE::Latvian,
687 Some("no" | "norwegian") => LANGUAGE::Norwegian,
688 Some("fa" | "persian") => LANGUAGE::Persian,
689 Some("pl" | "polish") => LANGUAGE::Polish,
690 Some("pt" | "portuguese") => LANGUAGE::Portuguese,
691 Some("ro" | "romanian") => LANGUAGE::Romanian,
692 Some("ru" | "russian") => LANGUAGE::Russian,
693 Some("sk" | "slovak") => LANGUAGE::Slovak,
694 Some("es" | "spanish") => LANGUAGE::Spanish,
695 Some("sv" | "swedish") => LANGUAGE::Swedish,
696 Some("th" | "thai") => LANGUAGE::Thai,
697 Some("tr" | "turkish") => LANGUAGE::Turkish,
698 Some("uk" | "ukrainian") => LANGUAGE::Ukrainian,
699 Some("vi" | "vietnamese") => LANGUAGE::Vietnamese,
700 Some("zh" | "chinese") => LANGUAGE::Chinese,
701 _ => LANGUAGE::English,
702 };
703
704 let stopwords = get(language);
705 let is_stop = stopwords.iter().any(|sw| sw.eq_ignore_ascii_case(word));
706
707 Ok(Value::Bool(is_stop))
708 }
709}
710
711defn!(NormalizeUnicodeFn, vec![arg!(string)], Some(arg!(string)));
717
718impl Function for NormalizeUnicodeFn {
719 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
720 use unicode_normalization::UnicodeNormalization;
721
722 self.signature.validate(args, ctx)?;
723 let s = args[0].as_str().unwrap();
724
725 let form = if args.len() > 1 {
726 args[1].as_str().map(|s| s.to_uppercase())
727 } else {
728 None
729 };
730
731 let normalized = match form.as_deref() {
732 Some("NFD") => s.nfd().collect::<String>(),
733 Some("NFKC") => s.nfkc().collect::<String>(),
734 Some("NFKD") => s.nfkd().collect::<String>(),
735 _ => s.nfc().collect::<String>(), };
737
738 Ok(Value::String(normalized))
739 }
740}
741
742defn!(RemoveAccentsFn, vec![arg!(string)], None);
748
749impl Function for RemoveAccentsFn {
750 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
751 use unicode_normalization::UnicodeNormalization;
752
753 self.signature.validate(args, ctx)?;
754 let s = args[0].as_str().unwrap();
755
756 let result: String = s
758 .nfd()
759 .filter(|c| !unicode_normalization::char::is_combining_mark(*c))
760 .collect();
761
762 Ok(Value::String(result))
763 }
764}
765
766defn!(CollapseWhitespaceFn, vec![arg!(string)], None);
772
773impl Function for CollapseWhitespaceFn {
774 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
775 self.signature.validate(args, ctx)?;
776 let s = args[0].as_str().unwrap();
777
778 let result: String = s.split_whitespace().collect::<Vec<_>>().join(" ");
779
780 Ok(Value::String(result))
781 }
782}
783
784pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
786 register_if_enabled(runtime, "word_count", enabled, Box::new(WordCountFn::new()));
787 register_if_enabled(runtime, "char_count", enabled, Box::new(CharCountFn::new()));
788 register_if_enabled(
789 runtime,
790 "sentence_count",
791 enabled,
792 Box::new(SentenceCountFn::new()),
793 );
794 register_if_enabled(
795 runtime,
796 "paragraph_count",
797 enabled,
798 Box::new(ParagraphCountFn::new()),
799 );
800 register_if_enabled(
801 runtime,
802 "reading_time",
803 enabled,
804 Box::new(ReadingTimeFn::new()),
805 );
806 register_if_enabled(
807 runtime,
808 "reading_time_seconds",
809 enabled,
810 Box::new(ReadingTimeSecondsFn::new()),
811 );
812 register_if_enabled(
813 runtime,
814 "char_frequencies",
815 enabled,
816 Box::new(CharFrequenciesFn::new()),
817 );
818 register_if_enabled(
819 runtime,
820 "word_frequencies",
821 enabled,
822 Box::new(WordFrequenciesFn::new()),
823 );
824 register_if_enabled(runtime, "ngrams", enabled, Box::new(NgramsFn::new()));
825 register_if_enabled(runtime, "bigrams", enabled, Box::new(BigramsFn::new()));
826 register_if_enabled(runtime, "trigrams", enabled, Box::new(TrigramsFn::new()));
827 register_if_enabled(runtime, "tokens", enabled, Box::new(TokensFn::new()));
828 register_if_enabled(runtime, "tokenize", enabled, Box::new(TokenizeFn::new()));
829 register_if_enabled(runtime, "stem", enabled, Box::new(StemFn::new()));
830 register_if_enabled(runtime, "stems", enabled, Box::new(StemsFn::new()));
831 register_if_enabled(runtime, "stopwords", enabled, Box::new(StopwordsFn::new()));
832 register_if_enabled(
833 runtime,
834 "remove_stopwords",
835 enabled,
836 Box::new(RemoveStopwordsFn::new()),
837 );
838 register_if_enabled(
839 runtime,
840 "is_stopword",
841 enabled,
842 Box::new(IsStopwordFn::new()),
843 );
844 register_if_enabled(
845 runtime,
846 "normalize_unicode",
847 enabled,
848 Box::new(NormalizeUnicodeFn::new()),
849 );
850 register_if_enabled(
851 runtime,
852 "remove_accents",
853 enabled,
854 Box::new(RemoveAccentsFn::new()),
855 );
856 register_if_enabled(
857 runtime,
858 "collapse_whitespace",
859 enabled,
860 Box::new(CollapseWhitespaceFn::new()),
861 );
862}