tokenizers/normalizers/
utils.rs

1use serde::{Deserialize, Serialize};
2
3use crate::normalizers::NormalizerWrapper;
4use crate::tokenizer::{NormalizedString, Normalizer, Result};
5use crate::utils::macro_rules_attribute;
6
7#[derive(Clone, Deserialize, Debug, Serialize)]
8#[serde(tag = "type")]
9/// Allows concatenating multiple other Normalizer as a Sequence.
10/// All the normalizers run in sequence in the given order against the same NormalizedString.
11pub struct Sequence {
12    normalizers: Vec<NormalizerWrapper>,
13}
14
15impl Sequence {
16    pub fn new(normalizers: Vec<NormalizerWrapper>) -> Self {
17        Self { normalizers }
18    }
19
20    pub fn get_normalizers(&self) -> &[NormalizerWrapper] {
21        &self.normalizers
22    }
23
24    pub fn get_normalizers_mut(&mut self) -> &mut [NormalizerWrapper] {
25        &mut self.normalizers
26    }
27}
28
29impl Normalizer for Sequence {
30    fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
31        for normalizer in &self.normalizers {
32            normalizer.normalize(normalized)?;
33        }
34        Ok(())
35    }
36}
37
38/// Lowercases the input
39#[derive(Copy, Clone, Debug)]
40#[macro_rules_attribute(impl_serde_type!)]
41pub struct Lowercase;
42impl Normalizer for Lowercase {
43    fn normalize(&self, normalized: &mut NormalizedString) -> Result<()> {
44        normalized.lowercase();
45        Ok(())
46    }
47}