unobtanium_segmenter/normalization/
lowercase.rs

1use crate::augmentation::Augmenter;
2use crate::SegmentedToken;
3
/// Normalizer that lowercases token text.
///
/// The type carries no state or configuration; its [`Augmenter`] implementation
/// performs the lowercasing with the standard library's `to_lowercase`.
#[derive(Clone, Debug, Default)]
pub struct NormalizationLowercase {}
7
8impl NormalizationLowercase {
9	/// Create a new NormalizationLowercase instance.
10	pub fn new() -> Self {
11		Default::default()
12	}
13}
14
15impl Augmenter for NormalizationLowercase {
16	fn augment<'a>(&self, mut token: SegmentedToken<'a>) -> SegmentedToken<'a> {
17		let lowercased = token.get_text_prefer_normalized().to_lowercase();
18		if token.normalized_text.is_some() || lowercased != token.text {
19			token.normalized_text = Some(lowercased);
20		}
21		return token;
22	}
23}