unobtanium_segmenter/normalization/
lowercase.rs

1use crate::augmentation::Augmenter;
2use crate::SegmentedToken;
3use crate::SegmentedTokenKind;
4
/// Normalizer that lowercases token text via Rust's built-in lowercasing.
///
/// Tokens whose kind indicates that they contain no letters are skipped,
/// since there would be nothing to lowercase.
///
/// The struct has no fields, so every instance is interchangeable.
#[derive(Clone, Debug, Default)]
pub struct NormalizationLowercase {}
10
11impl NormalizationLowercase {
12	/// Create a new NormalizationLowercase instance.
13	pub fn new() -> Self {
14		Default::default()
15	}
16}
17
18impl Augmenter for NormalizationLowercase {
19	fn augment<'a>(&self, mut token: SegmentedToken<'a>) -> SegmentedToken<'a> {
20		if matches!(token.kind, Some(SegmentedTokenKind::AlphaNumeric) | None) {
21			let lowercased = token.get_text_prefer_normalized().to_lowercase();
22			if token.normalized_text.is_some() || lowercased != token.text {
23				token.normalized_text = Some(lowercased);
24			}
25		}
26		return token;
27	}
28}