use super::{CharNormalizer, CharOrStr};
use crate::{Script, Token};
/// Example normalizer that lowercases characters and strips whitespace
/// from Latin and Cyrillic tokens.
pub struct DummyNormalizer;
impl CharNormalizer for DummyNormalizer {
    /// Normalizes a single character:
    /// - whitespace is dropped entirely (`None`),
    /// - characters that are already lowercase pass through unchanged,
    /// - any other character is lowercased.
    fn normalize_char(&self, c: char) -> Option<CharOrStr> {
        if c.is_whitespace() {
            // Removing the char: whitespace does not survive normalization.
            None
        } else if c.is_lowercase() {
            // Already lowercase — keep the single char, no allocation needed.
            Some(c.into())
        } else {
            // `char::to_lowercase` returns an iterator because lowercasing can
            // expand to multiple chars (e.g. 'İ'), so collect into a String.
            let normalized: String = c.to_lowercase().collect();
            Some(normalized.into())
        }
    }

    /// Returns `true` when the token should be normalized: it is a Latin or
    /// Cyrillic token containing at least one uppercase character.
    fn should_normalize(&self, token: &Token) -> bool {
        // BUG FIX: the original used `&&` between the two script comparisons,
        // which can never both hold (a token has exactly one script), so this
        // method always returned `false` and normalization was never applied.
        (token.script == Script::Latin || token.script == Script::Cyrillic)
            && token.lemma.chars().any(char::is_uppercase)
    }
}
#[cfg(test)]
mod test {
    use std::borrow::Cow::Owned;

    use crate::normalizer::test::test_normalizer;
    use crate::normalizer::Normalizer;

    /// Builds the single-token fixture shared by all three cases below;
    /// only the lemma differs between them.
    fn fixture(lemma: &str) -> Vec<Token<'static>> {
        vec![Token {
            lemma: Owned(lemma.to_string()),
            char_end: 10,
            byte_end: 10,
            script: Script::Latin,
            ..Default::default()
        }]
    }

    // Input tokens fed to the normalizer.
    fn tokens() -> Vec<Token<'static>> {
        fixture("Pascal Case")
    }

    // Expected output of the normalizer step alone.
    fn normalizer_result() -> Vec<Token<'static>> {
        fixture("pascalcase")
    }

    // Expected output of the full normalization pipeline.
    fn normalized_tokens() -> Vec<Token<'static>> {
        fixture("pascalcase")
    }

    test_normalizer!(DummyNormalizer, tokens(), normalizer_result(), normalized_tokens());
}