use std::borrow::Cow;
use super::{Normalizer, NormalizerOption};
use crate::{Script, Token};
/// Normalizer for Greek text: folds the word-final sigma (`ς`, U+03C2) into
/// the regular sigma (`σ`, U+03C3) so both spellings index identically.
pub struct GreekNormalizer;
impl Normalizer for GreekNormalizer {
    /// Rewrites a trailing final sigma (`ς`) into the regular sigma (`σ`).
    ///
    /// The final sigma only ever occurs at the end of a Greek word, so a
    /// suffix check is sufficient. Both `ς` and `σ` encode to 2 bytes in
    /// UTF-8, so byte offsets and the char map stay valid without adjustment.
    fn normalize<'o>(&self, mut token: Token<'o>, _options: &NormalizerOption) -> Token<'o> {
        if token.lemma.ends_with('ς') {
            // Slice off the 2-byte final sigma and rebuild with `σ` appended.
            let stem = &token.lemma[..token.lemma.len() - 'ς'.len_utf8()];
            let mut rewritten = String::with_capacity(token.lemma.len());
            rewritten.push_str(stem);
            rewritten.push('σ');
            token.lemma = Cow::Owned(rewritten);
        }
        token
    }

    /// Only tokens detected as Greek script are worth running through this normalizer.
    fn should_normalize(&self, token: &Token) -> bool {
        token.script == Script::Greek
    }
}
#[cfg(test)]
mod test {
    use std::borrow::Cow::Owned;

    use crate::normalizer::test::test_normalizer;
    use crate::normalizer::{Normalizer, NormalizerOption};
    use crate::token::TokenKind;

    // Input fixture: a Greek word ending in the final sigma "ς".
    // NOTE: offsets are derived from the lemma instead of hard-coded — the
    // previous constant `10` matched neither the char count (8) nor the byte
    // length (16) of "Αγαπητός".
    fn tokens() -> Vec<Token<'static>> {
        vec![Token {
            lemma: Owned("Αγαπητός".to_string()),
            char_end: "Αγαπητός".chars().count(),
            byte_end: "Αγαπητός".len(),
            script: Script::Greek,
            ..Default::default()
        }]
    }

    // Expected output of GreekNormalizer alone: only the trailing "ς" is
    // rewritten to "σ"; offsets are unchanged since both chars are 2 bytes.
    fn normalizer_result() -> Vec<Token<'static>> {
        vec![Token {
            lemma: Owned("Αγαπητόσ".to_string()),
            char_end: "Αγαπητός".chars().count(),
            byte_end: "Αγαπητός".len(),
            char_map: None,
            script: Script::Greek,
            ..Default::default()
        }]
    }

    // Expected output after the full normalizer pipeline: lowercased, with a
    // char map of 8 entries (each source char is 2 bytes and maps to 2 bytes).
    fn normalized_tokens() -> Vec<Token<'static>> {
        vec![Token {
            lemma: Owned("αγαπητοσ".to_string()),
            char_end: "Αγαπητός".chars().count(),
            byte_end: "Αγαπητός".len(),
            char_map: Some(vec![(2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2)]),
            script: Script::Greek,
            kind: TokenKind::Word,
            ..Default::default()
        }]
    }

    test_normalizer!(GreekNormalizer, tokens(), normalizer_result(), normalized_tokens());
}