use std::sync::OnceLock;
use crate::process::process_type::ProcessType;
use crate::process::transform::constants::*;
use crate::process::transform::delete::DeleteMatcher;
use crate::process::transform::replace::{NormalizeMatcher, RomanizeMatcher, VariantNormMatcher};
/// One text-transformation step, pairing the kind of transform with the
/// matcher that carries it out.
///
/// Instances are built by `build_transform_step` and executed through
/// [`TransformStep::apply`].
#[derive(Clone)]
pub(crate) enum TransformStep {
/// Performs no transformation; `apply` always yields `None`.
None,
/// Replacement step backed by a [`VariantNormMatcher`].
VariantNorm(VariantNormMatcher),
/// Deletion step backed by a [`DeleteMatcher`] (invoked via `delete`).
Delete(DeleteMatcher),
/// Replacement step backed by a [`NormalizeMatcher`].
Normalize(NormalizeMatcher),
/// Replacement step backed by a [`RomanizeMatcher`].
Romanize(RomanizeMatcher),
/// Replacement step that also uses a [`RomanizeMatcher`]; it is constructed
/// with a different boolean flag than `Romanize`.
// NOTE(review): presumably the flag selects per-character output — confirm
// against `RomanizeMatcher::new`.
RomanizeChar(RomanizeMatcher),
/// Replacement step that reuses [`RomanizeMatcher`] but is built from the
/// `EMOJI_NORM_*` tables.
EmojiNorm(RomanizeMatcher),
}
impl TransformStep {
    /// Reports whether [`apply`](Self::apply) skips this step when the input
    /// density is `0.0`.
    ///
    /// Only `Delete` and `Normalize` still run on such input; every other
    /// variant returns `true`.
    // NOTE(review): the name implies density `0.0` means pure-ASCII text —
    // confirm with the code that computes the density.
    #[inline(always)]
    pub(crate) fn is_noop_on_ascii_input(&self) -> bool {
        // Exhaustive on purpose: adding a variant forces a decision here.
        match self {
            Self::Delete(_) | Self::Normalize(_) => false,
            Self::None
            | Self::VariantNorm(_)
            | Self::Romanize(_)
            | Self::RomanizeChar(_)
            | Self::EmojiNorm(_) => true,
        }
    }

    /// Runs this step over `text`.
    ///
    /// Returns the rewritten string together with the density value to
    /// attach to it, or `None` when the step is skipped or the underlying
    /// matcher returns `None`.
    ///
    /// Density handling:
    /// * `parent_density == 0.0`: steps for which
    ///   [`is_noop_on_ascii_input`](Self::is_noop_on_ascii_input) is `true`
    ///   are skipped without consulting their matcher; `Delete` and
    ///   `Normalize` run and tag their output with `0.0`.
    /// * otherwise: `VariantNorm`, `Delete` and `Normalize` pass
    ///   `parent_density` through, while `Romanize`, `RomanizeChar` and
    ///   `EmojiNorm` tag their output with `0.0`.
    #[inline(always)]
    pub(crate) fn apply(&self, text: &str, parent_density: f32) -> Option<(String, f32)> {
        let zero_density_input = parent_density == 0.0;
        if zero_density_input && self.is_noop_on_ascii_input() {
            return None;
        }

        // Use a literal 0.0 (not `parent_density`) for zero-density input so
        // that a `-0.0` argument is reported as `0.0`.
        let carried = if zero_density_input {
            0.0
        } else {
            parent_density
        };

        let (rewritten, density) = match self {
            Self::None => return None,
            Self::VariantNorm(m) => (m.replace(text), carried),
            Self::Delete(m) => (m.delete(text), carried),
            Self::Normalize(m) => (m.replace(text), carried),
            Self::Romanize(m) | Self::RomanizeChar(m) | Self::EmojiNorm(m) => {
                (m.replace(text), 0.0)
            }
        };
        rewritten.map(|s| (s, density))
    }
}
/// Process-wide cache of transform steps, one lazily-initialized slot per
/// bit position.
///
/// `get_transform_step` indexes this array with the trailing-zero count of
/// the requested `ProcessType`'s bits and fills the slot on first access.
/// Eight slots are declared; `build_transform_step` currently constructs
/// seven distinct steps.
// NOTE(review): eight slots presumably mirrors an 8-bit `ProcessType`
// representation — confirm against the `ProcessType` definition.
static TRANSFORM_STEP_CACHE: [OnceLock<TransformStep>; 8] = [
OnceLock::new(),
OnceLock::new(),
OnceLock::new(),
OnceLock::new(),
OnceLock::new(),
OnceLock::new(),
OnceLock::new(),
OnceLock::new(),
];
/// Returns the cached [`TransformStep`] for a single-bit `ProcessType`,
/// building it on first use.
///
/// The cache slot is the trailing-zero count of `process_type_bit.bits()`.
///
/// # Panics
///
/// In debug builds, panics if `process_type_bit` is neither a single-bit
/// value nor `ProcessType::None`, or if the computed slot is outside the
/// cache. In all builds, an out-of-range slot panics on indexing, and an
/// unsupported value panics inside `build_transform_step`.
pub(crate) fn get_transform_step(process_type_bit: ProcessType) -> &'static TransformStep {
    let raw_bits = process_type_bit.bits();
    debug_assert!(
        raw_bits.is_power_of_two() || process_type_bit == ProcessType::None,
        "get_transform_step requires a single-bit ProcessType, got {:?}",
        process_type_bit
    );

    let slot = raw_bits.trailing_zeros() as usize;
    debug_assert!(slot < TRANSFORM_STEP_CACHE.len());

    let cell = &TRANSFORM_STEP_CACHE[slot];
    cell.get_or_init(|| build_transform_step(process_type_bit))
}
/// Constructs the [`TransformStep`] for one single-bit `ProcessType`,
/// wiring each matcher to its table constants.
///
/// `Romanize` and `RomanizeChar` share the `ROMANIZE_*` tables and differ
/// only in the final boolean passed to `RomanizeMatcher::new`; `EmojiNorm`
/// uses the same matcher type with the `EMOJI_NORM_*` tables.
///
/// # Panics
///
/// Panics if `process_type_bit` is not one of the values handled below.
fn build_transform_step(process_type_bit: ProcessType) -> TransformStep {
    match process_type_bit {
        ProcessType::None => TransformStep::None,
        ProcessType::VariantNorm => {
            let matcher = VariantNormMatcher::new(VARIANT_NORM_L1_BYTES, VARIANT_NORM_L2_BYTES);
            TransformStep::VariantNorm(matcher)
        }
        ProcessType::Delete => {
            let matcher = DeleteMatcher::new(DELETE_BITSET_BYTES);
            TransformStep::Delete(matcher)
        }
        ProcessType::Normalize => {
            let matcher =
                NormalizeMatcher::new(NORMALIZE_L1_BYTES, NORMALIZE_L2_BYTES, NORMALIZE_STR_BYTES);
            TransformStep::Normalize(matcher)
        }
        ProcessType::Romanize => {
            let matcher = RomanizeMatcher::new(
                ROMANIZE_L1_BYTES,
                ROMANIZE_L2_BYTES,
                ROMANIZE_STR_BYTES,
                false,
            );
            TransformStep::Romanize(matcher)
        }
        ProcessType::RomanizeChar => {
            // Same tables as `Romanize`; only the trailing flag differs.
            let matcher = RomanizeMatcher::new(
                ROMANIZE_L1_BYTES,
                ROMANIZE_L2_BYTES,
                ROMANIZE_STR_BYTES,
                true,
            );
            TransformStep::RomanizeChar(matcher)
        }
        ProcessType::EmojiNorm => {
            let matcher = RomanizeMatcher::new(
                EMOJI_NORM_L1_BYTES,
                EMOJI_NORM_L2_BYTES,
                EMOJI_NORM_STR_BYTES,
                false,
            );
            TransformStep::EmojiNorm(matcher)
        }
        _ => unreachable!("unsupported single-bit ProcessType"),
    }
}