use std::hash::Hash;
use fancy_regex::Regex;
use crate::transformer::LanguageTransformDescriptor;
/// Contract for a type that turns an input string into a new `String`,
/// given a list of option values of type `T`.
///
/// `process` is an associated function (it takes no `self`), so it is called
/// as `<Implementor as TextProcessable<T>>::process(..)`.
trait TextProcessable<T> {
    /// Produces an owned `String` from `str` using the supplied `options`.
    ///
    /// The option list is taken by value; how the options influence the
    /// output is entirely up to the implementor.
    fn process(str: &str, options: Vec<T>) -> String;
}
/// A single regex-based text replacement rule.
///
/// Equality and hashing for this type are implemented by hand elsewhere in
/// this file rather than derived.
#[derive(Debug, Clone)]
pub struct FindTermsTextReplacement {
    /// Compiled `fancy_regex` pattern to search for.
    pub pattern: Regex,
    /// Text substituted for a match of `pattern`.
    pub replacement: String,
    /// Flag stored alongside the rule; presumably selects replace-all versus
    /// replace-first behavior — TODO confirm against the code that applies it.
    pub is_global: bool,
}
/// Two replacements are equal when their pattern source text and their
/// replacement strings match.
///
/// The compiled `Regex` is compared through `as_str()`, i.e. by the pattern
/// text it was built from. The `is_global` flag is not part of the comparison.
// NOTE(review): `is_global` is ignored here — confirm that this is intentional.
impl PartialEq for FindTermsTextReplacement {
    fn eq(&self, other: &Self) -> bool {
        // Return the boolean expression directly instead of branching to `true`/`false`.
        self.pattern.as_str() == other.pattern.as_str() && self.replacement == other.replacement
    }
}
/// Marks the hand-written `PartialEq` of `FindTermsTextReplacement` as a full
/// equivalence relation; the impl body is intentionally empty.
impl Eq for FindTermsTextReplacement {}
/// Hashes the pattern source text followed by the replacement string.
///
/// Only these two `str` values are fed to the hasher, in this order;
/// `is_global` is not hashed.
impl Hash for FindTermsTextReplacement {
    fn hash<H>(&self, state: &mut H)
    where
        H: std::hash::Hasher,
    {
        // Order matters for the resulting hash: pattern first, replacement second.
        for part in [self.pattern.as_str(), self.replacement.as_str()] {
            part.hash(state);
        }
    }
}
/// A list of optional replacement-rule groups: each entry is either `None`
/// or a `Vec` of `FindTermsTextReplacement` rules.
pub type FindTermsTextReplacements = Vec<Option<Vec<FindTermsTextReplacement>>>;
/// One concrete combination of text-deinflection settings.
///
/// Derives equality and hashing, so a value can be compared or used as a
/// hash key. Field meanings below are inferred from the names where the
/// declaration alone does not establish them.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TextDeinflectionOptions {
    /// Optional group of regex replacement rules; `None` means no group.
    pub text_replacements: Option<Vec<FindTermsTextReplacement>>,
    /// Half-width setting — presumably toggles half-width conversion; TODO confirm.
    pub half_width: bool,
    /// Numeric setting — presumably toggles numeric character conversion; TODO confirm.
    pub numeric: bool,
    /// Alphabetic setting — presumably toggles alphabetic conversion; TODO confirm.
    pub alphabetic: bool,
    /// Katakana setting — presumably toggles conversion involving katakana; TODO confirm.
    pub katakana: bool,
    /// Hiragana setting — presumably toggles conversion involving hiragana; TODO confirm.
    pub hiragana: bool,
    /// Pair of flags for emphatic handling; the meaning of each position is
    /// not visible here — TODO confirm against the consumer.
    pub emphatic: (bool, bool),
}
/// Per-setting lists of candidate values, with the same field names as
/// `TextDeinflectionOptions` but each field wrapped in a `Vec`.
///
/// Presumably these lists are combined to enumerate `TextDeinflectionOptions`
/// variants — TODO confirm against the caller.
#[derive(Debug, Clone)]
pub struct TextDeinflectionOptionsArrays {
    /// Candidate replacement-rule groups (each entry may be `None`).
    pub text_replacements: Vec<Option<Vec<FindTermsTextReplacement>>>,
    /// Candidate values for the half-width setting.
    pub half_width: Vec<bool>,
    /// Candidate values for the numeric setting.
    pub numeric: Vec<bool>,
    /// Candidate values for the alphabetic setting.
    pub alphabetic: Vec<bool>,
    /// Candidate values for the katakana setting.
    pub katakana: Vec<bool>,
    /// Candidate values for the hiragana setting.
    pub hiragana: Vec<bool>,
    /// Candidate flag pairs for the emphatic setting.
    pub emphatic: Vec<(bool, bool)>,
}
/// A single option value that can be handed to a text processor.
///
/// Each variant carries a different payload type, so one enum can represent
/// the settings of differently-configured processors.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TextProcessorSetting {
    /// A plain on/off value.
    Bool(bool),
    /// A signed 64-bit integer value.
    Int(i64),
    /// An owned string value.
    String(String),
    /// A pair of flags; positional meaning is not visible here — TODO confirm.
    Emphatic(bool, bool),
    /// A full set of deinflection options.
    Deinflection(TextDeinflectionOptions),
    /// A mode for a bidirectional conversion (`Off`, `Direct`, or `Inverse`).
    BiDirectional(BidirectionalPreProcessorOptions),
}
/// Static description of a text processor together with the function that
/// performs the processing.
///
/// All fields are `'static` references or a plain function pointer, so a
/// value can be built from constants.
#[derive(Debug, Clone)]
pub struct TextProcessor {
    /// Name of the processor.
    pub name: &'static str,
    /// Description of the processor.
    pub description: &'static str,
    /// Slice of settings associated with this processor — presumably the
    /// values that may be passed to `process`; TODO confirm.
    pub options: &'static [TextProcessorSetting],
    /// Function pointer that receives the input text and one setting and
    /// returns the processed text as a new `String`.
    pub process: fn(&str, TextProcessorSetting) -> String,
}
/// Function pointer type taking the input text and a setting of type `T`,
/// and returning the result as a new `String`.
pub type TextProcessorFn<T> = fn(&str, T) -> String;
/// Function pointer type taking a string slice and returning a new `String`;
/// by its name, used to normalize a reading.
pub type ReadingNormalizer = fn(&str) -> String;
/// Tagged wrapper identifying which kind of text processor a `TextProcessor`
/// value represents.
///
/// Every variant carries a `TextProcessor` (the two bidirectional variants use
/// the `BidirectionalConversionPreProcessor` alias, which is the same type).
/// The per-variant notes describe only what the variant names suggest; the
/// actual behavior lives in the wrapped processor's `process` function.
#[derive(Debug, Clone)]
pub enum AnyTextProcessor {
    /// Processor tagged as half-width conversion.
    ConvertHalfWidth(TextProcessor),
    /// Processor tagged as alphabetic-to-hiragana conversion.
    AlphabeticToHiragana(TextProcessor),
    /// Processor tagged as combining-character normalization.
    NormalizeCombiningCharacters(TextProcessor),
    /// Processor tagged as CJK compatibility character normalization.
    NormalizeCjkCompatibilityCharacters(TextProcessor),
    /// Processor tagged as radical character normalization.
    NormalizeRadicalCharacters(TextProcessor),
    /// Processor tagged as kanji standardization.
    StandardizeKanji(TextProcessor),
    /// Bidirectional processor tagged as alphanumeric width conversion.
    AlphanumericWidth(BidirectionalConversionPreProcessor),
    /// Bidirectional processor tagged as hiragana/katakana conversion.
    HiraganaToKatakana(BidirectionalConversionPreProcessor),
    /// Processor tagged as emphatic-sequence collapsing.
    CollapseEmphatic(TextProcessor),
    /// Processor tagged as decapitalization.
    Decapitalize(TextProcessor),
    /// Processor tagged as first-letter capitalization.
    CapitalizeFirst(TextProcessor),
}
/// Mode selector for a bidirectional conversion.
///
/// The type is `Copy` and derives equality and hashing, so it can be passed
/// by value and used as a hash key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BidirectionalPreProcessorOptions {
    /// No conversion selected.
    Off,
    /// The direct mode — presumably the forward conversion; TODO confirm.
    Direct,
    /// The inverse mode — presumably the reverse conversion; TODO confirm.
    Inverse,
}
/// Alias for `TextProcessor`; it is the same type under a different name,
/// used for the bidirectional variants of `AnyTextProcessor`.
pub type BidirectionalConversionPreProcessor = TextProcessor;
/// An enum with no variants, so no value of this type can be constructed.
// NOTE(review): looks like a placeholder — confirm whether variants are still to be added.
pub enum AllTextProcessorsEnum {}
/// Associates a language code with two lists of identified text processors.
pub struct LanguageAndProcessors {
    /// Language code (named `iso`; the exact ISO standard is not shown here).
    pub iso: &'static str,
    /// Processors in the `pre` list — presumably applied before the main
    /// lookup/transform step; TODO confirm.
    pub pre: Vec<TextProcessorWithId>,
    /// Processors in the `post` list — presumably applied after the main
    /// lookup/transform step; TODO confirm.
    pub post: Vec<TextProcessorWithId>,
}
/// Associates a language code with its reading-normalizer function.
pub struct LanguageAndReadingNormalizer {
    /// Language code (named `iso`; the exact ISO standard is not shown here).
    pub iso: &'static str,
    /// Function pointer of type `ReadingNormalizer` for this language.
    pub reading_normalizer: ReadingNormalizer,
}
/// A `TextProcessor` paired with a static string identifier.
#[derive(Debug, Clone)]
pub struct TextProcessorWithId {
    /// Identifier for the processor.
    pub id: &'static str,
    /// The processor itself.
    pub processor: TextProcessor,
}
/// Associates a language code with its language-transform descriptor.
pub struct LanguageAndTransforms {
    /// Language code (named `iso`; the exact ISO standard is not shown here).
    pub iso: &'static str,
    /// Transform descriptor for the language, as defined in `crate::transformer`.
    pub language_transforms: LanguageTransformDescriptor,
}
/// Static, human-facing summary of a supported language.
///
/// Every field is a `&'static str`, so values can be declared as constants.
#[derive(Debug, Clone)]
pub struct LanguageSummary {
    /// Name of the language.
    pub name: &'static str,
    /// Language code (named `iso`; presumably the short ISO 639 code — TODO confirm).
    pub iso: &'static str,
    /// Language code named after ISO 639-3.
    pub iso639_3: &'static str,
    /// Example text for the language.
    pub example_text: &'static str,
}