use std::fmt;
/// Digit system selected through `ParsitextConfig::unify_digits`.
///
/// The enum only names the choice; the conversion itself lives in whatever
/// code consumes the configuration.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum DigitTarget {
    /// Persian digits. This is the default variant.
    #[default]
    Persian,
    /// Latin digits.
    Latin,
    /// No target.
    ///
    /// NOTE(review): presumably leaves digits untouched — confirm against the
    /// code that reads this option.
    None,
}
/// Profanity setting stored in `ParsitextConfig::profanity_level`.
///
/// The enum only names the level; what each level does is decided by the code
/// that consumes the configuration.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ProfanityLevel {
    /// Default variant.
    ///
    /// NOTE(review): presumably means no profanity handling at all — confirm.
    #[default]
    None,
    /// NOTE(review): the names suggest `Light` is less strict than `Medium` —
    /// confirm against the consumer.
    Light,
    /// See the note on `Light`.
    Medium,
}
/// Processing mode stored in `ParsitextConfig::mode`.
///
/// The enum only names the mode; the trade-off each one makes is decided by
/// the code that consumes the configuration.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ProcessingMode {
    /// Default variant.
    #[default]
    Default,
    /// NOTE(review): by name, favours throughput over output quality — confirm.
    MaximumSpeed,
    /// NOTE(review): by name, favours output quality over throughput — confirm.
    MaximumQuality,
}
/// A user-supplied replacement rule: a `pattern`, its `replacement`, and a
/// `whole_word` flag.
///
/// The type is plain data. How the pattern is matched is decided by the code
/// that applies the rule, not by this type.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CustomRule {
    /// Left-hand side of the rule (shown before the arrow by `Display`).
    pub pattern: String,
    /// Right-hand side of the rule (shown after the arrow by `Display`).
    pub replacement: String,
    /// NOTE(review): presumably restricts matching to whole words — confirm
    /// against the code that applies rules.
    pub whole_word: bool,
}

impl CustomRule {
    /// Builds a rule, converting `pattern` and `replacement` into owned
    /// `String`s so callers may pass `&str` or `String` alike.
    #[must_use]
    pub fn new(
        pattern: impl Into<String>,
        replacement: impl Into<String>,
        whole_word: bool,
    ) -> Self {
        Self {
            pattern: pattern.into(),
            replacement: replacement.into(),
            whole_word,
        }
    }
}

impl fmt::Display for CustomRule {
    /// Writes the rule as `"pattern" → "replacement"`.
    ///
    /// Both strings use `Debug` formatting, so they appear quoted and with
    /// special characters escaped. `whole_word` is not part of the output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?} → {:?}", self.pattern, self.replacement)
    }
}
/// Settings record assembled by `ParsitextConfigBuilder`.
///
/// This struct only stores the options; what each one does is decided by the
/// code that consumes the configuration. The defaults quoted below are the
/// values assigned in `ParsitextConfigBuilder::default()`, which
/// `ParsitextConfig::default()` delegates to.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ParsitextConfig {
/// ZWNJ normalization flag. Default: `true`.
pub normalize_zwnj: bool,
/// Digit unification target. Default: `DigitTarget::Persian`.
pub unify_digits: DigitTarget,
/// Orthography normalization flag. Default: `true`.
pub normalize_orthography: bool,
/// Extra-space removal flag. Default: `true`.
pub remove_extra_spaces: bool,
/// Repetition reduction flag. Default: `true`.
pub reduce_repetitions: bool,
/// Diacritic removal flag. Default: `false`.
pub remove_diacritics: bool,
/// ZWNJ insertion flag. Default: `false`.
pub insert_zwnj: bool,
/// Slang handling flag. Default: `false`.
pub enable_slang: bool,
/// Entity recognition flag. Default: `true`.
pub enable_entity_recognition: bool,
/// Profanity level. Default: `ProfanityLevel::None`.
pub profanity_level: ProfanityLevel,
/// Processing mode. Default: `ProcessingMode::Default`.
pub mode: ProcessingMode,
/// User-supplied rules. Default: empty.
pub custom_rules: Vec<CustomRule>,
}
impl ParsitextConfig {
    /// Returns a `ParsitextConfigBuilder` in its default state.
    ///
    /// Chain setter calls on the result and finish with `build()` to obtain
    /// the configuration.
    #[must_use]
    pub fn builder() -> ParsitextConfigBuilder {
        ParsitextConfigBuilder::default()
    }
}
impl Default for ParsitextConfig {
    /// Produces the configuration a freshly created builder would build,
    /// keeping the default values defined in one place (the builder).
    fn default() -> Self {
        let untouched_builder = ParsitextConfigBuilder::default();
        untouched_builder.build()
    }
}
/// Chainable builder that accumulates settings in a `ParsitextConfig` and
/// hands it over from `build()`.
///
/// `Clone` is derived so a partly configured builder can serve as a shared
/// base for several configurations.
#[derive(Debug, Clone)]
pub struct ParsitextConfigBuilder {
    /// Configuration under construction; returned as-is by `build()`.
    inner: ParsitextConfig,
}
impl Default for ParsitextConfigBuilder {
    /// Starts the builder from the stock configuration.
    fn default() -> Self {
        let defaults = ParsitextConfig {
            // Enum-valued options.
            unify_digits: DigitTarget::Persian,
            profanity_level: ProfanityLevel::None,
            mode: ProcessingMode::Default,
            // Flags that start switched on.
            normalize_zwnj: true,
            normalize_orthography: true,
            remove_extra_spaces: true,
            reduce_repetitions: true,
            enable_entity_recognition: true,
            // Flags that start switched off.
            remove_diacritics: false,
            insert_zwnj: false,
            enable_slang: false,
            // No user rules until the caller supplies some.
            custom_rules: Vec::new(),
        };
        Self { inner: defaults }
    }
}
/// Chainable setters, one per `ParsitextConfig` field, plus `build`.
///
/// Every setter takes the builder by value and returns the updated builder.
/// They are `#[must_use]` because discarding the return value silently throws
/// the change away.
impl ParsitextConfigBuilder {
    /// Sets `normalize_zwnj`.
    #[must_use]
    pub fn normalize_zwnj(mut self, v: bool) -> Self {
        self.inner.normalize_zwnj = v;
        self
    }

    /// Sets `unify_digits` to `target`.
    #[must_use]
    pub fn unify_digits(mut self, target: DigitTarget) -> Self {
        self.inner.unify_digits = target;
        self
    }

    /// Sets `normalize_orthography`.
    #[must_use]
    pub fn normalize_orthography(mut self, v: bool) -> Self {
        self.inner.normalize_orthography = v;
        self
    }

    /// Sets `remove_extra_spaces`.
    #[must_use]
    pub fn remove_extra_spaces(mut self, v: bool) -> Self {
        self.inner.remove_extra_spaces = v;
        self
    }

    /// Sets `reduce_repetitions`.
    #[must_use]
    pub fn reduce_repetitions(mut self, v: bool) -> Self {
        self.inner.reduce_repetitions = v;
        self
    }

    /// Sets `remove_diacritics`.
    #[must_use]
    pub fn remove_diacritics(mut self, v: bool) -> Self {
        self.inner.remove_diacritics = v;
        self
    }

    /// Sets `insert_zwnj`.
    #[must_use]
    pub fn insert_zwnj(mut self, v: bool) -> Self {
        self.inner.insert_zwnj = v;
        self
    }

    /// Sets `enable_slang`.
    #[must_use]
    pub fn enable_slang(mut self, v: bool) -> Self {
        self.inner.enable_slang = v;
        self
    }

    /// Sets `enable_entity_recognition`.
    #[must_use]
    pub fn enable_entity_recognition(mut self, v: bool) -> Self {
        self.inner.enable_entity_recognition = v;
        self
    }

    /// Sets `profanity_level` to `level`.
    #[must_use]
    pub fn profanity_level(mut self, level: ProfanityLevel) -> Self {
        self.inner.profanity_level = level;
        self
    }

    /// Sets `mode`.
    #[must_use]
    pub fn mode(mut self, mode: ProcessingMode) -> Self {
        self.inner.mode = mode;
        self
    }

    /// Replaces the whole rule list with `rules`, dropping any rules set or
    /// added earlier.
    #[must_use]
    pub fn custom_rules(mut self, rules: Vec<CustomRule>) -> Self {
        self.inner.custom_rules = rules;
        self
    }

    /// Appends `rule` to the end of the current rule list.
    #[must_use]
    pub fn add_rule(mut self, rule: CustomRule) -> Self {
        self.inner.custom_rules.push(rule);
        self
    }

    /// Consumes the builder and returns the accumulated configuration.
    #[must_use]
    pub fn build(self) -> ParsitextConfig {
        self.inner
    }
}