mod node;
mod parse;
mod rule;
mod types;
use std::collections::BTreeMap;
use std::sync::Arc;
pub use self::parse::{affix_from_str, AffixNode, ParsedRuleGroup};
pub use self::types::{
CompoundPattern, CompoundSyllable, Conversion, Encoding, FlagType, PartOfSpeech, Phonetic,
RuleType,
};
use crate::dict::{AfxRule, Flag, FlagValue};
use crate::error::{BuildError, Error, ParseError};
/// Settings gathered from the nodes of a parsed affix file.
///
/// Built by `ParsedCfg::load_from_str`; every field starts at the value given
/// by the `Default` impl and is overwritten when the matching `AffixNode` is
/// encountered.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParsedCfg {
/// Payload of `AffixNode::Encoding`.
encoding: Encoding,
/// Payload of the first `AffixNode::FlagType`; used by `convert_flag` to
/// interpret flag strings. Defaults to `FlagType::Utf8`.
flag_type: FlagType,
/// `true` when an `AffixNode::ComplexPrefixes` node was present.
complex_prefixes: bool,
/// Payload of `AffixNode::Language`.
lang: String,
/// Payload of `AffixNode::IgnoreChars`.
ignore_chars: Vec<char>,
/// Payload of `AffixNode::AffixAlias`.
affix_alias: Vec<String>,
/// Payload of `AffixNode::MorphAlias`.
morph_alias: Vec<String>,
/// Payload of `AffixNode::NeighborKeys`.
neighbor_keys: Vec<String>,
/// Payload of `AffixNode::Replacement`.
replacements: Vec<Conversion>,
/// Payload of `AffixNode::TryCharacters`.
try_characters: String,
/// Converted flag from `AffixNode::NoSuggestFlag`, if one was present.
nosuggest_flag: Option<Flag>,
/// Converted flag from `AffixNode::WarnRareFlag`, if one was present.
warn_rare_flag: Option<Flag>,
/// `true` when an `AffixNode::NoSplitSuggestions` node was present.
no_split_suggestions: bool,
/// `true` when an `AffixNode::KeepTermDots` node was present.
keep_term_dots: bool,
/// `true` when an `AffixNode::ForbidWarnWords` node was present.
forbid_warn_words: bool,
/// Payload of `AffixNode::Mapping`.
maps: Vec<(char, char)>,
/// Payload of `AffixNode::Phonetic`.
phonetics: Vec<Phonetic>,
/// Payload of `AffixNode::NGramSugMax`; defaults to 2.
ngram_sug_max: u16,
/// Payload of `AffixNode::NGramDiffMax`; defaults to 5.
ngram_diff_max: u8,
/// `true` when an `AffixNode::NGramLimitToDiffMax` node was present.
ngram_limit_to_diff_max: bool,
/// Compound-related settings, boxed and stored separately from the rest.
compound_config: Box<CompoundConfig>,
/// Rule groups from `AffixNode::Prefix` and `AffixNode::Suffix` nodes, in the
/// order they were encountered.
afx_rule_groups: Vec<ParsedRuleGroup>,
/// Converted flag from `AffixNode::AfxCircumfixFlag`, if one was present
/// (the field is spelled "circumflex", the node "circumfix").
afx_circumflex_flag: Option<Flag>,
/// Converted flag from `AffixNode::ForbiddenWordFlag`, if one was present.
forbidden_word_flag: Option<Flag>,
/// `true` when an `AffixNode::AfxFullStrip` node was present.
afx_full_strip: bool,
/// Converted flag from `AffixNode::AfxKeepCaseFlag`, if one was present.
afx_keep_case_flag: Option<Flag>,
/// Payload of `AffixNode::AfxInputConversion`.
input_conversions: Vec<Conversion>,
/// Payload of `AffixNode::AfxOutputConversion`.
output_conversions: Vec<Conversion>,
/// Converted flag from `AffixNode::AfxNeededFlag`, if one was present.
afx_needed_flag: Option<Flag>,
/// Converted flag from `AffixNode::AfxSubstandardFlag`, if one was present.
afx_substandard_flag: Option<Flag>,
/// Payload of `AffixNode::AfxWordChars`.
afx_word_chars: String,
/// `true` when an `AffixNode::AfxCheckSharps` node was present.
afx_check_sharps: bool,
/// Payload of `AffixNode::Name`.
name: String,
/// Payload of `AffixNode::HomePage`.
home_page: String,
/// Payload of `AffixNode::Version`.
version: String,
}
/// Compound-word settings held by `ParsedCfg::compound_config`.
///
/// Each field is filled from the `AffixNode` variant named in its comment.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompoundConfig {
/// Payload of `AffixNode::BreakSeparator`.
break_separators: Vec<String>,
/// Payload of `AffixNode::CompoundSugMax`; defaults to 3.
sug_max: u16,
/// Payload of `AffixNode::CompoundRule`.
rules: Vec<String>,
/// Payload of `AffixNode::CompoundMinLen`; defaults to 3.
min_length: u16,
/// Converted flag from `AffixNode::CompoundFlag`, if one was present.
flag: Option<Flag>,
/// Converted flag from `AffixNode::CompoundBeginFlag`, if one was present.
begin_flag: Option<Flag>,
/// Converted flag from `AffixNode::CompoundEndFlag`, if one was present.
end_flag: Option<Flag>,
/// Converted flag from `AffixNode::CompoundMiddleFlag`, if one was present.
middle_flag: Option<Flag>,
/// Converted flag from `AffixNode::CompoundOnlyFlag`, if one was present.
only_flag: Option<Flag>,
/// Converted flag from `AffixNode::CompoundPermitFlag`, if one was present.
permit_flag: Option<Flag>,
/// Converted flag from `AffixNode::CompoundForbidFlag`, if one was present.
forbid_flag: Option<Flag>,
/// `true` when an `AffixNode::CompoundMoreSuffixes` node was present.
more_suffixes: bool,
/// Converted flag from `AffixNode::CompoundRootFlag`, if one was present.
root_flag: Option<Flag>,
/// Payload of `AffixNode::CompoundWordMax`.
word_max: u16,
/// `true` when an `AffixNode::CompoundForbidDup` node was present.
forbid_dup: bool,
/// `true` when an `AffixNode::CompoundForbidRepeat` node was present.
forbid_repeat: bool,
/// `true` when an `AffixNode::CompoundCheckCase` node was present.
check_case: bool,
/// `true` when an `AffixNode::CompoundCheckTriple` node was present.
check_triple: bool,
/// `true` when an `AffixNode::CompoundSimplifyTriple` node was present.
simplify_triple: bool,
/// Payload of `AffixNode::CompoundForbidPats`.
forbid_pats: Vec<CompoundPattern>,
/// Converted flag from `AffixNode::CompoundForceUpFlag`, if one was present.
force_upper_flag: Option<Flag>,
/// Payload of `AffixNode::CompoundSyllable`.
syllable: CompoundSyllable,
/// Payload of `AffixNode::SyllableNum`.
syllable_num: String,
}
impl Default for ParsedCfg {
#[allow(clippy::default_trait_access)]
#[inline]
fn default() -> Self {
Self {
encoding: Default::default(),
flag_type: FlagType::Utf8,
complex_prefixes: Default::default(),
lang: Default::default(),
ignore_chars: Default::default(),
affix_alias: Default::default(),
morph_alias: Default::default(),
neighbor_keys: Default::default(),
replacements: Default::default(),
try_characters: Default::default(),
nosuggest_flag: Default::default(),
warn_rare_flag: Default::default(),
no_split_suggestions: Default::default(),
keep_term_dots: Default::default(),
forbid_warn_words: Default::default(),
maps: Default::default(),
phonetics: Default::default(),
ngram_sug_max: 2,
ngram_diff_max: 5,
ngram_limit_to_diff_max: Default::default(),
compound_config: Default::default(),
afx_rule_groups: Default::default(),
afx_circumflex_flag: Default::default(),
forbidden_word_flag: Default::default(),
afx_full_strip: Default::default(),
afx_keep_case_flag: Default::default(),
input_conversions: Default::default(),
output_conversions: Default::default(),
afx_needed_flag: Default::default(),
afx_substandard_flag: Default::default(),
afx_word_chars: Default::default(),
afx_check_sharps: Default::default(),
name: Default::default(),
home_page: Default::default(),
version: Default::default(),
}
}
}
impl Default for CompoundConfig {
#[allow(clippy::default_trait_access)]
fn default() -> Self {
Self {
break_separators: Default::default(),
sug_max: 3,
rules: Default::default(),
min_length: 3,
flag: Default::default(),
begin_flag: Default::default(),
end_flag: Default::default(),
middle_flag: Default::default(),
only_flag: Default::default(),
permit_flag: Default::default(),
forbid_flag: Default::default(),
more_suffixes: Default::default(),
root_flag: Default::default(),
word_max: Default::default(),
forbid_dup: Default::default(),
forbid_repeat: Default::default(),
check_case: Default::default(),
check_triple: Default::default(),
simplify_triple: Default::default(),
forbid_pats: Default::default(),
force_upper_flag: Default::default(),
syllable: Default::default(),
syllable_num: Default::default(),
}
}
}
impl ParsedCfg {
pub fn flag_type(&self) -> FlagType {
self.flag_type
}
#[inline]
pub fn load_from_str(s: &str) -> Result<Self, Error> {
Self::from_parsed(affix_from_str(s)?)
}
#[allow(clippy::unnecessary_wraps)]
#[allow(clippy::too_many_lines)]
fn from_parsed(v: Vec<AffixNode>) -> Result<Self, Error> {
let mut res = Self::default();
let mut warnings: Vec<String> = Vec::new();
if let Some(node) = v.iter().find(|node| matches!(node, AffixNode::FlagType(_))) {
if let AffixNode::FlagType(v) = node {
res.flag_type = *v;
} else {
unreachable!()
}
}
for node in v {
let name_str = node.name_str();
match node {
AffixNode::Encoding(v) => res.encoding = v,
AffixNode::FlagType(_) => (),
AffixNode::ComplexPrefixes => res.complex_prefixes = true,
AffixNode::Language(v) => res.lang = v,
AffixNode::IgnoreChars(v) => res.ignore_chars = v,
AffixNode::AffixAlias(v) => res.affix_alias = v,
AffixNode::MorphAlias(v) => res.morph_alias = v,
AffixNode::NeighborKeys(v) => res.neighbor_keys = v,
AffixNode::TryCharacters(v) => res.try_characters = v,
AffixNode::NoSuggestFlag(v) => res.nosuggest_flag = Some(res.convert_flag(&v)?),
AffixNode::CompoundSugMax(v) => res.compound_config.sug_max = v,
AffixNode::NGramSugMax(v) => res.ngram_sug_max = v,
AffixNode::NGramDiffMax(v) => res.ngram_diff_max = v,
AffixNode::NGramLimitToDiffMax => res.ngram_limit_to_diff_max = true,
AffixNode::NoSplitSuggestions => res.no_split_suggestions = true,
AffixNode::KeepTermDots => res.keep_term_dots = true,
AffixNode::Replacement(v) => res.replacements = v,
AffixNode::Mapping(v) => res.maps = v,
AffixNode::Phonetic(v) => res.phonetics = v,
AffixNode::WarnRareFlag(v) => res.warn_rare_flag = Some(res.convert_flag(&v)?),
AffixNode::ForbidWarnWords => res.forbid_warn_words = true,
AffixNode::BreakSeparator(v) => res.compound_config.break_separators = v,
AffixNode::CompoundRule(v) => res.compound_config.rules = v,
AffixNode::CompoundMinLen(v) => res.compound_config.min_length = v,
AffixNode::CompoundFlag(v) => {
res.compound_config.flag = Some(res.convert_flag(&v)?);
}
AffixNode::CompoundBeginFlag(v) => {
res.compound_config.begin_flag = Some(res.convert_flag(&v)?);
}
AffixNode::CompoundEndFlag(v) => {
res.compound_config.end_flag = Some(res.convert_flag(&v)?);
}
AffixNode::CompoundMiddleFlag(v) => {
res.compound_config.middle_flag = Some(res.convert_flag(&v)?);
}
AffixNode::CompoundOnlyFlag(v) => {
res.compound_config.only_flag = Some(res.convert_flag(&v)?);
}
AffixNode::CompoundPermitFlag(v) => {
res.compound_config.permit_flag = Some(res.convert_flag(&v)?);
}
AffixNode::CompoundForbidFlag(v) => {
res.compound_config.forbid_flag = Some(res.convert_flag(&v)?);
}
AffixNode::CompoundMoreSuffixes => res.compound_config.more_suffixes = true,
AffixNode::CompoundRootFlag(v) => {
res.compound_config.root_flag = Some(res.convert_flag(&v)?);
}
AffixNode::CompoundWordMax(v) => res.compound_config.word_max = v,
AffixNode::CompoundForbidDup => res.compound_config.forbid_dup = true,
AffixNode::CompoundForbidRepeat => res.compound_config.forbid_repeat = true,
AffixNode::CompoundCheckCase => res.compound_config.check_case = true,
AffixNode::CompoundCheckTriple => res.compound_config.check_triple = true,
AffixNode::CompoundSimplifyTriple => res.compound_config.simplify_triple = true,
AffixNode::CompoundForbidPats(v) => res.compound_config.forbid_pats = v,
AffixNode::CompoundForceUpFlag(v) => {
res.compound_config.force_upper_flag = Some(res.convert_flag(&v)?);
}
AffixNode::CompoundSyllable(v) => res.compound_config.syllable = v,
AffixNode::SyllableNum(v) => res.compound_config.syllable_num = v,
AffixNode::Prefix(v) => res.afx_rule_groups.push(v),
AffixNode::Suffix(v) => res.afx_rule_groups.push(v),
AffixNode::AfxCircumfixFlag(v) => {
res.afx_circumflex_flag = Some(res.convert_flag(&v)?);
}
AffixNode::ForbiddenWordFlag(v) => {
res.forbidden_word_flag = Some(res.convert_flag(&v)?);
}
AffixNode::AfxFullStrip => res.afx_full_strip = true,
AffixNode::AfxKeepCaseFlag(v) => {
res.afx_keep_case_flag = Some(res.convert_flag(&v)?);
}
AffixNode::AfxInputConversion(v) => res.input_conversions = v,
AffixNode::AfxOutputConversion(v) => res.output_conversions = v,
AffixNode::AfxLemmaPresentFlag(_) => {
warnings.push(format!("flag {name_str} is deprecated"));
}
AffixNode::AfxNeededFlag(v) => res.afx_needed_flag = Some(res.convert_flag(&v)?),
AffixNode::AfxPseudoRootFlag(_) => {
warnings.push(format!("flag {name_str} is deprecated"));
}
AffixNode::AfxSubstandardFlag(v) => {
res.afx_substandard_flag = Some(res.convert_flag(&v)?);
}
AffixNode::AfxWordChars(v) => res.afx_word_chars = v,
AffixNode::AfxCheckSharps => res.afx_check_sharps = true,
AffixNode::Comment => (),
AffixNode::Name(v) => res.name = v,
AffixNode::HomePage(v) => res.home_page = v,
AffixNode::Version(v) => res.version = v,
}
}
for w in warnings {
eprintln!("warning: {w}");
}
Ok(res)
}
pub(crate) fn convert_flag(&self, flag: &str) -> Result<Flag, ParseError> {
self.flag_type
.str_to_flag(flag)
.map_err(|e| ParseError::new_nospan(e, flag))
}
pub fn compile_flags(&self) -> Result<CompiledFlags, Error> {
let stem_key_sets = [
(self.afx_keep_case_flag, FlagValue::AfxKeepCase),
(self.afx_needed_flag, FlagValue::AfxNeeded),
(self.afx_substandard_flag, FlagValue::AfxSubstandard),
(self.compound_config.flag, FlagValue::Compound),
(self.compound_config.begin_flag, FlagValue::CompoundBegin),
(self.compound_config.end_flag, FlagValue::CompoundEnd),
(self.compound_config.forbid_flag, FlagValue::CompoundForbid),
(
self.compound_config.force_upper_flag,
FlagValue::CompoundForceUp,
),
(self.compound_config.middle_flag, FlagValue::CompoundMiddle),
(self.compound_config.only_flag, FlagValue::CompoundOnly),
(self.compound_config.permit_flag, FlagValue::CompoundPermit),
(self.compound_config.root_flag, FlagValue::CompoundRoot),
(self.forbidden_word_flag, FlagValue::ForbiddenWord),
(self.nosuggest_flag, FlagValue::NoSuggest),
(self.warn_rare_flag, FlagValue::WarnRare),
];
let rule_key_sets = [(self.afx_circumflex_flag, FlagValue::AfxCircumfix)];
let mut affix_flags: BTreeMap<Flag, FlagValue> = BTreeMap::new();
let mut rule_flags: BTreeMap<Flag, FlagValue> = BTreeMap::new();
for (key, value) in stem_key_sets
.iter()
.filter_map(|(kopt, val)| kopt.map(|keyval| (keyval, val)))
{
if let Some(duplicate) = affix_flags.get(&key) {
return Err(BuildError::DuplicateFlag {
flag: self.flag_type.flag_to_str(key),
t1: duplicate.clone(),
t2: Some(value.clone()),
}
.into());
}
affix_flags.insert(key, value.clone());
}
for (key, value) in rule_key_sets
.iter()
.filter_map(|(kopt, val)| kopt.map(|keyval| (keyval, val)))
{
if let Some(duplicate) = rule_flags.get(&key) {
return Err(BuildError::DuplicateFlag {
flag: self.flag_type.flag_to_str(key),
t1: duplicate.clone(),
t2: Some(value.clone()),
}
.into());
}
rule_flags.insert(key, value.clone());
}
for group in &self.afx_rule_groups {
let flag = self
.flag_type
.str_to_flag(&group.flag)
.map_err(|e| ParseError::new_nospan(e, &group.flag))?;
if let Some(duplicate) = affix_flags.get(&flag) {
return Err(BuildError::DuplicateFlag {
flag: group.flag.clone(),
t1: duplicate.clone(),
t2: None,
}
.into());
}
let rule = AfxRule::from_parsed_group(self, group);
affix_flags.insert(flag, FlagValue::Rule(Arc::new(rule)));
}
Ok(CompiledFlags {
affix_flags,
rule_flags,
})
}
}
/// Flag lookup tables produced by `ParsedCfg::compile_flags`.
pub struct CompiledFlags {
/// Stem-level option flags plus one `FlagValue::Rule` entry per affix rule group.
pub affix_flags: BTreeMap<Flag, FlagValue>,
/// Flags tracked separately from `affix_flags`; `compile_flags` stores only the
/// circumfix flag here, when it is set.
pub rule_flags: BTreeMap<Flag, FlagValue>,
}
#[cfg(test)]
mod tests;