#![warn(clippy::all)]
#![warn(clippy::pedantic)]
#![warn(clippy::cargo)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
#![warn(trivial_casts, trivial_numeric_casts)]
#![warn(unused_qualifications)]
#![warn(variant_size_differences)]
#![forbid(unsafe_code)]
#![warn(unused_import_braces)]
#![warn(unused_results)]
#![warn(unused_lifetimes)]
#![warn(unused)]
#![warn(missing_docs)]
#![allow(clippy::multiple_crate_versions)]
#![doc = include_str!("../README.md")]
use std::{collections::BTreeSet, error::Error, fmt::Display};
use bitflags::bitflags;
use log::trace;
use unicode_titlecase::StrTitleCase;
/// Reasons why [`decompound`] did not produce a list of constituents.
///
/// Note that [`DecompositionError::SingleWord`] is not necessarily a failure
/// from the caller's point of view: it reports that the input is a valid word,
/// just not a compound one.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum DecompositionError {
/// The input could not be split into constituents, but it was accepted as a
/// valid single word. The payload is that word.
SingleWord(String),
/// The input is neither a valid compound nor a valid single word.
NothingValid, }
impl Display for DecompositionError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DecompositionError::SingleWord(word) => {
write!(f, "Not a compound, but valid single word: {word}")
}
DecompositionError::NothingValid => write!(f, "No valid decomposition found"),
}
}
}
// Marker implementation: makes `DecompositionError` usable wherever a
// `std::error::Error` is expected. The body is empty, so every trait method
// keeps its default implementation.
impl Error for DecompositionError {}
bitflags! {
/// Bit flags that adjust how [`decompound`] searches for constituents.
///
/// Flags are independent of each other and may be combined.
#[derive(Clone)]
pub struct DecompositionOptions: u32 {
/// When checking a suffix, additionally try the variant produced by
/// `to_titlecase_lower_rest` as a candidate, alongside the suffix as written.
const TRY_TITLECASE_SUFFIX = 1;
/// Split the input at every `-` first and decompound each part on its own,
/// concatenating the results.
const SPLIT_HYPHENATED = 1 << 1;
/// Among all valid splits, pick one with the most constituents instead of
/// one with the fewest.
const SHATTER = 1 << 2;
}
}
/// Identity conversion, so that functions taking
/// `impl AsRef<DecompositionOptions>` (such as [`decompound`]) accept the
/// options both by value and by reference.
impl AsRef<DecompositionOptions> for DecompositionOptions {
    /// Returns `self` unchanged.
    fn as_ref(&self) -> &DecompositionOptions {
        self
    }
}
/// Return type of [`decompound`]: the constituents of the compound word on
/// success, otherwise a [`DecompositionError`] describing why no split was
/// returned.
pub type DecompositionResult = Result<Vec<String>, DecompositionError>;
/// Splits `word` into constituents that are each accepted by
/// `is_valid_single_word`.
///
/// * `word` – the (potential) compound word.
/// * `is_valid_single_word` – predicate deciding whether a string is a known
///   word on its own.
/// * `options` – flags tweaking the search, see [`DecompositionOptions`].
///
/// If [`DecompositionOptions::SPLIT_HYPHENATED`] is set, `word` is first split
/// at every `-`. Each part is then decompounded separately (with that flag
/// removed) and the parts' constituents are concatenated in order.
///
/// # Errors
///
/// * [`DecompositionError::SingleWord`] if no split was found but the input is
///   itself accepted by `is_valid_single_word`. In hyphen mode, this is
///   returned when exactly one constituent results overall.
/// * [`DecompositionError::NothingValid`] if the input is neither a compound
///   nor a valid single word. In hyphen mode, this is returned as soon as any
///   part is neither.
pub fn decompound(
    word: impl AsRef<str>,
    is_valid_single_word: &impl Fn(&str) -> bool,
    options: impl AsRef<DecompositionOptions>,
) -> DecompositionResult {
    let word = word.as_ref();
    let options = options.as_ref();
    let mut parts = Vec::new();

    if !options.contains(DecompositionOptions::SPLIT_HYPHENATED) {
        if is_valid_compound_word(word, is_valid_single_word, options, &mut parts) {
            debug_assert!(!parts.is_empty(), "Compound word must have constituents");
            return Ok(parts);
        }

        trace!("Word is not a valid compound word");
        return Err(if is_valid_single_word(word) {
            DecompositionError::SingleWord(word.to_owned())
        } else {
            DecompositionError::NothingValid
        });
    }

    // Drop the flag for the recursive calls so the parts are not split at
    // hyphens again.
    let per_part_options = options.clone() - DecompositionOptions::SPLIT_HYPHENATED;

    for part in word.split('-') {
        match decompound(part, is_valid_single_word, &per_part_options) {
            Ok(found) => parts.extend(found),
            // A part that is a plain word simply contributes itself.
            Err(DecompositionError::SingleWord(single)) => parts.push(single),
            // One invalid part invalidates the whole input.
            Err(DecompositionError::NothingValid) => {
                return Err(DecompositionError::NothingValid)
            }
        }
    }

    if parts.len() > 1 {
        Ok(parts)
    } else {
        // Zero constituents: nothing valid. Exactly one: a plain single word.
        Err(parts.pop().map_or(
            DecompositionError::NothingValid,
            DecompositionError::SingleWord,
        ))
    }
}
/// Searches for a way to split `word` into two or more valid words.
///
/// Every character boundary after the first character is tried as a split
/// point. The part before the boundary must be accepted by
/// `is_valid_single_word` as-is. The part after it qualifies if it is a valid
/// single word or is itself a compound, which is checked recursively. With
/// [`DecompositionOptions::TRY_TITLECASE_SUFFIX`] the titlecased form of the
/// suffix is tried as an additional candidate.
///
/// All qualifying splits are collected. With
/// [`DecompositionOptions::SHATTER`] a split with the most constituents is
/// chosen, otherwise one with the fewest.
///
/// Returns `true` and appends the chosen split to `constituents` if any split
/// was found. Otherwise returns `false` and leaves `constituents` untouched.
fn is_valid_compound_word(
    word: impl AsRef<str>,
    is_valid_single_word: &impl Fn(&str) -> bool,
    options: &DecompositionOptions,
    constituents: &mut Vec<String>,
) -> bool {
    let word = word.as_ref();
    trace!("Checking if word is valid compound word: '{}'", word);

    let mut splits: Vec<Vec<String>> = Vec::new();

    // Skipping the first boundary (index 0) guarantees a non-empty prefix;
    // `char_indices` never yields `word.len()`, so the suffix is non-empty too.
    for (boundary, _) in word.char_indices().skip(1) {
        let (head, tail) = word.split_at(boundary);
        debug_assert!(!head.is_empty(), "Prefix should never be empty");
        debug_assert!(!tail.is_empty(), "Suffix should never be empty");

        if is_valid_single_word(head) {
            trace!(
                "Prefix '{}' found to be valid, seeing if suffix '{}' is valid.",
                head,
                tail
            );

            // A set, so that a suffix which already is titlecased is only
            // examined once. Candidates are visited in the set's sort order.
            let mut candidates = BTreeSet::new();
            let _ = candidates.insert(tail.to_owned());
            if options.contains(DecompositionOptions::TRY_TITLECASE_SUFFIX) {
                let _ = candidates.insert(tail.to_titlecase_lower_rest());
            }
            debug_assert!(
                !candidates.is_empty(),
                "Suffix candidates should never be empty"
            );

            for candidate in candidates {
                // Option 1: the suffix is a word by itself.
                if is_valid_single_word(&candidate) {
                    trace!("Suffix '{}' is valid: valid single word", candidate);
                    splits.push(vec![head.to_owned(), candidate.clone()]);
                }

                // Option 2: the suffix is a compound in its own right. Both
                // options are recorded if both apply.
                let mut nested = Vec::new();
                if is_valid_compound_word(&candidate, is_valid_single_word, options, &mut nested)
                {
                    trace!("Suffix '{}' is valid: valid compound word", candidate);
                    let mut split = Vec::with_capacity(nested.len() + 1);
                    split.push(head.to_owned());
                    split.extend(nested);
                    splits.push(split);
                }
            }
        }
    }

    // Ties: `max_by_key` keeps the last maximal split, `min_by_key` the first
    // minimal one.
    let chosen = if options.contains(DecompositionOptions::SHATTER) {
        splits.into_iter().max_by_key(Vec::len)
    } else {
        splits.into_iter().min_by_key(Vec::len)
    };

    if let Some(split) = chosen {
        constituents.extend(split);
        true
    } else {
        false
    }
}