use std::borrow::Cow;
use serde::{Deserialize, Serialize};
/// Morphological type recorded for a language in a [`GrammarProfile`].
///
/// The enum is `#[non_exhaustive]`, so code in other crates that matches on
/// it must include a wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum Morphology {
    /// Isolating morphology; assigned to the built-in [`mandarin`] profile.
    Isolating,
    /// Agglutinative morphology; assigned to the built-in [`japanese`] and
    /// [`korean`] profiles.
    Agglutinative,
    /// Fusional morphology; assigned to most built-in profiles (for example
    /// [`english`], [`arabic`], and [`russian`]).
    Fusional,
    /// Polysynthetic morphology; none of the built-in profiles in this
    /// module currently use it.
    Polysynthetic,
}
/// Basic constituent order recorded for a language in a [`GrammarProfile`].
///
/// Variant names use the usual abbreviations: `S` = subject, `V` = verb,
/// `O` = object. The enum is `#[non_exhaustive]`, so code in other crates
/// that matches on it must include a wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum WordOrder {
    /// Subject, verb, object.
    SVO,
    /// Subject, object, verb.
    SOV,
    /// Verb, subject, object.
    VSO,
    /// Verb, object, subject.
    VOS,
    /// Object, verb, subject.
    OVS,
    /// Object, subject, verb.
    OSV,
    /// Free word order.
    Free,
}
/// Summary of a language's grammatical features.
///
/// Built-in profiles are produced by the per-language constructor functions
/// in this module (for example [`english`]) and can be looked up by code
/// with [`by_code`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct GrammarProfile {
    /// Short language code such as `"en"` or `"zh"`. Built-in profiles
    /// borrow a static string; an owned string is also accepted.
    pub language_code: Cow<'static, str>,
    /// Morphological type of the language.
    pub morphology: Morphology,
    /// Basic word order of the language.
    pub word_order: WordOrder,
    /// Number of grammatical cases recorded for the language.
    pub case_count: u8,
    /// Whether the language is recorded as having grammatical gender.
    pub has_gender: bool,
    /// Number of grammatical genders. The built-in profiles use `0`
    /// whenever `has_gender` is `false`.
    pub gender_count: u8,
    /// Whether the language is recorded as having a dual number.
    pub has_dual: bool,
    /// Whether the language is recorded as using classifiers.
    pub has_classifiers: bool,
}
/// Looks up the built-in grammar profile for a language code.
///
/// The code is matched exactly against the supported codes (the same set
/// returned by [`all_codes`]); no trimming or case folding is applied.
/// A trace-level `tracing` event carrying the requested code is emitted
/// before the lookup.
///
/// Returns `None` when `code` is not one of the supported codes.
#[must_use]
pub fn by_code(code: &str) -> Option<GrammarProfile> {
    tracing::trace!(code, "grammar profile lookup");

    // Resolve the code to its constructor; unknown codes bail out early so
    // the happy path wraps the result in `Some` exactly once.
    let profile = match code {
        "en" => english(),
        "ar" => arabic(),
        "zh" => mandarin(),
        "hi" => hindi(),
        "ja" => japanese(),
        "es" => spanish(),
        "fr" => french(),
        "de" => german(),
        "ru" => russian(),
        "ko" => korean(),
        "pt" => portuguese(),
        _ => return None,
    };
    Some(profile)
}
/// Returns the language codes that have a built-in grammar profile.
///
/// The slice is static and its order is fixed: `en`, `ar`, `zh`, `hi`,
/// `ja`, `es`, `fr`, `de`, `ru`, `ko`, `pt`.
#[must_use]
pub fn all_codes() -> &'static [&'static str] {
    const SUPPORTED_CODES: &[&str] = &[
        "en", "ar", "zh", "hi", "ja", "es", "fr", "de", "ru", "ko", "pt",
    ];
    SUPPORTED_CODES
}
/// Builds the grammar profile for English (`"en"`).
///
/// Recorded as fusional with SVO word order, a case count of 2, and no
/// gender, dual, or classifiers.
#[must_use]
pub fn english() -> GrammarProfile {
    let language_code = Cow::Borrowed("en");
    let (morphology, word_order) = (Morphology::Fusional, WordOrder::SVO);

    GrammarProfile {
        language_code,
        morphology,
        word_order,
        case_count: 2,
        has_gender: false,
        gender_count: 0,
        has_dual: false,
        has_classifiers: false,
    }
}
/// Builds the grammar profile for Arabic (`"ar"`).
///
/// Recorded as fusional with VSO word order, a case count of 3, two
/// genders, and a dual number; no classifiers.
#[must_use]
pub fn arabic() -> GrammarProfile {
    let language_code = Cow::Borrowed("ar");
    let (morphology, word_order) = (Morphology::Fusional, WordOrder::VSO);

    GrammarProfile {
        language_code,
        morphology,
        word_order,
        case_count: 3,
        has_gender: true,
        gender_count: 2,
        has_dual: true,
        has_classifiers: false,
    }
}
/// Builds the grammar profile for Mandarin (`"zh"`).
///
/// Recorded as isolating with SVO word order, a case count of 0, no gender
/// or dual, and classifiers present.
#[must_use]
pub fn mandarin() -> GrammarProfile {
    let language_code = Cow::Borrowed("zh");
    let (morphology, word_order) = (Morphology::Isolating, WordOrder::SVO);

    GrammarProfile {
        language_code,
        morphology,
        word_order,
        case_count: 0,
        has_gender: false,
        gender_count: 0,
        has_dual: false,
        has_classifiers: true,
    }
}
/// Builds the grammar profile for Hindi (`"hi"`).
///
/// Recorded as fusional with SOV word order, a case count of 3, and two
/// genders; no dual or classifiers.
#[must_use]
pub fn hindi() -> GrammarProfile {
    let language_code = Cow::Borrowed("hi");
    let (morphology, word_order) = (Morphology::Fusional, WordOrder::SOV);

    GrammarProfile {
        language_code,
        morphology,
        word_order,
        case_count: 3,
        has_gender: true,
        gender_count: 2,
        has_dual: false,
        has_classifiers: false,
    }
}
/// Builds the grammar profile for Japanese (`"ja"`).
///
/// Recorded as agglutinative with SOV word order, a case count of 0, no
/// gender or dual, and classifiers present.
#[must_use]
pub fn japanese() -> GrammarProfile {
    let language_code = Cow::Borrowed("ja");
    let (morphology, word_order) = (Morphology::Agglutinative, WordOrder::SOV);

    GrammarProfile {
        language_code,
        morphology,
        word_order,
        case_count: 0,
        has_gender: false,
        gender_count: 0,
        has_dual: false,
        has_classifiers: true,
    }
}
/// Builds the grammar profile for Spanish (`"es"`).
///
/// Recorded as fusional with SVO word order, a case count of 0, and two
/// genders; no dual or classifiers.
#[must_use]
pub fn spanish() -> GrammarProfile {
    let language_code = Cow::Borrowed("es");
    let (morphology, word_order) = (Morphology::Fusional, WordOrder::SVO);

    GrammarProfile {
        language_code,
        morphology,
        word_order,
        case_count: 0,
        has_gender: true,
        gender_count: 2,
        has_dual: false,
        has_classifiers: false,
    }
}
/// Builds the grammar profile for French (`"fr"`).
///
/// Recorded as fusional with SVO word order, a case count of 0, and two
/// genders; no dual or classifiers.
#[must_use]
pub fn french() -> GrammarProfile {
    let language_code = Cow::Borrowed("fr");
    let (morphology, word_order) = (Morphology::Fusional, WordOrder::SVO);

    GrammarProfile {
        language_code,
        morphology,
        word_order,
        case_count: 0,
        has_gender: true,
        gender_count: 2,
        has_dual: false,
        has_classifiers: false,
    }
}
/// Builds the grammar profile for German (`"de"`).
///
/// Recorded as fusional with SVO word order, a case count of 4, and three
/// genders; no dual or classifiers.
#[must_use]
pub fn german() -> GrammarProfile {
    let language_code = Cow::Borrowed("de");
    let (morphology, word_order) = (Morphology::Fusional, WordOrder::SVO);

    GrammarProfile {
        language_code,
        morphology,
        word_order,
        case_count: 4,
        has_gender: true,
        gender_count: 3,
        has_dual: false,
        has_classifiers: false,
    }
}
/// Builds the grammar profile for Russian (`"ru"`).
///
/// Recorded as fusional with SVO word order, a case count of 6, and three
/// genders; no dual or classifiers.
#[must_use]
pub fn russian() -> GrammarProfile {
    let language_code = Cow::Borrowed("ru");
    let (morphology, word_order) = (Morphology::Fusional, WordOrder::SVO);

    GrammarProfile {
        language_code,
        morphology,
        word_order,
        case_count: 6,
        has_gender: true,
        gender_count: 3,
        has_dual: false,
        has_classifiers: false,
    }
}
/// Builds the grammar profile for Korean (`"ko"`).
///
/// Recorded as agglutinative with SOV word order, a case count of 0, no
/// gender or dual, and classifiers present.
#[must_use]
pub fn korean() -> GrammarProfile {
    let language_code = Cow::Borrowed("ko");
    let (morphology, word_order) = (Morphology::Agglutinative, WordOrder::SOV);

    GrammarProfile {
        language_code,
        morphology,
        word_order,
        case_count: 0,
        has_gender: false,
        gender_count: 0,
        has_dual: false,
        has_classifiers: true,
    }
}
/// Builds the grammar profile for Portuguese (`"pt"`).
///
/// Recorded as fusional with SVO word order, a case count of 0, and two
/// genders; no dual or classifiers.
#[must_use]
pub fn portuguese() -> GrammarProfile {
    let language_code = Cow::Borrowed("pt");
    let (morphology, word_order) = (Morphology::Fusional, WordOrder::SVO);

    GrammarProfile {
        language_code,
        morphology,
        word_order,
        case_count: 0,
        has_gender: true,
        gender_count: 2,
        has_dual: false,
        has_classifiers: false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Generates a test asserting a profile constructor's language code,
    /// morphology, and word order.
    macro_rules! grammar_test {
        ($name:ident, $fn:ident, $code:expr, $morph:expr, $order:expr) => {
            #[test]
            fn $name() {
                let g = $fn();
                assert_eq!(g.language_code, $code);
                assert_eq!(g.morphology, $morph);
                assert_eq!(g.word_order, $order);
            }
        };
    }

    // English was the only built-in profile without a generated test.
    grammar_test!(
        test_english,
        english,
        "en",
        Morphology::Fusional,
        WordOrder::SVO
    );
    grammar_test!(
        test_arabic,
        arabic,
        "ar",
        Morphology::Fusional,
        WordOrder::VSO
    );
    grammar_test!(
        test_mandarin,
        mandarin,
        "zh",
        Morphology::Isolating,
        WordOrder::SVO
    );
    grammar_test!(
        test_hindi,
        hindi,
        "hi",
        Morphology::Fusional,
        WordOrder::SOV
    );
    grammar_test!(
        test_japanese,
        japanese,
        "ja",
        Morphology::Agglutinative,
        WordOrder::SOV
    );
    grammar_test!(
        test_spanish,
        spanish,
        "es",
        Morphology::Fusional,
        WordOrder::SVO
    );
    grammar_test!(
        test_french,
        french,
        "fr",
        Morphology::Fusional,
        WordOrder::SVO
    );
    grammar_test!(
        test_german,
        german,
        "de",
        Morphology::Fusional,
        WordOrder::SVO
    );
    grammar_test!(
        test_russian,
        russian,
        "ru",
        Morphology::Fusional,
        WordOrder::SVO
    );
    grammar_test!(
        test_korean,
        korean,
        "ko",
        Morphology::Agglutinative,
        WordOrder::SOV
    );
    grammar_test!(
        test_portuguese,
        portuguese,
        "pt",
        Morphology::Fusional,
        WordOrder::SVO
    );

    #[test]
    fn test_german_cases() {
        let de = german();
        assert_eq!(de.case_count, 4);
        assert_eq!(de.gender_count, 3);
    }

    #[test]
    fn test_russian_cases() {
        let ru = russian();
        assert_eq!(ru.case_count, 6);
        assert_eq!(ru.gender_count, 3);
    }

    #[test]
    fn test_arabic_dual() {
        let ar = arabic();
        assert!(ar.has_dual);
    }

    #[test]
    fn test_classifiers() {
        assert!(mandarin().has_classifiers);
        assert!(japanese().has_classifiers);
        assert!(korean().has_classifiers);
        assert!(!spanish().has_classifiers);
    }

    #[test]
    fn test_by_code_lookup() {
        assert!(by_code("ar").is_some());
        assert!(by_code("zh").is_some());
        assert!(by_code("xx").is_none());
    }

    #[test]
    fn test_all_codes_have_profiles() {
        for code in all_codes() {
            assert!(by_code(code).is_some(), "missing grammar for {code}");
        }
    }

    /// Guards against a `by_code` arm being wired to the wrong constructor:
    /// the returned profile must carry the code it was looked up by.
    #[test]
    fn test_by_code_returns_matching_code() {
        for code in all_codes() {
            let g = by_code(code).unwrap();
            assert_eq!(g.language_code, *code, "wrong profile returned for {code}");
        }
    }

    #[test]
    fn test_grammar_serde_roundtrip() {
        for code in all_codes() {
            let g = by_code(code).unwrap();
            let json = serde_json::to_string(&g).unwrap();
            let back: GrammarProfile = serde_json::from_str(&json).unwrap();
            assert_eq!(g, back, "serde roundtrip failed for {code}");
        }
    }
}