use std::fmt::Debug;
use std::hash::Hash;
use crate::aggregable::FieldDescriptor;
use serde::{Deserialize, Serialize};
pub use crate::morphology_enums::*;
/// Read-only view over a morphology value: its lemma and its part-of-speech tag.
pub trait MorphologyInfo {
    /// Part-of-speech tag type.
    ///
    /// Must be cheap to copy, comparable, hashable and debuggable.
    type PosTag: Copy + Clone + Eq + PartialEq + Hash + Debug;

    /// Returns the lemma as a string slice borrowed from `self`.
    fn lemma(&self) -> &str;

    /// Returns the part-of-speech tag by value.
    fn pos_tag(&self) -> Self::PosTag;

    /// Returns a static, textual label for the part of speech.
    fn pos_label(&self) -> &'static str;
}
/// Schema entry for one morphology group: an identifying key, a label,
/// and the list of field descriptors attached to the group.
#[derive(Clone, Debug)]
pub struct MorphologyGroupSchema {
    /// Key identifying the group.
    pub key: String,
    /// Label for the group (presumably human-readable; confirm against callers).
    pub label: String,
    /// Field descriptors belonging to this group.
    pub dimensions: Vec<FieldDescriptor>,
}
/// Type-level catalog that enumerates morphology group schemas.
pub trait MorphologyCatalog {
    /// Returns an owned list of [`MorphologyGroupSchema`] entries.
    ///
    /// This is an associated function (no `self`), so it is called on the type.
    fn group_descriptors() -> Vec<MorphologyGroupSchema>;
}
/// Schema entry for one grammatical-function variant: an identifying key,
/// a label, and the list of field descriptors attached to the variant.
#[derive(Clone, Debug)]
pub struct FunctionVariantSchema {
    /// Key identifying the variant.
    pub key: String,
    /// Label for the variant (presumably human-readable; confirm against callers).
    pub label: String,
    /// Field descriptors belonging to this variant.
    pub dimensions: Vec<FieldDescriptor>,
}
/// Type-level catalog that enumerates grammatical-function variant schemas.
pub trait GrammaticalFunctionCatalog {
    /// Returns an owned list of [`FunctionVariantSchema`] entries.
    ///
    /// This is an associated function (no `self`), so it is called on the type.
    fn function_descriptors() -> Vec<FunctionVariantSchema>;
}
/// The unit type acts as the empty catalog: it reports no function variants.
impl GrammaticalFunctionCatalog for () {
    /// Always returns an empty list.
    fn function_descriptors() -> Vec<FunctionVariantSchema> {
        Vec::new()
    }
}
pub use isolang::Language as IsoLang;
/// Script code stored as a `'static` string slice.
///
/// The inner field is private, so values can only be obtained through the
/// associated constants or the validating constructor on this type.
#[derive(Copy, Clone)]
pub struct Script(&'static str);
impl Script {
pub const LATN: Self = Self("Latn");
pub const CYRL: Self = Self("Cyrl");
pub const HIRA: Self = Self("Hira");
pub const KANA: Self = Self("Kana");
pub const HANI: Self = Self("Hani");
pub const ARAB: Self = Self("Arab");
pub const HANG: Self = Self("Hang");
#[must_use]
pub const fn code(&self) -> &'static str {
self.0
}
#[must_use]
pub fn new(code: &str) -> Option<Self> {
let entry = iso15924::ScriptCode::by_code(code)?;
Some(Self(entry.code.as_ref()))
}
#[must_use]
pub fn resolve(&self) -> &'static iso15924::ScriptCode<'static> {
iso15924::ScriptCode::by_code(self.0)
.unwrap_or_else(|| panic!("Invalid ISO 15924 script code: {}", self.0))
}
}
impl Debug for Script {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Script(\"{}\")", self.0)
}
}
/// Displays the bare script code (for example `Latn`).
impl std::fmt::Display for Script {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` (unlike `write_str`) applies the caller's width, fill,
        // alignment and precision flags, so `{:>6}` behaves as it does for
        // a plain `&str`. Output for a bare `{}` is unchanged.
        f.pad(self.0)
    }
}
impl PartialEq for Script {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
}
}
// Marker impl: the string comparison used by `PartialEq` is a full equivalence relation.
impl Eq for Script {}
impl Hash for Script {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.0.hash(state);
}
}
/// Serializes a script as its bare code string.
impl Serialize for Script {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let Self(code) = self;
        serializer.serialize_str(code)
    }
}
impl<'de> Deserialize<'de> for Script {
fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
let code = String::deserialize(deserializer)?;
Self::new(&code).ok_or_else(|| {
serde::de::Error::custom(format!("Unknown ISO 15924 script code: {code}"))
})
}
}
/// Typological feature tags; a `LinguisticDefinition` lists them through
/// its `typological_features` method.
#[derive(Clone, Debug, Eq, Hash, PartialEq, Deserialize, Serialize)]
pub enum TypologicalFeature {
    /// Conjugation.
    Conjugation,
    /// Declension.
    Declension,
    /// Agglutination.
    Agglutination,
}
/// Per-language definition: associated morphology and grammatical-function
/// types, the language identity, its scripts, and extraction hooks.
pub trait LinguisticDefinition {
    /// Morphology type for this language.
    ///
    /// Must be serializable both ways, expose a JSON schema, implement
    /// [`MorphologyInfo`] and `Aggregable`, and be shareable across threads.
    type Morphology: Debug
        + Clone
        + Serialize
        + for<'de> Deserialize<'de>
        + schemars::JsonSchema
        + MorphologyInfo
        + crate::aggregable::Aggregable
        + Send
        + Sync;

    /// Grammatical-function type for this language.
    ///
    /// Must be comparable, serializable both ways, expose a JSON schema,
    /// implement `AggregableFields`, and be shareable across threads.
    type GrammaticalFunction: Debug
        + Clone
        + PartialEq
        + Serialize
        + for<'de> Deserialize<'de>
        + schemars::JsonSchema
        + crate::aggregable::AggregableFields
        + Send
        + Sync;

    /// Language identity; the default `iso_code` and `name` derive from it.
    const ISO_LANG: IsoLang;

    /// Returns the ISO 639-3 code of [`Self::ISO_LANG`].
    fn iso_code(&self) -> &'static str {
        Self::ISO_LANG.to_639_3()
    }

    /// Returns the name of [`Self::ISO_LANG`] as reported by `to_name`.
    fn name(&self) -> &str {
        Self::ISO_LANG.to_name()
    }

    /// Returns the scripts this definition supports.
    fn supported_scripts(&self) -> &[Script];

    /// Returns the script used by default.
    fn default_script(&self) -> Script;

    /// Returns the extraction directives text for this language.
    fn extraction_directives(&self) -> &str;

    /// Returns the typological features of this language; empty by default.
    fn typological_features(&self) -> &[TypologicalFeature] {
        &[]
    }

    /// Returns optional additional extraction directives; `None` by default.
    fn extra_extraction_directives(&self) -> Option<String> {
        None
    }

    /// Hook that may adjust an extracted segmentation in place.
    ///
    /// The default implementation leaves the segmentation untouched and
    /// returns `Ok(())`.
    ///
    /// # Errors
    ///
    /// Implementations report a failure as a `String` message.
    fn post_process_extraction(
        &self,
        _segmentation: &mut Option<Vec<crate::morpheme::WordSegmentation<Self::GrammaticalFunction>>>,
    ) -> Result<(), String> {
        Ok(())
    }
}