use std::fmt::Debug;
use crate::aggregable::digest::{record_aggregable, AggregationSink};
use crate::aggregable::AggregableFields;
use crate::component::{AggregationError, Aggregating, AnalysisComponent, ComponentContext};
use crate::morpheme::WordSegmentation;
use crate::traits::{LinguisticDefinition, TypologicalFeature};
/// Analysis component that handles the morpheme-segmentation section of an
/// extraction.
///
/// This is a field-less unit struct: all behaviour lives in the trait
/// implementations in this file, and every instance is interchangeable.
#[derive(Clone, Debug, Default)]
pub struct MorphemeSegmentation;
impl<L: LinguisticDefinition + crate::morpheme::Agglutinative>
crate::component::ComponentRequires<L> for MorphemeSegmentation
where
<L::Morphology as crate::traits::MorphologyInfo>::PosTag:
std::fmt::Debug + Clone + Copy + PartialEq + Eq + std::hash::Hash + 'static,
L::GrammaticalFunction: std::fmt::Debug
+ Clone
+ PartialEq
+ serde::Serialize
+ for<'de> serde::Deserialize<'de>
+ schemars::JsonSchema
+ Send
+ Sync
+ 'static,
{
}
impl<L: LinguisticDefinition> AnalysisComponent<L> for MorphemeSegmentation {
fn name(&self) -> &'static str {
"Morpheme Segmentation"
}
fn schema_key(&self) -> &'static str {
"morpheme_segmentation"
}
fn schema_fragment(&self, _lang: &L) -> serde_json::Value {
let r#gen = schemars::SchemaGenerator::default();
let schema = r#gen.into_root_schema_for::<Vec<WordSegmentation<L::GrammaticalFunction>>>();
serde_json::to_value(&schema).unwrap()
}
fn prompt_fragment(&self, lang: &L, _ctx: &ComponentContext) -> String {
lang.extra_extraction_directives().unwrap_or_default()
}
fn post_process(&self, lang: &L, section: &mut serde_json::Value) -> Result<(), String> {
let mut segmentation: Option<Vec<WordSegmentation<L::GrammaticalFunction>>> =
serde_json::from_value(section.clone()).map_err(|e| e.to_string())?;
lang.post_process_extraction(&mut segmentation)?;
*section = serde_json::to_value(&segmentation).map_err(|e| e.to_string())?;
Ok(())
}
fn is_compatible(&self, lang: &L) -> bool {
lang.typological_features()
.contains(&TypologicalFeature::Agglutination)
}
fn as_aggregating(&self) -> Option<&dyn Aggregating<L>> {
Some(self)
}
}
/// [`Aggregating`] implementation for [`MorphemeSegmentation`]: feeds every
/// morpheme found in a section into an aggregation sink.
impl<L: LinguisticDefinition> Aggregating<L> for MorphemeSegmentation
where
    L::GrammaticalFunction: AggregableFields + for<'de> serde::Deserialize<'de>,
{
    /// Parses `section` as an optional list of word segmentations and records
    /// each morpheme of each word into `sink` via [`record_aggregable`].
    ///
    /// A `null` section parses to `None` and records nothing.
    ///
    /// # Errors
    ///
    /// Returns [`AggregationError::Deserialize`] (keyed
    /// `"morpheme_segmentation"`) when `section` does not match the expected
    /// shape.
    fn aggregate_section(
        &self,
        _lang: &L,
        section: &serde_json::Value,
        sink: &mut dyn AggregationSink,
    ) -> Result<(), AggregationError> {
        // Deserialize straight from the borrowed value: `&serde_json::Value`
        // implements serde's `Deserializer`, so no deep copy of the section is
        // needed just to read it.
        let segmentations = <Option<Vec<WordSegmentation<L::GrammaticalFunction>>> as serde::Deserialize<
            '_,
        >>::deserialize(section)
        .map_err(|e| AggregationError::Deserialize {
            key: "morpheme_segmentation",
            source: e,
        })?;

        // `Option::iter` yields the inner vector zero or one time; flattening
        // then walks words, and the `flat_map` walks each word's morphemes.
        for morpheme in segmentations
            .iter()
            .flatten()
            .flat_map(|seg| &seg.morphemes)
        {
            record_aggregable(sink, morpheme);
        }
        Ok(())
    }
}