jpreprocess_core/
word_details.rs

1use std::str::FromStr;
2
3use serde::{Deserialize, Serialize};
4
5use crate::{
6    accent_rule::ChainRules,
7    cform::CForm,
8    ctype::CType,
9    pos::{Meishi, POS},
10    pronunciation::Pronunciation,
11    word_line::WordDetailsLine,
12    JPreprocessResult,
13};
14
15#[derive(Clone, PartialEq, Serialize, Deserialize, Debug)]
16pub struct WordDetails {
17    pub pos: POS,
18    pub ctype: CType,
19    pub cform: CForm,
20    pub read: Option<String>,
21    pub pron: Pronunciation,
22    pub chain_rule: ChainRules,
23    pub chain_flag: Option<bool>,
24}
25
26impl Default for WordDetails {
27    fn default() -> Self {
28        Self {
29            pos: POS::Meishi(Meishi::None),
30            ctype: CType::default(),
31            cform: CForm::default(),
32            read: None,
33            pron: Pronunciation::default(),
34            chain_rule: ChainRules::default(),
35            chain_flag: None,
36        }
37    }
38}
39
40impl WordDetails {
41    pub fn load(details: &[&str]) -> JPreprocessResult<Self> {
42        WordDetailsLine::from_strs(details).try_into()
43    }
44
45    pub fn extend_splited(
46        &mut self,
47        read: &str,
48        pron: &str,
49        acc_morasize: &str,
50    ) -> JPreprocessResult<()> {
51        self.read = match read {
52            "*" => None,
53            _ => Some(read.to_string()),
54        };
55        self.pron = Pronunciation::parse_csv_pron(pron, acc_morasize)?;
56        self.chain_flag = Some(false);
57        Ok(())
58    }
59
60    pub fn to_str_vec(&self, orig: String) -> [String; 9] {
61        let line = WordDetailsLine::from(self);
62
63        [
64            format!(
65                "{},{},{},{}",
66                line.pos, line.pos_group1, line.pos_group2, line.pos_group3
67            ),
68            line.ctype.to_string(),
69            line.cform.to_string(),
70            // Ideally, this should be `self.orig`, but jpreprocess njdnode does not have orig
71            // and in most cases, orig is the same as string.
72            orig,
73            line.read.to_string(),
74            line.pron.to_string(),
75            line.acc_morasize.to_string(),
76            line.chain_rule.to_string(),
77            line.chain_flag.to_string(),
78        ]
79    }
80}
81
82impl TryFrom<WordDetailsLine> for WordDetails {
83    type Error = crate::JPreprocessError;
84    fn try_from(value: WordDetailsLine) -> Result<WordDetails, Self::Error> {
85        // orig: not used
86
87        Ok(Self {
88            pos: POS::from_strs(
89                &value.pos,
90                &value.pos_group1,
91                &value.pos_group2,
92                &value.pos_group3,
93            )?,
94            ctype: CType::from_str(&value.ctype)?,
95            cform: CForm::from_str(&value.cform)?,
96            chain_rule: ChainRules::new(&value.chain_rule),
97            chain_flag: match value.chain_flag.as_ref() {
98                "1" => Some(true),
99                "0" => Some(false),
100                _ => None,
101            },
102            read: match value.read.as_ref() {
103                "*" => None,
104                _ => Some(value.read.to_string()),
105            },
106            pron: Pronunciation::parse_csv_pron(&value.pron, &value.acc_morasize)?,
107        })
108    }
109}
110
111impl From<&WordDetails> for WordDetailsLine {
112    fn from(value: &WordDetails) -> Self {
113        let pos = value.pos.to_string();
114        let pos_parts: Vec<&str> = pos.split(',').collect();
115        assert_eq!(pos_parts.len(), 4, "POS must have exactly 4 parts");
116
117        Self {
118            pos: pos_parts[0].to_string(),
119            pos_group1: pos_parts[1].to_string(),
120            pos_group2: pos_parts[2].to_string(),
121            pos_group3: pos_parts[3].to_string(),
122            ctype: value.ctype.to_string(),
123            cform: value.cform.to_string(),
124            orig: "*".to_string(), // orig is not stored in WordDetails
125            read: value.read.as_deref().unwrap_or("*").to_string(),
126            pron: value.pron.to_string(),
127            acc_morasize: format!("{}/{}", value.pron.accent(), value.pron.mora_size()),
128            chain_rule: value.chain_rule.to_string(),
129            chain_flag: match value.chain_flag {
130                Some(true) => "1",
131                Some(false) => "0",
132                None => "-1",
133            }
134            .into(),
135        }
136    }
137}