// jpreprocess_core/pos/mod.rs

1use serde::{Deserialize, Serialize};
2use std::{fmt::Display, str::FromStr};
3
4mod doushi;
5mod fukushi;
6mod joshi;
7mod keiyoushi;
8mod kigou;
9mod meishi;
10mod settoushi;
11
12pub use self::{doushi::*, fukushi::*, joshi::*, keiyoushi::*, kigou::*, meishi::*, settoushi::*};
13
14#[derive(Debug, thiserror::Error, PartialEq, Eq)]
15#[error("Tried to parse {string} (depth: {depth}), but failed in {kind}")]
16pub struct POSParseError {
17    depth: u8,
18    string: String,
19    kind: POSKind,
20}
21impl POSParseError {
22    pub(crate) fn new(depth: u8, string: String, kind: POSKind) -> Self {
23        Self {
24            depth,
25            string,
26            kind,
27        }
28    }
29}
30
/// Category that was being parsed when a [`POSParseError`] was created.
///
/// The enum carries no data, so `Clone` and `Copy` are derived: a kind can be
/// stored in an error and still be used afterwards without moving it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum POSKind {
    /// Top-level part of speech (品詞).
    POSMajor,
    /// Symbol (記号).
    Kigou,
    /// Adjective (形容詞).
    Keiyoushi,
    /// Particle (助詞).
    Joshi,
    /// Prefix (接頭詞).
    Settoushi,
    /// Verb (動詞).
    Doushi,
    /// Adverb (副詞).
    Fukushi,
    /// Noun (名詞).
    Meishi,
    /// Case particle (格助詞).
    KakuJoshi,
    /// Proper noun (固有名詞).
    KoyuMeishi,
    /// Person name, a proper-noun sub-category (人名).
    Person,
    /// Region, a proper-noun sub-category (地域).
    Region,
    /// Noun suffix (接尾).
    MeishiSetsubi,
    /// Pronoun (代名詞).
    Daimeishi,
    /// Non-independent noun (非自立).
    MeishiHijiritsu,
}

impl Display for POSKind {
    /// Writes the Japanese label of the category.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::POSMajor => "品詞",
            Self::Kigou => "記号",
            Self::Keiyoushi => "形容詞",
            Self::Joshi => "助詞",
            Self::Settoushi => "接頭詞",
            Self::Doushi => "動詞",
            Self::Fukushi => "副詞",
            Self::Meishi => "名詞",
            Self::KakuJoshi => "格助詞",
            Self::KoyuMeishi => "固有名詞",
            Self::Person => "人名(固有名詞)",
            Self::Region => "地域(固有名詞)",
            Self::MeishiSetsubi => "接尾(名詞)",
            Self::Daimeishi => "代名詞",
            Self::MeishiHijiritsu => "非自立(名詞)",
        };
        f.write_str(label)
    }
}
70
71#[derive(Clone, Copy, PartialEq, Debug, Serialize, Deserialize, Default)]
72/// 品詞
73pub enum POS {
74    /// フィラー
75    Filler,
76    /// 感動詞
77    Kandoushi,
78    /// 記号
79    Kigou(Kigou),
80    /// 形容詞
81    Keiyoushi(Keiyoushi),
82    /// 助詞
83    Joshi(Joshi),
84    /// 助動詞
85    Jodoushi,
86    /// 接続詞
87    Setsuzokushi,
88    /// 接頭詞
89    Settoushi(Settoushi),
90    /// 動詞
91    Doushi(Doushi),
92    /// 副詞
93    Fukushi(Fukushi),
94    /// 名詞
95    Meishi(Meishi),
96    /// 連体詞
97    Rentaishi,
98
99    /// その他
100    Others,
101
102    /// 不明
103    #[default]
104    Unknown,
105}
106
107impl POS {
108    pub fn from_strs(g0: &str, g1: &str, g2: &str, g3: &str) -> Result<Self, POSParseError> {
109        match g0 {
110            "フィラー" => Ok(Self::Filler),
111            "感動詞" => Ok(Self::Kandoushi),
112            "記号" => Kigou::from_str(g1).map(Self::Kigou),
113            "形容詞" => Keiyoushi::from_str(g1).map(Self::Keiyoushi),
114            "助詞" => Joshi::from_strs(g1, g2).map(Self::Joshi),
115            "助動詞" => Ok(Self::Jodoushi),
116            "接続詞" => Ok(Self::Setsuzokushi),
117            "接頭詞" => Settoushi::from_str(g1).map(Self::Settoushi),
118            "動詞" => Doushi::from_str(g1).map(Self::Doushi),
119            "副詞" => Fukushi::from_str(g1).map(Self::Fukushi),
120            "名詞" => Meishi::from_strs(g1, g2, g3).map(Self::Meishi),
121            "連体詞" => Ok(Self::Rentaishi),
122
123            "その他" => Ok(Self::Others),
124
125            "*" => Ok(Self::Unknown),
126
127            _ => Err(POSParseError::new(0, g0.to_string(), POSKind::POSMajor)),
128        }
129    }
130
131    pub fn is_kazu(&self) -> bool {
132        matches!(self, Self::Kigou(Kigou::Kazu) | Self::Meishi(Meishi::Kazu))
133    }
134
135    pub fn convert_to_kigou(&mut self) {
136        *self = match self {
137            Self::Kigou(kigou) => Self::Kigou(*kigou),
138            Self::Meishi(Meishi::Kazu) => Self::Kigou(Kigou::Kazu),
139            Self::Fukushi(Fukushi::General) | Self::Meishi(Meishi::General) => {
140                Self::Kigou(Kigou::General)
141            }
142            _ => Self::Kigou(Kigou::None),
143        }
144    }
145}
146
147impl Display for POS {
148    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149        f.write_str(match &self {
150            Self::Filler => "フィラー",
151            Self::Kandoushi => "感動詞",
152            Self::Kigou(_) => "記号",
153            Self::Keiyoushi(_) => "形容詞",
154            Self::Joshi(_) => "助詞",
155            Self::Jodoushi => "助動詞",
156            Self::Setsuzokushi => "接続詞",
157            Self::Settoushi(_) => "接頭詞",
158            Self::Doushi(_) => "動詞",
159            Self::Fukushi(_) => "副詞",
160            Self::Meishi(_) => "名詞",
161            Self::Rentaishi => "連体詞",
162
163            Self::Others => "その他",
164
165            Self::Unknown => "*",
166        })?;
167
168        match &self {
169            Self::Kigou(kigou) => write!(f, ",{}", kigou),
170            Self::Keiyoushi(keiyoushi) => write!(f, ",{}", keiyoushi),
171            Self::Joshi(joshi) => write!(f, ",{}", joshi),
172            Self::Settoushi(settoushi) => write!(f, ",{}", settoushi),
173            Self::Doushi(doushi) => write!(f, ",{}", doushi),
174            Self::Fukushi(fukushi) => write!(f, ",{}", fukushi),
175            Self::Meishi(meishi) => write!(f, ",{}", meishi),
176
177            _ => f.write_str(",*,*,*"),
178        }?;
179
180        Ok(())
181    }
182}
183
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the four grammar columns, panicking if parsing fails.
    fn parse(columns: [&str; 4]) -> POS {
        let [g0, g1, g2, g3] = columns;
        POS::from_strs(g0, g1, g2, g3).unwrap()
    }

    /// A category without a sub-category round-trips with `*` padding.
    #[test]
    fn filler() {
        let parsed = parse(["フィラー", "*", "*", "*"]);
        assert!(matches!(parsed, POS::Filler));
        assert_eq!(parsed.to_string(), "フィラー,*,*,*");
    }

    /// A particle whose sub-category label contains slashes round-trips.
    #[test]
    fn joshi() {
        let parsed = parse(["助詞", "副助詞/並立助詞/終助詞", "*", "*"]);
        assert!(matches!(parsed, POS::Joshi(Joshi::FukuHeiritsuShuJoshi)));
        assert_eq!(parsed.to_string(), "助詞,副助詞/並立助詞/終助詞,*,*");
    }

    /// A noun with no sub-category parses to `Meishi::None`.
    #[test]
    fn meishi() {
        let parsed = parse(["名詞", "*", "*", "*"]);
        assert!(matches!(parsed, POS::Meishi(Meishi::None)));
        assert_eq!(parsed.to_string(), "名詞,*,*,*");
    }

    /// A proper noun uses all four columns.
    #[test]
    fn koyumeishi() {
        let parsed = parse(["名詞", "固有名詞", "人名", "姓"]);
        let is_family_name = matches!(
            parsed,
            POS::Meishi(Meishi::KoyuMeishi(KoyuMeishi::Person(Person::Sei)))
        );
        assert!(is_family_name);
        assert_eq!(parsed.to_string(), "名詞,固有名詞,人名,姓");
    }
}