use crate::{Error, Result};
use byteorder::{ByteOrder, LittleEndian};
/// Table of feature strings that are looked up by numeric feature id.
#[derive(Debug)]
pub struct FeatureTable {
/// Feature strings; a feature id is used as the index into this vector.
features: Vec<String>,
}
impl FeatureTable {
    /// Builds a feature table from a raw byte buffer.
    ///
    /// A little-endian `u32` at byte offsets `32..36` is read as the declared
    /// feature size. It is used only as a capped preallocation hint: no feature
    /// entries are decoded here, so the returned table is empty.
    ///
    /// # Errors
    ///
    /// Returns [`Error::FeatureParseError`] when `data` is shorter than 72 bytes.
    pub fn from_bytes(data: &[u8]) -> Result<Self> {
        // NOTE(review): 72 looks like a fixed header length — confirm against
        // the format specification.
        const MIN_LEN: usize = 72;
        // Upper bound on the number of entries reserved up front, whatever the
        // header declares.
        const MAX_PREALLOCATED: usize = 4096;

        if data.len() < MIN_LEN {
            return Err(Error::FeatureParseError(
                "Data too small for feature table".to_string(),
            ));
        }

        // In bounds: the length check above guarantees at least 72 bytes.
        let feature_size = LittleEndian::read_u32(&data[32..36]) as usize;

        // The declared size comes straight from the input. Reserving it
        // verbatim would let a corrupt header request tens of gigabytes
        // (capacity-overflow panic on 32-bit targets, possible allocation
        // abort on 64-bit), so it is only honoured up to a fixed cap. The
        // vector still grows on demand past the cap.
        let capacity = feature_size.min(MAX_PREALLOCATED);
        let features = Vec::with_capacity(capacity);

        Ok(Self { features })
    }

    /// Returns the feature string stored at index `feature_id`, or `None` when
    /// the id is out of range.
    pub fn get(&self, feature_id: u32) -> Option<&str> {
        self.features.get(feature_id as usize).map(String::as_str)
    }

    /// Returns the number of feature strings in the table.
    pub fn len(&self) -> usize {
        self.features.len()
    }

    /// Returns `true` when the table holds no feature strings.
    pub fn is_empty(&self) -> bool {
        self.features.is_empty()
    }

    /// Splits a comma-separated feature string into its positional components.
    ///
    /// Fields are assigned in order to `pos1`, `pos2`, `pos3`, `pos4`,
    /// `conjugation_type`, `conjugation_form`, `base_form`, `reading` and
    /// `pronunciation`. Positions beyond the last field present are `None`;
    /// fields after the ninth are ignored. The split is on every `,` with no
    /// quoting or escaping rules applied.
    pub fn parse_feature(feature: &str) -> FeatureComponents<'_> {
        let mut fields = feature.split(',');
        // Struct-expression operands are evaluated in the order written, so
        // each `next()` below yields the field at the matching position.
        FeatureComponents {
            pos1: fields.next(),
            pos2: fields.next(),
            pos3: fields.next(),
            pos4: fields.next(),
            conjugation_type: fields.next(),
            conjugation_form: fields.next(),
            base_form: fields.next(),
            reading: fields.next(),
            pronunciation: fields.next(),
        }
    }
}
/// Borrowed, positional view over the comma-separated fields of one feature
/// string, as produced by `FeatureTable::parse_feature`.
///
/// Each field is `None` when the feature string had no field at that position.
#[derive(Debug, Clone)]
pub struct FeatureComponents<'a> {
    /// Field 0: first part-of-speech level.
    pub pos1: Option<&'a str>,
    /// Field 1: second part-of-speech level.
    pub pos2: Option<&'a str>,
    /// Field 2: third part-of-speech level.
    pub pos3: Option<&'a str>,
    /// Field 3: fourth part-of-speech level.
    pub pos4: Option<&'a str>,
    /// Field 4: conjugation type.
    pub conjugation_type: Option<&'a str>,
    /// Field 5: conjugation form.
    pub conjugation_form: Option<&'a str>,
    /// Field 6: base form.
    pub base_form: Option<&'a str>,
    /// Field 7: reading.
    pub reading: Option<&'a str>,
    /// Field 8: pronunciation.
    pub pronunciation: Option<&'a str>,
}

impl FeatureComponents<'_> {
    /// Joins the part-of-speech levels `pos1`..`pos4` with `-`.
    ///
    /// Levels that are absent (`None`) or equal to the `*` placeholder are
    /// left out; every other level, including an empty string, is kept.
    pub fn full_pos(&self) -> String {
        let levels: Vec<&str> = [self.pos1, self.pos2, self.pos3, self.pos4]
            .iter()
            .flatten()
            .copied()
            .filter(|level| *level != "*")
            .collect();
        levels.join("-")
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A nine-field feature string populates the matching named components.
    #[test]
    fn test_parse_feature() {
        let parsed =
            FeatureTable::parse_feature("名詞,一般,*,*,*,*,東京,トウキョウ,トーキョー");

        // (actual, expected) pairs, checked in the order the fields appear.
        let checks = [
            (parsed.pos1, "名詞"),
            (parsed.pos2, "一般"),
            (parsed.base_form, "東京"),
            (parsed.reading, "トウキョウ"),
            (parsed.pronunciation, "トーキョー"),
        ];
        for (actual, expected) in checks.iter() {
            assert_eq!(*actual, Some(*expected));
        }
    }

    /// `*` placeholder levels are dropped from the joined part of speech.
    #[test]
    fn test_full_pos() {
        let raw = "動詞,自立,*,*,五段・カ行イ音便,連用タ接続,書く,カイ,カイ";
        let joined = FeatureTable::parse_feature(raw).full_pos();
        assert_eq!(joined, "動詞-自立");
    }
}