use std::sync::Arc;
use super::Flag;
use crate::affix::FlagType;
use crate::error::ParseError;
use crate::helpers::convertu32;
use crate::morph::MorphInfo;
/// One parsed entry of a dictionary file: a stem together with the flags and
/// morphological information attached to it.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct DictEntry {
/// The word stem, stored with surrounding whitespace removed by the parser.
pub(super) stem: Arc<str>,
/// Flags parsed from the text following the `/` separator; empty when the
/// line has no `/`.
pub(super) flags: Vec<Flag>,
/// Morphological fields found after the stem/flags, each individually
/// reference counted.
pub(super) morph: Vec<Arc<MorphInfo>>,
}
impl DictEntry {
    /// Build an entry directly from its parts (test-only helper).
    ///
    /// Every `MorphInfo` is cloned and wrapped in its own `Arc`.
    #[cfg(test)]
    pub(crate) fn new(stem: &str, flags: &[Flag], morph: &[MorphInfo]) -> Self {
        Self {
            stem: stem.into(),
            flags: flags.to_owned(),
            morph: morph.iter().map(|v| Arc::new(v.clone())).collect(),
        }
    }

    /// Parse a single dictionary line of the form `stem[/flags] [morph ...]`.
    ///
    /// `line_num` is only used to annotate an error raised while parsing the
    /// flag string.
    ///
    /// # Errors
    ///
    /// Returns a `ParseError` when `flag_type` cannot parse the flag string.
    fn parse_single(value: &str, flag_type: FlagType, line_num: u32) -> Result<Self, ParseError> {
        let (stem, flagstr, morphstr) = separate_into_parts(value);

        let flags: Vec<Flag> = match flagstr {
            Some(s) => flag_type
                .parse_str(s.trim())
                // The untrimmed flag text is kept as the error context
                .map_err(|e| ParseError::new_nocol(e, s, line_num))?,
            None => Vec::new(),
        };

        let morph = MorphInfo::many_from_str(morphstr.trim())
            .map(Arc::new)
            .collect();

        Ok(Self {
            stem: stem.trim().into(),
            flags,
            morph,
        })
    }

    /// Parse the full contents of a dictionary file.
    ///
    /// If the first content line is an integer it is treated as an entry
    /// count and used only as an allocation hint; otherwise that line is
    /// parsed as a regular entry. Empty input yields an empty vector.
    ///
    /// Line numbers attached to errors are counted over content lines only:
    /// lines dropped by `extract_content` (blank, comment-only or starting
    /// with a tab) are not included in the count.
    ///
    /// # Errors
    ///
    /// Returns the first `ParseError` produced by any entry.
    #[inline]
    // Kept from the original: the `if let`/`else` below reassigns `lines_iter`
    // in one branch, which reads better than the combinator clippy suggests.
    #[allow(clippy::option_if_let_else)]
    pub fn parse_all(input: &str, flag_type: FlagType) -> Result<Vec<DictEntry>, ParseError> {
        let mut lines_iter = extract_content(input);
        let lines_backup = lines_iter.clone();

        let Some(first) = lines_iter.next() else {
            return Ok(Vec::new());
        };

        let (mut ret, start) = if let Ok(cap) = first.parse::<usize>() {
            // The declared count comes straight from the file, so never
            // reserve more slots than there are lines left to parse. A bogus
            // huge number would otherwise panic or abort on allocation.
            let remaining = lines_iter.clone().count();
            (Vec::with_capacity(cap.min(remaining)), 2)
        } else {
            // No count line: rewind so the first line is parsed as an entry
            lines_iter = lines_backup;
            (Vec::new(), 1)
        };

        for (i, line) in lines_iter.enumerate() {
            // NOTE(review): the line number is passed to `parse_single` and
            // then applied again as an offset here; confirm that
            // `add_offset_ret` does not double count it.
            ret.push(
                DictEntry::parse_single(line, flag_type, convertu32(i + start))
                    .map_err(|e| e.add_offset_ret(i + start, 0))?,
            );
        }

        Ok(ret)
    }
}
/// One parsed entry of a personal dictionary.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct PersonalEntry {
/// The word itself, without the leading `*` marker and without surrounding
/// whitespace.
pub stem: Arc<str>,
/// Text that followed the `/` separator on the line, if any.
pub friend: Option<Box<str>>,
/// Morphological fields found after the stem.
pub morph: Vec<MorphInfo>,
/// `true` when the line's stem was prefixed with `*`.
pub forbid: bool,
}
impl PersonalEntry {
    /// Build an entry directly from its parts (test-only helper).
    #[cfg(test)]
    pub(crate) fn new(
        stem: &str,
        friend: Option<&str>,
        morph: Vec<MorphInfo>,
        forbid: bool,
    ) -> Self {
        Self {
            stem: stem.into(),
            friend: friend.map(Into::into),
            morph,
            forbid,
        }
    }

    /// Parse a single personal-dictionary line of the form
    /// `[*]stem[/friend] [morph ...]`.
    ///
    /// A leading `*` sets `forbid` and is removed from the stored stem. The
    /// stem and the friend are stored with surrounding whitespace trimmed.
    pub fn parse_single(value: &str) -> Self {
        let (raw_stem, friend, morphstr) = separate_into_parts(value);

        // Look for the marker only after dropping leading whitespace. The
        // stem is trimmed anyway, so indentation must not hide the `*` and
        // leave it inside the stored stem.
        let raw_stem = raw_stem.trim_start();
        let (forbid, stem) = match raw_stem.strip_prefix('*') {
            Some(rest) => (true, rest),
            None => (false, raw_stem),
        };

        Self {
            stem: stem.trim().into(),
            friend: friend.map(|f| f.trim().into()),
            morph: MorphInfo::many_from_str(morphstr).collect(),
            forbid,
        }
    }

    /// Parse every content line of a personal dictionary.
    ///
    /// Lines are first filtered and cleaned by `extract_content`; each
    /// remaining line becomes one entry.
    pub fn parse_all(s: &str) -> Vec<PersonalEntry> {
        extract_content(s).map(Self::parse_single).collect()
    }
}
/// Split one dictionary line into `(stem, flags, morph)`.
///
/// Anything from the first `#` onwards is discarded. Then:
///
/// * If the line contains `/`, the stem is everything before the first `/`,
///   the flag string runs up to the first ASCII whitespace after it, and the
///   morph string is whatever follows that whitespace character.
/// * Otherwise there are no flags. If a `:` exists and ASCII whitespace
///   occurs somewhere before it, the line is cut at the last such whitespace:
///   the stem is the text before it and the morph string starts *at* the
///   whitespace. In every other case the whole line is the stem.
///
/// None of the returned pieces are trimmed.
fn separate_into_parts(value: &str) -> (&str, Option<&str>, &str) {
    let is_ws = |ch: char| ch.is_ascii_whitespace();

    // Drop a trailing comment, if present
    let content = match value.find('#') {
        Some(hash) => &value[..hash],
        None => value,
    };

    // "stem/flags morph": the flag separator makes the split unambiguous
    if let Some(slash) = content.find('/') {
        let word = &content[..slash];
        let rest = &content[slash + 1..];
        return match rest.find(is_ws) {
            // ASCII whitespace is one byte wide, so `+ 1` skips exactly it
            Some(gap) => (word, Some(&rest[..gap]), &rest[gap + 1..]),
            None => (word, Some(rest), ""),
        };
    }

    // No flags: find the first morph separator, then back up to the
    // whitespace that precedes the field it belongs to
    let cut = content
        .find(':')
        .and_then(|colon| content[..colon].rfind(is_ws));

    match cut {
        Some(pos) => (&content[..pos], None, &content[pos..]),
        None => (content, None, ""),
    }
}
/// Iterate over the meaningful lines of a dictionary file.
///
/// Lines beginning with a tab are skipped outright. For the rest, anything
/// from the first `#` onwards is removed and the remainder is trimmed; lines
/// that end up empty are dropped. The iterator is `Clone` so callers can
/// rewind by keeping a copy.
fn extract_content(input: &str) -> impl Iterator<Item = &str> + Clone {
    input.lines().filter_map(|raw| {
        if raw.starts_with('\t') {
            return None;
        }

        let cleaned = raw.find('#').map_or(raw, |hash| &raw[..hash]).trim();
        (!cleaned.is_empty()).then_some(cleaned)
    })
}
// Unit tests for this module live in the sibling file `tests_parse.rs` and
// are only compiled for test builds.
#[cfg(test)]
#[path = "tests_parse.rs"]
mod tests;