use serde::Deserialize;
/// One element of a rule pattern, as produced by the rule parser.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PatternAtom {
    /// A literal symbol token, stored after bracket/slash stripping.
    Symbol(String),
    /// The boundary marker, written `#` in rule text.
    Boundary,
}
/// A parsed allophony rule of the shape `lhs > rhs / left _ right`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct AllophonyRule {
    /// Label for the rule; the trimmed rule text is used when no name was given.
    pub name: String,
    /// The atom being rewritten; `None` when the left-hand side is empty (`∅` / `0`).
    pub lhs: Option<PatternAtom>,
    /// The replacement text; `None` when the right-hand side is empty (`∅` / `0`).
    pub rhs: Option<String>,
    /// Context atoms written before the `_` placeholder (empty without a context).
    pub left: Vec<PatternAtom>,
    /// Context atoms written after the `_` placeholder (empty without a context).
    pub right: Vec<PatternAtom>,
    /// Flag carried over unchanged from the raw rule; how it is honored is
    /// decided by code outside this definition.
    pub optional: bool,
}
/// Serialized form of a rule as it is read from input, before the rule text is parsed.
#[derive(Deserialize)]
struct RawRule {
/// Optional label; an absent field deserializes to the empty string.
#[serde(default)]
name: String,
/// Rule text; required, and parsed when converting into an `AllophonyRule`.
rule: String,
/// Copied into `AllophonyRule::optional`; an absent field deserializes to `false`.
#[serde(default)]
optional: bool,
}
impl TryFrom<RawRule> for AllophonyRule {
type Error = String;
fn try_from(r: RawRule) -> Result<Self, Self::Error> {
let (lhs, rhs, left, right) = parse_rule(&r.rule)?;
Ok(AllophonyRule {
name: if r.name.trim().is_empty() {
r.rule.trim().to_string()
} else {
r.name
},
lhs,
rhs,
left,
right,
optional: r.optional,
})
}
}
impl<'de> Deserialize<'de> for AllophonyRule {
fn deserialize<D>(d: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
AllophonyRule::try_from(RawRule::deserialize(d)?).map_err(serde::de::Error::custom)
}
}
/// Normalizes one rule token.
///
/// Trims whitespace, strips any run of `/`, `[` and `]` from both ends, trims
/// again, and maps the null markers `∅` and `0` to the empty string.
fn clean(tok: &str) -> String {
    let unwrapped = tok
        .trim()
        .trim_matches(|ch: char| matches!(ch, '/' | '[' | ']'))
        .trim();
    match unwrapped {
        "∅" | "0" => String::new(),
        symbol => symbol.to_owned(),
    }
}
/// Turns one token into a pattern atom.
///
/// Returns `None` when the cleaned token is empty (including `∅` / `0`),
/// `Boundary` for `#`, and `Symbol` holding the cleaned text otherwise.
fn parse_atom(tok: &str) -> Option<PatternAtom> {
    let symbol = clean(tok);
    match symbol.as_str() {
        "" => None,
        "#" => Some(PatternAtom::Boundary),
        _ => Some(PatternAtom::Symbol(symbol)),
    }
}
fn parse_context(part: &str) -> Vec<PatternAtom> {
part.split_whitespace().filter_map(parse_atom).collect()
}
/// Parses the textual form of an allophony rule into its components.
///
/// Accepted shapes are `LHS > RHS` and `LHS > RHS / LEFT _ RIGHT`; the arrow
/// may be `>` or `→`. `LHS` and `RHS` go through `clean`, so surrounding `/`,
/// `[` and `]` are stripped and `∅` / `0` mean "nothing".
///
/// Returns `(lhs, rhs, left, right)`:
/// * `lhs` is `None` when the left-hand side is empty,
/// * `rhs` is `None` when the right-hand side is empty,
/// * `left` / `right` are the context atoms before and after the `_`
///   placeholder, both empty when the rule has no context.
///
/// A `/` is only treated as the context separator when a `_` follows it; with
/// no `_` anywhere in `s`, the whole string is parsed as the change.
///
/// # Errors
///
/// Returns a message naming the rule when:
/// * a `_` is present but no `/` precedes it,
/// * the change part has neither `>` nor `→`,
/// * both sides of the change are empty,
/// * the left-hand side is the boundary marker `#`,
/// * the context contains more than one `_` placeholder.
// The tuple is unpacked immediately by the visible caller, so a dedicated
// named type would add little; hence the lint is silenced here.
#[allow(clippy::type_complexity)]
fn parse_rule(
    s: &str,
) -> Result<(Option<PatternAtom>, Option<String>, Vec<PatternAtom>, Vec<PatternAtom>), String> {
    // The first `_` is the placeholder; the last `/` before it is the context
    // separator, so slashes used as brackets (`/k/ > [x] / V _ V`) stay with
    // the change part.
    let (change, context) = match s.find('_') {
        Some(upos) => {
            let sep = s[..upos]
                .rfind('/')
                .ok_or_else(|| format!("allophony rule `{s}` has a `_` context but no `/`"))?;
            // `/` and `_` are single-byte, so `+ 1` always lands on a char boundary.
            (&s[..sep], Some((&s[sep + 1..upos], &s[upos + 1..])))
        }
        None => (s, None),
    };

    // `>` takes precedence over `→` when both occur in the change part.
    let arrow = change
        .find('>')
        .or_else(|| change.find('→'))
        .ok_or_else(|| format!("allophony rule `{s}` has no `>` / `→`"))?;
    let (lhs_str, rhs_str) = change.split_at(arrow);
    // `rhs_str` still starts with the arrow itself; drop it (and any repeats).
    let rhs_str = rhs_str.trim_start_matches(['>', '→']);

    let lhs = parse_atom(lhs_str);
    let rhs = Some(clean(rhs_str)).filter(|c| !c.is_empty());

    if lhs.is_none() && rhs.is_none() {
        return Err(format!("allophony rule `{s}` is ∅ > ∅ (no-op)"));
    }
    if matches!(lhs, Some(PatternAtom::Boundary)) {
        return Err(format!("allophony rule `{s}` targets a boundary"));
    }

    let (left, right) = match context {
        None => (Vec::new(), Vec::new()),
        // A second placeholder would otherwise be stored as a literal `_`
        // symbol in the right context; reject it explicitly instead.
        Some((_, after)) if after.contains('_') => {
            return Err(format!("allophony rule `{s}` context has more than one `_`"));
        }
        Some((before, after)) => (parse_context(before), parse_context(after)),
    };

    Ok((lhs, rhs, left, right))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a rule from bare rule text, panicking if it does not parse.
    fn parsed(rule: &str) -> AllophonyRule {
        let raw = RawRule { name: String::new(), rule: rule.into(), optional: false };
        AllophonyRule::try_from(raw).expect("parses")
    }

    /// Shorthand for a `PatternAtom::Symbol` holding `text`.
    fn sym(text: &str) -> PatternAtom {
        PatternAtom::Symbol(text.to_string())
    }

    #[test]
    fn parses_substitution_with_right_context() {
        let rule = parsed("k > tʃ / _ i");
        assert_eq!(rule.lhs, Some(sym("k")));
        assert_eq!(rule.rhs.as_deref(), Some("tʃ"));
        assert!(rule.left.is_empty());
        assert_eq!(rule.right, vec![sym("i")]);
    }

    #[test]
    fn parses_final_devoicing_with_boundary() {
        let rule = parsed("d > t / _ #");
        assert_eq!(rule.rhs.as_deref(), Some("t"));
        assert_eq!(rule.right, vec![PatternAtom::Boundary]);
    }

    #[test]
    fn parses_epenthesis_and_deletion() {
        // Insertion: nothing on the left-hand side.
        let insertion = parsed("∅ > ə / C _ C");
        assert_eq!(insertion.lhs, None);
        assert_eq!(insertion.rhs.as_deref(), Some("ə"));
        assert_eq!(insertion.left, vec![sym("C")]);

        // Deletion: nothing on the right-hand side (`0` is a null marker).
        let deletion = parsed("V > 0 / _ #");
        assert_eq!(deletion.lhs, Some(sym("V")));
        assert_eq!(deletion.rhs, None);
    }

    #[test]
    fn strips_spe_brackets() {
        let rule = parsed("/k/ > [x] / V _ V");
        assert_eq!(rule.lhs, Some(sym("k")));
        assert_eq!(rule.rhs.as_deref(), Some("x"));
        assert_eq!(rule.left, vec![sym("V")]);
        assert_eq!(rule.right, vec![sym("V")]);
    }
}