use crate::syllable;
/// One way of splitting an input string into syllables.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Segmentation {
    /// The syllables that make up this split, stored as owned strings.
    pub syllables: Vec<String>,
}

impl Segmentation {
    /// Returns the number of syllables in this segmentation.
    pub fn len(&self) -> usize {
        let Self { syllables } = self;
        syllables.len()
    }

    /// Returns `true` when this segmentation holds no syllables at all.
    pub fn is_empty(&self) -> bool {
        let Self { syllables } = self;
        syllables.is_empty()
    }
}
/// Upper bound, in bytes, on the length of any candidate slice that `segment`
/// tests at a given start offset.
// NOTE(review): presumably the length of the longest valid syllable
// (e.g. "zhuang") — confirm against `syllable::is_valid`.
const MAX_SYL_LEN: usize = 6;
/// Enumerates every way to split `input` into consecutive valid syllables.
///
/// The input is lowercased with ASCII rules before matching, so matching is
/// case-insensitive for ASCII letters and the returned syllables are
/// lowercase. Each candidate slice is at most `MAX_SYL_LEN` bytes long and is
/// accepted when `syllable::is_valid` says so.
///
/// # Returns
///
/// * For the empty string: a single [`Segmentation`] with no syllables.
/// * When the whole input cannot be covered by valid syllables: an empty
///   vector.
/// * Otherwise: every complete segmentation, sorted by ascending syllable
///   count. The sort is stable, so segmentations with the same count keep
///   their enumeration order.
///
/// # Non-ASCII input
///
/// Candidate ends that fall in the middle of a multi-byte character are
/// skipped rather than sliced, so input containing non-ASCII text never
/// panics; it is segmented only if `syllable::is_valid` accepts slices that
/// contain those characters.
pub fn segment(input: &str) -> Vec<Segmentation> {
    if input.is_empty() {
        return vec![Segmentation {
            syllables: Vec::new(),
        }];
    }

    let s = input.to_ascii_lowercase();
    let n = s.len();

    // dp[j] lists every (start, syllable) pair whose syllable ends at byte
    // offset j and whose start offset is itself reachable from offset 0.
    let mut dp: Vec<Vec<(usize, &str)>> = vec![Vec::new(); n + 1];
    let mut reachable = vec![false; n + 1];
    reachable[0] = true;

    for i in 0..n {
        if !reachable[i] {
            continue;
        }
        let max_end = (i + MAX_SYL_LEN).min(n);
        for j in (i + 1)..=max_end {
            // `i` is always a char boundary (it is 0 or a previously accepted
            // end), so `get` returns `None` exactly when `j` would cut a
            // multi-byte character in half. Indexing with `&s[i..j]` would
            // panic there instead.
            if let Some(candidate) = s.get(i..j) {
                if syllable::is_valid(candidate) {
                    dp[j].push((i, candidate));
                    reachable[j] = true;
                }
            }
        }
    }

    if !reachable[n] {
        return Vec::new();
    }

    let mut results: Vec<Segmentation> = Vec::new();
    let mut path: Vec<&str> = Vec::new();
    backtrack(&dp, n, &mut path, &mut results);

    // Stable sort: fewest syllables first, ties in enumeration order.
    results.sort_by_key(|seg| seg.syllables.len());
    results
}
/// Expands the predecessor table `dp` into complete segmentations.
///
/// `dp[pos]` holds `(start, syllable)` pairs for syllables ending at offset
/// `pos`. Starting from `pos`, every chain of pairs leading back to offset 0
/// is followed depth-first, in the order the pairs are stored, and each
/// completed chain is appended to `out` as a [`Segmentation`].
///
/// `path` is scratch space holding the syllables chosen so far, last syllable
/// first; it is left exactly as it was found when the call returns.
///
/// # Panics
///
/// Panics if `pos`, or any `start` stored in the table, is not a valid index
/// into `dp`.
fn backtrack<'a>(
    dp: &[Vec<(usize, &'a str)>],
    pos: usize,
    path: &mut Vec<&'a str>,
    out: &mut Vec<Segmentation>,
) {
    if pos != 0 {
        for &(start, piece) in dp[pos].iter() {
            path.push(piece);
            backtrack(dp, start, path, out);
            path.pop();
        }
        return;
    }

    // Offset 0 reached: `path` was built from the end backwards, so read it
    // in reverse to get the syllables front to back.
    let syllables: Vec<String> = path
        .iter()
        .rev()
        .map(|piece| String::from(*piece))
        .collect();
    out.push(Segmentation { syllables });
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Reports whether `out` contains a segmentation whose syllables equal `want`.
    fn has(out: &[Segmentation], want: &[&str]) -> bool {
        out.iter().any(|seg| seg.syllables == want)
    }

    /// The empty string has exactly one segmentation: the empty one.
    #[test]
    fn empty_input_yields_empty_segmentation() {
        let out = segment("");
        assert_eq!(out.len(), 1);
        assert!(out[0].is_empty());
    }

    /// A lone syllable comes back as the first result.
    #[test]
    fn single_syllable() {
        let out = segment("zhong");
        assert!(!out.is_empty());
        assert_eq!(out[0].syllables, vec!["zhong"]);
    }

    /// Two syllables written together are split apart.
    #[test]
    fn two_syllable_unambiguous() {
        let out = segment("zhongguo");
        assert!(
            has(&out, &["zhong", "guo"]),
            "expected [zhong, guo] in {out:?}"
        );
    }

    /// A longer run of syllables is split at every boundary.
    #[test]
    fn long_unambiguous() {
        let out = segment("zhonghuarenmin");
        assert!(
            has(&out, &["zhong", "hua", "ren", "min"]),
            "expected [zhong, hua, ren, min] in {out:?}"
        );
    }

    /// Both readings of an ambiguous input are reported.
    #[test]
    fn ambiguous_xian_returns_multiple() {
        let out = segment("xian");
        let has_xian = has(&out, &["xian"]);
        let has_xi_an = has(&out, &["xi", "an"]);
        assert!(has_xian, "missing [xian] in {out:?}");
        assert!(has_xi_an, "missing [xi, an] in {out:?}");
    }

    /// Results are ordered with the fewest syllables first.
    #[test]
    fn fewer_syllables_first() {
        let out = segment("xian");
        assert_eq!(out[0].len(), 1);
    }

    /// Input that cannot be fully covered yields no segmentations.
    #[test]
    fn invalid_input_returns_empty() {
        assert!(segment("xxqz").is_empty());
        assert!(segment("zhongq").is_empty());
    }

    /// Upper-case letters are matched as their lower-case forms.
    #[test]
    fn case_insensitive() {
        let out = segment("ZhongGuo");
        assert!(has(&out, &["zhong", "guo"]));
    }
}