/// Locates the break opportunities created by soft hyphens (U+00AD) in `text`.
///
/// Each returned value is the byte offset of the position immediately *after*
/// a soft hyphen, i.e. the soft hyphen's starting byte index plus its UTF-8
/// length. Offsets are produced in ascending order; the result is empty when
/// `text` contains no soft hyphen.
pub fn soft_hyphen_breaks(text: &str) -> Vec<usize> {
    const SOFT_HYPHEN: char = '\u{00AD}';

    text.match_indices(SOFT_HYPHEN)
        // `matched` is the soft hyphen itself, so its length is the number of
        // bytes to step over to land just past it.
        .map(|(start, matched)| start + matched.len())
        .collect()
}
/// Computes dictionary-based hyphenation opportunities for every word in `text`.
///
/// `text` is split into words, where a word is a maximal run of characters
/// for which [`char::is_whitespace`] is `false`. Unicode separators such as
/// the no-break space therefore delimit words just like ASCII spaces do,
/// instead of being handed to the hyphenator as part of a word. Each word is
/// passed to `hypher::hyphenate` with the given `lang`, and a break is
/// recorded at every boundary between two consecutive syllables it yields.
///
/// Returns byte offsets into `text`, in the order the words and syllables are
/// encountered. The result is empty when `text` is empty or contains only
/// whitespace.
///
/// NOTE(review): the offsets are derived by summing syllable byte lengths, so
/// this assumes the syllables yielded by `hypher::hyphenate` are consecutive
/// slices that concatenate back to the word — confirm against the `hypher`
/// documentation.
#[cfg(feature = "hyphenation")]
pub fn automatic_hyphen_breaks(text: &str, lang: hypher::Lang) -> Vec<usize> {
    let mut breaks = Vec::new();
    // Byte index into `text` of the first character not yet processed; it only
    // ever advances by whole characters, so slicing at it is always valid.
    let mut cursor = 0usize;

    loop {
        // Skip the separator run in front of the next word, if any.
        let tail = &text[cursor..];
        let word_start = match tail.find(|c: char| !c.is_whitespace()) {
            Some(skipped) => cursor + skipped,
            None => break,
        };

        // The word extends up to the next whitespace character or end of text.
        let tail = &text[word_start..];
        let word_len = tail.find(char::is_whitespace).unwrap_or(tail.len());
        let word = &tail[..word_len];

        // Record the running offset in front of every syllable except the
        // first; this yields exactly the boundaries between adjacent syllables
        // without having to buffer them to know which one is last.
        let mut offset = word_start;
        for (index, syllable) in hypher::hyphenate(word, lang).enumerate() {
            if index > 0 {
                breaks.push(offset);
            }
            offset += syllable.len();
        }

        cursor = word_start + word_len;
    }

    breaks
}
#[cfg(test)]
mod tests {
    use super::*;

    // Expected offsets below are byte positions: U+00AD occupies two bytes in
    // UTF-8, so a break sits two bytes past the soft hyphen's start index.

    #[test]
    fn soft_hyphen_single() {
        assert_eq!(soft_hyphen_breaks("ma\u{00AD}chine"), vec![4]);
    }

    #[test]
    fn soft_hyphen_none() {
        assert!(soft_hyphen_breaks("no hyphens").is_empty());
    }

    #[test]
    fn soft_hyphen_empty_input() {
        assert!(soft_hyphen_breaks("").is_empty());
    }

    #[test]
    fn soft_hyphen_multiple() {
        let breaks = soft_hyphen_breaks("a\u{00AD}b\u{00AD}c");
        assert_eq!(breaks, vec![3, 6]);
    }

    #[test]
    fn soft_hyphen_at_start() {
        let breaks = soft_hyphen_breaks("\u{00AD}abc");
        assert_eq!(breaks, vec![2]);
    }

    #[test]
    fn soft_hyphen_consecutive() {
        let breaks = soft_hyphen_breaks("a\u{00AD}\u{00AD}b");
        assert_eq!(breaks, vec![3, 5]);
    }

    /// Tests that need the optional `hypher` dependency.
    #[cfg(feature = "hyphenation")]
    mod hyphenation_feature {
        use super::*;
        use hypher::Lang;

        #[test]
        fn automatic_breaks_machine() {
            let breaks = automatic_hyphen_breaks("machine", Lang::English);
            assert_eq!(breaks, vec![2]);
        }

        #[test]
        fn automatic_breaks_empty() {
            let breaks = automatic_hyphen_breaks("", Lang::English);
            assert!(breaks.is_empty());
        }

        #[test]
        fn automatic_breaks_short_word() {
            // A one-byte word cannot be split into two syllables, so besides
            // not panicking the call must report no break at all.
            let breaks = automatic_hyphen_breaks("I", Lang::English);
            assert!(breaks.is_empty(), "unexpected breaks {breaks:?}");
        }

        #[test]
        fn automatic_breaks_surrounding_whitespace() {
            // Offsets are relative to the whole input, not to the word: the
            // two leading spaces shift the break of "machine" from 2 to 4.
            let breaks = automatic_hyphen_breaks("  machine  ", Lang::English);
            assert_eq!(breaks, vec![4]);
        }

        #[test]
        fn automatic_breaks_sentence() {
            let text = "hyphenation machine";
            let breaks = automatic_hyphen_breaks(text, Lang::English);
            assert!(!breaks.is_empty());
            for &b in &breaks {
                assert!(b <= text.len(), "break {b} out of bounds");
                assert!(text.is_char_boundary(b), "break {b} splits a character");
            }
            assert!(
                breaks.windows(2).all(|pair| pair[0] < pair[1]),
                "breaks not strictly ascending: {breaks:?}"
            );
        }
    }
}