use std::sync::OnceLock;
use hyphenation::{Hyphenator, Language, Load, Standard};
use zenith_core::FontProvider;
use zenith_layout::{RustybuzzEngine, ShapeRequest, TextDirection, TextLayoutEngine};
use super::pack::Line;
use super::shape::{WordSource, WordToken};
/// Process-wide slot for the embedded en-US hyphenation dictionary.
///
/// The slot stores `None` when loading the embedded dictionary failed, so a
/// failed load is remembered as well as a successful one.
static EN_US_HYPHENATOR: OnceLock<Option<Standard>> = OnceLock::new();

/// Returns the shared en-US hyphenation dictionary, loading it on first use.
///
/// Yields `None` when `Standard::from_embedded` reported an error; the error
/// itself is discarded.
pub(in crate::compile) fn en_us_hyphenator() -> Option<&'static Standard> {
    let load_embedded = || Standard::from_embedded(Language::EnglishUS).ok();
    let cached = EN_US_HYPHENATOR.get_or_init(load_embedded);
    cached.as_ref()
}
/// Shared inputs for the hyphenation and word-breaking helpers in this module.
///
/// The shaping-related fields are forwarded to the shaping engine whenever a
/// word fragment has to be re-shaped.
pub(in crate::compile) struct HyphenationContext<'a> {
/// Hyphenation dictionary; `try_hyphenate` returns `None` when this is `None`.
pub(in crate::compile) dict: Option<&'static Standard>,
/// Engine used to re-shape word fragments.
pub(in crate::compile) engine: &'a RustybuzzEngine,
/// Font source handed to the engine's `shape_with_fallback` call.
pub(in crate::compile) fonts: &'a dyn FontProvider,
/// Font family list placed in every `ShapeRequest`.
pub(in crate::compile) families: &'a [String],
/// String appended to the head fragment of a hyphenated word.
pub(in crate::compile) hyphen: &'a str,
/// Text direction placed in every `ShapeRequest`.
pub(in crate::compile) direction: TextDirection,
/// NOTE(review): not read anywhere in this file; presumably tells the line
/// packer whether `try_break_word` may be used — confirm against callers.
pub(in crate::compile) break_word: bool,
}
/// The two tokens produced when `try_hyphenate` splits a word.
pub(in crate::compile) struct HyphenSplit {
/// Leading fragment, shaped with the context's hyphen string appended.
pub(in crate::compile) head: WordToken,
/// Remaining fragment of the word, shaped without a hyphen.
pub(in crate::compile) tail: WordToken,
}
/// Shapes `text` into a fresh [`WordToken`] that inherits its look from `donor`.
///
/// Weight, style and font size for shaping come from `donor.src`; font
/// families and direction come from `ctx`. Every decoration field (color,
/// underline, link, ...) is copied from `donor`. `hyphen_part` is stored
/// unchanged on the new token's source record.
///
/// Returns `None` when the engine fails to shape `text`.
fn reshape_fragment(
    text: &str,
    donor: &WordToken,
    hyphen_part: Option<(String, bool)>,
    ctx: &HyphenationContext,
) -> Option<WordToken> {
    let inherited = &donor.src;

    let request = ShapeRequest {
        text,
        families: ctx.families,
        weight: inherited.weight,
        style: inherited.style,
        font_size: inherited.font_size,
        direction: ctx.direction,
    };
    let shaped = ctx.engine.shape_with_fallback(&request, ctx.fonts).ok()?;

    // The token's advance is the sum of the advances of all shaped runs.
    let runs = shaped.runs;
    let advance = runs.iter().map(|run| run.advance_width as f64).sum::<f64>();

    let src = WordSource {
        text: text.to_owned(),
        weight: inherited.weight,
        style: inherited.style,
        font_size: inherited.font_size,
        paragraph: inherited.paragraph,
        hyphen_part,
    };

    Some(WordToken {
        runs,
        advance,
        color: donor.color,
        underline: donor.underline,
        strikethrough: donor.strikethrough,
        highlight: donor.highlight,
        code: donor.code,
        link: donor.link.clone(),
        baseline_dy: donor.baseline_dy,
        glued: donor.glued,
        src,
    })
}
/// Tries to split `word` at a dictionary hyphenation point so that the head
/// (with `ctx.hyphen` appended) is no wider than `avail`.
///
/// Break points are tried from the last reported one to the first, and the
/// first candidate whose head fits is returned. Both halves record the
/// original word text in `hyphen_part` (`true` for the head, `false` for the
/// tail) so the pair can be recognised and merged again later.
///
/// Returns `None` when there is no dictionary, when the word is shorter than
/// four bytes, or when no candidate produces a fitting head and a shapeable
/// tail.
pub(in crate::compile) fn try_hyphenate(
    word: &WordToken,
    avail: f64,
    ctx: &HyphenationContext,
) -> Option<HyphenSplit> {
    let dict = ctx.dict?;
    let text = word.src.text.as_str();
    if text.len() < 4 {
        return None;
    }

    let breaks = dict.hyphenate(text).breaks;
    breaks.iter().rev().find_map(|&at| {
        // Offsets that are out of range, off a char boundary, or that leave
        // one side empty are skipped.
        let head_txt = text.get(..at).filter(|part| !part.is_empty())?;
        let tail_txt = text.get(at..).filter(|part| !part.is_empty())?;

        let hyphenated = format!("{head_txt}{}", ctx.hyphen);
        let head = reshape_fragment(&hyphenated, word, Some((text.to_owned(), true)), ctx)?;
        if head.advance > avail {
            return None;
        }

        let tail = reshape_fragment(tail_txt, word, Some((text.to_owned(), false)), ctx)?;
        Some(HyphenSplit { head, tail })
    })
}
/// Force-breaks `word` at a `char` boundary so that the head is no wider than
/// `avail`.
///
/// Prefixes are shaped one character longer at a time. The scan stops at the
/// first prefix that overflows `avail` (or fails to shape), and the longest
/// prefix accepted before that point becomes the head. No hyphen is inserted
/// and neither half carries a `hyphen_part` marker.
///
/// Returns `None` when the word has fewer than two characters, when not even
/// the first character fits, or when the tail cannot be shaped.
///
/// Splits happen between Unicode scalar values, not grapheme clusters, so a
/// base character can be separated from a following combining mark.
pub(in crate::compile) fn try_break_word(
    word: &WordToken,
    avail: f64,
    ctx: &HyphenationContext,
) -> Option<(WordToken, WordToken)> {
    let text = word.src.text.as_str();
    let mut best: Option<(usize, WordToken)> = None;

    // `skip(1)` drops offset 0, which would give an empty head. The offset
    // `text.len()` is never tried because it would leave an empty tail, so the
    // whole word is never shaped here.
    for (split_at, _) in text.char_indices().skip(1) {
        let Some(head_txt) = text.get(..split_at) else {
            continue;
        };
        let Some(head) = reshape_fragment(head_txt, word, None, ctx) else {
            break;
        };
        if head.advance > avail {
            break;
        }
        best = Some((split_at, head));
    }

    let (split_at, head) = best?;
    let tail_txt = text.get(split_at..)?;
    let tail = reshape_fragment(tail_txt, word, None, ctx)?;
    Some((head, tail))
}
pub(in crate::compile) fn flatten_lines_to_tokens(
lines: Vec<Line>,
hyph: Option<&HyphenationContext>,
) -> Vec<WordToken> {
let mut words: Vec<WordToken> = Vec::new();
for line in lines {
for w in line.words {
words.push(w);
}
}
let Some(ctx) = hyph else {
return words;
};
let mut out: Vec<WordToken> = Vec::with_capacity(words.len());
let mut iter = words.into_iter().peekable();
while let Some(w) = iter.next() {
if let Some((orig, true)) = &w.src.hyphen_part {
let is_tail_next = iter
.peek()
.and_then(|n| n.src.hyphen_part.as_ref())
.is_some_and(|(o, head)| !head && o == orig);
if is_tail_next {
let orig = orig.clone();
let tail = iter.next();
match reshape_fragment(&orig, &w, None, ctx) {
Some(merged) => out.push(merged),
None => {
out.push(w);
if let Some(t) = tail {
out.push(t);
}
}
}
continue;
}
}
out.push(w);
}
out
}
#[cfg(test)]
mod break_word_tests {
    //! Tests for `try_break_word` and its interaction with the line packer.

    use super::{HyphenationContext, try_break_word};
    use zenith_core::{FontProvider, FontStyle, default_provider};
    use zenith_layout::{RustybuzzEngine, TextDirection};
    use super::super::ctx::{NodeShape, ShapeEnv};
    use super::super::pack::pack_lines_reporting;
    use super::super::shape::{ResolvedSpan, WordToken, shape_words};
    use crate::ir::Color;

    /// Font family list used by every test in this module.
    fn noto_sans() -> Vec<String> {
        vec!["Noto Sans".to_owned()]
    }

    /// Shapes `word` as a single plain 16px span and returns the last token produced.
    fn shape_word(word: &str, engine: &RustybuzzEngine, fonts: &dyn FontProvider) -> WordToken {
        let families = noto_sans();
        let spans = [ResolvedSpan {
            text: word.to_owned(),
            color: Color::srgb(0, 0, 0, 255),
            underline: false,
            strikethrough: false,
            highlight: None,
            code: false,
            link: None,
            weight: 400,
            style: FontStyle::Normal,
            font_size: 16.0,
            baseline_dy: 0.0,
        }];
        let node = NodeShape {
            font_size: 16.0,
            base_weight: 400,
            direction: TextDirection::Ltr,
        };
        let mut diags = Vec::new();
        let (mut tokens, _rest) = shape_words(
            &spans,
            &families,
            node,
            ShapeEnv { engine, fonts },
            &mut diags,
            "t",
            None,
        );
        tokens.pop().expect("the word must shape to one token")
    }

    /// Builds a context with no dictionary and `break_word` enabled.
    fn break_ctx<'a>(
        engine: &'a RustybuzzEngine,
        fonts: &'a dyn FontProvider,
        families: &'a [String],
    ) -> HyphenationContext<'a> {
        HyphenationContext {
            dict: None,
            engine,
            fonts,
            families,
            hyphen: "-",
            direction: TextDirection::Ltr,
            break_word: true,
        }
    }

    /// A long URL broken at a third of its width yields a fitting head, and
    /// head + tail spell the original text.
    #[test]
    fn splits_and_reconstructs_original_text() {
        let engine = RustybuzzEngine::new();
        let provider = default_provider();
        let families = noto_sans();
        let original = "https://very-long.example.com/some/deep/path";

        let word = shape_word(original, &engine, &provider);
        let hyph = break_ctx(&engine, &provider, &families);
        let budget = word.advance / 3.0;

        let (head, tail) = try_break_word(&word, budget, &hyph).expect("a prefix must fit");

        assert!(head.advance <= budget, "head must fit avail");
        assert!(
            head.src.text.chars().next().is_some(),
            "head needs at least one char"
        );
        assert!(!tail.src.text.is_empty(), "tail must be non-empty");
        let rejoined = format!("{}{}", head.src.text, tail.src.text);
        assert_eq!(
            rejoined, original,
            "head+tail must reconstruct the original token exactly"
        );
    }

    /// Text with multi-byte characters is only ever split on a char boundary.
    #[test]
    fn respects_multibyte_char_boundaries() {
        let engine = RustybuzzEngine::new();
        let provider = default_provider();
        let families = noto_sans();
        let original = "café—über—straße—long—compound—word";

        let word = shape_word(original, &engine, &provider);
        let hyph = break_ctx(&engine, &provider, &families);
        let budget = word.advance / 2.0;

        let (head, tail) = try_break_word(&word, budget, &hyph).expect("a prefix must fit");

        let rejoined = format!("{}{}", head.src.text, tail.src.text);
        assert_eq!(rejoined, original);
        assert!(
            original.is_char_boundary(head.src.text.len()),
            "split must land on a char boundary"
        );
    }

    /// With zero available width not even one character fits.
    #[test]
    fn returns_none_when_no_char_fits() {
        let engine = RustybuzzEngine::new();
        let provider = default_provider();
        let families = noto_sans();

        let word = shape_word("wide", &engine, &provider);
        let hyph = break_ctx(&engine, &provider, &families);

        let outcome = try_break_word(&word, 0.0, &hyph);
        assert!(outcome.is_none(), "zero avail fits no char → None");
    }

    /// A one-character token has no interior boundary to split at.
    #[test]
    fn single_char_token_is_not_split() {
        let engine = RustybuzzEngine::new();
        let provider = default_provider();
        let families = noto_sans();

        let word = shape_word("W", &engine, &provider);
        let hyph = break_ctx(&engine, &provider, &families);

        assert!(try_break_word(&word, 1000.0, &hyph).is_none());
    }

    /// A word that fits on a line of its own is wrapped whole, not broken to
    /// fill the space left on the previous line.
    #[test]
    fn ordinary_word_wraps_whole_not_broken_into_remaining_space() {
        let engine = RustybuzzEngine::new();
        let provider = default_provider();
        let families = noto_sans();
        let hyph = break_ctx(&engine, &provider, &families);

        let alpha = shape_word("alpha", &engine, &provider);
        let betagamma = shape_word("betagamma", &engine, &provider);
        let space_advance = 6.0;
        // Wide enough for the second word alone, too narrow for both words.
        let box_w = betagamma.advance + 5.0;
        assert!(
            alpha.advance + space_advance + betagamma.advance > box_w,
            "test setup: the pair must overflow one line"
        );

        let mut forced = false;
        let lines = pack_lines_reporting(
            vec![alpha, betagamma],
            box_w,
            space_advance,
            Some(&hyph),
            &mut forced,
            18.0,
        );

        assert!(!forced, "no forced break: the word fits a line by itself");
        assert_eq!(lines.len(), 2, "the second word wraps to its own line");
        let second_line = lines[1]
            .words
            .iter()
            .map(|token| token.src.text.as_str())
            .collect::<String>();
        assert_eq!(
            second_line, "betagamma",
            "the wrapped word stays intact (not split mid-word)"
        );
    }
}