use ahash::AHashMap;
use crate::analysis::{
contains_cjk_text, is_cjk, is_cjk_line_end_prohibited, is_cjk_line_start_prohibited,
slice_text, AnalyzedGrapheme, GraphemeKind, TextAnalysis, WordBreakMode,
};
/// Verdict for the boundary that follows a grapheme.
///
/// When two verdicts are combined with `merge`, `Forced` takes precedence over
/// `Prohibited`, which in turn takes precedence over `Allowed`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BreakOpportunity {
/// A break may be taken here; `unicode_linebreak`'s `Allowed` maps to this.
Allowed,
/// No break may be taken here; also the fallback when the baseline has no entry.
Prohibited,
/// A break must be taken here; `unicode_linebreak`'s `Mandatory` maps to this.
Forced,
}
/// Everything a break rule may inspect when judging the boundary after one grapheme.
struct BreakContext<'a> {
// The full analysis the grapheme belongs to.
analysis: &'a TextAnalysis,
// Index of `current` within `analysis.graphemes`.
grapheme_index: usize,
// The grapheme whose trailing boundary is being judged.
current: &'a AnalyzedGrapheme,
// The grapheme at `grapheme_index + 1`, or `None` for the last grapheme.
next: Option<&'a AnalyzedGrapheme>,
// Slice of `analysis.normalized` covered by `current.byte_range`.
current_text: &'a str,
// Slice of `analysis.normalized` covered by `next.byte_range`, if there is a next grapheme.
next_text: Option<&'a str>,
// Verdict from `baseline_breaks` at byte offset `current.byte_range.end`;
// `Prohibited` when the baseline has no entry at that offset.
baseline: BreakOpportunity,
}
/// Signature shared by all override rules: inspect the context, return a verdict.
type BreakRule = fn(&BreakContext<'_>) -> BreakOpportunity;
/// Rules that `break_opportunity` evaluates, in this order, on top of the baseline verdict.
///
/// Every result is folded into the running verdict with `merge`, so a rule that
/// returns `Allowed` leaves the running verdict unchanged.
const OVERRIDE_RULES: &[BreakRule] = &[
rule_forced_newline,
rule_nbsp,
rule_wj,
rule_zwsp,
rule_soft_hyphen,
rule_keep_all_text_run,
rule_cjk_punctuation,
rule_url_atom,
];
/// Computes the break verdict for the boundary after every grapheme of `analysis`.
///
/// The result has exactly one entry per grapheme, in grapheme order. Entry `i`
/// describes the boundary at byte offset `graphemes[i].byte_range.end` of
/// `analysis.normalized`. The last entry is always `Forced`; an analysis with no
/// graphemes yields an empty vector.
pub(crate) fn compute_breaks(analysis: &TextAnalysis) -> Vec<BreakOpportunity> {
    let baseline_by_byte = baseline_breaks(&analysis.normalized);
    let graphemes = &analysis.graphemes;

    let mut breaks: Vec<BreakOpportunity> = graphemes
        .iter()
        .enumerate()
        .map(|(index, current)| {
            let next = graphemes.get(index + 1);
            break_opportunity(&BreakContext {
                analysis,
                grapheme_index: index,
                current,
                next,
                current_text: slice_text(&analysis.normalized, &current.byte_range),
                next_text: next
                    .map(|following| slice_text(&analysis.normalized, &following.byte_range)),
                // Offsets the baseline does not mention are treated as non-breaking.
                baseline: baseline_by_byte
                    .get(&current.byte_range.end)
                    .copied()
                    .unwrap_or(BreakOpportunity::Prohibited),
            })
        })
        .collect();

    // The end of the text always terminates the line, whatever the rules decided.
    if let Some(last) = breaks.last_mut() {
        *last = merge(*last, BreakOpportunity::Forced);
    }
    breaks
}
/// Resolves the final verdict for the boundary described by `ctx`.
///
/// Starts from `uax14_baseline` and folds the result of every entry of
/// `OVERRIDE_RULES` into it with `merge`.
fn break_opportunity(ctx: &BreakContext<'_>) -> BreakOpportunity {
    OVERRIDE_RULES
        .iter()
        .fold(uax14_baseline(ctx), |verdict, rule| merge(verdict, rule(ctx)))
}
fn merge(a: BreakOpportunity, b: BreakOpportunity) -> BreakOpportunity {
use BreakOpportunity::*;
match (a, b) {
(Forced, _) | (_, Forced) => Forced,
(Prohibited, _) | (_, Prohibited) => Prohibited,
_ => Allowed,
}
}
/// Produces the starting verdict for the boundary after `ctx.current`, before
/// any override rule runs.
///
/// Control graphemes get a fixed verdict; text, spaces and tabs take the
/// precomputed `ctx.baseline`.
fn uax14_baseline(ctx: &BreakContext<'_>) -> BreakOpportunity {
    match ctx.current.kind {
        GraphemeKind::Newline => BreakOpportunity::Forced,
        GraphemeKind::WordJoiner => BreakOpportunity::Prohibited,
        GraphemeKind::ZeroWidthBreak | GraphemeKind::SoftHyphen => BreakOpportunity::Allowed,
        // Merging with `Allowed` never loosens the verdict: a `Prohibited` or
        // `Forced` baseline passes through `merge` unchanged.
        GraphemeKind::Space | GraphemeKind::Tab => merge(ctx.baseline, BreakOpportunity::Allowed),
        GraphemeKind::Text => {
            let cjk_pair = ctx
                .next_text
                .is_some_and(|following| is_cjk(ctx.current_text) && is_cjk(following));
            if !cjk_pair {
                return ctx.baseline;
            }
            merge(ctx.baseline, BreakOpportunity::Allowed)
        }
    }
}
/// Override rule: the boundary after a newline grapheme is `Forced`.
///
/// Returns `Allowed` for every other grapheme kind.
fn rule_forced_newline(ctx: &BreakContext<'_>) -> BreakOpportunity {
    match ctx.current.kind {
        GraphemeKind::Newline => BreakOpportunity::Forced,
        _ => BreakOpportunity::Allowed,
    }
}
/// Override rule: `Prohibited` when the grapheme on either side of the boundary
/// contains U+00A0 (no-break space), `Allowed` otherwise.
fn rule_nbsp(ctx: &BreakContext<'_>) -> BreakOpportunity {
    if contains_nbsp(ctx.current_text) {
        return BreakOpportunity::Prohibited;
    }
    match ctx.next_text {
        Some(following) if contains_nbsp(following) => BreakOpportunity::Prohibited,
        _ => BreakOpportunity::Allowed,
    }
}
/// Override rule: `Prohibited` when the grapheme on either side of the boundary
/// contains U+2060 (word joiner), `Allowed` otherwise.
fn rule_wj(ctx: &BreakContext<'_>) -> BreakOpportunity {
    let glued = contains_word_joiner(ctx.current_text)
        || matches!(ctx.next_text, Some(following) if contains_word_joiner(following));
    if glued {
        BreakOpportunity::Prohibited
    } else {
        BreakOpportunity::Allowed
    }
}
fn rule_zwsp(_ctx: &BreakContext<'_>) -> BreakOpportunity {
BreakOpportunity::Allowed
}
fn rule_soft_hyphen(_ctx: &BreakContext<'_>) -> BreakOpportunity {
BreakOpportunity::Allowed
}
/// Override rule for `WordBreakMode::KeepAll`.
///
/// Answers `Prohibited` only when all of the following hold, checked in this
/// order: the analysis uses keep-all, the current grapheme is text, the next
/// grapheme exists and is text, and the boundary lies inside a segment accepted
/// by `boundary_is_inside_keep_all_segment`. Otherwise answers `Allowed`.
fn rule_keep_all_text_run(ctx: &BreakContext<'_>) -> BreakOpportunity {
    let inside_kept_run = ctx.analysis.word_break == WordBreakMode::KeepAll
        && ctx.current.kind == GraphemeKind::Text
        && ctx
            .next
            .is_some_and(|following| following.kind == GraphemeKind::Text)
        && boundary_is_inside_keep_all_segment(ctx);

    if inside_kept_run {
        BreakOpportunity::Prohibited
    } else {
        BreakOpportunity::Allowed
    }
}
/// Reports whether some segment spans both graphemes around the boundary and
/// contains CJK text.
///
/// A segment qualifies when it starts at or before the current grapheme's start,
/// ends at or after the next grapheme's end, and `contains_cjk_text` accepts its
/// slice of the normalized text. Returns `false` when there is no next grapheme.
fn boundary_is_inside_keep_all_segment(ctx: &BreakContext<'_>) -> bool {
    match ctx.next {
        None => false,
        Some(following) => ctx
            .analysis
            .segments
            .iter()
            .filter(|segment| {
                segment.byte_range.start <= ctx.current.byte_range.start
                    && segment.byte_range.end >= following.byte_range.end
            })
            .any(|segment| {
                contains_cjk_text(slice_text(&ctx.analysis.normalized, &segment.byte_range))
            }),
    }
}
/// Override rule for CJK punctuation placement.
///
/// Answers `Prohibited` when the next grapheme is rejected by
/// `is_cjk_line_start_prohibited` or the current one by
/// `is_cjk_line_end_prohibited`; otherwise `Allowed`.
fn rule_cjk_punctuation(ctx: &BreakContext<'_>) -> BreakOpportunity {
    let next_cannot_start_line = ctx.next_text.is_some_and(is_cjk_line_start_prohibited);
    match next_cannot_start_line || is_cjk_line_end_prohibited(ctx.current_text) {
        true => BreakOpportunity::Prohibited,
        false => BreakOpportunity::Allowed,
    }
}
/// Override rule: `Prohibited` when the boundary falls strictly inside a
/// detected URL span, `Allowed` otherwise.
fn rule_url_atom(ctx: &BreakContext<'_>) -> BreakOpportunity {
    if !boundary_is_inside_url(ctx.analysis, ctx.grapheme_index) {
        return BreakOpportunity::Allowed;
    }
    BreakOpportunity::Prohibited
}
/// Runs `unicode_linebreak::linebreaks` over `text` and indexes the results by
/// byte offset.
///
/// `Allowed` maps to `BreakOpportunity::Allowed` and `Mandatory` to
/// `BreakOpportunity::Forced`. Offsets the iterator does not report have no
/// entry in the map.
fn baseline_breaks(text: &str) -> AHashMap<usize, BreakOpportunity> {
    let mut by_offset = AHashMap::new();
    by_offset.extend(
        unicode_linebreak::linebreaks(text).map(|(offset, verdict)| {
            let translated = match verdict {
                unicode_linebreak::BreakOpportunity::Mandatory => BreakOpportunity::Forced,
                unicode_linebreak::BreakOpportunity::Allowed => BreakOpportunity::Allowed,
            };
            (offset, translated)
        }),
    );
    by_offset
}
/// Reports whether the boundary after grapheme `grapheme_index` lies strictly
/// between the start and end of any span in `analysis.urls`.
///
/// A boundary exactly at a span's start or end does not count as inside.
///
/// # Panics
///
/// Panics if `grapheme_index` is out of bounds for `analysis.graphemes`.
fn boundary_is_inside_url(analysis: &TextAnalysis, grapheme_index: usize) -> bool {
    let boundary = analysis.graphemes[grapheme_index].byte_range.end;
    for span in analysis.urls.iter() {
        if boundary > span.start && boundary < span.end {
            return true;
        }
    }
    false
}
/// Returns `true` when `text` contains U+00A0 (no-break space).
fn contains_nbsp(text: &str) -> bool {
    const NO_BREAK_SPACE: char = '\u{00A0}';
    text.chars().any(|ch| ch == NO_BREAK_SPACE)
}
/// Returns `true` when `text` contains U+2060 (word joiner).
fn contains_word_joiner(text: &str) -> bool {
    const WORD_JOINER: char = '\u{2060}';
    text.chars().any(|ch| ch == WORD_JOINER)
}
#[cfg(test)]
mod tests {
    use super::{compute_breaks, merge, BreakOpportunity};
    use crate::analysis::{WhiteSpaceMode, WordBreakMode};
    use crate::engine::PrepareOptions;

    /// Analyzes `text` with the given modes (auto paragraph direction, zero
    /// letter spacing) and returns the computed break verdicts.
    fn breaks_for(
        text: &str,
        white_space: WhiteSpaceMode,
        word_break: WordBreakMode,
    ) -> Vec<BreakOpportunity> {
        let options = PrepareOptions {
            white_space,
            word_break,
            paragraph_direction: crate::bidi::ParagraphDirection::Auto,
            letter_spacing: 0.0,
        };
        let analysis = crate::analysis::analyze_text(text, &options, None);
        compute_breaks(&analysis)
    }

    #[test]
    fn nbsp_prohibits_adjacent_breaks() {
        let breaks = breaks_for("a\u{00A0}b", WhiteSpaceMode::Normal, WordBreakMode::Normal);
        assert_eq!(breaks[0], BreakOpportunity::Prohibited);
        assert_eq!(breaks[1], BreakOpportunity::Prohibited);
    }

    #[test]
    fn zero_width_space_allows_break_after_itself() {
        let breaks = breaks_for("a\u{200B}b", WhiteSpaceMode::Normal, WordBreakMode::Normal);
        assert_eq!(breaks[1], BreakOpportunity::Allowed);
    }

    #[test]
    fn forced_wins_over_prohibited() {
        let breaks = breaks_for("\n\u{00A0}a", WhiteSpaceMode::PreWrap, WordBreakMode::Normal);
        assert_eq!(breaks[0], BreakOpportunity::Forced);
    }

    #[test]
    fn url_remains_atomic() {
        let breaks = breaks_for(
            "https://example.com/path",
            WhiteSpaceMode::Normal,
            WordBreakMode::Normal,
        );
        // Every boundary except the final one must stay closed.
        let interior = &breaks[..breaks.len().saturating_sub(1)];
        assert!(interior
            .iter()
            .all(|opportunity| *opportunity == BreakOpportunity::Prohibited));
    }

    #[test]
    fn cjk_punctuation_is_not_line_start() {
        let breaks = breaks_for("你。", WhiteSpaceMode::Normal, WordBreakMode::Normal);
        assert_eq!(breaks[0], BreakOpportunity::Prohibited);
    }

    #[test]
    fn keep_all_suppresses_cjk_led_no_space_breaks() {
        let breaks = breaks_for("日本語foo-bar", WhiteSpaceMode::Normal, WordBreakMode::KeepAll);
        assert_eq!(breaks[0], BreakOpportunity::Prohibited);
        assert_eq!(breaks[1], BreakOpportunity::Prohibited);
        assert_eq!(breaks[2], BreakOpportunity::Prohibited);
    }

    #[test]
    fn merge_respects_priority() {
        assert_eq!(
            merge(BreakOpportunity::Forced, BreakOpportunity::Prohibited),
            BreakOpportunity::Forced
        );
        assert_eq!(
            merge(BreakOpportunity::Allowed, BreakOpportunity::Prohibited),
            BreakOpportunity::Prohibited
        );
    }
}