pub mod dictionary;
use crate::data::line_break::{lb, is_east_asian_wide, Lb};
/// Line-break decision recorded for a single position in the analysed text.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum BreakAction {
    /// A line break must be taken at this position.
    Mandatory,
    /// A line break may be taken at this position.
    Allowed,
    /// A line break must not be taken at this position.
    Prohibited,
}
/// Computes the line-break action at every byte offset of `text`.
///
/// The returned vector has `text.len() + 1` entries and is indexed by byte
/// offset: entry `k` describes the position immediately before byte `k`, and
/// the last entry describes the end of the text. Offsets that are not
/// character boundaries are never written and therefore stay
/// [`BreakAction::Prohibited`].
///
/// * The end of the text is always [`BreakAction::Mandatory`].
/// * For an empty string the single entry (offset 0) is `Mandatory`.
/// * Otherwise offset 0 is `Prohibited`.
///
/// The rule labels in the comments below (LB4, LB5, ...) refer to the rules of
/// UAX #14, which this function applies in order; the first rule that matches
/// a position decides it.
pub fn line_break_opportunities(text: &str) -> Vec<BreakAction> {
    let len = text.len();
    let mut breaks = vec![BreakAction::Prohibited; len + 1];
    let chars: Vec<(usize, char)> = text.char_indices().collect();
    let n = chars.len();
    if n == 0 {
        breaks[0] = BreakAction::Mandatory;
        return breaks;
    }
    let cps: Vec<u32> = chars.iter().map(|&(_, ch)| ch as u32).collect();
    let classes: Vec<Lb> = cps.iter().map(|&cp| resolve_lb1(cp)).collect();

    // Start of text: offset 0 keeps the `Prohibited` it was initialised with.
    // End of text: always a mandatory break.
    breaks[len] = BreakAction::Mandatory;
    if n == 1 {
        return breaks;
    }

    // `effective[i]` is the class used by the pair rules once combining marks
    // are folded in: a CM/ZWJ takes the effective class of the closest
    // preceding non-CM character, unless that character is a hard break or a
    // space (or there is none), in which case the mark behaves as AL.
    let mut effective: Vec<Lb> = classes.clone();
    {
        let mut base_idx: Option<usize> = None;
        for i in 0..n {
            let c = classes[i];
            if c == Lb::CM || c == Lb::ZWJ {
                if let Some(bi) = base_idx {
                    let base_c = classes[bi];
                    if !is_hard_break_or_space(base_c) {
                        effective[i] = effective[bi];
                        continue;
                    }
                }
                effective[i] = Lb::AL;
            } else {
                base_idx = Some(i);
            }
        }
    }

    // True while we are inside a `ZW SP*` run, so that the next non-space
    // character gets a break opportunity in front of it.
    let mut after_zw_sp = false;

    // Each iteration decides the position between chars[i] and chars[i + 1].
    for i in 0..n - 1 {
        let byte_pos = chars[i + 1].0;
        let cls_before = classes[i];
        let cls_after = classes[i + 1];
        let eff_before = effective[i];
        let eff_after = effective[i + 1];

        // LB4: always break after BK.
        if cls_before == Lb::BK {
            breaks[byte_pos] = BreakAction::Mandatory;
            after_zw_sp = false;
            continue;
        }
        // LB5: CR LF is kept together; any other CR / LF / NL forces a break.
        if cls_before == Lb::CR && cls_after == Lb::LF {
            breaks[byte_pos] = BreakAction::Prohibited;
            after_zw_sp = false;
            continue;
        }
        if cls_before == Lb::CR || cls_before == Lb::LF || cls_before == Lb::NL {
            breaks[byte_pos] = BreakAction::Mandatory;
            after_zw_sp = false;
            continue;
        }
        // LB6: never break before a hard line break.
        if cls_after == Lb::BK
            || cls_after == Lb::CR
            || cls_after == Lb::LF
            || cls_after == Lb::NL
        {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB7: never break before SP or ZW; also track `ZW SP*` runs for LB8.
        if cls_after == Lb::SP || cls_after == Lb::ZW {
            breaks[byte_pos] = BreakAction::Prohibited;
            after_zw_sp = cls_after == Lb::ZW || cls_before == Lb::ZW || after_zw_sp;
            continue;
        }
        // LB8: break after ZW SP*.
        if after_zw_sp {
            breaks[byte_pos] = BreakAction::Allowed;
            after_zw_sp = false;
            continue;
        }
        if cls_before == Lb::ZW {
            breaks[byte_pos] = BreakAction::Allowed;
            continue;
        }
        // LB8a: no break after ZWJ.
        if cls_before == Lb::ZWJ {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB9: a combining mark stays attached to its base.
        if (cls_after == Lb::CM || cls_after == Lb::ZWJ)
            && !is_hard_break_or_space(eff_before)
        {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }

        let eb = eff_before;
        let ea = eff_after;

        // LB11: no break around WJ.
        if ea == Lb::WJ || eb == Lb::WJ {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB12: no break after GL.
        if eb == Lb::GL {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB12a: no break before GL except after SP / BA / HY / HH.
        if ea == Lb::GL {
            if eb != Lb::SP && eb != Lb::BA && eb != Lb::HY && eb != Lb::HH {
                breaks[byte_pos] = BreakAction::Prohibited;
                continue;
            }
        }
        // LB13: no break before CL / CP / EX / SY.
        if ea == Lb::CL || ea == Lb::CP || ea == Lb::EX || ea == Lb::SY {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB15c / LB15d: no break before IS, except `SP ÷ IS NU`.
        if ea == Lb::IS {
            if cls_before == Lb::SP && i + 2 < n && effective[i + 2] == Lb::NU {
                breaks[byte_pos] = BreakAction::Allowed;
                continue;
            }
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB14: OP SP* ×
        if is_after_op_sp(&effective, &classes, i) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB15a: no break after an unresolved initial quote (plus spaces).
        if is_after_qu_pi_sp(&effective, &classes, &cps, i, &chars) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB15b: no break before a final quote followed by closing context.
        if ea == Lb::QU && is_qu_pf(cps[i + 1]) {
            if is_followed_by_lb15b_context(&classes, i + 1, n) {
                breaks[byte_pos] = BreakAction::Prohibited;
                continue;
            }
        }
        // LB16: (CL | CP) SP* × NS
        if ea == Lb::NS && is_after_cl_cp_sp(&effective, &classes, i) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB17: B2 SP* × B2
        if ea == Lb::B2 && is_after_b2_sp(&effective, &classes, i) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB18: break after spaces.
        if cls_before == Lb::SP {
            breaks[byte_pos] = BreakAction::Allowed;
            continue;
        }
        // LB19: no break before a non-initial quote / after a non-final quote.
        if ea == Lb::QU && !is_qu_pi(cps[i + 1]) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        if eb == Lb::QU && !is_qu_pf(cps[find_base_index(&classes, i)]) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB19a: remaining quote cases only break next to East Asian text.
        if ea == Lb::QU && !is_east_asian_wide(cps[find_base_index(&classes, i)]) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        if ea == Lb::QU {
            let qu_followed_by_ea = if i + 2 < n {
                is_east_asian_wide(cps[i + 2])
            } else {
                false
            };
            if !qu_followed_by_ea {
                breaks[byte_pos] = BreakAction::Prohibited;
                continue;
            }
        }
        if eb == Lb::QU && !is_east_asian_wide(cps[i + 1]) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        if eb == Lb::QU {
            let qu_base_idx = find_base_index(&classes, i);
            let preceded_by_ea = if qu_base_idx > 0 {
                is_east_asian_wide(cps[qu_base_idx - 1])
            } else {
                false
            };
            if !preceded_by_ea {
                breaks[byte_pos] = BreakAction::Prohibited;
                continue;
            }
        }
        // LB20: break around CB.
        if eb == Lb::CB || ea == Lb::CB {
            breaks[byte_pos] = BreakAction::Allowed;
            continue;
        }
        // LB20a: no break after a word-initial hyphen.
        if (eb == Lb::HY || eb == Lb::HH) && (ea == Lb::AL || ea == Lb::HL) {
            let base_idx = find_base_index(&classes, i);
            let prev_class = if base_idx == 0 {
                None
            } else {
                Some(effective[base_idx - 1])
            };
            let is_word_initial = match prev_class {
                None => true,
                Some(c) => matches!(
                    c,
                    Lb::BK
                        | Lb::CR
                        | Lb::LF
                        | Lb::NL
                        | Lb::SP
                        | Lb::ZW
                        | Lb::CB
                        | Lb::GL
                        | Lb::OP
                        | Lb::QU
                ),
            };
            if is_word_initial {
                breaks[byte_pos] = BreakAction::Prohibited;
                continue;
            }
        }
        // LB21: × BA, × HH, × HY, × NS, BB ×
        if ea == Lb::BA || ea == Lb::HH || ea == Lb::HY || ea == Lb::NS {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        if eb == Lb::BB {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB21a: HL (HY | HH) × [^HL].
        // The hyphen may carry combining marks, so locate the hyphen itself
        // first and only then look at what precedes it; looking at `i - 1`
        // directly would see the hyphen instead of the Hebrew letter.
        if (eb == Lb::HY || eb == Lb::HH) && ea != Lb::HL {
            let hyphen_idx = find_base_index(&classes, i);
            if hyphen_idx >= 1 {
                let prev_eff = effective_class_before(&effective, &classes, hyphen_idx - 1);
                if prev_eff == Lb::HL {
                    breaks[byte_pos] = BreakAction::Prohibited;
                    continue;
                }
            }
        }
        // LB21b: SY × HL
        if eb == Lb::SY && ea == Lb::HL {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB22: × IN
        if ea == Lb::IN {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB23: letters and digits stick together.
        if (eb == Lb::AL || eb == Lb::HL) && ea == Lb::NU {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        if eb == Lb::NU && (ea == Lb::AL || ea == Lb::HL) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB23a: prefixes / postfixes around ideographs and emoji.
        if eb == Lb::PR && (ea == Lb::ID || ea == Lb::EB || ea == Lb::EM) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        if (eb == Lb::ID || eb == Lb::EB || eb == Lb::EM) && ea == Lb::PO {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB24: prefixes / postfixes around letters.
        if (eb == Lb::PR || eb == Lb::PO) && (ea == Lb::AL || ea == Lb::HL) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        if (eb == Lb::AL || eb == Lb::HL) && (ea == Lb::PR || ea == Lb::PO) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB25: numeric expressions.
        if is_lb25_no_break(&effective, &classes, &cps, i, n) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB26: Korean syllable blocks.
        if eb == Lb::JL && (ea == Lb::JL || ea == Lb::JV || ea == Lb::H2 || ea == Lb::H3) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        if (eb == Lb::JV || eb == Lb::H2) && (ea == Lb::JV || ea == Lb::JT) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        if (eb == Lb::JT || eb == Lb::H3) && ea == Lb::JT {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB27: Korean syllables with prefixes / postfixes.
        if matches!(eb, Lb::JL | Lb::JV | Lb::JT | Lb::H2 | Lb::H3) && ea == Lb::PO {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        if eb == Lb::PR && matches!(ea, Lb::JL | Lb::JV | Lb::JT | Lb::H2 | Lb::H3) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB28: no break between letters.
        if (eb == Lb::AL || eb == Lb::HL) && (ea == Lb::AL || ea == Lb::HL) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB28a: orthographic syllables (aksaras, viramas, dotted circle).
        let eb_base_cp = cps[find_base_index(&classes, i)];
        let ea_base_cp = cps[find_base_index(&classes, i + 1)];
        let eb_aksara = eb == Lb::AK || eb == Lb::AS || is_aksara_base(eb_base_cp);
        let ea_aksara = ea == Lb::AK || ea == Lb::AS || is_aksara_base(ea_base_cp);
        if eb == Lb::AP && ea_aksara {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        if ea_aksara && eb == Lb::VI {
            let vi_base = find_base_index(&classes, i);
            if vi_base > 0 {
                let prev = effective_class_before(&effective, &classes, vi_base - 1);
                let prev_cp = cps[find_base_index(&classes, vi_base - 1)];
                if prev == Lb::AK || prev == Lb::AS || is_aksara_base(prev_cp) {
                    breaks[byte_pos] = BreakAction::Prohibited;
                    continue;
                }
            }
        }
        if ea == Lb::VF && eb == Lb::VI {
            let vi_base = find_base_index(&classes, i);
            if vi_base > 0 {
                let prev = effective_class_before(&effective, &classes, vi_base - 1);
                let prev_cp = cps[find_base_index(&classes, vi_base - 1)];
                if prev == Lb::AK || prev == Lb::AS || is_aksara_base(prev_cp) {
                    breaks[byte_pos] = BreakAction::Prohibited;
                    continue;
                }
            }
        }
        if eb_aksara && ea_aksara {
            let next_non_cm = find_next_non_cm(&classes, &effective, i + 1, n);
            if next_non_cm < n && effective[next_non_cm] == Lb::VF {
                breaks[byte_pos] = BreakAction::Prohibited;
                continue;
            }
        }
        if eb_aksara && (ea == Lb::VF || ea == Lb::VI) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB29: IS × (AL | HL)
        if eb == Lb::IS && (ea == Lb::AL || ea == Lb::HL) {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        // LB30: letters / digits next to non-East-Asian parentheses.
        if (eb == Lb::AL || eb == Lb::HL || eb == Lb::NU) && ea == Lb::OP {
            if !is_east_asian_wide(cps[i + 1]) {
                breaks[byte_pos] = BreakAction::Prohibited;
                continue;
            }
        }
        if eb == Lb::CP && (ea == Lb::AL || ea == Lb::HL || ea == Lb::NU) {
            // chars[i] may be a combining mark attached to the parenthesis;
            // the width test has to look at the parenthesis itself.
            if !is_east_asian_wide(cps[find_base_index(&classes, i)]) {
                breaks[byte_pos] = BreakAction::Prohibited;
                continue;
            }
        }
        // LB30a: keep regional-indicator pairs together. Count the real RI
        // characters (marks that merely inherit RI are skipped) ending at `i`;
        // an odd count means chars[i] opens a pair.
        if eb == Lb::RI && ea == Lb::RI {
            let ri_before = (0..=i)
                .rev()
                .take_while(|&j| effective[j] == Lb::RI)
                .filter(|&j| classes[j] == Lb::RI)
                .count();
            if ri_before % 2 == 1 {
                breaks[byte_pos] = BreakAction::Prohibited;
                continue;
            }
        }
        // LB30b: emoji base / potential future emoji followed by a modifier.
        if eb == Lb::EB && ea == Lb::EM {
            breaks[byte_pos] = BreakAction::Prohibited;
            continue;
        }
        if ea == Lb::EM {
            let cp_b = cps[find_base_index(&classes, i)];
            if is_extended_pictographic(cp_b) && is_cn(cp_b) {
                breaks[byte_pos] = BreakAction::Prohibited;
                continue;
            }
        }
        // LB31: break everywhere else.
        breaks[byte_pos] = BreakAction::Allowed;
    }
    breaks
}
/// Like [`line_break_opportunities`], but additionally refines the result
/// with dictionary-based word segmentation via `apply_dictionary_breaks`.
///
/// The returned vector has the same layout as the one produced by
/// [`line_break_opportunities`].
pub fn line_break_opportunities_with_dictionary(text: &str) -> Vec<BreakAction> {
    let mut actions = line_break_opportunities(text);
    apply_dictionary_breaks(text, &mut actions);
    actions
}
/// Refines `breaks` inside runs of class-SA characters using the dictionary
/// segmenter.
///
/// A run is a maximal sequence of consecutive characters whose raw class is
/// `Lb::SA` and for which `language_for_codepoint` reports the same language
/// as for the first character of the run. For every run:
///
/// 1. each character boundary strictly inside the run is reset to
///    `Prohibited`, and
/// 2. each offset returned by `segment_words` (taken relative to the start of
///    the run) that lies before the end of `text` is set to `Allowed`.
///
/// Positions already marked `Mandatory` are never modified. `breaks` is
/// indexed by byte offset into `text`.
fn apply_dictionary_breaks(text: &str, breaks: &mut [BreakAction]) {
    use crate::data::line_break::lb;
    use dictionary::{language_for_codepoint, segment_words};

    let chars: Vec<(usize, char)> = text.char_indices().collect();
    let total = chars.len();
    let mut cursor = 0;

    while cursor < total {
        let first_cp = chars[cursor].1 as u32;
        if lb(first_cp) != Lb::SA {
            cursor += 1;
            continue;
        }
        let lang = match language_for_codepoint(first_cp) {
            Some(found) => found,
            None => {
                cursor += 1;
                continue;
            }
        };

        // Extend the run while the following characters are SA in the same language.
        let run_start = cursor;
        let extra = chars[run_start + 1..]
            .iter()
            .take_while(|&&(_, ch)| {
                let cp = ch as u32;
                lb(cp) == Lb::SA
                    && matches!(language_for_codepoint(cp), Some(other) if other == lang)
            })
            .count();
        let run_end = run_start + 1 + extra;

        let byte_run_start = chars[run_start].0;
        let byte_run_end = chars
            .get(run_end)
            .map_or(text.len(), |&(offset, _)| offset);
        let word_boundaries = segment_words(&text[byte_run_start..byte_run_end], lang);

        // Clear every interior boundary first; the segmenter decides which
        // ones are re-opened below.
        for &(byte_pos, _) in &chars[run_start + 1..run_end] {
            if breaks[byte_pos] != BreakAction::Mandatory {
                breaks[byte_pos] = BreakAction::Prohibited;
            }
        }
        for &boundary_offset in &word_boundaries {
            let abs_byte = byte_run_start + boundary_offset;
            if abs_byte < text.len() && breaks[abs_byte] != BreakAction::Mandatory {
                breaks[abs_byte] = BreakAction::Allowed;
            }
        }

        cursor = run_end;
    }
}
/// Looks up the line-break class of `cp` and resolves the classes that are
/// never used directly by the pair rules:
///
/// * `AI`, `SG` and `XX` become `AL`;
/// * `CJ` becomes `NS`;
/// * `SA` becomes `CM` when `is_gc_mn_or_mc(cp)` holds and `AL` otherwise.
///
/// Every other class is returned unchanged.
fn resolve_lb1(cp: u32) -> Lb {
    match lb(cp) {
        Lb::AI | Lb::SG | Lb::XX => Lb::AL,
        Lb::CJ => Lb::NS,
        Lb::SA if is_gc_mn_or_mc(cp) => Lb::CM,
        Lb::SA => Lb::AL,
        other => other,
    }
}
/// Returns `true` when `cp` is one of the hard-coded code points that
/// `resolve_lb1` treats as combining marks within class SA.
///
/// The table is hand-maintained and covers ranges in the Thai, Lao, Myanmar,
/// Khmer and Tai Tham blocks plus U+A9E5.
///
/// NOTE(review): U+0E33 and U+0EB3 are listed although they appear to have
/// General_Category Lo in the UCD — confirm whether that is a deliberate
/// tailoring.
fn is_gc_mn_or_mc(cp: u32) -> bool {
    match cp {
        // Thai
        0x0E31 | 0x0E33..=0x0E3A | 0x0E47..=0x0E4E => true,
        // Lao
        0x0EB1 | 0x0EB3..=0x0EBC | 0x0EC8..=0x0ECD => true,
        // Myanmar
        0x102B..=0x103E
        | 0x1056..=0x1059
        | 0x105E..=0x1060
        | 0x1062
        | 0x1067..=0x1068
        | 0x1071..=0x1074
        | 0x1082..=0x108D
        | 0x108F
        | 0x109A..=0x109D => true,
        // Myanmar Extended-B
        0xA9E5 => true,
        // Khmer
        0x17B4..=0x17D3 | 0x17DD => true,
        // Tai Tham
        0x1A55..=0x1A5E | 0x1A62 | 0x1A65..=0x1A7C | 0x1A7F => true,
        _ => false,
    }
}
/// Returns `true` when `cp` is U+25CC (DOTTED CIRCLE), the one code point the
/// aksara rules in `line_break_opportunities` accept in addition to the
/// `AK` / `AS` classes.
fn is_aksara_base(cp: u32) -> bool {
    const DOTTED_CIRCLE: u32 = 0x25CC;
    cp == DOTTED_CIRCLE
}
/// Walks backwards from `idx` and returns the index of the nearest character
/// (including `idx` itself) whose class is neither `CM` nor `ZWJ`.
///
/// Returns 0 when every character from `idx` down to the start is a `CM` or
/// `ZWJ`. Panics if `idx` is out of bounds for `classes`.
fn find_base_index(classes: &[Lb], idx: usize) -> usize {
    (0..=idx)
        .rev()
        .find(|&j| !matches!(classes[j], Lb::CM | Lb::ZWJ))
        .unwrap_or(0)
}
/// Returns `true` for the classes a combining mark cannot attach to:
/// `BK`, `CR`, `LF`, `NL`, `SP` and `ZW`.
fn is_hard_break_or_space(c: Lb) -> bool {
    [Lb::BK, Lb::CR, Lb::LF, Lb::NL, Lb::SP, Lb::ZW].contains(&c)
}
/// Returns `true` when the characters ending at index `i` form `OP SP*`:
/// scanning backwards from `i`, only characters whose raw class is `SP` may
/// be skipped before a character with effective class `OP` is reached.
fn is_after_op_sp(effective: &[Lb], classes: &[Lb], i: usize) -> bool {
    for pos in (0..=i).rev() {
        if effective[pos] == Lb::OP {
            return true;
        }
        if classes[pos] != Lb::SP {
            return false;
        }
    }
    false
}
/// Returns `true` when the characters ending at index `i` form
/// `(CL | CP) SP*`: scanning backwards from `i`, only characters whose raw
/// class is `SP` may be skipped before a character with effective class `CL`
/// or `CP` is reached.
fn is_after_cl_cp_sp(effective: &[Lb], classes: &[Lb], i: usize) -> bool {
    for pos in (0..=i).rev() {
        if matches!(effective[pos], Lb::CL | Lb::CP) {
            return true;
        }
        if classes[pos] != Lb::SP {
            return false;
        }
    }
    false
}
/// Returns `true` when the characters ending at index `i` form `B2 SP*`:
/// scanning backwards from `i`, only characters whose raw class is `SP` may
/// be skipped before a character with effective class `B2` is reached.
fn is_after_b2_sp(effective: &[Lb], classes: &[Lb], i: usize) -> bool {
    for pos in (0..=i).rev() {
        if effective[pos] == Lb::B2 {
            return true;
        }
        if classes[pos] != Lb::SP {
            return false;
        }
    }
    false
}
/// Returns `true` when `cp` is one of the initial (opening) quotation marks
/// recognised by the quotation rules, e.g. U+00AB `«` or U+201C `“`.
fn is_qu_pi(cp: u32) -> bool {
    const INITIAL_QUOTES: [u32; 12] = [
        0x00AB, 0x2018, 0x201B, 0x201C, 0x201F, 0x2039, 0x2E02, 0x2E04, 0x2E09, 0x2E0C, 0x2E1C,
        0x2E20,
    ];
    INITIAL_QUOTES.contains(&cp)
}
/// Returns `true` when `cp` is one of the final (closing) quotation marks
/// recognised by the quotation rules, e.g. U+00BB `»` or U+201D `”`.
fn is_qu_pf(cp: u32) -> bool {
    const FINAL_QUOTES: [u32; 10] = [
        0x00BB, 0x2019, 0x201D, 0x203A, 0x2E03, 0x2E05, 0x2E0A, 0x2E0D, 0x2E1D, 0x2E21,
    ];
    FINAL_QUOTES.contains(&cp)
}
/// Returns `true` when the characters ending at index `i` form an initial
/// quotation mark followed only by spaces, and that quotation mark itself
/// sits at the start of the text or directly after a character whose
/// effective class is `BK`, `CR`, `LF`, `NL`, `OP`, `QU`, `GL`, `SP` or `ZW`.
///
/// The scan runs backwards from `i`, skipping characters whose raw class is
/// `SP`, until it meets a character with effective class `QU`; any other
/// character ends the scan with `false`. `_chars` is unused.
fn is_after_qu_pi_sp(
    effective: &[Lb],
    classes: &[Lb],
    cps: &[u32],
    i: usize,
    _chars: &[(usize, char)],
) -> bool {
    for pos in (0..=i).rev() {
        if effective[pos] == Lb::QU {
            // `pos` may be a mark attached to the quote; test the quote itself.
            let quote_idx = find_base_index(classes, pos);
            if !is_qu_pi(cps[quote_idx]) {
                return false;
            }
            if quote_idx == 0 {
                return true;
            }
            return match effective[quote_idx - 1] {
                Lb::BK
                | Lb::CR
                | Lb::LF
                | Lb::NL
                | Lb::OP
                | Lb::QU
                | Lb::GL
                | Lb::SP
                | Lb::ZW => true,
                _ => false,
            };
        }
        if classes[pos] != Lb::SP {
            return false;
        }
    }
    false
}
/// Returns `true` when the character after index `qu_idx` permits keeping a
/// final quotation mark attached to what precedes it: either there is no
/// following character (`qu_idx + 1 >= n`), or its raw class is one of
/// `SP`, `GL`, `WJ`, `CL`, `QU`, `CP`, `EX`, `IS`, `SY`, `BK`, `CR`, `LF`,
/// `NL` or `ZW`.
fn is_followed_by_lb15b_context(classes: &[Lb], qu_idx: usize, n: usize) -> bool {
    let next_idx = qu_idx + 1;
    if next_idx >= n {
        // End of text counts as a valid context.
        return true;
    }
    match classes[next_idx] {
        Lb::SP
        | Lb::GL
        | Lb::WJ
        | Lb::CL
        | Lb::QU
        | Lb::CP
        | Lb::EX
        | Lb::IS
        | Lb::SY
        | Lb::BK
        | Lb::CR
        | Lb::LF
        | Lb::NL
        | Lb::ZW => true,
        _ => false,
    }
}
/// Returns the effective class of the nearest character at or before `idx`
/// whose raw class is neither `CM` nor `ZWJ`.
///
/// Falls back to `Lb::AL` when every character from `idx` down to the start
/// of the text is a `CM` or `ZWJ`.
fn effective_class_before(effective: &[Lb], classes: &[Lb], idx: usize) -> Lb {
    (0..=idx)
        .rev()
        .find(|&pos| !matches!(classes[pos], Lb::CM | Lb::ZWJ))
        .map_or(Lb::AL, |pos| effective[pos])
}
/// Returns the index of the first character after `start` (exclusive) and
/// below `n` whose raw class is neither `CM` nor `ZWJ`.
///
/// When no such character exists the scan position is returned instead:
/// `n`, or `start + 1` if that is already at or beyond `n`. Callers therefore
/// need to compare the result against `n`. `_effective` is unused.
fn find_next_non_cm(
    classes: &[Lb],
    _effective: &[Lb],
    start: usize,
    n: usize,
) -> usize {
    let first = start + 1;
    (first..n)
        .find(|&pos| classes[pos] != Lb::CM && classes[pos] != Lb::ZWJ)
        .unwrap_or_else(|| n.max(first))
}
/// Decides whether the numeric-expression rule forbids a break between the
/// characters at `i` and `i + 1`.
///
/// Returns `true` in any of these situations (classes are effective classes
/// unless stated otherwise):
///
/// * `(PR | PO)` followed by `NU`;
/// * `(PR | PO)` followed by `OP` or `HY` and then `NU`;
/// * `(PR | PO)` followed by `OP`, any number of raw `SP`, and then `NU`;
/// * `NU (SY | IS)*` followed by `NU`, `SY`, `IS`, `CL` or `CP`;
/// * `NU (SY | IS)* (CL | CP)?` followed by `PO` or `PR`;
/// * `HY`, `OP` or `IS` followed by `NU`.
///
/// `n` bounds every look-ahead; `_cps` is unused.
fn is_lb25_no_break(
    effective: &[Lb],
    classes: &[Lb],
    _cps: &[u32],
    i: usize,
    n: usize,
) -> bool {
    let before = effective[i];
    let after = effective[i + 1];

    if matches!(before, Lb::PR | Lb::PO) {
        match after {
            Lb::NU => return true,
            Lb::OP | Lb::HY => {
                if i + 2 < n && effective[i + 2] == Lb::NU {
                    return true;
                }
                if after == Lb::OP {
                    // Skip spaces after the opening punctuation, then expect a digit.
                    let past_spaces = (i + 2..n).find(|&k| classes[k] != Lb::SP);
                    if past_spaces.map_or(false, |k| effective[k] == Lb::NU) {
                        return true;
                    }
                }
            }
            _ => {}
        }
    }

    if matches!(after, Lb::NU | Lb::SY | Lb::IS | Lb::CL | Lb::CP)
        && is_in_numeric_sequence(effective, i)
    {
        return true;
    }

    if matches!(after, Lb::PO | Lb::PR) {
        // Step over one closing punctuation before looking for the number.
        let scan_from = if matches!(before, Lb::CL | Lb::CP) {
            if i == 0 {
                return false;
            }
            i - 1
        } else {
            i
        };
        if is_in_numeric_sequence(effective, scan_from) {
            return true;
        }
    }

    matches!(before, Lb::HY | Lb::OP | Lb::IS) && after == Lb::NU
}
/// Returns `true` when the characters ending at index `i` form
/// `NU (SY | IS)*`: scanning backwards from `i`, only effective classes `SY`
/// and `IS` may be skipped before an effective `NU` is reached.
fn is_in_numeric_sequence(effective: &[Lb], i: usize) -> bool {
    for pos in (0..=i).rev() {
        match effective[pos] {
            Lb::NU => return true,
            Lb::SY | Lb::IS => {}
            _ => return false,
        }
    }
    false
}
/// Returns `true` when `cp` lies in one of the hard-coded inclusive ranges of
/// the table below, all of which are inside U+1F000..=U+1FFFD.
///
/// `line_break_opportunities` combines this with `is_extended_pictographic`
/// when deciding whether a break before an `EM` character is prohibited.
///
/// NOTE(review): the name suggests these are the unassigned (General_Category
/// Cn) code points of that area; the table is tied to one Unicode version —
/// confirm which one when updating.
fn is_cn(cp: u32) -> bool {
    // (first, last) pairs, inclusive, sorted and non-overlapping.
    const RANGES: [(u32, u32); 33] = [
        (0x1F02C, 0x1F02F),
        (0x1F094, 0x1F09F),
        (0x1F0AF, 0x1F0B0),
        (0x1F0C0, 0x1F0C0),
        (0x1F0D0, 0x1F0D0),
        (0x1F0F6, 0x1F0FF),
        (0x1F1AE, 0x1F1E5),
        (0x1F203, 0x1F20F),
        (0x1F23C, 0x1F23F),
        (0x1F249, 0x1F24F),
        (0x1F252, 0x1F25F),
        (0x1F266, 0x1F2FF),
        (0x1F6D9, 0x1F6DB),
        (0x1F6ED, 0x1F6EF),
        (0x1F6FD, 0x1F6FF),
        (0x1F7DA, 0x1F7DF),
        (0x1F7EC, 0x1F7EF),
        (0x1F7F1, 0x1F7FF),
        (0x1F80C, 0x1F80F),
        (0x1F848, 0x1F84F),
        (0x1F85A, 0x1F85F),
        (0x1F888, 0x1F88F),
        (0x1F8AE, 0x1F8FF),
        (0x1FA58, 0x1FA5F),
        (0x1FA6E, 0x1FA6F),
        (0x1FA7D, 0x1FA7F),
        (0x1FA8B, 0x1FA8D),
        (0x1FAC7, 0x1FAC7),
        (0x1FAC9, 0x1FACC),
        (0x1FADD, 0x1FADE),
        (0x1FAEB, 0x1FAEE),
        (0x1FAF9, 0x1FAFF),
        (0x1FC00, 0x1FFFD),
    ];
    RANGES
        .iter()
        .any(|&(first, last)| (first..=last).contains(&cp))
}
/// Returns `true` when `cp` is one of the code points or inclusive ranges
/// listed in the table below.
///
/// `line_break_opportunities` uses this together with `is_cn` to decide
/// whether a break before an `EM` character is prohibited.
///
/// NOTE(review): the supplementary-plane entries are whole ranges (for
/// example 0x1F300..=0x1F9FF), so the table presumably over-approximates the
/// Extended_Pictographic property there — confirm against emoji-data.txt
/// before reusing this predicate elsewhere.
fn is_extended_pictographic(cp: u32) -> bool {
matches!(cp,
// Individual BMP symbols and short BMP ranges.
0x00A9 | 0x00AE |
0x203C | 0x2049 |
0x2122 | 0x2139 |
0x2194..=0x2199 |
0x21A9..=0x21AA |
0x231A..=0x231B |
0x2328 |
0x23CF |
0x23E9..=0x23F3 |
0x23F8..=0x23FA |
0x24C2 |
0x25AA..=0x25AB |
0x25B6 | 0x25C0 |
0x25FB..=0x25FE |
0x2600..=0x2604 |
0x260E | 0x2611 | 0x2614..=0x2615 |
0x2618 | 0x261D | 0x2620 |
0x2622..=0x2623 | 0x2626 | 0x262A | 0x262E..=0x262F |
0x2638..=0x263A | 0x2640 | 0x2642 |
0x2648..=0x2653 |
0x265F..=0x2660 | 0x2663 | 0x2665..=0x2666 | 0x2668 |
0x267B | 0x267E..=0x267F |
0x2692..=0x2697 | 0x2699 | 0x269B..=0x269C |
0x26A0..=0x26A1 | 0x26A7 |
0x26AA..=0x26AB |
0x26B0..=0x26B1 |
0x26BD..=0x26BE |
0x26C4..=0x26C5 | 0x26C8 |
0x26CE..=0x26CF | 0x26D1 | 0x26D3..=0x26D4 |
0x26E9..=0x26EA |
0x26F0..=0x26F5 | 0x26F7..=0x26FA | 0x26FD |
0x2702 | 0x2705 | 0x2708..=0x270D | 0x270F |
0x2712 | 0x2714 | 0x2716 | 0x271D | 0x2721 |
0x2728 |
0x2733..=0x2734 | 0x2744 | 0x2747 | 0x274C | 0x274E |
0x2753..=0x2755 | 0x2757 |
0x2763..=0x2764 |
0x2795..=0x2797 | 0x27A1 | 0x27B0 | 0x27BF |
0x2934..=0x2935 |
0x2B05..=0x2B07 | 0x2B1B..=0x2B1C | 0x2B50 | 0x2B55 |
0x3030 | 0x303D | 0x3297 | 0x3299 |
// Supplementary-plane entries (U+1F000 and above), mostly whole ranges.
0x1F000..=0x1F0FF |
0x1F10D..=0x1F10F |
0x1F12F |
0x1F170..=0x1F171 | 0x1F17E..=0x1F17F |
0x1F18E |
0x1F191..=0x1F19A |
0x1F1AD |
0x1F1E6..=0x1F1FF |
0x1F201..=0x1F202 | 0x1F21A | 0x1F22F |
0x1F232..=0x1F23A | 0x1F250..=0x1F251 |
0x1F300..=0x1F9FF |
0x1FA00..=0x1FA6F |
0x1FA70..=0x1FAFF |
0x1FC00..=0x1FFFD
)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The empty string yields a single entry, a mandatory break at offset 0.
    #[test]
    fn test_empty() {
        let actions = line_break_opportunities("");
        assert_eq!(actions.len(), 1);
        assert_eq!(actions[0], BreakAction::Mandatory);
    }

    /// A break is allowed after the space, i.e. right before `W` (byte 6).
    #[test]
    fn test_simple_ascii() {
        let actions = line_break_opportunities("Hello World");
        assert_eq!(actions[6], BreakAction::Allowed);
    }

    /// A line feed forces a break immediately after it (byte 2).
    #[test]
    fn test_mandatory_break() {
        let actions = line_break_opportunities("A\nB");
        assert_eq!(actions[2], BreakAction::Mandatory);
    }

    /// CR LF is never split (byte 2); the break comes after the LF (byte 3).
    #[test]
    fn test_crlf() {
        let actions = line_break_opportunities("A\r\nB");
        assert_eq!(actions[2], BreakAction::Prohibited);
        assert_eq!(actions[3], BreakAction::Mandatory);
    }
}