use super::generated::bidi as gen;
/// Bidirectional character class (the Unicode `Bidi_Class` property values).
///
/// The variant order is kept stable because the generated lookup table
/// produces values of this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(clippy::upper_case_acronyms)]
pub enum BidiClass {
    /// Left-to-right strong character.
    L,
    /// Right-to-left strong character.
    R,
    /// Arabic letter (right-to-left strong).
    AL,
    /// European number.
    EN,
    /// European number separator.
    ES,
    /// European number terminator.
    ET,
    /// Arabic number.
    AN,
    /// Common number separator.
    CS,
    /// Non-spacing mark.
    NSM,
    /// Boundary neutral.
    BN,
    /// Paragraph separator.
    B,
    /// Segment separator.
    S,
    /// Whitespace.
    WS,
    /// Other neutral.
    ON,
    /// Left-to-right embedding.
    LRE,
    /// Left-to-right override.
    LRO,
    /// Right-to-left embedding.
    RLE,
    /// Right-to-left override.
    RLO,
    /// Pop directional format.
    PDF,
    /// Left-to-right isolate.
    LRI,
    /// Right-to-left isolate.
    RLI,
    /// First-strong isolate.
    FSI,
    /// Pop directional isolate.
    PDI,
}

impl BidiClass {
    /// Returns `true` for the strong right-to-left classes (`R` and `AL`).
    #[inline]
    #[must_use]
    pub const fn is_rtl(self) -> bool {
        match self {
            Self::R | Self::AL => true,
            _ => false,
        }
    }
}
/// Looks up the bidirectional class of the character `c`.
///
/// The character is converted to its code point and handed to the generated
/// table lookup.
#[inline]
#[must_use]
pub const fn bidi_class(c: char) -> BidiClass {
    let code_point = c as u32;
    gen::bidi_class(code_point)
}
/// Looks up the bidirectional class of the raw code point `cp`.
///
/// Forwards `cp` unchanged to the generated table lookup.
// NOTE(review): the result for values that are not Unicode scalar values
// (surrogates, > 0x10FFFF) depends entirely on the generated lookup — confirm.
#[inline]
#[must_use]
pub const fn bidi_class_u32(cp: u32) -> BidiClass {
gen::bidi_class(cp)
}
/// Base (paragraph) direction of a piece of text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Direction {
/// Left-to-right; corresponds to paragraph level 0 in `process`.
LeftToRight,
/// Right-to-left; corresponds to paragraph level 1 in `process`.
RightToLeft,
}
#[must_use]
pub fn base_direction(s: &str) -> Direction {
let mut isolate_depth = 0u32;
for c in s.chars() {
match bidi_class(c) {
BidiClass::LRI | BidiClass::RLI | BidiClass::FSI => isolate_depth += 1,
BidiClass::PDI => isolate_depth = isolate_depth.saturating_sub(1),
BidiClass::L if isolate_depth == 0 => return Direction::LeftToRight,
BidiClass::R | BidiClass::AL if isolate_depth == 0 => return Direction::RightToLeft,
_ => {}
}
}
Direction::LeftToRight
}
/// Returns `true` when [`base_direction`] resolves `s` to right-to-left.
#[must_use]
pub fn is_rtl(s: &str) -> bool {
    matches!(base_direction(s), Direction::RightToLeft)
}
#[cfg(feature = "alloc")]
pub use resolve::{process, BidiInfo};
#[cfg(feature = "alloc")]
mod resolve {
use super::bidi_class;
use super::BidiClass::{self, *};
use super::Direction;
use crate::unicode::generated::bidi::bidi_bracket;
use alloc::vec;
use alloc::vec::Vec;
// Highest embedding level that may be pushed on the directional status stack;
// an embedding or isolate whose computed level exceeds it is counted as an
// overflow instead of being pushed.
const MAX_DEPTH: u8 = 125;
/// Result of running [`process`] over a paragraph.
///
/// All indices are `char` indices into the processed text, not byte offsets.
#[derive(Debug, Clone)]
pub struct BidiInfo {
/// Paragraph embedding level: 0 for left-to-right, 1 for right-to-left.
pub paragraph_level: u8,
/// Resolved level for every character of the input, in logical order.
/// `None` marks characters that were removed from processing (explicit
/// embedding/override controls, `PDF` and `BN`).
pub levels: Vec<Option<u8>>,
/// Logical indices of the characters that were not removed, listed in
/// visual (display) order.
pub visual_order: Vec<usize>,
}
/// Returns the smallest odd level strictly greater than `level`.
fn next_odd(level: u8) -> u8 {
    // Step past `level`, then force the low bit: an even result of the
    // increment is bumped to the following odd number.
    (level + 1) | 1
}
/// Returns the smallest even level strictly greater than `level`.
fn next_even(level: u8) -> u8 {
    // Jump two ahead, then clear the low bit: from an odd level this lands on
    // `level + 1`, from an even level on `level + 2`.
    (level + 2) & !1
}
fn is_isolate_init(c: BidiClass) -> bool {
matches!(c, LRI | RLI | FSI)
}
fn is_ni(c: BidiClass) -> bool {
matches!(c, B | S | WS | ON | FSI | LRI | RLI | PDI)
}
fn neutral_dir(c: BidiClass) -> Option<BidiClass> {
match c {
L => Some(L),
R | EN | AN => Some(R),
_ => None,
}
}
/// Folds the two angle brackets U+2329 / U+232A onto U+3008 / U+3009 so that
/// either spelling pairs with the other; all other code points pass through
/// unchanged.
fn canon_bracket(cp: u32) -> u32 {
    const LEFT_POINTING_ANGLE: u32 = 0x2329;
    const RIGHT_POINTING_ANGLE: u32 = 0x232A;
    const CJK_LEFT_ANGLE: u32 = 0x3008;
    const CJK_RIGHT_ANGLE: u32 = 0x3009;

    if cp == LEFT_POINTING_ANGLE {
        CJK_LEFT_ANGLE
    } else if cp == RIGHT_POINTING_ANGLE {
        CJK_RIGHT_ANGLE
    } else {
        cp
    }
}
fn first_strong(classes: &[BidiClass], start: usize, end: usize) -> u8 {
let mut depth = 0u32;
for &c in &classes[start..end.min(classes.len())] {
match c {
_ if is_isolate_init(c) => depth += 1,
PDI => depth = depth.saturating_sub(1),
L if depth == 0 => return 0,
R | AL if depth == 0 => return 1,
_ => {}
}
}
0
}
/// Pairs every isolate initiator with the `PDI` that closes it.
///
/// The result has one slot per input class. The slot of a matched isolate
/// initiator holds the index of its `PDI`; every other slot (including
/// unmatched initiators) holds `classes.len()` as a "no match" marker.
fn match_isolates(classes: &[BidiClass]) -> Vec<usize> {
    let total = classes.len();
    let mut pdi_of = vec![total; total];
    let mut open_initiators: Vec<usize> = Vec::new();
    for (idx, class) in classes.iter().copied().enumerate() {
        if class == PDI {
            // Closes the innermost open initiator; a stray PDI is ignored.
            if let Some(initiator) = open_initiators.pop() {
                pdi_of[initiator] = idx;
            }
        } else if is_isolate_init(class) {
            open_initiators.push(idx);
        }
    }
    pdi_of
}
/// Runs the bidirectional algorithm over `text`, treated as one paragraph.
///
/// `base` forces the paragraph direction. With `None` the paragraph level is
/// taken from the first strong character outside any isolate (level 0 when
/// there is none).
///
/// The steps are, in order: explicit embedding/override/isolate levels, removal
/// of formatting characters, grouping into level runs and isolating run
/// sequences, per-sequence resolution (`resolve_sequence`), resetting of
/// separators and trailing whitespace to the paragraph level, and finally
/// reordering into visual order.
///
/// Every index in the returned [`BidiInfo`] is a `char` index into `text`.
#[must_use]
pub fn process(text: &str, base: Option<Direction>) -> BidiInfo {
let chars: Vec<char> = text.chars().collect();
// `raw` keeps the untouched class of every character; `classes` (below) is
// the working copy that overrides and later rules rewrite.
let raw: Vec<BidiClass> = chars.iter().map(|&c| bidi_class(c)).collect();
let n = chars.len();
let para_level = match base {
Some(Direction::LeftToRight) => 0,
Some(Direction::RightToLeft) => 1,
None => first_strong(&raw, 0, n),
};
// match_pdi[i] is the index of the PDI closing the initiator at i, or n.
let match_pdi = match_isolates(&raw);
// Reverse lookup: is the character at this index a PDI that closes something?
let mut is_matched_pdi = vec![false; n];
for &p in &match_pdi {
if p < n {
is_matched_pdi[p] = true;
}
}
let mut classes = raw.clone();
let mut levels = vec![para_level; n];
// Characters flagged here are excluded from every later step and are
// reported with a level of `None`.
let mut removed = vec![false; n];
// One entry of the directional status stack.
struct Entry {
// Embedding level in force while this entry is on top.
level: u8,
// Class forced onto ordinary characters by an override, if any.
ov: Option<BidiClass>,
// True when the entry was pushed by an isolate initiator.
iso: bool,
}
// The base entry is never popped (PDF requires len >= 2, PDI only pops while
// an isolate entry exists), so `stack.last().unwrap()` below cannot fail.
let mut stack = vec![Entry {
level: para_level,
ov: None,
iso: false,
}];
// oic: overflow isolate count, oec: overflow embedding count,
// vic: valid isolate count.
let (mut oic, mut oec, mut vic) = (0u32, 0u32, 0u32);
// Explicit levels and directions (UAX #9 rules X1-X8).
// NOTE(review): UAX #9 X5a-X5c and X6a also reset the class of an isolate
// initiator / PDI to L or R while an override is active; the RLI|LRI|FSI and
// PDI arms below leave `classes[i]` untouched — confirm whether intentional.
for i in 0..n {
let c = raw[i];
match c {
RLE | LRE | RLO | LRO => {
levels[i] = stack.last().unwrap().level;
removed[i] = true;
let new = if matches!(c, RLE | RLO) {
next_odd(stack.last().unwrap().level)
} else {
next_even(stack.last().unwrap().level)
};
if new <= MAX_DEPTH && oic == 0 && oec == 0 {
let ov = match c {
RLO => Some(R),
LRO => Some(L),
_ => None,
};
stack.push(Entry {
level: new,
ov,
iso: false,
});
} else if oic == 0 {
// Too deep or already overflowing: remember it so the matching PDF is
// ignored too.
oec += 1;
}
}
RLI | LRI | FSI => {
// The initiator itself stays at the enclosing level.
levels[i] = stack.last().unwrap().level;
let rtl = match c {
RLI => true,
LRI => false,
// FSI: direction comes from the first strong character up to its PDI.
_ => first_strong(&raw, i + 1, match_pdi[i]) == 1,
};
let new = if rtl {
next_odd(stack.last().unwrap().level)
} else {
next_even(stack.last().unwrap().level)
};
if new <= MAX_DEPTH && oic == 0 && oec == 0 {
vic += 1;
stack.push(Entry {
level: new,
ov: None,
iso: true,
});
} else {
oic += 1;
}
}
PDI => {
if oic > 0 {
// Closes an isolate that was never pushed.
oic -= 1;
} else if vic != 0 {
// Closes a valid isolate: drop any embeddings opened inside it, then the
// isolate entry itself.
oec = 0;
while !stack.last().unwrap().iso {
stack.pop();
}
stack.pop();
vic -= 1;
}
levels[i] = stack.last().unwrap().level;
}
PDF => {
if oic > 0 {
} else if oec > 0 {
oec -= 1;
} else if !stack.last().unwrap().iso && stack.len() >= 2 {
stack.pop();
}
levels[i] = stack.last().unwrap().level;
removed[i] = true;
}
B => {
// NOTE(review): the status stack and counters are not reset here, so text
// after a paragraph separator keeps the current embedding state — confirm
// callers only pass a single paragraph.
levels[i] = para_level;
}
BN => {
levels[i] = stack.last().unwrap().level;
removed[i] = true;
}
_ => {
let top = stack.last().unwrap();
levels[i] = top.level;
if let Some(ov) = top.ov {
classes[i] = ov;
}
}
}
}
// Remaining characters, split into level runs: maximal stretches of
// consecutive (non-removed) characters that share one level.
let reduced: Vec<usize> = (0..n).filter(|&i| !removed[i]).collect();
let mut runs: Vec<Vec<usize>> = Vec::new();
for &i in &reduced {
match runs.last() {
Some(r) if levels[*r.last().unwrap()] == levels[i] => {
runs.last_mut().unwrap().push(i);
}
_ => runs.push(vec![i]),
}
}
// run_of_start[i] is the run that begins at character i, or usize::MAX.
let mut run_of_start = vec![usize::MAX; n];
for (ri, r) in runs.iter().enumerate() {
run_of_start[r[0]] = ri;
}
// `orig` snapshots the classes after overrides but before resolution.
let orig = classes.clone(); let mut used = vec![false; runs.len()];
// Chain level runs into isolating run sequences: a run that ends with a
// matched isolate initiator continues in the run that starts with its PDI.
let mut sequences: Vec<Vec<usize>> = Vec::new();
for r in 0..runs.len() {
if used[r] {
continue;
}
let first = runs[r][0];
// A run opening with a matched PDI is only ever reached through its
// initiator's run, never started on its own.
if classes[first] == PDI && is_matched_pdi[first] {
continue;
}
let mut seq = Vec::new();
let mut cur = r;
loop {
used[cur] = true;
seq.extend_from_slice(&runs[cur]);
let last = *runs[cur].last().unwrap();
if is_isolate_init(classes[last]) && match_pdi[last] < n {
let nr = run_of_start[match_pdi[last]];
if nr != usize::MAX {
cur = nr;
continue;
}
}
break;
}
sequences.push(seq);
}
// Frozen copy of the explicit levels: `resolve_sequence` overwrites `levels`
// while still needing the explicit levels of neighbouring characters.
let elevels = levels.clone();
for seq in &sequences {
resolve_sequence(
seq,
&chars,
&orig,
&mut classes,
&mut levels,
&elevels,
para_level,
n,
&removed,
);
}
// Reset separators, and any whitespace / isolate formatting run directly
// before them or at the end of the text, to the paragraph level (UAX #9 L1).
// `reset_from` is the start of the pending run, or n when there is none.
let mut reset_from = n;
for i in 0..n {
if removed[i] {
// Removed characters neither extend nor interrupt a pending run.
continue;
}
match raw[i] {
S | B => {
levels[i] = para_level;
for j in reset_from..i {
if !removed[j] {
levels[j] = para_level;
}
}
reset_from = n;
}
WS | FSI | LRI | RLI | PDI => {
if reset_from == n {
reset_from = i;
}
}
_ => reset_from = n,
}
}
for j in reset_from..n {
if !removed[j] {
levels[j] = para_level;
}
}
// Reordering (UAX #9 L2): from the highest level down to the lowest odd
// level, reverse every maximal stretch of characters at that level or above.
// `order` holds positions into `visible`, not character indices.
let visible: Vec<usize> = (0..n).filter(|&i| !removed[i]).collect();
let lv: Vec<u8> = visible.iter().map(|&i| levels[i]).collect();
let mut order: Vec<usize> = (0..visible.len()).collect();
let max_level = lv.iter().copied().max().unwrap_or(0);
// With no odd level present this exceeds max_level, so the loop is skipped.
let min_odd = lv
.iter()
.copied()
.filter(|l| l % 2 == 1)
.min()
.unwrap_or(max_level + 1);
let mut level = max_level;
while level >= min_odd {
let mut i = 0;
while i < order.len() {
if lv[order[i]] >= level {
let start = i;
while i < order.len() && lv[order[i]] >= level {
i += 1;
}
order[start..i].reverse();
} else {
i += 1;
}
}
// Guards the u8 decrement below against underflow.
if level == 0 {
break;
}
level -= 1;
}
// Translate positions back to character indices.
let visual_order: Vec<usize> = order.iter().map(|&p| visible[p]).collect();
let out_levels: Vec<Option<u8>> = (0..n)
.map(|i| if removed[i] { None } else { Some(levels[i]) })
.collect();
BidiInfo {
paragraph_level: para_level,
levels: out_levels,
visual_order,
}
}
/// Resolves the classes and final levels of one isolating run sequence.
///
/// * `seq` - character indices that make up the sequence, in logical order.
/// * `chars` - all characters of the paragraph (needed for bracket lookup).
/// * `orig` - classes before any resolution; used to recognise original NSMs.
/// * `classes` - working classes; read for the sequence and overwritten with
///   the resolved classes at the end.
/// * `levels` - receives the resolved level of every character in `seq`.
/// * `elevels` - explicit embedding levels, unaffected by earlier sequences.
/// * `para_level` - paragraph embedding level.
/// * `n` - total number of characters in the paragraph.
/// * `removed` - flags of characters excluded from processing.
///
/// The passes below correspond to UAX #9 rules W1-W7, N0, N1/N2 and I1/I2 and
/// must run in this order, since each one reads the classes left by the
/// previous one.
#[allow(clippy::too_many_arguments)]
fn resolve_sequence(
seq: &[usize],
chars: &[char],
orig: &[BidiClass],
classes: &mut [BidiClass],
levels: &mut [u8],
elevels: &[u8],
para_level: u8,
n: usize,
removed: &[bool],
) {
let len = seq.len();
if len == 0 {
return;
}
// Every member of a sequence shares one explicit level; `e` is the
// embedding direction implied by its parity.
let seq_level = elevels[seq[0]];
let e = if seq_level % 2 == 0 { L } else { R };
// sos: direction of the higher of the sequence level and the level of the
// nearest preceding non-removed character (paragraph level if none).
let first = seq[0];
let prev_level = (0..first)
.rev()
.find(|&j| !removed[j])
.map_or(para_level, |j| elevels[j]);
let sos = if seq_level.max(prev_level) % 2 == 1 {
R
} else {
L
};
// eos: same idea looking forward; a sequence ending in an isolate initiator
// compares against the paragraph level instead.
let last = seq[len - 1];
let next_level = if is_isolate_init(classes[last]) {
para_level } else {
(last + 1..n)
.find(|&j| !removed[j])
.map_or(para_level, |j| elevels[j])
};
let eos = if seq_level.max(next_level) % 2 == 1 {
R
} else {
L
};
// Local copy of the sequence's classes, indexed by position within `seq`.
let mut cls: Vec<BidiClass> = seq.iter().map(|&i| classes[i]).collect();
// W1: an NSM takes the class of the preceding character (sos at the start),
// or ON when that character is an isolate initiator or PDI.
let mut prev = sos;
for c in cls.iter_mut() {
if *c == NSM {
*c = if matches!(prev, LRI | RLI | FSI | PDI) {
ON
} else {
prev
};
}
prev = *c;
}
// W2: EN becomes AN when the most recent strong class is AL.
let mut strong = sos;
for c in cls.iter_mut() {
match *c {
R | L | AL => strong = *c,
EN if strong == AL => *c = AN,
_ => {}
}
}
// W3: AL becomes R.
for c in cls.iter_mut() {
if *c == AL {
*c = R;
}
}
// W4: a single ES between two ENs becomes EN; a single CS between two
// numbers of the same kind takes that kind. First and last positions have
// no two neighbours and are skipped.
for k in 1..len.saturating_sub(1) {
if cls[k] == ES && cls[k - 1] == EN && cls[k + 1] == EN {
cls[k] = EN;
} else if cls[k] == CS {
if cls[k - 1] == EN && cls[k + 1] == EN {
cls[k] = EN;
} else if cls[k - 1] == AN && cls[k + 1] == AN {
cls[k] = AN;
}
}
}
// W5: a run of ETs touching an EN on either side becomes EN.
let mut k = 0;
while k < len {
if cls[k] == ET {
let start = k;
while k < len && cls[k] == ET {
k += 1;
}
let before = start > 0 && cls[start - 1] == EN;
let after = k < len && cls[k] == EN;
if before || after {
for c in cls.iter_mut().take(k).skip(start) {
*c = EN;
}
}
} else {
k += 1;
}
}
// W6: separators and terminators left over become ON.
for c in cls.iter_mut() {
if matches!(*c, ES | ET | CS) {
*c = ON;
}
}
// W7: EN becomes L when the most recent strong class is L.
let mut strong = sos;
for c in cls.iter_mut() {
match *c {
R | L => strong = *c,
EN if strong == L => *c = L,
_ => {}
}
}
// N0: paired brackets.
resolve_brackets(seq, chars, orig, &mut cls, e, sos);
// N1/N2: a run of neutrals takes the direction of its surroundings when both
// sides agree (numbers count as R, sequence edges use sos/eos), otherwise
// the embedding direction.
let mut k = 0;
while k < len {
if is_ni(cls[k]) {
let start = k;
while k < len && is_ni(cls[k]) {
k += 1;
}
let before = if start == 0 {
sos
} else {
neutral_dir(cls[start - 1]).unwrap_or(sos)
};
let after = if k == len {
eos
} else {
neutral_dir(cls[k]).unwrap_or(eos)
};
let set = if before == after { before } else { e };
for c in cls.iter_mut().take(k).skip(start) {
*c = set;
}
} else {
k += 1;
}
}
// I1/I2: raise the level according to the resolved class. On an even level
// R goes up one and numbers two; on an odd level L and numbers go up one.
for (k, &i) in seq.iter().enumerate() {
let add = if seq_level % 2 == 0 {
match cls[k] {
R => 1,
AN | EN => 2,
_ => 0,
}
} else {
match cls[k] {
L | EN | AN => 1,
_ => 0,
}
};
levels[i] = seq_level + add;
classes[i] = cls[k];
}
}
/// Resolves paired brackets within one isolating run sequence.
///
/// * `seq` - character indices of the sequence, in logical order.
/// * `chars` - all characters of the paragraph.
/// * `orig` - classes before resolution, used to find original NSMs.
/// * `cls` - working classes of the sequence (indexed by position in `seq`),
///   updated in place.
/// * `e` - embedding direction of the sequence.
/// * `sos` - start-of-sequence direction, used when nothing strong precedes
///   an opening bracket.
///
/// Only characters whose current class is `ON` are considered as brackets.
/// Pair detection stops once 63 opening brackets are pending; pairs found up
/// to that point are still resolved.
fn resolve_brackets(
    seq: &[usize],
    chars: &[char],
    orig: &[BidiClass],
    cls: &mut [BidiClass],
    e: BidiClass,
    sos: BidiClass,
) {
    // Pending openers as (canonical closing code point, position in `seq`).
    let mut pending: Vec<(u32, usize)> = Vec::new();
    // Matched pairs as (opening position, closing position).
    let mut matched: Vec<(usize, usize)> = Vec::new();

    for (pos, &char_idx) in seq.iter().enumerate() {
        if cls[pos] != ON {
            continue;
        }
        let code = chars[char_idx] as u32;
        let (partner, kind) = bidi_bracket(code);
        if kind == 1 {
            // Opening bracket.
            if pending.len() == 63 {
                break;
            }
            pending.push((canon_bracket(partner), pos));
        } else if kind == 2 {
            // Closing bracket: pair with the innermost pending opener that
            // expects it, discarding that opener and everything above it.
            let wanted = canon_bracket(code);
            if let Some(depth) = pending.iter().rposition(|&(closer, _)| closer == wanted) {
                matched.push((pending[depth].1, pos));
                pending.truncate(depth);
            }
        }
    }

    // Pairs are resolved in order of their opening bracket.
    matched.sort_unstable_by_key(|&(open_pos, _)| open_pos);
    let opposite = if e == L { R } else { L };

    for (open_pos, close_pos) in matched {
        let enclosed = &cls[open_pos + 1..close_pos];
        let has_embedding_dir = enclosed.iter().any(|&c| neutral_dir(c) == Some(e));
        let has_strong =
            has_embedding_dir || enclosed.iter().any(|&c| neutral_dir(c).is_some());

        let dir = if has_embedding_dir {
            e
        } else if has_strong {
            // Only the opposite direction occurs inside: follow the context
            // before the opening bracket if it is also opposite.
            let context = cls[..open_pos]
                .iter()
                .rev()
                .find_map(|&c| neutral_dir(c))
                .unwrap_or(sos);
            if context == opposite {
                opposite
            } else {
                e
            }
        } else {
            // Nothing strong inside: the pair is left for neutral resolution.
            continue;
        };

        cls[open_pos] = dir;
        cls[close_pos] = dir;
        // Original NSMs directly following either bracket follow the bracket.
        for &bracket in &[open_pos, close_pos] {
            let after = bracket + 1;
            let trailing_marks = seq[after..]
                .iter()
                .take_while(|&&i| orig[i] == NSM)
                .count();
            for slot in &mut cls[after..after + trailing_marks] {
                *slot = dir;
            }
        }
    }
}
}
#[cfg(all(test, feature = "alloc", feature = "bmp"))]
mod tests {
    use super::process;

    /// Processes 200 consecutive `LRI א PDI` groups and checks that the
    /// paragraph stays left-to-right, that the visual order contains no
    /// out-of-range or repeated index, and that every Hebrew letter ends up
    /// on an odd level.
    #[test]
    fn matched_pdi_lookup_assembles_isolate_runs() {
        const GROUPS: usize = 200;
        const ALEF: char = '\u{05D0}';

        let mut text = alloc::string::String::new();
        for _ in 0..GROUPS {
            text.push_str("\u{2066}\u{05D0}\u{2069}");
        }
        let chars: alloc::vec::Vec<char> = text.chars().collect();
        let total = chars.len();

        let info = process(&text, None);
        assert_eq!(info.paragraph_level, 0);

        let mut visited = alloc::vec![false; total];
        for &logical in &info.visual_order {
            assert!(logical < total);
            assert!(!visited[logical], "duplicate index in visual order");
            visited[logical] = true;
        }

        for i in (0..total).filter(|&i| chars[i] == ALEF) {
            assert_eq!(info.levels[i].map(|l| l % 2), Some(1));
        }
    }
}