#![forbid(unsafe_code)]
#[macro_use] extern crate matches;
pub mod tables;
pub use tables::{BidiClass, bidi_class, UNICODE_VERSION};
use BidiClass::*;
use std::borrow::Cow;
use std::cmp::{max, min};
use std::iter::repeat;
use std::ops::Range;
#[derive(Debug, PartialEq)]
pub struct ParagraphInfo {
pub classes: Vec<BidiClass>,
pub levels: Vec<u8>,
pub para_level: u8,
pub max_level: u8,
}
pub fn process_paragraph(text: &str, level: Option<u8>) -> ParagraphInfo {
let InitialProperties { para_level, initial_classes } = initial_scan(text, level);
let explicit::Result { mut classes, mut levels } =
explicit::compute(text, para_level, &initial_classes);
let sequences = prepare::isolating_run_sequences(para_level, &initial_classes, &levels);
for sequence in &sequences {
implicit::resolve_weak(sequence, &mut classes);
implicit::resolve_neutral(sequence, &levels, &mut classes);
}
let max_level = implicit::resolve_levels(&classes, &mut levels);
ParagraphInfo {
levels: levels,
classes: initial_classes,
para_level: para_level,
max_level: max_level,
}
}
#[inline]
pub fn is_ltr(level: u8) -> bool { level % 2 == 0 }
pub fn is_rtl(level: u8) -> bool { level % 2 == 1 }
fn class_for_level(level: u8) -> BidiClass {
if is_rtl(level) { R } else { L }
}
pub fn reorder_line<'a>(paragraph: &'a str, line: Range<usize>, info: &ParagraphInfo)
-> Cow<'a, str>
{
let runs = visual_runs(line.clone(), info.para_level, info.max_level, &info.levels);
if runs.len() == 1 && !is_rtl(info.levels[runs[0].start]) {
return paragraph.into()
}
let mut result = String::with_capacity(line.len());
for run in runs {
if is_rtl(info.levels[run.start]) {
result.extend(paragraph[run].chars().rev());
} else {
result.push_str(¶graph[run]);
}
}
result.into()
}
pub type LevelRun = Range<usize>;
pub fn visual_runs(line: Range<usize>,
para_level: u8,
max_level: u8,
levels: &[u8]) -> Vec<LevelRun> {
assert!(line.start <= levels.len());
assert!(line.end <= levels.len());
assert!(max_level >= para_level);
let mut runs = Vec::with_capacity((max_level - para_level) as usize + 1);
if max_level == para_level || line.len() == 0 {
runs.push(line.clone());
return runs
}
let mut start = line.start;
let mut level = levels[start];
let mut min_level = level;
let mut max_level = level;
for i in (start + 1)..line.end {
let new_level = levels[i];
if new_level != level {
runs.push(start..i);
start = i;
level = new_level;
min_level = min(level, min_level);
max_level = max(level, max_level);
}
}
runs.push(start..line.end);
let run_count = runs.len();
min_level |= 1;
while max_level >= min_level {
let mut seq_start = 0;
while seq_start < run_count {
if levels[runs[seq_start].start] < max_level {
seq_start += 1;
}
if seq_start >= run_count {
break }
let mut seq_end = seq_start + 1;
while seq_end < run_count {
if levels[runs[seq_end].start] < max_level {
break
}
seq_end += 1;
}
runs[seq_start..seq_end].reverse();
seq_start = seq_end;
}
max_level -= 1;
}
runs
}
#[derive(PartialEq, Debug)]
pub struct InitialProperties {
pub para_level: u8,
pub initial_classes: Vec<BidiClass>,
}
pub fn initial_scan(paragraph: &str, mut para_level: Option<u8>) -> InitialProperties {
let mut classes = Vec::with_capacity(paragraph.len());
let mut isolate_stack = Vec::new();
const FSI_CHAR: char = '\u{2069}';
for (i, c) in paragraph.char_indices() {
let class = bidi_class(c);
classes.extend(repeat(class).take(c.len_utf8()));
match class {
L | R | AL => match isolate_stack.last() {
Some(&start) => if classes[start] == FSI {
for j in 0..FSI_CHAR.len_utf8() {
classes[start+j] = if class == L { LRI } else { RLI };
}
},
None => if para_level.is_none() {
para_level = Some(if class == L { 0 } else { 1 });
}
},
RLI | LRI | FSI => {
isolate_stack.push(i);
}
PDI => {
isolate_stack.pop();
}
_ => {}
}
}
assert!(classes.len() == paragraph.len());
InitialProperties {
para_level: para_level.unwrap_or(0),
initial_classes: classes,
}
}
mod explicit {
use super::{BidiClass, is_rtl};
use super::BidiClass::*;
pub struct Result {
pub levels: Vec<u8>,
pub classes: Vec<BidiClass>,
}
pub fn compute(text: &str, para_level: u8, classes: &[BidiClass]) -> Result {
assert!(text.len() == classes.len());
let mut result = Result {
levels: vec![para_level; text.len()],
classes: Vec::from(classes),
};
let mut stack = DirectionalStatusStack::new();
stack.push(para_level, OverrideStatus::Neutral);
let mut overflow_isolate_count = 0u32;
let mut overflow_embedding_count = 0u32;
let mut valid_isolate_count = 0u32;
for (i, c) in text.char_indices() {
match classes[i] {
RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
let is_rtl = match classes[i] {
RLE | RLO | RLI => true,
_ => false
};
let last_level = stack.last().level;
let new_level = match is_rtl {
true => next_rtl_level(last_level),
false => next_ltr_level(last_level)
};
let is_isolate = matches!(classes[i], RLI | LRI | FSI);
if is_isolate {
result.levels[i] = last_level;
}
if valid(new_level) && overflow_isolate_count == 0 && overflow_embedding_count == 0 {
stack.push(new_level, match classes[i] {
RLO => OverrideStatus::RTL,
LRO => OverrideStatus::LTR,
RLI | LRI | FSI => OverrideStatus::Isolate,
_ => OverrideStatus::Neutral
});
if is_isolate {
valid_isolate_count += 1;
} else {
result.levels[i] = new_level;
}
} else if is_isolate {
overflow_isolate_count += 1;
} else if overflow_isolate_count == 0 {
overflow_embedding_count += 1;
}
}
PDI => {
if overflow_isolate_count > 0 {
overflow_isolate_count -= 1;
continue
}
if valid_isolate_count == 0 {
continue
}
overflow_embedding_count = 0;
loop {
match stack.vec.pop() {
Some(Status { status: OverrideStatus::Isolate, .. }) => break,
None => break,
_ => continue
}
}
valid_isolate_count -= 1;
result.levels[i] = stack.last().level;
}
PDF => {
if overflow_isolate_count > 0 {
continue
}
if overflow_embedding_count > 0 {
overflow_embedding_count -= 1;
continue
}
if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 {
stack.vec.pop();
}
result.levels[i] = stack.last().level;
}
B | BN => {}
_ => {
let last = stack.last();
result.levels[i] = last.level;
match last.status {
OverrideStatus::RTL => result.classes[i] = R,
OverrideStatus::LTR => result.classes[i] = L,
_ => {}
}
}
}
for j in 1..c.len_utf8() {
result.levels[i+j] = result.levels[i];
result.classes[i+j] = result.classes[i];
}
}
result
}
pub const MAX_DEPTH: u8 = 125;
fn valid(level: u8) -> bool { level <= MAX_DEPTH }
fn next_rtl_level(level: u8) -> u8 { (level + 1) | 1 }
fn next_ltr_level(level: u8) -> u8 { (level + 2) & !1 }
struct Status {
level: u8,
status: OverrideStatus,
}
#[derive(PartialEq)]
enum OverrideStatus { Neutral, RTL, LTR, Isolate }
struct DirectionalStatusStack {
vec: Vec<Status>,
}
impl DirectionalStatusStack {
fn new() -> Self {
DirectionalStatusStack {
vec: Vec::with_capacity(MAX_DEPTH as usize + 2)
}
}
fn push(&mut self, level: u8, status: OverrideStatus) {
self.vec.push(Status { level: level, status: status });
}
fn last(&self) -> &Status {
self.vec.last().unwrap()
}
}
}
mod prepare {
use super::{BidiClass, class_for_level, LevelRun};
use super::BidiClass::*;
use std::cmp::max;
pub struct IsolatingRunSequence {
pub runs: Vec<LevelRun>,
pub sos: BidiClass, pub eos: BidiClass, }
pub fn isolating_run_sequences(para_level: u8, initial_classes: &[BidiClass], levels: &[u8])
-> Vec<IsolatingRunSequence>
{
let runs = level_runs(levels, initial_classes);
let mut sequences = Vec::with_capacity(runs.len());
let mut stack = vec![Vec::new()];
for run in runs {
assert!(run.len() > 0);
assert!(stack.len() > 0);
let start_class = initial_classes[run.start];
let end_class = initial_classes[run.end - 1];
let mut sequence = if start_class == PDI && stack.len() > 1 {
stack.pop().unwrap()
} else {
Vec::new()
};
sequence.push(run);
if matches!(end_class, RLI | LRI | FSI) {
stack.push(sequence);
} else {
sequences.push(sequence);
}
}
sequences.extend(stack.into_iter().rev().filter(|seq| seq.len() > 0));
return sequences.into_iter().map(|sequence| {
assert!(!sequence.len() > 0);
let start = sequence[0].start;
let end = sequence[sequence.len() - 1].end;
let level = levels[start];
let pred_level = match initial_classes[..start].iter().rposition(not_removed_by_x9) {
Some(idx) => levels[idx],
None => para_level
};
let succ_level = if matches!(initial_classes[end - 1], RLI|LRI|FSI) {
para_level
} else {
match initial_classes[end..].iter().position(not_removed_by_x9) {
Some(idx) => levels[idx],
None => para_level
}
};
IsolatingRunSequence {
runs: sequence,
sos: class_for_level(max(level, pred_level)),
eos: class_for_level(max(level, succ_level)),
}
}).collect()
}
fn level_runs(levels: &[u8], classes: &[BidiClass]) -> Vec<LevelRun> {
assert!(levels.len() == classes.len());
let mut runs = Vec::new();
if levels.len() == 0 {
return runs
}
let mut current_run_level = levels[0];
let mut current_run_start = 0;
for i in 1..levels.len() {
if !removed_by_x9(classes[i]) {
if levels[i] != current_run_level {
runs.push(current_run_start..i);
current_run_level = levels[i];
current_run_start = i;
}
}
}
runs.push(current_run_start..levels.len());
runs
}
pub fn removed_by_x9(class: BidiClass) -> bool {
matches!(class, RLE | LRE | RLO | LRO | PDF | BN)
}
fn not_removed_by_x9(class: &BidiClass) -> bool {
!removed_by_x9(*class)
}
#[cfg(test)] #[test]
fn test_level_runs() {
assert_eq!(level_runs(&[0,0,0,1,1,2,0,0], &[L; 8]), &[0..3, 3..5, 5..6, 6..8]);
}
#[cfg(test)] #[test]
fn test_isolating_run_sequences() {
let classes = &[L, RLI, AL, LRI, L, R, L, PDI, AL, PDI, L];
let levels = &[0, 0, 1, 1, 2, 3, 2, 1, 1, 0, 0];
let para_level = 0;
let sequences = isolating_run_sequences(para_level, classes, levels);
let runs: Vec<Vec<LevelRun>> = sequences.iter().map(|s| s.runs.clone()).collect();
assert_eq!(runs, vec![vec![4..5], vec![5..6], vec![6..7], vec![2..4, 7..9], vec![0..2, 9..11]]);
}
}
mod implicit {
use super::{BidiClass, class_for_level, is_rtl};
use super::BidiClass::*;
use super::prepare::IsolatingRunSequence;
use std::cmp::max;
pub fn resolve_weak(sequence: &IsolatingRunSequence, classes: &mut [BidiClass]) {
let mut prev_class = sequence.sos;
let mut last_strong_is_al = false;
let mut last_strong_is_l = false;
let mut et_run_indices = Vec::new();
let mut indices = sequence.runs.iter().flat_map(Clone::clone).peekable();
while let Some(i) = indices.next() {
match classes[i] {
NSM => {
classes[i] = match prev_class {
RLI | LRI | FSI | PDI => ON,
_ => prev_class
};
}
EN => {
if last_strong_is_al {
classes[i] = AN;
} else {
if last_strong_is_l {
classes[i] = L;
}
for j in &et_run_indices {
classes[*j] = classes[i];
}
et_run_indices.clear();
}
}
AL => classes[i] = R,
ES | CS => {
let next_class = indices.peek().map(|j| classes[*j]);
classes[i] = match (prev_class, classes[i], next_class) {
(EN, ES, Some(EN)) |
(EN, CS, Some(EN)) => EN,
(AN, CS, Some(AN)) => AN,
(_, _, _ ) => ON,
}
}
ET => {
match prev_class {
EN => classes[i] = EN,
_ => et_run_indices.push(i) }
}
_ => {}
}
prev_class = classes[i];
match prev_class {
L => { last_strong_is_al = false; last_strong_is_l = true; }
R => { last_strong_is_al = false; last_strong_is_l = false; }
AL => { last_strong_is_al = true; last_strong_is_l = false; }
_ => {}
}
if prev_class != ET {
for j in &et_run_indices {
classes[*j] = ON;
}
et_run_indices.clear();
}
}
}
pub fn resolve_neutral(sequence: &IsolatingRunSequence, levels: &[u8],
classes: &mut [BidiClass])
{
let mut indices = sequence.runs.iter().flat_map(Clone::clone).peekable();
let mut prev_class = sequence.sos;
fn ni(class: BidiClass) -> bool {
matches!(class, B | S | WS | ON | FSI | LRI | RLI | PDI)
}
while let Some(i) = indices.next() {
let mut ni_run = Vec::new();
if ni(classes[i]) {
let mut next_class;
loop {
ni_run.push(i);
next_class = match indices.peek() {
Some(&j) => classes[j],
None => sequence.eos
};
if !ni(next_class) {
break
}
indices.next();
}
let new_class = match (prev_class, next_class) {
(L, L ) => L,
(R, R ) |
(R, AN) |
(R, EN) |
(AN, R ) |
(AN, AN) |
(AN, EN) |
(EN, R ) |
(EN, AN) |
(EN, EN) => R,
(_, _ ) => class_for_level(levels[i]),
};
for j in &ni_run {
classes[*j] = new_class;
}
ni_run.clear();
}
prev_class = classes[i];
}
}
pub fn resolve_levels(classes: &[BidiClass], levels: &mut [u8]) -> u8 {
let mut max_level = 0;
assert!(classes.len() == levels.len());
for i in 0..levels.len() {
match (is_rtl(levels[i]), classes[i]) {
(false, R) => levels[i] += 1,
(false, AN) |
(false, EN) => levels[i] += 2,
(true, L) |
(true, EN) |
(true, AN) => levels[i] += 1,
(_, _) => {}
}
max_level = max(max_level, levels[i]);
}
max_level
}
}
#[cfg(test)]
mod test {
use super::BidiClass::*;
#[test]
fn test_initial_scan() {
use super::{InitialProperties, initial_scan};
assert_eq!(initial_scan("a1", None), InitialProperties {
para_level: 0,
initial_classes: vec![L, EN],
});
assert_eq!(initial_scan("غ א", None), InitialProperties {
para_level: 1,
initial_classes: vec![AL, AL, WS, R, R],
});
let fsi = '\u{2068}';
let pdi = '\u{2069}';
let s = format!("{}א{}a", fsi, pdi);
assert_eq!(initial_scan(&s, None), InitialProperties {
para_level: 0,
initial_classes: vec![RLI, RLI, RLI, R, R, PDI, PDI, PDI, L],
});
}
#[test]
fn test_bidi_class() {
use super::bidi_class;
assert_eq!(bidi_class('c'), L);
assert_eq!(bidi_class('\u{05D1}'), R);
assert_eq!(bidi_class('\u{0627}'), AL);
}
#[test]
fn test_paragraph_info() {
use super::{ParagraphInfo, process_paragraph};
assert_eq!(process_paragraph("abc123", Some(0)), ParagraphInfo {
levels: vec![0, 0, 0, 0, 0, 0],
classes: vec![L, L, L, EN, EN, EN],
para_level: 0,
max_level: 0,
});
assert_eq!(process_paragraph("abc אבג", Some(0)), ParagraphInfo {
levels: vec![0, 0, 0, 0, 1,1, 1,1, 1,1],
classes: vec![L, L, L, WS, R,R, R,R, R,R],
para_level: 0,
max_level: 1,
});
assert_eq!(process_paragraph("abc אבג", Some(1)), ParagraphInfo {
levels: vec![2, 2, 2, 1, 1,1, 1,1, 1,1],
classes: vec![L, L, L, WS, R,R, R,R, R,R],
para_level: 1,
max_level: 2,
});
assert_eq!(process_paragraph("אבג abc", Some(0)), ParagraphInfo {
levels: vec![1,1, 1,1, 1,1, 0, 0, 0, 0],
classes: vec![R,R, R,R, R,R, WS, L, L, L],
para_level: 0,
max_level: 1,
});
assert_eq!(process_paragraph("אבג abc", None), ParagraphInfo {
levels: vec![1,1, 1,1, 1,1, 1, 2, 2, 2],
classes: vec![R,R, R,R, R,R, WS, L, L, L],
para_level: 1,
max_level: 2,
});
assert_eq!(process_paragraph("غ2ظ א2ג", Some(0)), ParagraphInfo {
levels: vec![1, 1, 2, 1, 1, 1, 1,1, 2, 1,1],
classes: vec![AL,AL, EN, AL,AL, WS, R,R, EN, R,R],
para_level: 0,
max_level: 2,
});
}
#[test]
fn test_reorder_line() {
use super::{process_paragraph, reorder_line};
use std::borrow::Cow;
fn reorder(s: &str) -> Cow<str> {
reorder_line(s, 0..s.len(), &process_paragraph(s, None))
}
assert_eq!(reorder("abc123"), "abc123");
assert_eq!(reorder("abc אבג"), "abc גבא");
assert_eq!(reorder("אבג abc"), "abc גבא");
}
}