#![forbid(unsafe_code)]
use unicode_bidi::{BidiInfo, Level};
/// Resolved direction of a piece of text: left-to-right or right-to-left.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Direction {
    /// Left-to-right.
    Ltr,
    /// Right-to-left.
    Rtl,
}
/// Requested base direction for a paragraph.
///
/// `Auto` (the `Default`) leaves the choice to the bidi algorithm; the other
/// two variants force a direction.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ParagraphDirection {
    /// Let the paragraph direction be detected from the text.
    #[default]
    Auto,
    /// Force a left-to-right paragraph.
    Ltr,
    /// Force a right-to-left paragraph.
    Rtl,
}
/// Converts an optional forced [`Direction`] into the matching base [`Level`].
///
/// `None` is passed through unchanged so the caller can hand it to
/// `BidiInfo::new` as "no forced level".
fn direction_to_level(dir: Option<Direction>) -> Option<Level> {
    dir.map(|forced| match forced {
        Direction::Ltr => Level::ltr(),
        Direction::Rtl => Level::rtl(),
    })
}
/// Converts a [`ParagraphDirection`] into the optional base [`Level`].
///
/// `Auto` yields `None`; the explicit variants yield the corresponding
/// LTR/RTL level.
fn para_direction_to_level(dir: ParagraphDirection) -> Option<Level> {
    let forced = match dir {
        ParagraphDirection::Auto => return None,
        ParagraphDirection::Ltr => Level::ltr(),
        ParagraphDirection::Rtl => Level::rtl(),
    };
    Some(forced)
}
/// A maximal run of consecutive characters that share one embedding level.
///
/// `BidiSegment::compute_runs` produces these with `start`/`end` expressed as
/// character indices and `end` exclusive.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BidiRun {
/// Index of the first character in the run.
pub start: usize,
/// Index one past the last character in the run.
pub end: usize,
/// Embedding level shared by every character in the run.
pub level: Level,
/// `Rtl` when `level` is an RTL level, otherwise `Ltr`.
pub direction: Direction,
}
impl BidiRun {
    /// Number of characters covered by the run (`end - start`).
    ///
    /// The fields are public, so an inverted range (`end < start`) can be
    /// constructed by hand; such a run reports a length of 0 instead of
    /// underflowing.
    pub fn len(&self) -> usize {
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` when the run covers no characters.
    ///
    /// Defined via [`BidiRun::len`] so the two methods always agree.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
/// Bidi analysis of one piece of text, indexed by character (not by byte).
///
/// Built by `BidiSegment::new`; every vector below has one entry per `char`
/// of `text`, except `runs`, which has one entry per level run.
#[derive(Debug, Clone)]
pub struct BidiSegment {
/// Owned copy of the analysed text.
pub text: String,
/// The characters of `text`, in logical order.
pub chars: Vec<char>,
/// Resolved embedding level of each character, in logical order.
pub levels: Vec<Level>,
/// Consecutive same-level runs covering the characters.
pub runs: Vec<BidiRun>,
/// For each visual index, the logical index of the character shown there.
pub visual_to_logical: Vec<usize>,
/// Inverse of `visual_to_logical`: for each logical index, its visual index.
pub logical_to_visual: Vec<usize>,
}
impl BidiSegment {
    /// Analyses `text` and builds the per-character levels, level runs and
    /// the logical <-> visual index maps.
    ///
    /// `base` forces the base direction; `None` leaves it to the bidi
    /// algorithm. All indices exposed by the segment count `char`s, not bytes.
    pub fn new(text: &str, base: Option<Direction>) -> Self {
        let chars: Vec<char> = text.chars().collect();
        if chars.is_empty() {
            return Self {
                text: String::new(),
                chars: Vec::new(),
                levels: Vec::new(),
                runs: Vec::new(),
                visual_to_logical: Vec::new(),
                logical_to_visual: Vec::new(),
            };
        }

        let bidi_info = BidiInfo::new(text, direction_to_level(base));
        let levels = Self::byte_levels_to_char_levels(text, &bidi_info.levels);
        let runs = Self::compute_runs(&levels);
        let visual_to_logical = Self::compute_visual_order(&levels);
        let logical_to_visual = Self::invert_permutation(&visual_to_logical);

        Self {
            text: text.to_string(),
            chars,
            levels,
            runs,
            visual_to_logical,
            logical_to_visual,
        }
    }

    /// Visual index of the character at logical index `logical`.
    ///
    /// Out-of-range input is returned unchanged.
    pub fn visual_pos(&self, logical: usize) -> usize {
        self.logical_to_visual
            .get(logical)
            .copied()
            .unwrap_or(logical)
    }

    /// Logical index of the character displayed at visual index `visual`.
    ///
    /// Out-of-range input is returned unchanged.
    pub fn logical_pos(&self, visual: usize) -> usize {
        self.visual_to_logical
            .get(visual)
            .copied()
            .unwrap_or(visual)
    }

    /// Whether the character at logical index `logical` resolved to an RTL
    /// level. Out-of-range indices yield `false`.
    pub fn is_rtl(&self, logical: usize) -> bool {
        self.levels.get(logical).is_some_and(|level| level.is_rtl())
    }

    /// Maps a logical cursor position (a gap between characters, `0..=len`)
    /// to a visual cursor position (`0..=len`).
    ///
    /// The cursor sits between the characters at `logical - 1` and `logical`;
    /// at either end of the text the base level stands in for the missing
    /// neighbour. The cursor attaches to the previous character unless that
    /// character has a strictly higher level than the next one.
    ///
    /// Positions past the end are clamped to `len` rather than panicking,
    /// matching the lenient behaviour of [`BidiSegment::visual_pos`].
    pub fn visual_cursor_pos(&self, logical: usize) -> usize {
        let n = self.chars.len();
        if n == 0 {
            return 0;
        }
        // Clamp so the slice indexing below can never go out of bounds.
        let logical = logical.min(n);

        let base_level: u8 = match self.base_direction() {
            Direction::Ltr => 0,
            Direction::Rtl => 1,
        };
        let level_left = if logical > 0 {
            self.levels[logical - 1].number()
        } else {
            base_level
        };
        let level_right = if logical < n {
            self.levels[logical].number()
        } else {
            base_level
        };

        if level_left <= level_right {
            // Attach to the trailing edge of the previous character.
            if logical == 0 {
                // Start of text: the leading edge of the whole line.
                return if base_level % 2 == 0 { 0 } else { n };
            }
            let prev = logical - 1;
            let v = self.logical_to_visual[prev];
            // An RTL character's trailing edge is its visual left side.
            if self.levels[prev].is_rtl() {
                v
            } else {
                v + 1
            }
        } else {
            // Attach to the leading edge of the next character.
            if logical == n {
                // End of text: the trailing edge of the whole line.
                return if base_level % 2 == 0 { n } else { 0 };
            }
            let v = self.logical_to_visual[logical];
            // An RTL character's leading edge is its visual right side.
            if self.levels[logical].is_rtl() {
                v + 1
            } else {
                v
            }
        }
    }

    /// Maps a visual cursor position (`0..=len`) back to a logical cursor
    /// position.
    ///
    /// Each visual neighbour proposes a logical position (the edge of that
    /// character facing the cursor). When the two proposals differ, the one
    /// from the left neighbour wins if that neighbour runs in the base
    /// direction, otherwise the right neighbour's proposal is used.
    /// `visual >= len` is treated as the right edge of the line.
    pub fn logical_cursor_pos(&self, visual: usize) -> usize {
        let n = self.chars.len();
        if n == 0 {
            return 0;
        }
        if visual >= n {
            // Right edge of the line: only a left neighbour exists.
            let l_l = self.logical_pos(n - 1);
            return if self.levels[l_l].is_ltr() {
                l_l + 1
            } else {
                l_l
            };
        }
        if visual == 0 {
            // Left edge of the line: only a right neighbour exists.
            let l_r = self.logical_pos(0);
            return if self.levels[l_r].is_ltr() {
                l_r
            } else {
                l_r + 1
            };
        }

        let l_l = self.logical_pos(visual - 1);
        let l_r = self.logical_pos(visual);
        let left_is_ltr = self.levels[l_l].is_ltr();
        let right_is_ltr = self.levels[l_r].is_ltr();
        let cand_left = if left_is_ltr { l_l + 1 } else { l_l };
        let cand_right = if right_is_ltr { l_r } else { l_r + 1 };
        if cand_left == cand_right {
            return cand_left;
        }

        // Direction boundary: the two neighbours disagree.
        let base_is_ltr = self.base_direction() == Direction::Ltr;
        if left_is_ltr == base_is_ltr {
            cand_left
        } else {
            cand_right
        }
    }

    /// Moves the cursor one step to the visual right and returns the new
    /// logical cursor position.
    ///
    /// At the right edge the (clamped) input position is returned unchanged.
    pub fn move_right(&self, logical: usize) -> usize {
        let logical = logical.min(self.chars.len());
        let visual = self.visual_cursor_pos(logical);
        if visual < self.chars.len() {
            self.logical_cursor_pos(visual + 1)
        } else {
            logical
        }
    }

    /// Moves the cursor one step to the visual left and returns the new
    /// logical cursor position.
    ///
    /// At the left edge the (clamped) input position is returned unchanged.
    pub fn move_left(&self, logical: usize) -> usize {
        let logical = logical.min(self.chars.len());
        let visual = self.visual_cursor_pos(logical);
        if visual > 0 {
            self.logical_cursor_pos(visual - 1)
        } else {
            logical
        }
    }

    /// Number of characters in the segment.
    pub fn len(&self) -> usize {
        self.chars.len()
    }

    /// Returns `true` when the segment holds no characters.
    pub fn is_empty(&self) -> bool {
        self.chars.is_empty()
    }

    /// Base direction inferred from the resolved levels: `Rtl` when the
    /// lowest level present is odd, otherwise `Ltr` (including for an empty
    /// segment).
    ///
    /// NOTE(review): this is a heuristic. The paragraph level used in `new`
    /// is not stored, so text whose every character resolves above the
    /// paragraph level (presumably e.g. all-Latin text with a forced RTL
    /// base) may report the opposite direction. Confirm against callers.
    pub fn base_direction(&self) -> Direction {
        let min_level = self.levels.iter().map(|l| l.number()).min().unwrap_or(0);
        if min_level % 2 == 1 {
            Direction::Rtl
        } else {
            Direction::Ltr
        }
    }

    /// Character displayed at visual index `visual`, or `None` when out of
    /// range.
    pub fn char_at_visual(&self, visual: usize) -> Option<char> {
        self.visual_to_logical
            .get(visual)
            .and_then(|&logical| self.chars.get(logical))
            .copied()
    }

    /// The characters of the segment concatenated in visual order.
    pub fn visual_string(&self) -> String {
        self.visual_to_logical
            .iter()
            .filter_map(|&logical| self.chars.get(logical))
            .collect()
    }

    /// Picks one level per character out of a per-byte level table by
    /// reading the entry at each character's starting byte offset.
    fn byte_levels_to_char_levels(text: &str, byte_levels: &[Level]) -> Vec<Level> {
        text.char_indices()
            .map(|(byte_offset, _)| byte_levels[byte_offset])
            .collect()
    }

    /// Builds a run over `start..end` whose direction follows `level`.
    fn make_run(start: usize, end: usize, level: Level) -> BidiRun {
        BidiRun {
            start,
            end,
            level,
            direction: if level.is_rtl() {
                Direction::Rtl
            } else {
                Direction::Ltr
            },
        }
    }

    /// Splits the characters into maximal runs of equal level.
    ///
    /// The runs are contiguous, in logical order, and cover every index of
    /// `char_levels`; empty input yields no runs.
    fn compute_runs(char_levels: &[Level]) -> Vec<BidiRun> {
        let Some(&first) = char_levels.first() else {
            return Vec::new();
        };

        let mut runs = Vec::new();
        let mut start = 0;
        let mut current_level = first;
        for (i, &level) in char_levels.iter().enumerate().skip(1) {
            if level != current_level {
                runs.push(Self::make_run(start, i, current_level));
                start = i;
                current_level = level;
            }
        }
        // Close the final run, which the loop never emits.
        runs.push(Self::make_run(start, char_levels.len(), current_level));
        runs
    }

    /// Computes the visual order as a list of logical indices.
    ///
    /// Starting at the highest level present and going down to the lowest
    /// odd level, every maximal stretch of positions whose level is at least
    /// the current one is reversed (the reversal step of UAX #9 rule L2).
    /// Text with no odd level is returned in logical order.
    fn compute_visual_order(char_levels: &[Level]) -> Vec<usize> {
        let n = char_levels.len();
        if n == 0 {
            return Vec::new();
        }

        let mut order: Vec<usize> = (0..n).collect();
        let max_level = char_levels.iter().map(|l| l.number()).max().unwrap_or(0);
        // With no odd level the range below is empty and nothing is reversed.
        let min_odd_level = char_levels
            .iter()
            .map(|l| l.number())
            .filter(|&number| number % 2 == 1)
            .min()
            .unwrap_or(max_level + 1);

        for level in (min_odd_level..=max_level).rev() {
            let mut i = 0;
            while i < n {
                if char_levels[order[i]].number() >= level {
                    let start = i;
                    while i < n && char_levels[order[i]].number() >= level {
                        i += 1;
                    }
                    order[start..i].reverse();
                } else {
                    i += 1;
                }
            }
        }
        order
    }

    /// Inverts a permutation: if `perm[visual] == logical`, the result maps
    /// `logical` back to `visual`.
    fn invert_permutation(perm: &[usize]) -> Vec<usize> {
        let mut inverse = vec![0; perm.len()];
        for (visual, &logical) in perm.iter().enumerate() {
            inverse[logical] = visual;
        }
        inverse
    }
}
/// Returns `text` rearranged into visual (display) order.
///
/// `direction` forces the paragraph direction, or leaves it to the bidi
/// algorithm with [`ParagraphDirection::Auto`]. Each paragraph reported by
/// `BidiInfo` is reordered as a single line spanning its whole range, and
/// the results are concatenated in paragraph order. Empty input yields an
/// empty string.
pub fn reorder(text: &str, direction: ParagraphDirection) -> String {
    if text.is_empty() {
        return String::new();
    }

    let bidi_info = BidiInfo::new(text, para_direction_to_level(direction));
    let mut visual = String::with_capacity(text.len());
    for paragraph in bidi_info.paragraphs.iter() {
        // The whole paragraph is treated as one line.
        let whole_paragraph = paragraph.range.clone();
        visual.push_str(&bidi_info.reorder_line(paragraph, whole_paragraph));
    }
    visual
}
/// Resolves the bidi embedding levels of `text`.
///
/// `direction` forces the paragraph direction, or leaves it to the bidi
/// algorithm with [`ParagraphDirection::Auto`]. The result is the `levels`
/// table of `BidiInfo`, which holds one entry per UTF-8 byte of `text`
/// (not one per `char`). Empty input yields an empty vector.
pub fn resolve_levels(text: &str, direction: ParagraphDirection) -> Vec<Level> {
    if text.is_empty() {
        return Vec::new();
    }
    // The `BidiInfo` is owned here, so its level table can be moved out
    // rather than cloned.
    BidiInfo::new(text, para_direction_to_level(direction)).levels
}
pub fn has_rtl(text: &str) -> bool {
text.chars().any(is_rtl_char)
}
use unicode_bidi::BidiClass;
/// Whether `c` has one of the right-to-left bidi classes: the strong classes
/// `R` and `AL`, or the RTL explicit formatting classes `RLE`, `RLO`, `RLI`.
fn is_rtl_char(c: char) -> bool {
    match unicode_bidi::bidi_class(c) {
        BidiClass::R
        | BidiClass::AL
        | BidiClass::RLE
        | BidiClass::RLO
        | BidiClass::RLI => true,
        _ => false,
    }
}
/// Detects the direction of the first paragraph of `text`.
///
/// The text is analysed with no forced base level. The result is `Rtl` when
/// the first paragraph's level is RTL and `Ltr` otherwise, including for
/// empty input or when no paragraph is reported. `Auto` is never returned.
pub fn paragraph_level(text: &str) -> ParagraphDirection {
    if text.is_empty() {
        return ParagraphDirection::Ltr;
    }

    let bidi_info = BidiInfo::new(text, None);
    match bidi_info.paragraphs.first() {
        Some(first) if first.level.is_rtl() => ParagraphDirection::Rtl,
        _ => ParagraphDirection::Ltr,
    }
}
// Unit tests for the free functions (`reorder`, `has_rtl`, `resolve_levels`,
// `paragraph_level`, `is_rtl_char`) and for `BidiRun` / `BidiSegment`.
#[cfg(test)]
mod tests {
use super::*;
// ---- reorder ----
// Empty input produces empty output.
#[test]
fn reorder_empty() {
assert_eq!(reorder("", ParagraphDirection::Auto), "");
}
// Pure LTR text is returned unchanged.
#[test]
fn reorder_pure_ltr() {
let text = "Hello, world!";
assert_eq!(reorder(text, ParagraphDirection::Auto), text);
}
// A Hebrew-only string comes out with its characters reversed.
#[test]
fn reorder_pure_rtl_hebrew() {
let text = "\u{05E9}\u{05DC}\u{05D5}\u{05DD}";
let result = reorder(text, ParagraphDirection::Auto);
assert_eq!(result, "\u{05DD}\u{05D5}\u{05DC}\u{05E9}");
}
// An Arabic-only string comes out with its characters reversed.
#[test]
fn reorder_pure_rtl_arabic() {
let text = "\u{0645}\u{0631}\u{062D}\u{0628}\u{0627}";
let result = reorder(text, ParagraphDirection::Auto);
assert_eq!(result, "\u{0627}\u{0628}\u{062D}\u{0631}\u{0645}");
}
// In an LTR paragraph only the embedded Hebrew word is reversed.
#[test]
fn reorder_mixed_ltr_rtl() {
let text = "Hello \u{05E9}\u{05DC}\u{05D5}\u{05DD} World";
let result = reorder(text, ParagraphDirection::Ltr);
assert_eq!(result, "Hello \u{05DD}\u{05D5}\u{05DC}\u{05E9} World");
}
// Forcing LTR on LTR text changes nothing.
#[test]
fn reorder_forced_ltr() {
let text = "Hello";
assert_eq!(reorder(text, ParagraphDirection::Ltr), "Hello");
}
// Forcing an RTL paragraph does not reverse a Latin word.
#[test]
fn reorder_forced_rtl_on_ltr_text() {
let text = "ABC";
let result = reorder(text, ParagraphDirection::Rtl);
assert_eq!(result, "ABC");
}
// Digits following Hebrew text keep their left-to-right order.
#[test]
fn reorder_with_numbers() {
let text = "\u{05E9}\u{05DC}\u{05D5}\u{05DD} 123";
let result = reorder(text, ParagraphDirection::Auto);
assert!(result.contains("123"));
}
// A U+200E mark between letters does not drop either letter.
#[test]
fn reorder_with_lrm_mark() {
let text = "A\u{200E}B";
let result = reorder(text, ParagraphDirection::Auto);
assert!(result.contains('A'));
assert!(result.contains('B'));
}
// A U+200F mark between letters does not drop either letter.
#[test]
fn reorder_with_rlm_mark() {
let text = "A\u{200F}B";
let result = reorder(text, ParagraphDirection::Auto);
assert!(result.contains('A'));
assert!(result.contains('B'));
}
// ---- has_rtl ----
#[test]
fn has_rtl_empty() {
assert!(!has_rtl(""));
}
#[test]
fn has_rtl_pure_ltr() {
assert!(!has_rtl("Hello, world!"));
}
#[test]
fn has_rtl_hebrew() {
assert!(has_rtl("\u{05E9}\u{05DC}\u{05D5}\u{05DD}"));
}
#[test]
fn has_rtl_arabic() {
assert!(has_rtl("\u{0645}\u{0631}\u{062D}\u{0628}\u{0627}"));
}
#[test]
fn has_rtl_mixed() {
assert!(has_rtl("Hello \u{05E9}\u{05DC}\u{05D5}\u{05DD}"));
}
// The U+200F mark alone is enough to count as RTL content.
#[test]
fn has_rtl_with_rlm() {
assert!(has_rtl("A\u{200F}B"));
}
// ASCII digits are not treated as RTL.
#[test]
fn has_rtl_numbers_only() {
assert!(!has_rtl("12345"));
}
// ---- resolve_levels ----
#[test]
fn resolve_levels_empty() {
assert!(resolve_levels("", ParagraphDirection::Auto).is_empty());
}
// Every level of an LTR-only string is an LTR level.
#[test]
fn resolve_levels_pure_ltr() {
let levels = resolve_levels("ABC", ParagraphDirection::Auto);
assert!(!levels.is_empty());
for level in &levels {
assert!(level.is_ltr(), "Expected LTR level, got {:?}", level);
}
}
// Every level of a Hebrew-only string is an RTL level.
#[test]
fn resolve_levels_pure_rtl() {
let levels = resolve_levels("\u{05E9}\u{05DC}\u{05D5}\u{05DD}", ParagraphDirection::Auto);
assert!(!levels.is_empty());
for level in &levels {
assert!(level.is_rtl(), "Expected RTL level, got {:?}", level);
}
}
// ---- paragraph_level ----
// Empty text defaults to LTR.
#[test]
fn paragraph_level_empty() {
assert_eq!(paragraph_level(""), ParagraphDirection::Ltr);
}
#[test]
fn paragraph_level_ltr() {
assert_eq!(paragraph_level("Hello"), ParagraphDirection::Ltr);
}
#[test]
fn paragraph_level_rtl() {
assert_eq!(
paragraph_level("\u{05E9}\u{05DC}\u{05D5}\u{05DD}"),
ParagraphDirection::Rtl
);
}
// Mixed text that begins with Latin letters is detected as LTR.
#[test]
fn paragraph_level_mixed_starts_ltr() {
assert_eq!(
paragraph_level("Hello \u{05E9}\u{05DC}\u{05D5}\u{05DD}"),
ParagraphDirection::Ltr
);
}
// Mixed text that begins with Hebrew letters is detected as RTL.
#[test]
fn paragraph_level_mixed_starts_rtl() {
assert_eq!(
paragraph_level("\u{05E9}\u{05DC}\u{05D5}\u{05DD} Hello"),
ParagraphDirection::Rtl
);
}
// ---- is_rtl_char ----
// A Hebrew letter, an Arabic letter and U+200F are RTL; a Latin letter, a
// digit and a space are not.
#[test]
fn is_rtl_char_covers_ranges() {
assert!(is_rtl_char('\u{05D0}')); assert!(is_rtl_char('\u{0627}')); assert!(is_rtl_char('\u{200F}')); assert!(!is_rtl_char('A'));
assert!(!is_rtl_char('1'));
assert!(!is_rtl_char(' '));
}
// Sample code points from further blocks that must also classify as RTL.
#[test]
fn is_rtl_char_additional_ranges() {
let samples = [
'\u{FB1D}', '\u{FB50}', '\u{FE70}', '\u{10800}', '\u{10840}', '\u{10900}', '\u{10920}', '\u{10A00}', '\u{10B00}', '\u{1EE00}', ];
for sample in samples {
assert!(is_rtl_char(sample), "Expected RTL for {sample:?}");
}
}
// ---- BidiRun ----
// A run is empty exactly when `start == end`.
#[test]
fn run_is_empty() {
let empty = BidiRun {
start: 2,
end: 2,
level: Level::ltr(),
direction: Direction::Ltr,
};
assert!(empty.is_empty());
let non_empty = BidiRun {
start: 2,
end: 3,
level: Level::ltr(),
direction: Direction::Ltr,
};
assert!(!non_empty.is_empty());
}
// ---- BidiSegment ----
// With no forced base, the base direction follows the text.
#[test]
fn segment_base_direction() {
let ltr = BidiSegment::new("Hello", None);
assert_eq!(ltr.base_direction(), Direction::Ltr);
let rtl_text = "\u{05E9}\u{05DC}\u{05D5}\u{05DD}";
let rtl = BidiSegment::new(rtl_text, None);
assert_eq!(rtl.base_direction(), Direction::Rtl);
}
// The private helpers accept empty level slices.
#[test]
fn segment_compute_helpers_empty() {
let runs = BidiSegment::compute_runs(&[]);
assert!(runs.is_empty());
let order = BidiSegment::compute_visual_order(&[]);
assert!(order.is_empty());
}
// An empty segment has no characters, runs or index maps.
#[test]
fn segment_empty() {
let seg = BidiSegment::new("", None);
assert!(seg.is_empty());
assert_eq!(seg.len(), 0);
assert!(seg.runs.is_empty());
assert!(seg.visual_to_logical.is_empty());
assert!(seg.logical_to_visual.is_empty());
}
// LTR-only text: identity index maps and a single LTR run.
#[test]
fn segment_ltr_only() {
let seg = BidiSegment::new("Hello", None);
assert_eq!(seg.len(), 5);
assert_eq!(seg.chars, vec!['H', 'e', 'l', 'l', 'o']);
for i in 0..5 {
assert_eq!(seg.visual_pos(i), i);
assert_eq!(seg.logical_pos(i), i);
assert!(!seg.is_rtl(i));
}
assert_eq!(seg.runs.len(), 1);
assert_eq!(seg.runs[0].direction, Direction::Ltr);
assert_eq!(seg.runs[0].start, 0);
assert_eq!(seg.runs[0].end, 5);
assert_eq!(seg.visual_string(), "Hello");
}
// RTL-only text: fully reversed index maps and a single RTL run.
#[test]
fn segment_rtl_only() {
let text = "\u{05E9}\u{05DC}\u{05D5}\u{05DD}";
let seg = BidiSegment::new(text, None);
assert_eq!(seg.len(), 4);
assert_eq!(seg.visual_to_logical, vec![3, 2, 1, 0]);
assert_eq!(seg.logical_to_visual, vec![3, 2, 1, 0]);
for i in 0..4 {
assert!(seg.is_rtl(i));
}
assert_eq!(seg.runs.len(), 1);
assert_eq!(seg.runs[0].direction, Direction::Rtl);
assert_eq!(seg.visual_string(), "\u{05DD}\u{05D5}\u{05DC}\u{05E9}");
}
// Mixed text: the Latin parts keep their positions while the Hebrew word at
// logical 6..=9 is mirrored within visual 6..=9.
#[test]
fn segment_mixed_ltr_rtl() {
let text = "Hello \u{05E9}\u{05DC}\u{05D5}\u{05DD} World";
let seg = BidiSegment::new(text, Some(Direction::Ltr));
assert_eq!(seg.len(), 16);
assert_eq!(seg.visual_pos(0), 0); assert_eq!(seg.visual_pos(5), 5); assert_eq!(seg.visual_pos(6), 9); assert_eq!(seg.visual_pos(9), 6); assert_eq!(seg.visual_pos(11), 11);
assert!(!seg.is_rtl(0)); assert!(seg.is_rtl(6)); assert!(seg.is_rtl(9)); assert!(!seg.is_rtl(11));
assert!(seg.runs.len() >= 2);
}
// Digits after Hebrew text stay in order and resolve to a non-RTL level.
#[test]
fn segment_numbers_in_rtl() {
let text = "\u{05E9}\u{05DC}\u{05D5}\u{05DD} 123";
let seg = BidiSegment::new(text, None);
let visual = seg.visual_string();
assert!(
visual.contains("123"),
"Numbers should stay in LTR order: {visual}"
);
let num_start = text.chars().position(|c| c == '1').unwrap();
assert!(
!seg.is_rtl(num_start),
"Digit '1' should resolve to LTR level"
);
}
// Text with brackets still yields a valid permutation of all indices.
#[test]
fn segment_brackets_pairing() {
let text = "\u{05D0}(\u{05D1})\u{05D2}"; let seg = BidiSegment::new(text, Some(Direction::Rtl));
let visual = seg.visual_string();
assert_eq!(visual.chars().count(), text.chars().count());
let mut sorted_vtl = seg.visual_to_logical.clone();
sorted_vtl.sort();
let expected: Vec<usize> = (0..seg.len()).collect();
assert_eq!(
sorted_vtl, expected,
"visual_to_logical must be a valid permutation"
);
}
// Text with U+200E / U+200F marks still yields a valid permutation.
#[test]
fn segment_explicit_markers() {
let text = "A\u{200E}B\u{200F}C";
let seg = BidiSegment::new(text, None);
assert!(!seg.is_empty());
let mut sorted_vtl = seg.visual_to_logical.clone();
sorted_vtl.sort();
let expected: Vec<usize> = (0..seg.len()).collect();
assert_eq!(sorted_vtl, expected);
}
// Stepping right then left moves the visual cursor by one each time, and
// moving left at visual position 0 stays there.
#[test]
fn segment_cursor_movement() {
let text = "Hello \u{05E9}\u{05DC}\u{05D5}\u{05DD}";
let seg = BidiSegment::new(text, Some(Direction::Ltr));
let mut pos = 0;
for _ in 0..6 {
pos = seg.move_right(pos);
}
assert_eq!(seg.visual_cursor_pos(pos), 6);
pos = seg.move_left(pos);
assert_eq!(seg.visual_cursor_pos(pos), 5);
for _ in 0..5 {
pos = seg.move_left(pos);
}
assert_eq!(seg.visual_cursor_pos(pos), 0);
let same = seg.move_left(pos);
assert_eq!(seg.visual_cursor_pos(same), 0);
}
// Moving right at the right edge leaves the position unchanged.
#[test]
fn segment_cursor_at_boundary() {
let seg = BidiSegment::new("ABC", None);
let last = seg.move_right(seg.move_right(seg.move_right(0)));
assert_eq!(seg.visual_cursor_pos(last), 3);
let still_last = seg.move_right(last);
assert_eq!(still_last, last);
}
// `move_left` undoes `move_right` from every starting position.
#[test]
fn segment_double_toggle() {
let text = "Hello \u{05E9}\u{05DC}\u{05D5}\u{05DD} World";
let seg = BidiSegment::new(text, Some(Direction::Ltr));
for start in 0..seg.len() {
let right = seg.move_right(start);
if right != start {
let back = seg.move_left(right);
assert_eq!(
back, start,
"move_left(move_right({start})) should return {start}, got {back}"
);
}
}
}
// The segment's visual string agrees with the free `reorder` function.
#[test]
fn segment_visual_string_matches_reorder() {
let text = "Hello \u{05E9}\u{05DC}\u{05D5}\u{05DD} World";
let seg = BidiSegment::new(text, Some(Direction::Ltr));
let reordered = reorder(text, ParagraphDirection::Ltr);
assert_eq!(seg.visual_string(), reordered);
}
// Runs are contiguous and together cover every character exactly once.
#[test]
fn segment_run_coverage() {
let text = "Hello \u{05E9}\u{05DC}\u{05D5}\u{05DD} World";
let seg = BidiSegment::new(text, Some(Direction::Ltr));
let total_chars: usize = seg.runs.iter().map(|r| r.len()).sum();
assert_eq!(total_chars, seg.len());
for window in seg.runs.windows(2) {
assert_eq!(window[0].end, window[1].start);
}
if let Some(first) = seg.runs.first() {
assert_eq!(first.start, 0);
}
if let Some(last) = seg.runs.last() {
assert_eq!(last.end, seg.len());
}
}
// The two index maps are inverse permutations of each other.
#[test]
fn segment_permutation_validity() {
let texts = [
"Hello",
"\u{05E9}\u{05DC}\u{05D5}\u{05DD}",
"Hello \u{05E9}\u{05DC}\u{05D5}\u{05DD} World",
"ABC 123 \u{0645}\u{0631}\u{062D}\u{0628}\u{0627}",
"",
];
for text in texts {
let seg = BidiSegment::new(text, None);
let n = seg.len();
assert_eq!(seg.visual_to_logical.len(), n);
assert_eq!(seg.logical_to_visual.len(), n);
for i in 0..n {
assert_eq!(
seg.logical_to_visual[seg.visual_to_logical[i]], i,
"vtl->ltv roundtrip failed for text={text:?} at visual={i}"
);
assert_eq!(
seg.visual_to_logical[seg.logical_to_visual[i]], i,
"ltv->vtl roundtrip failed for text={text:?} at logical={i}"
);
}
}
}
// `char_at_visual` returns `None` one past the last character.
#[test]
fn segment_char_at_visual() {
let seg = BidiSegment::new("ABC", None);
assert_eq!(seg.char_at_visual(0), Some('A'));
assert_eq!(seg.char_at_visual(1), Some('B'));
assert_eq!(seg.char_at_visual(2), Some('C'));
assert_eq!(seg.char_at_visual(3), None);
}
// In RTL-only text the logical end is the visual left edge: moving right
// steps back through the text, moving left stays put.
#[test]
fn cursor_movement_rtl_insertion_point() {
let text = "\u{05D3}\u{05D4}\u{05D5}"; let seg = BidiSegment::new(text, None);
assert_eq!(seg.base_direction(), Direction::Rtl);
let start_pos = 3;
let right = seg.move_right(start_pos);
assert_eq!(
right, 2,
"move_right from RTL end should go to penultimate logical char (visual index 1)"
);
let left = seg.move_left(start_pos);
assert_eq!(left, 3, "move_left from RTL end should stay at end");
}
}