use unicode_bidi::BidiInfo;
/// Cheap pre-check: reports whether `text` contains at least one character
/// that `crate::text::rtl_detector::is_rtl_text` classifies as right-to-left.
///
/// Each character is handed to the detector as its `u32` code point. Scanning
/// stops at the first hit; an empty string yields `false`.
pub fn looks_rtl(text: &str) -> bool {
    for ch in text.chars() {
        if crate::text::rtl_detector::is_rtl_text(u32::from(ch)) {
            return true;
        }
    }
    false
}
/// Returns `true` for the characters that terminate a bidi paragraph
/// (Unicode `Bidi_Class` B): LF, CR, U+001C..=U+001E, NEL and PARAGRAPH SEPARATOR.
fn is_paragraph_separator(c: char) -> bool {
    matches!(
        c,
        '\n' | '\r' | '\u{1C}'..='\u{1E}' | '\u{85}' | '\u{2029}'
    )
}

/// Applies the bidi line reordering of `unicode_bidi` to every paragraph of
/// `text` and returns the concatenated result.
///
/// * Text for which [`looks_rtl`] is `false` is returned unchanged, without
///   running the bidi algorithm at all.
/// * Otherwise each paragraph found by `BidiInfo::new(text, None)` is reordered
///   as a single line via `BidiInfo::reorder_line`.
///
/// The paragraph separator that ends a paragraph (e.g. `'\n'`) is *not* passed
/// to `reorder_line`. In a right-to-left paragraph the separator takes the
/// paragraph level and would be reversed together with the text, ending up at
/// the *front* of the line and gluing consecutive lines together. Only the
/// paragraph body is reordered; the separator is appended afterwards, unchanged.
///
/// NOTE(review): the function name implies the input is in visual order and
/// that running the reordering again recovers logical order — this relies on
/// the reordering being its own inverse for the inputs the callers feed in;
/// confirm against callers.
pub fn reorder_visual_to_logical(text: &str) -> String {
    if !looks_rtl(text) {
        return text.to_string();
    }

    let info = BidiInfo::new(text, None);
    if info.paragraphs.is_empty() {
        return text.to_string();
    }

    let mut out = String::with_capacity(text.len());
    for para in &info.paragraphs {
        let para_start = para.range.start;
        let para_end = para.range.end;

        // Split the paragraph into its body and its trailing separator (if any).
        let para_text = &text[para_start..para_end];
        let body_len = para_text.trim_end_matches(is_paragraph_separator).len();
        let body_end = para_start + body_len;

        // Reorder only the body so the line break keeps its position.
        out.push_str(&info.reorder_line(para, para_start..body_end));
        out.push_str(&text[body_end..para_end]);
    }
    out
}
/// Reports whether the first bidi paragraph of `text` has a right-to-left
/// embedding level.
///
/// Text for which [`looks_rtl`] is `false` short-circuits to `false` without
/// running the bidi algorithm. Otherwise `BidiInfo::new(text, None)` is
/// computed (no default level is supplied) and only the first paragraph is
/// consulted; if there is no paragraph at all the answer is `false`.
pub fn paragraph_is_rtl(text: &str) -> bool {
    if looks_rtl(text) {
        let bidi = BidiInfo::new(text, None);
        match bidi.paragraphs.first() {
            Some(first_para) => first_para.level.is_rtl(),
            None => false,
        }
    } else {
        false
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII-only text and the empty string must not be flagged.
    #[test]
    fn looks_rtl_pure_ascii_is_false() {
        assert!(!looks_rtl("hello world"));
        assert!(!looks_rtl(""));
    }

    /// Arabic is flagged, both alone and mixed with Latin text and digits.
    #[test]
    fn looks_rtl_arabic_is_true() {
        assert!(looks_rtl("مرحبا"));
        assert!(looks_rtl("year 2024 عام"));
    }

    /// Hebrew is flagged.
    #[test]
    fn looks_rtl_hebrew_is_true() {
        assert!(looks_rtl("שלום"));
    }

    /// Text without RTL characters comes back untouched.
    #[test]
    fn reorder_pure_ltr_is_identity() {
        let input = "Hello, world!";
        let output = reorder_visual_to_logical(input);
        assert_eq!(output, input);
    }

    /// One pass changes a pure-Hebrew word; a second pass restores it.
    #[test]
    fn reorder_is_a_visual_to_logical_converter_not_idempotent() {
        let logical_hebrew = "בנימין";

        let once = reorder_visual_to_logical(logical_hebrew);
        assert_ne!(once, logical_hebrew);

        let twice = reorder_visual_to_logical(&once);
        assert_eq!(twice, logical_hebrew);
    }

    /// The digit run `2024` stays intact and no characters are gained or lost.
    #[test]
    fn reorder_arabic_with_numerals_keeps_digits_logical() {
        let logical = "عام 2024 كان جيدا";
        let result = reorder_visual_to_logical(logical);
        assert!(result.contains("2024"), "expected `2024` in reordered line, got {:?}", result);

        let chars_out = result.chars().count();
        let chars_in = logical.chars().count();
        assert_eq!(chars_out, chars_in);
    }

    /// An all-Arabic sentence is an RTL paragraph.
    #[test]
    fn paragraph_is_rtl_for_arabic() {
        assert!(paragraph_is_rtl("هذا نص عربي"));
    }

    /// An all-English sentence is not an RTL paragraph.
    #[test]
    fn paragraph_is_not_rtl_for_pure_english() {
        assert!(!paragraph_is_rtl("This is English"));
    }

    /// For single-character strings, `looks_rtl` must agree with the detector
    /// it is built on, at and around the block boundaries probed here.
    #[test]
    fn looks_rtl_delegates_to_rtl_detector() {
        let probes: &[u32] = &[
            0x058F, 0x0590, 0x05FF, 0x0600, 0x0633, 0x06FF, 0x0700, 0x074F, 0x0750, 0x077F, 0x0780,
            0x08A0, 0x08FF, 0x0900, 0xFB4F, 0xFB50, 0xFDFF, 0xFE00, 0xFE70, 0xFEFE, 0xFEFF, 0xFF00,
        ];
        for &cp in probes {
            // Skip values that are not valid Unicode scalar values.
            let c = match char::from_u32(cp) {
                Some(c) => c,
                None => continue,
            };
            let single = c.to_string();
            let bidi_says = looks_rtl(&single);
            let detector_says = crate::text::rtl_detector::is_rtl_text(cp);
            assert_eq!(
                bidi_says, detector_says,
                "U+{:04X}: looks_rtl={} but rtl_detector::is_rtl_text={}",
                cp, bidi_says, detector_says
            );
        }
    }

    /// Same words, different order: the Latin-first string is expected to be
    /// LTR and the Arabic-first string RTL.
    #[test]
    fn paragraph_is_rtl_respects_dominant_direction() {
        assert!(!paragraph_is_rtl("Foo بار 1"));
        assert!(paragraph_is_rtl("بار Foo 1"));
    }

    /// First and last code points of every block the detector is expected to cover.
    #[test]
    fn looks_rtl_covers_all_supported_blocks() {
        let cases: &[(u32, &str)] = &[
            (0x0590, "Hebrew start"),
            (0x05F4, "Hebrew end-ish"),
            (0x0600, "Arabic start"),
            (0x06FF, "Arabic end"),
            (0x0750, "Arabic Supplement start"),
            (0x077F, "Arabic Supplement end"),
            (0x08A0, "Arabic Extended-A start"),
            (0x08FF, "Arabic Extended-A end"),
            (0xFB50, "Arabic Presentation Forms-A start"),
            (0xFDFF, "Arabic Presentation Forms-A end"),
            (0xFE70, "Arabic Presentation Forms-B start"),
            (0xFEFF, "Arabic Presentation Forms-B end"),
        ];
        for &(cp, name) in cases {
            match char::from_u32(cp) {
                Some(c) => {
                    let s = c.to_string();
                    assert!(looks_rtl(&s), "looks_rtl({:?} {}) should be true", s, name);
                }
                None => {}
            }
        }
    }

    /// CJK, Greek, digits, punctuation and accented Latin are not RTL.
    #[test]
    fn looks_rtl_rejects_neutral_and_cjk() {
        let samples = [
            "中文",
            "日本語",
            "α β γ",
            "1234567890",
            "!@#$%^&*()",
            "café",
            "naïve",
        ];
        for sample in samples.iter().copied() {
            assert!(!looks_rtl(sample), "looks_rtl({:?}) should be false", sample);
        }
    }

    /// More LTR-only inputs (empty, single char, multi-line, symbols) that
    /// must pass through unchanged.
    #[test]
    fn reorder_pure_ltr_identity_extras() {
        let samples = [
            "",
            "a",
            "Hello, world!",
            "Multi-line\nstays unchanged",
            "Numbers: 1234 5678",
            "Symbols: !@#$%^&*",
            "Whitespace between words",
        ];
        for sample in samples.iter().copied() {
            let reordered = reorder_visual_to_logical(sample);
            assert_eq!(reordered, sample, "identity broken on {:?}", sample);
        }
    }

    /// Reordering permutes characters; it never adds or drops any.
    #[test]
    fn reorder_preserves_character_count() {
        let samples = [
            "عربي",
            "هذا نص عربي للاختبار",
            "year 2024 عام جيد",
            "שלום world",
            "Mixed: عربي + 123 + Latin",
        ];
        for sample in samples.iter().copied() {
            let out = reorder_visual_to_logical(sample);
            let count_after = out.chars().count();
            let count_before = sample.chars().count();
            assert_eq!(
                count_after, count_before,
                "char count changed: {:?} -> {:?}",
                sample, out
            );
        }
    }

    /// A Latin word embedded in Arabic text must survive as a readable token.
    #[test]
    fn reorder_keeps_embedded_ltr_token_contiguous() {
        let line = "هذا منتج Microsoft الجديد";
        let reordered = reorder_visual_to_logical(line);
        let token_intact = reordered.contains("Microsoft");
        assert!(
            token_intact,
            "embedded LTR token reversed: {:?} -> {:?}",
            line,
            reordered
        );
    }

    /// Empty, blank and digit-only inputs are not RTL; Arabic-first mixed text is.
    #[test]
    fn paragraph_is_rtl_edges() {
        assert!(!paragraph_is_rtl(""));
        assert!(!paragraph_is_rtl(" "));
        assert!(!paragraph_is_rtl("123 456"));
        assert!(paragraph_is_rtl("نص with English"));
    }
}