/// Base writing direction of a piece of text.
///
/// [`TextDirection::Ltr`] is the value produced by `Default`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum TextDirection {
    /// Left-to-right text.
    #[default]
    Ltr,
    /// Right-to-left text.
    Rtl,
}
/// Returns `true` when `ch` lies in a Unicode range reserved for
/// right-to-left scripts.
///
/// This is a coarse, range-based check (no per-character bidi class lookup).
/// The ranges covered are:
///
/// * `U+0590..=U+08FF` — Hebrew, Arabic, Syriac, Thaana, NKo, Samaritan,
///   Mandaic and the Arabic extensions,
/// * `U+FB1D..=U+FDFF` — Hebrew presentation forms and Arabic Presentation
///   Forms-A,
/// * `U+FE70..=U+FEFF` — Arabic Presentation Forms-B,
/// * `U+10800..=U+10FFF` — supplementary-plane RTL scripts (Phoenician,
///   Old Turkic, Hanifi Rohingya, ...),
/// * `U+1E800..=U+1EFFF` — Mende Kikakui, Adlam and the Arabic mathematical
///   alphabetic symbols.
///
/// The last two ranges are the areas the Unicode Bidirectional Algorithm
/// assigns a right-to-left default class to; without them, text written in
/// e.g. Adlam would be reported as left-to-right.
fn is_rtl_char(ch: char) -> bool {
    matches!(
        u32::from(ch),
        0x0590..=0x08FF
            | 0xFB1D..=0xFDFF
            | 0xFE70..=0xFEFF
            | 0x10800..=0x10FFF
            | 0x1E800..=0x1EFFF
    )
}
/// Guesses the base direction of markdown `text` from its first meaningful
/// alphabetic character.
///
/// The input is scanned front to back while stepping over markdown syntax:
///
/// * a run of `#` at the start of a line, plus the spaces after it,
/// * emphasis markers `*` and `_`, and the opening `[` of a link,
/// * inline code: everything from a backtick up to and including the next
///   backtick (or to the end of the input if there is no closing backtick),
/// * link targets: after `]`, a parenthesised `(...)` section is skipped up
///   to and including the next `)` (or to the end of the input),
/// * list / block-quote markers at the start of a line, together with any
///   following marker characters, ASCII digits, `.` and then spaces.
///
/// The first remaining alphabetic character decides the result:
/// [`TextDirection::Rtl`] if `is_rtl_char` accepts it, otherwise
/// [`TextDirection::Ltr`]. Input without any such character yields
/// [`TextDirection::Ltr`].
pub fn detect_text_direction(text: &str) -> TextDirection {
    // Consumes upcoming characters for as long as `keep_skipping` accepts them.
    fn skip_while(
        cursor: &mut std::iter::Peekable<std::str::CharIndices<'_>>,
        keep_skipping: impl Fn(char) -> bool,
    ) {
        while cursor.next_if(|&(_, c)| keep_skipping(c)).is_some() {}
    }

    let raw = text.as_bytes();
    let mut cursor = text.char_indices().peekable();

    while let Some((offset, current)) = cursor.next() {
        match current {
            // Heading marker: drop the remaining hashes and the padding after them.
            '#' if is_line_start(raw, offset) => {
                skip_while(&mut cursor, |c| c == '#');
                skip_while(&mut cursor, |c| c == ' ');
            }
            // Emphasis markers and the opening bracket of a link carry no text.
            '*' | '_' | '[' => {}
            // Inline code: ignore the span and its closing backtick, if any.
            '`' => {
                skip_while(&mut cursor, |c| c != '`');
                let _ = cursor.next();
            }
            // End of link text: ignore a directly following `(target)`.
            ']' => {
                if cursor.next_if(|&(_, c)| c == '(').is_some() {
                    skip_while(&mut cursor, |c| c != ')');
                    let _ = cursor.next();
                }
            }
            // List or block-quote marker opening a line.
            _ if is_list_or_quote_char(current) && is_line_start(raw, offset) => {
                skip_while(&mut cursor, |c| {
                    is_list_or_quote_char(c) || c.is_ascii_digit() || c == '.'
                });
                skip_while(&mut cursor, |c| c == ' ');
            }
            // The first real letter settles the direction.
            _ if current.is_alphabetic() => {
                return if is_rtl_char(current) {
                    TextDirection::Rtl
                } else {
                    TextDirection::Ltr
                };
            }
            // Digits, punctuation, whitespace: keep looking.
            _ => {}
        }
    }

    TextDirection::Ltr
}
/// Reports whether `byte_offset` sits at the start of a line in `bytes`,
/// ignoring leading indentation.
///
/// The bytes before `byte_offset` are examined from the nearest one
/// backwards. Spaces and tabs are skipped; the position counts as a line
/// start when the first other byte found is `\n`, or when the beginning of
/// the slice is reached without finding one.
///
/// # Panics
///
/// Panics if `byte_offset` is greater than `bytes.len()`.
fn is_line_start(bytes: &[u8], byte_offset: usize) -> bool {
    let nearest_non_blank = bytes[..byte_offset]
        .iter()
        .rev()
        .copied()
        .find(|&b| !matches!(b, b' ' | b'\t'));

    // `None` means only indentation (or nothing at all) precedes the offset.
    matches!(nearest_non_blank, None | Some(b'\n'))
}
/// Returns `true` for the characters treated as list-item or block-quote
/// markers: `>`, `-`, `+` and `*`.
fn is_list_or_quote_char(ch: char) -> bool {
    ['>', '-', '+', '*'].contains(&ch)
}
#[cfg(test)]
mod tests {
    use super::{TextDirection, detect_text_direction};

    /// Asserts that `input` is detected as left-to-right.
    #[track_caller]
    fn expect_ltr(input: &str) {
        assert_eq!(detect_text_direction(input), TextDirection::Ltr);
    }

    /// Asserts that `input` is detected as right-to-left.
    #[track_caller]
    fn expect_rtl(input: &str) {
        assert_eq!(detect_text_direction(input), TextDirection::Rtl);
    }

    // Plain text in a single script.

    #[test]
    fn english_text() {
        expect_ltr("Hello world");
    }

    #[test]
    fn hebrew_text() {
        expect_rtl("שלום עולם");
    }

    #[test]
    fn arabic_text() {
        expect_rtl("مرحبا بالعالم");
    }

    // Markdown syntax in front of the first letter is ignored.

    #[test]
    fn heading_with_hebrew() {
        expect_rtl("## שלום");
    }

    #[test]
    fn bold_english() {
        expect_ltr("**hello**");
    }

    #[test]
    fn bold_arabic() {
        expect_rtl("**مرحبا**");
    }

    #[test]
    fn link_with_hebrew_text() {
        expect_rtl("[שלום](https://example.com)");
    }

    #[test]
    fn inline_code_then_hebrew() {
        expect_rtl("`code` שלום");
    }

    // Digits never decide the direction.

    #[test]
    fn numbers_then_english() {
        expect_ltr("123 hello");
    }

    #[test]
    fn numbers_then_arabic() {
        expect_rtl("123 مرحبا");
    }

    // Inputs without any letter fall back to left-to-right.

    #[test]
    fn empty_string() {
        expect_ltr("");
    }

    #[test]
    fn only_numbers() {
        expect_ltr("12345");
    }

    // Block-level markers at the start of a line are ignored.

    #[test]
    fn list_item_hebrew() {
        expect_rtl("- שלום");
    }

    #[test]
    fn blockquote_arabic() {
        expect_rtl("> مرحبا");
    }

    // In mixed text the first letter wins.

    #[test]
    fn mixed_ltr_first() {
        expect_ltr("Hello שלום");
    }

    #[test]
    fn mixed_rtl_first() {
        expect_rtl("שלום Hello");
    }

    // Non-Latin left-to-right scripts.

    #[test]
    fn cjk_is_ltr() {
        expect_ltr("你好世界");
    }

    #[test]
    fn cyrillic_is_ltr() {
        expect_ltr("Привет мир");
    }
}