Skip to main content

forme/text/
bidi.rs

1//! # BiDi Text Support
2//!
3//! Implements UAX#9 (Unicode Bidirectional Algorithm) for mixed LTR/RTL text.
4//! Uses `unicode-bidi` for analysis and `unicode-script` for script detection.
5//!
6//! The pipeline:
7//! 1. Analyze paragraph direction → split into directional runs
8//! 2. Each run is shaped independently with the correct direction
9//! 3. After line breaking, runs on each line are visually reordered
10//! 4. Glyphs within RTL runs are reversed
11
12use crate::layout::PositionedGlyph;
13use crate::style::Direction;
14use unicode_bidi::{BidiInfo, Level};
15
16/// A contiguous run of text with a single BiDi direction.
17#[derive(Debug, Clone)]
18pub struct BidiRun {
19    /// Start index in chars of the original text.
20    pub char_start: usize,
21    /// End index (exclusive) in chars.
22    pub char_end: usize,
23    /// BiDi embedding level (even = LTR, odd = RTL).
24    pub level: Level,
25    /// Convenience: true if this run is right-to-left.
26    pub is_rtl: bool,
27}
28
29/// Analyze text for BiDi runs using the Unicode Bidirectional Algorithm.
30///
31/// Returns a list of `BidiRun` covering the entire text. For pure LTR text,
32/// returns a single run. `direction` controls the paragraph-level direction:
33/// - `Ltr` → paragraph is LTR
34/// - `Rtl` → paragraph is RTL
35/// - `Auto` → detect from first strong character
36pub fn analyze_bidi(text: &str, direction: Direction) -> Vec<BidiRun> {
37    if text.is_empty() {
38        return vec![];
39    }
40
41    let para_level = match direction {
42        Direction::Ltr => Some(Level::ltr()),
43        Direction::Rtl => Some(Level::rtl()),
44        Direction::Auto => None, // BidiInfo will auto-detect
45    };
46
47    let bidi_info = BidiInfo::new(text, para_level);
48
49    // BidiInfo may contain multiple paragraphs (split by \n), but for our
50    // text layout each paragraph is already a separate text node. We only
51    // process the first paragraph.
52    if bidi_info.paragraphs.is_empty() {
53        return vec![];
54    }
55
56    let paragraph = &bidi_info.paragraphs[0];
57    let levels = &bidi_info.levels;
58
59    // Build runs from contiguous same-level chars
60    let chars: Vec<char> = text.chars().collect();
61    let mut runs = Vec::new();
62    let mut run_start = 0;
63
64    // The levels array is indexed by byte position in the paragraph range
65    let para_start = paragraph.range.start;
66    let para_end = paragraph.range.end;
67
68    // Extract per-char levels
69    let mut char_levels = Vec::with_capacity(chars.len());
70    for (byte_idx, _ch) in text.char_indices() {
71        if byte_idx >= para_start && byte_idx < para_end {
72            char_levels.push(levels[byte_idx]);
73        }
74    }
75
76    if char_levels.is_empty() {
77        return vec![];
78    }
79
80    for i in 1..char_levels.len() {
81        if char_levels[i] != char_levels[run_start] {
82            runs.push(BidiRun {
83                char_start: run_start,
84                char_end: i,
85                level: char_levels[run_start],
86                is_rtl: char_levels[run_start].is_rtl(),
87            });
88            run_start = i;
89        }
90    }
91    // Final run
92    runs.push(BidiRun {
93        char_start: run_start,
94        char_end: char_levels.len(),
95        level: char_levels[run_start],
96        is_rtl: char_levels[run_start].is_rtl(),
97    });
98
99    runs
100}
101
102/// Check if text is purely LTR (no RTL characters at all).
103/// This is a fast path to skip BiDi processing for the common case.
104pub fn is_pure_ltr(text: &str, direction: Direction) -> bool {
105    if matches!(direction, Direction::Rtl) {
106        return false;
107    }
108
109    // Quick scan: if no char has RTL BiDi class, it's pure LTR
110    !text.chars().any(is_rtl_char)
111}
112
/// Check whether a character may introduce right-to-left text.
///
/// This is a block-range approximation of the RTL BiDi classes (R, AL, AN)
/// plus the explicit RTL formatting characters. It is deliberately a
/// conservative superset: the ranges also contain some digits, marks and
/// punctuation. A false positive only costs a trip through the full BiDi
/// algorithm, whereas a false negative would make `is_pure_ltr` skip BiDi
/// processing for text that needs it. For that reason the ranges have no
/// gaps: U+0860–U+089F (Syriac Supplement, Arabic Extended-B) and the
/// default-RTL tail U+1EF00–U+1EFFF are included.
fn is_rtl_char(ch: char) -> bool {
    matches!(ch,
        // Hebrew, Arabic, Syriac, Arabic Supplement, Thaana, NKo, Samaritan,
        // Mandaic, Syriac Supplement, Arabic Extended-B, Arabic Extended-A
        '\u{0590}'..='\u{08FF}' |
        '\u{FB1D}'..='\u{FB4F}' |   // Hebrew Presentation Forms
        '\u{FB50}'..='\u{FDFF}' |   // Arabic Presentation Forms-A
        '\u{FE70}'..='\u{FEFF}' |   // Arabic Presentation Forms-B
        '\u{10800}'..='\u{10FFF}' | // Various RTL scripts
        '\u{1E800}'..='\u{1EFFF}' | // More RTL scripts and reserved-RTL space
        '\u{200F}' |                // RTL Mark
        '\u{202B}' |                // RTL Embedding
        '\u{202E}' |                // RTL Override
        '\u{2067}'                  // RTL Isolate
    )
}
138
139/// Reorder positioned glyphs on a line for visual display.
140///
141/// Takes glyphs in logical order with their BiDi levels and produces
142/// visual order. RTL runs are reversed so they display correctly.
143pub fn reorder_line_glyphs(
144    mut glyphs: Vec<PositionedGlyph>,
145    levels: &[Level],
146) -> Vec<PositionedGlyph> {
147    if glyphs.is_empty() || levels.is_empty() {
148        return glyphs;
149    }
150
151    // Use the standard BiDi reordering algorithm (L2):
152    // Find the highest level, then reverse all runs at that level and above,
153    // working down to the lowest odd level.
154    let min_level = levels.iter().copied().min().unwrap_or(Level::ltr());
155    let max_level = levels.iter().copied().max().unwrap_or(Level::ltr());
156
157    // Only reorder if there's actually an RTL level
158    if !max_level.is_rtl() {
159        return glyphs;
160    }
161
162    // L2: For each level from max down to the lowest odd level,
163    // reverse any contiguous run of glyphs at that level or higher
164    let min_odd = if min_level.is_rtl() {
165        min_level
166    } else {
167        Level::rtl() // level 1
168    };
169
170    let mut current_level = max_level;
171    while current_level >= min_odd {
172        let mut i = 0;
173        while i < glyphs.len() {
174            if levels.get(i).copied().unwrap_or(Level::ltr()) >= current_level {
175                // Find the end of this run at >= current_level
176                let start = i;
177                while i < glyphs.len()
178                    && levels.get(i).copied().unwrap_or(Level::ltr()) >= current_level
179                {
180                    i += 1;
181                }
182                // Reverse the run
183                glyphs[start..i].reverse();
184            } else {
185                i += 1;
186            }
187        }
188        // Move to next lower level
189        if current_level.number() == 0 {
190            break;
191        }
192        current_level = Level::new(current_level.number() - 1).unwrap_or(Level::ltr());
193    }
194
195    glyphs
196}
197
198/// Reposition glyphs after visual reordering.
199///
200/// After `reorder_line_glyphs`, x_offsets still reflect logical order.
201/// This recalculates x positions from left to right based on advance widths.
202pub fn reposition_after_reorder(glyphs: &mut [PositionedGlyph], start_x: f64) {
203    let mut x = start_x;
204    for g in glyphs.iter_mut() {
205        g.x_offset = x;
206        x += g.x_advance;
207    }
208}
209
/// Build a byte-offset → char-index map for a string.
///
/// The result has `text.len() + 1` entries. Entry `b` is the index of the
/// char that contains byte `b`; every byte of a multi-byte char (not only its
/// first byte) maps to that char. The final entry, at `text.len()`, is the
/// total number of chars, so exclusive end offsets can be converted too.
// Not called anywhere in this module yet; kept as a utility for byte-indexed
// BiDi data.
#[allow(dead_code)]
fn build_byte_to_char_map(text: &str) -> Vec<usize> {
    let mut map = Vec::with_capacity(text.len() + 1);
    let mut char_count = 0;
    for ch in text.chars() {
        // One entry per encoded byte, all pointing at the same char.
        map.extend(std::iter::repeat(char_count).take(ch.len_utf8()));
        char_count += 1;
    }
    map.push(char_count);
    map
}
222
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain ASCII text is pure LTR unless the caller forces an RTL paragraph.
    #[test]
    fn test_pure_ltr() {
        let text = "Hello World";
        assert!(is_pure_ltr(text, Direction::Ltr));
        assert!(is_pure_ltr(text, Direction::Auto));
        assert!(!is_pure_ltr(text, Direction::Rtl));
    }

    /// Arabic and Hebrew samples must never take the pure-LTR fast path.
    #[test]
    fn test_rtl_detection() {
        for sample in &["مرحبا", "שלום"] {
            assert!(!is_pure_ltr(sample, Direction::Ltr));
        }
    }

    /// Pure LTR text collapses into one LTR run spanning all 11 chars.
    #[test]
    fn test_analyze_bidi_pure_ltr() {
        let runs = analyze_bidi("Hello World", Direction::Ltr);
        assert_eq!(runs.len(), 1);

        let only = &runs[0];
        assert!(!only.is_rtl);
        assert_eq!(only.char_start, 0);
        assert_eq!(only.char_end, 11);
    }

    /// Pure RTL text in an RTL paragraph is a single RTL run.
    #[test]
    fn test_analyze_bidi_pure_rtl() {
        let runs = analyze_bidi("مرحبا", Direction::Rtl);
        assert_eq!(runs.len(), 1);
        assert!(runs[0].is_rtl);
    }

    /// Mixed text ("Hello مرحبا World", expected LTR, RTL, LTR) must split
    /// into at least two runs, starting LTR and containing an RTL run.
    #[test]
    fn test_analyze_bidi_mixed() {
        let runs = analyze_bidi("Hello مرحبا World", Direction::Ltr);

        let run_count = runs.len();
        assert!(run_count >= 2, "Expected at least 2 runs, got {}", run_count);

        // The first run should be LTR (Hello + space)
        assert!(!runs[0].is_rtl);

        let has_rtl_run = runs.iter().any(|run| run.is_rtl);
        assert!(has_rtl_run, "Should have an RTL run");
    }

    /// Empty input produces no runs at all.
    #[test]
    fn test_analyze_bidi_empty() {
        assert!(analyze_bidi("", Direction::Ltr).is_empty());
    }

    /// Style-system integration: an RTL direction resolves to right alignment.
    #[test]
    fn test_rtl_direction_defaults_right_align() {
        use crate::style::{Style, TextAlign};

        let rtl_style = Style {
            direction: Some(Direction::Rtl),
            ..Default::default()
        };
        let resolved = rtl_style.resolve(None, 500.0);
        assert!(matches!(resolved.text_align, TextAlign::Right));
    }

    /// Style-system integration: an LTR direction resolves to left alignment.
    #[test]
    fn test_ltr_direction_defaults_left_align() {
        use crate::style::{Style, TextAlign};

        let ltr_style = Style {
            direction: Some(Direction::Ltr),
            ..Default::default()
        };
        let resolved = ltr_style.resolve(None, 500.0);
        assert!(matches!(resolved.text_align, TextAlign::Left));
    }
}