Skip to main content

oxitext_layout/
bidi.rs

1//! UAX #9 Unicode Bidirectional Algorithm.
2//!
3//! Wraps the `unicode-bidi` crate to provide paragraph-level bidi analysis,
4//! resolving visual run order and embedding levels for mixed-direction text.
5
6use unicode_bidi::{BidiInfo, Level};
7
/// A run of text with a uniform bidi embedding level.
///
/// Bidi runs are produced by [`BidiParagraph::new`] and represent contiguous
/// slices of the source string that share the same embedding level.  Even-
/// numbered levels are LTR; odd-numbered levels are RTL.
///
/// `start..end` is a half-open byte range into the analysed string.  The type
/// is plain data, so it can be compared and hashed directly.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BidiRun {
    /// Byte-offset start of this run in the source string (inclusive).
    pub start: usize,
    /// Byte-offset end of this run in the source string (exclusive).
    pub end: usize,
    /// UAX #9 embedding level shared by the whole run.  Even levels are
    /// left-to-right and odd levels are right-to-left (0 = LTR, 1 = RTL,
    /// 2 = LTR nested inside RTL, and so on).
    pub level: u8,
}
22
23/// Result of paragraph-level bidi analysis.
24///
25/// Call [`BidiParagraph::new`] to analyse a string, then inspect
26/// [`BidiParagraph::runs`] for the visual-order run sequence.
27pub struct BidiParagraph {
28    runs: Vec<BidiRun>,
29    base_level: u8,
30    /// Per-byte embedding levels for the source text, indexed by UTF-8 byte offset.
31    levels: Vec<Level>,
32}
33
34impl BidiParagraph {
35    /// Analyse a paragraph for bidi runs.
36    ///
37    /// The `base_rtl` argument controls the paragraph base direction:
38    /// - `None`  — auto-detect via UAX #9 rules P2/P3 (recommended).
39    /// - `Some(true)`  — force RTL base direction.
40    /// - `Some(false)` — force LTR base direction.
41    ///
42    /// The returned runs are in *visual* order (as they would appear on screen),
43    /// not logical order.
44    pub fn new(text: &str, base_rtl: Option<bool>) -> Self {
45        let hint = match base_rtl {
46            Some(true) => Some(Level::rtl()),
47            Some(false) => Some(Level::ltr()),
48            // Pass None to let BidiInfo apply P2/P3 auto-detection.
49            None => None,
50        };
51
52        let bidi = BidiInfo::new(text, hint);
53
54        // Clone per-byte levels before consuming `bidi` in the run-collecting loop.
55        let levels = bidi.levels.clone();
56
57        // Collect visual-order runs from every paragraph in the text.
58        let mut runs: Vec<BidiRun> = Vec::new();
59        for para in &bidi.paragraphs {
60            let para_range = para.range.start..para.range.end;
61            let (_run_levels, run_ranges) = bidi.visual_runs(para, para_range);
62            for run_range in run_ranges {
63                // Use the byte-level embedding level at the run's start position.
64                // `bidi.levels` is guaranteed to be indexed by UTF-8 byte offset,
65                // and run boundaries always fall on character boundaries.
66                let level = if run_range.start < bidi.levels.len() {
67                    bidi.levels[run_range.start].number()
68                } else {
69                    para.level.number()
70                };
71                runs.push(BidiRun {
72                    start: run_range.start,
73                    end: run_range.end,
74                    level,
75                });
76            }
77        }
78
79        // Resolve the base level from the first paragraph (or default to LTR).
80        let base_level = bidi
81            .paragraphs
82            .first()
83            .map(|p| p.level.number())
84            .unwrap_or(0);
85
86        BidiParagraph {
87            runs,
88            base_level,
89            levels,
90        }
91    }
92
93    /// Returns the resolved visual-order bidi runs.
94    pub fn runs(&self) -> &[BidiRun] {
95        &self.runs
96    }
97
98    /// Returns the resolved paragraph base embedding level.
99    pub fn base_level(&self) -> u8 {
100        self.base_level
101    }
102
103    /// Returns `true` if the paragraph base direction is RTL (odd base level).
104    pub fn is_rtl(&self) -> bool {
105        self.base_level % 2 == 1
106    }
107
108    /// Returns the per-byte UAX #9 embedding levels for the source text.
109    ///
110    /// The returned slice is indexed by UTF-8 byte offset. Multi-byte characters
111    /// have their level repeated for each byte.  Use the cluster byte offset from
112    /// a [`oxitext_core::ShapedGlyph`] to look up the level for that glyph.
113    pub fn levels(&self) -> &[Level] {
114        &self.levels
115    }
116}
117
#[cfg(test)]
mod tests {
    use super::*;

    /// Three Hebrew letters (alef, bet, gimel); each is two bytes in UTF-8.
    const HEBREW: &str = "\u{05D0}\u{05D1}\u{05D2}";

    #[test]
    fn ltr_paragraph_base_level_is_even() {
        let para = BidiParagraph::new("hello", Some(false));
        assert!(!para.is_rtl(), "LTR forced base should not be RTL");
        assert_eq!(para.base_level(), 0);
    }

    #[test]
    fn rtl_forced_base_level_is_odd() {
        let para = BidiParagraph::new("hello", Some(true));
        assert!(para.is_rtl(), "RTL forced base should be RTL");
        assert_eq!(para.base_level(), 1);
    }

    #[test]
    fn auto_detection_follows_first_strong_character() {
        // With no forced direction, rules P2/P3 pick the base level from the
        // first strong character of the paragraph.
        assert!(!BidiParagraph::new("hello", None).is_rtl());
        assert!(BidiParagraph::new(HEBREW, None).is_rtl());
    }

    #[test]
    fn pure_ltr_text_is_a_single_level_zero_run() {
        let text = "hello";
        let para = BidiParagraph::new(text, None);
        let runs = para.runs();
        assert_eq!(runs.len(), 1);
        assert_eq!(
            (runs[0].start, runs[0].end, runs[0].level),
            (0, text.len(), 0)
        );
    }

    #[test]
    fn mixed_text_splits_into_ltr_and_rtl_runs() {
        let text = format!("abc {HEBREW}");
        let para = BidiParagraph::new(&text, None);

        // One level per UTF-8 byte, not per character.
        assert_eq!(para.levels().len(), text.len());

        let runs: Vec<(usize, usize, u8)> = para
            .runs()
            .iter()
            .map(|run| (run.start, run.end, run.level))
            .collect();
        assert_eq!(runs, vec![(0, 4, 0), (4, text.len(), 1)]);
    }
}