oxitext_layout/bidi.rs
1//! UAX #9 Unicode Bidirectional Algorithm.
2//!
3//! Wraps the `unicode-bidi` crate to provide paragraph-level bidi analysis,
4//! resolving visual run order and embedding levels for mixed-direction text.
5
6use unicode_bidi::{BidiInfo, Level};
7
/// A run of text with a uniform bidi embedding level.
///
/// Bidi runs are produced by [`BidiParagraph::new`] and represent contiguous
/// slices of the source string that share the same embedding level. Even-
/// numbered levels are LTR; odd-numbered levels are RTL.
///
/// The type is plain data (two byte offsets and a level), so it supports
/// equality comparison and hashing in addition to `Debug` and `Clone`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BidiRun {
    /// Byte-offset start of this run in the source string (inclusive).
    pub start: usize,
    /// Byte-offset end of this run in the source string (exclusive).
    pub end: usize,
    /// UAX #9 embedding level of the run. Even values (0, 2, ...) are LTR,
    /// odd values (1, 3, ...) are RTL.
    pub level: u8,
}
22
23/// Result of paragraph-level bidi analysis.
24///
25/// Call [`BidiParagraph::new`] to analyse a string, then inspect
26/// [`BidiParagraph::runs`] for the visual-order run sequence.
27pub struct BidiParagraph {
28 runs: Vec<BidiRun>,
29 base_level: u8,
30 /// Per-byte embedding levels for the source text, indexed by UTF-8 byte offset.
31 levels: Vec<Level>,
32}
33
34impl BidiParagraph {
35 /// Analyse a paragraph for bidi runs.
36 ///
37 /// The `base_rtl` argument controls the paragraph base direction:
38 /// - `None` — auto-detect via UAX #9 rules P2/P3 (recommended).
39 /// - `Some(true)` — force RTL base direction.
40 /// - `Some(false)` — force LTR base direction.
41 ///
42 /// The returned runs are in *visual* order (as they would appear on screen),
43 /// not logical order.
44 pub fn new(text: &str, base_rtl: Option<bool>) -> Self {
45 let hint = match base_rtl {
46 Some(true) => Some(Level::rtl()),
47 Some(false) => Some(Level::ltr()),
48 // Pass None to let BidiInfo apply P2/P3 auto-detection.
49 None => None,
50 };
51
52 let bidi = BidiInfo::new(text, hint);
53
54 // Clone per-byte levels before consuming `bidi` in the run-collecting loop.
55 let levels = bidi.levels.clone();
56
57 // Collect visual-order runs from every paragraph in the text.
58 let mut runs: Vec<BidiRun> = Vec::new();
59 for para in &bidi.paragraphs {
60 let para_range = para.range.start..para.range.end;
61 let (_run_levels, run_ranges) = bidi.visual_runs(para, para_range);
62 for run_range in run_ranges {
63 // Use the byte-level embedding level at the run's start position.
64 // `bidi.levels` is guaranteed to be indexed by UTF-8 byte offset,
65 // and run boundaries always fall on character boundaries.
66 let level = if run_range.start < bidi.levels.len() {
67 bidi.levels[run_range.start].number()
68 } else {
69 para.level.number()
70 };
71 runs.push(BidiRun {
72 start: run_range.start,
73 end: run_range.end,
74 level,
75 });
76 }
77 }
78
79 // Resolve the base level from the first paragraph (or default to LTR).
80 let base_level = bidi
81 .paragraphs
82 .first()
83 .map(|p| p.level.number())
84 .unwrap_or(0);
85
86 BidiParagraph {
87 runs,
88 base_level,
89 levels,
90 }
91 }
92
93 /// Returns the resolved visual-order bidi runs.
94 pub fn runs(&self) -> &[BidiRun] {
95 &self.runs
96 }
97
98 /// Returns the resolved paragraph base embedding level.
99 pub fn base_level(&self) -> u8 {
100 self.base_level
101 }
102
103 /// Returns `true` if the paragraph base direction is RTL (odd base level).
104 pub fn is_rtl(&self) -> bool {
105 self.base_level % 2 == 1
106 }
107
108 /// Returns the per-byte UAX #9 embedding levels for the source text.
109 ///
110 /// The returned slice is indexed by UTF-8 byte offset. Multi-byte characters
111 /// have their level repeated for each byte. Use the cluster byte offset from
112 /// a [`oxitext_core::ShapedGlyph`] to look up the level for that glyph.
113 pub fn levels(&self) -> &[Level] {
114 &self.levels
115 }
116}
117
#[cfg(test)]
mod tests {
    use super::*;

    /// Forcing an LTR base on plain Latin text must yield an even base level.
    #[test]
    fn ltr_paragraph_base_level_is_even() {
        let forced_ltr = Some(false);
        let analysis = BidiParagraph::new("hello", forced_ltr);
        let reported_rtl = analysis.is_rtl();
        assert!(!reported_rtl, "LTR forced base should not be RTL");
    }

    /// Forcing an RTL base must yield an odd base level even for Latin text.
    #[test]
    fn rtl_forced_base_level_is_odd() {
        let forced_rtl = Some(true);
        let analysis = BidiParagraph::new("hello", forced_rtl);
        let reported_rtl = analysis.is_rtl();
        assert!(reported_rtl, "RTL forced base should be RTL");
    }
}