1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
// SPDX-License-Identifier: MIT OR Apache-2.0
use alloc::vec::Vec;
use unicode_bidi::{bidi_class, BidiClass, BidiInfo, ParagraphInfo};
/// An iterator over the paragraphs in the input text.
///
/// It plays the same role as [`core::str::Lines`], but paragraph boundaries follow
/// `unicode-bidi` behaviour. Each item is a sub-slice of the original text.
#[derive(Debug)]
pub struct BidiParagraphs<'text> {
// The full input text; every yielded paragraph is a slice borrowed from it.
text: &'text str,
// Paragraphs that have not been yielded yet; each `range` is a byte range into `text`.
info: alloc::vec::IntoIter<ParagraphInfo>,
}
impl<'text> BidiParagraphs<'text> {
    /// Creates an iterator over the paragraphs of `text`.
    ///
    /// Text that is pure ASCII and contains no control characters other than
    /// `'\n'`, `'\r'` and `'\t'` is split on `'\n'` directly, with every paragraph
    /// given the left-to-right level, so no [`BidiInfo`] is built for it.
    /// Any other text is analysed with [`BidiInfo::new`].
    pub fn new(text: &'text str) -> Self {
        let is_plain_ascii = text.is_ascii()
            && text
                .chars()
                .all(|ch| !ch.is_ascii_control() || matches!(ch, '\n' | '\r' | '\t'));

        if !is_plain_ascii {
            // Anything beyond plain ASCII needs the full `BidiInfo` analysis.
            let analysed = BidiInfo::new(text, None);
            return Self {
                text,
                info: analysed.paragraphs.into_iter(),
            };
        }

        // Plain ASCII: one left-to-right `ParagraphInfo` per `'\n'`-delimited line.
        // The range stops before the `'\n'`, so the newline is not part of it.
        let mut lines = Vec::new();
        let mut line_start = 0;
        for (newline_at, _) in text.match_indices('\n') {
            lines.push(ParagraphInfo {
                range: line_start..newline_at,
                level: unicode_bidi::Level::ltr(),
            });
            line_start = newline_at + 1;
        }

        // Whatever follows the last newline forms a final paragraph, unless it is empty.
        let text_end = text.len();
        if line_start < text_end {
            lines.push(ParagraphInfo {
                range: line_start..text_end,
                level: unicode_bidi::Level::ltr(),
            });
        }

        Self {
            text,
            info: lines.into_iter(),
        }
    }
}
impl<'text> Iterator for BidiParagraphs<'text> {
    type Item = &'text str;

    /// Returns the next paragraph of the text with its trailing paragraph
    /// separator (if any) removed, or `None` once all paragraphs are yielded.
    fn next(&mut self) -> Option<Self::Item> {
        let para = self.info.next()?;
        let paragraph = &self.text[para.range];
        // `para.range` may still end with the character that terminated the
        // paragraph, so strip it when present.
        match paragraph.char_indices().next_back() {
            // `BidiClass::B` is a Paragraph_Separator (various newline characters).
            Some((separator_start, last)) if bidi_class(last) == BidiClass::B => {
                Some(&paragraph[..separator_start])
            }
            _ => Some(paragraph),
        }
    }
}