use crate::backend::TextBackend;
use crate::bidi::{self, BidiRun};
use crate::error::TextError;
use crate::types::{Direction, ShapedGlyph};
/// One shaped piece of a line: the glyphs and metrics the backend produced
/// for a single segment (a word piece, a run of spaces, or a lone tab), as
/// populated by `shape_words_in` in this file.
#[derive(Clone, Debug)]
pub struct ShapedWord {
/// Glyphs returned by the backend for this segment. `shape_words_in`
/// overwrites each glyph's `direction` with the segment's direction.
pub glyphs: Vec<ShapedGlyph>,
/// Width the backend reported for the segment (its `width_lpx`).
/// NOTE(review): `lpx` presumably means logical pixels — confirm.
pub advance_width_lpx: f32,
/// Ascent the backend reported for the segment.
pub ascent_lpx: f32,
/// Descent the backend reported for the segment.
pub descent_lpx: f32,
/// Byte range of the segment, shifted by the `segment_start` offset that
/// was passed to `shape_words_in`.
pub source_byte_range: std::ops::Range<usize>,
/// True when the segment is not a tab and its first byte is an ASCII space.
pub is_space_run: bool,
/// Level copied from the segment (which takes it from its bidi run).
pub level: u8,
/// True when the segment was flagged as a tab by the segmenter.
pub is_tab: bool,
/// Break-before flag copied from the segment.
pub break_before: bool,
}
/// Shapes `text` into words and also reports the width of a single space.
///
/// Returns the shaped pieces of `text` (byte ranges relative to the start of
/// `text`) together with the width the backend reports for `" "`.
///
/// # Errors
/// Propagates any error from shaping the pieces of `text`. A failure to
/// shape the lone space is not an error: the space width falls back to `0.0`.
pub fn shape_words<B: TextBackend>(
    backend: &B,
    font: &B::Font,
    text: &str,
) -> Result<(Vec<ShapedWord>, f32), TextError> {
    // The space is measured first, before any piece of `text` is shaped.
    let space_advance = backend
        .shape_line(font, " ")
        .map_or(0.0, |line| line.width_lpx);
    shape_words_in(backend, font, text, 0).map(|words| (words, space_advance))
}
/// One unit produced by a `LineSegmenter`: a byte range of the input text
/// plus the attributes the shaping step needs for it.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct Segment {
/// Byte range of the segment within the text handed to the segmenter.
pub byte_range: std::ops::Range<usize>,
/// Direction the segment is shaped in; `split_run` copies it from the
/// bidi run the segment was cut from.
pub direction: Direction,
/// Level copied from the bidi run (0 in `WhitespaceSegmenter`).
pub level: u8,
/// True when the segment is a single `\t` byte.
pub is_tab: bool,
/// Whether `linebreak::is_break_before` reported a break at the segment's
/// first byte (always false in `WhitespaceSegmenter`).
pub break_before: bool,
}
/// Strategy for cutting a line of text into `Segment`s.
pub(crate) trait LineSegmenter {
/// Returns the segments of `text`. In the implementations in this file the
/// segments' byte ranges index into `text` and are emitted in source order.
fn segments(&self, text: &str) -> Vec<Segment>;
}
/// Test-only segmenter that only distinguishes ASCII spaces from everything
/// else; the tests compare `BidiSegmenter` against it on ASCII input.
#[cfg(test)]
pub(crate) struct WhitespaceSegmenter;
#[cfg(test)]
impl LineSegmenter for WhitespaceSegmenter {
    /// Cuts `text` into alternating maximal runs of ASCII spaces and maximal
    /// runs of any other bytes, in source order.
    ///
    /// Every segment is tagged left-to-right at level 0, is never a tab, and
    /// never carries a break-before flag.
    fn segments(&self, text: &str) -> Vec<Segment> {
        let raw = text.as_bytes();
        let mut pieces = Vec::new();
        let mut cursor = 0usize;
        while let Some(&lead) = raw.get(cursor) {
            let in_space = lead == b' ';
            // The lead byte always matches its own class, so the run is at
            // least one byte long and the cursor always advances.
            let run_len = raw[cursor..]
                .iter()
                .take_while(|&&b| (b == b' ') == in_space)
                .count();
            let next = cursor + run_len;
            pieces.push(Segment {
                byte_range: cursor..next,
                direction: Direction::Ltr,
                level: 0,
                is_tab: false,
                break_before: false,
            });
            cursor = next;
        }
        pieces
    }
}
/// Segmenter that resolves bidi runs for the line and then cuts each run
/// into tabs, space runs and word pieces (see its `LineSegmenter` impl).
pub(crate) struct BidiSegmenter {
/// Base direction handed to `bidi::resolve_line`.
/// NOTE(review): `None` presumably lets the resolver pick the base
/// direction from the text — confirm against `bidi::resolve_line`.
pub base: Option<Direction>,
}
impl LineSegmenter for BidiSegmenter {
    /// Resolves the bidi runs of `text`, then splits every logical run into
    /// segments with `split_run`, using the break offsets computed for the
    /// whole line. Segments are returned in the order the runs are listed.
    fn segments(&self, text: &str) -> Vec<Segment> {
        let layout = bidi::resolve_line(text, self.base);
        let break_table = crate::linebreak::break_offsets(text);
        layout
            .logical_runs
            .iter()
            .fold(Vec::new(), |mut pieces, run| {
                split_run(text, run, &break_table, &mut pieces);
                pieces
            })
    }
}
/// Splits one bidi run into segments and appends them to `out`.
///
/// Scanning forward through `run.byte_range`, each segment is one of:
/// - a single `\t` byte (flagged `is_tab`);
/// - a maximal run of ASCII spaces;
/// - a run of other characters, ended by the end of the bidi run, a space,
///   a tab, or an offset that `linebreak::is_break_before` reports as a break.
///
/// Every segment inherits the run's direction and level, and `break_before`
/// records whether a break is reported at the segment's first byte.
///
/// # Panics
/// Panics through slice indexing if the run's byte range reaches past the
/// end of `text` or cuts through a character.
fn split_run(text: &str, run: &BidiRun, breaks: &[usize], out: &mut Vec<Segment>) {
    let raw = text.as_bytes();
    let limit = run.byte_range.end;
    let mut cursor = run.byte_range.start;
    while cursor < limit {
        let seg_start = cursor;
        let lead = raw[cursor];
        let is_tab = lead == b'\t';
        cursor = match lead {
            // Each tab stands alone, even when several are adjacent.
            b'\t' => cursor + 1,
            b' ' => {
                let spaces = raw[cursor..limit]
                    .iter()
                    .take_while(|&&b| b == b' ')
                    .count();
                cursor + spaces
            }
            _ => {
                let mut after = cursor;
                // Always consume at least one character before testing for a
                // stop, so a break reported at the start cannot stall the scan.
                for ch in text[cursor..limit].chars() {
                    after += ch.len_utf8();
                    let stop = after >= limit
                        || matches!(raw[after], b' ' | b'\t')
                        || crate::linebreak::is_break_before(breaks, after);
                    if stop {
                        break;
                    }
                }
                after
            }
        };
        out.push(Segment {
            byte_range: seg_start..cursor,
            direction: run.direction,
            level: run.level,
            is_tab,
            break_before: crate::linebreak::is_break_before(breaks, seg_start),
        });
    }
}
/// Segments `segment` with a `BidiSegmenter` (no explicit base direction)
/// and shapes every piece with `backend`.
///
/// `segment_start` is added to each piece's byte range, so the returned
/// `source_byte_range`s are expressed relative to whatever larger text
/// `segment` was taken from.
///
/// # Errors
/// Returns the first error reported by `backend.shape_segment`; pieces after
/// the failing one are not shaped.
pub(crate) fn shape_words_in<B: TextBackend>(
    backend: &B,
    font: &B::Font,
    segment: &str,
    segment_start: usize,
) -> Result<Vec<ShapedWord>, TextError> {
    let pieces = BidiSegmenter { base: None }.segments(segment);
    pieces
        .into_iter()
        .map(|piece| -> Result<ShapedWord, TextError> {
            let local = &piece.byte_range;
            let slice = &segment[local.start..local.end];
            let starts_with_space = slice.as_bytes().first() == Some(&b' ');
            let mut shaped = backend.shape_segment(font, slice, piece.direction)?;
            // Stamp the segment's direction on every glyph, replacing
            // whatever direction the backend put there.
            shaped
                .glyphs
                .iter_mut()
                .for_each(|glyph| glyph.direction = piece.direction);
            Ok(ShapedWord {
                glyphs: shaped.glyphs,
                advance_width_lpx: shaped.width_lpx,
                ascent_lpx: shaped.ascent_lpx,
                descent_lpx: shaped.descent_lpx,
                source_byte_range: segment_start + local.start..segment_start + local.end,
                is_space_run: starts_with_space && !piece.is_tab,
                level: piece.level,
                is_tab: piece.is_tab,
                break_before: piece.break_before,
            })
        })
        .collect()
}
// Unit tests for the segmenters. They exercise `BidiSegmenter` end to end,
// so their expectations also depend on `bidi::resolve_line` and the
// `linebreak` helpers.
#[cfg(test)]
mod tests {
use super::*;
// Projects a segment onto the fields both segmenters are expected to agree
// on; `break_before` is deliberately left out of the comparison.
fn seg_shape(s: &Segment) -> (std::ops::Range<usize>, Direction, u8, bool) {
(s.byte_range.clone(), s.direction, s.level, s.is_tab)
}
// On plain ASCII words and spaces the bidi segmenter must produce the same
// ranges, direction, level and tab flag as the simple whitespace splitter.
#[test]
fn bidi_segmenter_matches_whitespace_segmenter_on_ascii() {
let bidi = BidiSegmenter { base: None };
let ws = WhitespaceSegmenter;
for text in ["", "hello", "ab cd", "a b", " ab ", "the quick brown fox"] {
let b: Vec<_> = bidi.segments(text).iter().map(seg_shape).collect();
let w: Vec<_> = ws.segments(text).iter().map(seg_shape).collect();
assert_eq!(b, w, "segmenters diverged on {text:?}");
}
}
// Each of the three CJK characters (3 bytes apiece) becomes its own segment,
// and every segment after the first is marked as breakable before it.
#[test]
fn bidi_segmenter_splits_cjk_at_break_opportunities() {
let segs = BidiSegmenter { base: None }.segments("日本語");
let ranges: Vec<_> = segs.iter().map(|s| s.byte_range.clone()).collect();
assert_eq!(ranges, vec![0..3, 3..6, 6..9]);
assert!(!segs[0].break_before, "first piece has no break before it");
assert!(segs[1].break_before && segs[2].break_before);
}
// The hyphen stays with the preceding piece; the split lands after it.
#[test]
fn bidi_segmenter_splits_after_hyphen() {
let segs = BidiSegmenter { base: None }.segments("foo-bar");
let ranges: Vec<_> = segs.iter().map(|s| s.byte_range.clone()).collect();
assert_eq!(ranges, vec![0..4, 4..7]);
assert!(segs[1].break_before);
}
// Adjacent tabs are not merged: each `\t` is a one-byte segment of its own.
#[test]
fn tab_is_an_isolated_segment() {
let segs = BidiSegmenter { base: None }.segments("a\t\tb");
let shapes: Vec<_> = segs
.iter()
.map(|s| (s.byte_range.clone(), s.is_tab))
.collect();
assert_eq!(
shapes,
vec![(0..1, false), (1..2, true), (2..3, true), (3..4, false)]
);
}
// Mixed Latin/Hebrew input yields both LTR and RTL segments, and the
// segments together start at byte 0 and end at the end of the text.
#[test]
fn bidi_segmenter_tags_rtl_runs() {
let segs = BidiSegmenter { base: None }.segments("abc אבג");
assert!(segs.iter().any(|s| s.direction == Direction::Rtl));
assert!(segs.iter().any(|s| s.direction == Direction::Ltr));
assert_eq!(segs.first().unwrap().byte_range.start, 0);
assert_eq!(segs.last().unwrap().byte_range.end, "abc אבג".len());
}
}