#![forbid(unsafe_code)]
use std::hash::{Hash, Hasher};
/// Writing system assigned to a code point by [`char_script`].
///
/// Variants compare and sort in declaration order; `Common` is pinned to
/// discriminant 0 and the enum is stored as a `u8`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum Script {
    /// Script-neutral characters: ASCII digits, punctuation, symbols, emoji.
    Common = 0,
    /// Combining marks and variation selectors, which take on the script of
    /// the text around them during resolution.
    Inherited,
    /// Latin.
    Latin,
    /// Greek.
    Greek,
    /// Cyrillic.
    Cyrillic,
    /// Armenian.
    Armenian,
    /// Hebrew (right-to-left).
    Hebrew,
    /// Arabic (right-to-left).
    Arabic,
    /// Syriac (right-to-left).
    Syriac,
    /// Thaana (right-to-left).
    Thaana,
    /// Devanagari.
    Devanagari,
    /// Bengali.
    Bengali,
    /// Gurmukhi.
    Gurmukhi,
    /// Gujarati.
    Gujarati,
    /// Oriya.
    Oriya,
    /// Tamil.
    Tamil,
    /// Telugu.
    Telugu,
    /// Kannada.
    Kannada,
    /// Malayalam.
    Malayalam,
    /// Sinhala.
    Sinhala,
    /// Thai.
    Thai,
    /// Lao.
    Lao,
    /// Tibetan.
    Tibetan,
    /// Myanmar.
    Myanmar,
    /// Georgian.
    Georgian,
    /// Hangul.
    Hangul,
    /// Ethiopic.
    Ethiopic,
    /// Han (CJK ideographs).
    Han,
    /// Hiragana.
    Hiragana,
    /// Katakana.
    Katakana,
    /// Bopomofo.
    Bopomofo,
    /// Fallback for code points that [`char_script`] has no range for.
    Unknown,
}

impl Script {
    /// Returns `true` for the two script-neutral classes, [`Script::Common`]
    /// and [`Script::Inherited`].
    #[inline]
    pub const fn is_common_or_inherited(self) -> bool {
        matches!(self, Self::Inherited | Self::Common)
    }

    /// Returns `true` when the script is written right-to-left.
    ///
    /// The match is deliberately exhaustive: adding a variant to the enum
    /// will not compile until its direction has been decided here.
    #[inline]
    pub const fn is_rtl(self) -> bool {
        match self {
            Self::Hebrew | Self::Arabic | Self::Syriac | Self::Thaana => true,
            Self::Common
            | Self::Inherited
            | Self::Latin
            | Self::Greek
            | Self::Cyrillic
            | Self::Armenian
            | Self::Devanagari
            | Self::Bengali
            | Self::Gurmukhi
            | Self::Gujarati
            | Self::Oriya
            | Self::Tamil
            | Self::Telugu
            | Self::Kannada
            | Self::Malayalam
            | Self::Sinhala
            | Self::Thai
            | Self::Lao
            | Self::Tibetan
            | Self::Myanmar
            | Self::Georgian
            | Self::Hangul
            | Self::Ethiopic
            | Self::Han
            | Self::Hiragana
            | Self::Katakana
            | Self::Bopomofo
            | Self::Unknown => false,
        }
    }
}
#[inline]
pub fn char_script(c: char) -> Script {
let cp = c as u32;
match cp {
0x0000..=0x0040 => Script::Common, 0x0041..=0x005A => Script::Latin, 0x005B..=0x0060 => Script::Common, 0x0061..=0x007A => Script::Latin, 0x007B..=0x00BF => Script::Common, 0x00C0..=0x00D6 => Script::Latin, 0x00D7 => Script::Common, 0x00D8..=0x00F6 => Script::Latin, 0x00F7 => Script::Common, 0x00F8..=0x024F => Script::Latin, 0x0250..=0x02AF => Script::Latin, 0x02B0..=0x02FF => Script::Common, 0x0300..=0x036F => Script::Inherited,
0x0370..=0x03FF => Script::Greek,
0x1F00..=0x1FFF => Script::Greek,
0x0400..=0x04FF => Script::Cyrillic,
0x0500..=0x052F => Script::Cyrillic, 0x2DE0..=0x2DFF => Script::Cyrillic, 0xA640..=0xA69F => Script::Cyrillic, 0x1C80..=0x1C8F => Script::Cyrillic,
0x0530..=0x058F => Script::Armenian,
0xFB13..=0xFB17 => Script::Armenian,
0x0590..=0x05FF => Script::Hebrew,
0xFB1D..=0xFB4F => Script::Hebrew,
0x0600..=0x06FF => Script::Arabic,
0x0750..=0x077F => Script::Arabic, 0x08A0..=0x08FF => Script::Arabic, 0xFB50..=0xFDFF => Script::Arabic, 0xFE70..=0xFEFF => Script::Arabic,
0x0700..=0x074F => Script::Syriac,
0x0860..=0x086F => Script::Syriac,
0x0780..=0x07BF => Script::Thaana,
0x0900..=0x097F => Script::Devanagari,
0xA8E0..=0xA8FF => Script::Devanagari,
0x0980..=0x09FF => Script::Bengali,
0x0A00..=0x0A7F => Script::Gurmukhi,
0x0A80..=0x0AFF => Script::Gujarati,
0x0B00..=0x0B7F => Script::Oriya,
0x0B80..=0x0BFF => Script::Tamil,
0x0C00..=0x0C7F => Script::Telugu,
0x0C80..=0x0CFF => Script::Kannada,
0x0D00..=0x0D7F => Script::Malayalam,
0x0D80..=0x0DFF => Script::Sinhala,
0x0E00..=0x0E7F => Script::Thai,
0x0E80..=0x0EFF => Script::Lao,
0x0F00..=0x0FFF => Script::Tibetan,
0x1000..=0x109F => Script::Myanmar,
0xAA60..=0xAA7F => Script::Myanmar,
0x10A0..=0x10FF => Script::Georgian,
0x2D00..=0x2D2F => Script::Georgian, 0x1C90..=0x1CBF => Script::Georgian,
0x1100..=0x11FF => Script::Hangul, 0x3130..=0x318F => Script::Hangul, 0xA960..=0xA97F => Script::Hangul, 0xAC00..=0xD7AF => Script::Hangul, 0xD7B0..=0xD7FF => Script::Hangul,
0x1200..=0x137F => Script::Ethiopic,
0x1380..=0x139F => Script::Ethiopic, 0x2D80..=0x2DDF => Script::Ethiopic, 0xAB00..=0xAB2F => Script::Ethiopic,
0x1E00..=0x1EFF => Script::Latin, 0x2C60..=0x2C7F => Script::Latin, 0xA720..=0xA7FF => Script::Latin, 0xAB30..=0xAB6F => Script::Latin, 0xFB00..=0xFB06 => Script::Latin,
0x2E80..=0x2EFF => Script::Han, 0x2F00..=0x2FDF => Script::Han, 0x3400..=0x4DBF => Script::Han, 0x4E00..=0x9FFF => Script::Han, 0xF900..=0xFAFF => Script::Han, 0x20000..=0x2A6DF => Script::Han, 0x2A700..=0x2B73F => Script::Han, 0x2B740..=0x2B81F => Script::Han, 0x2B820..=0x2CEAF => Script::Han, 0x2CEB0..=0x2EBEF => Script::Han, 0x30000..=0x3134F => Script::Han,
0x3040..=0x309F => Script::Hiragana,
0x1B001..=0x1B11F => Script::Hiragana,
0x30A0..=0x30FF => Script::Katakana,
0x31F0..=0x31FF => Script::Katakana, 0xFF65..=0xFF9F => Script::Katakana,
0x3100..=0x312F => Script::Bopomofo,
0x31A0..=0x31BF => Script::Bopomofo,
0x3000..=0x303F => Script::Common,
0x2000..=0x206F => Script::Common, 0x2070..=0x209F => Script::Common, 0x20A0..=0x20CF => Script::Common, 0x20D0..=0x20FF => Script::Inherited, 0x2100..=0x214F => Script::Common, 0x2150..=0x218F => Script::Common, 0x2190..=0x21FF => Script::Common, 0x2200..=0x22FF => Script::Common, 0x2300..=0x23FF => Script::Common, 0x2400..=0x243F => Script::Common, 0x2440..=0x245F => Script::Common, 0x2460..=0x24FF => Script::Common, 0x2500..=0x257F => Script::Common, 0x2580..=0x259F => Script::Common, 0x25A0..=0x25FF => Script::Common, 0x2600..=0x26FF => Script::Common, 0x2700..=0x27BF => Script::Common, 0x27C0..=0x27EF => Script::Common, 0x27F0..=0x27FF => Script::Common, 0x2800..=0x28FF => Script::Common, 0x2900..=0x297F => Script::Common, 0x2980..=0x29FF => Script::Common, 0x2A00..=0x2AFF => Script::Common, 0x2B00..=0x2BFF => Script::Common,
0xFF01..=0xFF5E => Script::Latin, 0xFF61..=0xFF64 => Script::Common,
0xFE00..=0xFE0F => Script::Inherited, 0xE0100..=0xE01EF => Script::Inherited, 0x1F000..=0x1FAFF => Script::Common, 0xFE10..=0xFE1F => Script::Common, 0xFE20..=0xFE2F => Script::Inherited, 0xFE30..=0xFE4F => Script::Common, 0xFE50..=0xFE6F => Script::Common,
0x07C0..=0x07FF => Script::Arabic,
_ => Script::Unknown,
}
}
/// A contiguous stretch of text whose characters all resolved to one script.
///
/// `start..end` is a half-open byte range into the string the run was
/// computed from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScriptRun {
    /// Byte offset of the first byte of the run.
    pub start: usize,
    /// Byte offset one past the last byte of the run.
    pub end: usize,
    /// Script shared by every character in the run.
    pub script: Script,
}

impl ScriptRun {
    /// Length of the run in bytes.
    ///
    /// The fields are public, so a caller can build a run with
    /// `end < start`. Such a run is reported as length 0 instead of
    /// underflowing (a panic in debug builds, a huge bogus value in release).
    #[inline]
    pub fn len(&self) -> usize {
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` when the run covers no bytes, including the inverted
    /// `end < start` case, so it always agrees with `len() == 0`.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.start >= self.end
    }

    /// Borrows the slice of `source` that this run covers.
    ///
    /// # Panics
    ///
    /// Panics if `start..end` is not a valid range of `source`: out of
    /// bounds, inverted, or not on UTF-8 character boundaries. Pass the same
    /// string the run was computed from.
    #[inline]
    pub fn text<'a>(&self, source: &'a str) -> &'a str {
        &source[self.start..self.end]
    }
}
fn resolve_scripts(chars: &[char]) -> Vec<Script> {
let n = chars.len();
if n == 0 {
return Vec::new();
}
let mut scripts: Vec<Script> = chars.iter().map(|&c| char_script(c)).collect();
let mut last_specific = Script::Common;
for script in &mut scripts {
if *script == Script::Inherited {
*script = if last_specific.is_common_or_inherited() {
Script::Common } else {
last_specific
};
} else if !script.is_common_or_inherited() {
last_specific = *script;
}
}
let first_specific = scripts
.iter()
.find(|s| !s.is_common_or_inherited())
.copied()
.unwrap_or(Script::Latin);
for script in &mut scripts {
if script.is_common_or_inherited() {
*script = first_specific;
} else {
break;
}
}
let mut current = first_specific;
for script in &mut scripts {
if script.is_common_or_inherited() {
*script = current;
} else {
current = *script;
}
}
scripts
}
/// Splits `text` into maximal runs of characters that share a resolved script.
///
/// The returned runs are in text order, contiguous, and together cover
/// `0..text.len()`; their `start`/`end` fields are byte offsets into `text`.
/// An empty string yields no runs.
pub fn partition_by_script(text: &str) -> Vec<ScriptRun> {
    if text.is_empty() {
        return Vec::new();
    }

    let chars: Vec<char> = text.chars().collect();
    let resolved = resolve_scripts(&chars);

    let mut runs: Vec<ScriptRun> = Vec::new();
    for ((offset, ch), script) in text.char_indices().zip(resolved) {
        let end = offset + ch.len_utf8();
        match runs.last_mut() {
            // Same script as the run being built: just grow it.
            Some(open) if open.script == script => open.end = end,
            // First character, or a script change: start a new run here.
            _ => runs.push(ScriptRun {
                start: offset,
                end,
                script,
            }),
        }
    }
    runs
}
/// Direction attached to a [`TextRun`].
///
/// Unless the caller supplies its own direction callback,
/// [`partition_text_runs`] picks `Rtl` for scripts where [`Script::is_rtl`]
/// holds and `Ltr` for everything else.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RunDirection {
    /// Left-to-right.
    Ltr,
    /// Right-to-left.
    Rtl,
}
/// A contiguous stretch of text that is uniform in script, direction and
/// style, as produced by [`partition_text_runs`].
///
/// `start..end` is a half-open byte range into the string the run was
/// computed from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextRun {
    /// Byte offset of the first byte of the run.
    pub start: usize,
    /// Byte offset one past the last byte of the run.
    pub end: usize,
    /// Script shared by every character in the run.
    pub script: Script,
    /// Direction shared by every character in the run.
    pub direction: RunDirection,
    /// Style identifier shared by every character in the run.
    pub style_id: u64,
}

impl TextRun {
    /// Length of the run in bytes.
    ///
    /// The fields are public, so a caller can build a run with
    /// `end < start`. Such a run is reported as length 0 instead of
    /// underflowing (a panic in debug builds, a huge bogus value in release).
    #[inline]
    pub fn len(&self) -> usize {
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` when the run covers no bytes, including the inverted
    /// `end < start` case, so it always agrees with `len() == 0`.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.start >= self.end
    }

    /// Borrows the slice of `source` that this run covers.
    ///
    /// # Panics
    ///
    /// Panics if `start..end` is not a valid range of `source`: out of
    /// bounds, inverted, or not on UTF-8 character boundaries. Pass the same
    /// string the run was computed from.
    #[inline]
    pub fn text<'a>(&self, source: &'a str) -> &'a str {
        &source[self.start..self.end]
    }

    /// Builds a cache key from the run's text and attributes.
    ///
    /// The key borrows the run's slice of `source` and copies the script,
    /// direction and style; the byte offsets themselves are not part of it.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as [`TextRun::text`].
    #[inline]
    pub fn cache_key<'a>(&self, source: &'a str) -> RunCacheKey<'a> {
        RunCacheKey {
            text: self.text(source),
            script: self.script,
            direction: self.direction,
            style_id: self.style_id,
        }
    }
}
/// Lookup key describing the content and attributes of one text run.
///
/// Two keys are equal when their text, script, direction and style all
/// match; where the text sits inside its source string plays no part.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RunCacheKey<'a> {
    /// The run's text.
    pub text: &'a str,
    /// The run's script.
    pub script: Script,
    /// The run's direction.
    pub direction: RunDirection,
    /// The run's style identifier.
    pub style_id: u64,
}

impl Hash for RunCacheKey<'_> {
    /// Feeds every field into `state`, in declaration order.
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Destructuring without `..` makes this stop compiling if a field is
        // added to the struct but not hashed, keeping `Hash` in step with the
        // derived `Eq`.
        let Self {
            text,
            script,
            direction,
            style_id,
        } = self;
        text.hash(state);
        script.hash(state);
        direction.hash(state);
        style_id.hash(state);
    }
}
/// Splits `text` into runs that are uniform in script, direction and style.
///
/// The text is first cut by script with [`partition_by_script`]; each script
/// run is then cut again wherever the direction or style changes from one
/// character to the next.
///
/// * `direction_fn` maps the byte offset of a character to its direction.
///   When `None`, every character in a script run gets `Rtl` if the script
///   [`is_rtl`](Script::is_rtl) and `Ltr` otherwise.
/// * `style_fn` maps the byte offset of a character to a style identifier.
///   When `None`, the style is `0` everywhere.
///
/// Each supplied callback is invoked exactly once per character, in text
/// order.
///
/// Returns the runs in text order. They are non-empty, contiguous and
/// together cover the whole string; an empty string yields no runs.
pub fn partition_text_runs(
    text: &str,
    direction_fn: Option<&dyn Fn(usize) -> RunDirection>,
    style_fn: Option<&dyn Fn(usize) -> u64>,
) -> Vec<TextRun> {
    if text.is_empty() {
        return Vec::new();
    }

    let script_runs = partition_by_script(text);
    // There is at least one output run per script run.
    let mut runs = Vec::with_capacity(script_runs.len());

    for sr in &script_runs {
        // Constant for the whole script run, so computed once up front.
        let script_direction = if sr.script.is_rtl() {
            RunDirection::Rtl
        } else {
            RunDirection::Ltr
        };
        let attrs_at = |offset: usize| -> (RunDirection, u64) {
            (
                direction_fn.map_or(script_direction, |f| f(offset)),
                style_fn.map_or(0, |f| f(offset)),
            )
        };

        let mut run_start = sr.start;
        let mut current = attrs_at(sr.start);

        // The first character was sampled above; skip it so the callbacks
        // are not asked about the same offset twice.
        for (i, _) in text[sr.start..sr.end].char_indices().skip(1) {
            let offset = sr.start + i;
            let next = attrs_at(offset);
            if next != current {
                runs.push(TextRun {
                    start: run_start,
                    end: offset,
                    script: sr.script,
                    direction: current.0,
                    style_id: current.1,
                });
                run_start = offset;
                current = next;
            }
        }

        // Guard against a degenerate empty script run.
        if sr.end > run_start {
            runs.push(TextRun {
                start: run_start,
                end: sr.end,
                script: sr.script,
                direction: current.0,
                style_id: current.1,
            });
        }
    }
    runs
}
/// Unit tests for script classification, script resolution, run partitioning
/// and cache keys.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- char_script: per-character classification ----

    #[test]
    fn script_ascii_letters() {
        assert_eq!(char_script('A'), Script::Latin);
        assert_eq!(char_script('z'), Script::Latin);
        assert_eq!(char_script('M'), Script::Latin);
    }

    #[test]
    fn script_ascii_digits_are_common() {
        for d in '0'..='9' {
            assert_eq!(char_script(d), Script::Common, "digit {d}");
        }
    }

    #[test]
    fn script_ascii_punctuation_is_common() {
        for &c in &[' ', '!', '.', ',', ':', ';', '?', '-', '(', ')', '[', ']'] {
            assert_eq!(char_script(c), Script::Common, "char {c:?}");
        }
    }

    #[test]
    fn script_latin_extended() {
        // U+00C0 A with grave
        assert_eq!(char_script('\u{00C0}'), Script::Latin);
        // U+00E9 e with acute
        assert_eq!(char_script('\u{00E9}'), Script::Latin);
        // U+0148 n with caron
        assert_eq!(char_script('\u{0148}'), Script::Latin);
        // U+1E00 A with ring below (Latin Extended Additional)
        assert_eq!(char_script('\u{1E00}'), Script::Latin);
    }

    #[test]
    fn script_greek() {
        // U+0391 capital alpha
        assert_eq!(char_script('\u{0391}'), Script::Greek);
        // U+03B1 small alpha
        assert_eq!(char_script('\u{03B1}'), Script::Greek);
        // U+03C9 small omega
        assert_eq!(char_script('\u{03C9}'), Script::Greek);
    }

    #[test]
    fn script_cyrillic() {
        // U+0410 capital A
        assert_eq!(char_script('\u{0410}'), Script::Cyrillic);
        // U+044F small ya
        assert_eq!(char_script('\u{044F}'), Script::Cyrillic);
    }

    #[test]
    fn script_hebrew() {
        // U+05D0 alef
        assert_eq!(char_script('\u{05D0}'), Script::Hebrew);
        // U+05EA tav
        assert_eq!(char_script('\u{05EA}'), Script::Hebrew);
    }

    #[test]
    fn script_arabic() {
        // U+0627 alef
        assert_eq!(char_script('\u{0627}'), Script::Arabic);
        // U+0645 meem
        assert_eq!(char_script('\u{0645}'), Script::Arabic);
    }

    #[test]
    fn script_devanagari() {
        // U+0905 letter a
        assert_eq!(char_script('\u{0905}'), Script::Devanagari);
        // U+0939 letter ha
        assert_eq!(char_script('\u{0939}'), Script::Devanagari);
    }

    #[test]
    fn script_thai() {
        // U+0E01 ko kai
        assert_eq!(char_script('\u{0E01}'), Script::Thai);
        // U+0E3F baht sign: classified by its block, hence Thai
        assert_eq!(char_script('\u{0E3F}'), Script::Thai);
    }

    #[test]
    fn script_hangul() {
        // U+AC00 first Hangul syllable
        assert_eq!(char_script('\u{AC00}'), Script::Hangul);
        // U+D7A3 last Hangul syllable
        assert_eq!(char_script('\u{D7A3}'), Script::Hangul);
    }

    #[test]
    fn script_cjk_han() {
        // First and last code points of the CJK Unified Ideographs range.
        assert_eq!(char_script('\u{4E00}'), Script::Han);
        assert_eq!(char_script('\u{9FFF}'), Script::Han);
    }

    #[test]
    fn script_hiragana_katakana() {
        // U+3042 hiragana a
        assert_eq!(char_script('\u{3042}'), Script::Hiragana);
        // U+30A2 katakana a
        assert_eq!(char_script('\u{30A2}'), Script::Katakana);
    }

    #[test]
    fn script_combining_marks_are_inherited() {
        // U+0300 combining grave, U+0301 combining acute, and U+036F, the
        // last code point of the combining-marks range.
        assert_eq!(char_script('\u{0300}'), Script::Inherited);
        assert_eq!(char_script('\u{0301}'), Script::Inherited);
        assert_eq!(char_script('\u{036F}'), Script::Inherited);
    }

    // ---- Script predicates ----

    #[test]
    fn script_rtl_detection() {
        assert!(Script::Arabic.is_rtl());
        assert!(Script::Hebrew.is_rtl());
        assert!(Script::Syriac.is_rtl());
        assert!(Script::Thaana.is_rtl());
        assert!(!Script::Latin.is_rtl());
        assert!(!Script::Han.is_rtl());
        assert!(!Script::Common.is_rtl());
    }

    #[test]
    fn script_common_or_inherited() {
        assert!(Script::Common.is_common_or_inherited());
        assert!(Script::Inherited.is_common_or_inherited());
        assert!(!Script::Latin.is_common_or_inherited());
        assert!(!Script::Arabic.is_common_or_inherited());
    }

    // ---- resolve_scripts: Common/Inherited resolution ----

    #[test]
    fn resolve_empty() {
        assert!(resolve_scripts(&[]).is_empty());
    }

    #[test]
    fn resolve_pure_latin() {
        let chars: Vec<char> = "Hello".chars().collect();
        let resolved = resolve_scripts(&chars);
        assert!(resolved.iter().all(|&s| s == Script::Latin));
    }

    #[test]
    fn resolve_common_absorbed_by_latin() {
        // Space, digits and '!' are Common and take the surrounding Latin.
        let chars: Vec<char> = "Hi 42!".chars().collect();
        let resolved = resolve_scripts(&chars);
        assert!(
            resolved.iter().all(|&s| s == Script::Latin),
            "All should be Latin: {resolved:?}"
        );
    }

    #[test]
    fn resolve_leading_space() {
        // A leading Common character takes the first specific script.
        let chars: Vec<char> = " Hello".chars().collect();
        let resolved = resolve_scripts(&chars);
        assert_eq!(resolved[0], Script::Latin);
    }

    #[test]
    fn resolve_combining_mark_inherits() {
        // 'e' followed by U+0301 combining acute.
        let chars: Vec<char> = "e\u{0301}".chars().collect();
        let resolved = resolve_scripts(&chars);
        assert_eq!(resolved[0], Script::Latin);
        assert_eq!(
            resolved[1],
            Script::Latin,
            "combining mark should inherit Latin"
        );
    }

    #[test]
    fn resolve_mixed_scripts() {
        // "Hello " followed by five Arabic letters.
        let text = "Hello \u{0645}\u{0631}\u{062D}\u{0628}\u{0627}";
        let chars: Vec<char> = text.chars().collect();
        let resolved = resolve_scripts(&chars);
        for (i, script) in resolved.iter().enumerate().take(5) {
            assert_eq!(*script, Script::Latin, "char {i}");
        }
        // The space sits after Latin text, so it stays with the Latin run.
        assert_eq!(resolved[5], Script::Latin, "space");
        for (i, script) in resolved.iter().enumerate().take(11).skip(6) {
            assert_eq!(*script, Script::Arabic, "char {i}");
        }
    }

    #[test]
    fn resolve_all_common_defaults_to_latin() {
        let chars: Vec<char> = "123 !?".chars().collect();
        let resolved = resolve_scripts(&chars);
        assert!(
            resolved.iter().all(|&s| s == Script::Latin),
            "All-Common should default to Latin"
        );
    }

    // ---- partition_by_script ----

    #[test]
    fn partition_empty() {
        assert!(partition_by_script("").is_empty());
    }

    #[test]
    fn partition_pure_latin() {
        let runs = partition_by_script("Hello World");
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
        assert_eq!(runs[0].start, 0);
        assert_eq!(runs[0].end, 11);
        assert_eq!(runs[0].text("Hello World"), "Hello World");
    }

    #[test]
    fn partition_pure_arabic() {
        let text = "\u{0645}\u{0631}\u{062D}\u{0628}\u{0627}";
        let runs = partition_by_script(text);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Arabic);
    }

    #[test]
    fn partition_latin_then_arabic() {
        let text = "Hello \u{0645}\u{0631}\u{062D}\u{0628}\u{0627}";
        let runs = partition_by_script(text);
        assert!(runs.len() >= 2, "runs: {runs:?}");
        assert_eq!(runs[0].script, Script::Latin);
        assert!(runs[0].text(text).starts_with("Hello"));
        let last = runs.last().unwrap();
        assert_eq!(last.script, Script::Arabic);
    }

    #[test]
    fn partition_latin_cjk_latin() {
        // Two Han ideographs between two Latin words.
        let text = "Hello\u{4E16}\u{754C}World";
        let runs = partition_by_script(text);
        assert_eq!(runs.len(), 3, "runs: {runs:?}");
        assert_eq!(runs[0].script, Script::Latin);
        assert_eq!(runs[1].script, Script::Han);
        assert_eq!(runs[2].script, Script::Latin);
    }

    #[test]
    fn partition_japanese_mixed() {
        // Five hiragana, two Han ideographs, one katakana.
        let text = "\u{3053}\u{3093}\u{306B}\u{3061}\u{306F}\u{4E16}\u{754C}\u{30A2}";
        let runs = partition_by_script(text);
        assert!(runs.len() >= 2, "runs: {runs:?}");
        let scripts: Vec<Script> = runs.iter().map(|r| r.script).collect();
        assert!(scripts.contains(&Script::Hiragana));
        assert!(scripts.contains(&Script::Han));
        assert!(scripts.contains(&Script::Katakana));
    }

    #[test]
    fn partition_runs_cover_full_text() {
        // Latin, Hebrew, Latin, Han: runs must tile the string with no gaps.
        let text = "Hello \u{05E9}\u{05DC}\u{05D5}\u{05DD} World \u{4E16}\u{754C}";
        let runs = partition_by_script(text);
        assert_eq!(runs[0].start, 0);
        assert_eq!(runs.last().unwrap().end, text.len());
        for window in runs.windows(2) {
            assert_eq!(
                window[0].end, window[1].start,
                "runs must be contiguous: {:?}",
                window
            );
        }
    }

    #[test]
    fn partition_run_text_slicing() {
        let text = "ABCdef";
        let runs = partition_by_script(text);
        let reconstructed: String = runs.iter().map(|r| r.text(text)).collect();
        assert_eq!(reconstructed, text);
    }

    #[test]
    fn partition_combining_mark_stays_with_base() {
        let text = "e\u{0301}";
        let runs = partition_by_script(text);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
    }

    #[test]
    fn partition_digits_absorbed() {
        let runs = partition_by_script("Item 42");
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
    }

    // ---- partition_text_runs ----

    #[test]
    fn text_runs_empty() {
        assert!(partition_text_runs("", None, None).is_empty());
    }

    #[test]
    fn text_runs_simple_latin() {
        // With no callbacks: direction comes from the script, style is 0.
        let runs = partition_text_runs("Hello World", None, None);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
        assert_eq!(runs[0].direction, RunDirection::Ltr);
        assert_eq!(runs[0].style_id, 0);
    }

    #[test]
    fn text_runs_arabic_direction() {
        let text = "\u{0645}\u{0631}\u{062D}\u{0628}\u{0627}";
        let runs = partition_text_runs(text, None, None);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Arabic);
        assert_eq!(runs[0].direction, RunDirection::Rtl);
    }

    #[test]
    fn text_runs_mixed_scripts() {
        let text = "Hello\u{4E16}\u{754C}World";
        let runs = partition_text_runs(text, None, None);
        assert_eq!(runs.len(), 3);
        assert_eq!(runs[0].direction, RunDirection::Ltr);
        assert_eq!(runs[1].direction, RunDirection::Ltr);
        assert_eq!(runs[2].direction, RunDirection::Ltr);
    }

    #[test]
    fn text_runs_style_split() {
        // The style changes at byte offset 5, inside a single script run.
        let text = "Hello World";
        let style_fn = |offset: usize| -> u64 { if offset < 5 { 1 } else { 2 } };
        let runs = partition_text_runs(text, None, Some(&style_fn));
        assert_eq!(runs.len(), 2, "runs: {runs:?}");
        assert_eq!(runs[0].style_id, 1);
        assert_eq!(runs[0].text(text), "Hello");
        assert_eq!(runs[1].style_id, 2);
        assert_eq!(runs[1].text(text), " World");
    }

    #[test]
    fn text_runs_direction_override() {
        // A direction callback wins over the script's own direction.
        let text = "ABC";
        let dir_fn = |_offset: usize| -> RunDirection { RunDirection::Rtl };
        let runs = partition_text_runs(text, Some(&dir_fn), None);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].direction, RunDirection::Rtl);
    }

    #[test]
    fn text_runs_cover_full_text() {
        let text = "Hello \u{05E9}\u{05DC}\u{05D5}\u{05DD} World";
        let runs = partition_text_runs(text, None, None);
        assert_eq!(runs[0].start, 0);
        assert_eq!(runs.last().unwrap().end, text.len());
        for window in runs.windows(2) {
            assert_eq!(window[0].end, window[1].start);
        }
        let reconstructed: String = runs.iter().map(|r| r.text(text)).collect();
        assert_eq!(reconstructed, text);
    }

    // ---- RunCacheKey ----

    #[test]
    fn cache_key_equality() {
        let text = "Hello";
        let run = TextRun {
            start: 0,
            end: 5,
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        let k1 = run.cache_key(text);
        let k2 = run.cache_key(text);
        assert_eq!(k1, k2);
    }

    #[test]
    fn cache_key_differs_by_script() {
        let k1 = RunCacheKey {
            text: "abc",
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        let k2 = RunCacheKey {
            text: "abc",
            script: Script::Greek,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        assert_ne!(k1, k2);
    }

    #[test]
    fn cache_key_differs_by_direction() {
        let k1 = RunCacheKey {
            text: "abc",
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        let k2 = RunCacheKey {
            text: "abc",
            script: Script::Latin,
            direction: RunDirection::Rtl,
            style_id: 0,
        };
        assert_ne!(k1, k2);
    }

    #[test]
    fn cache_key_differs_by_style() {
        let k1 = RunCacheKey {
            text: "abc",
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        let k2 = RunCacheKey {
            text: "abc",
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 1,
        };
        assert_ne!(k1, k2);
    }

    #[test]
    fn cache_key_hashable() {
        use std::collections::HashSet;
        let mut set = HashSet::new();
        let k = RunCacheKey {
            text: "hello",
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        set.insert(k.clone());
        assert!(set.contains(&k));
    }

    // ---- Edge cases ----

    #[test]
    fn single_char() {
        let runs = partition_by_script("A");
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
        assert_eq!(runs[0].start, 0);
        assert_eq!(runs[0].end, 1);
    }

    #[test]
    fn only_spaces() {
        // No specific script anywhere, so the Latin default applies.
        let runs = partition_by_script(" ");
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
    }

    #[test]
    fn emoji_is_common() {
        // U+1F600 is Common and is absorbed into the surrounding Latin run.
        let text = "Hello \u{1F600} World";
        let runs = partition_by_script(text);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
    }

    #[test]
    fn multibyte_utf8_offsets() {
        // U+00E9 is 2 bytes in UTF-8, U+4E00 is 3 bytes.
        let text = "\u{00E9}\u{4E00}";
        let runs = partition_by_script(text);
        assert!(runs.len() >= 2);
        assert_eq!(runs[0].end, 2);
        assert_eq!(runs[1].start, 2);
        assert_eq!(runs[1].end, 5);
    }

    #[test]
    fn text_run_len_and_empty() {
        let run = TextRun {
            start: 5,
            end: 10,
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        assert_eq!(run.len(), 5);
        assert!(!run.is_empty());
        let empty = TextRun {
            start: 5,
            end: 5,
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        assert_eq!(empty.len(), 0);
        assert!(empty.is_empty());
    }

    #[test]
    fn script_run_len_and_empty() {
        let run = ScriptRun {
            start: 0,
            end: 5,
            script: Script::Latin,
        };
        assert_eq!(run.len(), 5);
        assert!(!run.is_empty());
    }

    #[test]
    fn script_enum_ord() {
        // Variants order by declaration, and Common is declared first.
        let mut scripts = [Script::Arabic, Script::Latin, Script::Common];
        scripts.sort();
        assert_eq!(scripts[0], Script::Common);
    }

    #[test]
    fn many_script_transitions() {
        // Latin, Greek, Cyrillic, Hebrew and Arabic back to back.
        let text = "Hello\u{0391}\u{0392}\u{0410}\u{0411}\u{05D0}\u{05D1}\u{0627}\u{0628}";
        let runs = partition_by_script(text);
        let scripts: Vec<Script> = runs.iter().map(|r| r.script).collect();
        assert!(scripts.contains(&Script::Latin));
        assert!(scripts.contains(&Script::Greek));
        assert!(scripts.contains(&Script::Cyrillic));
        assert!(scripts.contains(&Script::Hebrew));
        assert!(scripts.contains(&Script::Arabic));
        for window in runs.windows(2) {
            assert_eq!(window[0].end, window[1].start);
        }
    }
}