1#![forbid(unsafe_code)]
2
3use std::hash::{Hash, Hasher};
40
/// Script classification assigned to individual characters and to runs.
///
/// The discriminants are written out explicitly so the `#[repr(u8)]` value of
/// each variant (and therefore the derived ordering) cannot shift silently if
/// a variant is ever inserted in the middle of the list.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u8)]
pub enum Script {
    /// Characters without a script identity of their own (digits,
    /// punctuation, symbols, ...).
    Common = 0,
    /// Combining marks that take on the script of surrounding text.
    Inherited = 1,
    /// Latin.
    Latin = 2,
    /// Greek.
    Greek = 3,
    /// Cyrillic.
    Cyrillic = 4,
    /// Armenian.
    Armenian = 5,
    /// Hebrew.
    Hebrew = 6,
    /// Arabic.
    Arabic = 7,
    /// Syriac.
    Syriac = 8,
    /// Thaana.
    Thaana = 9,
    /// Devanagari.
    Devanagari = 10,
    /// Bengali.
    Bengali = 11,
    /// Gurmukhi.
    Gurmukhi = 12,
    /// Gujarati.
    Gujarati = 13,
    /// Oriya.
    Oriya = 14,
    /// Tamil.
    Tamil = 15,
    /// Telugu.
    Telugu = 16,
    /// Kannada.
    Kannada = 17,
    /// Malayalam.
    Malayalam = 18,
    /// Sinhala.
    Sinhala = 19,
    /// Thai.
    Thai = 20,
    /// Lao.
    Lao = 21,
    /// Tibetan.
    Tibetan = 22,
    /// Myanmar.
    Myanmar = 23,
    /// Georgian.
    Georgian = 24,
    /// Hangul.
    Hangul = 25,
    /// Ethiopic.
    Ethiopic = 26,
    /// Han (CJK ideographs).
    Han = 27,
    /// Hiragana.
    Hiragana = 28,
    /// Katakana.
    Katakana = 29,
    /// Bopomofo.
    Bopomofo = 30,
    /// Any code point not covered by the classification table.
    Unknown = 31,
}
122
123impl Script {
124 #[inline]
126 pub const fn is_common_or_inherited(self) -> bool {
127 matches!(self, Script::Common | Script::Inherited)
128 }
129
130 #[inline]
132 pub const fn is_rtl(self) -> bool {
133 matches!(
134 self,
135 Script::Arabic | Script::Hebrew | Script::Syriac | Script::Thaana
136 )
137 }
138}
139
140#[inline]
151pub fn char_script(c: char) -> Script {
152 let cp = c as u32;
153 match cp {
154 0x0000..=0x0040 => Script::Common, 0x0041..=0x005A => Script::Latin, 0x005B..=0x0060 => Script::Common, 0x0061..=0x007A => Script::Latin, 0x007B..=0x00BF => Script::Common, 0x00C0..=0x00D6 => Script::Latin, 0x00D7 => Script::Common, 0x00D8..=0x00F6 => Script::Latin, 0x00F7 => Script::Common, 0x00F8..=0x024F => Script::Latin, 0x0250..=0x02AF => Script::Latin, 0x02B0..=0x02FF => Script::Common, 0x0300..=0x036F => Script::Inherited, 0x0370..=0x03FF => Script::Greek,
172 0x1F00..=0x1FFF => Script::Greek, 0x0400..=0x04FF => Script::Cyrillic,
176 0x0500..=0x052F => Script::Cyrillic, 0x2DE0..=0x2DFF => Script::Cyrillic, 0xA640..=0xA69F => Script::Cyrillic, 0x1C80..=0x1C8F => Script::Cyrillic, 0x0530..=0x058F => Script::Armenian,
183 0xFB13..=0xFB17 => Script::Armenian, 0x0590..=0x05FF => Script::Hebrew,
187 0xFB1D..=0xFB4F => Script::Hebrew, 0x0600..=0x06FF => Script::Arabic,
191 0x0750..=0x077F => Script::Arabic, 0x08A0..=0x08FF => Script::Arabic, 0xFB50..=0xFDFF => Script::Arabic, 0xFE70..=0xFEFF => Script::Arabic, 0x0700..=0x074F => Script::Syriac,
198 0x0860..=0x086F => Script::Syriac, 0x0780..=0x07BF => Script::Thaana,
202
203 0x0900..=0x097F => Script::Devanagari,
205 0xA8E0..=0xA8FF => Script::Devanagari, 0x0980..=0x09FF => Script::Bengali,
209
210 0x0A00..=0x0A7F => Script::Gurmukhi,
212
213 0x0A80..=0x0AFF => Script::Gujarati,
215
216 0x0B00..=0x0B7F => Script::Oriya,
218
219 0x0B80..=0x0BFF => Script::Tamil,
221
222 0x0C00..=0x0C7F => Script::Telugu,
224
225 0x0C80..=0x0CFF => Script::Kannada,
227
228 0x0D00..=0x0D7F => Script::Malayalam,
230
231 0x0D80..=0x0DFF => Script::Sinhala,
233
234 0x0E00..=0x0E7F => Script::Thai,
236
237 0x0E80..=0x0EFF => Script::Lao,
239
240 0x0F00..=0x0FFF => Script::Tibetan,
242
243 0x1000..=0x109F => Script::Myanmar,
245 0xAA60..=0xAA7F => Script::Myanmar, 0x10A0..=0x10FF => Script::Georgian,
249 0x2D00..=0x2D2F => Script::Georgian, 0x1C90..=0x1CBF => Script::Georgian, 0x1100..=0x11FF => Script::Hangul, 0x3130..=0x318F => Script::Hangul, 0xA960..=0xA97F => Script::Hangul, 0xAC00..=0xD7AF => Script::Hangul, 0xD7B0..=0xD7FF => Script::Hangul, 0x1200..=0x137F => Script::Ethiopic,
261 0x1380..=0x139F => Script::Ethiopic, 0x2D80..=0x2DDF => Script::Ethiopic, 0xAB00..=0xAB2F => Script::Ethiopic, 0x1E00..=0x1EFF => Script::Latin, 0x2C60..=0x2C7F => Script::Latin, 0xA720..=0xA7FF => Script::Latin, 0xAB30..=0xAB6F => Script::Latin, 0xFB00..=0xFB06 => Script::Latin, 0x2E80..=0x2EFF => Script::Han, 0x2F00..=0x2FDF => Script::Han, 0x3400..=0x4DBF => Script::Han, 0x4E00..=0x9FFF => Script::Han, 0xF900..=0xFAFF => Script::Han, 0x20000..=0x2A6DF => Script::Han, 0x2A700..=0x2B73F => Script::Han, 0x2B740..=0x2B81F => Script::Han, 0x2B820..=0x2CEAF => Script::Han, 0x2CEB0..=0x2EBEF => Script::Han, 0x30000..=0x3134F => Script::Han, 0x3040..=0x309F => Script::Hiragana,
287 0x1B001..=0x1B11F => Script::Hiragana, 0x30A0..=0x30FF => Script::Katakana,
291 0x31F0..=0x31FF => Script::Katakana, 0xFF65..=0xFF9F => Script::Katakana, 0x3100..=0x312F => Script::Bopomofo,
296 0x31A0..=0x31BF => Script::Bopomofo, 0x3000..=0x303F => Script::Common,
300
301 0x2000..=0x206F => Script::Common, 0x2070..=0x209F => Script::Common, 0x20A0..=0x20CF => Script::Common, 0x20D0..=0x20FF => Script::Inherited, 0x2100..=0x214F => Script::Common, 0x2150..=0x218F => Script::Common, 0x2190..=0x21FF => Script::Common, 0x2200..=0x22FF => Script::Common, 0x2300..=0x23FF => Script::Common, 0x2400..=0x243F => Script::Common, 0x2440..=0x245F => Script::Common, 0x2460..=0x24FF => Script::Common, 0x2500..=0x257F => Script::Common, 0x2580..=0x259F => Script::Common, 0x25A0..=0x25FF => Script::Common, 0x2600..=0x26FF => Script::Common, 0x2700..=0x27BF => Script::Common, 0x27C0..=0x27EF => Script::Common, 0x27F0..=0x27FF => Script::Common, 0x2800..=0x28FF => Script::Common, 0x2900..=0x297F => Script::Common, 0x2980..=0x29FF => Script::Common, 0x2A00..=0x2AFF => Script::Common, 0x2B00..=0x2BFF => Script::Common, 0xFF01..=0xFF5E => Script::Latin, 0xFF61..=0xFF64 => Script::Common, 0xFE00..=0xFE0F => Script::Inherited, 0xE0100..=0xE01EF => Script::Inherited, 0x1F000..=0x1FAFF => Script::Common, 0xFE10..=0xFE1F => Script::Common, 0xFE20..=0xFE2F => Script::Inherited, 0xFE30..=0xFE4F => Script::Common, 0xFE50..=0xFE6F => Script::Common, 0x07C0..=0x07FF => Script::Arabic, _ => Script::Unknown,
345 }
346}
347
348#[derive(Debug, Clone, PartialEq, Eq)]
356pub struct ScriptRun {
357 pub start: usize,
359 pub end: usize,
361 pub script: Script,
363}
364
365impl ScriptRun {
366 #[inline]
368 pub fn len(&self) -> usize {
369 self.end - self.start
370 }
371
372 #[inline]
374 pub fn is_empty(&self) -> bool {
375 self.start == self.end
376 }
377
378 #[inline]
380 pub fn text<'a>(&self, source: &'a str) -> &'a str {
381 &source[self.start..self.end]
382 }
383}
384
385fn resolve_scripts(chars: &[char]) -> Vec<Script> {
398 let n = chars.len();
399 if n == 0 {
400 return Vec::new();
401 }
402
403 let mut scripts: Vec<Script> = chars.iter().map(|&c| char_script(c)).collect();
404
405 let mut last_specific = Script::Common;
408 for script in &mut scripts {
409 if *script == Script::Inherited {
410 *script = if last_specific.is_common_or_inherited() {
411 Script::Common } else {
413 last_specific
414 };
415 } else if !script.is_common_or_inherited() {
416 last_specific = *script;
417 }
418 }
419
420 let first_specific = scripts
423 .iter()
424 .find(|s| !s.is_common_or_inherited())
425 .copied()
426 .unwrap_or(Script::Latin); for script in &mut scripts {
430 if script.is_common_or_inherited() {
431 *script = first_specific;
432 } else {
433 break;
434 }
435 }
436
437 let mut current = first_specific;
439 for script in &mut scripts {
440 if script.is_common_or_inherited() {
441 *script = current;
442 } else {
443 current = *script;
444 }
445 }
446
447 scripts
448}
449
450pub fn partition_by_script(text: &str) -> Vec<ScriptRun> {
477 if text.is_empty() {
478 return Vec::new();
479 }
480
481 let chars: Vec<char> = text.chars().collect();
482 let resolved = resolve_scripts(&chars);
483
484 let mut runs = Vec::new();
485 let mut byte_offset = 0;
486 let mut run_start = 0;
487 let mut current_script = resolved[0];
488
489 for (i, ch) in chars.iter().enumerate() {
490 let char_len = ch.len_utf8();
491
492 if resolved[i] != current_script {
493 runs.push(ScriptRun {
494 start: run_start,
495 end: byte_offset,
496 script: current_script,
497 });
498 run_start = byte_offset;
499 current_script = resolved[i];
500 }
501
502 byte_offset += char_len;
503 }
504
505 runs.push(ScriptRun {
507 start: run_start,
508 end: byte_offset,
509 script: current_script,
510 });
511
512 runs
513}
514
/// Direction assigned to a [`TextRun`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RunDirection {
    /// Left-to-right.
    Ltr,
    /// Right-to-left.
    Rtl,
}
530
531#[derive(Debug, Clone, PartialEq, Eq)]
536pub struct TextRun {
537 pub start: usize,
539 pub end: usize,
541 pub script: Script,
543 pub direction: RunDirection,
545 pub style_id: u64,
549}
550
551impl TextRun {
552 #[inline]
554 pub fn len(&self) -> usize {
555 self.end - self.start
556 }
557
558 #[inline]
560 pub fn is_empty(&self) -> bool {
561 self.start == self.end
562 }
563
564 #[inline]
566 pub fn text<'a>(&self, source: &'a str) -> &'a str {
567 &source[self.start..self.end]
568 }
569
570 #[inline]
572 pub fn cache_key<'a>(&self, source: &'a str) -> RunCacheKey<'a> {
573 RunCacheKey {
574 text: self.text(source),
575 script: self.script,
576 direction: self.direction,
577 style_id: self.style_id,
578 }
579 }
580}
581
582#[derive(Debug, Clone, PartialEq, Eq)]
591pub struct RunCacheKey<'a> {
592 pub text: &'a str,
594 pub script: Script,
596 pub direction: RunDirection,
598 pub style_id: u64,
600}
601
602impl Hash for RunCacheKey<'_> {
603 fn hash<H: Hasher>(&self, state: &mut H) {
604 self.text.hash(state);
605 self.script.hash(state);
606 self.direction.hash(state);
607 self.style_id.hash(state);
608 }
609}
610
611pub fn partition_text_runs(
640 text: &str,
641 direction_fn: Option<&dyn Fn(usize) -> RunDirection>,
642 style_fn: Option<&dyn Fn(usize) -> u64>,
643) -> Vec<TextRun> {
644 if text.is_empty() {
645 return Vec::new();
646 }
647
648 let script_runs = partition_by_script(text);
649
650 let default_direction = |script: Script| -> RunDirection {
651 if script.is_rtl() {
652 RunDirection::Rtl
653 } else {
654 RunDirection::Ltr
655 }
656 };
657
658 let mut runs = Vec::new();
659
660 for sr in &script_runs {
661 let sub_text = &text[sr.start..sr.end];
663 let mut sub_start = sr.start;
664
665 let first_dir = direction_fn
666 .as_ref()
667 .map_or_else(|| default_direction(sr.script), |f| f(sr.start));
668 let first_style = style_fn.as_ref().map_or(0u64, |f| f(sr.start));
669
670 let mut current_dir = first_dir;
671 let mut current_style = first_style;
672
673 for (i, ch) in sub_text.char_indices() {
674 let byte_pos = sr.start + i;
675 let dir = direction_fn
676 .as_ref()
677 .map_or_else(|| default_direction(sr.script), |f| f(byte_pos));
678 let style = style_fn.as_ref().map_or(0u64, |f| f(byte_pos));
679
680 if dir != current_dir || style != current_style {
681 if byte_pos > sub_start {
683 runs.push(TextRun {
684 start: sub_start,
685 end: byte_pos,
686 script: sr.script,
687 direction: current_dir,
688 style_id: current_style,
689 });
690 }
691 sub_start = byte_pos;
692 current_dir = dir;
693 current_style = style;
694 }
695
696 let _ = ch;
698 }
699
700 if sr.end > sub_start {
702 runs.push(TextRun {
703 start: sub_start,
704 end: sr.end,
705 script: sr.script,
706 direction: current_dir,
707 style_id: current_style,
708 });
709 }
710 }
711
712 runs
713}
714
// Unit tests for script classification, script resolution, run partitioning
// and cache keys.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- char_script: per-character classification ----

    #[test]
    fn script_ascii_letters() {
        assert_eq!(char_script('A'), Script::Latin);
        assert_eq!(char_script('z'), Script::Latin);
        assert_eq!(char_script('M'), Script::Latin);
    }

    #[test]
    fn script_ascii_digits_are_common() {
        for d in '0'..='9' {
            assert_eq!(char_script(d), Script::Common, "digit {d}");
        }
    }

    #[test]
    fn script_ascii_punctuation_is_common() {
        for &c in &[' ', '!', '.', ',', ':', ';', '?', '-', '(', ')', '[', ']'] {
            assert_eq!(char_script(c), Script::Common, "char {c:?}");
        }
    }

    #[test]
    fn script_latin_extended() {
        // Latin-1 Supplement, Latin Extended-A and Latin Extended Additional.
        assert_eq!(char_script('\u{00C0}'), Script::Latin);
        assert_eq!(char_script('\u{00E9}'), Script::Latin);
        assert_eq!(char_script('\u{0148}'), Script::Latin);
        assert_eq!(char_script('\u{1E00}'), Script::Latin);
    }

    #[test]
    fn script_greek() {
        assert_eq!(char_script('\u{0391}'), Script::Greek);
        assert_eq!(char_script('\u{03B1}'), Script::Greek);
        assert_eq!(char_script('\u{03C9}'), Script::Greek);
    }

    #[test]
    fn script_cyrillic() {
        assert_eq!(char_script('\u{0410}'), Script::Cyrillic);
        assert_eq!(char_script('\u{044F}'), Script::Cyrillic);
    }

    #[test]
    fn script_hebrew() {
        assert_eq!(char_script('\u{05D0}'), Script::Hebrew);
        assert_eq!(char_script('\u{05EA}'), Script::Hebrew);
    }

    #[test]
    fn script_arabic() {
        assert_eq!(char_script('\u{0627}'), Script::Arabic);
        assert_eq!(char_script('\u{0645}'), Script::Arabic);
    }

    #[test]
    fn script_devanagari() {
        assert_eq!(char_script('\u{0905}'), Script::Devanagari);
        assert_eq!(char_script('\u{0939}'), Script::Devanagari);
    }

    #[test]
    fn script_thai() {
        // U+0E3F lies inside the Thai block, which the table maps wholesale.
        assert_eq!(char_script('\u{0E01}'), Script::Thai);
        assert_eq!(char_script('\u{0E3F}'), Script::Thai);
    }

    #[test]
    fn script_hangul() {
        assert_eq!(char_script('\u{AC00}'), Script::Hangul);
        assert_eq!(char_script('\u{D7A3}'), Script::Hangul);
    }

    #[test]
    fn script_cjk_han() {
        // First and last code point of the 0x4E00..=0x9FFF range.
        assert_eq!(char_script('\u{4E00}'), Script::Han);
        assert_eq!(char_script('\u{9FFF}'), Script::Han);
    }

    #[test]
    fn script_hiragana_katakana() {
        assert_eq!(char_script('\u{3042}'), Script::Hiragana);
        assert_eq!(char_script('\u{30A2}'), Script::Katakana);
    }

    #[test]
    fn script_combining_marks_are_inherited() {
        // First, second and last code point of the 0x0300..=0x036F range.
        assert_eq!(char_script('\u{0300}'), Script::Inherited);
        assert_eq!(char_script('\u{0301}'), Script::Inherited);
        assert_eq!(char_script('\u{036F}'), Script::Inherited);
    }

    // ---- Script predicates ----

    #[test]
    fn script_rtl_detection() {
        assert!(Script::Arabic.is_rtl());
        assert!(Script::Hebrew.is_rtl());
        assert!(Script::Syriac.is_rtl());
        assert!(Script::Thaana.is_rtl());
        assert!(!Script::Latin.is_rtl());
        assert!(!Script::Han.is_rtl());
        assert!(!Script::Common.is_rtl());
    }

    #[test]
    fn script_common_or_inherited() {
        assert!(Script::Common.is_common_or_inherited());
        assert!(Script::Inherited.is_common_or_inherited());
        assert!(!Script::Latin.is_common_or_inherited());
        assert!(!Script::Arabic.is_common_or_inherited());
    }

    // ---- resolve_scripts: Common/Inherited resolution ----

    #[test]
    fn resolve_empty() {
        assert!(resolve_scripts(&[]).is_empty());
    }

    #[test]
    fn resolve_pure_latin() {
        let chars: Vec<char> = "Hello".chars().collect();
        let resolved = resolve_scripts(&chars);
        assert!(resolved.iter().all(|&s| s == Script::Latin));
    }

    #[test]
    fn resolve_common_absorbed_by_latin() {
        // Space, digits and punctuation follow Latin letters.
        let chars: Vec<char> = "Hi 42!".chars().collect();
        let resolved = resolve_scripts(&chars);
        assert!(
            resolved.iter().all(|&s| s == Script::Latin),
            "All should be Latin: {resolved:?}"
        );
    }

    #[test]
    fn resolve_leading_space() {
        // Leading Common characters attach to the first real script.
        let chars: Vec<char> = " Hello".chars().collect();
        let resolved = resolve_scripts(&chars);
        assert_eq!(resolved[0], Script::Latin);
    }

    #[test]
    fn resolve_combining_mark_inherits() {
        // 'e' followed by U+0301 (combining acute accent).
        let chars: Vec<char> = "e\u{0301}".chars().collect();
        let resolved = resolve_scripts(&chars);
        assert_eq!(resolved[0], Script::Latin);
        assert_eq!(
            resolved[1],
            Script::Latin,
            "combining mark should inherit Latin"
        );
    }

    #[test]
    fn resolve_mixed_scripts() {
        // "Hello " followed by five Arabic letters: 11 characters in total.
        let text = "Hello \u{0645}\u{0631}\u{062D}\u{0628}\u{0627}";
        let chars: Vec<char> = text.chars().collect();
        let resolved = resolve_scripts(&chars);

        // Indices 0..5: "Hello".
        for (i, script) in resolved.iter().enumerate().take(5) {
            assert_eq!(*script, Script::Latin, "char {i}");
        }
        // The space stays with the preceding Latin text.
        assert_eq!(resolved[5], Script::Latin, "space");
        // Indices 6..11: the Arabic letters.
        for (i, script) in resolved.iter().enumerate().take(11).skip(6) {
            assert_eq!(*script, Script::Arabic, "char {i}");
        }
    }

    #[test]
    fn resolve_all_common_defaults_to_latin() {
        let chars: Vec<char> = "123 !?".chars().collect();
        let resolved = resolve_scripts(&chars);
        assert!(
            resolved.iter().all(|&s| s == Script::Latin),
            "All-Common should default to Latin"
        );
    }

    // ---- partition_by_script ----

    #[test]
    fn partition_empty() {
        assert!(partition_by_script("").is_empty());
    }

    #[test]
    fn partition_pure_latin() {
        let runs = partition_by_script("Hello World");
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
        assert_eq!(runs[0].start, 0);
        assert_eq!(runs[0].end, 11);
        assert_eq!(runs[0].text("Hello World"), "Hello World");
    }

    #[test]
    fn partition_pure_arabic() {
        let text = "\u{0645}\u{0631}\u{062D}\u{0628}\u{0627}";
        let runs = partition_by_script(text);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Arabic);
    }

    #[test]
    fn partition_latin_then_arabic() {
        let text = "Hello \u{0645}\u{0631}\u{062D}\u{0628}\u{0627}";
        let runs = partition_by_script(text);
        assert!(runs.len() >= 2, "runs: {runs:?}");

        // The first run is the Latin prefix.
        assert_eq!(runs[0].script, Script::Latin);
        assert!(runs[0].text(text).starts_with("Hello"));

        // The last run is the Arabic suffix.
        let last = runs.last().unwrap();
        assert_eq!(last.script, Script::Arabic);
    }

    #[test]
    fn partition_latin_cjk_latin() {
        let text = "Hello\u{4E16}\u{754C}World";
        let runs = partition_by_script(text);
        assert_eq!(runs.len(), 3, "runs: {runs:?}");
        assert_eq!(runs[0].script, Script::Latin);
        assert_eq!(runs[1].script, Script::Han);
        assert_eq!(runs[2].script, Script::Latin);
    }

    #[test]
    fn partition_japanese_mixed() {
        // Five Hiragana characters, two Han characters, one Katakana character.
        let text = "\u{3053}\u{3093}\u{306B}\u{3061}\u{306F}\u{4E16}\u{754C}\u{30A2}";
        let runs = partition_by_script(text);
        assert!(runs.len() >= 2, "runs: {runs:?}");

        let scripts: Vec<Script> = runs.iter().map(|r| r.script).collect();
        assert!(scripts.contains(&Script::Hiragana));
        assert!(scripts.contains(&Script::Han));
        assert!(scripts.contains(&Script::Katakana));
    }

    #[test]
    fn partition_runs_cover_full_text() {
        let text = "Hello \u{05E9}\u{05DC}\u{05D5}\u{05DD} World \u{4E16}\u{754C}";
        let runs = partition_by_script(text);

        // Runs start at 0, end at text.len(), and leave no gaps or overlaps.
        assert_eq!(runs[0].start, 0);
        assert_eq!(runs.last().unwrap().end, text.len());
        for window in runs.windows(2) {
            assert_eq!(
                window[0].end, window[1].start,
                "runs must be contiguous: {:?}",
                window
            );
        }
    }

    #[test]
    fn partition_run_text_slicing() {
        let text = "ABCdef";
        let runs = partition_by_script(text);
        let reconstructed: String = runs.iter().map(|r| r.text(text)).collect();
        assert_eq!(reconstructed, text);
    }

    #[test]
    fn partition_combining_mark_stays_with_base() {
        let text = "e\u{0301}";
        let runs = partition_by_script(text);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
    }

    #[test]
    fn partition_digits_absorbed() {
        let runs = partition_by_script("Item 42");
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
    }

    // ---- partition_text_runs ----

    #[test]
    fn text_runs_empty() {
        assert!(partition_text_runs("", None, None).is_empty());
    }

    #[test]
    fn text_runs_simple_latin() {
        let runs = partition_text_runs("Hello World", None, None);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
        assert_eq!(runs[0].direction, RunDirection::Ltr);
        assert_eq!(runs[0].style_id, 0);
    }

    #[test]
    fn text_runs_arabic_direction() {
        // Without a direction callback, direction comes from the script.
        let text = "\u{0645}\u{0631}\u{062D}\u{0628}\u{0627}";
        let runs = partition_text_runs(text, None, None);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Arabic);
        assert_eq!(runs[0].direction, RunDirection::Rtl);
    }

    #[test]
    fn text_runs_mixed_scripts() {
        let text = "Hello\u{4E16}\u{754C}World";
        let runs = partition_text_runs(text, None, None);
        assert_eq!(runs.len(), 3);
        assert_eq!(runs[0].direction, RunDirection::Ltr);
        assert_eq!(runs[1].direction, RunDirection::Ltr);
        assert_eq!(runs[2].direction, RunDirection::Ltr);
    }

    #[test]
    fn text_runs_style_split() {
        let text = "Hello World";
        // Style changes at byte offset 5, inside a single script run.
        let style_fn = |offset: usize| -> u64 { if offset < 5 { 1 } else { 2 } };
        let runs = partition_text_runs(text, None, Some(&style_fn));
        assert_eq!(runs.len(), 2, "runs: {runs:?}");
        assert_eq!(runs[0].style_id, 1);
        assert_eq!(runs[0].text(text), "Hello");
        assert_eq!(runs[1].style_id, 2);
        assert_eq!(runs[1].text(text), " World");
    }

    #[test]
    fn text_runs_direction_override() {
        let text = "ABC";
        // The callback wins over the script-derived direction.
        let dir_fn = |_offset: usize| -> RunDirection { RunDirection::Rtl };
        let runs = partition_text_runs(text, Some(&dir_fn), None);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].direction, RunDirection::Rtl);
    }

    #[test]
    fn text_runs_cover_full_text() {
        let text = "Hello \u{05E9}\u{05DC}\u{05D5}\u{05DD} World";
        let runs = partition_text_runs(text, None, None);

        assert_eq!(runs[0].start, 0);
        assert_eq!(runs.last().unwrap().end, text.len());
        for window in runs.windows(2) {
            assert_eq!(window[0].end, window[1].start);
        }

        let reconstructed: String = runs.iter().map(|r| r.text(text)).collect();
        assert_eq!(reconstructed, text);
    }

    // ---- RunCacheKey ----

    #[test]
    fn cache_key_equality() {
        let text = "Hello";
        let run = TextRun {
            start: 0,
            end: 5,
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };

        let k1 = run.cache_key(text);
        let k2 = run.cache_key(text);
        assert_eq!(k1, k2);
    }

    #[test]
    fn cache_key_differs_by_script() {
        let k1 = RunCacheKey {
            text: "abc",
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        let k2 = RunCacheKey {
            text: "abc",
            script: Script::Greek,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        assert_ne!(k1, k2);
    }

    #[test]
    fn cache_key_differs_by_direction() {
        let k1 = RunCacheKey {
            text: "abc",
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        let k2 = RunCacheKey {
            text: "abc",
            script: Script::Latin,
            direction: RunDirection::Rtl,
            style_id: 0,
        };
        assert_ne!(k1, k2);
    }

    #[test]
    fn cache_key_differs_by_style() {
        let k1 = RunCacheKey {
            text: "abc",
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        let k2 = RunCacheKey {
            text: "abc",
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 1,
        };
        assert_ne!(k1, k2);
    }

    #[test]
    fn cache_key_hashable() {
        use std::collections::HashSet;
        let mut set = HashSet::new();
        let k = RunCacheKey {
            text: "hello",
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        set.insert(k.clone());
        assert!(set.contains(&k));
    }

    // ---- Edge cases ----

    #[test]
    fn single_char() {
        let runs = partition_by_script("A");
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
        assert_eq!(runs[0].start, 0);
        assert_eq!(runs[0].end, 1);
    }

    #[test]
    fn only_spaces() {
        // No real script present, so the Latin fallback applies.
        let runs = partition_by_script(" ");
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
    }

    #[test]
    fn emoji_is_common() {
        // U+1F600 is classified Common and is absorbed by the Latin text.
        let text = "Hello \u{1F600} World";
        let runs = partition_by_script(text);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].script, Script::Latin);
    }

    #[test]
    fn multibyte_utf8_offsets() {
        // U+00E9 encodes to 2 bytes, U+4E00 to 3 bytes.
        let text = "\u{00E9}\u{4E00}";
        let runs = partition_by_script(text);
        assert!(runs.len() >= 2);
        assert_eq!(runs[0].end, 2);
        assert_eq!(runs[1].start, 2);
        assert_eq!(runs[1].end, 5);
    }

    #[test]
    fn text_run_len_and_empty() {
        let run = TextRun {
            start: 5,
            end: 10,
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        assert_eq!(run.len(), 5);
        assert!(!run.is_empty());

        let empty = TextRun {
            start: 5,
            end: 5,
            script: Script::Latin,
            direction: RunDirection::Ltr,
            style_id: 0,
        };
        assert_eq!(empty.len(), 0);
        assert!(empty.is_empty());
    }

    #[test]
    fn script_run_len_and_empty() {
        let run = ScriptRun {
            start: 0,
            end: 5,
            script: Script::Latin,
        };
        assert_eq!(run.len(), 5);
        assert!(!run.is_empty());
    }

    #[test]
    fn script_enum_ord() {
        // Ordering follows declaration order, so Common sorts first.
        let mut scripts = [Script::Arabic, Script::Latin, Script::Common];
        scripts.sort();
        assert_eq!(scripts[0], Script::Common);
    }

    #[test]
    fn many_script_transitions() {
        // Latin, Greek, Cyrillic, Hebrew and Arabic back to back.
        let text = "Hello\u{0391}\u{0392}\u{0410}\u{0411}\u{05D0}\u{05D1}\u{0627}\u{0628}";
        let runs = partition_by_script(text);

        let scripts: Vec<Script> = runs.iter().map(|r| r.script).collect();
        assert!(scripts.contains(&Script::Latin));
        assert!(scripts.contains(&Script::Greek));
        assert!(scripts.contains(&Script::Cyrillic));
        assert!(scripts.contains(&Script::Hebrew));
        assert!(scripts.contains(&Script::Arabic));

        // Runs must be contiguous.
        for window in runs.windows(2) {
            assert_eq!(window[0].end, window[1].start);
        }
    }
}