1#![cfg_attr(not(test), no_std)]
5#![cfg_attr(feature = "bench", feature(test))]
6
7mod tables;
8
9use core::convert::{TryFrom, TryInto};
10use core::fmt;
11use core::u64;
12pub use tables::script_extensions;
13use tables::{get_script, get_script_extension, NEXT_SCRIPT};
14pub use tables::{Script, UNICODE_VERSION};
15
16impl Script {
17 pub fn full_name(self) -> &'static str {
19 self.inner_full_name()
20 }
21
22 pub fn from_full_name(input: &str) -> Option<Self> {
26 Self::inner_from_full_name(input)
27 }
28
29 pub fn short_name(self) -> &'static str {
31 self.inner_short_name()
32 }
33
34 pub fn from_short_name(input: &str) -> Option<Self> {
38 Self::inner_from_short_name(input)
39 }
40
41 pub fn as_iso15924_tag(self) -> u32 {
43 let arr: [u8; 4] = self.inner_short_name().as_bytes().try_into().unwrap();
44 u32::from_be_bytes(arr)
45 }
46
47 pub fn is_recommended(self) -> bool {
50 use Script::*;
51 match self {
52 Common | Inherited | Arabic | Armenian | Bengali | Bopomofo | Cyrillic | Devanagari
53 | Ethiopic | Georgian | Greek | Gujarati | Gurmukhi | Han | Hangul | Hebrew
54 | Hiragana | Kannada | Katakana | Khmer | Lao | Latin | Malayalam | Myanmar | Oriya
55 | Sinhala | Tamil | Telugu | Thaana | Thai | Tibetan => true,
56 _ => false,
57 }
58 }
59}
60
61impl From<Script> for ScriptExtension {
62 fn from(script: Script) -> Self {
63 if script == Script::Common {
64 ScriptExtension::new_common()
65 } else if script == Script::Inherited {
66 ScriptExtension::new_inherited()
67 } else if script == Script::Unknown {
68 ScriptExtension::new_unknown()
69 } else {
70 let mut first = 0;
71 let mut second = 0;
72 let mut third = 0;
73 let bit = script as u8;
74 if bit < 64 {
76 first = 1 << bit as u64;
77 } else if bit < 128 {
78 second = 1 << (bit - 64) as u64;
81 } else {
82 third = 1 << (bit - 128) as u32;
83 }
84 ScriptExtension::new(first, second, third)
85 }
86 }
87}
88
89impl TryFrom<ScriptExtension> for Script {
90 type Error = ();
91 fn try_from(ext: ScriptExtension) -> Result<Self, ()> {
92 if ext.is_common_or_inherited() {
93 if ext.common {
94 Ok(Script::Common)
95 } else {
96 Ok(Script::Inherited)
97 }
98 } else if ext.is_empty() {
99 Ok(Script::Unknown)
100 } else {
101 let fo = ext.first.count_ones();
103 let so = ext.second.count_ones();
104 let to = ext.third.count_ones();
105 if fo == 1 && so == 0 && to == 0 {
107 Ok(Script::for_integer(ext.first.trailing_zeros() as u8))
109 } else if fo == 0 && so == 1 && to == 0 {
111 Ok(Script::for_integer(64 + ext.second.trailing_zeros() as u8))
112 } else if fo == 0 && so == 0 && to == 1 {
114 Ok(Script::for_integer(128 + ext.third.trailing_zeros() as u8))
115 } else {
116 Err(())
117 }
118 }
119 }
120}
121
122impl Default for Script {
123 fn default() -> Self {
124 Script::Common
125 }
126}
127
128impl From<char> for Script {
129 fn from(o: char) -> Self {
130 o.script()
131 }
132}
133
134impl fmt::Display for Script {
135 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
136 write!(f, "{}", self.full_name())
137 }
138}
139
/// A set of scripts, stored as a bitset spread over three 64-bit words plus a flag.
///
/// Bit `n`, counted across `first`, `second` and `third` in that order, stands for the
/// script whose integer value is `n`. Common and Inherited are both encoded as "every
/// used bit set" and are told apart by the `common` flag; the empty set stands for
/// Unknown.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub struct ScriptExtension {
    // Bits for scripts with integer values 0..64.
    first: u64,
    // Bits for scripts with integer values 64..128.
    second: u64,
    // Bits for scripts with integer values 128 and above; only the low
    // `NEXT_SCRIPT - 128` bits are in use (see `THIRD_MAX`).
    third: u64,
    // Set for Common; distinguishes it from Inherited, which shares the all-bits pattern.
    common: bool,
}
158
159impl ScriptExtension {
160 const THIRD_MAX: u64 = ((1 << (NEXT_SCRIPT - 128)) - 1);
165
166 pub(crate) const fn new(first: u64, second: u64, third: u64) -> Self {
167 ScriptExtension {
168 first,
169 second,
170 third,
171 common: false,
172 }
173 }
174
175 pub(crate) const fn new_common() -> Self {
176 ScriptExtension {
177 first: u64::MAX,
178 second: u64::MAX,
179 third: Self::THIRD_MAX,
180 common: true,
181 }
182 }
183
184 pub(crate) const fn new_inherited() -> Self {
185 ScriptExtension {
186 first: u64::MAX,
187 second: u64::MAX,
188 third: Self::THIRD_MAX,
189 common: false,
190 }
191 }
192
193 pub(crate) const fn new_unknown() -> Self {
194 ScriptExtension {
195 first: 0,
196 second: 0,
197 third: 0,
198 common: false,
199 }
200 }
201
202 const fn is_common_or_inherited(self) -> bool {
203 (self.first == u64::MAX) & (self.second == u64::MAX) & (self.third == Self::THIRD_MAX)
204 }
205
206 pub const fn is_common(self) -> bool {
208 self.is_common_or_inherited() & self.common
209 }
210
211 pub const fn is_inherited(self) -> bool {
213 self.is_common_or_inherited() & !self.common
214 }
215
216 pub const fn is_empty(self) -> bool {
218 (self.first == 0) & (self.second == 0) & (self.third == 0)
219 }
220
221 pub fn len(self) -> usize {
223 if self.is_common_or_inherited() {
224 1
225 } else {
226 (self.first.count_ones() + self.second.count_ones() + self.third.count_ones()) as usize
227 }
228 }
229
230 pub fn intersect_with(&mut self, other: Self) {
237 *self = self.intersection(other)
238 }
239
240 pub const fn intersection(self, other: Self) -> Self {
246 let first = self.first & other.first;
247 let second = self.second & other.second;
248 let third = self.third & other.third;
249 let common = self.common & other.common;
250 ScriptExtension {
251 first,
252 second,
253 third,
254 common,
255 }
256 }
257
258 pub const fn union(self, other: Self) -> Self {
263 let first = self.first | other.first;
264 let second = self.second | other.second;
265 let third = self.third | other.third;
266 let common = self.common | other.common;
267 ScriptExtension {
268 first,
269 second,
270 third,
271 common,
272 }
273 }
274
275 pub fn contains_script(self, script: Script) -> bool {
281 !self.intersection(script.into()).is_empty()
282 }
283
284 pub fn for_str(x: &str) -> Self {
287 let mut ext = ScriptExtension::default();
288 for ch in x.chars() {
289 ext.intersect_with(ch.into());
290 }
291 ext
292 }
293
294 pub fn iter(self) -> ScriptIterator {
298 ScriptIterator { ext: self }
299 }
300}
301
302impl Default for ScriptExtension {
303 fn default() -> Self {
304 ScriptExtension::new_common()
305 }
306}
307
308impl From<char> for ScriptExtension {
309 fn from(o: char) -> Self {
310 o.script_extension()
311 }
312}
313
314impl From<&'_ str> for ScriptExtension {
315 fn from(o: &'_ str) -> Self {
316 Self::for_str(o)
317 }
318}
319
320impl fmt::Debug for ScriptExtension {
321 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
322 write!(f, "ScriptExtension(")?;
323 fmt::Display::fmt(self, f)?;
324 write!(f, ")")
325 }
326}
327
328impl fmt::Display for ScriptExtension {
329 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
330 if self.is_common() {
331 write!(f, "Common")?;
332 } else if self.is_inherited() {
333 write!(f, "Inherited")?;
334 } else if self.is_empty() {
335 write!(f, "Unknown")?;
336 } else {
337 let mut first = true;
338 for script in self.iter() {
339 if first {
340 first = false;
341 } else {
342 write!(f, " + ")?;
343 }
344 script.full_name().fmt(f)?;
345 }
346 }
347 Ok(())
348 }
349}
350
/// Script lookups for a value; this file implements the trait for `char`.
pub trait UnicodeScript {
    /// Returns the single [`Script`] assigned to `self`.
    fn script(&self) -> Script;
    /// Returns the [`ScriptExtension`] (set of scripts) associated with `self`.
    fn script_extension(&self) -> ScriptExtension;
}
358
359impl UnicodeScript for char {
360 fn script(&self) -> Script {
361 get_script(*self).unwrap_or(Script::Unknown)
362 }
363
364 fn script_extension(&self) -> ScriptExtension {
365 get_script_extension(*self).unwrap_or_else(|| self.script().into())
366 }
367}
368
/// Iterator over the scripts of a [`ScriptExtension`], created by
/// [`ScriptExtension::iter`].
///
/// Yields one script per set bit, lowest bit position first; a Common or Inherited
/// extension yields just that one script.
pub struct ScriptIterator {
    // Scripts not yet yielded; bits are cleared as iteration proceeds.
    ext: ScriptExtension,
}
375
376impl Iterator for ScriptIterator {
377 type Item = Script;
378
379 fn next(&mut self) -> Option<Script> {
380 if self.ext.is_common_or_inherited() {
381 let common = self.ext.common;
382 self.ext = ScriptExtension::new_unknown();
383 if common {
384 Some(Script::Common)
385 } else {
386 Some(Script::Inherited)
387 }
388 } else if self.ext.first != 0 {
390 let bit = self.ext.first.trailing_zeros();
392 self.ext.first &= !(1 << bit);
394 Some(Script::for_integer(bit as u8))
395 } else if self.ext.second != 0 {
397 let bit = self.ext.second.trailing_zeros();
398 self.ext.second &= !(1 << bit);
399 Some(Script::for_integer(64 + bit as u8))
400 } else if self.ext.third != 0 {
402 let bit = self.ext.third.trailing_zeros();
403 self.ext.third &= !(1 << bit);
404 Some(Script::for_integer(128 + bit as u8))
405 } else {
406 None
408 }
409 }
410}
411
#[cfg(test)]
mod tests {
    use crate::*;
    use std::collections::HashSet;
    use std::convert::TryInto;

    #[cfg(feature = "bench")]
    use test::bench::Bencher;
    #[cfg(feature = "bench")]
    extern crate test;

    // Devanagari sample text shared by `test_specific` and `bench_string_ext`.
    const SAMPLE_TEXT: &str = "सवव मानवी व्यद्क् जन्मतःच स्वतींत्र आहेत व त्ाींना समान प्रवतष्ठा व समान अविकार आहेत. त्ाींना ववचारशद्क् व सवविे कबुद्द्धलाभलेली आहे. व त्ाींनी एकमेकाींशी बींिुत्वाचाभावनेने आचरण करावे.";

    // Every script value must round-trip through `ScriptExtension` and be unique.
    #[test]
    fn test_conversion() {
        let common = ScriptExtension::new_common();
        let inherited = ScriptExtension::new_inherited();
        let unknown = ScriptExtension::new_unknown();

        let mut seen_scripts = HashSet::new();
        let mut seen_exts = HashSet::new();
        for bit in 0..NEXT_SCRIPT {
            let script = Script::for_integer(bit);
            let ext: ScriptExtension = script.into();

            assert!(
                !seen_scripts.contains(&script),
                "Found script {:?} twice!",
                script
            );
            assert!(
                !seen_exts.contains(&ext),
                "Found extension {:?} twice!",
                ext
            );
            seen_scripts.insert(script);
            seen_exts.insert(ext);

            assert_eq!(script as u8, bit);
            // Common and Inherited intersect every script; Unknown intersects none.
            assert!(!common.intersection(ext).is_empty());
            assert!(!inherited.intersection(ext).is_empty());
            assert!(unknown.intersection(ext).is_empty());
            assert_eq!(ext.iter().collect::<Vec<_>>(), vec![script]);
            assert_eq!(Ok(script), ext.try_into());
        }
    }

    // A Devanagari-only string resolves to DEVA and behaves as a subset of the
    // larger Devanagari-sharing extension.
    #[test]
    fn test_specific() {
        let shared = script_extensions::DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH;

        let ext = ScriptExtension::for_str(SAMPLE_TEXT);
        assert_eq!(ext, script_extensions::DEVA);
        println!("{:?}", shared);
        println!("{:?}", ext.intersection(shared));
        assert!(!ext.intersection(shared).is_empty());

        let u = ext.union(Script::Dogra.into());
        assert_eq!(u.intersection(shared), u);
    }

    // `contains_script` must agree with iteration for every script value.
    #[test]
    fn test_specific_ext() {
        let ext = script_extensions::DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH;

        let all: HashSet<_> = ext.iter().collect();

        for bit in 0..NEXT_SCRIPT {
            let script = Script::for_integer(bit);
            let expected = all.contains(&script);
            assert!(ext.contains_script(script) == expected);
        }

        let members = [
            Script::Devanagari,
            Script::Dogra,
            Script::Gujarati,
            Script::Gurmukhi,
            Script::Khojki,
            Script::Kaithi,
            Script::Mahajani,
            Script::Modi,
            Script::Khudawadi,
            Script::Takri,
            Script::Tirhuta,
        ];
        for &script in &members {
            assert!(ext.contains_script(script));
        }

        // More than one script is present, so no single-script conversion exists.
        let single: Result<Script, _> = ext.try_into();
        assert!(single.is_err());
    }

    #[cfg(feature = "bench")]
    #[bench]
    fn bench_script_intersection(b: &mut Bencher) {
        b.iter(|| {
            let script = test::black_box(Script::Devanagari);
            let ext = test::black_box(script_extensions::BENG_DEVA_DOGR_GONG_GONM_GRAN_GUJR_GURU_KNDA_MAHJ_MLYM_NAND_ONAO_ORYA_SIND_SINH_SYLO_TAKR_TAML_TELU_TIRH);
            let shared = ext.intersection(script.into());
            test::black_box(shared);
        })
    }

    #[cfg(feature = "bench")]
    #[bench]
    fn bench_ext_to_script(b: &mut Bencher) {
        let ext: ScriptExtension = Script::Devanagari.into();
        b.iter(|| {
            let ext = test::black_box(ext);
            let script: Result<Script, _> = ext.try_into();
            let _ = test::black_box(script);
        })
    }

    #[cfg(feature = "bench")]
    #[bench]
    fn bench_script_to_ext(b: &mut Bencher) {
        b.iter(|| {
            let script = test::black_box(Script::Devanagari);
            let ext: ScriptExtension = script.into();
            test::black_box(ext);
        })
    }

    #[cfg(feature = "bench")]
    #[bench]
    fn bench_ext_intersection(b: &mut Bencher) {
        b.iter(|| {
            let e1 = test::black_box(script_extensions::ARAB_GARA_NKOO_ROHG_SYRC_THAA_YEZI);
            let e2 = test::black_box(script_extensions::BENG_DEVA_DOGR_GONG_GONM_GRAN_GUJR_GURU_KNDA_MAHJ_MLYM_NAND_ONAO_ORYA_SIND_SINH_SYLO_TAKR_TAML_TELU_TIRH);
            let shared = e2.intersection(e1);
            test::black_box(shared);
        })
    }

    #[cfg(feature = "bench")]
    #[bench]
    fn bench_to_vec(b: &mut Bencher) {
        b.iter(|| {
            let ext = test::black_box(script_extensions::BENG_DEVA_DOGR_GONG_GONM_GRAN_GUJR_GURU_KNDA_MAHJ_MLYM_NAND_ONAO_ORYA_SIND_SINH_SYLO_TAKR_TAML_TELU_TIRH);
            let scripts = ext.iter().collect::<Vec<_>>();
            test::black_box(scripts);
        })
    }

    #[cfg(feature = "bench")]
    #[bench]
    fn bench_string_ext(b: &mut Bencher) {
        b.iter(|| {
            let s = test::black_box(SAMPLE_TEXT);
            test::black_box(ScriptExtension::for_str(s));
        })
    }
}