1use crate::common;
17
/// Returns the character that immediately follows position `offset` in `s`.
///
/// `offset` is counted in characters (Unicode scalar values), not bytes.
/// `None` is returned when there is no character at `offset + 1`, which
/// includes the case where `offset + 1` is not representable as a `usize`.
#[inline]
fn after(s: &str, offset: usize) -> Option<char> {
    // `checked_add` keeps `usize::MAX` from overflowing: a plain `offset + 1`
    // panics in debug builds and wraps around to index 0 in release builds.
    let index = offset.checked_add(1)?;
    s.chars().nth(index)
}
29
/// Returns the character that immediately precedes position `offset` in `s`.
///
/// `offset` is counted in characters (Unicode scalar values), not bytes.
/// `None` is returned when `offset` is zero or when there is no character
/// at `offset - 1`.
#[inline]
fn before(s: &str, offset: usize) -> Option<char> {
    // Position 0 has no predecessor; `checked_sub` turns that into `None`.
    let index = offset.checked_sub(1)?;
    s.chars().nth(index)
}
45
/// Reasons why a context rule could not produce a `true`/`false` verdict.
///
/// The enum is fieldless, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ContextRuleError {
    /// The character found at the requested offset is not the code point
    /// (or one of the code points) that the rule is written for.
    NotApplicable,
    /// The rule could not be evaluated: the offset lies outside the string,
    /// or a neighbouring character that the rule needs does not exist.
    Undefined,
}
55
56pub fn rule_zero_width_nonjoiner(s: &str, offset: usize) -> Result<bool, ContextRuleError> {
70 if 0x200c != s.chars().nth(offset).ok_or(ContextRuleError::Undefined)? as u32 {
71 return Err(ContextRuleError::NotApplicable);
72 }
73
74 let mut prev = before(s, offset).ok_or(ContextRuleError::Undefined)?;
75 let mut cp = prev as u32;
76 if common::is_virama(cp) {
77 return Ok(true);
78 }
79
80 let mut i = offset - 1;
85 while common::is_transparent(cp) {
86 prev = before(s, i).ok_or(ContextRuleError::Undefined)?;
87 cp = prev as u32;
88 i -= 1;
89 }
90
91 if !(common::is_left_joining(cp) || common::is_dual_joining(cp)) {
93 return Ok(false);
94 }
95
96 let mut next = after(s, offset).ok_or(ContextRuleError::Undefined)?;
98 cp = next as u32;
99 i = offset + 1;
100 while common::is_transparent(cp) {
101 next = after(s, i).ok_or(ContextRuleError::Undefined)?;
102 cp = next as u32;
103 i += 1;
104 }
105
106 Ok(common::is_right_joining(cp) || common::is_dual_joining(cp))
108}
109
110pub fn rule_zero_width_joiner(s: &str, offset: usize) -> Result<bool, ContextRuleError> {
121 if 0x200d != s.chars().nth(offset).ok_or(ContextRuleError::Undefined)? as u32 {
122 return Err(ContextRuleError::NotApplicable);
123 }
124 let prev = before(s, offset).ok_or(ContextRuleError::Undefined)?;
125 Ok(common::is_virama(prev as u32))
126}
127
128pub fn rule_middle_dot(s: &str, offset: usize) -> Result<bool, ContextRuleError> {
138 if 0x00b7 != s.chars().nth(offset).ok_or(ContextRuleError::Undefined)? as u32 {
139 return Err(ContextRuleError::NotApplicable);
140 }
141 let prev = before(s, offset).ok_or(ContextRuleError::Undefined)?;
142 let next = after(s, offset).ok_or(ContextRuleError::Undefined)?;
143 Ok(prev as u32 == 0x006c && next as u32 == 0x006c)
144}
145
146pub fn rule_greek_lower_numeral_sign_keraia(
155 s: &str,
156 offset: usize,
157) -> Result<bool, ContextRuleError> {
158 if 0x0375 != s.chars().nth(offset).ok_or(ContextRuleError::Undefined)? as u32 {
159 return Err(ContextRuleError::NotApplicable);
160 }
161 let after = after(s, offset).ok_or(ContextRuleError::Undefined)?;
162 Ok(common::is_greek(after as u32))
163}
164
165pub fn rule_hebrew_punctuation(s: &str, offset: usize) -> Result<bool, ContextRuleError> {
175 let cp = s.chars().nth(offset).ok_or(ContextRuleError::Undefined)? as u32;
176 if cp != 0x05f3 && cp != 0x05f4 {
177 return Err(ContextRuleError::NotApplicable);
178 }
179 let prev = before(s, offset).ok_or(ContextRuleError::Undefined)?;
180 Ok(common::is_hebrew(prev as u32))
181}
182
183pub fn rule_katakana_middle_dot(s: &str, offset: usize) -> Result<bool, ContextRuleError> {
194 if 0x30fb != s.chars().nth(offset).ok_or(ContextRuleError::Undefined)? as u32 {
195 return Err(ContextRuleError::NotApplicable);
196 }
197 for c in s.chars() {
198 let cp = c as u32;
199 if common::is_hiragana(cp) || common::is_katakana(cp) || common::is_han(cp) {
200 return Ok(true);
201 }
202 }
203
204 Ok(false)
205}
206
207pub fn rule_arabic_indic_digits(s: &str, offset: usize) -> Result<bool, ContextRuleError> {
215 let cp = s.chars().nth(offset).ok_or(ContextRuleError::Undefined)? as u32;
216 if !(0x0660..=0x0669).contains(&cp) {
217 return Err(ContextRuleError::NotApplicable);
218 }
219 let range = 0x06f0..=0x06f9;
220 for c in s.chars() {
221 if range.contains(&(c as u32)) {
222 return Ok(false);
223 }
224 }
225
226 Ok(true)
227}
228
229pub fn rule_extended_arabic_indic_digits(s: &str, offset: usize) -> Result<bool, ContextRuleError> {
237 let cp = s.chars().nth(offset).ok_or(ContextRuleError::Undefined)? as u32;
238 if !(0x06f0..=0x06f9).contains(&cp) {
239 return Err(ContextRuleError::NotApplicable);
240 }
241 let range = 0x0660..=0x0669;
242 for c in s.chars() {
243 if range.contains(&(c as u32)) {
244 return Ok(false);
245 }
246 }
247
248 Ok(true)
249}
250
251pub type ContextRule = fn(s: &str, offset: usize) -> Result<bool, ContextRuleError>;
253
254pub fn get_context_rule(cp: u32) -> Option<ContextRule> {
261 match cp {
262 0x00b7 => Some(rule_middle_dot),
263 0x200c => Some(rule_zero_width_nonjoiner),
264 0x200d => Some(rule_zero_width_joiner),
265 0x0375 => Some(rule_greek_lower_numeral_sign_keraia),
266 0x05f3 | 0x5f4 => Some(rule_hebrew_punctuation),
267 0x30fb => Some(rule_katakana_middle_dot),
268 0x0660..=0x0669 => Some(rule_arabic_indic_digits),
269 0x06f0..=0x06f9 => Some(rule_extended_arabic_indic_digits),
270 _ => None,
271 }
272}
273
#[cfg(test)]
mod tests {
    use crate::context::*;

    /// Outcome of a rule evaluation, as returned by every `rule_*` function.
    type Verdict = Result<bool, ContextRuleError>;

    const NOT_APPLICABLE: Verdict = Err(ContextRuleError::NotApplicable);
    const UNDEFINED: Verdict = Err(ContextRuleError::Undefined);

    /// Applies `rule` to every `(label, offset, expected)` case and checks
    /// that the verdict matches.
    fn assert_rule(rule: ContextRule, cases: &[(&str, usize, Verdict)]) {
        for &(label, offset, ref expected) in cases {
            assert_eq!(
                &rule(label, offset),
                expected,
                "label {:?}, offset {}",
                label,
                offset
            );
        }
    }

    /// Applies a neighbour lookup (`after` / `before`) to every
    /// `(label, offset, expected)` case.
    fn assert_neighbour(
        lookup: fn(&str, usize) -> Option<char>,
        cases: &[(&str, usize, Option<char>)],
    ) {
        for &(label, offset, expected) in cases {
            assert_eq!(
                lookup(label, offset),
                expected,
                "label {:?}, offset {}",
                label,
                offset
            );
        }
    }

    #[test]
    fn check_after() {
        assert_neighbour(
            after,
            &[
                ("", 0, None),
                ("", 5, None),
                ("a", 0, None),
                ("a", 5, None),
                ("ab", 0, Some('b')),
                ("ab", 1, None),
                ("abc", 1, Some('c')),
            ],
        );
    }

    #[test]
    fn check_before() {
        assert_neighbour(
            before,
            &[
                ("", 0, None),
                ("", 5, None),
                ("a", 0, None),
                ("a", 5, None),
                ("ab", 1, Some('a')),
                ("ab", 0, None),
                ("abc", 2, Some('b')),
            ],
        );
    }

    #[test]
    fn check_rule_zero_width_nonjoiner() {
        assert_rule(
            rule_zero_width_nonjoiner,
            &[
                // Character at the offset is not U+200C.
                ("A", 0, NOT_APPLICABLE),
                // Offset outside the label.
                ("", 2, UNDEFINED),
                // Nothing before the joiner.
                ("\u{200c}", 0, UNDEFINED),
                // U+094D directly before the joiner.
                ("\u{94d}\u{200c}", 1, Ok(true)),
                ("A\u{94d}\u{200c}B", 2, Ok(true)),
                // 'A' directly before the joiner.
                ("A\u{200c}", 1, Ok(false)),
                // Only U+05BF before the joiner.
                ("\u{5bf}\u{200c}", 1, UNDEFINED),
                // 'A' then U+05BF before the joiner.
                ("A\u{5bf}\u{200c}", 2, Ok(false)),
                // U+A872 U+05BF before the joiner, nothing usable after it.
                ("\u{a872}\u{5bf}\u{200c}", 2, UNDEFINED),
                ("\u{a872}\u{5bf}\u{200c}\u{5bf}", 2, UNDEFINED),
                // Same prefix, varying what follows the joiner.
                ("\u{a872}\u{5bf}\u{200c}\u{5bf}\u{629}", 2, Ok(true)),
                ("\u{a872}\u{5bf}\u{200c}\u{5bf}A", 2, Ok(false)),
                ("\u{a872}\u{5bf}\u{200c}A", 2, Ok(false)),
                // 'A' as the first character with runs of U+05BF on both sides.
                (
                    "A\u{5bf}\u{5bf}\u{200c}\u{5bf}\u{5bf}\u{5bf}\u{5bf}\u{626}",
                    3,
                    Ok(false),
                ),
                // U+0626 on both sides, with and without U+05BF in between.
                ("\u{626}\u{200c}\u{5bf}\u{626}", 1, Ok(true)),
                ("\u{626}\u{200c}\u{626}", 1, Ok(true)),
                (
                    "\u{626}\u{5bf}\u{5bf}\u{200c}\u{5bf}\u{5bf}\u{5bf}\u{5bf}\u{626}",
                    3,
                    Ok(true),
                ),
            ],
        );
    }

    #[test]
    fn check_rule_zero_width_joiner() {
        assert_rule(
            rule_zero_width_joiner,
            &[
                ("", 3, UNDEFINED),
                ("A", 0, NOT_APPLICABLE),
                // Nothing before the joiner.
                ("\u{200d}", 0, UNDEFINED),
                ("\u{200d}A", 0, UNDEFINED),
                // U+094D directly before the joiner.
                ("\u{94d}\u{200d}", 1, Ok(true)),
                // 'A' directly before the joiner.
                ("A\u{200d}", 1, Ok(false)),
                ("A\u{94d}\u{200d}B", 2, Ok(true)),
            ],
        );
    }

    #[test]
    fn check_rule_middle_dot() {
        assert_rule(
            rule_middle_dot,
            &[
                ("", 3, UNDEFINED),
                ("A", 0, NOT_APPLICABLE),
                // Missing neighbour on one or both sides.
                ("\u{00b7}", 0, UNDEFINED),
                ("\u{006c}\u{00b7}", 1, UNDEFINED),
                // Both neighbours are 'l'.
                ("\u{006c}\u{00b7}\u{006c}", 1, Ok(true)),
                // At least one neighbour is not 'l'.
                ("\u{006c}\u{00b7}A", 1, Ok(false)),
                ("A\u{00b7}A", 1, Ok(false)),
            ],
        );
    }

    #[test]
    fn check_rule_greek_lower_numeral_sign_keraia() {
        assert_rule(
            rule_greek_lower_numeral_sign_keraia,
            &[
                ("", 3, UNDEFINED),
                ("A", 0, NOT_APPLICABLE),
                // Nothing after the sign.
                ("\u{0375}", 0, UNDEFINED),
                // U+0384 directly after the sign.
                ("\u{0375}\u{0384}", 0, Ok(true)),
                ("A\u{0375}\u{0384}", 1, Ok(true)),
                // 'A' directly after the sign.
                ("\u{0375}A", 0, Ok(false)),
            ],
        );
    }

    #[test]
    fn check_rule_hebrew_punctuation() {
        assert_rule(
            rule_hebrew_punctuation,
            &[
                ("", 3, UNDEFINED),
                ("A", 0, NOT_APPLICABLE),
                // Nothing before the punctuation mark.
                ("\u{05F3}", 0, UNDEFINED),
                // U+05F0 directly before either mark.
                ("\u{5f0}\u{05F3}", 1, Ok(true)),
                ("\u{5f0}\u{05F4}", 1, Ok(true)),
                // 'A' directly before the mark.
                ("A\u{05F4}", 1, Ok(false)),
                // Only the immediate predecessor matters.
                ("YYY\u{5f0}\u{05F4}XXX", 4, Ok(true)),
            ],
        );
    }

    #[test]
    fn check_rule_katakana_middle_dot() {
        assert_rule(
            rule_katakana_middle_dot,
            &[
                ("", 3, UNDEFINED),
                ("A", 0, NOT_APPLICABLE),
                // The dot alone, or surrounded by ASCII letters only.
                ("\u{30fb}", 0, Ok(false)),
                ("a\u{30fb}b", 1, Ok(false)),
                // U+1B001 somewhere in the label.
                ("a\u{30fb}b\u{1b001}c", 1, Ok(true)),
                // U+3357 somewhere in the label.
                ("a\u{30fb}bc\u{3357}", 1, Ok(true)),
                // U+3007 somewhere in the label.
                ("\u{3007}\u{30fb}bc", 1, Ok(true)),
            ],
        );
    }

    #[test]
    fn check_rule_arabic_indic_digits() {
        assert_rule(
            rule_arabic_indic_digits,
            &[
                ("", 3, UNDEFINED),
                // Just outside U+0660..=U+0669 on either side.
                ("\u{065f}", 0, NOT_APPLICABLE),
                ("\u{066a}", 0, NOT_APPLICABLE),
                // First, middle and last digit of the range.
                ("\u{0660}", 0, Ok(true)),
                ("\u{0665}", 0, Ok(true)),
                ("\u{0669}", 0, Ok(true)),
                ("ab\u{0669}cd", 2, Ok(true)),
                // Neighbours of U+06F0..=U+06F9 do not trigger the rule.
                ("ab\u{0669}c\u{06ef}", 2, Ok(true)),
                ("ab\u{0669}c\u{06fa}", 2, Ok(true)),
                // A digit from U+06F0..=U+06F9 in the same label.
                ("ab\u{0669}c\u{06f0}", 2, Ok(false)),
                ("ab\u{0669}c\u{06f9}", 2, Ok(false)),
            ],
        );
    }

    #[test]
    fn check_rule_extended_arabic_indic_digits() {
        assert_rule(
            rule_extended_arabic_indic_digits,
            &[
                ("", 3, UNDEFINED),
                // Just outside U+06F0..=U+06F9 on either side.
                ("\u{06ef}", 0, NOT_APPLICABLE),
                ("\u{06fa}", 0, NOT_APPLICABLE),
                // First, middle and last digit of the range.
                ("\u{06f0}", 0, Ok(true)),
                ("\u{06f5}", 0, Ok(true)),
                ("\u{06f9}", 0, Ok(true)),
                ("ab\u{06f0}cd", 2, Ok(true)),
                // Neighbours of U+0660..=U+0669 do not trigger the rule.
                ("ab\u{06f0}c\u{065f}", 2, Ok(true)),
                ("ab\u{06f0}c\u{066a}", 2, Ok(true)),
                // A digit from U+0660..=U+0669 in the same label.
                ("ab\u{06f0}c\u{0660}", 2, Ok(false)),
                ("ab\u{06f0}c\u{0669}", 2, Ok(false)),
            ],
        );
    }

    #[test]
    fn check_get_context_rule() {
        // `None` means "no rule registered"; otherwise the returned function
        // must be the listed one (compared by address).
        let expectations: [(u32, Option<ContextRule>); 15] = [
            (0x013, None),
            (0x00b7, Some(rule_middle_dot as ContextRule)),
            (0x200c, Some(rule_zero_width_nonjoiner as ContextRule)),
            (0x0375, Some(rule_greek_lower_numeral_sign_keraia as ContextRule)),
            (0x05f3, Some(rule_hebrew_punctuation as ContextRule)),
            (0x05f4, Some(rule_hebrew_punctuation as ContextRule)),
            (0x30fb, Some(rule_katakana_middle_dot as ContextRule)),
            (0x0660, Some(rule_arabic_indic_digits as ContextRule)),
            (0x0669, Some(rule_arabic_indic_digits as ContextRule)),
            (0x065f, None),
            (0x066a, None),
            (0x06f0, Some(rule_extended_arabic_indic_digits as ContextRule)),
            (0x06f9, Some(rule_extended_arabic_indic_digits as ContextRule)),
            (0x06ef, None),
            (0x06fa, None),
        ];

        for &(cp, expected) in expectations.iter() {
            assert_eq!(
                get_context_rule(cp).map(|rule| rule as usize),
                expected.map(|rule| rule as usize),
                "code point {:#06x}",
                cp
            );
        }
    }
}