1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
// Copyright 2015 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Accessor for `Bidi_Class` property from Unicode Character Database (UCD)

// TODO: Make private after dropping deprecated call
pub mod tables;

pub use self::tables::{BidiClass, UNICODE_VERSION};

use std::cmp::Ordering::{Equal, Less, Greater};
use std::char;

use self::tables::bidi_class_table;
use BidiClass::*;

/// Returns the `Bidi_Class` property value of the character `c`.
///
/// The lookup is a binary search over `bidi_class_table`; a character that is
/// not covered by any range in that table is reported as `L`.
pub fn bidi_class(c: char) -> BidiClass {
    let table = bidi_class_table;
    bsearch_range_value_table(c, table)
}

/// Binary-searches `r`, a table of inclusive `(lo, hi, class)` ranges, for the
/// range containing `c` and returns that range's class.
///
/// The comparator orders each range relative to `c`, so the search is only
/// meaningful when `r` is sorted by code point with non-overlapping ranges.
/// When no range contains `c`, the result is `L`.
fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
    // Position of a table entry relative to `c`: an entry that ends before `c`
    // sorts `Less`, one that starts after `c` sorts `Greater`.
    let locate = |&(start, end, _): &(char, char, BidiClass)| {
        if c > end {
            Less
        } else if c < start {
            Greater
        } else {
            Equal
        }
    };

    match r.binary_search_by(locate) {
        Ok(hit) => r[hit].2,
        // UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed
        // for Bidi_Class have the value Left_To_Right (L)."
        Err(_) => L,
    }
}

#[cfg(test)]
mod test {
    use super::*;

    /// Spot-checks `bidi_class` against hand-picked code points, including the
    /// first and last code points of several blocks.
    #[test]
    fn test_bidi_class() {
        // (code point, expected Bidi_Class)
        let cases: &[(u32, BidiClass)] = &[
            (0x0000, BN),
            (0x0040, ON),
            (0x0041, L),
            (0x0062, L),
            (0x007F, BN),

            // Hebrew
            (0x0590, R),
            (0x05D0, R),
            (0x05D1, R),
            (0x05FF, R),

            // Arabic
            (0x0600, AN),
            (0x0627, AL),
            (0x07BF, AL),

            // Default R + Arabic Extras
            (0x07C0, R),
            (0x085F, R),
            (0x0860, R),
            (0x089F, R),
            (0x08A0, AL),
            (0x08FF, NSM),

            // Default ET
            (0x20A0, ET),
            (0x20CF, ET),

            // Arabic Presentation Forms
            (0xFB1D, R),
            (0xFB4F, R),
            (0xFB50, AL),
            (0xFDCF, AL),
            (0xFDF0, AL),
            (0xFDFF, AL),
            (0xFE70, AL),
            (0xFEFE, AL),
            (0xFEFF, BN),

            // Default AL + R
            (0x10800, R),
            (0x10FFF, R),
            (0x1E800, R),
            (0x1EDFF, R),
            (0x1EE00, AL),
            (0x1EEFF, AL),
            (0x1EF00, R),
            (0x1EFFF, R),
        ];

        for &(input, expected) in cases {
            let c = char::from_u32(input).expect("test input must be a valid Unicode scalar value");
            // Name the code point on failure: many cases share the same expected
            // class, so the bare left/right values would not identify the culprit.
            assert_eq!(
                bidi_class(c),
                expected,
                "unexpected Bidi_Class for U+{:04X}",
                input
            );
        }
    }
}