#![no_std]
#![forbid(unsafe_code)]
/// Capacity of the inline `chars` buffer in `CaseMapResult`; `from_char_iter`
/// silently drops any mapped characters beyond this many.
const MAX_CASE_EXPANSION: usize = 3;
/// Scalar value of ASCII `'0'`; subtracting it from an ASCII digit yields the
/// digit's numeric value.
const ASCII_ZERO: u32 = b'0' as u32;
/// Fixed-capacity, `Copy` container holding the result of a full case mapping,
/// which may expand one character into several.
///
/// Only the first `len` entries of `chars` are meaningful. Every constructor
/// visible in this file fills the remaining slots with `'\0'`.
///
/// `Debug` is derived so values can be printed and used in assertion
/// diagnostics; the derived output shows the raw buffer, padding included.
#[derive(Clone, Copy, Debug)]
pub struct CaseMapResult {
    /// Mapped characters, valid for indices `0..len`.
    chars: [char; MAX_CASE_EXPANSION],
    /// Number of valid leading entries in `chars`.
    len: usize,
}
impl Default for CaseMapResult {
    /// Builds an empty result: length zero and every slot set to `'\0'`.
    #[inline]
    fn default() -> Self {
        let chars = ['\0'; MAX_CASE_EXPANSION];
        Self { chars, len: 0 }
    }
}
impl CaseMapResult {
    /// Number of characters stored in this result.
    #[inline]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` when no characters are stored.
    #[inline]
    pub fn is_empty(&self) -> bool {
        0 == self.len
    }

    /// Returns the character at `index`, or `None` when `index` is not below
    /// the stored length.
    #[inline]
    pub fn get(&self, index: usize) -> Option<char> {
        if index >= self.len {
            return None;
        }
        Some(self.chars[index])
    }

    /// Returns the character in slot 0.
    ///
    /// The stored length is not consulted, so for an empty result (such as
    /// `Default::default()`) this is the `'\0'` padding value.
    #[inline]
    pub fn first(&self) -> char {
        let [head, ..] = self.chars;
        head
    }
}
/// Single-character uppercase mapping.
///
/// Returns the uppercase form of `c` when `char::to_uppercase` yields exactly
/// one character. When the mapping expands to several characters (for example
/// `'ß'`), `c` is returned unchanged.
#[inline]
pub fn char_upcase(c: char) -> char {
    let mut mapped = c.to_uppercase();
    // Tuple fields are evaluated left to right: first item, then second.
    match (mapped.next(), mapped.next()) {
        (Some(single), None) => single,
        _ => c,
    }
}
/// Single-character lowercase mapping.
///
/// Returns the lowercase form of `c` when `char::to_lowercase` yields exactly
/// one character. When the mapping expands to several characters, `c` is
/// returned unchanged.
#[inline]
pub fn char_downcase(c: char) -> char {
    let mut mapped = c.to_lowercase();
    // Tuple fields are evaluated left to right: first item, then second.
    match (mapped.next(), mapped.next()) {
        (Some(single), None) => single,
        _ => c,
    }
}
/// Single-character case fold, implemented as `char_downcase`.
///
/// NOTE(review): lowercasing only approximates Unicode simple case folding.
/// For instance final sigma `'ς'` lowercases to itself, whereas
/// CaseFolding.txt folds it to `'σ'`. Confirm whether callers need exact
/// folding.
#[inline]
pub fn char_foldcase(c: char) -> char {
    let folded = char_downcase(c);
    folded
}
/// Returns `true` when `c` is alphabetic according to `char::is_alphabetic`.
#[inline]
pub fn char_is_alphabetic(c: char) -> bool {
    char::is_alphabetic(c)
}
/// Returns `true` when `c` is whitespace according to `char::is_whitespace`.
#[inline]
pub fn char_is_whitespace(c: char) -> bool {
    char::is_whitespace(c)
}
/// Returns `true` when `c` is uppercase according to `char::is_uppercase`.
#[inline]
pub fn char_is_uppercase(c: char) -> bool {
    char::is_uppercase(c)
}
/// Returns `true` when `c` is lowercase according to `char::is_lowercase`.
#[inline]
pub fn char_is_lowercase(c: char) -> bool {
    char::is_lowercase(c)
}
/// Returns `true` exactly when `digit_value` produces a value for `c`.
#[inline]
pub fn char_is_numeric(c: char) -> bool {
    let value = digit_value(c);
    value.is_some()
}
/// Returns the numeric value of `c` when it is a digit, otherwise `None`.
///
/// ASCII `'0'..='9'` are handled inline; every other character is passed to
/// `digit_value_inner`.
#[inline]
pub fn digit_value(c: char) -> Option<u32> {
    match c {
        '0'..='9' => Some(c as u32 - ASCII_ZERO),
        _ => digit_value_inner(c),
    }
}
/// Returns the decimal value (`0..=9`) of `c` when it is a Unicode decimal
/// digit (General_Category `Nd`), otherwise `None`.
///
/// The previous implementation guessed the start of a digit run by walking
/// backwards over neighbours for which `char::is_numeric` is true. That
/// predicate also accepts non-decimal numerics, so superscripts, fractions and
/// circled numbers were reported as digits (`'²'` gave `Some(0)`). It also
/// rejected decimal digits whose run directly follows another numeric run,
/// such as the mathematical double-struck digits. An explicit table of "digit
/// zero" code points removes the guesswork.
fn digit_value_inner(c: char) -> Option<u32> {
    /// Code points of every decimal "digit zero", sorted ascending. Each entry
    /// starts a contiguous run of ten digits with values 0 through 9.
    ///
    /// NOTE(review): transcribed from the Unicode 15.x `Nd` ranges; regenerate
    /// from the Unicode Character Database when bumping Unicode versions.
    const DECIMAL_ZEROS: &[u32] = &[
        0x0030, 0x0660, 0x06F0, 0x07C0, 0x0966, 0x09E6, 0x0A66, 0x0AE6,
        0x0B66, 0x0BE6, 0x0C66, 0x0CE6, 0x0D66, 0x0DE6, 0x0E50, 0x0ED0,
        0x0F20, 0x1040, 0x1090, 0x17E0, 0x1810, 0x1946, 0x19D0, 0x1A80,
        0x1A90, 0x1B50, 0x1BB0, 0x1C40, 0x1C50, 0xA620, 0xA8D0, 0xA900,
        0xA9D0, 0xA9F0, 0xAA50, 0xABF0, 0xFF10, 0x104A0, 0x10D30, 0x11066,
        0x110F0, 0x11136, 0x111D0, 0x112F0, 0x11450, 0x114D0, 0x11650, 0x116C0,
        0x11730, 0x118E0, 0x11950, 0x11C50, 0x11D50, 0x11DA0, 0x11F50, 0x16A60,
        0x16AC0, 0x16B50, 0x1D7CE, 0x1D7D8, 0x1D7E2, 0x1D7EC, 0x1D7F6, 0x1E140,
        0x1E2F0, 0x1E4F0, 0x1E950, 0x1FBF0,
    ];

    // Cheap pre-filter that also keeps the result within what the toolchain's
    // own Unicode tables consider numeric.
    if !c.is_numeric() {
        return None;
    }

    let cp = c as u32;
    // Locate the closest digit-zero at or below `cp`.
    let zero = match DECIMAL_ZEROS.binary_search(&cp) {
        Ok(_) => return Some(0),
        // `cp` sorts before the first table entry, so no run can contain it.
        Err(0) => return None,
        Err(next) => DECIMAL_ZEROS[next - 1],
    };

    // `cp` belongs to the run only if it lies within ten code points of its zero.
    let value = cp - zero;
    if value <= 9 {
        Some(value)
    } else {
        None
    }
}
/// Full uppercase mapping of `c`, collected into a `CaseMapResult`.
///
/// Unlike `char_upcase`, multi-character expansions are kept rather than
/// discarded.
pub fn full_upcase(c: char) -> CaseMapResult {
    let mapped = c.to_uppercase();
    from_char_iter(mapped, c)
}
/// Full lowercase mapping of `c`, collected into a `CaseMapResult`.
///
/// Unlike `char_downcase`, multi-character expansions are kept rather than
/// discarded.
pub fn full_downcase(c: char) -> CaseMapResult {
    let mapped = c.to_lowercase();
    from_char_iter(mapped, c)
}
pub fn full_foldcase(c: char) -> CaseMapResult {
match lookup_full_casefold(c) {
Some(result) => result,
None => {
let folded = char_foldcase(c);
CaseMapResult {
chars: [folded, '\0', '\0'],
len: 1,
}
}
}
}
/// Collects the characters yielded by `iter` into a `CaseMapResult`.
///
/// At most `MAX_CASE_EXPANSION` characters are stored; the iterator is still
/// drained to the end and any surplus items are dropped. When `iter` yields
/// nothing, the result holds `original` alone.
fn from_char_iter<I: Iterator<Item = char>>(iter: I, original: char) -> CaseMapResult {
    let mut chars = ['\0'; MAX_CASE_EXPANSION];
    let mut len = 0usize;

    for mapped in iter {
        // `get_mut` yields `None` once the buffer is full, discarding the item.
        if let Some(slot) = chars.get_mut(len) {
            *slot = mapped;
            len += 1;
        }
    }

    if len == 0 {
        chars[0] = original;
        len = 1;
    }

    CaseMapResult { chars, len }
}
// Textually includes `casefold_generated.rs` from the build's `OUT_DIR`.
// NOTE(review): `lookup_full_casefold`, called by `full_foldcase`, is not
// defined in this file, so it presumably comes from this generated source —
// confirm against the build script.
include!(concat!(env!("OUT_DIR"), "/casefold_generated.rs"));
// Unit tests for the single-character helpers, the digit helpers and the full
// (multi-character) case mappings defined above.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_char_upcase_ascii() {
assert_eq!(char_upcase('a'), 'A');
assert_eq!(char_upcase('z'), 'Z');
assert_eq!(char_upcase('A'), 'A');
assert_eq!(char_upcase('0'), '0');
}
#[test]
fn test_char_upcase_unicode() {
assert_eq!(char_upcase('é'), 'É');
assert_eq!(char_upcase('ω'), 'Ω');
// 'ß' uppercases to the two-character "SS" (see test_full_upcase), so the
// single-character mapping returns it unchanged.
assert_eq!(char_upcase('ß'), 'ß'); }
#[test]
fn test_char_downcase_ascii() {
assert_eq!(char_downcase('A'), 'a');
assert_eq!(char_downcase('Z'), 'z');
assert_eq!(char_downcase('a'), 'a');
}
#[test]
fn test_char_downcase_unicode() {
assert_eq!(char_downcase('É'), 'é');
assert_eq!(char_downcase('Ω'), 'ω');
}
#[test]
fn test_char_foldcase() {
assert_eq!(char_foldcase('A'), 'a');
assert_eq!(char_foldcase('a'), 'a');
assert_eq!(char_foldcase('É'), 'é');
// char_foldcase is a lowercase mapping, so the already-lowercase 'ß' stays
// as is; the "ss" expansion is only produced by full_foldcase.
assert_eq!(char_foldcase('ß'), 'ß'); }
#[test]
fn test_char_is_alphabetic() {
assert!(char_is_alphabetic('a'));
assert!(char_is_alphabetic('é'));
assert!(char_is_alphabetic('中'));
assert!(!char_is_alphabetic('0'));
assert!(!char_is_alphabetic(' '));
}
#[test]
fn test_char_is_whitespace() {
assert!(char_is_whitespace(' '));
assert!(char_is_whitespace('\t'));
assert!(char_is_whitespace('\n'));
assert!(!char_is_whitespace('a'));
}
#[test]
fn test_char_is_uppercase() {
assert!(char_is_uppercase('A'));
assert!(char_is_uppercase('É'));
assert!(!char_is_uppercase('a'));
assert!(!char_is_uppercase('0'));
}
#[test]
fn test_char_is_lowercase() {
assert!(char_is_lowercase('a'));
assert!(char_is_lowercase('é'));
assert!(!char_is_lowercase('A'));
assert!(!char_is_lowercase('0'));
}
#[test]
fn test_char_is_numeric() {
assert!(char_is_numeric('0'));
assert!(char_is_numeric('5'));
assert!(char_is_numeric('9'));
assert!(!char_is_numeric('a'));
assert!(!char_is_numeric(' '));
}
#[test]
fn test_char_is_numeric_unicode() {
// U+0660 / U+0663: Arabic-Indic digits zero and three; U+0966: Devanagari
// digit zero.
assert!(char_is_numeric('\u{0660}')); assert!(char_is_numeric('\u{0663}')); assert!(char_is_numeric('\u{0966}')); }
#[test]
fn test_digit_value() {
assert_eq!(digit_value('0'), Some(0));
assert_eq!(digit_value('5'), Some(5));
assert_eq!(digit_value('9'), Some(9));
assert_eq!(digit_value('a'), None);
}
#[test]
fn test_digit_value_unicode() {
// Arabic-Indic digits zero, three and nine.
assert_eq!(digit_value('\u{0660}'), Some(0));
assert_eq!(digit_value('\u{0663}'), Some(3));
assert_eq!(digit_value('\u{0669}'), Some(9));
}
#[test]
fn test_full_upcase() {
let result = full_upcase('a');
assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'A');
// Multi-character expansion: 'ß' becomes "SS".
let result = full_upcase('ß');
assert_eq!(result.len(), 2);
assert_eq!(result.get(0), Some('S'));
assert_eq!(result.get(1), Some('S'));
}
#[test]
fn test_full_downcase() {
let result = full_downcase('A');
assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'a');
let result = full_downcase('a');
assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'a');
}
#[test]
fn test_full_foldcase() {
let result = full_foldcase('A');
assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'a');
let result = full_foldcase('ß');
assert_eq!(result.len(), 2);
assert_eq!(result.get(0), Some('s'));
assert_eq!(result.get(1), Some('s'));
// U+FB01 is the Latin small ligature "fi".
let result = full_foldcase('\u{FB01}');
assert_eq!(result.len(), 2);
assert_eq!(result.get(0), Some('f'));
assert_eq!(result.get(1), Some('i'));
}
#[test]
fn test_full_foldcase_capital_sharp_s() {
// U+1E9E (capital sharp s) must fold to the same "ss" as 'ß'.
let result = full_foldcase('\u{1E9E}');
assert_eq!(result.len(), 2);
assert_eq!(result.get(0), Some('s'));
assert_eq!(result.get(1), Some('s'));
}
#[test]
fn test_full_foldcase_armenian_ligatures() {
// U+FB13..=U+FB17 each fold to a pair of letters from the U+0560 block.
let result = full_foldcase('\u{FB13}');
assert_eq!(result.len(), 2);
assert_eq!(result.get(0), Some('\u{0574}'));
assert_eq!(result.get(1), Some('\u{0576}'));
let result = full_foldcase('\u{FB14}');
assert_eq!(result.len(), 2);
assert_eq!(result.get(0), Some('\u{0574}'));
assert_eq!(result.get(1), Some('\u{0565}'));
let result = full_foldcase('\u{FB15}');
assert_eq!(result.len(), 2);
assert_eq!(result.get(0), Some('\u{0574}'));
assert_eq!(result.get(1), Some('\u{056B}'));
let result = full_foldcase('\u{FB16}');
assert_eq!(result.len(), 2);
assert_eq!(result.get(0), Some('\u{057E}'));
assert_eq!(result.get(1), Some('\u{0576}'));
let result = full_foldcase('\u{FB17}');
assert_eq!(result.len(), 2);
assert_eq!(result.get(0), Some('\u{0574}'));
assert_eq!(result.get(1), Some('\u{056D}'));
}
#[test]
fn test_full_foldcase_no_match() {
// An already-folded character comes back as a single-element result.
let result = full_foldcase('a');
assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'a');
}
#[test]
fn test_cyrillic_case_mapping() {
// The literals below are Cyrillic letters, not their Latin look-alikes.
let result = full_upcase('а'); assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'А');
let result = full_upcase('я'); assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'Я');
let result = full_upcase('ё'); assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'Ё');
let result = full_downcase('Б'); assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'б');
let result = full_downcase('Ж'); assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'ж');
let result = full_foldcase('Щ'); assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'щ');
}
#[test]
fn test_georgian_case_mapping() {
// Georgian lowercase letters paired with their distinct uppercase forms.
let result = full_upcase('ა'); assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'Ა');
let result = full_upcase('ბ'); assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'Ბ');
let result = full_downcase('Გ'); assert_eq!(result.len(), 1);
assert_eq!(result.first(), 'გ');
}
#[test]
fn test_unicode_whitespace() {
assert!(char_is_whitespace(' '));
assert!(char_is_whitespace('\t'));
assert!(char_is_whitespace('\n'));
// U+00A0 no-break space, U+2003 em space, U+3000 ideographic space.
assert!(char_is_whitespace('\u{00A0}')); assert!(char_is_whitespace('\u{2003}')); assert!(char_is_whitespace('\u{3000}'));
assert!(!char_is_whitespace('a'));
// A Georgian letter is not whitespace.
assert!(!char_is_whitespace('ა')); }
}