use crate::data::casemap;
use crate::data::normalization::ccc;
/// Reports whether `ch` is treated as a cased character.
///
/// A character counts as cased when the case-mapping tables provide a simple
/// uppercase or a simple lowercase mapping for it, or when it is one of the
/// code points enumerated explicitly below.
fn is_cased(ch: char) -> bool {
    let code = ch as u32;
    let has_simple_mapping =
        casemap::simple_uppercase(code).is_some() || casemap::simple_lowercase(code).is_some();
    // NOTE(review): the explicit list looks like the Unicode titlecase letters
    // (general category Lt) — confirm against the data tables.
    has_simple_mapping
        || matches!(
            code,
            0x01C5
                | 0x01C8
                | 0x01CB
                | 0x01F2
                | 0x1F88..=0x1F8F
                | 0x1F98..=0x1F9F
                | 0x1FA8..=0x1FAF
                | 0x1FBC
                | 0x1FCC
                | 0x1FFC
        )
}
/// Reports whether `ch` is skipped over when looking for neighbouring cased
/// letters (used by the final-sigma context checks).
///
/// Any character with a non-zero canonical combining class qualifies, as do
/// the punctuation, format and variation-selector code points listed below.
///
/// NOTE(review): this is a hand-maintained approximation of the Unicode
/// `Case_Ignorable` property rather than a table lookup — confirm coverage if
/// exact conformance matters.
fn is_case_ignorable(ch: char) -> bool {
    let code = ch as u32;
    ccc(code) != 0
        || matches!(
            code,
            0x0027
                | 0x002E
                | 0x003A
                | 0x00AD
                | 0x00B7
                | 0x0387
                | 0x05F4
                | 0x200C..=0x200D
                | 0x2019
                | 0x2027
                | 0xFE00..=0xFE0F
        )
}
/// Returns a lowercased copy of `s` using the locale-independent rules.
///
/// Delegates to `lowercase_impl` with no locale.
#[must_use]
pub fn to_lowercase(s: &str) -> String {
lowercase_impl(s, None)
}
/// Returns a lowercased copy of `s`, taking `locale` into account.
///
/// The locale string is forwarded unchanged to `lowercase_impl`, which decides
/// whether any locale-specific rules apply.
#[must_use]
pub fn to_lowercase_locale(s: &str, locale: &str) -> String {
lowercase_impl(s, Some(locale))
}
fn lowercase_impl(s: &str, locale: Option<&str>) -> String {
let is_turkic = matches!(locale, Some("tr") | Some("az"));
let chars: Vec<char> = s.chars().collect();
let mut result = String::with_capacity(s.len());
for (i, &ch) in chars.iter().enumerate() {
let cp = ch as u32;
if is_turkic {
if cp == 0x0049 {
if i + 1 < chars.len() && chars[i + 1] as u32 == 0x0307 {
result.push('i');
continue;
}
result.push('\u{0131}'); continue;
}
if cp == 0x0307 && i > 0 && chars[i - 1] as u32 == 0x0049 {
continue;
}
if cp == 0x0130 {
result.push('i');
continue;
}
}
if cp == 0x03A3 {
let left_cased = has_cased_before(&chars, i);
let right_cased = has_cased_after(&chars, i);
if left_cased && !right_cased {
result.push('\u{03C2}'); } else {
result.push('\u{03C3}'); }
continue;
}
if let Some(cps) = casemap::full_lowercase(cp) {
for &c in cps {
if let Some(ch2) = char::from_u32(c) {
result.push(ch2);
}
}
continue;
}
if let Some(lower) = casemap::simple_lowercase(cp) {
if let Some(ch2) = char::from_u32(lower) {
result.push(ch2);
continue;
}
}
result.push(ch);
}
result
}
/// Reports whether a cased character precedes index `i`, ignoring any
/// case-ignorable characters in between.
///
/// Walks backwards from `chars[i - 1]`; the first cased character yields
/// `true`, the first character that is neither cased nor case-ignorable yields
/// `false`, and reaching the start of the slice yields `false`.
fn has_cased_before(chars: &[char], i: usize) -> bool {
    for &prev in chars[..i].iter().rev() {
        if is_cased(prev) {
            return true;
        }
        if !is_case_ignorable(prev) {
            return false;
        }
    }
    false
}
/// Reports whether a cased character follows index `i`, ignoring any
/// case-ignorable characters in between.
///
/// Walks forwards from `chars[i + 1]`; the first cased character yields
/// `true`, the first character that is neither cased nor case-ignorable yields
/// `false`, and reaching the end of the slice yields `false`.
fn has_cased_after(chars: &[char], i: usize) -> bool {
    for &next in chars.iter().skip(i + 1) {
        if is_cased(next) {
            return true;
        }
        if !is_case_ignorable(next) {
            return false;
        }
    }
    false
}
/// Returns an uppercased copy of `s` using the locale-independent rules.
///
/// Delegates to `uppercase_impl` with no locale.
#[must_use]
pub fn to_uppercase(s: &str) -> String {
uppercase_impl(s, None)
}
/// Returns an uppercased copy of `s`, taking `locale` into account.
///
/// The locale string is forwarded unchanged to `uppercase_impl`, which decides
/// whether any locale-specific rules apply.
#[must_use]
pub fn to_uppercase_locale(s: &str, locale: &str) -> String {
uppercase_impl(s, Some(locale))
}
/// Uppercases `s`, applying the Turkic rule when `locale` is exactly
/// `Some("tr")` or `Some("az")`.
///
/// Per character, the first matching rule wins:
///
/// 1. Turkic locales only: `i` (U+0069) becomes `İ` (U+0130).
/// 2. The full (possibly multi-character) uppercase mapping, if any; code
///    points that are not valid `char`s are skipped.
/// 3. The simple uppercase mapping, if any and valid.
/// 4. The character is copied unchanged.
fn uppercase_impl(s: &str, locale: Option<&str>) -> String {
    let turkic = locale == Some("tr") || locale == Some("az");
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        let code = ch as u32;
        if turkic && code == 0x0069 {
            out.push('\u{0130}');
        } else if let Some(expansion) = casemap::full_uppercase(code) {
            for &unit in expansion {
                if let Some(mapped) = char::from_u32(unit) {
                    out.push(mapped);
                }
            }
        } else {
            let mapped = casemap::simple_uppercase(code)
                .and_then(char::from_u32)
                .unwrap_or(ch);
            out.push(mapped);
        }
    }
    out
}
#[must_use]
pub fn to_titlecase(s: &str) -> String {
let mut result = String::with_capacity(s.len());
let mut need_title = true;
for ch in s.chars() {
let cp = ch as u32;
if ch.is_whitespace() {
result.push(ch);
need_title = true;
continue;
}
if need_title && is_cased(ch) {
if let Some(cps) = casemap::full_titlecase(cp) {
for &c in cps {
if let Some(ch2) = char::from_u32(c) {
result.push(ch2);
}
}
} else if let Some(tc) = casemap::simple_titlecase(cp) {
if let Some(ch2) = char::from_u32(tc) {
result.push(ch2);
}
} else {
result.push(ch);
}
need_title = false;
continue;
}
if !need_title {
if let Some(lower) = casemap::simple_lowercase(cp) {
if let Some(ch2) = char::from_u32(lower) {
result.push(ch2);
continue;
}
}
}
result.push(ch);
}
result
}
/// Returns the case-folded form of `s` for caseless comparison.
///
/// Delegates to `case_fold_impl` with the Turkic flag turned off.
#[must_use]
pub fn case_fold(s: &str) -> String {
case_fold_impl(s, false)
}
/// Returns `s` folded with simple case folding only.
///
/// Every character is replaced by exactly one character: its simple case
/// folding when the table has one that is a valid `char`, otherwise the
/// character itself. The number of characters is therefore preserved.
#[must_use]
pub fn case_fold_simple(s: &str) -> String {
    let mut folded = String::with_capacity(s.len());
    folded.extend(s.chars().map(|ch| {
        casemap::simple_case_fold(ch as u32)
            .and_then(char::from_u32)
            .unwrap_or(ch)
    }));
    folded
}
#[must_use]
pub fn case_fold_locale(s: &str, locale: &str) -> String {
let is_turkic = matches!(locale, "tr" | "az");
if is_turkic {
case_fold_impl(s, true)
} else {
case_fold_impl(s, false)
}
}
/// Case-folds `s`, optionally preferring the Turkic folding table.
///
/// Per character, the first matching rule wins:
///
/// 1. When `turkic` is set: the Turkic folding, if the table has one that is
///    a valid `char`.
/// 2. The full (possibly multi-character) case folding, if any; code points
///    that are not valid `char`s are skipped.
/// 3. The simple case folding, if any and valid.
/// 4. The character is copied unchanged.
fn case_fold_impl(s: &str, turkic: bool) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        let code = ch as u32;
        let turkic_hit = if turkic {
            casemap::turkic_case_fold(code).and_then(char::from_u32)
        } else {
            None
        };
        if let Some(mapped) = turkic_hit {
            out.push(mapped);
            continue;
        }
        match casemap::full_case_fold(code) {
            Some(expansion) => {
                for &unit in expansion {
                    if let Some(mapped) = char::from_u32(unit) {
                        out.push(mapped);
                    }
                }
            }
            None => {
                let mapped = casemap::simple_case_fold(code)
                    .and_then(char::from_u32)
                    .unwrap_or(ch);
                out.push(mapped);
            }
        }
    }
    out
}
#[must_use]
pub fn is_lowercase(ch: char) -> bool {
casemap::simple_uppercase(ch as u32).is_some()
}
#[must_use]
pub fn is_uppercase(ch: char) -> bool {
casemap::simple_lowercase(ch as u32).is_some()
}
// Unit tests for the case-conversion, case-folding and classification helpers.
#[cfg(test)]
mod tests {
use super::*;
// ASCII input in upper, mixed and already-lower case.
#[test]
fn lowercase_ascii() {
assert_eq!(to_lowercase("HELLO WORLD"), "hello world");
assert_eq!(to_lowercase("Hello World"), "hello world");
assert_eq!(to_lowercase("hello"), "hello");
}
// U+00DF (sharp s) is expected to lowercase to itself.
#[test]
fn lowercase_german_sharp_s() {
assert_eq!(to_lowercase("\u{00DF}"), "\u{00DF}");
}
// A capital sigma at the end of a word becomes final sigma (U+03C2), while
// one inside the word becomes regular sigma (U+03C3).
#[test]
fn lowercase_greek_final_sigma() {
let upper = "\u{039F}\u{0394}\u{03A5}\u{03A3}\u{03A3}\u{0395}\u{03A5}\u{03A3}";
let lower = to_lowercase(upper);
assert!(lower.ends_with('\u{03C2}'), "final sigma at end");
assert!(lower.contains('\u{03C3}'), "regular sigma in middle");
}
// A lone capital sigma has no preceding cased letter, so it must not become
// final sigma.
#[test]
fn lowercase_single_sigma() {
assert_eq!(to_lowercase("\u{03A3}"), "\u{03C3}");
}
// ASCII input in lower and already-upper case.
#[test]
fn uppercase_ascii() {
assert_eq!(to_uppercase("hello world"), "HELLO WORLD");
assert_eq!(to_uppercase("HELLO"), "HELLO");
}
// Full uppercase mapping: U+00DF expands to "SS".
#[test]
fn uppercase_german_sharp_s() {
assert_eq!(to_uppercase("stra\u{00DF}e"), "STRASSE");
}
// Full uppercase mapping: the ff and fi ligatures expand to two letters.
#[test]
fn uppercase_ligatures() {
assert_eq!(to_uppercase("\u{FB00}"), "FF");
assert_eq!(to_uppercase("\u{FB01}"), "FI");
}
// First letter of each whitespace-separated word is capitalised, the rest
// is lowercased.
#[test]
fn titlecase_basic() {
assert_eq!(to_titlecase("hello world"), "Hello World");
assert_eq!(to_titlecase("HELLO WORLD"), "Hello World");
}
// Mixed-case input is normalised the same way.
#[test]
fn titlecase_mixed() {
assert_eq!(to_titlecase("hELLO wORLD"), "Hello World");
}
// Folding makes strings that differ only in case compare equal.
#[test]
fn case_fold_basic() {
assert_eq!(case_fold("Hello World"), "hello world");
assert_eq!(case_fold("HELLO"), case_fold("hello"));
}
// Full case folding expands U+00DF to "ss".
#[test]
fn case_fold_sharp_s() {
assert_eq!(case_fold("\u{00DF}"), "ss");
assert_eq!(case_fold("STRASSE"), case_fold("stra\u{00DF}e"));
}
// Simple folding maps one character to one character.
#[test]
fn case_fold_simple_preserves_length() {
let s = "Stra\u{00DF}e";
let folded = case_fold_simple(s);
assert_eq!(folded.chars().count(), s.chars().count());
}
// Turkish: capital I lowercases to dotless i (U+0131).
#[test]
fn turkish_lowercase_i() {
assert_eq!(to_lowercase_locale("I", "tr"), "\u{0131}");
}
// Turkish: small i uppercases to capital I with dot above (U+0130).
#[test]
fn turkish_uppercase_i() {
assert_eq!(to_uppercase_locale("i", "tr"), "\u{0130}");
}
// Dotless i produced by the Turkish rule uppercases back to plain I with
// the default (locale-independent) rules.
#[test]
fn turkish_i_roundtrip() {
let lower = to_lowercase_locale("I", "tr");
assert_eq!(lower, "\u{0131}");
let upper = to_uppercase(&lower);
assert_eq!(upper, "I");
}
// Classification of ASCII letters and a digit.
#[test]
fn classification_basic() {
assert!(is_uppercase('A'));
assert!(is_lowercase('a'));
assert!(!is_uppercase('a'));
assert!(!is_lowercase('A'));
assert!(!is_uppercase('1'));
}
// Every conversion maps the empty string to the empty string.
#[test]
fn empty_string() {
assert_eq!(to_lowercase(""), "");
assert_eq!(to_uppercase(""), "");
assert_eq!(to_titlecase(""), "");
assert_eq!(case_fold(""), "");
}
// Characters without case are passed through unchanged.
#[test]
fn no_case_characters() {
assert_eq!(to_lowercase("12345"), "12345");
assert_eq!(to_uppercase("12345"), "12345");
}
}