use std::sync::LazyLock;
/// Translates a platform-specific codeset name into its canonical spelling.
///
/// Exactly one alias table is compiled in, selected by the target platform;
/// targets without a dedicated table get an empty one. A name found in the
/// table yields the associated canonical name. Any other name is returned
/// unchanged as a heap copy that is intentionally leaked so it can be handed
/// out as `&'static str`.
fn map_aliases(s: &str) -> &'static str {
    // Each entry is `(name reported by the platform, canonical name)`.
    #[cfg(target_os = "freebsd")]
    const ALIASES: &[(&str, &str)] = &[
        ("ARMSCII-8", "ARMSCII-8"),
        ("Big5", "BIG5"),
        ("C", "ASCII"),
        ("CP1131", "CP1131"),
        ("CP1251", "CP1251"),
        ("CP866", "CP866"),
        ("GB18030", "GB18030"),
        ("GB2312", "GB2312"),
        ("GBK", "GBK"),
        // NOTE(review): "?" presumably marks a codeset with no canonical name — confirm.
        ("ISCII-DEV", "?"),
        ("ISO8859-1", "ISO-8859-1"),
        ("ISO8859-13", "ISO-8859-13"),
        ("ISO8859-15", "ISO-8859-15"),
        ("ISO8859-2", "ISO-8859-2"),
        ("ISO8859-5", "ISO-8859-5"),
        ("ISO8859-7", "ISO-8859-7"),
        ("ISO8859-9", "ISO-8859-9"),
        ("KOI8-R", "KOI8-R"),
        ("KOI8-U", "KOI8-U"),
        ("SJIS", "SHIFT_JIS"),
        ("US-ASCII", "ASCII"),
        ("eucCN", "GB2312"),
        ("eucJP", "EUC-JP"),
        ("eucKR", "EUC-KR"),
    ];
    #[cfg(target_os = "netbsd")]
    const ALIASES: &[(&str, &str)] = &[
        ("646", "ASCII"),
        ("ARMSCII-8", "ARMSCII-8"),
        ("BIG5", "BIG5"),
        ("Big5-HKSCS", "BIG5-HKSCS"),
        ("CP1251", "CP1251"),
        ("CP866", "CP866"),
        ("GB18030", "GB18030"),
        ("GB2312", "GB2312"),
        ("ISO8859-1", "ISO-8859-1"),
        ("ISO8859-13", "ISO-8859-13"),
        ("ISO8859-15", "ISO-8859-15"),
        ("ISO8859-2", "ISO-8859-2"),
        ("ISO8859-4", "ISO-8859-4"),
        ("ISO8859-5", "ISO-8859-5"),
        ("ISO8859-7", "ISO-8859-7"),
        ("KOI8-R", "KOI8-R"),
        ("KOI8-U", "KOI8-U"),
        ("PT154", "PT154"),
        ("SJIS", "SHIFT_JIS"),
        ("eucCN", "GB2312"),
        ("eucJP", "EUC-JP"),
        ("eucKR", "EUC-KR"),
        ("eucTW", "EUC-TW"),
    ];
    #[cfg(target_os = "openbsd")]
    const ALIASES: &[(&str, &str)] = &[
        ("646", "ASCII"),
        ("ISO8859-1", "ISO-8859-1"),
        ("ISO8859-13", "ISO-8859-13"),
        ("ISO8859-15", "ISO-8859-15"),
        ("ISO8859-2", "ISO-8859-2"),
        ("ISO8859-4", "ISO-8859-4"),
        ("ISO8859-5", "ISO-8859-5"),
        ("ISO8859-7", "ISO-8859-7"),
        ("US-ASCII", "ASCII"),
    ];
    #[cfg(target_os = "macos")]
    const ALIASES: &[(&str, &str)] = &[
        ("ARMSCII-8", "ARMSCII-8"),
        ("Big5", "BIG5"),
        ("Big5HKSCS", "BIG5-HKSCS"),
        ("CP1131", "CP1131"),
        ("CP1251", "CP1251"),
        ("CP866", "CP866"),
        ("CP949", "CP949"),
        ("GB18030", "GB18030"),
        ("GB2312", "GB2312"),
        ("GBK", "GBK"),
        ("ISO8859-1", "ISO-8859-1"),
        ("ISO8859-13", "ISO-8859-13"),
        ("ISO8859-15", "ISO-8859-15"),
        ("ISO8859-2", "ISO-8859-2"),
        ("ISO8859-4", "ISO-8859-4"),
        ("ISO8859-5", "ISO-8859-5"),
        ("ISO8859-7", "ISO-8859-7"),
        ("ISO8859-9", "ISO-8859-9"),
        ("KOI8-R", "KOI8-R"),
        ("KOI8-U", "KOI8-U"),
        ("PT154", "PT154"),
        ("SJIS", "SHIFT_JIS"),
        ("eucCN", "GB2312"),
        ("eucJP", "EUC-JP"),
        ("eucKR", "EUC-KR"),
    ];
    #[cfg(target_os = "aix")]
    const ALIASES: &[(&str, &str)] = &[
        ("GBK", "GBK"),
        ("IBM-1046", "CP1046"),
        ("IBM-1124", "CP1124"),
        ("IBM-1129", "CP1129"),
        ("IBM-1252", "CP1252"),
        ("IBM-850", "CP850"),
        ("IBM-856", "CP856"),
        ("IBM-921", "ISO-8859-13"),
        ("IBM-922", "CP922"),
        ("IBM-932", "CP932"),
        ("IBM-943", "CP943"),
        ("IBM-eucCN", "GB2312"),
        ("IBM-eucJP", "EUC-JP"),
        ("IBM-eucKR", "EUC-KR"),
        ("IBM-eucTW", "EUC-TW"),
        ("ISO8859-1", "ISO-8859-1"),
        ("ISO8859-15", "ISO-8859-15"),
        ("ISO8859-2", "ISO-8859-2"),
        ("ISO8859-5", "ISO-8859-5"),
        ("ISO8859-6", "ISO-8859-6"),
        ("ISO8859-7", "ISO-8859-7"),
        ("ISO8859-8", "ISO-8859-8"),
        ("ISO8859-9", "ISO-8859-9"),
        ("TIS-620", "TIS-620"),
        ("UTF-8", "UTF-8"),
        ("big5", "BIG5"),
    ];
    #[cfg(windows)]
    const ALIASES: &[(&str, &str)] = &[
        ("CP1361", "JOHAB"),
        ("CP20127", "ASCII"),
        ("CP20866", "KOI8-R"),
        ("CP20936", "GB2312"),
        ("CP21866", "KOI8-RU"),
        ("CP28591", "ISO-8859-1"),
        ("CP28592", "ISO-8859-2"),
        ("CP28593", "ISO-8859-3"),
        ("CP28594", "ISO-8859-4"),
        ("CP28595", "ISO-8859-5"),
        ("CP28596", "ISO-8859-6"),
        ("CP28597", "ISO-8859-7"),
        ("CP28598", "ISO-8859-8"),
        ("CP28599", "ISO-8859-9"),
        ("CP28605", "ISO-8859-15"),
        ("CP38598", "ISO-8859-8"),
        ("CP51932", "EUC-JP"),
        ("CP51936", "GB2312"),
        ("CP51949", "EUC-KR"),
        ("CP51950", "EUC-TW"),
        ("CP54936", "GB18030"),
        ("CP65001", "UTF-8"),
        ("CP936", "GBK"),
    ];
    // Platforms without a dedicated table map nothing.
    #[cfg(not(any(
        target_os = "freebsd",
        target_os = "netbsd",
        target_os = "openbsd",
        target_os = "macos",
        target_os = "aix",
        windows
    )))]
    const ALIASES: &[(&str, &str)] = &[];

    let known = ALIASES
        .iter()
        .find(|&&(native, _)| native == s)
        .map(|&(_, canonical)| canonical);

    match known {
        Some(canonical) => canonical,
        // Unknown names pass through verbatim; leaking the copy is what makes
        // the returned reference `'static`.
        None => String::from(s).leak(),
    }
}
#[cfg(unix)]
mod inner {
    use std::{
        ffi::{CStr, CString, c_char, c_int},
        ptr::null,
    };

    use libc::{self, CODESET, LC_CTYPE, nl_langinfo, setlocale};

    /// Copies the NUL-terminated C string at `s` into an owned [`CString`].
    ///
    /// Returns `None` when `s` is null.
    ///
    /// The parameter is `*const c_char` rather than `*const i8` because the
    /// signedness of C's `char` differs between targets; a hard-coded `i8`
    /// does not match the pointers returned by `setlocale`/`nl_langinfo` on
    /// targets where `c_char` is `u8`.
    ///
    /// # Safety
    ///
    /// `s` must be null or point to a valid NUL-terminated string that is not
    /// modified for the duration of the call.
    unsafe fn c_string_from_pointer(s: *const c_char) -> Option<CString> {
        if s.is_null() {
            None
        } else {
            // SAFETY: `s` is non-null and the caller guarantees it is a valid C string.
            Some(unsafe { CStr::from_ptr(s) }.to_owned())
        }
    }

    /// Calls `setlocale(category, locale)` and returns a copy of its result.
    ///
    /// Passing `None` hands a null pointer to `setlocale`, which only queries
    /// the current setting. Returns `None` when `setlocale` returns null.
    ///
    /// The locale name is kept as raw C-string bytes so that a queried name
    /// can be handed back to `setlocale` unchanged.
    fn set_locale(category: c_int, locale: Option<&CStr>) -> Option<CString> {
        let locale_ptr = locale.map_or(null(), |name| name.as_ptr());
        // SAFETY: `locale_ptr` is either null or borrowed from a `CStr` that
        // outlives the call. The returned pointer is copied immediately, before
        // any later `setlocale` call could overwrite the string it points to.
        unsafe { c_string_from_pointer(setlocale(category, locale_ptr)) }
    }

    /// Returns the codeset name of the locale selected by the environment.
    ///
    /// Temporarily switches the process-wide `LC_CTYPE` locale to `""`,
    /// reads `nl_langinfo(CODESET)`, and then restores the previously active
    /// locale. Returns `None` when `nl_langinfo` returns a null pointer.
    ///
    /// NOTE: `setlocale` changes process-global state and this function does
    /// no synchronization with other threads.
    pub fn locale_charset() -> Option<String> {
        let saved_locale = set_locale(LC_CTYPE, None);

        let environment_locale = CString::default();
        set_locale(LC_CTYPE, Some(environment_locale.as_c_str()));

        // The codeset must be copied before the locale is restored: the
        // buffer returned by `nl_langinfo` may be overwritten by `setlocale`.
        // SAFETY: `nl_langinfo` takes no pointer arguments and returns either
        // null or a NUL-terminated string, which is copied right away.
        let codeset = unsafe { c_string_from_pointer(nl_langinfo(CODESET)) }
            .map(|name| name.to_string_lossy().into_owned());

        // Nothing to restore if the initial query failed.
        if let Some(saved) = saved_locale.as_deref() {
            set_locale(LC_CTYPE, Some(saved));
        }
        codeset
    }
}
#[cfg(windows)]
mod inner {
    use libc::{LC_CTYPE, setlocale};
    use std::ffi::{CStr, CString};
    use windows_sys::Win32::Globalization::GetACP;

    /// Returns the name `setlocale` reports for the user-default `LC_CTYPE`
    /// locale, or `None` when `setlocale` returns null.
    ///
    /// The process-wide `LC_CTYPE` locale is switched to `""` only for the
    /// duration of the query; the previously active locale is restored
    /// afterwards so the lookup has no lasting side effect.
    fn current_locale() -> Option<String> {
        let user_default = CString::default();
        // SAFETY: every pointer passed to `setlocale` is either null or
        // borrowed from a `CString` that outlives the call, and every returned
        // pointer is null-checked and copied before the next `setlocale` call.
        unsafe {
            let saved = setlocale(LC_CTYPE, std::ptr::null());
            let saved = if saved.is_null() {
                None
            } else {
                Some(CStr::from_ptr(saved).to_owned())
            };

            let locale = setlocale(LC_CTYPE, user_default.as_ptr());
            let name = if locale.is_null() {
                None
            } else {
                Some(CStr::from_ptr(locale).to_string_lossy().into_owned())
            };

            // Nothing to restore if the initial query failed.
            if let Some(saved) = &saved {
                setlocale(LC_CTYPE, saved.as_ptr());
            }
            name
        }
    }

    /// Returns the codeset of the user-default locale as `"CP<codepage>"`,
    /// or `"UTF-8"` when the code page denotes UTF-8.
    ///
    /// The code page is the text after the last `.` of the locale name; when
    /// the name contains no `.`, the value returned by `GetACP` is used
    /// instead. Returns `None` when the locale name cannot be obtained.
    pub fn locale_charset() -> Option<String> {
        let current_locale = current_locale()?;
        let codepage = match current_locale.rsplit_once('.') {
            Some((_, pdot)) => pdot.to_owned(),
            // SAFETY: `GetACP` takes no arguments and has no preconditions.
            None => unsafe { GetACP() }.to_string(),
        };
        // The UTF-8 code page can be spelled numerically or by name; the name
        // is matched case-insensitively, with or without the hyphen.
        let is_utf8 = codepage == "65001"
            || codepage.eq_ignore_ascii_case("utf8")
            || codepage.eq_ignore_ascii_case("utf-8");
        Some(if is_utf8 {
            String::from("UTF-8")
        } else {
            format!("CP{codepage}")
        })
    }
}
#[cfg(not(any(unix, windows)))]
mod inner {
    /// Fallback for targets that are neither Unix nor Windows: no locale is
    /// queried and the codeset is always reported as `"UTF-8"`.
    ///
    /// The name and `Option<String>` return type match the Unix and Windows
    /// `inner::locale_charset` implementations so the same caller compiles on
    /// every target.
    pub fn locale_charset() -> Option<String> {
        Some(String::from("UTF-8"))
    }
}
/// Returns the canonical name of the character encoding of the user's locale.
///
/// The platform-specific lookup runs once, on the first call; the result is
/// cached for the rest of the process, so later calls return the same
/// `&'static str`. When the platform lookup yields nothing, `"UTF-8"` is
/// assumed. The name is normalized through `map_aliases` before being cached.
pub fn locale_charset() -> &'static str {
    static LOCALE_CHARSET: LazyLock<&'static str> = LazyLock::new(|| {
        // Build the fallback lazily so no `String` is allocated when the
        // platform reports a codeset.
        let native = inner::locale_charset().unwrap_or_else(|| String::from("UTF-8"));
        map_aliases(&native)
    });
    *LOCALE_CHARSET
}