#![doc = include_str!("../README.md")]
#[cfg(unix)]
use std::ffi::OsStr;
/// Names of the environment variables consulted for locale information.
///
/// The Unix detection code walks this list front to back, so the order here
/// is the order in which the variables are inspected: `LC_ALL`, then
/// `LC_CTYPE`, then `LANG`.
const LOCALE_ENVIRONMENT_VARIABLES: &[&str] = &["LC_ALL", "LC_CTYPE", "LANG"];
/// What a single locale environment variable value says about the encoding.
#[cfg(unix)]
enum LocaleSignal {
/// The value was empty, so it carries no information.
Unknown,
/// The value mentions `utf-8` or `utf8` (in any ASCII case).
UTF8,
/// The value is exactly `C`, `c`, `POSIX` or `posix`.
ASCII,
/// The value is non-empty but matched none of the cases above.
NonUTF8,
}
/// Returns `true` if `needle` occurs anywhere inside `haystack`, ignoring
/// ASCII case.
///
/// Only ASCII letters are case-folded; every other byte (including non-ASCII
/// bytes of the `OsStr`) must match exactly. `needle` may be given in any
/// case.
///
/// An empty `needle` is considered to be contained in every haystack,
/// including an empty one, mirroring `str::contains("")`.
#[cfg(unix)]
fn strstr_ignore_case(haystack: &OsStr, needle: &[u8]) -> bool {
    use std::os::unix::ffi::OsStrExt;

    // `slice::windows(0)` panics, so the empty needle is answered up front.
    if needle.is_empty() {
        return true;
    }

    // `windows` yields nothing when the haystack is shorter than the needle,
    // so no separate length check is needed.
    haystack
        .as_bytes()
        .windows(needle.len())
        .any(|window| window.eq_ignore_ascii_case(needle))
}
/// Classifies one locale environment variable value.
///
/// * empty value -> [`LocaleSignal::Unknown`]
/// * exactly `C`, `c`, `POSIX` or `posix` -> [`LocaleSignal::ASCII`]
/// * contains `utf-8` or `utf8` in any ASCII case -> [`LocaleSignal::UTF8`]
/// * anything else -> [`LocaleSignal::NonUTF8`]
#[cfg(unix)]
fn get_locale_signal(value: &OsStr) -> LocaleSignal {
    if value.is_empty() {
        return LocaleSignal::Unknown;
    }

    // The four accepted spellings are plain ASCII, so a value that is not
    // valid UTF-8 can never be one of them.
    if matches!(value.to_str(), Some("C" | "c" | "POSIX" | "posix")) {
        return LocaleSignal::ASCII;
    }

    let mentions_utf8 =
        strstr_ignore_case(value, b"utf-8") || strstr_ignore_case(value, b"utf8");
    if mentions_utf8 {
        LocaleSignal::UTF8
    } else {
        LocaleSignal::NonUTF8
    }
}
/// The character encoding the current environment appears to support, as
/// reported by [`utf8_supported`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Utf8Support {
/// Nothing could be determined: on Unix no locale variable was set to a
/// non-empty value, on Windows the console reported code page `0`. This is
/// also the `Default` value.
#[default]
Unknown,
/// Plain ASCII: a `C`/`POSIX` locale on Unix, or code page `20127` on
/// Windows.
ASCII,
/// Reported on Windows when the console output code page is `1252`.
Latin1,
/// UTF-8: a locale value mentioning `utf-8`/`utf8` on Unix, or code page
/// `65001` on Windows.
UTF8,
/// Some other encoding: a non-empty locale value that is neither
/// `C`/`POSIX` nor UTF-8, or any other Windows code page.
Other,
}
/// Detects which character encoding the current environment supports.
///
/// On Unix the locale environment variables are inspected; on Windows the
/// console output code page is queried. On every other platform there is no
/// detection strategy, so [`Utf8Support::Unknown`] is returned.
pub fn utf8_supported() -> Utf8Support {
    #[cfg(unix)]
    return utf8_supported_unix();
    #[cfg(windows)]
    return utf8_supported_windows();
    // `Utf8Support` has no `Unset` variant; `Unknown` is the "could not
    // determine" value and keeps unsupported targets compiling.
    #[cfg(not(any(unix, windows)))]
    return Utf8Support::Unknown;
}
/// Unix implementation of [`utf8_supported`], driven by the locale
/// environment variables.
///
/// The variables in `LOCALE_ENVIRONMENT_VARIABLES` are read in order. The
/// first one whose value signals UTF-8 or ASCII decides the result
/// immediately. A non-empty value that signals neither is remembered and
/// only used if no later variable is decisive, in which case
/// [`Utf8Support::Other`] is returned. If every variable is unset or empty
/// the result is [`Utf8Support::Unknown`].
///
/// NOTE(review): a non-UTF-8 value in an earlier variable does not stop the
/// scan, so a later variable can still yield `UTF8`/`ASCII` — confirm this
/// leniency is intended.
#[cfg(unix)]
fn utf8_supported_unix() -> Utf8Support {
    let mut saw_non_utf8 = false;

    // `find_map` is lazy: variables after the first decisive one are never
    // read from the environment.
    let decisive = LOCALE_ENVIRONMENT_VARIABLES.iter().find_map(|&name| {
        let value = std::env::var_os(name).unwrap_or_default();
        match get_locale_signal(&value) {
            LocaleSignal::UTF8 => Some(Utf8Support::UTF8),
            LocaleSignal::ASCII => Some(Utf8Support::ASCII),
            LocaleSignal::NonUTF8 => {
                saw_non_utf8 = true;
                None
            }
            LocaleSignal::Unknown => None,
        }
    });

    match decisive {
        Some(support) => support,
        None if saw_non_utf8 => Utf8Support::Other,
        None => Utf8Support::Unknown,
    }
}
/// Windows implementation of [`utf8_supported`], based on the console
/// output code page.
///
/// Code page `65001` maps to [`Utf8Support::UTF8`], `20127` to
/// [`Utf8Support::ASCII`], `1252` to [`Utf8Support::Latin1`] and `0` to
/// [`Utf8Support::Unknown`]. Every other code page is reported as
/// [`Utf8Support::Other`].
#[cfg(windows)]
fn utf8_supported_windows() -> Utf8Support {
    use windows_sys::Win32::System::Console::GetConsoleOutputCP;

    // SAFETY: `GetConsoleOutputCP` takes no arguments and has no
    // preconditions; it only returns an integer.
    let code_page = unsafe { GetConsoleOutputCP() };

    match code_page {
        0 => Utf8Support::Unknown,
        65001 => Utf8Support::UTF8,
        20127 => Utf8Support::ASCII,
        1252 => Utf8Support::Latin1,
        // All remaining code pages (437 included) fall through to `Other`.
        _ => Utf8Support::Other,
    }
}
/// Extension methods for [`std::process::Command`] related to locale
/// handling of the spawned process.
pub trait CommandUtf8Ext {
/// Forces the `C` locale for the command by setting `LANG`, `LC_ALL` and
/// `LC_CTYPE` to `C` in its environment.
///
/// Only available on Unix.
#[cfg(unix)]
fn set_c_locale(&mut self);
}
impl CommandUtf8Ext for std::process::Command {
    /// Sets `LANG`, `LC_ALL` and `LC_CTYPE` to `C` in the environment this
    /// command will be spawned with.
    #[cfg(unix)]
    fn set_c_locale(&mut self) {
        for variable in ["LANG", "LC_ALL", "LC_CTYPE"] {
            self.env(variable, "C");
        }
    }
}
/// Guard that restores the console code pages when dropped.
///
/// The first field is the console *output* code page to restore and the
/// second is the console *input* code page. A value of `0` in either field
/// means "nothing to restore" for that side, so a default-constructed handle
/// does nothing on drop.
#[derive(Debug, Default)]
#[cfg(windows)]
pub struct CodePageHandle(u32, u32);
#[cfg(windows)]
impl Drop for CodePageHandle {
    /// Puts the saved output and input code pages back on the console.
    ///
    /// A saved value of `0` is skipped. The results of the restore calls are
    /// not inspected.
    fn drop(&mut self) {
        use windows_sys::Win32::System::Console::{SetConsoleCP, SetConsoleOutputCP};

        let (saved_output_cp, saved_input_cp) = (self.0, self.1);

        if saved_output_cp != 0 {
            // SAFETY: the call takes a plain integer and has no pointer or
            // lifetime preconditions.
            unsafe {
                SetConsoleOutputCP(saved_output_cp);
            }
        }
        if saved_input_cp != 0 {
            // SAFETY: as above; the argument is a plain integer.
            unsafe {
                SetConsoleCP(saved_input_cp);
            }
        }
    }
}
/// Switches the console input and output code pages to UTF-8 (`65001`).
///
/// On success the returned [`CodePageHandle`] remembers the previous code
/// pages and restores them when it is dropped, so keep it alive for as long
/// as UTF-8 console I/O is needed.
///
/// # Errors
///
/// Returns an error if code page `65001` is not valid on this system, or if
/// the console rejects either code-page change (the OS error is returned).
/// When the output code page was changed but the input one could not be, the
/// output code page is put back before the error is returned, so a failed
/// call leaves the console as it was.
#[must_use = "The returned CodePageHandle resets the console code page to the original value when dropped"]
#[cfg(windows)]
pub fn set_console_utf8() -> Result<CodePageHandle, std::io::Error> {
    use windows_sys::Win32::Globalization::*;
    use windows_sys::Win32::System::Console::*;

    const UTF8_CODE_PAGE: u32 = 65001;

    // SAFETY: all console/code-page calls below take plain integers (or no
    // arguments) and have no pointer or lifetime preconditions.
    let (original_output_cp, original_input_cp) =
        unsafe { (GetConsoleOutputCP(), GetConsoleCP()) };

    if unsafe { IsValidCodePage(UTF8_CODE_PAGE) } == 0 {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            "UTF-8 codepage (65001) is not available",
        ));
    }

    // Both setters return zero on failure; previously that was ignored and
    // `Ok` was returned even though the console was not switched.
    if unsafe { SetConsoleOutputCP(UTF8_CODE_PAGE) } == 0 {
        return Err(std::io::Error::last_os_error());
    }
    if unsafe { SetConsoleCP(UTF8_CODE_PAGE) } == 0 {
        // Capture the error first: the rollback call below may overwrite the
        // thread's last-error value.
        let error = std::io::Error::last_os_error();
        if original_output_cp != 0 {
            unsafe {
                SetConsoleOutputCP(original_output_cp);
            }
        }
        return Err(error);
    }

    Ok(CodePageHandle(original_output_cp, original_input_cp))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `strstr_ignore_case` finds `utf-8` at any position and in any ASCII
    /// case, and rejects haystacks that do not contain it.
    #[cfg(unix)]
    #[test]
    fn test_strstr() {
        const NEEDLE: &[u8] = b"utf-8";

        let containing = ["UTF-8", "XUTF-8", "UTF-8X", "utf-8x", "utf-8", "xutf-8"];
        for haystack in containing {
            assert!(
                strstr_ignore_case(OsStr::new(haystack), NEEDLE),
                "expected {:?} to contain the needle",
                haystack
            );
        }

        let not_containing = ["UTF-16", "utf16", "utf8"];
        for haystack in not_containing {
            assert!(
                !strstr_ignore_case(OsStr::new(haystack), NEEDLE),
                "expected {:?} not to contain the needle",
                haystack
            );
        }
    }
}