1#![doc = include_str!("../README.md")]
2
3#[cfg(unix)]
4use std::ffi::OsStr;
5
/// Names of the locale environment variables inspected by the Unix
/// detection, listed in the order in which they are consulted.
const LOCALE_ENVIRONMENT_VARIABLES: &[&str] = &["LC_ALL", "LC_CTYPE", "LANG"];
7
/// What a single locale environment variable value says about the text
/// encoding, as classified by `get_locale_signal`.
#[cfg(unix)]
enum LocaleSignal {
    /// The value was empty, so it carries no information.
    Unknown,
    /// The value contains `utf-8` or `utf8` (matched ASCII-case-insensitively).
    UTF8,
    /// The value is exactly `C`, `c`, `POSIX` or `posix`.
    ASCII,
    /// The value is non-empty but matched none of the cases above.
    NonUTF8,
}
15
/// Reports whether `needle` occurs anywhere inside `haystack`, ignoring ASCII
/// case in the haystack.
///
/// `needle` must already be lowercase: every haystack byte is lowercased
/// before being compared with the corresponding needle byte, so an uppercase
/// needle byte can never match. This precondition is checked in debug builds.
///
/// An empty `needle` is considered to be contained in every haystack,
/// including an empty one.
#[cfg(unix)]
fn strstr_ignore_case(haystack: &OsStr, needle: &[u8]) -> bool {
    use std::os::unix::ffi::OsStrExt;

    debug_assert!(
        needle.iter().all(|c| c.to_ascii_lowercase() == *c),
        "needle must contain only ASCII lowercase characters"
    );

    // The empty string is a substring of everything. It also has to be
    // handled up front because `slice::windows` panics on a zero width.
    if needle.is_empty() {
        return true;
    }

    // `windows` yields nothing when the haystack is shorter than the needle,
    // so no explicit length comparison is required.
    haystack.as_bytes().windows(needle.len()).any(|window| {
        window
            .iter()
            .map(u8::to_ascii_lowercase)
            .eq(needle.iter().copied())
    })
}
44
45#[cfg(unix)]
46fn get_locale_signal(value: &OsStr) -> LocaleSignal {
47 if value.is_empty() {
48 LocaleSignal::Unknown
49 } else if value == "C" || value == "c" || value == "POSIX" || value == "posix" {
50 LocaleSignal::ASCII
51 } else if strstr_ignore_case(value, b"utf-8") || strstr_ignore_case(value, b"utf8") {
52 LocaleSignal::UTF8
53 } else {
54 LocaleSignal::NonUTF8
55 }
56}
57
/// The kind of text encoding detected for the current environment, as
/// returned by [`utf8_supported`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Utf8Support {
    /// Nothing could be determined. On Unix this means none of the locale
    /// variables held a non-empty value; on Windows it means the console
    /// output code page was reported as `0`. This is the default value.
    #[default]
    Unknown,
    /// Plain ASCII: a `C`/`POSIX` locale on Unix, or console output code page
    /// 20127 on Windows.
    ASCII,
    /// Console output code page 1252 on Windows. The Unix detection never
    /// produces this value.
    Latin1,
    /// UTF-8: a locale value containing `utf-8`/`utf8` on Unix, or console
    /// output code page 65001 on Windows.
    UTF8,
    /// Some other encoding: a non-empty locale value that is neither
    /// C/POSIX nor UTF-8 on Unix, or any other code page on Windows.
    Other,
}
72
73pub fn utf8_supported() -> Utf8Support {
75 #[cfg(unix)]
76 return utf8_supported_unix();
77 #[cfg(windows)]
78 return utf8_supported_windows();
79 #[cfg(not(any(unix, windows)))]
80 return Utf8Support::Unset;
81}
82
83#[cfg(unix)]
86fn utf8_supported_unix() -> Utf8Support {
87 let mut signal = Utf8Support::Unknown;
88 for &var in LOCALE_ENVIRONMENT_VARIABLES {
89 let locale = std::env::var_os(var).unwrap_or_default();
90 match get_locale_signal(locale.as_os_str()) {
91 LocaleSignal::UTF8 => return Utf8Support::UTF8,
92 LocaleSignal::ASCII => return Utf8Support::ASCII,
93 LocaleSignal::NonUTF8 => signal = Utf8Support::Other,
94 LocaleSignal::Unknown => {}
95 }
96 }
97 signal
98}
99
/// Windows detection: maps the console's output code page, as reported by
/// `GetConsoleOutputCP`, to a [`Utf8Support`] value.
#[cfg(windows)]
fn utf8_supported_windows() -> Utf8Support {
    use windows_sys::Win32::System::Console::GetConsoleOutputCP;

    // SAFETY: the call takes no arguments and only returns an integer.
    let code_page = unsafe { GetConsoleOutputCP() };

    match code_page {
        0 => Utf8Support::Unknown,
        1252 => Utf8Support::Latin1,
        20127 => Utf8Support::ASCII,
        65001 => Utf8Support::UTF8,
        // Code page 437 and every other identifier are reported as `Other`.
        _ => Utf8Support::Other,
    }
}
113
/// Extension trait that adds locale-related helpers to a command builder.
///
/// This file implements it for [`std::process::Command`].
pub trait CommandUtf8Ext {
    /// Configures the command to run under the `C` locale. Only available on
    /// Unix.
    ///
    /// The implementation for [`std::process::Command`] sets `LANG`,
    /// `LC_ALL` and `LC_CTYPE` to `C` in the child's environment.
    #[cfg(unix)]
    fn set_c_locale(&mut self);
}
120
121impl CommandUtf8Ext for std::process::Command {
122 #[cfg(unix)]
123 fn set_c_locale(&mut self) {
124 self.env("LANG", "C");
125 self.env("LC_ALL", "C");
126 self.env("LC_CTYPE", "C");
127 }
128}
129
/// Guard that remembers the console code pages that were active before
/// `set_console_utf8` changed them, and restores them when dropped.
///
/// Field `0` is the original output code page and field `1` the original
/// input code page. A value of `0` means there is nothing to restore for
/// that side, which is also the state of a `Default` handle.
#[derive(Debug, Default)]
#[cfg(windows)]
pub struct CodePageHandle(u32, u32);
133
#[cfg(windows)]
impl Drop for CodePageHandle {
    /// Restores the saved output and input code pages. A saved value of `0`
    /// is skipped. The results of the restore calls are not inspected.
    fn drop(&mut self) {
        use windows_sys::Win32::System::Console::{SetConsoleCP, SetConsoleOutputCP};

        let (output_cp, input_cp) = (self.0, self.1);

        if output_cp != 0 {
            // SAFETY: the call only takes a plain integer code page id.
            unsafe {
                SetConsoleOutputCP(output_cp);
            }
        }
        if input_cp != 0 {
            // SAFETY: the call only takes a plain integer code page id.
            unsafe {
                SetConsoleCP(input_cp);
            }
        }
    }
}
148
/// Switches the console's input and output code pages to UTF-8 (65001).
///
/// The code pages that were active beforehand are captured in the returned
/// [`CodePageHandle`], which puts them back when it is dropped, so keep the
/// handle alive for as long as UTF-8 should stay in effect.
///
/// Note that the results of the two "set" calls are not inspected; only the
/// availability of the UTF-8 code page is checked.
///
/// # Errors
///
/// Returns an error of kind [`std::io::ErrorKind::Other`] when
/// `IsValidCodePage` reports that code page 65001 is not available. Nothing
/// is changed in that case.
#[must_use = "The returned CodePageHandle resets the console code page to the original value when dropped"]
#[cfg(windows)]
pub fn set_console_utf8() -> Result<CodePageHandle, std::io::Error> {
    use windows_sys::Win32::Globalization::IsValidCodePage;
    use windows_sys::Win32::System::Console::{
        GetConsoleCP, GetConsoleOutputCP, SetConsoleCP, SetConsoleOutputCP,
    };

    const UTF8_CODE_PAGE: u32 = 65001;

    // SAFETY: both calls take no arguments and only return an integer.
    let (original_output_cp, original_input_cp) =
        unsafe { (GetConsoleOutputCP(), GetConsoleCP()) };

    // SAFETY: the call only takes a plain integer code page id.
    let utf8_available = unsafe { IsValidCodePage(UTF8_CODE_PAGE) } != 0;
    if !utf8_available {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            "UTF-8 codepage (65001) is not available",
        ));
    }

    // SAFETY: both calls only take a plain integer code page id.
    unsafe {
        SetConsoleOutputCP(UTF8_CODE_PAGE);
        SetConsoleCP(UTF8_CODE_PAGE);
    }

    Ok(CodePageHandle(original_output_cp, original_input_cp))
}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// `strstr_ignore_case` finds `utf-8` at the start, middle and end of a
    /// haystack in either case, and rejects look-alikes such as `utf8`.
    #[cfg(unix)]
    #[test]
    fn test_strstr() {
        const NEEDLE: &[u8] = b"utf-8";

        let matching = ["UTF-8", "XUTF-8", "UTF-8X", "utf-8x", "utf-8", "xutf-8"];
        for &haystack in &matching {
            assert!(
                strstr_ignore_case(OsStr::new(haystack), NEEDLE),
                "expected {haystack:?} to contain utf-8"
            );
        }

        let non_matching = ["UTF-16", "utf16", "utf8"];
        for &haystack in &non_matching {
            assert!(
                !strstr_ignore_case(OsStr::new(haystack), NEEDLE),
                "expected {haystack:?} not to contain utf-8"
            );
        }
    }
}