/// Reports whether `name` needs the UTF-8 flag, i.e. contains anything outside ASCII.
///
/// Returns `true` as soon as one non-ASCII byte is found in the UTF-8 representation of
/// `name`, and `false` for pure-ASCII names (the empty string included).
pub fn needs_utf8_flag(name: &str) -> bool {
    name.bytes().any(|byte| !byte.is_ascii())
}
/// Decodes a raw name into a `String`, guessing the encoding when it is not declared.
///
/// * `bytes` – the name bytes to decode.
/// * `utf8_flag` – `true` when the bytes are declared to be UTF-8.
///
/// With `utf8_flag` set, the bytes are decoded as UTF-8 and malformed sequences are replaced
/// with U+FFFD. Otherwise the candidates below are tried in order and the first one that
/// decodes without any malformed sequence wins:
///
/// 1. UTF-8 (this also covers plain ASCII),
/// 2. Shift_JIS,
/// 3. EUC-JP.
///
/// If none of them fits, the bytes are decoded as ISO-8859-15 and any error report from that
/// final decode is ignored.
///
/// The legacy decoders run without BOM handling. `Encoding::decode` sniffs a leading BOM and
/// silently switches to UTF-8 / UTF-16 when it finds one, so a name that merely *starts* with
/// `EF BB BF`, `FF FE` or `FE FF` would be decoded with an encoding nobody asked for and
/// could be accepted as a clean "Shift_JIS" result.
pub fn decode_name(bytes: &[u8], utf8_flag: bool) -> String {
    if utf8_flag {
        return String::from_utf8_lossy(bytes).into_owned();
    }

    // Valid UTF-8 (ASCII included) is taken verbatim even without the flag.
    if let Ok(s) = std::str::from_utf8(bytes) {
        return s.to_owned();
    }

    // Strict attempts: `None` means the bytes are malformed for that encoding, so move on.
    for enc in [encoding_rs::SHIFT_JIS, encoding_rs::EUC_JP] {
        if let Some(text) = enc.decode_without_bom_handling_and_without_replacement(bytes) {
            return text.into_owned();
        }
    }

    // Last resort: decode as ISO-8859-15, deliberately discarding the error indicator.
    let (text, _) = encoding_rs::ISO_8859_15.decode_without_bom_handling(bytes);
    text.into_owned()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pure-ASCII names do not need the flag; names with non-ASCII characters do.
    #[test]
    fn ascii_is_unflagged() {
        let ascii_needs_flag = needs_utf8_flag("hello.txt");
        let japanese_needs_flag = needs_utf8_flag("日本語.txt");
        assert!(!ascii_needs_flag);
        assert!(japanese_needs_flag);
    }

    /// With the flag set, valid UTF-8 bytes come back as the same string.
    #[test]
    fn utf8_flag_trusts_bytes() {
        let expected = "café.txt";
        let decoded = decode_name(expected.as_bytes(), true);
        assert_eq!(decoded, expected);
    }

    /// A name encoded as Shift_JIS is recovered when no flag is set.
    #[test]
    fn shift_jis_without_flag_round_trips() {
        let expected = "ソ.txt";
        let (encoded, _, unmappable) = encoding_rs::SHIFT_JIS.encode(expected);
        assert!(!unmappable);
        let decoded = decode_name(&encoded, false);
        assert_eq!(decoded, expected);
    }

    /// ASCII bytes without the flag are returned unchanged.
    #[test]
    fn ascii_without_flag_is_verbatim() {
        let decoded = decode_name(b"plain.txt", false);
        assert_eq!(decoded, "plain.txt");
    }
}