use std::borrow::Cow;
use std::path::{Path, PathBuf};
use std::ffi::{OsStr, OsString};
/// Turns a path into a string representation.
///
/// When the path is valid UTF-8 and `should_be_encoded` does not flag it, the
/// original text is handed back borrowed and unmodified. Every other path is
/// passed through `encode_os` and returned as an owned string.
pub fn encode_path<P>(p: &P) -> Cow<str>
where
    P: AsRef<Path>,
{
    let path = p.as_ref();
    match path.to_str() {
        // Fast path: nothing to escape, so no allocation is needed.
        Some(text) if !should_be_encoded(text) => Cow::Borrowed(text),
        // Either not UTF-8 at all, or UTF-8 that was flagged for encoding.
        _ => Cow::Owned(encode_os(path.as_os_str())),
    }
}
pub fn decode_path(encoded_path_string: &str) -> Result<PathBuf, base64::DecodeError>
{
if encoded_path_string.starts_with(PREFIX) {
let bytes = decode_bytes(encoded_path_string)?;
let os_str = decode_os(bytes);
Ok(PathBuf::from(os_str))
} else {
Ok(PathBuf::from(encoded_path_string))
}
}
/// Marker placed in front of the base64 payload of an encoded path (Windows builds).
///
/// NOTE(review): presumably picked because it is not a usable start of a real
/// path on this platform — confirm.
#[cfg(windows)]
const PREFIX: &str = "::\\_";
/// Marker placed in front of the base64 payload of an encoded path (non-Windows builds).
///
/// NOTE(review): presumably picked because nothing can live beneath `/dev/null`,
/// so real paths are unlikely to start with it — confirm.
#[cfg(not(windows))]
const PREFIX: &str = "/dev/null/b64_";

/// Reports whether a UTF-8 path string must be base64-encoded rather than used verbatim.
///
/// Returns `true` when the string
/// * contains any control character, or
/// * itself begins with `PREFIX`.
///
/// The second rule keeps encoding reversible: a decoder that dispatches on the
/// marker would otherwise mistake a literal path starting with `PREFIX` for a
/// base64 payload and either fail or produce a different path.
fn should_be_encoded(s: &str) -> bool {
    s.starts_with(PREFIX) || s.chars().any(char::is_control)
}
/// Windows: serialises the OS string's UTF-16 code units to bytes via
/// `u16_slice_to_byte_array` and hands them to `encode_bytes`.
#[cfg(windows)]
fn encode_os(s: &OsStr) -> String {
    use std::os::windows::ffi::OsStrExt;

    let units: Vec<u16> = s.encode_wide().collect();
    encode_bytes(&u16_slice_to_byte_array(&units))
}
/// Non-Windows: passes the OS string's raw bytes straight to `encode_bytes`.
#[cfg(not(windows))]
fn encode_os(s: &OsStr) -> String {
    encode_bytes(std::os::unix::ffi::OsStrExt::as_bytes(s))
}
/// Builds the encoded form of `bytes`: `PREFIX` immediately followed by the
/// standard-alphabet base64 of the input.
fn encode_bytes(bytes: &[u8]) -> String {
    // Start from the marker so the base64 text is appended directly after it.
    let mut encoded = String::from(PREFIX);
    base64::encode_config_buf(bytes, base64::STANDARD, &mut encoded);
    encoded
}
/// Base64-decodes everything in `encoded_str` that follows the first
/// `PREFIX.len()` bytes, using the standard alphabet.
///
/// The leading bytes are skipped without being inspected, so the caller is
/// expected to have checked that the string starts with `PREFIX`. The split
/// panics if the string is shorter than the prefix or the offset is not on a
/// character boundary.
fn decode_bytes(encoded_str: &str) -> Result<Vec<u8>, base64::DecodeError> {
    let (_, payload) = encoded_str.split_at(PREFIX.len());
    base64::decode_config(payload, base64::STANDARD)
}
/// Non-Windows: wraps the given bytes in an `OsString` without any validation
/// or conversion.
#[cfg(not(windows))]
pub(crate) fn decode_os(bytes: Vec<u8>) -> OsString {
    <OsString as std::os::unix::ffi::OsStringExt>::from_vec(bytes)
}
/// Windows: rebuilds an `OsString` from bytes holding big-endian UTF-16 code
/// units (two bytes per unit, high byte first, as combined by `bytes_to_u16`).
///
/// An empty input yields an empty `OsString`. If the input has an odd number
/// of bytes, the final unpaired byte is ignored.
#[cfg(windows)]
pub(crate) fn decode_os(bytes: Vec<u8>) -> OsString {
    use std::os::windows::ffi::OsStringExt;

    // `chunks_exact` yields only complete pairs, so there is no index
    // arithmetic that could underflow when `bytes` is empty.
    let wide_chars: Vec<u16> = bytes
        .chunks_exact(2)
        .map(|pair| bytes_to_u16(pair[0], pair[1]))
        .collect();
    OsString::from_wide(&wide_chars)
}
/// Combines two bytes into one `u16`, with `b1` as the high byte and `b2` as
/// the low byte (big-endian order).
#[cfg(windows)]
#[inline]
fn bytes_to_u16(b1: u8, b2: u8) -> u16 {
    u16::from_be_bytes([b1, b2])
}
/// Splits a `u16` into two bytes, high byte first (big-endian order).
#[cfg(windows)]
#[inline]
fn u16_to_bytes(value: u16) -> [u8; 2] {
    value.to_be_bytes()
}
/// Flattens a slice of `u16` values into bytes, emitting for each value the
/// two bytes returned by `u16_to_bytes`, in order.
#[cfg(windows)]
fn u16_slice_to_byte_array(wides: &[u16]) -> Vec<u8> {
    // Exactly two output bytes per input unit.
    let mut out = Vec::with_capacity(wides.len() * 2);
    for unit in wides.iter().copied() {
        out.extend_from_slice(&u16_to_bytes(unit));
    }
    out
}
#[cfg(test)]
mod tests {
    use std::path::PathBuf;
    use super::*;

    /// "Hello" followed by a byte that makes the sequence invalid UTF-8.
    #[cfg(unix)]
    const INVALID_UTF8_BYTE_SEQUENCE: [u8; 6] = [0x48, 0x65, 0x6c, 0x6c, 0x6f, 0xc0];

    /// "Hello", then a lone high surrogate (0xd800) followed by 'H', which is invalid UTF-16.
    #[cfg(windows)]
    const INVALID_UTF16_BYTE_SEQUENCE: [u16; 7] = [0x48, 0x65, 0x6c, 0x6c, 0x6f, 0xd800, 0x48];

    /// Plain UTF-8 paths pass through unchanged in both directions.
    #[test]
    fn for_utf8_which_does_not_need_encoding() {
        let pb = PathBuf::new();
        let s = encode_path(&pb);
        assert_eq!(s, "", "Empty paths should be empty strings.");
        let pb2 = decode_path(&s).unwrap();
        assert_eq!(pb2, pb, "Empty paths should be round-trippable.");

        let pb = PathBuf::from("hello");
        let s = encode_path(&pb);
        assert_eq!(s, "hello", "Valid UTF-8 paths without control chars should be encoded as-is.");
        let pb2 = decode_path(&s).unwrap();
        assert_eq!(pb2, pb, "Valid UTF-8 paths without control chars should be round-trippable.");
    }

    /// Control characters trigger base64 encoding of the raw bytes on Unix.
    #[cfg(unix)]
    #[test]
    fn for_valid_utf8_needing_unix_encoding() {
        let pb = PathBuf::from("hello\tworld");
        let s = encode_path(&pb);
        assert_eq!(s, format!("{}aGVsbG8Jd29ybGQ=", PREFIX), "Paths with control characters in them should be base-64 encoded.");
        let pb2 = decode_path(&s).unwrap();
        assert_eq!(pb2, pb, "Paths with control characters in them should be round-trippable.");
    }

    /// Control characters trigger base64 encoding of the UTF-16 units on Windows.
    #[cfg(windows)]
    #[test]
    fn for_valid_utf8_needing_windows_encoding() {
        let pb = PathBuf::from("hello\tworld");
        // Uses the same public entry points as the Unix variant of this test.
        let s = encode_path(&pb);
        assert_eq!(s, format!("{}AGgAZQBsAGwAbwAJAHcAbwByAGwAZA==", PREFIX), "Paths with control characters in them should be base-64 encoded.");
        let pb2 = decode_path(&s).unwrap();
        assert_eq!(pb2, pb, "Paths with control characters in them should be round-trippable.");
    }

    /// Paths that are not valid UTF-8 are base64 encoded and survive a round trip.
    #[cfg(unix)]
    #[test]
    fn for_invalid_utf8() {
        let os = decode_os(INVALID_UTF8_BYTE_SEQUENCE.to_vec());
        let pb = PathBuf::from(os);
        let s = encode_path(&pb);
        assert_eq!(s, format!("{}SGVsbG/A", PREFIX), "Invalid UTF-8 byte sequences should be base-64 encoded.");
        let pb2 = decode_path(&s).unwrap();
        assert_eq!(pb2, pb, "Invalid UTF-8 byte sequences should be round-trippable.");
    }

    /// Paths that are not valid UTF-16 are base64 encoded and survive a round trip.
    #[cfg(windows)]
    #[test]
    fn for_invalid_utf16() {
        let bytes = u16_slice_to_byte_array(&INVALID_UTF16_BYTE_SEQUENCE);
        let os = decode_os(bytes);
        let pb = PathBuf::from(os);
        let s = encode_path(&pb);
        assert_eq!(s, format!("{}AEgAZQBsAGwAb9gAAEg=", PREFIX), "Invalid UTF-16 byte sequences should be base-64 encoded.");
        // `decode_path` returns a `Result`; unwrap it before comparing with the path.
        let pb2 = decode_path(&s).unwrap();
        assert_eq!(pb2, pb, "Invalid UTF-16 byte sequences should be round-trippable.");
    }

    /// Corrupting the base64 payload must surface as an error, not a panic or a bogus path.
    #[cfg(unix)]
    #[test]
    fn decode_for_mangled_base64_returns_err() {
        let mut s = encode_path(&"Hello\x11world").into_owned();
        s.push('\t');
        let decode_attempt = decode_path(&s);
        assert!(decode_attempt.is_err(), "Tabs are not valid in Base-64 encoded strings, so we should get an error when decoding it.");
    }
}