use std::{ffi, str};
/// An owned, NUL-terminated byte string built from a path given as UTF-8
/// text, either verbatim or in the BOM-prefixed escaped form understood by
/// [`PosixString::from_path_str`].
pub struct PosixString {
    /// The decoded path bytes, stored as a C string.
    data: ffi::CString,
}

impl PosixString {
    /// Builds a `PosixString` from raw bytes that are expected to be UTF-8.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when `bytes` is not valid UTF-8, or when
    /// [`PosixString::from_path_str`] rejects the decoded text.
    pub fn from_path_bytes(bytes: &[u8]) -> Result<Self, ()> {
        match str::from_utf8(bytes) {
            Ok(text) => Self::from_path_str(text),
            Err(_) => Err(()),
        }
    }

    /// Builds a `PosixString` from a string slice.
    ///
    /// A string without interior NUL bytes is stored as-is. A string that does
    /// contain a NUL is only accepted when it has the escaped layout:
    /// U+FEFF, then the text with every escaped byte shown as U+FFFD, then a
    /// NUL, then the same text again with every escaped byte written as a NUL
    /// followed by that byte with its high bit cleared.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when the string contains a NUL but does not follow
    /// the escaped layout described above.
    pub fn from_path_str(s: &str) -> Result<Self, ()> {
        ffi::CString::new(s)
            .map(|data| Self { data })
            .or_else(|err| Self::from_arf(s, err.nul_position()))
    }

    /// Slow path of [`PosixString::from_path_str`]: decodes the escaped layout.
    ///
    /// `nul` is the byte offset of the first NUL in `s`; everything after it
    /// is the escaped half, everything between the leading U+FEFF and it is
    /// the lossy half. Both halves are walked in lock-step and must agree.
    #[cold]
    fn from_arf(s: &str, nul: usize) -> Result<Self, ()> {
        const BOM: char = '\u{feff}';
        // UTF-8 encoding of U+FFFD, the placeholder used in the lossy half.
        const REPLACEMENT: [u8; 3] = [0xef, 0xbf, 0xbd];

        if !s.starts_with(BOM) {
            return Err(());
        }

        let mut display = s.bytes().skip(BOM.len_utf8());
        let mut escaped = s.bytes().skip(nul + 1);
        let mut decoded = Vec::new();
        let mut saw_escape = false;

        while let Some(byte) = escaped.next() {
            match byte {
                b'\0' => {
                    // A NUL introduces an escaped byte; the byte that follows
                    // carries the low seven bits and must not have the high
                    // bit set itself.
                    let low = escaped.next().ok_or(())?;
                    if low >= 0x80 {
                        return Err(());
                    }
                    // The lossy half must show U+FFFD at this position.
                    let shown = [
                        display.next().ok_or(())?,
                        display.next().ok_or(())?,
                        display.next().ok_or(())?,
                    ];
                    if shown != REPLACEMENT {
                        return Err(());
                    }
                    saw_escape = true;
                    decoded.push(low | 0x80);
                }
                plain => {
                    // Ordinary bytes must be identical in both halves.
                    if display.next() != Some(plain) {
                        return Err(());
                    }
                    decoded.push(plain);
                }
            }
        }

        // At least one escape is required, and the lossy half must end exactly
        // at the separating NUL.
        if !saw_escape || display.next() != Some(b'\0') {
            return Err(());
        }

        // SAFETY: every byte pushed above is non-zero: plain bytes come from
        // the non-NUL match arm, and escaped bytes always have bit 7 set.
        let data = unsafe { ffi::CString::from_vec_unchecked(decoded) };
        Ok(Self { data })
    }

    /// Borrows the decoded path as a C string.
    pub fn as_cstr(&self) -> &ffi::CStr {
        self.data.as_c_str()
    }
}
/// NUL-free strings, including ones that contain U+FFFD or begin with U+FEFF,
/// must be stored byte-for-byte.
#[test]
fn utf8_inputs() {
    let passthrough = [
        "",
        "f",
        "foo",
        "\u{fffd}",
        "\u{fffd}foo",
        "\u{feff}foo",
    ];
    for &text in &passthrough {
        let converted = PosixString::from_path_str(text).unwrap();
        assert_eq!(converted.as_cstr().to_bytes(), text.as_bytes());
    }
}
/// Well-formed escaped strings decode to the original bytes, with each
/// NUL-escaped byte getting its high bit restored.
#[test]
fn arf_inputs() {
    let decode = |arf: &str| PosixString::from_path_str(arf).unwrap();

    // Escaped byte in the middle of the path.
    let middle = decode("\u{feff}hello\u{fffd}world\0hello\0\x05world");
    assert_eq!(middle.as_cstr().to_bytes(), b"hello\x85world");

    // Escaped byte at the very end of the path.
    let trailing = decode("\u{feff}hello\u{fffd}\0hello\0\x05");
    assert_eq!(trailing.as_cstr().to_bytes(), b"hello\x85");
}
/// Byte inputs that are not valid UTF-8 are rejected outright.
#[test]
fn errors_from_bytes() {
    let rejected: [&[u8]; 2] = [b"\xfe", b"\xc0\xff"];
    for &raw in &rejected {
        assert!(PosixString::from_path_bytes(raw).is_err());
    }
}
/// Strings that contain a NUL but do not follow the escaped layout are
/// rejected.
#[test]
fn errors_from_str() {
    let malformed = [
        // Both halves agree, but nothing is escaped.
        "\u{feff}hello world\0hello world",
        // An escape in the second half with no U+FFFD in the first half.
        "\u{feff}hello world\0\0hello world\0",
        // The escape in the second half does not line up with the U+FFFD.
        "\u{feff}hello\u{fffd}world\0\0hello\0\x05world\0",
        // Starts with U+FFFE rather than U+FEFF.
        "\u{fffe}hello\u{fffd}world\0hello\0\x05world",
        // The escaping NUL is the last byte, with no payload after it.
        "\u{feff}hello\u{fffd}\0hello\0",
    ];
    for &text in &malformed {
        assert!(PosixString::from_path_str(text).is_err());
    }
}