1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
use std::borrow::Cow;
use std::ffi::CStr;
use std::str;
/// A UTF-8 string built from a C string that may not be valid UTF-8.
///
/// Input that is already valid UTF-8 is borrowed unchanged. Any other input
/// is re-encoded into an owned string with the following layout:
///
/// 1. a U+FEFF marker character,
/// 2. the input with every invalid byte replaced by U+FFFD,
/// 3. a NUL (`'\0'`) separator,
/// 4. the input again, with every invalid byte `b` written as a NUL followed
///    by the character whose code point is `b & 0x7f`.
///
/// A `CStr` never contains interior NUL bytes, so every NUL in the resulting
/// string was produced by this encoding.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WasiString<'str>(Cow<'str, str>);

impl<'str> WasiString<'str> {
    /// Converts `cstr` into a `WasiString`.
    ///
    /// Borrows the bytes of `cstr` when they are valid UTF-8; otherwise
    /// allocates the encoded form described on [`WasiString`].
    pub fn from_maybe_nonutf8_cstr(cstr: &'str CStr) -> Self {
        let bytes = cstr.to_bytes();
        match str::from_utf8(bytes) {
            Ok(s) => Self(Cow::Borrowed(s)),
            Err(_) => Self::from_nonutf8_cstr(bytes),
        }
    }

    /// Builds the owned, encoded representation of `bytes`.
    ///
    /// Only called for input that failed UTF-8 validation, but it produces a
    /// well-formed string for any byte slice.
    fn from_nonutf8_cstr(bytes: &[u8]) -> Self {
        // Lower bound on the output size: the marker (3 bytes), the separator
        // (1 byte) and two copies of the input, each at least as long as the
        // input because an invalid byte expands to 2 or 3 bytes.
        let mut data = String::with_capacity(2 * bytes.len() + 4);

        data.push('\u{feff}');

        // First copy: lossy rendering, one U+FFFD per invalid byte.
        Self::push_escaping_invalid(&mut data, bytes, |out, _| out.push('\u{FFFD}'));

        data.push('\0');

        // Second copy: each invalid byte becomes NUL + (byte & 0x7f).
        Self::push_escaping_invalid(&mut data, bytes, |out, byte| {
            out.push('\0');
            out.push(char::from(byte & 0x7f));
        });

        Self(Cow::Owned(data))
    }

    /// Appends `input` to `out`, copying valid UTF-8 runs verbatim and calling
    /// `push_invalid` once for every byte at which UTF-8 validation fails.
    ///
    /// After an invalid byte, validation restarts at the very next byte, so a
    /// malformed multi-byte sequence is reported one byte at a time.
    fn push_escaping_invalid(
        out: &mut String,
        mut input: &[u8],
        mut push_invalid: impl FnMut(&mut String, u8),
    ) {
        loop {
            match str::from_utf8(input) {
                Ok(valid) => {
                    out.push_str(valid);
                    return;
                }
                Err(error) => {
                    let (valid, after_valid) = input.split_at(error.valid_up_to());
                    // SAFETY: `Utf8Error::valid_up_to` is the length of the
                    // prefix of `input` that `from_utf8` verified to be valid
                    // UTF-8, and `valid` is exactly that prefix.
                    out.push_str(unsafe { str::from_utf8_unchecked(valid) });

                    // A validation error always leaves at least one byte
                    // after the valid prefix, so `None` is not expected here;
                    // it is handled anyway to avoid a panic path.
                    match after_valid.split_first() {
                        Some((&byte, remaining)) => {
                            push_invalid(out, byte);
                            input = remaining;
                        }
                        None => return,
                    }
                }
            }
        }
    }

    /// Returns the string contents: either the original text or the encoded
    /// form, depending on how this value was constructed.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}
/// Input that is already valid UTF-8 must come back unchanged.
#[test]
fn valid_utf8() {
    /// Runs the NUL-terminated `raw` bytes through the conversion and returns
    /// the resulting text.
    fn converted(raw: &[u8]) -> String {
        let cstr = CStr::from_bytes_with_nul(raw).unwrap();
        WasiString::from_maybe_nonutf8_cstr(cstr).as_str().to_owned()
    }

    // Empty string.
    assert_eq!(converted(b"\0"), "");
    // Plain ASCII.
    assert_eq!(converted(b"foo\0"), "foo");
}
/// Input containing invalid bytes must be re-encoded as: U+FEFF, the lossy
/// copy, a NUL separator, then the copy with each invalid byte escaped.
#[test]
fn not_utf8() {
    /// Runs the NUL-terminated `raw` bytes through the conversion and returns
    /// the resulting text.
    fn converted(raw: &[u8]) -> String {
        let cstr = CStr::from_bytes_with_nul(raw).unwrap();
        WasiString::from_maybe_nonutf8_cstr(cstr).as_str().to_owned()
    }

    // A single invalid byte.
    assert_eq!(converted(b"\xfe\0"), "\u{feff}\u{fffd}\0\0\u{7e}");

    // Two consecutive invalid bytes are reported individually.
    assert_eq!(
        converted(b"\xc0\xff\0"),
        "\u{feff}\u{fffd}\u{fffd}\0\0\u{40}\0\u{7f}"
    );

    // A lone, well-formed U+FEFF is valid UTF-8 and passes through as-is.
    assert_eq!(converted(b"\xef\xbb\xbf\0"), "\u{feff}");

    // A well-formed U+FEFF followed by an invalid byte: the U+FEFF from the
    // input appears in both copies, after the leading marker.
    assert_eq!(
        converted(b"\xef\xbb\xbf\xfd\0"),
        "\u{feff}\u{feff}\u{fffd}\0\u{feff}\0\x7d"
    );
}