//! Linux flavours of `OsStr`/`OsString`: byte-string types for data that originated on Linux, regardless of the host platform.
use std::borrow::{Borrow, Cow};
use std::ops::Deref;
use std::str::Utf8Error;
/// `OsStr`, but specifically for Linux (since we aren't always processing native dumps).
///
/// This is an unsized wrapper around a bag of arbitrary bytes; it is only
/// ever handled by reference (see [`LinuxOsStr::from_bytes`]).
// `repr(transparent)` guarantees the same layout as `[u8]`, which the pointer
// cast in `from_bytes` depends on.
#[repr(transparent)]
#[derive(Debug, PartialOrd, Ord, Eq, PartialEq)]
pub struct LinuxOsStr([u8]);

/// `OsString`, but specifically for Linux (since we aren't always processing native dumps).
///
/// The owned counterpart of [`LinuxOsStr`], backed by a `Vec<u8>`.
#[derive(Default, Debug, PartialOrd, Ord, Eq, PartialEq, Clone)]
pub struct LinuxOsString(Vec<u8>);

impl LinuxOsStr {
    /// Returns an empty `LinuxOsStr`.
    pub fn new() -> &'static Self {
        Self::from_bytes(b"")
    }

    /// Reinterprets a byte slice as a `LinuxOsStr` without copying.
    pub fn from_bytes(inner: &[u8]) -> &Self {
        // This is the idiom std uses for creating a type that wraps a slice;
        // there is no way to do it without `unsafe`.
        //
        // SAFETY: `LinuxOsStr` is `#[repr(transparent)]` over `[u8]`, so the
        // two types have identical layout and pointer metadata. The wrapper
        // adds no invariants on top of `[u8]`, and the returned reference
        // carries the lifetime of `inner`.
        unsafe { &*(inner as *const [u8] as *const LinuxOsStr) }
    }

    /// Returns the underlying bytes.
    pub fn as_bytes(&self) -> &[u8] {
        &self.0
    }

    /// Tries to interpret the `LinuxOsStr` as a utf8 `str`.
    ///
    /// While linux OsStrs are "arbitrary bytes" in general, there are often
    /// parts that are known to be utf8 (ascii even).
    ///
    /// For instance, when parsing /proc/self/maps, most of the line is ascii
    /// like "r-xp" or "1a23-4fe2". However the "path" at the end of each line
    /// is a true `LinuxOsStr` and may not be proper utf8.
    ///
    /// # Errors
    ///
    /// Returns the [`Utf8Error`] from `std::str::from_utf8` if the bytes are
    /// not valid utf8.
    pub fn to_str(&self) -> Result<&str, Utf8Error> {
        std::str::from_utf8(self.as_bytes())
    }

    /// Converts to a utf8 string lossily (uses the usual std lossy algorithm).
    ///
    /// The result only allocates when the bytes are not already valid utf8.
    pub fn to_string_lossy(&self) -> Cow<'_, str> {
        // This is where we stop pretending Linux OS strings can stay
        // arbitrary bags of bytes: we need some way to print the value.
        // The best precedent is `std::path::Path::display`, which wraps a
        // `Path` (itself just an `OsStr`) in a `Display` impl that boils
        // down to `from_utf8_lossy`.
        //
        // (Strictly speaking std uses an internal lossy iterator to avoid
        // the allocation, but we don't have that luxury, so we make the
        // allocation/conversion explicit.)
        String::from_utf8_lossy(self.as_bytes())
    }

    // ~Copies of a bunch of string APIs since [u8] doesn't have them (reasonably)

    /// Splits around the first occurrence of `separator`.
    ///
    /// Returns the parts before and after the separator (which is not
    /// included in either), or `None` if `separator` does not occur.
    pub fn split_once(&self, separator: u8) -> Option<(&LinuxOsStr, &LinuxOsStr)> {
        let bytes = self.as_bytes();
        let idx = bytes.iter().position(|&b| b == separator)?;
        Some((
            Self::from_bytes(&bytes[..idx]),
            Self::from_bytes(&bytes[idx + 1..]),
        ))
    }

    /// Splits around the last occurrence of `separator`.
    ///
    /// Returns the parts before and after the separator (which is not
    /// included in either), or `None` if `separator` does not occur.
    pub fn rsplit_once(&self, separator: u8) -> Option<(&LinuxOsStr, &LinuxOsStr)> {
        let bytes = self.as_bytes();
        let idx = bytes.iter().rposition(|&b| b == separator)?;
        Some((
            Self::from_bytes(&bytes[..idx]),
            Self::from_bytes(&bytes[idx + 1..]),
        ))
    }

    /// Iterates over the substrings delimited by `separator`.
    ///
    /// Adjacent, leading or trailing separators produce empty substrings.
    pub fn split(&self, separator: u8) -> impl Iterator<Item = &LinuxOsStr> {
        self.as_bytes()
            .split(move |&b| b == separator)
            .map(LinuxOsStr::from_bytes)
    }

    /// Iterates over the non-empty substrings separated by ascii whitespace.
    pub fn split_ascii_whitespace(&self) -> impl Iterator<Item = &LinuxOsStr> {
        // Quick and dirty impl: just split on every individual whitespace
        // byte but discard all the empty substrings.
        self.as_bytes()
            .split(u8::is_ascii_whitespace)
            .filter(|chunk| !chunk.is_empty())
            .map(LinuxOsStr::from_bytes)
    }

    /// Iterates over the `\n`-delimited lines.
    ///
    /// This is a plain split on `\n`: unlike `str::lines`, input that ends
    /// with a newline yields one final empty item.
    pub fn lines(&self) -> impl Iterator<Item = &LinuxOsStr> {
        // Intentionally doesn't mess around with stuff like \r
        // since we're processing files generated by the OS, but maybe
        // this will be a problem later?
        self.split(b'\n')
    }

    /// Returns the substring with leading and trailing ascii whitespace removed.
    pub fn trim_ascii_whitespace(&self) -> &LinuxOsStr {
        let input = self.as_bytes();
        let is_content = |b: &u8| !b.is_ascii_whitespace();
        let first = input.iter().position(is_content);
        let last = input.iter().rposition(is_content);
        match (first, last) {
            (Some(first), Some(last)) => Self::from_bytes(&input[first..=last]),
            // String was entirely whitespace (or empty): return an empty
            // string starting at its position, so that the result is still
            // strictly a substring.
            _ => Self::from_bytes(&input[..0]),
        }
    }
}
impl LinuxOsString {
/// Create a new LinuxOsString from an array of bytes.
pub fn from_vec(vec: Vec<u8>) -> Self {
Self(vec)
}
pub fn new() -> Self {
Self(Vec::new())
}
pub fn as_os_str(&self) -> &LinuxOsStr {
self
}
}
/// Lets an owned `LinuxOsString` be borrowed as a `LinuxOsStr` view of the
/// same bytes.
impl Borrow<LinuxOsStr> for LinuxOsString {
    fn borrow(&self) -> &LinuxOsStr {
        let bytes: &[u8] = self.0.as_slice();
        LinuxOsStr::from_bytes(bytes)
    }
}
/// Copies the borrowed bytes into a freshly allocated `LinuxOsString`.
impl ToOwned for LinuxOsStr {
    type Owned = LinuxOsString;

    fn to_owned(&self) -> LinuxOsString {
        let copied: Vec<u8> = self.0.to_vec();
        LinuxOsString::from_vec(copied)
    }
}
/// Dereferences the owned string to a `LinuxOsStr` over the same bytes.
impl Deref for LinuxOsString {
    type Target = LinuxOsStr;

    fn deref(&self) -> &LinuxOsStr {
        let bytes: &[u8] = self.0.as_slice();
        LinuxOsStr::from_bytes(bytes)
    }
}
/// Dereferences to the raw byte slice, making the `[u8]` methods available
/// directly on a `LinuxOsStr`.
impl Deref for LinuxOsStr {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        self.as_bytes()
    }
}