utf8_cstr/
lib.rs

1#[cfg(test)]
2#[macro_use]
3extern crate assert_matches;
4
5use std::ffi::CStr;
6use std::str::Utf8Error;
7use std::convert::AsRef;
8use std::mem::transmute;
9use std::os::raw::c_char;
10use std::ops::Deref;
11use std::fmt;
12
13
/// Reasons a byte slice can be rejected when converting it into a `Utf8CStr`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Utf8CStrError {
    /// The last byte of the input is not `\0`.
    NoNulTerm,
    /// A `\0` byte occurs before the last byte; the payload is the index of the first such byte.
    EmbeddedNulTerm(usize),
    /// The input is not valid UTF-8; the payload is the underlying decoding error.
    Utf8Error(Utf8Error),
}

impl fmt::Display for Utf8CStrError {
    /// Writes a short human-readable description of the failure.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Utf8CStrError::NoNulTerm => f.write_str("byte slice is not nul terminated"),
            Utf8CStrError::EmbeddedNulTerm(pos) => write!(
                f,
                "byte slice contains an embedded nul at byte position {}",
                pos
            ),
            Utf8CStrError::Utf8Error(ref e) => write!(f, "byte slice is not valid utf-8: {}", e),
        }
    }
}

impl ::std::error::Error for Utf8CStrError {
    /// Exposes the wrapped `Utf8Error` (if any) so callers can walk the error chain.
    fn source(&self) -> Option<&(dyn ::std::error::Error + 'static)> {
        match *self {
            Utf8CStrError::Utf8Error(ref e) => Some(e),
            Utf8CStrError::NoNulTerm | Utf8CStrError::EmbeddedNulTerm(_) => None,
        }
    }
}
20
/// A wrapper that promises its contents are nul-terminated and validly UTF-8 encoded.
///
/// Plain `std::ffi::CStr` only promises nul termination, but some ffi (and other bits) require
/// strings that are both valid UTF-8 and nul terminated.
///
/// The type is `#[repr(transparent)]` over `CStr` because the conversions in this crate
/// reinterpret a `&CStr` as a `&Utf8CStr` (and back); without the attribute the layout of the
/// wrapper is not guaranteed to match the layout of its only field.
#[repr(transparent)]
#[derive(PartialEq, Eq)]
pub struct Utf8CStr {
    inner: CStr,
}
31
32impl fmt::Debug for Utf8CStr {
33    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
34        let s : &str = self.as_ref();
35        write!(fmt, "{:?}", s)
36    }
37}
38
39impl fmt::Display for Utf8CStr {
40    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
41        let s : &str = self.as_ref();
42        write!(fmt, "{}", s)
43    }
44}
45
46impl Utf8CStr {
47    /// Construct a `Utf8CStr` with no checking.
48    ///
49    /// Unsafety:
50    ///
51    ///  - `'a` must be the correct lifetime
52    ///  - `v` must be nul terminated
53    ///  - `v` must be utf-8 encoded (for use in `&str`)
54    ///  - `l` must be the length of `v` including the nul terminator
55    pub unsafe fn from_raw_parts<'a>(v: *const c_char, l: usize) -> &'a Self {
56        transmute((v, l))
57    }
58
59    /// Construct a `Utf8CStr` with minimal checking.
60    ///
61    /// This currently will scan for the terminating '\0' just to establish the length for various
62    /// slices.
63    ///
64    /// Unsafety:
65    ///
66    ///  - `'a` must be the correct lifetime
67    ///  - `v` must be nul terminated
68    ///  - `v` must be utf-8 encoded (for use in `&str`)
69    pub unsafe fn from_ptr_unchecked<'a>(v: *const c_char) -> &'a Self {
70        Self::from_cstr_unchecked(CStr::from_ptr(v))
71    }
72
73    /// Failable convertion from a CStr.
74    ///
75    /// Verifies that the CStr is utf8 encoded.
76    ///
77    /// NOTE: Only handles non-mutable variants. We may want to accept &mut as well in the future.
78    pub fn from_cstr(v: &CStr) -> Result<&Self, Utf8Error> {
79        try!(v.to_str());
80        Ok(unsafe { transmute(v)})
81    }
82
83    /// Convert from a `&CStr` without any checking
84    ///
85    /// Unsafety:
86    ///
87    ///  - `v` must be valid utf8 for use in a `&str`
88    pub unsafe fn from_cstr_unchecked(v: &CStr) -> &Utf8CStr {
89        Self::from_bytes_with_nul_unchecked(v.to_bytes_with_nul())
90    }
91
92    /// Convert directly from bytes
93    ///
94    /// NOTE: right now this scans `b` a few times over. Ideally, we'd adjust it to only scan `b`
95    /// once.
96    pub fn from_bytes(b: &[u8]) -> Result<&Self, Utf8CStrError> {
97        // FIXME: use from_bytes_with_nul when stablized
98        for (l, &v) in b[0..b.len() - 1].iter().enumerate() {
99            if v == b'\0' {
100                return Err(Utf8CStrError::EmbeddedNulTerm(l));
101            }
102        }
103        if b[b.len() - 1] != b'\0' {
104            return Err(Utf8CStrError::NoNulTerm);
105        }
106
107        let c : &CStr = unsafe { transmute(b) };
108        Self::from_cstr(c).map_err(|e| Utf8CStrError::Utf8Error(e))
109    }
110
111    /// Raw convertion from basic data type with no checking.
112    pub unsafe fn from_bytes_with_nul_unchecked(b: &[u8]) -> &Self {
113        transmute(b)
114    }
115
116    pub fn as_ptr(&self) -> *const c_char {
117        let v : &CStr = self.as_ref();
118        v.as_ptr()
119    }
120
121}
122
123impl AsRef<str> for Utf8CStr {
124    fn as_ref(&self) -> &str {
125        unsafe { transmute(self.inner.to_bytes()) }
126    }
127}
128
129impl AsRef<CStr> for Utf8CStr {
130    fn as_ref(&self) -> &CStr {
131        unsafe { transmute(self) }
132    }
133}
134
135impl Deref for Utf8CStr {
136    type Target = str;
137    fn deref(&self) -> &Self::Target {
138        self.as_ref()
139    }
140}
141
#[cfg(test)]
mod tests {
    use super::{Utf8CStr, Utf8CStrError};

    /// Smoke test covering construction from bytes, each error variant, both formatting
    /// traits, equality, and construction from a raw pointer plus length.
    #[test]
    fn it_works() {
        let hello = Utf8CStr::from_bytes(b"hello\0").unwrap();

        // Each kind of malformed input must be reported through its own error variant.
        let unterminated = Utf8CStr::from_bytes(b"hell").unwrap_err();
        assert_matches!(unterminated, Utf8CStrError::NoNulTerm);

        let interior_nul = Utf8CStr::from_bytes(b"hell\0d").unwrap_err();
        assert_matches!(interior_nul, Utf8CStrError::EmbeddedNulTerm(4));

        let not_utf8 = Utf8CStr::from_bytes(&[8, 1, 23, 4, 0xff, 0]).unwrap_err();
        assert_matches!(not_utf8, Utf8CStrError::Utf8Error(_));

        // Exercise the `Debug` and `Display` implementations.
        println!("{:?}", hello);
        println!("{}", hello);

        // Equality is reflexive and distinguishes different contents.
        assert_eq!(hello, hello);
        let shorter = Utf8CStr::from_bytes(b"hell\0").unwrap();
        assert!(hello != shorter);

        // The unchecked raw-parts constructor must yield a value equal to the checked one.
        let raw = b"hello\0";
        let rebuilt = unsafe { Utf8CStr::from_raw_parts(raw.as_ptr() as *const _, raw.len()) };
        assert_eq!(rebuilt, hello);
    }
}