1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
use std::borrow::Cow;
use std::str;

/// Exposes blocks of raw data, providing auxiliary functionality like
/// extracting substrings.
///
/// This is a thin wrapper around a borrowed byte slice: it does not own or copy the
/// bytes, and every reference it hands out is tied to the lifetime `'a` of that slice.
/// Because it derives `Copy`, copies of a `RawData` refer to the same underlying bytes.
#[derive(Debug, Clone, Copy)]
pub struct RawData<'a> {
    /// The borrowed bytes this wrapper exposes.
    data: &'a [u8],
}

/// Lets a `RawData` be used wherever a byte slice is expected, so slice methods such as
/// `len` or `iter` can be called on it directly.
impl std::ops::Deref for RawData<'_> {
    type Target = [u8];

    /// Returns the wrapped bytes.
    #[inline]
    fn deref(&self) -> &Self::Target {
        self.data
    }
}

impl<'a> RawData<'a> {
    /// Creates a new object from a raw memory reference, without copying the bytes.
    #[inline]
    pub fn new(data: &'a [u8]) -> Self {
        Self { data }
    }

    /// Reads a `\0`-terminated substring starting at the byte offset `start`.
    ///
    /// The substring covers the bytes from `start` up to, but not including, the first
    /// `\0` byte. If no `\0` follows `start`, it runs to the end of the data.
    ///
    /// # Errors
    /// Returns a `Utf8Error` if the selected bytes are not valid UTF-8.
    ///
    /// # Panics
    /// Panics if `start` is greater than the length of the data.
    #[inline]
    pub fn substring(&self, start: usize) -> Result<&'a str, str::Utf8Error> {
        self.substring_with(start, str::from_utf8)
    }

    /// Reads a `\0`-terminated substring starting at the byte offset `start`, including
    /// invalid characters.
    ///
    /// The bytes are selected as in [`RawData::substring`] and converted with
    /// [`String::from_utf8_lossy`], so invalid UTF-8 sequences are replaced by `U+FFFD`
    /// instead of producing an error.
    ///
    /// # Panics
    /// Panics if `start` is greater than the length of the data.
    #[inline]
    pub fn substring_lossy(&self, start: usize) -> Cow<'a, str> {
        self.substring_with(start, String::from_utf8_lossy)
    }

    /// Reads a `\0`-terminated substring starting at the byte offset `start` as raw bytes.
    ///
    /// The returned slice never contains the terminating `\0`.
    ///
    /// # Panics
    /// Panics if `start` is greater than the length of the data.
    #[inline]
    pub fn substring_raw(&self, start: usize) -> &'a [u8] {
        self.substring_with(start, std::convert::identity)
    }

    /// Reads a `\0`-terminated substring starting at the byte offset `start` without
    /// checking that the string contains valid UTF-8.
    ///
    /// # Safety
    /// The selected bytes (from `start` up to the first `\0` or the end of the data) must
    /// be valid UTF-8. This is the same contract as [`std::str::from_utf8_unchecked`].
    ///
    /// # Panics
    /// Panics if `start` is greater than the length of the data.
    #[inline]
    pub unsafe fn substring_unchecked(&self, start: usize) -> &'a str {
        self.substring_with(start, |bytes| {
            // SAFETY: the caller promises that the substring starting at `start` is valid
            // UTF-8, which is the only requirement of `from_utf8_unchecked`.
            unsafe { str::from_utf8_unchecked(bytes) }
        })
    }

    /// Shared implementation of the `substring*` accessors.
    ///
    /// Selects the bytes from `start` up to the first `\0` (or the end of the data when
    /// there is none) and returns whatever the conversion `f` makes of them.
    fn substring_with<T>(&self, start: usize, f: impl FnOnce(&'a [u8]) -> T) -> T {
        // Slicing panics when `start > len`; `start == len` yields an empty tail.
        let tail = &self.data[start..];
        // Without a terminator the substring extends to the end of the data.
        let end = tail.iter().position(|&byte| byte == 0).unwrap_or(tail.len());
        f(&tail[..end])
    }

    /// Converts RawData back into bytes, returning the whole wrapped slice.
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        self.data
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Empty data yields an empty substring from every accessor.
    #[test]
    fn empty() {
        let data: &[u8] = b"";
        let raw_data = RawData::new(data);
        assert_eq!(raw_data.substring(0), Ok(""));
        assert_eq!(raw_data.substring_lossy(0), "");
        assert_eq!(raw_data.substring_raw(0), b"");
        assert_eq!(unsafe { raw_data.substring_unchecked(0) }, "");
    }

    /// Without a terminator the substring runs to the end of the data.
    #[test]
    fn last_without_terminator() {
        let data: &[u8] = b"abc";
        let raw_data = RawData::new(data);
        assert_eq!(raw_data.substring(1), Ok("bc"));
        assert_eq!(raw_data.substring_lossy(1), "bc");
        assert_eq!(raw_data.substring_raw(1), b"bc");
        assert_eq!(unsafe { raw_data.substring_unchecked(1) }, "bc");
    }

    /// The substring stops before the first `\0` and excludes it.
    #[test]
    fn until_terminator() {
        let data: &[u8] = b"ab\0c";
        let raw_data = RawData::new(data);
        assert_eq!(raw_data.substring(1), Ok("b"));
        assert_eq!(raw_data.substring_lossy(1), "b");
        assert_eq!(raw_data.substring_raw(1), b"b");
        assert_eq!(unsafe { raw_data.substring_unchecked(1) }, "b");
    }

    /// A terminator sitting exactly at the start offset gives an empty substring.
    #[test]
    fn terminator_at_start() {
        let data: &[u8] = b"\0abc";
        let raw_data = RawData::new(data);
        assert_eq!(raw_data.substring(0), Ok(""));
        assert_eq!(raw_data.substring_lossy(0), "");
        assert_eq!(raw_data.substring_raw(0), b"");
        assert_eq!(unsafe { raw_data.substring_unchecked(0) }, "");
    }

    /// A start offset equal to the data length is in range and gives an empty substring.
    #[test]
    fn start_at_end() {
        let data: &[u8] = b"abc";
        let raw_data = RawData::new(data);
        assert_eq!(raw_data.substring(3), Ok(""));
        assert_eq!(raw_data.substring_lossy(3), "");
        assert_eq!(raw_data.substring_raw(3), b"");
    }

    /// A start offset beyond the data length panics.
    #[test]
    #[should_panic]
    fn start_past_end() {
        let data: &[u8] = b"abc";
        let _ = RawData::new(data).substring_raw(4);
    }

    /// Invalid UTF-8 is rejected by `substring`, replaced by `substring_lossy` and passed
    /// through untouched by `substring_raw`.
    #[test]
    fn invalid_utf8() {
        let data: &[u8] = b"ab\xF0\x90\x80\0c";
        let raw_data = RawData::new(data);
        assert!(raw_data.substring(1).is_err());
        assert_eq!(raw_data.substring_lossy(1), "b�");
        assert_eq!(raw_data.substring_raw(1), b"b\xF0\x90\x80");
        // `substring_unchecked` is deliberately not exercised here: calling it on
        // invalid UTF-8 violates its safety contract.
    }

    /// `as_bytes` and `Deref` both expose the complete wrapped slice, terminators included.
    #[test]
    fn as_bytes_and_deref() {
        let data: &[u8] = b"ab\0c";
        let raw_data = RawData::new(data);
        assert_eq!(raw_data.as_bytes(), data);
        assert_eq!(&*raw_data, data);
        assert_eq!(raw_data.len(), 4);
    }
}