goblin_experimental/
strtab.rs

1//! A byte-offset based string table.
2//! Commonly used in ELF binaries, Unix archives, and even PE binaries.
3
4use core::fmt;
5use core::ops::Index;
6use core::str;
7use scroll::{ctx, Pread};
8if_alloc! {
9    use crate::error;
10    use alloc::vec::Vec;
11}
12
/// A common string table format which is indexed by byte offsets (and not
/// member index). Constructed using [`parse`](#method.parse)
/// with your choice of delimiter. Please be careful.
///
/// The table borrows its backing bytes for `'a`; the strings handed out by the
/// accessors borrow from that same buffer.
pub struct Strtab<'a> {
    /// String-reading context passed to `scroll` on every lookup;
    /// `from_slice_unparsed` sets it to `StrCtx::Delimiter(delim)`.
    delim: ctx::StrCtx,
    /// The raw bytes of the table, already narrowed to the requested sub-slice.
    bytes: &'a [u8],
    /// `(byte offset, string)` pairs in ascending offset order, filled in by `parse`.
    /// Left empty when the table is built with `from_slice_unparsed`.
    #[cfg(feature = "alloc")]
    strings: Vec<(usize, &'a str)>,
}
22
/// Reads one string out of `bytes`, starting at byte `offset`, using `delim` as the
/// `scroll` string context.
///
/// Any error reported by `scroll` for the read is returned unchanged; callers in this
/// file either propagate it or `unwrap()` it.
#[inline(always)]
fn get_str(offset: usize, bytes: &[u8], delim: ctx::StrCtx) -> scroll::Result<&str> {
    // The explicit `&str` turbofish selects the type `pread_with` should decode.
    bytes.pread_with::<&str>(offset, delim)
}
27
28impl<'a> Strtab<'a> {
29    /// Creates a `Strtab` with `bytes` as the backing string table, using `delim` as the delimiter between entries.
30    ///
31    /// NB: this does *not* preparse the string table, which can have non-optimal access patterns.
32    /// See <https://github.com/m4b/goblin/pull/275>
33    pub fn new(bytes: &'a [u8], delim: u8) -> Self {
34        Self::from_slice_unparsed(bytes, 0, bytes.len(), delim)
35    }
36
37    /// Returns the length of this `Strtab` in bytes
38    pub fn len(&self) -> usize {
39        self.bytes.len()
40    }
41
42    /// Creates a `Strtab` directly without bounds check and without parsing it.
43    ///
44    /// This is potentially unsafe and should only be used if `feature = "alloc"` is disabled.
45    pub fn from_slice_unparsed(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> Self {
46        Self {
47            delim: ctx::StrCtx::Delimiter(delim),
48            bytes: &bytes[offset..offset + len],
49            #[cfg(feature = "alloc")]
50            strings: Vec::new(),
51        }
52    }
53    /// Gets a str reference from the backing bytes starting at byte `offset`.
54    ///
55    /// If the index is out of bounds, `None` is returned. Panics if bytes are invalid UTF-8.
56    /// Use this method if the `Strtab` was created using `from_slice_unparsed()`.
57    pub fn get_unsafe(&self, offset: usize) -> Option<&'a str> {
58        if offset >= self.bytes.len() {
59            None
60        } else {
61            Some(get_str(offset, self.bytes, self.delim).unwrap())
62        }
63    }
64    #[cfg(feature = "alloc")]
65    /// Parses a `Strtab` from `bytes` at `offset` with `len` size as the backing string table, using `delim` as the delimiter.
66    ///
67    /// Errors if bytes are invalid UTF-8.
68    /// Requires `feature = "alloc"`
69    pub fn parse(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> error::Result<Self> {
70        let (end, overflow) = offset.overflowing_add(len);
71        if overflow || end > bytes.len() {
72            return Err(error::Error::Malformed(format!(
73                "Strtable size ({}) + offset ({}) is out of bounds for {} #bytes. Overflowed: {}",
74                len,
75                offset,
76                bytes.len(),
77                overflow
78            )));
79        }
80        let mut result = Self::from_slice_unparsed(bytes, offset, len, delim);
81        let mut i = 0;
82        while i < result.bytes.len() {
83            let string = get_str(i, result.bytes, result.delim)?;
84            result.strings.push((i, string));
85            i += string.len() + 1;
86        }
87        Ok(result)
88    }
89    #[cfg(feature = "alloc")]
90    /// Parses a `Strtab` with `bytes` as the backing string table, using `delim` as the delimiter between entries.
91    ///
92    /// Requires `feature = "alloc"`
93    pub fn new_preparsed(bytes: &'a [u8], delim: u8) -> error::Result<Self> {
94        Self::parse(bytes, 0, bytes.len(), delim)
95    }
96    #[cfg(feature = "alloc")]
97    /// Converts the string table to a vector of parsed strings.
98    ///
99    /// Note: This method is used to check the parsed contents of `strtab`.
100    /// If you want to get the correct contents of `strtab` as `Vec`, use the following example.
101    ///
102    /// # Examples
103    /// ```rust
104    /// use goblin_experimental as goblin;
105    /// use goblin::error::Error;
106    ///
107    /// pub fn show_shdr_strtab(bytes: &[u8]) -> Result<(), Error> {
108    ///     let elf = goblin::elf::Elf::parse(&bytes)?;
109    ///
110    ///     for section in elf.section_headers {
111    ///         println!("{}", elf.shdr_strtab.get_at(section.sh_name).unwrap_or(""));
112    ///     }
113    ///
114    ///     Ok(())
115    /// }
116    /// ```
117    ///
118    /// Requires `feature = "alloc"`
119    pub fn to_vec(&self) -> error::Result<Vec<&'a str>> {
120        // Fallback in case `Strtab` was created using `from_slice_unparsed()`.
121        if self.strings.is_empty() {
122            let mut result = Vec::new();
123            let mut i = 0;
124            while i < self.bytes.len() {
125                let string = get_str(i, self.bytes, self.delim)?;
126                result.push(string);
127                i += string.len() + 1;
128            }
129            return Ok(result);
130        }
131        Ok(self.strings.iter().map(|&(_key, value)| value).collect())
132    }
133    #[cfg(feature = "alloc")]
134    /// Safely gets a str reference from the parsed table starting at byte `offset`.
135    ///
136    /// If the index is out of bounds, `None` is returned.
137    /// Requires `feature = "alloc"`
138    pub fn get_at(&self, offset: usize) -> Option<&'a str> {
139        match self
140            .strings
141            .binary_search_by_key(&offset, |&(key, _value)| key)
142        {
143            Ok(index) => Some(self.strings[index].1),
144            Err(index) => {
145                if index == 0 {
146                    return None;
147                }
148                let (string_begin_offset, entire_string) = self.strings[index - 1];
149                entire_string.get(offset - string_begin_offset..)
150            }
151        }
152    }
153    #[deprecated(since = "0.4.2", note = "Use from_slice_unparsed() instead")]
154    /// Construct a strtab from a `ptr`, and a `size`, using `delim` as the delimiter
155    ///
156    /// # Safety
157    /// This function creates a `Strtab` directly from a raw pointer and size
158    pub unsafe fn from_raw(ptr: *const u8, len: usize, delim: u8) -> Strtab<'a> {
159        Self::from_slice_unparsed(core::slice::from_raw_parts(ptr, len), 0, len, delim)
160    }
161    #[deprecated(since = "0.4.2", note = "Bad performance, use get_at() instead")]
162    #[cfg(feature = "alloc")]
163    /// Parses a str reference from the parsed table starting at byte `offset`.
164    ///
165    /// If the index is out of bounds, `None` is returned.
166    /// Requires `feature = "alloc"`
167    pub fn get(&self, offset: usize) -> Option<error::Result<&'a str>> {
168        if offset >= self.bytes.len() {
169            None
170        } else {
171            Some(get_str(offset, self.bytes, self.delim).map_err(core::convert::Into::into))
172        }
173    }
174}
175
176impl<'a> fmt::Debug for Strtab<'a> {
177    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
178        f.debug_struct("Strtab")
179            .field("delim", &self.delim)
180            .field("bytes", &str::from_utf8(self.bytes))
181            .finish()
182    }
183}
184
185impl<'a> Default for Strtab<'a> {
186    fn default() -> Self {
187        Self {
188            delim: ctx::StrCtx::default(),
189            bytes: &[],
190            #[cfg(feature = "alloc")]
191            strings: Vec::new(),
192        }
193    }
194}
195
196impl<'a> Index<usize> for Strtab<'a> {
197    type Output = str;
198    /// Gets str reference at starting at byte `offset`.
199    /// **NB**: this will panic if the underlying bytes are not valid utf8, or the offset is invalid
200    #[inline(always)]
201    fn index(&self, offset: usize) -> &Self::Output {
202        // This can't delegate to get() because get() requires #[cfg(features = "alloc")]
203        // It's also slightly less useful than get() because the lifetime -- specified by the Index
204        // trait -- matches &self, even though we could return &'a instead
205        get_str(offset, self.bytes, self.delim).unwrap()
206    }
207}
208
/// A table with a leading delimiter but no trailing one still yields its last entry.
#[test]
fn as_vec_no_final_null() {
    let table = Strtab::new_preparsed(b"\0printf\0memmove\0busta", 0x0).unwrap();
    let entries = table.to_vec().unwrap();
    let expected = ["", "printf", "memmove", "busta"];
    assert_eq!(entries.len(), 4);
    assert_eq!(entries, expected);
}
216
/// A table with neither a leading nor a trailing delimiter yields exactly its entries.
#[test]
fn as_vec_no_first_null_no_final_null() {
    let table = Strtab::new_preparsed(b"printf\0memmove\0busta", 0x0).unwrap();
    let entries = table.to_vec().unwrap();
    let expected = ["printf", "memmove", "busta"];
    assert_eq!(entries.len(), 3);
    assert_eq!(entries, expected);
}
224
/// A trailing delimiter terminates the last entry and does not add an extra empty one.
#[test]
fn to_vec_final_null() {
    let table = Strtab::new_preparsed(b"\0printf\0memmove\0busta\0", 0x0).unwrap();
    let entries = table.to_vec().unwrap();
    let expected = ["", "printf", "memmove", "busta"];
    assert_eq!(entries.len(), 4);
    assert_eq!(entries, expected);
}
232
/// The delimiter is configurable: a newline-separated table parses like a NUL-separated one.
#[test]
fn to_vec_newline_delim() {
    let table = Strtab::new_preparsed(b"\nprintf\nmemmove\nbusta\n", b'\n').unwrap();
    let entries = table.to_vec().unwrap();
    let expected = ["", "printf", "memmove", "busta"];
    assert_eq!(entries.len(), 4);
    assert_eq!(entries, expected);
}
240
/// Pre-parsing rejects invalid UTF-8 with a `scroll` `BadInput` error and accepts
/// well-formed multi-byte input.
#[test]
fn parse_utf8() {
    // Two lone continuation bytes are not valid UTF-8.
    let invalid = Strtab::new_preparsed(&[0x80, 0x80], b'\n');
    assert!(matches!(
        invalid,
        Err(error::Error::Scroll(scroll::Error::BadInput {
            size: 2,
            msg: "invalid utf8",
        }))
    ));

    // 0xC6 0x92 is a valid two-byte sequence, followed by two ASCII bytes.
    let valid = Strtab::new_preparsed(&[0xC6, 0x92, 0x6F, 0x6F], b'\n');
    assert!(valid.is_ok());
}
257
/// `get_at` resolves offsets at entry starts, inside entries, and at delimiters, and
/// returns `None` for offsets that split a multi-byte character or lie past the end.
#[test]
fn get_at_utf8() {
    let strtab = Strtab::new_preparsed("\nƒoo\nmemmove\n🅱️usta\n".as_bytes(), b'\n').unwrap();
    let expectations: [(usize, Option<&str>); 9] = [
        (0, Some("")),
        (5, Some("")),
        (6, Some("memmove")),
        (14, Some("\u{1f171}\u{fe0f}usta")),
        (16, None),
        (18, Some("\u{fe0f}usta")),
        (21, Some("usta")),
        (25, Some("")),
        (26, None),
    ];
    for &(offset, expected) in expectations.iter() {
        // Pairing each side with its offset makes a failure report which lookup broke.
        assert_eq!((offset, strtab.get_at(offset)), (offset, expected));
    }
}