linux_libc_auxv/
parser.rs

1/*
2MIT License
3
4Copyright (c) 2021 Philipp Schuster
5
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12
13The above copyright notice and this permission notice shall be included in all
14copies or substantial portions of the Software.
15
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22SOFTWARE.
23*/
24use crate::cstr_util::c_str_len_ptr;
25use crate::{AuxVar, AuxVarSerialized, AuxVarType};
26use core::fmt::Debug;
27use core::marker::PhantomData;
28
/// Read-only view over the data structure that Linux passes to the libc on program startup.
///
/// The wrapped byte slice is expected to begin at `argc`, i.e. at the address the initial
/// stack pointer (`rsp`) points to. It is no problem if the slice is longer than the data
/// structure itself (for example 10000 bytes), because parsing stops automatically when the
/// end of the data structure is found. Hence, if the data structure is valid, invalid memory
/// (above the stack) will never be accessed.
///
/// The structure contains `argc`, `argv`, `envv`, and the auxiliary vector along with
/// additional referenced payload. It is located right above the stack.
/// See <https://lwn.net/Articles/631631/> for more info.
///
/// Instances are created from a byte slice via `InitialLinuxLibcStackLayout::from(bytes)`,
/// where `bytes` is a `&[u8]`.
#[derive(Debug)]
pub struct InitialLinuxLibcStackLayout<'a> {
    // Raw bytes of the structure; the first machine word is read as `argc`.
    bytes: &'a [u8],
}
44
45impl<'a> From<&'a [u8]> for InitialLinuxLibcStackLayout<'a> {
46    /// Creates a new [`InitialLinuxLibcStackLayout`].
47    fn from(bytes: &'a [u8]) -> Self {
48        Self { bytes }
49    }
50}
51
52impl<'a> InitialLinuxLibcStackLayout<'a> {
53    /// Returns the number of arguments.
54    #[allow(clippy::missing_const_for_fn)]
55    pub fn argc(&self) -> usize {
56        unsafe { *self.bytes.as_ptr().cast() }
57    }
58
59    /// Returns the number of environment variables.
60    pub fn envc(&self) -> usize {
61        self.envv_ptr_iter().count()
62    }
63
64    /// Returns the pointer to the begin of argv array.
65    fn get_argv_ptr(&self) -> *const *const u8 {
66        // + 1: skip argc
67        let ptr = unsafe { self.bytes.as_ptr().cast::<u64>().add(1) };
68        // C-str array: array of pointers => pointer to pointer to bytes of c-str
69        ptr as *const *const u8
70    }
71
72    /// Iterates over the C-string arguments. See [`CstrIter`].
73    /// This is unsafe, because it will result in segfaults/page faults or invalid memory
74    /// being read, if the pointers are not valid in the address space of the caller.
75    ///
76    /// # Safety
77    /// This function produces UB (page fault, seg fault, read invalid memory), if the referenced
78    /// pointers are not valid inside the address space of the caller.
79    pub unsafe fn argv_iter(&self) -> CstrIter {
80        CstrIter::new(self.get_argv_ptr())
81    }
82
83    /// Iterates only over the pointers of the C-string arguments. See [`NullTerminatedArrIter`].
84    /// This is always memory-safe even if the pointers are created for another address space,
85    /// because no pointers are dereference by this iterator.
86    pub fn argv_ptr_iter(&self) -> NullTerminatedArrIter {
87        NullTerminatedArrIter {
88            ptr: self.get_argv_ptr(),
89        }
90    }
91
92    /// Returns the pointer to the beginning of the envp array.
93    fn get_envv_ptr(&self) -> *const *const u8 {
94        unsafe {
95            self.get_argv_ptr()
96                .add(self.argc())
97                // final null ptr after the envv (+ 8 bytes)
98                .add(1)
99        }
100    }
101
102    /// Iterates over all environment variables. See [`CstrIter`].
103    /// This is unsafe, because it will result in segfaults/page faults or invalid memory
104    /// being read, if the pointers are not valid in the address space of the caller.
105    ///
106    /// # Safety
107    /// This function produces UB (page fault, seg fault, read invalid memory), if the referenced
108    /// pointers are not valid inside the address space of the caller.
109    pub unsafe fn envv_iter(&self) -> CstrIter {
110        CstrIter::new(self.get_envv_ptr())
111    }
112
113    /// Iterates only over the pointers to the environment variables. See [`NullTerminatedArrIter`].
114    /// This is always memory-safe even if the pointers are created for another address space,
115    /// because no pointers are dereference by this iterator.
116    pub fn envv_ptr_iter(&self) -> NullTerminatedArrIter {
117        NullTerminatedArrIter {
118            ptr: self.get_envv_ptr(),
119        }
120    }
121
122    /// Iterates over all entries in the auxiliary vector. See [`AuxVarIter`].
123    /// This is unsafe, because it will result in segfaults/page faults or invalid memory
124    /// being read, if the pointers are not valid in the address space of the caller.
125    ///
126    /// # Safety
127    /// This function produces UB (page fault, seg fault, read invalid memory), if the referenced
128    /// pointers are not valid inside the address space of the caller.
129    pub unsafe fn aux_var_iter(&self) -> AuxVarIter {
130        AuxVarIter::new(self.aux_serialized_iter())
131    }
132
133    /// Iterates over all entries in the auxiliary vector. See [`AuxVarSerializedIter`].
134    /// This is always memory-safe even if the pointers are created for another address space,
135    /// because no pointers are dereference by this iterator.
136    pub fn aux_serialized_iter(&self) -> AuxVarSerializedIter {
137        AuxVarSerializedIter::new(self.get_auxv_ptr())
138    }
139
140    /// Returns the pointer to the beginning of aux variables.
141    fn get_auxv_ptr(&self) -> *const AuxVarSerialized {
142        unsafe {
143            self.get_envv_ptr()
144                // skip all ENV values
145                .add(self.envv_ptr_iter().count())
146                // final null ptr after the envv (+ 8 bytes)
147                .add(1)
148                .cast()
149        }
150    }
151}
152
/// Iterator over an array of pointers that is terminated by a null pointer.
/// Useful to find all entries of a typical C-string array.
/// It only returns the pointers themselves but doesn't dereference the data they point to.
#[derive(Debug)]
pub struct NullTerminatedArrIter {
    // Points to the array entry that the next call to `next()` will read.
    ptr: *const *const u8,
}

impl Iterator for NullTerminatedArrIter {
    type Item = *const u8;

    /// Returns the current array entry and advances to the next one. Returns `None` once the
    /// terminating null pointer is reached; the iterator does not advance past it.
    fn next(&mut self) -> Option<Self::Item> {
        // SAFETY: the creator of this iterator must hand in a pointer to a null-terminated
        // pointer array. The entry is read exactly once with `read_unaligned`, because the
        // array may live inside a plain byte buffer that is not pointer-aligned.
        let c_str_ptr = unsafe { self.ptr.read_unaligned() };
        if c_str_ptr.is_null() {
            None
        } else {
            // Advance by one pointer-sized array entry.
            // SAFETY: the current entry is not the terminator, so a following entry exists.
            self.ptr = unsafe { self.ptr.add(1) };
            Some(c_str_ptr)
        }
    }
}
175
/// Iterator that iterates over an array of null terminated C-strings and yields each of them
/// as `&str`.
#[derive(Debug)]
pub struct CstrIter<'a> {
    // Walks the null-terminated pointer array; every yielded pointer is turned into a string.
    arr_iter: NullTerminatedArrIter,
    // Carries the lifetime `'a` of the yielded string slices without storing any data.
    _marker: PhantomData<&'a ()>,
}
182
183impl<'a> CstrIter<'a> {
184    unsafe fn new(ptr: *const *const u8) -> Self {
185        Self {
186            arr_iter: NullTerminatedArrIter { ptr },
187            _marker: PhantomData::default(),
188        }
189    }
190}
191
192impl<'a> Iterator for CstrIter<'a> {
193    type Item = &'a str;
194
195    fn next(&mut self) -> Option<Self::Item> {
196        self.arr_iter.next().map(|c_str_ptr| {
197            // + null byte
198            let c_str_bytes =
199                unsafe { core::slice::from_raw_parts(c_str_ptr, c_str_len_ptr(c_str_ptr) + 1) };
200            unsafe { core::str::from_utf8_unchecked(c_str_bytes) }
201        })
202    }
203}
204
/// Iterator over all serialized entries in the auxiliary vector.
/// This is memory-safe, even if the pointers are for another address space, because
/// no pointers stored inside the entries are dereferenced.
#[derive(Debug)]
pub struct AuxVarSerializedIter<'a> {
    // Points to the entry that the next call to `next()` will read.
    ptr: *const AuxVarSerialized<'a>,
    // Set once the entry with key `AuxVarType::Null` was returned; ends the iteration.
    done: bool,
    // Carries the lifetime `'a` without storing any data.
    _marker: PhantomData<&'a ()>,
}
214
215impl<'a> AuxVarSerializedIter<'a> {
216    fn new(ptr: *const AuxVarSerialized<'a>) -> Self {
217        Self {
218            ptr,
219            done: false,
220            _marker: PhantomData::default(),
221        }
222    }
223}
224
225impl<'a> Iterator for AuxVarSerializedIter<'a> {
226    type Item = AuxVarSerialized<'a>;
227
228    fn next(&mut self) -> Option<Self::Item> {
229        if self.done {
230            None
231        } else {
232            let aux_var_ser = unsafe { self.ptr.as_ref().unwrap() };
233            if aux_var_ser.key() == AuxVarType::Null {
234                if aux_var_ser.val() != 0 {
235                    panic!(
236                        "val of end key is not null but {}! Probably read wrong memory!",
237                        aux_var_ser.val()
238                    );
239                }
240                self.done = true;
241            }
242
243            self.ptr = unsafe { self.ptr.add(1) };
244
245            Some(*aux_var_ser)
246        }
247    }
248}
249
/// Iterator over all entries in the auxiliary vector, yielded as [`AuxVar`].
/// This is a high-level version of [`AuxVarSerializedIter`] but unsafe,
/// if the pointers are not valid in the address space of the caller.
#[derive(Debug)]
pub struct AuxVarIter<'a> {
    // Source of the serialized entries that are converted into `AuxVar` values.
    serialized_iter: AuxVarSerializedIter<'a>,
}
257
258impl<'a> AuxVarIter<'a> {
259    const fn new(serialized_iter: AuxVarSerializedIter<'a>) -> Self {
260        Self { serialized_iter }
261    }
262}
263
264impl<'a> Iterator for AuxVarIter<'a> {
265    type Item = AuxVar<'a>;
266
267    fn next(&mut self) -> Option<Self::Item> {
268        unsafe {
269            self.serialized_iter
270                .next()
271                .map(|ref x| AuxVar::from_serialized(x))
272        }
273    }
274}
275
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{AuxVar, InitialLinuxLibcStackLayoutBuilder};
    use std::vec::Vec;

    // This test is not optimal, because it is some kind of "self-fulfilling prophecy":
    // it parses the format that the builder of this crate creates.
    #[test]
    fn test_parser_with_dereference_data() {
        let builder = InitialLinuxLibcStackLayoutBuilder::new()
            .add_arg_v("first_arg\0")
            .add_arg_v("second_arg")
            .add_arg_v("third__arg")
            .add_env_v("ENV1=FOO")
            .add_env_v("ENV2=BAR")
            .add_env_v("ENV3=FOOBAR\0")
            .add_aux_v(AuxVar::Platform("x86_64"))
            .add_aux_v(AuxVar::Uid(0xdeadbeef));
        let mut buf = vec![0; builder.total_size()];

        unsafe {
            // user_addr == write_addr => easy debugging, and all pointers can be dereferenced
            let user_ptr = buf.as_ptr() as u64;
            builder.serialize_into_buf(buf.as_mut_slice(), user_ptr);
        }

        let parsed = InitialLinuxLibcStackLayout::from(buf.as_slice());

        // Three arguments and three environment variables were added above.
        assert_eq!(parsed.argc(), 3);
        assert_eq!(parsed.argv_ptr_iter().count(), 3);
        assert_eq!(parsed.envc(), 3);

        dbg!(parsed.argc());
        dbg!(parsed.argv_ptr_iter().collect::<Vec<_>>());
        unsafe {
            dbg!(parsed.argv_iter().collect::<Vec<_>>());
        }
        dbg!(parsed.envv_ptr_iter().collect::<Vec<_>>());
        unsafe {
            dbg!(parsed.envv_iter().collect::<Vec<_>>());
        }
        dbg!(parsed.aux_serialized_iter().collect::<Vec<_>>());
    }

    /// Test similar to the one above, but uses "0x1000" as user address. This
    /// makes it easy to check if everything is at the right offset.
    #[test]
    fn test_parser_different_user_ptr() {
        let builder = InitialLinuxLibcStackLayoutBuilder::new()
            .add_arg_v("first_arg\0")
            .add_arg_v("second_arg")
            .add_arg_v("third__arg")
            .add_env_v("ENV1=FOO")
            .add_env_v("ENV2=BAR")
            .add_env_v("ENV3=FOOBAR\0")
            .add_aux_v(AuxVar::Platform("x86_64\0"))
            .add_aux_v(AuxVar::Uid(0xdeadbeef));
        // Zero-initialized buffer; avoids exposing uninitialized memory via `set_len`.
        let mut buf = vec![0; builder.total_size()];

        unsafe {
            // this only works if the data is not dereferenced
            builder.serialize_into_buf(buf.as_mut_slice(), 0x1000);
        }

        let parsed = InitialLinuxLibcStackLayout::from(buf.as_slice());

        // Only the pointer iterators are used here: they never dereference the yielded
        // (fake) user addresses.
        assert_eq!(parsed.argc(), 3);
        assert_eq!(parsed.argv_ptr_iter().count(), 3);
        assert_eq!(parsed.envc(), 3);

        dbg!(parsed.argv_ptr_iter().collect::<Vec<_>>());
        dbg!(parsed.envv_ptr_iter().collect::<Vec<_>>());

        // TODO add more sensible test; check offsets etc

        // The high-level iterators are intentionally not printed here: their debug output
        // resolves the memory addresses, which results in memory errors in this test.
    }
}