linux_libc_auxv/parser.rs
1/*
2MIT License
3
4Copyright (c) 2021 Philipp Schuster
5
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12
13The above copyright notice and this permission notice shall be included in all
14copies or substantial portions of the Software.
15
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22SOFTWARE.
23*/
24use crate::cstr_util::c_str_len_ptr;
25use crate::{AuxVar, AuxVarSerialized, AuxVarType};
26use core::fmt::Debug;
27use core::marker::PhantomData;
28
/// View onto the raw bytes of the data structure that Linux hands to the libc when a program
/// starts. It usually begins at `rsp` (the initial stack pointer) and extends upwards.
///
/// The structure consists of `argc`, `argv`, `envv`, and the auxiliary vector, followed by the
/// payload those entries refer to. It sits right above the stack and the initial stack pointer
/// points to `argc`. See <https://lwn.net/Articles/631631/> for more info.
///
/// The slice may be longer than the structure itself (for example 10000 bytes): parsing stops
/// at the end markers of the structure. Hence, as long as the structure is valid, memory
/// behind it (above the stack) is never accessed.
///
/// Instances are created via `InitialLinuxLibcStackLayout::from(bytes)`, where `bytes` is a
/// `&[u8]`.
#[derive(Debug)]
pub struct InitialLinuxLibcStackLayout<'a> {
    /// Raw bytes of the structure; the first bytes hold `argc`.
    bytes: &'a [u8],
}

impl<'a> From<&'a [u8]> for InitialLinuxLibcStackLayout<'a> {
    /// Wraps `bytes` in a new [`InitialLinuxLibcStackLayout`] without copying or validating
    /// them.
    fn from(bytes: &'a [u8]) -> Self {
        InitialLinuxLibcStackLayout { bytes }
    }
}
51
52impl<'a> InitialLinuxLibcStackLayout<'a> {
53 /// Returns the number of arguments.
54 #[allow(clippy::missing_const_for_fn)]
55 pub fn argc(&self) -> usize {
56 unsafe { *self.bytes.as_ptr().cast() }
57 }
58
59 /// Returns the number of environment variables.
60 pub fn envc(&self) -> usize {
61 self.envv_ptr_iter().count()
62 }
63
64 /// Returns the pointer to the begin of argv array.
65 fn get_argv_ptr(&self) -> *const *const u8 {
66 // + 1: skip argc
67 let ptr = unsafe { self.bytes.as_ptr().cast::<u64>().add(1) };
68 // C-str array: array of pointers => pointer to pointer to bytes of c-str
69 ptr as *const *const u8
70 }
71
72 /// Iterates over the C-string arguments. See [`CstrIter`].
73 /// This is unsafe, because it will result in segfaults/page faults or invalid memory
74 /// being read, if the pointers are not valid in the address space of the caller.
75 ///
76 /// # Safety
77 /// This function produces UB (page fault, seg fault, read invalid memory), if the referenced
78 /// pointers are not valid inside the address space of the caller.
79 pub unsafe fn argv_iter(&self) -> CstrIter {
80 CstrIter::new(self.get_argv_ptr())
81 }
82
83 /// Iterates only over the pointers of the C-string arguments. See [`NullTerminatedArrIter`].
84 /// This is always memory-safe even if the pointers are created for another address space,
85 /// because no pointers are dereference by this iterator.
86 pub fn argv_ptr_iter(&self) -> NullTerminatedArrIter {
87 NullTerminatedArrIter {
88 ptr: self.get_argv_ptr(),
89 }
90 }
91
92 /// Returns the pointer to the beginning of the envp array.
93 fn get_envv_ptr(&self) -> *const *const u8 {
94 unsafe {
95 self.get_argv_ptr()
96 .add(self.argc())
97 // final null ptr after the envv (+ 8 bytes)
98 .add(1)
99 }
100 }
101
102 /// Iterates over all environment variables. See [`CstrIter`].
103 /// This is unsafe, because it will result in segfaults/page faults or invalid memory
104 /// being read, if the pointers are not valid in the address space of the caller.
105 ///
106 /// # Safety
107 /// This function produces UB (page fault, seg fault, read invalid memory), if the referenced
108 /// pointers are not valid inside the address space of the caller.
109 pub unsafe fn envv_iter(&self) -> CstrIter {
110 CstrIter::new(self.get_envv_ptr())
111 }
112
113 /// Iterates only over the pointers to the environment variables. See [`NullTerminatedArrIter`].
114 /// This is always memory-safe even if the pointers are created for another address space,
115 /// because no pointers are dereference by this iterator.
116 pub fn envv_ptr_iter(&self) -> NullTerminatedArrIter {
117 NullTerminatedArrIter {
118 ptr: self.get_envv_ptr(),
119 }
120 }
121
122 /// Iterates over all entries in the auxiliary vector. See [`AuxVarIter`].
123 /// This is unsafe, because it will result in segfaults/page faults or invalid memory
124 /// being read, if the pointers are not valid in the address space of the caller.
125 ///
126 /// # Safety
127 /// This function produces UB (page fault, seg fault, read invalid memory), if the referenced
128 /// pointers are not valid inside the address space of the caller.
129 pub unsafe fn aux_var_iter(&self) -> AuxVarIter {
130 AuxVarIter::new(self.aux_serialized_iter())
131 }
132
133 /// Iterates over all entries in the auxiliary vector. See [`AuxVarSerializedIter`].
134 /// This is always memory-safe even if the pointers are created for another address space,
135 /// because no pointers are dereference by this iterator.
136 pub fn aux_serialized_iter(&self) -> AuxVarSerializedIter {
137 AuxVarSerializedIter::new(self.get_auxv_ptr())
138 }
139
140 /// Returns the pointer to the beginning of aux variables.
141 fn get_auxv_ptr(&self) -> *const AuxVarSerialized {
142 unsafe {
143 self.get_envv_ptr()
144 // skip all ENV values
145 .add(self.envv_ptr_iter().count())
146 // final null ptr after the envv (+ 8 bytes)
147 .add(1)
148 .cast()
149 }
150 }
151}
152
/// Iterator over an array of pointers that is terminated by a null pointer.
/// Useful to find all entries of a typical C-string array.
///
/// It only yields the pointers themselves; the data behind them is never dereferenced.
/// Once the null pointer is reached, every further call to `next` returns `None`.
#[derive(Debug)]
pub struct NullTerminatedArrIter {
    /// Address of the array entry that is read next.
    ptr: *const *const u8,
}

impl Iterator for NullTerminatedArrIter {
    type Item = *const u8;

    fn next(&mut self) -> Option<Self::Item> {
        // The array may live inside a plain byte buffer, which gives no alignment
        // guarantee for pointer-sized entries. Therefore, the entry is read unaligned.
        // SAFETY (relies on the creator of the iterator): `self.ptr` must point to a
        // readable entry of a null-terminated pointer array.
        let entry = unsafe { self.ptr.read_unaligned() };
        if entry.is_null() {
            // Do not advance past the terminator, so that the iterator stays exhausted.
            return None;
        }
        // One pointer width further: the next array entry.
        // SAFETY: the current entry is not the terminator, thus a further entry follows.
        self.ptr = unsafe { self.ptr.add(1) };
        Some(entry)
    }
}
175
176/// Iterator that iterates over an array of null terminated C-strings.
177#[derive(Debug)]
178pub struct CstrIter<'a> {
179 arr_iter: NullTerminatedArrIter,
180 _marker: PhantomData<&'a ()>,
181}
182
183impl<'a> CstrIter<'a> {
184 unsafe fn new(ptr: *const *const u8) -> Self {
185 Self {
186 arr_iter: NullTerminatedArrIter { ptr },
187 _marker: PhantomData::default(),
188 }
189 }
190}
191
192impl<'a> Iterator for CstrIter<'a> {
193 type Item = &'a str;
194
195 fn next(&mut self) -> Option<Self::Item> {
196 self.arr_iter.next().map(|c_str_ptr| {
197 // + null byte
198 let c_str_bytes =
199 unsafe { core::slice::from_raw_parts(c_str_ptr, c_str_len_ptr(c_str_ptr) + 1) };
200 unsafe { core::str::from_utf8_unchecked(c_str_bytes) }
201 })
202 }
203}
204
205/// Iterator over all serialized entries in the auxiliary vector.
206/// This is memory-safe, even if the pointers are for another address space, because
207/// no pointers are dereferenced.
208#[derive(Debug)]
209pub struct AuxVarSerializedIter<'a> {
210 ptr: *const AuxVarSerialized<'a>,
211 done: bool,
212 _marker: PhantomData<&'a ()>,
213}
214
215impl<'a> AuxVarSerializedIter<'a> {
216 fn new(ptr: *const AuxVarSerialized<'a>) -> Self {
217 Self {
218 ptr,
219 done: false,
220 _marker: PhantomData::default(),
221 }
222 }
223}
224
225impl<'a> Iterator for AuxVarSerializedIter<'a> {
226 type Item = AuxVarSerialized<'a>;
227
228 fn next(&mut self) -> Option<Self::Item> {
229 if self.done {
230 None
231 } else {
232 let aux_var_ser = unsafe { self.ptr.as_ref().unwrap() };
233 if aux_var_ser.key() == AuxVarType::Null {
234 if aux_var_ser.val() != 0 {
235 panic!(
236 "val of end key is not null but {}! Probably read wrong memory!",
237 aux_var_ser.val()
238 );
239 }
240 self.done = true;
241 }
242
243 self.ptr = unsafe { self.ptr.add(1) };
244
245 Some(*aux_var_ser)
246 }
247 }
248}
249
250/// Iterator over all serialized entries in the auxiliary vector.
251/// This is a high-level version of [`AuxVarSerializedIter`] but unsafe,
252/// if the pointers are not valid in the address space of the caller.
253#[derive(Debug)]
254pub struct AuxVarIter<'a> {
255 serialized_iter: AuxVarSerializedIter<'a>,
256}
257
258impl<'a> AuxVarIter<'a> {
259 const fn new(serialized_iter: AuxVarSerializedIter<'a>) -> Self {
260 Self { serialized_iter }
261 }
262}
263
264impl<'a> Iterator for AuxVarIter<'a> {
265 type Item = AuxVar<'a>;
266
267 fn next(&mut self) -> Option<Self::Item> {
268 unsafe {
269 self.serialized_iter
270 .next()
271 .map(|ref x| AuxVar::from_serialized(x))
272 }
273 }
274}
275
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{AuxVar, InitialLinuxLibcStackLayoutBuilder};
    use std::vec::Vec;

    // This test is not optimal, because it is some kind of "self fulfilling prophecy":
    // it parses the format that the builder of this crate creates.
    #[test]
    fn test_parser_with_dereference_data() {
        let builder = InitialLinuxLibcStackLayoutBuilder::new()
            .add_arg_v("first_arg\0")
            .add_arg_v("second_arg")
            .add_arg_v("third__arg")
            .add_env_v("ENV1=FOO")
            .add_env_v("ENV2=BAR")
            .add_env_v("ENV3=FOOBAR\0")
            .add_aux_v(AuxVar::Platform("x86_64"))
            .add_aux_v(AuxVar::Uid(0xdeadbeef));
        let mut buf = vec![0; builder.total_size()];

        unsafe {
            // user_addr == write_addr => easy debugging
            let user_ptr = buf.as_ptr() as u64;
            builder.serialize_into_buf(buf.as_mut_slice(), user_ptr);
        }

        let parsed = InitialLinuxLibcStackLayout::from(buf.as_slice());

        // three arguments and three environment variables were added above
        assert_eq!(parsed.argc(), 3);
        assert_eq!(parsed.argv_ptr_iter().count(), 3);
        assert_eq!(parsed.envc(), 3);

        dbg!(parsed.argc());
        dbg!(parsed.argv_ptr_iter().collect::<Vec<_>>());
        unsafe {
            dbg!(parsed.argv_iter().collect::<Vec<_>>());
        }
        dbg!(parsed.envv_ptr_iter().collect::<Vec<_>>());
        unsafe {
            dbg!(parsed.envv_iter().collect::<Vec<_>>());
        }
        dbg!(parsed.aux_serialized_iter().collect::<Vec<_>>());
    }

    /// Test similar to the one above, but uses "0x1000" as user address. This
    /// makes it easy to check if everything is at the right offset.
    #[test]
    fn test_parser_different_user_ptr() {
        let builder = InitialLinuxLibcStackLayoutBuilder::new()
            .add_arg_v("first_arg\0")
            .add_arg_v("second_arg")
            .add_arg_v("third__arg")
            .add_env_v("ENV1=FOO")
            .add_env_v("ENV2=BAR")
            .add_env_v("ENV3=FOOBAR\0")
            .add_aux_v(AuxVar::Platform("x86_64\0"))
            .add_aux_v(AuxVar::Uid(0xdeadbeef));
        // Zero-initialized buffer; no `set_len` on uninitialized memory required.
        let mut buf = vec![0; builder.total_size()];

        unsafe {
            // this only works if the data is not dereferenced
            builder.serialize_into_buf(buf.as_mut_slice(), 0x1000);
        }

        let parsed = InitialLinuxLibcStackLayout::from(buf.as_slice());

        // the pointer-only iterators never dereference the (foreign) user addresses
        assert_eq!(parsed.argc(), 3);
        assert_eq!(parsed.argv_ptr_iter().count(), 3);
        assert_eq!(parsed.envc(), 3);

        dbg!(parsed.argv_ptr_iter().collect::<Vec<_>>());
        dbg!(parsed.envv_ptr_iter().collect::<Vec<_>>());

        // TODO add more sensible test; check offsets etc

        // The dereferencing iterators must not be used here: the user addresses of this
        // test are not valid in this address space => memory errors.
    }
}