linux_libc_auxv/builder/
mod.rs

1/*
2MIT License
3
4Copyright (c) 2021 Philipp Schuster
5
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12
13The above copyright notice and this permission notice shall be included in all
14copies or substantial portions of the Software.
15
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22SOFTWARE.
23*/
24//! Module for [`InitialLinuxLibcStackLayoutBuilder`].
25mod serializer;
26
27use serializer::*;
28
29use crate::cstr_util::{cstr_contains_at_most_terminating_null_byte, cstr_len_with_nullbyte};
30use crate::{AuxVar, AuxVarSerialized, AuxVarType};
31use alloc::collections::BTreeSet;
32use alloc::vec::Vec;
33use core::mem::size_of;
34
35/// Builder to construct the stack layout that a libc implementation under Linux initially
36/// expects. See <https://lwn.net/Articles/631631/> for more info. It helps to write the
37/// arguments, the environment variables, and the auxiliary vector at a given address.
38/// It will translate addresses (pointers) to user addresses. Serialization is done
39/// with [`InitialLinuxLibcStackLayoutBuilder::serialize_into_buf`].
40#[derive(Debug, Default)]
41pub struct InitialLinuxLibcStackLayoutBuilder<'a> {
42    /// List of C-strings for program arguments/argument variables.
43    arg_v: Vec<&'a str>,
44    /// List of C-strings for environment variables.
45    env_v: Vec<&'a str>,
46    /// List of (key=value)-pairs for the auxiliary vector.
47    aux_v: BTreeSet<AuxVar<'a>>,
48}
49
50impl<'a> InitialLinuxLibcStackLayoutBuilder<'a> {
51    /// Creates a new [`InitialLinuxLibcStackLayoutBuilder`]. The AUX entries [`AuxVarType::Null`]
52    /// and [`AuxVarType::ExecFn`] will be always present.
53    pub fn new() -> Self {
54        let mut map = BTreeSet::new();
55        // this should always be present
56        map.insert(AuxVar::ExecFn("\0"));
57        // important; keep this in vector early => length calculation of total keys stays correct
58        map.insert(AuxVar::Null);
59        Self {
60            arg_v: vec![],
61            env_v: vec![],
62            aux_v: map,
63        }
64    }
65
66    /// Serializes the data structure into the provided buffer.
67    ///
68    /// # Parameters
69    /// * `write_buf`: Destination buffer that must be at least [`Self::total_size`] bytes long.
70    /// * `user_ptr`: Stack pointer in user address space. Important, so that all pointers are valid
71    ///               and can be dereferenced by libc (or the entity that parses the structure).
72    ///
73    /// # Safety
74    /// This function is safe, as long as `write_buf` points to valid memory.
75    pub unsafe fn serialize_into_buf(&self, write_buf: &mut [u8], user_ptr: u64) {
76        assert!(
77            write_buf.len() >= self.total_size(),
78            "the buffer is not big enough!"
79        );
80        let write_ptr = write_buf.as_mut_ptr();
81        let mut writer = AuxvSerializer::new(self, write_ptr, user_ptr);
82        writer.write_argc(self.arg_v.len() as u64);
83        for arg in &self.arg_v {
84            writer.write_arg(arg);
85        }
86        writer.write_finish_argv();
87        for env in &self.env_v {
88            writer.write_env(env);
89        }
90        writer.write_finish_envv();
91
92        // this will also write AT_NULL finally, because it is always at last position in `aux_v`.
93        for aux in &self.aux_v {
94            writer.write_aux_entry(aux)
95        }
96
97        writer.write_finish();
98    }
99
100    /// Adds an argument. An argument in the final Linux stack layout is a null-terminated C-string.
101    ///
102    /// # Parameters
103    /// * `c_str` Terminating null byte is not mandatory, but null-bytes in-between will result
104    ///           in a panic.
105    pub fn add_arg_v(mut self, c_str: &'a str) -> Self {
106        assert!(
107            cstr_contains_at_most_terminating_null_byte(c_str.as_bytes()),
108            "null bytes are only allowed at the end!"
109        );
110
111        self.arg_v.push(c_str);
112        self
113    }
114
115    /// Adds an environmental variable. An envv in the final Linux stack layout is a null-terminated
116    /// C-string with a format of `KEY=VALUE\0`.
117    ///
118    /// # Parameters
119    /// * `c_str` Terminating null byte is not mandatory, but null-bytes in-between will result
120    ///           in a panic.
121    pub fn add_env_v(mut self, c_str: &'a str) -> Self {
122        assert!(
123            cstr_contains_at_most_terminating_null_byte(c_str.as_bytes()),
124            "null bytes are only allowed at the end!"
125        );
126
127        self.env_v.push(c_str);
128        self
129    }
130
131    /// Adds an aux entry.
132    ///
133    /// # Parameters
134    /// * `var`: See [`AuxVar`]. Make sure that the payload is correct, i.e.
135    ///          C-strings are null terminated.
136    pub fn add_aux_v(mut self, var: AuxVar<'a>) -> Self {
137        // do some basic validation
138
139        // if no terminating null byte is present, it is okay for convenience.
140        // This can be added manually in the serializer
141        if let Some(cstr) = var.value_payload_cstr() {
142            assert!(
143                cstr_contains_at_most_terminating_null_byte(cstr.as_bytes()),
144                "null bytes are only allowed at the end!"
145            );
146        }
147
148        // insert alone is not enough - either insert or replace
149        if self.aux_v.contains(&var) {
150            self.aux_v.replace(var);
151        } else {
152            self.aux_v.insert(var);
153        }
154        self
155    }
156
157    /// Returns the number in bytes the data structure will have including the final
158    /// null byte.
159    pub fn total_size(&self) -> usize {
160        // final null is 64 byte long
161        self.offset_to_final_null() + size_of::<u64>()
162    }
163
164    /// Returns the total offset from the begin pointer to the aux data area.
165    const fn offset_to_argv_key_area(&self) -> usize {
166        // there is only argc before this
167        size_of::<u64>()
168    }
169
170    /// Returns the total offset from the begin pointer to the aux data area.
171    fn offset_to_envv_key_area(&self) -> usize {
172        self.offset_to_argv_key_area() + self.argv_keys_size()
173    }
174
175    /// Returns the total offset from the begin pointer to the aux data area.
176    fn offset_to_aux_key_area(&self) -> usize {
177        self.offset_to_envv_key_area() + self.envv_keys_size()
178    }
179
180    /// Returns the total offset from the begin pointer to the aux data area.
181    fn offset_to_aux_data_area(&self) -> usize {
182        let mut sum = self.offset_to_aux_key_area() + self.aux_keys_size();
183
184        // TODO seems like Linux does some more magic for stack alignment
185        //  https://elixir.bootlin.com/linux/v5.15.5/source/fs/binfmt_elf.c#L200
186        //  Maybe solve this in the future?! IMHO this looks negligible.
187        //  Some L1 Cache optimizations on x86_64
188
189        // align up to next 16 byte boundary
190        if sum % 16 != 0 {
191            sum += 16 - sum % 16;
192        }
193        sum
194    }
195
196    /// Returns the total offset from the begin pointer to the args data area.
197    fn offset_to_argv_data_area(&self) -> usize {
198        let mut sum = self.offset_to_aux_data_area() + self.aux_data_area_size();
199        // align up to next 16 byte boundary
200        if sum % 16 != 0 {
201            sum += 16 - sum % 16;
202        }
203        sum
204    }
205
206    /// Returns the total offset from the begin pointer to the env data area.
207    fn offset_to_env_data_area(&self) -> usize {
208        self.offset_to_argv_data_area() + self.argv_data_area_size()
209    }
210
211    /// Returns the total offset from the begin pointer to the location of the file name.
212    fn offset_to_filename_data_area(&self) -> usize {
213        self.offset_to_env_data_area() + self.envv_data_area_size()
214    }
215
216    /// Returns the total offset from the begin pointer to the final null (u64).
217    fn offset_to_final_null(&self) -> usize {
218        // bytes for the filename C-string including the final null byte
219        let filename_bytes = self
220            .filename()
221            .map(|aux| cstr_len_with_nullbyte(aux.value_payload_cstr().unwrap().as_bytes()))
222            .unwrap_or(0);
223        self.offset_to_filename_data_area() + filename_bytes
224    }
225
226    /// Returns the number in bytes that all argv entries will occupy.
227    /// Only the entries, but not the referenced data.
228    fn argv_keys_size(&self) -> usize {
229        // +1: null terminated
230        size_of::<u64>() * (self.arg_v.len() + 1)
231    }
232
233    /// Returns the number in bytes that all env entries will occupy.
234    /// Only the entries, but not the referenced data.
235    fn envv_keys_size(&self) -> usize {
236        // +1: null terminated
237        size_of::<u64>() * (self.env_v.len() + 1)
238    }
239
240    /// Returns the number in bytes that all AT entries will occupy.
241    /// Only the entries, but not the referenced data.
242    fn aux_keys_size(&self) -> usize {
243        size_of::<AuxVarSerialized>() * self.aux_v.len()
244    }
245
246    /// Returns the sum of bytes, required to store the C-string of each arg, including
247    /// terminating null bytes.
248    fn argv_data_area_size(&self) -> usize {
249        self.arg_v
250            .iter()
251            .map(|x| cstr_len_with_nullbyte(x.as_bytes()))
252            .sum()
253    }
254
255    /// Returns the sum of bytes, required to store the C-string of each env var, including
256    /// terminating null bytes.
257    fn envv_data_area_size(&self) -> usize {
258        self.env_v
259            .iter()
260            .map(|x| cstr_len_with_nullbyte(x.as_bytes()))
261            .sum()
262    }
263
264    /// Returns the number of all additional aux vec data in the aux data area, except for
265    /// the executable name of [`AuxVarType::AtExecFn`], because it gets special treatment.
266    ///
267    /// Takes into account, that C-strings must be null-terminated.
268    fn aux_data_area_size(&self) -> usize {
269        self.aux_v
270            .iter()
271            .filter(|x| x.key().value_in_data_area())
272            // AtExecFn: file name stands at end of the structure, before the final null byte
273            //           and not in the auxv data area
274            .filter(|x| x.key() != AuxVarType::ExecFn)
275            // for convenience reasons, users can enter string slices without terminating
276            // null byte - take care here manually!
277            .map(|aux| aux.data_area_serialize_byte_count())
278            .sum()
279    }
280
281    /// Returns the filename/executable aux var, if it is present. It needs some special treatment,
282    /// according to <https://lwn.net/Articles/631631/>.
283    ///
284    // Actually, I'm not sure if libc implementations care about the pointer location, as long as
285    // the pointer is correct..
286    fn filename(&self) -> Option<&AuxVar> {
287        self.aux_v.iter().find(|x| x.key() == AuxVarType::ExecFn)
288    }
289}
290
#[cfg(test)]
mod tests {
    use super::*;
    use crate::AuxVarType;

    /// Test-local helper: rounds `value` up to the next multiple of 16.
    fn round_up_to_16(value: usize) -> usize {
        let remainder = value % 16;
        if remainder == 0 {
            value
        } else {
            value + (16 - remainder)
        }
    }

    /// Checks all offsets of a builder without any user-provided data.
    #[test]
    fn test_builder_write_size() {
        let builder = InitialLinuxLibcStackLayoutBuilder::new();

        // argc (8 bytes), then the terminating entries of argv and envv (8 bytes each)
        assert_eq!(builder.offset_to_argv_key_area(), 8);
        assert_eq!(builder.offset_to_envv_key_area(), 16);
        assert_eq!(builder.offset_to_aux_key_area(), 24);

        // there are two aux keys at minimum (null and file name - (key,value)-pairs),
        // followed by the padding to the next 16 byte boundary
        let data_area_begin = round_up_to_16(24 + 2 * size_of::<AuxVarSerialized>());
        assert_eq!(builder.offset_to_aux_data_area(), data_area_begin);
        // no additional aux data (the file name, which is part of the aux data, lives in a
        // dedicated data area)
        assert_eq!(builder.offset_to_argv_data_area(), data_area_begin);
        // no args in this test
        assert_eq!(builder.offset_to_env_data_area(), data_area_begin);
        // no env vars in this test
        assert_eq!(builder.offset_to_filename_data_area(), data_area_begin);

        // the default file name is only one byte long
        let final_null_begin = data_area_begin + 1;
        assert_eq!(builder.offset_to_final_null(), final_null_begin);

        // final null value (u64)
        assert_eq!(builder.total_size(), final_null_begin + 8);
    }

    /// Checks all offsets of a builder with one arg, one env var, and additional aux entries.
    #[test]
    fn test_builder_write_size_2() {
        let builder = InitialLinuxLibcStackLayoutBuilder::new()
            .add_arg_v("Foo")
            .add_env_v("BAR=FOO")
            .add_aux_v(AuxVar::Platform("x86_64"))
            .add_aux_v(AuxVar::ExecFn("./executable"));

        assert_eq!(builder.offset_to_argv_key_area(), 8);
        // + 8 + 8 (one argv entry + terminating null entry)
        assert_eq!(builder.offset_to_envv_key_area(), 24);
        // + 8 + 8 (one envv entry + terminating null entry)
        assert_eq!(builder.offset_to_aux_key_area(), 40);

        // + three keys + align to 16 byte boundary
        let aux_data_begin = round_up_to_16(40 + 3 * size_of::<AuxVarSerialized>());
        assert_eq!(builder.offset_to_aux_data_area(), aux_data_begin);

        // + 7 (length of "x86_64\0") + align to 16 byte boundary
        let argv_data_begin = round_up_to_16(aux_data_begin + 7);
        assert_eq!(builder.offset_to_argv_data_area(), argv_data_begin);

        // + 4 (length of "Foo\0")
        let env_data_begin = argv_data_begin + 4;
        assert_eq!(builder.offset_to_env_data_area(), env_data_begin);

        // + 8 (length of "BAR=FOO\0")
        let filename_begin = env_data_begin + 8;
        assert_eq!(builder.offset_to_filename_data_area(), filename_begin);

        // + 13 (length of "./executable\0")
        let final_null_begin = filename_begin + 13;
        assert_eq!(builder.offset_to_final_null(), final_null_begin);
    }

    /// Make sure that the AtNull entry is always the last. It must always be present and written
    /// as last entry.
    #[test]
    fn test_builder_aux_final_at_null() {
        let untouched = InitialLinuxLibcStackLayoutBuilder::new();
        assert_eq!(
            untouched.aux_v.iter().last().unwrap().key(),
            AuxVarType::Null
        );

        // adding further entries (including Null itself) must not change the last entry
        let extended = InitialLinuxLibcStackLayoutBuilder::new()
            .add_aux_v(AuxVar::Clktck(0x1337))
            .add_aux_v(AuxVar::Null)
            .add_aux_v(AuxVar::Platform("x86_64"));
        assert_eq!(
            extended.aux_v.iter().last().unwrap().key(),
            AuxVarType::Null
        );
    }

    /// Smoke test: serializes a populated builder and prints the resulting bytes.
    #[test]
    fn test_builder_serializes_data() {
        let builder = InitialLinuxLibcStackLayoutBuilder::new()
            .add_arg_v("Foo")
            .add_env_v("BAR=FOO\0")
            .add_aux_v(AuxVar::Platform("x86_64"))
            .add_aux_v(AuxVar::ExecFn("./executable"))
            .add_aux_v(AuxVar::Uid(0xdeadbeef))
            .add_aux_v(AuxVar::Clktck(123456));
        let mut buf = vec![0; builder.total_size()];

        // user_addr == write_addr => easy debugging; segfaults otherwise when resolving pointers
        let user_ptr = buf.as_ptr() as u64;
        // SAFETY: `user_ptr` is the address of the buffer the data is serialized into.
        unsafe {
            builder.serialize_into_buf(&mut buf, user_ptr);
        }

        dbg!(&buf);

        /* to check the data structure in an existing C tool
        println!("unsigned char foo[] = {{");
        for byte in &buf {
            println!("     0x{:x},", byte);
        }
        println!("}};");*/
    }

    /// The placeholder file name of a new builder gets replaced by a user-provided one.
    #[test]
    fn test_default_filename_gets_replaced() {
        let file_name = "foo";
        let builder =
            InitialLinuxLibcStackLayoutBuilder::new().add_aux_v(AuxVar::ExecFn(file_name));
        let stored = builder.filename().unwrap().value_payload_cstr().unwrap();
        assert_eq!(stored, file_name);
    }
}