linux_libc_auxv/builder/mod.rs
1/*
2MIT License
3
4Copyright (c) 2021 Philipp Schuster
5
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12
13The above copyright notice and this permission notice shall be included in all
14copies or substantial portions of the Software.
15
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22SOFTWARE.
23*/
24//! Module for [`InitialLinuxLibcStackLayoutBuilder`].
25mod serializer;
26
27use serializer::*;
28
29use crate::cstr_util::{cstr_contains_at_most_terminating_null_byte, cstr_len_with_nullbyte};
30use crate::{AuxVar, AuxVarSerialized, AuxVarType};
31use alloc::collections::BTreeSet;
32use alloc::vec::Vec;
33use core::mem::size_of;
34
35/// Builder to construct the stack layout that a libc implementation under Linux initially
36/// expects. See <https://lwn.net/Articles/631631/> for more info. It helps to write the
37/// arguments, the environment variables, and the auxiliary vector at a given address.
38/// It will translate addresses (pointers) to user addresses. Serialization is done
39/// with [`InitialLinuxLibcStackLayoutBuilder::serialize_into_buf`].
40#[derive(Debug, Default)]
41pub struct InitialLinuxLibcStackLayoutBuilder<'a> {
42 /// List of C-strings for program arguments/argument variables.
43 arg_v: Vec<&'a str>,
44 /// List of C-strings for environment variables.
45 env_v: Vec<&'a str>,
46 /// List of (key=value)-pairs for the auxiliary vector.
47 aux_v: BTreeSet<AuxVar<'a>>,
48}
49
50impl<'a> InitialLinuxLibcStackLayoutBuilder<'a> {
51 /// Creates a new [`InitialLinuxLibcStackLayoutBuilder`]. The AUX entries [`AuxVarType::Null`]
52 /// and [`AuxVarType::ExecFn`] will be always present.
53 pub fn new() -> Self {
54 let mut map = BTreeSet::new();
55 // this should always be present
56 map.insert(AuxVar::ExecFn("\0"));
57 // important; keep this in vector early => length calculation of total keys stays correct
58 map.insert(AuxVar::Null);
59 Self {
60 arg_v: vec![],
61 env_v: vec![],
62 aux_v: map,
63 }
64 }
65
66 /// Serializes the data structure into the provided buffer.
67 ///
68 /// # Parameters
69 /// * `write_buf`: Destination buffer that must be at least [`Self::total_size`] bytes long.
70 /// * `user_ptr`: Stack pointer in user address space. Important, so that all pointers are valid
71 /// and can be dereferenced by libc (or the entity that parses the structure).
72 ///
73 /// # Safety
74 /// This function is safe, as long as `write_buf` points to valid memory.
75 pub unsafe fn serialize_into_buf(&self, write_buf: &mut [u8], user_ptr: u64) {
76 assert!(
77 write_buf.len() >= self.total_size(),
78 "the buffer is not big enough!"
79 );
80 let write_ptr = write_buf.as_mut_ptr();
81 let mut writer = AuxvSerializer::new(self, write_ptr, user_ptr);
82 writer.write_argc(self.arg_v.len() as u64);
83 for arg in &self.arg_v {
84 writer.write_arg(arg);
85 }
86 writer.write_finish_argv();
87 for env in &self.env_v {
88 writer.write_env(env);
89 }
90 writer.write_finish_envv();
91
92 // this will also write AT_NULL finally, because it is always at last position in `aux_v`.
93 for aux in &self.aux_v {
94 writer.write_aux_entry(aux)
95 }
96
97 writer.write_finish();
98 }
99
100 /// Adds an argument. An argument in the final Linux stack layout is a null-terminated C-string.
101 ///
102 /// # Parameters
103 /// * `c_str` Terminating null byte is not mandatory, but null-bytes in-between will result
104 /// in a panic.
105 pub fn add_arg_v(mut self, c_str: &'a str) -> Self {
106 assert!(
107 cstr_contains_at_most_terminating_null_byte(c_str.as_bytes()),
108 "null bytes are only allowed at the end!"
109 );
110
111 self.arg_v.push(c_str);
112 self
113 }
114
115 /// Adds an environmental variable. An envv in the final Linux stack layout is a null-terminated
116 /// C-string with a format of `KEY=VALUE\0`.
117 ///
118 /// # Parameters
119 /// * `c_str` Terminating null byte is not mandatory, but null-bytes in-between will result
120 /// in a panic.
121 pub fn add_env_v(mut self, c_str: &'a str) -> Self {
122 assert!(
123 cstr_contains_at_most_terminating_null_byte(c_str.as_bytes()),
124 "null bytes are only allowed at the end!"
125 );
126
127 self.env_v.push(c_str);
128 self
129 }
130
131 /// Adds an aux entry.
132 ///
133 /// # Parameters
134 /// * `var`: See [`AuxVar`]. Make sure that the payload is correct, i.e.
135 /// C-strings are null terminated.
136 pub fn add_aux_v(mut self, var: AuxVar<'a>) -> Self {
137 // do some basic validation
138
139 // if no terminating null byte is present, it is okay for convenience.
140 // This can be added manually in the serializer
141 if let Some(cstr) = var.value_payload_cstr() {
142 assert!(
143 cstr_contains_at_most_terminating_null_byte(cstr.as_bytes()),
144 "null bytes are only allowed at the end!"
145 );
146 }
147
148 // insert alone is not enough - either insert or replace
149 if self.aux_v.contains(&var) {
150 self.aux_v.replace(var);
151 } else {
152 self.aux_v.insert(var);
153 }
154 self
155 }
156
157 /// Returns the number in bytes the data structure will have including the final
158 /// null byte.
159 pub fn total_size(&self) -> usize {
160 // final null is 64 byte long
161 self.offset_to_final_null() + size_of::<u64>()
162 }
163
164 /// Returns the total offset from the begin pointer to the aux data area.
165 const fn offset_to_argv_key_area(&self) -> usize {
166 // there is only argc before this
167 size_of::<u64>()
168 }
169
170 /// Returns the total offset from the begin pointer to the aux data area.
171 fn offset_to_envv_key_area(&self) -> usize {
172 self.offset_to_argv_key_area() + self.argv_keys_size()
173 }
174
175 /// Returns the total offset from the begin pointer to the aux data area.
176 fn offset_to_aux_key_area(&self) -> usize {
177 self.offset_to_envv_key_area() + self.envv_keys_size()
178 }
179
180 /// Returns the total offset from the begin pointer to the aux data area.
181 fn offset_to_aux_data_area(&self) -> usize {
182 let mut sum = self.offset_to_aux_key_area() + self.aux_keys_size();
183
184 // TODO seems like Linux does some more magic for stack alignment
185 // https://elixir.bootlin.com/linux/v5.15.5/source/fs/binfmt_elf.c#L200
186 // Maybe solve this in the future?! IMHO this looks negligible.
187 // Some L1 Cache optimizations on x86_64
188
189 // align up to next 16 byte boundary
190 if sum % 16 != 0 {
191 sum += 16 - sum % 16;
192 }
193 sum
194 }
195
196 /// Returns the total offset from the begin pointer to the args data area.
197 fn offset_to_argv_data_area(&self) -> usize {
198 let mut sum = self.offset_to_aux_data_area() + self.aux_data_area_size();
199 // align up to next 16 byte boundary
200 if sum % 16 != 0 {
201 sum += 16 - sum % 16;
202 }
203 sum
204 }
205
206 /// Returns the total offset from the begin pointer to the env data area.
207 fn offset_to_env_data_area(&self) -> usize {
208 self.offset_to_argv_data_area() + self.argv_data_area_size()
209 }
210
211 /// Returns the total offset from the begin pointer to the location of the file name.
212 fn offset_to_filename_data_area(&self) -> usize {
213 self.offset_to_env_data_area() + self.envv_data_area_size()
214 }
215
216 /// Returns the total offset from the begin pointer to the final null (u64).
217 fn offset_to_final_null(&self) -> usize {
218 // bytes for the filename C-string including the final null byte
219 let filename_bytes = self
220 .filename()
221 .map(|aux| cstr_len_with_nullbyte(aux.value_payload_cstr().unwrap().as_bytes()))
222 .unwrap_or(0);
223 self.offset_to_filename_data_area() + filename_bytes
224 }
225
226 /// Returns the number in bytes that all argv entries will occupy.
227 /// Only the entries, but not the referenced data.
228 fn argv_keys_size(&self) -> usize {
229 // +1: null terminated
230 size_of::<u64>() * (self.arg_v.len() + 1)
231 }
232
233 /// Returns the number in bytes that all env entries will occupy.
234 /// Only the entries, but not the referenced data.
235 fn envv_keys_size(&self) -> usize {
236 // +1: null terminated
237 size_of::<u64>() * (self.env_v.len() + 1)
238 }
239
240 /// Returns the number in bytes that all AT entries will occupy.
241 /// Only the entries, but not the referenced data.
242 fn aux_keys_size(&self) -> usize {
243 size_of::<AuxVarSerialized>() * self.aux_v.len()
244 }
245
246 /// Returns the sum of bytes, required to store the C-string of each arg, including
247 /// terminating null bytes.
248 fn argv_data_area_size(&self) -> usize {
249 self.arg_v
250 .iter()
251 .map(|x| cstr_len_with_nullbyte(x.as_bytes()))
252 .sum()
253 }
254
255 /// Returns the sum of bytes, required to store the C-string of each env var, including
256 /// terminating null bytes.
257 fn envv_data_area_size(&self) -> usize {
258 self.env_v
259 .iter()
260 .map(|x| cstr_len_with_nullbyte(x.as_bytes()))
261 .sum()
262 }
263
264 /// Returns the number of all additional aux vec data in the aux data area, except for
265 /// the executable name of [`AuxVarType::AtExecFn`], because it gets special treatment.
266 ///
267 /// Takes into account, that C-strings must be null-terminated.
268 fn aux_data_area_size(&self) -> usize {
269 self.aux_v
270 .iter()
271 .filter(|x| x.key().value_in_data_area())
272 // AtExecFn: file name stands at end of the structure, before the final null byte
273 // and not in the auxv data area
274 .filter(|x| x.key() != AuxVarType::ExecFn)
275 // for convenience reasons, users can enter string slices without terminating
276 // null byte - take care here manually!
277 .map(|aux| aux.data_area_serialize_byte_count())
278 .sum()
279 }
280
281 /// Returns the filename/executable aux var, if it is present. It needs some special treatment,
282 /// according to <https://lwn.net/Articles/631631/>.
283 ///
284 // Actually, I'm not sure if libc implementations care about the pointer location, as long as
285 // the pointer is correct..
286 fn filename(&self) -> Option<&AuxVar> {
287 self.aux_v.iter().find(|x| x.key() == AuxVarType::ExecFn)
288 }
289}
290
#[cfg(test)]
mod tests {
    use super::*;
    use crate::AuxVarType;

    /// Rounds `value` up to the next multiple of 16; multiples of 16 stay unchanged.
    fn round_up_16(value: usize) -> usize {
        match value % 16 {
            0 => value,
            rem => value + (16 - rem),
        }
    }

    /// Checks every offset of a builder that only holds the two default AUX entries.
    #[test]
    fn test_builder_write_size() {
        let builder = InitialLinuxLibcStackLayoutBuilder::new();

        // argc, argv[0]=0 and envv[0]=0 occupy 8 bytes each
        assert_eq!(builder.offset_to_argv_key_area(), 8);
        assert_eq!(builder.offset_to_envv_key_area(), 16);
        assert_eq!(builder.offset_to_aux_key_area(), 24);

        // there are two aux keys at minimum (null and file name - (key,value)-pairs),
        // followed by padding to the next 16 byte boundary
        let data_start = round_up_16(24 + 2 * size_of::<AuxVarSerialized>());
        assert_eq!(builder.offset_to_aux_data_area(), data_start);
        // no additional aux data: the file name (which is part of the aux data) lives
        // in its dedicated data area
        assert_eq!(builder.offset_to_argv_data_area(), data_start);
        // no args in this test
        assert_eq!(builder.offset_to_env_data_area(), data_start);
        // no env vars in this test
        assert_eq!(builder.offset_to_filename_data_area(), data_start);

        // the default file name is only one byte long
        let final_null = data_start + 1;
        assert_eq!(builder.offset_to_final_null(), final_null);

        // final null value (u64)
        assert_eq!(builder.total_size(), final_null + 8);
    }

    /// Checks the offsets of a builder with one arg, one env var and two extra AUX entries.
    #[test]
    fn test_builder_write_size_2() {
        let builder = InitialLinuxLibcStackLayoutBuilder::new()
            .add_arg_v("Foo")
            .add_env_v("BAR=FOO")
            .add_aux_v(AuxVar::Platform("x86_64"))
            .add_aux_v(AuxVar::ExecFn("./executable"));

        assert_eq!(builder.offset_to_argv_key_area(), 8);
        // + 8 + 8 (one entry + terminating null entry)
        assert_eq!(builder.offset_to_envv_key_area(), 24);
        // + 8 + 8 (one entry + terminating null entry)
        assert_eq!(builder.offset_to_aux_key_area(), 40);

        // + three keys + align to 16 byte boundary
        let aux_data = round_up_16(40 + 3 * size_of::<AuxVarSerialized>());
        assert_eq!(builder.offset_to_aux_data_area(), aux_data);

        // + 7 (length of "x86_64\0") + align to 16 byte boundary
        let argv_data = round_up_16(aux_data + 7);
        assert_eq!(builder.offset_to_argv_data_area(), argv_data);

        // + 4 (length of "Foo\0")
        let env_data = argv_data + 4;
        assert_eq!(builder.offset_to_env_data_area(), env_data);

        // + 8 (length of "BAR=FOO\0")
        let filename_data = env_data + 8;
        assert_eq!(builder.offset_to_filename_data_area(), filename_data);

        // + 13 (length of "./executable\0")
        let final_null = filename_data + 13;
        assert_eq!(builder.offset_to_final_null(), final_null);
    }

    /// Make sure that the AtNull entry is always the last. It must always be present and written
    /// as last entry.
    #[test]
    fn test_builder_aux_final_at_null() {
        let default_builder = InitialLinuxLibcStackLayoutBuilder::new();
        let last_default = default_builder.aux_v.iter().next_back().unwrap();
        assert_eq!(last_default.key(), AuxVarType::Null);

        let filled_builder = InitialLinuxLibcStackLayoutBuilder::new()
            .add_aux_v(AuxVar::Clktck(0x1337))
            .add_aux_v(AuxVar::Null)
            .add_aux_v(AuxVar::Platform("x86_64"));
        let last_filled = filled_builder.aux_v.iter().next_back().unwrap();
        assert_eq!(last_filled.key(), AuxVarType::Null);
    }

    /// Smoke test: serializes a populated builder into a buffer of exactly
    /// `total_size` bytes and dumps the result.
    #[test]
    fn test_builder_serializes_data() {
        let builder = InitialLinuxLibcStackLayoutBuilder::new()
            .add_arg_v("Foo")
            .add_env_v("BAR=FOO\0")
            .add_aux_v(AuxVar::Platform("x86_64"))
            .add_aux_v(AuxVar::ExecFn("./executable"))
            .add_aux_v(AuxVar::Uid(0xdeadbeef))
            .add_aux_v(AuxVar::Clktck(123456));
        let mut buf = vec![0; builder.total_size()];

        // user_addr == write_addr => easy debugging; segfaults otherwise when resolving pointers
        let user_addr = buf.as_ptr() as u64;
        unsafe {
            builder.serialize_into_buf(&mut buf, user_addr);
        }

        dbg!(&buf);

        // To check the data structure in an existing C tool, print it as a C array:
        //
        // println!("unsigned char foo[] = {{");
        // for byte in &buf {
        //     println!("     0x{:x},", byte);
        // }
        // println!("}};");
    }

    /// Adding an `ExecFn` entry must replace the default one inserted by `new`.
    #[test]
    fn test_default_filename_gets_replaced() {
        let expected = "foo";
        let builder =
            InitialLinuxLibcStackLayoutBuilder::new().add_aux_v(AuxVar::ExecFn(expected));
        let filename_entry = builder.filename().unwrap();
        let actual = filename_entry.value_payload_cstr().unwrap();
        assert_eq!(actual, expected);
    }
}