ser_raw/serialize_impls/
ptrs.rs

1use std::{marker::PhantomData, mem};
2
3use crate::{pos::Addr, Serialize, Serializer};
4
/// Size in bytes of a `usize`, i.e. of one word of the `Box` / `Vec` / `String`
/// representations inspected in this module.
const PTR_SIZE: usize = (usize::BITS / 8) as usize;
6
7impl<T, S> Serialize<S> for Box<T>
8where
9	S: Serializer,
10	T: Serialize<S> + Sized,
11{
12	fn serialize_data(&self, serializer: &mut S) {
13		// Sanity check that `Box<T>` is just a pointer (evaluated at compile time).
14		// Unsized types are not supported.
15		let _ = SizeCheck::<Box<T>, PTR_SIZE>::ASSERT_SIZE_IS;
16
17		// No need to do anything if box contains ZST
18		// TODO: Should we call `serialize_data()` in case user defines some behavior?
19		if mem::size_of::<T>() == 0 {
20			return;
21		}
22
23		// Write boxed value
24		let ptr_addr = S::Addr::from_ref(self);
25		serializer.push_and_process(&**self, ptr_addr, |serializer| {
26			// Serialize boxed value
27			(**self).serialize_data(serializer);
28		});
29	}
30}
31
32impl<T, S> Serialize<S> for Vec<T>
33where
34	S: Serializer,
35	T: Serialize<S>,
36{
37	fn serialize_data(&self, serializer: &mut S) {
38		// No need to do anything if vec contains ZSTs
39		// TODO: Should we call `serialize_data()` in case user defines some behavior?
40		if mem::size_of::<T>() == 0 {
41			return;
42		}
43
44		// No need to write contents if vec is empty
45		if self.len() == 0 {
46			// Overwrite `capacity = 0` and `ptr = <dangling>` if it's not already
47			serializer.write_correction(|serializer| {
48				if self.capacity() != 0 {
49					unsafe { write_capacity_and_ptr_for_empty_vec(self, serializer) };
50				}
51			});
52
53			return;
54		}
55
56		// Overwrite `capacity = len`, if it's not already
57		serializer.write_correction(|serializer| {
58			if self.capacity() != self.len() {
59				let cap_offset = VecOffsets::<T>::OFFSETS_VEC.capacity();
60				let cap_addr = S::Addr::from_ref_offset(self, cap_offset).addr();
61				unsafe { serializer.write(&self.len(), cap_addr) };
62			}
63		});
64
65		// Write vec's contents
66		let ptr_addr = S::Addr::from_ref_offset(self, VecOffsets::<T>::PTR_OFFSET);
67		serializer.push_and_process_slice(self.as_slice(), ptr_addr, |serializer| {
68			// Serialize vec's contents
69			for value in &**self {
70				value.serialize_data(serializer);
71			}
72		});
73	}
74}
75
76impl<S> Serialize<S> for String
77where S: Serializer
78{
79	fn serialize_data(&self, serializer: &mut S) {
80		// No need to write contents if string is empty
81		if self.len() == 0 {
82			// Overwrite `capacity = 0` and `ptr = <dangling>` if it's not already
83			serializer.write_correction(|serializer| {
84				if self.capacity() != 0 {
85					unsafe { write_capacity_and_ptr_for_empty_string(self, serializer) };
86				}
87			});
88
89			return;
90		}
91
92		// Overwrite `capacity = len`, if it's not already
93		serializer.write_correction(|serializer| {
94			if self.capacity() != self.len() {
95				let cap_offset = OFFSETS_STRING.capacity();
96				let cap_addr = S::Addr::from_ref_offset(self, cap_offset).addr();
97				unsafe { serializer.write(&self.len(), cap_addr) };
98			}
99		});
100
101		// Write string's content
102		let ptr_addr = S::Addr::from_ref_offset(self, STRING_PTR_OFFSET);
103		serializer.push_slice(self.as_bytes(), ptr_addr);
104	}
105}
106
/// Helper for statically asserting the size of a type.
///
/// Referencing `SizeCheck::<T, SIZE>::ASSERT_SIZE_IS` fails compilation
/// unless `mem::size_of::<T>() == SIZE`. The struct itself is never
/// constructed; it only carries the type and the expected size.
struct SizeCheck<T, const SIZE: usize> {
	_marker: PhantomData<T>,
}

impl<T, const SIZE: usize> SizeCheck<T, SIZE> {
	/// Evaluates to `()` when `T` is exactly `SIZE` bytes; otherwise
	/// evaluation panics at compile time.
	const ASSERT_SIZE_IS: () = {
		let actual = mem::size_of::<T>();
		assert!(actual == SIZE);
	};
}
115
116/// Type for calculating offset of fields in `Vec<T>` at compile time.
117///
118/// * Offset of `ptr` field: `VecOffsets::<T>::PTR_OFFSET`
119/// * Offset of `len` field: `VecOffsets::<T>::OFFSETS_VEC.len()`.
120/// * Offset of `capacity` field: `VecOffsets::<T>::OFFSETS_VEC.capacity()`.
121///
122/// Godbolt shows all of these are compiled down to static integers:
123/// https://godbolt.org/z/78MzTKo6f
124struct VecOffsets<T> {
125	_marker: PhantomData<T>,
126}
127
128impl<T> VecOffsets<T> {
129	const PTR_INDEX: usize = {
130		// Empty vec does not allocate
131		let vec = Vec::<T>::new();
132		// Will fail to compile if `Vec<T>` is not implemented as 3 x `usize`
133		let bytes: [usize; 3] = unsafe { mem::transmute(vec) };
134		let dangle = mem::align_of::<T>();
135		if bytes[0] == dangle {
136			assert!(bytes[1] == 0 && bytes[2] == 0);
137			0
138		} else if bytes[1] == dangle {
139			assert!(bytes[0] == 0 && bytes[2] == 0);
140			1
141		} else if bytes[2] == dangle {
142			assert!(bytes[0] == 0 && bytes[1] == 0);
143			2
144		} else {
145			panic!("Could not determine offset of Vec's ptr field");
146		}
147	};
148
149	const PTR_OFFSET: usize = Self::PTR_INDEX * PTR_SIZE;
150
151	// `OFFSETS_VEC` is not a valid `Vec<T>` as it violates `Vec`'s invariants.
152	// Either `len` > `capacity`, or `capacity` > 0 and ptr dangling.
153	// However:
154	// 1. We at least ensure ptr is non-null.
155	// 2. We never read or write to the vec, or access its pointer.
156	// 3. `ManuallyDrop` prevents it ever being dropped (which would be UB).
157	// So this hack is *probably* sound.
158	const OFFSETS_VEC: mem::ManuallyDrop<Vec<T>> = {
159		let dangle = mem::align_of::<T>();
160		let bytes = match Self::PTR_INDEX {
161			0 => [dangle, PTR_SIZE, PTR_SIZE * 2],
162			1 => [0, dangle, PTR_SIZE * 2],
163			2 => [0, PTR_SIZE, dangle],
164			_ => unreachable!(),
165		};
166		unsafe { mem::transmute(bytes) }
167	};
168}
169
// Constants for offset of fields in `String`, calculated at compile time.
// Uses same hack as `VecOffsets` above.
//
// * Offset of `ptr` field: `STRING_PTR_OFFSET`
// * Offset of `len` field: `OFFSETS_STRING.len()`.
// * Offset of `capacity` field: `OFFSETS_STRING.capacity()`.

/// Index (counted in `usize` words) of the `ptr` field within `String`.
///
/// Found by inspecting an empty string: its `len` and `capacity` are 0,
/// so the one word equal to 1 must be the dangling `ptr`.
const STRING_PTR_INDEX: usize = {
	// Empty string does not allocate
	let empty = String::new();
	// Will fail to compile if `String` is not implemented as 3 x `usize`
	let words: [usize; 3] = unsafe { mem::transmute(empty) };
	// Value the dangling pointer of an empty string is expected to have
	let dangle = 1;

	match (words[0] == dangle, words[1] == dangle, words[2] == dangle) {
		(true, _, _) => {
			assert!(words[1] == 0 && words[2] == 0);
			0
		}
		(false, true, _) => {
			assert!(words[0] == 0 && words[2] == 0);
			1
		}
		(false, false, true) => {
			assert!(words[0] == 0 && words[1] == 0);
			2
		}
		(false, false, false) => panic!("Could not determine offset of String's ptr field"),
	}
};
195const STRING_PTR_OFFSET: usize = STRING_PTR_INDEX * PTR_SIZE;
196
197const OFFSETS_STRING: mem::ManuallyDrop<String> = {
198	let dangle = 1;
199	let bytes = match STRING_PTR_INDEX {
200		0 => [dangle, PTR_SIZE, PTR_SIZE * 2],
201		1 => [0, dangle, PTR_SIZE * 2],
202		2 => [0, PTR_SIZE, dangle],
203		_ => unreachable!(),
204	};
205	unsafe { mem::transmute(bytes) }
206};
207
208/// Overwrite `capacity` and `ptr` for empty `Vec<T>`.
209///
210/// Will write both in a single write if the two fields are next to each other,
211/// or fall back to writing each individually. They should be next to each other
212/// as they're both within `RawVec` in Rust's current `Vec` implementation.
213///
214/// `VecOffsets::<T>::OFFSETS_VEC.capacity()`, `VecOffsets::<T>::PTR_OFFSET`,
215/// and `mem::align_of::<T>()` can all be statically evaluated.
216/// So compiler should remove all but one branch and reduce this whole function
217/// down to e.g. `serializer.write(&[0, 8], v as *const Vec<T> as usize)`.
218/// Godbolt seems to confirm this: https://godbolt.org/z/nr5b5jn3x
219#[inline]
220unsafe fn write_capacity_and_ptr_for_empty_vec<T, Ser: Serializer>(
221	v: &Vec<T>,
222	serializer: &mut Ser,
223) {
224	// We know `mem::align_of::<T>()` is correct value for a dangling ptr or
225	// calculating `VecOffsets::<T>::PTR_INDEX` would have errored
226	let dangle = mem::align_of::<T>();
227	let cap_offset = VecOffsets::<T>::OFFSETS_VEC.capacity();
228	let ptr_offset = VecOffsets::<T>::PTR_OFFSET;
229
230	if cap_offset == 0 && ptr_offset == PTR_SIZE {
231		serializer.write(&[0, dangle], Ser::Addr::from_ref(v).addr());
232	} else if cap_offset == PTR_SIZE && ptr_offset == 0 {
233		serializer.write(&[dangle, 0], Ser::Addr::from_ref(v).addr());
234	} else if cap_offset == PTR_SIZE && ptr_offset == PTR_SIZE * 2 {
235		serializer.write(&[0, dangle], Ser::Addr::from_ref_offset(v, PTR_SIZE).addr());
236	} else if cap_offset == PTR_SIZE * 2 && ptr_offset == PTR_SIZE {
237		serializer.write(&[dangle, 0], Ser::Addr::from_ref_offset(v, PTR_SIZE).addr());
238	} else {
239		serializer.write(&0usize, Ser::Addr::from_ref_offset(v, cap_offset).addr());
240		serializer.write(&dangle, Ser::Addr::from_ref_offset(v, ptr_offset).addr());
241	}
242}
243
244/// Overwrite `capacity` and `ptr` for empty `String`.
245///
246/// Will write both in a single write if the two fields are next to each other,
247/// or fall back to writing each individually. They should be next to each other
248/// as they're both within `RawVec` in Rust's current `String` implementation.
249///
250/// `OFFSETS_STRING.capacity()` and `STRING_PTR_OFFSET` can both be statically
251/// evaluated.
252/// So compiler should remove all but one branch and reduce this whole function
253/// down to e.g. `serializer.write(&[0, 8], s as *const String as usize)`.
254#[inline]
255unsafe fn write_capacity_and_ptr_for_empty_string<Ser: Serializer>(
256	s: &String,
257	serializer: &mut Ser,
258) {
259	// We know 1 is correct value for a dangling ptr or calculating
260	// `STRING_PTR_INDEX` would have errored
261	let dangle = 1usize;
262	let cap_offset = OFFSETS_STRING.capacity();
263
264	if cap_offset == 0 && STRING_PTR_OFFSET == PTR_SIZE {
265		serializer.write(&[0, dangle], Ser::Addr::from_ref(s).addr());
266	} else if cap_offset == PTR_SIZE && STRING_PTR_OFFSET == 0 {
267		serializer.write(&[dangle, 0], Ser::Addr::from_ref(s).addr());
268	} else if cap_offset == PTR_SIZE && STRING_PTR_OFFSET == PTR_SIZE * 2 {
269		serializer.write(&[0, dangle], Ser::Addr::from_ref_offset(s, PTR_SIZE).addr());
270	} else if cap_offset == PTR_SIZE * 2 && STRING_PTR_OFFSET == PTR_SIZE {
271		serializer.write(&[dangle, 0], Ser::Addr::from_ref_offset(s, PTR_SIZE).addr());
272	} else {
273		serializer.write(&0usize, Ser::Addr::from_ref_offset(s, cap_offset).addr());
274		serializer.write(
275			&dangle,
276			Ser::Addr::from_ref_offset(s, STRING_PTR_OFFSET).addr(),
277		);
278	}
279}