xwasm/elements/
index_map.rs

1use std::cmp::min;
2use std::iter::{FromIterator, IntoIterator};
3use std::mem;
4use std::slice;
5use std::vec;
6use std::vec::Vec;
7use io;
8
9use super::{Deserialize, Error, Serialize, VarUint32};
10
11/// A map from non-contiguous `u32` keys to values of type `T`, which is
12/// serialized and deserialized ascending order of the keys. Normally used for
13/// relative dense maps with occasional "holes", and stored as an array.
14///
15/// **SECURITY WARNING:** This code is currently subject to a denial of service
16/// attack if you create a map containing the key `u32::MAX`, which should never
17/// happen in normal data. It would be pretty easy to provide a safe
18/// deserializing mechanism which addressed this problem.
19#[derive(Debug, Default)]
20pub struct IndexMap<T> {
21	/// The number of non-`None` entries in this map.
22	len: usize,
23
24	/// A vector of entries. Missing entries are represented as `None`.
25	entries: Vec<Option<T>>,
26}
27
28impl<T> IndexMap<T> {
29	/// Create an empty `IndexMap`, preallocating enough space to store
30	/// `capacity` entries without needing to reallocate the underlying memory.
31	pub fn with_capacity(capacity: usize) -> IndexMap<T> {
32		IndexMap {
33			len: 0,
34			entries: Vec::with_capacity(capacity),
35		}
36	}
37
38	/// Clear the map.
39	pub fn clear(&mut self) {
40		self.entries.clear();
41		self.len = 0;
42	}
43
44	/// Return the name for the specified index, if it exists.
45	pub fn get(&self, idx: u32) -> Option<&T> {
46		match self.entries.get(idx as usize) {
47			Some(&Some(ref value)) => Some(value),
48			Some(&None) | None => None,
49		}
50	}
51
52	/// Does the map contain an entry for the specified index?
53	pub fn contains_key(&self, idx: u32) -> bool {
54		match self.entries.get(idx as usize) {
55			Some(&Some(_)) => true,
56			Some(&None) | None => false,
57		}
58	}
59
60	/// Insert a name into our map, returning the existing value if present.
61	///
62	/// Note: This API is designed for reasonably dense indices based on valid
63	/// data. Inserting a huge `idx` will use up a lot of RAM, and this function
64	/// will not try to protect you against that.
65	pub fn insert(&mut self, idx: u32, value: T) -> Option<T> {
66		let idx = idx as usize;
67		let result = if idx >= self.entries.len() {
68			// We need to grow the array, and add the new element at the end.
69			for _ in 0..(idx - self.entries.len()) {
70				// We can't use `extend(repeat(None)).take(n)`, because that
71				// would require `T` to implement `Clone`.
72				self.entries.push(None);
73			}
74			self.entries.push(Some(value));
75			debug_assert_eq!(idx + 1, self.entries.len());
76			self.len += 1;
77			None
78		} else {
79			// We're either replacing an existing element, or filling in a
80			// missing one.
81			let existing = self.entries[idx].take();
82			if existing.is_none() {
83				self.len += 1;
84			}
85			self.entries[idx] = Some(value);
86			existing
87		};
88		if mem::size_of::<usize>() > 4 {
89			debug_assert!(self.entries.len() <= (::std::u32::MAX as usize) + 1);
90		}
91		#[cfg(slow_assertions)]
92		debug_assert_eq!(self.len, self.slow_len());
93		result
94	}
95
96	/// Remove an item if present and return it.
97	pub fn remove(&mut self, idx: u32) -> Option<T> {
98		let result = match self.entries.get_mut(idx as usize) {
99			Some(value @ &mut Some(_)) => {
100				self.len -= 1;
101				value.take()
102			}
103			Some(&mut None) | None => None,
104		};
105		#[cfg(slow_assertions)]
106		debug_assert_eq!(self.len, self.slow_len());
107		result
108	}
109
110	/// The number of items in this map.
111	pub fn len(&self) -> usize {
112		#[cfg(slow_assertions)]
113		debug_assert_eq!(self.len, self.slow_len());
114		self.len
115	}
116
117	/// Is this map empty?
118	pub fn is_empty(&self) -> bool {
119		self.len == 0
120	}
121
122	/// This function is only compiled when `--cfg slow_assertions` is enabled.
123	/// It computes the `len` value using a slow algorithm.
124	///
125	/// WARNING: This turns a bunch of O(n) operations into O(n^2) operations.
126	/// We may want to remove it once the code is tested, or to put it behind
127	/// a feature flag named `slow_debug_checks`, or somsing like that.
128	#[cfg(slow_assertions)]
129	fn slow_len(&self) -> usize {
130		self.entries.iter().filter(|entry| entry.is_some()).count()
131	}
132
133	/// Create a non-consuming iterator over this `IndexMap`'s keys and values.
134	pub fn iter(&self) -> Iter<T> {
135		// Note that this does the right thing because we use `&self`.
136		self.into_iter()
137	}
138
139	/// Custom deserialization routine.
140	///
141	/// We will allocate an underlying array no larger than `max_entry_space` to
142	/// hold the data, so the maximum index must be less than `max_entry_space`.
143	/// This prevents mallicious *.xwasm files from having a single entry with
144	/// the index `u32::MAX`, which would consume far too much memory.
145	///
146	/// The `deserialize_value` function will be passed the index of the value
147	/// being deserialized, and must deserialize the value.
148	pub fn deserialize_with<R, F>(
149		max_entry_space: usize,
150		deserialize_value: &F,
151		rdr: &mut R,
152	) -> Result<IndexMap<T>, Error>
153	where
154		R: io::Read,
155		F: Fn(u32, &mut R) -> Result<T, Error>,
156	{
157		let len: u32 = VarUint32::deserialize(rdr)?.into();
158		let mut map = IndexMap::with_capacity(len as usize);
159		let mut prev_idx = None;
160		for _ in 0..len {
161			let idx: u32 = VarUint32::deserialize(rdr)?.into();
162			if idx as usize >= max_entry_space {
163				return Err(Error::Other("index is larger than expected"));
164			}
165			match prev_idx {
166				Some(prev) if prev >= idx => {
167					// Supposedly these names must be "sorted by index", so
168					// let's try enforcing that and seeing what happens.
169					return Err(Error::Other("indices are out of order"));
170				}
171				_ => {
172					prev_idx = Some(idx);
173				}
174			}
175			let val = deserialize_value(idx, rdr)?;
176			map.insert(idx, val);
177		}
178		Ok(map)
179	}
180
181}
182
183impl<T: Clone> Clone for IndexMap<T> {
184	fn clone(&self) -> IndexMap<T> {
185		IndexMap {
186			len: self.len,
187			entries: self.entries.clone(),
188		}
189	}
190}
191
192impl<T: PartialEq> PartialEq<IndexMap<T>> for IndexMap<T> {
193	fn eq(&self, other: &IndexMap<T>) -> bool {
194		if self.len() != other.len() {
195			// If the number of non-`None` entries is different, we can't match.
196			false
197		} else {
198			// This is tricky, because one `Vec` might have a bunch of empty
199			// entries at the end which we want to ignore.
200			let smallest_len = min(self.entries.len(), other.entries.len());
201			self.entries[0..smallest_len].eq(&other.entries[0..smallest_len])
202		}
203	}
204}
205
206impl<T: Eq> Eq for IndexMap<T> {}
207
208impl<T> FromIterator<(u32, T)> for IndexMap<T> {
209	/// Create an `IndexMap` from an iterator.
210	///
211	/// Note: This API is designed for reasonably dense indices based on valid
212	/// data. Inserting a huge `idx` will use up a lot of RAM, and this function
213	/// will not try to protect you against that.
214	fn from_iter<I>(iter: I) -> Self
215	where
216		I: IntoIterator<Item = (u32, T)>,
217	{
218		let iter = iter.into_iter();
219		let (lower, upper_opt) = iter.size_hint();
220		let mut map = IndexMap::with_capacity(upper_opt.unwrap_or(lower));
221		for (idx, value) in iter {
222			map.insert(idx, value);
223		}
224		map
225	}
226}
227
228/// An iterator over an `IndexMap` which takes ownership of it.
229pub struct IntoIter<T> {
230	next_idx: u32,
231	remaining_len: usize,
232	iter: vec::IntoIter<Option<T>>,
233}
234
235impl<T> Iterator for IntoIter<T> {
236	type Item = (u32, T);
237
238	fn size_hint(&self) -> (usize, Option<usize>) {
239		(self.remaining_len, Some(self.remaining_len))
240	}
241
242	fn next(&mut self) -> Option<Self::Item> {
243		// Bail early if we know there are no more items. This also keeps us
244		// from repeatedly calling `self.iter.next()` once it has been
245		// exhausted, which is not guaranteed to keep returning `None`.
246		if self.remaining_len == 0 {
247			return None;
248		}
249		while let Some(value_opt) = self.iter.next() {
250			let idx = self.next_idx;
251			self.next_idx += 1;
252			if let Some(value) = value_opt {
253				self.remaining_len -= 1;
254				return Some((idx, value));
255			}
256		}
257		debug_assert_eq!(self.remaining_len, 0);
258		None
259	}
260}
261
262impl<T> IntoIterator for IndexMap<T> {
263	type Item = (u32, T);
264	type IntoIter = IntoIter<T>;
265
266	fn into_iter(self) -> IntoIter<T> {
267		IntoIter {
268			next_idx: 0,
269			remaining_len: self.len,
270			iter: self.entries.into_iter(),
271		}
272	}
273}
274
275/// An iterator over a borrowed `IndexMap`.
276pub struct Iter<'a, T: 'static> {
277	next_idx: u32,
278	remaining_len: usize,
279	iter: slice::Iter<'a, Option<T>>,
280}
281
282impl<'a, T: 'static> Iterator for Iter<'a, T> {
283	type Item = (u32, &'a T);
284
285	fn size_hint(&self) -> (usize, Option<usize>) {
286		(self.remaining_len, Some(self.remaining_len))
287	}
288
289	fn next(&mut self) -> Option<Self::Item> {
290		// Bail early if we know there are no more items. This also keeps us
291		// from repeatedly calling `self.iter.next()` once it has been
292		// exhausted, which is not guaranteed to keep returning `None`.
293		if self.remaining_len == 0 {
294			return None;
295		}
296		while let Some(value_opt) = self.iter.next() {
297			let idx = self.next_idx;
298			self.next_idx += 1;
299			if let &Some(ref value) = value_opt {
300				self.remaining_len -= 1;
301				return Some((idx, value));
302			}
303		}
304		debug_assert_eq!(self.remaining_len, 0);
305		None
306	}
307}
308
309impl<'a, T: 'static> IntoIterator for &'a IndexMap<T> {
310	type Item = (u32, &'a T);
311	type IntoIter = Iter<'a, T>;
312
313	fn into_iter(self) -> Iter<'a, T> {
314		Iter {
315			next_idx: 0,
316			remaining_len: self.len,
317			iter: self.entries.iter(),
318		}
319	}
320}
321
322impl<T> Serialize for IndexMap<T>
323where
324	T: Serialize,
325	Error: From<<T as Serialize>::Error>,
326{
327	type Error = Error;
328
329	fn serialize<W: io::Write>(self, wtr: &mut W) -> Result<(), Self::Error> {
330		VarUint32::from(self.len()).serialize(wtr)?;
331		for (idx, value) in self {
332			VarUint32::from(idx).serialize(wtr)?;
333			value.serialize(wtr)?;
334		}
335		Ok(())
336	}
337}
338
339impl<T: Deserialize> IndexMap<T>
340where
341	T: Deserialize,
342	Error: From<<T as Deserialize>::Error>,
343{
344	/// Deserialize a map containing simple values that support `Deserialize`.
345	/// We will allocate an underlying array no larger than `max_entry_space` to
346	/// hold the data, so the maximum index must be less than `max_entry_space`.
347	pub fn deserialize<R: io::Read>(
348		max_entry_space: usize,
349		rdr: &mut R,
350	) -> Result<Self, Error> {
351		let deserialize_value: fn(u32, &mut R) -> Result<T, Error> = |_idx, rdr| {
352			T::deserialize(rdr).map_err(Error::from)
353		};
354		Self::deserialize_with(max_entry_space, &deserialize_value, rdr)
355	}
356}
357
358#[cfg(test)]
359mod tests {
360	use io;
361	use super::*;
362
363	#[test]
364	fn default_is_empty_no_matter_how_we_look_at_it() {
365		let map = IndexMap::<String>::default();
366		assert_eq!(map.len(), 0);
367		assert!(map.is_empty());
368		assert_eq!(map.iter().collect::<Vec<_>>().len(), 0);
369		assert_eq!(map.into_iter().collect::<Vec<_>>().len(), 0);
370	}
371
372	#[test]
373	fn with_capacity_creates_empty_map() {
374		let map = IndexMap::<String>::with_capacity(10);
375		assert!(map.is_empty());
376	}
377
378	#[test]
379	fn clear_removes_all_values() {
380		let mut map = IndexMap::<String>::default();
381		map.insert(0, "sample value".to_string());
382		assert_eq!(map.len(), 1);
383		map.clear();
384		assert_eq!(map.len(), 0);
385	}
386
387	#[test]
388	fn get_returns_elements_that_are_there_but_nothing_else() {
389		let mut map = IndexMap::<String>::default();
390		map.insert(1, "sample value".to_string());
391		assert_eq!(map.len(), 1);
392		assert_eq!(map.get(0), None);
393		assert_eq!(map.get(1), Some(&"sample value".to_string()));
394		assert_eq!(map.get(2), None);
395	}
396
397	#[test]
398	fn contains_key_returns_true_when_a_key_is_present() {
399		let mut map = IndexMap::<String>::default();
400		map.insert(1, "sample value".to_string());
401		assert!(!map.contains_key(0));
402		assert!(map.contains_key(1));
403		assert!(!map.contains_key(2));
404	}
405
406	#[test]
407	fn insert_behaves_like_other_maps() {
408		let mut map = IndexMap::<String>::default();
409
410		// Insert a key which requires extending our storage.
411		assert_eq!(map.insert(1, "val 1".to_string()), None);
412		assert_eq!(map.len(), 1);
413		assert!(map.contains_key(1));
414
415		// Insert a key which requires filling in a hole.
416		assert_eq!(map.insert(0, "val 0".to_string()), None);
417		assert_eq!(map.len(), 2);
418		assert!(map.contains_key(0));
419
420		// Insert a key which replaces an existing key.
421		assert_eq!(
422			map.insert(1, "val 1.1".to_string()),
423			Some("val 1".to_string())
424		);
425		assert_eq!(map.len(), 2);
426		assert!(map.contains_key(1));
427		assert_eq!(map.get(1), Some(&"val 1.1".to_string()));
428	}
429
430	#[test]
431	fn remove_behaves_like_other_maps() {
432		let mut map = IndexMap::<String>::default();
433		assert_eq!(map.insert(1, "val 1".to_string()), None);
434
435		// Remove an out-of-bounds element.
436		assert_eq!(map.remove(2), None);
437		assert_eq!(map.len(), 1);
438
439		// Remove an in-bounds but missing element.
440		assert_eq!(map.remove(0), None);
441		assert_eq!(map.len(), 1);
442
443		// Remove an existing element.
444		assert_eq!(map.remove(1), Some("val 1".to_string()));
445		assert_eq!(map.len(), 0);
446	}
447
448	#[test]
449	fn partial_eq_works_as_expected_in_simple_cases() {
450		let mut map1 = IndexMap::<String>::default();
451		let mut map2 = IndexMap::<String>::default();
452		assert_eq!(map1, map2);
453
454		map1.insert(1, "a".to_string());
455		map2.insert(1, "a".to_string());
456		assert_eq!(map1, map2);
457
458		map1.insert(0, "b".to_string());
459		assert_ne!(map1, map2);
460		map1.remove(0);
461		assert_eq!(map1, map2);
462
463		map1.insert(1, "not a".to_string());
464		assert_ne!(map1, map2);
465	}
466
467	#[test]
468	fn partial_eq_is_smart_about_none_values_at_the_end() {
469		let mut map1 = IndexMap::<String>::default();
470		let mut map2 = IndexMap::<String>::default();
471
472		map1.insert(1, "a".to_string());
473		map2.insert(1, "a".to_string());
474
475		// Both maps have the same (idx, value) pairs, but map2 has extra space.
476		map2.insert(10, "b".to_string());
477		map2.remove(10);
478		assert_eq!(map1, map2);
479
480		// Both maps have the same (idx, value) pairs, but map1 has extra space.
481		map1.insert(100, "b".to_string());
482		map1.remove(100);
483		assert_eq!(map1, map2);
484
485		// Let's be paranoid.
486		map2.insert(1, "b".to_string());
487		assert_ne!(map1, map2);
488	}
489
490	#[test]
491	fn from_iterator_builds_a_map() {
492		let data = &[
493			// We support out-of-order values here!
494			(3, "val 3"),
495			(2, "val 2"),
496			(5, "val 5"),
497		];
498		let iter = data.iter().map(|&(idx, val)| (idx, val.to_string()));
499		let map = IndexMap::from_iter(iter);
500		assert_eq!(map.len(), 3);
501		assert_eq!(map.get(2), Some(&"val 2".to_string()));
502		assert_eq!(map.get(3), Some(&"val 3".to_string()));
503		assert_eq!(map.get(5), Some(&"val 5".to_string()));
504	}
505
506	#[test]
507	fn iterators_are_well_behaved() {
508		// Create a map with reasonably complex internal structure, making
509		// sure that we have both internal missing elements, and a bunch of
510		// missing elements at the end.
511		let data = &[(3, "val 3"), (2, "val 2"), (5, "val 5")];
512		let src_iter = data.iter().map(|&(idx, val)| (idx, val.to_string()));
513		let mut map = IndexMap::from_iter(src_iter);
514		map.remove(5);
515
516		// Make sure `size_hint` and `next` behave as we expect at each step.
517		{
518			let mut iter1 = map.iter();
519			assert_eq!(iter1.size_hint(), (2, Some(2)));
520			assert_eq!(iter1.next(), Some((2, &"val 2".to_string())));
521			assert_eq!(iter1.size_hint(), (1, Some(1)));
522			assert_eq!(iter1.next(), Some((3, &"val 3".to_string())));
523			assert_eq!(iter1.size_hint(), (0, Some(0)));
524			assert_eq!(iter1.next(), None);
525			assert_eq!(iter1.size_hint(), (0, Some(0)));
526			assert_eq!(iter1.next(), None);
527			assert_eq!(iter1.size_hint(), (0, Some(0)));
528		}
529
530		// Now do the same for a consuming iterator.
531		let mut iter2 = map.into_iter();
532		assert_eq!(iter2.size_hint(), (2, Some(2)));
533		assert_eq!(iter2.next(), Some((2, "val 2".to_string())));
534		assert_eq!(iter2.size_hint(), (1, Some(1)));
535		assert_eq!(iter2.next(), Some((3, "val 3".to_string())));
536		assert_eq!(iter2.size_hint(), (0, Some(0)));
537		assert_eq!(iter2.next(), None);
538		assert_eq!(iter2.size_hint(), (0, Some(0)));
539		assert_eq!(iter2.next(), None);
540		assert_eq!(iter2.size_hint(), (0, Some(0)));
541	}
542
543	#[test]
544	fn serialize_and_deserialize() {
545		let mut map = IndexMap::<String>::default();
546		map.insert(1, "val 1".to_string());
547
548		let mut output = vec![];
549		map.clone()
550			.serialize(&mut output)
551			.expect("serialize failed");
552
553		let mut input = io::Cursor::new(&output);
554		let deserialized = IndexMap::deserialize(2, &mut input).expect("deserialize failed");
555
556		assert_eq!(deserialized, map);
557	}
558
559	#[test]
560	fn deserialize_requires_elements_to_be_in_order() {
561		// Build a in-order example by hand.
562		let mut valid = vec![];
563		VarUint32::from(2u32).serialize(&mut valid).unwrap();
564		VarUint32::from(0u32).serialize(&mut valid).unwrap();
565		"val 0".to_string().serialize(&mut valid).unwrap();
566		VarUint32::from(1u32).serialize(&mut valid).unwrap();
567		"val 1".to_string().serialize(&mut valid).unwrap();
568		let map = IndexMap::<String>::deserialize(2, &mut io::Cursor::new(valid))
569			.expect("unexpected error deserializing");
570		assert_eq!(map.len(), 2);
571
572		// Build an out-of-order example by hand.
573		let mut invalid = vec![];
574		VarUint32::from(2u32).serialize(&mut invalid).unwrap();
575		VarUint32::from(1u32).serialize(&mut invalid).unwrap();
576		"val 1".to_string().serialize(&mut invalid).unwrap();
577		VarUint32::from(0u32).serialize(&mut invalid).unwrap();
578		"val 0".to_string().serialize(&mut invalid).unwrap();
579		let res = IndexMap::<String>::deserialize(2, &mut io::Cursor::new(invalid));
580		assert!(res.is_err());
581	}
582
583	#[test]
584	fn deserialize_enforces_max_idx() {
585		// Build an example with an out-of-bounds index by hand.
586		let mut invalid = vec![];
587		VarUint32::from(1u32).serialize(&mut invalid).unwrap();
588		VarUint32::from(5u32).serialize(&mut invalid).unwrap();
589		"val 5".to_string().serialize(&mut invalid).unwrap();
590		let res = IndexMap::<String>::deserialize(1, &mut io::Cursor::new(invalid));
591		assert!(res.is_err());
592	}
593}