oxidd_core/util/var_name_map.rs
1//! Efficient bi-directional mapping between variables and names
2
3use std::collections::{hash_map::Entry, HashMap};
4use std::fmt;
5use std::iter::FusedIterator;
6use std::ops::Range;
7
8use crate::error::DuplicateVarName;
9use crate::VarNo;
10
11mod unowned {
12 use std::{fmt, ptr::NonNull};
13
14 /// A reference like `&'static T`, but (unsafely) convertible to a [`Box`].
15 /// One may also think of it as an unowned [`Box`], or an [`Rc`] without a
16 /// counter, i.e., external reasoning about the number of references and
17 /// manual dropping.
18 ///
19 /// In context of the `VarNameMap` implementation, we would like to store
20 /// the `str` content only once. We know that there are exactly two
21 /// references for each stored name, so there is no need for a reference
22 /// counter. However, we cannot use [`Box::leak`], and turn the resulting
23 /// `&mut T` into a `&T` without loosing the ability to drop the allocation
24 /// later on. This is because Rust's proposed aliasing model forbids turning
25 /// a `&T` into a `&mut T` again (which would happen in the [`Drop`]
26 /// implementation).
27 // Type invariant: An `Unowned<T>` is generally
28 // [convertible to a reference](std::ptr#pointer-to-reference-conversion).
29 #[derive(Eq)]
30 pub struct Unowned<T: ?Sized>(NonNull<T>);
31
32 // SAFETY: `Unowned<T>` behaves like `&T`, and `&T` is `Send` iff `T` is
33 // `Sync`
34 unsafe impl<T: ?Sized + Sync> Send for Unowned<T> {}
35 // SAFETY: `Unowned<T>` behaves like `&T`, and `&T` is `Sync` iff `T` is
36 // `Sync`
37 unsafe impl<T: ?Sized + Sync> Sync for Unowned<T> {}
38
39 impl<T: ?Sized> Clone for Unowned<T> {
40 #[inline(always)]
41 fn clone(&self) -> Self {
42 *self
43 }
44 }
45 impl<T: ?Sized> Copy for Unowned<T> {}
46
47 impl<T: ?Sized> std::ops::Deref for Unowned<T> {
48 type Target = T;
49
50 #[inline(always)]
51 fn deref(&self) -> &T {
52 // SAFETY: follows from the type invariant
53 unsafe { self.0.as_ref() }
54 }
55 }
56 impl<T: ?Sized> std::borrow::Borrow<T> for Unowned<T> {
57 #[inline(always)]
58 fn borrow(&self) -> &T {
59 self
60 }
61 }
62
63 impl<T: ?Sized + PartialEq> PartialEq for Unowned<T> {
64 #[inline(always)]
65 fn eq(&self, other: &Self) -> bool {
66 **self == **other
67 }
68 }
69 impl<T: ?Sized + std::hash::Hash> std::hash::Hash for Unowned<T> {
70 #[inline(always)]
71 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
72 (**self).hash(state);
73 }
74 }
75
76 impl<T: ?Sized> From<Box<T>> for Unowned<T> {
77 #[inline(always)]
78 fn from(value: Box<T>) -> Self {
79 Self(NonNull::from(Box::leak(value)))
80 }
81 }
82 impl<T: ?Sized> From<&'static T> for Unowned<T> {
83 #[inline(always)]
84 fn from(value: &'static T) -> Self {
85 Self(NonNull::from(value))
86 }
87 }
88 impl<T: ?Sized> Unowned<T> {
89 /// Convert an `Unowned<T>` back to a `Box<T>`
90 ///
91 /// # Safety
92 ///
93 /// 1. `this` must have been created from a [`Box`]
94 /// 2. `this` must be the only existing reference
95 /// 3. If `T` is not `Send`, then `into_box` must be called from the
96 /// thread that created the value of type `T`.
97 #[inline(always)]
98 pub unsafe fn into_box(this: Self) -> Box<T> {
99 // SAFETY: ensured by caller, see above
100 unsafe { Box::from_raw(this.0.as_ptr()) }
101 }
102 }
103
104 impl<T: ?Sized + fmt::Debug> fmt::Debug for Unowned<T> {
105 #[inline(always)]
106 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
107 (**self).fmt(f)
108 }
109 }
110 impl<T: ?Sized + fmt::Display> fmt::Display for Unowned<T> {
111 #[inline(always)]
112 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
113 (**self).fmt(f)
114 }
115 }
116}
117use unowned::Unowned;
118
119/// Bi-directional mapping between variables and names, intended for the
120/// implementation of a [`Manager`][crate::Manager].
121///
122/// The map requires variable names to be unique, but allows unnamed variables
123/// (represented by empty strings).
124// Type invariant: All `Unowned<str>` values in `index` are created from
125// `Box<str>`. Unless the map is borrowed, there each `Unowned<str>` reference
126// has an aliasing copy in `names`, and no other copies elsewhere. All
127// `Unowned<str>` values in `names` are either a copy of a value in `index` or
128// are the result of `Unowned::from("")`.
129#[derive(Clone)]
130pub struct VarNameMap {
131 names: Vec<Unowned<str>>,
132 index: HashMap<Unowned<str>, VarNo>,
133}
134
135impl Drop for VarNameMap {
136 fn drop(&mut self) {
137 self.names.clear();
138 for (s, _) in self.index.drain() {
139 // SAFETY:
140 // 1. By the type invariant, `s` has been created from a `Box<str>`
141 // 2. Since `self.names` is cleared now, `s` is the only reference
142 // 3. `str` is `Send`
143 drop(unsafe { Unowned::into_box(s) });
144 }
145 }
146}
147
148impl fmt::Debug for VarNameMap {
149 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
150 f.debug_map()
151 .entries(self.names.iter().enumerate().filter(|(_, s)| !s.is_empty()))
152 .finish()?;
153 write!(f, " (length: {})", self.len())
154 }
155}
156
157impl Default for VarNameMap {
158 #[inline(always)]
159 fn default() -> Self {
160 Self::new()
161 }
162}
163
164impl VarNameMap {
165 /// Create an empty map
166 #[inline]
167 pub fn new() -> Self {
168 Self {
169 names: Vec::new(),
170 index: HashMap::new(),
171 }
172 }
173
174 /// Get the number of variables (including unnamed ones)
175 #[inline(always)]
176 pub fn len(&self) -> VarNo {
177 self.names.len() as VarNo
178 }
179
180 /// `true` if and only if [`self.len()`][Self::len()] is 0
181 #[inline(always)]
182 pub fn is_empty(&self) -> bool {
183 self.names.is_empty()
184 }
185
186 /// Reserve space for `additional` entries
187 ///
188 /// Adding the next `additional` entries will not cause an allocation in the
189 /// underlying vector and map.
190 #[inline]
191 pub fn reserve(&mut self, additional: VarNo) {
192 let additional = additional.try_into().unwrap_or(usize::MAX);
193 self.names.reserve(additional);
194 self.index.reserve(additional);
195 }
196
197 /// Get the number of named variables
198 #[inline(always)]
199 pub fn named_count(&self) -> VarNo {
200 self.index.len() as VarNo
201 }
202
203 /// Add `additional` unnamed variables
204 ///
205 /// Panics if [`self.len()`][Self::len()] plus `additional` is greater than
206 /// to [`VarNo::MAX`].
207 #[inline]
208 pub fn add_unnamed(&mut self, additional: VarNo) {
209 let msg = "too many variables";
210 let new_len = self.len().checked_add(additional).expect(msg);
211 self.names.resize(new_len.try_into().expect(msg), "".into());
212 }
213
214 /// Add fresh variables with names from `names`
215 ///
216 /// Returns `Ok(range)` on success, where `range` contains the new variable
217 /// numbers. If `names` is too long (there would be more than `VarNo::MAX`
218 /// variables), the remaining elements are not consumed.
219 ///
220 /// If a variable name is not unique, this method returns a
221 /// [`DuplicateVarName`] error.
222 #[track_caller]
223 pub fn add_named<T: IntoIterator<Item = S>, S: Into<String>>(
224 &mut self,
225 names: T,
226 ) -> Result<Range<VarNo>, DuplicateVarName> {
227 let len_pre = self.names.len() as VarNo;
228 let it = names.into_iter();
229 let size_hint = it.size_hint().0;
230 self.index.reserve(size_hint);
231 self.names.reserve(size_hint);
232
233 for (name, v) in it.zip(len_pre..VarNo::MAX) {
234 let name: String = name.into();
235 if name.is_empty() {
236 self.names.push("".into());
237 continue;
238 }
239 let name = name.into_boxed_str().into();
240 match self.index.entry(name) {
241 Entry::Occupied(entry) => {
242 let present_var = *entry.get();
243 // SAFETY:
244 // 1. `name` has been created from a `Box<str>` above
245 // 2. `name` was not added to the map, so it is the only reference
246 // 3. `str` is `Send`
247 let name: Box<str> = unsafe { Unowned::into_box(name) };
248 return Err(DuplicateVarName {
249 name: name.into_string(),
250 present_var,
251 added_vars: len_pre..self.names.len() as VarNo,
252 });
253 }
254 Entry::Vacant(entry) => entry.insert(v),
255 };
256 self.names.push(name);
257 }
258
259 Ok(len_pre..self.names.len() as VarNo)
260 }
261
262 /// Get the variable number for the given name if present, or add a new
263 /// variable
264 ///
265 /// Returns a pair `(var_no, found)`. If the provided variable name is
266 /// empty, this method will always create a fresh variable.
267 ///
268 /// If a variable with the given name is not present yet, and there is no
269 /// variable free in range `0..VarNo::MAX`, then the variable is not added.
270 /// Still, the return value is `VarNo::MAX`.
271 pub fn get_or_add(&mut self, name: impl Into<String>) -> (VarNo, bool) {
272 let name: String = name.into();
273 if name.is_empty() {
274 let n = self.names.len() as VarNo;
275 self.names.push("".into());
276 return (n, false);
277 }
278
279 let name = name.into_boxed_str().into();
280 match self.index.entry(name) {
281 Entry::Occupied(entry) => {
282 // SAFETY:
283 // 1. `name` has been created from a `Box<str>` above
284 // 2. `name` was not added to the map, so it is the only reference
285 // 3. `str` is `Send`
286 drop(unsafe { Unowned::into_box(name) });
287 (*entry.get(), true)
288 }
289 Entry::Vacant(entry) => {
290 let n = self.names.len() as VarNo;
291 if n == VarNo::MAX {
292 // SAFETY: as above
293 drop(unsafe { Unowned::into_box(name) });
294 return (n, false);
295 }
296 entry.insert(n);
297 self.names.push(name);
298 (n, false)
299 }
300 }
301 }
302
303 /// Get the variable number for the given name, if present
304 ///
305 /// `self.name_to_var("")` always returns `None`.
306 #[inline]
307 pub fn name_to_var(&self, name: impl AsRef<str>) -> Option<VarNo> {
308 self.index.get(name.as_ref()).copied()
309 }
310
311 /// Get `var`'s name
312 ///
313 /// For unnamed vars, this will return the empty string.
314 ///
315 /// Panics if `var` is greater or equal to [`self.len()`][Self::len()].
316 #[inline(always)]
317 #[track_caller]
318 pub fn var_name(&self, var: VarNo) -> &str {
319 &self.names[var as usize]
320 }
321
322 /// Label `var` as `name`
323 ///
324 /// An empty name means that the variable will become unnamed, and cannot be
325 /// retrieved via [`Self::name_to_var()`] anymore.
326 ///
327 /// Returns `Err((name, other_var))` if `name` is not unique (and not `""`).
328 ///
329 /// Panics if `var` is greater or equal to the number of variables in this
330 /// map.
331 #[track_caller]
332 pub fn set_var_name(
333 &mut self,
334 var: VarNo,
335 name: impl Into<String>,
336 ) -> Result<(), DuplicateVarName> {
337 let name: String = name.into();
338 if name.is_empty() {
339 self.index
340 .remove(&std::mem::replace(&mut self.names[var as usize], "".into()));
341 return Ok(());
342 }
343 let name = name.into_boxed_str().into();
344 match self.index.entry(name) {
345 Entry::Occupied(entry) => {
346 // SAFETY:
347 // 1. `name` has been created from a `Box<str>` above
348 // 2. `name` was not added to the map, so it is the only reference
349 // 3. `str` is `Send`
350 let name = unsafe { Unowned::into_box(name) };
351 let present_var = *entry.get();
352 if present_var != var {
353 let len = self.len() as VarNo;
354 return Err(DuplicateVarName {
355 name: name.into_string(),
356 present_var,
357 added_vars: len..len,
358 });
359 }
360 }
361 Entry::Vacant(entry) => {
362 let prev = std::mem::replace(&mut self.names[var as usize], name);
363 entry.insert(var);
364 if !prev.is_empty() {
365 // SAFETY:
366 // 1. By the type invariant and since `prev` is not empty, it `prev` has been
367 // created from a `Box<str>`
368 // 2. `prev` was removed from `self.names`, and its copy in `index` has been
369 // dropped since `entry.insert(var)`. By the type invariant, it follows that
370 // `prev` is the only reference.
371 // 3. `str` is `Send`
372 drop(unsafe { Unowned::into_box(prev) });
373 }
374 }
375 };
376 Ok(())
377 }
378
379 /// Iterate over the variable names (including all unnamed variables)
380 pub fn into_names_iter(mut self) -> IntoNamesIter {
381 self.index.clear();
382 IntoNamesIter(std::mem::take(&mut self.names).into_iter())
383 }
384}
385
386/// Owning iterator over variable names
387// Type invariant: All non-empty `Unowned<str>` values are created from
388// `Box<str>`. There are no aliasing copies of these values.
389#[derive(Debug)]
390pub struct IntoNamesIter(std::vec::IntoIter<Unowned<str>>);
391
392/// Convert an [`Unowned<str>`] back to a [`String`]
393///
394/// # Safety
395///
396/// If `s` is non-empty, then the SAFETY conditions for [`Unowned::into_box()`]
397/// must be fulfilled.
398#[inline(always)]
399unsafe fn unowned_to_string(s: Unowned<str>) -> String {
400 if s.is_empty() {
401 String::new()
402 } else {
403 // SAFETY: upheld by caller
404 unsafe { Unowned::into_box(s) }.into_string()
405 }
406}
407
408impl Drop for IntoNamesIter {
409 fn drop(&mut self) {
410 for &name in self.0.as_slice() {
411 // SAFETY follows from type invariant
412 drop(unsafe { unowned_to_string(name) });
413 }
414 }
415}
416
417impl Iterator for IntoNamesIter {
418 type Item = String;
419
420 #[inline]
421 fn next(&mut self) -> Option<Self::Item> {
422 let name = self.0.next()?;
423 // SAFETY follows from type invariant
424 Some(unsafe { unowned_to_string(name) })
425 }
426
427 #[inline]
428 fn nth(&mut self, n: usize) -> Option<Self::Item> {
429 let name = self.0.nth(n)?;
430 // SAFETY follows from type invariant
431 Some(unsafe { unowned_to_string(name) })
432 }
433}
434
435impl ExactSizeIterator for IntoNamesIter {
436 #[inline]
437 fn len(&self) -> usize {
438 self.0.len()
439 }
440}
441impl FusedIterator for IntoNamesIter where std::vec::IntoIter<Unowned<str>>: FusedIterator {}
442
443impl DoubleEndedIterator for IntoNamesIter {
444 #[inline]
445 fn next_back(&mut self) -> Option<Self::Item> {
446 let name = self.0.next_back()?;
447 // SAFETY follows from type invariant
448 Some(unsafe { unowned_to_string(name) })
449 }
450
451 #[inline]
452 fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
453 let name = self.0.nth_back(n)?;
454 // SAFETY follows from type invariant
455 Some(unsafe { unowned_to_string(name) })
456 }
457}
458
459// TODO: replace String by Box<str>?