oxidd_core/util/
var_name_map.rs

1//! Efficient bi-directional mapping between variables and names
2
3use std::collections::{hash_map::Entry, HashMap};
4use std::fmt;
5use std::iter::FusedIterator;
6use std::ops::Range;
7
8use crate::error::DuplicateVarName;
9use crate::VarNo;
10
11mod unowned {
12    use std::{fmt, ptr::NonNull};
13
14    /// A reference like `&'static T`, but (unsafely) convertible to a [`Box`].
15    /// One may also think of it as an unowned [`Box`], or an [`Rc`] without a
16    /// counter, i.e., external reasoning about the number of references and
17    /// manual dropping.
18    ///
19    /// In context of the `VarNameMap` implementation, we would like to store
20    /// the `str` content only once. We know that there are exactly two
21    /// references for each stored name, so there is no need for a reference
22    /// counter. However, we cannot use [`Box::leak`], and turn the resulting
23    /// `&mut T` into a `&T` without loosing the ability to drop the allocation
24    /// later on. This is because Rust's proposed aliasing model forbids turning
25    /// a `&T` into a `&mut T` again (which would happen in the [`Drop`]
26    /// implementation).
27    // Type invariant: An `Unowned<T>` is generally
28    // [convertible to a reference](std::ptr#pointer-to-reference-conversion).
29    #[derive(Eq)]
30    pub struct Unowned<T: ?Sized>(NonNull<T>);
31
32    // SAFETY: `Unowned<T>` behaves like `&T`, and `&T` is `Send` iff `T` is
33    // `Sync`
34    unsafe impl<T: ?Sized + Sync> Send for Unowned<T> {}
35    // SAFETY: `Unowned<T>` behaves like `&T`, and `&T` is `Sync` iff `T` is
36    // `Sync`
37    unsafe impl<T: ?Sized + Sync> Sync for Unowned<T> {}
38
39    impl<T: ?Sized> Clone for Unowned<T> {
40        #[inline(always)]
41        fn clone(&self) -> Self {
42            *self
43        }
44    }
45    impl<T: ?Sized> Copy for Unowned<T> {}
46
47    impl<T: ?Sized> std::ops::Deref for Unowned<T> {
48        type Target = T;
49
50        #[inline(always)]
51        fn deref(&self) -> &T {
52            // SAFETY: follows from the type invariant
53            unsafe { self.0.as_ref() }
54        }
55    }
56    impl<T: ?Sized> std::borrow::Borrow<T> for Unowned<T> {
57        #[inline(always)]
58        fn borrow(&self) -> &T {
59            self
60        }
61    }
62
63    impl<T: ?Sized + PartialEq> PartialEq for Unowned<T> {
64        #[inline(always)]
65        fn eq(&self, other: &Self) -> bool {
66            **self == **other
67        }
68    }
69    impl<T: ?Sized + std::hash::Hash> std::hash::Hash for Unowned<T> {
70        #[inline(always)]
71        fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
72            (**self).hash(state);
73        }
74    }
75
76    impl<T: ?Sized> From<Box<T>> for Unowned<T> {
77        #[inline(always)]
78        fn from(value: Box<T>) -> Self {
79            Self(NonNull::from(Box::leak(value)))
80        }
81    }
82    impl<T: ?Sized> From<&'static T> for Unowned<T> {
83        #[inline(always)]
84        fn from(value: &'static T) -> Self {
85            Self(NonNull::from(value))
86        }
87    }
88    impl<T: ?Sized> Unowned<T> {
89        /// Convert an `Unowned<T>` back to a `Box<T>`
90        ///
91        /// # Safety
92        ///
93        /// 1. `this` must have been created from a [`Box`]
94        /// 2. `this` must be the only existing reference
95        /// 3. If `T` is not `Send`, then `into_box` must be called from the
96        ///    thread that created the value of type `T`.
97        #[inline(always)]
98        pub unsafe fn into_box(this: Self) -> Box<T> {
99            // SAFETY: ensured by caller, see above
100            unsafe { Box::from_raw(this.0.as_ptr()) }
101        }
102    }
103
104    impl<T: ?Sized + fmt::Debug> fmt::Debug for Unowned<T> {
105        #[inline(always)]
106        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
107            (**self).fmt(f)
108        }
109    }
110    impl<T: ?Sized + fmt::Display> fmt::Display for Unowned<T> {
111        #[inline(always)]
112        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
113            (**self).fmt(f)
114        }
115    }
116}
117use unowned::Unowned;
118
119/// Bi-directional mapping between variables and names, intended for the
120/// implementation of a [`Manager`][crate::Manager].
121///
122/// The map requires variable names to be unique, but allows unnamed variables
123/// (represented by empty strings).
124// Type invariant: All `Unowned<str>` values in `index` are created from
125// `Box<str>`. Unless the map is borrowed, there each `Unowned<str>` reference
126// has an aliasing copy in `names`, and no other copies elsewhere. All
127// `Unowned<str>` values in `names` are either a copy of a value in `index` or
128// are the result of `Unowned::from("")`.
129#[derive(Clone)]
130pub struct VarNameMap {
131    names: Vec<Unowned<str>>,
132    index: HashMap<Unowned<str>, VarNo>,
133}
134
135impl Drop for VarNameMap {
136    fn drop(&mut self) {
137        self.names.clear();
138        for (s, _) in self.index.drain() {
139            // SAFETY:
140            // 1. By the type invariant, `s` has been created from a `Box<str>`
141            // 2. Since `self.names` is cleared now, `s` is the only reference
142            // 3. `str` is `Send`
143            drop(unsafe { Unowned::into_box(s) });
144        }
145    }
146}
147
148impl fmt::Debug for VarNameMap {
149    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
150        f.debug_map()
151            .entries(self.names.iter().enumerate().filter(|(_, s)| !s.is_empty()))
152            .finish()?;
153        write!(f, " (length: {})", self.len())
154    }
155}
156
157impl Default for VarNameMap {
158    #[inline(always)]
159    fn default() -> Self {
160        Self::new()
161    }
162}
163
164impl VarNameMap {
165    /// Create an empty map
166    #[inline]
167    pub fn new() -> Self {
168        Self {
169            names: Vec::new(),
170            index: HashMap::new(),
171        }
172    }
173
174    /// Get the number of variables (including unnamed ones)
175    #[inline(always)]
176    pub fn len(&self) -> VarNo {
177        self.names.len() as VarNo
178    }
179
180    /// `true` if and only if [`self.len()`][Self::len()] is 0
181    #[inline(always)]
182    pub fn is_empty(&self) -> bool {
183        self.names.is_empty()
184    }
185
186    /// Reserve space for `additional` entries
187    ///
188    /// Adding the next `additional` entries will not cause an allocation in the
189    /// underlying vector and map.
190    #[inline]
191    pub fn reserve(&mut self, additional: VarNo) {
192        let additional = additional.try_into().unwrap_or(usize::MAX);
193        self.names.reserve(additional);
194        self.index.reserve(additional);
195    }
196
197    /// Get the number of named variables
198    #[inline(always)]
199    pub fn named_count(&self) -> VarNo {
200        self.index.len() as VarNo
201    }
202
203    /// Add `additional` unnamed variables
204    ///
205    /// Panics if [`self.len()`][Self::len()] plus `additional` is greater than
206    /// to [`VarNo::MAX`].
207    #[inline]
208    pub fn add_unnamed(&mut self, additional: VarNo) {
209        let msg = "too many variables";
210        let new_len = self.len().checked_add(additional).expect(msg);
211        self.names.resize(new_len.try_into().expect(msg), "".into());
212    }
213
214    /// Add fresh variables with names from `names`
215    ///
216    /// Returns `Ok(range)` on success, where `range` contains the new variable
217    /// numbers. If `names` is too long (there would be more than `VarNo::MAX`
218    /// variables), the remaining elements are not consumed.
219    ///
220    /// If a variable name is not unique, this method returns a
221    /// [`DuplicateVarName`] error.
222    #[track_caller]
223    pub fn add_named<T: IntoIterator<Item = S>, S: Into<String>>(
224        &mut self,
225        names: T,
226    ) -> Result<Range<VarNo>, DuplicateVarName> {
227        let len_pre = self.names.len() as VarNo;
228        let it = names.into_iter();
229        let size_hint = it.size_hint().0;
230        self.index.reserve(size_hint);
231        self.names.reserve(size_hint);
232
233        for (name, v) in it.zip(len_pre..VarNo::MAX) {
234            let name: String = name.into();
235            if name.is_empty() {
236                self.names.push("".into());
237                continue;
238            }
239            let name = name.into_boxed_str().into();
240            match self.index.entry(name) {
241                Entry::Occupied(entry) => {
242                    let present_var = *entry.get();
243                    // SAFETY:
244                    // 1. `name` has been created from a `Box<str>` above
245                    // 2. `name` was not added to the map, so it is the only reference
246                    // 3. `str` is `Send`
247                    let name: Box<str> = unsafe { Unowned::into_box(name) };
248                    return Err(DuplicateVarName {
249                        name: name.into_string(),
250                        present_var,
251                        added_vars: len_pre..self.names.len() as VarNo,
252                    });
253                }
254                Entry::Vacant(entry) => entry.insert(v),
255            };
256            self.names.push(name);
257        }
258
259        Ok(len_pre..self.names.len() as VarNo)
260    }
261
262    /// Get the variable number for the given name if present, or add a new
263    /// variable
264    ///
265    /// Returns a pair `(var_no, found)`. If the provided variable name is
266    /// empty, this method will always create a fresh variable.
267    ///
268    /// If a variable with the given name is not present yet, and there is no
269    /// variable free in range `0..VarNo::MAX`, then the variable is not added.
270    /// Still, the return value is `VarNo::MAX`.
271    pub fn get_or_add(&mut self, name: impl Into<String>) -> (VarNo, bool) {
272        let name: String = name.into();
273        if name.is_empty() {
274            let n = self.names.len() as VarNo;
275            self.names.push("".into());
276            return (n, false);
277        }
278
279        let name = name.into_boxed_str().into();
280        match self.index.entry(name) {
281            Entry::Occupied(entry) => {
282                // SAFETY:
283                // 1. `name` has been created from a `Box<str>` above
284                // 2. `name` was not added to the map, so it is the only reference
285                // 3. `str` is `Send`
286                drop(unsafe { Unowned::into_box(name) });
287                (*entry.get(), true)
288            }
289            Entry::Vacant(entry) => {
290                let n = self.names.len() as VarNo;
291                if n == VarNo::MAX {
292                    // SAFETY: as above
293                    drop(unsafe { Unowned::into_box(name) });
294                    return (n, false);
295                }
296                entry.insert(n);
297                self.names.push(name);
298                (n, false)
299            }
300        }
301    }
302
303    /// Get the variable number for the given name, if present
304    ///
305    /// `self.name_to_var("")` always returns `None`.
306    #[inline]
307    pub fn name_to_var(&self, name: impl AsRef<str>) -> Option<VarNo> {
308        self.index.get(name.as_ref()).copied()
309    }
310
311    /// Get `var`'s name
312    ///
313    /// For unnamed vars, this will return the empty string.
314    ///
315    /// Panics if `var` is greater or equal to [`self.len()`][Self::len()].
316    #[inline(always)]
317    #[track_caller]
318    pub fn var_name(&self, var: VarNo) -> &str {
319        &self.names[var as usize]
320    }
321
322    /// Label `var` as `name`
323    ///
324    /// An empty name means that the variable will become unnamed, and cannot be
325    /// retrieved via [`Self::name_to_var()`] anymore.
326    ///
327    /// Returns `Err((name, other_var))` if `name` is not unique (and not `""`).
328    ///
329    /// Panics if `var` is greater or equal to the number of variables in this
330    /// map.
331    #[track_caller]
332    pub fn set_var_name(
333        &mut self,
334        var: VarNo,
335        name: impl Into<String>,
336    ) -> Result<(), DuplicateVarName> {
337        let name: String = name.into();
338        if name.is_empty() {
339            self.index
340                .remove(&std::mem::replace(&mut self.names[var as usize], "".into()));
341            return Ok(());
342        }
343        let name = name.into_boxed_str().into();
344        match self.index.entry(name) {
345            Entry::Occupied(entry) => {
346                // SAFETY:
347                // 1. `name` has been created from a `Box<str>` above
348                // 2. `name` was not added to the map, so it is the only reference
349                // 3. `str` is `Send`
350                let name = unsafe { Unowned::into_box(name) };
351                let present_var = *entry.get();
352                if present_var != var {
353                    let len = self.len() as VarNo;
354                    return Err(DuplicateVarName {
355                        name: name.into_string(),
356                        present_var,
357                        added_vars: len..len,
358                    });
359                }
360            }
361            Entry::Vacant(entry) => {
362                let prev = std::mem::replace(&mut self.names[var as usize], name);
363                entry.insert(var);
364                if !prev.is_empty() {
365                    // SAFETY:
366                    // 1. By the type invariant and since `prev` is not empty, it `prev` has been
367                    //    created from a `Box<str>`
368                    // 2. `prev` was removed from `self.names`, and its copy in `index` has been
369                    //    dropped since `entry.insert(var)`. By the type invariant, it follows that
370                    //    `prev` is the only reference.
371                    // 3. `str` is `Send`
372                    drop(unsafe { Unowned::into_box(prev) });
373                }
374            }
375        };
376        Ok(())
377    }
378
379    /// Iterate over the variable names (including all unnamed variables)
380    pub fn into_names_iter(mut self) -> IntoNamesIter {
381        self.index.clear();
382        IntoNamesIter(std::mem::take(&mut self.names).into_iter())
383    }
384}
385
386/// Owning iterator over variable names
387// Type invariant: All non-empty `Unowned<str>` values are created from
388// `Box<str>`. There are no aliasing copies of these values.
389#[derive(Debug)]
390pub struct IntoNamesIter(std::vec::IntoIter<Unowned<str>>);
391
392/// Convert an [`Unowned<str>`] back to a [`String`]
393///
394/// # Safety
395///
396/// If `s` is non-empty, then the SAFETY conditions for [`Unowned::into_box()`]
397/// must be fulfilled.
398#[inline(always)]
399unsafe fn unowned_to_string(s: Unowned<str>) -> String {
400    if s.is_empty() {
401        String::new()
402    } else {
403        // SAFETY: upheld by caller
404        unsafe { Unowned::into_box(s) }.into_string()
405    }
406}
407
408impl Drop for IntoNamesIter {
409    fn drop(&mut self) {
410        for &name in self.0.as_slice() {
411            // SAFETY follows from type invariant
412            drop(unsafe { unowned_to_string(name) });
413        }
414    }
415}
416
417impl Iterator for IntoNamesIter {
418    type Item = String;
419
420    #[inline]
421    fn next(&mut self) -> Option<Self::Item> {
422        let name = self.0.next()?;
423        // SAFETY follows from type invariant
424        Some(unsafe { unowned_to_string(name) })
425    }
426
427    #[inline]
428    fn nth(&mut self, n: usize) -> Option<Self::Item> {
429        let name = self.0.nth(n)?;
430        // SAFETY follows from type invariant
431        Some(unsafe { unowned_to_string(name) })
432    }
433}
434
435impl ExactSizeIterator for IntoNamesIter {
436    #[inline]
437    fn len(&self) -> usize {
438        self.0.len()
439    }
440}
441impl FusedIterator for IntoNamesIter where std::vec::IntoIter<Unowned<str>>: FusedIterator {}
442
443impl DoubleEndedIterator for IntoNamesIter {
444    #[inline]
445    fn next_back(&mut self) -> Option<Self::Item> {
446        let name = self.0.next_back()?;
447        // SAFETY follows from type invariant
448        Some(unsafe { unowned_to_string(name) })
449    }
450
451    #[inline]
452    fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
453        let name = self.0.nth_back(n)?;
454        // SAFETY follows from type invariant
455        Some(unsafe { unowned_to_string(name) })
456    }
457}
458
459// TODO: replace String by Box<str>?