Skip to main content

rustpython_ruff_python_ast/
node_index.rs

1use std::num::NonZeroU32;
2use std::sync::atomic::{AtomicU32, Ordering};
3
4/// An AST node that has an index.
5pub trait HasNodeIndex {
6    /// Returns the [`AtomicNodeIndex`] for this node.
7    fn node_index(&self) -> &AtomicNodeIndex;
8}
9
10impl<T> HasNodeIndex for &T
11where
12    T: HasNodeIndex,
13{
14    fn node_index(&self) -> &AtomicNodeIndex {
15        T::node_index(*self)
16    }
17}
18
19/// A unique index for a node within an AST.
20///
21/// Our encoding of 32-bit AST node indices is as follows:
22///
23/// * `u32::MAX`     (1111...1) is reserved as a forbidden value (mapped to 0 for `NonZero`)
24/// * `u32::MAX - 1` (1111...0) is reserved for `NodeIndex::NONE`
25/// * The top two bits encode the sub-AST level:
26///   * 00 is top-level AST
27///   * 01 is sub-AST (string annotation)
28///   * 10 is sub-sub-AST (string annotation in string annotation)
29///   * 11 is forbidden (well, it only appears in the above reserved values)
30/// * The remaining 30 bits are the real (sub)-AST node index
31///
32/// To get the first sub-index of a node's sub-AST we:
33///
34/// * increment the sub-AST level in the high-bits
35/// * at level 1, multiply the real index by 256
36/// * at level 2, multiply the real index by 8
37///
38/// The multiplication gives each node a reserved space of 256 nodes for its sub-AST
39/// to work with ("should be enough for anybody"), and 8 nodes for a sub-sub-AST
40/// (enough for an identifier and maybe some simple unions).
41///
42/// Here are some implications:
43///
44/// * We have 2^30 top-level AST nodes (1 billion)
45/// * To have a string annotation, the parent node needs to be multiplied by 256 without
46///   overflowing 30 bits, so string annotations cannot be used after 2^22 nodes (4 million),
47///   which would be like, a million lines of code.
48/// * To have a sub-string annotation, the top-level node needs to be multiplied
49///   by 256 * 8, so sub-string annotations cannot be used after 2^19 nodes (500 thousand),
50///   or about 100k lines of code.
51///
52/// This feels like a pretty reasonable compromise that will work well in practice,
53/// although it creates some very wonky boundary conditions that will be very unpleasant
54/// if someone runs into them.
55///
56/// That said, string annotations are in many regards "legacy" and so new code ideally
57/// doesn't have to use them, and there's never a real reason to use sub-annotation
58/// let-alone a sub-sub-annotation.
59#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
60#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
61pub struct NodeIndex(NonZeroU32);
62
63impl NodeIndex {
64    /// A placeholder `NodeIndex`.
65    pub const NONE: NodeIndex = NodeIndex(NonZeroU32::new(NodeIndex::_NONE).unwrap());
66
67    // Note that the index `u32::MAX` is reserved for the `NonZeroU32` niche, and
68    // this placeholder also reserves the second highest index.
69    const _NONE: u32 = u32::MAX - 1;
70
71    /// Returns the index as a `u32`. or `None` for `NodeIndex::NONE`.
72    pub fn as_u32(self) -> Option<u32> {
73        if self == NodeIndex::NONE {
74            None
75        } else {
76            Some(self.0.get() - 1)
77        }
78    }
79}
80
81#[derive(Debug, Copy, Clone)]
82pub enum NodeIndexError {
83    NoParent,
84    TooNested,
85    ExhaustedSubIndices,
86    ExhaustedSubSubIndices,
87    OverflowedIndices,
88    OverflowedSubIndices,
89}
90
91const MAX_LEVEL: u32 = 2;
92const LEVEL_BITS: u32 = 32 - MAX_LEVEL.leading_zeros();
93const LEVEL_SHIFT: u32 = 32 - LEVEL_BITS;
94const LEVEL_MASK: u32 = ((LEVEL_BITS << 1) - 1) << LEVEL_SHIFT;
95const SUB_NODES: u32 = 256;
96const SUB_SUB_NODES: u32 = 8;
97pub const MAX_REAL_INDEX: u32 = (1 << LEVEL_SHIFT) - 1;
98
99/// sub-AST level is stored in the top two bits
100pub fn sub_ast_level(index: u32) -> u32 {
101    (index & LEVEL_MASK) >> LEVEL_SHIFT
102}
103
104/// Get the first and last index of the sub-AST of the input
105pub fn sub_indices(index: u32) -> Result<(u32, u32), NodeIndexError> {
106    let level = sub_ast_level(index);
107    if level >= MAX_LEVEL {
108        return Err(NodeIndexError::TooNested);
109    }
110    let next_level = (level + 1) << LEVEL_SHIFT;
111    let without_level = index & !LEVEL_MASK;
112    let (nodes_in_level, error_kind) = if level == 0 {
113        (SUB_NODES, NodeIndexError::OverflowedIndices)
114    } else if level == 1 {
115        (SUB_SUB_NODES, NodeIndexError::OverflowedSubIndices)
116    } else {
117        unreachable!(
118            "Someone made a mistake updating the encoding of node indices: {index:08X} had level {level}"
119        );
120    };
121
122    // If this overflows the file has hundreds of thousands of lines of code,
123    // but that *can* happen (we just can't support string annotations that deep)
124    let sub_index_without_level = without_level
125        .checked_mul(nodes_in_level)
126        .ok_or(error_kind)?;
127    if sub_index_without_level > MAX_REAL_INDEX {
128        return Err(error_kind);
129    }
130
131    let first_index = sub_index_without_level | next_level;
132    // Can't overflow by construction
133    let last_index = first_index + nodes_in_level - 1;
134    Ok((first_index, last_index))
135}
136
137impl From<u32> for NodeIndex {
138    fn from(value: u32) -> Self {
139        match NonZeroU32::new(value + 1).map(NodeIndex) {
140            None | Some(NodeIndex::NONE) => panic!("exceeded maximum `NodeIndex`"),
141            Some(index) => index,
142        }
143    }
144}
145
146impl std::fmt::Debug for NodeIndex {
147    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
148        if *self == Self::NONE {
149            f.debug_tuple("NodeIndex(None)").finish()
150        } else {
151            f.debug_tuple("NodeIndex").field(&self.0).finish()
152        }
153    }
154}
155
156/// A unique index for a node within an AST.
157///
158/// This type is interiorly mutable to allow assigning node indices
159/// on-demand after parsing.
160#[cfg_attr(feature = "get-size", derive(get_size2::GetSize))]
161pub struct AtomicNodeIndex(AtomicU32);
162
163#[allow(clippy::declare_interior_mutable_const)]
164impl AtomicNodeIndex {
165    /// A placeholder `AtomicNodeIndex`.
166    pub const NONE: AtomicNodeIndex = AtomicNodeIndex(AtomicU32::new(NodeIndex::_NONE));
167
168    /// Load the current value of the `AtomicNodeIndex`.
169    pub fn load(&self) -> NodeIndex {
170        let index = NonZeroU32::new(self.0.load(Ordering::Relaxed))
171            .expect("value stored was a valid `NodeIndex`");
172
173        NodeIndex(index)
174    }
175
176    /// Set the value of the `AtomicNodeIndex`.
177    pub fn set(&self, index: NodeIndex) {
178        self.0.store(index.0.get(), Ordering::Relaxed);
179    }
180}
181
182impl Default for AtomicNodeIndex {
183    fn default() -> Self {
184        Self::NONE
185    }
186}
187
188impl std::fmt::Debug for AtomicNodeIndex {
189    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
190        std::fmt::Debug::fmt(&self.load(), f)
191    }
192}
193
194impl std::hash::Hash for AtomicNodeIndex {
195    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
196        self.load().hash(state);
197    }
198}
199
200impl PartialOrd for AtomicNodeIndex {
201    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
202        Some(self.cmp(other))
203    }
204}
205
206impl Ord for AtomicNodeIndex {
207    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
208        self.load().cmp(&other.load())
209    }
210}
211
212impl Eq for AtomicNodeIndex {}
213
214impl PartialEq for AtomicNodeIndex {
215    fn eq(&self, other: &Self) -> bool {
216        self.load() == other.load()
217    }
218}
219
220impl Clone for AtomicNodeIndex {
221    fn clone(&self) -> Self {
222        Self(AtomicU32::from(self.0.load(Ordering::Relaxed)))
223    }
224}
225
226#[cfg(test)]
227mod tests {
228    use super::{AtomicNodeIndex, NodeIndex};
229
230    #[test]
231    fn test_node_index() {
232        let index = AtomicNodeIndex::NONE;
233
234        assert_eq!(index.load(), NodeIndex::NONE);
235        assert_eq!(format!("{index:?}"), "NodeIndex(None)");
236
237        index.set(NodeIndex::from(1));
238        assert_eq!(index.load(), NodeIndex::from(1));
239        assert_eq!(index.load().as_u32(), Some(1));
240
241        let index = NodeIndex::from(0);
242        assert_eq!(index.as_u32(), Some(0));
243
244        let index = NodeIndex::NONE;
245        assert_eq!(index.as_u32(), None);
246    }
247}