Skip to main content

dag_types/
id.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8//! # id
9//!
10//! Defines types around [`Id`].
11
12use std::fmt;
13use std::io;
14use std::ops;
15
16#[cfg(feature = "serialize-abomonation")]
17use abomonation_derive::Abomonation;
18pub use minibytes::Bytes;
19use serde::Deserialize;
20use serde::Serialize;
21
22/// An integer [`Id`] representing a node in the graph.
23/// [`Id`]s are topologically sorted.
24#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
25#[derive(Serialize, Deserialize)]
26#[cfg_attr(feature = "serialize-abomonation", derive(Abomonation))]
27pub struct Id(pub u64);
28
29/// Name of a vertex in the graph.
30#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
31#[serde(transparent)]
32pub struct Vertex(pub Bytes);
33
34impl AsRef<[u8]> for Vertex {
35    fn as_ref(&self) -> &[u8] {
36        &self.0
37    }
38}
39
40impl Vertex {
41    pub fn to_hex(&self) -> String {
42        const HEX_CHARS: &[u8] = b"0123456789abcdef";
43        let mut v = Vec::with_capacity(self.0.len() * 2);
44        for &byte in self.as_ref() {
45            v.push(HEX_CHARS[(byte >> 4) as usize]);
46            v.push(HEX_CHARS[(byte & 0xf) as usize]);
47        }
48        unsafe { String::from_utf8_unchecked(v) }
49    }
50
51    /// Convert from hex.
52    ///
53    /// If `len(hex)` is an odd number, hex + '0' will be used.
54    pub fn from_hex(hex: &[u8]) -> io::Result<Self> {
55        let mut bytes = vec![0u8; (hex.len() + 1) / 2];
56        for (i, byte) in hex.iter().enumerate() {
57            let value = match byte {
58                b'0'..=b'9' => byte - b'0',
59                b'a'..=b'f' => byte - b'a' + 10,
60                b'A'..=b'F' => byte - b'A' + 10,
61                _ => {
62                    return Err(io::Error::new(
63                        io::ErrorKind::InvalidInput,
64                        format!("{:?} is not a hex character", *byte as char),
65                    ));
66                }
67            };
68            if i & 1 == 0 {
69                bytes[i / 2] |= value << 4;
70            } else {
71                bytes[i / 2] |= value;
72            }
73        }
74        Ok(Vertex(Bytes::from(bytes)))
75    }
76
77    pub fn copy_from(value: &[u8]) -> Self {
78        Self(value.to_vec().into())
79    }
80}
81
82impl<T> From<T> for Vertex
83where
84    Bytes: From<T>,
85{
86    fn from(value: T) -> Self {
87        Self(Bytes::from(value))
88    }
89}
90
91impl fmt::Debug for Vertex {
92    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
93        if self.0.len() >= 4 || !looks_like_ascii_identifier(self.as_ref()) {
94            // Use hex format for long names (ex. binary commit hashes).
95            let hex = self.to_hex();
96            // Truncate to specified width (ex. '{:#.12}').
97            if let Some(width) = f.precision() {
98                let truncated = hex.get(..width).unwrap_or(&hex);
99                f.write_str(truncated)
100            } else {
101                f.write_str(&hex)
102            }
103        } else {
104            // Do not use hex if it looks like an ASCII identifier.
105            match std::str::from_utf8(self.as_ref()) {
106                Ok(s) => write!(f, "{}", s),
107                Err(_) => write!(f, "{}", self.to_hex()),
108            }
109        }
110    }
111}
112
/// Test whether `bytes` is non-empty, starts with an ASCII letter, and
/// continues with ASCII letters or digits only.
fn looks_like_ascii_identifier(bytes: &[u8]) -> bool {
    match bytes.split_first() {
        Some((head, tail)) => {
            head.is_ascii_alphabetic() && tail.iter().all(u8::is_ascii_alphanumeric)
        }
        // An empty name is not an identifier.
        None => false,
    }
}
120
121/// An integer that separates distinct groups of [`Id`]s.
122///
123/// This can be seen as a way to pre-allocate consecutive integers
124/// for one group to make segments less fragmented.
125///
126/// `(Group, Id)` are also topologically sorted.
127#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
128#[derive(Serialize, Deserialize)]
129pub struct Group(pub usize);
130
131impl Group {
132    /// The "master" group. usually `ancestors(master)`.
133    /// - Expected to have most of the commits in a repo.
134    /// - Ideally free from fragmentation. In other words,
135    ///   `ancestors(master)` can be represented in a single Span.
136    /// - Ideally has limited heads. Does not scale with too many heads.
137    /// - Vertexs (commit hashes) might be lazy.
138    pub const MASTER: Self = Self(0);
139
140    /// The "non-master" group.
141    /// - Concrete vertexes not in the "master" group. For example, public
142    ///   release branches, local feature branches.
143    /// - Expected to have multiple heads. In other words, is fragmented.
144    /// - Expected to be sparse referred. For example, the "visible heads"
145    ///   will refer to a bounded subset in this group.
146    /// - Expected to be non-lazy. Code paths assume Vertexs
147    ///   (commit hashes) are known in this group.
148    pub const NON_MASTER: Self = Self(1);
149
150    /// The "virtual" group.
151    /// - Typically, "null" and "wdir()".
152    /// - Should not be written to disk.
153    /// - Not lazy.
154    pub const VIRTUAL: Self = Self(2);
155
156    /// Groups designed to be persisted to disk.
157    pub const PERSIST: [Self; 2] = [Self::MASTER, Self::NON_MASTER];
158
159    pub const ALL: [Self; 3] = [Self::MASTER, Self::NON_MASTER, Self::VIRTUAL];
160    pub const MAX: Self = Self::ALL[Self::COUNT - 1];
161
162    pub const COUNT: usize = Self::ALL.len();
163
164    // 1 byte for Group so it's easier to remove everything in a group.
165    pub const BITS: u32 = 8;
166    pub const BYTES: usize = 1;
167
168    /// The first [`Id`] in this group.
169    pub const fn min_id(self) -> Id {
170        Id((self.0 as u64) << (64 - Self::BITS))
171    }
172
173    /// The maximum [`Id`] in this group.
174    pub const fn max_id(self) -> Id {
175        Id(self.min_id().0 + ((1u64 << (64 - Self::BITS)) - 1))
176    }
177
178    /// Convert to array.
179    pub const fn bytes(self) -> [u8; 1] {
180        [self.0 as u8]
181    }
182
183    /// Convert to hex array.
184    pub fn hex_bytes(self) -> [u8; 2] {
185        if self.0 < 10 {
186            [b'0', b'0' + (self.0 as u8)]
187        } else {
188            unreachable!()
189        }
190    }
191
192    /// Test if the `Group` is valid.
193    pub fn is_valid(self) -> bool {
194        self.0 <= Self::MAX.0
195    }
196}
197
198impl Id {
199    /// The [`Group`] of an Id.
200    pub fn group(self) -> Group {
201        let group = (self.0 >> (64 - Group::BITS)) as usize;
202        Group(group)
203    }
204
205    /// Test if the `Id` is valid.
206    pub fn is_valid(self) -> bool {
207        self.group().is_valid()
208    }
209
210    /// Similar to `self..=other`.
211    pub fn to(self, other: Id) -> IdIter {
212        IdIter {
213            current: self,
214            end: other,
215        }
216    }
217
218    /// Convert to a byte array. Useful for indexedlog range query.
219    pub fn to_bytearray(self) -> [u8; 8] {
220        // The field can be used for index range query. So it has to be BE.
221        self.0.to_be().to_ne_bytes()
222    }
223
224    /// Similar to `to_bytearray`, but insert a `prefix` at the head.
225    /// Useful for segment queries where `level` is the `prefix`.
226    pub fn to_prefixed_bytearray(self, prefix: u8) -> [u8; 9] {
227        let a = self.to_bytearray();
228        [prefix, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]]
229    }
230
231    /// Test if this `Id` is in the VIRTUAL group.
232    pub fn is_virtual(self) -> bool {
233        self.group() == Group::VIRTUAL
234    }
235
236    pub const MAX: Self = Group::ALL[Group::COUNT - 1].max_id();
237    pub const MIN: Self = Group::ALL[0].min_id();
238}
239
240impl fmt::Display for Id {
241    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
242        let group = self.group();
243        match group {
244            Group::NON_MASTER => write!(f, "N")?,
245            Group::VIRTUAL => write!(f, "V")?,
246            _ => {}
247        }
248        write!(f, "{}", self.0 - group.min_id().0)
249    }
250}
251
252impl fmt::Debug for Id {
253    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
254        write!(f, "{}", self)
255    }
256}
257
258impl fmt::Display for Group {
259    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
260        match *self {
261            Group::MASTER => write!(f, "Group Master"),
262            Group::NON_MASTER => write!(f, "Group Non-Master"),
263            Group::VIRTUAL => write!(f, "Group Virtual"),
264            _ => write!(f, "Group {}", self.0),
265        }
266    }
267}
268
269impl ops::Add<u64> for Id {
270    type Output = Id;
271
272    fn add(self, other: u64) -> Self {
273        Self(self.0 + other)
274    }
275}
276
277impl ops::Sub<u64> for Id {
278    type Output = Id;
279
280    fn sub(self, other: u64) -> Self {
281        Self(self.0 - other)
282    }
283}
284
285impl ops::Add<u8> for Group {
286    type Output = Group;
287
288    fn add(self, other: u8) -> Self {
289        Self(self.0 + other as usize)
290    }
291}
292
293impl ops::Sub<u8> for Group {
294    type Output = Group;
295
296    fn sub(self, other: u8) -> Self {
297        Self(self.0 - other as usize)
298    }
299}
300
301// Consider replacing this with iter::Step once it's stable.
302pub struct IdIter {
303    current: Id,
304    end: Id,
305}
306
307impl Iterator for IdIter {
308    type Item = Id;
309
310    fn next(&mut self) -> Option<Id> {
311        if self.current > self.end {
312            None
313        } else {
314            let result = self.current;
315            self.current = self.current + 1;
316            Some(result)
317        }
318    }
319}
320
321#[cfg(any(test, feature = "for-tests"))]
322use quickcheck::Arbitrary;
323#[cfg(any(test, feature = "for-tests"))]
324use quickcheck::Gen;
325
#[cfg(any(test, feature = "for-tests"))]
impl Arbitrary for Id {
    /// Generate an [`Id`] in group 0 or group 1.
    ///
    /// The offset is taken modulo `max_id - min_id`, so the last id of the
    /// chosen group is never produced.
    fn arbitrary(g: &mut Gen) -> Self {
        // The lowest bit of a random `u32` selects the group.
        let group = Group((u32::arbitrary(g) & 1) as usize);
        let first = group.min_id();
        let span = group.max_id().0 - first.0;
        first + u64::arbitrary(g) % span
    }
}
333
334// For convenience.
335impl std::cmp::PartialEq<u64> for Id {
336    fn eq(&self, other: &u64) -> bool {
337        self.0 == *other
338    }
339}
340
#[cfg(test)]
mod tests {
    use quickcheck::quickcheck;

    use super::*;

    /// An odd-length hex string parses as if a trailing '0' were appended.
    #[test]
    fn test_vertex_from_hex_odd() {
        let odd = Vertex::from_hex(b"a").unwrap();
        let padded = Vertex::from_hex(b"a0").unwrap();
        assert_eq!(odd, padded);
        assert_eq!(odd.to_hex(), "a0");
    }

    quickcheck! {
        // Encoding arbitrary bytes to hex and decoding them again must give
        // back the same vertex.
        fn test_vertex_hex_roundtrip(slice: Vec<u8>) -> bool {
            let original = Vertex::from(slice);
            let encoded = original.to_hex();
            let decoded = Vertex::from_hex(encoded.as_bytes()).unwrap();
            decoded == original
        }
    }
}
359            let vertex2 = Vertex::from_hex(hex.as_bytes()).unwrap();
360            vertex2 == vertex
361        }
362    }
363}