dag/
segment.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8//! # segment
9//!
10//! Segmented DAG. See [`IdDag`] for the main structure.
11
12use std::fmt;
13use std::fmt::Debug;
14use std::fmt::Formatter;
15use std::io::Cursor;
16
17use bitflags::bitflags;
18use byteorder::BigEndian;
19use byteorder::ByteOrder;
20use byteorder::ReadBytesExt;
21use byteorder::WriteBytesExt;
22pub use dag_types::segment::FlatSegment;
23pub use dag_types::segment::PreparedFlatSegments;
24use minibytes::Bytes;
25use serde::Deserialize;
26use serde::Serialize;
27use vlqencoding::VLQDecode;
28use vlqencoding::VLQDecodeAt;
29use vlqencoding::VLQEncode;
30
31use crate::errors::bug;
32use crate::errors::programming;
33use crate::id::Id;
34use crate::IdSpan;
35use crate::Level;
36use crate::Result;
37
38/// [`Segment`] represents a range of [`Id`]s in an [`IdDag`] graph.
39/// It provides methods to access properties of the segments, including the range itself,
40/// parents, and level information.
41///
42/// This is the main struct used by the `dag` crate. It is mostly read from a store.
43/// Once read, it's immutable.
44#[derive(Clone, Eq, Serialize, Deserialize)]
45pub struct Segment(pub(crate) Bytes);
46
47/// In memory representation of a segment. Mostly useful for external use-cases.
48///
49/// Unlike `Segment`, it is not necessarily read from a store and can be
50/// constructed on the fly.
51#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
52pub struct IdSegment {
53    pub low: Id,
54    pub high: Id,
55    pub parents: Vec<Id>,
56    pub has_root: bool,
57    pub level: Level,
58}
59
60// Serialization format for Segment:
61//
62// ```plain,ignore
63// SEGMENT := FLAG (1B) + LEVEL (1B) + HIGH (8B) + vlq(HIGH-LOW) + vlq(PARENT_COUNT) + vlq(VLQ, PARENTS)
64// ```
65//
66// The reason HIGH is not stored in VLQ is because it's used by range lookup,
67// and vlq `[u8]` order does not match integer order.
68//
69// The reason HIGH-LOW is used instead of LOW is because it is more compact
70// for the worse case (i.e. each flat segment has length 1). Each segment has
71// only 1 byte overhead.
72
73bitflags! {
74    pub struct SegmentFlags: u8 {
75        /// This segment has roots (i.e. there is at least one id in
76        /// `low..=high`, `parents(id)` is empty).
77        ///
78        /// This flag must be set correct for correctness.
79        const HAS_ROOT = 0b1;
80
81        /// This segment is the only head in `0..=high`.
82        /// In other words, `heads(0..=high)` is `[high]`.
83        ///
84        /// This flag should not be set if the segment is either a high-level
85        /// segment, or in a non-master group.
86        ///
87        /// This flag is an optimization. Not setting it might hurt performance
88        /// but not correctness.
89        const ONLY_HEAD = 0b10;
90    }
91}
92
93impl Segment {
94    pub(crate) const OFFSET_FLAGS: usize = 0;
95    pub(crate) const OFFSET_LEVEL: usize = Self::OFFSET_FLAGS + 1;
96    pub(crate) const OFFSET_HIGH: usize = Self::OFFSET_LEVEL + 1;
97    pub(crate) const OFFSET_DELTA: usize = Self::OFFSET_HIGH + 8;
98
99    pub(crate) fn flags(&self) -> Result<SegmentFlags> {
100        match self.0.get(Self::OFFSET_FLAGS) {
101            Some(bits) => Ok(SegmentFlags::from_bits_truncate(*bits)),
102            None => bug("cannot read Segment::flags"),
103        }
104    }
105
106    pub(crate) fn has_root(&self) -> Result<bool> {
107        Ok(self.flags()?.contains(SegmentFlags::HAS_ROOT))
108    }
109
110    pub(crate) fn only_head(&self) -> Result<bool> {
111        Ok(self.flags()?.contains(SegmentFlags::ONLY_HEAD))
112    }
113
114    pub(crate) fn high(&self) -> Result<Id> {
115        match self.0.get(Self::OFFSET_HIGH..Self::OFFSET_HIGH + 8) {
116            Some(slice) => Ok(Id(BigEndian::read_u64(slice))),
117            None => bug("cannot read Segment::high"),
118        }
119    }
120
121    // high - low
122    fn delta(&self) -> Result<u64> {
123        let (len, _) = self.0.read_vlq_at(Self::OFFSET_DELTA)?;
124        Ok(len)
125    }
126
127    pub(crate) fn span(&self) -> Result<IdSpan> {
128        let high = self.high()?;
129        let delta = self.delta()?;
130        let low = high - delta;
131        Ok((low..=high).into())
132    }
133
134    pub(crate) fn head(&self) -> Result<Id> {
135        self.high()
136    }
137
138    pub(crate) fn low(&self) -> Result<Id> {
139        Ok(self.span()?.low)
140    }
141
142    pub(crate) fn level(&self) -> Result<Level> {
143        match self.0.get(Self::OFFSET_LEVEL) {
144            Some(level) => Ok(*level),
145            None => bug("cannot read Segment::level"),
146        }
147    }
148
149    pub(crate) fn parent_count(&self) -> Result<usize> {
150        let mut cur = Cursor::new(&self.0);
151        cur.set_position(Self::OFFSET_DELTA as u64);
152        let _: u64 = cur.read_vlq()?;
153        let parent_count: usize = cur.read_vlq()?;
154        Ok(parent_count)
155    }
156
157    pub(crate) fn parents(&self) -> Result<Vec<Id>> {
158        let mut cur = Cursor::new(&self.0);
159        cur.set_position(Self::OFFSET_DELTA as u64);
160        let _: u64 = cur.read_vlq()?;
161        let parent_count: usize = cur.read_vlq()?;
162        let mut result = Vec::with_capacity(parent_count);
163        for _ in 0..parent_count {
164            result.push(Id(cur.read_vlq()?));
165        }
166        Ok(result)
167    }
168
169    /// Duplicate the segment with `high` set to a new value.
170    pub(crate) fn with_high(&self, high: Id) -> Result<Self> {
171        let span = self.span()?;
172        if high.group() != span.high.group() || high < span.low {
173            return programming(format!(
174                "with_high got invalid input (segment: {:?} new_high: {:?})",
175                self, high,
176            ));
177        }
178        let parents = self.parents()?;
179        let seg = Self::new(self.flags()?, self.level()?, span.low, high, &parents);
180        Ok(seg)
181    }
182
183    pub(crate) fn new(
184        flags: SegmentFlags,
185        level: Level,
186        low: Id,
187        high: Id,
188        parents: &[Id],
189    ) -> Self {
190        debug_assert!(high >= low);
191        debug_assert!(parents.iter().all(|&p| p < low));
192        let mut buf = Vec::with_capacity(1 + 8 + (parents.len() + 2) * 4);
193        buf.write_u8(flags.bits()).unwrap();
194        buf.write_u8(level).unwrap();
195        buf.write_u64::<BigEndian>(high.0).unwrap();
196        buf.write_vlq(high.0 - low.0).unwrap();
197        buf.write_vlq(parents.len()).unwrap();
198        for parent in parents {
199            buf.write_vlq(parent.0).unwrap();
200        }
201        Self(buf.into())
202    }
203}
204
205impl PartialEq for Segment {
206    fn eq(&self, other: &Self) -> bool {
207        self.0[..] == other.0[..]
208    }
209}
210
211impl Debug for Segment {
212    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
213        let span = self.span().unwrap();
214        if self.has_root().unwrap() {
215            write!(f, "R")?;
216        }
217        if self.only_head().unwrap() {
218            write!(f, "H")?;
219        }
220        let parents = self.parents().unwrap();
221        write!(f, "{}-{}{:?}", span.low, span.high, parents,)?;
222        if span.low > span.high {
223            write!(f, " (Invalid Span!!)")?;
224        }
225        if parents.iter().any(|&p| p >= span.low) {
226            write!(f, " (Invalid Parent!!)")?;
227        }
228        Ok(())
229    }
230}
231
232impl Debug for IdSegment {
233    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
234        write!(
235            f,
236            "L{} {}..={} {:?}{}",
237            self.level,
238            self.low,
239            self.high,
240            &self.parents,
241            if self.has_root { "R" } else { "" }
242        )
243    }
244}
245
246/// Describe bytes of a Segment.
247/// This is only for troubleshooting purpose.
248pub fn describe_segment_bytes(data: &[u8]) -> String {
249    let mut message = String::new();
250    let mut cur = Cursor::new(data);
251    let mut start = 0;
252    let mut explain = |cur: &Cursor<_>, m: String| {
253        let end = cur.position() as usize;
254        message += &format!("# {}: {}\n", hex(&data[start..end]), m);
255        start = end;
256    };
257    if let Ok(flags) = cur.read_u8() {
258        let flags = SegmentFlags::from_bits_truncate(flags);
259        explain(&cur, format!("Flags = {:?}", flags));
260    }
261    if let Ok(lv) = cur.read_u8() {
262        explain(&cur, format!("Level = {:?}", lv));
263    }
264    if let Ok(head) = cur.read_u64::<BigEndian>() {
265        explain(&cur, format!("High = {}", Id(head)));
266        if let Ok(delta) = VLQDecode::<u64>::read_vlq(&mut cur) {
267            let low = head - delta;
268            explain(&cur, format!("Delta = {} (Low = {})", delta, Id(low)));
269        }
270    }
271    if let Ok(count) = VLQDecode::<usize>::read_vlq(&mut cur) {
272        explain(&cur, format!("Parent count = {}", count));
273        for i in 0..count {
274            if let Ok(p) = VLQDecode::<u64>::read_vlq(&mut cur) {
275                explain(&cur, format!("Parents[{}] = {}", i, Id(p)));
276            }
277        }
278    }
279    message
280}
281
282pub(crate) fn hex(bytes: &[u8]) -> String {
283    bytes
284        .iter()
285        .cloned()
286        .map(|b| format!("{:02x}", b))
287        .collect::<Vec<String>>()
288        .join(" ")
289}
290
291#[cfg(test)]
292mod tests {
293    use quickcheck::quickcheck;
294
295    use super::*;
296
297    #[test]
298    fn test_segment_roundtrip() {
299        fn prop(has_root: bool, level: Level, range1: u64, range2: u64, parents: Vec<u64>) -> bool {
300            let flags = if has_root {
301                SegmentFlags::HAS_ROOT
302            } else {
303                SegmentFlags::empty()
304            };
305            let low = u64::min(range1, range2);
306            let high = u64::max(range1, range2);
307            let parents: Vec<Id> = parents.into_iter().filter(|&p| p < low).map(Id).collect();
308            let low = Id(low);
309            let high = Id(high);
310            let node = Segment::new(flags, level, low, high, &parents);
311            node.flags().unwrap() == flags
312                && node.level().unwrap() == level
313                && node.span().unwrap() == (low..=high).into()
314                && node.parents().unwrap() == parents
315        }
316        quickcheck(prop as fn(bool, Level, u64, u64, Vec<u64>) -> bool);
317    }
318
319    #[test]
320    fn test_describe() {
321        let seg = Segment::new(
322            SegmentFlags::ONLY_HEAD,
323            3,
324            Id(101),
325            Id(202),
326            &[Id(90), Id(80)],
327        );
328        assert_eq!(
329            describe_segment_bytes(&seg.0),
330            r#"# 02: Flags = ONLY_HEAD
331# 03: Level = 3
332# 00 00 00 00 00 00 00 ca: High = 202
333# 65: Delta = 101 (Low = 101)
334# 02: Parent count = 2
335# 5a: Parents[0] = 90
336# 50: Parents[1] = 80
337"#
338        );
339    }
340
341    #[test]
342    fn test_invalid_fmt() {
343        let bytes = Bytes::from_static(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 1, 10]);
344        let segment = Segment(bytes);
345        assert_eq!(format!("{:?}", segment), "10-10[10] (Invalid Parent!!)");
346    }
347}