Skip to main content

dag/
segment.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8//! # segment
9//!
10//! Segmented DAG. See [`IdDag`] for the main structure.
11
12use std::fmt;
13use std::fmt::Debug;
14use std::fmt::Formatter;
15use std::io::Cursor;
16
17use bitflags::bitflags;
18use byteorder::BigEndian;
19use byteorder::ByteOrder;
20use byteorder::ReadBytesExt;
21use byteorder::WriteBytesExt;
22pub use dag_types::segment::FlatSegment;
23pub use dag_types::segment::PreparedFlatSegments;
24use minibytes::Bytes;
25use serde::Deserialize;
26use serde::Serialize;
27use vlqencoding::VLQDecode;
28use vlqencoding::VLQDecodeAt;
29use vlqencoding::VLQEncode;
30
31use crate::errors::bug;
32use crate::errors::programming;
33use crate::id::Id;
34use crate::IdSpan;
35use crate::Level;
36use crate::Result;
37
38/// [`Segment`] represents a range of [`Id`]s in an [`IdDag`] graph.
39/// It provides methods to access properties of the segments, including the range itself,
40/// parents, and level information.
41///
42/// This is the main struct used by the `dag` crate. It is mostly read from a store.
43/// Once read, it's immutable.
44#[derive(Clone, Eq, Serialize, Deserialize)]
45pub struct Segment(pub(crate) Bytes);
46
47/// In memory representation of a segment. Mostly useful for external use-cases.
48///
49/// Unlike `Segment`, it is not necessarily read from a store and can be
50/// constructed on the fly.
51#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
52pub struct IdSegment {
53    pub low: Id,
54    pub high: Id,
55    pub parents: Vec<Id>,
56    pub has_root: bool,
57    pub level: Level,
58}
59
60// Serialization format for Segment:
61//
62// ```plain,ignore
63// SEGMENT := FLAG (1B) + LEVEL (1B) + HIGH (8B) + vlq(HIGH-LOW) + vlq(PARENT_COUNT) + vlq(VLQ, PARENTS)
64// ```
65//
66// The reason HIGH is not stored in VLQ is because it's used by range lookup,
67// and vlq `[u8]` order does not match integer order.
68//
69// The reason HIGH-LOW is used instead of LOW is because it is more compact
70// for the worse case (i.e. each flat segment has length 1). Each segment has
71// only 1 byte overhead.
72
73bitflags! {
74    #[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Clone, Copy)]
75    pub struct SegmentFlags: u8 {
76        /// This segment has roots (i.e. there is at least one id in
77        /// `low..=high`, `parents(id)` is empty).
78        ///
79        /// This flag must be set correct for correctness.
80        const HAS_ROOT = 0b1;
81
82        /// This segment is the only head in `0..=high`.
83        /// In other words, `heads(0..=high)` is `[high]`.
84        ///
85        /// This flag should not be set if the segment is either a high-level
86        /// segment, or in a non-master group.
87        ///
88        /// This flag is an optimization. Not setting it might hurt performance
89        /// but not correctness.
90        const ONLY_HEAD = 0b10;
91    }
92}
93
94impl Segment {
95    pub(crate) const OFFSET_FLAGS: usize = 0;
96    pub(crate) const OFFSET_LEVEL: usize = Self::OFFSET_FLAGS + 1;
97    pub(crate) const OFFSET_HIGH: usize = Self::OFFSET_LEVEL + 1;
98    pub(crate) const OFFSET_DELTA: usize = Self::OFFSET_HIGH + 8;
99
100    pub(crate) fn flags(&self) -> Result<SegmentFlags> {
101        match self.0.get(Self::OFFSET_FLAGS) {
102            Some(bits) => Ok(SegmentFlags::from_bits_truncate(*bits)),
103            None => bug("cannot read Segment::flags"),
104        }
105    }
106
107    pub(crate) fn has_root(&self) -> Result<bool> {
108        Ok(self.flags()?.contains(SegmentFlags::HAS_ROOT))
109    }
110
111    pub(crate) fn only_head(&self) -> Result<bool> {
112        Ok(self.flags()?.contains(SegmentFlags::ONLY_HEAD))
113    }
114
115    pub(crate) fn high(&self) -> Result<Id> {
116        match self.0.get(Self::OFFSET_HIGH..Self::OFFSET_HIGH + 8) {
117            Some(slice) => Ok(Id(BigEndian::read_u64(slice))),
118            None => bug("cannot read Segment::high"),
119        }
120    }
121
122    // high - low
123    fn delta(&self) -> Result<u64> {
124        let (len, _) = self.0.read_vlq_at(Self::OFFSET_DELTA)?;
125        Ok(len)
126    }
127
128    pub(crate) fn span(&self) -> Result<IdSpan> {
129        let high = self.high()?;
130        let delta = self.delta()?;
131        let low = high - delta;
132        Ok((low..=high).into())
133    }
134
135    pub(crate) fn head(&self) -> Result<Id> {
136        self.high()
137    }
138
139    pub(crate) fn low(&self) -> Result<Id> {
140        Ok(self.span()?.low)
141    }
142
143    pub(crate) fn level(&self) -> Result<Level> {
144        match self.0.get(Self::OFFSET_LEVEL) {
145            Some(level) => Ok(*level),
146            None => bug("cannot read Segment::level"),
147        }
148    }
149
150    pub(crate) fn parent_count(&self) -> Result<usize> {
151        let mut cur = Cursor::new(&self.0);
152        cur.set_position(Self::OFFSET_DELTA as u64);
153        let _: u64 = cur.read_vlq()?;
154        let parent_count: usize = cur.read_vlq()?;
155        Ok(parent_count)
156    }
157
158    pub(crate) fn parents(&self) -> Result<Vec<Id>> {
159        let mut cur = Cursor::new(&self.0);
160        cur.set_position(Self::OFFSET_DELTA as u64);
161        let _: u64 = cur.read_vlq()?;
162        let parent_count: usize = cur.read_vlq()?;
163        let mut result = Vec::with_capacity(parent_count);
164        for _ in 0..parent_count {
165            result.push(Id(cur.read_vlq()?));
166        }
167        Ok(result)
168    }
169
170    /// Duplicate the segment with `high` set to a new value.
171    pub(crate) fn with_high(&self, high: Id) -> Result<Self> {
172        let span = self.span()?;
173        if high.group() != span.high.group() || high < span.low {
174            return programming(format!(
175                "with_high got invalid input (segment: {:?} new_high: {:?})",
176                self, high,
177            ));
178        }
179        let parents = self.parents()?;
180        let seg = Self::new(self.flags()?, self.level()?, span.low, high, &parents);
181        Ok(seg)
182    }
183
184    pub(crate) fn new(
185        flags: SegmentFlags,
186        level: Level,
187        low: Id,
188        high: Id,
189        parents: &[Id],
190    ) -> Self {
191        assert!(low.is_valid());
192        assert!(high.is_valid());
193        assert_eq!(low.group(), high.group());
194        assert!(high >= low);
195        assert!(parents.iter().all(|&p| p < low));
196        assert!(parents.iter().all(|&p| p.is_valid()));
197        let mut buf = Vec::with_capacity(1 + 8 + (parents.len() + 2) * 4);
198        buf.write_u8(flags.bits()).unwrap();
199        buf.write_u8(level).unwrap();
200        buf.write_u64::<BigEndian>(high.0).unwrap();
201        buf.write_vlq(high.0 - low.0).unwrap();
202        buf.write_vlq(parents.len()).unwrap();
203        for parent in parents {
204            buf.write_vlq(parent.0).unwrap();
205        }
206        Self(buf.into())
207    }
208}
209
210impl PartialEq for Segment {
211    fn eq(&self, other: &Self) -> bool {
212        self.0[..] == other.0[..]
213    }
214}
215
216impl Debug for Segment {
217    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
218        let span = self.span().unwrap();
219        if self.has_root().unwrap() {
220            write!(f, "R")?;
221        }
222        if self.only_head().unwrap() {
223            write!(f, "H")?;
224        }
225        let parents = self.parents().unwrap();
226        write!(f, "{}-{}{:?}", span.low, span.high, parents,)?;
227        if span.low > span.high {
228            write!(f, " (Invalid Span!!)")?;
229        }
230        if parents.iter().any(|&p| p >= span.low) {
231            write!(f, " (Invalid Parent!!)")?;
232        }
233        Ok(())
234    }
235}
236
237impl Debug for IdSegment {
238    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
239        write!(
240            f,
241            "L{} {}..={} {:?}{}",
242            self.level,
243            self.low,
244            self.high,
245            &self.parents,
246            if self.has_root { "R" } else { "" }
247        )
248    }
249}
250
251/// Describe bytes of a Segment.
252/// This is only for troubleshooting purpose.
253pub fn describe_segment_bytes(data: &[u8]) -> String {
254    let mut message = String::new();
255    let mut cur = Cursor::new(data);
256    let mut start = 0;
257    let mut explain = |cur: &Cursor<_>, m: String| {
258        let end = cur.position() as usize;
259        message += &format!("# {}: {}\n", hex(&data[start..end]), m);
260        start = end;
261    };
262    if let Ok(flags) = cur.read_u8() {
263        let flags = SegmentFlags::from_bits_truncate(flags);
264        explain(&cur, format!("Flags = {:?}", flags));
265    }
266    if let Ok(lv) = cur.read_u8() {
267        explain(&cur, format!("Level = {:?}", lv));
268    }
269    if let Ok(head) = cur.read_u64::<BigEndian>() {
270        explain(&cur, format!("High = {}", Id(head)));
271        if let Ok(delta) = VLQDecode::<u64>::read_vlq(&mut cur) {
272            let low = head - delta;
273            explain(&cur, format!("Delta = {} (Low = {})", delta, Id(low)));
274        }
275    }
276    if let Ok(count) = VLQDecode::<usize>::read_vlq(&mut cur) {
277        explain(&cur, format!("Parent count = {}", count));
278        for i in 0..count {
279            if let Ok(p) = VLQDecode::<u64>::read_vlq(&mut cur) {
280                explain(&cur, format!("Parents[{}] = {}", i, Id(p)));
281            }
282        }
283    }
284    message
285}
286
287pub(crate) fn hex(bytes: &[u8]) -> String {
288    bytes
289        .iter()
290        .cloned()
291        .map(|b| format!("{:02x}", b))
292        .collect::<Vec<String>>()
293        .join(" ")
294}
295
296#[cfg(test)]
297mod tests {
298    use dag_types::Group;
299    use quickcheck::quickcheck;
300
301    use super::*;
302    use crate::tests::dbg;
303
304    #[test]
305    fn test_segment_roundtrip() {
306        fn prop(
307            has_root: bool,
308            level: Level,
309            group: u8,
310            range1: u64,
311            range2: u64,
312            parents: Vec<u64>,
313        ) -> bool {
314            let flags = if has_root {
315                SegmentFlags::HAS_ROOT
316            } else {
317                SegmentFlags::empty()
318            };
319            let group = Group::ALL[(group as usize) % Group::ALL.len()];
320            let range = group.max_id().0 - group.min_id().0;
321            let mut low = group.min_id() + (range1 % range);
322            let mut high = group.min_id() + (range2 % range);
323            if high < low {
324                (low, high) = (high, low);
325            }
326            let parents: Vec<Id> = parents.into_iter().filter(|&p| p < low.0).map(Id).collect();
327            let node = Segment::new(flags, level, low, high, &parents);
328            node.flags().unwrap() == flags
329                && node.level().unwrap() == level
330                && node.span().unwrap() == (low..=high).into()
331                && node.parents().unwrap() == parents
332        }
333        quickcheck(prop as fn(bool, Level, u8, u64, u64, Vec<u64>) -> bool);
334    }
335
336    #[test]
337    fn test_describe() {
338        let seg = Segment::new(
339            SegmentFlags::ONLY_HEAD,
340            3,
341            Id(101),
342            Id(202),
343            &[Id(90), Id(80)],
344        );
345        assert_eq!(
346            describe_segment_bytes(&seg.0),
347            r#"# 02: Flags = SegmentFlags(ONLY_HEAD)
348# 03: Level = 3
349# 00 00 00 00 00 00 00 ca: High = 202
350# 65: Delta = 101 (Low = 101)
351# 02: Parent count = 2
352# 5a: Parents[0] = 90
353# 50: Parents[1] = 80
354"#
355        );
356    }
357
358    #[test]
359    fn test_invalid_fmt() {
360        let bytes = Bytes::from_static(&[0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 1, 10]);
361        let segment = Segment(bytes);
362        assert_eq!(dbg(segment), "10-10[10] (Invalid Parent!!)");
363    }
364}