dag/
iddag.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8use std::collections::BTreeSet;
9use std::collections::BinaryHeap;
10use std::collections::HashMap;
11use std::collections::VecDeque;
12use std::fmt;
13use std::fmt::Debug;
14use std::fmt::Formatter;
15use std::ops::Deref;
16#[cfg(any(test, feature = "indexedlog-backend"))]
17use std::path::Path;
18use std::sync::atomic::AtomicUsize;
19use std::sync::atomic::Ordering;
20
21use indexmap::set::IndexSet;
22use serde::Deserialize;
23use serde::Serialize;
24use tracing::debug;
25use tracing::trace;
26
27use crate::errors::bug;
28use crate::errors::NotFoundError;
29use crate::id::Group;
30use crate::id::Id;
31use crate::iddagstore::IdDagStore;
32use crate::iddagstore::InProcessStore;
33#[cfg(any(test, feature = "indexedlog-backend"))]
34use crate::iddagstore::IndexedLogStore;
35use crate::ops::Persist;
36#[cfg(any(test, feature = "indexedlog-backend"))]
37use crate::ops::TryClone;
38use crate::segment::FlatSegment;
39use crate::segment::PreparedFlatSegments;
40use crate::segment::Segment;
41use crate::segment::SegmentFlags;
42use crate::spanset;
43use crate::types_ext::PreparedFlatSegmentsExt;
44use crate::Error::Programming;
45use crate::IdSegment;
46use crate::IdSet;
47use crate::IdSpan;
48use crate::Level;
49use crate::Result;
50use crate::VerLink;
51
52/// Structure to store a DAG of integers, with indexes to speed up ancestry queries.
53///
54/// A segment is defined as `(level: int, low: int, high: int, parents: [int])` on
55/// a topo-sorted integer DAG. It covers all integers in `low..=high` range, and
56/// must satisfy:
57/// - `high` is the *only* head in the sub DAG covered by the segment.
58/// - `parents` do not have entries within `low..=high` range.
59/// - If `level` is 0, for any integer `x` in `low+1..=high` range, `x`'s parents
60///   must be `x - 1`.
61///
62/// See `slides/201904-segmented-changelog/segmented-changelog.pdf` for pretty
63/// graphs about how segments help with ancestry queries.
64///
65/// [`IdDag`] is often used together with [`IdMap`] to allow customized names
66/// on vertexes. The [`NameDag`] type provides an easy-to-use interface to
67/// keep [`IdDag`] and [`IdMap`] in sync.
68#[derive(Clone, Serialize, Deserialize)]
69pub struct IdDag<Store> {
70    pub(crate) store: Store,
71    #[serde(skip, default = "default_seg_size")]
72    new_seg_size: usize,
73    #[serde(skip, default = "VerLink::new")]
74    version: VerLink,
75}
76
77/// See benches/segment_sizes.rs (D16660078) for this choice.
78const DEFAULT_SEG_SIZE: AtomicUsize = AtomicUsize::new(16);
79
80/// Maximum meaningful level. 4 is chosen because it is good enough
81/// for an existing large repo (level 5 is not built because it
82/// cannot merge level 4 segments).
83const MAX_MEANINGFUL_LEVEL: Level = 4;
84
85#[cfg(any(test, feature = "indexedlog-backend"))]
86impl IdDag<IndexedLogStore> {
87    /// Open [`IdDag`] at the given directory. Create it on demand.
88    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
89        let store = IndexedLogStore::open(path)?;
90        Self::open_from_store(store)
91    }
92}
93
94impl<S> IdDag<S> {
95    /// Set the maximum size of a new high-level segment.
96    ///
97    /// This does not affect existing segments.
98    ///
99    /// This might help performance a bit for certain rare types of DAGs.
100    /// The default value is Usually good enough.
101    pub fn set_new_segment_size(&mut self, size: usize) {
102        self.new_seg_size = size.max(2);
103    }
104
105    /// Get the segment size used for building new high-level segments.
106    pub(crate) fn get_new_segment_size(&self) -> usize {
107        self.new_seg_size
108    }
109}
110
111#[cfg(any(test, feature = "indexedlog-backend"))]
112impl TryClone for IdDag<IndexedLogStore> {
113    /// Attempt to clone the `IdDag`.
114    fn try_clone(&self) -> Result<Self> {
115        let store = self.store.try_clone()?;
116        Ok(Self {
117            store,
118            new_seg_size: self.new_seg_size,
119            version: self.version.clone(),
120        })
121    }
122}
123
124impl IdDag<InProcessStore> {
125    /// Instantiate an [`IdDag`] that stores all it's data in process. Useful for scenarios that
126    /// do not require data persistance.
127    pub fn new_in_process() -> Self {
128        let store = InProcessStore::new();
129        Self {
130            store,
131            new_seg_size: default_seg_size(),
132            version: VerLink::new(),
133        }
134    }
135}
136
137impl<Store: IdDagStore> IdDag<Store> {
138    pub(crate) fn open_from_store(store: Store) -> Result<Self> {
139        let dag = Self {
140            store,
141            new_seg_size: default_seg_size(),
142            version: VerLink::new(),
143        };
144        Ok(dag)
145    }
146}
147
148impl<Store: IdDagStore> IdDag<Store> {
149    /// Add a new segment.
150    ///
151    /// For simplicity, it does not check if the new segment overlaps with
152    /// an existing segment (which is a logic error). Those checks can be
153    /// offline.
154    pub(crate) fn insert(
155        &mut self,
156        flags: SegmentFlags,
157        level: Level,
158        low: Id,
159        high: Id,
160        parents: &[Id],
161    ) -> Result<()> {
162        self.version.bump();
163        self.store.insert(flags, level, low, high, parents)
164    }
165
166    /// Returns whether the iddag contains segments for the given `id`.
167    pub fn contains_id(&self, id: Id) -> Result<bool> {
168        Ok(self.all()?.contains(id))
169    }
170
171    pub(crate) fn version(&self) -> &VerLink {
172        &self.version
173    }
174}
175
176// Build segments.
177impl<Store: IdDagStore> IdDag<Store> {
178    /// Make sure the [`IdDag`] contains the given id (and all its ancestor ids)
179    /// by building up segments on demand.
180    ///
181    /// `get_parents` describes the DAG. Its input and output are `Id`s.
182    ///
183    /// This is often used together with [`crate::idmap::IdMap`].
184    ///
185    /// This is inefficient. If you have `PreparedFlatSegments`, call
186    /// `build_flat_segments_from_prepared_flat_segments` instead.
187    pub fn build_segments<F>(&mut self, high: Id, get_parents: &F) -> Result<usize>
188    where
189        F: Fn(Id) -> Result<Vec<Id>>,
190    {
191        // Step 1, figure out id ranges to be inserted using DFS.
192        // We cannot call `push_edge` like step 2 here, because the
193        // `id`s are in DESC order. `push_edge` really need ASC order.
194        let old_ids = self.all_ids_in_segment_level(0)?;
195        let mut to_insert_ids = IdSet::empty();
196        let mut to_visit = vec![high];
197        while let Some(id) = to_visit.pop() {
198            if old_ids.contains(id) || to_insert_ids.contains(id) {
199                continue;
200            }
201            to_insert_ids.push(id);
202            let parent_ids = get_parents(id)?;
203            to_visit.extend_from_slice(&parent_ids);
204        }
205
206        // Step 2, convert the to_insert_ids to PreparedFlatSegments.
207        // Iterate them in ascending order so `push_edge` works efficiently.
208        let mut prepared = PreparedFlatSegments::default();
209        for id in to_insert_ids.iter_asc() {
210            let parent_ids = get_parents(id)?;
211            prepared.push_edge(id, &parent_ids);
212        }
213
214        // Step 3, call build_segments_from_prepared_flat_segments.
215        self.build_segments_from_prepared_flat_segments(&prepared)
216    }
217
218    /// Similar to `build_segments`, but takes `PreparedFlatSegments` instead
219    /// of `get_parents`.
220    pub fn build_segments_from_prepared_flat_segments(
221        &mut self,
222        outcome: &PreparedFlatSegments,
223    ) -> Result<usize> {
224        let (count, set) = self.build_flat_segments_from_prepared_flat_segments(outcome)?;
225        let count = count + self.build_all_high_level_segments(Level::MAX, set)?;
226        Ok(count)
227    }
228
229    /// Build flat segments using the outcome from `add_head`.
230    /// This is not public because it does not keep high-level segments in sync.
231    ///
232    /// Return `(count, set)`. Number of segments inserted, and `IdSet` covered
233    /// by inserted segments.
234    fn build_flat_segments_from_prepared_flat_segments(
235        &mut self,
236        outcome: &PreparedFlatSegments,
237    ) -> Result<(usize, IdSet)> {
238        let mut inserted_id_set = IdSet::empty();
239        if outcome.segments.is_empty() {
240            return Ok((0, inserted_id_set));
241        }
242
243        let mut head_ids: BTreeSet<Id> = self.heads(self.master_group()?)?.iter_desc().collect();
244        let mut covered = self.all_ids_in_groups(&[Group::MASTER])?;
245        let mut get_flags = |parents: &[Id], head: Id, covered: &IdSet| {
246            let mut flags = SegmentFlags::empty();
247            if parents.is_empty() {
248                flags |= SegmentFlags::HAS_ROOT
249            }
250            if head.group() == Group::MASTER {
251                for p in parents.iter() {
252                    head_ids.remove(p);
253                }
254                // ONLY_HEAD means it heads(0..=head) = [head], or ancestors([head]) = 0..=head.
255                // The 0..=head range cannot have gaps. Because other segments
256                // might be inserted to the gaps later and they will have heads.
257                let has_no_gap = match covered.iter_span_asc().next() {
258                    Some(span) => span.contains(head),
259                    None => false,
260                };
261                if has_no_gap && head_ids.range(..=head).next().is_none() {
262                    flags |= SegmentFlags::ONLY_HEAD;
263                }
264                head_ids.insert(head);
265            }
266            flags
267        };
268        let mut last_high = None;
269        for seg in &outcome.segments {
270            if let Some(last_high) = last_high {
271                if last_high >= seg.low {
272                    return bug(format!(
273                        "PreparedFlatSegments are not sorted: {:?}",
274                        &outcome.segments
275                    ));
276                }
277            }
278            last_high = Some(seg.high);
279            covered.push(seg.low..=seg.high);
280
281            let flags = get_flags(&seg.parents, seg.high, &covered);
282            tracing::trace!(
283                "inserting flat segment {}..={} {:?} {:?}",
284                seg.low,
285                seg.high,
286                &seg.parents,
287                &flags
288            );
289            inserted_id_set.push(seg.low..=seg.high);
290            self.insert(flags, 0, seg.low, seg.high, &seg.parents)?;
291        }
292        Ok((outcome.segments.len(), inserted_id_set))
293    }
294
295    /// Incrementally build high level segments at the given `level`.
296    ///
297    /// The new, high level segments are built on top of the lower level
298    /// (`level - 1`) segments. Each high level segment covers at most `size`
299    /// `level - 1` segments.
300    ///
301    /// The last segment per level per group is dropped because it's likely to
302    /// be incomplete. This helps reduce fragmentation, and also allows the last
303    /// flat segment per group to be mutable, because it will not be included
304    /// in Level 1 segments.
305    ///
306    /// `inserted_lower_level_id_set` specifies newly inserted segments at the
307    /// lower level.
308    ///
309    /// Return `(count, set)`. `count` is the number of segments inserted.
310    /// `set` is an `IdSet` that covers ranges of segments inserted.
311    fn build_high_level_segments(
312        &mut self,
313        level: Level,
314        inserted_lower_level_id_set: &IdSet,
315    ) -> Result<(usize, IdSet)> {
316        let mut inserted_id_set = IdSet::empty();
317        if level == 0 {
318            // Do nothing. Level 0 is not considered high level.
319            return Ok((0, inserted_id_set));
320        }
321        let size = self.new_seg_size;
322
323        // Figure out lower level segments to consider.
324        //
325        // Legend:
326        //  [...] existing (one or more) segments
327        //  [N] newly inserted (in inserted_lower_level_id_set)
328        //
329        // Flat segments:          [..............] gap [.......] gap [...........]
330        // Lower level segments:   [.......][ N ]       [.....]       [....][ N ]
331        // This level segments:    [.....]              [...]         [..]
332        //   missing:                     [     ]            []           [     ]
333        //   need_consider:               [     ]                         [     ]
334        //                                                    ^
335        //                   no need to consdier because it does not have [N]
336
337        let missing = self
338            .all_ids_in_segment_level(level - 1)?
339            .difference(&self.all_ids_in_segment_level(level)?);
340        let need_consider = IdSet::from_sorted_spans(
341            missing
342                .as_spans()
343                .iter()
344                .filter(|s| inserted_lower_level_id_set.contains(s.high))
345                .copied(),
346        );
347        tracing::debug!(
348            "building lv{} segment from lv{} ranges {:?}",
349            level,
350            level - 1,
351            &need_consider
352        );
353
354        let mut insert_count = 0;
355        let mut new_segments_per_considering_span = Vec::new();
356        let mut lower_segments_len = 0;
357        for considering_span in need_consider.as_spans() {
358            tracing::trace!(" considering {:?}", &considering_span);
359            // `get_parents` is on the previous level of segments.
360            let get_parents = |head: Id| -> Result<Vec<Id>> {
361                if let Some(seg) = self.find_segment_by_head_and_level(head, level - 1)? {
362                    seg.parents()
363                } else {
364                    bug("get_parents called with wrong head in build_high_level_segments")
365                }
366            };
367
368            let new_segments = {
369                // Find all segments on the previous level that haven't been built.
370                let segments: Vec<_> =
371                    self.segments_in_span_ascending(*considering_span, level - 1)?;
372                tracing::trace!("  in lower-level: {:?}", &segments);
373                lower_segments_len += segments.len();
374
375                // Sanity check: They should be sorted and not overlapping.
376                for i in 1..segments.len() {
377                    if segments[i - 1].high()? >= segments[i].span()?.low {
378                        let msg = format!(
379                            "level {} segments {:?} are overlapping or not sorted!",
380                            level,
381                            &segments[i - 1..=i]
382                        );
383                        return bug(msg);
384                    }
385                }
386
387                // Build the graph from the first head. `low_idx` is the
388                // index of `segments` (level - 1).
389
390                // find_segment scans low level segments (segments[low_idx..]),
391                // merges them on the fly, and returns a high-level segment:
392                // (new_idx, low, high, parents, has_root).
393                // new_idx + 1 is the next low_idx that should be passed to find_segment
394                // to calculate the next high-level segment.
395                let find_segment = |low_idx: usize| -> Result<_> {
396                    let segment_low = segments[low_idx].span()?.low;
397                    let mut heads = BTreeSet::new();
398                    let mut parents = IndexSet::new();
399                    let mut candidate = None;
400                    let mut has_root = false;
401                    for i in low_idx..segments.len().min(low_idx + size) {
402                        // [--------------------------] level (to build)
403                        // [------] [------] [--------] level - 1 (lower level)
404                        // ^        ^
405                        // |        segments[i].low
406                        // segment_low
407                        let span = segments[i].span()?;
408                        // Discontinuous?
409                        if i > low_idx && segments[i - 1].head()? + 1 != span.low {
410                            break;
411                        }
412                        let head = span.high;
413                        if !has_root && segments[i].has_root()? {
414                            has_root = true;
415                        }
416                        heads.insert(head);
417                        let direct_parents = get_parents(head)?;
418                        for p in &direct_parents {
419                            if *p >= span.low {
420                                return bug(format!(
421                                    "invalid lv{} segment: {:?} (parent >= low)",
422                                    level - 1,
423                                    &segments[i]
424                                ));
425                            }
426                            if *p < segment_low {
427                                // No need to remove p from heads, since it cannot be a head.
428                                parents.insert(*p);
429                            } else {
430                                heads.remove(p);
431                            }
432                        }
433                        if heads.len() == 1 {
434                            candidate = Some((i, segment_low, head, parents.len(), has_root));
435                        }
436                    }
437                    // There must be at least one valid high-level segment,
438                    // because `segments[low_idx]` is such a high-level segment.
439                    let (new_idx, low, high, parent_count, has_root) = candidate.unwrap();
440                    let parents = parents.into_iter().take(parent_count).collect::<Vec<Id>>();
441                    Ok((new_idx, low, high, parents, has_root))
442                };
443
444                let mut idx = 0;
445                let mut new_segments = Vec::new();
446                while idx < segments.len() {
447                    let segment_info = find_segment(idx)?;
448                    idx = segment_info.0 + 1;
449                    new_segments.push(segment_info);
450                }
451
452                tracing::trace!("  new segments: {:?}", &new_segments);
453                new_segments
454            };
455
456            new_segments_per_considering_span.push(new_segments);
457        }
458
459        // No point to introduce new levels if it has the same segment count
460        // as the lower level.
461        if level > self.max_level()?
462            && new_segments_per_considering_span
463                .iter()
464                .map(|s| s.len())
465                .sum::<usize>()
466                >= lower_segments_len
467        {
468            tracing::debug!("no need to introduce new level");
469            return Ok((0, inserted_id_set));
470        }
471
472        for mut new_segments in new_segments_per_considering_span {
473            // Drop the last segment. It could be incomplete.
474            new_segments.pop();
475
476            insert_count += new_segments.len();
477
478            for (_, low, high, parents, has_root) in new_segments {
479                let flags = if has_root {
480                    SegmentFlags::HAS_ROOT
481                } else {
482                    SegmentFlags::empty()
483                };
484                tracing::trace!(
485                    " inserting lv{} segment {}..{} {:?} {:?}",
486                    level,
487                    low,
488                    high,
489                    &parents,
490                    &flags
491                );
492                inserted_id_set.push(low..=high);
493                self.insert(flags, level, low, high, &parents)?;
494            }
495        }
496
497        Ok((insert_count, inserted_id_set))
498    }
499
500    /// Build high level segments using default setup.
501    ///
502    /// `new_flat_id_set` covers the flat segments newly inserted.
503    ///
504    /// Return number of segments inserted.
505    fn build_all_high_level_segments(
506        &mut self,
507        max_level: Level,
508        new_flat_id_set: IdSet,
509    ) -> Result<usize> {
510        let mut total = 0;
511        let max_level = max_level.min(MAX_MEANINGFUL_LEVEL);
512        let mut new_id_set = new_flat_id_set;
513        for level in 1..=max_level {
514            let (count, new_ids) = self.build_high_level_segments(level, &new_id_set)?;
515            tracing::debug!("new {} lv{} segments: {:?}", count, level, &new_ids);
516            if count == 0 {
517                break;
518            }
519            new_id_set = new_ids;
520            total += count;
521        }
522        Ok(total)
523    }
524}
525
526impl<Store: IdDagStore> IdDag<Store> {
527    /// Returns the [`FlatSegment`] entries that are used by this [`IdDag`].
528    pub fn flat_segments(&self, group: Group) -> Result<PreparedFlatSegments> {
529        let segments = self.flat_segments_range(group.min_id(), group.max_id())?;
530        Ok(PreparedFlatSegments {
531            segments: segments.into_iter().collect(),
532        })
533    }
534
535    /// Return all flat segments that overlap with range (and potentially cover
536    /// larger range than supplied).
537    fn flat_segments_range(&self, min: Id, max_incl: Id) -> Result<Vec<FlatSegment>> {
538        let level = 0;
539        let mut segments = Vec::new();
540        for sr in self.iter_segments_ascending(min, level)? {
541            let segment = sr?;
542            let span = segment.span()?;
543            if span.low > max_incl {
544                break;
545            }
546            let fs = FlatSegment {
547                low: span.low,
548                high: span.high,
549                parents: segment.parents()?,
550            };
551            segments.push(fs);
552        }
553        Ok(segments)
554    }
555
556    /// Extract flat segments that cover the given `set` exactly.
557    pub fn idset_to_flat_segments(&self, set: IdSet) -> Result<PreparedFlatSegments> {
558        let mut segments = Vec::new();
559
560        let (min, max) = if let (Some(min), Some(max)) = (set.min(), set.max()) {
561            (min, max)
562        } else {
563            return Ok(PreparedFlatSegments {
564                segments: segments.into_iter().collect(),
565            });
566        };
567        let segs = self.flat_segments_range(min, max)?;
568        let seg_iter = segs.into_iter().rev();
569
570        let push = |seg: FlatSegment| segments.push(seg);
571        let span_iter = set.as_spans().iter().cloned();
572        spanset::intersect_iter(seg_iter, span_iter, push);
573
574        Ok(PreparedFlatSegments {
575            segments: segments.into_iter().collect(),
576        })
577    }
578}
579
580// User-facing DAG-related algorithms.
581pub trait IdDagAlgorithm: IdDagStore {
582    /// Return a [`IdSet`] that covers all ids stored in this [`IdDag`].
583    fn all(&self) -> Result<IdSet> {
584        self.all_ids_in_groups(&Group::ALL)
585    }
586
587    /// Return a [`IdSet`] that covers all ids stored in the master group.
588    fn master_group(&self) -> Result<IdSet> {
589        self.all_ids_in_groups(&[Group::MASTER])
590    }
591
592    /// Calculate all ancestors reachable from any id from the given set.
593    ///
594    /// ```plain,ignore
595    /// union(ancestors(i) for i in set)
596    /// ```
597    fn ancestors(&self, mut set: IdSet) -> Result<IdSet> {
598        fn trace(msg: &dyn Fn() -> String) {
599            trace!(target: "dag::algo::ancestors", "{}", msg());
600        }
601        debug!(target: "dag::algo::ancestors", "ancestors({:?})", &set);
602        if set.count() > 2 {
603            // Try to (greatly) reduce the size of the `set` to make calculation cheaper.
604            set = self.heads_ancestors(set)?;
605            trace(&|| format!("simplified to {:?}", &set));
606        }
607        let mut result = IdSet::empty();
608        let mut to_visit: BinaryHeap<_> = set.iter_desc().collect();
609        let max_level = self.max_level()?;
610        'outer: while let Some(id) = to_visit.pop() {
611            if result.contains(id) {
612                // If `id` is in `result`, then `ancestors(id)` are all in `result`.
613                continue;
614            }
615            trace(&|| format!(" lookup {:?}", id));
616            let flat_seg = self.find_flat_segment_including_id(id)?;
617            if let Some(ref s) = flat_seg {
618                if s.only_head()? {
619                    // Fast path.
620                    trace(&|| format!(" push ..={:?} (only head fast path)", id));
621                    result.push_span((Id::MIN..=id).into());
622                    break 'outer;
623                }
624            }
625            for level in (1..=max_level).rev() {
626                let seg = self.find_segment_by_head_and_level(id, level)?;
627                if let Some(seg) = seg {
628                    let span = seg.span()?.into();
629                    trace(&|| format!(" push lv{} {:?}", level, &span));
630                    result.push_span(span);
631                    let parents = seg.parents()?;
632                    trace(&|| format!(" follow parents {:?}", &parents));
633                    for parent in parents {
634                        to_visit.push(parent);
635                    }
636                    continue 'outer;
637                }
638            }
639            if let Some(seg) = flat_seg {
640                let span = (seg.span()?.low..=id).into();
641                trace(&|| format!(" push lv0 {:?}", &span));
642                result.push_span(span);
643                let parents = seg.parents()?;
644                trace(&|| format!(" follow parents {:?}", &parents));
645                for parent in parents {
646                    to_visit.push(parent);
647                }
648            } else {
649                return bug("flat segments are expected to cover everything but they are not");
650            }
651        }
652
653        trace(&|| format!(" result: {:?}", &result));
654
655        Ok(result)
656    }
657
658    /// Like `ancestors` but follows only the first parents.
659    fn first_ancestors(&self, set: IdSet) -> Result<IdSet> {
660        fn trace(msg: &dyn Fn() -> String) {
661            trace!(target: "dag::algo::first_ancestors", "{}", msg());
662        }
663        debug!(target: "dag::algo::first_ancestors", "first_ancestors({:?})", &set);
664        let mut result = IdSet::empty();
665        let mut to_visit: BinaryHeap<_> = set.iter_desc().collect();
666        // Lookup flat segments to figure out the first ancestors.
667        while let Some(id) = to_visit.pop() {
668            if result.contains(id) {
669                // If `id` is in `result`, then `ancestors(id)` are all in `result`.
670                continue;
671            }
672            trace(&|| format!(" visit {:?}", &id));
673            let flat_seg = self.find_flat_segment_including_id(id)?;
674            if let Some(ref seg) = flat_seg {
675                let span = seg.span()?;
676                result.push_span((span.low..=id).into());
677                trace(&|| format!(" push {:?}..={:?}", span.low, id));
678                if let Some(&p) = seg.parents()?.get(0) {
679                    to_visit.push(p);
680                }
681            }
682        }
683        trace(&|| format!(" result: {:?}", &result));
684        Ok(result)
685    }
686
687    /// Calculate merges within the given set.
688    fn merges(&self, set: IdSet) -> Result<IdSet> {
689        fn trace(msg: &dyn Fn() -> String) {
690            trace!(target: "dag::algo::merges", "{}", msg());
691        }
692        debug!(target: "dag::algo::merges", "merges({:?})", &set);
693
694        let mut result = IdSet::empty();
695
696        // Check overlapped flat segments. By definition, merges can only be the
697        // "low"s of flat segments.
698
699        // Process the given span overlapped with the segment.
700        // Return the next "high" id for segment lookup.
701        // Return None if there is no segment to check for the given span.
702        let mut process_seg = |span: &IdSpan, seg: Segment| -> Result<Option<Id>> {
703            trace(&|| format!(" process {:?} seg {:?}", &span, &seg));
704            let seg_span = seg.span()?;
705            let low = seg_span.low;
706            if low < span.low {
707                return Ok(None);
708            }
709            if seg.parent_count()? >= 2 {
710                // span.low <= low <= high <= span.high
711                debug_assert!(set.contains(low));
712                trace(&|| format!(" push merge {:?}", &low));
713                result.push_span(low.into());
714            }
715            if seg_span.low > Id(0) {
716                Ok(Some(seg_span.low - 1))
717            } else {
718                Ok(None)
719            }
720        };
721
722        for span in set.as_spans() {
723            // Cannot use iter_segments_descending, since it skips overlapping
724            // segments (seg.high > span.high and seg.low > span.low). Use
725            // find_flat_segment_including_id to find the first overlapping
726            // segment, then use iter_segments_descending to handle a large
727            // span (ex. all()) efficiently.
728            let high = match self.find_flat_segment_including_id(span.high)? {
729                None => continue,
730                Some(seg) => match process_seg(span, seg)? {
731                    None => continue,
732                    Some(id) => id,
733                },
734            };
735            'iter_seg: for seg in self.iter_segments_descending(high, 0)? {
736                let seg = seg?;
737                match process_seg(span, seg)? {
738                    None => break 'iter_seg,
739                    Some(_) => {}
740                }
741            }
742        }
743
744        trace(&|| format!(" result: {:?}", &result));
745
746        Ok(result)
747    }
748
749    /// Calculate parents of the given set.
750    ///
751    /// Note: [`IdSet`] does not preserve order. Use [`IdDag::parent_ids`] if
752    /// order is needed.
753    fn parents(&self, mut set: IdSet) -> Result<IdSet> {
754        fn trace(msg: &dyn Fn() -> String) {
755            trace!(target: "dag::algo::parents", "{}", msg());
756        }
757        debug!(target: "dag::algo::parents", "parents({:?})", &set);
758
759        let mut result = IdSet::empty();
760        let max_level = self.max_level()?;
761
762        'outer: while let Some(head) = set.max() {
763            trace(&|| format!("check head {:?}", head));
764            // For high-level segments. If the set covers the entire segment, then
765            // the parents is (the segment - its head + its parents).
766            for level in (1..=max_level).rev() {
767                if let Some(seg) = self.find_segment_by_head_and_level(head, level)? {
768                    let seg_span = seg.span()?;
769                    if set.contains(seg_span) {
770                        let seg_set = IdSet::from(seg_span);
771                        let mut parent_set = seg_set.difference(&head.into());
772                        parent_set.push_set(&IdSet::from_spans(seg.parents()?));
773                        set = set.difference(&seg_set);
774                        result = result.union(&parent_set);
775                        trace(&|| format!(" push lv{} {:?}", level, &parent_set));
776                        continue 'outer;
777                    }
778                }
779            }
780
781            // A flat segment contains information to calculate
782            // parents(subset of the segment).
783            let seg = match self.find_flat_segment_including_id(head)? {
784                Some(seg) => seg,
785                None => return head.not_found(),
786            };
787            let seg_span = seg.span()?;
788            let seg_low = seg_span.low;
789            let seg_set: IdSet = seg_span.into();
790            let seg_set = seg_set.intersection(&set);
791
792            // Get parents for a linear set (ex. parent(i) is (i - 1)).
793            fn parents_linear(set: &IdSet) -> IdSet {
794                debug_assert!(!set.contains(Id::MIN));
795                IdSet::from_sorted_spans(set.as_spans().iter().map(|s| s.low - 1..=s.high - 1))
796            }
797
798            let parent_set = if seg_set.contains(seg_low) {
799                let mut parent_set = parents_linear(&seg_set.difference(&IdSet::from(seg_low)));
800                parent_set.push_set(&IdSet::from_spans(seg.parents()?));
801                parent_set
802            } else {
803                parents_linear(&seg_set)
804            };
805
806            set = set.difference(&seg_set);
807            trace(&|| format!(" push lv0 {:?}", &parent_set));
808            result = result.union(&parent_set);
809        }
810
811        trace(&|| format!(" result: {:?}", &result));
812
813        Ok(result)
814    }
815
816    /// Get parents of a single `id`. Preserve the order.
817    fn parent_ids(&self, id: Id) -> Result<Vec<Id>> {
818        let seg = match self.find_flat_segment_including_id(id)? {
819            Some(seg) => seg,
820            None => return id.not_found(),
821        };
822        let span = seg.span()?;
823        if id == span.low {
824            Ok(seg.parents()?)
825        } else {
826            Ok(vec![id - 1])
827        }
828    }
829
830    /// Calculate the n-th first ancestor. If `n` is 0, return `id` unchanged.
831    /// If `n` is 1, return the first parent of `id`.
832    fn first_ancestor_nth(&self, id: Id, n: u64) -> Result<Id> {
833        match self.try_first_ancestor_nth(id, n)? {
834            None => Err(Programming(format!(
835                "{}~{} cannot be resolved - no parents",
836                &id, n
837            ))),
838            Some(id) => Ok(id),
839        }
840    }
841
842    /// Calculate the n-th first ancestor. If `n` is 0, return `id` unchanged.
843    /// If `n` is 1, return the first parent of `id`.
844    /// If `n` is too large, exceeding the distance between the root and `id`,
845    /// return `None`.
846    fn try_first_ancestor_nth(&self, mut id: Id, mut n: u64) -> Result<Option<Id>> {
847        // PERF: this can have fast paths from high-level segments if high-level
848        // segments have extra information.
849        while n > 0 {
850            let seg = self
851                .find_flat_segment_including_id(id)?
852                .ok_or_else(|| id.not_found_error())?;
853            // segment: low ... id ... high
854            //          \________/
855            //            delta
856            let low = seg.span()?.low;
857            let delta = id.0 - low.0;
858            let step = delta.min(n);
859            id = id - step;
860            n -= step;
861            if n > 0 {
862                // Follow the first parent.
863                id = match seg.parents()?.get(0) {
864                    None => return Ok(None),
865                    Some(&id) => id,
866                };
867                n -= 1;
868            }
869        }
870        Ok(Some(id))
871    }
872
873    /// Convert an `id` to `x~n` form with the given constraint.
874    ///
875    /// Return `None` if the conversion can not be done with the constraints.
876    fn to_first_ancestor_nth(
877        &self,
878        id: Id,
879        constraint: FirstAncestorConstraint,
880    ) -> Result<Option<(Id, u64)>> {
881        match constraint {
882            FirstAncestorConstraint::None => Ok(Some((id, 0))),
883            FirstAncestorConstraint::KnownUniversally { heads } => {
884                self.to_first_ancestor_nth_known_universally(id, heads)
885            }
886        }
887    }
888
889    /// See `FirstAncestorConstraint::KnownUniversally`.
890    ///
891    /// Try to represent `id` as `x~n` (revset notation) form, where `x` must
892    /// be in `heads + parents(merge() & ancestors(heads))`.
893    ///
894    /// Return `None` if `id` is not part of `ancestors(heads)`.
895    fn to_first_ancestor_nth_known_universally(
896        &self,
897        id: Id,
898        heads: IdSet,
899    ) -> Result<Option<(Id, u64)>> {
900        // Do not track errors for the first time. This has lower overhead.
901        match self.to_first_ancestor_nth_known_universally_with_errors(id, &heads, None) {
902            Ok(v) => Ok(v),
903            Err(_) => {
904                // Pay additional overhead to get error message.
905                self.to_first_ancestor_nth_known_universally_with_errors(
906                    id,
907                    &heads,
908                    Some(Vec::new()),
909                )
910            }
911        }
912    }
913
914    /// Internal implementation of `to_first_ancestor_nth_known_universally`.
915    ///
916    /// If `details` is not `None`, it is used to store extra error messages
917    /// with some extra overhead.
918    fn to_first_ancestor_nth_known_universally_with_errors(
919        &self,
920        mut id: Id,
921        heads: &IdSet,
922        mut details: Option<Vec<String>>,
923    ) -> Result<Option<(Id, u64)>> {
924        // To figure `x~n` we look at the flat segment containing `id`, and check:
925        // - If the flat segment overlaps with heads, then just use the overlapped id as `x`.
926        // - Try to find parents of merges within the flat segment (the merges belong to other
927        //   "child segment"s). If the merge is an ancestor of `heads`, then the parent of the
928        //   merge can be used as `x`, if `n` is not 0.
929        // - Otherwise, try to follow connected flat segment (with single parent), convert the
930        //   `x~n` question to a `y~(n+m)` question, then start over.
931        let mut trace = |msg: &dyn Fn() -> String| {
932            if let Some(details) = details.as_mut() {
933                details.push(msg().trim().to_string());
934            }
935            trace!(target: "dag::algo::toxn", "{}", msg());
936        };
937
938        let ancestors = self.ancestors(heads.clone())?;
939        if !ancestors.contains(id) {
940            return Ok(None);
941        }
942
943        let mut n = 0;
944        debug!(target: "dag::algo::toxn", "resolving {:?}", id);
945        let result = 'outer: loop {
946            let seg = self
947                .find_flat_segment_including_id(id)?
948                .ok_or_else(|| id.not_found_error())?;
949            let span = seg.span()?;
950            let head = span.high;
951            trace(&|| format!(" in seg {:?}", &seg));
952            // Can we use an `id` from `heads` as `x`?
953            let intersected = heads.intersection(&(id..=head).into());
954            if !intersected.is_empty() {
955                let head = intersected.min().unwrap();
956                n += head.0 - id.0;
957                trace(&|| format!("  contains head ({:?})", head));
958                break 'outer (head, n);
959            }
960            // Does this segment contain any `x` that is an ancestor of a merge,
961            // that can be used as `x`?
962            //
963            // Note the `x` does not have to be the head of `seg`. For example,
964            //
965            //      1--2--3--4 (seg, span: 1..=4)
966            //          \
967            //           5--6  (child_seg, span: 5..=6, parents: [2])
968            //
969            // During `to_first_ancestor_nth(2, [6])`, `1` needs to be translated
970            // to `6~3`, not `4~3`. Needs to lookup parents in the range `1..=4`.
971            let mut next_id_n = None;
972            let parent_span = span.low.max(id)..=span.high;
973            for entry in self.iter_flat_segments_with_parent_span(parent_span.into())? {
974                let (parent_id, child_seg) = entry?;
975                trace(&|| format!("  {:?} has child seg ({:?})", parent_id, &child_seg));
976                let child_low = child_seg.low()?;
977                if !ancestors.contains(child_low) {
978                    // `child_low` is outside `ancestors(heads)`, cannot use it
979                    // or its parents as references.
980                    trace(&|| "   child seg out of range".to_string());
981                    continue;
982                }
983
984                if child_seg.parent_count()? > 1 {
985                    // `child_low` is a merge included in `ancestors`, and
986                    // `parent_id` is a parent of the merge. Therefore
987                    // `parent_id` might be used as `x`.
988                    let next_n = n + parent_id.0 - id.0;
989                    if next_n > 0 {
990                        break 'outer (parent_id, next_n);
991                    }
992
993                    // Fragmented linear segments. Convert id~n to next_id~next_n.
994                    let child_parents = child_seg.parents()?;
995                    match child_parents.get(0) {
996                        None => {
997                            return bug(format!(
998                                "segment {:?} should have parent {:?}",
999                                &child_seg, &parent_id
1000                            ));
1001                        }
1002                        Some(p) => {
1003                            if &parent_id != p {
1004                                // Not the first parent. Do not follow it.
1005                                trace(&|| {
1006                                    format!(
1007                                        "   child seg cannot be followed ({:?} is not p1)",
1008                                        &parent_id
1009                                    )
1010                                });
1011                                continue;
1012                            }
1013                        }
1014                    }
1015                }
1016
1017                debug_assert!(ancestors.contains(child_low));
1018                let next_id = child_low;
1019                let next_n = n + 1 + parent_id.0 - id.0;
1020                trace(&|| format!("  follow {:?}~{}", next_id, next_n));
1021                next_id_n = Some((next_id, next_n));
1022                break;
1023            }
1024            match next_id_n {
1025                // This should not happen if indexes and segments are legit.
1026                None => {
1027                    let mut message = format!(
1028                        concat!(
1029                            "cannot convert {} to x~n form (x must be in ",
1030                            "`H + parents(ancestors(H) & merge())` where H = {:?})",
1031                        ),
1032                        id, &heads,
1033                    );
1034                    if let Some(details) = details {
1035                        if !details.is_empty() {
1036                            message += &format!(" (trace: {})", details.join(", "));
1037                        }
1038                    }
1039                    return Err(Programming(message));
1040                }
1041                Some((next_id, next_n)) => {
1042                    id = next_id;
1043                    n = next_n;
1044                }
1045            }
1046        };
1047        trace!(target: "dag::algo::toxn", " found: {:?}", &result);
1048        Ok(Some(result))
1049    }
1050
1051    /// Calculate heads of the given set.
1052    fn heads(&self, set: IdSet) -> Result<IdSet> {
1053        Ok(set.difference(&self.parents(set.clone())?))
1054    }
1055
1056    /// Calculate children for a single `Id`.
1057    fn children_id(&self, id: Id) -> Result<IdSet> {
1058        let mut result = BTreeSet::new();
1059        fn trace(msg: &dyn Fn() -> String) {
1060            trace!(target: "dag::algo::children_id", "{}", msg());
1061        }
1062        debug!(target: "dag::algo::children_id", "children_id({:?})", id);
1063        for seg in self.iter_flat_segments_with_parent(id)? {
1064            let seg = seg?;
1065            let child_id = seg.low()?;
1066            trace(&|| format!(" push {:?} via parent index", child_id));
1067            result.insert(child_id);
1068        }
1069        if let Some(seg) = self.find_flat_segment_including_id(id)? {
1070            let span = seg.span()?;
1071            if span.high != id {
1072                let child_id = id + 1;
1073                trace(&|| format!(" push {:?} via flat segment definition", child_id));
1074                result.insert(child_id);
1075            }
1076        }
1077        let result = IdSet::from_sorted_spans(result.into_iter().rev());
1078        trace(&|| format!(" result: {:?}", &result));
1079        Ok(result)
1080    }
1081
1082    /// Calculate children of the given set.
1083    fn children(&self, set: IdSet) -> Result<IdSet> {
1084        if set.count() < 5 {
1085            let result = set
1086                .iter_desc()
1087                .fold(Ok(IdSet::empty()), |acc: Result<IdSet>, id| match acc {
1088                    Ok(acc) => Ok(acc.union(&self.children_id(id)?)),
1089                    Err(err) => Err(err),
1090                })?;
1091            #[cfg(test)]
1092            {
1093                let result_set = self.children_set(set)?;
1094                assert_eq!(result.as_spans(), result_set.as_spans());
1095            }
1096            Ok(result)
1097        } else {
1098            self.children_set(set)
1099        }
1100    }
1101
1102    fn children_set(&self, set: IdSet) -> Result<IdSet> {
1103        fn trace(msg: &dyn Fn() -> String) {
1104            trace!(target: "dag::algo::children", "{}", msg());
1105        }
1106        debug!(target: "dag::algo::children", "children({:?})", &set);
1107
1108        // The algorithm works as follows:
1109        // - Iterate through level N segments [1].
1110        // - Considering a level N segment S:
1111        //   Could we take the entire S?
1112        //     - If `set` covers `S - S.head + S.parents`, then yes, take S
1113        //       and continue with the next level N segment.
1114        //   Could we ignore the entire S and check the next level N segment?
1115        //     - If (S + S.parents) do not overlap with `set`, then yes, skip.
1116        //   No fast paths. Is S a flat segment?
1117        //     - No:  Iterate through level N-1 segments covered by S,
1118        //            recursively (goto [1]).
1119        //     - Yes: Figure out children in the flat segment.
1120        //            Push them to the result.
1121
1122        struct Context<'a, Store: ?Sized> {
1123            this: &'a Store,
1124            set: IdSet,
1125            // Lower bound based on the input set.
1126            result_lower_bound: Id,
1127            result: IdSet,
1128        }
1129
1130        fn visit_segments<S: IdDagStore + ?Sized>(
1131            ctx: &mut Context<S>,
1132            mut range: IdSpan,
1133            level: Level,
1134        ) -> Result<()> {
1135            trace(&|| format!("visit range {:?} lv{}", &range, level));
1136            let mut visited = false;
1137            for seg in ctx.this.iter_segments_descending(range.high, level)? {
1138                let seg = seg?;
1139                let span = seg.span()?;
1140                visited = true;
1141                trace(&|| format!(" seg {:?}", &seg));
1142                // `range` is all valid. If a high-level segment misses it, try
1143                // a lower level one.
1144                if span.high < range.high {
1145                    let low_id = (span.high + 1).max(range.low);
1146                    if low_id > range.high {
1147                        return Ok(());
1148                    }
1149                    let missing_range = IdSpan::from(low_id..=range.high);
1150                    if level > 0 {
1151                        trace(&|| {
1152                            format!("  visit missing range at lower level: {:?}", &missing_range)
1153                        });
1154                        visit_segments(ctx, missing_range, level - 1)?;
1155                    } else {
1156                        return bug(format!(
1157                            "flat segments should have covered: {:?} returned by all() (range: {:?})",
1158                            missing_range, range,
1159                        ));
1160                    }
1161                }
1162
1163                // Update range.high for the next iteration.
1164                range.high = span.low.max(Id(1)) - 1;
1165
1166                // Stop iteration?
1167                if span.high < range.low || span.high < ctx.result_lower_bound {
1168                    break;
1169                }
1170
1171                let parents = seg.parents()?;
1172
1173                // Count of parents overlapping with `set`.
1174                let overlapped_parents = parents.iter().filter(|p| ctx.set.contains(**p)).count();
1175
1176                // Remove the `high`. This segment cannot calculate
1177                // `children(high)`. If `high` matches a `parent` of
1178                // another segment, that segment will handle it.
1179                // This is related to the flat segment children path
1180                // below.
1181                let intersection = ctx
1182                    .set
1183                    .intersection(&span.into())
1184                    .difference(&span.high.into());
1185
1186                if !seg.has_root()? {
1187                    // A segment must have at least one parent to be rootless.
1188                    debug_assert!(!parents.is_empty());
1189                    // Fast path: Take the entire segment directly.
1190                    if overlapped_parents == parents.len()
1191                        && intersection.count() + 1 == span.count()
1192                    {
1193                        trace(&|| format!(" push lv{} {:?} (rootless fast path)", level, &span));
1194                        ctx.result.push_span(span);
1195                        continue;
1196                    }
1197                }
1198
1199                if !intersection.is_empty() {
1200                    if level > 0 {
1201                        visit_segments(ctx, span, level - 1)?;
1202                        continue;
1203                    } else {
1204                        // Flat segment children path.
1205                        // children(x..=y) = (x+1)..=(y+1) if x..=(y+1) is in a flat segment.
1206                        let seg_children = IdSet::from_spans(
1207                            intersection
1208                                .as_spans()
1209                                .iter()
1210                                .map(|s| s.low + 1..=s.high + 1),
1211                        );
1212                        trace(&|| format!(" push {:?} (flat segment range)", &seg_children));
1213                        ctx.result.push_set(&seg_children);
1214                    }
1215                }
1216
1217                if overlapped_parents > 0 {
1218                    if level > 0 {
1219                        visit_segments(ctx, span, level - 1)?;
1220                    } else {
1221                        // child(any parent) = lowest id in this flat segment.
1222                        trace(&|| {
1223                            format!(" push {:?} (overlapped parents of flat segment)", &span.low)
1224                        });
1225                        ctx.result.push_span(span.low.into());
1226                    }
1227                }
1228            }
1229            // The high level segment misses the range. Try a lower level.
1230            if !visited {
1231                if level == 0 {
1232                    return bug(format!(
1233                        "flat segments should have covered: {:?} returned by all()",
1234                        range,
1235                    ));
1236                }
1237                visit_segments(ctx, range, level - 1)?;
1238            }
1239            Ok(())
1240        }
1241
1242        let result_lower_bound = set.min().unwrap_or(Id::MAX);
1243        let mut ctx = Context {
1244            this: self,
1245            set,
1246            result_lower_bound,
1247            result: IdSet::empty(),
1248        };
1249
1250        let max_level = self.max_level()?;
1251        for span in self.all()?.as_spans() {
1252            visit_segments(&mut ctx, *span, max_level)?;
1253        }
1254
1255        trace(&|| format!(" result: {:?}", &ctx.result));
1256
1257        Ok(ctx.result)
1258    }
1259
1260    /// Calculate roots of the given set.
1261    fn roots(&self, set: IdSet) -> Result<IdSet> {
1262        Ok(set.difference(&self.children(set.clone())?))
1263    }
1264
1265    /// Calculate one "greatest common ancestor" of the given set.
1266    ///
1267    /// If there are no common ancestors, return None.
1268    /// If there are multiple greatest common ancestors, pick one arbitrarily.
1269    /// Use `gca_all` to get all of them.
1270    fn gca_one(&self, set: IdSet) -> Result<Option<Id>> {
1271        // The set is sorted in DESC order. Therefore its first item can be used as the result.
1272        Ok(self.common_ancestors(set)?.max())
1273    }
1274
1275    /// Calculate all "greatest common ancestor"s of the given set.
1276    /// `gca_one` is faster if an arbitrary answer is ok.
1277    fn gca_all(&self, set: IdSet) -> Result<IdSet> {
1278        self.heads_ancestors(self.common_ancestors(set)?)
1279    }
1280
1281    /// Calculate all common ancestors of the given set.
1282    ///
1283    /// ```plain,ignore
1284    /// intersect(ancestors(i) for i in set)
1285    /// ```
1286    fn common_ancestors(&self, set: IdSet) -> Result<IdSet> {
1287        let result = match set.count() {
1288            0 => set,
1289            1 => self.ancestors(set)?,
1290            2 => {
1291                // Fast path that does not calculate "heads".
1292                let mut iter = set.iter_desc();
1293                let a = iter.next().unwrap();
1294                let b = iter.next().unwrap();
1295                self.ancestors(a.into())?
1296                    .intersection(&self.ancestors(b.into())?)
1297            }
1298            _ => {
1299                // Try to reduce the size of `set`.
1300                // `common_ancestors(X)` = `common_ancestors(roots(X))`.
1301                let set = self.roots(set)?;
1302                set.iter_desc()
1303                    .fold(Ok(IdSet::full()), |set: Result<IdSet>, id| {
1304                        Ok(set?.intersection(&self.ancestors(id.into())?))
1305                    })?
1306            }
1307        };
1308        Ok(result)
1309    }
1310
1311    /// Test if `ancestor_id` is an ancestor of `descendant_id`.
1312    fn is_ancestor(&self, ancestor_id: Id, descendant_id: Id) -> Result<bool> {
1313        let set = self.ancestors(descendant_id.into())?;
1314        Ok(set.contains(ancestor_id))
1315    }
1316
1317    /// Calculate "heads" of the ancestors of the given [`IdSet`]. That is,
1318    /// Find Y, which is the smallest subset of set X, where `ancestors(Y)` is
1319    /// `ancestors(X)`.
1320    ///
1321    /// This is faster than calculating `heads(ancestors(set))`.
1322    ///
1323    /// This is different from `heads`. In case set contains X and Y, and Y is
1324    /// an ancestor of X, but not the immediate ancestor, `heads` will include
1325    /// Y while this function won't.
1326    fn heads_ancestors(&self, set: IdSet) -> Result<IdSet> {
1327        let mut remaining = set;
1328        let mut result = IdSet::empty();
1329        while let Some(id) = remaining.max() {
1330            result.push_span((id..=id).into());
1331            // Remove ancestors reachable from that head.
1332            remaining = remaining.difference(&self.ancestors(id.into())?);
1333        }
1334        Ok(result)
1335    }
1336
1337    /// Calculate the "dag range" - ids reachable from both sides.
1338    ///
1339    /// ```plain,ignore
1340    /// intersect(ancestors(heads), descendants(roots))
1341    /// ```
1342    ///
1343    /// This is O(flat segments), or O(merges).
1344    fn range(&self, roots: IdSet, mut heads: IdSet) -> Result<IdSet> {
1345        if roots.is_empty() {
1346            return Ok(IdSet::empty());
1347        }
1348        if heads.is_empty() {
1349            return Ok(IdSet::empty());
1350        }
1351
1352        fn trace(msg: &dyn Fn() -> String) {
1353            trace!(target: "dag::algo::range", "{}", msg());
1354        }
1355        debug!(target: "dag::algo::range", "range({:?}, {:?})", &roots, &heads);
1356
1357        // Remove uninteresting heads. Make `ancestors(heads)` a bit easier.
1358        let min_root_id = roots.min().unwrap();
1359        let min_head_id = heads.min().unwrap();
1360        if min_head_id < min_root_id {
1361            let span = min_root_id..=Id::MAX;
1362            heads = heads.intersection(&span.into());
1363            trace(&|| format!(" removed unreachable heads: {:?}", &heads));
1364        }
1365
1366        let ancestors_of_heads = self.ancestors(heads)?;
1367        let result = self.descendants_intersection(&roots, &ancestors_of_heads)?;
1368
1369        #[cfg(test)]
1370        {
1371            let intersection = ancestors_of_heads.intersection(&result);
1372            assert_eq!(result.as_spans(), intersection.as_spans());
1373        }
1374
1375        trace(&|| format!(" result: {:?}", &result));
1376        Ok(result)
1377    }
1378
1379    /// Calculate the descendants of the given set.
1380    ///
1381    /// Logically equivalent to `range(set, all())`.
1382    ///
1383    /// This is O(flat segments), or O(merges).
1384    fn descendants(&self, set: IdSet) -> Result<IdSet> {
1385        debug!(target: "dag::algo::descendants", "descendants({:?})", &set);
1386        let roots = set;
1387        let result = self.descendants_intersection(&roots, &self.all()?)?;
1388        trace!(target: "dag::algo::descendants", " result: {:?}", &result);
1389        Ok(result)
1390    }
1391
1392    /// Calculate (descendants(roots) & ancestors).
1393    ///
1394    /// This is O(flat segments), or O(merges).
1395    ///
1396    /// `ancestors(ancestors)` must be equal to `ancestors`.
1397    fn descendants_intersection(&self, roots: &IdSet, ancestors: &IdSet) -> Result<IdSet> {
1398        fn trace(msg: &dyn Fn() -> String) {
1399            trace!(target: "dag::algo::descendants_intersection", "{}", msg());
1400        }
1401
1402        debug_assert_eq!(
1403            ancestors.count(),
1404            self.ancestors(ancestors.clone())?.count()
1405        );
1406
1407        // Filter out roots that are not reachable from `ancestors`.
1408        let roots = ancestors.intersection(roots);
1409        let min_root = match roots.min() {
1410            Some(id) => id,
1411            None => return Ok(IdSet::empty()),
1412        };
1413        let max_root = roots.max().unwrap();
1414
1415        // `result` could be initially `roots`. However that breaks ordering
1416        // (cannot use `result.push_span_asc` below).
1417        let mut result = IdSet::empty();
1418
1419        // For the master group, use linear scan for flat segments. This is
1420        // usually more efficient, because the master group usually only has 1
1421        // head, and most segments will be included.
1422        let master_max_id = ancestors
1423            .max()
1424            .unwrap_or(Id::MIN)
1425            .min(Group::MASTER.max_id());
1426        for seg in self.iter_segments_ascending(min_root, 0)? {
1427            let seg = seg?;
1428            let span = seg.span()?;
1429            if span.low > master_max_id {
1430                break;
1431            }
1432            trace(&|| format!(" visit {:?}", &seg));
1433            let parents = seg.parents()?;
1434            let low = if !parents.is_empty()
1435                && parents
1436                    .iter()
1437                    .any(|&p| result.contains(p) || roots.contains(p))
1438            {
1439                // Include `span.low` if any (span.low's) parent is in the result set.
1440                span.low
1441            } else {
1442                // No parents in `result` set.
1443                match result
1444                    .intersection_span_min(span)
1445                    .or_else(|| roots.intersection(&span.into()).min())
1446                {
1447                    // `span` intersect partially with `roots | result`.
1448                    Some(id) => id,
1449                    None => continue,
1450                }
1451            };
1452            if low > master_max_id {
1453                break;
1454            }
1455            let result_span = IdSpan::from(low..=span.high);
1456            trace(&|| format!("  push {:?}", &result_span));
1457            result.push_span_asc(result_span);
1458        }
1459
1460        // For the non-master group, only check flat segments covered by
1461        // `ancestors`.
1462        //
1463        // This is usually more efficient, because the non-master group can
1464        // have lots of heads (created in the past) that are no longer visible
1465        // or interesting. For a typical query like `x::y`, it might just select
1466        // a few heads in the non-master group. It's a waste of time to iterate
1467        // through lots of invisible segments.
1468        let non_master_spans = ancestors.intersection(
1469            &IdSpan::from(Group::NON_MASTER.min_id()..=Group::NON_MASTER.max_id()).into(),
1470        );
1471        // Visit in ascending order.
1472        let mut span_iter = non_master_spans.as_spans().iter().rev().cloned();
1473        let mut next_optional_span = span_iter.next();
1474        while let Some(next_span) = next_optional_span {
1475            // The "next_span" could be larger than a flat segment.
1476            let seg = match self.find_flat_segment_including_id(next_span.low)? {
1477                Some(seg) => seg,
1478                None => break,
1479            };
1480            let seg_span = seg.span()?;
1481            trace(&|| format!(" visit {:?} => {:?}", &next_span, &seg));
1482            // The overlap part of the flat segment and the span from 'ancestors'.
1483            let mut overlap_span =
1484                IdSpan::from(seg_span.low.max(next_span.low)..=seg_span.high.min(next_span.high));
1485            if roots.contains(overlap_span.low) {
1486                // Descendants includes 'overlap_span' since 'low' is in 'roots'.
1487                // (no need to check 'result' - it does not include anything in 'overlap')
1488                trace(&|| format!("  push {:?} (root contains low)", &overlap_span));
1489                result.push_span_asc(overlap_span);
1490            } else if next_span.low == seg_span.low {
1491                let parents = seg.parents()?;
1492                if !parents.is_empty()
1493                    && parents
1494                        .into_iter()
1495                        .any(|p| result.contains(p) || roots.contains(p))
1496                {
1497                    // Descendants includes 'overlap_span' since parents are in roots or result.
1498                    trace(&|| format!("  push {:?} (root contains parent)", &overlap_span));
1499                    result.push_span_asc(overlap_span);
1500                } else if overlap_span.low <= max_root && overlap_span.high >= min_root {
1501                    // If 'overlap_span' overlaps with roots, part of it should be in
1502                    // 'Descendants' result:
1503                    //
1504                    //            root1  root2
1505                    //               v    v
1506                    //    (low) |-- overlap-span --| (high)
1507                    //               |-------------|
1508                    //               push this part to result
1509                    let roots_intesection = roots.intersection(&overlap_span.into());
1510                    if let Some(id) = roots_intesection.min() {
1511                        overlap_span.low = id;
1512                        trace(&|| format!("  push {:?} (root in span)", &overlap_span));
1513                        result.push_span_asc(overlap_span);
1514                    }
1515                }
1516            } else {
1517                // This block practically does not happen if `ancestors` is
1518                // really "ancestors" (aka. `ancestors(ancestors)` is
1519                // `ancestors`), because `ancestors` will not include
1520                // a flat segment without including the segment's low id.
1521                //
1522                // But, in case it happens (because `ancestors` is weird),
1523                // do something sensible.
1524
1525                // `next_span.low - 1` is the parent of `next_span.low`,
1526                debug_assert!(
1527                    false,
1528                    "ancestors in descendants_intersection is
1529                              not real ancestors"
1530                );
1531                let p = next_span.low - 1;
1532                if result.contains(p) || roots.contains(p) {
1533                    trace(&|| format!("  push {:?} ({:?} was included)", &overlap_span, p));
1534                    result.push_span_asc(overlap_span);
1535                }
1536            }
1537            // Update next_optional_span.
1538            next_optional_span = IdSpan::try_from_bounds(overlap_span.high + 1..=next_span.high)
1539                .or_else(|| span_iter.next());
1540        }
1541
1542        trace(&|| format!(" intersect with {:?}", &ancestors));
1543        result = result.intersection(ancestors);
1544
1545        Ok(result)
1546    }
1547
1548    /// Find segments that cover the given `id_set` exactly.
1549    /// Returned segments are in DESC order.
1550    fn id_set_to_id_segments(&self, id_set: &IdSet) -> Result<VecDeque<IdSegment>> {
1551        let max_level = self.max_level()?;
1552        self.id_set_to_id_segments_with_max_level(id_set, max_level)
1553    }
1554
1555    /// Find lower level segments that cover the given `id_segment` exactly.
1556    /// Returned segments are in DESC order.
1557    fn id_segment_to_lower_level_id_segments(
1558        &self,
1559        id_segment: &IdSegment,
1560    ) -> Result<VecDeque<IdSegment>> {
1561        let max_level = match id_segment.level {
1562            0 => return Err(Programming(
1563                "id_segment_to_lower_level_id_segments() requires non-flat (level > 0) segments"
1564                    .to_string(),
1565            )),
1566            l => l - 1,
1567        };
1568        let span = IdSpan::new(id_segment.low, id_segment.high);
1569        let id_set = IdSet::from(span);
1570        self.id_set_to_id_segments_with_max_level(&id_set, max_level)
1571    }
1572
1573    /// Find segments with max level limitation that cover the given `id_set` exactly.
1574    /// Returned segments are in DESC order.
1575    fn id_set_to_id_segments_with_max_level(
1576        &self,
1577        id_set: &IdSet,
1578        max_level: Level,
1579    ) -> Result<VecDeque<IdSegment>> {
1580        fn trace(msg: &dyn Fn() -> String) {
1581            trace!(target: "dag::algo::tosegments", "{}", msg());
1582        }
1583        debug!(target: "dag::algo::tosegments", "id_set_to_id_segments({:?}, level={})", &id_set, max_level);
1584
1585        let max_level = max_level.min(self.max_level()?);
1586        let mut result = VecDeque::new();
1587        'next_span: for span in id_set.iter_span_desc() {
1588            trace(&|| format!(" visiting span {:?}", &span));
1589            let mut span: IdSpan = *span;
1590
1591            'current_span: loop {
1592                // Try high level segments.
1593                for level in (1..=max_level).rev() {
1594                    let seg = match self.find_segment_by_head_and_level(span.high, level)? {
1595                        None => continue,
1596                        Some(seg) => seg,
1597                    };
1598
1599                    let seg_span = seg.span()?;
1600                    debug_assert_eq!(seg_span.high, span.high);
1601
1602                    if seg_span.low < span.low {
1603                        trace(&|| format!("  skip  lv {} seg {:?}", level, &seg));
1604                        continue;
1605                    } else {
1606                        trace(&|| format!("  found lv {} seg {:?}", level, &seg));
1607                    }
1608
1609                    let id_seg = IdSegment {
1610                        high: seg_span.high,
1611                        low: seg_span.low,
1612                        parents: seg.parents()?,
1613                        level,
1614                        has_root: seg.has_root()?,
1615                    };
1616                    trace(&|| format!("  push {}..={}", id_seg.low, id_seg.high));
1617                    result.push_back(id_seg);
1618
1619                    if seg_span.low == span.low {
1620                        continue 'next_span;
1621                    } else {
1622                        span.high = seg_span.low - 1;
1623                        debug_assert!(span.high >= span.low);
1624                        continue 'current_span;
1625                    }
1626                }
1627
1628                // Query flat segments.
1629                let seg = match self.find_flat_segment_including_id(span.high)? {
1630                    None => return bug(format!("flat segments does not cover {:?}", span)),
1631                    Some(seg) => seg,
1632                };
1633                trace(&|| format!("  found flat seg {:?}", &seg));
1634
1635                let seg_span = seg.span()?;
1636                debug_assert!(seg_span.high >= span.high);
1637
1638                let (low, parents) = if seg_span.low < span.low {
1639                    (span.low, vec![span.low - 1])
1640                } else {
1641                    (seg_span.low, seg.parents()?)
1642                };
1643
1644                let has_root = parents.is_empty();
1645                let id_seg = IdSegment {
1646                    high: span.high,
1647                    low,
1648                    parents,
1649                    level: 0,
1650                    has_root,
1651                };
1652                trace(&|| format!("  push {}..={}", id_seg.low, id_seg.high));
1653                result.push_back(id_seg);
1654
1655                if low == span.low {
1656                    continue 'next_span;
1657                } else {
1658                    span.high = low - 1;
1659                    debug_assert!(span.high >= span.low);
1660                }
1661            }
1662        }
1663        Ok(result)
1664    }
1665}
1666
1667impl<S: IdDagStore> IdDagAlgorithm for S {}
1668
1669impl<Store: IdDagStore> Deref for IdDag<Store> {
1670    type Target = dyn IdDagAlgorithm;
1671
1672    fn deref(&self) -> &Self::Target {
1673        &self.store
1674    }
1675}
1676
1677// Full IdMap -> Sparse IdMap
1678impl<Store: IdDagStore> IdDag<Store> {
1679    /// Copy a subset of "Universal" mapping from `full_idmap` to
1680    /// `sparse_idmap`. See [`IdDag::universal`].
1681    pub async fn write_sparse_idmap<M: crate::idmap::IdMapWrite>(
1682        &self,
1683        full_idmap: &dyn crate::ops::IdConvert,
1684        sparse_idmap: &mut M,
1685    ) -> Result<()> {
1686        for id in self.universal_ids()? {
1687            let name = full_idmap.vertex_name(id).await?;
1688            sparse_idmap.insert(id, name.as_ref()).await?
1689        }
1690        Ok(())
1691    }
1692
1693    /// Return a subset of [`Id`]s that should be "Universal", including:
1694    ///
1695    /// - Heads of the master group.
1696    /// - Parents of merges (a merge is an id with multiple parents)
1697    ///   in the MASTER group.
1698    ///
1699    /// See also [`FirstAncestorConstraint::KnownUniversally`].
1700    ///
1701    /// Complexity: `O(flat segments)` for both time and space.
1702    pub fn universal_ids(&self) -> Result<BTreeSet<Id>> {
1703        let mut result = BTreeSet::new();
1704        for seg in self.next_segments(Id::MIN, 0)? {
1705            let parents = seg.parents()?;
1706            // Is it a merge?
1707            if parents.len() >= 2 {
1708                for id in parents {
1709                    debug_assert_eq!(id.group(), Group::MASTER);
1710                    result.insert(id);
1711                }
1712            }
1713        }
1714        for head in self.heads_ancestors(self.master_group()?)? {
1715            debug_assert_eq!(head.group(), Group::MASTER);
1716            result.insert(head);
1717        }
1718        Ok(result)
1719    }
1720}
1721
1722/// There are many `x~n`s that all resolves to a single commit.
1723/// Constraint about `x~n`.
1724#[derive(Clone)]
1725pub enum FirstAncestorConstraint {
1726    /// No constraints.
1727    None,
1728
1729    /// `x` and its slice is expected to be known both locally and remotely.
1730    ///
1731    /// Practically, this means `x` is either:
1732    /// - referred explicitly by `heads`.
1733    /// - a parent of a merge that is an ancestor of `heads`.
1734    ///   (a merge is a vertex with more than one parents)
1735    ///   (at clone and pull time, client gets a sparse idmap including them)
1736    ///
1737    /// This also enforces `x` to be part of `ancestors(heads)`.
1738    KnownUniversally { heads: IdSet },
1739}
1740
1741impl<Store: IdDagStore> IdDag<Store> {
1742    /// Export non-master DAG as parent_id_func on HashMap.
1743    ///
1744    /// This can be expensive if there are a lot of non-master ids.
1745    /// It is currently only used to rebuild non-master groups after
1746    /// id re-assignment.
1747    pub fn non_master_parent_ids(&self) -> Result<HashMap<Id, Vec<Id>>> {
1748        let mut parents = HashMap::new();
1749        let start = Group::NON_MASTER.min_id();
1750        for seg in self.next_segments(start, 0)? {
1751            let span = seg.span()?;
1752            parents.insert(span.low, seg.parents()?);
1753            for i in (span.low + 1).to(span.high) {
1754                parents.insert(i, vec![i - 1]);
1755            }
1756        }
1757        Ok(parents)
1758    }
1759
1760    /// Remove all non master Group identifiers from the DAG.
1761    pub fn remove_non_master(&mut self) -> Result<()> {
1762        // Non-append-only change. Use a new incompatible version.
1763        self.version = VerLink::new();
1764        self.store.remove_non_master()
1765    }
1766
1767    /// Remove `set` and their descendants. Return `descendents(set)`.
1768    ///
1769    /// The returned `descendants(set)` is usually used to remove
1770    /// related entries in the `IdMap` to keep the IdMap and IdDag
1771    /// in sync.
1772    pub(crate) fn strip(&mut self, set: IdSet) -> Result<IdSet> {
1773        fn trace(msg: &dyn Fn() -> String) {
1774            trace!(target: "dag::iddag::remove", "{}", msg());
1775        }
1776
1777        let set = self.descendants(set)?;
1778        trace(&|| format!("descendants(set) = {:?}", &set));
1779
1780        if set.is_empty() {
1781            return Ok(set);
1782        }
1783
1784        // [(segment, new_high)]
1785        let mut to_resize: Vec<(Segment, Option<Id>)> = Vec::new();
1786
1787        for span in set.iter_span_desc() {
1788            trace(&|| format!(" visiting span {:?}", &span));
1789
1790            // span:   (low)    [------------] (high)
1791            // [seg]:       [-------][---][--]
1792            // new_seg:     [--]
1793            for seg in self.iter_segments_descending(span.high, 0)? {
1794                let seg = seg?;
1795                let seg_span = seg.span()?;
1796                debug_assert!(seg_span.high <= span.high); // by iter_segments_descending
1797                if seg_span.high < span.low {
1798                    break;
1799                }
1800
1801                let new_high = if seg_span.low < span.low {
1802                    let new_high = span.low - 1;
1803                    trace(&|| format!("  truncate flat seg {:?} at {}", &seg, new_high));
1804                    Some(new_high)
1805                } else {
1806                    trace(&|| format!("  remove flat seg {:?}", &seg));
1807                    None
1808                };
1809                // It's not possible to truncate a segment twice iterating different `span`s.
1810                // seg:     [------------]
1811                // [span]:     [---]  [--] <- not possible
1812                // This is because "descendants" are selected. If `i` is in the `set`, and `i`
1813                // is in a flat `seg` then `i..=seg.high` should all be in the `set` to remove.
1814                debug_assert!(to_resize.iter().all(|(s, _)| s != &seg));
1815                to_resize.push((seg, new_high));
1816            }
1817        }
1818
1819        // Notes about why we can keep existing SegmentFlags when resizing:
1820        //
1821        // There are only 2 flags: HAS_ROOT, and ONLY_HEAD.
1822        // - HAS_ROOT flag can be preserved. It's based on `parents.is_empty()`
1823        //   which is not changed by resizing.
1824        // - ONLY_HEAD flag can be perserved. Suppose the current flat segment
1825        //   has ONLY_HEAD set (i.e. heads(0:high) = [high]) and is being truncated
1826        //   from low..=high to low..=mid:
1827        //
1828        //          |<-new seg->|<-span (to remove)->|
1829        //          |<-------seg-------->|
1830        //          low-------mid-----high
1831        //
1832        //   descendants(set) is noted as `X` for removal. Now we prove that all
1833        //   `X`s must be > mid. Suppose there is an X that is <= mid, X cannot
1834        //   be in the low--high flat segment otherwise mid will be removed.
1835        //   X must be an ancestor of "high" by ONLY_HEAD definition (otherwise
1836        //   there will be more than 1 heads in 0:high). So there must be a path
1837        //   from X to high but the path does not go through mid. The graph looks
1838        //   like:
1839        //
1840        //          low-------mid--Y--high   (a flat segment)
1841        //                        /
1842        //              X--...----           (X:: are to be removed)
1843        //
1844        //   The problem is that Y, min(X::high & mid::high), is a merge. It
1845        //   contradicts flat segment defination that only "low" can be a merge.
1846        //
1847        //   Therefore X (to be removed) must be > mid. Nothing in 0:mid is removed.
1848        //   Because low..=high is a flat segment, removing `(mid+1):high`
1849        //   is removing a linear sub-graph and cannot increase number of heads.
1850        //   Therefore the new truncated segment low..=mid can still have the
1851        //   ONLY_HEAD flag.
1852
1853        for (seg, new_high) in to_resize {
1854            self.store.resize_flat_segment(&seg, new_high)?;
1855        }
1856
1857        // strip() is not an append-only change. Use an incompatible version.
1858        self.version = VerLink::new();
1859
1860        Ok(set)
1861    }
1862}
1863
1864impl<Store: Persist> Persist for IdDag<Store> {
1865    type Lock = <Store as Persist>::Lock;
1866
1867    fn lock(&mut self) -> Result<Self::Lock> {
1868        self.store.lock()
1869    }
1870
1871    fn reload(&mut self, lock: &Self::Lock) -> Result<()> {
1872        self.store.reload(lock)
1873    }
1874
1875    fn persist(&mut self, lock: &Self::Lock) -> Result<()> {
1876        self.store.persist(lock)
1877    }
1878}
1879
1880impl<Store: IdDagStore> Debug for IdDag<Store> {
1881    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1882        let mut first = true;
1883        for level in 0..=self.max_level().unwrap_or_default() {
1884            if !first {
1885                write!(f, "\n")?;
1886            }
1887            first = false;
1888            write!(f, "Lv{}:", level)?;
1889
1890            for group in Group::ALL.iter() {
1891                let segments = self.next_segments(group.min_id(), level).unwrap();
1892                if !segments.is_empty() {
1893                    for segment in segments {
1894                        write!(f, " {:?}", segment)?;
1895                    }
1896                }
1897            }
1898        }
1899        Ok(())
1900    }
1901}
1902
1903/// Lazily answer `any(...)`, `all(...)`.
1904struct LazyPredicate<P> {
1905    ids: Vec<Id>,
1906    predicate: P,
1907    true_count: usize,
1908    false_count: usize,
1909}
1910
1911impl<P: Fn(Id) -> Result<bool>> LazyPredicate<P> {
1912    pub fn new(ids: Vec<Id>, predicate: P) -> Self {
1913        Self {
1914            ids,
1915            predicate,
1916            true_count: 0,
1917            false_count: 0,
1918        }
1919    }
1920
1921    pub fn any(&mut self) -> Result<bool> {
1922        loop {
1923            if self.true_count > 0 {
1924                return Ok(true);
1925            }
1926            if self.true_count + self.false_count == self.ids.len() {
1927                return Ok(false);
1928            }
1929            self.test_one()?;
1930        }
1931    }
1932
1933    pub fn all(&mut self) -> Result<bool> {
1934        loop {
1935            if self.true_count == self.ids.len() {
1936                return Ok(true);
1937            }
1938            if self.false_count > 0 {
1939                return Ok(false);
1940            }
1941            self.test_one()?;
1942        }
1943    }
1944
1945    fn test_one(&mut self) -> Result<()> {
1946        let i = self.true_count + self.false_count;
1947        if (self.predicate)(self.ids[i])? {
1948            self.true_count += 1;
1949        } else {
1950            self.false_count += 1;
1951        }
1952        Ok(())
1953    }
1954}
1955
1956fn default_seg_size() -> usize {
1957    DEFAULT_SEG_SIZE.load(Ordering::Acquire)
1958}
1959
1960/// Update global default segment size. The segment size affects high-level
1961/// segments. A level N segment covers at most "segment size" level N-1
1962/// segments.
1963///
1964/// Setting this to a smaller number like 3 can help demonstrate the concept
1965/// using smaller graphs. Panic if `new_size` is less than 2.
1966///
1967/// Existing IdDags are not affected.
1968pub fn set_default_seg_size(new_size: usize) {
1969    DEFAULT_SEG_SIZE.store(new_size, Ordering::Release);
1970}
1971
1972#[cfg(test)]
1973mod tests {
1974    use tempfile::tempdir;
1975
1976    use super::*;
1977    use crate::iddagstore::tests::dump_store_state;
1978
1979    #[test]
1980    fn test_segment_basic_lookups() {
1981        let dir = tempdir().unwrap();
1982        let mut dag = IdDag::open(dir.path()).unwrap();
1983        assert_eq!(dag.all().unwrap().count(), 0);
1984
1985        let flags = SegmentFlags::empty();
1986
1987        dag.insert(flags, 0, Id::MIN, Id(50), &vec![]).unwrap();
1988        assert_eq!(dag.all().unwrap().max(), Some(Id(50)));
1989
1990        dag.insert(flags, 0, Id(51), Id(100), &vec![Id(50)])
1991            .unwrap();
1992        assert_eq!(dag.all().unwrap().max(), Some(Id(100)));
1993
1994        dag.insert(flags, 0, Id(101), Id(150), &vec![]).unwrap();
1995        assert_eq!(dag.all().unwrap().max(), Some(Id(150)));
1996
1997        dag.insert(flags, 1, Id::MIN, Id(150), &vec![]).unwrap();
1998        assert_eq!(dag.all().unwrap().max(), Some(Id(150)));
1999
2000        // Helper functions to make the below lines shorter.
2001        let low_by_head = |head, level| match dag.find_segment_by_head_and_level(Id(head), level) {
2002            Ok(Some(seg)) => seg.span().unwrap().low.0 as i64,
2003            Ok(None) => -1,
2004            _ => panic!("unexpected error"),
2005        };
2006
2007        let low_by_id = |id| match dag.find_flat_segment_including_id(Id(id)) {
2008            Ok(Some(seg)) => seg.span().unwrap().low.0 as i64,
2009            Ok(None) => -1,
2010            _ => panic!("unexpected error"),
2011        };
2012
2013        assert_eq!(low_by_head(0, 0), -1);
2014        assert_eq!(low_by_head(49, 0), -1);
2015        assert_eq!(low_by_head(50, 0), -1); // 0..=50 is merged into 0..=100.
2016        assert_eq!(low_by_head(51, 0), -1);
2017        assert_eq!(low_by_head(150, 0), 101);
2018        assert_eq!(low_by_head(100, 1), -1);
2019
2020        assert_eq!(low_by_id(0), 0);
2021        assert_eq!(low_by_id(30), 0);
2022        assert_eq!(low_by_id(49), 0);
2023        assert_eq!(low_by_id(50), 0);
2024        assert_eq!(low_by_id(51), 0);
2025        assert_eq!(low_by_id(52), 0);
2026        assert_eq!(low_by_id(99), 0);
2027        assert_eq!(low_by_id(100), 0);
2028        assert_eq!(low_by_id(101), 101);
2029        assert_eq!(low_by_id(102), 101);
2030        assert_eq!(low_by_id(149), 101);
2031        assert_eq!(low_by_id(150), 101);
2032        assert_eq!(low_by_id(151), -1);
2033    }
2034
2035    fn get_parents(id: Id) -> Result<Vec<Id>> {
2036        match id.0 {
2037            0 | 1 | 2 => Ok(Vec::new()),
2038            _ => Ok(vec![id - 1, Id(id.0 / 2)]),
2039        }
2040    }
2041
2042    #[test]
2043    fn test_sync_reload() {
2044        let dir = tempdir().unwrap();
2045        let mut dag = IdDag::open(dir.path()).unwrap();
2046        assert_eq!(dag.all().unwrap().max(), None);
2047
2048        let lock = dag.lock().unwrap();
2049        dag.reload(&lock).unwrap();
2050        dag.build_segments(Id(0), &get_parents).unwrap();
2051        dag.build_segments(Id(1001), &get_parents).unwrap();
2052
2053        dag.persist(&lock).unwrap();
2054        drop(lock);
2055
2056        assert_eq!(dag.max_level().unwrap(), 3);
2057        assert_eq!(
2058            dag.children(Id(1000).into())
2059                .unwrap()
2060                .iter_desc()
2061                .collect::<Vec<Id>>(),
2062            vec![Id(1001)]
2063        );
2064    }
2065
2066    #[test]
2067    fn test_all() {
2068        let dir = tempdir().unwrap();
2069        let mut dag = IdDag::open(dir.path()).unwrap();
2070        assert!(dag.all().unwrap().is_empty());
2071        dag.build_segments(Id(1001), &get_parents).unwrap();
2072        let all = dag.all().unwrap();
2073        // Id 0 is not referred.
2074        assert_eq!(format!("{:?}", &all), "1..=1001");
2075        assert_eq!(all.count(), 1001);
2076
2077        // Insert discontinuous segments.
2078        let mut dag = IdDag::new_in_process();
2079        // 10..=20
2080        dag.build_segments(Id(20), &|p| {
2081            Ok(if p > Id(10) { vec![p - 1] } else { vec![] })
2082        })
2083        .unwrap();
2084        // 3..=5, and 30..=40
2085        dag.build_segments(Id(40), &|p| {
2086            Ok(if p == Id(30) {
2087                vec![Id(5), Id(20)]
2088            } else if p > Id(3) {
2089                vec![p - 1]
2090            } else {
2091                vec![]
2092            })
2093        })
2094        .unwrap();
2095        let all = dag.all().unwrap();
2096        assert_eq!(format!("{:?}", &all), "3 4 5 10..=20 30..=40");
2097        assert_eq!(all.count(), 25);
2098    }
2099
2100    #[test]
2101    fn test_flat_segments() {
2102        let dir = tempdir().unwrap();
2103        let test_dir = tempdir().unwrap();
2104        let mut dag = IdDag::open(dir.path()).unwrap();
2105        let mut test_dag = IdDag::open(test_dir.path()).unwrap();
2106
2107        let empty_dag_segments = dag.flat_segments(Group::MASTER).unwrap();
2108        test_dag
2109            .build_segments_from_prepared_flat_segments(&empty_dag_segments)
2110            .unwrap();
2111        assert!(test_dag.all().unwrap().is_empty());
2112
2113        dag.build_segments(Id(0), &get_parents).unwrap();
2114        dag.build_segments(Id(1001), &get_parents).unwrap();
2115        let flat_segments = dag.flat_segments(Group::MASTER).unwrap();
2116        test_dag
2117            .build_segments_from_prepared_flat_segments(&flat_segments)
2118            .unwrap();
2119
2120        assert_eq!(test_dag.max_level().unwrap(), 3);
2121        assert_eq!(test_dag.all().unwrap().count(), 1002);
2122
2123        let subset_flat_segments = dag
2124            .idset_to_flat_segments(IdSet::from_spans(vec![2..=4]))
2125            .unwrap();
2126        assert_eq!(subset_flat_segments.segments.len(), 3);
2127    }
2128
2129    #[test]
2130    fn test_discontinous_flat_segment_only_head() {
2131        let prepared = PreparedFlatSegments {
2132            segments: vec![
2133                FlatSegment {
2134                    low: Id(0),
2135                    high: Id(10),
2136                    parents: vec![],
2137                },
2138                FlatSegment {
2139                    low: Id(20),
2140                    high: Id(30),
2141                    parents: vec![Id(10)],
2142                },
2143            ]
2144            .into_iter()
2145            .collect(),
2146        };
2147
2148        // Segment 20..=30 shouldn't have the "ONLY_HEAD" flag because of the gap.
2149        // In debug output it does not have "H" prefix.
2150        let mut dag = IdDag::new_in_process();
2151        dag.build_segments_from_prepared_flat_segments(&prepared)
2152            .unwrap();
2153        let iter = dag.iter_segments_ascending(Id(0), 0).unwrap();
2154        assert_eq!(dbg_iter(iter), "[RH0-10[], 20-30[10]]");
2155    }
2156
2157    #[test]
2158    fn test_roots_max_level_empty() {
2159        // Create segments in a way that the highest level
2160        // contains no segments in the master group.
2161        let mut iddag = IdDag::new_in_process();
2162        let mut prepared = PreparedFlatSegments {
2163            segments: vec![
2164                FlatSegment {
2165                    low: Id(0),
2166                    high: Id(10),
2167                    parents: vec![],
2168                },
2169                FlatSegment {
2170                    low: nid(0),
2171                    high: nid(4),
2172                    parents: vec![],
2173                },
2174            ]
2175            .into_iter()
2176            .collect(),
2177        };
2178        for i in 1..=3 {
2179            prepared.segments.insert(FlatSegment {
2180                low: nid(5 * i),
2181                high: nid(5 * i + 1),
2182                parents: vec![],
2183            });
2184            prepared.segments.insert(FlatSegment {
2185                low: nid(5 * i + 2),
2186                high: nid(5 * i + 4),
2187                parents: vec![nid(5 * i + 1), nid(5 * i - 1)],
2188            });
2189        }
2190        iddag.set_new_segment_size(2);
2191        iddag
2192            .build_segments_from_prepared_flat_segments(&prepared)
2193            .unwrap();
2194
2195        // The highest level is not 0. But the highest level exist in the non-master
2196        // group, not the master group.
2197        assert_eq!(iddag.max_level().unwrap(), 2);
2198
2199        let all = iddag.all().unwrap();
2200        assert_eq!(format!("{:?}", &all), "0..=10 N0..=N19");
2201
2202        let roots = iddag.roots(all).unwrap();
2203        assert_eq!(format!("{:?}", roots), "0 N0 N5 N10 N15");
2204    }
2205
2206    #[test]
2207    fn test_strip() {
2208        let mut iddag = IdDag::new_in_process();
2209        let mut prepared = PreparedFlatSegments::default();
2210        prepared.segments.insert(FlatSegment {
2211            low: Id(0),
2212            high: Id(100),
2213            parents: Vec::new(),
2214        });
2215        prepared.segments.insert(FlatSegment {
2216            low: Id(101),
2217            high: Id(200),
2218            parents: vec![Id(50)],
2219        });
2220        prepared.segments.insert(FlatSegment {
2221            low: Id(201),
2222            high: Id(300),
2223            parents: vec![Id(90)],
2224        });
2225        prepared.segments.insert(FlatSegment {
2226            low: nid(0),
2227            high: nid(100),
2228            parents: vec![Id(50), Id(250)],
2229        });
2230        prepared.segments.insert(FlatSegment {
2231            low: nid(101),
2232            high: nid(200),
2233            parents: vec![Id(50)],
2234        });
2235        iddag.set_new_segment_size(2);
2236        iddag
2237            .build_segments_from_prepared_flat_segments(&prepared)
2238            .unwrap();
2239        let all_before_remove = iddag.all().unwrap();
2240        let removed = iddag.strip(Id(70).into()).unwrap();
2241        let all_after_remove = iddag.all().unwrap();
2242        assert_eq!(
2243            all_before_remove.difference(&removed).as_spans(),
2244            all_after_remove.as_spans()
2245        );
2246        assert_eq!(
2247            all_after_remove.union(&removed).as_spans(),
2248            all_before_remove.as_spans()
2249        );
2250        assert_eq!(format!("{:?}", &removed), "70..=100 201..=300 N0..=N100");
2251        assert_eq!(
2252            dump_store_state(&iddag.store, &all_before_remove),
2253            "\nLv0: RH0-69[], 101-200[50], N101-N200[50]\nP->C: 50->101, 50->N101"
2254        );
2255    }
2256
2257    #[test]
2258    fn test_id_set_to_id_segments() {
2259        let mut iddag = IdDag::new_in_process();
2260
2261        // Insert some segments. Create a few levels.
2262        let mut prepared = PreparedFlatSegments::default();
2263        for g in &Group::ALL {
2264            let mut parents = vec![];
2265            for i in 0..=3 {
2266                let base = g.min_id() + 10 * i;
2267                prepared.segments.insert(FlatSegment {
2268                    low: base,
2269                    high: base + 4,
2270                    parents: parents.clone(),
2271                });
2272                prepared.segments.insert(FlatSegment {
2273                    low: base + 5,
2274                    high: base + 9,
2275                    parents: vec![base + 1, base + 4],
2276                });
2277                parents.push(base + 9);
2278            }
2279        }
2280        iddag.set_new_segment_size(2);
2281        iddag
2282            .build_segments_from_prepared_flat_segments(&prepared)
2283            .unwrap();
2284        // The highest level is not 0.
2285        assert_eq!(iddag.max_level().unwrap(), 2);
2286
2287        // Tests about id_set_to_id_segments_with_max_level.
2288        let t = |id_set, level| -> Vec<String> {
2289            let id_segs = iddag
2290                .id_set_to_id_segments_with_max_level(&id_set, level)
2291                .unwrap();
2292
2293            // Verify against other special-case APIs.
2294            if level >= iddag.max_level().unwrap() {
2295                let id_segs2 = iddag.id_set_to_id_segments(&id_set).unwrap();
2296                assert_eq!(&id_segs, &id_segs2);
2297            }
2298            if level == 0 {
2299                let flat_segments = iddag.idset_to_flat_segments(id_set).unwrap().segments;
2300                let id_segs2: Vec<IdSegment> = flat_segments
2301                    .into_iter()
2302                    .rev()
2303                    .map(|f| IdSegment {
2304                        high: f.high,
2305                        low: f.low,
2306                        parents: f.parents.clone(),
2307                        level: 0,
2308                        has_root: f.parents.is_empty(),
2309                    })
2310                    .collect();
2311                assert_eq!(&id_segs, &id_segs2);
2312            }
2313
2314            id_segs.into_iter().map(dbg).collect::<Vec<_>>()
2315        };
2316
2317        // Match a flat segment.
2318        assert_eq!(t(IdSet::from_spans(vec![10..=14]), 3), ["L0 10..=14 [9]"]);
2319
2320        // Match multiple flat segments.
2321        assert_eq!(
2322            t(IdSet::from_spans(vec![20..=29]), 3),
2323            ["L0 25..=29 [21, 24]", "L0 20..=24 [9, 19]"]
2324        );
2325
2326        // Match partial flat segments.
2327        assert_eq!(
2328            t(IdSet::from_spans(vec![21..=28]), 3),
2329            ["L0 25..=28 [21, 24]", "L0 21..=24 [20]"]
2330        );
2331
2332        // Match a high level segment.
2333        assert_eq!(t(IdSet::from_spans(vec![0..=24]), 3), ["L2 0..=24 []R"]);
2334
2335        // Respect max_level.
2336        assert_eq!(
2337            t(IdSet::from_spans(vec![0..=24]), 1),
2338            ["L1 15..=24 [11, 14, 9]", "L1 0..=14 []R"]
2339        );
2340
2341        // Match various segments. Pick the highest level possible.
2342        assert_eq!(
2343            t(IdSet::from_spans(vec![Id(0)..=Id(39), nid(0)..=nid(39)]), 3),
2344            [
2345                "L0 N35..=N39 [N31, N34]",
2346                "L0 N30..=N34 [N9, N19, N29]",
2347                "L1 N20..=N29 [N9, N19]",
2348                "L2 N0..=N19 []R",
2349                "L0 35..=39 [31, 34]",
2350                "L1 25..=34 [21, 24, 9, 19]",
2351                "L2 0..=24 []R"
2352            ]
2353        );
2354
2355        // Related APIs not covered by tests above.
2356        let high_level_id_seg = iddag
2357            .id_set_to_id_segments(&IdSet::from_spans(vec![0..=39]))
2358            .unwrap()
2359            .back()
2360            .unwrap()
2361            .clone();
2362        assert_eq!(format!("{:?}", &high_level_id_seg), "L2 0..=24 []R");
2363        let low_level_id_segs = iddag
2364            .id_segment_to_lower_level_id_segments(&high_level_id_seg)
2365            .unwrap();
2366        assert_eq!(
2367            format!("{:?}", &low_level_id_segs),
2368            "[L1 15..=24 [11, 14, 9], L1 0..=14 []R]"
2369        );
2370    }
2371
2372    fn dbg_iter<'a, T: std::fmt::Debug>(iter: Box<dyn Iterator<Item = Result<T>> + 'a>) -> String {
2373        let v = iter.map(|s| s.unwrap()).collect::<Vec<_>>();
2374        dbg(v)
2375    }
2376
2377    fn dbg<T: std::fmt::Debug>(t: T) -> String {
2378        format!("{:?}", t)
2379    }
2380
2381    fn nid(i: u64) -> Id {
2382        Group::NON_MASTER.min_id() + i
2383    }
2384}