Skip to main content

dag/
iddag.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8use std::collections::BTreeSet;
9use std::collections::BinaryHeap;
10use std::collections::VecDeque;
11use std::fmt;
12use std::fmt::Debug;
13use std::fmt::Formatter;
14use std::ops::Deref;
15#[cfg(any(test, feature = "indexedlog-backend"))]
16use std::path::Path;
17use std::sync::atomic::AtomicUsize;
18use std::sync::atomic::Ordering;
19
20use indexmap::set::IndexSet;
21use serde::Deserialize;
22use serde::Serialize;
23use tracing::debug;
24use tracing::trace;
25
26use crate::errors::bug;
27use crate::errors::NotFoundError;
28use crate::id::Group;
29use crate::id::Id;
30use crate::iddagstore::IdDagStore;
31#[cfg(any(test, feature = "indexedlog-backend"))]
32use crate::iddagstore::IndexedLogStore;
33use crate::iddagstore::MemStore;
34use crate::idset;
35use crate::idset::Span;
36use crate::ops::Persist;
37use crate::ops::StorageVersion;
38#[cfg(any(test, feature = "indexedlog-backend"))]
39use crate::ops::TryClone;
40use crate::segment::FlatSegment;
41use crate::segment::PreparedFlatSegments;
42use crate::segment::Segment;
43use crate::segment::SegmentFlags;
44use crate::types_ext::PreparedFlatSegmentsExt;
45use crate::Error::Programming;
46use crate::IdSegment;
47use crate::IdSet;
48use crate::IdSpan;
49use crate::Level;
50use crate::Result;
51use crate::VerLink;
52
53/// Structure to store a DAG of integers, with indexes to speed up ancestry queries.
54///
55/// A segment is defined as `(level: int, low: int, high: int, parents: [int])` on
56/// a topo-sorted integer DAG. It covers all integers in `low..=high` range, and
57/// must satisfy:
58/// - `high` is the *only* head in the sub DAG covered by the segment.
59/// - `parents` do not have entries within `low..=high` range.
60/// - If `level` is 0, for any integer `x` in `low+1..=high` range, `x`'s parents
61///   must be `x - 1`.
62///
63/// See `slides/201904-segmented-changelog/segmented-changelog.pdf` for pretty
64/// graphs about how segments help with ancestry queries.
65///
66/// [`IdDag`] is often used together with [`IdMap`] to allow customized names
67/// on vertexes. The [`Dag`] type provides an easy-to-use interface to
68/// keep [`IdDag`] and [`IdMap`] in sync.
69#[derive(Clone, Serialize, Deserialize)]
70pub struct IdDag<Store> {
71    pub(crate) store: Store,
72    #[serde(skip, default = "default_seg_size")]
73    new_seg_size: usize,
74    #[serde(skip, default = "VerLink::new")]
75    version: VerLink,
76}
77
78/// See benches/segment_sizes.rs (D16660078) for this choice.
79const DEFAULT_SEG_SIZE: AtomicUsize = AtomicUsize::new(16);
80
81/// Maximum meaningful level. 4 is chosen because it is good enough
82/// for an existing large repo (level 5 is not built because it
83/// cannot merge level 4 segments).
84const MAX_MEANINGFUL_LEVEL: Level = 4;
85
86#[cfg(any(test, feature = "indexedlog-backend"))]
87impl IdDag<IndexedLogStore> {
88    /// Open [`IdDag`] at the given directory. Create it on demand.
89    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
90        let store = IndexedLogStore::open(path)?;
91        Self::open_from_store(store)
92    }
93}
94
95impl<S> IdDag<S> {
96    /// Set the maximum size of a new high-level segment.
97    ///
98    /// This does not affect existing segments.
99    ///
100    /// This might help performance a bit for certain rare types of DAGs.
101    /// The default value is Usually good enough.
102    pub fn set_new_segment_size(&mut self, size: usize) {
103        self.new_seg_size = size.max(2);
104    }
105
106    /// Get the segment size used for building new high-level segments.
107    pub(crate) fn get_new_segment_size(&self) -> usize {
108        self.new_seg_size
109    }
110}
111
112#[cfg(any(test, feature = "indexedlog-backend"))]
113impl TryClone for IdDag<IndexedLogStore> {
114    /// Attempt to clone the `IdDag`.
115    fn try_clone(&self) -> Result<Self> {
116        let store = self.store.try_clone()?;
117        Ok(Self {
118            store,
119            new_seg_size: self.new_seg_size,
120            version: self.version.clone(),
121        })
122    }
123}
124
125impl IdDag<MemStore> {
126    /// Instantiate an [`IdDag`] that stores all it's data in process. Useful for scenarios that
127    /// do not require data persistance.
128    pub fn new_in_memory() -> Self {
129        let store = MemStore::new();
130        Self {
131            store,
132            new_seg_size: default_seg_size(),
133            version: VerLink::new(),
134        }
135    }
136}
137
138impl<Store: IdDagStore> IdDag<Store> {
139    pub(crate) fn open_from_store(store: Store) -> Result<Self> {
140        let version = store.verlink();
141        let dag = Self {
142            store,
143            new_seg_size: default_seg_size(),
144            version,
145        };
146        Ok(dag)
147    }
148}
149
150impl<Store: IdDagStore> IdDag<Store> {
151    /// Add a new segment.
152    ///
153    /// For simplicity, it does not check if the new segment overlaps with
154    /// an existing segment (which is a logic error). Those checks can be
155    /// offline.
156    pub(crate) fn insert(
157        &mut self,
158        flags: SegmentFlags,
159        level: Level,
160        low: Id,
161        high: Id,
162        parents: &[Id],
163    ) -> Result<()> {
164        if !low.is_virtual() {
165            self.version.bump();
166        }
167        self.store.insert(flags, level, low, high, parents)
168    }
169
170    /// Returns whether the iddag contains segments for the given `id`.
171    pub fn contains_id(&self, id: Id) -> Result<bool> {
172        Ok(self.all_with_virtual()?.contains(id))
173    }
174
175    pub(crate) fn version(&self) -> &VerLink {
176        &self.version
177    }
178}
179
180// Build segments.
181impl<Store: IdDagStore> IdDag<Store> {
182    /// Make sure the [`IdDag`] contains the given id (and all its ancestor ids)
183    /// by building up segments on demand.
184    ///
185    /// `get_parents` describes the DAG. Its input and output are `Id`s.
186    ///
187    /// This is often used together with [`crate::idmap::IdMap`].
188    ///
189    /// This is inefficient. If you have `PreparedFlatSegments`, call
190    /// `build_flat_segments_from_prepared_flat_segments` instead.
191    pub fn build_segments<F>(&mut self, high: Id, get_parents: &F) -> Result<usize>
192    where
193        F: Fn(Id) -> Result<Vec<Id>>,
194    {
195        // Step 1, figure out id ranges to be inserted using DFS.
196        // We cannot call `push_edge` like step 2 here, because the
197        // `id`s are in DESC order. `push_edge` really need ASC order.
198        let old_ids = self.all_ids_in_segment_level(0)?;
199        let mut to_insert_ids = IdSet::empty();
200        let mut to_visit = vec![high];
201        while let Some(id) = to_visit.pop() {
202            if old_ids.contains(id) || to_insert_ids.contains(id) {
203                continue;
204            }
205            to_insert_ids.push(id);
206            let parent_ids = get_parents(id)?;
207            to_visit.extend_from_slice(&parent_ids);
208        }
209
210        // Step 2, convert the to_insert_ids to PreparedFlatSegments.
211        // Iterate them in ascending order so `push_edge` works efficiently.
212        let mut prepared = PreparedFlatSegments::default();
213        for id in to_insert_ids.iter_asc() {
214            let parent_ids = get_parents(id)?;
215            prepared.push_edge(id, &parent_ids);
216        }
217
218        // Step 3, call build_segments_from_prepared_flat_segments.
219        self.build_segments_from_prepared_flat_segments(&prepared)
220    }
221
222    /// Similar to `build_segments`, but takes `PreparedFlatSegments` instead
223    /// of `get_parents`.
224    pub fn build_segments_from_prepared_flat_segments(
225        &mut self,
226        outcome: &PreparedFlatSegments,
227    ) -> Result<usize> {
228        let (count, set) = self.build_flat_segments_from_prepared_flat_segments(outcome)?;
229        let count = count + self.build_all_high_level_segments(Level::MAX, set)?;
230        Ok(count)
231    }
232
233    /// Build flat segments using the outcome from `add_head`.
234    /// This is not public because it does not keep high-level segments in sync.
235    ///
236    /// Return `(count, set)`. Number of segments inserted, and `IdSet` covered
237    /// by inserted segments.
238    fn build_flat_segments_from_prepared_flat_segments(
239        &mut self,
240        outcome: &PreparedFlatSegments,
241    ) -> Result<(usize, IdSet)> {
242        let mut inserted_id_set = IdSet::empty();
243        if outcome.segments.is_empty() {
244            return Ok((0, inserted_id_set));
245        }
246
247        let mut head_ids: BTreeSet<Id> = self.heads(self.master_group()?)?.iter_desc().collect();
248        let mut covered = self.all_ids_in_groups(&[Group::MASTER])?;
249        let mut get_flags = |parents: &[Id], head: Id, covered: &IdSet| {
250            let mut flags = SegmentFlags::empty();
251            if parents.is_empty() {
252                flags |= SegmentFlags::HAS_ROOT
253            }
254            if head.group() == Group::MASTER {
255                for p in parents.iter() {
256                    head_ids.remove(p);
257                }
258                // ONLY_HEAD means it heads(0..=head) = [head], or ancestors([head]) = 0..=head.
259                // The 0..=head range cannot have gaps. Because other segments
260                // might be inserted to the gaps later and they will have heads.
261                let has_no_gap = match covered.iter_span_asc().next() {
262                    Some(span) => span.contains(head),
263                    None => false,
264                };
265                if has_no_gap && head_ids.range(..=head).next().is_none() {
266                    flags |= SegmentFlags::ONLY_HEAD;
267                }
268                head_ids.insert(head);
269            }
270            flags
271        };
272        let mut last_high = None;
273        for seg in &outcome.segments {
274            if let Some(last_high) = last_high {
275                if last_high >= seg.low {
276                    return bug(format!(
277                        "PreparedFlatSegments are not sorted: {:?}",
278                        &outcome.segments
279                    ));
280                }
281            }
282            last_high = Some(seg.high);
283            covered.push(seg.low..=seg.high);
284
285            let flags = get_flags(&seg.parents, seg.high, &covered);
286            tracing::trace!(
287                "inserting flat segment {}..={} {:?} {:?}",
288                seg.low,
289                seg.high,
290                &seg.parents,
291                &flags
292            );
293            inserted_id_set.push(seg.low..=seg.high);
294            self.insert(flags, 0, seg.low, seg.high, &seg.parents)?;
295        }
296        Ok((outcome.segments.len(), inserted_id_set))
297    }
298
299    /// Incrementally build high level segments at the given `level`.
300    ///
301    /// The new, high level segments are built on top of the lower level
302    /// (`level - 1`) segments. Each high level segment covers at most `size`
303    /// `level - 1` segments.
304    ///
305    /// The last segment per level per group is dropped because it's likely to
306    /// be incomplete. This helps reduce fragmentation, and also allows the last
307    /// flat segment per group to be mutable, because it will not be included
308    /// in Level 1 segments.
309    ///
310    /// `inserted_lower_level_id_set` specifies newly inserted segments at the
311    /// lower level.
312    ///
313    /// Return `(count, set)`. `count` is the number of segments inserted.
314    /// `set` is an `IdSet` that covers ranges of segments inserted.
315    fn build_high_level_segments(
316        &mut self,
317        level: Level,
318        inserted_lower_level_id_set: &IdSet,
319    ) -> Result<(usize, IdSet)> {
320        // Exclude VIRTUAL spans.
321        let inserted_lower_level_id_set = inserted_lower_level_id_set
322            .difference(&Span::new(Group::VIRTUAL.min_id(), Group::VIRTUAL.max_id()).into());
323        let mut inserted_id_set = IdSet::empty();
324        if level == 0 {
325            // Do nothing. Level 0 is not considered high level.
326            return Ok((0, inserted_id_set));
327        }
328        let size = self.new_seg_size;
329
330        // Figure out lower level segments to consider.
331        //
332        // Legend:
333        //  [...] existing (one or more) segments
334        //  [N] newly inserted (in inserted_lower_level_id_set)
335        //
336        // Flat segments:          [..............] gap [.......] gap [...........]
337        // Lower level segments:   [.......][ N ]       [.....]       [....][ N ]
338        // This level segments:    [.....]              [...]         [..]
339        //   missing:                     [     ]            []           [     ]
340        //   need_consider:               [     ]                         [     ]
341        //                                                    ^
342        //                   no need to consdier because it does not have [N]
343
344        let missing = self
345            .all_ids_in_segment_level(level - 1)?
346            .difference(&self.all_ids_in_segment_level(level)?);
347        let need_consider = IdSet::from_sorted_spans(
348            missing
349                .as_spans()
350                .iter()
351                .filter(|s| inserted_lower_level_id_set.contains(s.high))
352                .copied(),
353        );
354        tracing::debug!(
355            "building lv{} segment from lv{} ranges {:?}",
356            level,
357            level - 1,
358            &need_consider
359        );
360
361        let mut insert_count = 0;
362        let mut new_segments_per_considering_span = Vec::new();
363        let mut lower_segments_len = 0;
364        for considering_span in need_consider.as_spans() {
365            tracing::trace!(" considering {:?}", &considering_span);
366            // `get_parents` is on the previous level of segments.
367            let get_parents = |head: Id| -> Result<Vec<Id>> {
368                if let Some(seg) = self.find_segment_by_head_and_level(head, level - 1)? {
369                    seg.parents()
370                } else {
371                    bug("get_parents called with wrong head in build_high_level_segments")
372                }
373            };
374
375            let new_segments = {
376                // Find all segments on the previous level that haven't been built.
377                let segments: Vec<_> =
378                    self.segments_in_span_ascending(*considering_span, level - 1)?;
379                tracing::trace!("  in lower-level: {:?}", &segments);
380                lower_segments_len += segments.len();
381
382                // Sanity check: They should be sorted and not overlapping.
383                for i in 1..segments.len() {
384                    if segments[i - 1].high()? >= segments[i].span()?.low {
385                        let msg = format!(
386                            "level {} segments {:?} are overlapping or not sorted!",
387                            level,
388                            &segments[i - 1..=i]
389                        );
390                        return bug(msg);
391                    }
392                }
393
394                // Build the graph from the first head. `low_idx` is the
395                // index of `segments` (level - 1).
396
397                // find_segment scans low level segments (segments[low_idx..]),
398                // merges them on the fly, and returns a high-level segment:
399                // (new_idx, low, high, parents, has_root).
400                // new_idx + 1 is the next low_idx that should be passed to find_segment
401                // to calculate the next high-level segment.
402                let find_segment = |low_idx: usize| -> Result<_> {
403                    let segment_low = segments[low_idx].span()?.low;
404                    let mut heads = BTreeSet::new();
405                    let mut parents = IndexSet::new();
406                    let mut candidate = None;
407                    let mut has_root = false;
408                    for i in low_idx..segments.len().min(low_idx + size) {
409                        // [--------------------------] level (to build)
410                        // [------] [------] [--------] level - 1 (lower level)
411                        // ^        ^
412                        // |        segments[i].low
413                        // segment_low
414                        let span = segments[i].span()?;
415                        // Discontinuous?
416                        if i > low_idx && segments[i - 1].head()? + 1 != span.low {
417                            break;
418                        }
419                        let head = span.high;
420                        if !has_root && segments[i].has_root()? {
421                            has_root = true;
422                        }
423                        heads.insert(head);
424                        let direct_parents = get_parents(head)?;
425                        for p in &direct_parents {
426                            if *p >= span.low {
427                                return bug(format!(
428                                    "invalid lv{} segment: {:?} (parent >= low)",
429                                    level - 1,
430                                    &segments[i]
431                                ));
432                            }
433                            if *p < segment_low {
434                                // No need to remove p from heads, since it cannot be a head.
435                                parents.insert(*p);
436                            } else {
437                                heads.remove(p);
438                            }
439                        }
440                        if heads.len() == 1 {
441                            candidate = Some((i, segment_low, head, parents.len(), has_root));
442                        }
443                    }
444                    // There must be at least one valid high-level segment,
445                    // because `segments[low_idx]` is such a high-level segment.
446                    let (new_idx, low, high, parent_count, has_root) = candidate.unwrap();
447                    let parents = parents.into_iter().take(parent_count).collect::<Vec<Id>>();
448                    Ok((new_idx, low, high, parents, has_root))
449                };
450
451                let mut idx = 0;
452                let mut new_segments = Vec::new();
453                while idx < segments.len() {
454                    let segment_info = find_segment(idx)?;
455                    idx = segment_info.0 + 1;
456                    new_segments.push(segment_info);
457                }
458
459                tracing::trace!("  new segments: {:?}", &new_segments);
460                new_segments
461            };
462
463            new_segments_per_considering_span.push(new_segments);
464        }
465
466        // No point to introduce new levels if it has the same segment count
467        // as the lower level.
468        if level > self.max_level()?
469            && new_segments_per_considering_span
470                .iter()
471                .map(|s| s.len())
472                .sum::<usize>()
473                >= lower_segments_len
474        {
475            tracing::debug!("no need to introduce new level");
476            return Ok((0, inserted_id_set));
477        }
478
479        for mut new_segments in new_segments_per_considering_span {
480            // Drop the last segment. It could be incomplete.
481            new_segments.pop();
482
483            insert_count += new_segments.len();
484
485            for (_, low, high, parents, has_root) in new_segments {
486                let flags = if has_root {
487                    SegmentFlags::HAS_ROOT
488                } else {
489                    SegmentFlags::empty()
490                };
491                tracing::trace!(
492                    " inserting lv{} segment {}..{} {:?} {:?}",
493                    level,
494                    low,
495                    high,
496                    &parents,
497                    &flags
498                );
499                inserted_id_set.push(low..=high);
500                self.insert(flags, level, low, high, &parents)?;
501            }
502        }
503
504        Ok((insert_count, inserted_id_set))
505    }
506
507    /// Build high level segments using default setup.
508    ///
509    /// `new_flat_id_set` covers the flat segments newly inserted.
510    ///
511    /// Return number of segments inserted.
512    fn build_all_high_level_segments(
513        &mut self,
514        max_level: Level,
515        new_flat_id_set: IdSet,
516    ) -> Result<usize> {
517        let mut total = 0;
518        let max_level = max_level.min(MAX_MEANINGFUL_LEVEL);
519        let mut new_id_set = new_flat_id_set;
520        for level in 1..=max_level {
521            let (count, new_ids) = self.build_high_level_segments(level, &new_id_set)?;
522            tracing::debug!("new {} lv{} segments: {:?}", count, level, &new_ids);
523            if count == 0 {
524                break;
525            }
526            new_id_set = new_ids;
527            total += count;
528        }
529        Ok(total)
530    }
531}
532
533impl<Store: IdDagStore> IdDag<Store> {
534    /// Returns the [`FlatSegment`] entries that are used by this [`IdDag`].
535    pub fn flat_segments(&self, group: Group) -> Result<PreparedFlatSegments> {
536        let segments = self.flat_segments_range(group.min_id(), group.max_id())?;
537        Ok(PreparedFlatSegments {
538            segments: segments.into_iter().collect(),
539        })
540    }
541
542    /// Return all flat segments that overlap with range (and potentially cover
543    /// larger range than supplied).
544    fn flat_segments_range(&self, min: Id, max_incl: Id) -> Result<Vec<FlatSegment>> {
545        let level = 0;
546        let mut segments = Vec::new();
547        for sr in self.iter_segments_ascending(min, level)? {
548            let segment = sr?;
549            let span = segment.span()?;
550            if span.low > max_incl {
551                break;
552            }
553            let fs = FlatSegment {
554                low: span.low,
555                high: span.high,
556                parents: segment.parents()?,
557            };
558            segments.push(fs);
559        }
560        Ok(segments)
561    }
562
563    /// Extract flat segments that cover the given `set` exactly.
564    pub fn idset_to_flat_segments(&self, set: IdSet) -> Result<PreparedFlatSegments> {
565        let mut segments = Vec::new();
566
567        let (min, max) = if let (Some(min), Some(max)) = (set.min(), set.max()) {
568            (min, max)
569        } else {
570            return Ok(PreparedFlatSegments {
571                segments: segments.into_iter().collect(),
572            });
573        };
574        let segs = self.flat_segments_range(min, max)?;
575        let seg_iter = segs.into_iter().rev();
576
577        let push = |seg: FlatSegment| segments.push(seg);
578        let span_iter = set.as_spans().iter().cloned();
579        idset::intersect_iter(seg_iter, span_iter, push);
580
581        Ok(PreparedFlatSegments {
582            segments: segments.into_iter().collect(),
583        })
584    }
585}
586
587// User-facing DAG-related algorithms.
588pub trait IdDagAlgorithm: IdDagStore {
589    /// Return a [`IdSet`] that covers all ids stored in this [`IdDag`],
590    /// i.e. everything including the VIRTUAL group.
591    fn all_with_virtual(&self) -> Result<IdSet> {
592        // Intentionally skip the VIRTUAL group.
593        self.all_ids_in_groups(&Group::ALL)
594    }
595
596    /// Return a [`IdSet`] that covers persistable ids stored in this [`IdDag`],
597    /// i.e. everything exluding the VIRTUAL group.
598    fn all(&self) -> Result<IdSet> {
599        // Intentionally skip the VIRTUAL group.
600        self.all_ids_in_groups(&[Group::MASTER, Group::NON_MASTER])
601    }
602
603    /// Return a [`IdSet`] that covers all ids stored in the master group.
604    fn master_group(&self) -> Result<IdSet> {
605        self.all_ids_in_groups(&[Group::MASTER])
606    }
607
608    /// Calculate all ancestors reachable from any id from the given set.
609    ///
610    /// ```plain,ignore
611    /// union(ancestors(i) for i in set)
612    /// ```
613    fn ancestors(&self, mut set: IdSet) -> Result<IdSet> {
614        fn trace(msg: &dyn Fn() -> String) {
615            trace!(target: "dag::algo::ancestors", "{}", msg());
616        }
617        debug!(target: "dag::algo::ancestors", "ancestors({:?})", &set);
618        if set.count() > 2 {
619            // Try to (greatly) reduce the size of the `set` to make calculation cheaper.
620            set = self.heads_ancestors(set)?;
621            trace(&|| format!("simplified to {:?}", &set));
622        }
623        let mut result = IdSet::empty();
624        let mut to_visit: BinaryHeap<_> = set.iter_desc().collect();
625        let max_level = self.max_level()?;
626        'outer: while let Some(id) = to_visit.pop() {
627            if result.contains(id) {
628                // If `id` is in `result`, then `ancestors(id)` are all in `result`.
629                continue;
630            }
631            trace(&|| format!(" lookup {:?}", id));
632            let flat_seg = self.find_flat_segment_including_id(id)?;
633            if let Some(ref s) = flat_seg {
634                if s.only_head()? {
635                    // Fast path.
636                    trace(&|| format!(" push ..={:?} (only head fast path)", id));
637                    result.push_span((Id::MIN..=id).into());
638                    break 'outer;
639                }
640            }
641            for level in (1..=max_level).rev() {
642                let seg = self.find_segment_by_head_and_level(id, level)?;
643                if let Some(seg) = seg {
644                    let span = seg.span()?;
645                    trace(&|| format!(" push lv{} {:?}", level, &span));
646                    result.push_span(span);
647                    let parents = seg.parents()?;
648                    trace(&|| format!(" follow parents {:?}", &parents));
649                    for parent in parents {
650                        to_visit.push(parent);
651                    }
652                    continue 'outer;
653                }
654            }
655            if let Some(seg) = flat_seg {
656                let span = (seg.span()?.low..=id).into();
657                trace(&|| format!(" push lv0 {:?}", &span));
658                result.push_span(span);
659                let parents = seg.parents()?;
660                trace(&|| format!(" follow parents {:?}", &parents));
661                for parent in parents {
662                    to_visit.push(parent);
663                }
664            } else {
665                // The current design requires flat segments to cover all Ids.
666                // Potentially they can be made lazy. But that would be a large change.
667                return bug(format!(
668                    "flat segments should cover all Ids but {:?} is not covered",
669                    id
670                ));
671            }
672        }
673
674        trace(&|| format!(" result: {:?}", &result));
675
676        Ok(result)
677    }
678
679    /// Like `ancestors` but follows only the first parents.
680    fn first_ancestors(&self, set: IdSet) -> Result<IdSet> {
681        fn trace(msg: &dyn Fn() -> String) {
682            trace!(target: "dag::algo::first_ancestors", "{}", msg());
683        }
684        debug!(target: "dag::algo::first_ancestors", "first_ancestors({:?})", &set);
685        let mut result = IdSet::empty();
686        let mut to_visit: BinaryHeap<_> = set.iter_desc().collect();
687        // Lookup flat segments to figure out the first ancestors.
688        while let Some(id) = to_visit.pop() {
689            if result.contains(id) {
690                // If `id` is in `result`, then `ancestors(id)` are all in `result`.
691                continue;
692            }
693            trace(&|| format!(" visit {:?}", &id));
694            let flat_seg = self.find_flat_segment_including_id(id)?;
695            if let Some(ref seg) = flat_seg {
696                let span = seg.span()?;
697                result.push_span((span.low..=id).into());
698                trace(&|| format!(" push {:?}..={:?}", span.low, id));
699                if let Some(&p) = seg.parents()?.first() {
700                    to_visit.push(p);
701                }
702            }
703        }
704        trace(&|| format!(" result: {:?}", &result));
705        Ok(result)
706    }
707
708    /// Calculate merges within the given set.
709    fn merges(&self, set: IdSet) -> Result<IdSet> {
710        fn trace(msg: &dyn Fn() -> String) {
711            trace!(target: "dag::algo::merges", "{}", msg());
712        }
713        debug!(target: "dag::algo::merges", "merges({:?})", &set);
714
715        let mut result = IdSet::empty();
716
717        // Check overlapped flat segments. By definition, merges can only be the
718        // "low"s of flat segments.
719
720        // Process the given span overlapped with the segment.
721        // Return the next "high" id for segment lookup.
722        // Return None if there is no segment to check for the given span.
723        let mut process_seg = |span: &IdSpan, seg: Segment| -> Result<Option<Id>> {
724            trace(&|| format!(" process {:?} seg {:?}", &span, &seg));
725            let seg_span = seg.span()?;
726            let low = seg_span.low;
727            if low < span.low {
728                return Ok(None);
729            }
730            if seg.parent_count()? >= 2 {
731                // span.low <= low <= high <= span.high
732                debug_assert!(set.contains(low));
733                trace(&|| format!(" push merge {:?}", &low));
734                result.push_span(low.into());
735            }
736            if seg_span.low > Id(0) {
737                Ok(Some(seg_span.low - 1))
738            } else {
739                Ok(None)
740            }
741        };
742
743        for span in set.as_spans() {
744            // Cannot use iter_segments_descending, since it skips overlapping
745            // segments (seg.high > span.high and seg.low > span.low). Use
746            // find_flat_segment_including_id to find the first overlapping
747            // segment, then use iter_segments_descending to handle a large
748            // span (ex. all()) efficiently.
749            let high = match self.find_flat_segment_including_id(span.high)? {
750                None => continue,
751                Some(seg) => match process_seg(span, seg)? {
752                    None => continue,
753                    Some(id) => id,
754                },
755            };
756            'iter_seg: for seg in self.iter_segments_descending(high, 0)? {
757                let seg = seg?;
758                match process_seg(span, seg)? {
759                    None => break 'iter_seg,
760                    Some(_) => {}
761                }
762            }
763        }
764
765        trace(&|| format!(" result: {:?}", &result));
766
767        Ok(result)
768    }
769
770    /// Calculate parents of the given set.
771    ///
772    /// Note: [`IdSet`] does not preserve order. Use [`IdDag::parent_ids`] if
773    /// order is needed.
774    fn parents(&self, mut set: IdSet) -> Result<IdSet> {
775        fn trace(msg: &dyn Fn() -> String) {
776            trace!(target: "dag::algo::parents", "{}", msg());
777        }
778        debug!(target: "dag::algo::parents", "parents({:?})", &set);
779
780        let mut result = IdSet::empty();
781        let max_level = self.max_level()?;
782
783        'outer: while let Some(head) = set.max() {
784            trace(&|| format!("check head {:?}", head));
785            // For high-level segments. If the set covers the entire segment, then
786            // the parents is (the segment - its head + its parents).
787            for level in (1..=max_level).rev() {
788                if let Some(seg) = self.find_segment_by_head_and_level(head, level)? {
789                    let seg_span = seg.span()?;
790                    if set.contains(seg_span) {
791                        let seg_set = IdSet::from(seg_span);
792                        let mut parent_set = seg_set.difference(&head.into());
793                        parent_set.push_set(&IdSet::from_spans(seg.parents()?));
794                        set = set.difference(&seg_set);
795                        result = result.union(&parent_set);
796                        trace(&|| format!(" push lv{} {:?}", level, &parent_set));
797                        continue 'outer;
798                    }
799                }
800            }
801
802            // A flat segment contains information to calculate
803            // parents(subset of the segment).
804            let seg = match self.find_flat_segment_including_id(head)? {
805                Some(seg) => seg,
806                None => return head.not_found(),
807            };
808            let seg_span = seg.span()?;
809            let seg_low = seg_span.low;
810            let seg_set: IdSet = seg_span.into();
811            let seg_set = seg_set.intersection(&set);
812
813            // Get parents for a linear set (ex. parent(i) is (i - 1)).
814            fn parents_linear(set: &IdSet) -> IdSet {
815                debug_assert!(!set.contains(Id::MIN));
816                IdSet::from_sorted_spans(set.as_spans().iter().map(|s| s.low - 1..=s.high - 1))
817            }
818
819            let parent_set = if seg_set.contains(seg_low) {
820                let mut parent_set = parents_linear(&seg_set.difference(&IdSet::from(seg_low)));
821                parent_set.push_set(&IdSet::from_spans(seg.parents()?));
822                parent_set
823            } else {
824                parents_linear(&seg_set)
825            };
826
827            set = set.difference(&seg_set);
828            trace(&|| format!(" push lv0 {:?}", &parent_set));
829            result = result.union(&parent_set);
830        }
831
832        trace(&|| format!(" result: {:?}", &result));
833
834        Ok(result)
835    }
836
837    /// Get parents of a single `id`. Preserve the order.
838    fn parent_ids(&self, id: Id) -> Result<Vec<Id>> {
839        let seg = match self.find_flat_segment_including_id(id)? {
840            Some(seg) => seg,
841            None => return id.not_found(),
842        };
843        let span = seg.span()?;
844        if id == span.low {
845            Ok(seg.parents()?)
846        } else {
847            Ok(vec![id - 1])
848        }
849    }
850
851    /// Calculate the n-th first ancestor. If `n` is 0, return `id` unchanged.
852    /// If `n` is 1, return the first parent of `id`.
853    fn first_ancestor_nth(&self, id: Id, n: u64) -> Result<Id> {
854        match self.try_first_ancestor_nth(id, n)? {
855            None => Err(Programming(format!(
856                "{}~{} cannot be resolved - no parents",
857                &id, n
858            ))),
859            Some(id) => Ok(id),
860        }
861    }
862
863    /// Calculate the n-th first ancestor. If `n` is 0, return `id` unchanged.
864    /// If `n` is 1, return the first parent of `id`.
865    /// If `n` is too large, exceeding the distance between the root and `id`,
866    /// return `None`.
867    fn try_first_ancestor_nth(&self, mut id: Id, mut n: u64) -> Result<Option<Id>> {
868        // PERF: this can have fast paths from high-level segments if high-level
869        // segments have extra information.
870        while n > 0 {
871            let seg = self
872                .find_flat_segment_including_id(id)?
873                .ok_or_else(|| id.not_found_error())?;
874            // segment: low ... id ... high
875            //          \________/
876            //            delta
877            let low = seg.span()?.low;
878            let delta = id.0 - low.0;
879            let step = delta.min(n);
880            id = id - step;
881            n -= step;
882            if n > 0 {
883                // Follow the first parent.
884                id = match seg.parents()?.first() {
885                    None => return Ok(None),
886                    Some(&id) => id,
887                };
888                n -= 1;
889            }
890        }
891        Ok(Some(id))
892    }
893
894    /// Convert an `id` to `x~n` form with the given constraint.
895    ///
896    /// Return `None` if the conversion can not be done with the constraints.
897    fn to_first_ancestor_nth(
898        &self,
899        id: Id,
900        constraint: FirstAncestorConstraint,
901    ) -> Result<Option<(Id, u64)>> {
902        match constraint {
903            FirstAncestorConstraint::None => Ok(Some((id, 0))),
904            FirstAncestorConstraint::KnownUniversally { heads } => {
905                self.to_first_ancestor_nth_known_universally(id, heads)
906            }
907        }
908    }
909
910    /// See `FirstAncestorConstraint::KnownUniversally`.
911    ///
912    /// Try to represent `id` as `x~n` (revset notation) form, where `x` must
913    /// be in `heads + parents(merge() & ancestors(heads))`.
914    ///
915    /// Return `None` if `id` is not part of `ancestors(heads)`.
916    fn to_first_ancestor_nth_known_universally(
917        &self,
918        id: Id,
919        heads: IdSet,
920    ) -> Result<Option<(Id, u64)>> {
921        // Do not track errors for the first time. This has lower overhead.
922        match self.to_first_ancestor_nth_known_universally_with_errors(id, &heads, None) {
923            Ok(v) => Ok(v),
924            Err(_) => {
925                // Pay additional overhead to get error message.
926                self.to_first_ancestor_nth_known_universally_with_errors(
927                    id,
928                    &heads,
929                    Some(Vec::new()),
930                )
931            }
932        }
933    }
934
935    /// Internal implementation of `to_first_ancestor_nth_known_universally`.
936    ///
937    /// If `details` is not `None`, it is used to store extra error messages
938    /// with some extra overhead.
939    fn to_first_ancestor_nth_known_universally_with_errors(
940        &self,
941        mut id: Id,
942        heads: &IdSet,
943        mut details: Option<Vec<String>>,
944    ) -> Result<Option<(Id, u64)>> {
945        // To figure `x~n` we look at the flat segment containing `id`, and check:
946        // - If the flat segment overlaps with heads, then just use the overlapped id as `x`.
947        // - Try to find parents of merges within the flat segment (the merges belong to other
948        //   "child segment"s). If the merge is an ancestor of `heads`, then the parent of the
949        //   merge can be used as `x`, if `n` is not 0.
950        // - Otherwise, try to follow connected flat segment (with single parent), convert the
951        //   `x~n` question to a `y~(n+m)` question, then start over.
952        let mut trace = |msg: &dyn Fn() -> String| {
953            if let Some(details) = details.as_mut() {
954                details.push(msg().trim().to_string());
955            }
956            trace!(target: "dag::algo::toxn", "{}", msg());
957        };
958
959        let ancestors = self.ancestors(heads.clone())?;
960        if !ancestors.contains(id) {
961            return Ok(None);
962        }
963
964        let mut n = 0;
965        debug!(target: "dag::algo::toxn", "resolving {:?}", id);
966        let result = 'outer: loop {
967            let seg = self
968                .find_flat_segment_including_id(id)?
969                .ok_or_else(|| id.not_found_error())?;
970            let span = seg.span()?;
971            let head = span.high;
972            trace(&|| format!(" in seg {:?}", &seg));
973            // Can we use an `id` from `heads` as `x`?
974            let intersected = heads.intersection(&(id..=head).into());
975            if !intersected.is_empty() {
976                let head = intersected.min().unwrap();
977                n += head.0 - id.0;
978                trace(&|| format!("  contains head ({:?})", head));
979                break 'outer (head, n);
980            }
981            // Does this segment contain any `x` that is an ancestor of a merge,
982            // that can be used as `x`?
983            //
984            // Note the `x` does not have to be the head of `seg`. For example,
985            //
986            //      1--2--3--4 (seg, span: 1..=4)
987            //          \
988            //           5--6  (child_seg, span: 5..=6, parents: [2])
989            //
990            // During `to_first_ancestor_nth(2, [6])`, `1` needs to be translated
991            // to `6~3`, not `4~3`. Needs to lookup parents in the range `1..=4`.
992            let mut next_id_n = None;
993            let parent_span = span.low.max(id)..=span.high;
994            for entry in self.iter_flat_segments_with_parent_span(parent_span.into())? {
995                let (parent_id, child_seg) = entry?;
996                trace(&|| format!("  {:?} has child seg ({:?})", parent_id, &child_seg));
997                let child_low = child_seg.low()?;
998                if !ancestors.contains(child_low) {
999                    // `child_low` is outside `ancestors(heads)`, cannot use it
1000                    // or its parents as references.
1001                    trace(&|| "   child seg out of range".to_string());
1002                    continue;
1003                }
1004
1005                if child_seg.parent_count()? > 1 {
1006                    // `child_low` is a merge included in `ancestors`, and
1007                    // `parent_id` is a parent of the merge. Therefore
1008                    // `parent_id` might be used as `x`.
1009                    let next_n = n + parent_id.0 - id.0;
1010                    if next_n > 0 {
1011                        break 'outer (parent_id, next_n);
1012                    }
1013
1014                    // Fragmented linear segments. Convert id~n to next_id~next_n.
1015                    let child_parents = child_seg.parents()?;
1016                    match child_parents.first() {
1017                        None => {
1018                            return bug(format!(
1019                                "segment {:?} should have parent {:?}",
1020                                &child_seg, &parent_id
1021                            ));
1022                        }
1023                        Some(p) => {
1024                            if &parent_id != p {
1025                                // Not the first parent. Do not follow it.
1026                                trace(&|| {
1027                                    format!(
1028                                        "   child seg cannot be followed ({:?} is not p1)",
1029                                        &parent_id
1030                                    )
1031                                });
1032                                continue;
1033                            }
1034                        }
1035                    }
1036                }
1037
1038                debug_assert!(ancestors.contains(child_low));
1039                let next_id = child_low;
1040                let next_n = n + 1 + parent_id.0 - id.0;
1041                trace(&|| format!("  follow {:?}~{}", next_id, next_n));
1042                next_id_n = Some((next_id, next_n));
1043                break;
1044            }
1045            match next_id_n {
1046                // This should not happen if indexes and segments are legit.
1047                None => {
1048                    let mut message = format!(
1049                        concat!(
1050                            "cannot convert {} to x~n form (x must be in ",
1051                            "`H + parents(ancestors(H) & merge())` where H = {:?})",
1052                        ),
1053                        id, &heads,
1054                    );
1055                    if let Some(details) = details {
1056                        if !details.is_empty() {
1057                            message += &format!(" (trace: {})", details.join(", "));
1058                        }
1059                    }
1060                    return Err(Programming(message));
1061                }
1062                Some((next_id, next_n)) => {
1063                    id = next_id;
1064                    n = next_n;
1065                }
1066            }
1067        };
1068        trace!(target: "dag::algo::toxn", " found: {:?}", &result);
1069        Ok(Some(result))
1070    }
1071
1072    /// Calculate heads of the given set.
1073    fn heads(&self, set: IdSet) -> Result<IdSet> {
1074        Ok(set.difference(&self.parents(set.clone())?))
1075    }
1076
1077    /// Calculate children for a single `Id`.
1078    fn children_id(&self, id: Id) -> Result<IdSet> {
1079        let mut result = BTreeSet::new();
1080        fn trace(msg: &dyn Fn() -> String) {
1081            trace!(target: "dag::algo::children_id", "{}", msg());
1082        }
1083        debug!(target: "dag::algo::children_id", "children_id({:?})", id);
1084        for seg in self.iter_flat_segments_with_parent(id)? {
1085            let seg = seg?;
1086            let child_id = seg.low()?;
1087            trace(&|| format!(" push {:?} via parent index", child_id));
1088            result.insert(child_id);
1089        }
1090        if let Some(seg) = self.find_flat_segment_including_id(id)? {
1091            let span = seg.span()?;
1092            if span.high != id {
1093                let child_id = id + 1;
1094                trace(&|| format!(" push {:?} via flat segment definition", child_id));
1095                result.insert(child_id);
1096            }
1097        }
1098        let result = IdSet::from_sorted_spans(result.into_iter().rev());
1099        trace(&|| format!(" result: {:?}", &result));
1100        Ok(result)
1101    }
1102
1103    /// Calculate children of the given set.
1104    fn children(&self, set: IdSet) -> Result<IdSet> {
1105        if set.count() < 5 {
1106            let result = set
1107                .iter_desc()
1108                .fold(Ok(IdSet::empty()), |acc: Result<IdSet>, id| match acc {
1109                    Ok(acc) => Ok(acc.union(&self.children_id(id)?)),
1110                    Err(err) => Err(err),
1111                })?;
1112            #[cfg(test)]
1113            {
1114                let result_set = self.children_set(set)?;
1115                assert_eq!(result.as_spans(), result_set.as_spans());
1116            }
1117            Ok(result)
1118        } else {
1119            self.children_set(set)
1120        }
1121    }
1122
1123    fn children_set(&self, set: IdSet) -> Result<IdSet> {
1124        fn trace(msg: &dyn Fn() -> String) {
1125            trace!(target: "dag::algo::children", "{}", msg());
1126        }
1127        debug!(target: "dag::algo::children", "children({:?})", &set);
1128
1129        // The algorithm works as follows:
1130        // - Iterate through level N segments [1].
1131        // - Considering a level N segment S:
1132        //   Could we take the entire S?
1133        //     - If `set` covers `S - S.head + S.parents`, then yes, take S
1134        //       and continue with the next level N segment.
1135        //   Could we ignore the entire S and check the next level N segment?
1136        //     - If (S + S.parents) do not overlap with `set`, then yes, skip.
1137        //   No fast paths. Is S a flat segment?
1138        //     - No:  Iterate through level N-1 segments covered by S,
1139        //            recursively (goto [1]).
1140        //     - Yes: Figure out children in the flat segment.
1141        //            Push them to the result.
1142
1143        struct Context<'a, Store: ?Sized> {
1144            this: &'a Store,
1145            set: IdSet,
1146            // Lower bound based on the input set.
1147            result_lower_bound: Id,
1148            result: IdSet,
1149        }
1150
1151        fn visit_segments<S: IdDagStore + ?Sized>(
1152            ctx: &mut Context<S>,
1153            mut range: IdSpan,
1154            level: Level,
1155        ) -> Result<()> {
1156            trace(&|| format!("visit range {:?} lv{}", &range, level));
1157            let mut visited = false;
1158            for seg in ctx.this.iter_segments_descending(range.high, level)? {
1159                let seg = seg?;
1160                let span = seg.span()?;
1161                visited = true;
1162                trace(&|| format!(" seg {:?}", &seg));
1163                // `range` is all valid. If a high-level segment misses it, try
1164                // a lower level one.
1165                if span.high < range.high {
1166                    let low_id = (span.high + 1).max(range.low);
1167                    if low_id > range.high {
1168                        return Ok(());
1169                    }
1170                    let missing_range = IdSpan::from(low_id..=range.high);
1171                    if level > 0 {
1172                        trace(&|| {
1173                            format!("  visit missing range at lower level: {:?}", &missing_range)
1174                        });
1175                        visit_segments(ctx, missing_range, level - 1)?;
1176                    } else {
1177                        return bug(format!(
1178                            "flat segments should have covered: {:?} returned by all() (range: {:?})",
1179                            missing_range, range,
1180                        ));
1181                    }
1182                }
1183
1184                // Update range.high for the next iteration.
1185                range.high = span.low.max(Id(1)) - 1;
1186
1187                // Stop iteration?
1188                if span.high < range.low || span.high < ctx.result_lower_bound {
1189                    break;
1190                }
1191
1192                let parents = seg.parents()?;
1193
1194                // Count of parents overlapping with `set`.
1195                let overlapped_parents = parents.iter().filter(|p| ctx.set.contains(**p)).count();
1196
1197                // Remove the `high`. This segment cannot calculate
1198                // `children(high)`. If `high` matches a `parent` of
1199                // another segment, that segment will handle it.
1200                // This is related to the flat segment children path
1201                // below.
1202                let intersection = ctx
1203                    .set
1204                    .intersection(&span.into())
1205                    .difference(&span.high.into());
1206
1207                if !seg.has_root()? {
1208                    // A segment must have at least one parent to be rootless.
1209                    debug_assert!(!parents.is_empty());
1210                    // Fast path: Take the entire segment directly.
1211                    if overlapped_parents == parents.len()
1212                        && intersection.count() + 1 == span.count()
1213                    {
1214                        trace(&|| format!(" push lv{} {:?} (rootless fast path)", level, &span));
1215                        ctx.result.push_span(span);
1216                        continue;
1217                    }
1218                }
1219
1220                if !intersection.is_empty() {
1221                    if level > 0 {
1222                        visit_segments(ctx, span, level - 1)?;
1223                        continue;
1224                    } else {
1225                        // Flat segment children path.
1226                        // children(x..=y) = (x+1)..=(y+1) if x..=(y+1) is in a flat segment.
1227                        let seg_children = IdSet::from_spans(
1228                            intersection
1229                                .as_spans()
1230                                .iter()
1231                                .map(|s| s.low + 1..=s.high + 1),
1232                        );
1233                        trace(&|| format!(" push {:?} (flat segment range)", &seg_children));
1234                        ctx.result.push_set(&seg_children);
1235                    }
1236                }
1237
1238                if overlapped_parents > 0 {
1239                    if level > 0 {
1240                        visit_segments(ctx, span, level - 1)?;
1241                    } else {
1242                        // child(any parent) = lowest id in this flat segment.
1243                        trace(&|| {
1244                            format!(" push {:?} (overlapped parents of flat segment)", &span.low)
1245                        });
1246                        ctx.result.push_span(span.low.into());
1247                    }
1248                }
1249            }
1250            // The high level segment misses the range. Try a lower level.
1251            if !visited {
1252                if level == 0 {
1253                    return bug(format!(
1254                        "flat segments should have covered: {:?} returned by all()",
1255                        range,
1256                    ));
1257                }
1258                visit_segments(ctx, range, level - 1)?;
1259            }
1260            Ok(())
1261        }
1262
1263        let result_lower_bound = set.min().unwrap_or(Id::MAX);
1264        let mut ctx = Context {
1265            this: self,
1266            set,
1267            result_lower_bound,
1268            result: IdSet::empty(),
1269        };
1270
1271        let max_level = self.max_level()?;
1272        for span in self.all_with_virtual()?.as_spans() {
1273            visit_segments(&mut ctx, *span, max_level)?;
1274        }
1275
1276        trace(&|| format!(" result: {:?}", &ctx.result));
1277
1278        Ok(ctx.result)
1279    }
1280
1281    /// Calculate roots of the given set.
1282    fn roots(&self, set: IdSet) -> Result<IdSet> {
1283        Ok(set.difference(&self.children(set.clone())?))
1284    }
1285
1286    /// Calculate one "greatest common ancestor" of the given set.
1287    ///
1288    /// If there are no common ancestors, return None.
1289    /// If there are multiple greatest common ancestors, pick one arbitrarily.
1290    /// Use `gca_all` to get all of them.
1291    fn gca_one(&self, set: IdSet) -> Result<Option<Id>> {
1292        // The set is sorted in DESC order. Therefore its first item can be used as the result.
1293        Ok(self.common_ancestors(set)?.max())
1294    }
1295
1296    /// Calculate all "greatest common ancestor"s of the given set.
1297    /// `gca_one` is faster if an arbitrary answer is ok.
1298    fn gca_all(&self, set: IdSet) -> Result<IdSet> {
1299        self.heads_ancestors(self.common_ancestors(set)?)
1300    }
1301
1302    /// Calculate all common ancestors of the given set.
1303    ///
1304    /// ```plain,ignore
1305    /// intersect(ancestors(i) for i in set)
1306    /// ```
1307    fn common_ancestors(&self, set: IdSet) -> Result<IdSet> {
1308        let result = match set.count() {
1309            0 => set,
1310            1 => self.ancestors(set)?,
1311            2 => {
1312                // Fast path that does not calculate "heads".
1313                let mut iter = set.iter_desc();
1314                let a = iter.next().unwrap();
1315                let b = iter.next().unwrap();
1316                self.ancestors(a.into())?
1317                    .intersection(&self.ancestors(b.into())?)
1318            }
1319            _ => {
1320                // Try to reduce the size of `set`.
1321                // `common_ancestors(X)` = `common_ancestors(roots(X))`.
1322                let set = self.roots(set)?;
1323                set.iter_desc()
1324                    .fold(Ok(IdSet::full()), |set: Result<IdSet>, id| {
1325                        Ok(set?.intersection(&self.ancestors(id.into())?))
1326                    })?
1327            }
1328        };
1329        Ok(result)
1330    }
1331
1332    /// Test if `ancestor_id` is an ancestor of `descendant_id`.
1333    fn is_ancestor(&self, ancestor_id: Id, descendant_id: Id) -> Result<bool> {
1334        let set = self.ancestors(descendant_id.into())?;
1335        Ok(set.contains(ancestor_id))
1336    }
1337
1338    /// Calculate "heads" of the ancestors of the given [`IdSet`]. That is,
1339    /// Find Y, which is the smallest subset of set X, where `ancestors(Y)` is
1340    /// `ancestors(X)`.
1341    ///
1342    /// This is faster than calculating `heads(ancestors(set))`.
1343    ///
1344    /// This is different from `heads`. In case set contains X and Y, and Y is
1345    /// an ancestor of X, but not the immediate ancestor, `heads` will include
1346    /// Y while this function won't.
1347    fn heads_ancestors(&self, set: IdSet) -> Result<IdSet> {
1348        let mut remaining = set;
1349        let mut result = IdSet::empty();
1350        while let Some(id) = remaining.max() {
1351            result.push_span((id..=id).into());
1352            // Remove ancestors reachable from that head.
1353            remaining = remaining.difference(&self.ancestors(id.into())?);
1354        }
1355        Ok(result)
1356    }
1357
1358    /// Calculate the "dag range" - ids reachable from both sides.
1359    ///
1360    /// ```plain,ignore
1361    /// intersect(ancestors(heads), descendants(roots))
1362    /// ```
1363    ///
1364    /// This is O(flat segments), or O(merges).
1365    fn range(&self, roots: IdSet, mut heads: IdSet) -> Result<IdSet> {
1366        if roots.is_empty() {
1367            return Ok(IdSet::empty());
1368        }
1369        if heads.is_empty() {
1370            return Ok(IdSet::empty());
1371        }
1372
1373        fn trace(msg: &dyn Fn() -> String) {
1374            trace!(target: "dag::algo::range", "{}", msg());
1375        }
1376        debug!(target: "dag::algo::range", "range({:?}, {:?})", &roots, &heads);
1377
1378        // Remove uninteresting heads. Make `ancestors(heads)` a bit easier.
1379        let min_root_id = roots.min().unwrap();
1380        let min_head_id = heads.min().unwrap();
1381        if min_head_id < min_root_id {
1382            let span = min_root_id..=Id::MAX;
1383            heads = heads.intersection(&span.into());
1384            trace(&|| format!(" removed unreachable heads: {:?}", &heads));
1385        }
1386
1387        let ancestors_of_heads = self.ancestors(heads)?;
1388        let result = self.descendants_intersection(&roots, &ancestors_of_heads)?;
1389
1390        #[cfg(test)]
1391        {
1392            let intersection = ancestors_of_heads.intersection(&result);
1393            assert_eq!(result.as_spans(), intersection.as_spans());
1394        }
1395
1396        trace(&|| format!(" result: {:?}", &result));
1397        Ok(result)
1398    }
1399
1400    /// Calculate the descendants of the given set.
1401    ///
1402    /// Logically equivalent to `range(set, all())`.
1403    ///
1404    /// This is O(flat segments), or O(merges).
1405    fn descendants(&self, set: IdSet) -> Result<IdSet> {
1406        debug!(target: "dag::algo::descendants", "descendants({:?})", &set);
1407        let roots = set;
1408        let all = self.all_with_virtual()?;
1409        let result = self.descendants_intersection(&roots, &all)?;
1410        trace!(target: "dag::algo::descendants", " result: {:?}", &result);
1411        Ok(result)
1412    }
1413
1414    /// Calculate (descendants(roots) & ancestors).
1415    ///
1416    /// This is O(flat segments), or O(merges).
1417    ///
1418    /// `ancestors(ancestors)` must be equal to `ancestors`.
1419    fn descendants_intersection(&self, roots: &IdSet, ancestors: &IdSet) -> Result<IdSet> {
1420        fn trace(msg: &dyn Fn() -> String) {
1421            trace!(target: "dag::algo::descendants_intersection", "{}", msg());
1422        }
1423
1424        debug_assert_eq!(
1425            ancestors.count(),
1426            self.ancestors(ancestors.clone())?.count()
1427        );
1428
1429        // Filter out roots that are not reachable from `ancestors`.
1430        let roots = ancestors.intersection(roots);
1431        let min_root = match roots.min() {
1432            Some(id) => id,
1433            None => return Ok(IdSet::empty()),
1434        };
1435        let max_root = roots.max().unwrap();
1436
1437        // `result` could be initially `roots`. However that breaks ordering
1438        // (cannot use `result.push_span_asc` below).
1439        let mut result = IdSet::empty();
1440
1441        // For the master group, use linear scan for flat segments. This is
1442        // usually more efficient, because the master group usually only has 1
1443        // head, and most segments will be included.
1444        let master_max_id = ancestors
1445            .max()
1446            .unwrap_or(Id::MIN)
1447            .min(Group::MASTER.max_id());
1448        for seg in self.iter_segments_ascending(min_root, 0)? {
1449            let seg = seg?;
1450            let span = seg.span()?;
1451            if span.low > master_max_id {
1452                break;
1453            }
1454            trace(&|| format!(" visit {:?}", &seg));
1455            let parents = seg.parents()?;
1456            let low = if !parents.is_empty()
1457                && parents
1458                    .iter()
1459                    .any(|&p| result.contains(p) || roots.contains(p))
1460            {
1461                // Include `span.low` if any (span.low's) parent is in the result set.
1462                span.low
1463            } else {
1464                // No parents in `result` set.
1465                match result
1466                    .intersection_span_min(span)
1467                    .or_else(|| roots.intersection(&span.into()).min())
1468                {
1469                    // `span` intersect partially with `roots | result`.
1470                    Some(id) => id,
1471                    None => continue,
1472                }
1473            };
1474            if low > master_max_id {
1475                break;
1476            }
1477            let result_span = IdSpan::from(low..=span.high);
1478            trace(&|| format!("  push {:?}", &result_span));
1479            result.push_span_asc(result_span);
1480        }
1481
1482        // For the non-master group, only check flat segments covered by
1483        // `ancestors`.
1484        //
1485        // This is usually more efficient, because the non-master group can
1486        // have lots of heads (created in the past) that are no longer visible
1487        // or interesting. For a typical query like `x::y`, it might just select
1488        // a few heads in the non-master group. It's a waste of time to iterate
1489        // through lots of invisible segments.
1490        let non_master_spans = ancestors
1491            .intersection(&IdSpan::from(Group::NON_MASTER.min_id()..=Group::MAX.max_id()).into());
1492        // Visit in ascending order.
1493        let mut span_iter = non_master_spans.as_spans().iter().rev().cloned();
1494        let mut next_optional_span = span_iter.next();
1495        while let Some(next_span) = next_optional_span {
1496            // The "next_span" could be larger than a flat segment.
1497            let seg = match self.find_flat_segment_including_id(next_span.low)? {
1498                Some(seg) => seg,
1499                None => break,
1500            };
1501            let seg_span = seg.span()?;
1502            trace(&|| format!(" visit {:?} => {:?}", &next_span, &seg));
1503            // The overlap part of the flat segment and the span from 'ancestors'.
1504            let mut overlap_span =
1505                IdSpan::from(seg_span.low.max(next_span.low)..=seg_span.high.min(next_span.high));
1506            if roots.contains(overlap_span.low) {
1507                // Descendants includes 'overlap_span' since 'low' is in 'roots'.
1508                // (no need to check 'result' - it does not include anything in 'overlap')
1509                trace(&|| format!("  push {:?} (root contains low)", &overlap_span));
1510                result.push_span_asc(overlap_span);
1511            } else if next_span.low == seg_span.low {
1512                let parents = seg.parents()?;
1513                if !parents.is_empty()
1514                    && parents
1515                        .into_iter()
1516                        .any(|p| result.contains(p) || roots.contains(p))
1517                {
1518                    // Descendants includes 'overlap_span' since parents are in roots or result.
1519                    trace(&|| format!("  push {:?} (root contains parent)", &overlap_span));
1520                    result.push_span_asc(overlap_span);
1521                } else if overlap_span.low <= max_root && overlap_span.high >= min_root {
1522                    // If 'overlap_span' overlaps with roots, part of it should be in
1523                    // 'Descendants' result:
1524                    //
1525                    //            root1  root2
1526                    //               v    v
1527                    //    (low) |-- overlap-span --| (high)
1528                    //               |-------------|
1529                    //               push this part to result
1530                    let roots_intesection = roots.intersection(&overlap_span.into());
1531                    if let Some(id) = roots_intesection.min() {
1532                        overlap_span.low = id;
1533                        trace(&|| format!("  push {:?} (root in span)", &overlap_span));
1534                        result.push_span_asc(overlap_span);
1535                    }
1536                }
1537            } else {
1538                // This block practically does not happen if `ancestors` is
1539                // really "ancestors" (aka. `ancestors(ancestors)` is
1540                // `ancestors`), because `ancestors` will not include
1541                // a flat segment without including the segment's low id.
1542                //
1543                // But, in case it happens (because `ancestors` is weird),
1544                // do something sensible.
1545
1546                // `next_span.low - 1` is the parent of `next_span.low`,
1547                debug_assert!(
1548                    false,
1549                    "ancestors in descendants_intersection is
1550                              not real ancestors"
1551                );
1552                let p = next_span.low - 1;
1553                if result.contains(p) || roots.contains(p) {
1554                    trace(&|| format!("  push {:?} ({:?} was included)", &overlap_span, p));
1555                    result.push_span_asc(overlap_span);
1556                }
1557            }
1558            // Update next_optional_span.
1559            next_optional_span = IdSpan::try_from_bounds(overlap_span.high + 1..=next_span.high)
1560                .or_else(|| span_iter.next());
1561        }
1562
1563        trace(&|| format!(" intersect with {:?}", &ancestors));
1564        result = result.intersection(ancestors);
1565
1566        Ok(result)
1567    }
1568
1569    /// Find segments that cover the given `id_set` exactly.
1570    /// Returned segments are in DESC order.
1571    fn id_set_to_id_segments(&self, id_set: &IdSet) -> Result<VecDeque<IdSegment>> {
1572        let max_level = self.max_level()?;
1573        self.id_set_to_id_segments_with_max_level(id_set, max_level)
1574    }
1575
1576    /// Find lower level segments that cover the given `id_segment` exactly.
1577    /// Returned segments are in DESC order.
1578    fn id_segment_to_lower_level_id_segments(
1579        &self,
1580        id_segment: &IdSegment,
1581    ) -> Result<VecDeque<IdSegment>> {
1582        let max_level = match id_segment.level {
1583            0 => return Err(Programming(
1584                "id_segment_to_lower_level_id_segments() requires non-flat (level > 0) segments"
1585                    .to_string(),
1586            )),
1587            l => l - 1,
1588        };
1589        let span = IdSpan::new(id_segment.low, id_segment.high);
1590        let id_set = IdSet::from(span);
1591        self.id_set_to_id_segments_with_max_level(&id_set, max_level)
1592    }
1593
1594    /// Find segments with max level limitation that cover the given `id_set` exactly.
1595    /// Returned segments are in DESC order.
1596    fn id_set_to_id_segments_with_max_level(
1597        &self,
1598        id_set: &IdSet,
1599        max_level: Level,
1600    ) -> Result<VecDeque<IdSegment>> {
1601        fn trace(msg: &dyn Fn() -> String) {
1602            trace!(target: "dag::algo::tosegments", "{}", msg());
1603        }
1604        debug!(target: "dag::algo::tosegments", "id_set_to_id_segments({:?}, level={})", &id_set, max_level);
1605
1606        let max_level = max_level.min(self.max_level()?);
1607        let mut result = VecDeque::new();
1608        'next_span: for span in id_set.iter_span_desc() {
1609            trace(&|| format!(" visiting span {:?}", &span));
1610            let mut span: IdSpan = *span;
1611
1612            'current_span: loop {
1613                // Try high level segments.
1614                for level in (1..=max_level).rev() {
1615                    let seg = match self.find_segment_by_head_and_level(span.high, level)? {
1616                        None => continue,
1617                        Some(seg) => seg,
1618                    };
1619
1620                    let seg_span = seg.span()?;
1621                    debug_assert_eq!(seg_span.high, span.high);
1622
1623                    if seg_span.low < span.low {
1624                        trace(&|| format!("  skip  lv {} seg {:?}", level, &seg));
1625                        continue;
1626                    } else {
1627                        trace(&|| format!("  found lv {} seg {:?}", level, &seg));
1628                    }
1629
1630                    let id_seg = IdSegment {
1631                        high: seg_span.high,
1632                        low: seg_span.low,
1633                        parents: seg.parents()?,
1634                        level,
1635                        has_root: seg.has_root()?,
1636                    };
1637                    trace(&|| format!("  push {}..={}", id_seg.low, id_seg.high));
1638                    result.push_back(id_seg);
1639
1640                    if seg_span.low == span.low {
1641                        continue 'next_span;
1642                    } else {
1643                        span.high = seg_span.low - 1;
1644                        debug_assert!(span.high >= span.low);
1645                        continue 'current_span;
1646                    }
1647                }
1648
1649                // Query flat segments.
1650                let seg = match self.find_flat_segment_including_id(span.high)? {
1651                    None => return bug(format!("flat segments does not cover {:?}", span)),
1652                    Some(seg) => seg,
1653                };
1654                trace(&|| format!("  found flat seg {:?}", &seg));
1655
1656                let seg_span = seg.span()?;
1657                debug_assert!(seg_span.high >= span.high);
1658
1659                let (low, parents) = if seg_span.low < span.low {
1660                    (span.low, vec![span.low - 1])
1661                } else {
1662                    (seg_span.low, seg.parents()?)
1663                };
1664
1665                let has_root = parents.is_empty();
1666                let id_seg = IdSegment {
1667                    high: span.high,
1668                    low,
1669                    parents,
1670                    level: 0,
1671                    has_root,
1672                };
1673                trace(&|| format!("  push {}..={}", id_seg.low, id_seg.high));
1674                result.push_back(id_seg);
1675
1676                if low == span.low {
1677                    continue 'next_span;
1678                } else {
1679                    span.high = low - 1;
1680                    debug_assert!(span.high >= span.low);
1681                }
1682            }
1683        }
1684        Ok(result)
1685    }
1686
1687    /// Suggest the next `Id` to test, during a bisect.
1688    ///
1689    /// - `(low, high)` are explicitly marked ends, either `(good, bad)` or `(bad, good)`.
1690    /// - `skip` is an explicitly skipped set.
1691    ///
1692    /// Return `(id_to_bisect_next, untested_set, roots(high::))`.
1693    ///
1694    /// If `id_to_bisect_next` is `None`, the bisect is completed. At this time,
1695    /// `roots(high::)` is the "first good/bad" set. `untested_set` is usually
1696    /// empty, or a subset of `skip`.
1697    fn suggest_bisect(
1698        &self,
1699        low: &IdSet,
1700        high: &IdSet,
1701        skip: &IdSet,
1702    ) -> Result<(Option<Id>, IdSet, IdSet)> {
1703        debug!(target: "dag::algo::suggest_bisect", "suggest_bisect({:?}, {:?}, {:?})", low, high, skip);
1704        // Handling the two ends is NOT symmetric. For example, assuming high = bad:
1705        //
1706        //     Y  high (bad)
1707        //    / \
1708        //  A10  B10
1709        //   :    :
1710        //  A01  B01
1711        //    \ /
1712        //     X  low (good)
1713        //
1714        // - If A05 is "good", then A01::A05 is implicit "good", B01::B10 remains unknown.
1715        // - If A05 is "bad", then A05::Y is implicit "bad", B01::B10 can be skipped.
1716        //
1717        // The final bisect output is a single commit ("first bad"). The contract does not require us
1718        // to figure out all "first bad"s in all branches. If X is good, A05 is bad, then we know one
1719        // of the "first bad"s must exist in X::A05 and we can ignore other parts like B01::B10.
1720
1721        // low::low = implicit good.
1722        let connected_low = self.range(low.clone(), low.clone())?;
1723
1724        // Ignore interesting parts - see above for why this is only for "high".
1725        let high = self.roots(self.range(high.clone(), high.clone())?)?;
1726
1727        // The bisect range.
1728        let untested = self
1729            .range(low.clone(), high.clone())?
1730            .difference(&connected_low)
1731            .difference(&high);
1732        trace!(target: "dag::algo::suggest_bisect", " untested = {:?}", untested);
1733
1734        let total = untested.count();
1735        let ideal = (total + 1) / 2;
1736
1737        // Consider each linear ranges (flat segments).
1738        let flat_segments =
1739            self.id_set_to_id_segments_with_max_level(&untested.difference(skip), 0)?;
1740        let mut best_score = 0;
1741        let mut best_id = None;
1742        for id_segment in flat_segments.into_iter().rev() {
1743            // Pick "P", let "len(ancestors(P) & untested)" be "x".
1744            // - If P is good, bisect total -= x.
1745            // - If P is bad, bisect total -= (total + 1 - x).
1746            // We want to make good progress even in the worst case, maximize "min(total + 1 - x, x)".
1747            // The best happens when "total + 1 - x" is "x", when x = ideal.
1748            //
1749            // if good, remove "ancestors(P)" from "untested",
1750            // if bad, remove "P + (untested - ancestors(P))" from "untested".
1751
1752            // "ancestors(high)" can be cheaper to calculate than "ancestor(low)", if it follows a
1753            // high-level segment. Once we get "ancestors(high)", we can calculate the "count" for
1754            // "ancestors(p for p in low..=high)" without calling (slower) "ancestors(p)", because
1755            // the segment is "flat" (level 0), it cannot have merges except for "low".
1756            let ancestors_high = self.ancestors(id_segment.high.into())?;
1757            let high_count = ancestors_high.intersection(&untested).count();
1758            let low_count = high_count.saturating_sub(id_segment.high.0 - id_segment.low.0);
1759            let best_count = ideal.max(low_count).min(high_count);
1760            let score = best_count.min(total + 1 - best_count);
1761            if score > best_score {
1762                let id = id_segment.low + (best_count - low_count);
1763                trace!(target: "dag::algo::suggest_bisect", "  id={} score={}", id, score);
1764                best_score = score;
1765                best_id = Some(id);
1766                if best_count == ideal {
1767                    break;
1768                }
1769            }
1770        }
1771
1772        Ok((best_id, untested, high))
1773    }
1774}
1775
1776impl<S: IdDagStore> IdDagAlgorithm for S {}
1777
1778impl<Store: IdDagStore> Deref for IdDag<Store> {
1779    type Target = dyn IdDagAlgorithm;
1780
1781    fn deref(&self) -> &Self::Target {
1782        &self.store
1783    }
1784}
1785
1786// Full IdMap -> Sparse IdMap
1787impl<Store: IdDagStore> IdDag<Store> {
1788    /// Copy a subset of "Universal" mapping from `full_idmap` to
1789    /// `sparse_idmap`. See [`IdDag::universal`].
1790    pub async fn write_sparse_idmap<M: crate::idmap::IdMapWrite>(
1791        &self,
1792        full_idmap: &dyn crate::ops::IdConvert,
1793        sparse_idmap: &mut M,
1794    ) -> Result<()> {
1795        for id in self.universal_ids()? {
1796            let name = full_idmap.vertex_name(id).await?;
1797            sparse_idmap.insert(id, name.as_ref()).await?
1798        }
1799        Ok(())
1800    }
1801
1802    /// Return a subset of [`Id`]s that should be "Universal", including:
1803    ///
1804    /// - Heads of the master group.
1805    /// - Parents of merges (a merge is an id with multiple parents)
1806    ///   in the MASTER group.
1807    ///
1808    /// See also [`FirstAncestorConstraint::KnownUniversally`].
1809    ///
1810    /// Complexity: `O(flat segments)` for both time and space.
1811    pub fn universal_ids(&self) -> Result<BTreeSet<Id>> {
1812        let mut result = BTreeSet::new();
1813        for seg in self.next_segments(Id::MIN, 0)? {
1814            let parents = seg.parents()?;
1815            // Is it a merge?
1816            if parents.len() >= 2 {
1817                for id in parents {
1818                    debug_assert_eq!(id.group(), Group::MASTER);
1819                    result.insert(id);
1820                }
1821            }
1822        }
1823        for head in self.heads_ancestors(self.master_group()?)? {
1824            debug_assert_eq!(head.group(), Group::MASTER);
1825            result.insert(head);
1826        }
1827        Ok(result)
1828    }
1829}
1830
1831/// There are many `x~n`s that all resolves to a single commit.
1832/// Constraint about `x~n`.
1833#[derive(Clone)]
1834pub enum FirstAncestorConstraint {
1835    /// No constraints.
1836    None,
1837
1838    /// `x` and its slice is expected to be known both locally and remotely.
1839    ///
1840    /// Practically, this means `x` is either:
1841    /// - referred explicitly by `heads`.
1842    /// - a parent of a merge that is an ancestor of `heads`.
1843    ///   (a merge is a vertex with more than one parents)
1844    ///   (at clone and pull time, client gets a sparse idmap including them)
1845    ///
1846    /// This also enforces `x` to be part of `ancestors(heads)`.
1847    KnownUniversally { heads: IdSet },
1848}
1849
1850impl<Store: IdDagStore> IdDag<Store> {
1851    /// Remove `set` and their descendants. Return `descendents(set)`.
1852    ///
1853    /// The returned `descendants(set)` is usually used to remove
1854    /// related entries in the `IdMap` to keep the IdMap and IdDag
1855    /// in sync.
1856    pub(crate) fn strip(&mut self, set: IdSet) -> Result<IdSet> {
1857        fn trace(msg: &dyn Fn() -> String) {
1858            trace!(target: "dag::iddag::remove", "{}", msg());
1859        }
1860
1861        let set = self.descendants(set)?;
1862        trace(&|| format!("descendants(set) = {:?}", &set));
1863
1864        if set.is_empty() {
1865            return Ok(set);
1866        }
1867
1868        // [(segment, new_high)]
1869        let mut to_resize: Vec<(Segment, Option<Id>)> = Vec::new();
1870
1871        for span in set.iter_span_desc() {
1872            trace(&|| format!(" visiting span {:?}", &span));
1873
1874            // span:   (low)    [------------] (high)
1875            // [seg]:       [-------][---][--]
1876            // new_seg:     [--]
1877            for seg in self.iter_segments_descending(span.high, 0)? {
1878                let seg = seg?;
1879                let seg_span = seg.span()?;
1880                debug_assert!(seg_span.high <= span.high); // by iter_segments_descending
1881                if seg_span.high < span.low {
1882                    break;
1883                }
1884
1885                let new_high = if seg_span.low < span.low {
1886                    let new_high = span.low - 1;
1887                    trace(&|| format!("  truncate flat seg {:?} at {}", &seg, new_high));
1888                    Some(new_high)
1889                } else {
1890                    trace(&|| format!("  remove flat seg {:?}", &seg));
1891                    None
1892                };
1893                // It's not possible to truncate a segment twice iterating different `span`s.
1894                // seg:     [------------]
1895                // [span]:     [---]  [--] <- not possible
1896                // This is because "descendants" are selected. If `i` is in the `set`, and `i`
1897                // is in a flat `seg` then `i..=seg.high` should all be in the `set` to remove.
1898                debug_assert!(to_resize.iter().all(|(s, _)| s != &seg));
1899                to_resize.push((seg, new_high));
1900            }
1901        }
1902
1903        // Notes about why we can keep existing SegmentFlags when resizing:
1904        //
1905        // There are only 2 flags: HAS_ROOT, and ONLY_HEAD.
1906        // - HAS_ROOT flag can be preserved. It's based on `parents.is_empty()`
1907        //   which is not changed by resizing.
1908        // - ONLY_HEAD flag can be perserved. Suppose the current flat segment
1909        //   has ONLY_HEAD set (i.e. heads(0:high) = [high]) and is being truncated
1910        //   from low..=high to low..=mid:
1911        //
1912        //          |<-new seg->|<-span (to remove)->|
1913        //          |<-------seg-------->|
1914        //          low-------mid-----high
1915        //
1916        //   descendants(set) is noted as `X` for removal. Now we prove that all
1917        //   `X`s must be > mid. Suppose there is an X that is <= mid, X cannot
1918        //   be in the low--high flat segment otherwise mid will be removed.
1919        //   X must be an ancestor of "high" by ONLY_HEAD definition (otherwise
1920        //   there will be more than 1 heads in 0:high). So there must be a path
1921        //   from X to high but the path does not go through mid. The graph looks
1922        //   like:
1923        //
1924        //          low-------mid--Y--high   (a flat segment)
1925        //                        /
1926        //              X--...----           (X:: are to be removed)
1927        //
1928        //   The problem is that Y, min(X::high & mid::high), is a merge. It
1929        //   contradicts flat segment defination that only "low" can be a merge.
1930        //
1931        //   Therefore X (to be removed) must be > mid. Nothing in 0:mid is removed.
1932        //   Because low..=high is a flat segment, removing `(mid+1):high`
1933        //   is removing a linear sub-graph and cannot increase number of heads.
1934        //   Therefore the new truncated segment low..=mid can still have the
1935        //   ONLY_HEAD flag.
1936
1937        for (seg, new_high) in to_resize {
1938            self.store.resize_flat_segment(&seg, new_high)?;
1939        }
1940
1941        // strip() is not an append-only change. Use an incompatible version.
1942        // However, if it's just for the VIRTUAL group, then do not bump version.
1943        let need_version_bump = match (
1944            set.min().map(|id| id.group()),
1945            set.max().map(|id| id.group()),
1946        ) {
1947            (Some(Group::VIRTUAL), Some(Group::VIRTUAL)) => false,
1948            _ => true,
1949        };
1950        if need_version_bump {
1951            self.version = VerLink::new();
1952        }
1953
1954        Ok(set)
1955    }
1956}
1957
1958impl<Store: Persist + IdDagStore> Persist for IdDag<Store> {
1959    type Lock = <Store as Persist>::Lock;
1960
1961    fn lock(&mut self) -> Result<Self::Lock> {
1962        self.store.lock()
1963    }
1964
1965    fn reload(&mut self, lock: &Self::Lock) -> Result<()> {
1966        self.store.reload(lock)
1967    }
1968
1969    fn persist(&mut self, lock: &Self::Lock) -> Result<()> {
1970        self.store.persist(lock)?;
1971        self.store.cache_verlink(&self.version);
1972        Ok(())
1973    }
1974}
1975
1976impl<Store: IdDagStore> Debug for IdDag<Store> {
1977    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1978        let mut first = true;
1979        for level in 0..=self.max_level().unwrap_or_default() {
1980            if !first {
1981                write!(f, "\n")?;
1982            }
1983            first = false;
1984            write!(f, "Lv{}:", level)?;
1985
1986            for group in Group::ALL.iter() {
1987                let segments = self.next_segments(group.min_id(), level).unwrap();
1988                if !segments.is_empty() {
1989                    for segment in segments {
1990                        write!(f, " {:?}", segment)?;
1991                    }
1992                }
1993            }
1994        }
1995        Ok(())
1996    }
1997}
1998
1999/// Lazily answer `any(...)`, `all(...)`.
2000struct LazyPredicate<P> {
2001    ids: Vec<Id>,
2002    predicate: P,
2003    true_count: usize,
2004    false_count: usize,
2005}
2006
2007impl<P: Fn(Id) -> Result<bool>> LazyPredicate<P> {
2008    pub fn new(ids: Vec<Id>, predicate: P) -> Self {
2009        Self {
2010            ids,
2011            predicate,
2012            true_count: 0,
2013            false_count: 0,
2014        }
2015    }
2016
2017    pub fn any(&mut self) -> Result<bool> {
2018        loop {
2019            if self.true_count > 0 {
2020                return Ok(true);
2021            }
2022            if self.true_count + self.false_count == self.ids.len() {
2023                return Ok(false);
2024            }
2025            self.test_one()?;
2026        }
2027    }
2028
2029    pub fn all(&mut self) -> Result<bool> {
2030        loop {
2031            if self.true_count == self.ids.len() {
2032                return Ok(true);
2033            }
2034            if self.false_count > 0 {
2035                return Ok(false);
2036            }
2037            self.test_one()?;
2038        }
2039    }
2040
2041    fn test_one(&mut self) -> Result<()> {
2042        let i = self.true_count + self.false_count;
2043        if (self.predicate)(self.ids[i])? {
2044            self.true_count += 1;
2045        } else {
2046            self.false_count += 1;
2047        }
2048        Ok(())
2049    }
2050}
2051
2052impl<S: StorageVersion> StorageVersion for IdDag<S> {
2053    fn storage_version(&self) -> (u64, u64) {
2054        self.store.storage_version()
2055    }
2056}
2057
2058fn default_seg_size() -> usize {
2059    DEFAULT_SEG_SIZE.load(Ordering::Acquire)
2060}
2061
2062/// Update global default segment size. The segment size affects high-level
2063/// segments. A level N segment covers at most "segment size" level N-1
2064/// segments.
2065///
2066/// Setting this to a smaller number like 3 can help demonstrate the concept
2067/// using smaller graphs. Panic if `new_size` is less than 2.
2068///
2069/// Existing IdDags are not affected.
2070pub fn set_default_seg_size(new_size: usize) {
2071    DEFAULT_SEG_SIZE.store(new_size, Ordering::Release);
2072}
2073
2074#[cfg(test)]
2075mod tests {
2076    use tempfile::tempdir;
2077
2078    use super::*;
2079    use crate::iddagstore::tests::dump_store_state;
2080    use crate::tests::dbg;
2081    use crate::tests::dbg_iter;
2082    use crate::tests::nid;
2083    use crate::tests::vid;
2084
2085    #[test]
2086    fn test_segment_basic_lookups() {
2087        let dir = tempdir().unwrap();
2088        let mut dag = IdDag::open(dir.path()).unwrap();
2089        assert_eq!(dag.all().unwrap().count(), 0);
2090
2091        let flags = SegmentFlags::empty();
2092
2093        dag.insert(flags, 0, Id::MIN, Id(50), &[]).unwrap();
2094        assert_eq!(dag.all().unwrap().max(), Some(Id(50)));
2095
2096        dag.insert(flags, 0, Id(51), Id(100), &[Id(50)]).unwrap();
2097        assert_eq!(dag.all().unwrap().max(), Some(Id(100)));
2098
2099        dag.insert(flags, 0, Id(101), Id(150), &[]).unwrap();
2100        assert_eq!(dag.all().unwrap().max(), Some(Id(150)));
2101
2102        dag.insert(flags, 1, Id::MIN, Id(150), &[]).unwrap();
2103        assert_eq!(dag.all().unwrap().max(), Some(Id(150)));
2104
2105        // Helper functions to make the below lines shorter.
2106        let low_by_head = |head, level| match dag.find_segment_by_head_and_level(Id(head), level) {
2107            Ok(Some(seg)) => seg.span().unwrap().low.0 as i64,
2108            Ok(None) => -1,
2109            _ => panic!("unexpected error"),
2110        };
2111
2112        let low_by_id = |id| match dag.find_flat_segment_including_id(Id(id)) {
2113            Ok(Some(seg)) => seg.span().unwrap().low.0 as i64,
2114            Ok(None) => -1,
2115            _ => panic!("unexpected error"),
2116        };
2117
2118        assert_eq!(low_by_head(0, 0), -1);
2119        assert_eq!(low_by_head(49, 0), -1);
2120        assert_eq!(low_by_head(50, 0), -1); // 0..=50 is merged into 0..=100.
2121        assert_eq!(low_by_head(51, 0), -1);
2122        assert_eq!(low_by_head(150, 0), 101);
2123        assert_eq!(low_by_head(100, 1), -1);
2124
2125        assert_eq!(low_by_id(0), 0);
2126        assert_eq!(low_by_id(30), 0);
2127        assert_eq!(low_by_id(49), 0);
2128        assert_eq!(low_by_id(50), 0);
2129        assert_eq!(low_by_id(51), 0);
2130        assert_eq!(low_by_id(52), 0);
2131        assert_eq!(low_by_id(99), 0);
2132        assert_eq!(low_by_id(100), 0);
2133        assert_eq!(low_by_id(101), 101);
2134        assert_eq!(low_by_id(102), 101);
2135        assert_eq!(low_by_id(149), 101);
2136        assert_eq!(low_by_id(150), 101);
2137        assert_eq!(low_by_id(151), -1);
2138    }
2139
2140    fn get_parents(id: Id) -> Result<Vec<Id>> {
2141        match id.0 {
2142            0..=2 => Ok(Vec::new()),
2143            _ => Ok(vec![id - 1, Id(id.0 / 2)]),
2144        }
2145    }
2146
2147    #[test]
2148    fn test_sync_reload() {
2149        let dir = tempdir().unwrap();
2150        let mut dag = IdDag::open(dir.path()).unwrap();
2151        assert_eq!(dag.all().unwrap().max(), None);
2152
2153        let lock = dag.lock().unwrap();
2154        dag.reload(&lock).unwrap();
2155        dag.build_segments(Id(0), &get_parents).unwrap();
2156        dag.build_segments(Id(1001), &get_parents).unwrap();
2157
2158        dag.persist(&lock).unwrap();
2159        drop(lock);
2160
2161        assert_eq!(dag.max_level().unwrap(), 3);
2162        assert_eq!(
2163            dag.children(Id(1000).into())
2164                .unwrap()
2165                .iter_desc()
2166                .collect::<Vec<Id>>(),
2167            vec![Id(1001)]
2168        );
2169    }
2170
2171    #[test]
2172    fn test_all() {
2173        let dir = tempdir().unwrap();
2174        let mut dag = IdDag::open(dir.path()).unwrap();
2175        assert!(dag.all().unwrap().is_empty());
2176        dag.build_segments(Id(1001), &get_parents).unwrap();
2177        let all = dag.all().unwrap();
2178        // Id 0 is not referred.
2179        assert_eq!(dbg(&all), "1..=1001");
2180        assert_eq!(all.count(), 1001);
2181
2182        // Insert discontinuous segments.
2183        let mut dag = IdDag::new_in_memory();
2184        // 10..=20
2185        dag.build_segments(Id(20), &|p| {
2186            Ok(if p > Id(10) { vec![p - 1] } else { vec![] })
2187        })
2188        .unwrap();
2189        // 3..=5, and 30..=40
2190        dag.build_segments(Id(40), &|p| {
2191            Ok(if p == Id(30) {
2192                vec![Id(5), Id(20)]
2193            } else if p > Id(3) {
2194                vec![p - 1]
2195            } else {
2196                vec![]
2197            })
2198        })
2199        .unwrap();
2200        let all = dag.all().unwrap();
2201        assert_eq!(dbg(&all), "3 4 5 10..=20 30..=40");
2202        assert_eq!(all.count(), 25);
2203    }
2204
2205    #[test]
2206    fn test_flat_segments() {
2207        let dir = tempdir().unwrap();
2208        let test_dir = tempdir().unwrap();
2209        let mut dag = IdDag::open(dir.path()).unwrap();
2210        let mut test_dag = IdDag::open(test_dir.path()).unwrap();
2211
2212        let empty_dag_segments = dag.flat_segments(Group::MASTER).unwrap();
2213        test_dag
2214            .build_segments_from_prepared_flat_segments(&empty_dag_segments)
2215            .unwrap();
2216        assert!(test_dag.all().unwrap().is_empty());
2217
2218        dag.build_segments(Id(0), &get_parents).unwrap();
2219        dag.build_segments(Id(1001), &get_parents).unwrap();
2220        let flat_segments = dag.flat_segments(Group::MASTER).unwrap();
2221        test_dag
2222            .build_segments_from_prepared_flat_segments(&flat_segments)
2223            .unwrap();
2224
2225        assert_eq!(test_dag.max_level().unwrap(), 3);
2226        assert_eq!(test_dag.all().unwrap().count(), 1002);
2227
2228        let subset_flat_segments = dag
2229            .idset_to_flat_segments(IdSet::from_spans(vec![2..=4]))
2230            .unwrap();
2231        assert_eq!(subset_flat_segments.segments.len(), 3);
2232    }
2233
2234    #[test]
2235    fn test_discontinous_flat_segment_only_head() {
2236        let prepared = PreparedFlatSegments {
2237            segments: vec![
2238                FlatSegment {
2239                    low: Id(0),
2240                    high: Id(10),
2241                    parents: vec![],
2242                },
2243                FlatSegment {
2244                    low: Id(20),
2245                    high: Id(30),
2246                    parents: vec![Id(10)],
2247                },
2248            ]
2249            .into_iter()
2250            .collect(),
2251        };
2252
2253        // Segment 20..=30 shouldn't have the "ONLY_HEAD" flag because of the gap.
2254        // In debug output it does not have "H" prefix.
2255        let mut dag = IdDag::new_in_memory();
2256        dag.build_segments_from_prepared_flat_segments(&prepared)
2257            .unwrap();
2258        let iter = dag.iter_segments_ascending(Id(0), 0).unwrap();
2259        assert_eq!(dbg_iter(iter), "[RH0-10[], 20-30[10]]");
2260    }
2261
2262    #[test]
2263    fn test_roots_max_level_empty() {
2264        // Create segments in a way that the highest level
2265        // contains no segments in the master group.
2266        let mut iddag = IdDag::new_in_memory();
2267        let mut prepared = PreparedFlatSegments {
2268            segments: vec![
2269                FlatSegment {
2270                    low: Id(0),
2271                    high: Id(10),
2272                    parents: vec![],
2273                },
2274                FlatSegment {
2275                    low: nid(0),
2276                    high: nid(4),
2277                    parents: vec![],
2278                },
2279            ]
2280            .into_iter()
2281            .collect(),
2282        };
2283        for i in 1..=3 {
2284            prepared.segments.insert(FlatSegment {
2285                low: nid(5 * i),
2286                high: nid(5 * i + 1),
2287                parents: vec![],
2288            });
2289            prepared.segments.insert(FlatSegment {
2290                low: nid(5 * i + 2),
2291                high: nid(5 * i + 4),
2292                parents: vec![nid(5 * i + 1), nid(5 * i - 1)],
2293            });
2294        }
2295        iddag.set_new_segment_size(2);
2296        iddag
2297            .build_segments_from_prepared_flat_segments(&prepared)
2298            .unwrap();
2299
2300        // The highest level is not 0. But the highest level exist in the non-master
2301        // group, not the master group.
2302        assert_eq!(iddag.max_level().unwrap(), 2);
2303
2304        let all = iddag.all().unwrap();
2305        assert_eq!(dbg(&all), "0..=10 N0..=N19");
2306
2307        let roots = iddag.roots(all).unwrap();
2308        assert_eq!(dbg(roots), "0 N0 N5 N10 N15");
2309    }
2310
2311    #[test]
2312    fn test_strip() {
2313        let mut iddag = IdDag::new_in_memory();
2314        let mut prepared = PreparedFlatSegments::default();
2315        prepared.segments.insert(FlatSegment {
2316            low: Id(0),
2317            high: Id(100),
2318            parents: Vec::new(),
2319        });
2320        prepared.segments.insert(FlatSegment {
2321            low: Id(101),
2322            high: Id(200),
2323            parents: vec![Id(50)],
2324        });
2325        prepared.segments.insert(FlatSegment {
2326            low: Id(201),
2327            high: Id(300),
2328            parents: vec![Id(90)],
2329        });
2330        prepared.segments.insert(FlatSegment {
2331            low: nid(0),
2332            high: nid(100),
2333            parents: vec![Id(50), Id(250)],
2334        });
2335        prepared.segments.insert(FlatSegment {
2336            low: nid(101),
2337            high: nid(200),
2338            parents: vec![Id(50)],
2339        });
2340        iddag.set_new_segment_size(2);
2341        iddag
2342            .build_segments_from_prepared_flat_segments(&prepared)
2343            .unwrap();
2344        let all_before_remove = iddag.all().unwrap();
2345        let removed = iddag.strip(Id(70).into()).unwrap();
2346        let all_after_remove = iddag.all().unwrap();
2347        assert_eq!(
2348            all_before_remove.difference(&removed).as_spans(),
2349            all_after_remove.as_spans()
2350        );
2351        assert_eq!(
2352            all_after_remove.union(&removed).as_spans(),
2353            all_before_remove.as_spans()
2354        );
2355        assert_eq!(dbg(&removed), "70..=100 201..=300 N0..=N100");
2356        assert_eq!(
2357            dump_store_state(&iddag.store, &all_before_remove),
2358            "\nLv0: RH0-69[], 101-200[50], N101-N200[50]\nP->C: 50->101, 50->N101"
2359        );
2360    }
2361
2362    #[test]
2363    fn test_virtual() -> Result<()> {
2364        let dir = tempdir()?;
2365        let mut iddag = IdDag::open(dir.path())?;
2366        let mut prepared = PreparedFlatSegments::default();
2367        prepared.segments.insert(FlatSegment {
2368            low: nid(0),
2369            high: nid(5),
2370            parents: Vec::new(),
2371        });
2372        prepared.segments.insert(FlatSegment {
2373            low: vid(0),
2374            high: vid(3),
2375            parents: vec![nid(2)],
2376        });
2377        prepared.segments.insert(FlatSegment {
2378            low: vid(5),
2379            high: vid(8),
2380            parents: vec![nid(3), vid(2)],
2381        });
2382        iddag.build_segments_from_prepared_flat_segments(&prepared)?;
2383
2384        // all() skips VIRTUAL.
2385        assert_eq!(dbg(iddag.all()?), "N0..=N5");
2386
2387        // all_with_virtual() includes VIRTUAL.
2388        assert_eq!(dbg(iddag.all_with_virtual()?), "N0..=N5 V0..=V3 V5..=V8");
2389
2390        // children() follows into VIRTUAL.
2391        assert_eq!(dbg(iddag.children(nid(3).into())?), "N4 V5");
2392        assert_eq!(dbg(iddag.children_id(nid(3))?), "N4 V5");
2393        assert_eq!(dbg(iddag.children_set(nid(3).into())?), "N4 V5");
2394        assert_eq!(dbg(iddag.children_id(vid(2))?), "V3 V5");
2395        assert_eq!(dbg(iddag.children_set(vid(2).into())?), "V3 V5");
2396
2397        // descenants() follows into VIRTUAL.
2398        assert_eq!(
2399            dbg(iddag.descendants(nid(2).into())?),
2400            "N2..=N5 V0..=V3 V5..=V8"
2401        );
2402        assert_eq!(dbg(iddag.descendants(vid(2).into())?), "V2 V3 V5..=V8");
2403
2404        // range() works with VIRTUAL.
2405        assert_eq!(
2406            dbg(iddag.range(nid(1).into(), vid(2).into())?),
2407            "N1 N2 V0 V1 V2"
2408        );
2409
2410        // Reloading drops VIRTUAL.
2411        {
2412            let lock = iddag.lock()?;
2413            iddag.persist(&lock)?;
2414            let iddag2 = IdDag::open(dir.path())?;
2415            assert_eq!(dbg(iddag2.all_with_virtual()?), "N0..=N5");
2416        }
2417
2418        Ok(())
2419    }
2420
2421    #[test]
2422    fn test_id_set_to_id_segments() {
2423        let mut iddag = IdDag::new_in_memory();
2424
2425        // Insert some segments. Create a few levels.
2426        let mut prepared = PreparedFlatSegments::default();
2427        for g in &Group::ALL {
2428            let mut parents = vec![];
2429            for i in 0..=3 {
2430                let base = g.min_id() + 10 * i;
2431                prepared.segments.insert(FlatSegment {
2432                    low: base,
2433                    high: base + 4,
2434                    parents: parents.clone(),
2435                });
2436                prepared.segments.insert(FlatSegment {
2437                    low: base + 5,
2438                    high: base + 9,
2439                    parents: vec![base + 1, base + 4],
2440                });
2441                parents.push(base + 9);
2442            }
2443        }
2444        iddag.set_new_segment_size(2);
2445        iddag
2446            .build_segments_from_prepared_flat_segments(&prepared)
2447            .unwrap();
2448        // The highest level is not 0.
2449        assert_eq!(iddag.max_level().unwrap(), 2);
2450
2451        // Tests about id_set_to_id_segments_with_max_level.
2452        let t = |id_set, level| -> Vec<String> {
2453            let id_segs = iddag
2454                .id_set_to_id_segments_with_max_level(&id_set, level)
2455                .unwrap();
2456
2457            // Verify against other special-case APIs.
2458            if level >= iddag.max_level().unwrap() {
2459                let id_segs2 = iddag.id_set_to_id_segments(&id_set).unwrap();
2460                assert_eq!(&id_segs, &id_segs2);
2461            }
2462            if level == 0 {
2463                let flat_segments = iddag.idset_to_flat_segments(id_set).unwrap().segments;
2464                let id_segs2: Vec<IdSegment> = flat_segments
2465                    .into_iter()
2466                    .rev()
2467                    .map(|f| IdSegment {
2468                        high: f.high,
2469                        low: f.low,
2470                        parents: f.parents.clone(),
2471                        level: 0,
2472                        has_root: f.parents.is_empty(),
2473                    })
2474                    .collect();
2475                assert_eq!(&id_segs, &id_segs2);
2476            }
2477
2478            id_segs.into_iter().map(dbg).collect::<Vec<_>>()
2479        };
2480
2481        // Match a flat segment.
2482        assert_eq!(t(IdSet::from_spans(vec![10..=14]), 3), ["L0 10..=14 [9]"]);
2483
2484        // Match multiple flat segments.
2485        assert_eq!(
2486            t(IdSet::from_spans(vec![20..=29]), 3),
2487            ["L0 25..=29 [21, 24]", "L0 20..=24 [9, 19]"]
2488        );
2489
2490        // Match partial flat segments.
2491        assert_eq!(
2492            t(IdSet::from_spans(vec![21..=28]), 3),
2493            ["L0 25..=28 [21, 24]", "L0 21..=24 [20]"]
2494        );
2495
2496        // Match a high level segment.
2497        assert_eq!(t(IdSet::from_spans(vec![0..=24]), 3), ["L2 0..=24 []R"]);
2498
2499        // Respect max_level.
2500        assert_eq!(
2501            t(IdSet::from_spans(vec![0..=24]), 1),
2502            ["L1 15..=24 [11, 14, 9]", "L1 0..=14 []R"]
2503        );
2504
2505        // Match various segments. Pick the highest level possible.
2506        assert_eq!(
2507            t(IdSet::from_spans(vec![Id(0)..=Id(39), nid(0)..=nid(39)]), 3),
2508            [
2509                "L0 N35..=N39 [N31, N34]",
2510                "L0 N30..=N34 [N9, N19, N29]",
2511                "L1 N20..=N29 [N9, N19]",
2512                "L2 N0..=N19 []R",
2513                "L0 35..=39 [31, 34]",
2514                "L1 25..=34 [21, 24, 9, 19]",
2515                "L2 0..=24 []R"
2516            ]
2517        );
2518
2519        // Related APIs not covered by tests above.
2520        let high_level_id_seg = iddag
2521            .id_set_to_id_segments(&IdSet::from_spans(vec![0..=39]))
2522            .unwrap()
2523            .back()
2524            .unwrap()
2525            .clone();
2526        assert_eq!(dbg(&high_level_id_seg), "L2 0..=24 []R");
2527        let low_level_id_segs = iddag
2528            .id_segment_to_lower_level_id_segments(&high_level_id_seg)
2529            .unwrap();
2530        assert_eq!(
2531            dbg(&low_level_id_segs),
2532            "[L1 15..=24 [11, 14, 9], L1 0..=14 []R]"
2533        );
2534    }
2535}