monument 0.13.4

A fast and flexible composing engine.
Documentation
use datasize::DataSize;

use super::graph::{StartIdx, SuccIdx};

/// A container of prefix paths, stored as a linked-list style tree such that common prefixes are
/// only stored once.
#[derive(Debug)]
pub(super) struct Paths {
    /// Flat vector of [`PathNode`]s, stored compactly as 64-bit numbers.  The bits are used as
    /// follows:
    ///
    /// |   Bits   | [`PathNode::Start`] |  [`PathNode::Cons`]  | [`PathNode::Empty`] |
    /// |----------|---------------------|----------------------|---------------------|
    /// | ` 0..32` |        zeros        | `last` ([`PathId`])  | `next` ([`PathId`]) |
    /// | `32..63` |     [`StartIdx`]    | `succ` ([`SuccIdx`]) |        zeros        |
    /// | `63..64` |        zero         |         zero         |         one         |
    nodes: index_vec::IndexVec<PathId, u64>,
    /// The first `num_starts` elements of `nodes` are all [`PathNode::Start`]s
    num_starts: usize,
    /// The index of an [`PathNode::Empty`] node.  The empty nodes form a linked list, of which
    /// this is the head.
    first_empty: PathId,

    /// Number of non-[`PathNode::Empty`] nodes
    size: usize,
}

#[derive(Debug)]
enum PathNode {
    /// This node has no predecessors and is the start of a path
    Start(StartIdx),
    /// This node extends the `PathNode` at the `usize` index by taking the [`SuccIdx`]th successor
    /// of the currently reached node.
    Cons { last: PathId, succ: SuccIdx },
    /// This slot is empty and the next empty slot is at `next`
    Empty { next: PathId },
}

impl Paths {
    /// Crates an empty set of [`Paths`].
    pub(super) fn new() -> Self {
        Self {
            nodes: index_vec::IndexVec::new(),
            num_starts: 0,
            first_empty: NULL_NODE_IDX, // No empty nodes in the linked list
            size: 0,
        }
    }

    /// Add a new [`PathNode::Start`].
    ///
    /// # Panics
    ///
    /// Panics if any non-start nodes have been `add`ed
    pub(super) fn add_start(&mut self, start_idx: StartIdx) -> PathId {
        assert_eq!(
            self.nodes.len(),
            self.num_starts,
            "`add_start` called after other nodes were `add`ed."
        );
        self.size += 1;
        self.num_starts += 1;
        self.nodes.push(as_high_31_bits(start_idx.raw()))
    }

    /// Adds a new [`PathNode::Cons`] to this set of paths, returning its [`PathId`].
    pub(super) fn add(&mut self, last: PathId, succ: SuccIdx) -> PathId {
        // Generate the node data
        let mut node_data = last.raw() as u64; // Put `last` into the low 32 bits
        node_data |= as_high_31_bits(succ.raw()); // Put `succ` into the high 31 bits.  Note that
                                                  // this also leaves the 'empty' bit still as `0`

        self.size += 1; // It doesn't matter how the new node is added, the size always increases

        if self.first_empty == NULL_NODE_IDX {
            // No more empty nodes, so push to the vector
            self.nodes.push(node_data)
        } else {
            // Get the index of the first empty node, and remove it from the head of the empty list
            let empty_idx = self.first_empty;
            let next_empty = match self.get(empty_idx) {
                PathNode::Empty { next } => next,
                _ => panic!("`next_empty` pointed to a non-empty node"),
            };
            self.first_empty = next_empty;
            // Replace the empty node with the new node
            self.nodes[empty_idx] = node_data;
            empty_idx
        }
    }

    /// Run a garbage collection pass to mark any unreachable nodes as 'empty'
    pub(super) fn gc(&mut self, heads: impl IntoIterator<Item = PathId>) {
        // Mark which nodes are reachable, by marking all the non-start nodes as 'empty' then
        // marking any node reachable from `heads` as non-empty.  Start nodes cannot be GCed.
        for node_data in self.nodes.iter_mut().skip(self.num_starts) {
            *node_data |= EMPTY_BIT_MASK;
        }
        for head in heads {
            self.mark_path_as_non_empty(head);
        }
        // Rebuild the empty node list and count the number of non-empty nodes
        self.size = self.nodes.len();
        self.first_empty = NULL_NODE_IDX;
        for (idx, node_data) in self.nodes.iter_mut_enumerated().skip(self.num_starts) {
            let is_empty = *node_data & EMPTY_BIT_MASK != 0;
            if is_empty {
                // Push this node to the `empty` list
                *node_data = EMPTY_BIT_MASK | (self.first_empty.raw() as u64);
                self.first_empty = idx;
                self.size -= 1;
            }
        }
    }

    /// Mark this [`PathId`] and everything it reaches as 'non-empty'
    fn mark_path_as_non_empty(&mut self, head: PathId) {
        // Skip this if we've already marked this node (and its descendants) as reachable
        if self.nodes[head] & EMPTY_BIT_MASK == 0 {
            return;
        }
        // Set this node's 'empty' bit to `0` (it would have been set to `1` at the start of the GC
        // pass)
        self.nodes[head] &= !EMPTY_BIT_MASK;
        match self.get(head) {
            PathNode::Start(_) => {} // We've finished this linked list
            PathNode::Cons { last, succ: _ } => self.mark_path_as_non_empty(last),
            PathNode::Empty { .. } => unreachable!("We just set the 'empty' bit to 0"),
        }
    }

    /// Return the path which finishes at a given [`PathId`].
    pub(super) fn flatten(&self, node_idx: PathId) -> (StartIdx, Vec<SuccIdx>) {
        let mut succs = Vec::new();
        let start_idx = self.flatten_recursive(node_idx, &mut succs);
        (start_idx, succs)
    }

    fn flatten_recursive(&self, node_idx: PathId, succs: &mut Vec<SuccIdx>) -> StartIdx {
        match self.get(node_idx) {
            PathNode::Start(start_idx) => start_idx,
            PathNode::Cons { last, succ } => {
                let start_idx = self.flatten_recursive(last, succs);
                succs.push(succ);
                start_idx
            }
            PathNode::Empty { .. } => panic!("GCed nodes shouldn't be in a path"),
        }
    }

    /// Gets the [`PathNode`] at a given [`PathId`].
    fn get(&self, idx: PathId) -> PathNode {
        let data = self.nodes[idx];
        // Deconstruct the 64-bit value
        let is_empty = (data & EMPTY_BIT_MASK) != 0; // Node is `Empty` iff `EMPTY_BIT` is 1
        let is_start = idx.index() < self.num_starts; // Node is `Start` if it's within `num_starts`
        let low_32_bits = PathId::from_raw(data as u32); // Read bits `0..32`
        let high_31_bits = (data >> 32) as u32 & 0x7FFF_FFFF; // Read bits `32..63`

        // Construct node
        if is_start {
            assert!(!is_empty); // Start nodes shouldn't be GCed
            PathNode::Start(StartIdx::from_raw(high_31_bits))
        } else if is_empty {
            PathNode::Empty { next: low_32_bits }
        } else {
            PathNode::Cons {
                last: low_32_bits,
                succ: SuccIdx::from_raw(high_31_bits),
            }
        }
    }

    #[allow(dead_code)]
    fn debug_nodes(&self) {
        for (idx, data) in self.nodes.iter_enumerated() {
            let empty_bit = data >> 63;
            let high_31_bits = (data >> 32) & 0x7FFF_FFFF;
            let low_32_bits = *data as u32;
            println!(
                "{:>4?}: {} {:031b} {:032b} -> {:?}",
                idx.raw(),
                empty_bit,
                high_31_bits,
                low_32_bits,
                self.get(idx)
            );
        }
    }
}

impl DataSize for Paths {
    const IS_DYNAMIC: bool = true;

    const STATIC_HEAP_SIZE: usize = 0;

    fn estimate_heap_size(&self) -> usize {
        std::mem::size_of::<Self>() + self.nodes.len() * std::mem::size_of::<u64>()
    }
}

#[track_caller]
fn as_high_31_bits(v: u32) -> u64 {
    assert_eq!(
        v & (1 << 31),
        0,
        "High 31-bit values should be smaller than 2^31"
    );
    (v as u64) << 32
}

const EMPTY_BIT_MASK: u64 = 1 << 63; // Bits `63..64`

index_vec::define_index_type! {
    #[derive(DataSize)]
    pub struct PathId = u32;
}
const NULL_NODE_IDX: PathId = PathId::from_raw_unchecked(u32::MAX);