intrex 0.1.1

Intrusive doubly-linked lists with items addressed by indices
Documentation
//! Sorting a singly-linked list
use arrayvec::ArrayVec;

/// Provides methods to manipulate a singly-linked list for comparison sort
/// (e.g., [`sort`]).
pub trait SortAccessor<Index> {
    /// Get the next item of `index` in the list.
    fn next(&mut self, index: Index) -> Option<Index>;

    /// Set the next item of `index` in the list.
    fn set_next(&mut self, index: Index, next: Option<Index>);

    /// Check if the value at `lhs` is less than the one at `rhs`.
    fn lt(&mut self, lhs: Index, rhs: Index) -> bool;
}

/// Sort the elements in a singly-linked list.
///
/// `first_i` specifies the first element of the list. Returns the new first
/// element.
///
/// This is a stable sort.
///
/// This function is moderately stack intensive - it requires a stack space of
/// at least `(size_of::<Index>() + 1) * usize::BITS` bytes (not counting
/// padding).
///
/// The current implementation is based on:
///
/// > Munro, J. Ian, and Sebastian Wild. "[Nearly-optimal mergesorts: Fast,
/// > practical sorting methods that optimally adapt to existing runs][1]."
/// > *arXiv preprint arXiv:1805.04154* (2018).
///
/// [1]: https://arxiv.org/abs/1805.04154
pub fn sort<Index>(acc: &mut impl SortAccessor<Index>, first_i: Option<Index>) -> Option<Index>
where
    Index: Clone,
{
    struct Fragment<Index> {
        first_i: Index,
        power: u8,
    }

    let mut fragments: ArrayVec<Fragment<Index>, { usize::BITS as usize }> = <_>::default();

    let mut cursor_i = first_i;
    let mut cursor_pos = 0;

    let mut run = find_run(acc, &mut cursor_i, &mut cursor_pos)?;

    let global_len =
        run.len + core::iter::successors(cursor_i.clone(), |i| acc.next(i.clone())).count();

    while let Some(next_run) = find_run(acc, &mut cursor_i, &mut cursor_pos) {
        let power = power(run.start_pos, run.len, next_run.len, global_len);

        let mut first_i = run.first_i;

        while let Some(fragment) = fragments.last()
            && fragment.power > power
        {
            first_i = merge(acc, fragment.first_i.clone(), first_i);
            fragments.pop();
        }

        fragments.push(Fragment { first_i, power });

        debug_assert!(fragments.len() <= global_len.ilog2() as usize + 2);

        run = next_run;
    }

    // Should have visited all input elements
    debug_assert_eq!(global_len, cursor_pos);
    debug_assert!(cursor_i.is_none());

    let mut first_i = run.first_i;

    while let Some(fragment) = fragments.pop() {
        first_i = merge(acc, fragment.first_i.clone(), first_i);
    }

    Some(first_i)
}

struct Run<Index> {
    first_i: Index,
    start_pos: usize,
    len: usize,
}

/// Find the next run and advance `*p_cursor_i` and `*p_cursor_pos`.
///
/// Returns `None` if and only if `p_cursor_i == None`.
#[inline]
fn find_run<Index>(
    acc: &mut impl SortAccessor<Index>,
    p_cursor_i: &mut Option<Index>,
    p_cursor_pos: &mut usize,
) -> Option<Run<Index>>
where
    Index: Clone,
{
    let start_pos = *p_cursor_pos;

    // Initialize the run with `*p_cursor`
    let mut len = 1;
    let first_i = p_cursor_i.clone()?;
    let mut last = first_i.clone();

    *p_cursor_i = acc.next(last.clone());
    *p_cursor_pos += 1;

    // Try to add as many elements as possible without breaking the run
    while let Some(next) = p_cursor_i.clone() {
        // If `next < last`, the run is broken
        if acc.lt(next.clone(), last.clone()) {
            acc.set_next(last, None);
            break;
        }

        // Add `next` to the run
        len += 1;
        last = next;
        *p_cursor_i = acc.next(last.clone());
        *p_cursor_pos += 1;
    }

    Some(Run {
        first_i,
        start_pos,
        len,
    })
}

fn merge<Index>(acc: &mut impl SortAccessor<Index>, i1: Index, i2: Index) -> Index
where
    Index: Clone,
{
    let (first_i, mut maybe_i1, mut maybe_i2) = if acc.lt(i2.clone(), i1.clone()) {
        (i2.clone(), Some(i1), acc.next(i2))
    } else {
        (i1.clone(), acc.next(i1), Some(i2))
    };

    let mut last_i = first_i.clone();

    loop {
        match (maybe_i1.clone(), maybe_i2.clone()) {
            (Some(i1), Some(i2)) => {
                if acc.lt(i2.clone(), i1.clone()) {
                    acc.set_next(last_i, Some(i2.clone()));
                    last_i = i2.clone();
                    maybe_i2 = acc.next(i2);
                } else {
                    acc.set_next(last_i, Some(i1.clone()));
                    last_i = i1.clone();
                    maybe_i1 = acc.next(i1);
                }
            }
            (Some(i), None) | (None, Some(i)) => {
                acc.set_next(last_i, Some(i));
                break;
            }
            (None, None) => unreachable!(),
        }
    }

    first_i
}

fn power(start_pos: usize, len0: usize, len1: usize, global_len: usize) -> u8 {
    debug_assert!(start_pos + len0 + len1 <= global_len);
    debug_assert_ne!(len0, 0);
    debug_assert_ne!(len1, 0);

    let mut mid0 = 2 * start_pos + len0;
    let mut mid1 = mid0 + len0 + len1;

    let mut power = 0;
    loop {
        if mid0 >= global_len {
            mid0 -= global_len;
            mid1 -= global_len;
        } else if mid1 >= global_len {
            return power;
        }
        mid0 <<= 1;
        mid1 <<= 1;
        power += 1;
    }
}

#[cfg(test)]
mod tests {
    use std::vec::Vec;

    use super::*;
    use derive_more::{From, Into};
    use proptest::prelude::*;
    use typed_index_collections::TiVec;

    #[derive(Debug)]
    struct Node {
        value: (u8, u8),
        next: Option<NodeI>,
        visited: bool,
    }

    #[derive(Debug, Clone, Copy, PartialEq, Eq, From, Into)]
    struct NodeI(usize);

    struct NodeAccessor<'a> {
        nodes: &'a mut TiVec<NodeI, Node>,
        num_comparisons: &'a mut usize,
    }

    impl SortAccessor<NodeI> for NodeAccessor<'_> {
        fn next(&mut self, index: NodeI) -> Option<NodeI> {
            self.nodes[index].next
        }

        fn set_next(&mut self, index: NodeI, next: Option<NodeI>) {
            self.nodes[index].next = next;
        }

        fn lt(&mut self, lhs: NodeI, rhs: NodeI) -> bool {
            *self.num_comparisons += 1;
            let [(lhs, _), (rhs, _)] = [lhs, rhs].map(|i| self.nodes[i].value);
            lhs < rhs
        }
    }

    #[proptest::property_test]
    fn pt_sort(
        #[strategy = prop::collection::vec((0..16_u8, any::<u8>()), 0..100)] vec: Vec<(u8, u8)>,
    ) {
        let mut nodes = TiVec::with_capacity(vec.len());

        // Build a list
        let mut first_i = None;
        for &value in vec.iter().rev() {
            first_i = Some(nodes.push_and_get_key(Node {
                value,
                next: first_i,
                visited: false,
            }));
        }

        // Sort it
        let mut num_comparisons = 0;
        first_i = sort(
            &mut NodeAccessor {
                nodes: &mut nodes,
                num_comparisons: &mut num_comparisons,
            },
            first_i,
        );

        // Check it
        let mut got_sorted = Vec::with_capacity(vec.len());
        {
            let mut maybe_i = first_i;
            while let Some(i) = maybe_i {
                let node = &mut nodes[i];

                assert!(!node.visited, "cycle was found in sorted list:\n{nodes:?}");
                node.visited = true;

                got_sorted.push(node.value);

                maybe_i = node.next;
            }
        }

        let mut expected_sorted = vec;
        expected_sorted.sort_by_key(|&(x, _)| x);

        assert_eq!(got_sorted, expected_sorted);

        // Check time complexity
        if !nodes.is_empty() {
            assert!(num_comparisons <= nodes.len() * (nodes.len().ilog2() as usize + 1));
        }
    }
}