tokitai-dl 0.1.2

Vendored, additive bridge between tokitai-operator (verified DL kernels) and god-graph (DL model analysis). 100% non-invasive position was deliberately broken in 0.1.1 — see ADR-0004 for the rationale.
Documentation
//! Bridge between tokitai's `FiniteSite` and a god-graph
//! `Partitioner`.
//!
//! god-graph's `src/parallel/partitioner.rs` is on disk but is **not**
//! exposed as `pub mod parallel` in `god-graph/src/lib.rs`. The
//! `parallel-cluster` Cargo feature pulls in some `rayon` machinery
//! but does not yet expose a public `Partitioner` trait. Until
//! god-graph does, tokitai-dl ships a *local* [`Partitioner`] trait
//! and an [`FiniteSitePartitioner`] newtype that implements it for
//! [`tokitai_operator::object::sheaf::FiniteSite`].
//!
//! # Migration path
//!
//! When god-graph exposes `pub mod parallel` and a public
//! `Partitioner` trait, the local trait here should be replaced
//! with `impl god_graph::parallel::Partitioner for FiniteSite`,
//! and `FiniteSitePartitioner` can become a type alias. See
//! `docs/KNOWN_LIMITATIONS.md` for the tracking entry.
//!
//! # What the partitioner does
//!
//! Given a `FiniteSite` with `n` opens, every `OpenId` becomes one
//! partition. Items in the input slice are assigned to partitions
//! based on which `OpenId` they "belong to". Because the
//! `tokitai_operator::object::sheaf` types do not yet carry an
//! `item -> OpenId` mapping, the default implementation uses the
//! supplied [`SectionTable`] when one is present, and falls back to
//! a deterministic round-robin assignment otherwise.
//!
//! This module is gated on the `operator` feature only (it does
//! not require god-graph).

use tokitai_operator::object::sheaf::{FiniteSite, OpenId, SectionTable};

/// A partitioner splits a slice of items into a `Vec` of partition
/// index lists.
///
/// `partition(&[items])` returns one `Vec<usize>` per partition;
/// each inner `Vec` is the indices of `items` that ended up in
/// that partition. `num_partitions()` returns the number of
/// partitions (equal to `partition(&[]).len()` for any non-empty
/// site).
///
/// # Why this trait is local
///
/// See the module-level doc comment. god-graph's
/// `parallel::partitioner` module is not yet `pub`; the moment it
/// is, this trait can be replaced with
/// `impl god_graph::parallel::Partitioner for FiniteSite`.
///
/// # Example
///
/// ```rust
/// # #[cfg(feature = "operator")]
/// # fn demo() {
/// use tokitai_dl::sheaf_partitioner::{FiniteSitePartitioner, Partitioner};
/// use tokitai_operator::object::sheaf::{FiniteSite, OpenId};
///
/// // Build a 2-open site, partition a slice of usize items round-robin.
/// let site = FiniteSite::new(vec![OpenId("a".into()), OpenId("b".into())], vec![]);
/// let p = FiniteSitePartitioner::new(site);
/// let parts: Vec<Vec<usize>> = p.partition(&[10, 20, 30, 40]);
/// assert_eq!(p.num_partitions(), 2);
/// assert_eq!(parts[0], vec![0, 2]); // items 0, 2 -> partition 0
/// assert_eq!(parts[1], vec![1, 3]); // items 1, 3 -> partition 1
/// # }
/// ```
pub trait Partitioner {
    /// The element type being partitioned. Typically a tensor id
    /// (`usize`) or a section value.
    type Item;

    /// Partition `items` into `num_partitions()` buckets and
    /// return one `Vec<usize>` of indices per partition.
    fn partition(&self, items: &[Self::Item]) -> Vec<Vec<usize>>;

    /// Number of partitions the partitioner produces. Equal to
    /// `partition(&[]).len()` for any non-empty site (an empty
    /// site has zero partitions and zero opens).
    fn num_partitions(&self) -> usize;
}

/// A `Partitioner` whose partitions are the opens of a
/// `tokitai FiniteSite`.
///
/// Construct via [`FiniteSitePartitioner::new`] (round-robin
/// assignment) or [`FiniteSitePartitioner::with_section_table`]
/// (deterministic assignment driven by the table's `OpenId`
/// ordering).
pub struct FiniteSitePartitioner {
    /// The site whose opens become the partitions.
    site: FiniteSite,
    /// Optional table that drives assignment. When `Some(table)`,
    /// an item with value `v` is assigned to the partition whose
    /// `OpenId` is the `table.get(&v)`-th key in the BTreeMap's
    /// sorted order. When `None`, the partitioner falls back to
    /// deterministic round-robin.
    table: Option<SectionTable<usize>>,
}

impl FiniteSitePartitioner {
    /// Create a new partitioner over the opens of `site` with no
    /// `SectionTable`. Items are assigned round-robin.
    ///
    /// # Example
    ///
    /// ```rust
    /// # #[cfg(feature = "operator")]
    /// # fn demo() {
    /// use tokitai_dl::sheaf_partitioner::{FiniteSitePartitioner, Partitioner};
    /// use tokitai_operator::object::sheaf::{FiniteSite, OpenId};
    ///
    /// let site = FiniteSite::new(vec![OpenId("a".into()), OpenId("b".into())], vec![]);
    /// let p = FiniteSitePartitioner::new(site);
    /// // Six items spread round-robin over two partitions: [0, 2, 4] and [1, 3, 5].
    /// let parts = p.partition(&[10, 11, 12, 13, 14, 15]);
    /// assert_eq!(parts[0], vec![0, 2, 4]);
    /// assert_eq!(parts[1], vec![1, 3, 5]);
    /// # }
    /// ```
    pub fn new(site: FiniteSite) -> Self {
        Self { site, table: None }
    }

    /// Create a new partitioner over the opens of `site` driven by
    /// `table`. The `SectionTable` keys are sorted by `OpenId`
    /// ordering, so a value of `v` lands in the partition at
    /// `table.get(&open_i).unwrap_or(0) % num_opens` where `open_i`
    /// is the i-th `OpenId` returned by `site.opens()`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # #[cfg(feature = "operator")]
    /// # fn demo() {
    /// use tokitai_dl::sheaf_partitioner::{FiniteSitePartitioner, Partitioner};
    /// use tokitai_operator::object::sheaf::{FiniteSite, OpenId, SectionTable};
    ///
    /// let site = FiniteSite::new(
    ///     vec![OpenId("a".into()), OpenId("b".into()), OpenId("c".into())],
    ///     vec![],
    /// );
    /// let mut table: SectionTable<usize> = SectionTable::new();
    /// table.insert(OpenId("a".into()), 100);
    /// table.insert(OpenId("b".into()), 200);
    /// table.insert(OpenId("c".into()), 300);
    /// let p = FiniteSitePartitioner::with_section_table(site, table);
    /// // Each item lands in the partition whose open maps to it.
    /// let parts = p.partition(&[100, 200, 300]);
    /// assert_eq!(parts[0], vec![0]); // 100 -> OpenId "a" -> partition 0
    /// assert_eq!(parts[1], vec![1]); // 200 -> OpenId "b" -> partition 1
    /// assert_eq!(parts[2], vec![2]); // 300 -> OpenId "c" -> partition 2
    /// # }
    /// ```
    pub fn with_section_table(site: FiniteSite, table: SectionTable<usize>) -> Self {
        Self {
            site,
            table: Some(table),
        }
    }

    /// Borrow the underlying site.
    ///
    /// # No example
    ///
    /// Trivial getter: returns a shared reference to the inner
    /// `FiniteSite`. The function is too small to add a runnable
    /// example that would not duplicate the `with_section_table`
    /// example above.
    pub fn site(&self) -> &FiniteSite {
        &self.site
    }

    /// Borrow the optional `SectionTable` driving the assignment.
    ///
    /// # No example
    ///
    /// Trivial getter: returns `Some(table)` if a `SectionTable` was
    /// provided at construction time, else `None`. See
    /// `FiniteSitePartitioner::with_section_table` for a runnable
    /// example that exercises this accessor.
    pub fn table(&self) -> Option<&SectionTable<usize>> {
        self.table.as_ref()
    }
}

impl Partitioner for FiniteSitePartitioner {
    type Item = usize;

    fn num_partitions(&self) -> usize {
        self.site.opens.len()
    }

    fn partition(&self, items: &[Self::Item]) -> Vec<Vec<usize>> {
        let n = self.num_partitions();
        let mut out: Vec<Vec<usize>> = vec![Vec::new(); n];
        if n == 0 {
            return out;
        }

        match &self.table {
            Some(table) => {
                // The site opens are stored as a `Vec<OpenId>` (not
                // a `BTreeMap`), but `SectionTable::keys()` is
                // sorted because the backing store is a
                // `BTreeMap<OpenId, T>`. We sort the site opens
                // once so that the partition index is stable.
                let mut sorted_opens: Vec<&OpenId> = self.site.opens.iter().collect();
                sorted_opens.sort();
                for (idx, item) in items.iter().enumerate() {
                    // Walk every open in sorted order. The first
                    // open whose stored value equals the item
                    // wins. Items that do not appear in the table
                    // fall back to round-robin so the partitioner
                    // is total.
                    let mut placed = false;
                    for (rank, open) in sorted_opens.iter().enumerate() {
                        if table.get(open) == Some(item) {
                            out[rank].push(idx);
                            placed = true;
                            break;
                        }
                    }
                    if !placed {
                        out[idx % n].push(idx);
                    }
                }
            }
            None => {
                for (idx, _item) in items.iter().enumerate() {
                    out[idx % n].push(idx);
                }
            }
        }

        out
    }
}