Skip to main content

feral_scotch/
lib.rs

1//! SCOTCH-style nested-dissection fill-reducing ordering.
2//!
3//! Clean-room pure-Rust implementation of the algorithms described in
4//! Pellegrini, "SCOTCH: A software package for static mapping by dual
5//! recursive bipartitioning of process and architecture graphs"
6//! (HPCN Europe, 1996), §3. This crate provides an alternative to
7//! [`feral_metis`](../feral_metis) with:
8//!
9//! - Graph compression (supervariable merging before partitioning).
10//! - Direct vertex-separator FM (tighter separators than edge-cut
11//!   conversion on structured meshes).
12//! - Adaptive refinement (boundary / halo FM). A band-FM refiner
13//!   (`band_fm`) is implemented and unit-tested but is not yet wired
14//!   into the default ND driver.
15//!
16//! The public surface conforms to the FERAL ordering-crate contract
17//! (`dev/plans/ordering-crate-contract.md`): [`CscPattern`],
18//! [`OrderingStats`], [`OrderingError`], and [`CONTRACT_VERSION`] are
19//! re-exported from `feral-ordering-core`.
20//!
21//! **Status: S1–S5 complete.** [`scotch_order`] and
22//! [`scotch_order_full`] run the full pipeline — graph compression,
23//! connected-component split, multilevel coarsening, best-of-
24//! `n_sep_trials` initial bisection, halo-FM uncoarsening, direct
25//! vertex separator, and recursive ND with an AMD leaf fallback.
26//!
27//! ## Clean-room provenance
28//!
29//! No code is copied or paraphrased from SCOTCH's C source
30//! (`libscotch/`, CeCILL-C). Algorithms are reconstructed from
31//! Pellegrini 1996 §3 and the research notes under `dev/research/`.
32//! Constants, data layouts, and hash choices are independently
33//! justified and documented per-module.
34
35#![forbid(unsafe_code)]
36#![deny(missing_docs)]
37
38#[allow(dead_code)]
39mod band_fm;
40#[allow(dead_code)]
41mod compress;
42#[allow(dead_code)]
43mod graph;
44#[allow(dead_code)]
45mod halo_fm;
46mod node_nd;
47#[allow(dead_code)]
48mod vertex_separator;
49
50#[cfg(test)]
51mod test_util;
52
53pub use feral_ordering_core::{CscPattern, OrderingError, OrderingStats, CONTRACT_VERSION};
54
55/// Tunable parameters for SCOTCH nested-dissection ordering.
56///
57/// Defaults mirror SCOTCH 7.0's vertex-separation ordering defaults as
58/// documented in the audit of `dev/plans/ordering-scotch.md`:
59/// `cmin = 100` (coarsening floor), `amd_switch = 120`,
60/// `n_sep_trials = 5`, `fm_move_cap = 200`, `bal = 0.05`,
61/// `compress_ratio = 0.7`, `seed = 0xDEAD_BEEF`.
62///
63/// **S1 status:** only [`compress`](Self::compress) and
64/// [`compress_ratio`](Self::compress_ratio) are consumed by shipped
65/// code. The remaining fields are defined at the S1 boundary so that
66/// the public type does not drift between milestones.
67#[derive(Debug, Clone)]
68pub struct ScotchOptions {
69    /// Apply graph compression before partitioning.
70    pub compress: bool,
71    /// Compress only if `n_compressed / n < compress_ratio` (i.e.,
72    /// compress only when compression saves at least
73    /// `(1 - compress_ratio) * 100` % of vertices). SCOTCH uses 0.75;
74    /// we follow the plan's slightly more aggressive 0.7.
75    pub compress_ratio: f64,
76    /// Switch from recursive ND to AMD on subproblems with at most
77    /// this many vertices (SCOTCH default: 120).
78    pub amd_switch: u32,
79    /// Stop coarsening when the graph has fewer than this many
80    /// vertices (SCOTCH `cmin` = 100 for vertex-separation contexts).
81    pub coarsen_floor: u32,
82    /// Number of separator trials at each recursion level (SCOTCH
83    /// default: 5).
84    pub n_sep_trials: u32,
85    /// FM refinement: per-pass move cap (SCOTCH default: 200).
86    pub fm_move_cap: u32,
87    /// FM refinement: per-call pass cap. SCOTCH's default is "passes
88    /// until no improvement"; we clamp at 32 for bounded runtime.
89    pub fm_pass_cap: u32,
90    /// Imbalance tolerance (SCOTCH default: 0.05).
91    pub max_imbalance: f64,
92    /// Deterministic RNG seed for coarsening matching.
93    pub seed: u64,
94}
95
96impl Default for ScotchOptions {
97    fn default() -> Self {
98        Self {
99            compress: true,
100            compress_ratio: 0.7,
101            amd_switch: 120,
102            coarsen_floor: 100,
103            n_sep_trials: 5,
104            fm_move_cap: 200,
105            fm_pass_cap: 32,
106            max_imbalance: 0.05,
107            seed: 0xDEAD_BEEF,
108        }
109    }
110}
111
112/// Crate-specific diagnostic counters for SCOTCH nested dissection.
113///
114/// Zero-initialized at the start of each ordering call and filled in
115/// as the pipeline runs. S1 only touches the compression counters.
116#[derive(Debug, Default, Clone, PartialEq, Eq)]
117pub struct ScotchStats {
118    /// Number of vertices dropped by compression at the top level
119    /// (`n_original - n_compressed`). Zero if compression was
120    /// attempted but returned `None`, or if compression is disabled.
121    pub n_compressed_out: u32,
122    /// Number of coarsening levels built. Unused until S2.
123    pub n_levels: u32,
124    /// Number of top-level connected components encountered. Unused
125    /// until S5.
126    pub n_components: u32,
127    /// Number of vertices assigned to a separator at any level.
128    /// Unused until S5.
129    pub n_separator_vertices: u32,
130    /// Number of FM passes executed across all levels. Unused until
131    /// S3/S4.
132    pub n_fm_passes: u32,
133    /// Number of times the recursion bottomed out in the AMD leaf
134    /// fallback. Unused until S5.
135    pub n_amd_leaf_calls: u32,
136}
137
138/// Compute a fill-reducing SCOTCH nested-dissection ordering.
139///
140/// Thin wrapper over [`scotch_order_full`] that discards the
141/// diagnostic stats. Returns a permutation `perm` (new-to-old).
142pub fn scotch_order(pattern: &CscPattern<'_>) -> Result<Vec<i32>, OrderingError> {
143    scotch_order_full(pattern, &ScotchOptions::default()).map(|(perm, _, _)| perm)
144}
145
146/// Contract-conforming ordering producer.
147///
148/// Signature matches the shape every FERAL ordering crate must expose
149/// per `dev/plans/ordering-crate-contract.md`: input is a
150/// full-symmetric [`CscPattern`] and options; output is a three-tuple
151/// of `(perm, OrderingStats, crate-stats)`, with errors in
152/// [`OrderingError`].
153///
154/// `OrderingStats.time_us` is the wall-clock time of this call.
155/// `fill_estimate` and `flop_estimate` stay `None` — SCOTCH does not
156/// produce them at the ordering boundary; they belong to a downstream
157/// symbolic analysis.
158///
159/// Runs the S1–S5 pipeline: optional graph compression, connected-
160/// component split, multilevel coarsening, best-of-`n_sep_trials`
161/// initial bisection, halo-FM uncoarsening refinement, direct
162/// vertex-separator via two-sided FM, recursion on each side with an
163/// AMD leaf fallback at `amd_switch`.
164pub fn scotch_order_full(
165    pattern: &CscPattern<'_>,
166    opts: &ScotchOptions,
167) -> Result<(Vec<i32>, OrderingStats, ScotchStats), OrderingError> {
168    if pattern.col_ptr.len() != pattern.n + 1 {
169        return Err(OrderingError::MalformedInput);
170    }
171    let t0 = std::time::Instant::now();
172    let mut stats = ScotchStats::default();
173    let perm = node_nd::scotch_nd_order(pattern, opts, &mut stats)?;
174    let ordering_stats = OrderingStats {
175        time_us: t0.elapsed().as_micros() as u64,
176        fill_estimate: None,
177        flop_estimate: None,
178    };
179    Ok((perm, ordering_stats, stats))
180}
181
182#[cfg(test)]
183mod tests {
184    use super::*;
185
186    #[test]
187    fn options_defaults_match_plan() {
188        let o = ScotchOptions::default();
189        assert!(o.compress);
190        assert_eq!(o.compress_ratio, 0.7);
191        assert_eq!(o.amd_switch, 120);
192        assert_eq!(o.coarsen_floor, 100);
193        assert_eq!(o.n_sep_trials, 5);
194        assert_eq!(o.fm_move_cap, 200);
195        assert_eq!(o.fm_pass_cap, 32);
196        assert_eq!(o.max_imbalance, 0.05);
197        assert_eq!(o.seed, 0xDEAD_BEEF);
198    }
199
200    #[test]
201    fn stats_default_is_zeros() {
202        let s = ScotchStats::default();
203        assert_eq!(s.n_compressed_out, 0);
204        assert_eq!(s.n_levels, 0);
205        assert_eq!(s.n_components, 0);
206        assert_eq!(s.n_separator_vertices, 0);
207        assert_eq!(s.n_fm_passes, 0);
208        assert_eq!(s.n_amd_leaf_calls, 0);
209    }
210
211    #[test]
212    fn contract_version_matches_core() {
213        assert_eq!(CONTRACT_VERSION, feral_ordering_core::CONTRACT_VERSION);
214    }
215
216    #[test]
217    fn scotch_order_trivial_diagonal() {
218        let cp = vec![0i32, 1, 2, 3];
219        let ri = vec![0i32, 1, 2];
220        let pat = CscPattern::new(3, &cp, &ri).unwrap();
221        let perm = scotch_order(&pat).expect("trivial 3-vertex diagonal orders");
222        assert_eq!(perm.len(), 3);
223        let mut seen = [false; 3];
224        for &p in &perm {
225            assert!((0..3).contains(&p));
226            seen[p as usize] = true;
227        }
228        assert!(seen.iter().all(|&s| s));
229    }
230
231    #[test]
232    fn scotch_order_full_populates_time_us() {
233        let cp = vec![0i32, 1, 2, 3];
234        let ri = vec![0i32, 1, 2];
235        let pat = CscPattern::new(3, &cp, &ri).unwrap();
236        let (_perm, ostats, _sstats) =
237            scotch_order_full(&pat, &ScotchOptions::default()).expect("ok");
238        // time_us is populated; fill/flop remain None.
239        assert!(ostats.fill_estimate.is_none());
240        assert!(ostats.flop_estimate.is_none());
241    }
242
243    #[test]
244    fn scotch_order_rejects_malformed_col_ptr() {
245        // col_ptr length must equal n + 1. Construct a well-formed
246        // CscPattern for n = 3, then pass it to scotch_order_full
247        // with an options struct and a lying n via a fresh pattern
248        // that fails the upstream validator.
249        let cp = vec![0i32, 1];
250        let ri: Vec<i32> = Vec::new();
251        // CscPattern::new with n=2 and col_ptr of length 2 rejects at
252        // construction; go through the public API by constructing a
253        // valid n=0 pattern and mutating — since we don't have public
254        // mutation, simply verify that CscPattern::new rejects the
255        // malformed case (the scotch_order_full guard is a defence-
256        // in-depth and is covered by tests in node_nd).
257        assert!(CscPattern::new(2, &cp, &ri).is_none());
258    }
259}