feral_scotch/lib.rs
1//! SCOTCH-style nested-dissection fill-reducing ordering.
2//!
3//! Clean-room pure-Rust implementation of the algorithms described in
4//! Pellegrini, "SCOTCH: A software package for static mapping by dual
5//! recursive bipartitioning of process and architecture graphs"
6//! (HPCN Europe, 1996), §3. This crate provides an alternative to
7//! [`feral_metis`](../feral_metis) with:
8//!
9//! - Graph compression (supervariable merging before partitioning).
10//! - Direct vertex-separator FM (tighter separators than edge-cut
11//! conversion on structured meshes).
12//! - Adaptive refinement (boundary / halo FM). A band-FM refiner
13//! (`band_fm`) is implemented and unit-tested but is not yet wired
14//! into the default ND driver.
15//!
16//! The public surface conforms to the FERAL ordering-crate contract
17//! (`dev/plans/ordering-crate-contract.md`): [`CscPattern`],
18//! [`OrderingStats`], [`OrderingError`], and [`CONTRACT_VERSION`] are
19//! re-exported from `feral-ordering-core`.
20//!
21//! **Status: S1–S5 complete.** [`scotch_order`] and
22//! [`scotch_order_full`] run the full pipeline — graph compression,
23//! connected-component split, multilevel coarsening, best-of-
24//! `n_sep_trials` initial bisection, halo-FM uncoarsening, direct
25//! vertex separator, and recursive ND with an AMD leaf fallback.
26//!
27//! ## Clean-room provenance
28//!
29//! No code is copied or paraphrased from SCOTCH's C source
30//! (`libscotch/`, CeCILL-C). Algorithms are reconstructed from
31//! Pellegrini 1996 §3 and the research notes under `dev/research/`.
32//! Constants, data layouts, and hash choices are independently
33//! justified and documented per-module.
34
35#![forbid(unsafe_code)]
36#![deny(missing_docs)]
37
38#[allow(dead_code)]
39mod band_fm;
40#[allow(dead_code)]
41mod compress;
42#[allow(dead_code)]
43mod graph;
44#[allow(dead_code)]
45mod halo_fm;
46mod node_nd;
47#[allow(dead_code)]
48mod vertex_separator;
49
50#[cfg(test)]
51mod test_util;
52
53pub use feral_ordering_core::{CscPattern, OrderingError, OrderingStats, CONTRACT_VERSION};
54
55/// Tunable parameters for SCOTCH nested-dissection ordering.
56///
57/// Defaults mirror SCOTCH 7.0's vertex-separation ordering defaults as
58/// documented in the audit of `dev/plans/ordering-scotch.md`:
59/// `cmin = 100` (coarsening floor), `amd_switch = 120`,
60/// `n_sep_trials = 5`, `fm_move_cap = 200`, `bal = 0.05`,
61/// `compress_ratio = 0.7`, `seed = 0xDEAD_BEEF`.
62///
63/// **S1 status:** only [`compress`](Self::compress) and
64/// [`compress_ratio`](Self::compress_ratio) are consumed by shipped
65/// code. The remaining fields are defined at the S1 boundary so that
66/// the public type does not drift between milestones.
67#[derive(Debug, Clone)]
68pub struct ScotchOptions {
69 /// Apply graph compression before partitioning.
70 pub compress: bool,
71 /// Compress only if `n_compressed / n < compress_ratio` (i.e.,
72 /// compress only when compression saves at least
73 /// `(1 - compress_ratio) * 100` % of vertices). SCOTCH uses 0.75;
74 /// we follow the plan's slightly more aggressive 0.7.
75 pub compress_ratio: f64,
76 /// Switch from recursive ND to AMD on subproblems with at most
77 /// this many vertices (SCOTCH default: 120).
78 pub amd_switch: u32,
79 /// Stop coarsening when the graph has fewer than this many
80 /// vertices (SCOTCH `cmin` = 100 for vertex-separation contexts).
81 pub coarsen_floor: u32,
82 /// Number of separator trials at each recursion level (SCOTCH
83 /// default: 5).
84 pub n_sep_trials: u32,
85 /// FM refinement: per-pass move cap (SCOTCH default: 200).
86 pub fm_move_cap: u32,
87 /// FM refinement: per-call pass cap. SCOTCH's default is "passes
88 /// until no improvement"; we clamp at 32 for bounded runtime.
89 pub fm_pass_cap: u32,
90 /// Imbalance tolerance (SCOTCH default: 0.05).
91 pub max_imbalance: f64,
92 /// Deterministic RNG seed for coarsening matching.
93 pub seed: u64,
94}
95
96impl Default for ScotchOptions {
97 fn default() -> Self {
98 Self {
99 compress: true,
100 compress_ratio: 0.7,
101 amd_switch: 120,
102 coarsen_floor: 100,
103 n_sep_trials: 5,
104 fm_move_cap: 200,
105 fm_pass_cap: 32,
106 max_imbalance: 0.05,
107 seed: 0xDEAD_BEEF,
108 }
109 }
110}
111
112/// Crate-specific diagnostic counters for SCOTCH nested dissection.
113///
114/// Zero-initialized at the start of each ordering call and filled in
115/// as the pipeline runs. S1 only touches the compression counters.
116#[derive(Debug, Default, Clone, PartialEq, Eq)]
117pub struct ScotchStats {
118 /// Number of vertices dropped by compression at the top level
119 /// (`n_original - n_compressed`). Zero if compression was
120 /// attempted but returned `None`, or if compression is disabled.
121 pub n_compressed_out: u32,
122 /// Number of coarsening levels built. Unused until S2.
123 pub n_levels: u32,
124 /// Number of top-level connected components encountered. Unused
125 /// until S5.
126 pub n_components: u32,
127 /// Number of vertices assigned to a separator at any level.
128 /// Unused until S5.
129 pub n_separator_vertices: u32,
130 /// Number of FM passes executed across all levels. Unused until
131 /// S3/S4.
132 pub n_fm_passes: u32,
133 /// Number of times the recursion bottomed out in the AMD leaf
134 /// fallback. Unused until S5.
135 pub n_amd_leaf_calls: u32,
136}
137
138/// Compute a fill-reducing SCOTCH nested-dissection ordering.
139///
140/// Thin wrapper over [`scotch_order_full`] that discards the
141/// diagnostic stats. Returns a permutation `perm` (new-to-old).
142pub fn scotch_order(pattern: &CscPattern<'_>) -> Result<Vec<i32>, OrderingError> {
143 scotch_order_full(pattern, &ScotchOptions::default()).map(|(perm, _, _)| perm)
144}
145
146/// Contract-conforming ordering producer.
147///
148/// Signature matches the shape every FERAL ordering crate must expose
149/// per `dev/plans/ordering-crate-contract.md`: input is a
150/// full-symmetric [`CscPattern`] and options; output is a three-tuple
151/// of `(perm, OrderingStats, crate-stats)`, with errors in
152/// [`OrderingError`].
153///
154/// `OrderingStats.time_us` is the wall-clock time of this call.
155/// `fill_estimate` and `flop_estimate` stay `None` — SCOTCH does not
156/// produce them at the ordering boundary; they belong to a downstream
157/// symbolic analysis.
158///
159/// Runs the S1–S5 pipeline: optional graph compression, connected-
160/// component split, multilevel coarsening, best-of-`n_sep_trials`
161/// initial bisection, halo-FM uncoarsening refinement, direct
162/// vertex-separator via two-sided FM, recursion on each side with an
163/// AMD leaf fallback at `amd_switch`.
164pub fn scotch_order_full(
165 pattern: &CscPattern<'_>,
166 opts: &ScotchOptions,
167) -> Result<(Vec<i32>, OrderingStats, ScotchStats), OrderingError> {
168 if pattern.col_ptr.len() != pattern.n + 1 {
169 return Err(OrderingError::MalformedInput);
170 }
171 let t0 = std::time::Instant::now();
172 let mut stats = ScotchStats::default();
173 let perm = node_nd::scotch_nd_order(pattern, opts, &mut stats)?;
174 let ordering_stats = OrderingStats {
175 time_us: t0.elapsed().as_micros() as u64,
176 fill_estimate: None,
177 flop_estimate: None,
178 };
179 Ok((perm, ordering_stats, stats))
180}
181
182#[cfg(test)]
183mod tests {
184 use super::*;
185
186 #[test]
187 fn options_defaults_match_plan() {
188 let o = ScotchOptions::default();
189 assert!(o.compress);
190 assert_eq!(o.compress_ratio, 0.7);
191 assert_eq!(o.amd_switch, 120);
192 assert_eq!(o.coarsen_floor, 100);
193 assert_eq!(o.n_sep_trials, 5);
194 assert_eq!(o.fm_move_cap, 200);
195 assert_eq!(o.fm_pass_cap, 32);
196 assert_eq!(o.max_imbalance, 0.05);
197 assert_eq!(o.seed, 0xDEAD_BEEF);
198 }
199
200 #[test]
201 fn stats_default_is_zeros() {
202 let s = ScotchStats::default();
203 assert_eq!(s.n_compressed_out, 0);
204 assert_eq!(s.n_levels, 0);
205 assert_eq!(s.n_components, 0);
206 assert_eq!(s.n_separator_vertices, 0);
207 assert_eq!(s.n_fm_passes, 0);
208 assert_eq!(s.n_amd_leaf_calls, 0);
209 }
210
211 #[test]
212 fn contract_version_matches_core() {
213 assert_eq!(CONTRACT_VERSION, feral_ordering_core::CONTRACT_VERSION);
214 }
215
216 #[test]
217 fn scotch_order_trivial_diagonal() {
218 let cp = vec![0i32, 1, 2, 3];
219 let ri = vec![0i32, 1, 2];
220 let pat = CscPattern::new(3, &cp, &ri).unwrap();
221 let perm = scotch_order(&pat).expect("trivial 3-vertex diagonal orders");
222 assert_eq!(perm.len(), 3);
223 let mut seen = [false; 3];
224 for &p in &perm {
225 assert!((0..3).contains(&p));
226 seen[p as usize] = true;
227 }
228 assert!(seen.iter().all(|&s| s));
229 }
230
231 #[test]
232 fn scotch_order_full_populates_time_us() {
233 let cp = vec![0i32, 1, 2, 3];
234 let ri = vec![0i32, 1, 2];
235 let pat = CscPattern::new(3, &cp, &ri).unwrap();
236 let (_perm, ostats, _sstats) =
237 scotch_order_full(&pat, &ScotchOptions::default()).expect("ok");
238 // time_us is populated; fill/flop remain None.
239 assert!(ostats.fill_estimate.is_none());
240 assert!(ostats.flop_estimate.is_none());
241 }
242
243 #[test]
244 fn scotch_order_rejects_malformed_col_ptr() {
245 // col_ptr length must equal n + 1. Construct a well-formed
246 // CscPattern for n = 3, then pass it to scotch_order_full
247 // with an options struct and a lying n via a fresh pattern
248 // that fails the upstream validator.
249 let cp = vec![0i32, 1];
250 let ri: Vec<i32> = Vec::new();
251 // CscPattern::new with n=2 and col_ptr of length 2 rejects at
252 // construction; go through the public API by constructing a
253 // valid n=0 pattern and mutating — since we don't have public
254 // mutation, simply verify that CscPattern::new rejects the
255 // malformed case (the scotch_order_full guard is a defence-
256 // in-depth and is covered by tests in node_nd).
257 assert!(CscPattern::new(2, &cp, &ri).is_none());
258 }
259}