feral_amf/lib.rs
1//! Approximate Minimum Fill (AMF / HAMF4) fill-reducing ordering.
2//!
3//! Standalone clean-room implementation of the AMF quotient-graph
4//! algorithm (Amestoy 1999 habilitation thesis, MUMPS HAMF4
5//! variant). The metric is approximate fill rather than approximate
6//! degree — the per-variable score is
7//!
8//! ```text
9//! RMF(i) = ( deg(i) * (deg(i) - 1 + 2*degme) - WF(i) ) / (nv(i) + 1)
10//! ```
11//!
12//! quantized into a `2N + 2` head-array with a coarse stride above
13//! `N`. The inner loop, including the lazy `WF(e)` element cache,
14//! the supervariable max-merge of `WF`, and the saturated/regular
15//! RMF branch, lives in `feral-ordering-core` behind the `MinFill`
16//! `Metric` impl.
17//!
18//! The public surface conforms to the FERAL ordering-crate
19//! contract (`dev/plans/ordering-crate-contract.md`). `CscPattern`,
20//! `OrderingStats`, `OrderingError`, and `CONTRACT_VERSION` are
21//! re-exported from `feral-ordering-core`.
22//!
23//! HAMF4 always aggressively absorbs elements, so [`AmfOptions`]
24//! does not expose an `aggressive` knob; the `dense_alpha` knob
25//! lives in the shared workspace and behaves identically to
26//! `feral-amd`'s.
27
28#![forbid(unsafe_code)]
29#![deny(missing_docs)]
30
31mod stats;
32
33pub use feral_ordering_core::{CscPattern, OrderingError, OrderingStats, CONTRACT_VERSION};
34pub use stats::AmfStats;
35
36use feral_ordering_core::quotient_graph::{order, MinFill, WorkspaceOptions};
37use std::time::Instant;
38
39/// Tunable parameters for AMF ordering.
40///
41/// Defaults: `dense_alpha = 10.0`. HAMF4 always aggressively
42/// absorbs absorbed elements during the inner Pass-2 loop, so
43/// there is no `aggressive` knob.
44#[derive(Debug, Clone)]
45pub struct AmfOptions {
46 /// Dense-row threshold multiplier. A variable with initial
47 /// degree exceeding `min(max(16, floor(dense_alpha * sqrt(n))), n)`
48 /// is deferred to the end of the ordering — the `max(16)` floor is
49 /// applied before the `min(n)` cap, matching faer `amd.rs:173-179`.
50 /// A negative value uses a raw threshold of `n - 2` with the same
51 /// clamps; for `n >= 18` that is exactly `n - 2`, suppressing
52 /// deferral for all but true hubs of degree `n - 1`.
53 pub dense_alpha: f64,
54}
55
56impl Default for AmfOptions {
57 fn default() -> Self {
58 Self { dense_alpha: 10.0 }
59 }
60}
61
62/// Compute a fill-reducing AMF ordering.
63///
64/// Returns a permutation `perm` (new-to-old) such that factoring
65/// `P·A·Pᵀ` with `P[k] = perm[k]` produces less fill than the
66/// natural ordering. The input must be the full symmetric pattern
67/// (both halves present).
68pub fn amf_order(pattern: &CscPattern<'_>) -> Result<Vec<i32>, OrderingError> {
69 amf_order_opts(pattern, &AmfOptions::default()).map(|(perm, _)| perm)
70}
71
72/// Compute an AMF ordering and return the crate-specific diagnostic
73/// counters.
74///
75/// See [`amf_order`] and [`AmfStats`]. Callers that also need the
76/// shared [`OrderingStats`] (wall time, fill estimate) should use
77/// [`amf_order_full`] instead.
78pub fn amf_order_with_stats(
79 pattern: &CscPattern<'_>,
80) -> Result<(Vec<i32>, AmfStats), OrderingError> {
81 amf_order_opts(pattern, &AmfOptions::default())
82}
83
84/// Compute an AMF ordering with explicit options.
85///
86/// Returns `(perm, amf_stats)`. See [`amf_order_full`] for the
87/// contract-conforming three-tuple return.
88pub fn amf_order_opts(
89 pattern: &CscPattern<'_>,
90 opts: &AmfOptions,
91) -> Result<(Vec<i32>, AmfStats), OrderingError> {
92 amf_order_full(pattern, opts).map(|(perm, _, amf_stats)| (perm, amf_stats))
93}
94
95/// Contract-conforming ordering producer.
96///
97/// Signature matches the shape every FERAL ordering crate must
98/// expose per `dev/plans/ordering-crate-contract.md`: input is a
99/// full-symmetric [`CscPattern`] and options; output is a
100/// three-tuple of `(perm, OrderingStats, crate-stats)`, with
101/// errors in [`OrderingError`].
102///
103/// `OrderingStats.time_us` is the wall-clock time of this call.
104/// `fill_estimate` and `flop_estimate` are left as `None` for AMF —
105/// the per-crate [`AmfStats`] carries `ndiv` / `nms_lu` / `nms_ldl`
106/// flop counters that may be surfaced here in a future revision
107/// without bumping the contract.
108pub fn amf_order_full(
109 pattern: &CscPattern<'_>,
110 opts: &AmfOptions,
111) -> Result<(Vec<i32>, OrderingStats, AmfStats), OrderingError> {
112 let t0 = Instant::now();
113 let ws_opts = WorkspaceOptions {
114 dense_alpha: opts.dense_alpha,
115 };
116 // HAMF4 always aggressively absorbs; pass `aggressive = true`
117 // unconditionally. The flag only gates the AMD-specific
118 // `(p3 == pn) && (elen[i] == 1)` mass-elimination branch which
119 // the AMF loop reuses with the same semantics.
120 let (perm, diag) = order::<MinFill>(pattern, &ws_opts, true)?;
121 let amf_stats = AmfStats {
122 ncmpa: diag.ncmpa,
123 n_clear_flag: 0,
124 n_mass_elim: diag.n_mass_elim,
125 n_supervar_merge: diag.n_supervar_merge,
126 n_dense_deferred: diag.ndense.max(0) as u32,
127 ndiv: diag.flops.ndiv.max(0.0) as u64,
128 nms_lu: diag.flops.nms_lu.max(0.0) as u64,
129 nms_ldl: diag.flops.nms_ldl.max(0.0) as u64,
130 };
131 let ordering_stats = OrderingStats {
132 time_us: t0.elapsed().as_micros() as u64,
133 fill_estimate: None,
134 flop_estimate: None,
135 };
136 Ok((perm, ordering_stats, amf_stats))
137}