nornir 0.5.0

Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
//! AMD ROCm runtime-lib discovery + GPU probe for the ort embedder.
//!
//! The mirror image of [`super::cuda`] for AMD silicon. The ort
//! `MIGraphXExecutionProvider` / `ROCmExecutionProvider` are `dlopen`ed lazily
//! when the session is built, and they `NEED` the ROCm runtime libs
//! (`libamdhip64`, `libMIOpen`, `librocblas`, `libmigraphx_c`, …). glibc fixes
//! the loader search path at process start, so the robust fix — same as the
//! CUDA path — is to **`dlopen` the ROCm libs ourselves with `RTLD_GLOBAL`**
//! before the provider loads, so its `NEEDED` sonames resolve against the
//! already-resident globals.
//!
//! Discovery order (first dir that contains a given lib wins):
//!   1. `NORNIR_ROCM_LIBS` — explicit colon-separated dirs (highest priority).
//!   2. `$ROCM_PATH` / `$HIP_PATH` → `…/lib` (and `…/lib64`).
//!   3. Known system dirs: `/opt/nornir/rocm` (the `setup` pin target),
//!      `/opt/rocm*/lib`, `/opt/rocm/lib64`, `/usr/lib/x86_64-linux-gnu`.
//!
//! Best-effort: anything missing just means ort falls back to CPU (no panic).
//! [`available`] is the runtime probe the embedder uses to decide whether to
//! even request the AMD GPU EP — when it returns `false` (no ROCm on the box)
//! the embedder requests CUDA-then-CPU exactly as before, so a non-AMD machine
//! is completely unaffected.
//!
//! Cargo feature: `embed-ort-rocm` (which also turns on `embed-ort`).

use std::path::{Path, PathBuf};
use std::sync::OnceLock;

/// ROCm runtime libs to preload, in leaf-first order: a lib's own `NEEDED` ROCm
/// deps must already be global when it loads. `libamdhip64` (the HIP runtime)
/// underpins the rest, so it loads first; the MIGraphX umbrella loads last.
///
/// The list is walked in order by the preloader, and reused by the preflight
/// report and by `setup` so all three agree on what "the ROCm runtime set" is.
/// Entries are unversioned names; versioned files on disk are matched by
/// `find_lib`.
const ORDERED_SONAMES: &[&str] = &[
    "libamdhip64.so",   // HIP runtime (the AMD analogue of libcudart)
    "librocblas.so",    // BLAS
    "libhipblas.so",    // HIP BLAS shim
    "librocfft.so",     // FFT
    "libMIOpen.so",     // DNN primitives (the cuDNN analogue)
    "libmigraphx_c.so", // MIGraphX C API (the provider's graph compiler)
];

/// Outcome of a ROCm preload attempt (for logging / diagnostics).
///
/// Built once by the preloader and cached for the process; `Default` is the
/// "nothing found, nothing loaded" state.
#[derive(Debug, Default, Clone)]
pub struct RocmPreload {
    /// Entries of `ORDERED_SONAMES` that were successfully `dlopen`ed (kept
    /// resident for the process life). Holds the list name, not the on-disk
    /// file name that was actually opened.
    pub loaded: Vec<String>,
    /// Directories that were searched, in priority order.
    pub dirs: Vec<PathBuf>,
    /// True if the HIP runtime (`libamdhip64`) loaded.
    pub hip: bool,
    /// True if MIOpen (DNN) loaded.
    pub miopen: bool,
}

// Process-wide cache of the one-and-only preload attempt. The libraries opened
// during that attempt are leaked on purpose: they must stay resident for the
// whole process so the provider can resolve against them.
static PRELOAD: OnceLock<RocmPreload> = OnceLock::new();

/// Run ROCm discovery + preload at most once per process and return the cached
/// outcome. Cheap to call before every session build: after the first call it
/// only reads the cache.
///
/// The first call adjusts the environment *before* any library is opened —
/// the gfx HSA override and the MIGraphX cache directory — because HIP reads
/// `HSA_OVERRIDE_GFX_VERSION` when the runtime initializes, i.e. when
/// `libamdhip64` is `dlopen`ed.
pub fn ensure() -> &'static RocmPreload {
    // Order matters: both env tweaks must land before `run` opens anything.
    fn first_use() -> RocmPreload {
        apply_gfx_override();
        apply_migraphx_cache_dir();
        run()
    }
    PRELOAD.get_or_init(first_use)
}

/// The MIGraphX EP env var for the compiled-model cache directory. ONNX Runtime
/// 1.24's AMD GPU path is **MIGraphX-only** (the standalone ROCm EP was removed),
/// and its EP calls `save_compiled_model(program, <this dir>)` to persist the
/// graph-compiled+tuned program (a big `.mxr`). When the dir is UNSET it writes
/// to `""/<hash>.mxr` and the session **errors out at first embed** ("Failure
/// opening file") — so a turnkey AMD box MUST have this pointed at a writable dir.
///
/// This is the variable nornir treats as authoritative: if the user set it to a
/// non-empty value, nornir leaves the cache configuration alone.
pub const ORT_MIGRAPHX_MODEL_CACHE_PATH_ENV: &str = "ORT_MIGRAPHX_MODEL_CACHE_PATH";
/// The MIGraphX EP's other cache-path var (set in lockstep so either ORT minor
/// reads a valid dir). nornir only fills it in when it is also defaulting
/// [`ORT_MIGRAPHX_MODEL_CACHE_PATH_ENV`].
pub const ORT_MIGRAPHX_CACHE_PATH_ENV: &str = "ORT_MIGRAPHX_CACHE_PATH";

/// Where nornir keeps MIGraphX compiled models when the user expressed no
/// preference: `$HOME/.cache/nornir/migraphx` (next to the model cache
/// `~/.cache/nornir/models`). If `$HOME` is unset or empty the result is the
/// relative path `.nornir-migraphx`.
///
/// Only computes the path — nothing is created or checked on disk; creating
/// the directory is the caller's job.
pub fn default_migraphx_cache_dir() -> PathBuf {
    let home = std::env::var_os("HOME").filter(|h| !h.is_empty());
    if let Some(home) = home {
        let mut dir = PathBuf::from(home);
        dir.extend([".cache", "nornir", "migraphx"]);
        return dir;
    }
    PathBuf::from(".nornir-migraphx")
}

/// Point the MIGraphX EP's model cache at a writable dir *if the user hasn't*,
/// creating it. Without this the MIGraphX EP crashes on first embed writing to an
/// empty path (see [`ORT_MIGRAPHX_MODEL_CACHE_PATH_ENV`]); WITH it the first embed
/// compiles+tunes once (~12 s) then warm runs hit the cache (~1 s, ~10×).
///
/// Rules:
///   - A non-empty [`ORT_MIGRAPHX_MODEL_CACHE_PATH_ENV`] means the user pinned
///     the cache: both variables are left untouched.
///   - Otherwise the default dir is created; if that fails nothing is set.
///   - [`ORT_MIGRAPHX_CACHE_PATH_ENV`] is filled with the same dir unless the
///     user gave it a non-empty value. An *empty* value counts as unset for both
///     variables — an empty path is exactly the failure this function prevents.
///
/// Never clobbers a non-empty user-set value. Intended to run once, from the
/// `ensure` OnceLock initializer.
pub fn apply_migraphx_cache_dir() {
    // "Pinned" = present AND non-empty; the same test is used for both vars.
    let pinned = |key: &str| std::env::var_os(key).is_some_and(|v| !v.is_empty());

    if pinned(ORT_MIGRAPHX_MODEL_CACHE_PATH_ENV) {
        return; // user pinned it — leave both vars untouched.
    }
    let dir = default_migraphx_cache_dir();
    if std::fs::create_dir_all(&dir).is_err() {
        return; // can't create → leave unset (EP still errors, but we don't lie).
    }
    let fill_companion = !pinned(ORT_MIGRAPHX_CACHE_PATH_ENV);
    // SAFETY: single-threaded selection point, before the MIGraphX EP is built.
    unsafe {
        std::env::set_var(ORT_MIGRAPHX_MODEL_CACHE_PATH_ENV, &dir);
        if fill_companion {
            std::env::set_var(ORT_MIGRAPHX_CACHE_PATH_ENV, &dir);
        }
    }
}

/// `gfx<ISA>` → the `HSA_OVERRIDE_GFX_VERSION` value that spoofs it to the
/// nearest ROCm-supported code object, for the case where the installed ROCm
/// doesn't ship a native code object for the part. The value is the ISA written
/// as `major.minor.step` (e.g. gfx1151 → `11.5.1`). Used ONLY as a fallback when
/// native detection failed — a part ROCm supports natively gets NO override.
/// Keyed on the *detected* gfx so we never hardcode one part's value blindly.
///
/// NOTE(review): every value below is the part's *own* ISA number, not a
/// different "nearest" one — confirm that is the intended override for parts
/// such as gfx1103 / gfx1035 / gfx1036.
/// NOTE(review): gfx1150, gfx1151, gfx1100, gfx1101 and gfx1102 are also listed
/// in [`GFX_NATIVE_ROCM`]; [`decide_hsa_override`] checks the native list first,
/// so those rows are never reached through it.
pub const GFX_HSA_OVERRIDE_TABLE: &[(&str, &str)] = &[
    ("gfx1150", "11.5.0"), // Strix Point iGPU (Radeon 890M)
    ("gfx1151", "11.5.1"), // Strix Halo iGPU
    ("gfx1103", "11.0.3"), // Phoenix / Hawk Point iGPU
    ("gfx1102", "11.0.2"), // Navi 33
    ("gfx1101", "11.0.1"), // Navi 32
    ("gfx1100", "11.0.0"), // Navi 31
    ("gfx1036", "10.3.6"), // Raphael iGPU
    ("gfx1035", "10.3.5"), // Rembrandt iGPU
];

/// gfx targets recent ROCm (7.x) recognizes NATIVELY — for these, `rocminfo`
/// sees the device without any override, so nornir must NOT set one (a spurious
/// override would point HIP at the *wrong* code object). gfx1150 (Strix Point,
/// the box this was validated on) is here: ROCm 7.2.4 supports it natively.
///
/// This is a static list: it is not derived from the ROCm version actually
/// installed on the box. Membership is an exact string match on the gfx name.
pub const GFX_NATIVE_ROCM: &[&str] = &[
    "gfx1150", "gfx1151", "gfx1100", "gfx1101", "gfx1102", // RDNA3/3.5
    "gfx942", "gfx90a", "gfx908", // CDNA
];

/// Return the `HSA_OVERRIDE_GFX_VERSION` value that [`GFX_HSA_OVERRIDE_TABLE`]
/// lists for `gfx` (exact, case-sensitive match on the name), or `None` when
/// the table has no row for it.
pub fn hsa_override_for(gfx: &str) -> Option<&'static str> {
    for &(name, value) in GFX_HSA_OVERRIDE_TABLE {
        if name == gfx {
            return Some(value);
        }
    }
    None
}

/// Whether `gfx` appears in [`GFX_NATIVE_ROCM`], i.e. recent ROCm is expected to
/// handle the part without an override. Exact, case-sensitive name match.
pub fn gfx_natively_supported(gfx: &str) -> bool {
    GFX_NATIVE_ROCM.iter().any(|&native| native == gfx)
}

/// Env var a user can set to force a specific HSA gfx override (e.g. `11.0.0`),
/// or to `""`/`"off"`/`"none"` to disable nornir's auto-override entirely.
/// The keywords are matched case-insensitively after trimming whitespace; any
/// other value is passed through verbatim as the override.
pub const NORNIR_HSA_OVERRIDE_ENV: &str = "NORNIR_HSA_OVERRIDE";

/// Runtime probe the embedder uses to decide whether requesting the AMD GPU
/// execution provider is worthwhile on this box.
///
/// Returns `true` only when all three hold: the HIP runtime resolves as a bare
/// soname ([`driver_present`]), and the preload found and loaded both the HIP
/// runtime and MIOpen. A box without ROCm simply yields `false` — the caller
/// then skips the AMD EP and nothing errors.
pub fn available() -> bool {
    // Preload first so the env tweaks and RTLD_GLOBAL loads precede the probe.
    let preload = ensure();
    if !driver_present() {
        return false;
    }
    preload.hip && preload.miopen
}

/// What [`decide_hsa_override`] concluded for this box (cached for `doctor`).
///
/// This is a record of the *decision*; computing it does not touch the
/// environment.
#[derive(Debug, Default, Clone)]
pub struct GfxInfo {
    /// The detected iGPU/dGPU ISA name, e.g. `"gfx1151"` (None if unreadable).
    pub gfx: Option<String>,
    /// The value nornir decided to put in `HSA_OVERRIDE_GFX_VERSION`
    /// (None = leave the variable alone).
    pub applied_override: Option<String>,
    /// True when the user had already set `HSA_OVERRIDE_GFX_VERSION` to a
    /// non-empty value at decision time (never clobbered).
    pub user_preset: bool,
    /// Human note on *why* the decision was made (for `doctor`).
    pub reason: String,
}

// Process-wide cache of the gfx override decision, filled on first `gfx_info`.
static GFX: OnceLock<GfxInfo> = OnceLock::new();

/// Determine the GPU ISA name (e.g. `"gfx1151"`) without spawning `rocminfo`.
///
/// Sources, in order:
///   1. `$NORNIR_GFX_OVERRIDE` — a test/diagnostic hook; a non-blank value is
///      returned as-is (trimmed), so the decision logic can be exercised on a
///      machine with no GPU.
///   2. The kfd topology in sysfs: each
///      `/sys/class/kfd/kfd/topology/nodes/*/properties` file is read and the
///      first one that yields a gfx target via [`parse_gfx_target`] wins.
///      Unreadable nodes and nodes without a GPU target are skipped.
///
/// Returns `None` when the hook is unset/blank and sysfs has no usable node.
/// Nodes are visited in directory-listing order.
pub fn detect_gfx() -> Option<String> {
    let forced = std::env::var_os("NORNIR_GFX_OVERRIDE")
        .map(|raw| raw.to_string_lossy().trim().to_string())
        .filter(|name| !name.is_empty());
    if forced.is_some() {
        return forced;
    }

    let nodes = std::fs::read_dir("/sys/class/kfd/kfd/topology/nodes").ok()?;
    nodes
        .flatten()
        .filter_map(|node| std::fs::read_to_string(node.path().join("properties")).ok())
        .find_map(|text| parse_gfx_target(&text))
}

/// Extract the gfx ISA name from the text of a kfd `properties` file.
///
/// Looks for the first line whose first whitespace-separated token is
/// `gfx_target_version`; its second token is a packed decimal
/// `major*10000 + minor*100 + step` (`110501` → major 11, minor 5, step 1).
/// The name is `gfx` + major + minor in decimal + step in lowercase hex, so
/// `110501` → `gfx1151`, `90402` → `gfx942`, `90010` → `gfx90a`.
///
/// Returns `None` when there is no such line, the value is missing or not a
/// `u32`, or the value is `0` (a CPU node / no GPU on this topology entry).
/// Only the first `gfx_target_version` line is considered. Pure — no I/O.
pub fn parse_gfx_target(properties: &str) -> Option<String> {
    // Outer Option: was the key found at all? Inner: did it carry a value?
    let value = properties.lines().find_map(|line| {
        let mut fields = line.split_whitespace();
        match fields.next() {
            Some("gfx_target_version") => Some(fields.next()),
            _ => None,
        }
    })?;
    let packed = value?.parse::<u32>().ok()?;
    if packed == 0 {
        return None;
    }
    let (major, rest) = (packed / 10_000, packed % 10_000);
    let (minor, step) = (rest / 100, rest % 100);
    // Step is a hex nibble in the name (gfx90a); major/minor are plain decimals.
    Some(format!("gfx{}{}{:x}", major, minor, step))
}

/// Decide what (if anything) nornir should put in `HSA_OVERRIDE_GFX_VERSION`.
/// Pure: no I/O, no environment access — everything arrives as arguments.
///
/// * `detected_gfx` — the gfx name read from the box, if any.
/// * `user_preset` — whether the user already set `HSA_OVERRIDE_GFX_VERSION`.
/// * `policy` — the value of `$NORNIR_HSA_OVERRIDE`, if set.
///
/// Returns `(override value or None, human-readable reason)`. Rules, first
/// match wins:
///   1. User already set the variable → `None` (never clobber).
///   2. A policy is given: blank / `off` / `none` (any case) → `None`;
///      anything else → that value, trimmed, verbatim.
///   3. No gfx detected → `None`.
///   4. gfx is in [`GFX_NATIVE_ROCM`] → `None` (native detection works).
///   5. gfx has a row in [`GFX_HSA_OVERRIDE_TABLE`] → that row's value.
///   6. Otherwise → `None` (a wrong override is worse than none).
pub fn decide_hsa_override(
    detected_gfx: Option<&str>,
    user_preset: bool,
    policy: Option<&str>,
) -> (Option<String>, String) {
    if user_preset {
        let why = String::from("HSA_OVERRIDE_GFX_VERSION already set by user — left untouched");
        return (None, why);
    }

    // An explicit policy short-circuits detection entirely.
    if let Some(pol) = policy {
        let p = pol.trim();
        let disabled = p.is_empty()
            || ["off", "none"].iter().any(|kw| p.eq_ignore_ascii_case(kw));
        return if disabled {
            (None, format!("auto-override disabled via {NORNIR_HSA_OVERRIDE_ENV}={pol}"))
        } else {
            (
                Some(p.to_owned()),
                format!("explicit {NORNIR_HSA_OVERRIDE_ENV}={p} applied"),
            )
        };
    }

    let Some(g) = detected_gfx else {
        return (None, String::from("no GPU ISA readable from sysfs → no override"));
    };
    if gfx_natively_supported(g) {
        return (
            None,
            format!("{g} is supported natively by recent ROCm → no override (native detection works)"),
        );
    }
    if let Some(v) = hsa_override_for(g) {
        return (
            Some(v.to_owned()),
            format!("{g} not natively listed → HSA_OVERRIDE_GFX_VERSION={v} \
                     (spoof to nearest supported ISA from the gfx table)"),
        );
    }
    (
        None,
        format!("{g} detected, no table entry → no override (let ROCm decide)"),
    )
}

/// The gfx override decision for this box, computed on first call and cached
/// for the rest of the process.
///
/// Gathers the inputs — detected gfx target, whether `HSA_OVERRIDE_GFX_VERSION`
/// is already set to a non-empty value, and the `$NORNIR_HSA_OVERRIDE` policy —
/// and feeds them to the pure [`decide_hsa_override`]. It only *reads* the
/// environment; writing the override is [`apply_gfx_override`]'s job.
pub fn gfx_info() -> &'static GfxInfo {
    fn probe() -> GfxInfo {
        let gfx = detect_gfx();
        let user_preset = matches!(
            std::env::var_os("HSA_OVERRIDE_GFX_VERSION"),
            Some(v) if !v.is_empty()
        );
        let policy = std::env::var("NORNIR_HSA_OVERRIDE").ok();
        let decision = decide_hsa_override(gfx.as_deref(), user_preset, policy.as_deref());
        let (applied_override, reason) = decision;
        GfxInfo {
            gfx,
            applied_override,
            user_preset,
            reason,
        }
    }
    GFX.get_or_init(probe)
}

/// Write the gfx HSA override into the process env *if* [`gfx_info`] decided
/// one is needed and the user has not set a value themselves.
///
/// "Set by the user" means present **and non-empty** — the same test
/// [`gfx_info`] uses. With a bare `is_none()` check an exported-but-empty
/// `HSA_OVERRIDE_GFX_VERSION=` would make [`gfx_info`] decide on an override
/// (and `preflight` report it as applied) while this function silently skipped
/// it.
///
/// Never clobbers a non-empty user value. Called from [`ensure`] before any HIP
/// `dlopen`, which is the only point where the override can still take effect.
pub fn apply_gfx_override() {
    let Some(val) = gfx_info().applied_override.as_deref() else {
        return; // decision was "leave it alone"
    };
    let user_set = std::env::var_os("HSA_OVERRIDE_GFX_VERSION").is_some_and(|v| !v.is_empty());
    if user_set {
        return;
    }
    // SAFETY: single-threaded selection point, before any ROCm/HIP use.
    unsafe { std::env::set_var("HSA_OVERRIDE_GFX_VERSION", val) };
}

/// Whether the XDNA NPU device node `/dev/accel/accel0` exists. That node is the
/// Ryzen AI NPU — a SEPARATE accelerator from the ROCm iGPU, reached through a
/// VitisAI / XDNA execution provider that nornir does NOT implement. Only
/// `doctor` reports it, as a detected-but-unused future EP.
pub fn xdna_npu_present() -> bool {
    const ACCEL_NODE: &str = "/dev/accel/accel0";
    std::fs::metadata(ACCEL_NODE).is_ok()
}

/// Whether the kfd compute device node `/dev/kfd` exists — the amdgpu compute
/// interface ROCm needs. Detection only: creating the node takes the kernel
/// driver and root, which nornir cannot provide.
pub fn kfd_present() -> bool {
    const KFD_NODE: &str = "/dev/kfd";
    std::fs::metadata(KFD_NODE).is_ok()
}

/// The actual preload pass behind [`ensure`]: resolve the search dirs, then for
/// each entry of `ORDERED_SONAMES` (in order) locate a matching file and
/// `dlopen` it globally. Records which names loaded and whether the HIP runtime
/// and MIOpen were among them. Names that are not found or fail to load are
/// skipped without error.
fn run() -> RocmPreload {
    let dirs = candidate_dirs();
    let mut loaded = Vec::new();
    let (mut hip, mut miopen) = (false, false);

    for &soname in ORDERED_SONAMES {
        let opened = find_lib(&dirs, soname).is_some_and(|path| dlopen_global(&path));
        if !opened {
            continue;
        }
        loaded.push(soname.to_owned());
        if soname.starts_with("libamdhip64") {
            hip = true;
        } else if soname.starts_with("libMIOpen") {
            miopen = true;
        }
    }

    RocmPreload {
        loaded,
        dirs,
        hip,
        miopen,
    }
}

/// Whether the AMD HIP runtime resolves when asked for by its bare soname
/// (`libamdhip64.so`), i.e. whether the dynamic loader can find it on its own.
///
/// nornir can only **detect** this: installing the `amdgpu` kernel module and
/// the ROCm packages needs root. The probe handle is not kept.
pub fn driver_present() -> bool {
    use libloading::os::unix::{Library, RTLD_NOW};
    let soname = std::ffi::OsStr::new("libamdhip64.so");
    // SAFETY: probing a well-known system soname; the handle is only tested for
    // success and then dropped.
    let probe = unsafe { Library::open(Some(soname), RTLD_NOW) };
    probe.is_ok()
}

/// Render a probe result for the preflight report: `"found"` or `"MISSING"`.
fn yn(b: bool) -> &'static str {
    match b {
        true => "found",
        false => "MISSING",
    }
}

/// Preflight the ROCm GPU path and describe the result.
///
/// Returns `(gpu_ready, report)`: `gpu_ready` is [`available`]'s verdict, and
/// `report` is a multi-line, human-readable summary of every probe — device
/// node, detected ISA, HIP / MIOpen, the onnxruntime library, the HSA override
/// decision, the MIGraphX cache setting, which libs loaded, and which dirs were
/// searched. When the GPU path is not ready, distro-specific fix commands from
/// [`rocm_install_hint`] are appended. Mirrors [`super::cuda::preflight`].
pub fn preflight() -> (bool, String) {
    let p = ensure();
    let driver = driver_present();
    let gpu_ready = available();

    let gfx = gfx_info();
    let ort = onnxruntime_dylib();

    let mut s = String::from("nornir ROCm preflight (embed-ort-rocm AMD GPU path)\n");

    s += &format!("  /dev/kfd (amdgpu compute)   : {}\n", yn(kfd_present()));

    let isa = gfx.gfx.as_deref().unwrap_or("(not readable from sysfs)");
    s += &format!("  iGPU/dGPU ISA target        : {}\n", isa);

    s += &format!("  AMD HIP runtime libamdhip64 : {}\n", yn(driver && p.hip));
    s += &format!("  MIOpen (ROCm DNN)           : {}\n", yn(p.miopen));

    let ort_status: String = match &ort {
        Some(path) => format!("found ({})", path.display()),
        None => "MISSING".into(),
    };
    s += &format!("  ROCm-onnxruntime            : {}\n", ort_status);

    // A user-set variable takes precedence over whatever nornir decided.
    let hsa_status: String = if gfx.user_preset {
        "(user-set; not touched by nornir)".into()
    } else {
        match &gfx.applied_override {
            Some(v) => format!("{v} (applied by nornir) — {}", gfx.reason),
            None => format!("not applied — {}", gfx.reason),
        }
    };
    s += &format!("  HSA_OVERRIDE_GFX_VERSION    : {}\n", hsa_status);

    let cache_status: String = match std::env::var(ORT_MIGRAPHX_MODEL_CACHE_PATH_ENV) {
        Ok(v) if !v.is_empty() => format!("{v} (compiled-model `.mxr` cache; warm embeds ~10× faster)"),
        _ => format!(
            "UNSET → MIGraphX errors on first embed; nornir defaults it to {}",
            default_migraphx_cache_dir().display()
        ),
    };
    s += &format!("  MIGraphX model cache        : {}\n", cache_status);

    let loaded_list: String = if p.loaded.is_empty() {
        "(none)".into()
    } else {
        p.loaded.join(", ")
    };
    s += &format!("  ROCm libs loaded           : {}\n", loaded_list);

    let searched: Vec<String> = p.dirs.iter().map(|d| d.display().to_string()).collect();
    s += &format!("  dirs searched              : {}\n", searched.join(", "));

    if xdna_npu_present() {
        s += "  XDNA NPU (/dev/accel/accel0): detected — SEPARATE Ryzen AI accelerator, \
             unused (no VitisAI/XDNA EP yet)\n";
    }

    // Every expected soname that is absent from the loaded list.
    let missing: Vec<&str> = ORDERED_SONAMES
        .iter()
        .copied()
        .filter(|&soname| p.loaded.iter().all(|got| got.as_str() != soname))
        .collect();
    if !missing.is_empty() {
        s += &format!("  runtime libs not found     : {}\n", missing.join(", "));
    }

    let verdict = if gpu_ready { "READY ✓" } else { "unavailable → CPU fallback" };
    s += &format!("\n  verdict: AMD GPU embedding {}\n", verdict);

    if !gpu_ready {
        // Exact, copy-paste fix commands for THIS box (distro-aware).
        let facts = HintFacts {
            gpu_ready,
            runtime_present: driver && p.hip && p.miopen,
            onnxruntime_present: ort.is_some(),
            gfx: gfx.gfx.clone(),
        };
        s += &rocm_install_hint(detect_distro(), &facts);
        return (gpu_ready, s);
    }
    s += "  → all set; embed-ort runs on the AMD GPU via MIGraphX/ROCm.\n";
    (gpu_ready, s)
}

/// Build the ordered list of directories to search for ROCm libs. Only
/// directories that exist are returned, each at most once; earlier entries take
/// priority.
///
/// Order:
///   1. `$NORNIR_ROCM_LIBS` — explicit, path-list separated.
///   2. `$ROCM_PATH` / `$HIP_PATH` → `<root>/lib` and `<root>/lib64`. An *empty*
///      value is ignored: joining onto an empty root would otherwise yield the
///      CWD-relative `lib` / `lib64`, i.e. libs would be picked up from whatever
///      directory the process was started in.
///   3. System locations: `/opt/nornir/rocm`, `/opt/rocm/lib`, every
///      `/opt/rocm-<version>/lib` present on the box (newest version first), a
///      fixed list of well-known versioned dirs (kept in case `/opt` cannot be
///      listed), `/opt/rocm/lib64`, `/usr/lib/x86_64-linux-gnu`.
fn candidate_dirs() -> Vec<PathBuf> {
    // Append `p` if it is an existing directory not already in the list.
    fn add(dirs: &mut Vec<PathBuf>, p: PathBuf) {
        if p.is_dir() && !dirs.contains(&p) {
            dirs.push(p);
        }
    }

    // `/opt/rocm-<version>/lib` for every versioned install, newest first.
    fn versioned_rocm_lib_dirs() -> Vec<PathBuf> {
        let Ok(entries) = std::fs::read_dir("/opt") else {
            return Vec::new();
        };
        let mut found: Vec<(Vec<u32>, PathBuf)> = entries
            .flatten()
            .filter_map(|e| {
                let name = e.file_name();
                let version = name.to_str()?.strip_prefix("rocm-")?;
                // Numeric sort key so 7.10.0 ranks above 7.2.4; junk parts count as 0.
                let key: Vec<u32> = version
                    .split('.')
                    .map(|part| part.parse::<u32>().unwrap_or(0))
                    .collect();
                Some((key, e.path().join("lib")))
            })
            .collect();
        found.sort_by(|a, b| b.cmp(a));
        found.into_iter().map(|(_, lib)| lib).collect()
    }

    let mut dirs: Vec<PathBuf> = Vec::new();

    // 1. Explicit override.
    if let Some(v) = std::env::var_os("NORNIR_ROCM_LIBS") {
        for p in std::env::split_paths(&v) {
            add(&mut dirs, p);
        }
    }
    // 2. ROCM_PATH / HIP_PATH → <root>/lib(64).
    for key in ["ROCM_PATH", "HIP_PATH"] {
        if let Some(root) = std::env::var_os(key).filter(|r| !r.is_empty()) {
            add(&mut dirs, Path::new(&root).join("lib"));
            add(&mut dirs, Path::new(&root).join("lib64"));
        }
    }
    // 3. Known system locations.
    add(&mut dirs, PathBuf::from("/opt/nornir/rocm"));
    add(&mut dirs, PathBuf::from("/opt/rocm/lib"));
    for lib in versioned_rocm_lib_dirs() {
        add(&mut dirs, lib);
    }
    for sys in [
        "/opt/rocm-7.0.0/lib",
        "/opt/rocm-6.4.0/lib",
        "/opt/rocm-6.2.0/lib",
        "/opt/rocm-6.1.0/lib",
        "/opt/rocm-6.0.0/lib",
        "/opt/rocm/lib64",
        "/usr/lib/x86_64-linux-gnu",
    ] {
        add(&mut dirs, PathBuf::from(sys));
    }
    dirs
}

/// Locate `soname` in `dirs`, honouring directory priority: the first directory
/// that has a match wins.
///
/// Within a directory the exact name `soname` is preferred. Failing that, a
/// *versioned* variant is accepted: `soname` followed by one or more
/// `.<digits>` groups (`libMIOpen.so` → `libMIOpen.so.1`, `libMIOpen.so.1.0`).
/// Look-alikes that merely share the prefix — `libfoo.so-gdb.py`,
/// `libfoo.so.1.debug`, `libfoo.solo` — are rejected; they are not loadable
/// libraries and would make the preload fail even though a real lib may sit in
/// a later directory.
///
/// When several versioned variants exist the shortest name wins (ties broken
/// alphabetically), so the result does not depend on directory-listing order.
///
/// Returns `None` if no directory has a match. Unreadable directories and
/// non-UTF-8 file names are skipped.
fn find_lib(dirs: &[PathBuf], soname: &str) -> Option<PathBuf> {
    // True for `<soname>.<n>[.<n>...]` where every `<n>` is all ASCII digits.
    fn is_versioned_variant(name: &str, soname: &str) -> bool {
        name.strip_prefix(soname)
            .and_then(|rest| rest.strip_prefix('.'))
            .is_some_and(|version| {
                version
                    .split('.')
                    .all(|seg| !seg.is_empty() && seg.bytes().all(|b| b.is_ascii_digit()))
            })
    }

    for dir in dirs {
        let exact = dir.join(soname);
        if exact.exists() {
            return Some(exact);
        }
        let Ok(entries) = std::fs::read_dir(dir) else {
            continue;
        };
        let best = entries
            .flatten()
            .filter_map(|e| e.file_name().into_string().ok())
            .filter(|name| is_versioned_variant(name, soname))
            .min_by(|a, b| a.len().cmp(&b.len()).then_with(|| a.cmp(b)));
        if let Some(name) = best {
            return Some(dir.join(name));
        }
    }
    None
}

/// `dlopen` the library at `path` with `RTLD_NOW | RTLD_GLOBAL` and keep it
/// loaded for the rest of the process. Returns whether the load succeeded; a
/// failure is reported as `false`, never as a panic.
fn dlopen_global(path: &Path) -> bool {
    use libloading::os::unix::{Library, RTLD_GLOBAL, RTLD_NOW};
    // SAFETY: loading a ROCm shared lib by absolute path; on success the handle
    // is leaked so it stays resident for the provider to resolve against.
    let opened = unsafe { Library::open(Some(path), RTLD_NOW | RTLD_GLOBAL) };
    if let Ok(handle) = opened {
        // Forget instead of drop: dropping the handle would release the library.
        std::mem::forget(handle);
        return true;
    }
    false
}

/// Whether `path` is a real onnxruntime library: it must `dlopen` successfully
/// **and** export the `OrtGetApiBase` symbol, which rules out a same-named
/// decoy. The library is opened only for the probe and released afterwards.
/// Mirrors [`super::cuda`]'s probe.
fn loadable(path: &Path) -> bool {
    use libloading::os::unix::{Library, RTLD_NOW};
    type OrtGetApiBase = unsafe extern "C" fn() -> *const std::ffi::c_void;

    // SAFETY: probing a shared object; handle dropped at scope end.
    match unsafe { Library::open(Some(path.as_os_str()), RTLD_NOW) } {
        Ok(lib) => {
            // SAFETY: looking up a symbol on a live handle; only testing
            // existence, the symbol is never called.
            let exported = unsafe { lib.get::<OrtGetApiBase>(b"OrtGetApiBase") }.is_ok();
            exported
        }
        Err(_) => false,
    }
}

/// Find a ROCm-enabled `libonnxruntime.so` for the dynamic-load ort backend.
/// Search order mirrors [`super::cuda::onnxruntime_dylib`]:
///   1. `$ORT_DYLIB_PATH` (explicit; honoured if non-empty and it loads).
///   2. The ROCm discovery dirs ([`candidate_dirs`], incl. `/opt/nornir/rocm`),
///      **each probed in priority order**: a directory whose candidate fails
///      the [`loadable`] check (a decoy or a broken copy) no longer hides a good
///      library in a later directory.
///   3. The bare soname via the system loader.
///
/// Returns the path that passed the probe, or `None` if nothing did. The result
/// of step 3 is the bare, non-absolute name `libonnxruntime.so`.
///
/// NOTE(review): "ROCm-enabled" is not actually verified — the probe only checks
/// that the library loads and exports `OrtGetApiBase`.
pub fn onnxruntime_dylib() -> Option<PathBuf> {
    const SONAME: &str = "libonnxruntime.so";

    let pinned = std::env::var_os("ORT_DYLIB_PATH")
        .filter(|p| !p.is_empty())
        .map(PathBuf::from);
    if let Some(path) = pinned {
        if loadable(&path) {
            return Some(path);
        }
    }

    // One directory at a time, so an unloadable hit falls through to the next.
    let discovered = candidate_dirs().iter().find_map(|dir| {
        find_lib(std::slice::from_ref(dir), SONAME).filter(|path| loadable(path))
    });
    if discovered.is_some() {
        return discovered;
    }

    let bare = PathBuf::from(SONAME);
    loadable(&bare).then_some(bare)
}

/// Probe for a usable onnxruntime via [`onnxruntime_dylib`] and arm the
/// dynamic-load ort path with it.
///
/// Returns `false`, leaving the environment untouched, when no library was
/// found. Otherwise returns `true`; `ORT_DYLIB_PATH` is set to the library's
/// location only when that location is an absolute path — a library found by
/// its bare soname leaves the variable as it was. Sibling of
/// [`super::cuda::arm_onnxruntime`].
pub fn arm_onnxruntime() -> bool {
    let Some(path) = onnxruntime_dylib() else {
        return false;
    };
    if path.is_absolute() {
        // SAFETY: single-threaded selection point, before any ort use.
        unsafe { std::env::set_var("ORT_DYLIB_PATH", &path) };
    }
    true
}

/// `nornir vector setup-rocm` — pin a complete ROCm runtime set (+ a
/// ROCm-enabled `libonnxruntime.so` if found) into `target` (default
/// `/opt/nornir/rocm`, a built-in search dir) by **copying** the libs from
/// wherever discovery finds them on this box. After this the AMD GPU "just
/// works" for every nornir process with no env. Sibling of
/// [`super::cuda::setup`].
///
/// Returns `(copied, missing)` soname lists. A lib that is already in place in
/// `target` is reported under `copied`.
///
/// Safe to re-run: because `target` is itself one of the discovery dirs, a
/// second run finds the previously pinned files *as the source*. Copying a file
/// onto itself with `std::fs::copy` truncates it, so source and destination are
/// compared first and identical files are left alone.
///
/// # Errors
/// Fails if `target` cannot be created or a library cannot be copied into it
/// (typically a permissions problem). Failure to create the plain-`.so` alias is
/// deliberately ignored: the versioned file is already in place and discovery
/// matches versioned names.
pub fn setup(target: &Path) -> anyhow::Result<(Vec<String>, Vec<String>)> {
    use anyhow::Context;

    // Copy `src` to `dst` unless both resolve to the same file on disk.
    fn copy_unless_same(src: &Path, dst: &Path) -> std::io::Result<()> {
        if let (Ok(a), Ok(b)) = (src.canonicalize(), dst.canonicalize()) {
            if a == b {
                return Ok(());
            }
        }
        std::fs::copy(src, dst).map(|_| ())
    }

    let dirs = candidate_dirs();
    std::fs::create_dir_all(target)
        .with_context(|| format!("create {} (need root? try sudo)", target.display()))?;
    let mut copied = Vec::new();
    let mut missing = Vec::new();

    for soname in ORDERED_SONAMES {
        match find_lib(&dirs, soname) {
            Some(src) => {
                let dst = target.join(src.file_name().unwrap_or_default());
                copy_unless_same(&src, &dst)
                    .with_context(|| format!("copy {} -> {}", src.display(), dst.display()))?;
                // The plain `.so` name the loader asks for, alongside the
                // fully-versioned file we copied. Best-effort by design.
                let alias = target.join(soname);
                if !alias.exists() {
                    std::fs::copy(&src, &alias).ok();
                }
                copied.push(soname.to_string());
            }
            None => missing.push(soname.to_string()),
        }
    }
    // A ROCm-enabled libonnxruntime.so, if we can find one to colocate.
    match onnxruntime_dylib() {
        Some(src) if src.is_absolute() => {
            let dst = target.join("libonnxruntime.so");
            copy_unless_same(&src, &dst)
                .with_context(|| format!("copy {} -> {}", src.display(), dst.display()))?;
            copied.push("libonnxruntime.so".to_string());
        }
        _ => missing.push("libonnxruntime.so (ROCm-enabled)".to_string()),
    }
    Ok((copied, missing))
}

/// The facts [`rocm_install_hint`] decides on. A test injects this directly;
/// [`preflight`] fills it from the real probes. Pure data — no I/O.
#[derive(Debug, Default, Clone)]
pub struct HintFacts {
    /// AMD GPU embedding is fully ready (driver + runtime + onnxruntime). When
    /// true the hint is empty (nothing to fix).
    ///
    /// NOTE(review): [`preflight`] fills this from [`available`], which checks
    /// the driver, HIP and MIOpen but not onnxruntime — confirm which of the
    /// two definitions is intended.
    pub gpu_ready: bool,
    /// The ROCm runtime (HIP + MIOpen) is present.
    pub runtime_present: bool,
    /// A ROCm-enabled `libonnxruntime.so` is loadable.
    pub onnxruntime_present: bool,
    /// Detected GPU ISA target (e.g. `"gfx1151"`), if readable.
    pub gfx: Option<String>,
}

/// Linux distro family; selects which package-manager commands to print.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Distro {
    /// Debian/Ubuntu family (`apt`).
    Debian,
    /// RHEL/Fedora family (`dnf`).
    Fedora,
    /// Anything else / undetected.
    Unknown,
}

/// Work out the distro family of the running system.
///
/// `$NORNIR_DISTRO`, when set (to valid Unicode), decides alone — it exists for
/// tests: `debian`/`ubuntu`/`apt` → Debian, `fedora`/`rhel`/`dnf` → Fedora (any
/// case), anything else → Unknown. Otherwise `/etc/os-release` is read and
/// classified by [`classify_os_release`]; an unreadable file counts as empty.
pub fn detect_distro() -> Distro {
    match std::env::var("NORNIR_DISTRO") {
        Ok(forced) => match forced.to_ascii_lowercase().as_str() {
            "debian" | "ubuntu" | "apt" => Distro::Debian,
            "fedora" | "rhel" | "dnf" => Distro::Fedora,
            _ => Distro::Unknown,
        },
        Err(_) => {
            let text = std::fs::read_to_string("/etc/os-release").unwrap_or_default();
            classify_os_release(&text)
        }
    }
}

/// PURE: map the text of an `/etc/os-release` file to a [`Distro`] family.
///
/// Only the `ID=` and `ID_LIKE=` lines matter (the last occurrence of each
/// wins; surrounding double quotes are dropped; comparison is
/// case-insensitive). If either value mentions `debian` or `ubuntu` the family
/// is Debian; else if it mentions `rhel`, `fedora` or `centos` it is Fedora;
/// otherwise Unknown.
pub fn classify_os_release(os_release: &str) -> Distro {
    // Last value recorded for a `KEY=` prefix, unquoted and lowercased.
    let last_value = |key: &str| -> String {
        os_release
            .lines()
            .filter_map(|line| line.trim().strip_prefix(key))
            .last()
            .map(|raw| raw.trim_matches('"').to_ascii_lowercase())
            .unwrap_or_default()
    };
    let id = last_value("ID=");
    let like = last_value("ID_LIKE=");

    let hay = format!("{id} {like}");
    let mentions = |needles: &[&str]| needles.iter().any(|needle| hay.contains(*needle));

    if mentions(&["debian", "ubuntu"]) {
        return Distro::Debian;
    }
    if mentions(&["rhel", "fedora", "centos"]) {
        return Distro::Fedora;
    }
    Distro::Unknown
}

/// PURE, testable: exact copy-paste install commands to make the ROCm GPU path
/// work on THIS box, given the distro and a [`preflight`]-style fact snapshot.
/// Returns `String::new()` when the GPU path is already ready (nothing to fix) —
/// callers only show it when ROCm is unavailable. Mirrors the CUDA preflight's
/// "run THESE commands" UX but for AMD/ROCm/Strix-Halo (userspace-only, since
/// /dev/kfd is already present on Strix).
///
/// What is emitted, in order:
/// 1. the distro-specific ROCm userspace install (only if `!facts.runtime_present`);
/// 2. the `onnxruntime-rocm` step (only if `!facts.onnxruntime_present`);
/// 3. a note about `facts.gfx`: "native, no override" when
///    [`gfx_natively_supported`] says so, otherwise the `HSA_OVERRIDE_GFX_VERSION`
///    value from [`hsa_override_for`] if the table has one (no note otherwise);
/// 4. the `setup-rocm` pinning step (always).
///
/// Layout contract: after the `FIX` header, every line is either a `#` comment
/// or a runnable command, so the block can be pasted into a shell as a whole
/// without prose being executed.
pub fn rocm_install_hint(distro: Distro, facts: &HintFacts) -> String {
    if facts.gpu_ready {
        return String::new();
    }
    let mut s = String::from("\n  FIX — run these, then re-run `nornir vector doctor`:\n");

    if !facts.runtime_present {
        // NOTE on the literals below: a trailing `\` swallows the newline and
        // the next line's leading whitespace, so `\x20` re-introduces the first
        // space of each continuation line.
        let step_one = match distro {
            Distro::Debian => {
                "  # 1. ROCm 7.x userspace via AMD's apt repo (userspace-only; /dev/kfd comes\n\
                 \x20  #    from the in-tree amdgpu driver, so NO DKMS needed). On Ubuntu 26.04\n\
                 \x20  #    LTS point at the noble (24.04) repo — AMD has no resolute/26.04 build\n\
                 \x20  #    yet and ROCm's userspace libs are forward-compatible:\n\
                 \x20    sudo mkdir -p /etc/apt/keyrings\n\
                 \x20    curl -s https://repo.radeon.com/rocm/rocm.gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/rocm.gpg\n\
                 \x20    echo 'deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/latest noble main' | sudo tee /etc/apt/sources.list.d/rocm.list\n\
                 \x20    printf 'Package: *\\nPin: release o=repo.radeon.com\\nPin-Priority: 600\\n' | sudo tee /etc/apt/preferences.d/rocm-pin-600\n\
                 \x20    sudo apt-get update\n\
                 \x20    sudo apt-get install -y rocminfo rocm-hip-runtime rocm-smi-lib migraphx rocblas miopen-hip rocrand\n"
            }
            Distro::Fedora => {
                "  # 1. ROCm 6.4+/7.0 userspace (gfx1151 needs ≥6.4; 7.0 lists it natively):\n\
                 \x20    sudo dnf install -y rocm-hip rocm-hip-runtime miopen-hip migraphx rocblas\n\
                 \x20    # (or AMD's amdgpu-install: sudo amdgpu-install --usecase=rocm --no-dkms)\n"
            }
            Distro::Unknown => {
                // No package manager is known here, so this step is guidance
                // only — every line is a comment, never a bare prose line.
                "  # 1. Install ROCm 6.4+/7.0 userspace for your distro (gfx1151 needs ≥6.4):\n\
                 \x20    # AMD's amdgpu-install --usecase=rocm --no-dkms, or your distro's\n\
                 \x20    # rocm-hip-runtime + miopen-hip + migraphx + rocblas packages.\n\
                 \x20    # /dev/kfd is already present, so only the USERSPACE is needed.\n"
            }
        };
        s.push_str(step_one);
    }

    if !facts.onnxruntime_present {
        s.push_str(
            "  # 2. A ROCm-enabled onnxruntime (the ort dynamic-load dylib):\n\
             \x20    pip install onnxruntime-rocm   # provides libonnxruntime.so with ROCm+MIGraphX EPs\n\
             \x20    # nornir finds it via ORT_DYLIB_PATH=/path/to/libonnxruntime.so, or drop it in\n\
             \x20    # /opt/nornir/rocm (a built-in search dir) — see step 3.\n",
        );
    }

    // The gfx note is informational only, hence fully commented out.
    if let Some(gfx) = facts.gfx.as_deref() {
        if gfx_natively_supported(gfx) {
            s.push_str(&format!(
                "  # {gfx}: supported natively by recent ROCm (e.g. ROCm 7.2.4 sees gfx1150 — \
                 Strix Point —\n\
                 \x20    # and gfx1151 — Strix Halo — directly), so NO HSA override is needed.\n",
            ));
        } else if let Some(v) = hsa_override_for(gfx) {
            s.push_str(&format!(
                "  # {gfx}: older ROCm may need HSA_OVERRIDE_GFX_VERSION={v} — nornir now sets this\n\
                 \x20    # AUTOMATICALLY for non-native parts (override via NORNIR_HSA_OVERRIDE, or\n\
                 \x20    # NORNIR_HSA_OVERRIDE=off to disable). Upgrading ROCm usually removes the need.\n",
            ));
        }
    }

    // Always last: pinning is what makes the fix stick across runs.
    s.push_str(
        "  # 3. Pin the discovered libs so the GPU works with no env, every run:\n\
         \x20    sudo nornir vector setup-rocm     # copies the ROCm set (+ onnxruntime) into /opt/nornir/rocm\n",
    );
    s
}

#[cfg(test)]
mod tests {
    use super::*;

    /// LAW (inject-and-assert): on a box with no ROCm, the probe must return a
    /// *sensible* answer — `available()` false, `driver_present()` false, and a
    /// non-empty preflight report whose verdict names CPU fallback. We can't
    /// inject an AMD GPU here, so we assert the no-GPU branch concretely; if a
    /// real GPU IS present, the inverse invariants are asserted instead.
    #[test]
    fn probe_degrades_to_cpu_without_rocm() {
        let p = ensure();
        // Discovery always produces a dir list (system dirs are unconditional).
        assert!(!p.dirs.is_empty(), "candidate dirs must be populated");

        let (ready, report) = preflight();
        // available() and preflight() must agree.
        assert_eq!(ready, available(), "preflight verdict must match available()");
        assert!(report.contains("ROCm preflight"), "report header missing: {report}");

        if available() {
            // If a real AMD GPU IS present, the invariants still hold.
            assert!(driver_present() && p.hip && p.miopen, "available() implies hip+miopen+driver");
            assert!(report.contains("READY"), "ready box should say READY: {report}");
        } else {
            // The common CI/dev case: no AMD GPU. Everything degrades cleanly.
            assert!(!ready, "no ROCm => not ready");
            assert!(report.contains("CPU fallback"), "verdict should name CPU fallback: {report}");
            // On a no-ROCm box the doctor MUST print actionable copy-paste fix
            // commands ("run THESE, then re-run doctor"), not just a verdict.
            assert!(report.contains("FIX — run these"), "no-ROCm report must carry fix commands: {report}");
            assert!(report.contains("nornir vector setup-rocm"), "fix must mention setup-rocm: {report}");
        }
    }

    /// PURE: the packed `gfx_target_version` decimal → gfx ISA name, including
    /// the gfx1151 (Strix Halo) case (`110501`) and the CPU-node 0 case.
    #[test]
    fn parse_gfx_target_decodes_strix_halo() {
        assert_eq!(parse_gfx_target("gfx_target_version 110501").as_deref(), Some("gfx1151"));
        // Other RDNA3 parts round-trip too.
        assert_eq!(parse_gfx_target("gfx_target_version 110000").as_deref(), Some("gfx1100"));
        assert_eq!(parse_gfx_target("gfx_target_version 90402").as_deref(), Some("gfx942"));
        // A CPU topology node reports 0 → no GPU ISA.
        assert_eq!(parse_gfx_target("gfx_target_version 0"), None);
        // Surrounding lines don't confuse it.
        let props = "cpu_cores_count 16\ngfx_target_version 110501\nsimd_count 8\n";
        assert_eq!(parse_gfx_target(props).as_deref(), Some("gfx1151"));
        assert_eq!(parse_gfx_target("no version here"), None);
    }

    /// LAW (inject-assert): the gfx/HSA-override decision is a PURE, table-driven
    /// function. A part recent ROCm supports natively (gfx1150 Strix Point,
    /// gfx1151 Strix Halo) gets NO override; the table value is only applied when
    /// the part is NOT natively supported; a user value is NEVER clobbered;
    /// `off`/empty policy disables it; an explicit policy is honoured verbatim;
    /// an unknown gfx gets no override.
    #[test]
    fn hsa_override_decision_is_pure_and_safe() {
        // The validated box: gfx1150 (Strix Point) — natively supported → NO override.
        let (val, why) = decide_hsa_override(Some("gfx1150"), false, None);
        assert_eq!(val, None, "gfx1150 is native on ROCm 7.2.4 → no override");
        assert!(why.contains("nativ"), "reason should say native: {why}");
        // gfx1151 (Strix Halo) is also native on recent ROCm → no override.
        assert_eq!(decide_hsa_override(Some("gfx1151"), false, None).0, None);

        // A part NOT in the native set but in the table (simulating an older ROCm
        // that lacks the code object) → its table value applies.
        let (val, _) = decide_hsa_override(Some("gfx1103"), false, None);
        assert_eq!(val.as_deref(), Some("11.0.3"), "non-native table part gets its override");
        // The table itself maps gfx1150→11.5.0, gfx1151→11.5.1 (used if ever non-native).
        assert_eq!(hsa_override_for("gfx1150"), Some("11.5.0"));
        assert_eq!(hsa_override_for("gfx1151"), Some("11.5.1"));

        // Never clobber a user-set HSA_OVERRIDE_GFX_VERSION.
        let (val, why) = decide_hsa_override(Some("gfx1103"), true, None);
        assert_eq!(val, None, "must not clobber user value");
        assert!(why.contains("user"), "reason should say user-set: {why}");

        // Policy "off"/"none"/"" disables the auto-override.
        for off in ["off", "none", "", "  "] {
            let (val, _) = decide_hsa_override(Some("gfx1103"), false, Some(off));
            assert_eq!(val, None, "policy {off:?} should disable override");
        }

        // Explicit policy value is used verbatim.
        let (val, _) = decide_hsa_override(Some("gfx1150"), false, Some("11.0.0"));
        assert_eq!(val.as_deref(), Some("11.0.0"));

        // An unknown GPU / no GPU gets no override (a wrong one is worse than none).
        assert_eq!(decide_hsa_override(Some("gfx9999"), false, None).0, None);
        assert_eq!(decide_hsa_override(None, false, None).0, None);
    }

    /// PURE: distro classification from `/etc/os-release` bodies.
    #[test]
    fn classify_os_release_picks_family() {
        assert_eq!(classify_os_release("ID=ubuntu\nID_LIKE=debian\n"), Distro::Debian);
        assert_eq!(classify_os_release("ID=debian\n"), Distro::Debian);
        assert_eq!(classify_os_release("ID=fedora\n"), Distro::Fedora);
        assert_eq!(classify_os_release("ID=\"rhel\"\nID_LIKE=\"fedora\"\n"), Distro::Fedora);
        assert_eq!(classify_os_release("ID=arch\n"), Distro::Unknown);
        // Derivatives are recognised through their (quoted, multi-valued) ID_LIKE.
        assert_eq!(
            classify_os_release("ID=\"rocky\"\nID_LIKE=\"rhel centos fedora\"\n"),
            Distro::Fedora
        );
        assert_eq!(
            classify_os_release("ID=linuxmint\nID_LIKE=\"ubuntu debian\"\n"),
            Distro::Debian
        );
        // `VERSION_ID=` must not be mistaken for `ID=`.
        assert_eq!(classify_os_release("VERSION_ID=\"debian\"\nID=arch\n"), Distro::Unknown);
        // An unreadable os-release is classified as the empty string.
        assert_eq!(classify_os_release(""), Distro::Unknown);
    }

    /// The default MIGraphX cache dir hangs off `$HOME/.cache/nornir/migraphx`
    /// (sibling of the model cache), falling back to a relative dir when `$HOME`
    /// is unreadable. ONNX Runtime 1.24 is MIGraphX-only, and an UNSET cache dir
    /// makes the EP error on first embed — so this default is what keeps the AMD
    /// GPU path turnkey.
    #[test]
    fn default_migraphx_cache_dir_is_under_home_cache() {
        let dir = default_migraphx_cache_dir();
        // Always lands in the nornir cache namespace, never an empty/"" path.
        assert!(dir.ends_with("nornir/migraphx") || dir == Path::new(".nornir-migraphx"));
        assert!(!dir.as_os_str().is_empty(), "cache dir is never the empty path the EP chokes on");
    }

    /// LAW (inject-assert): the install hint is a PURE function of (distro, facts)
    /// and emits the RIGHT package-manager commands per distro, the gfx note
    /// (native vs. HSA override) when relevant, the onnxruntime-rocm step when
    /// it's missing, and the setup-rocm one-liner — and is EMPTY when the GPU is
    /// already ready.
    #[test]
    fn rocm_install_hint_is_distro_aware_and_gated() {
        // Already ready → no hint at all.
        let ready = HintFacts { gpu_ready: true, ..Default::default() };
        assert!(rocm_install_hint(Distro::Debian, &ready).is_empty(), "ready box gets no hint");

        // Strix Halo, nothing installed, Debian.
        let strix = HintFacts {
            gpu_ready: false,
            runtime_present: false,
            onnxruntime_present: false,
            gfx: Some("gfx1151".into()),
        };
        let deb = rocm_install_hint(Distro::Debian, &strix);
        assert!(deb.contains("apt-get install"), "Debian hint must use apt-get: {deb}");
        // The exact, validated ROCm-7.x noble-repo recipe.
        assert!(deb.contains("repo.radeon.com/rocm/rocm.gpg.key"), "must add AMD's repo key: {deb}");
        assert!(
            deb.contains("https://repo.radeon.com/rocm/apt/latest noble main"),
            "must point at the noble repo (26.04 has no resolute build yet): {deb}"
        );
        assert!(deb.contains("/etc/apt/preferences.d/rocm-pin-600"), "must pin the repo: {deb}");
        assert!(
            deb.contains("rocm-hip-runtime") && deb.contains("migraphx") && deb.contains("miopen-hip"),
            "must install the runtime + migraphx + miopen: {deb}"
        );
        assert!(deb.contains("noble"), "must note the noble repo fallback: {deb}");
        assert!(deb.contains("onnxruntime-rocm"), "must give the onnxruntime step: {deb}");
        // gfx1151 (Strix Halo) is natively supported → the hint must say NO
        // override is needed, NOT print an HSA_OVERRIDE line.
        assert!(deb.contains("natively"), "native part must note no override needed: {deb}");
        assert!(!deb.contains("HSA_OVERRIDE_GFX_VERSION=11.5.1"), "native gfx1151 must not force an override: {deb}");
        assert!(deb.contains("nornir vector setup-rocm"), "must give the setup-rocm one-liner: {deb}");
        assert!(deb.contains("re-run `nornir vector doctor`"), "verdict must be actionable: {deb}");

        // A NON-native part (e.g. gfx1103 on an old ROCm) DOES get the auto-override note.
        let old = HintFacts {
            gpu_ready: false,
            runtime_present: false,
            onnxruntime_present: false,
            gfx: Some("gfx1103".into()),
        };
        let old_deb = rocm_install_hint(Distro::Debian, &old);
        assert!(old_deb.contains("HSA_OVERRIDE_GFX_VERSION=11.0.3"), "non-native part gets its override note: {old_deb}");

        // Fedora emits dnf instead of apt. The Debian recipe spells its commands
        // `apt-get …`, so that is the string whose absence actually proves the
        // Debian block did not leak into the Fedora hint.
        let fed = rocm_install_hint(Distro::Fedora, &strix);
        assert!(fed.contains("dnf"), "Fedora hint must use dnf: {fed}");
        assert!(!fed.contains("apt-get"), "Fedora hint must not use apt: {fed}");

        // Runtime present but onnxruntime missing → only step 2 + setup, no
        // distro install block, and no HSA override line for gfx1100.
        let just_ort = HintFacts {
            gpu_ready: false,
            runtime_present: true,
            onnxruntime_present: false,
            gfx: Some("gfx1100".into()),
        };
        let h = rocm_install_hint(Distro::Debian, &just_ort);
        assert!(h.contains("onnxruntime-rocm"), "missing-ort hint must give the ort step: {h}");
        // The distro here is Debian, so the install block to rule out is the
        // apt-get one; the amdgpu-install check covers the other distro blocks.
        assert!(!h.contains("apt-get"), "runtime present ⇒ no apt install block: {h}");
        assert!(!h.contains("amdgpu-install"), "runtime present ⇒ no driver install block: {h}");
        assert!(!h.contains("HSA_OVERRIDE_GFX_VERSION"), "gfx1100 ⇒ no HSA override line: {h}");
        assert!(h.contains("nornir vector setup-rocm"), "setup step is always present: {h}");
    }
}