hwlocality/topology/
mod.rs

1//! Hardware topology (main hwloc API entry point)
2//!
3//! A [`Topology`] contains everything hwloc knows about the hardware and
4//! software structure of a system. Among other things, it can be used to query
5//! the system topology and to bind threads and processes to hardware CPU cores
6//! and NUMA nodes. It is the main entry point of the hwloc API through which
7//! almost any other feature of the library is accessed.
8
9pub mod builder;
10#[cfg(feature = "hwloc-2_3_0")]
11pub mod editor;
12pub mod export;
13pub mod support;
14
15use self::{
16    builder::{BuildFlags, TopologyBuilder, TypeFilter},
17    support::FeatureSupport,
18};
19#[cfg(all(feature = "hwloc-2_3_0", doc))]
20use crate::topology::support::MiscSupport;
21use crate::{
22    bitmap::{Bitmap, BitmapRef, SpecializedBitmap},
23    cpu::cpuset::CpuSet,
24    errors::{self, ForeignObjectError, RawHwlocError},
25    ffi::transparent::AsNewtype,
26    memory::nodeset::NodeSet,
27    object::{
28        depth::{Depth, NormalDepth},
29        types::ObjectType,
30        TopologyObject,
31    },
32};
33use bitflags::bitflags;
34use errno::Errno;
35use hwlocality_sys::{
36    hwloc_bitmap_s, hwloc_distrib_flags_e, hwloc_topology, hwloc_type_filter_e,
37    HWLOC_DISTRIB_FLAG_REVERSE,
38};
39use libc::EINVAL;
40#[allow(unused)]
41#[cfg(test)]
42use similar_asserts::assert_eq;
43use std::{
44    collections::BTreeMap,
45    fmt::{self, Debug, Pointer},
46    ops::Deref,
47    ptr::{self, NonNull},
48    sync::OnceLock,
49};
50use strum::IntoEnumIterator;
51use thiserror::Error;
52
/// Main entry point to the hwloc API
///
/// A `Topology` contains everything hwloc knows about the hardware and software
/// structure of a system. It can be used to query the system topology and to
/// bind threads and processes to hardware CPU cores and NUMA nodes.
///
/// Since there are **many** things you can do with a `Topology`, the API is
/// broken down into sections roughly following the structure of the upstream
/// hwloc documentation:
///
/// - [Topology building](#topology-building)
/// - [Full object lists](#full-object-lists) (specific to Rust bindings)
/// - [Object levels, depths and types](#object-levels-depths-and-types)
/// - [CPU cache statistics](#cpu-cache-statistics) (specific to Rust bindings)
/// - [CPU binding](#cpu-binding)
/// - [Memory binding](#memory-binding)
#[cfg_attr(
    feature = "hwloc-2_3_0",
    doc = "- [Modifying a loaded topology](#modifying-a-loaded-topology) (hwloc 2.3+)"
)]
/// - [Finding objects inside a CPU set](#finding-objects-inside-a-cpu-set)
/// - [Finding objects covering at least a CPU set](#finding-objects-covering-at-least-a-cpu-set)
/// - [Finding other objects](#finding-other-objects)
/// - [Distributing work items over a topology](#distributing-work-items-over-a-topology)
/// - [CPU and node sets of entire topologies](#cpu-and-node-sets-of-entire-topologies)
/// - [Finding I/O objects](#finding-io-objects)
/// - [Exporting Topologies to XML](#exporting-topologies-to-xml)
/// - [Exporting Topologies to Synthetic](#exporting-topologies-to-synthetic)
/// - [Retrieve distances between objects](#retrieve-distances-between-objects)
#[cfg_attr(
    feature = "hwloc-2_3_0",
    doc = "- [Comparing memory node attributes for finding where to allocate on](#comparing-memory-node-attributes-for-finding-where-to-allocate-on) (hwloc 2.3+)"
)]
#[cfg_attr(
    feature = "hwloc-2_4_0",
    doc = "- [Kinds of CPU cores](#kinds-of-cpu-cores) (hwloc 2.4+)"
)]
#[cfg_attr(
    any(doc, target_os = "linux"),
    doc = "- [Linux-specific helpers](#linux-specific-helpers)"
)]
#[cfg_attr(
    any(doc, all(target_os = "windows", feature = "hwloc-2_5_0")),
    doc = "- [Windows-specific helpers](#windows-specific-helpers) (hwloc 2.5+)"
)]
//
// --- Implementation details ---
//
// Since the Topology API is _huge_, not all of it is implemented in the
// topology module. Instead, functionality which is very strongly related to
// one other code module is implemented inside that module, leaving this module
// focused on basic lifecycle and cross-cutting issues.
//
// # Safety
//
// As a type invariant, the inner pointer is assumed to always point to a valid,
// fully built, non-aliased topology.
//
// Any binding to an hwloc topology function that takes a user-provided
// &TopologyObject parameter **must** check that this object does belong to the
// topology using the Topology::contains() method before passing it to hwloc.
#[doc(alias = "hwloc_topology")]
#[doc(alias = "hwloc_topology_t")]
pub struct Topology(NonNull<hwloc_topology>);
117
/// # Topology building
//
// --- Implementation details ---
//
// Upstream docs:
// - Creation: https://hwloc.readthedocs.io/en/stable/group__hwlocality__creation.html
// - Build queries: https://hwloc.readthedocs.io/en/stable/group__hwlocality__configuration.html
impl Topology {
    /// Creates a new Topology.
    ///
    /// If no customization of the build process is needed, this method is the
    /// main entry point to this crate. A topology is returned, which contains
    /// the logical representation of the physical hardware.
    ///
    /// # Examples
    ///
    /// ```
    /// # use hwlocality::Topology;
    /// let topology = Topology::new()?;
    /// # Ok::<(), eyre::Report>(())
    /// ```
    #[allow(clippy::missing_errors_doc)]
    pub fn new() -> Result<Self, RawHwlocError> {
        TopologyBuilder::new().build()
    }

    /// Test topology instance
    ///
    /// Used to avoid redundant calls to Topology::new() in unit tests and
    /// doctests that need read-only access to a topology.
    ///
    /// Configured to expose as many concepts from hwloc as possible (disallowed
    /// CPUs, I/O objects, etc).
    ///
    /// Do not use this in doctests where the topology configuration matters.
    /// And do not, under any circumstances, use this in non-test code.
    ///
    /// NOTE: In an ideal world, this would be cfg(any(test, doctest)), but that
    ///       doesn't work for doctests yet:
    ///       https://github.com/rust-lang/rust/issues/67295
    #[doc(hidden)]
    pub fn test_instance() -> &'static Self {
        // Process-wide instance, lazily built on first access
        static INSTANCE: OnceLock<Topology> = OnceLock::new();
        INSTANCE.get_or_init(|| {
            Self::builder()
                .with_flags(BuildFlags::INCLUDE_DISALLOWED)
                .expect("INCLUDE_DISALLOWED should always work")
                .with_common_type_filter(TypeFilter::KeepAll)
                .expect("KeepAll should be a supported common type filter")
                .build()
                .expect("Failed to initialize main test Topology")
        })
    }

    /// Like test_instance, but separate instance
    ///
    /// Used to test that operations correctly detect foreign topology objects
    #[cfg(feature = "hwloc-2_3_0")]
    #[doc(hidden)]
    pub fn foreign_instance() -> &'static Self {
        // Cloning test_instance yields a distinct topology with its own
        // objects, which other topologies must report as foreign
        static INSTANCE: OnceLock<Topology> = OnceLock::new();
        INSTANCE.get_or_init(|| Self::test_instance().clone())
    }

    /// Prepare to create a Topology with custom configuration
    ///
    /// # Examples
    ///
    /// ```
    /// # use hwlocality::topology::{Topology, builder::BuildFlags};
    /// let flags = BuildFlags::INCLUDE_DISALLOWED;
    /// let topology = Topology::builder().with_flags(flags)?.build()?;
    /// assert_eq!(topology.build_flags(), flags);
    /// # Ok::<(), eyre::Report>(())
    /// ```
    #[doc(alias = "hwloc_topology_init")]
    pub fn builder() -> TopologyBuilder {
        TopologyBuilder::new()
    }

    /// Check that this topology is compatible with the current hwloc library
    ///
    /// This is useful when using the same topology structure (in memory) in
    /// different libraries that may use different hwloc installations (for
    /// instance if one library embeds a specific version of hwloc, while
    /// another library uses a default system-wide hwloc installation).
    ///
    /// If all libraries/programs use the same hwloc installation, this function
    /// always returns `true`.
    //
    // TODO: Propagate note about interprocess sharing from upstream docs
    //       once interprocess sharing is implemented.
    ///
    /// # Examples
    ///
    /// ```
    /// # use hwlocality::Topology;
    /// assert!(Topology::new()?.is_abi_compatible());
    /// # Ok::<(), eyre::Report>(())
    /// ```
    #[doc(alias = "hwloc_topology_abi_check")]
    pub fn is_abi_compatible(&self) -> bool {
        // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
        //         - hwloc ops are trusted not to modify *const parameters
        let result = errors::call_hwloc_zero_or_minus1("hwloc_topology_abi_check", || unsafe {
            hwlocality_sys::hwloc_topology_abi_check(self.as_ptr())
        });
        match result {
            Ok(()) => true,
            // Lack of tarpaulin coverage expected, this check should never fail
            // unless something horribly wrong is happening.
            #[cfg(not(tarpaulin_include))]
            Err(RawHwlocError {
                errno: Some(Errno(EINVAL)),
                ..
            }) => false,
            #[cfg(not(tarpaulin_include))]
            #[cfg(windows)]
            Err(RawHwlocError { errno: None, .. }) => {
                // As explained in the RawHwlocError documentation, errno values
                // may not correctly propagate from hwloc to hwlocality on
                // Windows. Since there is only one expected errno value here,
                // we'll interpret lack of errno as EINVAL on Windows.
                false
            }
            Err(raw_err) => unreachable!("Unexpected hwloc error: {raw_err}"),
        }
    }

    /// Flags that were used to build this topology
    ///
    /// # Examples
    ///
    /// ```
    /// # use hwlocality::topology::{Topology, builder::BuildFlags};
    /// assert_eq!(Topology::new()?.build_flags(), BuildFlags::empty());
    /// # Ok::<(), eyre::Report>(())
    /// ```
    #[doc(alias = "hwloc_topology_get_flags")]
    pub fn build_flags(&self) -> BuildFlags {
        // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
        //         - hwloc ops are trusted not to modify *const parameters
        let result = BuildFlags::from_bits_retain(unsafe {
            hwlocality_sys::hwloc_topology_get_flags(self.as_ptr())
        });
        assert!(result.is_valid(), "hwloc returned invalid flags");
        result
    }

    /// Was the topology built using the system running this program?
    ///
    /// It may not have been if, for instance, it was built using another
    /// file-system root or loaded from a synthetic or XML textual description.
    ///
    /// # Examples
    ///
    /// ```
    /// # use hwlocality::Topology;
    /// assert!(Topology::new()?.is_this_system());
    /// # Ok::<(), eyre::Report>(())
    /// ```
    #[doc(alias = "hwloc_topology_is_thissystem")]
    pub fn is_this_system(&self) -> bool {
        // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
        //         - hwloc ops are trusted not to modify *const parameters
        errors::call_hwloc_bool("hwloc_topology_is_thissystem", || unsafe {
            hwlocality_sys::hwloc_topology_is_thissystem(self.as_ptr())
        })
        .expect("Should not involve faillible syscalls")
    }

    /// Supported hwloc features with this topology on this machine
    ///
    /// This is the information that one gets via the `hwloc-info --support` CLI.
    ///
    /// The reported features are what the current topology supports on the
    /// current machine. If the topology was exported to XML from another
    /// machine and later imported here, support still describes what is
    /// supported for this imported topology after import. By default, binding
    /// will be reported as unsupported in this case (see
    /// [`BuildFlags::ASSUME_THIS_SYSTEM`]).
    ///
    #[cfg_attr(
        feature = "hwloc-2_3_0",
        doc = "On hwloc 2.3+, [`BuildFlags::IMPORT_SUPPORT`] may be used during topology building to"
    )]
    #[cfg_attr(
        feature = "hwloc-2_3_0",
        doc = "report the supported features of the original remote machine instead. If"
    )]
    #[cfg_attr(
        feature = "hwloc-2_3_0",
        doc = "it was successfully imported, [`MiscSupport::imported()`] will be set."
    )]
    ///
    /// # Examples
    ///
    /// ```
    /// # let topology = hwlocality::Topology::test_instance();
    /// println!("{:?}", topology.feature_support());
    /// # Ok::<(), eyre::Report>(())
    /// ```
    #[doc(alias = "hwloc_topology_get_support")]
    pub fn feature_support(&self) -> &FeatureSupport {
        // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
        //         - hwloc ops are trusted not to modify *const parameters
        let ptr = errors::call_hwloc_ptr("hwloc_topology_get_support", || unsafe {
            hwlocality_sys::hwloc_topology_get_support(self.as_ptr())
        })
        .expect("Unexpected hwloc error");
        // SAFETY: - If hwloc succeeded, the output is assumed to be valid and
        //           point to a valid target devoid of mutable aliases
        //         - Output reference will be bound to the lifetime of &self by
        //           the borrow checker
        unsafe { ptr.as_ref().as_newtype() }
    }

    /// Quickly check a support flag
    ///
    /// `get_group` selects one group of support flags within
    /// [`FeatureSupport`], and `check_feature` queries one flag within that
    /// group. The result is `false` when the group is unavailable.
    ///
    /// # Examples
    ///
    /// ```
    /// # use hwlocality::topology::{Topology, support::{FeatureSupport, DiscoverySupport}};
    /// let topology = Topology::new()?;
    /// assert!(topology.supports(
    ///     FeatureSupport::discovery,
    ///     DiscoverySupport::pu_count
    /// ));
    /// # Ok::<(), eyre::Report>(())
    /// ```
    pub fn supports<Group>(
        &self,
        get_group: fn(&FeatureSupport) -> Option<&Group>,
        check_feature: fn(&Group) -> bool,
    ) -> bool {
        get_group(self.feature_support()).is_some_and(check_feature)
    }

    /// Filtering that was applied for the given object type
    ///
    /// # Examples
    ///
    /// ```
    /// # use hwlocality::{
    /// #     object::types::ObjectType,
    /// #     topology::builder::TypeFilter
    /// # };
    /// # let topology = hwlocality::Topology::test_instance();
    /// #
    /// // PUs, NUMANodes and Machine are always kept
    /// let always_there = [ObjectType::PU,
    ///                     ObjectType::NUMANode,
    ///                     ObjectType::Machine];
    /// for ty in always_there {
    ///     assert_eq!(topology.type_filter(ty)?, TypeFilter::KeepAll);
    /// }
    ///
    /// // Groups are only kept if they bring extra structure
    /// assert_eq!(
    ///     topology.type_filter(ObjectType::Group)?,
    ///     TypeFilter::KeepStructure
    /// );
    /// # Ok::<(), eyre::Report>(())
    /// ```
    #[allow(clippy::missing_errors_doc)]
    #[doc(alias = "hwloc_topology_get_type_filter")]
    pub fn type_filter(&self, ty: ObjectType) -> Result<TypeFilter, RawHwlocError> {
        let mut filter = hwloc_type_filter_e::MAX;
        // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
        //         - hwloc ops are trusted not to modify *const parameters
        //         - By construction, ObjectType only exposes values that map into
        //           hwloc_obj_type_t values understood by the configured version
        //           of hwloc, and build.rs checks that the active version of
        //           hwloc is not older than that, so into() may only generate
        //           valid hwloc_obj_type_t values for current hwloc
        //         - filter is an out-parameter, initial value shouldn't matter
        errors::call_hwloc_zero_or_minus1("hwloc_topology_get_type_filter", || unsafe {
            hwlocality_sys::hwloc_topology_get_type_filter(self.as_ptr(), ty.into(), &mut filter)
        })?;
        // SAFETY: Filter is from a successful hwloc API call, so it should be
        //         a valid hwloc type filter that can be fed back to hwloc.
        Ok(unsafe { TypeFilter::from_hwloc(filter) })
    }
}
403
/// # Distributing work items over a topology
//
// --- Implementation details ---
//
// Inspired by https://hwloc.readthedocs.io/en/stable/group__hwlocality__helper__distribute.html,
// but the inline header implementation had to be rewritten in Rust.
impl Topology {
    /// Distribute `num_items` work items over the topology under `roots`
    ///
    /// Given a number of work items to be processed (which can be, for example,
    /// a set of threads to be spawned), this function will assign a cpuset to
    /// each of them according to a recursive linear distribution algorithm.
    /// Such an algorithm spreads work evenly across CPUs and ensures that
    /// work-items with neighboring indices in the output array are processed by
    /// neighboring locations in the topology, which have a high chance of
    /// sharing resources like fast CPU caches.
    ///
    /// The set of CPUs over which work items are distributed is designated by a
    /// set of root [`TopologyObject`]s with associated CPUs. The root objects
    /// must be part of this [`Topology`], or the [`ForeignRoot`] error will be
    /// returned. Their CPU sets must also be disjoint, or the
    /// [`OverlappingRoots`] error will be returned. You can distribute items
    /// across all CPUs in the topology by setting `roots` to
    /// `&[topology.root_object()]`.
    ///
    /// Since the purpose of `roots` is to designate which CPUs items should be
    /// allocated to, root objects should normally have a CPU set. If that is
    /// not the case (e.g. if some roots designate NUMA nodes or I/O objects
    /// like storage or GPUs), the algorithm will walk up affected roots'
    /// ancestor chains to locate the first ancestor with CPUs in the topology,
    /// which represents the CPUs closest to the object of interest. If none of
    /// the CPUs of that ancestor is available for binding, that root will be
    /// ignored, unless this is true of all roots in which case the
    /// [`EmptyRoots`] error is returned.
    ///
    /// If there is no depth limit, which can be achieved by setting `max_depth`
    /// to [`NormalDepth::MAX`], the distribution will be done down to the
    /// granularity of individual CPUs, i.e. if there are more work items than
    /// CPUs, each work item will be assigned one CPU. By setting the
    /// `max_depth` parameter to a lower limit, you can distribute work at a
    /// coarser granularity, e.g. across L3 caches, giving the OS some leeway to
    /// move tasks across CPUs sharing that cache.
    ///
    /// By default, output cpusets follow the logical topology children order.
    /// By setting `flags` to [`DistributeFlags::REVERSE`], you can ask for them
    /// to be provided in reverse order instead (from last child to first child).
    ///
    /// # Errors
    ///
    /// - [`EmptyRoots`] if there are no CPUs to distribute work to (the
    ///   union of all root cpusets is empty).
    /// - [`ForeignRoot`] if some of the specified roots do not belong to this
    ///   topology.
    /// - [`OverlappingRoots`] if some of the roots have overlapping CPU sets.
    ///
    /// [`EmptyRoots`]: DistributeError::EmptyRoots
    /// [`ForeignRoot`]: DistributeError::ForeignRoot
    /// [`OverlappingRoots`]: DistributeError::OverlappingRoots
    #[allow(clippy::missing_docs_in_private_items)]
    #[doc(alias = "hwloc_distrib")]
    pub fn distribute_items(
        &self,
        roots: &[&TopologyObject],
        num_items: usize,
        max_depth: NormalDepth,
        flags: DistributeFlags,
    ) -> Result<Vec<CpuSet>, DistributeError> {
        // Make sure all roots belong to this topology
        for root in roots.iter().copied() {
            if !self.contains(root) {
                return Err(DistributeError::ForeignRoot(root.into()));
            }
        }

        // Handle the trivial case where 0 items are distributed
        if num_items == 0 {
            return Ok(Vec::new());
        }

        /// Inner recursive distribution algorithm
        ///
        /// Appends exactly `num_items` cpusets to `result`, distributing them
        /// across `roots_and_cpusets` proportionally to each root's CPU count.
        fn recurse<'self_>(
            roots_and_cpusets: impl DoubleEndedIterator<Item = ObjSetWeightDepth<'self_>> + Clone,
            num_items: usize,
            max_depth: NormalDepth,
            flags: DistributeFlags,
            result: &mut Vec<CpuSet>,
        ) {
            // Debug mode checks
            #[cfg(not(tarpaulin_include))]
            debug_assert_ne!(
                roots_and_cpusets.clone().count(),
                0,
                "Can't distribute to 0 roots"
            );
            #[cfg(not(tarpaulin_include))]
            debug_assert_ne!(
                num_items, 0,
                "Shouldn't try to distribute 0 items (just don't call this function)"
            );
            let initial_len = result.len();

            // Total number of cpus covered by the active roots
            let total_weight: usize = roots_and_cpusets
                .clone()
                .map(|(_, _, weight, _)| weight)
                .sum();

            // Subset of CPUs and items covered so far
            let mut given_weight = 0;
            let mut given_items = 0;

            // What to do with each root
            let process_root = |(root, cpuset, weight, depth): ObjSetWeightDepth<'_>| {
                // Give this root a chunk of the work-items proportional to its
                // weight, with a bias towards giving more CPUs to first roots
                let next_given_weight = given_weight + weight;
                let next_given_items = weight_to_items(next_given_weight, total_weight, num_items);
                let my_items = next_given_items - given_items;

                // Keep recursing until we reach the bottom of the topology,
                // run out of items to distribute, or hit the depth limit
                if root.normal_arity() > 0 && my_items > 1 && depth < max_depth {
                    recurse(
                        root.normal_children().filter_map(decode_normal_obj),
                        my_items,
                        max_depth,
                        flags,
                        result,
                    );
                } else if my_items > 0 {
                    // All items attributed to this root get this root's cpuset
                    for _ in 0..my_items {
                        result.push(cpuset.clone_target());
                    }
                } else {
                    // No item attributed to this root, merge cpuset with
                    // the previous root.
                    // NOTE(review): this ORs the cpuset into *every* earlier
                    //               entry equal to the last one, not only the
                    //               trailing run — presumably equal entries
                    //               only occur contiguously; worth confirming.
                    let mut iter = result.iter_mut().rev();
                    let last = iter.next().expect("First chunk cannot be empty");
                    for other in iter {
                        if other == last {
                            *other |= cpuset;
                        }
                    }
                    *last |= cpuset;
                }

                // Prepare to process the next root
                given_weight = next_given_weight;
                given_items = next_given_items;
            };

            // Process roots in the order dictated by flags
            if flags.contains(DistributeFlags::REVERSE) {
                roots_and_cpusets.rev().for_each(process_root);
            } else {
                roots_and_cpusets.for_each(process_root);
            }

            // Debug mode checks
            #[cfg(not(tarpaulin_include))]
            debug_assert_eq!(
                result.len() - initial_len,
                num_items,
                "This function should distribute the requested number of items"
            );
        }

        // Check roots, walk up to their first normal ancestor as necessary
        let decoded_roots = roots.iter().copied().filter_map(|root| {
            let mut root_then_ancestors = std::iter::once(root)
                .chain(root.ancestors())
                .skip_while(|candidate| !candidate.object_type().is_normal());
            root_then_ancestors.find_map(decode_normal_obj)
        });
        if decoded_roots.clone().count() == 0 {
            return Err(DistributeError::EmptyRoots);
        }
        if sets_overlap(decoded_roots.clone().map(|(_, root_set, _, _)| root_set)) {
            return Err(DistributeError::OverlappingRoots);
        }

        // Run the recursion, collect results
        let mut result = Vec::with_capacity(num_items);
        recurse(decoded_roots, num_items, max_depth, flags, &mut result);
        #[cfg(not(tarpaulin_include))]
        debug_assert_eq!(
            result.len(),
            num_items,
            "This function should produce one result per input item"
        );
        Ok(result)
    }
}
598//
// Macro-generated code is excluded from tarpaulin coverage measurement
#[cfg(not(tarpaulin_include))]
bitflags! {
    /// Flags to be given to [`Topology::distribute_items()`]
    #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
    #[doc(alias = "hwloc_distrib_flags_e")]
    pub struct DistributeFlags: hwloc_distrib_flags_e {
        /// Distribute in reverse order, starting from the last objects
        const REVERSE = HWLOC_DISTRIB_FLAG_REVERSE;
    }
}
609//
// Enable generation of arbitrary flag combinations (e.g. for fuzzing and
// property-based tests) via the crate's bitflags helper macro
crate::impl_arbitrary_for_bitflags!(DistributeFlags, hwloc_distrib_flags_e);
611//
612impl Default for DistributeFlags {
613    fn default() -> Self {
614        Self::empty()
615    }
616}
617//
/// Error returned by [`Topology::distribute_items()`]
#[derive(Clone, Debug, Eq, Error, Hash, PartialEq)]
pub enum DistributeError {
    /// Error returned when the specified roots contain no accessible CPUs
    ///
    /// This can happen if either an empty roots list is specified, or if the
    /// topology was built with [`BuildFlags::INCLUDE_DISALLOWED`] and the
    /// specified roots only contain disallowed CPUs.
    #[error("no CPU is accessible from the distribution roots")]
    EmptyRoots,

    /// Some of the specified roots do not belong to this topology
    #[error("distribution root {0}")]
    ForeignRoot(#[from] ForeignObjectError),

    /// Specified roots overlap with each other
    #[error("distribution roots overlap with each other")]
    OverlappingRoots,
}
637
/// Part of the implementation of [`Topology::distribute_items()`] that tells,
/// given a number of items to distribute, a cpuset weight, and the sum of all
/// cpuset weights, how many items should be distributed, rounding up.
fn weight_to_items(given_weight: usize, total_weight: usize, num_items: usize) -> usize {
    // Weights represent CPU counts and num_items represents something that
    // users reasonably want to count. Neither should overflow u128, even on
    // highly speculative future hardware where usize exceeds 128 bits.
    #[allow(clippy::missing_docs_in_private_items)]
    const TOO_LARGE: &str = "Such large inputs are not supported yet";
    let widen = |x: usize| u128::try_from(x).expect(TOO_LARGE);

    // The numerator product can only overflow if both given_weight and
    // num_items are >= 2^64, which is equally improbable.
    let product = widen(given_weight)
        .checked_mul(widen(num_items))
        .expect(TOO_LARGE);
    let divisor = widen(total_weight);

    // Ceiling division: add one to the quotient whenever there is a remainder
    let items = product / divisor + u128::from(product % divisor != 0);

    // Narrowing back to usize can only fail if given_weight > total_weight,
    // since otherwise items <= num_items which fits in usize by construction
    debug_assert!(
        given_weight <= total_weight,
        "Cannot distribute more weight than the active root's total weight"
    );
    items
        .try_into()
        .expect("Cannot happen if computation is correct")
}
668
669/// Part of the implementation of [`Topology::distribute_items()`] that extracts
670/// information from a [`TopologyObject`] that is known to be normal, and
671/// returns this information if the object has a non-empty cpuset
672fn decode_normal_obj(obj: &TopologyObject) -> Option<ObjSetWeightDepth<'_>> {
673    debug_assert!(
674        obj.object_type().is_normal(),
675        "This function only works on normal objects"
676    );
677    let cpuset = obj.cpuset().expect("Normal objects should have cpusets");
678    let weight = cpuset
679        .weight()
680        .expect("Topology objects should not have infinite cpusets");
681    let depth = obj.depth().expect_normal();
682    (weight > 0).then_some((obj, cpuset, weight, depth))
683}
684
/// Information that is extracted by [`decode_normal_obj()`]
///
/// In order: the object itself, its cpuset, the number of CPUs in that cpuset
/// (its "weight"), and the object's normal depth within the topology.
type ObjSetWeightDepth<'object> = (
    &'object TopologyObject,
    BitmapRef<'object, CpuSet>,
    usize,
    NormalDepth,
);
692
693/// Truth that an iterator of cpusets contains overlapping sets
694///
695/// `sets` can yield `&'_ CpuSet` or `BitmapRef<'_, CpuSet>`.
696fn sets_overlap(mut sets: impl Iterator<Item = impl Deref<Target = CpuSet>>) -> bool {
697    sets.try_fold(CpuSet::new(), |mut acc, set| {
698        let set: &CpuSet = &set;
699        if acc.intersects(set) {
700            None
701        } else {
702            acc |= set;
703            Some(acc)
704        }
705    })
706    .is_none()
707}
708
/// # CPU and node sets of entire topologies
//
// --- Implementation details ---
//
// Upstream docs: https://hwloc.readthedocs.io/en/stable/group__hwlocality__helper__topology__sets.html
impl Topology {
    /// Topology CPU set
    ///
    /// This is equivalent to calling [`TopologyObject::cpuset()`] on
    /// the topology's root object.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hwlocality::Topology;
    /// # let topology = Topology::test_instance();
    /// println!("Visible CPUs in this topology: {}", topology.cpuset());
    /// # Ok::<_, eyre::Report>(())
    /// ```
    #[doc(alias = "hwloc_topology_get_topology_cpuset")]
    pub fn cpuset(&self) -> BitmapRef<'_, CpuSet> {
        // SAFETY: This is one of the hwloc functions allowed here
        unsafe {
            self.topology_set(
                "hwloc_topology_get_topology_cpuset",
                hwlocality_sys::hwloc_topology_get_topology_cpuset,
            )
        }
    }

    /// Complete CPU set
    ///
    /// This is equivalent to calling [`TopologyObject::complete_cpuset()`] on
    /// the topology's root object.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hwlocality::Topology;
    /// # let topology = Topology::test_instance();
    /// println!(
    ///     "Overall CPUs in this topology: {}",
    ///     topology.complete_cpuset()
    /// );
    /// # Ok::<_, eyre::Report>(())
    /// ```
    #[doc(alias = "hwloc_topology_get_complete_cpuset")]
    pub fn complete_cpuset(&self) -> BitmapRef<'_, CpuSet> {
        // SAFETY: This is one of the hwloc functions allowed here
        unsafe {
            self.topology_set(
                "hwloc_topology_get_complete_cpuset",
                hwlocality_sys::hwloc_topology_get_complete_cpuset,
            )
        }
    }

    /// Allowed CPU set
    ///
    /// If [`BuildFlags::INCLUDE_DISALLOWED`] was not set, this is identical to
    /// [`Topology::cpuset()`]: all visible PUs are allowed.
    ///
    /// Otherwise, you can check whether a particular cpuset contains allowed
    /// PUs by calling `cpuset.intersects(topology.allowed_cpuset())`, and if so
    /// you can get the set of allowed PUs with
    /// `cpuset & topology.allowed_cpuset()`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hwlocality::Topology;
    /// # let topology = Topology::test_instance();
    /// println!(
    ///     "Allowed CPUs in this topology: {}",
    ///     topology.allowed_cpuset()
    /// );
    /// # Ok::<_, eyre::Report>(())
    /// ```
    #[doc(alias = "hwloc_topology_get_allowed_cpuset")]
    pub fn allowed_cpuset(&self) -> BitmapRef<'_, CpuSet> {
        // SAFETY: This is one of the hwloc functions allowed here
        unsafe {
            self.topology_set(
                "hwloc_topology_get_allowed_cpuset",
                hwlocality_sys::hwloc_topology_get_allowed_cpuset,
            )
        }
    }

    /// Topology node set
    ///
    /// This is equivalent to calling [`TopologyObject::nodeset()`] on
    /// the topology's root object.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hwlocality::Topology;
    /// # let topology = Topology::test_instance();
    /// println!("Visible NUMA nodes in this topology: {}", topology.nodeset());
    /// # Ok::<_, eyre::Report>(())
    /// ```
    #[doc(alias = "hwloc_topology_get_topology_nodeset")]
    pub fn nodeset(&self) -> BitmapRef<'_, NodeSet> {
        // SAFETY: This is one of the hwloc functions allowed here
        unsafe {
            self.topology_set(
                "hwloc_topology_get_topology_nodeset",
                hwlocality_sys::hwloc_topology_get_topology_nodeset,
            )
        }
    }

    /// Complete node set
    ///
    /// This is equivalent to calling [`TopologyObject::complete_nodeset()`] on
    /// the topology's root object.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hwlocality::Topology;
    /// # let topology = Topology::test_instance();
    /// println!(
    ///     "Overall NUMA nodes in this topology: {}",
    ///     topology.complete_nodeset()
    /// );
    /// # Ok::<_, eyre::Report>(())
    /// ```
    #[doc(alias = "hwloc_topology_get_complete_nodeset")]
    pub fn complete_nodeset(&self) -> BitmapRef<'_, NodeSet> {
        // SAFETY: This is one of the hwloc functions allowed here
        unsafe {
            self.topology_set(
                "hwloc_topology_get_complete_nodeset",
                hwlocality_sys::hwloc_topology_get_complete_nodeset,
            )
        }
    }

    /// Allowed node set
    ///
    /// If [`BuildFlags::INCLUDE_DISALLOWED`] was not set, this is identical to
    /// [`Topology::nodeset()`]: all visible NUMA nodes are allowed.
    ///
    /// Otherwise, you can check whether a particular nodeset contains allowed
    /// NUMA nodes by calling `nodeset.intersects(topology.allowed_nodeset())`,
    /// and if so you can get the set of allowed NUMA nodes with
    /// `nodeset & topology.allowed_nodeset()`.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hwlocality::Topology;
    /// # let topology = Topology::test_instance();
    /// println!(
    ///     "Allowed NUMA nodes in this topology: {}",
    ///     topology.allowed_nodeset()
    /// );
    /// # Ok::<_, eyre::Report>(())
    /// ```
    #[doc(alias = "hwloc_topology_get_allowed_nodeset")]
    pub fn allowed_nodeset(&self) -> BitmapRef<'_, NodeSet> {
        // SAFETY: This is one of the hwloc functions allowed here
        unsafe {
            self.topology_set(
                "hwloc_topology_get_allowed_nodeset",
                hwlocality_sys::hwloc_topology_get_allowed_nodeset,
            )
        }
    }

    /// Query a topology-wide `CpuSet` or `NodeSet`
    ///
    /// Shared implementation of all the public set accessors above: calls
    /// `getter` on the inner topology pointer and reinterprets the resulting
    /// generic bitmap as the requested specialized `Set` type.
    ///
    /// # Panics
    ///
    /// Panics if `getter` returns NULL, which the hwloc documentation states
    /// cannot happen for the functions allowed here.
    ///
    /// # Safety
    ///
    /// `getter` must be one of the functions described in the ["CPU and node
    /// sets of entire
    /// topologies"](https://hwloc.readthedocs.io/en/stable/group__hwlocality__helper__topology__sets.html)
    /// section of the hwloc documentation, which means in particular that it...
    ///
    /// - Cannot return NULL
    /// - Must return a pointer attached to the topology
    unsafe fn topology_set<'topology, Set: SpecializedBitmap>(
        &'topology self,
        getter_name: &'static str,
        getter: unsafe extern "C" fn(*const hwloc_topology) -> *const hwloc_bitmap_s,
    ) -> BitmapRef<'topology, Set> {
        // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
        //         - hwloc ops are trusted not to modify *const parameters
        //         - If this operation is successful, it should return a valid
        //           bitmap pointer
        //         - Output bitmap is indeed bound to the topology's lifetime
        let bitmap_ref = unsafe {
            let bitmap_ptr = errors::call_hwloc_ptr(getter_name, || getter(self.as_ptr()))
                .expect("According to their docs, these functions cannot return NULL");
            Bitmap::borrow_from_nonnull::<'topology>(bitmap_ptr)
        };
        bitmap_ref.cast()
    }
}
910
911// # General-purpose internal utilities
912impl Topology {
913    /// Contained hwloc topology pointer (for interaction with hwloc)
914    pub(crate) fn as_ptr(&self) -> *const hwloc_topology {
915        self.0.as_ptr()
916    }
917
918    /// Contained mutable hwloc topology pointer (for interaction with hwloc)
919    ///
920    /// Be warned that as a result of hwloc employing lazy caching techniques,
921    /// almost every interaction that requires `*mut hwloc_topology` is unsafe
922    /// unless followed by `hwloc_topology_refresh()`. This subtlety is handled
923    /// by the [`Topology::edit()`] mechanism.
924    pub(crate) fn as_mut_ptr(&mut self) -> *mut hwloc_topology {
925        self.0.as_ptr()
926    }
927
928    /// Check if a [`TopologyObject`] is part of this topology
929    ///
930    /// This check is a safety precondition to any hwloc topology method
931    /// binding that takes user-originated `&TopologyObject`s as a parameter
932    /// and passes it to an hwloc topology method.
933    ///
934    /// While this is not expected to happen often and will in fact often
935    /// require the user to jump through some serious hoops like creating
936    /// another `static` or `thread_local` topology, unfortunately there is
937    /// always a way to do it, and safe Rust code must remain safe even in the
938    /// craziest of edge cases...
939    pub(crate) fn contains(&self, object: &TopologyObject) -> bool {
940        let expected_root = self.root_object();
941        let actual_root = std::iter::once(object)
942            .chain(object.ancestors())
943            .last()
944            .expect("By definition, this iterator always has >= 1 element");
945        ptr::eq(expected_root, actual_root)
946    }
947}
948
/// Cloning is unfortunately only available on hwloc v2.3.0 and above, because
/// that's when hwloc introduced the option to manually refresh its internal
/// caches, which are thread-unsafe and more generally violate Rust's aliasing
/// rules. These caches are invalidated upon cloning.
#[cfg(feature = "hwloc-2_3_0")]
impl Clone for Topology {
    #[doc(alias = "hwloc_topology_dup")]
    fn clone(&self) -> Self {
        // Create the clone
        let mut clone = ptr::null_mut();
        // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
        //         - hwloc ops are trusted not to modify *const parameters
        //         - clone is an out-parameter, it can have any initial value
        errors::call_hwloc_zero_or_minus1("hwloc_topology_dup", || unsafe {
            hwlocality_sys::hwloc_topology_dup(&mut clone, self.as_ptr())
        })
        // NOTE: Fixed grammar of this panic message ("only fail" -> "can only fail")
        .expect("Duplicating a topology can only fail with ENOMEM, which is a panic in Rust");
        let clone = NonNull::new(clone).expect("Got null pointer from hwloc_topology_dup");

        // Topology duplication puts the clone in an un-refreshed state, which
        // does not play well with Rust's aliasing rules. Therefore we force
        // an hwloc refresh at this point
        // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
        //         - hwloc ops are trusted to keep *mut parameters in a
        //           valid state unless stated otherwise
        let result = errors::call_hwloc_zero_or_minus1("hwloc_topology_refresh", || unsafe {
            hwlocality_sys::hwloc_topology_refresh(clone.as_ptr())
        });

        // Make sure the refresh went well
        #[cfg(not(tarpaulin_include))]
        if let Err(e) = result {
            // If not, dispose of the clone then panic, so that the
            // un-refreshed clone can never be observed by safe code
            // SAFETY: - clone is a valid topology pointer at this point
            //         - The clone will not be usable again after destruction
            unsafe { hwlocality_sys::hwloc_topology_destroy(clone.as_ptr()) }
            panic!("ERROR: Failed to refresh topology clone ({e}), so it's stuck in a state that violates Rust aliasing rules...");
        }
        Self(clone)
    }
}
990
991impl Debug for Topology {
992    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
993        let mut debug = f.debug_struct("Topology");
994
995        // Topology building properties
996        debug
997            .field("is_abi_compatible", &self.is_abi_compatible())
998            .field("build_flags", &self.build_flags())
999            .field("is_this_system", &self.is_this_system())
1000            .field("feature_support", self.feature_support());
1001        let type_filters = ObjectType::iter()
1002            .map(|ty| {
1003                (
1004                    format!("{ty}"),
1005                    self.type_filter(ty)
1006                        .expect("should always succeed when called with a valid type"),
1007                )
1008            })
1009            .collect::<BTreeMap<_, _>>();
1010        debug.field("type_filter", &type_filters);
1011
1012        // TopologyObject hierarchy
1013        let objects_per_depth = NormalDepth::iter_range(NormalDepth::MIN, self.depth())
1014            .map(Depth::from)
1015            .chain(Depth::VIRTUAL_DEPTHS.iter().copied())
1016            .filter_map(|depth| {
1017                let objs = self.objects_at_depth(depth).collect::<Vec<_>>();
1018                (!objs.is_empty()).then_some((format!("{depth}"), objs))
1019            })
1020            .collect::<Vec<_>>();
1021        debug
1022            // Contains the info from most other topology queries
1023            .field("objects_per_depth", &objects_per_depth)
1024            .field("memory_parents_depth", &self.memory_parents_depth());
1025
1026        // CPU and node sets of the entire topology
1027        debug
1028            .field("cpuset", &self.cpuset())
1029            .field("complete_cpuset", &self.complete_cpuset())
1030            .field("allowed_cpuset", &self.allowed_cpuset())
1031            .field("nodeset", &self.nodeset())
1032            .field("complete_nodeset", &self.complete_nodeset())
1033            .field("allowed_nodeset", &self.allowed_nodeset());
1034
1035        // Object distances
1036        debug.field("distances", &self.distances(None));
1037
1038        // Memory attributes
1039        #[cfg(feature = "hwloc-2_3_0")]
1040        {
1041            debug.field("memory_attributes", &self.dump_memory_attributes());
1042        }
1043
1044        // Kinds of CPU cores
1045        #[cfg(feature = "hwloc-2_4_0")]
1046        {
1047            let cpu_kinds = self.cpu_kinds().into_iter().flatten().collect::<Vec<_>>();
1048            debug.field("cpu_kinds", &cpu_kinds);
1049        }
1050        debug.finish()
1051    }
1052}
1053
impl Drop for Topology {
    /// Tear down the topology by handing it back to
    /// `hwloc_topology_destroy()`
    #[doc(alias = "hwloc_topology_destroy")]
    fn drop(&mut self) {
        // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
        //         - Topology will not be usable again after Drop
        unsafe { hwlocality_sys::hwloc_topology_destroy(self.as_mut_ptr()) }
    }
}
1062
impl PartialEq for Topology {
    /// Deep comparison of two topologies: cheap global properties are checked
    /// first, then type filters, the full object hierarchy, distances, and
    /// (feature-permitting) memory attributes and CPU kinds, short-circuiting
    /// on the first mismatch
    fn eq(&self, other: &Self) -> bool {
        /// Check equality of basic topology properties
        macro_rules! check_all_equal {
            ($($property:ident),*) => {
                if (
                    $(
                        Topology::$property(self) != Topology::$property(other)
                    )||*
                ) {
                    return false;
                }
            };
        }
        // Cheapest comparisons go first, so mismatches are detected early
        check_all_equal!(
            is_abi_compatible,
            build_flags,
            is_this_system,
            feature_support,
            memory_parents_depth,
            cpuset,
            complete_cpuset,
            allowed_cpuset,
            nodeset,
            complete_nodeset,
            allowed_nodeset
        );

        /// Retrieve all type filters to check they are the same
        fn type_filters(
            topology: &Topology,
        ) -> impl Iterator<Item = Result<TypeFilter, RawHwlocError>> + '_ {
            ObjectType::iter().map(|ty| topology.type_filter(ty))
        }
        if !type_filters(self).eq(type_filters(other)) {
            return false;
        }

        // Check that the object hierarchy is the same
        if !self.has_same_object_hierarchy(other) {
            return false;
        }

        // Check that object distances are the same
        // (two failed queries with the same error also count as equal)
        let same_distances = match (self.distances(None), other.distances(None)) {
            (Ok(distances1), Ok(distances2)) => {
                distances1.len() == distances2.len()
                    && distances1
                        .into_iter()
                        .zip(&distances2)
                        .all(|(d1, d2)| d1.eq_modulo_topology(d2))
            }
            (Err(e1), Err(e2)) => e1 == e2,
            (Ok(_), Err(_)) | (Err(_), Ok(_)) => false,
        };
        if !same_distances {
            return false;
        }

        // Check that memory attributes are the same
        #[cfg(feature = "hwloc-2_3_0")]
        {
            if !self
                .dump_memory_attributes()
                .eq_modulo_topology(&other.dump_memory_attributes())
            {
                return false;
            }
        }

        // Check that CPU core kinds are the same
        #[cfg(feature = "hwloc-2_4_0")]
        {
            let same_kinds = match (self.cpu_kinds(), other.cpu_kinds()) {
                (Ok(kinds1), Ok(kinds2)) => kinds1.eq(kinds2),
                (Err(e1), Err(e2)) => e1 == e2,
                (Ok(_), Err(_)) | (Err(_), Ok(_)) => false,
            };
            if !same_kinds {
                return false;
            }
        }
        true
    }
}
1148
1149impl Pointer for Topology {
1150    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1151        <NonNull<hwloc_topology> as Pointer>::fmt(&self.0, f)
1152    }
1153}
1154
// SAFETY: No shared mutability: mutating the underlying hwloc topology
//         requires `&mut Topology` (see `as_mut_ptr()`), so moving a
//         `Topology` to another thread is sound
unsafe impl Send for Topology {}

// SAFETY: No shared mutability: `&Topology` only exposes hwloc queries that
//         are trusted not to modify the underlying topology
unsafe impl Sync for Topology {}
1160
1161#[allow(clippy::too_many_lines)]
1162#[cfg(test)]
1163mod tests {
1164    use super::*;
1165    use crate::ffi::PositiveInt;
1166    use bitflags::Flags;
1167    use proptest::prelude::*;
1168    #[allow(unused)]
1169    use similar_asserts::assert_eq;
1170    use static_assertions::{assert_impl_all, assert_not_impl_any};
1171    use std::{
1172        collections::BTreeSet,
1173        error::Error,
1174        fmt::{Binary, Display, LowerExp, LowerHex, Octal, UpperExp, UpperHex},
1175        hash::Hash,
1176        io::{self, Read},
1177        num::NonZeroU8,
1178        ops::{
1179            BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not, Sub, SubAssign,
1180        },
1181        panic::UnwindSafe,
1182    };
1183
    // Check that public types in this module keep implementing all expected
    // traits, in the interest of detecting future semver-breaking changes
    // (these assertions are purely compile-time, they have no runtime cost)
    assert_impl_all!(DistributeFlags:
        Binary, BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign,
        Copy, Debug, Default, Extend<DistributeFlags>, Flags,
        FromIterator<DistributeFlags>, Hash, IntoIterator<Item=DistributeFlags>,
        LowerHex, Not, Octal, Sized, Sub, SubAssign, Sync, UpperHex, Unpin,
        UnwindSafe
    );
    assert_not_impl_any!(DistributeFlags:
        Display, Drop, PartialOrd, Pointer, LowerExp, Read, UpperExp,
        fmt::Write, io::Write
    );
    assert_impl_all!(Topology:
        Debug, Drop, PartialEq, Pointer, Sized, Sync, Unpin, UnwindSafe
    );
    // Clone is feature-gated, see the `impl Clone for Topology` comment
    #[cfg(feature = "hwloc-2_3_0")]
    assert_impl_all!(Topology: Clone);
    assert_not_impl_any!(Topology:
        Binary, Copy, Default, Deref, Display, IntoIterator, LowerExp, LowerHex,
        Octal, Read, UpperExp, UpperHex, fmt::Write, io::Write
    );
    assert_impl_all!(DistributeError:
        Clone, Error, Hash, Sized, Sync, Unpin, UnwindSafe
    );
    assert_not_impl_any!(DistributeError:
        Binary, Copy, Default, Deref, Drop, IntoIterator,
        LowerExp, LowerHex, Octal, PartialOrd, Pointer, Read,
        UpperExp, UpperHex, fmt::Write, io::Write
    );
1214
1215    #[test]
1216    fn default_distribute_flags() {
1217        assert_eq!(DistributeFlags::default(), DistributeFlags::empty());
1218    }
1219
    /// Check that topology cloning yields a distinct but equal topology
    #[cfg(feature = "hwloc-2_3_0")]
    #[test]
    fn clone() -> Result<(), TestCaseError> {
        use crate::topology::builder::tests::DataSource;
        let topology = Topology::test_instance();
        let clone = topology.clone();
        // The clone must wrap a distinct hwloc topology pointer
        assert_ne!(format!("{:p}", *topology), format!("{clone:p}"));
        // The original topology should still pass all build-time checks
        builder::tests::check_topology(
            topology,
            DataSource::ThisSystem,
            topology.build_flags(),
            |ty| Ok(topology.type_filter(ty).unwrap()),
        )?;
        // Objects from the clone do not belong to the original topology...
        assert!(!topology.contains(clone.root_object()));
        // ...and yet both topologies should compare equal
        assert_eq!(topology, &clone);
        Ok(())
    }
1237
    /// Check that a topology compares equal to itself and that the (lengthy)
    /// `Debug` implementation runs without panicking
    #[allow(clippy::print_stdout, clippy::use_debug)]
    #[test]
    fn debug_and_self_eq() {
        let topology = Topology::test_instance();
        assert_eq!(topology, topology);
        // Exercises almost every query of the Topology API, see Debug impl
        println!("{topology:#?}");
    }
1245
    /// Bias the `max_depth` input to `distribute_items` tests so that
    /// interesting depth values below the maximum possible depth are sampled
    /// often enough
    fn max_depth() -> impl Strategy<Value = NormalDepth> {
        prop_oneof![
            // 4 out of 5 samples: a depth that exists in the test topology
            4 => (0..usize::from(Topology::test_instance().depth()))
                    .prop_map(|us| NormalDepth::try_from(us).unwrap()),
            // 1 out of 5 samples: any representable normal depth
            1 => any::<NormalDepth>()
        ]
    }
1256
    proptest! {
        // Check that absence of roots to distribute to is reported correctly,
        // regardless of the other distribution parameters
        #[test]
        fn distribute_nowhere(num_items: NonZeroU8, max_depth in max_depth(), flags: DistributeFlags) {
            let num_items = usize::from(num_items.get());
            prop_assert_eq!(
                Topology::test_instance().distribute_items(&[], num_items, max_depth, flags),
                Err(DistributeError::EmptyRoots)
            );
        }
    }
1268
    /// Generate valid (disjoint) roots for [`Topology::distribute_items()`],
    /// taken from [`Topology::test_instance()`]
    fn disjoint_roots() -> impl Strategy<Value = Vec<&'static TopologyObject>> {
        /// Number of PUs below a normal object
        fn normal_weight(obj: &TopologyObject) -> usize {
            obj.cpuset()
                .expect("normal object should have a cpuset")
                .weight()
                .expect("normal object should have a finite cpuset")
        }

        /// Pick N objects below a certain root
        ///
        /// `root` must be a normal object that has at least `num_objects` PUs
        /// underneath it.
        ///
        /// Objects will be ordered by logical index, the client is responsible
        /// for shuffling them at the very end.
        ///
        /// Recursion terminates because each recursive call targets a child
        /// of the current root with a count bounded by that child's PU count.
        fn pick_disjoint_objects(
            root: &'static TopologyObject,
            num_objects: usize,
        ) -> impl Strategy<Value = Vec<&'static TopologyObject>> {
            // Validate the request
            assert!(
                root.object_type().is_normal(),
                "root object should be normal"
            );
            assert!(num_objects <= normal_weight(root));

            // Honor the request
            match num_objects {
                // Picking no objects is trivial
                0 => Just(Vec::new()).boxed(),

                // Picking a single object is easy too, just pick any object in
                // the subtree below this root
                1 => {
                    let topology = Topology::test_instance();
                    let subtree_objects = topology
                        .normal_objects()
                        .filter(|obj| obj.is_in_subtree(root) || ptr::eq(*obj, root))
                        .collect::<Vec<_>>();
                    prop::sample::select(subtree_objects)
                        .prop_map(|obj| vec![obj])
                        .boxed()
                }

                // If we need to pick 2 or more objects, we must distribute them
                // across the root's children
                _ => {
                    // Each child can at most yield as many objects as there are
                    // PUs underneath it, and we must not yield more PUs than
                    // this. This is achieved through the dirty trick of first
                    // duplicating each child once per underlying PU...
                    let degenerate_children = root
                        .normal_children()
                        .flat_map(|child| std::iter::repeat(child).take(normal_weight(child)))
                        .collect::<Vec<_>>();
                    debug_assert_eq!(degenerate_children.len(), normal_weight(root));

                    // Then picking a subsequence of that duplicated sequence...
                    prop::sample::subsequence(degenerate_children, num_objects)
                        .prop_flat_map(|selected_degenerate| {
                            // Then deduplicating again to find out how many
                            // objects were allocated to each child.
                            // (subsequence preserves order, so duplicates of a
                            // given child are adjacent in the selection)
                            let mut count_per_child =
                                Vec::<(&'static TopologyObject, usize)>::new();
                            'children: for child in selected_degenerate {
                                if let Some((last_child, last_count)) = count_per_child.last_mut() {
                                    if ptr::eq(child, *last_child) {
                                        *last_count += 1;
                                        continue 'children;
                                    }
                                }
                                count_per_child.push((child, 1));
                            }

                            // Once we have the deduplicated per-child work
                            // allocation, we do the recursive request, which
                            // will yield a vector of results for each child...
                            let nested_objs = count_per_child
                                .into_iter()
                                .map(|(child, count)| pick_disjoint_objects(child, count))
                                .collect::<Vec<_>>();

                            // ...and we flatten that into a single vector of
                            // merged results
                            nested_objs.prop_map(|nested_objs| {
                                nested_objs.into_iter().flatten().collect::<Vec<_>>()
                            })
                        })
                        .boxed()
                }
            }
        }

        // Finally, we use the above logic to generate any valid number of roots
        let root = Topology::test_instance().root_object();
        (1..=normal_weight(root))
            .prop_flat_map(|num_objects| pick_disjoint_objects(root, num_objects))
            .prop_shuffle()
    }
1371
    proptest! {
        // Check that requests to distribute 0 items are handled correctly:
        // an empty item list should be returned, whatever the other inputs
        #[test]
        fn distribute_nothing(
            disjoint_roots in disjoint_roots(),
            max_depth in max_depth(),
            flags: DistributeFlags,
        ) {
            prop_assert_eq!(
                Topology::test_instance().distribute_items(&disjoint_roots, 0, max_depth, flags),
                Ok(Vec::new())
            );
        }
    }
1386
    /// Provide a pessimistic set of leaves amongst which
    /// [`Topology::distribute_items()`] could distribute work, given roots and
    /// a max depth and a number of items to be distributed
    ///
    /// The actual set can be more restrained in the presence of heterogeneous
    /// root objects.
    fn find_possible_leaves<'output>(
        roots: &[&'output TopologyObject],
        max_depth: NormalDepth,
    ) -> Vec<&'output TopologyObject> {
        // Breadth-first expansion: on each pass, replace every object that has
        // normal children and lies above `max_depth` with those children
        let mut input = roots.to_vec();
        let mut output = Vec::new();
        for _ in PositiveInt::iter_range(PositiveInt::MIN, max_depth) {
            let mut new_leaves = false;
            for obj in input.drain(..) {
                if obj.normal_arity() > 0 && obj.depth().expect_normal() < max_depth {
                    // Object can be expanded further: descend into children
                    output.extend(obj.normal_children());
                    new_leaves = true;
                } else {
                    // Object is a leaf for our purposes: keep it as-is
                    output.push(obj);
                }
            }
            // The freshly expanded list becomes the next pass's input
            std::mem::swap(&mut input, &mut output);
            // Stop as soon as a full pass did not expand anything
            if !new_leaves {
                break;
            }
        }
        input
    }
1416
1417    proptest! {
1418        // Check that distribute_items works well given correct inputs
1419        #[test]
1420        fn distribute_correct(
1421            disjoint_roots in disjoint_roots(),
1422            max_depth in max_depth(),
1423            num_items: NonZeroU8,
1424            flags: DistributeFlags,
1425        ) {
1426            // Set up test state
1427            let topology = Topology::test_instance();
1428            let num_items = usize::from(num_items.get());
1429
1430            // Check results on normal roots
1431            #[allow(clippy::cast_precision_loss)]
1432            {
1433                // Run the computation
1434                let item_sets = topology
1435                    .distribute_items(&disjoint_roots, num_items, max_depth, flags)
1436                    .unwrap();
1437                prop_assert_eq!(item_sets.len(), num_items);
1438
1439                // Predict among which leaves of the object tree work items could
1440                // potentially have been distributed
1441                let possible_leaf_objects = find_possible_leaves(&disjoint_roots, max_depth);
1442                let mut possible_leaf_sets = possible_leaf_objects
1443                    .iter()
1444                    .map(|obj| obj.cpuset().unwrap().clone_target())
1445                    .collect::<BTreeSet<CpuSet>>();
1446
1447                // Count how many work items were distributed to each leaf
1448                let mut items_per_set = Vec::<(CpuSet, usize)>::new();
1449                for item_set in item_sets {
1450                    match items_per_set.last_mut() {
1451                        Some((last_set, count)) if last_set == &item_set => *count += 1,
1452                        _ => items_per_set.push((item_set, 1)),
1453                    }
1454                }
1455
1456                // Make sure work items were indeed only distributed to these leaves
1457                for item_set in items_per_set.iter().map(|(set, _count)| set) {
1458                    // ...but bear in mind that the algorithm may merge adjacent
1459                    // leaf cpusets if there are fewer items than leaf cpusets.
1460                    if !possible_leaf_sets.contains(item_set) {
1461                        // Compute the expected effect of merging
1462                        let subsets = possible_leaf_objects
1463                            .iter()
1464                            .map(|obj| obj.cpuset().unwrap())
1465                            .skip_while(|&subset| !item_set.includes(subset))
1466                            .take_while(|&subset| item_set.includes(subset))
1467                            .map(|subset| subset.clone_target())
1468                            .collect::<Vec<_>>();
1469                        let merged_set = subsets.iter().fold(CpuSet::new(), |mut acc, subset| {
1470                            acc |= subset;
1471                            acc
1472                        });
1473
1474                        // Check if this indeed what happened
1475                        prop_assert_eq!(
1476                            item_set,
1477                            &merged_set,
1478                            "Distributed set {} is not part of expected leaf sets {:?}",
1479                            item_set,
1480                            possible_leaf_sets
1481                        );
1482                        prop_assert_eq!(
1483                            items_per_set
1484                                .iter()
1485                                .filter(|(item_set, _count)| item_set.intersects(&merged_set))
1486                                .count(),
1487                            1,
1488                            "Merging should only occurs when 1 item is shared by N leaves"
1489                        );
1490
1491                        // Take the merging of leaves into account
1492                        for subset in subsets {
1493                            prop_assert!(possible_leaf_sets.remove(&subset));
1494                        }
1495                        prop_assert!(possible_leaf_sets.insert(merged_set));
1496                    }
1497                }
1498
1499                // Make sure the leaves that were actually used are disjoint (i.e.
1500                // no distributing to a parent AND its children)
1501                prop_assert!(!sets_overlap(items_per_set.iter().map(|(set, _count)| set)));
1502
1503                // Check that the distribution is as close to ideal as possible,
1504                // given that work items cannot be split in halves
1505                let total_weight = items_per_set
1506                    .iter()
1507                    .map(|(leaf_set, _count)| leaf_set.weight().unwrap())
1508                    .sum::<usize>();
1509                for (leaf_set, items_per_leaf) in &items_per_set {
1510                    let cpu_share = leaf_set.weight().unwrap() as f64 / total_weight as f64;
1511                    let ideal_share = num_items as f64 * cpu_share;
1512                    prop_assert!((*items_per_leaf as f64 - ideal_share).abs() <= 1.0);
1513                }
1514
1515                // Check that the distribution is biased towards earlier or later
1516                // leaves depending on distribution flags, due to roots being
1517                // enumerated in reverse order
1518                let (first_set, items_per_leaf) = items_per_set.first().unwrap();
1519                let first_set_intersects = |root: Option<&TopologyObject>| {
1520                    prop_assert!(first_set.intersects(root.unwrap().cpuset().unwrap()));
1521                    Ok(())
1522                };
1523                if flags.contains(DistributeFlags::REVERSE) {
1524                    first_set_intersects(disjoint_roots.last().copied())?;
1525                } else {
1526                    first_set_intersects(disjoint_roots.first().copied())?;
1527                }
1528                let cpu_share = first_set.weight().unwrap() as f64 / total_weight as f64;
1529                let ideal_share = num_items as f64 * cpu_share;
1530                let bias = *items_per_leaf as f64 - ideal_share;
1531                prop_assert!(
1532                    bias >= 0.0,
1533                    "Earlier roots should get favored for item allocation"
1534                );
1535            }
1536        }
1537    }
1538
    /// To the random input provided by `disjoint_roots`, add an extra root that
    /// overlaps with the existing ones
    ///
    /// Used to check `distribute_items`'s rejection of overlapping roots.
    fn overlapping_roots() -> impl Strategy<Value = Vec<&'static TopologyObject>> {
        disjoint_roots().prop_flat_map(|disjoint_roots| {
            // Randomly pick a CPU in the set covered by the disjoint roots, and
            // find the smallest hwloc object (normally a PU) that covers it.
            // This will be used to check distribute_items's handling of
            // duplicate roots.
            let topology = Topology::test_instance();
            // Union of the cpusets of all disjoint roots
            let full_set = disjoint_roots.iter().fold(CpuSet::new(), |mut acc, root| {
                acc |= root.cpuset().unwrap();
                acc
            });
            // Strategy that picks the cpu_idx-th set CPU of full_set, then
            // looks up the smallest topology object covering that single CPU
            let overlapping_pu = (0..full_set.weight().unwrap()).prop_map(move |cpu_idx| {
                topology
                    .smallest_object_covering_cpuset(&CpuSet::from(
                        full_set.iter_set().nth(cpu_idx).unwrap(),
                    ))
                    .unwrap()
            });

            // Empirically, this can pick a PU without a cpuset on weird systems
            // like GitHub's CI nodes. Address this by going up ancestors until
            // a cpuset is found.
            let overlapping_root = overlapping_pu.prop_map(|pu| {
                std::iter::once(pu)
                    .chain(pu.ancestors())
                    .find(|root| root.cpuset().is_some())
                    .unwrap()
            });

            // Insert this overlapping object at a random position in the
            // original root list, which makes the list overlap with itself
            let num_roots = disjoint_roots.len();
            (Just(disjoint_roots), overlapping_root, 0..num_roots).prop_map(
                |(mut disjoint_roots, overlapping_root, bad_root_idx)| {
                    disjoint_roots.insert(bad_root_idx, overlapping_root);
                    disjoint_roots
                },
            )
        })
    }
1580
1581    proptest! {
1582        #[test]
1583        fn distribute_overlapping(
1584            overlapping_roots in overlapping_roots(),
1585            max_depth in max_depth(),
1586            num_items: NonZeroU8,
1587            flags: DistributeFlags,
1588        ) {
1589            let num_items = usize::from(num_items.get());
1590            prop_assert_eq!(
1591                Topology::test_instance().distribute_items(&overlapping_roots, num_items, max_depth, flags),
1592                Err(DistributeError::OverlappingRoots)
1593            );
1594        }
1595    }
1596
1597    /// Pick a random normal object from the foreign test instance
1598    #[cfg(feature = "hwloc-2_3_0")]
1599    fn foreign_normal_object() -> impl Strategy<Value = &'static TopologyObject> {
1600        static FOREIGNERS: OnceLock<Box<[&'static TopologyObject]>> = OnceLock::new();
1601        let objects =
1602            FOREIGNERS.get_or_init(|| Topology::foreign_instance().normal_objects().collect());
1603        prop::sample::select(&objects[..])
1604    }
1605
1606    /// To the random input provided by `disjoint_roots`, add an extra root from
1607    /// a different topology, and report at which index it was added
1608    #[cfg(feature = "hwloc-2_3_0")]
1609    fn foreign_roots_and_idx() -> impl Strategy<Value = (Vec<&'static TopologyObject>, usize)> {
1610        // Pick a set of disjoint roots and a foreign object
1611        (disjoint_roots(), foreign_normal_object()).prop_flat_map(
1612            |(disjoint_roots, foreign_object)| {
1613                // Insert the foreign object at a random position, report where
1614                let num_roots = disjoint_roots.len();
1615                (Just((disjoint_roots, foreign_object)), 0..num_roots).prop_map(
1616                    |((mut disjoint_roots, foreign_object), bad_root_idx)| {
1617                        disjoint_roots.insert(bad_root_idx, foreign_object);
1618                        (disjoint_roots, bad_root_idx)
1619                    },
1620                )
1621            },
1622        )
1623    }
1624
1625    #[cfg(feature = "hwloc-2_3_0")]
1626    proptest! {
1627        #[test]
1628        fn distribute_foreign(
1629            (foreign_roots, foreign_idx) in foreign_roots_and_idx(),
1630            max_depth in max_depth(),
1631            num_items: NonZeroU8,
1632            flags: DistributeFlags,
1633        ) {
1634            let num_items = usize::from(num_items.get());
1635            let foreign_object = foreign_roots[foreign_idx];
1636            prop_assert_eq!(
1637                Topology::test_instance().distribute_items(&foreign_roots, num_items, max_depth, flags),
1638                Err(DistributeError::ForeignRoot(foreign_object.into()))
1639            );
1640        }
1641    }
1642}