// hwlocality/topology/mod.rs
//! Hardware topology (main hwloc API entry point)
//!
//! A [`Topology`] contains everything hwloc knows about the hardware and
//! software structure of a system. Among other things, it can be used to query
//! the system topology and to bind threads and processes to hardware CPU cores
//! and NUMA nodes. It is the main entry point of the hwloc API through which
//! almost any other feature of the library is accessed.

9pub mod builder;
10#[cfg(feature = "hwloc-2_3_0")]
11pub mod editor;
12pub mod export;
13pub mod support;
14
15use self::{
16 builder::{BuildFlags, TopologyBuilder, TypeFilter},
17 support::FeatureSupport,
18};
19#[cfg(all(feature = "hwloc-2_3_0", doc))]
20use crate::topology::support::MiscSupport;
21use crate::{
22 bitmap::{Bitmap, BitmapRef, SpecializedBitmap},
23 cpu::cpuset::CpuSet,
24 errors::{self, ForeignObjectError, RawHwlocError},
25 ffi::transparent::AsNewtype,
26 memory::nodeset::NodeSet,
27 object::{
28 depth::{Depth, NormalDepth},
29 types::ObjectType,
30 TopologyObject,
31 },
32};
33use bitflags::bitflags;
34use errno::Errno;
35use hwlocality_sys::{
36 hwloc_bitmap_s, hwloc_distrib_flags_e, hwloc_topology, hwloc_type_filter_e,
37 HWLOC_DISTRIB_FLAG_REVERSE,
38};
39use libc::EINVAL;
40#[allow(unused)]
41#[cfg(test)]
42use similar_asserts::assert_eq;
43use std::{
44 collections::BTreeMap,
45 fmt::{self, Debug, Pointer},
46 ops::Deref,
47 ptr::{self, NonNull},
48 sync::OnceLock,
49};
50use strum::IntoEnumIterator;
51use thiserror::Error;
52
/// Main entry point to the hwloc API
///
/// A `Topology` contains everything hwloc knows about the hardware and software
/// structure of a system. It can be used to query the system topology and to
/// bind threads and processes to hardware CPU cores and NUMA nodes.
///
/// Since there are **many** things you can do with a `Topology`, the API is
/// broken down into sections roughly following the structure of the upstream
/// hwloc documentation:
///
/// - [Topology building](#topology-building)
/// - [Full object lists](#full-object-lists) (specific to Rust bindings)
/// - [Object levels, depths and types](#object-levels-depths-and-types)
/// - [CPU cache statistics](#cpu-cache-statistics) (specific to Rust bindings)
/// - [CPU binding](#cpu-binding)
/// - [Memory binding](#memory-binding)
#[cfg_attr(
    feature = "hwloc-2_3_0",
    doc = "- [Modifying a loaded topology](#modifying-a-loaded-topology) (hwloc 2.3+)"
)]
/// - [Finding objects inside a CPU set](#finding-objects-inside-a-cpu-set)
/// - [Finding objects covering at least a CPU set](#finding-objects-covering-at-least-a-cpu-set)
/// - [Finding other objects](#finding-other-objects)
/// - [Distributing work items over a topology](#distributing-work-items-over-a-topology)
/// - [CPU and node sets of entire topologies](#cpu-and-node-sets-of-entire-topologies)
/// - [Finding I/O objects](#finding-io-objects)
/// - [Exporting Topologies to XML](#exporting-topologies-to-xml)
/// - [Exporting Topologies to Synthetic](#exporting-topologies-to-synthetic)
/// - [Retrieve distances between objects](#retrieve-distances-between-objects)
#[cfg_attr(
    feature = "hwloc-2_3_0",
    doc = "- [Comparing memory node attributes for finding where to allocate on](#comparing-memory-node-attributes-for-finding-where-to-allocate-on) (hwloc 2.3+)"
)]
#[cfg_attr(
    feature = "hwloc-2_4_0",
    doc = "- [Kinds of CPU cores](#kinds-of-cpu-cores) (hwloc 2.4+)"
)]
#[cfg_attr(
    any(doc, target_os = "linux"),
    doc = "- [Linux-specific helpers](#linux-specific-helpers)"
)]
#[cfg_attr(
    any(doc, all(target_os = "windows", feature = "hwloc-2_5_0")),
    doc = "- [Windows-specific helpers](#windows-specific-helpers) (hwloc 2.5+)"
)]
//
// --- Implementation details ---
//
// Since the Topology API is _huge_, not all of it is implemented in the
// topology module. Instead, functionality which is very strongly related to
// one other code module is implemented inside that module, leaving this module
// focused on basic lifecycle and cross-cutting issues.
//
// # Safety
//
// As a type invariant, the inner pointer is assumed to always point to a valid
// fully built, non-aliased topology.
//
// Any binding to an hwloc topology function that takes a user-provided
// &TopologyObject parameter **must** check that this object does belong to the
// topology using the Topology::contains() method before passing it to hwloc.
#[doc(alias = "hwloc_topology")]
#[doc(alias = "hwloc_topology_t")]
pub struct Topology(NonNull<hwloc_topology>);
117
118/// # Topology building
119//
120// --- Implementation details ---
121//
122// Upstream docs:
123// - Creation: https://hwloc.readthedocs.io/en/stable/group__hwlocality__creation.html
124// - Build queries: https://hwloc.readthedocs.io/en/stable/group__hwlocality__configuration.html
125impl Topology {
126 /// Creates a new Topology.
127 ///
128 /// If no customization of the build process is needed, this method is the
129 /// main entry point to this crate. A topology is returned, which contains
130 /// the logical representation of the physical hardware.
131 ///
132 /// # Examples
133 ///
134 /// ```
135 /// # use hwlocality::Topology;
136 /// let topology = Topology::new()?;
137 /// # Ok::<(), eyre::Report>(())
138 /// ```
139 #[allow(clippy::missing_errors_doc)]
140 pub fn new() -> Result<Self, RawHwlocError> {
141 TopologyBuilder::new().build()
142 }
143
144 /// Test topology instance
145 ///
146 /// Used to avoid redundant calls to Topology::new() in unit tests and
147 /// doctests that need read-only access to a topology.
148 ///
149 /// Configured to expose as many concepts from hwloc as possible (disallowed
150 /// CPUs, I/O objects, etc).
151 ///
152 /// Do not use this in doctests where the fact that the topology
153 /// configuration matters. And do not, under any circumstances, use this in
154 /// non-test code.
155 ///
156 /// NOTE: In an ideal world, this would be cfg(any(test, doctest)), but that
157 /// doesn't work for doctests yet:
158 /// https://github.com/rust-lang/rust/issues/67295
159 #[doc(hidden)]
160 pub fn test_instance() -> &'static Self {
161 static INSTANCE: OnceLock<Topology> = OnceLock::new();
162 INSTANCE.get_or_init(|| {
163 Self::builder()
164 .with_flags(BuildFlags::INCLUDE_DISALLOWED)
165 .expect("INCLUDE_DISALLOWED should always work")
166 .with_common_type_filter(TypeFilter::KeepAll)
167 .expect("KeepAll should be a supported common type filter")
168 .build()
169 .expect("Failed to initialize main test Topology")
170 })
171 }
172
173 /// Like test_instance, but separate instance
174 ///
175 /// Used to test that operations correctly detect foreign topology objects
176 #[cfg(feature = "hwloc-2_3_0")]
177 #[doc(hidden)]
178 pub fn foreign_instance() -> &'static Self {
179 static INSTANCE: OnceLock<Topology> = OnceLock::new();
180 INSTANCE.get_or_init(|| Self::test_instance().clone())
181 }
182
183 /// Prepare to create a Topology with custom configuration
184 ///
185 /// # Examples
186 ///
187 /// ```
188 /// # use hwlocality::topology::{Topology, builder::BuildFlags};
189 /// let flags = BuildFlags::INCLUDE_DISALLOWED;
190 /// let topology = Topology::builder().with_flags(flags)?.build()?;
191 /// assert_eq!(topology.build_flags(), flags);
192 /// # Ok::<(), eyre::Report>(())
193 /// ```
194 #[doc(alias = "hwloc_topology_init")]
195 pub fn builder() -> TopologyBuilder {
196 TopologyBuilder::new()
197 }
198
199 /// Check that this topology is compatible with the current hwloc library
200 ///
201 /// This is useful when using the same topology structure (in memory) in
202 /// different libraries that may use different hwloc installations (for
203 /// instance if one library embeds a specific version of hwloc, while
204 /// another library uses a default system-wide hwloc installation).
205 ///
206 /// If all libraries/programs use the same hwloc installation, this function
207 /// always returns `true`.
208 //
209 // TODO: Propagate note about interprocess sharing from upstream docs
210 // once interprocess sharing is implemented.
211 ///
212 /// # Examples
213 ///
214 /// ```
215 /// # use hwlocality::Topology;
216 /// assert!(Topology::new()?.is_abi_compatible());
217 /// # Ok::<(), eyre::Report>(())
218 /// ```
219 #[doc(alias = "hwloc_topology_abi_check")]
220 pub fn is_abi_compatible(&self) -> bool {
221 // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
222 // - hwloc ops are trusted not to modify *const parameters
223 let result = errors::call_hwloc_zero_or_minus1("hwloc_topology_abi_check", || unsafe {
224 hwlocality_sys::hwloc_topology_abi_check(self.as_ptr())
225 });
226 match result {
227 Ok(()) => true,
228 // Lack of tarpaulin coverage expected, this check should never fail
229 // unless something horribly wrong is happening.
230 #[cfg(not(tarpaulin_include))]
231 Err(RawHwlocError {
232 errno: Some(Errno(EINVAL)),
233 ..
234 }) => false,
235 #[cfg(not(tarpaulin_include))]
236 #[cfg(windows)]
237 Err(RawHwlocError { errno: None, .. }) => {
238 // As explained in the RawHwlocError documentation, errno values
239 // may not correctly propagate from hwloc to hwlocality on
240 // Windows. Since there is only one expected errno value here,
241 // we'll interpret lack of errno as EINVAL on Windows.
242 false
243 }
244 Err(raw_err) => unreachable!("Unexpected hwloc error: {raw_err}"),
245 }
246 }
247
248 /// Flags that were used to build this topology
249 ///
250 /// # Examples
251 ///
252 /// ```
253 /// # use hwlocality::topology::{Topology, builder::BuildFlags};
254 /// assert_eq!(Topology::new()?.build_flags(), BuildFlags::empty());
255 /// # Ok::<(), eyre::Report>(())
256 /// ```
257 #[doc(alias = "hwloc_topology_get_flags")]
258 pub fn build_flags(&self) -> BuildFlags {
259 // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
260 // - hwloc ops are trusted not to modify *const parameters
261 let result = BuildFlags::from_bits_retain(unsafe {
262 hwlocality_sys::hwloc_topology_get_flags(self.as_ptr())
263 });
264 assert!(result.is_valid(), "hwloc returned invalid flags");
265 result
266 }
267
268 /// Was the topology built using the system running this program?
269 ///
270 /// It may not have been if, for instance, it was built using another
271 /// file-system root or loaded from a synthetic or XML textual description.
272 ///
273 /// # Examples
274 ///
275 /// ```
276 /// # use hwlocality::Topology;
277 /// assert!(Topology::new()?.is_this_system());
278 /// # Ok::<(), eyre::Report>(())
279 /// ```
280 #[doc(alias = "hwloc_topology_is_thissystem")]
281 pub fn is_this_system(&self) -> bool {
282 // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
283 // - hwloc ops are trusted not to modify *const parameters
284 errors::call_hwloc_bool("hwloc_topology_is_thissystem", || unsafe {
285 hwlocality_sys::hwloc_topology_is_thissystem(self.as_ptr())
286 })
287 .expect("Should not involve faillible syscalls")
288 }
289
290 /// Supported hwloc features with this topology on this machine
291 ///
292 /// This is the information that one gets via the `hwloc-info --support` CLI.
293 ///
294 /// The reported features are what the current topology supports on the
295 /// current machine. If the topology was exported to XML from another
296 /// machine and later imported here, support still describes what is
297 /// supported for this imported topology after import. By default, binding
298 /// will be reported as unsupported in this case (see
299 /// [`BuildFlags::ASSUME_THIS_SYSTEM`]).
300 ///
301 #[cfg_attr(
302 feature = "hwloc-2_3_0",
303 doc = "On hwloc 2.3+, [`BuildFlags::IMPORT_SUPPORT`] may be used during topology building to"
304 )]
305 #[cfg_attr(
306 feature = "hwloc-2_3_0",
307 doc = "report the supported features of the original remote machine instead. If"
308 )]
309 #[cfg_attr(
310 feature = "hwloc-2_3_0",
311 doc = "it was successfully imported, [`MiscSupport::imported()`] will be set."
312 )]
313 ///
314 /// # Examples
315 ///
316 /// ```
317 /// # let topology = hwlocality::Topology::test_instance();
318 /// println!("{:?}", topology.feature_support());
319 /// # Ok::<(), eyre::Report>(())
320 /// ```
321 #[doc(alias = "hwloc_topology_get_support")]
322 pub fn feature_support(&self) -> &FeatureSupport {
323 // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
324 // - hwloc ops are trusted not to modify *const parameters
325 let ptr = errors::call_hwloc_ptr("hwloc_topology_get_support", || unsafe {
326 hwlocality_sys::hwloc_topology_get_support(self.as_ptr())
327 })
328 .expect("Unexpected hwloc error");
329 // SAFETY: - If hwloc succeeded, the output is assumed to be valid and
330 // point to a valid target devoid of mutable aliases
331 // - Output reference will be bound the the lifetime of &self by
332 // the borrow checker
333 unsafe { ptr.as_ref().as_newtype() }
334 }
335
336 /// Quickly check a support flag
337 ///
338 /// # Examples
339 ///
340 /// ```
341 /// # use hwlocality::topology::{Topology, support::{FeatureSupport, DiscoverySupport}};
342 /// let topology = Topology::new()?;
343 /// assert!(topology.supports(
344 /// FeatureSupport::discovery,
345 /// DiscoverySupport::pu_count
346 /// ));
347 /// # Ok::<(), eyre::Report>(())
348 /// ```
349 pub fn supports<Group>(
350 &self,
351 get_group: fn(&FeatureSupport) -> Option<&Group>,
352 check_feature: fn(&Group) -> bool,
353 ) -> bool {
354 get_group(self.feature_support()).is_some_and(check_feature)
355 }
356
357 /// Filtering that was applied for the given object type
358 ///
359 /// # Examples
360 ///
361 /// ```
362 /// # use hwlocality::{
363 /// # object::types::ObjectType,
364 /// # topology::builder::TypeFilter
365 /// # };
366 /// # let topology = hwlocality::Topology::test_instance();
367 /// #
368 /// // PUs, NUMANodes and Machine are always kept
369 /// let always_there = [ObjectType::PU,
370 /// ObjectType::NUMANode,
371 /// ObjectType::Machine];
372 /// for ty in always_there {
373 /// assert_eq!(topology.type_filter(ty)?, TypeFilter::KeepAll);
374 /// }
375 ///
376 /// // Groups are only kept if they bring extra structure
377 /// assert_eq!(
378 /// topology.type_filter(ObjectType::Group)?,
379 /// TypeFilter::KeepStructure
380 /// );
381 /// # Ok::<(), eyre::Report>(())
382 /// ```
383 #[allow(clippy::missing_errors_doc)]
384 #[doc(alias = "hwloc_topology_get_type_filter")]
385 pub fn type_filter(&self, ty: ObjectType) -> Result<TypeFilter, RawHwlocError> {
386 let mut filter = hwloc_type_filter_e::MAX;
387 // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
388 // - hwloc ops are trusted not to modify *const parameters
389 // - By construction, ObjectType only exposes values that map into
390 // hwloc_obj_type_t values understood by the configured version
391 // of hwloc, and build.rs checks that the active version of
392 // hwloc is not older than that, so into() may only generate
393 // valid hwloc_obj_type_t values for current hwloc
394 // - filter is an out-parameter, initial value shouldn't matter
395 errors::call_hwloc_zero_or_minus1("hwloc_topology_get_type_filter", || unsafe {
396 hwlocality_sys::hwloc_topology_get_type_filter(self.as_ptr(), ty.into(), &mut filter)
397 })?;
398 // SAFETY: Filter is from a successful hwloc API call, so it should be
399 // a valid hwloc type filter that can be fed back to hwloc.
400 Ok(unsafe { TypeFilter::from_hwloc(filter) })
401 }
402}
403
/// # Distributing work items over a topology
//
// --- Implementation details ---
//
// Inspired by https://hwloc.readthedocs.io/en/stable/group__hwlocality__helper__distribute.html,
// but the inline header implementation had to be rewritten in Rust.
impl Topology {
    /// Distribute `num_items` work items over the topology under `roots`
    ///
    /// Given a number of work items to be processed (which can be, for example,
    /// a set of threads to be spawned), this function will assign a cpuset to
    /// each of them according to a recursive linear distribution algorithm.
    /// Such an algorithm spreads work evenly across CPUs and ensures that
    /// work-items with neighboring indices in the output array are processed by
    /// neighboring locations in the topology, which have a high chance of
    /// sharing resources like fast CPU caches.
    ///
    /// The set of CPUs over which work items are distributed is designated by a
    /// set of root [`TopologyObject`]s with associated CPUs. The root objects
    /// must be part of this [`Topology`], or the [`ForeignRoot`] error will be
    /// returned. Their CPU sets must also be disjoint, or the
    /// [`OverlappingRoots`] error will be returned. You can distribute items
    /// across all CPUs in the topology by setting `roots` to
    /// `&[topology.root_object()]`.
    ///
    /// Since the purpose of `roots` is to designate which CPUs items should be
    /// allocated to, root objects should normally have a CPU set. If that is
    /// not the case (e.g. if some roots designate NUMA nodes or I/O objects
    /// like storage or GPUs), the algorithm will walk up affected roots'
    /// ancestor chains to locate the first ancestor with CPUs in the topology,
    /// which represents the CPUs closest to the object of interest. If none of
    /// the CPUs of that ancestor is available for binding, that root will be
    /// ignored, unless this is true of all roots in which case the
    /// [`EmptyRoots`] error is returned.
    ///
    /// If there is no depth limit, which can be achieved by setting `max_depth`
    /// to [`NormalDepth::MAX`], the distribution will be done down to the
    /// granularity of individual CPUs, i.e. if there are more work items than
    /// CPUs, each work item will be assigned one CPU. By setting the
    /// `max_depth` parameter to a lower limit, you can distribute work at a
    /// coarser granularity, e.g. across L3 caches, giving the OS some leeway to
    /// move tasks across CPUs sharing that cache.
    ///
    /// By default, output cpusets follow the logical topology children order.
    /// By setting `flags` to [`DistributeFlags::REVERSE`], you can ask for them
    /// to be provided in reverse order instead (from last child to first child).
    ///
    /// # Errors
    ///
    /// - [`EmptyRoots`] if there are no CPUs to distribute work to (the
    ///   union of all root cpusets is empty).
    /// - [`ForeignRoot`] if some of the specified roots do not belong to this
    ///   topology.
    /// - [`OverlappingRoots`] if some of the roots have overlapping CPU sets.
    ///
    /// [`EmptyRoots`]: DistributeError::EmptyRoots
    /// [`ForeignRoot`]: DistributeError::ForeignRoot
    /// [`OverlappingRoots`]: DistributeError::OverlappingRoots
    #[allow(clippy::missing_docs_in_private_items)]
    #[doc(alias = "hwloc_distrib")]
    pub fn distribute_items(
        &self,
        roots: &[&TopologyObject],
        num_items: usize,
        max_depth: NormalDepth,
        flags: DistributeFlags,
    ) -> Result<Vec<CpuSet>, DistributeError> {
        // Make sure all roots belong to this topology
        for root in roots.iter().copied() {
            if !self.contains(root) {
                return Err(DistributeError::ForeignRoot(root.into()));
            }
        }

        // Handle the trivial case where 0 items are distributed
        if num_items == 0 {
            return Ok(Vec::new());
        }

        /// Inner recursive distribution algorithm
        ///
        /// Appends exactly `num_items` cpusets to `result`, distributing them
        /// over `roots_and_cpusets` proportionally to each root's CPU count.
        fn recurse<'self_>(
            roots_and_cpusets: impl DoubleEndedIterator<Item = ObjSetWeightDepth<'self_>> + Clone,
            num_items: usize,
            max_depth: NormalDepth,
            flags: DistributeFlags,
            result: &mut Vec<CpuSet>,
        ) {
            // Debug mode checks
            #[cfg(not(tarpaulin_include))]
            debug_assert_ne!(
                roots_and_cpusets.clone().count(),
                0,
                "Can't distribute to 0 roots"
            );
            #[cfg(not(tarpaulin_include))]
            debug_assert_ne!(
                num_items, 0,
                "Shouldn't try to distribute 0 items (just don't call this function)"
            );
            let initial_len = result.len();

            // Total number of cpus covered by the active roots
            let total_weight: usize = roots_and_cpusets
                .clone()
                .map(|(_, _, weight, _)| weight)
                .sum();

            // Subset of CPUs and items covered so far
            let mut given_weight = 0;
            let mut given_items = 0;

            // What to do with each root
            let process_root = |(root, cpuset, weight, depth): ObjSetWeightDepth<'_>| {
                // Give this root a chunk of the work-items proportional to its
                // weight, with a bias towards giving more CPUs to first roots
                let next_given_weight = given_weight + weight;
                let next_given_items = weight_to_items(next_given_weight, total_weight, num_items);
                let my_items = next_given_items - given_items;

                // Keep recursing until we reach the bottom of the topology,
                // run out of items to distribute, or hit the depth limit
                if root.normal_arity() > 0 && my_items > 1 && depth < max_depth {
                    recurse(
                        root.normal_children().filter_map(decode_normal_obj),
                        my_items,
                        max_depth,
                        flags,
                        result,
                    );
                } else if my_items > 0 {
                    // All items attributed to this root get this root's cpuset
                    for _ in 0..my_items {
                        result.push(cpuset.clone_target());
                    }
                } else {
                    // No item attributed to this root: merge its cpuset into
                    // the previous chunk (and into any earlier chunks that are
                    // currently equal to that chunk, so equal chunks stay equal)
                    let mut iter = result.iter_mut().rev();
                    let last = iter.next().expect("First chunk cannot be empty");
                    for other in iter {
                        if other == last {
                            *other |= cpuset;
                        }
                    }
                    *last |= cpuset;
                }

                // Prepare to process the next root
                given_weight = next_given_weight;
                given_items = next_given_items;
            };

            // Process roots in the order dictated by flags
            if flags.contains(DistributeFlags::REVERSE) {
                roots_and_cpusets.rev().for_each(process_root);
            } else {
                roots_and_cpusets.for_each(process_root);
            }

            // Debug mode checks
            #[cfg(not(tarpaulin_include))]
            debug_assert_eq!(
                result.len() - initial_len,
                num_items,
                "This function should distribute the requested number of items"
            );
        }

        // Check roots, walk up to their first normal ancestor as necessary
        let decoded_roots = roots.iter().copied().filter_map(|root| {
            let mut root_then_ancestors = std::iter::once(root)
                .chain(root.ancestors())
                .skip_while(|candidate| !candidate.object_type().is_normal());
            root_then_ancestors.find_map(decode_normal_obj)
        });
        if decoded_roots.clone().count() == 0 {
            return Err(DistributeError::EmptyRoots);
        }
        if sets_overlap(decoded_roots.clone().map(|(_, root_set, _, _)| root_set)) {
            return Err(DistributeError::OverlappingRoots);
        }

        // Run the recursion, collect results
        let mut result = Vec::with_capacity(num_items);
        recurse(decoded_roots, num_items, max_depth, flags, &mut result);
        #[cfg(not(tarpaulin_include))]
        debug_assert_eq!(
            result.len(),
            num_items,
            "This function should produce one result per input item"
        );
        Ok(result)
    }
}
598//
#[cfg(not(tarpaulin_include))]
bitflags! {
    /// Flags to be given to [`Topology::distribute_items()`]
    #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
    #[doc(alias = "hwloc_distrib_flags_e")]
    pub struct DistributeFlags: hwloc_distrib_flags_e {
        /// Distribute in reverse order, starting from the last objects
        const REVERSE = HWLOC_DISTRIB_FLAG_REVERSE;
    }
}
609//
610crate::impl_arbitrary_for_bitflags!(DistributeFlags, hwloc_distrib_flags_e);
611//
612impl Default for DistributeFlags {
613 fn default() -> Self {
614 Self::empty()
615 }
616}
617//
/// Error returned by [`Topology::distribute_items()`]
#[derive(Clone, Debug, Eq, Error, Hash, PartialEq)]
pub enum DistributeError {
    /// Error returned when the specified roots contain no accessible CPUs
    ///
    /// This can happen if either an empty roots list is specified, or if the
    /// topology was built with [`BuildFlags::INCLUDE_DISALLOWED`] and the
    /// specified roots only contain disallowed CPUs.
    #[error("no CPU is accessible from the distribution roots")]
    EmptyRoots,

    /// Some of the specified roots do not belong to this topology
    #[error("distribution root {0}")]
    ForeignRoot(#[from] ForeignObjectError),

    /// Specified roots overlap with each other
    #[error("distribution roots overlap with each other")]
    OverlappingRoots,
}
637
/// Part of the implementation of [`Topology::distribute_items()`] that tells,
/// given a number of items to distribute, a cpuset weight, and the sum of all
/// cpuset weights, how many items should be distributed, rounding up.
fn weight_to_items(given_weight: usize, total_weight: usize, num_items: usize) -> usize {
    // Weights represent CPU counts and num_items represents
    // something that users reasonably want to count. Neither
    // should overflow u128 even on highly speculative future
    // hardware where usize would be larger than 128 bits
    #[allow(clippy::missing_docs_in_private_items)]
    const TOO_LARGE: &str = "Such large inputs are not supported yet";
    let to_u128 = |x: usize| u128::try_from(x).expect(TOO_LARGE);

    // Numerator product can only fail if both given_weight and
    // num_items are >=2^64, which is equally improbable.
    let scaled = to_u128(num_items)
        .checked_mul(to_u128(given_weight))
        .expect(TOO_LARGE);
    let weight_sum = to_u128(total_weight);

    // Ceiling division: any nonzero remainder rounds up to one more item
    let (quotient, remainder) = (scaled / weight_sum, scaled % weight_sum);
    let item_count = if remainder == 0 { quotient } else { quotient + 1 };

    // Cast can only fail if given_weight > total_weight,
    // otherwise we expect item_count <= num_items
    debug_assert!(
        given_weight <= total_weight,
        "Cannot distribute more weight than the active root's total weight"
    );
    item_count
        .try_into()
        .expect("Cannot happen if computation is correct")
}
668
669/// Part of the implementation of [`Topology::distribute_items()`] that extracts
670/// information from a [`TopologyObject`] that is known to be normal, and
671/// returns this information if the object has a non-empty cpuset
672fn decode_normal_obj(obj: &TopologyObject) -> Option<ObjSetWeightDepth<'_>> {
673 debug_assert!(
674 obj.object_type().is_normal(),
675 "This function only works on normal objects"
676 );
677 let cpuset = obj.cpuset().expect("Normal objects should have cpusets");
678 let weight = cpuset
679 .weight()
680 .expect("Topology objects should not have infinite cpusets");
681 let depth = obj.depth().expect_normal();
682 (weight > 0).then_some((obj, cpuset, weight, depth))
683}
684
/// Information that is extracted by [`decode_normal_obj()`]
///
/// In order: the object itself, its cpuset, the cpuset's weight (number of
/// CPUs it contains), and the object's normal depth in the topology.
type ObjSetWeightDepth<'object> = (
    &'object TopologyObject,
    BitmapRef<'object, CpuSet>,
    usize,
    NormalDepth,
);
692
693/// Truth that an iterator of cpusets contains overlapping sets
694///
695/// `sets` can yield `&'_ CpuSet` or `BitmapRef<'_, CpuSet>`.
696fn sets_overlap(mut sets: impl Iterator<Item = impl Deref<Target = CpuSet>>) -> bool {
697 sets.try_fold(CpuSet::new(), |mut acc, set| {
698 let set: &CpuSet = &set;
699 if acc.intersects(set) {
700 None
701 } else {
702 acc |= set;
703 Some(acc)
704 }
705 })
706 .is_none()
707}
708
709/// # CPU and node sets of entire topologies
710//
711// --- Implementation details ---
712//
713// Upstream docs: https://hwloc.readthedocs.io/en/stable/group__hwlocality__helper__topology__sets.html
714impl Topology {
715 /// Topology CPU set
716 ///
717 /// This is equivalent to calling [`TopologyObject::cpuset()`] on
718 /// the topology's root object.
719 ///
720 /// # Example
721 ///
722 /// ```rust
723 /// # use hwlocality::Topology;
724 /// # let topology = Topology::test_instance();
725 /// println!("Visible CPUs in this topology: {}", topology.cpuset());
726 /// # Ok::<_, eyre::Report>(())
727 /// ```
728 #[doc(alias = "hwloc_topology_get_topology_cpuset")]
729 pub fn cpuset(&self) -> BitmapRef<'_, CpuSet> {
730 // SAFETY: This is one of the hwloc functions allowed here
731 unsafe {
732 self.topology_set(
733 "hwloc_topology_get_topology_cpuset",
734 hwlocality_sys::hwloc_topology_get_topology_cpuset,
735 )
736 }
737 }
738
739 /// Complete CPU set
740 ///
741 /// This is equivalent to calling [`TopologyObject::complete_cpuset()`] on
742 /// the topology's root object.
743 ///
744 /// # Example
745 ///
746 /// ```rust
747 /// # use hwlocality::Topology;
748 /// # let topology = Topology::test_instance();
749 /// println!(
750 /// "Overall CPUs in this topology: {}",
751 /// topology.complete_cpuset()
752 /// );
753 /// # Ok::<_, eyre::Report>(())
754 /// ```
755 #[doc(alias = "hwloc_topology_get_complete_cpuset")]
756 pub fn complete_cpuset(&self) -> BitmapRef<'_, CpuSet> {
757 // SAFETY: This is one of the hwloc functions allowed here
758 unsafe {
759 self.topology_set(
760 "hwloc_topology_get_complete_cpuset",
761 hwlocality_sys::hwloc_topology_get_complete_cpuset,
762 )
763 }
764 }
765
766 /// Allowed CPU set
767 ///
768 /// If [`BuildFlags::INCLUDE_DISALLOWED`] was not set, this is identical to
769 /// [`Topology::cpuset()`]: all visible PUs are allowed.
770 ///
771 /// Otherwise, you can check whether a particular cpuset contains allowed
772 /// PUs by calling `cpuset.intersects(topology.allowed_cpuset())`, and if so
773 /// you can get the set of allowed PUs with
774 /// `cpuset & topology.allowed_cpuset()`.
775 ///
776 /// # Example
777 ///
778 /// ```rust
779 /// # use hwlocality::Topology;
780 /// # let topology = Topology::test_instance();
781 /// println!(
782 /// "Allowed CPUs in this topology: {}",
783 /// topology.allowed_cpuset()
784 /// );
785 /// # Ok::<_, eyre::Report>(())
786 /// ```
787 #[doc(alias = "hwloc_topology_get_allowed_cpuset")]
788 pub fn allowed_cpuset(&self) -> BitmapRef<'_, CpuSet> {
789 // SAFETY: This is one of the hwloc functions allowed here
790 unsafe {
791 self.topology_set(
792 "hwloc_topology_get_allowed_cpuset",
793 hwlocality_sys::hwloc_topology_get_allowed_cpuset,
794 )
795 }
796 }
797
798 /// Topology node set
799 ///
800 /// This is equivalent to calling [`TopologyObject::nodeset()`] on
801 /// the topology's root object.
802 ///
803 /// # Example
804 ///
805 /// ```rust
806 /// # use hwlocality::Topology;
807 /// # let topology = Topology::test_instance();
808 /// println!("Visible NUMA nodes in this topology: {}", topology.nodeset());
809 /// # Ok::<_, eyre::Report>(())
810 /// ```
811 #[doc(alias = "hwloc_topology_get_topology_nodeset")]
812 pub fn nodeset(&self) -> BitmapRef<'_, NodeSet> {
813 // SAFETY: This is one of the hwloc functions allowed here
814 unsafe {
815 self.topology_set(
816 "hwloc_topology_get_topology_nodeset",
817 hwlocality_sys::hwloc_topology_get_topology_nodeset,
818 )
819 }
820 }
821
822 /// Complete node set
823 ///
824 /// This is equivalent to calling [`TopologyObject::complete_nodeset()`] on
825 /// the topology's root object.
826 ///
827 /// # Example
828 ///
829 /// ```rust
830 /// # use hwlocality::Topology;
831 /// # let topology = Topology::test_instance();
832 /// println!(
833 /// "Overall NUMA nodes in this topology: {}",
834 /// topology.complete_nodeset()
835 /// );
836 /// # Ok::<_, eyre::Report>(())
837 /// ```
838 #[doc(alias = "hwloc_topology_get_complete_nodeset")]
839 pub fn complete_nodeset(&self) -> BitmapRef<'_, NodeSet> {
840 // SAFETY: This is one of the hwloc functions allowed here
841 unsafe {
842 self.topology_set(
843 "hwloc_topology_get_complete_nodeset",
844 hwlocality_sys::hwloc_topology_get_complete_nodeset,
845 )
846 }
847 }
848
849 /// Allowed node set
850 ///
851 /// If [`BuildFlags::INCLUDE_DISALLOWED`] was not set, this is identical to
852 /// [`Topology::nodeset()`]: all visible NUMA nodes are allowed.
853 ///
854 /// Otherwise, you can check whether a particular nodeset contains allowed
855 /// NUMA nodes by calling `nodeset.intersects(topology.allowed_nodeset())`,
856 /// and if so you can get the set of allowed NUMA nodes with
857 /// `nodeset & topology.allowed_nodeset()`.
858 ///
859 /// # Example
860 ///
861 /// ```rust
862 /// # use hwlocality::Topology;
863 /// # let topology = Topology::test_instance();
864 /// println!(
865 /// "Allowed NUMA nodes in this topology: {}",
866 /// topology.allowed_nodeset()
867 /// );
868 /// # Ok::<_, eyre::Report>(())
869 /// ```
870 #[doc(alias = "hwloc_topology_get_allowed_nodeset")]
871 pub fn allowed_nodeset(&self) -> BitmapRef<'_, NodeSet> {
872 // SAFETY: This is one of the hwloc functions allowed here
873 unsafe {
874 self.topology_set(
875 "hwloc_topology_get_allowed_nodeset",
876 hwlocality_sys::hwloc_topology_get_allowed_nodeset,
877 )
878 }
879 }
880
    /// Query a topology-wide `CpuSet` or `NodeSet`
    ///
    /// `getter_name` is only used to build error/panic messages, while
    /// `getter` is the hwloc entry point that actually performs the query.
    /// The returned bitmap is borrowed from (and thus lifetime-bound to) the
    /// topology; hwloc retains ownership of the underlying allocation.
    ///
    /// # Safety
    ///
    /// `getter` must be one of the functions described in the ["CPU and node
    /// sets of entire
    /// topologies"](https://hwloc.readthedocs.io/en/stable/group__hwlocality__helper__topology__sets.html)
    /// section of the hwloc documentation, which means in particular that it...
    ///
    /// - Cannot return NULL
    /// - Must return a pointer attached to the topology
    unsafe fn topology_set<'topology, Set: SpecializedBitmap>(
        &'topology self,
        getter_name: &'static str,
        getter: unsafe extern "C" fn(*const hwloc_topology) -> *const hwloc_bitmap_s,
    ) -> BitmapRef<'topology, Set> {
        // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
        //         - hwloc ops are trusted not to modify *const parameters
        //         - If this operation is successful, it should return a valid
        //           bitmap pointer
        //         - Output bitmap is indeed bound to the topology's lifetime
        let bitmap_ref = unsafe {
            let bitmap_ptr = errors::call_hwloc_ptr(getter_name, || getter(self.as_ptr()))
                .expect("According to their docs, these functions cannot return NULL");
            // Borrow, don't own: hwloc keeps ownership of the bitmap
            Bitmap::borrow_from_nonnull::<'topology>(bitmap_ptr)
        };
        // Reinterpret the untyped Bitmap as the requested CpuSet/NodeSet flavor
        bitmap_ref.cast()
    }
909}
910
911// # General-purpose internal utilities
912impl Topology {
913 /// Contained hwloc topology pointer (for interaction with hwloc)
914 pub(crate) fn as_ptr(&self) -> *const hwloc_topology {
915 self.0.as_ptr()
916 }
917
918 /// Contained mutable hwloc topology pointer (for interaction with hwloc)
919 ///
920 /// Be warned that as a result of hwloc employing lazy caching techniques,
921 /// almost every interaction that requires `*mut hwloc_topology` is unsafe
922 /// unless followed by `hwloc_topology_refresh()`. This subtlety is handled
923 /// by the [`Topology::edit()`] mechanism.
924 pub(crate) fn as_mut_ptr(&mut self) -> *mut hwloc_topology {
925 self.0.as_ptr()
926 }
927
928 /// Check if a [`TopologyObject`] is part of this topology
929 ///
930 /// This check is a safety precondition to any hwloc topology method
931 /// binding that takes user-originated `&TopologyObject`s as a parameter
932 /// and passes it to an hwloc topology method.
933 ///
934 /// While this is not expected to happen often and will in fact often
935 /// require the user to jump through some serious hoops like creating
936 /// another `static` or `thread_local` topology, unfortunately there is
937 /// always a way to do it, and safe Rust code must remain safe even in the
938 /// craziest of edge cases...
939 pub(crate) fn contains(&self, object: &TopologyObject) -> bool {
940 let expected_root = self.root_object();
941 let actual_root = std::iter::once(object)
942 .chain(object.ancestors())
943 .last()
944 .expect("By definition, this iterator always has >= 1 element");
945 ptr::eq(expected_root, actual_root)
946 }
947}
948
/// Cloning is unfortunately only available on hwloc v2.3.0 and above, because
/// that's when hwloc introduced the option to manually refresh its internal
/// caches, which are thread-unsafe and more generally violate Rust's aliasing
/// rules. These caches are invalidated upon cloning.
#[cfg(feature = "hwloc-2_3_0")]
impl Clone for Topology {
    #[doc(alias = "hwloc_topology_dup")]
    fn clone(&self) -> Self {
        // Create the clone
        let mut clone = ptr::null_mut();
        // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
        //         - hwloc ops are trusted not to modify *const parameters
        //         - clone is an out-parameter, it can have any initial value
        errors::call_hwloc_zero_or_minus1("hwloc_topology_dup", || unsafe {
            hwlocality_sys::hwloc_topology_dup(&mut clone, self.as_ptr())
        })
        // Fixed grammar of this panic message ("only fail" -> "can only fail")
        .expect("Duplicating a topology can only fail with ENOMEM, which is a panic in Rust");
        let clone = NonNull::new(clone).expect("Got null pointer from hwloc_topology_dup");

        // Topology duplication puts the clone in an un-refreshed state, which
        // does not play well with Rust's aliasing rules. Therefore we force
        // an hwloc refresh at this point
        // SAFETY: - clone is trusted to be a valid topology ptr (just checked
        //           to be non-null above)
        //         - hwloc ops are trusted to keep *mut parameters in a
        //           valid state unless stated otherwise
        let result = errors::call_hwloc_zero_or_minus1("hwloc_topology_refresh", || unsafe {
            hwlocality_sys::hwloc_topology_refresh(clone.as_ptr())
        });

        // Make sure the refresh went well
        #[cfg(not(tarpaulin_include))]
        if let Err(e) = result {
            // If not, dispose of the clone then panic
            // SAFETY: - clone is a valid topology ptr (checked above)
            //         - clone will not be usable again after this destroy
            unsafe { hwlocality_sys::hwloc_topology_destroy(clone.as_ptr()) }
            panic!("ERROR: Failed to refresh topology clone ({e}), so it's stuck in a state that violates Rust aliasing rules...");
        }
        Self(clone)
    }
}
990
991impl Debug for Topology {
992 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
993 let mut debug = f.debug_struct("Topology");
994
995 // Topology building properties
996 debug
997 .field("is_abi_compatible", &self.is_abi_compatible())
998 .field("build_flags", &self.build_flags())
999 .field("is_this_system", &self.is_this_system())
1000 .field("feature_support", self.feature_support());
1001 let type_filters = ObjectType::iter()
1002 .map(|ty| {
1003 (
1004 format!("{ty}"),
1005 self.type_filter(ty)
1006 .expect("should always succeed when called with a valid type"),
1007 )
1008 })
1009 .collect::<BTreeMap<_, _>>();
1010 debug.field("type_filter", &type_filters);
1011
1012 // TopologyObject hierarchy
1013 let objects_per_depth = NormalDepth::iter_range(NormalDepth::MIN, self.depth())
1014 .map(Depth::from)
1015 .chain(Depth::VIRTUAL_DEPTHS.iter().copied())
1016 .filter_map(|depth| {
1017 let objs = self.objects_at_depth(depth).collect::<Vec<_>>();
1018 (!objs.is_empty()).then_some((format!("{depth}"), objs))
1019 })
1020 .collect::<Vec<_>>();
1021 debug
1022 // Contains the info from most other topology queries
1023 .field("objects_per_depth", &objects_per_depth)
1024 .field("memory_parents_depth", &self.memory_parents_depth());
1025
1026 // CPU and node sets of the entire topology
1027 debug
1028 .field("cpuset", &self.cpuset())
1029 .field("complete_cpuset", &self.complete_cpuset())
1030 .field("allowed_cpuset", &self.allowed_cpuset())
1031 .field("nodeset", &self.nodeset())
1032 .field("complete_nodeset", &self.complete_nodeset())
1033 .field("allowed_nodeset", &self.allowed_nodeset());
1034
1035 // Object distances
1036 debug.field("distances", &self.distances(None));
1037
1038 // Memory attributes
1039 #[cfg(feature = "hwloc-2_3_0")]
1040 {
1041 debug.field("memory_attributes", &self.dump_memory_attributes());
1042 }
1043
1044 // Kinds of CPU cores
1045 #[cfg(feature = "hwloc-2_4_0")]
1046 {
1047 let cpu_kinds = self.cpu_kinds().into_iter().flatten().collect::<Vec<_>>();
1048 debug.field("cpu_kinds", &cpu_kinds);
1049 }
1050 debug.finish()
1051 }
1052}
1053
impl Drop for Topology {
    /// Release the underlying hwloc topology
    #[doc(alias = "hwloc_topology_destroy")]
    fn drop(&mut self) {
        // SAFETY: - Topology is trusted to contain a valid ptr (type invariant)
        //         - Topology will not be usable again after Drop
        unsafe { hwlocality_sys::hwloc_topology_destroy(self.as_mut_ptr()) }
    }
}
1062
1063impl PartialEq for Topology {
1064 fn eq(&self, other: &Self) -> bool {
1065 /// Check equality of basic topology properties
1066 macro_rules! check_all_equal {
1067 ($($property:ident),*) => {
1068 if (
1069 $(
1070 Topology::$property(self) != Topology::$property(other)
1071 )||*
1072 ) {
1073 return false;
1074 }
1075 };
1076 }
1077 check_all_equal!(
1078 is_abi_compatible,
1079 build_flags,
1080 is_this_system,
1081 feature_support,
1082 memory_parents_depth,
1083 cpuset,
1084 complete_cpuset,
1085 allowed_cpuset,
1086 nodeset,
1087 complete_nodeset,
1088 allowed_nodeset
1089 );
1090
1091 /// Retrieve all type filters to check they are the same
1092 fn type_filters(
1093 topology: &Topology,
1094 ) -> impl Iterator<Item = Result<TypeFilter, RawHwlocError>> + '_ {
1095 ObjectType::iter().map(|ty| topology.type_filter(ty))
1096 }
1097 if !type_filters(self).eq(type_filters(other)) {
1098 return false;
1099 }
1100
1101 // Check that the object hierarchy is the same
1102 if !self.has_same_object_hierarchy(other) {
1103 return false;
1104 }
1105
1106 // Check that object distances are the same
1107 let same_distances = match (self.distances(None), other.distances(None)) {
1108 (Ok(distances1), Ok(distances2)) => {
1109 distances1.len() == distances2.len()
1110 && distances1
1111 .into_iter()
1112 .zip(&distances2)
1113 .all(|(d1, d2)| d1.eq_modulo_topology(d2))
1114 }
1115 (Err(e1), Err(e2)) => e1 == e2,
1116 (Ok(_), Err(_)) | (Err(_), Ok(_)) => false,
1117 };
1118 if !same_distances {
1119 return false;
1120 }
1121
1122 // Check that memory attributes are the same
1123 #[cfg(feature = "hwloc-2_3_0")]
1124 {
1125 if !self
1126 .dump_memory_attributes()
1127 .eq_modulo_topology(&other.dump_memory_attributes())
1128 {
1129 return false;
1130 }
1131 }
1132
1133 // Check that CPU core kinds are the same
1134 #[cfg(feature = "hwloc-2_4_0")]
1135 {
1136 let same_kinds = match (self.cpu_kinds(), other.cpu_kinds()) {
1137 (Ok(kinds1), Ok(kinds2)) => kinds1.eq(kinds2),
1138 (Err(e1), Err(e2)) => e1 == e2,
1139 (Ok(_), Err(_)) | (Err(_), Ok(_)) => false,
1140 };
1141 if !same_kinds {
1142 return false;
1143 }
1144 }
1145 true
1146 }
1147}
1148
1149impl Pointer for Topology {
1150 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1151 <NonNull<hwloc_topology> as Pointer>::fmt(&self.0, f)
1152 }
1153}
1154
// SAFETY: No shared mutability: all mutation of the underlying hwloc topology
//         goes through &mut self, so moving a Topology across threads is safe
unsafe impl Send for Topology {}

// SAFETY: No shared mutability: &Topology only exposes operations that do not
//         mutate the underlying hwloc topology, so concurrent shared access
//         from several threads is safe
unsafe impl Sync for Topology {}
1160
#[allow(clippy::too_many_lines)]
#[cfg(test)]
mod tests {
    //! Unit tests for topology-wide queries, trait implementations, and
    //! work-item distribution (`distribute_items`)
    use super::*;
    use crate::ffi::PositiveInt;
    use bitflags::Flags;
    use proptest::prelude::*;
    #[allow(unused)]
    use similar_asserts::assert_eq;
    use static_assertions::{assert_impl_all, assert_not_impl_any};
    use std::{
        collections::BTreeSet,
        error::Error,
        fmt::{Binary, Display, LowerExp, LowerHex, Octal, UpperExp, UpperHex},
        hash::Hash,
        io::{self, Read},
        num::NonZeroU8,
        ops::{
            BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not, Sub, SubAssign,
        },
        panic::UnwindSafe,
    };

    // Check that public types in this module keep implementing all expected
    // traits, in the interest of detecting future semver-breaking changes
    assert_impl_all!(DistributeFlags:
        Binary, BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign,
        Copy, Debug, Default, Extend<DistributeFlags>, Flags,
        FromIterator<DistributeFlags>, Hash, IntoIterator<Item=DistributeFlags>,
        LowerHex, Not, Octal, Sized, Sub, SubAssign, Sync, UpperHex, Unpin,
        UnwindSafe
    );
    assert_not_impl_any!(DistributeFlags:
        Display, Drop, PartialOrd, Pointer, LowerExp, Read, UpperExp,
        fmt::Write, io::Write
    );
    assert_impl_all!(Topology:
        Debug, Drop, PartialEq, Pointer, Sized, Sync, Unpin, UnwindSafe
    );
    #[cfg(feature = "hwloc-2_3_0")]
    assert_impl_all!(Topology: Clone);
    assert_not_impl_any!(Topology:
        Binary, Copy, Default, Deref, Display, IntoIterator, LowerExp, LowerHex,
        Octal, Read, UpperExp, UpperHex, fmt::Write, io::Write
    );
    assert_impl_all!(DistributeError:
        Clone, Error, Hash, Sized, Sync, Unpin, UnwindSafe
    );
    assert_not_impl_any!(DistributeError:
        Binary, Copy, Default, Deref, Drop, IntoIterator,
        LowerExp, LowerHex, Octal, PartialOrd, Pointer, Read,
        UpperExp, UpperHex, fmt::Write, io::Write
    );

    // Empty flags should be the default
    #[test]
    fn default_distribute_flags() {
        assert_eq!(DistributeFlags::default(), DistributeFlags::empty());
    }

    // Cloning should yield a distinct but equal topology
    #[cfg(feature = "hwloc-2_3_0")]
    #[test]
    fn clone() -> Result<(), TestCaseError> {
        use crate::topology::builder::tests::DataSource;
        let topology = Topology::test_instance();
        let clone = topology.clone();
        // The clone must point to a different hwloc topology object...
        assert_ne!(format!("{:p}", *topology), format!("{clone:p}"));
        // NOTE(review): this re-validates the original topology after cloning;
        //               consider also running check_topology on `clone` itself
        builder::tests::check_topology(
            topology,
            DataSource::ThisSystem,
            topology.build_flags(),
            |ty| Ok(topology.type_filter(ty).unwrap()),
        )?;
        // ...whose objects do not belong to the original topology...
        assert!(!topology.contains(clone.root_object()));
        // ...but which still compares equal to it
        assert_eq!(topology, &clone);
        Ok(())
    }

    // Debug output should not panic, and a topology must equal itself
    #[allow(clippy::print_stdout, clippy::use_debug)]
    #[test]
    fn debug_and_self_eq() {
        let topology = Topology::test_instance();
        assert_eq!(topology, topology);
        println!("{topology:#?}");
    }

    /// Bias the `max_depth` input to `distribute_items` tests so that
    /// interesting depth values below the maximum possible depth are sampled
    /// often enough
    fn max_depth() -> impl Strategy<Value = NormalDepth> {
        prop_oneof![
            // 4/5 of the time, sample a depth that exists in the test topology
            4 => (0..usize::from(Topology::test_instance().depth()))
                .prop_map(|us| NormalDepth::try_from(us).unwrap()),
            // 1/5 of the time, sample any depth (likely out of range)
            1 => any::<NormalDepth>()
        ]
    }

    proptest! {
        // Check that absence of roots to distribute to is reported correctly
        #[test]
        fn distribute_nowhere(num_items: NonZeroU8, max_depth in max_depth(), flags: DistributeFlags) {
            let num_items = usize::from(num_items.get());
            prop_assert_eq!(
                Topology::test_instance().distribute_items(&[], num_items, max_depth, flags),
                Err(DistributeError::EmptyRoots)
            );
        }
    }

    /// Generate valid (disjoint) roots for [`Topology::distribute_items()`],
    /// taken from [`Topology::test_instance()`]
    fn disjoint_roots() -> impl Strategy<Value = Vec<&'static TopologyObject>> {
        /// Number of PUs below a normal object
        fn normal_weight(obj: &TopologyObject) -> usize {
            obj.cpuset()
                .expect("normal object should have a cpuset")
                .weight()
                .expect("normal object should have a finite cpuset")
        }

        /// Pick N objects below a certain root
        ///
        /// `root` must be a normal object that has at least `num_objects` PUs
        /// underneath it.
        ///
        /// Objects will be ordered by logical index, the client is responsible
        /// for shuffling them at the very end.
        fn pick_disjoint_objects(
            root: &'static TopologyObject,
            num_objects: usize,
        ) -> impl Strategy<Value = Vec<&'static TopologyObject>> {
            // Validate the request
            assert!(
                root.object_type().is_normal(),
                "root object should be normal"
            );
            assert!(num_objects <= normal_weight(root));

            // Honor the request
            match num_objects {
                // Picking no objects is trivial
                0 => Just(Vec::new()).boxed(),

                // Picking a single object is easy too, just pick any object in
                // the subtree below this root
                1 => {
                    let topology = Topology::test_instance();
                    let subtree_objects = topology
                        .normal_objects()
                        .filter(|obj| obj.is_in_subtree(root) || ptr::eq(*obj, root))
                        .collect::<Vec<_>>();
                    prop::sample::select(subtree_objects)
                        .prop_map(|obj| vec![obj])
                        .boxed()
                }

                // If we need to pick 2 or more objects, we must distribute them
                // across the root's children
                _ => {
                    // Each child can at most yield as many objects as there are
                    // PUs underneath it, and we must not yield more PUs than
                    // this. This is achieved through the dirty trick of first
                    // duplicating each child once per underlying PU...
                    let degenerate_children = root
                        .normal_children()
                        .flat_map(|child| std::iter::repeat(child).take(normal_weight(child)))
                        .collect::<Vec<_>>();
                    debug_assert_eq!(degenerate_children.len(), normal_weight(root));

                    // Then picking a subsequence of that duplicated sequence...
                    prop::sample::subsequence(degenerate_children, num_objects)
                        .prop_flat_map(|selected_degenerate| {
                            // Then deduplicating again to find out how many
                            // objects were allocated to each child.
                            let mut count_per_child =
                                Vec::<(&'static TopologyObject, usize)>::new();
                            'children: for child in selected_degenerate {
                                if let Some((last_child, last_count)) = count_per_child.last_mut() {
                                    if ptr::eq(child, *last_child) {
                                        *last_count += 1;
                                        continue 'children;
                                    }
                                }
                                count_per_child.push((child, 1));
                            }

                            // Once we have the deduplicated per-child work
                            // allocation, we do the recursive request, which
                            // will yield a vector of results for each child...
                            let nested_objs = count_per_child
                                .into_iter()
                                .map(|(child, count)| pick_disjoint_objects(child, count))
                                .collect::<Vec<_>>();

                            // ...and we flatten that into a single vector of
                            // merged results
                            nested_objs.prop_map(|nested_objs| {
                                nested_objs.into_iter().flatten().collect::<Vec<_>>()
                            })
                        })
                        .boxed()
                }
            }
        }

        // Finally, we use the above logic to generate any valid number of roots
        let root = Topology::test_instance().root_object();
        (1..=normal_weight(root))
            .prop_flat_map(|num_objects| pick_disjoint_objects(root, num_objects))
            .prop_shuffle()
    }

    proptest! {
        // Check that requests to distribute 0 items are handled correctly
        #[test]
        fn distribute_nothing(
            disjoint_roots in disjoint_roots(),
            max_depth in max_depth(),
            flags: DistributeFlags,
        ) {
            prop_assert_eq!(
                Topology::test_instance().distribute_items(&disjoint_roots, 0, max_depth, flags),
                Ok(Vec::new())
            );
        }
    }

    /// Provide a pessimistic set of leaves amongst which
    /// [`Topology::distribute_items()`] could distribute work, given roots and
    /// a max depth and a number of items to be distributed
    ///
    /// The actual set can be more restrained in the presence of heterogeneous
    /// root objects.
    fn find_possible_leaves<'output>(
        roots: &[&'output TopologyObject],
        max_depth: NormalDepth,
    ) -> Vec<&'output TopologyObject> {
        let mut input = roots.to_vec();
        let mut output = Vec::new();
        // Breadth-first descent from the roots, stopping at max_depth
        for _ in PositiveInt::iter_range(PositiveInt::MIN, max_depth) {
            let mut new_leaves = false;
            for obj in input.drain(..) {
                if obj.normal_arity() > 0 && obj.depth().expect_normal() < max_depth {
                    output.extend(obj.normal_children());
                    new_leaves = true;
                } else {
                    // No children or depth limit reached: obj is a leaf
                    output.push(obj);
                }
            }
            std::mem::swap(&mut input, &mut output);
            if !new_leaves {
                break;
            }
        }
        input
    }

    proptest! {
        // Check that distribute_items works well given correct inputs
        #[test]
        fn distribute_correct(
            disjoint_roots in disjoint_roots(),
            max_depth in max_depth(),
            num_items: NonZeroU8,
            flags: DistributeFlags,
        ) {
            // Set up test state
            let topology = Topology::test_instance();
            let num_items = usize::from(num_items.get());

            // Check results on normal roots
            #[allow(clippy::cast_precision_loss)]
            {
                // Run the computation
                let item_sets = topology
                    .distribute_items(&disjoint_roots, num_items, max_depth, flags)
                    .unwrap();
                prop_assert_eq!(item_sets.len(), num_items);

                // Predict among which leaves of the object tree work items could
                // potentially have been distributed
                let possible_leaf_objects = find_possible_leaves(&disjoint_roots, max_depth);
                let mut possible_leaf_sets = possible_leaf_objects
                    .iter()
                    .map(|obj| obj.cpuset().unwrap().clone_target())
                    .collect::<BTreeSet<CpuSet>>();

                // Count how many work items were distributed to each leaf
                let mut items_per_set = Vec::<(CpuSet, usize)>::new();
                for item_set in item_sets {
                    match items_per_set.last_mut() {
                        Some((last_set, count)) if last_set == &item_set => *count += 1,
                        _ => items_per_set.push((item_set, 1)),
                    }
                }

                // Make sure work items were indeed only distributed to these leaves
                for item_set in items_per_set.iter().map(|(set, _count)| set) {
                    // ...but bear in mind that the algorithm may merge adjacent
                    // leaf cpusets if there are fewer items than leaf cpusets.
                    if !possible_leaf_sets.contains(item_set) {
                        // Compute the expected effect of merging
                        let subsets = possible_leaf_objects
                            .iter()
                            .map(|obj| obj.cpuset().unwrap())
                            .skip_while(|&subset| !item_set.includes(subset))
                            .take_while(|&subset| item_set.includes(subset))
                            .map(|subset| subset.clone_target())
                            .collect::<Vec<_>>();
                        let merged_set = subsets.iter().fold(CpuSet::new(), |mut acc, subset| {
                            acc |= subset;
                            acc
                        });

                        // Check if this indeed what happened
                        prop_assert_eq!(
                            item_set,
                            &merged_set,
                            "Distributed set {} is not part of expected leaf sets {:?}",
                            item_set,
                            possible_leaf_sets
                        );
                        prop_assert_eq!(
                            items_per_set
                                .iter()
                                .filter(|(item_set, _count)| item_set.intersects(&merged_set))
                                .count(),
                            1,
                            "Merging should only occurs when 1 item is shared by N leaves"
                        );

                        // Take the merging of leaves into account
                        for subset in subsets {
                            prop_assert!(possible_leaf_sets.remove(&subset));
                        }
                        prop_assert!(possible_leaf_sets.insert(merged_set));
                    }
                }

                // Make sure the leaves that were actually used are disjoint (i.e.
                // no distributing to a parent AND its children)
                prop_assert!(!sets_overlap(items_per_set.iter().map(|(set, _count)| set)));

                // Check that the distribution is as close to ideal as possible,
                // given that work items cannot be split in halves
                let total_weight = items_per_set
                    .iter()
                    .map(|(leaf_set, _count)| leaf_set.weight().unwrap())
                    .sum::<usize>();
                for (leaf_set, items_per_leaf) in &items_per_set {
                    let cpu_share = leaf_set.weight().unwrap() as f64 / total_weight as f64;
                    let ideal_share = num_items as f64 * cpu_share;
                    prop_assert!((*items_per_leaf as f64 - ideal_share).abs() <= 1.0);
                }

                // Check that the distribution is biased towards earlier or later
                // leaves depending on distribution flags, due to roots being
                // enumerated in reverse order
                let (first_set, items_per_leaf) = items_per_set.first().unwrap();
                let first_set_intersects = |root: Option<&TopologyObject>| {
                    prop_assert!(first_set.intersects(root.unwrap().cpuset().unwrap()));
                    Ok(())
                };
                if flags.contains(DistributeFlags::REVERSE) {
                    first_set_intersects(disjoint_roots.last().copied())?;
                } else {
                    first_set_intersects(disjoint_roots.first().copied())?;
                }
                let cpu_share = first_set.weight().unwrap() as f64 / total_weight as f64;
                let ideal_share = num_items as f64 * cpu_share;
                let bias = *items_per_leaf as f64 - ideal_share;
                prop_assert!(
                    bias >= 0.0,
                    "Earlier roots should get favored for item allocation"
                );
            }
        }
    }

    /// To the random input provided by `disjoint_roots`, add an extra root that
    /// overlaps with the existing ones
    fn overlapping_roots() -> impl Strategy<Value = Vec<&'static TopologyObject>> {
        disjoint_roots().prop_flat_map(|disjoint_roots| {
            // Randomly pick a CPU in the set covered by the disjoint roots, and
            // find the smallest hwloc object (normally a PU) that covers it.
            // This will be used to check distribute_items's handling of
            // duplicate roots.
            let topology = Topology::test_instance();
            let full_set = disjoint_roots.iter().fold(CpuSet::new(), |mut acc, root| {
                acc |= root.cpuset().unwrap();
                acc
            });
            let overlapping_pu = (0..full_set.weight().unwrap()).prop_map(move |cpu_idx| {
                topology
                    .smallest_object_covering_cpuset(&CpuSet::from(
                        full_set.iter_set().nth(cpu_idx).unwrap(),
                    ))
                    .unwrap()
            });

            // Empirically, this can pick a PU without a cpuset on weird systems
            // like GitHub's CI nodes. Address this by going up ancestors until
            // a cpuset is found.
            let overlapping_root = overlapping_pu.prop_map(|pu| {
                std::iter::once(pu)
                    .chain(pu.ancestors())
                    .find(|root| root.cpuset().is_some())
                    .unwrap()
            });

            // Insert this overlapping object at a random position
            let num_roots = disjoint_roots.len();
            (Just(disjoint_roots), overlapping_root, 0..num_roots).prop_map(
                |(mut disjoint_roots, overlapping_root, bad_root_idx)| {
                    disjoint_roots.insert(bad_root_idx, overlapping_root);
                    disjoint_roots
                },
            )
        })
    }

    proptest! {
        // Overlapping roots must be detected and rejected
        #[test]
        fn distribute_overlapping(
            overlapping_roots in overlapping_roots(),
            max_depth in max_depth(),
            num_items: NonZeroU8,
            flags: DistributeFlags,
        ) {
            let num_items = usize::from(num_items.get());
            prop_assert_eq!(
                Topology::test_instance().distribute_items(&overlapping_roots, num_items, max_depth, flags),
                Err(DistributeError::OverlappingRoots)
            );
        }
    }

    /// Pick a random normal object from the foreign test instance
    #[cfg(feature = "hwloc-2_3_0")]
    fn foreign_normal_object() -> impl Strategy<Value = &'static TopologyObject> {
        // Cache the foreign object list: it never changes across test cases
        static FOREIGNERS: OnceLock<Box<[&'static TopologyObject]>> = OnceLock::new();
        let objects =
            FOREIGNERS.get_or_init(|| Topology::foreign_instance().normal_objects().collect());
        prop::sample::select(&objects[..])
    }

    /// To the random input provided by `disjoint_roots`, add an extra root from
    /// a different topology, and report at which index it was added
    #[cfg(feature = "hwloc-2_3_0")]
    fn foreign_roots_and_idx() -> impl Strategy<Value = (Vec<&'static TopologyObject>, usize)> {
        // Pick a set of disjoint roots and a foreign object
        (disjoint_roots(), foreign_normal_object()).prop_flat_map(
            |(disjoint_roots, foreign_object)| {
                // Insert the foreign object at a random position, report where
                let num_roots = disjoint_roots.len();
                (Just((disjoint_roots, foreign_object)), 0..num_roots).prop_map(
                    |((mut disjoint_roots, foreign_object), bad_root_idx)| {
                        disjoint_roots.insert(bad_root_idx, foreign_object);
                        (disjoint_roots, bad_root_idx)
                    },
                )
            },
        )
    }

    #[cfg(feature = "hwloc-2_3_0")]
    proptest! {
        // Roots from another topology must be detected and rejected
        #[test]
        fn distribute_foreign(
            (foreign_roots, foreign_idx) in foreign_roots_and_idx(),
            max_depth in max_depth(),
            num_items: NonZeroU8,
            flags: DistributeFlags,
        ) {
            let num_items = usize::from(num_items.get());
            let foreign_object = foreign_roots[foreign_idx];
            prop_assert_eq!(
                Topology::test_instance().distribute_items(&foreign_roots, num_items, max_depth, flags),
                Err(DistributeError::ForeignRoot(foreign_object.into()))
            );
        }
    }
}