#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "topo.h"
#include "string.h"
#include "sys.h"
#include <ucs/datastruct/khash.h>
#include <ucs/type/spinlock.h>
#include <ucs/debug/assert.h>
#include <ucs/debug/log.h>
#include <ucs/time/time.h>
#include <inttypes.h>
#include <float.h>
#define UCS_TOPO_MAX_SYS_DEVICES 256
#define UCS_TOPO_SYSFS_PCI_PREFIX "/sys/bus/pci/devices/"
#define UCS_TOPO_SYSFS_DEVICES_ROOT "/sys/devices"
#define UCS_TOPO_DEVICE_NAME_UNKNOWN "<unknown>"
#define UCS_TOPO_DEVICE_NAME_INVALID "<invalid>"
typedef int64_t ucs_bus_id_bit_rep_t;
typedef struct {
ucs_sys_bus_id_t bus_id;
char *name;
} ucs_topo_sys_device_info_t;
KHASH_MAP_INIT_INT64(bus_to_sys_dev, ucs_sys_device_t);
typedef struct ucs_topo_global_ctx {
ucs_spinlock_t lock;
khash_t(bus_to_sys_dev) bus_to_sys_dev_hash;
ucs_topo_sys_device_info_t devices[UCS_TOPO_MAX_SYS_DEVICES];
unsigned num_devices;
} ucs_topo_global_ctx_t;
const ucs_sys_dev_distance_t ucs_topo_default_distance = {
.latency = 0,
.bandwidth = DBL_MAX
};
static ucs_topo_global_ctx_t ucs_topo_global_ctx;
static ucs_bus_id_bit_rep_t
ucs_topo_get_bus_id_bit_repr(const ucs_sys_bus_id_t *bus_id)
{
return (((uint64_t)bus_id->domain << 24) |
((uint64_t)bus_id->bus << 16) |
((uint64_t)bus_id->slot << 8) |
(bus_id->function));
}
void ucs_topo_init()
{
ucs_spinlock_init(&ucs_topo_global_ctx.lock, 0);
kh_init_inplace(bus_to_sys_dev, &ucs_topo_global_ctx.bus_to_sys_dev_hash);
ucs_topo_global_ctx.num_devices = 0;
}
void ucs_topo_cleanup()
{
ucs_topo_sys_device_info_t *device;
while (ucs_topo_global_ctx.num_devices-- > 0) {
device = &ucs_topo_global_ctx.devices[ucs_topo_global_ctx.num_devices];
ucs_free(device->name);
}
kh_destroy_inplace(bus_to_sys_dev,
&ucs_topo_global_ctx.bus_to_sys_dev_hash);
ucs_spinlock_destroy(&ucs_topo_global_ctx.lock);
}
unsigned ucs_topo_num_devices()
{
unsigned num_devices;
ucs_spin_lock(&ucs_topo_global_ctx.lock);
num_devices = ucs_topo_global_ctx.num_devices;
ucs_spin_unlock(&ucs_topo_global_ctx.lock);
return num_devices;
}
static void ucs_topo_bus_id_str(const ucs_sys_bus_id_t *bus_id, int abbreviate,
char *str, size_t max)
{
if (abbreviate && (bus_id->domain == 0)) {
ucs_snprintf_safe(str, max, "%02x:%02x.%d", bus_id->bus, bus_id->slot,
bus_id->function);
} else {
ucs_snprintf_safe(str, max, "%04x:%02x:%02x.%d", bus_id->domain,
bus_id->bus, bus_id->slot, bus_id->function);
}
}
ucs_status_t ucs_topo_find_device_by_bus_id(const ucs_sys_bus_id_t *bus_id,
ucs_sys_device_t *sys_dev)
{
ucs_bus_id_bit_rep_t bus_id_bit_rep;
ucs_kh_put_t kh_put_status;
khiter_t hash_it;
char *name;
bus_id_bit_rep = ucs_topo_get_bus_id_bit_repr(bus_id);
ucs_spin_lock(&ucs_topo_global_ctx.lock);
hash_it = kh_put(
bus_to_sys_dev ,
&ucs_topo_global_ctx.bus_to_sys_dev_hash ,
bus_id_bit_rep , &kh_put_status);
if (kh_put_status == UCS_KH_PUT_KEY_PRESENT) {
*sys_dev = kh_value(&ucs_topo_global_ctx.bus_to_sys_dev_hash, hash_it);
ucs_debug("bus id 0x%"PRIx64" exists. sys_dev = %u", bus_id_bit_rep,
*sys_dev);
} else if ((kh_put_status == UCS_KH_PUT_BUCKET_EMPTY) ||
(kh_put_status == UCS_KH_PUT_BUCKET_CLEAR)) {
ucs_assert_always(ucs_topo_global_ctx.num_devices <
UCS_TOPO_MAX_SYS_DEVICES);
*sys_dev = ucs_topo_global_ctx.num_devices;
++ucs_topo_global_ctx.num_devices;
ucs_debug("bus id 0x%"PRIx64" doesn't exist. sys_dev = %u",
bus_id_bit_rep, *sys_dev);
kh_value(&ucs_topo_global_ctx.bus_to_sys_dev_hash, hash_it) = *sys_dev;
name = ucs_malloc(UCS_SYS_BDF_NAME_MAX, "sys_dev_bdf_name");
if (name != NULL) {
ucs_topo_bus_id_str(bus_id, 1, name, UCS_SYS_BDF_NAME_MAX);
}
ucs_topo_global_ctx.devices[*sys_dev].bus_id = *bus_id;
ucs_topo_global_ctx.devices[*sys_dev].name = name;
}
ucs_spin_unlock(&ucs_topo_global_ctx.lock);
return UCS_OK;
}
ucs_status_t ucs_topo_get_device_bus_id(ucs_sys_device_t sys_dev,
ucs_sys_bus_id_t *bus_id)
{
if (sys_dev >= ucs_topo_global_ctx.num_devices) {
return UCS_ERR_NO_ELEM;
}
*bus_id = ucs_topo_global_ctx.devices[sys_dev].bus_id;
return UCS_OK;
}
static ucs_status_t
ucs_topo_get_sysfs_path(ucs_sys_device_t sys_dev, char *path, size_t max)
{
const size_t prefix_length = strlen(UCS_TOPO_SYSFS_PCI_PREFIX);
char link_path[PATH_MAX];
ucs_status_t status;
if (max < PATH_MAX) {
status = UCS_ERR_BUFFER_TOO_SMALL;
goto out;
}
ucs_spin_lock(&ucs_topo_global_ctx.lock);
if (sys_dev >= ucs_topo_global_ctx.num_devices) {
ucs_error("system device %d is invalid (max: %d)", sys_dev,
ucs_topo_global_ctx.num_devices);
status = UCS_ERR_INVALID_PARAM;
goto out_unlock;
}
ucs_strncpy_safe(link_path, UCS_TOPO_SYSFS_PCI_PREFIX, PATH_MAX);
ucs_topo_bus_id_str(&ucs_topo_global_ctx.devices[sys_dev].bus_id, 0,
link_path + prefix_length, PATH_MAX - prefix_length);
if (realpath(link_path, path) == NULL) {
status = UCS_ERR_IO_ERROR;
goto out_unlock;
}
status = UCS_OK;
out_unlock:
ucs_spin_unlock(&ucs_topo_global_ctx.lock);
out:
return status;
}
static int ucs_topo_is_sys_root(const char *path)
{
return !strcmp(path, UCS_TOPO_SYSFS_DEVICES_ROOT);
}
static int ucs_topo_is_pci_root(const char *path)
{
int count;
sscanf(path, UCS_TOPO_SYSFS_DEVICES_ROOT "/pci%*d:%*d%n", &count);
return count == strlen(path);
}
static void ucs_topo_sys_root_distance(ucs_sys_dev_distance_t *distance)
{
distance->latency = 500e-9;
switch (ucs_arch_get_cpu_model()) {
case UCS_CPU_MODEL_AMD_ROME:
distance->bandwidth = 5100 * UCS_MBYTE;
break;
default:
distance->bandwidth = 220 * UCS_MBYTE;
break;
}
}
static void ucs_topo_pci_root_distance(const char *path1, const char *path2,
ucs_sys_dev_distance_t *distance)
{
size_t path_distance = ucs_path_calc_distance(path1, path2);
ucs_trace_data("distance between '%s' and '%s' is %zu", path1, path2,
path_distance);
ucs_assertv(path_distance > 0, "path1=%s path2=%s", path1, path2);
distance->latency = 300e-9;
distance->bandwidth = ucs_min(3500.0 * UCS_MBYTE,
(19200.0 * UCS_MBYTE) / path_distance);
}
ucs_status_t ucs_topo_get_distance(ucs_sys_device_t device1,
ucs_sys_device_t device2,
ucs_sys_dev_distance_t *distance)
{
char path1[PATH_MAX], path2[PATH_MAX], common_path[PATH_MAX];
ucs_status_t status;
if ((device1 == UCS_SYS_DEVICE_ID_UNKNOWN) ||
(device2 == UCS_SYS_DEVICE_ID_UNKNOWN) || (device1 == device2)) {
*distance = ucs_topo_default_distance;
return UCS_OK;
}
status = ucs_topo_get_sysfs_path(device1, path1, sizeof(path1));
if (status != UCS_OK) {
return status;
}
status = ucs_topo_get_sysfs_path(device2, path2, sizeof(path2));
if (status != UCS_OK) {
return status;
}
ucs_path_get_common_parent(path1, path2, common_path);
if (ucs_topo_is_sys_root(common_path)) {
ucs_topo_sys_root_distance(distance);
} else if (ucs_topo_is_pci_root(common_path)) {
ucs_topo_pci_root_distance(path1, path2, distance);
} else {
*distance = ucs_topo_default_distance;
}
return UCS_OK;
}
const char *ucs_topo_distance_str(const ucs_sys_dev_distance_t *distance,
char *buffer, size_t max)
{
UCS_STRING_BUFFER_FIXED(strb, buffer, max);
ucs_string_buffer_appendf(&strb, "%.0fns ",
distance->latency * UCS_NSEC_PER_SEC);
if (distance->bandwidth <= UCS_PBYTE) {
ucs_string_buffer_appendf(&strb, "%.2fMB/s",
distance->bandwidth / UCS_MBYTE);
} else {
ucs_string_buffer_appendf(&strb, ">1PB/s");
}
return ucs_string_buffer_cstr(&strb);
}
const char *
ucs_topo_sys_device_bdf_name(ucs_sys_device_t sys_dev, char *buffer, size_t max)
{
if (sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN) {
ucs_strncpy_safe(buffer, UCS_TOPO_DEVICE_NAME_UNKNOWN, max);
} else {
ucs_spin_lock(&ucs_topo_global_ctx.lock);
if (sys_dev < ucs_topo_global_ctx.num_devices) {
ucs_topo_bus_id_str(&ucs_topo_global_ctx.devices[sys_dev].bus_id, 0,
buffer, max);
} else {
ucs_strncpy_safe(buffer, UCS_TOPO_DEVICE_NAME_INVALID, max);
}
ucs_spin_unlock(&ucs_topo_global_ctx.lock);
}
return buffer;
}
ucs_status_t
ucs_topo_find_device_by_bdf_name(const char *name, ucs_sys_device_t *sys_dev)
{
ucs_sys_bus_id_t bus_id;
int num_fields;
num_fields = sscanf(name, "%hx:%hhx:%hhx.%hhx", &bus_id.domain, &bus_id.bus,
&bus_id.slot, &bus_id.function);
if (num_fields == 4) {
return ucs_topo_find_device_by_bus_id(&bus_id, sys_dev);
}
bus_id.domain = 0;
num_fields = sscanf(name, "%hhx:%hhx.%hhx", &bus_id.bus, &bus_id.slot,
&bus_id.function);
if (num_fields == 3) {
return ucs_topo_find_device_by_bus_id(&bus_id, sys_dev);
}
return UCS_ERR_INVALID_PARAM;
}
ucs_status_t
ucs_topo_sys_device_set_name(ucs_sys_device_t sys_dev, const char *name)
{
ucs_spin_lock(&ucs_topo_global_ctx.lock);
if (sys_dev >= ucs_topo_global_ctx.num_devices) {
ucs_error("system device %d is invalid (max: %d)", sys_dev,
ucs_topo_global_ctx.num_devices);
ucs_spin_unlock(&ucs_topo_global_ctx.lock);
return UCS_ERR_INVALID_PARAM;
}
ucs_free(ucs_topo_global_ctx.devices[sys_dev].name);
ucs_topo_global_ctx.devices[sys_dev].name = ucs_strdup(name,
"sys_dev_name");
ucs_spin_unlock(&ucs_topo_global_ctx.lock);
return UCS_OK;
}
const char *ucs_topo_sys_device_get_name(ucs_sys_device_t sys_dev)
{
const char *name;
if (sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN) {
name = UCS_TOPO_DEVICE_NAME_UNKNOWN;
} else {
ucs_spin_lock(&ucs_topo_global_ctx.lock);
if (sys_dev < ucs_topo_global_ctx.num_devices) {
name = ucs_topo_global_ctx.devices[sys_dev].name;
} else {
name = UCS_TOPO_DEVICE_NAME_INVALID;
}
ucs_spin_unlock(&ucs_topo_global_ctx.lock);
}
return name;
}
void ucs_topo_print_info(FILE *stream)
{
char bdf_name[UCS_SYS_BDF_NAME_MAX];
volatile ucs_sys_device_t sys_dev;
for (sys_dev = 0; sys_dev < ucs_topo_global_ctx.num_devices; ++sys_dev) {
fprintf(stream, " %d %*s %s\n", sys_dev, UCS_SYS_BDF_NAME_MAX,
ucs_topo_sys_device_bdf_name(sys_dev, bdf_name,
sizeof(bdf_name)),
ucs_topo_global_ctx.devices[sys_dev].name);
}
}