#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "wireup.h"
#include "wireup_cm.h"
#include "address.h"
#include <ucs/algorithm/qsort_r.h>
#include <ucs/datastruct/queue.h>
#include <ucs/sys/sock.h>
#include <ucp/core/ucp_ep.inl>
#include <string.h>
#include <inttypes.h>
/* Message size, in bytes (256KB), used by ucp_wireup_rma_bw_score_func() to
 * estimate the transfer time of a transport when scoring RMA_BW candidates. */
#define UCP_WIREUP_RMA_BW_TEST_MSG_SIZE 262144
/* Local interface event capabilities which ucp_wireup_add_am_lane() requires
 * from the AM transport when both the TAG and WAKEUP features are enabled. */
#define UCP_WIREUP_UCT_EVENT_CAP_FLAGS (UCT_IFACE_FLAG_EVENT_SEND_COMP | \
UCT_IFACE_FLAG_EVENT_RECV)
/*
 * Check that the remote address entry '_ae' supports all atomic flags which
 * '_criteria' requires for the given operation kind '_op' ("op" or "fop") and
 * operand size '_size' (32 or 64).
 *
 * If a required flag is missing, the macro traces the first missing one and
 * executes 'continue', so it can be expanded only inside a loop body. It also
 * declares a local buffer named 'desc' inside its own 'if' scope.
 */
#define UCP_WIREUP_CHECK_AMO_FLAGS(_ae, _criteria, _context, _addr_index, _op, _size) \
if (!ucs_test_all_flags((_ae)->iface_attr.atomic.atomic##_size._op##_flags, \
(_criteria)->remote_atomic_flags.atomic##_size._op##_flags)) { \
char desc[256]; \
ucs_trace("addr[%d] %s: no %s", (_addr_index), \
ucp_find_tl_name_by_csum((_context), (_ae)->tl_name_csum), \
ucp_wireup_get_missing_amo_flag_desc_##_op( \
(_ae)->iface_attr.atomic.atomic##_size._op##_flags, \
(_criteria)->remote_atomic_flags.atomic##_size._op##_flags, \
(_size), desc, sizeof(desc))); \
continue; \
}
typedef struct ucp_wireup_atomic_flag {
const char *name;
const char *fetch;
} ucp_wireup_atomic_flag_t;
/* Description of one selected lane, filled by ucp_wireup_add_lane_desc(). */
typedef struct {
/* Local resource index (UCP_NULL_RESOURCE for the CM lane) */
ucp_rsc_index_t rsc_index;
/* Index of the selected entry in the remote address list */
unsigned addr_index;
/* Path index copied from the selection info */
unsigned path_index;
/* Memory domain index taken from the remote address entry */
ucp_md_index_t dst_md_index;
/* System device taken from the remote address entry */
ucs_sys_device_t dst_sys_dev;
/* Bitmask of UCS_BIT(lane_type) for every usage this lane serves */
ucp_lane_type_mask_t lane_types;
/* Segment size taken from the remote address entry's interface attributes */
size_t seg_size;
/* Selection score of this lane, per lane type (0.0 if not selected for it) */
double score[UCP_LANE_TYPE_LAST];
} ucp_wireup_lane_desc_t;
/* Input of ucp_wireup_add_bw_lanes(): what to select and the initial state of
 * the device/MD bookkeeping. */
typedef struct {
/* Selection criteria passed to ucp_wireup_select_transport() */
ucp_wireup_criteria_t criteria;
/* Local devices which may be used initially */
uint64_t local_dev_bitmap;
/* Remote devices which may be used initially */
uint64_t remote_dev_bitmap;
/* Initial map of memory domains already in use; selection stops when its
 * population count reaches UCP_MAX_OP_MDS */
ucp_md_map_t md_map;
/* Lane type assigned to every lane that is added */
ucp_lane_type_t lane_type;
/* Maximal number of lanes to add */
unsigned max_lanes;
} ucp_wireup_select_bw_info_t;
/* Read-only parameters shared by all lane selection steps. */
typedef struct {
/* Endpoint for which lanes are selected */
ucp_ep_h ep;
/* Endpoint initialization flags (UCP_EP_INIT_xx) */
unsigned ep_init_flags;
/* Local resources allowed for selection (ANDed with per-call bitmaps) */
ucp_tl_bitmap_t tl_bitmap;
/* Unpacked address of the remote peer */
const ucp_unpacked_address_t *address;
/* Nonzero if a missing memory-access lane may be replaced by requesting an
 * AM lane instead of failing (see ucp_wireup_add_memaccess_lanes()) */
int allow_am;
/* Nonzero to allow error printing; combined with the per-call show_error */
int show_error;
} ucp_wireup_select_params_t;
/* Mutable state accumulated during lane selection. */
typedef struct {
/* Lanes selected so far; only the first num_lanes entries are valid */
ucp_wireup_lane_desc_t lane_descs[UCP_MAX_LANES];
/* Number of valid entries in lane_descs */
ucp_lane_index_t num_lanes;
/* Extra endpoint init flags raised during selection; ORed with
 * select_params->ep_init_flags by ucp_wireup_ep_init_flags() */
unsigned ucp_ep_init_flags;
} ucp_wireup_select_context_t;
/* Descriptions of memory domain capability flags, indexed by the bit position
 * of the flag. Used to report which required MD capability is missing. */
static const char *ucp_wireup_md_flags[] = {
[ucs_ilog2(UCT_MD_FLAG_ALLOC)] = "memory allocation",
[ucs_ilog2(UCT_MD_FLAG_REG)] = "memory registration",
[ucs_ilog2(UCT_MD_FLAG_INVALIDATE)] = "memory invalidation",
[ucs_ilog2(UCT_MD_FLAG_RKEY_PTR)] = "obtain remote memory pointer"
};
/* Descriptions of local interface capability flags, indexed by the bit
 * position of the flag. Used to report which required local interface
 * capability is missing. */
static const char *ucp_wireup_iface_flags[] = {
[ucs_ilog2(UCT_IFACE_FLAG_AM_SHORT)] = "am short",
[ucs_ilog2(UCT_IFACE_FLAG_AM_BCOPY)] = "am bcopy",
[ucs_ilog2(UCT_IFACE_FLAG_AM_ZCOPY)] = "am zcopy",
[ucs_ilog2(UCT_IFACE_FLAG_PUT_SHORT)] = "put short",
[ucs_ilog2(UCT_IFACE_FLAG_PUT_BCOPY)] = "put bcopy",
[ucs_ilog2(UCT_IFACE_FLAG_PUT_ZCOPY)] = "put zcopy",
[ucs_ilog2(UCT_IFACE_FLAG_GET_SHORT)] = "get short",
[ucs_ilog2(UCT_IFACE_FLAG_GET_BCOPY)] = "get bcopy",
[ucs_ilog2(UCT_IFACE_FLAG_GET_ZCOPY)] = "get zcopy",
[ucs_ilog2(UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE)] = "peer failure handler",
[ucs_ilog2(UCT_IFACE_FLAG_CONNECT_TO_IFACE)] = "connect to iface",
[ucs_ilog2(UCT_IFACE_FLAG_CONNECT_TO_EP)] = "connect to ep",
[ucs_ilog2(UCT_IFACE_FLAG_AM_DUP)] = "full reliability",
[ucs_ilog2(UCT_IFACE_FLAG_CB_SYNC)] = "sync callback",
[ucs_ilog2(UCT_IFACE_FLAG_CB_ASYNC)] = "async callback",
[ucs_ilog2(UCT_IFACE_FLAG_PENDING)] = "pending",
[ucs_ilog2(UCT_IFACE_FLAG_TAG_EAGER_SHORT)] = "tag eager short",
[ucs_ilog2(UCT_IFACE_FLAG_TAG_EAGER_BCOPY)] = "tag eager bcopy",
[ucs_ilog2(UCT_IFACE_FLAG_TAG_EAGER_ZCOPY)] = "tag eager zcopy",
[ucs_ilog2(UCT_IFACE_FLAG_TAG_RNDV_ZCOPY)] = "tag rndv zcopy",
[ucs_ilog2(UCT_IFACE_FLAG_EP_CHECK)] = "ep check",
[ucs_ilog2(UCT_IFACE_FLAG_EP_KEEPALIVE)] = "ep keepalive"
};
/* Descriptions of local interface event capability flags, indexed by the bit
 * position of the flag. Used to report which required event capability is
 * missing. */
static const char *ucp_wireup_event_flags[] = {
[ucs_ilog2(UCT_IFACE_FLAG_EVENT_SEND_COMP)] = "send completion event",
[ucs_ilog2(UCT_IFACE_FLAG_EVENT_RECV)] = "tag or active message event",
[ucs_ilog2(UCT_IFACE_FLAG_EVENT_RECV_SIG)] = "signaled message event"
};
/* Descriptions of the interface flags carried in the remote (unpacked)
 * address, indexed by the bit position of the flag. Used to trace which
 * required remote capability is missing. */
static const char *ucp_wireup_peer_flags[] = {
[ucs_ilog2(UCP_ADDR_IFACE_FLAG_CONNECT_TO_IFACE)] = "connect to iface",
[ucs_ilog2(UCP_ADDR_IFACE_FLAG_AM_SYNC)] = "am sync callback",
[ucs_ilog2(UCP_ADDR_IFACE_FLAG_CB_ASYNC)] = "async callback",
[ucs_ilog2(UCP_ADDR_IFACE_FLAG_PUT)] = "put",
[ucs_ilog2(UCP_ADDR_IFACE_FLAG_GET)] = "get",
[ucs_ilog2(UCP_ADDR_IFACE_FLAG_TAG_EAGER)] = "tag_eager",
[ucs_ilog2(UCP_ADDR_IFACE_FLAG_TAG_RNDV)] = "tag_rndv",
[ucs_ilog2(UCP_ADDR_IFACE_FLAG_EVENT_RECV)] = "tag_am_recv_event"
};
/* Descriptions of atomic operations, indexed by the UCT_ATOMIC_OP_xx value.
 * ucp_wireup_get_missing_amo_flag_desc() indexes this table by the bit
 * position of the first missing atomic flag. */
static ucp_wireup_atomic_flag_t ucp_wireup_atomic_desc[] = {
[UCT_ATOMIC_OP_ADD] = {.name = "add", .fetch = "fetch-"},
[UCT_ATOMIC_OP_AND] = {.name = "and", .fetch = "fetch-"},
[UCT_ATOMIC_OP_OR] = {.name = "or", .fetch = "fetch-"},
[UCT_ATOMIC_OP_XOR] = {.name = "xor", .fetch = "fetch-"},
[UCT_ATOMIC_OP_SWAP] = {.name = "swap", .fetch = ""},
[UCT_ATOMIC_OP_CSWAP] = {.name = "cswap", .fetch = ""}
};
/* Forward declaration: score function for auxiliary transports. It is
 * referenced by ucp_wireup_fill_aux_criteria() before its definition, which
 * is outside this part of the file. */
static double ucp_wireup_aux_score_func(ucp_context_h context,
const uct_md_attr_t *md_attr,
const uct_iface_attr_t *iface_attr,
const ucp_address_iface_attr_t *remote_iface_attr);
/**
 * Look up the description of a required flag which is not present.
 *
 * @param flags           Flags which are actually supported.
 * @param required_flags  Flags which must be supported; at least one of them
 *                        has to be missing from @a flags (asserted).
 * @param flag_descs      Table of descriptions indexed by flag bit position.
 *
 * @return Description of the first missing flag, as located by ucs_ffs64().
 */
static const char *
ucp_wireup_get_missing_flag_desc(uint64_t flags, uint64_t required_flags,
                                 const char ** flag_descs)
{
    uint64_t missing_flags;

    ucs_assert((required_flags & (~flags)) != 0);

    missing_flags = required_flags & (~flags);
    return flag_descs[ucs_ffs64(missing_flags)];
}
static const char *
ucp_wireup_get_missing_amo_flag_desc(uint64_t flags, uint64_t required_flags,
int op_size, int fetch, char *buf, size_t len)
{
int idx;
ucs_assert((required_flags & (~flags)) != 0);
idx = ucs_ffs64(required_flags & (~flags));
snprintf(buf, len, "%d-bit atomic %s%s", op_size,
fetch ? ucp_wireup_atomic_desc[idx].fetch : "",
ucp_wireup_atomic_desc[idx].name);
return buf;
}
/**
 * Describe a missing non-fetching atomic operation. The name matches the
 * "op" suffix pasted by UCP_WIREUP_CHECK_AMO_FLAGS.
 *
 * @return @a buf, filled with the description.
 */
static const char *
ucp_wireup_get_missing_amo_flag_desc_op(uint64_t flags, uint64_t required_flags,
                                        int op_size, char *buf, size_t len)
{
    const int is_fetch = 0;

    return ucp_wireup_get_missing_amo_flag_desc(flags, required_flags, op_size,
                                                is_fetch, buf, len);
}
/**
 * Describe a missing fetching atomic operation. The name matches the "fop"
 * suffix pasted by UCP_WIREUP_CHECK_AMO_FLAGS.
 *
 * @return @a buf, filled with the description.
 */
static const char *
ucp_wireup_get_missing_amo_flag_desc_fop(uint64_t flags, uint64_t required_flags,
                                         int op_size, char *buf, size_t len)
{
    const int is_fetch = 1;

    return ucp_wireup_get_missing_amo_flag_desc(flags, required_flags, op_size,
                                                is_fetch, buf, len);
}
/**
 * Check that a local resource supports all required flags.
 *
 * @param resource        Resource being checked (used for messages only).
 * @param flags           Flags supported by the resource.
 * @param required_flags  Flags which must be supported.
 * @param title           Purpose of the selection, for the trace message.
 * @param flag_descs      Flag descriptions indexed by bit position.
 * @param reason          Buffer which receives a short failure reason.
 * @param max             Size of @a reason.
 *
 * @return 1 if all required flags are supported, 0 otherwise. On failure the
 *         first missing flag is traced and written to @a reason.
 */
static int ucp_wireup_check_flags(const uct_tl_resource_desc_t *resource,
                                  uint64_t flags, uint64_t required_flags,
                                  const char *title, const char ** flag_descs,
                                  char *reason, size_t max)
{
    const char *missing_desc;

    if (!ucs_test_all_flags(flags, required_flags)) {
        if (required_flags != 0) {
            missing_desc = ucp_wireup_get_missing_flag_desc(flags,
                                                            required_flags,
                                                            flag_descs);
            ucs_trace(UCT_TL_RESOURCE_DESC_FMT " : not suitable for %s, no %s",
                      UCT_TL_RESOURCE_DESC_ARG(resource), title, missing_desc);
            snprintf(reason, max, UCT_TL_RESOURCE_DESC_FMT" - no %s",
                     UCT_TL_RESOURCE_DESC_ARG(resource), missing_desc);
        }

        return 0;
    }

    return 1;
}
/**
 * Check that a local resource supports all required atomic operations.
 *
 * @param resource        Resource being checked (used for messages only).
 * @param flags           Atomic operation flags supported by the resource.
 * @param required_flags  Atomic operation flags which must be supported.
 * @param op_size         Operand size in bits, for the message.
 * @param fetch           Nonzero if the flags describe fetching operations.
 * @param title           Purpose of the selection, for the trace message.
 * @param reason          Buffer which receives a short failure reason.
 * @param max             Size of @a reason.
 *
 * @return 1 if all required operations are supported, 0 otherwise. On failure
 *         the first missing operation is traced and written to @a reason.
 */
static int ucp_wireup_check_amo_flags(const uct_tl_resource_desc_t *resource,
                                      uint64_t flags, uint64_t required_flags,
                                      int op_size, int fetch,
                                      const char *title, char *reason,
                                      size_t max)
{
    char amo_desc[256];
    int supported;

    supported = ucs_test_all_flags(flags, required_flags) ? 1 : 0;
    if (!supported && (required_flags != 0)) {
        ucp_wireup_get_missing_amo_flag_desc(flags, required_flags, op_size,
                                             fetch, amo_desc, sizeof(amo_desc));
        ucs_trace(UCT_TL_RESOURCE_DESC_FMT " : not suitable for %s, no %s",
                  UCT_TL_RESOURCE_DESC_ARG(resource), title, amo_desc);
        snprintf(reason, max, UCT_TL_RESOURCE_DESC_FMT" - no %s",
                 UCT_TL_RESOURCE_DESC_ARG(resource), amo_desc);
    }

    return supported;
}
/**
 * Check the keepalive requirements of a local resource.
 *
 * The check passes unconditionally when keepalive is disabled on the worker
 * or when the endpoint does not use peer-failure error mode. Otherwise the
 * interface has to support either UCT_IFACE_FLAG_EP_KEEPALIVE or
 * UCT_IFACE_FLAG_EP_CHECK.
 *
 * @note @a required_flags and @a flag_descs are accepted but not used.
 *
 * @return 1 if the resource is suitable, 0 otherwise (with @a reason filled
 *         by the failing flag check).
 */
static int
ucp_wireup_check_keepalive(const ucp_wireup_select_params_t *select_params,
                           const uct_tl_resource_desc_t *resource,
                           uint64_t flags, uint64_t required_flags,
                           const char *title, const char **flag_descs,
                           char *reason, size_t max)
{
    char title_keepalive[128];
    char title_ep_check[128];
    ucp_worker_h worker;

    worker = select_params->ep->worker;

    ucs_snprintf_safe(title_keepalive, sizeof(title_keepalive),
                      "%s with keepalive", title);
    ucs_snprintf_safe(title_ep_check, sizeof(title_ep_check),
                      "%s with ep_check", title);

    /* Nothing to verify if keepalive is not in effect for this endpoint */
    if (!ucp_worker_keepalive_is_enabled(worker)) {
        return 1;
    }

    if (!(select_params->ep_init_flags & UCP_EP_INIT_ERR_MODE_PEER_FAILURE)) {
        return 1;
    }

    /* Either the keepalive or the ep_check capability is sufficient */
    if (ucp_wireup_check_flags(resource, flags, UCT_IFACE_FLAG_EP_KEEPALIVE,
                               title_keepalive, ucp_wireup_iface_flags, reason,
                               max)) {
        return 1;
    }

    if (ucp_wireup_check_flags(resource, flags, UCT_IFACE_FLAG_EP_CHECK,
                               title_ep_check, ucp_wireup_iface_flags, reason,
                               max)) {
        return 1;
    }

    return 0;
}
/**
 * Fill a selection info structure. The path index is always reset to 0.
 *
 * @param score        Selection score; a negative value is accepted only
 *                     together with UCP_NULL_RESOURCE (asserted).
 * @param addr_index   Index in the remote address list.
 * @param rsc_index    Local resource index.
 * @param priority     Priority of the selection.
 * @param select_info  Structure to fill.
 */
static void
ucp_wireup_init_select_info(double score, unsigned addr_index,
                            ucp_rsc_index_t rsc_index,
                            uint8_t priority,
                            ucp_wireup_select_info_t *select_info)
{
    ucs_assert((score >= 0.0) || (rsc_index == UCP_NULL_RESOURCE));

    select_info->rsc_index  = rsc_index;
    select_info->addr_index = addr_index;
    select_info->path_index = 0;
    select_info->priority   = priority;
    select_info->score      = score;
}
/**
 * Select the best pair of local resource and remote address which satisfies
 * the given criteria.
 *
 * The function first builds the set of remote address entries which satisfy
 * the remote part of the criteria, then goes over the local resources which
 * satisfy the local part and scores every reachable (resource, address) pair
 * with criteria->calc_score. The pair with the best score/priority wins.
 *
 * @param select_ctx         Lanes selected so far. When the lane limit is
 *                           reached, only addresses already used by existing
 *                           lanes of the same resource are considered.
 * @param select_params      Common selection parameters.
 * @param criteria           Requirements and score function.
 * @param tl_bitmap          Local resources to consider; it is further masked
 *                           by select_params->tl_bitmap and the context's
 *                           tl_bitmap.
 * @param remote_md_map      Remote memory domains which may be used.
 * @param local_dev_bitmap   Local devices which may be used.
 * @param remote_dev_bitmap  Remote devices which may be used.
 * @param show_error         Nonzero to print an error if nothing is found
 *                           (also requires select_params->show_error).
 * @param select_info        Filled with the selection on success only.
 *
 * @return UCS_OK if a transport was selected, UCS_ERR_UNREACHABLE otherwise.
 */
static UCS_F_NOINLINE ucs_status_t ucp_wireup_select_transport(
        const ucp_wireup_select_context_t *select_ctx,
        const ucp_wireup_select_params_t *select_params,
        const ucp_wireup_criteria_t *criteria, ucp_tl_bitmap_t tl_bitmap,
        uint64_t remote_md_map, uint64_t local_dev_bitmap,
        uint64_t remote_dev_bitmap, int show_error,
        ucp_wireup_select_info_t *select_info)
{
    const ucp_unpacked_address_t *address = select_params->address;
    ucp_ep_h ep                           = select_params->ep;
    ucp_worker_h worker                   = ep->worker;
    ucp_context_h context                 = worker->context;
    ucp_wireup_select_info_t sinfo        = {0};
    int found                             = 0;
    uint64_t local_iface_flags            = criteria->local_iface_flags;
    int has_cm                            =
            ucp_ep_init_flags_has_cm(select_params->ep_init_flags);
    uint64_t local_md_flags;
    ucp_tl_addr_bitmap_t addr_index_map, rsc_addr_index_map;
    const ucp_wireup_lane_desc_t *lane_desc;
    unsigned addr_index;
    uct_tl_resource_desc_t *resource;
    const ucp_address_entry_t *ae;
    ucp_rsc_index_t rsc_index;
    ucp_lane_index_t lane;
    char tls_info[256];
    char *p, *endp;
    uct_iface_attr_t *iface_attr;
    uct_md_attr_t *md_attr;
    int is_reachable;
    double score;
    uint8_t priority;

    /* 'p' is the write position in 'tls_info', which accumulates the reasons
     * why each candidate was rejected. Every record ends with exactly two
     * separator characters, which are trimmed at 'out'. */
    p            = tls_info;
    endp         = tls_info + sizeof(tls_info) - 1;
    tls_info[0]  = '\0';
    UCS_BITMAP_AND_INPLACE(&tl_bitmap, select_params->tl_bitmap);
    UCS_BITMAP_AND_INPLACE(&tl_bitmap, context->tl_bitmap);
    show_error   = (select_params->show_error && show_error);

    /* Check which remote addresses satisfy the criteria */
    UCS_BITMAP_CLEAR(&addr_index_map);
    ucp_unpacked_address_for_each(ae, address) {
        addr_index = ucp_unpacked_address_index(address, ae);
        if (!(remote_dev_bitmap & UCS_BIT(ae->dev_index))) {
            ucs_trace("addr[%d]: not in use, because on device[%d]",
                      addr_index, ae->dev_index);
            continue;
        } else if ((ae->md_index != UCP_NULL_RESOURCE) &&
                   !(remote_md_map & UCS_BIT(ae->md_index))) {
            ucs_trace("addr[%d]: not in use, because on md[%d]", addr_index,
                      ae->md_index);
            continue;
        }

        /* All event flags required by the criteria must be among the flags
         * carried in the address */
        ucs_assert(ucs_test_all_flags(UCP_ADDRESS_IFACE_EVENT_FLAGS,
                                      criteria->remote_event_flags));

        if (!ucs_test_all_flags(ae->iface_attr.flags, criteria->remote_iface_flags)) {
            ucs_trace("addr[%d] %s: no %s", addr_index,
                      ucp_find_tl_name_by_csum(context, ae->tl_name_csum),
                      ucp_wireup_get_missing_flag_desc(ae->iface_attr.flags,
                                                       criteria->remote_iface_flags,
                                                       ucp_wireup_peer_flags));
            continue;
        }

        if (!ucs_test_all_flags(ae->iface_attr.flags, criteria->remote_event_flags)) {
            ucs_trace("addr[%d] %s: no %s", addr_index,
                      ucp_find_tl_name_by_csum(context, ae->tl_name_csum),
                      ucp_wireup_get_missing_flag_desc(ae->iface_attr.flags,
                                                       criteria->remote_event_flags,
                                                       ucp_wireup_peer_flags));
            continue;
        }

        /* Each of these expands to a check which 'continue's on failure */
        UCP_WIREUP_CHECK_AMO_FLAGS(ae, criteria, context, addr_index, op, 32);
        UCP_WIREUP_CHECK_AMO_FLAGS(ae, criteria, context, addr_index, op, 64);
        UCP_WIREUP_CHECK_AMO_FLAGS(ae, criteria, context, addr_index, fop, 32);
        UCP_WIREUP_CHECK_AMO_FLAGS(ae, criteria, context, addr_index, fop, 64);

        UCS_BITMAP_SET(addr_index_map, addr_index);
    }

    if (UCS_BITMAP_IS_ZERO_INPLACE(&addr_index_map)) {
        /* Pad with two characters: the code at 'out' unconditionally trims
         * the last two characters, and a single space here would cut off the
         * last letter of the status string. */
        snprintf(p, endp - p, "%s  ", ucs_status_string(UCS_ERR_UNSUPPORTED));
        p += strlen(p);
        goto out;
    }

    /* For each local resource, look for the best remote address to connect
     * to, and keep the overall best pair */
    UCS_BITMAP_FOR_EACH_BIT(tl_bitmap, rsc_index) {
        local_md_flags = criteria->local_md_flags;
        resource       = &context->tl_rscs[rsc_index].tl_rsc;
        iface_attr     = ucp_worker_iface_get_attr(worker, rsc_index);
        md_attr        =
                &context->tl_mds[context->tl_rscs[rsc_index].md_index].attr;

        /* Auxiliary resources are used only if the criteria asks for them */
        if ((context->tl_rscs[rsc_index].flags & UCP_TL_RSC_FLAG_AUX) &&
            !(criteria->tl_rsc_flags & UCP_TL_RSC_FLAG_AUX)) {
            continue;
        }

        if (select_params->ep_init_flags & UCP_EP_INIT_CONNECT_TO_IFACE_ONLY) {
            local_iface_flags |= UCT_IFACE_FLAG_CONNECT_TO_IFACE;
        } else if (ucp_wireup_connect_p2p(worker, rsc_index, has_cm)) {
            /* MD invalidation is not required from p2p-connected resources */
            local_md_flags &= ~UCT_MD_FLAG_INVALIDATE;
        }

        /* Check that local md and interface satisfy the criteria */
        if (!ucp_wireup_check_flags(resource, md_attr->cap.flags,
                                    local_md_flags, criteria->title,
                                    ucp_wireup_md_flags, p, endp - p) ||
            !ucp_wireup_check_flags(resource, iface_attr->cap.flags,
                                    local_iface_flags, criteria->title,
                                    ucp_wireup_iface_flags, p, endp - p) ||
            !ucp_wireup_check_keepalive(select_params, resource,
                                        iface_attr->cap.flags,
                                        criteria->local_iface_flags, criteria->title,
                                        ucp_wireup_iface_flags, p, endp - p) ||
            !ucp_wireup_check_flags(resource, iface_attr->cap.event_flags,
                                    criteria->local_event_flags, criteria->title,
                                    ucp_wireup_event_flags, p, endp - p) ||
            !ucp_wireup_check_amo_flags(resource, iface_attr->cap.atomic32.op_flags,
                                        criteria->local_atomic_flags.atomic32.op_flags,
                                        32, 0, criteria->title, p, endp - p) ||
            !ucp_wireup_check_amo_flags(resource, iface_attr->cap.atomic64.op_flags,
                                        criteria->local_atomic_flags.atomic64.op_flags,
                                        64, 0, criteria->title, p, endp - p) ||
            !ucp_wireup_check_amo_flags(resource, iface_attr->cap.atomic32.fop_flags,
                                        criteria->local_atomic_flags.atomic32.fop_flags,
                                        32, 1, criteria->title, p, endp - p) ||
            !ucp_wireup_check_amo_flags(resource, iface_attr->cap.atomic64.fop_flags,
                                        criteria->local_atomic_flags.atomic64.fop_flags,
                                        64, 1, criteria->title, p, endp - p))
        {
            /* The failing check wrote its reason at 'p'; append a separator */
            p += strlen(p);
            snprintf(p, endp - p, ", ");
            p += strlen(p);
            continue;
        }

        /* Check supplied tl & device bitmap.
         * NOTE(review): the tl_bitmap branch looks unreachable, since the
         * loop visits only bits set in tl_bitmap - confirm before removing. */
        if (!UCS_BITMAP_GET(tl_bitmap, rsc_index)) {
            ucs_trace(UCT_TL_RESOURCE_DESC_FMT " : disabled by tl_bitmap",
                      UCT_TL_RESOURCE_DESC_ARG(resource));
            snprintf(p, endp - p, UCT_TL_RESOURCE_DESC_FMT" - disabled for %s, ",
                     UCT_TL_RESOURCE_DESC_ARG(resource), criteria->title);
            p += strlen(p);
            continue;
        } else if (!(local_dev_bitmap &
                     UCS_BIT(context->tl_rscs[rsc_index].dev_index))) {
            ucs_trace(UCT_TL_RESOURCE_DESC_FMT " : disabled by device bitmap",
                      UCT_TL_RESOURCE_DESC_ARG(resource));
            snprintf(p, endp - p, UCT_TL_RESOURCE_DESC_FMT" - disabled for %s, ",
                     UCT_TL_RESOURCE_DESC_ARG(resource), criteria->title);
            p += strlen(p);
            continue;
        }

        if (select_ctx->num_lanes < UCP_MAX_LANES) {
            /* Lane limit not reached: any suitable address may be selected */
            rsc_addr_index_map = addr_index_map;
        } else {
            /* Lane limit reached: select only among the addresses already
             * used by existing lanes on this resource */
            UCS_BITMAP_CLEAR(&rsc_addr_index_map);
            for (lane = 0; lane < select_ctx->num_lanes; ++lane) {
                lane_desc = &select_ctx->lane_descs[lane];
                if (lane_desc->rsc_index == rsc_index) {
                    UCS_BITMAP_SET(rsc_addr_index_map, lane_desc->addr_index);
                }
            }

            UCS_BITMAP_AND_INPLACE(&rsc_addr_index_map, addr_index_map);
        }

        is_reachable = 0;

        UCS_BITMAP_FOR_EACH_BIT(rsc_addr_index_map, addr_index) {
            ae = &address->address_list[addr_index];
            if (!ucp_wireup_is_reachable(ep, select_params->ep_init_flags,
                                         rsc_index, ae)) {
                /* Must be reachable device address, on same transport */
                continue;
            }

            score    = criteria->calc_score(context, md_attr, iface_attr,
                                            &ae->iface_attr);
            priority = iface_attr->priority + ae->iface_attr.priority;

            ucs_trace(UCT_TL_RESOURCE_DESC_FMT "->addr[%u] : %s score %.2f priority %d",
                      UCT_TL_RESOURCE_DESC_ARG(resource),
                      addr_index, criteria->title, score, priority);
            is_reachable = 1;

            if (!found || (ucp_score_prio_cmp(score, priority, sinfo.score,
                                              sinfo.priority) > 0)) {
                ucp_wireup_init_select_info(score, addr_index, rsc_index,
                                            priority, &sinfo);
                found = 1;
            }
        }

        /* If a local resource cannot reach any of the remote addresses,
         * generate debug message */
        if (!is_reachable) {
            ucs_trace(UCT_TL_RESOURCE_DESC_FMT" : unreachable ",
                      UCT_TL_RESOURCE_DESC_ARG(resource));
            snprintf(p, endp - p, UCT_TL_RESOURCE_DESC_FMT" - %s, ",
                     UCT_TL_RESOURCE_DESC_ARG(resource),
                     ucs_status_string(UCS_ERR_UNREACHABLE));
            p += strlen(p);
        }
    }

out:
    /* Trim the two-character separator which ends the last record */
    if (p >= tls_info + 2) {
        *(p - 2) = '\0';
    }

    if (!found) {
        if (show_error) {
            ucs_error("no %s transport to %s: %s", criteria->title,
                      address->name, tls_info);
        }

        return UCS_ERR_UNREACHABLE;
    }

    ucs_trace("ep %p: selected for %s: " UCT_TL_RESOURCE_DESC_FMT " md[%d]"
              " -> '%s' address[%d],md[%d],rsc[%u] score %.2f",
              ep, criteria->title,
              UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[sinfo.rsc_index].tl_rsc),
              context->tl_rscs[sinfo.rsc_index].md_index, ucp_ep_peer_name(ep),
              sinfo.addr_index, address->address_list[sinfo.addr_index].md_index,
              address->address_list[sinfo.addr_index].iface_attr.dst_rsc_index,
              sinfo.score);

    *select_info = sinfo;
    return UCS_OK;
}
/**
 * Estimate the latency between a local interface and a remote interface.
 *
 * For remote addresses of version UCP_OBJECT_VERSION_V1 the result is the
 * larger of the local constant latency and the remote latency overhead, plus
 * the local per-endpoint latency growth multiplied by the estimated number of
 * endpoints. For any other version it is the larger of the remote latency
 * overhead and ucp_tl_iface_latency() of the local interface.
 */
static inline double ucp_wireup_tl_iface_latency(ucp_context_h context,
                                                 const uct_iface_attr_t *iface_attr,
                                                 const ucp_address_iface_attr_t *remote_iface_attr)
{
    if (remote_iface_attr->addr_version != UCP_OBJECT_VERSION_V1) {
        return ucs_max(remote_iface_attr->lat_ovh,
                       ucp_tl_iface_latency(context, &iface_attr->latency));
    }

    return ucs_max(iface_attr->latency.c, remote_iface_attr->lat_ovh) +
           (iface_attr->latency.m * context->config.est_num_eps);
}
/**
 * Add a lane usage to the selection context.
 *
 * If a lane with the same resource, address and path index already exists, it
 * is reused and only its lane type mask and score are updated. Otherwise a
 * new lane description is appended.
 *
 * @param select_info   Selected resource/address/path and its score.
 * @param dst_md_index  Remote memory domain index; must match the existing
 *                      lane when one is reused (asserted).
 * @param dst_sys_dev   Remote system device.
 * @param lane_type     Usage to add; must not be already set on a reused
 *                      lane (asserted).
 * @param seg_size      Segment size stored in a newly created lane.
 * @param select_ctx    Selection context to update.
 *
 * @return UCS_OK, or UCS_ERR_EXCEEDS_LIMIT if a new lane is needed but
 *         UCP_MAX_LANES lanes already exist.
 */
static UCS_F_NOINLINE ucs_status_t ucp_wireup_add_lane_desc(
        const ucp_wireup_select_info_t *select_info,
        ucp_md_index_t dst_md_index, ucs_sys_device_t dst_sys_dev,
        ucp_lane_type_t lane_type, unsigned seg_size,
        ucp_wireup_select_context_t *select_ctx)
{
    ucp_wireup_lane_desc_t *lane_desc;
    ucp_lane_type_t type_iter;
    ucp_lane_index_t lane;

    /* Try to reuse a lane selected on the same transport resources */
    for (lane = 0; lane < select_ctx->num_lanes; ++lane) {
        lane_desc = &select_ctx->lane_descs[lane];
        if ((lane_desc->rsc_index != select_info->rsc_index) ||
            (lane_desc->addr_index != select_info->addr_index) ||
            (lane_desc->path_index != select_info->path_index)) {
            continue;
        }

        ucs_assertv_always(dst_md_index == lane_desc->dst_md_index,
                           "lane[%d].dst_md_index=%d, dst_md_index=%d",
                           lane, lane_desc->dst_md_index, dst_md_index);
        ucs_assertv_always(!(lane_desc->lane_types & UCS_BIT(lane_type)),
                           "lane[%d]=0x%x |= 0x%x", lane, lane_desc->lane_types,
                           lane_type);
        goto out_update_score;
    }

    if (select_ctx->num_lanes >= UCP_MAX_LANES) {
        ucs_error("cannot add %s lane - reached limit (%d)",
                  ucp_lane_type_info[lane_type].short_name,
                  select_ctx->num_lanes);
        return UCS_ERR_EXCEEDS_LIMIT;
    }

    /* No matching lane - append a new one */
    lane_desc = &select_ctx->lane_descs[select_ctx->num_lanes];
    ++select_ctx->num_lanes;

    lane_desc->rsc_index    = select_info->rsc_index;
    lane_desc->addr_index   = select_info->addr_index;
    lane_desc->path_index   = select_info->path_index;
    lane_desc->dst_md_index = dst_md_index;
    lane_desc->dst_sys_dev  = dst_sys_dev;
    lane_desc->lane_types   = UCS_BIT(lane_type);
    lane_desc->seg_size     = seg_size;

    for (type_iter = UCP_LANE_TYPE_FIRST; type_iter < UCP_LANE_TYPE_LAST;
         ++type_iter) {
        lane_desc->score[type_iter] = 0.0;
    }

out_update_score:
    lane_desc->score[lane_type] = select_info->score;
    lane_desc->lane_types      |= UCS_BIT(lane_type);
    return UCS_OK;
}
/**
 * Add a lane for the selected transport, taking the remote memory domain,
 * system device and segment size from the selected remote address entry.
 *
 * @return Status of ucp_wireup_add_lane_desc().
 */
static UCS_F_NOINLINE ucs_status_t
ucp_wireup_add_lane(const ucp_wireup_select_params_t *select_params,
                    const ucp_wireup_select_info_t *select_info,
                    ucp_lane_type_t lane_type,
                    ucp_wireup_select_context_t *select_ctx)
{
    const ucp_address_entry_t *remote_ae;

    remote_ae = &select_params->address->address_list[select_info->addr_index];

    return ucp_wireup_add_lane_desc(select_info, remote_ae->md_index,
                                    remote_ae->sys_dev, lane_type,
                                    remote_ae->iface_attr.seg_size, select_ctx);
}
/**
 * Compare two lanes by their score for the given lane type, so that sorting
 * places higher scores first.
 *
 * @param elem1      Pointer to the first lane index.
 * @param elem2      Pointer to the second lane index.
 * @param arg        Array of lane descriptions indexed by lane index.
 * @param lane_type  Lane type whose score is compared.
 *
 * @return 1 if the first score is lower, -1 if it is higher, 0 otherwise.
 *         UCP_NULL_LANE is treated as having score 0.0.
 */
static int ucp_wireup_compare_score(const void *elem1, const void *elem2,
                                    void *arg, ucp_lane_type_t lane_type)
{
    const ucp_wireup_lane_desc_t *lane_descs = arg;
    const ucp_lane_index_t *lane_a           = elem1;
    const ucp_lane_index_t *lane_b           = elem2;
    double score_a                           = 0.0;
    double score_b                           = 0.0;

    if (*lane_a != UCP_NULL_LANE) {
        score_a = lane_descs[*lane_a].score[lane_type];
    }

    if (*lane_b != UCP_NULL_LANE) {
        score_b = lane_descs[*lane_b].score[lane_type];
    }

    if (score_a < score_b) {
        return 1;
    }

    if (score_a > score_b) {
        return -1;
    }

    return 0;
}
static int ucp_wireup_compare_lane_am_bw_score(const void *elem1, const void *elem2,
void *arg)
{
return ucp_wireup_compare_score(elem1, elem2, arg, UCP_LANE_TYPE_AM_BW);
}
static int ucp_wireup_compare_lane_rma_score(const void *elem1, const void *elem2,
void *arg)
{
return ucp_wireup_compare_score(elem1, elem2, arg, UCP_LANE_TYPE_RMA);
}
static int ucp_wireup_compare_lane_rma_bw_score(const void *elem1, const void *elem2,
void *arg)
{
return ucp_wireup_compare_score(elem1, elem2, arg, UCP_LANE_TYPE_RMA_BW);
}
static int ucp_wireup_compare_lane_amo_score(const void *elem1, const void *elem2,
void *arg)
{
return ucp_wireup_compare_score(elem1, elem2, arg, UCP_LANE_TYPE_AMO);
}
/**
 * Exclude the memory domains of a selection from further selections.
 *
 * Clears the selected remote memory domain in @a remote_md_map, and removes
 * from @a tl_bitmap every context resource which belongs to the same local
 * memory domain as the selected resource.
 *
 * @param sparams        Selection parameters (endpoint and remote address).
 * @param sinfo          The selection whose memory domains are excluded.
 * @param tl_bitmap      Resource bitmap to update.
 * @param remote_md_map  Remote memory domain map to update.
 */
static void ucp_wireup_unset_tl_by_md(const ucp_wireup_select_params_t *sparams,
                                      const ucp_wireup_select_info_t *sinfo,
                                      ucp_tl_bitmap_t *tl_bitmap,
                                      uint64_t *remote_md_map)
{
    ucp_context_h context = sparams->ep->worker->context;
    ucp_md_index_t local_md_index;
    ucp_md_index_t remote_md_index;
    ucp_rsc_index_t rsc_index;

    remote_md_index = sparams->address->address_list[sinfo->addr_index].md_index;
    local_md_index  = context->tl_rscs[sinfo->rsc_index].md_index;

    *remote_md_map &= ~UCS_BIT(remote_md_index);

    UCS_BITMAP_FOR_EACH_BIT(context->tl_bitmap, rsc_index) {
        if (context->tl_rscs[rsc_index].md_index == local_md_index) {
            UCS_BITMAP_UNSET(*tl_bitmap, rsc_index);
        }
    }
}
/*
 * Add lanes of type 'lane_type' for remote memory access.
 *
 * First, one lane is selected on a transport whose memory domain supports
 * registration (UCT_MD_FLAG_REG). Then additional lanes are added on
 * transports whose memory domain supports allocation (UCT_MD_FLAG_ALLOC), as
 * long as their score is better than the score of the registered-memory lane.
 *
 * criteria->title is used as a printf-style format string which receives one
 * string argument ("registered" or "allocated").
 *
 * Returns the failure status if no registered-memory lane could be added and
 * select_params->allow_am is zero; otherwise returns UCS_OK. If allow_am is
 * nonzero, such a failure sets UCP_EP_INIT_CREATE_AM_LANE in the selection
 * context instead. Failures inside the allocated-memory loop only stop the
 * loop and are not reported to the caller.
 */
static UCS_F_NOINLINE ucs_status_t ucp_wireup_add_memaccess_lanes(
const ucp_wireup_select_params_t *select_params,
const ucp_wireup_criteria_t *criteria, ucp_tl_bitmap_t tl_bitmap,
ucp_lane_type_t lane_type, ucp_wireup_select_context_t *select_ctx)
{
ucp_wireup_criteria_t mem_criteria = *criteria;
ucp_wireup_select_info_t select_info = {0};
int show_error = !select_params->allow_am;
/* Score to beat by allocated-memory lanes; stays 0 if no registered-memory
 * lane was added */
double reg_score = 0;
uint64_t remote_md_map;
ucs_status_t status;
char title[64];
remote_md_map = UINT64_MAX;
/* Select the best transport which can reach registered memory */
snprintf(title, sizeof(title), criteria->title, "registered");
mem_criteria.title = title;
mem_criteria.local_md_flags = UCT_MD_FLAG_REG | criteria->local_md_flags;
status = ucp_wireup_select_transport(select_ctx, select_params,
&mem_criteria, tl_bitmap,
remote_md_map, UINT64_MAX, UINT64_MAX,
show_error, &select_info);
if (status == UCS_OK) {
status = ucp_wireup_add_lane(select_params, &select_info, lane_type,
select_ctx);
if (status == UCS_OK) {
/* Do not select the same local/remote memory domains again */
ucp_wireup_unset_tl_by_md(select_params, &select_info, &tl_bitmap,
&remote_md_map);
reg_score = select_info.score;
}
}
/* No registered-memory lane: fail, or fall back to requesting an AM lane */
if (status != UCS_OK) {
if (!select_params->allow_am) {
return status;
}
select_ctx->ucp_ep_init_flags |= UCP_EP_INIT_CREATE_AM_LANE;
}
/* Select additional transports which can access allocated memory, but only
 * while their score is better than reg_score */
snprintf(title, sizeof(title), criteria->title, "allocated");
mem_criteria.title = title;
mem_criteria.local_md_flags = UCT_MD_FLAG_ALLOC | criteria->local_md_flags;
for (;;) {
status = ucp_wireup_select_transport(select_ctx, select_params,
&mem_criteria, tl_bitmap,
remote_md_map, UINT64_MAX,
UINT64_MAX, 0, &select_info);
/* Stop if nothing was selected, or the selection is not better than the
 * registered-memory lane */
if ((status != UCS_OK) ||
(ucp_score_cmp(select_info.score, reg_score) <= 0)) {
break;
}
status = ucp_wireup_add_lane(select_params, &select_info, lane_type,
select_ctx);
if (status != UCS_OK) {
break;
}
/* Exclude the memory domains just used, so the next iteration selects a
 * different one */
ucp_wireup_unset_tl_by_md(select_params, &select_info, &tl_bitmap,
&remote_md_map);
}
return UCS_OK;
}
static uint64_t ucp_ep_get_context_features(const ucp_ep_h ep)
{
return ep->worker->context->config.features;
}
/**
 * Score function for RMA lanes: inversely proportional to the estimated time
 * of a 4096-byte transfer (latency + local overhead + 4096 / bandwidth),
 * where bandwidth is the smaller of the local and remote bandwidth.
 *
 * @note @a md_attr is not used.
 */
static double ucp_wireup_rma_score_func(ucp_context_h context,
                                        const uct_md_attr_t *md_attr,
                                        const uct_iface_attr_t *iface_attr,
                                        const ucp_address_iface_attr_t *remote_iface_attr)
{
    return 1e-3 /
           (ucp_wireup_tl_iface_latency(context, iface_attr,
                                        remote_iface_attr) +
            iface_attr->overhead +
            (4096.0 /
             ucs_min(ucp_tl_iface_bandwidth(context, &iface_attr->bandwidth),
                     remote_iface_attr->bandwidth)));
}
/* Require peer failure handling from the local interface if the endpoint is
 * created in peer-failure error mode; otherwise leave the criteria as is. */
static void ucp_wireup_fill_peer_err_criteria(ucp_wireup_criteria_t *criteria,
                                              unsigned ep_init_flags)
{
    if (!(ep_init_flags & UCP_EP_INIT_ERR_MODE_PEER_FAILURE)) {
        return;
    }

    criteria->local_iface_flags |= UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE;
}
/**
 * Fill the selection criteria for an auxiliary transport.
 *
 * The local interface must support AM bcopy and pending; the remote one must
 * support AM sync callbacks. When the endpoint does not use a connection
 * manager, connect-to-iface is required on both sides and the remote side
 * must also support async callbacks. Peer failure handling is added when the
 * endpoint init flags ask for it.
 *
 * @param criteria       Criteria to fill.
 * @param ep_init_flags  Endpoint initialization flags.
 */
static void ucp_wireup_fill_aux_criteria(ucp_wireup_criteria_t *criteria,
                                         unsigned ep_init_flags)
{
    criteria->title              = "auxiliary";
    criteria->calc_score         = ucp_wireup_aux_score_func;
    criteria->tl_rsc_flags       = UCP_TL_RSC_FLAG_AUX;
    criteria->local_md_flags     = 0;
    criteria->local_event_flags  = 0;
    criteria->remote_event_flags = 0;
    criteria->local_iface_flags  = UCT_IFACE_FLAG_AM_BCOPY |
                                   UCT_IFACE_FLAG_PENDING;
    criteria->remote_iface_flags = UCP_ADDR_IFACE_FLAG_AM_SYNC;

    if (!ucp_ep_init_flags_has_cm(ep_init_flags)) {
        criteria->local_iface_flags  |= UCT_IFACE_FLAG_CONNECT_TO_IFACE;
        criteria->remote_iface_flags |= UCP_ADDR_IFACE_FLAG_CONNECT_TO_IFACE |
                                        UCP_ADDR_IFACE_FLAG_CB_ASYNC;
    }

    ucp_wireup_fill_peer_err_criteria(criteria, ep_init_flags);
}
/* Reset both the local and the remote atomic requirements of the criteria to
 * all-zero. */
static void ucp_wireup_clean_amo_criteria(ucp_wireup_criteria_t *criteria)
{
    memset(&criteria->local_atomic_flags, 0,
           sizeof(criteria->local_atomic_flags));
    memset(&criteria->remote_atomic_flags, 0,
           sizeof(criteria->remote_atomic_flags));
}
/* Return nonzero unless the endpoint is a memory-type endpoint
 * (UCP_EP_INIT_FLAG_MEM_TYPE is set). */
static int ucp_wireup_allow_am_emulation_layer(unsigned ep_init_flags)
{
    return (ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) == 0;
}
/* Return the effective endpoint init flags: the flags given in the selection
 * parameters combined with those raised during selection. */
static unsigned
ucp_wireup_ep_init_flags(const ucp_wireup_select_params_t *select_params,
                         const ucp_wireup_select_context_t *select_ctx)
{
    unsigned ep_init_flags = select_params->ep_init_flags;

    ep_init_flags |= select_ctx->ucp_ep_init_flags;
    return ep_init_flags;
}
/**
 * Add the connection manager lane, if the endpoint init flags request CM.
 *
 * The CM lane is not bound to a transport resource: it is added with
 * UCP_NULL_RESOURCE as resource and destination MD, an unknown system device,
 * and UINT_MAX as address index and segment size.
 *
 * @return UCS_OK if CM is not used, otherwise the status of adding the lane.
 */
static ucs_status_t
ucp_wireup_add_cm_lane(const ucp_wireup_select_params_t *select_params,
                       ucp_wireup_select_context_t *select_ctx)
{
    ucp_wireup_select_info_t cm_info;

    if (ucp_ep_init_flags_has_cm(select_params->ep_init_flags)) {
        ucp_wireup_init_select_info(0., UINT_MAX, UCP_NULL_RESOURCE, 0,
                                    &cm_info);
        return ucp_wireup_add_lane_desc(&cm_info, UCP_NULL_RESOURCE,
                                        UCS_SYS_DEVICE_ID_UNKNOWN,
                                        UCP_LANE_TYPE_CM, UINT_MAX, select_ctx);
    }

    return UCS_OK;
}
/**
 * Add lanes for remote memory access.
 *
 * Nothing is added when only an AM lane was requested, or when neither the
 * RMA feature is enabled nor the endpoint is a memory-type endpoint. For
 * memory-type endpoints only put-short (local) and put (remote) are required;
 * otherwise put short/bcopy, get bcopy and pending are required locally, and
 * put and get remotely.
 *
 * @return UCS_OK if nothing had to be added, otherwise the status of
 *         ucp_wireup_add_memaccess_lanes().
 */
static ucs_status_t
ucp_wireup_add_rma_lanes(const ucp_wireup_select_params_t *select_params,
                         ucp_wireup_select_context_t *select_ctx)
{
    unsigned ep_init_flags         = ucp_wireup_ep_init_flags(select_params,
                                                              select_ctx);
    ucp_wireup_criteria_t criteria = {0};
    int is_mem_type_ep;

    if (ep_init_flags & UCP_EP_INIT_CREATE_AM_LANE_ONLY) {
        return UCS_OK;
    }

    is_mem_type_ep = (ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) != 0;
    if (!is_mem_type_ep &&
        !(ucp_ep_get_context_features(select_params->ep) & UCP_FEATURE_RMA)) {
        return UCS_OK;
    }

    if (is_mem_type_ep) {
        criteria.title              = "copy across memory types";
        criteria.local_iface_flags  = UCT_IFACE_FLAG_PUT_SHORT;
        criteria.remote_iface_flags = UCP_ADDR_IFACE_FLAG_PUT;
    } else {
        criteria.title              = "remote %s memory access";
        criteria.local_iface_flags  = UCT_IFACE_FLAG_PUT_SHORT |
                                      UCT_IFACE_FLAG_PUT_BCOPY |
                                      UCT_IFACE_FLAG_GET_BCOPY |
                                      UCT_IFACE_FLAG_PENDING;
        criteria.remote_iface_flags = UCP_ADDR_IFACE_FLAG_PUT |
                                      UCP_ADDR_IFACE_FLAG_GET;
    }

    criteria.local_event_flags  = 0;
    criteria.remote_event_flags = 0;
    criteria.tl_rsc_flags       = 0;
    criteria.calc_score         = ucp_wireup_rma_score_func;
    ucp_wireup_fill_peer_err_criteria(&criteria, ep_init_flags);

    return ucp_wireup_add_memaccess_lanes(select_params, &criteria,
                                          ucp_tl_bitmap_max, UCP_LANE_TYPE_RMA,
                                          select_ctx);
}
/**
 * Score function for atomic lanes: inversely proportional to the sum of the
 * estimated latency and the local interface overhead.
 *
 * @note @a md_attr is not used.
 */
double ucp_wireup_amo_score_func(ucp_context_h context,
                                 const uct_md_attr_t *md_attr,
                                 const uct_iface_attr_t *iface_attr,
                                 const ucp_address_iface_attr_t *remote_iface_attr)
{
    return 1e-3 /
           (ucp_wireup_tl_iface_latency(context, iface_attr,
                                        remote_iface_attr) +
            iface_attr->overhead);
}
/**
 * Add lanes for atomic operations.
 *
 * Nothing is added if ucs_test_flags() reports no AMO32/AMO64 feature on the
 * context, for memory-type endpoints, or when only an AM lane was requested.
 * The candidate resources are the worker's atomic resources plus every
 * context resource for which ucp_worker_is_tl_2iface() is true.
 *
 * @return UCS_OK if nothing had to be added, otherwise the status of
 *         ucp_wireup_add_memaccess_lanes().
 */
static ucs_status_t
ucp_wireup_add_amo_lanes(const ucp_wireup_select_params_t *select_params,
                         ucp_wireup_select_context_t *select_ctx)
{
    ucp_worker_h worker            = select_params->ep->worker;
    ucp_context_h context          = worker->context;
    unsigned ep_init_flags         = ucp_wireup_ep_init_flags(select_params,
                                                              select_ctx);
    ucp_wireup_criteria_t criteria = {0};
    ucp_tl_bitmap_t amo_tl_bitmap;
    ucp_rsc_index_t rsc_index;

    if (!ucs_test_flags(context->config.features, UCP_FEATURE_AMO32,
                        UCP_FEATURE_AMO64)) {
        return UCS_OK;
    }

    if (ep_init_flags & (UCP_EP_INIT_FLAG_MEM_TYPE |
                         UCP_EP_INIT_CREATE_AM_LANE_ONLY)) {
        return UCS_OK;
    }

    ucp_context_uct_atomic_iface_flags(context, &criteria.remote_atomic_flags);

    criteria.title              = "atomic operations on %s memory";
    criteria.calc_score         = ucp_wireup_amo_score_func;
    criteria.local_iface_flags  = UCT_IFACE_FLAG_PENDING;
    criteria.remote_iface_flags = 0;
    criteria.local_event_flags  = 0;
    criteria.remote_event_flags = 0;
    /* The same atomic operations are required on both sides */
    criteria.local_atomic_flags = criteria.remote_atomic_flags;
    ucp_wireup_fill_peer_err_criteria(&criteria, ep_init_flags);

    /* Start from the worker's atomic resources and add all resources which
     * ucp_worker_is_tl_2iface() accepts */
    amo_tl_bitmap = worker->atomic_tls;
    UCS_BITMAP_FOR_EACH_BIT(context->tl_bitmap, rsc_index) {
        if (ucp_worker_is_tl_2iface(worker, rsc_index)) {
            UCS_BITMAP_SET(amo_tl_bitmap, rsc_index);
        }
    }

    return ucp_wireup_add_memaccess_lanes(select_params, &criteria,
                                          amo_tl_bitmap, UCP_LANE_TYPE_AMO,
                                          select_ctx);
}
/**
 * Score function for the active message lane: inversely proportional to the
 * sum of the estimated latency and the local and remote overheads.
 *
 * @note @a md_attr is not used.
 */
static double ucp_wireup_am_score_func(ucp_context_h context,
                                       const uct_md_attr_t *md_attr,
                                       const uct_iface_attr_t *iface_attr,
                                       const ucp_address_iface_attr_t *remote_iface_attr)
{
    return 1e-3 /
           (ucp_wireup_tl_iface_latency(context, iface_attr,
                                        remote_iface_attr) +
            iface_attr->overhead + remote_iface_attr->overhead);
}
/**
 * Score function for high-bandwidth RMA lanes: the inverse of the estimated
 * time to transfer a message of UCP_WIREUP_RMA_BW_TEST_MSG_SIZE bytes, which
 * is the sum of size/bandwidth, latency, local overhead and the memory
 * registration cost of that size. Bandwidth is the smaller of the local and
 * remote bandwidth.
 */
static double ucp_wireup_rma_bw_score_func(ucp_context_h context,
                                           const uct_md_attr_t *md_attr,
                                           const uct_iface_attr_t *iface_attr,
                                           const ucp_address_iface_attr_t *remote_iface_attr)
{
    return 1 /
           ((UCP_WIREUP_RMA_BW_TEST_MSG_SIZE /
             ucs_min(ucp_tl_iface_bandwidth(context, &iface_attr->bandwidth),
                     remote_iface_attr->bandwidth)) +
            ucp_wireup_tl_iface_latency(context, iface_attr,
                                        remote_iface_attr) +
            iface_attr->overhead +
            ucs_linear_func_apply(md_attr->reg_cost,
                                  UCP_WIREUP_RMA_BW_TEST_MSG_SIZE));
}
static inline int
ucp_wireup_is_am_required(const ucp_wireup_select_params_t *select_params,
const ucp_wireup_select_context_t *select_ctx)
{
ucp_ep_h ep = select_params->ep;
ucp_context_h context = ep->worker->context;
unsigned ep_init_flags = ucp_wireup_ep_init_flags(select_params,
select_ctx);
ucp_lane_index_t lane;
if (ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) {
return 0;
}
if (ep_init_flags & UCP_EP_INIT_CREATE_AM_LANE) {
return 1;
}
if (ucp_ep_get_context_features(ep) & (UCP_FEATURE_TAG |
UCP_FEATURE_STREAM |
UCP_FEATURE_AM)) {
return 1;
}
if (context->config.ext.proto_enable &&
(context->num_mem_type_detect_mds > 0) &&
(ucp_ep_get_context_features(ep) & UCP_FEATURE_RMA)) {
return 1;
}
for (lane = 0; lane < select_ctx->num_lanes; ++lane) {
if (!ucp_worker_is_tl_2iface(ep->worker,
select_ctx->lane_descs[lane].rsc_index)) {
return 1;
}
}
return 0;
}
/**
 * Select and add the active message lane, if one is required.
 *
 * A transport whose maximal AM bcopy size is smaller than UCP_MIN_BCOPY is
 * removed from the candidates and the selection is repeated.
 *
 * @param select_params  Common selection parameters.
 * @param am_info        Filled with the selected AM transport; zeroed if an
 *                       AM lane is not required.
 * @param select_ctx     Selection context to update.
 *
 * @return UCS_OK if no AM lane is required, the selection error if no
 *         suitable transport exists, otherwise the status of adding the lane.
 */
static ucs_status_t
ucp_wireup_add_am_lane(const ucp_wireup_select_params_t *select_params,
                       ucp_wireup_select_info_t *am_info,
                       ucp_wireup_select_context_t *select_ctx)
{
    ucp_worker_h worker            = select_params->ep->worker;
    ucp_tl_bitmap_t am_tl_bitmap   = select_params->tl_bitmap;
    ucp_wireup_criteria_t criteria = {0};
    const uct_iface_attr_t *am_iface_attr;
    ucs_status_t status;

    if (!ucp_wireup_is_am_required(select_params, select_ctx)) {
        memset(am_info, 0, sizeof(*am_info));
        return UCS_OK;
    }

    /* Select one lane for active messages */
    while (1) {
        criteria.title              = "active messages";
        criteria.remote_iface_flags = UCP_ADDR_IFACE_FLAG_AM_SYNC;
        criteria.local_iface_flags  = UCT_IFACE_FLAG_AM_BCOPY;
        criteria.remote_event_flags = 0;
        criteria.local_event_flags  = 0;
        criteria.calc_score         = ucp_wireup_am_score_func;
        ucp_wireup_fill_peer_err_criteria(&criteria,
                                          ucp_wireup_ep_init_flags(select_params,
                                                                   select_ctx));

        if (ucs_test_all_flags(ucp_ep_get_context_features(select_params->ep),
                               UCP_FEATURE_TAG | UCP_FEATURE_WAKEUP)) {
            criteria.local_event_flags = UCP_WIREUP_UCT_EVENT_CAP_FLAGS;
        }

        status = ucp_wireup_select_transport(select_ctx, select_params,
                                             &criteria, am_tl_bitmap,
                                             UINT64_MAX, UINT64_MAX, UINT64_MAX,
                                             1, am_info);
        if (status != UCS_OK) {
            return status;
        }

        am_iface_attr = ucp_worker_iface_get_attr(worker, am_info->rsc_index);
        if (!(am_iface_attr->cap.am.max_bcopy < UCP_MIN_BCOPY)) {
            break;
        }

        /* max_bcopy is too small - exclude this resource and try again */
        ucs_debug("ep %p: rsc_index[%d] am.max_bcopy is too small: %zu, "
                  "expected: >= %d", select_params->ep, am_info->rsc_index,
                  am_iface_attr->cap.am.max_bcopy, UCP_MIN_BCOPY);
        UCS_BITMAP_UNSET(am_tl_bitmap, am_info->rsc_index);
    }

    return ucp_wireup_add_lane(select_params, am_info, UCP_LANE_TYPE_AM,
                               select_ctx);
}
/**
 * Score function for high-bandwidth AM lanes: the effective bandwidth of
 * sending one message of the maximal AM bcopy size, scaled by 1e-5. The
 * estimated time is size/bandwidth plus the local and remote overheads and
 * the latency, with bandwidth being the smaller of local and remote.
 *
 * @note @a md_attr is not used.
 */
static double ucp_wireup_am_bw_score_func(ucp_context_h context,
                                          const uct_md_attr_t *md_attr,
                                          const uct_iface_attr_t *iface_attr,
                                          const ucp_address_iface_attr_t *remote_iface_attr)
{
    double msg_size;
    double send_time;

    msg_size  = iface_attr->cap.am.max_bcopy;
    send_time = (msg_size /
                 ucs_min(ucp_tl_iface_bandwidth(context, &iface_attr->bandwidth),
                         remote_iface_attr->bandwidth)) +
                iface_attr->overhead + remote_iface_attr->overhead +
                ucp_wireup_tl_iface_latency(context, iface_attr,
                                            remote_iface_attr);

    return msg_size / send_time * 1e-5;
}
/*
 * Add up to bw_info->max_lanes lanes of type bw_info->lane_type.
 *
 * Lanes are selected one by one. After each selection the usage counters of
 * the local and remote devices are incremented, and a device is excluded from
 * further selections once its counter reaches its number of paths. Selection
 * also stops when the number of used memory domains reaches UCP_MAX_OP_MDS,
 * or when selecting or adding a lane fails.
 *
 * If 'excl_lane' is not UCP_NULL_LANE, the first iteration does not select a
 * new lane: it only accounts for the devices and memory domain of that
 * existing lane.
 *
 * Returns the number of lanes which were added.
 */
static unsigned
ucp_wireup_add_bw_lanes(const ucp_wireup_select_params_t *select_params,
const ucp_wireup_select_bw_info_t *bw_info,
ucp_tl_bitmap_t tl_bitmap, ucp_lane_index_t excl_lane,
ucp_wireup_select_context_t *select_ctx)
{
ucp_ep_h ep = select_params->ep;
ucp_context_h context = ep->worker->context;
ucp_wireup_select_info_t sinfo = {0};
/* Number of times each local/remote device was used so far */
unsigned local_dev_count[UCP_MAX_RESOURCES] = {0};
unsigned remote_dev_count[UCP_MAX_RESOURCES] = {0};
const uct_iface_attr_t *iface_attr;
const ucp_address_entry_t *ae;
ucs_status_t status;
unsigned num_lanes;
uint64_t local_dev_bitmap;
uint64_t remote_dev_bitmap;
ucp_rsc_index_t dev_index;
ucp_md_map_t md_map;
ucp_rsc_index_t rsc_index;
unsigned addr_index;
num_lanes = 0;
md_map = bw_info->md_map;
local_dev_bitmap = bw_info->local_dev_bitmap;
remote_dev_bitmap = bw_info->remote_dev_bitmap;
/* Look for the requested number of lanes, limited by the number of memory
 * domains in use */
while ((num_lanes < bw_info->max_lanes) &&
(ucs_popcount(md_map) < UCP_MAX_OP_MDS)) {
if (excl_lane == UCP_NULL_LANE) {
status = ucp_wireup_select_transport(select_ctx, select_params,
&bw_info->criteria, tl_bitmap,
UINT64_MAX, local_dev_bitmap,
remote_dev_bitmap, 0, &sinfo);
if (status != UCS_OK) {
break;
}
rsc_index = sinfo.rsc_index;
addr_index = sinfo.addr_index;
dev_index = context->tl_rscs[rsc_index].dev_index;
/* The path index is the number of times this local device was already
 * used */
sinfo.path_index = local_dev_count[dev_index];
status = ucp_wireup_add_lane(select_params, &sinfo,
bw_info->lane_type, select_ctx);
if (status != UCS_OK) {
break;
}
num_lanes++;
} else {
/* Only count the existing lane; it is handled once */
addr_index = select_ctx->lane_descs[excl_lane].addr_index;
rsc_index = select_ctx->lane_descs[excl_lane].rsc_index;
dev_index = context->tl_rscs[rsc_index].dev_index;
excl_lane = UCP_NULL_LANE;
}
/* Count the usage of the local device and exclude it when all of its
 * paths are used */
iface_attr = ucp_worker_iface_get_attr(ep->worker, rsc_index);
++local_dev_count[dev_index];
if (local_dev_count[dev_index] >= iface_attr->dev_num_paths) {
local_dev_bitmap &= ~UCS_BIT(dev_index);
}
/* Same accounting for the remote device */
ae = &select_params->address->address_list[addr_index];
++remote_dev_count[ae->dev_index];
if (remote_dev_count[ae->dev_index] >= ae->dev_num_paths) {
remote_dev_bitmap &= ~UCS_BIT(ae->dev_index);
}
md_map |= UCS_BIT(context->tl_rscs[rsc_index].md_index);
}
return num_lanes;
}
/*
 * Add high-bandwidth active message lanes (UCP_LANE_TYPE_AM_BW).
 *
 * Nothing is added (UCS_OK is returned) when neither TAG nor AM feature is
 * enabled, when the endpoint is a memory-type or AM-lane-only endpoint, or
 * when the configured max_eager_lanes is below 2.
 *
 * The already selected AM lane, if any, is passed to
 * ucp_wireup_add_bw_lanes() as the excluded lane, and at most
 * (max_eager_lanes - 1) additional lanes are requested.
 *
 * Returns UCS_OK if an AM lane exists or at least one AM_BW lane was added,
 * UCS_ERR_UNREACHABLE otherwise.
 */
static ucs_status_t
ucp_wireup_add_am_bw_lanes(const ucp_wireup_select_params_t *select_params,
                           ucp_wireup_select_context_t *select_ctx)
{
    ucp_ep_h ep              = select_params->ep;
    ucp_context_h context    = ep->worker->context;
    unsigned ep_init_flags   = ucp_wireup_ep_init_flags(select_params,
                                                        select_ctx);
    ucp_lane_index_t am_lane = UCP_NULL_LANE;
    ucp_wireup_select_bw_info_t bw_info;
    ucp_lane_index_t idx;
    unsigned added;

    if (!(ucp_ep_get_context_features(ep) &
          (UCP_FEATURE_TAG | UCP_FEATURE_AM))) {
        return UCS_OK;
    }

    if (ep_init_flags & (UCP_EP_INIT_FLAG_MEM_TYPE |
                         UCP_EP_INIT_CREATE_AM_LANE_ONLY)) {
        return UCS_OK;
    }

    if (context->config.ext.max_eager_lanes < 2) {
        return UCS_OK;
    }

    /* Selection limits */
    bw_info.local_dev_bitmap  = UINT64_MAX;
    bw_info.remote_dev_bitmap = UINT64_MAX;
    bw_info.md_map            = 0;
    bw_info.max_lanes         = context->config.ext.max_eager_lanes - 1;
    bw_info.lane_type         = UCP_LANE_TYPE_AM_BW;

    /* Selection criteria */
    bw_info.criteria.title              = "high-bw active messages";
    bw_info.criteria.calc_score         = ucp_wireup_am_bw_score_func;
    bw_info.criteria.local_md_flags     = 0;
    bw_info.criteria.tl_rsc_flags       = 0;
    bw_info.criteria.local_iface_flags  = UCT_IFACE_FLAG_AM_BCOPY;
    bw_info.criteria.remote_iface_flags = UCP_ADDR_IFACE_FLAG_AM_SYNC;
    bw_info.criteria.local_event_flags  = 0;
    bw_info.criteria.remote_event_flags = 0;
    ucp_wireup_clean_amo_criteria(&bw_info.criteria);
    ucp_wireup_fill_peer_err_criteria(&bw_info.criteria, ep_init_flags);

    if (ucs_test_all_flags(ucp_ep_get_context_features(ep),
                           UCP_FEATURE_TAG | UCP_FEATURE_WAKEUP)) {
        bw_info.criteria.local_event_flags = UCP_WIREUP_UCT_EVENT_CAP_FLAGS;
    }

    /* Locate the first lane carrying the AM type */
    idx = 0;
    while (idx < select_ctx->num_lanes) {
        if (select_ctx->lane_descs[idx].lane_types &
            UCS_BIT(UCP_LANE_TYPE_AM)) {
            am_lane = idx;
            break;
        }
        ++idx;
    }

    added = ucp_wireup_add_bw_lanes(select_params, &bw_info,
                                    ucp_tl_bitmap_max, am_lane, select_ctx);
    if ((am_lane == UCP_NULL_LANE) && (added == 0)) {
        return UCS_ERR_UNREACHABLE;
    }

    return UCS_OK;
}
/*
 * Map a rendezvous mode to the local UCT interface capability flags it needs:
 * GET zcopy and/or PUT zcopy. Any mode other than AUTO, GET_ZCOPY and
 * PUT_ZCOPY maps to 0.
 */
static uint64_t ucp_wireup_get_rma_bw_iface_flags(ucp_rndv_mode_t rndv_mode)
{
    uint64_t iface_flags = 0;

    switch (rndv_mode) {
    case UCP_RNDV_MODE_AUTO:
        iface_flags = (UCT_IFACE_FLAG_GET_ZCOPY | UCT_IFACE_FLAG_PUT_ZCOPY);
        break;
    case UCP_RNDV_MODE_GET_ZCOPY:
        iface_flags = UCT_IFACE_FLAG_GET_ZCOPY;
        break;
    case UCP_RNDV_MODE_PUT_ZCOPY:
        iface_flags = UCT_IFACE_FLAG_PUT_ZCOPY;
        break;
    default:
        break;
    }

    return iface_flags;
}
/*
 * Map a rendezvous mode to the remote (packed address) interface flags it
 * needs: GET and/or PUT. Any mode other than AUTO, GET_ZCOPY and PUT_ZCOPY
 * maps to 0.
 */
static uint64_t ucp_wireup_get_rndv_peer_flags(ucp_rndv_mode_t rndv_mode)
{
    if (rndv_mode == UCP_RNDV_MODE_AUTO) {
        return (UCP_ADDR_IFACE_FLAG_GET | UCP_ADDR_IFACE_FLAG_PUT);
    }

    if (rndv_mode == UCP_RNDV_MODE_GET_ZCOPY) {
        return UCP_ADDR_IFACE_FLAG_GET;
    }

    if (rndv_mode == UCP_RNDV_MODE_PUT_ZCOPY) {
        return UCP_ADDR_IFACE_FLAG_PUT;
    }

    return 0;
}
/*
 * Add lanes used for high-bandwidth remote memory access (UCP_LANE_TYPE_RMA_BW)
 * and, when applicable, a single lane of type UCP_LANE_TYPE_RKEY_PTR.
 *
 * Nothing is added when the endpoint is created with
 * UCP_EP_INIT_CREATE_AM_LANE_ONLY, or when it is not a memory-type endpoint
 * and neither the TAG nor the AM feature is enabled.
 *
 * Always returns UCS_OK; the return values of ucp_wireup_add_bw_lanes() are
 * used only to decide whether another rendezvous mode should be tried.
 */
static ucs_status_t
ucp_wireup_add_rma_bw_lanes(const ucp_wireup_select_params_t *select_params,
ucp_wireup_select_context_t *select_ctx)
{
ucp_ep_h ep = select_params->ep;
ucp_context_h context = ep->worker->context;
unsigned ep_init_flags = ucp_wireup_ep_init_flags(select_params,
select_ctx);
uint64_t iface_rma_flags = 0;
uint64_t peer_rma_flags = 0;
/* Modes are tried in this order: the configured mode first, then GET zcopy,
 * then PUT zcopy */
const ucp_rndv_mode_t rndv_modes[] = {
context->config.ext.rndv_mode,
UCP_RNDV_MODE_GET_ZCOPY,
UCP_RNDV_MODE_PUT_ZCOPY
};
ucp_wireup_select_bw_info_t bw_info;
ucs_memory_type_t mem_type;
size_t added_lanes;
uint64_t md_reg_flag;
ucp_tl_bitmap_t tl_bitmap;
uint8_t i;
if (ep_init_flags & UCP_EP_INIT_CREATE_AM_LANE_ONLY) {
return UCS_OK;
}
/* Memory-type endpoints do not require memory registration capability on the
 * local MD; TAG/AM endpoints do; everything else needs no RMA_BW lanes */
if (ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) {
md_reg_flag = 0;
} else if (ucp_ep_get_context_features(ep) &
(UCP_FEATURE_TAG | UCP_FEATURE_AM)) {
md_reg_flag = UCT_MD_FLAG_REG;
} else {
return UCS_OK;
}
/* Base criteria shared by the rkey-ptr selection and the RMA_BW selection */
bw_info.criteria.remote_iface_flags = 0;
bw_info.criteria.local_iface_flags = UCT_IFACE_FLAG_PENDING;
bw_info.criteria.remote_event_flags = 0;
bw_info.criteria.local_event_flags = 0;
bw_info.criteria.calc_score = ucp_wireup_rma_bw_score_func;
bw_info.criteria.tl_rsc_flags = 0;
bw_info.criteria.local_md_flags = md_reg_flag;
ucp_wireup_clean_amo_criteria(&bw_info.criteria);
ucp_wireup_fill_peer_err_criteria(&bw_info.criteria, ep_init_flags);
if (ucs_test_all_flags(ucp_ep_get_context_features(ep),
UCP_FEATURE_TAG | UCP_FEATURE_WAKEUP)) {
bw_info.criteria.local_event_flags = UCP_WIREUP_UCT_EVENT_CAP_FLAGS;
}
bw_info.local_dev_bitmap = UINT64_MAX;
bw_info.remote_dev_bitmap = UINT64_MAX;
bw_info.md_map = 0;
/* In AUTO mode on a regular endpoint, first try to add one lane whose local
 * MD additionally has UCT_MD_FLAG_RKEY_PTR, restricted to transports that can
 * access host memory. The number of lanes added here is not checked. */
if (!(ep_init_flags & UCP_EP_INIT_FLAG_MEM_TYPE) &&
(context->config.ext.rndv_mode == UCP_RNDV_MODE_AUTO)) {
bw_info.lane_type = UCP_LANE_TYPE_RKEY_PTR;
bw_info.criteria.title = "obtain remote memory pointer";
bw_info.criteria.local_md_flags |= UCT_MD_FLAG_RKEY_PTR;
bw_info.max_lanes = 1;
ucp_wireup_add_bw_lanes(select_params, &bw_info,
context->mem_type_access_tls[UCS_MEMORY_TYPE_HOST],
UCP_NULL_LANE, select_ctx);
}
/* Switch the criteria to RMA_BW selection; local_md_flags is reset so the
 * RKEY_PTR requirement set above does not carry over */
bw_info.lane_type = UCP_LANE_TYPE_RMA_BW;
bw_info.criteria.title = "high-bw remote memory access";
bw_info.max_lanes = context->config.ext.max_rndv_lanes;
bw_info.criteria.local_md_flags = md_reg_flag;
if (ep_init_flags & UCP_EP_INIT_ERR_MODE_PEER_FAILURE) {
bw_info.criteria.local_md_flags |= UCT_MD_FLAG_INVALIDATE;
}
/* The memory type loop below starts from UCS_MEMORY_TYPE_HOST */
UCS_STATIC_ASSERT(UCS_MEMORY_TYPE_HOST == 0);
for (i = 0; i < ucs_static_array_size(rndv_modes); i++) {
/* Drop the RMA flags of the previously tried mode (none on the first
 * iteration), then require the flags of the current mode */
bw_info.criteria.local_iface_flags &= ~iface_rma_flags;
bw_info.criteria.remote_iface_flags &= ~peer_rma_flags;
iface_rma_flags = ucp_wireup_get_rma_bw_iface_flags(rndv_modes[i]);
peer_rma_flags = ucp_wireup_get_rndv_peer_flags(rndv_modes[i]);
bw_info.criteria.local_iface_flags |= iface_rma_flags;
bw_info.criteria.remote_iface_flags |= peer_rma_flags;
added_lanes = 0;
/* tl_bitmap accumulates the transports already offered for previous memory
 * types, so each transport is offered to the selection only once per mode */
UCS_BITMAP_CLEAR(&tl_bitmap);
for (mem_type = UCS_MEMORY_TYPE_HOST;
mem_type < UCS_MEMORY_TYPE_LAST; mem_type++) {
if (UCS_BITMAP_IS_ZERO_INPLACE(
&context->mem_type_access_tls[mem_type])) {
continue;
}
added_lanes += ucp_wireup_add_bw_lanes(
select_params, &bw_info,
UCP_TL_BITMAP_AND_NOT(
context->mem_type_access_tls[mem_type], tl_bitmap),
UCP_NULL_LANE, select_ctx);
UCS_BITMAP_OR_INPLACE(&tl_bitmap,
context->mem_type_access_tls[mem_type]);
}
/* Stop as soon as some lanes were added. If none were added, only the AUTO
 * mode is allowed to fall back to the next mode in rndv_modes[]. */
if (added_lanes ||
(context->config.ext.rndv_mode != UCP_RNDV_MODE_AUTO)) {
break;
}
}
return UCS_OK;
}
/*
 * Add a tag-offload lane (UCP_LANE_TYPE_TAG) when it is worth it.
 *
 * No lane is added when the TAG feature is disabled, when the endpoint is a
 * memory-type or AM-lane-only endpoint, or when any error handling mode
 * other than UCP_ERR_HANDLING_MODE_NONE is requested.
 *
 * A transport matching the tag-offload criteria is selected and added only
 * if its score is not lower than the score of the AM selection (am_info).
 * Failure to find such a transport is not an error: UCS_OK is returned.
 */
static ucs_status_t
ucp_wireup_add_tag_lane(const ucp_wireup_select_params_t *select_params,
                        const ucp_wireup_select_info_t *am_info,
                        ucp_err_handling_mode_t err_mode,
                        ucp_wireup_select_context_t *select_ctx)
{
    ucp_ep_h ep                       = select_params->ep;
    ucp_wireup_criteria_t criteria    = {0};
    ucp_wireup_select_info_t tag_info = {0};
    unsigned ep_init_flags            = ucp_wireup_ep_init_flags(select_params,
                                                                 select_ctx);

    if (!(ucp_ep_get_context_features(ep) & UCP_FEATURE_TAG)) {
        return UCS_OK;
    }

    if (ep_init_flags & (UCP_EP_INIT_FLAG_MEM_TYPE |
                         UCP_EP_INIT_CREATE_AM_LANE_ONLY)) {
        return UCS_OK;
    }

    if (err_mode != UCP_ERR_HANDLING_MODE_NONE) {
        return UCS_OK;
    }

    criteria.title              = "tag_offload";
    criteria.calc_score         = ucp_wireup_am_score_func;
    criteria.local_md_flags     = UCT_MD_FLAG_REG;
    criteria.local_iface_flags  = UCT_IFACE_FLAG_TAG_EAGER_BCOPY |
                                  UCT_IFACE_FLAG_TAG_RNDV_ZCOPY |
                                  UCT_IFACE_FLAG_GET_ZCOPY |
                                  UCT_IFACE_FLAG_PENDING;
    criteria.remote_iface_flags = UCP_ADDR_IFACE_FLAG_TAG_EAGER |
                                  UCP_ADDR_IFACE_FLAG_TAG_RNDV |
                                  UCP_ADDR_IFACE_FLAG_GET;
    criteria.remote_event_flags = 0;

    if (ucs_test_all_flags(ucp_ep_get_context_features(ep),
                           UCP_FEATURE_WAKEUP)) {
        criteria.local_event_flags = UCP_WIREUP_UCT_EVENT_CAP_FLAGS;
    }

    if (ucp_wireup_select_transport(select_ctx, select_params, &criteria,
                                    ucp_tl_bitmap_max, UINT64_MAX,
                                    UINT64_MAX, UINT64_MAX, 0,
                                    &tag_info) != UCS_OK) {
        /* No suitable transport: tag offload is simply not used */
        return UCS_OK;
    }

    if (ucp_score_cmp(tag_info.score, am_info->score) < 0) {
        /* The selected tag transport scores lower than the AM selection */
        return UCS_OK;
    }

    return ucp_wireup_add_lane(select_params, &tag_info, UCP_LANE_TYPE_TAG,
                               select_ctx);
}
/*
 * Choose the lane used to send wireup messages.
 *
 * Lanes without a resource (rsc_index == UCP_NULL_RESOURCE) are ignored.
 * The first lane whose local capability/event flags and remote flags
 * satisfy the auxiliary criteria is returned. If no lane qualifies, the
 * last lane that uses a p2p transport is returned, or UCP_NULL_LANE if
 * there is none.
 */
static ucp_lane_index_t
ucp_wireup_select_wireup_msg_lane(ucp_worker_h worker,
                                  unsigned ep_init_flags,
                                  const ucp_address_entry_t *address_list,
                                  const ucp_wireup_lane_desc_t *lane_descs,
                                  ucp_lane_index_t num_lanes)
{
    ucp_context_h context          = worker->context;
    ucp_lane_index_t fallback_lane = UCP_NULL_LANE;
    ucp_wireup_criteria_t criteria = {0};
    const ucp_wireup_lane_desc_t *desc;
    const ucp_address_entry_t *remote;
    uct_tl_resource_desc_t *tl_rsc;
    uct_iface_attr_t *local_attr;
    ucp_rsc_index_t rsc_index;
    ucp_lane_index_t lane;
    int lane_fits;

    ucp_wireup_fill_aux_criteria(&criteria, ep_init_flags);

    for (lane = 0; lane < num_lanes; ++lane) {
        desc = &lane_descs[lane];
        if (desc->rsc_index == UCP_NULL_RESOURCE) {
            continue;
        }

        rsc_index  = desc->rsc_index;
        remote     = &address_list[desc->addr_index];
        tl_rsc     = &context->tl_rscs[rsc_index].tl_rsc;
        local_attr = ucp_worker_iface_get_attr(worker, rsc_index);

        /* Checks are evaluated in order and stop at the first failure */
        lane_fits = ucp_wireup_check_flags(tl_rsc, local_attr->cap.flags,
                                           criteria.local_iface_flags,
                                           criteria.title,
                                           ucp_wireup_iface_flags, NULL, 0) &&
                    ucp_wireup_check_flags(tl_rsc,
                                           local_attr->cap.event_flags,
                                           criteria.local_event_flags,
                                           criteria.title,
                                           ucp_wireup_event_flags, NULL, 0) &&
                    ucp_wireup_check_flags(tl_rsc, remote->iface_attr.flags,
                                           criteria.remote_iface_flags,
                                           criteria.title,
                                           ucp_wireup_peer_flags, NULL, 0) &&
                    ucp_wireup_check_flags(tl_rsc, remote->iface_attr.flags,
                                           criteria.remote_event_flags,
                                           criteria.title,
                                           ucp_wireup_peer_flags, NULL, 0);
        if (lane_fits) {
            return lane;
        }

        if (ucp_worker_is_tl_p2p(worker, rsc_index)) {
            fallback_lane = lane;
        }
    }

    return fallback_lane;
}
/*
 * Fill all fields of the selection parameters structure from the given
 * arguments. allow_am is derived from ep_init_flags by
 * ucp_wireup_allow_am_emulation_layer().
 */
static UCS_F_NOINLINE void
ucp_wireup_select_params_init(ucp_wireup_select_params_t *select_params,
                              ucp_ep_h ep, unsigned ep_init_flags,
                              const ucp_unpacked_address_t *remote_address,
                              ucp_tl_bitmap_t tl_bitmap, int show_error)
{
    *select_params = (ucp_wireup_select_params_t) {
        .ep            = ep,
        .ep_init_flags = ep_init_flags,
        .tl_bitmap     = tl_bitmap,
        .address       = remote_address,
        .allow_am      = ucp_wireup_allow_am_emulation_layer(ep_init_flags),
        .show_error    = show_error
    };
}
/*
 * Run the complete lane selection into a freshly zeroed select_ctx.
 *
 * The selection steps run in a fixed order: CM, RMA, AMO, AM, RMA_BW, TAG,
 * AM_BW. The AM selection result (am_info) is passed on to the TAG step.
 * The first failing step aborts the search and its status is returned.
 *
 * If all steps succeed but no lane was selected, an error is logged and
 * UCS_ERR_UNREACHABLE is returned.
 */
static UCS_F_NOINLINE ucs_status_t
ucp_wireup_search_lanes(const ucp_wireup_select_params_t *select_params,
                        ucp_err_handling_mode_t err_mode,
                        ucp_wireup_select_context_t *select_ctx)
{
    ucp_wireup_select_info_t am_info;
    ucs_status_t status;

    memset(select_ctx, 0, sizeof(*select_ctx));

    status = ucp_wireup_add_cm_lane(select_params, select_ctx);
    if (status != UCS_OK) {
        goto out;
    }

    status = ucp_wireup_add_rma_lanes(select_params, select_ctx);
    if (status != UCS_OK) {
        goto out;
    }

    status = ucp_wireup_add_amo_lanes(select_params, select_ctx);
    if (status != UCS_OK) {
        goto out;
    }

    status = ucp_wireup_add_am_lane(select_params, &am_info, select_ctx);
    if (status != UCS_OK) {
        goto out;
    }

    status = ucp_wireup_add_rma_bw_lanes(select_params, select_ctx);
    if (status != UCS_OK) {
        goto out;
    }

    status = ucp_wireup_add_tag_lane(select_params, &am_info, err_mode,
                                     select_ctx);
    if (status != UCS_OK) {
        goto out;
    }

    status = ucp_wireup_add_am_bw_lanes(select_params, select_ctx);
    if (status != UCS_OK) {
        goto out;
    }

    if (select_ctx->num_lanes == 0) {
        ucs_error("No transports selected to %s (features: 0x%"PRIx64")",
                  select_params->address->name,
                  ucp_ep_get_context_features(select_params->ep));
        status = UCS_ERR_UNREACHABLE;
    }

out:
    return status;
}
/*
 * Build key->ep_check_map: the set of lanes on which an explicit endpoint
 * check has to be performed.
 *
 * The map stays empty when no error handling is requested
 * (UCP_ERR_HANDLING_MODE_NONE).
 *
 * Otherwise the lanes are scanned twice:
 *  1. Every device that has a lane whose interface reports
 *     UCT_IFACE_FLAG_EP_KEEPALIVE is marked as covered.
 *  2. For every device that is not covered yet, the first lane whose
 *     interface reports UCT_IFACE_FLAG_EP_CHECK is added to the map and the
 *     device is marked as covered. Among shared-memory devices only one
 *     lane in total is added.
 *
 * Lanes without a resource (UCP_NULL_RESOURCE) are ignored.
 */
static void ucp_wireup_init_keepalive_map(ucp_worker_h worker,
                                          ucp_ep_config_key_t *key)
{
    ucp_context_h context  = worker->context;
    int shm_added_ep_check = 0;
    uct_tl_resource_desc_t *resource;
    ucp_lane_index_t lane;
    ucp_rsc_index_t rsc_index;
    ucp_rsc_index_t dev_index;
    uct_iface_attr_t *iface_attr;
    uint64_t dev_map_used;

    key->ep_check_map = 0;
    if (key->err_mode == UCP_ERR_HANDLING_MODE_NONE) {
        return;
    }

    dev_map_used = 0;

    /* Pass 1: devices covered by a lane with keepalive capability */
    for (lane = 0; lane < key->num_lanes; ++lane) {
        rsc_index = key->lanes[lane].rsc_index;
        if (rsc_index == UCP_NULL_RESOURCE) {
            continue;
        }

        dev_index = context->tl_rscs[rsc_index].dev_index;
        ucs_assert(dev_index < (sizeof(dev_map_used) * 8));
        iface_attr = ucp_worker_iface_get_attr(worker, rsc_index);
        if (iface_attr->cap.flags & UCT_IFACE_FLAG_EP_KEEPALIVE) {
            dev_map_used |= UCS_BIT(dev_index);
        }
    }

    /* Pass 2: pick one ep_check lane for each device not covered so far */
    for (lane = 0; lane < key->num_lanes; ++lane) {
        rsc_index = key->lanes[lane].rsc_index;
        if (rsc_index == UCP_NULL_RESOURCE) {
            continue;
        }

        resource  = &context->tl_rscs[rsc_index].tl_rsc;
        dev_index = context->tl_rscs[rsc_index].dev_index;
        ucs_assert(dev_index < (sizeof(dev_map_used) * 8));
        iface_attr = ucp_worker_iface_get_attr(worker, rsc_index);
        if (!(UCS_BIT(dev_index) & dev_map_used) &&
            (iface_attr->cap.flags & UCT_IFACE_FLAG_EP_CHECK)) {
            ucs_assert(!(key->ep_check_map & UCS_BIT(lane)));

            /* The device type is an enumeration value, not a bit mask, so it
             * has to be compared for equality: a bitwise test would also
             * match other device types that share bits with
             * UCT_DEVICE_TYPE_SHM and wrongly skip their ep_check lane. */
            if (resource->dev_type == UCT_DEVICE_TYPE_SHM) {
                if (shm_added_ep_check) {
                    /* One shared-memory ep_check lane is enough */
                    continue;
                }

                shm_added_ep_check = 1;
            }

            key->ep_check_map |= UCS_BIT(lane);
            dev_map_used      |= UCS_BIT(dev_index);
        }
    }
}
/*
 * Translate the result of lane selection (select_ctx) into the endpoint
 * configuration key.
 *
 * For every selected lane the per-lane fields of the key are filled and
 * addr_indices[lane] receives the index of the remote address entry used by
 * the lane. The lane is also recorded in the per-type lane fields of the key
 * (cm, am, am_bw, rma, rma_bw, rkey_ptr, amo, tag). The am_bw/rma/rma_bw/amo
 * lane arrays are then sorted with their respective score comparators.
 *
 * Afterwards:
 *  - the wireup message lane is selected, unless CM is used and the endpoint
 *    is already locally connected;
 *  - key->rma_bw_md_map is built from the memory domains of the RMA_BW lanes
 *    (in sorted order) that need a remote key, limited to UCP_MAX_OP_MDS
 *    entries, plus the MD of the rkey_ptr lane if there is still room;
 *  - key->am_bw_lanes[0] is set to the AM lane;
 *  - the keepalive/ep_check map is initialized.
 *
 * The asserts on the single-lane fields (cm_lane, am_lane, rkey_ptr_lane,
 * tag_lane) expect them to be UCP_NULL_LANE on entry.
 */
static UCS_F_NOINLINE void
ucp_wireup_construct_lanes(const ucp_wireup_select_params_t *select_params,
                           ucp_wireup_select_context_t *select_ctx,
                           unsigned *addr_indices, ucp_ep_config_key_t *key)
{
    ucp_ep_h ep           = select_params->ep;
    ucp_worker_h worker   = ep->worker;
    ucp_context_h context = worker->context;
    const ucp_wireup_lane_desc_t *lane_desc;
    ucp_lane_type_mask_t lane_types;
    ucp_rsc_index_t rsc_index;
    ucp_md_index_t md_index;
    ucp_lane_index_t lane;
    ucp_lane_index_t i;

    key->num_lanes = select_ctx->num_lanes;
    for (lane = 0; lane < key->num_lanes; ++lane) {
        lane_desc  = &select_ctx->lane_descs[lane];
        lane_types = lane_desc->lane_types;
        ucs_assert(lane_types != 0);

        key->lanes[lane].rsc_index    = lane_desc->rsc_index;
        key->lanes[lane].dst_md_index = lane_desc->dst_md_index;
        key->lanes[lane].dst_sys_dev  = lane_desc->dst_sys_dev;
        key->lanes[lane].path_index   = lane_desc->path_index;
        key->lanes[lane].lane_types   = lane_types;
        key->lanes[lane].seg_size     = lane_desc->seg_size;
        addr_indices[lane]            = lane_desc->addr_index;

        if (lane_types & UCS_BIT(UCP_LANE_TYPE_CM)) {
            ucs_assert(key->cm_lane == UCP_NULL_LANE);
            key->cm_lane = lane;
            /* A CM lane is expected to carry no other lane type */
            ucs_assert(ucs_popcount(lane_types) == 1);
            continue;
        }

        if (lane_types & UCS_BIT(UCP_LANE_TYPE_AM)) {
            ucs_assert(key->am_lane == UCP_NULL_LANE);
            key->am_lane = lane;
        }

        /* Slot 0 of am_bw_lanes is reserved for the AM lane (set at the end
         * of this function), so AM_BW lanes are stored shifted by one */
        if ((lane_types & UCS_BIT(UCP_LANE_TYPE_AM_BW)) &&
            (lane < UCP_MAX_LANES - 1)) {
            key->am_bw_lanes[lane + 1] = lane;
        }

        if (lane_types & UCS_BIT(UCP_LANE_TYPE_RMA)) {
            key->rma_lanes[lane] = lane;
        }

        if (lane_types & UCS_BIT(UCP_LANE_TYPE_RMA_BW)) {
            key->rma_bw_lanes[lane] = lane;
        }

        if (lane_types & UCS_BIT(UCP_LANE_TYPE_RKEY_PTR)) {
            ucs_assert(key->rkey_ptr_lane == UCP_NULL_LANE);
            key->rkey_ptr_lane = lane;
        }

        if (lane_types & UCS_BIT(UCP_LANE_TYPE_AMO)) {
            key->amo_lanes[lane] = lane;
        }

        if (lane_types & UCS_BIT(UCP_LANE_TYPE_TAG)) {
            ucs_assert(key->tag_lane == UCP_NULL_LANE);
            key->tag_lane = lane;
        }
    }

    /* Sort the per-type lane arrays with their score comparators */
    ucs_qsort_r(key->am_bw_lanes + 1, UCP_MAX_LANES - 1, sizeof(ucp_lane_index_t),
                ucp_wireup_compare_lane_am_bw_score, select_ctx->lane_descs);
    ucs_qsort_r(key->rma_lanes, UCP_MAX_LANES, sizeof(ucp_lane_index_t),
                ucp_wireup_compare_lane_rma_score, select_ctx->lane_descs);
    ucs_qsort_r(key->rma_bw_lanes, UCP_MAX_LANES, sizeof(ucp_lane_index_t),
                ucp_wireup_compare_lane_rma_bw_score, select_ctx->lane_descs);
    ucs_qsort_r(key->amo_lanes, UCP_MAX_LANES, sizeof(ucp_lane_index_t),
                ucp_wireup_compare_lane_amo_score, select_ctx->lane_descs);

    /* Select the wireup message lane unless CM is in use and the endpoint is
     * already locally connected */
    if (!ucp_ep_init_flags_has_cm(select_params->ep_init_flags) ||
        !(ep->flags & UCP_EP_FLAG_LOCAL_CONNECTED)) {
        key->wireup_msg_lane =
                ucp_wireup_select_wireup_msg_lane(
                        worker,
                        ucp_wireup_ep_init_flags(select_params, select_ctx),
                        select_params->address->address_list,
                        select_ctx->lane_descs, key->num_lanes);
    }

    /* Collect the memory domains of the RMA_BW lanes, in sorted order.
     * The explicit index bound keeps the scan inside rma_bw_lanes[] even when
     * every slot holds a lane and no UCP_NULL_LANE terminator is present. */
    for (i = 0;
         (i < UCP_MAX_LANES) &&
         (key->rma_bw_lanes[i] != UCP_NULL_LANE) &&
         (ucs_popcount(key->rma_bw_md_map) < UCP_MAX_OP_MDS); i++) {
        lane      = key->rma_bw_lanes[i];
        rsc_index = select_ctx->lane_descs[lane].rsc_index;
        md_index  = context->tl_rscs[rsc_index].md_index;

        /* Only MDs that need a remote key are added; transports whose name
         * contains "ugni" are excluded */
        if ((context->tl_mds[md_index].attr.cap.flags & UCT_MD_FLAG_NEED_RKEY) &&
            !(strstr(context->tl_rscs[rsc_index].tl_rsc.tl_name, "ugni"))) {
            key->rma_bw_md_map |= UCS_BIT(md_index);
        }
    }

    /* The MD of the rkey_ptr lane is added if the map still has room */
    if ((key->rkey_ptr_lane != UCP_NULL_LANE) &&
        (ucs_popcount(key->rma_bw_md_map) < UCP_MAX_OP_MDS)) {
        rsc_index           = select_ctx->lane_descs[key->rkey_ptr_lane].rsc_index;
        md_index            = context->tl_rscs[rsc_index].md_index;
        key->rma_bw_md_map |= UCS_BIT(md_index);
    }

    /* The AM lane always comes first in the am_bw lane list */
    key->am_bw_lanes[0] = key->am_lane;
    ucp_wireup_init_keepalive_map(worker, key);
}
/*
 * Select lanes for an endpoint and build the endpoint configuration key.
 *
 * If the intersection of tl_bitmap with the worker's scalable transports is
 * not empty, the selection is first attempted on that subset with error
 * reporting disabled (show_error = 0). If the subset is empty or the attempt
 * fails, the selection is repeated on the full tl_bitmap with error reporting
 * enabled; a failure of this second attempt is returned to the caller.
 *
 * On success the lanes are written to key and addr_indices by
 * ucp_wireup_construct_lanes() and UCS_OK is returned.
 */
ucs_status_t
ucp_wireup_select_lanes(ucp_ep_h ep, unsigned ep_init_flags,
                        ucp_tl_bitmap_t tl_bitmap,
                        const ucp_unpacked_address_t *remote_address,
                        unsigned *addr_indices, ucp_ep_config_key_t *key)
{
    ucp_worker_h worker                = ep->worker;
    ucp_tl_bitmap_t scalable_tl_bitmap = worker->scalable_tl_bitmap;
    int lanes_found                    = 0;
    ucp_wireup_select_context_t select_ctx;
    ucp_wireup_select_params_t select_params;
    ucs_status_t status;

    UCS_BITMAP_AND_INPLACE(&scalable_tl_bitmap, tl_bitmap);

    /* First attempt: scalable transports only */
    if (!UCS_BITMAP_IS_ZERO_INPLACE(&scalable_tl_bitmap)) {
        ucp_wireup_select_params_init(&select_params, ep, ep_init_flags,
                                      remote_address, scalable_tl_bitmap, 0);
        status      = ucp_wireup_search_lanes(&select_params, key->err_mode,
                                              &select_ctx);
        lanes_found = (status == UCS_OK);
    }

    /* Second attempt: all transports allowed by tl_bitmap */
    if (!lanes_found) {
        ucp_wireup_select_params_init(&select_params, ep, ep_init_flags,
                                      remote_address, tl_bitmap, 1);
        status = ucp_wireup_search_lanes(&select_params, key->err_mode,
                                         &select_ctx);
        if (status != UCS_OK) {
            return status;
        }
    }

    ucp_wireup_construct_lanes(&select_params, &select_ctx, addr_indices, key);

    /* With both AM_LANE_ONLY and CM_PHASE set, exactly two lanes are
     * expected */
    ucs_assert(!ucs_test_all_flags(ep_init_flags,
                                   UCP_EP_INIT_CREATE_AM_LANE_ONLY |
                                   UCP_EP_INIT_CM_PHASE) ||
               (key->num_lanes == 2));

    return UCS_OK;
}
/*
 * Score function for the auxiliary transport: inversely proportional to the
 * sum of the interface latency and the local and remote overheads.
 *
 * md_attr is not used by this score function.
 */
static double ucp_wireup_aux_score_func(ucp_context_h context,
                                        const uct_md_attr_t *md_attr,
                                        const uct_iface_attr_t *iface_attr,
                                        const ucp_address_iface_attr_t *remote_iface_attr)
{
    double latency    = ucp_wireup_tl_iface_latency(context, iface_attr,
                                                    remote_iface_attr);
    double total_cost = latency + iface_attr->overhead +
                        remote_iface_attr->overhead;

    return (1e-3 / total_cost);
}
/*
 * Select a single auxiliary transport to the remote address.
 *
 * The selection uses the auxiliary criteria derived from ep_init_flags, an
 * empty (zeroed) selection context and error reporting enabled. tl_bitmap
 * restricts the candidate transports through the selection parameters.
 *
 * The result is stored in select_info; the status of
 * ucp_wireup_select_transport() is returned.
 */
ucs_status_t
ucp_wireup_select_aux_transport(ucp_ep_h ep, unsigned ep_init_flags,
                                ucp_tl_bitmap_t tl_bitmap,
                                const ucp_unpacked_address_t *remote_address,
                                ucp_wireup_select_info_t *select_info)
{
    ucp_wireup_criteria_t criteria = {0};
    ucp_wireup_select_context_t select_ctx;
    ucp_wireup_select_params_t select_params;
    ucs_status_t status;

    memset(&select_ctx, 0, sizeof(select_ctx));

    ucp_wireup_select_params_init(&select_params, ep, ep_init_flags,
                                  remote_address, tl_bitmap, 1);
    ucp_wireup_fill_aux_criteria(&criteria, ep_init_flags);

    status = ucp_wireup_select_transport(&select_ctx, &select_params,
                                         &criteria, ucp_tl_bitmap_max,
                                         UINT64_MAX, UINT64_MAX, UINT64_MAX,
                                         1, select_info);
    return status;
}