#ifndef UCT_IB_MD_H_
#define UCT_IB_MD_H_
#include "ib_device.h"
#include <uct/base/uct_md.h>
#include <ucs/stats/stats.h>
#include <ucs/memory/numa.h>
#include <ucs/memory/rcache.h>
/* Per the name, the upper bound for one memory region: 0x80000000 bytes (2 GiB) */
#define UCT_IB_MD_MAX_MR_SIZE 0x80000000UL
/* A packed rkey occupies exactly one 64-bit word (see uct_ib_md_pack_rkey) */
#define UCT_IB_MD_PACKED_RKEY_SIZE sizeof(uint64_t)
/* Default GID table index; NOTE(review): usage is outside this header */
#define UCT_IB_MD_DEFAULT_GID_INDEX 0
/* Union of the verbs access flags: local write, remote write, remote read
 * and remote atomic */
#define UCT_IB_MEM_ACCESS_FLAGS (IBV_ACCESS_LOCAL_WRITE | \
IBV_ACCESS_REMOTE_WRITE | \
IBV_ACCESS_REMOTE_READ | \
IBV_ACCESS_REMOTE_ATOMIC)
/* NOTE(review): presumably a special "access" value that requests
 * de-registration instead of registration - confirm against the callers */
#define UCT_IB_MEM_DEREG 0
/* NOTE(review): presumably the configuration-variable prefix of the IB
 * transports - confirm */
#define UCT_IB_CONFIG_PREFIX "IB_"
/**
 * Indices of the memory-domain statistics counters.
 * UCT_IB_MD_STAT_LAST is not a counter; as the final enumerator it equals
 * the number of counters declared before it.
 */
enum {
UCT_IB_MD_STAT_MEM_ALLOC,
UCT_IB_MD_STAT_MEM_REG,
UCT_IB_MD_STAT_LAST
};
/**
 * Single-bit flags (bits 0..4) that can be OR-ed together; the
 * RELAXED_ORDERING bit is tested against uct_ib_mem_t::flags in
 * uct_ib_memh_get_atomic_base_mr_type().
 * NOTE(review): the meaning of each bit below is taken from its name only.
 */
enum {
/* On-demand paging */
UCT_IB_MEM_FLAG_ODP = UCS_BIT(0),
/* An atomic MR exists for the handle */
UCT_IB_MEM_FLAG_ATOMIC_MR = UCS_BIT(1),
/* Remote atomic access was requested */
UCT_IB_MEM_ACCESS_REMOTE_ATOMIC = UCS_BIT(2),
/* Registration was done by the multi-threaded path */
UCT_IB_MEM_MULTITHREADED = UCS_BIT(3),
/* Relaxed ordering is in use; selects UCT_IB_MR_STRICT_ORDER as the atomic
 * base MR type */
UCT_IB_MEM_FLAG_RELAXED_ORDERING = UCS_BIT(4),
};
/**
 * Identifiers of DEVX object kinds, numbered sequentially from 0.
 * NOTE(review): presumably used as bit positions in
 * uct_ib_md_config_t::devx_objs - confirm against the config table.
 */
enum {
UCT_IB_DEVX_OBJ_RCQP,
UCT_IB_DEVX_OBJ_RCSRQ,
UCT_IB_DEVX_OBJ_DCT,
UCT_IB_DEVX_OBJ_DCSRQ,
UCT_IB_DEVX_OBJ_DCI
};
/**
 * Extended memory-domain configuration. It is embedded both in the parsed
 * configuration (uct_ib_md_config_t::ext) and in the memory domain itself
 * (uct_ib_md_t::config).
 * NOTE(review): per-field notes are derived from the field names; verify
 * against the configuration table that fills this structure.
 */
typedef struct uct_ib_md_ext_config {
/* Ethernet pause-frame setting */
int eth_pause;
/* Prefer the device nearest to the caller */
int prefer_nearest_device;
/* Enable indirect atomic keys */
int enable_indirect_atomic;
#ifdef HAVE_EXP_UMR
/* Only present when HAVE_EXP_UMR is defined */
unsigned max_inline_klm_list;
#endif
/* On-demand-paging settings */
struct {
ucs_numa_policy_t numa_policy;
int prefetch;
size_t max_size;
} odp;
/* GID table index */
size_t gid_index;
/* Multi-threaded registration parameters: minimal size, chunk size and
 * thread-binding switch */
size_t min_mt_reg;
size_t mt_reg_chunk;
int mt_reg_bind;
} uct_ib_md_ext_config_t;
/**
 * IB memory handle: the keys of a registered region plus its flag bits.
 */
typedef struct uct_ib_mem {
/* Local key; returned by uct_ib_memh_get_lkey() */
uint32_t lkey;
/* Remote key; set together with lkey by uct_ib_memh_init_keys() */
uint32_t rkey;
/* NOTE(review): presumably the remote key used for atomic operations */
uint32_t atomic_rkey;
/* Bit mask; tested against UCT_IB_MEM_FLAG_RELAXED_ORDERING */
uint32_t flags;
} uct_ib_mem_t;
/**
 * Memory-region holder. It is a union with a single member today, the verbs
 * memory-region pointer.
 */
typedef union uct_ib_mr {
struct ibv_mr *ib;
} uct_ib_mr_t;
/**
 * Kind of memory region kept for a handle.
 * UCT_IB_MR_LAST is not a kind; as the final enumerator it equals the number
 * of kinds declared before it.
 */
typedef enum {
UCT_IB_MR_DEFAULT,
/* Chosen by uct_ib_memh_get_atomic_base_mr_type() when the handle has
 * UCT_IB_MEM_FLAG_RELAXED_ORDERING set */
UCT_IB_MR_STRICT_ORDER,
UCT_IB_MR_LAST
} uct_ib_mr_type_t;
/**
 * IB memory domain. 'super' is the first member, so the structure starts
 * with the generic uct_md_t.
 * NOTE(review): per-field notes are derived from names and types; verify
 * against the implementation.
 */
typedef struct uct_ib_md {
uct_md_t super;
/* Registration cache */
ucs_rcache_t *rcache;
/* Memory handle for global on-demand paging */
uct_mem_h global_odp;
/* Verbs protection domain */
struct ibv_pd *pd;
/* Device object, embedded by value */
uct_ib_device_t dev;
/* Registration cost as a linear function */
ucs_linear_func_t reg_cost;
/* Operations table implementing this memory domain */
struct uct_ib_md_ops *ops;
UCS_STATS_NODE_DECLARE(stats)
uct_ib_md_ext_config_t config;
/* Array of 'count' device specifications */
struct {
uct_ib_device_spec_t *specs;
unsigned count;
} custom_devices;
/* Switch and value of the subnet filter */
int check_subnet_filter;
uint64_t subnet_filter;
double pci_bw;
int relaxed_order;
int fork_init;
/* Size of the memory-handle structure used by this memory domain */
size_t memh_struct_size;
/* Bit mask of memory types that can be registered */
uint64_t reg_mem_types;
} uct_ib_md_t;
/**
 * Parsed configuration of an IB memory domain. 'super' is the first member,
 * so the structure starts with the generic uct_md_config_t.
 * NOTE(review): per-field notes are derived from names and types; verify
 * against the configuration table.
 */
typedef struct uct_ib_md_config {
uct_md_config_t super;
/* List of registration method names */
UCS_CONFIG_STRING_ARRAY_FIELD(rmtd) reg_methods;
/* Registration-cache settings */
uct_md_rcache_config_t rcache;
/* Registration cost used when no cache is active */
ucs_linear_func_t uc_reg_cost;
unsigned fork_init;
int async_events;
/* Extended settings; same type as uct_ib_md_t::config */
uct_ib_md_ext_config_t ext;
/* Textual specifications of custom devices */
UCS_CONFIG_STRING_ARRAY_FIELD(spec) custom_devices;
char *subnet_prefix;
/* Per-device PCI bandwidth entries */
UCS_CONFIG_ARRAY_FIELD(ucs_config_bw_spec_t, device) pci_bw;
unsigned devx;
unsigned devx_objs;
/* On/off/auto setting for relaxed-ordering memory regions */
ucs_on_off_auto_value_t mr_relaxed_order;
int enable_gpudirect_rdma;
} uct_ib_md_config_t;
/**
 * Memory-domain open callback.
 *
 * @param [in]  ibv_device  Verbs device to open the memory domain on.
 * @param [in]  md_config   Memory-domain configuration.
 * @param [out] md_p        Receives the memory-domain pointer.
 *
 * @return Status code.
 */
typedef ucs_status_t (*uct_ib_md_open_func_t)(struct ibv_device *ibv_device,
const uct_ib_md_config_t *md_config,
struct uct_ib_md **md_p);
/**
 * Memory-domain cleanup callback; takes the memory domain and returns
 * nothing.
 */
typedef void (*uct_ib_md_cleanup_func_t)(struct uct_ib_md *);
/**
 * Memory-key registration callback.
 *
 * @param [in]  md       Memory domain.
 * @param [in]  address  Start of the memory range.
 * @param [in]  length   Length of the memory range.
 * @param [in]  access   Access flags.
 * @param [in]  memh     Memory handle the key belongs to.
 * @param [in]  mr_type  Kind of memory region to register.
 * @param [in]  silent   NOTE(review): presumably suppresses error
 *                       reporting when non-zero - confirm.
 *
 * @return Status code.
 */
typedef ucs_status_t (*uct_ib_md_reg_key_func_t)(struct uct_ib_md *md,
void *address, size_t length,
uint64_t access,
uct_ib_mem_t *memh,
uct_ib_mr_type_t mr_type,
int silent);
/**
 * Memory-key de-registration callback.
 *
 * @param [in]  md       Memory domain.
 * @param [in]  memh     Memory handle the key belongs to.
 * @param [in]  mr_type  Kind of memory region to de-register.
 *
 * @return Status code.
 */
typedef ucs_status_t (*uct_ib_md_dereg_key_func_t)(struct uct_ib_md *md,
uct_ib_mem_t *memh,
uct_ib_mr_type_t mr_type);
/**
 * Atomic-key registration callback.
 *
 * @param [in]  md    Memory domain.
 * @param [in]  memh  Memory handle to register the atomic key for.
 *
 * @return Status code.
 */
typedef ucs_status_t (*uct_ib_md_reg_atomic_key_func_t)(struct uct_ib_md *md,
uct_ib_mem_t *memh);
/**
 * Atomic-key de-registration callback.
 *
 * @param [in]  md    Memory domain.
 * @param [in]  memh  Memory handle whose atomic key is released.
 *
 * @return Status code.
 */
typedef ucs_status_t (*uct_ib_md_dereg_atomic_key_func_t)(struct uct_ib_md *md,
uct_ib_mem_t *memh);
/**
 * Multi-threaded memory registration callback. It takes the same arguments
 * as uct_ib_md_reg_key_func_t.
 *
 * @param [in]  md       Memory domain.
 * @param [in]  address  Start of the memory range.
 * @param [in]  length   Length of the memory range.
 * @param [in]  access   Access flags.
 * @param [in]  memh     Memory handle the registration belongs to.
 * @param [in]  mr_type  Kind of memory region to register.
 * @param [in]  silent   NOTE(review): presumably suppresses error
 *                       reporting when non-zero - confirm.
 *
 * @return Status code.
 */
typedef ucs_status_t (*uct_ib_md_reg_multithreaded_func_t)(uct_ib_md_t *md,
void *address,
size_t length,
uint64_t access,
uct_ib_mem_t *memh,
uct_ib_mr_type_t mr_type,
int silent);
/**
 * Multi-threaded memory de-registration callback.
 *
 * @param [in]  md       Memory domain.
 * @param [in]  memh     Memory handle to de-register.
 * @param [in]  mr_type  Kind of memory region to de-register.
 *
 * @return Status code.
 */
typedef ucs_status_t (*uct_ib_md_dereg_multithreaded_func_t)(uct_ib_md_t *md,
uct_ib_mem_t *memh,
uct_ib_mr_type_t mr_type);
/**
 * Memory prefetch callback.
 *
 * @param [in]  md      Memory domain.
 * @param [in]  memh    Memory handle covering the range.
 * @param [in]  addr    Start of the range to prefetch.
 * @param [in]  length  Length of the range to prefetch.
 *
 * @return Status code.
 */
typedef ucs_status_t (*uct_ib_md_mem_prefetch_func_t)(uct_ib_md_t *md,
uct_ib_mem_t *memh,
void *addr, size_t length);
/**
 * Callback that reports the atomic MR id of a memory domain.
 *
 * @param [in]  md     Memory domain.
 * @param [out] mr_id  Receives the id; an id of this type is what
 *                     uct_ib_md_atomic_offset() converts into an offset.
 *
 * @return Status code.
 */
typedef ucs_status_t (*uct_ib_md_get_atomic_mr_id_func_t)(uct_ib_md_t *md,
uint8_t *mr_id);
/**
 * Table of operations implementing one flavor of IB memory domain. A memory
 * domain points to its table through uct_ib_md_t::ops, and tables are
 * registered with the UCT_IB_MD_OPS() macro.
 */
typedef struct uct_ib_md_ops {
/* Life cycle */
uct_ib_md_open_func_t open;
uct_ib_md_cleanup_func_t cleanup;
/* Regular keys */
uct_ib_md_reg_key_func_t reg_key;
uct_ib_md_dereg_key_func_t dereg_key;
/* Atomic keys */
uct_ib_md_reg_atomic_key_func_t reg_atomic_key;
uct_ib_md_dereg_atomic_key_func_t dereg_atomic_key;
/* Multi-threaded registration */
uct_ib_md_reg_multithreaded_func_t reg_multithreaded;
uct_ib_md_dereg_multithreaded_func_t dereg_multithreaded;
/* Miscellaneous */
uct_ib_md_mem_prefetch_func_t mem_prefetch;
uct_ib_md_get_atomic_mr_id_func_t get_atomic_mr_id;
} uct_ib_md_ops_t;
/**
 * Registration-cache region extended with an IB memory handle. 'super' is
 * the first member, so the structure starts with the generic region.
 * NOTE(review): uct_ib_md_t::memh_struct_size suggests the handle stored
 * here may be larger than uct_ib_mem_t at run time - confirm.
 */
typedef struct uct_ib_rcache_region {
ucs_rcache_region_t super;
uct_ib_mem_t memh;
} uct_ib_rcache_region_t;
/**
 * Node of the uct_ib_md_ops_list registry; instances are created by the
 * UCT_IB_MD_OPS() macro.
 */
typedef struct uct_ib_md_ops_entry {
/* Link in uct_ib_md_ops_list */
ucs_list_link_t list;
/* Stringified name of the operations table */
const char *name;
/* The registered operations table */
uct_ib_md_ops_t *ops;
/* Sort key: the macro inserts an entry before the first one whose priority
 * is lower */
int priority;
} uct_ib_md_ops_entry_t;
/**
 * Register the operations table @a _md_ops in uct_ib_md_ops_list with the
 * given @a _priority.
 *
 * The macro emits a UCS_STATIC_INIT body holding a static registry entry
 * whose name is the stringified @a _md_ops. The list is walked from the head
 * and the entry is inserted before the first element with a strictly lower
 * priority; if there is none, it is appended at the tail. The list therefore
 * stays ordered from highest to lowest priority, and among equal priorities
 * the entry registered earlier comes first.
 *
 * NOTE(review): UCS_STATIC_INIT is presumably a load-time constructor, so
 * the order of registrations across translation units is unspecified -
 * confirm.
 */
#define UCT_IB_MD_OPS(_md_ops, _priority) \
extern ucs_list_link_t uct_ib_md_ops_list; \
UCS_STATIC_INIT { \
static uct_ib_md_ops_entry_t *p, entry = { \
.name = UCS_PP_MAKE_STRING(_md_ops), \
.ops = &_md_ops, \
.priority = _priority, \
}; \
ucs_list_for_each(p, &uct_ib_md_ops_list, list) { \
if (p->priority < _priority) { \
ucs_list_insert_before(&p->list, &entry.list); \
return; \
} \
} \
ucs_list_add_tail(&uct_ib_md_ops_list, &entry.list); \
}
/* The IB UCT component object; it is only declared here and defined in
 * another translation unit */
extern uct_component_t uct_ib_component;
/**
 * Extract the direct remote key from a packed rkey.
 *
 * @param [in]  uct_rkey  Packed remote key.
 *
 * @return The value of @a uct_rkey truncated to its 32 least significant
 *         bits, which is where uct_ib_md_pack_rkey() stores the direct key.
 */
static inline uint32_t uct_ib_md_direct_rkey(uct_rkey_t uct_rkey)
{
    const uint32_t low_half = (uint32_t)uct_rkey;

    return low_half;
}
/**
 * Extract the indirect (atomic) remote key from a packed rkey.
 *
 * @param [in]  uct_rkey  Packed remote key.
 *
 * @return Bits 32..63 of @a uct_rkey, which is where uct_ib_md_pack_rkey()
 *         stores the atomic key.
 */
static inline uint32_t uct_ib_md_indirect_rkey(uct_rkey_t uct_rkey)
{
    /* Declared 'inline' like the other helpers in this header, so that
     * translation units which never call it do not get an unused static
     * function. The value is widened before shifting so the shift count is
     * always smaller than the operand width, and the narrowing to 32 bits is
     * explicit. */
    return (uint32_t)((uint64_t)uct_rkey >> 32);
}
/**
 * Pack a direct and an atomic remote key into one 64-bit word.
 *
 * @param [in]  rkey         Direct remote key; goes to bits 0..31.
 * @param [in]  atomic_rkey  Atomic (indirect) remote key; goes to bits
 *                           32..63.
 * @param [out] rkey_buffer  Destination; it is written as a single uint64_t.
 */
static UCS_F_ALWAYS_INLINE void
uct_ib_md_pack_rkey(uint32_t rkey, uint32_t atomic_rkey, void *rkey_buffer)
{
    const uint64_t high_half = ((uint64_t)atomic_rkey) << 32;
    uint64_t *packed_p       = (uint64_t*)rkey_buffer;

    *packed_p = high_half | rkey;
    ucs_trace("packed rkey: direct 0x%x indirect 0x%x", rkey, atomic_rkey);
}
/**
 * Choose the remote key to use for an atomic operation.
 *
 * @param [in]     uct_rkey          Packed remote key.
 * @param [in]     atomic_mr_offset  Offset added to the remote address when
 *                                   the atomic key is used.
 * @param [in,out] remote_addr_p     Remote address; it is advanced by
 *                                   @a atomic_mr_offset only when the atomic
 *                                   key is returned.
 *
 * @return The atomic key when it is not UCT_IB_INVALID_RKEY, otherwise the
 *         direct key (in which case the remote address is left untouched).
 */
static inline uint32_t uct_ib_resolve_atomic_rkey(uct_rkey_t uct_rkey,
                                                  uint16_t atomic_mr_offset,
                                                  uint64_t *remote_addr_p)
{
    const uint32_t indirect_rkey = uct_ib_md_indirect_rkey(uct_rkey);

    if (indirect_rkey != UCT_IB_INVALID_RKEY) {
        *remote_addr_p += atomic_mr_offset;
        return indirect_rkey;
    }

    return uct_ib_md_direct_rkey(uct_rkey);
}
/**
 * Convert an atomic MR id into an address offset.
 *
 * @param [in]  atomic_mr_id  Atomic MR id.
 *
 * @return @a atomic_mr_id multiplied by 8; the largest possible result
 *         (255 * 8 = 2040) fits in the 16-bit return type.
 */
static inline uint16_t uct_ib_md_atomic_offset(uint8_t atomic_mr_id)
{
    const unsigned offset_per_id = 8;

    return (uint16_t)(offset_per_id * atomic_mr_id);
}
/**
 * Store the local and remote keys in a memory handle. No other field of the
 * handle is modified.
 *
 * @param [out] memh  Memory handle to update.
 * @param [in]  lkey  Local key.
 * @param [in]  rkey  Remote key.
 */
static inline void
uct_ib_memh_init_keys(uct_ib_mem_t *memh, uint32_t lkey, uint32_t rkey)
{
    memh->rkey = rkey;
    memh->lkey = lkey;
}
/**
 * Select the memory-region kind that serves as the base for the atomic key
 * of a handle.
 *
 * @param [in]  memh  Memory handle; only its flags are read.
 *
 * @return UCT_IB_MR_STRICT_ORDER when UCT_IB_MEM_FLAG_RELAXED_ORDERING is
 *         set in the handle flags, UCT_IB_MR_DEFAULT otherwise.
 */
static inline uct_ib_mr_type_t
uct_ib_memh_get_atomic_base_mr_type(uct_ib_mem_t *memh)
{
    return (memh->flags & UCT_IB_MEM_FLAG_RELAXED_ORDERING) ?
           UCT_IB_MR_STRICT_ORDER : UCT_IB_MR_DEFAULT;
}
/**
 * Read the local key of a generic memory handle.
 *
 * @param [in]  memh  Memory handle; it is interpreted as uct_ib_mem_t and
 *                    asserted not to be UCT_MEM_HANDLE_NULL.
 *
 * @return The local key stored in the handle.
 */
static UCS_F_ALWAYS_INLINE uint32_t uct_ib_memh_get_lkey(uct_mem_h memh)
{
    uct_ib_mem_t *ib_memh = (uct_ib_mem_t*)memh;

    ucs_assert(memh != UCT_MEM_HANDLE_NULL);
    return ib_memh->lkey;
}
/**
 * Open an IB memory domain.
 *
 * @param [in]  component      Component that owns the memory domain.
 * @param [in]  md_name        Name of the memory domain to open.
 * @param [in]  uct_md_config  Generic memory-domain configuration.
 * @param [out] md_p           Receives the memory-domain handle.
 *
 * @return Status code.
 */
ucs_status_t uct_ib_md_open(uct_component_t *component, const char *md_name,
const uct_md_config_t *uct_md_config, uct_md_h *md_p);
/**
 * Test whether a verbs device can be accessed.
 * NOTE(review): presumably returns non-zero when it can - confirm against
 * the definition.
 */
int uct_ib_device_is_accessible(struct ibv_device *device);
/**
 * Common part of opening a memory domain.
 * NOTE(review): presumably meant to be called from the 'open' callbacks of
 * uct_ib_md_ops_t - confirm.
 *
 * @param [in]  md         Memory domain being opened.
 * @param [in]  ib_device  Verbs device of the memory domain.
 * @param [in]  md_config  Memory-domain configuration.
 *
 * @return Status code.
 */
ucs_status_t uct_ib_md_open_common(uct_ib_md_t *md,
struct ibv_device *ib_device,
const uct_ib_md_config_t *md_config);
/**
 * Close a memory domain given by its generic handle.
 */
void uct_ib_md_close(uct_md_h uct_md);
/**
 * Register a verbs memory region.
 *
 * @param [in]  pd      Protection domain to register in.
 * @param [in]  addr    Start of the memory range.
 * @param [in]  length  Length of the memory range.
 * @param [in]  access  Access flags.
 * @param [out] mr_p    Receives the memory-region pointer.
 * @param [in]  silent  NOTE(review): presumably suppresses error reporting
 *                      when non-zero - confirm.
 *
 * @return Status code.
 */
ucs_status_t uct_ib_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
uint64_t access, struct ibv_mr **mr_p, int silent);
/**
 * De-register one verbs memory region.
 *
 * @return Status code.
 */
ucs_status_t uct_ib_dereg_mr(struct ibv_mr *mr);
/**
 * De-register an array of verbs memory regions.
 *
 * @param [in]  mrs     Array of memory-region pointers.
 * @param [in]  mr_num  Number of entries in @a mrs.
 *
 * @return Status code.
 */
ucs_status_t uct_ib_dereg_mrs(struct ibv_mr **mrs, size_t mr_num);
/**
 * Process a list of memory regions covering a memory range, chunk by chunk,
 * using multiple threads.
 * NOTE(review): presumably registers the chunks, or de-registers them when
 * @a access equals UCT_IB_MEM_DEREG - confirm against the definition.
 *
 * @param [in]     md       Memory domain.
 * @param [in]     address  Start of the memory range.
 * @param [in]     length   Length of the memory range.
 * @param [in]     access   Access flags.
 * @param [in]     chunk    Size of one chunk.
 * @param [in,out] mrs      Array of memory-region pointers, one per chunk.
 * @param [in]     silent   NOTE(review): presumably suppresses error
 *                          reporting when non-zero - confirm.
 *
 * @return Status code.
 */
ucs_status_t
uct_ib_md_handle_mr_list_multithreaded(uct_ib_md_t *md, void *address,
size_t length, uint64_t access,
size_t chunk, struct ibv_mr **mrs,
int silent);
/**
 * Apply the relaxed-ordering configuration to a memory domain.
 * NOTE(review): presumably derives uct_ib_md_t::relaxed_order from
 * uct_ib_md_config_t::mr_relaxed_order - confirm against the definition.
 *
 * @param [in,out] md         Memory domain to update.
 * @param [in]     md_config  Memory-domain configuration.
 */
void uct_ib_md_parse_relaxed_order(uct_ib_md_t *md,
const uct_ib_md_config_t *md_config);
/**
 * Shared implementation of memory-key registration.
 * NOTE(review): presumably a helper for the 'reg_key' callbacks of
 * uct_ib_md_ops_t - confirm.
 *
 * @param [in]  md            Memory domain.
 * @param [in]  address       Start of the memory range.
 * @param [in]  length        Length of the memory range.
 * @param [in]  access_flags  Access flags.
 * @param [in]  memh          Memory handle the key belongs to.
 * @param [out] mrs           Memory-region holder(s) to fill.
 * @param [in]  mr_type       Kind of memory region to register.
 * @param [in]  silent        NOTE(review): presumably suppresses error
 *                            reporting when non-zero - confirm.
 *
 * @return Status code.
 */
ucs_status_t uct_ib_reg_key_impl(uct_ib_md_t *md, void *address,
size_t length, uint64_t access_flags,
uct_ib_mem_t *memh, uct_ib_mr_t *mrs,
uct_ib_mr_type_t mr_type, int silent);
#endif