#ifndef UCP_PROTO_COMMON_H_
#define UCP_PROTO_COMMON_H_
#include "proto.h"
#include "proto_select.h"
#include <uct/api/v2/uct_v2.h>
/*
 * printf-style helpers for printing one time value.
 *
 * UCP_PROTO_TIME_FMT(name) expands to the format fragment " name: %.2f ns"
 * (the argument is stringified, not evaluated).
 * UCP_PROTO_TIME_ARG(value) supplies the matching argument: the value
 * multiplied by 1e9. Given the "ns" label, the input is presumably expressed
 * in seconds - confirm against callers.
 */
#define UCP_PROTO_TIME_FMT(_label) \
    " " #_label ": %.2f ns"
#define UCP_PROTO_TIME_ARG(_time) \
    ((_time) * 1e9)
/*
 * printf-style helpers for printing a linear performance function.
 *
 * Every *_ARG macro takes a pointer to an object that has 'c' and 'm' members
 * and expands to a comma-separated argument list matching the corresponding
 * *_FMT string. The pointer expression is evaluated more than once, so it
 * must not have side effects.
 *
 * NOTE(review): the scaling factors and unit labels below imply that 'c' is in
 * seconds and 'm' is in seconds per byte - confirm against the type of the
 * object passed in.
 */

/* Time as "<c>+<m>*N": 'c' scaled by 1e9, 'm' scaled by 1e9 * UCS_KBYTE */
#define UCP_PROTO_PERF_FUNC_TIME_FMT "%.2f+%.3f*N"
#define UCP_PROTO_PERF_FUNC_TIME_ARG(_perf_func) \
    ((_perf_func)->c * 1e9), ((_perf_func)->m * 1e9 * UCS_KBYTE)

/* Bandwidth: reciprocal of 'm' scaled by UCS_MBYTE. No guard against m == 0
 * is applied here. */
#define UCP_PROTO_PERF_FUNC_BW_FMT "%.2f"
#define UCP_PROTO_PERF_FUNC_BW_ARG(_perf_func) \
    (1.0 / ((_perf_func)->m * UCS_MBYTE))

/* Full description of one function: " <name>: <time> ns/KB, <bandwidth> MB/s".
 * The _perf_var argument of the FMT macro is stringified to produce the
 * label. */
#define UCP_PROTO_PERF_FUNC_FMT(_perf_var) " " #_perf_var ": " \
    UCP_PROTO_PERF_FUNC_TIME_FMT " ns/KB, " \
    UCP_PROTO_PERF_FUNC_BW_FMT " MB/s"
#define UCP_PROTO_PERF_FUNC_ARG(_perf_func) \
    UCP_PROTO_PERF_FUNC_TIME_ARG(_perf_func), \
    UCP_PROTO_PERF_FUNC_BW_ARG(_perf_func)

/* Description of an array of performance functions indexed by
 * UCP_PROTO_PERF_TYPE_SINGLE and UCP_PROTO_PERF_TYPE_MULTI. The labels in the
 * format string are listed in the same order as the arguments, so "single"
 * labels the UCP_PROTO_PERF_TYPE_SINGLE entry and "multi" the
 * UCP_PROTO_PERF_TYPE_MULTI entry. */
#define UCP_PROTO_PERF_FUNC_TYPES_FMT \
    UCP_PROTO_PERF_FUNC_FMT(single) \
    UCP_PROTO_PERF_FUNC_FMT(multi)
#define UCP_PROTO_PERF_FUNC_TYPES_ARG(_perf_func) \
    UCP_PROTO_PERF_FUNC_ARG((&(_perf_func)[UCP_PROTO_PERF_TYPE_SINGLE])), \
    UCP_PROTO_PERF_FUNC_ARG((&(_perf_func)[UCP_PROTO_PERF_TYPE_MULTI]))
/* Value marking a ptrdiff_t offset as invalid: the largest ptrdiff_t value.
 * NOTE(review): presumably assigned to the *_offs fields of
 * ucp_proto_common_init_params_t when a field does not apply - confirm
 * against the users of this constant. */
#define UCP_PROTO_COMMON_OFFSET_INVALID PTRDIFF_MAX
/*
 * Protocol initialization flags. Each enumerator is a distinct value,
 * UCS_BIT(0) through UCS_BIT(7), so they can be combined in a bit mask.
 *
 * NOTE(review): the per-flag descriptions are inferred from the identifiers
 * only; the code that tests these flags is not in this header. Presumably
 * they are stored in ucp_proto_common_init_params_t::flags - confirm.
 */
typedef enum {
/* Presumably: the send buffer is used by zero-copy operations */
UCP_PROTO_COMMON_INIT_FLAG_SEND_ZCOPY = UCS_BIT(0),
/* Presumably: the receive side does not copy the data */
UCP_PROTO_COMMON_INIT_FLAG_RECV_ZCOPY = UCS_BIT(1),
/* Presumably: the protocol accesses remote memory directly */
UCP_PROTO_COMMON_INIT_FLAG_REMOTE_ACCESS = UCS_BIT(2),
/* Presumably: the operation expects a response from the peer */
UCP_PROTO_COMMON_INIT_FLAG_RESPONSE = UCS_BIT(3),
/* Presumably: the protocol sends a single fragment only */
UCP_PROTO_COMMON_INIT_FLAG_SINGLE_FRAG = UCS_BIT(4),
/* Presumably: only a header is sent, without payload */
UCP_PROTO_COMMON_INIT_FLAG_HDR_ONLY = UCS_BIT(5),
/* Presumably: the protocol relies on an rkey pointer */
UCP_PROTO_COMMON_INIT_FLAG_RKEY_PTR = UCS_BIT(6),
/* Presumably: the protocol honors a minimal fragment size */
UCP_PROTO_COMMON_INIT_FLAG_MIN_FRAG = UCS_BIT(7),
} ucp_proto_common_init_flags_t;
/*
 * Initialization parameters shared by the ucp_proto_common_* functions.
 * The first member is a ucp_proto_init_params_t named 'super'.
 *
 * NOTE(review): the field descriptions are inferred from names and types;
 * the code that reads them is not in this header. Units of 'latency' and
 * 'overhead' are not visible here - confirm against the definitions.
 */
typedef struct {
/* Base initialization parameters */
ucp_proto_init_params_t super;
/* Presumably: latency added by the protocol */
double latency;
/* Presumably: overhead added by the protocol */
double overhead;
/* Presumably: configured threshold for selecting the protocol */
size_t cfg_thresh;
/* Presumably: priority of the configured threshold */
unsigned cfg_priority;
/* Presumably: smallest message length supported */
size_t min_length;
/* Presumably: largest message length supported */
size_t max_length;
/* Presumably: offset of the minimal fragment size field inside
 * uct_iface_attr_t (see ucp_proto_common_get_iface_attr_field) */
ptrdiff_t min_frag_offs;
/* Presumably: offset of the maximal fragment size field */
ptrdiff_t max_frag_offs;
/* Presumably: offset of the maximal iov count field */
ptrdiff_t max_iov_offs;
/* Presumably: size of the protocol header */
size_t hdr_size;
/* Presumably: UCT operation used for sending */
uct_ep_operation_t send_op;
/* Presumably: UCT operation used for memory-type copies */
uct_ep_operation_t memtype_op;
/* Presumably: bit mask of ucp_proto_common_init_flags_t values */
unsigned flags;
} ucp_proto_common_init_params_t;
/*
 * Performance description with six double-precision metrics and two sizes.
 * A pointer to this type is the last parameter of
 * ucp_proto_common_get_lane_perf() and an input of
 * ucp_proto_common_init_caps().
 *
 * NOTE(review): the field descriptions are inferred from names; units are not
 * visible in this header - confirm against the code that fills the structure.
 */
typedef struct {
/* Presumably: overhead before the send operation */
double send_pre_overhead;
/* Presumably: overhead after the send operation */
double send_post_overhead;
/* Presumably: overhead on the receive side */
double recv_overhead;
/* Presumably: transport bandwidth */
double bandwidth;
/* Presumably: network latency */
double latency;
/* Presumably: system (device to memory) latency */
double sys_latency;
/* Presumably: minimal message length */
size_t min_length;
/* Presumably: maximal size of a single fragment */
size_t max_frag;
} ucp_proto_common_tl_perf_t;
/*
 * Per-lane private data. Filled through ucp_proto_common_lane_priv_init() and
 * printed through ucp_proto_common_lane_priv_str(), judging by their
 * signatures.
 *
 * NOTE(review): the field descriptions are inferred from names and types -
 * confirm against the definitions of those functions.
 */
typedef struct {
/* Lane index */
ucp_lane_index_t lane;
/* Presumably: index of the memory handle used with this lane */
ucp_rsc_index_t memh_index;
/* Presumably: index of the remote key used with this lane */
ucp_md_index_t rkey_index;
/* Presumably: maximal number of iov entries on this lane (8-bit value) */
uint8_t max_iov;
} ucp_proto_common_lane_priv_t;
/* Callback type that takes a request and returns nothing.
 * NOTE(review): by its name, called to initialize protocol state on the
 * request - confirm against the call sites. */
typedef void (*ucp_proto_init_cb_t)(ucp_request_t *req);
/* Callback type that takes a request and returns a ucs_status_t.
 * NOTE(review): by its name, called when the protocol completes the request -
 * confirm against the call sites. */
typedef ucs_status_t (*ucp_proto_complete_cb_t)(ucp_request_t *req);
/*
 * Initialize the private data of one lane (declaration only; the definition
 * is not in this header).
 *
 * params    - common protocol initialization parameters (read-only)
 * md_map    - memory domain map; NOTE(review): presumably the registered MDs
 *             from which memh_index is derived - confirm
 * lane      - lane index
 * lane_priv - non-const pointer to the lane private data, presumably filled
 *             by this call
 */
void ucp_proto_common_lane_priv_init(const ucp_proto_common_init_params_t *params,
ucp_md_map_t md_map, ucp_lane_index_t lane,
ucp_proto_common_lane_priv_t *lane_priv);
/*
 * Produce a text form of lane private data (declaration only).
 *
 * lpriv - lane private data (read-only)
 * strb  - string buffer; NOTE(review): presumably the text is appended to
 *         it - confirm against the definition
 */
void ucp_proto_common_lane_priv_str(const ucp_proto_common_lane_priv_t *lpriv,
ucs_string_buffer_t *strb);
/*
 * Get the memory domain index for a lane (declaration only).
 *
 * params - protocol initialization parameters (read-only)
 * lane   - lane index
 *
 * Returns the index as ucp_rsc_index_t. NOTE(review): behavior for an invalid
 * lane is not visible in this header.
 */
ucp_rsc_index_t
ucp_proto_common_get_md_index(const ucp_proto_init_params_t *params,
ucp_lane_index_t lane);
/*
 * Get the system device for a lane (declaration only).
 *
 * params - protocol initialization parameters (read-only)
 * lane   - lane index
 *
 * Returns a ucs_sys_device_t value.
 */
ucs_sys_device_t
ucp_proto_common_get_sys_dev(const ucp_proto_init_params_t *params,
ucp_lane_index_t lane);
/*
 * Get the distance between a lane and a system device (declaration only).
 *
 * params   - protocol initialization parameters (read-only)
 * lane     - lane index
 * sys_dev  - system device; NOTE(review): presumably the distance is
 *            measured from the lane's own device to this one - confirm
 * distance - non-const pointer, presumably receives the result
 */
void ucp_proto_common_get_lane_distance(const ucp_proto_init_params_t *params,
ucp_lane_index_t lane,
ucs_sys_device_t sys_dev,
ucs_sys_dev_distance_t *distance);
/*
 * Get the interface attributes for a lane (declaration only).
 *
 * params - protocol initialization parameters (read-only)
 * lane   - lane index
 *
 * Returns a pointer to const uct_iface_attr_t; the caller must not modify the
 * pointed-to data. NOTE(review): lifetime and ownership of the returned
 * object are not visible in this header.
 */
const uct_iface_attr_t *
ucp_proto_common_get_iface_attr(const ucp_proto_init_params_t *params,
ucp_lane_index_t lane);
/*
 * Get a size_t value from interface attributes by field offset (declaration
 * only).
 *
 * iface_attr   - interface attributes (read-only)
 * field_offset - offset of the field, as ptrdiff_t
 * dfl_value    - default value; NOTE(review): presumably returned when
 *                field_offset is UCP_PROTO_COMMON_OFFSET_INVALID - confirm
 *                against the definition
 */
size_t ucp_proto_common_get_iface_attr_field(const uct_iface_attr_t *iface_attr,
ptrdiff_t field_offset,
size_t dfl_value);
/*
 * Query performance attributes of a lane for a UCT operation (declaration
 * only).
 *
 * params         - protocol initialization parameters (read-only)
 * lane           - lane index
 * op             - UCT endpoint operation
 * uct_field_mask - 64-bit mask; NOTE(review): presumably selects which
 *                  uct_perf_attr_t fields to query - confirm
 * perf_attr      - non-const pointer, presumably receives the attributes
 *
 * Returns a ucs_status_t; the specific error codes are not visible here.
 */
ucs_status_t
ucp_proto_common_lane_perf_attr(const ucp_proto_init_params_t *params,
ucp_lane_index_t lane, uct_ep_operation_t op,
uint64_t uct_field_mask,
uct_perf_attr_t* perf_attr);
/*
 * Get the performance description of a lane (declaration only).
 *
 * params - common protocol initialization parameters (read-only)
 * lane   - lane index
 * perf   - non-const pointer, presumably filled with the lane performance
 *
 * Returns a ucs_status_t; the specific error codes are not visible here.
 */
ucs_status_t
ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
ucp_lane_index_t lane,
ucp_proto_common_tl_perf_t *perf);
/*
 * Find lanes that match the given criteria (declaration only).
 *
 * params       - common protocol initialization parameters (read-only)
 * lane_type    - lane type to look for
 * tl_cap_flags - 64-bit flags; NOTE(review): presumably transport
 *                capabilities a lane must have - confirm
 * max_lanes    - presumably the capacity of the 'lanes' array
 * exclude_map  - lane map; presumably lanes that must not be selected
 * lanes        - non-const array, presumably receives the lanes found
 *
 * Returns a ucp_lane_index_t; NOTE(review): presumably the number of lanes
 * written to 'lanes' - confirm against the definition.
 */
ucp_lane_index_t
ucp_proto_common_find_lanes(const ucp_proto_common_init_params_t *params,
ucp_lane_type_t lane_type, uint64_t tl_cap_flags,
ucp_lane_index_t max_lanes,
ucp_lane_map_t exclude_map,
ucp_lane_index_t *lanes);
/*
 * Get a memory domain map for a set of lanes (declaration only).
 *
 * params   - common protocol initialization parameters (read-only)
 * lane_map - map of lanes
 *
 * Returns a ucp_md_map_t; NOTE(review): by the name, the MDs that need memory
 * registration for these lanes - confirm against the definition.
 */
ucp_md_map_t
ucp_proto_common_reg_md_map(const ucp_proto_common_init_params_t *params,
ucp_lane_map_t lane_map);
/*
 * Find a lane for an active-message bcopy header (declaration only).
 *
 * params - protocol initialization parameters (read-only)
 *
 * Returns a lane index. NOTE(review): the value returned when no lane is
 * found is not visible in this header.
 */
ucp_lane_index_t
ucp_proto_common_find_am_bcopy_hdr_lane(const ucp_proto_init_params_t *params);
/*
 * Add a performance range derived from a fragment range (declaration only).
 *
 * init_params - protocol initialization parameters (const-qualified here)
 * frag_range  - performance range of a fragment (read-only)
 * max_length  - presumably the upper message length of the added range
 *
 * NOTE(review): "ppln" presumably stands for pipeline; where the range is
 * stored is not visible in this header.
 */
void ucp_proto_common_add_ppln_range(const ucp_proto_init_params_t *init_params,
const ucp_proto_perf_range_t *frag_range,
size_t max_length);
/*
 * Initialize the basic protocol capabilities (declaration only).
 *
 * params     - common protocol initialization parameters (const-qualified)
 * min_length - presumably the minimal message length of the protocol
 *
 * NOTE(review): the object that receives the capabilities is not visible in
 * this signature - confirm against the definition.
 */
void ucp_proto_common_init_base_caps(
const ucp_proto_common_init_params_t *params, size_t min_length);
/*
 * Add a performance range built from linear functions (declaration only).
 *
 * params        - common protocol initialization parameters (const-qualified)
 * max_length    - presumably the upper message length of the range
 * send_time     - pointer to const linear function(s); NOTE(review): may be
 *                 an array indexed by performance type - confirm
 * recv_overhead - linear function passed by value
 * xfer_time     - pointer to const linear function(s); same note as
 *                 send_time
 * bias          - linear function passed by value
 */
void ucp_proto_common_add_perf_range(
const ucp_proto_common_init_params_t *params, size_t max_length,
const ucs_linear_func_t *send_time, ucs_linear_func_t recv_overhead,
const ucs_linear_func_t *xfer_time, ucs_linear_func_t bias);
/*
 * Initialize protocol capabilities from a performance description
 * (declaration only).
 *
 * params     - common protocol initialization parameters (const-qualified)
 * perf       - performance description (read-only)
 * reg_md_map - memory domain map; presumably the MDs that require memory
 *              registration
 *
 * Returns a ucs_status_t; the specific error codes are not visible here.
 */
ucs_status_t
ucp_proto_common_init_caps(const ucp_proto_common_init_params_t *params,
const ucp_proto_common_tl_perf_t *perf,
ucp_md_map_t reg_md_map);
/* Takes a uct_completion_t pointer and returns nothing (declaration only).
 * NOTE(review): presumably installed as the UCT completion callback of
 * zero-copy requests - confirm against the definition. */
void ucp_proto_request_zcopy_completion(uct_completion_t *self);
/* Trace the protocol selected for a request and message length (declaration
 * only). NOTE(review): the trace destination and format are not visible in
 * this header. */
void ucp_proto_trace_selected(ucp_request_t *req, size_t msg_length);
/*
 * Report a protocol selection error for a request (declaration only).
 *
 * req            - the request
 * proto_select   - protocol selection object (non-const pointer)
 * rkey_cfg_index - index of the remote key configuration
 * sel_param      - selection parameters (read-only)
 * msg_length     - message length
 *
 * NOTE(review): whether this logs, aborts or completes the request is not
 * visible in this header - confirm against the definition.
 */
void ucp_proto_request_select_error(ucp_request_t *req,
ucp_proto_select_t *proto_select,
ucp_worker_cfg_index_t rkey_cfg_index,
const ucp_proto_select_param_t *sel_param,
size_t msg_length);
/*
 * Adjust a zero-copy iov list and offset for a minimal fragment size
 * (declaration only).
 *
 * req           - the request
 * min_frag_diff - size in bytes; NOTE(review): presumably the amount by
 *                 which the fragment falls short of the minimal size -
 *                 confirm
 * iov           - non-const iov array, presumably modified in place
 * iovcnt        - number of entries in 'iov'
 * offset_p      - non-const pointer to an offset, presumably updated
 */
void ucp_proto_common_zcopy_adjust_min_frag_always(ucp_request_t *req,
size_t min_frag_diff,
uct_iov_t *iov,
size_t iovcnt,
size_t *offset_p);
/* Abort a request with the given status (declaration only).
 * NOTE(review): resource release and completion semantics are not visible in
 * this header. */
void ucp_proto_request_abort(ucp_request_t *req, ucs_status_t status);
/*
 * Get the memory registration time as a linear function (declaration only).
 *
 * params     - common protocol initialization parameters (const-qualified)
 * reg_md_map - memory domain map; presumably the MDs on which memory is
 *              registered
 *
 * Returns a ucs_linear_func_t by value. NOTE(review): presumably a function
 * of the buffer size - confirm against the definition.
 */
ucs_linear_func_t
ucp_proto_common_memreg_time(const ucp_proto_common_init_params_t *params,
ucp_md_map_t reg_md_map);
/*
 * Get the time of a buffer copy between two memory types as a linear function
 * (declaration only).
 *
 * worker          - worker handle
 * title           - text label; NOTE(review): presumably used in
 *                   diagnostics - confirm
 * local_mem_type  - local memory type
 * remote_mem_type - remote memory type
 * memtype_op      - UCT endpoint operation used for the copy
 * copy_time       - non-const pointer, presumably receives the result
 *
 * Returns a ucs_status_t; the specific error codes are not visible here.
 */
ucs_status_t
ucp_proto_common_buffer_copy_time(ucp_worker_h worker, const char *title,
ucs_memory_type_t local_mem_type,
ucs_memory_type_t remote_mem_type,
uct_ep_operation_t memtype_op,
ucs_linear_func_t *copy_time);
#endif