#include "H5FDdrvr_module.h"
#include "H5private.h"
#include "H5CXprivate.h"
#include "H5Dprivate.h"
#include "H5Eprivate.h"
#include "H5Fprivate.h"
#include "H5FDprivate.h"
#include "H5FDmpi.h"
#include "H5Iprivate.h"
#include "H5MMprivate.h"
#include "H5Pprivate.h"
#ifdef H5_HAVE_PARALLEL
static hid_t H5FD_MPIO_g = 0;
hbool_t H5FD_mpi_opt_types_g = TRUE;
static char H5FD_mpi_native_g[] = "native";
typedef struct H5FD_mpio_t {
H5FD_t pub;
MPI_File f;
MPI_Comm comm;
MPI_Info info;
int mpi_rank;
int mpi_size;
haddr_t eof;
haddr_t eoa;
haddr_t last_eoa;
haddr_t local_eof;
} H5FD_mpio_t;
static herr_t H5FD_mpio_term(void);
static void *H5FD_mpio_fapl_get(H5FD_t *_file);
static void *H5FD_mpio_fapl_copy(const void *_old_fa);
static herr_t H5FD_mpio_fapl_free(void *_fa);
static H5FD_t *H5FD_mpio_open(const char *name, unsigned flags, hid_t fapl_id,
haddr_t maxaddr);
static herr_t H5FD_mpio_close(H5FD_t *_file);
static herr_t H5FD_mpio_query(const H5FD_t *_f1, unsigned long *flags);
static haddr_t H5FD_mpio_get_eoa(const H5FD_t *_file, H5FD_mem_t type);
static herr_t H5FD_mpio_set_eoa(H5FD_t *_file, H5FD_mem_t type, haddr_t addr);
static haddr_t H5FD_mpio_get_eof(const H5FD_t *_file, H5FD_mem_t type);
static herr_t H5FD_mpio_get_handle(H5FD_t *_file, hid_t fapl, void** file_handle);
static herr_t H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
size_t size, void *buf);
static herr_t H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
size_t size, const void *buf);
static herr_t H5FD_mpio_flush(H5FD_t *_file, hid_t dxpl_id, hbool_t closing);
static herr_t H5FD_mpio_truncate(H5FD_t *_file, hid_t dxpl_id, hbool_t closing);
static int H5FD_mpio_mpi_rank(const H5FD_t *_file);
static int H5FD_mpio_mpi_size(const H5FD_t *_file);
static MPI_Comm H5FD_mpio_communicator(const H5FD_t *_file);
static const H5FD_class_mpi_t H5FD_mpio_g = {
{
"mpio",
HADDR_MAX,
H5F_CLOSE_SEMI,
H5FD_mpio_term,
NULL,
NULL,
NULL,
sizeof(H5FD_mpio_fapl_t),
H5FD_mpio_fapl_get,
H5FD_mpio_fapl_copy,
H5FD_mpio_fapl_free,
0,
NULL,
NULL,
H5FD_mpio_open,
H5FD_mpio_close,
NULL,
H5FD_mpio_query,
NULL,
NULL,
NULL,
H5FD_mpio_get_eoa,
H5FD_mpio_set_eoa,
H5FD_mpio_get_eof,
H5FD_mpio_get_handle,
H5FD_mpio_read,
H5FD_mpio_write,
H5FD_mpio_flush,
H5FD_mpio_truncate,
NULL,
NULL,
H5FD_FLMAP_DICHOTOMY
},
H5FD_mpio_mpi_rank,
H5FD_mpio_mpi_size,
H5FD_mpio_communicator
};
#ifdef H5FDmpio_DEBUG
static int H5FD_mpio_Debug[256] =
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
#endif
static herr_t
H5FD__init_package(void)
{
herr_t ret_value = SUCCEED;
FUNC_ENTER_STATIC
if(H5FD_mpio_init() < 0)
HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "unable to initialize mpio VFD")
done:
FUNC_LEAVE_NOAPI(ret_value)
}
hid_t
H5FD_mpio_init(void)
{
#ifdef H5FDmpio_DEBUG
static int H5FD_mpio_Debug_inited = 0;
#endif
const char *s;
hid_t ret_value = H5I_INVALID_HID;
FUNC_ENTER_NOAPI(H5I_INVALID_HID)
if(H5I_VFL != H5I_get_type(H5FD_MPIO_g))
H5FD_MPIO_g = H5FD_register((const H5FD_class_t *)&H5FD_mpio_g, sizeof(H5FD_class_mpi_t), FALSE);
s = HDgetenv("HDF5_MPI_OPT_TYPES");
if(s && HDisdigit(*s)) {
long env_val = HDstrtol(s, NULL, 0);
H5FD_mpi_opt_types_g = (0 == env_val) ? FALSE : TRUE;
}
#ifdef H5FDmpio_DEBUG
if(!H5FD_mpio_Debug_inited) {
s = HDgetenv("H5FD_mpio_Debug");
if(s) {
while(*s) {
H5FD_mpio_Debug[(int)*s]++;
s++;
}
}
H5FD_mpio_Debug_inited++;
}
#endif
ret_value = H5FD_MPIO_g;
done:
FUNC_LEAVE_NOAPI(ret_value)
}
static herr_t
H5FD_mpio_term(void)
{
FUNC_ENTER_NOAPI_NOINIT_NOERR
H5FD_MPIO_g = 0;
FUNC_LEAVE_NOAPI(SUCCEED)
}
herr_t
H5Pset_fapl_mpio(hid_t fapl_id, MPI_Comm comm, MPI_Info info)
{
H5FD_mpio_fapl_t fa;
H5P_genplist_t *plist;
herr_t ret_value;
FUNC_ENTER_API(FAIL)
H5TRACE3("e", "iMcMi", fapl_id, comm, info);
if(fapl_id == H5P_DEFAULT)
HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list")
if(NULL == (plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS)))
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a file access list")
if(MPI_COMM_NULL == comm)
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "MPI_COMM_NULL is not a valid communicator")
fa.comm = comm;
fa.info = info;
ret_value = H5P_set_driver(plist, H5FD_MPIO, &fa);
done:
FUNC_LEAVE_API(ret_value)
}
herr_t
H5Pget_fapl_mpio(hid_t fapl_id, MPI_Comm *comm, MPI_Info *info)
{
H5P_genplist_t *plist;
const H5FD_mpio_fapl_t *fa;
MPI_Comm comm_tmp = MPI_COMM_NULL;
hbool_t comm_copied = FALSE;
int mpi_code;
herr_t ret_value = SUCCEED;
FUNC_ENTER_API(FAIL)
H5TRACE3("e", "ixx", fapl_id, comm, info);
if(NULL == (plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS)))
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a file access list")
if(H5FD_MPIO != H5P_peek_driver(plist))
HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "incorrect VFL driver")
if(NULL == (fa = (const H5FD_mpio_fapl_t *)H5P_peek_driver_info(plist)))
HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "bad VFL driver info")
if(comm) {
if(MPI_SUCCESS != (mpi_code = MPI_Comm_dup(fa->comm, &comm_tmp)))
HMPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code)
comm_copied = TRUE;
}
if(info) {
if(MPI_INFO_NULL != fa->info) {
if(MPI_SUCCESS != (mpi_code = MPI_Info_dup(fa->info, info)))
HMPI_GOTO_ERROR(FAIL, "MPI_Info_dup failed", mpi_code)
}
else
*info = MPI_INFO_NULL;
}
if(comm)
*comm = comm_tmp;
done:
if(ret_value < 0)
if(comm_copied)
MPI_Comm_free(&comm_tmp);
FUNC_LEAVE_API(ret_value)
}
herr_t
H5Pset_dxpl_mpio(hid_t dxpl_id, H5FD_mpio_xfer_t xfer_mode)
{
H5P_genplist_t *plist;
herr_t ret_value = SUCCEED;
FUNC_ENTER_API(FAIL)
H5TRACE2("e", "iDt", dxpl_id, xfer_mode);
if(dxpl_id == H5P_DEFAULT)
HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list")
if(NULL == (plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)))
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl")
if(H5FD_MPIO_INDEPENDENT != xfer_mode && H5FD_MPIO_COLLECTIVE != xfer_mode)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "incorrect xfer_mode")
if(H5P_set(plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to set value")
done:
FUNC_LEAVE_API(ret_value)
}
herr_t
H5Pget_dxpl_mpio(hid_t dxpl_id, H5FD_mpio_xfer_t *xfer_mode)
{
H5P_genplist_t *plist;
herr_t ret_value = SUCCEED;
FUNC_ENTER_API(FAIL)
H5TRACE2("e", "ix", dxpl_id, xfer_mode);
if(NULL == (plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)))
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl")
if(xfer_mode)
if(H5P_get(plist, H5D_XFER_IO_XFER_MODE_NAME, xfer_mode) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to get value")
done:
FUNC_LEAVE_API(ret_value)
}
herr_t
H5Pset_dxpl_mpio_collective_opt(hid_t dxpl_id, H5FD_mpio_collective_opt_t opt_mode)
{
H5P_genplist_t *plist;
herr_t ret_value = SUCCEED;
FUNC_ENTER_API(FAIL)
H5TRACE2("e", "iDc", dxpl_id, opt_mode);
if(dxpl_id == H5P_DEFAULT)
HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list")
if(NULL == (plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)))
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl")
if(H5P_set(plist, H5D_XFER_MPIO_COLLECTIVE_OPT_NAME, &opt_mode) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to set value")
done:
FUNC_LEAVE_API(ret_value)
}
herr_t
H5Pset_dxpl_mpio_chunk_opt(hid_t dxpl_id, H5FD_mpio_chunk_opt_t opt_mode)
{
H5P_genplist_t *plist;
herr_t ret_value = SUCCEED;
FUNC_ENTER_API(FAIL)
H5TRACE2("e", "iDh", dxpl_id, opt_mode);
if(dxpl_id == H5P_DEFAULT)
HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list")
if(NULL == (plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)))
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl")
if(H5P_set(plist, H5D_XFER_MPIO_CHUNK_OPT_HARD_NAME, &opt_mode) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to set value")
done:
FUNC_LEAVE_API(ret_value)
}
herr_t
H5Pset_dxpl_mpio_chunk_opt_num(hid_t dxpl_id, unsigned num_chunk_per_proc)
{
H5P_genplist_t *plist;
herr_t ret_value = SUCCEED;
FUNC_ENTER_API(FAIL)
H5TRACE2("e", "iIu", dxpl_id, num_chunk_per_proc);
if(dxpl_id == H5P_DEFAULT)
HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list")
if(NULL == (plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)))
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl")
if(H5P_set(plist, H5D_XFER_MPIO_CHUNK_OPT_NUM_NAME, &num_chunk_per_proc) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to set value")
done:
FUNC_LEAVE_API(ret_value)
}
herr_t
H5Pset_dxpl_mpio_chunk_opt_ratio(hid_t dxpl_id, unsigned percent_num_proc_per_chunk)
{
H5P_genplist_t *plist;
herr_t ret_value = SUCCEED;
FUNC_ENTER_API(FAIL)
H5TRACE2("e", "iIu", dxpl_id, percent_num_proc_per_chunk);
if(dxpl_id == H5P_DEFAULT)
HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list")
if(NULL == (plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)))
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl")
if(H5P_set(plist, H5D_XFER_MPIO_CHUNK_OPT_RATIO_NAME, &percent_num_proc_per_chunk) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to set value")
done:
FUNC_LEAVE_API(ret_value)
}
static void *
H5FD_mpio_fapl_get(H5FD_t *_file)
{
H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
H5FD_mpio_fapl_t *fa = NULL;
void *ret_value;
FUNC_ENTER_NOAPI_NOINIT
HDassert(file);
HDassert(H5FD_MPIO == file->pub.driver_id);
if(NULL == (fa = (H5FD_mpio_fapl_t *)H5MM_calloc(sizeof(H5FD_mpio_fapl_t))))
HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed")
if(FAIL == H5FD_mpi_comm_info_dup(file->comm, file->info, &fa->comm, &fa->info))
HGOTO_ERROR(H5E_INTERNAL, H5E_CANTCOPY, NULL, "Communicator/Info duplicate failed")
ret_value = fa;
done:
FUNC_LEAVE_NOAPI(ret_value)
}
static void *
H5FD_mpio_fapl_copy(const void *_old_fa)
{
void *ret_value = NULL;
const H5FD_mpio_fapl_t *old_fa = (const H5FD_mpio_fapl_t*)_old_fa;
H5FD_mpio_fapl_t *new_fa = NULL;
FUNC_ENTER_NOAPI_NOINIT
#ifdef H5FDmpio_DEBUG
if (H5FD_mpio_Debug[(int)'t'])
HDfprintf(stderr, "enter H5FD_mpio_fapl_copy\n");
#endif
if(NULL == (new_fa = (H5FD_mpio_fapl_t *)H5MM_malloc(sizeof(H5FD_mpio_fapl_t))))
HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed")
H5MM_memcpy(new_fa, old_fa, sizeof(H5FD_mpio_fapl_t));
if(FAIL == H5FD_mpi_comm_info_dup(old_fa->comm, old_fa->info, &new_fa->comm, &new_fa->info))
HGOTO_ERROR(H5E_INTERNAL, H5E_CANTCOPY, NULL, "Communicator/Info duplicate failed")
ret_value = new_fa;
done:
if (NULL == ret_value){
if (new_fa)
H5MM_xfree(new_fa);
}
#ifdef H5FDmpio_DEBUG
if (H5FD_mpio_Debug[(int)'t'])
HDfprintf(stderr, "leaving H5FD_mpio_fapl_copy\n");
#endif
FUNC_LEAVE_NOAPI(ret_value)
}
static herr_t
H5FD_mpio_fapl_free(void *_fa)
{
herr_t ret_value = SUCCEED;
H5FD_mpio_fapl_t *fa = (H5FD_mpio_fapl_t*)_fa;
FUNC_ENTER_NOAPI_NOINIT_NOERR
#ifdef H5FDmpio_DEBUG
if (H5FD_mpio_Debug[(int)'t'])
HDfprintf(stderr, "in H5FD_mpio_fapl_free\n");
#endif
HDassert(fa);
HDassert(MPI_COMM_NULL!=fa->comm);
H5FD_mpi_comm_info_free(&fa->comm, &fa->info);
H5MM_xfree(fa);
#ifdef H5FDmpio_DEBUG
if (H5FD_mpio_Debug[(int)'t'])
HDfprintf(stderr, "leaving H5FD_mpio_fapl_free\n");
#endif
FUNC_LEAVE_NOAPI(ret_value)
}
herr_t
H5FD_set_mpio_atomicity(H5FD_t *_file, hbool_t flag)
{
H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
int mpi_code;
int temp_flag;
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI_NOINIT
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Entering\n", FUNC);
#endif
if(FALSE == flag)
temp_flag = 0;
else
temp_flag = 1;
if(MPI_SUCCESS != (mpi_code = MPI_File_set_atomicity(file->f, temp_flag)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_atomicity", mpi_code)
done:
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Leaving\n", FUNC);
#endif
FUNC_LEAVE_NOAPI(ret_value)
}
herr_t
H5FD_get_mpio_atomicity(H5FD_t *_file, hbool_t *flag)
{
H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
int mpi_code;
int temp_flag;
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI_NOINIT
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Entering\n", FUNC);
#endif
if(MPI_SUCCESS != (mpi_code = MPI_File_get_atomicity(file->f, &temp_flag)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_get_atomicity", mpi_code)
if(0 != temp_flag)
*flag = TRUE;
else
*flag = FALSE;
done:
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Leaving\n", FUNC);
#endif
FUNC_LEAVE_NOAPI(ret_value)
}
static H5FD_t *
H5FD_mpio_open(const char *name, unsigned flags, hid_t fapl_id,
haddr_t H5_ATTR_UNUSED maxaddr)
{
H5FD_mpio_t *file=NULL;
MPI_File fh;
unsigned file_opened=0;
int mpi_amode;
int mpi_rank;
int mpi_size;
int mpi_code;
MPI_Offset size;
const H5FD_mpio_fapl_t *fa = NULL;
H5FD_mpio_fapl_t _fa;
H5P_genplist_t *plist;
MPI_Comm comm_dup = MPI_COMM_NULL;
MPI_Info info_dup = MPI_INFO_NULL;
H5FD_t *ret_value;
FUNC_ENTER_NOAPI_NOINIT
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Entering - name = \"%s\", flags = 0x%x, fapl_id = %d, maxaddr = %lu\n", FUNC, name, flags, (int)fapl_id, (unsigned long)maxaddr);
#endif
if(NULL == (plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS)))
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "not a file access property list")
if(H5P_FILE_ACCESS_DEFAULT == fapl_id || H5FD_MPIO != H5P_peek_driver(plist)) {
_fa.comm = MPI_COMM_SELF;
_fa.info = MPI_INFO_NULL;
fa = &_fa;
}
else {
if(NULL == (fa = (const H5FD_mpio_fapl_t *)H5P_peek_driver_info(plist)))
HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, NULL, "bad VFL driver info")
}
if(FAIL == H5FD_mpi_comm_info_dup(fa->comm, fa->info, &comm_dup, &info_dup))
HGOTO_ERROR(H5E_INTERNAL, H5E_CANTCOPY, NULL, "Communicator/Info duplicate failed")
mpi_amode = (flags & H5F_ACC_RDWR) ? MPI_MODE_RDWR : MPI_MODE_RDONLY;
if(flags & H5F_ACC_CREAT)
mpi_amode |= MPI_MODE_CREATE;
if(flags & H5F_ACC_EXCL)
mpi_amode |= MPI_MODE_EXCL;
#ifdef H5FDmpio_DEBUG
if(MPI_INFO_NULL != info_dup) {
char debug_str[128];
int flag;
MPI_Info_get(fa->info, H5F_MPIO_DEBUG_KEY, sizeof(debug_str) - 1, debug_str, &flag);
if(flag) {
int i;
HDfprintf(stdout, "H5FD_mpio debug flags = '%s'\n", debug_str);
for(i = 0; debug_str[i] && i < 128; ++i)
H5FD_mpio_Debug[(int)debug_str[i]] = 1;
}
}
#endif
if(MPI_SUCCESS != (mpi_code = MPI_File_open(comm_dup, name, mpi_amode, info_dup, &fh)))
HMPI_GOTO_ERROR(NULL, "MPI_File_open failed", mpi_code)
file_opened=1;
if (MPI_SUCCESS != (mpi_code=MPI_Comm_rank (comm_dup, &mpi_rank)))
HMPI_GOTO_ERROR(NULL, "MPI_Comm_rank failed", mpi_code)
if (MPI_SUCCESS != (mpi_code=MPI_Comm_size (comm_dup, &mpi_size)))
HMPI_GOTO_ERROR(NULL, "MPI_Comm_size failed", mpi_code)
if(NULL == (file = (H5FD_mpio_t *)H5MM_calloc(sizeof(H5FD_mpio_t))))
HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed")
file->f = fh;
file->comm = comm_dup;
file->info = info_dup;
file->mpi_rank = mpi_rank;
file->mpi_size = mpi_size;
if (mpi_rank == 0) {
if (MPI_SUCCESS != (mpi_code=MPI_File_get_size(fh, &size)))
HMPI_GOTO_ERROR(NULL, "MPI_File_get_size failed", mpi_code)
}
if(MPI_SUCCESS != (mpi_code = MPI_Bcast(&size, (int)sizeof(MPI_Offset), MPI_BYTE, 0, comm_dup)))
HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code)
if(size && (flags & H5F_ACC_TRUNC)) {
if (MPI_SUCCESS != (mpi_code=MPI_File_set_size(fh, (MPI_Offset)0)))
HMPI_GOTO_ERROR(NULL, "MPI_File_set_size failed", mpi_code)
if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(comm_dup)))
HMPI_GOTO_ERROR(NULL, "MPI_Barrier failed", mpi_code)
size = 0;
}
file->eof = H5FD_mpi_MPIOff_to_haddr(size);
file->local_eof = file->eof;
ret_value = (H5FD_t*)file;
done:
if(ret_value == NULL) {
if(file_opened)
MPI_File_close(&fh);
if (MPI_COMM_NULL != comm_dup)
MPI_Comm_free(&comm_dup);
if (MPI_INFO_NULL != info_dup)
MPI_Info_free(&info_dup);
if (file)
H5MM_xfree(file);
}
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Leaving\n", FUNC);
#endif
FUNC_LEAVE_NOAPI(ret_value)
}
static herr_t
H5FD_mpio_close(H5FD_t *_file)
{
H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
int mpi_code;
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI_NOINIT
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Entering\n", FUNC);
#endif
HDassert(file);
HDassert(H5FD_MPIO == file->pub.driver_id);
if(MPI_SUCCESS != (mpi_code = MPI_File_close(&(file->f))))
HMPI_GOTO_ERROR(FAIL, "MPI_File_close failed", mpi_code)
H5FD_mpi_comm_info_free(&file->comm, &file->info);
H5MM_xfree(file);
done:
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Leaving\n", FUNC);
#endif
FUNC_LEAVE_NOAPI(ret_value)
}
static herr_t
H5FD_mpio_query(const H5FD_t H5_ATTR_UNUSED *_file, unsigned long *flags )
{
FUNC_ENTER_NOAPI_NOINIT_NOERR
if(flags) {
*flags = 0;
*flags |= H5FD_FEAT_AGGREGATE_METADATA;
*flags |= H5FD_FEAT_AGGREGATE_SMALLDATA;
*flags |= H5FD_FEAT_HAS_MPI;
*flags |= H5FD_FEAT_ALLOCATE_EARLY;
*flags |= H5FD_FEAT_DEFAULT_VFD_COMPATIBLE;
}
FUNC_LEAVE_NOAPI(SUCCEED)
}
static haddr_t
H5FD_mpio_get_eoa(const H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type)
{
const H5FD_mpio_t *file = (const H5FD_mpio_t*)_file;
FUNC_ENTER_NOAPI_NOINIT_NOERR
HDassert(file);
HDassert(H5FD_MPIO == file->pub.driver_id);
FUNC_LEAVE_NOAPI(file->eoa)
}
static herr_t
H5FD_mpio_set_eoa(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, haddr_t addr)
{
H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
FUNC_ENTER_NOAPI_NOINIT_NOERR
HDassert(file);
HDassert(H5FD_MPIO == file->pub.driver_id);
file->eoa = addr;
FUNC_LEAVE_NOAPI(SUCCEED)
}
static haddr_t
H5FD_mpio_get_eof(const H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type)
{
const H5FD_mpio_t *file = (const H5FD_mpio_t*)_file;
FUNC_ENTER_NOAPI_NOINIT_NOERR
HDassert(file);
HDassert(H5FD_MPIO == file->pub.driver_id);
FUNC_LEAVE_NOAPI(file->eof)
}
static herr_t
H5FD_mpio_get_handle(H5FD_t *_file, hid_t H5_ATTR_UNUSED fapl, void** file_handle)
{
H5FD_mpio_t *file = (H5FD_mpio_t *)_file;
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI_NOINIT
if(!file_handle)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file handle not valid")
*file_handle = &(file->f);
done:
FUNC_LEAVE_NOAPI(ret_value)
}
static herr_t
H5FD_mpio_get_info(H5FD_t *_file, void** mpi_info)
{
H5FD_mpio_t *file = (H5FD_mpio_t *)_file;
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI_NOINIT
if(!mpi_info)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "mpi info not valid")
*mpi_info = &(file->info);
done:
FUNC_LEAVE_NOAPI(ret_value)
}
static herr_t
H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
hid_t H5_ATTR_UNUSED dxpl_id, haddr_t addr, size_t size, void *buf)
{
H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
MPI_Offset mpi_off;
MPI_Status mpi_stat;
int mpi_code;
MPI_Datatype buf_type = MPI_BYTE;
int size_i;
#if MPI_VERSION >= 3
MPI_Count bytes_read = 0;
MPI_Count type_size;
MPI_Count io_size;
MPI_Count n;
#else
int bytes_read = 0;
int type_size;
int io_size;
int n;
#endif
hbool_t use_view_this_time = FALSE;
hbool_t rank0_bcast = FALSE;
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI_NOINIT
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Entering\n", FUNC);
#endif
HDassert(file);
HDassert(H5FD_MPIO==file->pub.driver_id);
HDassert(buf);
HDmemset(&mpi_stat,0,sizeof(MPI_Status));
if(H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off) < 0)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off")
size_i = (int)size;
if((hsize_t)size_i != size)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i")
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'r'])
HDfprintf(stdout, "%s: mpi_off = %ld size_i = %d\n", FUNC, (long)mpi_off, size_i);
#endif
if(type == H5FD_MEM_DRAW) {
H5FD_mpio_xfer_t xfer_mode;
if(H5CX_get_io_xfer_mode(&xfer_mode) < 0)
HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode")
if(xfer_mode == H5FD_MPIO_COLLECTIVE) {
MPI_Datatype file_type;
use_view_this_time = TRUE;
if(H5CX_get_mpi_coll_datatypes(&buf_type, &file_type) < 0)
HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O datatypes")
if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
mpi_off = 0;
}
}
if(use_view_this_time) {
H5FD_mpio_collective_opt_t coll_opt_mode;
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'r'])
HDfprintf(stdout, "%s: using MPIO collective mode\n", FUNC);
#endif
if(H5CX_get_mpio_coll_opt(&coll_opt_mode) < 0)
HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O collective_op property")
if(coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO) {
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'r'])
HDfprintf(stdout, "%s: doing MPI collective IO\n", FUNC);
#endif
if(H5CX_get_mpio_rank0_bcast()) {
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'r'])
HDfprintf(stdout, "%s: doing read-rank0-and-MPI_Bcast\n", FUNC);
#endif
rank0_bcast = TRUE;
if(file->mpi_rank == 0)
if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
if(MPI_SUCCESS != (mpi_code = MPI_Bcast(buf, size_i, buf_type, 0, file->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
}
else
if(MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code)
}
else {
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'r'])
HDfprintf(stdout, "%s: doing MPI independent IO\n", FUNC);
#endif
if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
}
if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, H5FD_mpi_native_g, file->info)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
}
else
if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
if(!rank0_bcast || (rank0_bcast && file->mpi_rank == 0) ) {
#if MPI_VERSION >= 3
if(MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read)))
#else
if(MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read)))
#endif
HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
}
if(rank0_bcast)
if(MPI_SUCCESS != MPI_Bcast(&bytes_read, 1, MPI_LONG_LONG, 0, file->comm))
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", 0)
#if MPI_VERSION >= 3
if(MPI_SUCCESS != (mpi_code = MPI_Type_size_x(buf_type, &type_size)))
#else
if(MPI_SUCCESS != (mpi_code = MPI_Type_size(buf_type, &type_size)))
#endif
HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code)
io_size = type_size * size_i;
if(bytes_read < 0 || bytes_read > io_size)
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed")
if((n = (io_size - bytes_read)) > 0)
HDmemset((char*)buf+bytes_read, 0, (size_t)n);
done:
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Leaving\n", FUNC);
#endif
FUNC_LEAVE_NOAPI(ret_value)
}
static herr_t
H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
haddr_t addr, size_t size, const void *buf)
{
H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
MPI_Offset mpi_off;
MPI_Status mpi_stat;
MPI_Datatype buf_type = MPI_BYTE;
int mpi_code;
#if MPI_VERSION >= 3
MPI_Count bytes_written;
MPI_Count type_size;
MPI_Count io_size;
#else
int bytes_written;
int type_size;
int io_size;
#endif
int size_i;
hbool_t use_view_this_time = FALSE;
H5FD_mpio_xfer_t xfer_mode;
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI_NOINIT
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Entering\n", FUNC);
#endif
HDassert(file);
HDassert(H5FD_MPIO==file->pub.driver_id);
HDassert(buf);
HDassert(!H5CX_get_mpi_file_flushing());
HDmemset(&mpi_stat, 0, sizeof(MPI_Status));
if(H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off) < 0)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off")
size_i = (int)size;
if((hsize_t)size_i != size)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i")
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'w'])
HDfprintf(stdout, "%s: mpi_off = %ld size_i = %d\n", FUNC, (long)mpi_off, size_i);
#endif
if(H5CX_get_io_xfer_mode(&xfer_mode) < 0)
HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode")
if(xfer_mode == H5FD_MPIO_COLLECTIVE) {
MPI_Datatype file_type;
use_view_this_time = TRUE;
if(H5CX_get_mpi_coll_datatypes(&buf_type, &file_type) < 0)
HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O datatypes")
if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
mpi_off = 0;
}
if(use_view_this_time) {
H5FD_mpio_collective_opt_t coll_opt_mode;
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "H5FD_mpio_write: using MPIO collective mode\n");
#endif
if(H5CX_get_mpio_coll_opt(&coll_opt_mode) < 0)
HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O collective_op property")
if(coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO) {
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "H5FD_mpio_write: doing MPI collective IO\n");
#endif
if(MPI_SUCCESS != (mpi_code = MPI_File_write_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code)
}
else {
if(type != H5FD_MEM_DRAW)
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "Metadata Coll opt property should be collective at this point")
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "H5FD_mpio_write: doing MPI independent IO\n");
#endif
if(MPI_SUCCESS != (mpi_code = MPI_File_write_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code)
}
if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, H5FD_mpi_native_g, file->info)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
}
else
if(MPI_SUCCESS != (mpi_code = MPI_File_write_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code)
#if MPI_VERSION >= 3
if(MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_written)))
#else
if(MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_written)))
#endif
HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
#if MPI_VERSION >= 3
if(MPI_SUCCESS != (mpi_code = MPI_Type_size_x(buf_type, &type_size)))
#else
if(MPI_SUCCESS != (mpi_code = MPI_Type_size(buf_type, &type_size)))
#endif
HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code)
io_size = type_size * size_i;
if(bytes_written != io_size)
HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "file write failed")
file->eof = HADDR_UNDEF;
if(bytes_written && ((bytes_written + addr) > file->local_eof))
file->local_eof = addr + bytes_written;
done:
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Leaving, proc %d: ret_value = %d\n", FUNC, file->mpi_rank, ret_value );
#endif
FUNC_LEAVE_NOAPI(ret_value)
}
static herr_t
H5FD_mpio_flush(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, hbool_t closing)
{
H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
int mpi_code;
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI_NOINIT
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Entering\n", FUNC);
#endif
HDassert(file);
HDassert(H5FD_MPIO == file->pub.driver_id);
if(!closing)
if(MPI_SUCCESS != (mpi_code = MPI_File_sync(file->f)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_sync failed", mpi_code)
done:
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Leaving\n", FUNC);
#endif
FUNC_LEAVE_NOAPI(ret_value)
}
static herr_t
H5FD_mpio_truncate(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, hbool_t H5_ATTR_UNUSED closing)
{
H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI_NOINIT
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Entering\n", FUNC);
#endif
HDassert(file);
HDassert(H5FD_MPIO == file->pub.driver_id);
if(!H5F_addr_eq(file->eoa, file->last_eoa)) {
int mpi_code;
MPI_Offset size;
MPI_Offset needed_eof;
if(!H5CX_get_mpi_file_flushing())
if(MPI_SUCCESS != (mpi_code = MPI_Barrier(file->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
if(0 == file->mpi_rank)
if(MPI_SUCCESS != (mpi_code = MPI_File_get_size(file->f, &size)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_get_size failed", mpi_code)
if(MPI_SUCCESS != (mpi_code = MPI_Bcast(&size, (int)sizeof(MPI_Offset), MPI_BYTE, 0, file->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
if(H5FD_mpi_haddr_to_MPIOff(file->eoa, &needed_eof) < 0)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset")
if(size != needed_eof) {
if(MPI_SUCCESS != (mpi_code = MPI_File_set_size(file->f, needed_eof)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_size failed", mpi_code)
if(MPI_SUCCESS != (mpi_code = MPI_Barrier(file->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
}
file->last_eoa = file->eoa;
}
done:
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Leaving\n", FUNC);
#endif
FUNC_LEAVE_NOAPI(ret_value)
}
static int
H5FD_mpio_mpi_rank(const H5FD_t *_file)
{
const H5FD_mpio_t *file = (const H5FD_mpio_t*)_file;
FUNC_ENTER_NOAPI_NOINIT_NOERR
HDassert(file);
HDassert(H5FD_MPIO == file->pub.driver_id);
FUNC_LEAVE_NOAPI(file->mpi_rank)
}
static int
H5FD_mpio_mpi_size(const H5FD_t *_file)
{
const H5FD_mpio_t *file = (const H5FD_mpio_t*)_file;
FUNC_ENTER_NOAPI_NOINIT_NOERR
HDassert(file);
HDassert(H5FD_MPIO == file->pub.driver_id);
FUNC_LEAVE_NOAPI(file->mpi_size)
}
static MPI_Comm
H5FD_mpio_communicator(const H5FD_t *_file)
{
const H5FD_mpio_t *file = (const H5FD_mpio_t*)_file;
FUNC_ENTER_NOAPI_NOINIT_NOERR
HDassert(file);
HDassert(H5FD_MPIO == file->pub.driver_id);
FUNC_LEAVE_NOAPI(file->comm)
}
#endif