#include "db_config.h"
#include "db_int.h"
#include "dbinc/mp.h"
static int __memp_init_config __P((ENV *, MPOOL *));
static void __memp_region_size __P((ENV *, roff_t *, u_int32_t *));
#define MPOOL_DEFAULT_PAGESIZE (4 * 1024)
int
__memp_open(env, create_ok)
ENV *env;
int create_ok;
{
DB_ENV *dbenv;
DB_MPOOL *dbmp;
MPOOL *mp, *mp_i;
REGINFO reginfo;
roff_t cache_size, max_size, reg_size;
u_int i, max_nreg;
u_int32_t htab_buckets, *regids;
int ret;
dbenv = env->dbenv;
cache_size = 0;
__memp_region_size(env, &max_size, &htab_buckets);
if ((ret = __os_calloc(env, 1, sizeof(*dbmp), &dbmp)) != 0)
return (ret);
LIST_INIT(&dbmp->dbregq);
TAILQ_INIT(&dbmp->dbmfq);
dbmp->env = env;
memset(®info, 0, sizeof(REGINFO));
reginfo.env = env;
reginfo.type = REGION_TYPE_MPOOL;
reginfo.id = INVALID_REGION_ID;
reginfo.flags = REGION_JOIN_OK;
reg_size = sizeof(MPOOL);
reg_size += MPOOL_FILE_BUCKETS * sizeof(DB_MPOOL_HASH);
reg_size += htab_buckets * sizeof(DB_MPOOL_HASH);
reg_size += (dbenv->mp_pagesize == 0 ?
MPOOL_DEFAULT_PAGESIZE : dbenv->mp_pagesize) * 10;
if (reg_size > max_size)
reg_size = max_size;
if (create_ok)
F_SET(®info, REGION_CREATE_OK);
if ((ret = __env_region_attach(env, ®info, reg_size, max_size)) != 0)
goto err;
cache_size = reginfo.rp->max;
if (F_ISSET(env, ENV_PRIVATE))
reginfo.max_alloc = reginfo.rp->max;
if (F_ISSET(®info, REGION_CREATE)) {
max_nreg = __memp_max_regions(env);
if ((ret = __os_calloc(env,
max_nreg, sizeof(REGINFO), &dbmp->reginfo)) != 0)
goto err;
dbmp->reginfo[0] = reginfo;
for (i = 1; i < max_nreg; ++i)
dbmp->reginfo[i].id = INVALID_REGION_ID;
if ((ret = __memp_init(env, dbmp,
0, htab_buckets, max_nreg)) != 0)
goto err;
mp = R_ADDR(dbmp->reginfo, dbmp->reginfo[0].rp->primary);
regids = R_ADDR(dbmp->reginfo, mp->regids);
regids[0] = dbmp->reginfo[0].id;
for (i = 1; i < dbenv->mp_ncache; ++i) {
dbmp->reginfo[i].env = env;
dbmp->reginfo[i].type = REGION_TYPE_MPOOL;
dbmp->reginfo[i].id = INVALID_REGION_ID;
dbmp->reginfo[i].flags = REGION_CREATE_OK;
if ((ret = __env_region_attach(
env, &dbmp->reginfo[i], reg_size, max_size)) != 0)
goto err;
if (F_ISSET(env, ENV_PRIVATE))
dbmp->reginfo[i].max_alloc = max_size;
cache_size += dbmp->reginfo[i].rp->max;
if ((ret = __memp_init(env, dbmp,
i, htab_buckets, max_nreg)) != 0)
goto err;
regids[i] = dbmp->reginfo[i].id;
}
mp->gbytes = (u_int32_t) (cache_size / GIGABYTE);
mp->bytes = (u_int32_t) (cache_size % GIGABYTE);
} else {
mp = R_ADDR(®info, reginfo.rp->primary);
dbenv->mp_ncache = mp->nreg;
if ((ret = __os_calloc(env,
mp->max_nreg, sizeof(REGINFO), &dbmp->reginfo)) != 0)
goto err;
for (i = 0; i < dbenv->mp_ncache; ++i)
dbmp->reginfo[i].id = INVALID_REGION_ID;
dbmp->reginfo[0] = reginfo;
regids = R_ADDR(dbmp->reginfo, mp->regids);
for (i = 1; i < dbenv->mp_ncache; ++i) {
dbmp->reginfo[i].env = env;
dbmp->reginfo[i].type = REGION_TYPE_MPOOL;
dbmp->reginfo[i].id = regids[i];
dbmp->reginfo[i].flags = REGION_JOIN_OK;
if ((ret = __env_region_attach(
env, &dbmp->reginfo[i], 0, 0)) != 0)
goto err;
}
}
for (i = 0; i < dbenv->mp_ncache; ++i) {
mp_i = dbmp->reginfo[i].primary =
R_ADDR(&dbmp->reginfo[i], dbmp->reginfo[i].rp->primary);
dbmp->reginfo[i].mtx_alloc = mp_i->mtx_region;
}
if ((ret = __mutex_alloc(env,
MTX_MPOOL_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbmp->mutex)) != 0)
goto err;
env->mp_handle = dbmp;
if ((ret = __memp_init_config(env, mp)) != 0)
return (ret);
return (0);
err: env->mp_handle = NULL;
if (dbmp->reginfo != NULL && dbmp->reginfo[0].addr != NULL) {
for (i = 0; i < dbenv->mp_ncache; ++i)
if (dbmp->reginfo[i].id != INVALID_REGION_ID)
(void)__env_region_detach(
env, &dbmp->reginfo[i], 0);
__os_free(env, dbmp->reginfo);
}
(void)__mutex_free(env, &dbmp->mutex);
__os_free(env, dbmp);
return (ret);
}
/*
 * __memp_init --
 *	Initialize one cache region's primary MPOOL structure.
 *
 * env:          environment handle
 * dbmp:         per-process DB_MPOOL whose reginfo[] array is being filled
 * reginfo_off:  index of the region to initialize (0 is the primary region)
 * htab_buckets: number of page-hash buckets to allocate in this region
 * max_nreg:     maximum number of regions the cache may ever contain
 *
 * Returns 0 on success or a non-zero error code.
 */
int
__memp_init(env, dbmp, reginfo_off, htab_buckets, max_nreg)
ENV *env;
DB_MPOOL *dbmp;
u_int reginfo_off, max_nreg;
u_int32_t htab_buckets;
{
BH *frozen_bhp;
BH_FROZEN_ALLOC *frozen;
DB_ENV *dbenv;
DB_MPOOL_HASH *htab, *hp;
MPOOL *mp, *main_mp;
REGINFO *infop;
db_mutex_t mtx_base, mtx_discard, mtx_prev;
u_int32_t i;
int ret;
void *p;
dbenv = env->dbenv;
infop = &dbmp->reginfo[reginfo_off];
/* Allocate the region's MPOOL structure and publish its region offset. */
if ((ret = __env_alloc(infop, sizeof(MPOOL), &infop->primary)) != 0)
goto mem_err;
infop->rp->primary = R_OFFSET(infop, infop->primary);
mp = infop->primary;
memset(mp, 0, sizeof(*mp));
if ((ret =
__mutex_alloc(env, MTX_MPOOL_REGION, 0, &mp->mtx_region)) != 0)
return (ret);
/* Region 0 additionally carries the cache-wide state. */
if (reginfo_off == 0) {
ZERO_LSN(mp->lsn);
mp->nreg = dbenv->mp_ncache;
mp->max_nreg = max_nreg;
/* Shared array of region ids, sized for the maximum region count. */
if ((ret = __env_alloc(&dbmp->reginfo[0],
max_nreg * sizeof(u_int32_t), &p)) != 0)
goto mem_err;
mp->regids = R_OFFSET(dbmp->reginfo, p);
mp->nbuckets = dbenv->mp_ncache * htab_buckets;
/* Allocate and initialize the file-name hash table and its mutexes. */
if ((ret = __env_alloc(infop,
MPOOL_FILE_BUCKETS * sizeof(DB_MPOOL_HASH), &htab)) != 0)
goto mem_err;
mp->ftab = R_OFFSET(infop, htab);
for (i = 0; i < MPOOL_FILE_BUCKETS; i++) {
if ((ret = __mutex_alloc(env,
MTX_MPOOL_FILE_BUCKET, 0, &htab[i].mtx_hash)) != 0)
return (ret);
SH_TAILQ_INIT(&htab[i].hash_bucket);
atomic_init(&htab[i].hash_page_dirty, 0);
}
/*
 * Pre-allocate the hash-bucket mutexes for every possible region up
 * front so they receive consecutive mutex ids; each region later
 * addresses its share as mtx_base + offset (see below).  Skipped when
 * mutexes are off or the environment is private -- then buckets get
 * their mutexes individually in the per-bucket loop below.
 */
mtx_base = mtx_prev = MUTEX_INVALID;
if (!MUTEX_ON(env) || F_ISSET(env, ENV_PRIVATE))
goto no_prealloc;
for (i = 0; i < mp->max_nreg * dbenv->mp_mtxcount; i++) {
if ((ret = __mutex_alloc(env, MTX_MPOOL_HASH_BUCKET,
DB_MUTEX_SHARED, &mtx_discard)) != 0)
return (ret);
if (i == 0)
mtx_base = mtx_discard;
else
/* The scheme relies on the allocator handing out sequential ids. */
DB_ASSERT(env, mtx_base == MUTEX_INVALID ||
mtx_discard == mtx_prev + 1);
mtx_prev = mtx_discard;
}
} else {
/* Secondary regions recover mtx_base from region 0's first bucket. */
main_mp = dbmp->reginfo[0].primary;
htab = R_ADDR(&dbmp->reginfo[0], main_mp->htab);
mtx_base = htab[0].mtx_hash;
}
/* Label reached via goto when no mutexes were pre-allocated. */
no_prealloc:
/* Step to this region's slice of the contiguous mutex range. */
if (MUTEX_ON(env))
mtx_base += reginfo_off * dbenv->mp_mtxcount;
/* Allocate and initialize this region's page-hash table. */
if ((ret = __env_alloc(infop,
htab_buckets * sizeof(DB_MPOOL_HASH), &htab)) != 0)
goto mem_err;
mp->htab = R_OFFSET(infop, htab);
for (i = 0; i < htab_buckets; i++) {
hp = &htab[i];
/* Pick the bucket mutex: none, private on-demand, or preallocated. */
if (!MUTEX_ON(env) || dbenv->mp_mtxcount == 0)
hp->mtx_hash = MUTEX_INVALID;
else if (F_ISSET(env, ENV_PRIVATE)) {
/* Private env: allocate mp_mtxcount mutexes, share them modulo. */
if (i >= dbenv->mp_mtxcount)
hp->mtx_hash =
htab[i % dbenv->mp_mtxcount].mtx_hash;
else if
((ret = __mutex_alloc(env, MTX_MPOOL_HASH_BUCKET,
DB_MUTEX_SHARED, &hp->mtx_hash)) != 0)
return (ret);
} else
/* Shared env: map buckets onto the preallocated contiguous ids. */
hp->mtx_hash = mtx_base + (i % dbenv->mp_mtxcount);
SH_TAILQ_INIT(&hp->hash_bucket);
atomic_init(&hp->hash_page_dirty, 0);
#ifdef HAVE_STATISTICS
hp->hash_io_wait = 0;
hp->hash_frozen = hp->hash_thawed = hp->hash_frozen_freed = 0;
#endif
hp->flags = 0;
ZERO_LSN(hp->old_reader);
}
mp->htab_buckets = htab_buckets;
mp->htab_mutexes = dbenv->mp_mtxcount;
mp->pagesize = dbenv->mp_pagesize == 0 ?
MPOOL_DEFAULT_PAGESIZE : dbenv->mp_pagesize;
/*
 * Seed the frozen-buffer lists with one pre-allocated header --
 * presumably so freezing can make progress even when the region is
 * otherwise full; confirm against the MVCC freeze/thaw code.
 */
SH_TAILQ_INIT(&mp->free_frozen);
SH_TAILQ_INIT(&mp->alloc_frozen);
if ((ret = __env_alloc(infop,
sizeof(BH_FROZEN_ALLOC) + sizeof(BH_FROZEN_PAGE), &frozen)) != 0)
goto mem_err;
SH_TAILQ_INSERT_TAIL(&mp->alloc_frozen, frozen, links);
frozen_bhp = (BH *)(frozen + 1);
frozen_bhp->mtx_buf = MUTEX_INVALID;
SH_TAILQ_INSERT_TAIL(&mp->free_frozen, frozen_bhp, hq);
/* Record the configured cache size in the region. */
mp->gbytes = dbenv->mp_gbytes;
mp->bytes = dbenv->mp_bytes;
/* From here on, allocations in this region serialize on mtx_region. */
infop->mtx_alloc = mp->mtx_region;
return (0);
mem_err:__db_errx(env, DB_STR("3026",
"Unable to allocate memory for mpool region"));
return (ret);
}
/*
 * __memp_max_regions --
 *	Compute the maximum number of cache regions, based on the maximum
 *	cache size configured in the DB_ENV.
 *
 * env: the environment
 *
 * Returns the region count: the configured initial count when no maximum
 * cache size is set, otherwise the maximum size divided into regions of
 * the standard size (rounded to the nearest whole region), never less
 * than the initial count.
 */
u_int32_t
__memp_max_regions(env)
	ENV *env;
{
	DB_ENV *dbenv;
	roff_t reg_size, max_size;
	size_t max_nreg;

	dbenv = env->dbenv;

	/* With no configured maximum, the cache never grows. */
	if (dbenv->mp_max_gbytes == 0 && dbenv->mp_max_bytes == 0)
		return (dbenv->mp_ncache);
	__memp_region_size(env, &reg_size, NULL);
	max_size =
	    (roff_t)dbenv->mp_max_gbytes * GIGABYTE + dbenv->mp_max_bytes;
	/* Round to the nearest number of whole regions. */
	max_nreg = (max_size + reg_size / 2) / reg_size;

	/* Sanity check that the region count fits in 32 bits. */
	DB_ASSERT(env, max_nreg == (u_int32_t)max_nreg);

	/* Always allow at least the configured number of regions. */
	if (max_nreg <= dbenv->mp_ncache)
		max_nreg = dbenv->mp_ncache;
	return ((u_int32_t)max_nreg);
}
/*
 * __memp_region_size --
 *	Report the size of a single cache region and the number of page-hash
 *	buckets per region.  Either output pointer may be NULL when the
 *	caller only needs the other value.
 */
static void
__memp_region_size(env, reg_sizep, htab_bucketsp)
	ENV *env;
	roff_t *reg_sizep;
	u_int32_t *htab_bucketsp;
{
	DB_ENV *dbenv;
	roff_t per_region, total;
	u_int32_t psize;

	dbenv = env->dbenv;

	/* The total cache is split evenly across the configured regions. */
	total = (roff_t)dbenv->mp_gbytes * GIGABYTE + dbenv->mp_bytes;
	per_region = total / dbenv->mp_ncache;
	if (reg_sizep != NULL)
		*reg_sizep = per_region;

	if (htab_bucketsp == NULL)
		return;

	/* An explicitly configured table size wins outright. */
	if (dbenv->mp_tablesize != 0) {
		*htab_bucketsp = __db_tablesize(dbenv->mp_tablesize);
		return;
	}

	/* Otherwise target about 2.5 pages per hash bucket. */
	psize = dbenv->mp_pagesize;
	if (psize == 0)
		psize = MPOOL_DEFAULT_PAGESIZE;
	*htab_bucketsp =
	    __db_tablesize((u_int32_t)(per_region / (2.5 * psize)));
}
/*
 * __memp_region_mutex_count --
 *	Return the number of mutexes the mpool subsystem will need.
 *
 * env: the environment
 *
 * Returns an upper bound: one mutex per hash bucket plus one per buffer
 * that fits in each possible region, across the maximum region count,
 * plus slack for the file-name buckets and miscellaneous use.  As a side
 * effect, records the default mp_mtxcount in the DB_ENV when the user
 * did not set one.
 */
u_int32_t
__memp_region_mutex_count(env)
	ENV *env;
{
	DB_ENV *dbenv;
	u_int32_t htab_buckets;
	roff_t reg_size;
	u_int32_t max_region, num_per_cache, pgsize;

	dbenv = env->dbenv;

	__memp_region_size(env, &reg_size, &htab_buckets);

	/*
	 * NOTE(review): the store below is dead -- pgsize is
	 * unconditionally overwritten by the following statement.  It looks
	 * as if the MULTIVERSION case was meant to size buffers by the
	 * frozen-header footprint (yielding many more mutexes); confirm the
	 * intent before changing behavior.
	 */
	if (F_ISSET(env->dbenv, DB_ENV_MULTIVERSION))
		pgsize = sizeof(BH_FROZEN_ALLOC) + sizeof(BH_FROZEN_PAGE);
	if ((pgsize = dbenv->mp_pagesize) == 0)
		pgsize = MPOOL_DEFAULT_PAGESIZE;

	max_region = __memp_max_regions(env);

	/*
	 * An explicit mp_mtxcount overrides the one-mutex-per-bucket
	 * default; otherwise record the default back into the DB_ENV.
	 */
	if (dbenv->mp_mtxcount != 0)
		htab_buckets = dbenv->mp_mtxcount;
	else
		dbenv->mp_mtxcount = htab_buckets;

	/* Hash-bucket mutexes plus one per buffer a region can hold. */
	num_per_cache = htab_buckets + (u_int32_t)(reg_size / pgsize);
	return ((max_region * num_per_cache) + 50 + MPOOL_FILE_BUCKETS);
}
/*
 * __memp_init_config --
 *	Copy the DB_ENV's mpool tuning parameters into the shared MPOOL
 *	region, under the system lock.  Only values the application set
 *	to something non-zero overwrite what the region already records.
 *
 * env: the environment
 * mp:  the primary MPOOL region structure
 *
 * Returns 0.
 */
static int
__memp_init_config(env, mp)
	ENV *env;
	MPOOL *mp;
{
	DB_ENV *dbenv;

	dbenv = env->dbenv;

	MPOOL_SYSTEM_LOCK(env);
	/* Each field is copied only when explicitly configured. */
	if (dbenv->mp_maxopenfd != 0)
		mp->mp_maxopenfd = dbenv->mp_maxopenfd;
	if (dbenv->mp_maxwrite != 0)
		mp->mp_maxwrite = dbenv->mp_maxwrite;
	if (dbenv->mp_maxwrite_sleep != 0)
		mp->mp_maxwrite_sleep = dbenv->mp_maxwrite_sleep;
	if (dbenv->mp_mmapsize != 0)
		mp->mp_mmapsize = (db_size_t)dbenv->mp_mmapsize;
	MPOOL_SYSTEM_UNLOCK(env);

	return (0);
}
/*
 * __memp_env_refresh --
 *	Clean up the mpool subsystem when the environment handle is closed.
 *
 *	Frees all per-process mpool state; for private environments it also
 *	returns buffers, frozen allocations, and the shared tables to the
 *	region allocator before detaching from every region.
 *
 * env: the environment
 *
 * Returns 0, or the first non-zero error encountered while continuing
 * the teardown.
 */
int
__memp_env_refresh(env)
ENV *env;
{
BH *bhp;
BH_FROZEN_ALLOC *frozen_alloc;
DB_MPOOL *dbmp;
DB_MPOOLFILE *dbmfp;
DB_MPOOL_HASH *hp;
DB_MPREG *mpreg;
MPOOL *mp, *c_mp;
REGINFO *infop;
u_int32_t bucket, i, nreg;
int ret, t_ret;
ret = 0;
dbmp = env->mp_handle;
mp = dbmp->reginfo[0].primary;
nreg = mp->nreg;
/* NOTE(review): this hp value is only used via reassignment in the
 * private-env loop below; the initial store appears redundant. */
hp = R_ADDR(&dbmp->reginfo[0], mp->htab);
/*
 * Only a private environment frees individual buffers here; shared
 * region memory is presumably reclaimed when the region itself is
 * removed (not shown in this file).
 */
if (!F_ISSET(env, ENV_PRIVATE))
goto not_priv;
for (i = 0; i < nreg; ++i) {
infop = &dbmp->reginfo[i];
c_mp = infop->primary;
/* Walk every hash bucket, discarding its buffer headers. */
for (hp = R_ADDR(infop, c_mp->htab), bucket = 0;
bucket < c_mp->htab_buckets; ++hp, ++bucket) {
while ((bhp = SH_TAILQ_FIRST(
&hp->hash_bucket, __bh)) != NULL)
/* Frozen headers are only unlinked; their memory is
 * freed in bulk from alloc_frozen below. */
if (F_ISSET(bhp, BH_FROZEN))
SH_TAILQ_REMOVE(
&hp->hash_bucket, bhp,
hq, __bh);
else {
/* Clear dirty state so the free is unconditional. */
if (F_ISSET(bhp, BH_DIRTY)) {
atomic_dec(env,
&hp->hash_page_dirty);
F_CLR(bhp,
BH_DIRTY | BH_DIRTY_CREATE);
}
/* Take a reference for __memp_bhfree to release. */
atomic_inc(env, &bhp->ref);
if ((t_ret = __memp_bhfree(dbmp, infop,
R_ADDR(dbmp->reginfo,
bhp->mf_offset), hp, bhp,
BH_FREE_FREEMEM |
BH_FREE_UNLOCKED)) != 0 && ret == 0)
ret = t_ret;
}
}
/* Free the frozen-buffer allocations under the region lock. */
MPOOL_REGION_LOCK(env, infop);
while ((frozen_alloc = SH_TAILQ_FIRST(
&c_mp->alloc_frozen, __bh_frozen_a)) != NULL) {
SH_TAILQ_REMOVE(&c_mp->alloc_frozen, frozen_alloc,
links, __bh_frozen_a);
__env_alloc_free(infop, frozen_alloc);
}
MPOOL_REGION_UNLOCK(env, infop);
}
not_priv:
/* Close all open files, flushing dirty pages on the way out. */
while ((dbmfp = TAILQ_FIRST(&dbmp->dbmfq)) != NULL)
if ((t_ret = __memp_fclose(dbmfp, DB_FLUSH)) != 0 && ret == 0)
ret = t_ret;
/* Discard process-local pgin/pgout and registration state. */
if (dbmp->pg_inout != NULL)
__os_free(env, dbmp->pg_inout);
while ((mpreg = LIST_FIRST(&dbmp->dbregq)) != NULL) {
LIST_REMOVE(mpreg, q);
__os_free(env, mpreg);
}
/* Discard the DB_MPOOL handle mutex. */
if ((t_ret = __mutex_free(env, &dbmp->mutex)) != 0 && ret == 0)
ret = t_ret;
/* Private env: return the shared tables themselves to the allocator.
 * mtx_alloc is cleared first so the frees do not take region locks. */
if (F_ISSET(env, ENV_PRIVATE)) {
infop = &dbmp->reginfo[0];
infop->mtx_alloc = MUTEX_INVALID;
__memp_free(infop, R_ADDR(infop, mp->regids));
if ((t_ret = __memp_discard_all_mpfs(env, mp)) != 0 && ret == 0)
ret = t_ret;
__memp_free(infop, R_ADDR(infop, mp->ftab));
for (i = 0; i < nreg; ++i) {
infop = &dbmp->reginfo[i];
c_mp = infop->primary;
infop->mtx_alloc = MUTEX_INVALID;
__memp_free(infop, R_ADDR(infop, c_mp->htab));
}
}
/* Detach from every region, then free the per-process handle. */
for (i = 0; i < nreg; ++i) {
infop = &dbmp->reginfo[i];
if ((t_ret =
__env_region_detach(env, infop, 0)) != 0 && ret == 0)
ret = t_ret;
}
__os_free(env, dbmp->reginfo);
__os_free(env, dbmp);
env->mp_handle = NULL;
return (ret);
}