#include "db_config.h"
#include "db_int.h"
#include "dbinc/log.h"
#include "dbinc/mp.h"
#include "dbinc/txn.h"
/* Comparison routine passed to qsort() when sorting freezer page numbers. */
static int __pgno_cmp __P((const void *, const void *));
/*
 * __memp_bh_settxn --
 *	Record the transaction that owns a buffer.
 *
 * dbmp	buffer pool handle (supplies the environment).
 * mfp	file the buffer belongs to; only used to name it in the error text.
 * bhp	buffer header to mark as owned.
 * vtd	the owning TXN_DETAIL, passed as a void pointer.
 *
 * Returns EINVAL when no transaction is supplied, 0 when the buffer already
 * has an owner, and otherwise whatever __txn_add_buffer returns.
 */
int
__memp_bh_settxn(dbmp, mfp, bhp, vtd)
	DB_MPOOL *dbmp;
	MPOOLFILE *mfp;
	BH *bhp;
	void *vtd;
{
	ENV *env;
	TXN_DETAIL *owner;

	env = dbmp->env;

	/* An update without a transaction is rejected outright. */
	if ((owner = (TXN_DETAIL *)vtd) == NULL) {
		__db_errx(env, DB_STR_A("3002",
		    "%s: non-transactional update to a multiversion file",
		    "%s"), __memp_fns(dbmp, mfp));
		return (EINVAL);
	}

	/*
	 * First owner: store the transaction's offset within the transaction
	 * region and let the transaction subsystem account for the buffer.
	 */
	if (bhp->td_off == INVALID_ROFF) {
		bhp->td_off = R_OFFSET(&env->tx_handle->reginfo, owner);
		return (__txn_add_buffer(env, owner));
	}

	/* Already owned: the recorded owner must be this transaction. */
	DB_ASSERT(env, BH_OWNER(env, bhp) == owner);
	return (0);
}
/*
 * __memp_skip_curadj --
 *	Report whether the cached buffer for a page is not owned by the
 *	cursor's outermost transaction.
 *
 * dbc	cursor; its txn, database handle and environment are consulted.
 * pgno	page number to look up in the cursor's file.
 *
 * Returns 1 when a buffer for (file, pgno) is found in the hash bucket and
 * BH_OWNED_BY says the root transaction does not own it; returns 0 when the
 * buffer is owned by that transaction, when no matching buffer is found, or
 * when the bucket lookup fails (in which case the environment is panicked).
 */
int
__memp_skip_curadj(dbc, pgno)
	DBC * dbc;
	db_pgno_t pgno;
{
	BH *bhp;
	DB_MPOOL *dbmp;
	DB_MPOOL_HASH *hp;
	DB_TXN *root_txn;
	ENV *env;
	MPOOLFILE *mfp;
	REGINFO *infop;
	roff_t mf_offset;
	u_int32_t bucket;
	int ret, skip;

	env = dbc->env;
	dbmp = env->mp_handle;
	mfp = dbc->dbp->mpf->mfp;
	mf_offset = R_OFFSET(dbmp->reginfo, mfp);

	/* Climb from the cursor's transaction to its outermost ancestor. */
	root_txn = dbc->txn;
	while (root_txn->parent != NULL)
		root_txn = root_txn->parent;

	/*
	 * Locate the hash bucket for the page.  The bucket mutex is released
	 * below, so the macro presumably returns with it held -- confirm
	 * against the MP_GET_BUCKET definition.
	 */
	MP_GET_BUCKET(env, mfp, pgno, &infop, hp, bucket, ret);
	if (ret != 0) {
		(void)__env_panic(env, ret);
		return (0);
	}

	/* Only the first buffer matching both page and file is examined. */
	skip = 0;
	SH_TAILQ_FOREACH(bhp, &hp->hash_bucket, hq, __bh) {
		if (bhp->pgno == pgno && bhp->mf_offset == mf_offset) {
			skip = BH_OWNED_BY(env, bhp, root_txn) ? 0 : 1;
			break;
		}
	}
	MUTEX_UNLOCK(env, hp->mtx_hash);

	return (skip);
}
/*
 * Magic number stored as the first u_int32_t of a freezer file when the file
 * is created, and verified each time a freezer file is opened.
 */
#define DB_FREEZER_MAGIC 0x06102002
/*
 * __memp_bh_freeze --
 *	Save a buffer's page image in an on-disk "freezer" file and link a
 *	frozen buffer header into the version chain next to the buffer.
 *
 * dbmp		buffer pool handle (supplies the environment and region array).
 * infop	cache region holding the buffer; infop->primary is the MPOOL.
 * hp		hash bucket that contains bhp.
 * bhp		buffer to freeze.
 * need_frozenp	out: set non-zero when the cache's free_frozen list was empty
 *		on entry, or became empty after a header was taken from it.
 *
 * Freezer file layout, as read and written below: a header made of a
 * u_int32_t magic number, a db_pgno_t head of the free-page list (0 when the
 * list is empty) and a db_pgno_t highest allocated page number.  Page images
 * are numbered from 1, and each free page stores the number of the next free
 * page in its first db_pgno_t.
 *
 * Returns 0 on success, ENOMEM when no frozen header could be obtained, EBUSY
 * when the buffer is dirty or referenced by someone else, EINVAL when the
 * file's magic number is wrong, or the error of the failing call.  Errors
 * other than EBUSY and ENOMEM are also reported through __db_err.
 */
int
__memp_bh_freeze(dbmp, infop, hp, bhp, need_frozenp)
DB_MPOOL *dbmp;
REGINFO *infop;
DB_MPOOL_HASH *hp;
BH *bhp;
int *need_frozenp;
{
BH *frozen_bhp;
BH_FROZEN_ALLOC *frozen_alloc;
DB_FH *fhp;
ENV *env;
MPOOL *c_mp;
MPOOLFILE *mfp;
db_mutex_t mutex;
db_pgno_t maxpgno, newpgno, nextfree;
size_t nio;
int created, h_locked, ret, t_ret;
u_int32_t magic, nbucket, ncache, pagesize;
char filename[100], *real_name;
env = dbmp->env;
c_mp = infop->primary;
created = h_locked = ret = 0;
/* Find the MPOOLFILE the buffer belongs to; it supplies the page size. */
mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
pagesize = mfp->pagesize;
real_name = NULL;
fhp = NULL;
MVCC_MPROTECT(bhp->buf, pagesize, PROT_READ | PROT_WRITE);
/*
 * Under the region lock, take a frozen header from the cache's free list
 * or, if the list is empty, try to allocate a new one from the region.
 */
MPOOL_REGION_LOCK(env, infop);
frozen_bhp = SH_TAILQ_FIRST(&c_mp->free_frozen, __bh);
if (frozen_bhp != NULL) {
SH_TAILQ_REMOVE(&c_mp->free_frozen, frozen_bhp, hq, __bh);
*need_frozenp = SH_TAILQ_EMPTY(&c_mp->free_frozen);
} else {
*need_frozenp = 1;
if (__env_alloc(infop,
sizeof(BH_FROZEN_ALLOC) + sizeof(BH_FROZEN_PAGE),
&frozen_alloc) == 0) {
/* The header sits directly after the BH_FROZEN_ALLOC bookkeeping. */
frozen_bhp = (BH *)(frozen_alloc + 1);
frozen_bhp->mtx_buf = MUTEX_INVALID;
SH_TAILQ_INSERT_TAIL(&c_mp->alloc_frozen,
frozen_alloc, links);
}
}
MPOOL_REGION_UNLOCK(env, infop);
/* No header available: give up without touching the buffer. */
if (frozen_bhp == NULL) {
ret = ENOMEM;
goto err;
}
/*
 * The freezer file name is built from the cache index, the hash bucket
 * index and the page size in kilobytes, so there is one file per
 * (cache, bucket, page size) combination.
 */
ncache = (u_int32_t)(infop - dbmp->reginfo);
nbucket = (u_int32_t)(hp - (DB_MPOOL_HASH *)R_ADDR(infop, c_mp->htab));
snprintf(filename, sizeof(filename), "__db.freezer.%lu.%lu.%luK",
(u_long)ncache, (u_long)nbucket, (u_long)pagesize / 1024);
if ((ret = __db_appname(env,
DB_APP_NONE, filename, NULL, &real_name)) != 0)
goto err;
/* Everything from here to the chain update runs with the bucket locked. */
MUTEX_LOCK(env, hp->mtx_hash);
h_locked = 1;
DB_ASSERT(env, F_ISSET(bhp, BH_EXCLUSIVE) && !F_ISSET(bhp, BH_FROZEN));
/* Refuse to freeze a buffer that is dirty or has other references. */
if (BH_REFCOUNT(bhp) > 1 || F_ISSET(bhp, BH_DIRTY)) {
ret = EBUSY;
goto err;
}
/*
 * Try to create the file exclusively.  If that succeeds, write an empty
 * header (magic, free-list head 0, highest page 0) and rewind so the
 * header can be read back below; if the file already exists, open it.
 */
if ((ret = __os_open(env, real_name, pagesize,
DB_OSO_CREATE | DB_OSO_EXCL, env->db_mode, &fhp)) == 0) {
created = 1;
magic = DB_FREEZER_MAGIC;
maxpgno = newpgno = 0;
if ((ret = __os_write(env, fhp,
&magic, sizeof(u_int32_t), &nio)) != 0 ||
(ret = __os_write(env, fhp,
&newpgno, sizeof(db_pgno_t), &nio)) != 0 ||
(ret = __os_write(env, fhp,
&maxpgno, sizeof(db_pgno_t), &nio)) != 0 ||
(ret = __os_seek(env, fhp, 0, 0, 0)) != 0)
goto err;
} else if (ret == EEXIST)
ret = __os_open(env,
real_name, pagesize, 0, env->db_mode, &fhp);
if (ret != 0)
goto err;
/* Read the header: magic, head of the free list, highest page number. */
if ((ret = __os_read(env, fhp,
&magic, sizeof(u_int32_t), &nio)) != 0 ||
(ret = __os_read(env, fhp,
&newpgno, sizeof(db_pgno_t), &nio)) != 0 ||
(ret = __os_read(env, fhp,
&maxpgno, sizeof(db_pgno_t), &nio)) != 0)
goto err;
if (magic != DB_FREEZER_MAGIC) {
ret = EINVAL;
goto err;
}
if (newpgno == 0) {
/*
 * The free list is empty: use a new page past the current highest
 * one and store the new highest page number in the header's third
 * field.
 */
newpgno = ++maxpgno;
if ((ret = __os_seek(env,
fhp, 0, 0, sizeof(u_int32_t) + sizeof(db_pgno_t))) != 0 ||
(ret = __os_write(env, fhp, &maxpgno, sizeof(db_pgno_t),
&nio)) != 0)
goto err;
} else {
/*
 * Reuse the page at the head of the free list: read its successor
 * from the page itself and make that the new head in the header.
 */
if ((ret = __os_seek(env, fhp, newpgno, pagesize, 0)) != 0 ||
(ret = __os_read(env, fhp, &nextfree, sizeof(db_pgno_t),
&nio)) != 0)
goto err;
if ((ret =
__os_seek(env, fhp, 0, 0, sizeof(u_int32_t))) != 0 ||
(ret = __os_write(env, fhp, &nextfree, sizeof(db_pgno_t),
&nio)) != 0)
goto err;
}
/* Write the buffer's page image to the chosen freezer page. */
if ((ret = __os_io(env, DB_IO_WRITE, fhp, newpgno, pagesize, 0,
pagesize, bhp->buf, &nio)) != 0)
goto err;
/* Clear fhp before testing ret so the error path does not close it twice. */
ret = __os_closehandle(env, fhp);
fhp = NULL;
if (ret != 0)
goto err;
/*
 * Build the frozen header as a copy of the header part of bhp.  The copy
 * overwrites mtx_buf, so the frozen header's own mutex is saved first and
 * restored afterwards; if it had none, a new one is allocated.
 */
mutex = frozen_bhp->mtx_buf;
#ifdef DIAG_MVCC
memcpy(frozen_bhp, bhp, SSZ(BH, align_off));
#else
memcpy(frozen_bhp, bhp, SSZA(BH, buf));
#endif
atomic_init(&frozen_bhp->ref, 0);
if (mutex != MUTEX_INVALID)
frozen_bhp->mtx_buf = mutex;
else if ((ret = __mutex_alloc(env, MTX_MPOOL_BH,
DB_MUTEX_SHARED, &frozen_bhp->mtx_buf)) != 0)
goto err;
F_SET(frozen_bhp, BH_FROZEN);
F_CLR(frozen_bhp, BH_EXCLUSIVE);
/* Remember which freezer page holds the image. */
((BH_FROZEN_PAGE *)frozen_bhp)->spgno = newpgno;
/*
 * The frozen header carries the same owner as bhp, so the owning
 * transaction is told about one more buffer.  A failure here panics the
 * environment.
 */
if (frozen_bhp->td_off != INVALID_ROFF &&
(ret = __txn_add_buffer(env, BH_OWNER(env, frozen_bhp))) != 0) {
(void)__env_panic(env, ret);
goto err;
}
STAT_INC(env, mpool, freeze, hp->hash_frozen, bhp->pgno);
/*
 * Link the frozen header into the version chain after bhp.  If nothing
 * follows it in the chain, it also takes bhp's place in the hash bucket.
 */
SH_CHAIN_INSERT_AFTER(bhp, frozen_bhp, vc, __bh);
if (!SH_CHAIN_HASNEXT(frozen_bhp, vc)) {
SH_TAILQ_INSERT_BEFORE(&hp->hash_bucket,
bhp, frozen_bhp, hq, __bh);
SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
}
MUTEX_UNLOCK(env, hp->mtx_hash);
h_locked = 0;
/* Count the frozen header as one more block of the file. */
MUTEX_LOCK(env, mfp->mutex);
++mfp->block_cnt;
MUTEX_UNLOCK(env, mfp->mutex);
/*
 * Error cleanup, skipped on success: close the file, remove it if this
 * call created it, release the bucket mutex and hand the frozen header
 * back to the cache's free list.
 */
if (0) {
err: if (fhp != NULL &&
(t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
ret = t_ret;
if (created) {
DB_ASSERT(env, h_locked);
if ((t_ret = __os_unlink(env, real_name, 0)) != 0 &&
ret == 0)
ret = t_ret;
}
if (h_locked)
MUTEX_UNLOCK(env, hp->mtx_hash);
/* Never report success from the error path. */
if (ret == 0)
ret = EIO;
if (frozen_bhp != NULL) {
MPOOL_REGION_LOCK(env, infop);
SH_TAILQ_INSERT_TAIL(&c_mp->free_frozen,
frozen_bhp, hq);
MPOOL_REGION_UNLOCK(env, infop);
}
}
if (real_name != NULL)
__os_free(env, real_name);
/* EBUSY and ENOMEM are returned without an error message. */
if (ret != 0 && ret != EBUSY && ret != ENOMEM)
__db_err(env, ret, "__memp_bh_freeze");
return (ret);
}
/*
 * __pgno_cmp --
 *	Comparison routine for sorting an array of db_pgno_t with qsort().
 *
 * a, b	pointers to the two page numbers being compared.
 *
 * Returns a negative value, zero or a positive value when *a is less than,
 * equal to or greater than *b respectively.
 */
static int
__pgno_cmp(a, b)
	const void *a, *b;
{
	const db_pgno_t *ap, *bp;

	ap = a;
	bp = b;

	/*
	 * Compare explicitly rather than returning the difference converted
	 * to int: the difference of two page numbers need not fit in an int,
	 * so the converted value can carry the wrong sign for widely
	 * separated page numbers, which would give qsort() an inconsistent
	 * ordering.
	 */
	if (*ap < *bp)
		return (-1);
	return (*ap > *bp ? 1 : 0);
}
/*
 * __memp_bh_thaw --
 *	Release a frozen buffer's page in its freezer file, optionally
 *	reading the page image back into a newly allocated buffer first.
 *
 * dbmp		buffer pool handle (supplies the environment and region array).
 * infop	cache region holding the frozen header.
 * hp		hash bucket containing frozen_bhp; MUTEX_REQUIRED checks that
 *		its mutex is held on entry, and it is released before return
 *		on both the success and the error path.
 * frozen_bhp	frozen buffer header to remove.
 * alloc_bhp	buffer that receives the page image, or NULL when the frozen
 *		page is only being discarded.
 *
 * The freezer file is laid out as read below: a u_int32_t magic number, a
 * db_pgno_t head of the free-page list and a db_pgno_t highest page number,
 * with each free page holding the number of the next free page in its first
 * db_pgno_t.
 *
 * Returns 0 on success, EINVAL when the magic number is wrong, or the error
 * of the failing call; any non-zero result is also reported via __db_err.
 */
int
__memp_bh_thaw(dbmp, infop, hp, frozen_bhp, alloc_bhp)
DB_MPOOL *dbmp;
REGINFO *infop;
DB_MPOOL_HASH *hp;
BH *frozen_bhp, *alloc_bhp;
{
DB_FH *fhp;
ENV *env;
#ifdef DIAGNOSTIC
DB_LSN vlsn;
#endif
MPOOL *c_mp;
MPOOLFILE *mfp;
db_mutex_t mutex;
db_pgno_t *freelist, *ppgno, freepgno, maxpgno, spgno;
size_t nio;
u_int32_t listsize, magic, nbucket, ncache, ntrunc, nfree, pagesize;
#ifdef HAVE_FTRUNCATE
int i;
#endif
int h_locked, needfree, ret, t_ret;
char filename[100], *real_name;
env = dbmp->env;
fhp = NULL;
c_mp = infop->primary;
mfp = R_ADDR(dbmp->reginfo, frozen_bhp->mf_offset);
freelist = NULL;
pagesize = mfp->pagesize;
ret = 0;
real_name = NULL;
/* The caller already holds the bucket mutex; record that for cleanup. */
MUTEX_REQUIRED(env, hp->mtx_hash);
DB_ASSERT(env, F_ISSET(frozen_bhp, BH_EXCLUSIVE) || alloc_bhp == NULL);
h_locked = 1;
DB_ASSERT(env, F_ISSET(frozen_bhp, BH_FROZEN) &&
!F_ISSET(frozen_bhp, BH_THAWED));
DB_ASSERT(env, alloc_bhp != NULL ||
SH_CHAIN_SINGLETON(frozen_bhp, vc) ||
(SH_CHAIN_HASNEXT(frozen_bhp, vc) &&
BH_OBSOLETE(frozen_bhp, hp->old_reader, vlsn)));
DB_ASSERT(env, alloc_bhp == NULL || !F_ISSET(alloc_bhp, BH_FROZEN));
/* Freezer page number that holds this buffer's image. */
spgno = ((BH_FROZEN_PAGE *)frozen_bhp)->spgno;
if (alloc_bhp != NULL) {
/*
 * Copy the header part of the frozen buffer into the new buffer but
 * keep the new buffer's own mutex, then lock that mutex and start
 * the new buffer with a reference count of 1.
 *
 * NOTE(review): the error path in this function releases only
 * hp->mtx_hash, not alloc_bhp->mtx_buf -- confirm the caller
 * releases it when an error is returned.
 */
mutex = alloc_bhp->mtx_buf;
#ifdef DIAG_MVCC
memcpy(alloc_bhp, frozen_bhp, SSZ(BH, align_off));
#else
memcpy(alloc_bhp, frozen_bhp, SSZA(BH, buf));
#endif
alloc_bhp->mtx_buf = mutex;
MUTEX_LOCK(env, alloc_bhp->mtx_buf);
atomic_init(&alloc_bhp->ref, 1);
F_CLR(alloc_bhp, BH_FROZEN);
}
/*
 * Rebuild the freezer file name from the cache index, the hash bucket
 * index and the page size in kilobytes.
 */
ncache = (u_int32_t)(infop - dbmp->reginfo);
nbucket = (u_int32_t)(hp - (DB_MPOOL_HASH *)R_ADDR(infop, c_mp->htab));
snprintf(filename, sizeof(filename), "__db.freezer.%lu.%lu.%luK",
(u_long)ncache, (u_long)nbucket, (u_long)pagesize / 1024);
if ((ret = __db_appname(env,
DB_APP_NONE, filename, NULL, &real_name)) != 0)
goto err;
if ((ret = __os_open(env,
real_name, pagesize, 0, env->db_mode, &fhp)) != 0)
goto err;
/* Read the header: magic, head of the free list, highest page number. */
if ((ret = __os_read(env, fhp, &magic, sizeof(u_int32_t), &nio)) != 0 ||
(ret =
__os_read(env, fhp, &freepgno, sizeof(db_pgno_t), &nio)) != 0 ||
(ret = __os_read(env, fhp, &maxpgno, sizeof(db_pgno_t), &nio)) != 0)
goto err;
if (magic != DB_FREEZER_MAGIC) {
ret = EINVAL;
goto err;
}
/* When thawing, read the saved page image into the new buffer. */
if (alloc_bhp != NULL) {
DB_ASSERT(env, !F_ISSET(frozen_bhp, BH_FREED));
if ((ret = __os_io(env, DB_IO_READ, fhp,
spgno, pagesize, 0, pagesize, alloc_bhp->buf, &nio)) != 0)
goto err;
}
/*
 * needfree stays set unless the page is disposed of by removing or
 * truncating the file below; it then means "push spgno on the free list".
 */
needfree = 1;
if (spgno == maxpgno) {
/*
 * The page being released is the last one in the file, so the file
 * may be shrunk.  Collect every page on the free list, plus spgno,
 * into an array that starts at 100 entries and doubles as needed.
 */
listsize = 100;
if ((ret = __os_malloc(env,
listsize * sizeof(db_pgno_t), &freelist)) != 0)
goto err;
nfree = 0;
while (freepgno != 0) {
/* Grow one entry early so there is always room to append spgno. */
if (nfree == listsize - 1) {
listsize *= 2;
if ((ret = __os_realloc(env,
listsize * sizeof(db_pgno_t),
&freelist)) != 0)
goto err;
}
freelist[nfree++] = freepgno;
/* Follow the link stored at the start of the free page. */
if ((ret = __os_seek(env, fhp,
freepgno, pagesize, 0)) != 0 ||
(ret = __os_read(env, fhp, &freepgno,
sizeof(db_pgno_t), &nio)) != 0)
goto err;
}
freelist[nfree++] = spgno;
qsort(freelist, nfree, sizeof(db_pgno_t), __pgno_cmp);
/*
 * Walk back from the highest entry while the page numbers are
 * consecutive; ntrunc is the length of that trailing run, i.e. the
 * number of free pages at the end of the file.
 */
for (ppgno = &freelist[nfree - 1]; ppgno > freelist; ppgno--)
if (*(ppgno - 1) != *ppgno - 1)
break;
ntrunc = (u_int32_t)(&freelist[nfree] - ppgno);
/* Every page in the file is free: close the file and remove it. */
if (ntrunc == (u_int32_t)maxpgno) {
needfree = 0;
ret = __os_closehandle(env, fhp);
fhp = NULL;
if (ret != 0 ||
(ret = __os_unlink(env, real_name, 0)) != 0)
goto err;
}
#ifdef HAVE_FTRUNCATE
else {
/*
 * Cut the trailing free pages off the file, then rewrite the
 * header and relink the remaining free pages in sorted order,
 * ending the list with 0.
 */
maxpgno -= (db_pgno_t)ntrunc;
if ((ret = __os_truncate(env, fhp,
maxpgno + 1, pagesize)) != 0)
goto err;
freelist[nfree - ntrunc] = 0;
if ((ret = __os_seek(env, fhp,
0, 0, sizeof(u_int32_t))) != 0 ||
(ret = __os_write(env, fhp, &freelist[0],
sizeof(db_pgno_t), &nio)) != 0 ||
(ret = __os_write(env, fhp, &maxpgno,
sizeof(db_pgno_t), &nio)) != 0)
goto err;
for (i = 0; i < (int)(nfree - ntrunc); i++)
if ((ret = __os_seek(env,
fhp, freelist[i], pagesize, 0)) != 0 ||
(ret = __os_write(env, fhp,
&freelist[i + 1], sizeof(db_pgno_t),
&nio)) != 0)
goto err;
needfree = 0;
}
#endif
}
if (needfree) {
/*
 * Push spgno on the free list: store the current head in the page
 * and make the page the new head in the header, then close the file.
 *
 * NOTE(review): if the spgno == maxpgno branch above ran without
 * HAVE_FTRUNCATE and did not remove the file, its loop has already
 * advanced freepgno to 0, so the earlier free list appears to be
 * dropped here -- confirm whether that is intended.
 */
if ((ret = __os_seek(env, fhp, spgno, pagesize, 0)) != 0 ||
(ret = __os_write(env, fhp,
&freepgno, sizeof(db_pgno_t), &nio)) != 0 ||
(ret = __os_seek(env, fhp, 0, 0, sizeof(u_int32_t))) != 0 ||
(ret = __os_write(env, fhp,
&spgno, sizeof(db_pgno_t), &nio)) != 0)
goto err;
ret = __os_closehandle(env, fhp);
fhp = NULL;
if (ret != 0)
goto err;
}
/*
 * Update the version chain and hash bucket.  When thawing, the new buffer
 * is linked after the frozen header and, if nothing follows it in the
 * chain, takes the frozen header's place in the bucket.  When discarding,
 * a frozen header with nothing after it is replaced in the bucket by its
 * predecessor in the chain, if it has one.  Either way the frozen header
 * then leaves the chain.
 */
MUTEX_REQUIRED(env, hp->mtx_hash);
if (alloc_bhp != NULL) {
alloc_bhp->priority = c_mp->lru_priority;
SH_CHAIN_INSERT_AFTER(frozen_bhp, alloc_bhp, vc, __bh);
if (!SH_CHAIN_HASNEXT(alloc_bhp, vc)) {
SH_TAILQ_INSERT_BEFORE(&hp->hash_bucket, frozen_bhp,
alloc_bhp, hq, __bh);
SH_TAILQ_REMOVE(&hp->hash_bucket, frozen_bhp, hq, __bh);
}
} else if (!SH_CHAIN_HASNEXT(frozen_bhp, vc)) {
if (SH_CHAIN_HASPREV(frozen_bhp, vc))
SH_TAILQ_INSERT_BEFORE(&hp->hash_bucket, frozen_bhp,
SH_CHAIN_PREV(frozen_bhp, vc, __bh), hq, __bh);
SH_TAILQ_REMOVE(&hp->hash_bucket, frozen_bhp, hq, __bh);
}
SH_CHAIN_REMOVE(frozen_bhp, vc, __bh);
/*
 * When discarding, the owning transaction has one buffer fewer.  When
 * thawing, ownership was copied to alloc_bhp, so the count is unchanged.
 * A failure here panics the environment.
 */
if (alloc_bhp == NULL && frozen_bhp->td_off != INVALID_ROFF &&
(ret = __txn_remove_buffer(env,
BH_OWNER(env, frozen_bhp), MUTEX_INVALID)) != 0) {
(void)__env_panic(env, ret);
goto err;
}
frozen_bhp->td_off = INVALID_ROFF;
/*
 * Drop this call's reference.  If the count does not reach zero the
 * header is still referenced, so it is only marked BH_THAWED instead of
 * being returned to the free list.
 */
needfree = (atomic_dec(env, &frozen_bhp->ref) == 0);
if (!needfree)
F_SET(frozen_bhp, BH_THAWED);
MUTEX_UNLOCK(env, hp->mtx_hash);
if (F_ISSET(frozen_bhp, BH_EXCLUSIVE))
MUTEX_UNLOCK(env, frozen_bhp->mtx_buf);
h_locked = 0;
/* Nobody else references the header: put it back on the free list. */
if (needfree) {
MPOOL_REGION_LOCK(env, infop);
SH_TAILQ_INSERT_TAIL(&c_mp->free_frozen, frozen_bhp, hq);
MPOOL_REGION_UNLOCK(env, infop);
}
#ifdef HAVE_STATISTICS
if (alloc_bhp != NULL)
STAT_INC_VERB(env, mpool, thaw,
hp->hash_thawed, __memp_fns(dbmp, mfp), frozen_bhp->pgno);
else
STAT_INC_VERB(env, mpool, free_frozen, hp->hash_frozen_freed,
__memp_fns(dbmp, mfp), frozen_bhp->pgno);
#endif
/*
 * Error cleanup, skipped on success: release the bucket mutex if it is
 * still held and make sure a failure is never reported as 0.
 */
if (0) {
err: if (h_locked)
MUTEX_UNLOCK(env, hp->mtx_hash);
if (ret == 0)
ret = EIO;
}
/* Common exit: free the name and the page list, close the file if open. */
if (real_name != NULL)
__os_free(env, real_name);
if (freelist != NULL)
__os_free(env, freelist);
if (fhp != NULL &&
(t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0)
ret = t_ret;
if (ret != 0)
__db_err(env, ret, "__memp_bh_thaw");
return (ret);
}