#include "db_config.h"
#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/mp.h"
#include "dbinc/log.h"
#include "dbinc/txn.h"
/*
 * Forward declaration of the file-local page-write routine.  It is defined
 * later in this file and is called from __memp_bhwrite.
 */
static int __memp_pgwrite
__P((ENV *, DB_MPOOLFILE *, DB_MPOOL_HASH *, BH *));
/*
 * __memp_bhwrite --
 *	Write the page held by a buffer header, first locating (or opening)
 *	a per-process DB_MPOOLFILE handle for the underlying file.
 *
 * Parameters:
 *	dbmp		per-process memory pool handle (supplies env, the handle
 *			list dbmfq, the registration list dbregq and the mutex).
 *	hp		hash bucket of the buffer; passed through to
 *			__memp_pgwrite.
 *	mfp		shared description of the file the page belongs to.
 *	bhp		buffer header whose page is to be written.
 *	open_extents	if zero, files flagged MP_EXTENT are not opened here.
 *
 * Returns:
 *	The result of __memp_pgwrite on the write path; EPERM when this
 *	process cannot or may not write the file (see the individual checks
 *	below); otherwise the error from __db_tmp_open, __memp_fcreate or
 *	__memp_fopen.
 */
int
__memp_bhwrite(dbmp, hp, mfp, bhp, open_extents)
DB_MPOOL *dbmp;
DB_MPOOL_HASH *hp;
MPOOLFILE *mfp;
BH *bhp;
int open_extents;
{
DB_MPOOLFILE *dbmfp;
DB_MPREG *mpreg;
ENV *env;
int opened, ret;
env = dbmp->env;
/* Set to 1 only if this call creates and opens its own handle. */
opened = 0;
/*
 * A dead file needs no handle: call the page-write routine with a NULL
 * DB_MPOOLFILE and let it take its no-file path.
 */
if (mfp->deadfile)
return (__memp_pgwrite(env, NULL, hp, bhp));
/*
 * Under the pool mutex, look through this process's handle list for a
 * handle on the same file that is not read-only.  If one is found, take a
 * reference on it; if the loop runs to completion dbmfp is NULL.
 */
MUTEX_LOCK(env, dbmp->mutex);
TAILQ_FOREACH(dbmfp, &dbmp->dbmfq, q)
if (dbmfp->mfp == mfp && !F_ISSET(dbmfp, MP_READONLY)) {
++dbmfp->ref;
break;
}
MUTEX_UNLOCK(env, dbmp->mutex);
if (dbmfp != NULL) {
/*
 * The handle exists but has no open file handle yet: create a
 * temporary backing file for it, unless backing files are disallowed.
 */
if (dbmfp->fhp == NULL) {
if (mfp->no_backing_file) {
/*
 * NOTE(review): this decrement (and the one on the
 * __db_tmp_open failure path below) is done without
 * dbmp->mutex, unlike the increment above -- confirm
 * this is intended.
 */
--dbmfp->ref;
return (EPERM);
}
/*
 * Re-test fhp after acquiring the mutex so that the temporary
 * file is only opened if it is still missing at that point.
 */
MUTEX_LOCK(env, dbmp->mutex);
if (dbmfp->fhp == NULL) {
ret = __db_tmp_open(env,
F_ISSET(env->dbenv, DB_ENV_DIRECT_DB) ?
DB_OSO_DIRECT : 0, &dbmfp->fhp);
} else
ret = 0;
MUTEX_UNLOCK(env, dbmp->mutex);
if (ret != 0) {
__db_errx(env, DB_STR("3014",
"unable to create temporary backing file"));
--dbmfp->ref;
return (ret);
}
}
goto pgwrite;
}
/*
 * No usable handle in this process.  Refuse (EPERM) rather than open one
 * when: the file is an extent and the caller did not allow opening
 * extents; the file is temporary or has no backing file; or the file has
 * a specific ftype for which nothing is registered in dbregq.
 */
if (!open_extents && F_ISSET(mfp, MP_EXTENT))
return (EPERM);
if (F_ISSET(mfp, MP_TEMP) || mfp->no_backing_file)
return (EPERM);
if (mfp->ftype != 0 && mfp->ftype != DB_FTYPE_SET) {
MUTEX_LOCK(env, dbmp->mutex);
LIST_FOREACH(mpreg, &dbmp->dbregq, q)
if (mpreg->ftype == mfp->ftype)
break;
MUTEX_UNLOCK(env, dbmp->mutex);
if (mpreg == NULL)
return (EPERM);
}
/* Create a new handle and open the file through it. */
if ((ret = __memp_fcreate(env, &dbmfp)) != 0)
return (ret);
/*
 * Take an extra reference before the open and remember that the handle
 * is ours.
 * NOTE(review): presumably the extra reference guards against the handle
 * being closed concurrently -- confirm against __memp_fopen.
 */
dbmfp->ref++;
opened = 1;
if ((ret = __memp_fopen(dbmfp, mfp, NULL,
NULL, DB_FLUSH | DB_DURABLE_UNKNOWN, 0, mfp->pagesize)) != 0) {
dbmfp->ref--;
(void)__memp_fclose(dbmfp, 0);
/*
 * The open error is only reported if the file is not dead; for a
 * dead file continue with a NULL handle, as in the early-exit case
 * at the top of the function.
 */
if (!mfp->deadfile)
return (ret);
dbmfp = NULL;
}
pgwrite:
MVCC_MPROTECT(bhp->buf, mfp->pagesize,
PROT_READ | PROT_WRITE | PROT_EXEC);
ret = __memp_pgwrite(env, dbmfp, hp, bhp);
/* No handle means no reference to release. */
if (dbmfp == NULL)
return (ret);
/*
 * Release the reference taken above.  If the handle was found rather than
 * opened here and ours is the only reference left, the reference is kept
 * and the handle is flagged MP_FLUSH instead; the first time that happens
 * the handle is also flagged MP_FOR_FLUSH and counted in
 * mfp->neutral_cnt, under the file's mutex.
 * NOTE(review): presumably MP_FLUSH tells a later pass to close the
 * handle -- confirm where MP_FLUSH is consumed.
 */
MUTEX_LOCK(env, dbmp->mutex);
if (!opened && dbmfp->ref == 1) {
if (!F_ISSET(dbmfp, MP_FLUSH)) {
F_SET(dbmfp, MP_FLUSH);
MUTEX_LOCK(env,dbmfp->mfp->mutex);
if (!F_ISSET(dbmfp, MP_FOR_FLUSH)) {
mfp->neutral_cnt++;
F_SET(dbmfp, MP_FOR_FLUSH);
}
MUTEX_UNLOCK(env, dbmfp->mfp->mutex);
}
} else
--dbmfp->ref;
MUTEX_UNLOCK(env, dbmp->mutex);
return (ret);
}
/*
 * __memp_pgread --
 *	Fill a buffer with its page from the backing file, or with a newly
 *	cleared page when the file does not (fully) contain it.
 *
 * Parameters:
 *	dbmfp		handle for the file; its fhp may be NULL, in which
 *			case nothing is read.
 *	bhp		buffer header to fill (asserted to be BH_EXCLUSIVE).
 *	can_create	non-zero if a short or missing page may be created.
 *
 * Returns:
 *	0 on success; the __os_io error on a failed read; DB_PAGE_NOTFOUND
 *	on a short read when can_create is zero; otherwise the result of the
 *	page-in conversion.  BH_TRASH is left set on every failure return.
 */
int
__memp_pgread(dbmfp, bhp, can_create)
	DB_MPOOLFILE *dbmfp;
	BH *bhp;
	int can_create;
{
	ENV *env;
	MPOOLFILE *mfp;
	size_t bytes_read, clear_bytes;
	u_int32_t pgsize;
	int ret;

	env = dbmfp->env;
	mfp = dbmfp->mfp;
	pgsize = mfp->pagesize;

	DB_ASSERT(env, !F_ISSET(bhp, BH_DIRTY_CREATE | BH_FROZEN));
	DB_ASSERT(env, can_create ||
	    F_ISSET(bhp, BH_TRASH) || !F_ISSET(bhp, BH_DIRTY));
	DB_ASSERT(env, F_ISSET(bhp, BH_EXCLUSIVE));

	/* The contents are not valid until the end of this function. */
	F_SET(bhp, BH_TRASH);

	/* With no open file handle, behave as if zero bytes were read. */
	bytes_read = 0;
	if (dbmfp->fhp != NULL) {
		PERFMON3(env, mpool, read, __memp_fn(dbmfp), bhp->pgno, bhp);
		ret = __os_io(env, DB_IO_READ, dbmfp->fhp,
		    bhp->pgno, pgsize, 0, pgsize, bhp->buf, &bytes_read);
		if (ret != 0)
			return (ret);
	}

	if (bytes_read >= pgsize) {
		/* A complete page was read. */
		STAT_INC_VERB(env, mpool, page_in,
		    mfp->stat.st_page_in, __memp_fn(dbmfp), bhp->pgno);
	} else {
		/* Short read: only acceptable if the page may be created. */
		if (!can_create)
			return (DB_PAGE_NOTFOUND);

		/* Zero the configured prefix, or the whole page if unset. */
		if (mfp->clear_len == DB_CLEARLEN_NOTSET)
			clear_bytes = pgsize;
		else
			clear_bytes = mfp->clear_len;
		memset(bhp->buf, 0, clear_bytes);
#if defined(DIAGNOSTIC) || defined(UMRW)
		/* Fill the uncleared tail with a recognizable byte. */
		if (clear_bytes < pgsize)
			memset(bhp->buf + clear_bytes,
			    CLEAR_BYTE, pgsize - clear_bytes);
#endif
		STAT_INC_VERB(env, mpool, page_create,
		    mfp->stat.st_page_create, __memp_fn(dbmfp), bhp->pgno);
	}

	/* Files with a non-zero type get the page-in conversion applied. */
	if (mfp->ftype == 0)
		ret = 0;
	else
		ret = __memp_pg(dbmfp, bhp->pgno, bhp->buf, 1);

	/* Only a fully successful fill makes the buffer valid. */
	if (ret == 0)
		F_CLR(bhp, BH_TRASH);
	return (ret);
}
static int
__memp_pgwrite(env, dbmfp, hp, bhp)
ENV *env;
DB_MPOOLFILE *dbmfp;
DB_MPOOL_HASH *hp;
BH *bhp;
{
DB_LSN lsn;
MPOOLFILE *mfp;
size_t nw;
int ret;
void * buf;
if (!F_ISSET(bhp, BH_DIRTY))
return (0);
mfp = dbmfp == NULL ? NULL : dbmfp->mfp;
ret = 0;
buf = NULL;
DB_ASSERT(env, !F_ISSET(bhp, BH_FROZEN | BH_TRASH));
if (mfp == NULL || mfp->deadfile)
goto file_dead;
if (LOGGING_ON(env) && mfp->lsn_off != DB_LSN_OFF_NOTSET &&
!IS_CLIENT_PGRECOVER(env)) {
memcpy(&lsn, bhp->buf + mfp->lsn_off, sizeof(DB_LSN));
if (!IS_NOT_LOGGED_LSN(lsn) &&
(ret = __log_flush(env, &lsn)) != 0)
goto err;
}
#ifdef DIAGNOSTIC
if (LOGGING_ON(env) && !IS_NOT_LOGGED_LSN(LSN(bhp->buf)) &&
!IS_CLIENT_PGRECOVER(env)) {
DB_LOG *dblp;
LOG *lp;
dblp = env->lg_handle;
lp = dblp->reginfo.primary;
if (!lp->db_log_inmemory &&
LOG_COMPARE(&lp->s_lsn, &LSN(bhp->buf)) <= 0) {
MUTEX_LOCK(env, lp->mtx_flush);
DB_ASSERT(env, F_ISSET(env->dbenv, DB_ENV_NOLOCKING) ||
LOG_COMPARE(&lp->s_lsn, &LSN(bhp->buf)) > 0);
MUTEX_UNLOCK(env, lp->mtx_flush);
}
}
#endif
#ifndef HAVE_ATOMICFILEREAD
if (mfp->backup_in_progress != 0) {
MUTEX_READLOCK(env, mfp->mtx_write);
if (bhp->pgno >= mfp->low_pgno && bhp->pgno <= mfp->high_pgno) {
MUTEX_UNLOCK(env, mfp->mtx_write);
ret = EAGAIN;
goto err;
}
atomic_inc(env, &mfp->writers);
MUTEX_UNLOCK(env, mfp->mtx_write);
} else
atomic_inc(env, &mfp->writers);
#endif
buf = bhp->buf;
if (mfp->ftype != 0) {
if (F_ISSET(bhp, BH_EXCLUSIVE))
F_SET(bhp, BH_TRASH);
else {
if ((ret = __os_malloc(env, mfp->pagesize, &buf)) != 0)
goto err;
memcpy(buf, bhp->buf, mfp->pagesize);
}
if ((ret = __memp_pg(dbmfp, bhp->pgno, buf, 0)) != 0)
goto err;
}
PERFMON3(env, mpool, write, __memp_fn(dbmfp), bhp->pgno, bhp);
if ((ret = __os_io(env, DB_IO_WRITE, dbmfp->fhp, bhp->pgno,
mfp->pagesize, 0, mfp->pagesize, buf, &nw)) != 0) {
#ifndef HAVE_ATOMICFILEREAD
atomic_dec(env, &mfp->writers);
#endif
__db_errx(env, DB_STR_A("3015",
"%s: write failed for page %lu", "%s %lu"),
__memp_fn(dbmfp), (u_long)bhp->pgno);
goto err;
}
#ifndef HAVE_ATOMICFILEREAD
atomic_dec(env, &mfp->writers);
#endif
STAT_INC_VERB(env, mpool, page_out,
mfp->stat.st_page_out, __memp_fn(dbmfp), bhp->pgno);
if (bhp->pgno > mfp->last_flushed_pgno) {
MUTEX_LOCK(env, mfp->mutex);
if (bhp->pgno > mfp->last_flushed_pgno)
mfp->last_flushed_pgno = bhp->pgno;
MUTEX_UNLOCK(env, mfp->mutex);
}
err:
file_dead:
if (buf != NULL && buf != bhp->buf)
__os_free(env, buf);
if (F_ISSET(bhp, BH_DIRTY | BH_TRASH)) {
MUTEX_LOCK(env, hp->mtx_hash);
DB_ASSERT(env, !SH_CHAIN_HASNEXT(bhp, vc));
if (ret == 0 && F_ISSET(bhp, BH_DIRTY)) {
F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE);
DB_ASSERT(env, atomic_read(&hp->hash_page_dirty) > 0);
atomic_dec(env, &hp->hash_page_dirty);
}
if ((ret != 0 || BH_REFCOUNT(bhp) > 1) &&
F_ISSET(bhp, BH_TRASH)) {
ret = __memp_pg(dbmfp, bhp->pgno, bhp->buf, 1);
F_CLR(bhp, BH_TRASH);
}
MUTEX_UNLOCK(env, hp->mtx_hash);
}
return (ret);
}
int
__memp_pg(dbmfp, pgno, buf, is_pgin)
DB_MPOOLFILE *dbmfp;
db_pgno_t pgno;
void *buf;
int is_pgin;
{
DBT dbt, *dbtp;
DB_MPOOL *dbmp;
DB_MPREG *mpreg;
ENV *env;
MPOOLFILE *mfp;
int ftype, ret;
env = dbmfp->env;
dbmp = env->mp_handle;
mfp = dbmfp->mfp;
if ((ftype = mfp->ftype) == DB_FTYPE_SET)
mpreg = dbmp->pg_inout;
else {
MUTEX_LOCK(env, dbmp->mutex);
LIST_FOREACH(mpreg, &dbmp->dbregq, q)
if (ftype == mpreg->ftype)
break;
MUTEX_UNLOCK(env, dbmp->mutex);
}
if (mpreg == NULL)
return (0);
if (mfp->pgcookie_len == 0)
dbtp = NULL;
else {
DB_SET_DBT(dbt, R_ADDR(
dbmp->reginfo, mfp->pgcookie_off), mfp->pgcookie_len);
dbtp = &dbt;
}
if (is_pgin) {
if (mpreg->pgin != NULL && (ret =
mpreg->pgin(env->dbenv, pgno, buf, dbtp)) != 0)
goto err;
} else
if (mpreg->pgout != NULL && (ret =
mpreg->pgout(env->dbenv, pgno, buf, dbtp)) != 0)
goto err;
return (0);
err: __db_errx(env, DB_STR_A("3016",
"%s: %s failed for page %lu", "%s %s %lu"), __memp_fn(dbmfp),
is_pgin ? DB_STR_P("pgin") : DB_STR_P("pgout"), (u_long)pgno);
return (ret);
}
/*
 * __memp_bhfree --
 *	Unlink a buffer header from its hash bucket and version chain and,
 *	depending on flags, release its mutexes and memory.
 *
 * Parameters:
 *	dbmp	per-process memory pool handle.
 *	infop	cache region the buffer lives in; used when BH_FREE_FREEMEM
 *		is set.
 *	mfp	file the buffer belongs to, or NULL; if non-NULL its block
 *		count is decremented.
 *	hp	hash bucket of the buffer, or NULL to skip the unlinking.
 *		Unless BH_FREE_UNLOCKED is set, hp must be non-NULL and its
 *		mutex is released by this function.
 *	bhp	buffer header to free.
 *	flags	BH_FREE_UNLOCKED: the caller holds neither the bucket nor the
 *		buffer mutex, so none is released here and the owning
 *		transaction is not updated.
 *		BH_FREE_REUSE: only unlink the buffer, then return.
 *		BH_FREE_FREEMEM: also free the buffer's mutex and memory.
 *
 * Returns:
 *	0 on success, otherwise the first error encountered from
 *	__txn_remove_buffer, __mutex_free or __memp_mf_discard.
 *
 * Change from the previous version: a failure from __txn_remove_buffer is
 * no longer overwritten by the result of __mutex_free.
 */
int
__memp_bhfree(dbmp, infop, mfp, hp, bhp, flags)
	DB_MPOOL *dbmp;
	REGINFO *infop;
	MPOOLFILE *mfp;
	DB_MPOOL_HASH *hp;
	BH *bhp;
	u_int32_t flags;
{
	ENV *env;
#ifdef DIAGNOSTIC
	DB_LSN vlsn;
#endif
	BH *prev_bhp;
	MPOOL *c_mp;
	int ret, t_ret;
#ifdef DIAG_MVCC
	size_t pagesize;
#endif

	ret = 0;
	env = dbmp->env;
#ifdef DIAG_MVCC
	if (mfp != NULL)
		pagesize = mfp->pagesize;
#endif

	DB_ASSERT(env, LF_ISSET(BH_FREE_UNLOCKED) ||
	    (hp != NULL && MUTEX_IS_OWNED(env, hp->mtx_hash)));
	DB_ASSERT(env, BH_REFCOUNT(bhp) == 1 &&
	    !F_ISSET(bhp, BH_DIRTY | BH_FROZEN));
	DB_ASSERT(env, LF_ISSET(BH_FREE_UNLOCKED) ||
	    SH_CHAIN_SINGLETON(bhp, vc) || (SH_CHAIN_HASNEXT(bhp, vc) &&
	    (SH_CHAIN_NEXTP(bhp, vc, __bh)->td_off == bhp->td_off ||
	    bhp->td_off == INVALID_ROFF ||
	    IS_MAX_LSN(*VISIBLE_LSN(env, bhp)) ||
	    BH_OBSOLETE(bhp, hp->old_reader, vlsn))));

	PERFMON3(env, mpool, evict, __memp_fns(dbmp, mfp), bhp->pgno, bhp);

	if (hp == NULL)
		goto no_hp;

	/*
	 * Only a buffer with no successor in its version chain is linked
	 * into the hash bucket.  When removing such a buffer, its
	 * predecessor in the chain (if any) takes its place in the bucket.
	 */
	prev_bhp = SH_CHAIN_PREV(bhp, vc, __bh);
	if (!SH_CHAIN_HASNEXT(bhp, vc)) {
		if (prev_bhp != NULL)
			SH_TAILQ_INSERT_AFTER(&hp->hash_bucket,
			    bhp, prev_bhp, hq, __bh);
		SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh);
	}
	SH_CHAIN_REMOVE(bhp, vc, __bh);

	/*
	 * Detach the buffer from its owning transaction, if it has one.
	 * Skipped when BH_FREE_UNLOCKED is set.
	 */
	if (bhp->td_off != INVALID_ROFF && !LF_ISSET(BH_FREE_UNLOCKED)) {
		ret = __txn_remove_buffer(
		    env, BH_OWNER(env, bhp), hp->mtx_hash);
		bhp->td_off = INVALID_ROFF;
	}

no_hp:	if (mfp != NULL)
		MVCC_MPROTECT(bhp->buf,
		    pagesize, PROT_READ | PROT_WRITE | PROT_EXEC);

	/* The bucket mutex is no longer needed. */
	if (!LF_ISSET(BH_FREE_UNLOCKED))
		MUTEX_UNLOCK(env, hp->mtx_hash);

	/* For reuse the caller keeps the buffer (and its mutex) as is. */
	if (LF_ISSET(BH_FREE_REUSE))
		return (ret);

	if (!LF_ISSET(BH_FREE_UNLOCKED))
		MUTEX_UNLOCK(env, bhp->mtx_buf);

	if (LF_ISSET(BH_FREE_FREEMEM)) {
		/*
		 * Use t_ret so that an earlier __txn_remove_buffer error is
		 * kept.  As before, a failure to free the mutex ends the
		 * function without freeing the memory.
		 */
		if ((t_ret = __mutex_free(env, &bhp->mtx_buf)) != 0)
			return (ret == 0 ? t_ret : ret);

		/* Return the memory and adjust the region's page count. */
		MPOOL_REGION_LOCK(env, infop);
		MVCC_BHUNALIGN(bhp);
		__memp_free(infop, bhp);
		c_mp = infop->primary;
		c_mp->pages--;
		MPOOL_REGION_UNLOCK(env, infop);
	}

	if (mfp == NULL)
		return (ret);

	/*
	 * One block fewer belongs to the file.  If that was the last block
	 * and mpf_cnt is zero, discard the file description.  The file
	 * mutex is not released on that path.
	 * NOTE(review): presumably __memp_mf_discard releases or destroys
	 * it -- confirm.
	 */
	MUTEX_LOCK(env, mfp->mutex);
	if (--mfp->block_cnt == 0 && mfp->mpf_cnt == 0) {
		if ((t_ret = __memp_mf_discard(dbmp, mfp, 0)) != 0 && ret == 0)
			ret = t_ret;
	} else
		MUTEX_UNLOCK(env, mfp->mutex);

	return (ret);
}