#include "db_config.h"
#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/btree.h"
#include "dbinc/lock.h"
#include "dbinc/mp.h"
/*
 * __bam_get_root --
 *	Fetch the page a tree search starts from and enter it on the cursor's
 *	page stack (via BT_STK_ENTER).
 *
 * dbc:		cursor performing the search.
 * root_pgno:	page to start from; PGNO_INVALID is accepted (the blocking
 *		lock request below substitutes BAM_ROOT_PGNO(dbc) for it).
 * slevel:	tree level the caller is interested in; compared with the
 *		level of the fetched page to decide whether a stack is needed.
 * flags:	SR_* search flags, read through LF_ISSET.
 * stack:	out parameter; set non-zero when the caller must keep a stack
 *		of pages starting at this page.
 *
 * Returns 0 on success, otherwise the error from the failing lock, buffer
 * pool or stack call.
 */
int
__bam_get_root(dbc, root_pgno, slevel, flags, stack)
	DBC *dbc;
	db_pgno_t root_pgno;
	int slevel;
	u_int32_t flags;
	int *stack;
{
	BTREE_CURSOR *cp;
	DB *dbp;
	DB_LOCK lock;
	DB_MPOOLFILE *mpf;
	PAGE *h;
	db_lockmode_t lock_mode;
	u_int32_t get_mode;
	int ret, t_ret;

	COMPQUIET(h, NULL);
	LOCK_INIT(lock);
	dbp = dbc->dbp;
	mpf = dbp->mpf;
	cp = (BTREE_CURSOR *)dbc->internal;

try_again:
	/*
	 * A stack is required from the start only when the caller asked for
	 * one (SR_STACK) and the tree keeps record counts (recno, or btree
	 * with C_RECNUM).
	 */
	*stack = LF_ISSET(SR_STACK) &&
	    (dbc->dbtype == DB_RECNO || F_ISSET(cp, C_RECNUM));
	lock_mode = DB_LOCK_READ;
	if (*stack ||
	    LF_ISSET(SR_DEL) || (LF_ISSET(SR_NEXT) && LF_ISSET(SR_WRITE)))
		lock_mode = DB_LOCK_WRITE;

	/*
	 * Pick the buffer pool mode: dirty for writers; plain (0) when we
	 * already hold the lock, when locking is not in use, for DOWNREV
	 * cursors and for record-counted trees; DB_MPOOL_TRY otherwise.
	 */
retry:	if (lock_mode == DB_LOCK_WRITE)
		get_mode = DB_MPOOL_DIRTY;
	else if (LOCK_ISSET(lock) || !STD_LOCKING(dbc) ||
	    F_ISSET(dbc, DBC_DOWNREV) ||
	    dbc->dbtype == DB_RECNO || F_ISSET(cp, C_RECNUM))
		get_mode = 0;
	else
		get_mode = DB_MPOOL_TRY;

	BAM_GET_ROOT(dbc, root_pgno, h, get_mode, lock_mode, lock, ret);
	if (ret == DB_LOCK_NOTGRANTED && get_mode == DB_MPOOL_TRY) {
		/*
		 * The try-mode fetch was refused.  Acquire the page lock,
		 * waiting for it, and fetch again; the retry will use
		 * get_mode 0 because the lock is now set.
		 */
		DB_ASSERT(dbp->env, !LOCK_ISSET(lock));
		if ((ret = __db_lget(dbc, 0,
		    root_pgno == PGNO_INVALID ? BAM_ROOT_PGNO(dbc) : root_pgno,
		    lock_mode, 0, &lock)) != 0)
			return (ret);
		goto retry;
	}
	if (ret != 0) {
		(void)__LPUT(dbc, lock);
		return (ret);
	}

	DB_ASSERT(dbp->env, TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO ||
	    TYPE(h) == P_LBTREE || TYPE(h) == P_LRECNO || TYPE(h) == P_LDUP);

	/*
	 * Decide whether this page itself has to start the stack: it is
	 * the parent of (or at) the requested level, it is a leaf, or the
	 * caller asked to start stacking at exactly this level.
	 */
	if (!*stack &&
	    ((LF_ISSET(SR_PARENT) && (u_int8_t)(slevel + 1) >= LEVEL(h)) ||
	    LEVEL(h) == LEAFLEVEL ||
	    (LF_ISSET(SR_START) && slevel == LEVEL(h)))) {
		*stack = 1;

		if (dbc->dbtype == DB_RECNO || F_ISSET(cp, C_RECNUM)) {
			/* Already write locked: nothing more to do. */
			if (lock_mode == DB_LOCK_WRITE)
				goto done;
			if ((ret = __LPUT(dbc, lock)) != 0) {
				/* Don't leave the page pinned on error. */
				(void)__memp_fput(mpf,
				    dbc->thread_info, h, dbc->priority);
				return (ret);
			}
		}

		/*
		 * Internal pages, and leaves when SR_WRITE is set, need a
		 * write lock; a leaf that is already locked can be used
		 * as is.
		 */
		if (LEVEL(h) != LEAFLEVEL || LF_ISSET(SR_WRITE)) {
			lock_mode = DB_LOCK_WRITE;
			if ((ret = __LPUT(dbc, lock)) != 0) {
				/* Don't leave the page pinned on error. */
				(void)__memp_fput(mpf,
				    dbc->thread_info, h, dbc->priority);
				return (ret);
			}
		} else if (LOCK_ISSET(lock))
			goto done;

		if (!STD_LOCKING(dbc)) {
			/* No locking: only the page needs to be dirtied. */
			if (lock_mode != DB_LOCK_WRITE)
				goto done;
			if ((ret = __memp_dirty(mpf, &h, dbc->thread_info,
			    dbc->txn, dbc->priority, 0)) != 0) {
				if (h != NULL)
					(void)__memp_fput(mpf,
					    dbc->thread_info, h, dbc->priority);
				return (ret);
			}
		} else {
			/* Try for the lock without waiting, page still held. */
			if ((ret = __db_lget(dbc, 0, root_pgno,
			    lock_mode, DB_LOCK_NOWAIT, &lock)) == 0) {
				if (lock_mode == DB_LOCK_WRITE && (ret =
				    __memp_dirty(mpf, &h, dbc->thread_info,
				    dbc->txn, dbc->priority, 0)) != 0) {
					if (h != NULL)
						(void)__memp_fput(mpf,
						    dbc->thread_info, h,
						    dbc->priority);
					return (ret);
				}
				goto done;
			}

			/*
			 * The no-wait request failed.  Release the page,
			 * then wait for the lock and fetch the page again.
			 * "Not granted"/"deadlock" from the no-wait attempt
			 * are not treated as errors.
			 */
			t_ret = __memp_fput(mpf,
			    dbc->thread_info, h, dbc->priority);
			h = NULL;
			if (ret == DB_LOCK_DEADLOCK ||
			    ret == DB_LOCK_NOTGRANTED)
				ret = 0;
			if (ret == 0)
				ret = t_ret;
			if (ret != 0)
				return (ret);

			get_mode = 0;
			if (lock_mode == DB_LOCK_WRITE)
				get_mode = DB_MPOOL_DIRTY;

			if ((ret = __db_lget(dbc,
			    0, root_pgno, lock_mode, 0, &lock)) != 0)
				return (ret);
			/*
			 * With multiversion enabled the page is fetched
			 * clean here and dirtied further down, after we know
			 * it is still the page we want.
			 */
			if ((ret = __memp_fget(mpf,
			    &root_pgno, dbc->thread_info, dbc->txn,
			    (atomic_read(&mpf->mfp->multiversion) == 0 &&
			    lock_mode == DB_LOCK_WRITE) ? DB_MPOOL_DIRTY : 0,
			    &h)) != 0) {
				(void)__LPUT(dbc, lock);
				return (ret);
			}
		}

		/*
		 * The page may have changed while it was not held: re-check
		 * the condition that made us want the stack here.
		 */
		if (!((LF_ISSET(SR_PARENT) &&
		    (u_int8_t)(slevel + 1) >= LEVEL(h)) ||
		    LEVEL(h) == LEAFLEVEL ||
		    (LF_ISSET(SR_START) && slevel == LEVEL(h)))) {
			/* No longer the right page: release and start over. */
			ret = __memp_fput(mpf,
			    dbc->thread_info, h, dbc->priority);
			h = NULL;
			if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0)
				ret = t_ret;
			if (ret != 0)
				return (ret);
			goto try_again;
		} else if (atomic_read(&mpf->mfp->multiversion) != 0 &&
		    lock_mode == DB_LOCK_WRITE && (ret = __memp_dirty(mpf, &h,
		    dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) {
			/*
			 * Dirtying failed.  Release what we hold and report
			 * the error; falling through to BT_STK_ENTER would
			 * overwrite ret and put a released page on the
			 * stack.  As in the other __memp_dirty failure paths
			 * in this function, h is only released when it is
			 * still non-NULL.
			 */
			if (h != NULL)
				(void)__memp_fput(mpf,
				    dbc->thread_info, h, dbc->priority);
			(void)__LPUT(dbc, lock);
			return (ret);
		}
	}

done:	BT_STK_ENTER(dbp->env, cp, h, 0, lock, lock_mode, ret);
	return (ret);
}
/*
 * __bam_search --
 *	Descend the tree from the page returned by __bam_get_root, comparing
 *	key against page entries, and leave the result on the cursor's page
 *	stack.
 *
 * dbc:		cursor performing the search.
 * root_pgno:	starting page handed to __bam_get_root; overwritten locally
 *		with the page number actually returned.
 * key:		key compared against page entries through __bam_cmp (not
 *		consulted when SR_MIN or SR_MAX is set).
 * flags:	SR_* search flags, read and modified through the LF_* macros.
 * slevel:	level at which the descent stops when SR_START is not set.
 * recnop:	if non-NULL, receives the 1-based record number of the match
 *		(asserted below to be used only together with SR_DELNO).
 * exactp:	set to 1 when the search ends at "found", 0 when the binary
 *		search reaches the stop level without an equal key; it is not
 *		written on every path that reaches "done".
 *
 * Returns 0 on success, DB_NOTFOUND for the not-found cases below, or the
 * error returned by a lock, buffer pool or stack call.
 */
int
__bam_search(dbc, root_pgno, key, flags, slevel, recnop, exactp)
DBC *dbc;
db_pgno_t root_pgno;
const DBT *key;
u_int32_t flags;
int slevel, *exactp;
db_recno_t *recnop;
{
BTREE *t;
BTREE_CURSOR *cp;
DB *dbp;
DB_LOCK lock, saved_lock;
DB_MPOOLFILE *mpf;
ENV *env;
PAGE *h, *parent_h;
db_indx_t base, i, indx, *inp, lim;
db_lockmode_t lock_mode;
db_pgno_t pg, saved_pg, start_pgno;
db_recno_t recno;
int adjust, cmp, deloffset, ret, set_stack, stack, t_ret;
int getlock, was_next;
int (*func) __P((DB *, const DBT *, const DBT *));
u_int32_t get_mode, wait;
u_int8_t level, saved_level;
/* Lock checking is switched off for off-page duplicate cursors and */
/* switched back on at "done" and "err". */
if (F_ISSET(dbc, DBC_OPD))
LOCK_CHECK_OFF(dbc->thread_info);
dbp = dbc->dbp;
env = dbp->env;
mpf = dbp->mpf;
cp = (BTREE_CURSOR *)dbc->internal;
h = NULL;
parent_h = NULL;
t = dbp->bt_internal;
recno = 0;
t_ret = 0;
BT_STK_CLR(cp);
LOCK_INIT(saved_lock);
LOCK_INIT(lock);
/* Remember whether SR_NEXT was requested: the get_next path clears it */
/* and the lock-failure path below restores it before retrying. */
was_next = LF_ISSET(SR_NEXT);
wait = DB_LOCK_NOWAIT;
start_pgno = saved_pg = root_pgno;
saved_level = MAXBTREELEVEL;
/* (Re)start the descent: __bam_get_root enters the starting page on the */
/* stack; its page, lock and lock mode are copied into locals here. */
retry: if ((ret = __bam_get_root(dbc, start_pgno, slevel, flags, &stack)) != 0)
goto err;
lock_mode = cp->csp->lock_mode;
get_mode = lock_mode == DB_LOCK_WRITE ? DB_MPOOL_DIRTY : 0;
h = cp->csp->page;
root_pgno = pg = PGNO(h);
lock = cp->csp->lock;
set_stack = stack;
/* getlock: whether every page on the way down is locked.  DOWNREV */
/* cursors always lock and wait for the lock (wait = 0). */
if (F_ISSET(dbc, DBC_DOWNREV)) {
getlock = 1;
wait = 0;
} else
getlock = F_ISSET(cp, C_RECNUM) ||
(lock_mode == DB_LOCK_WRITE &&
(stack || LF_ISSET(SR_NEXT | SR_DEL)));
/* The starting page is already below the requested level: finish with */
/* the stack entry __bam_get_root made. */
if (!LF_ISSET(SR_START) && LEVEL(h) < slevel)
goto done;
BT_STK_CLR(cp);
/* Comparison function: the duplicate comparator (or __bam_defcmp when */
/* none is set) for off-page duplicate cursors, else the btree's own. */
func = F_ISSET(dbc, DBC_OPD) ?
(dbp->dup_compare == NULL ? __bam_defcmp : dbp->dup_compare) :
t->bt_compare;
/* One iteration per page visited on the way down. */
for (;;) {
/* adjust is the index step between keys on this page type. */
if (TYPE(h) == P_LBTREE)
adjust = P_INDX;
else {
/* Empty page that is not a duplicate leaf: release it and go */
/* through lock_next with locking forced on.  level is set so */
/* that "level - 1" there equals this page's level; pg is not */
/* changed, so the same page number is locked and fetched. */
if (TYPE(h) != P_LDUP && NUM_ENT(h) == 0) {
getlock = 1;
level = LEVEL(h) + 1;
if ((ret = __memp_fput(mpf, dbc->thread_info,
h, dbc->priority)) != 0)
goto err;
goto lock_next;
}
adjust = O_INDX;
}
inp = P_INP(dbp, h);
/* SR_MIN / SR_MAX: no comparison, take the first or last entry. */
if (LF_ISSET(SR_MIN | SR_MAX)) {
if (LF_ISSET(SR_MIN) || NUM_ENT(h) == 0)
indx = 0;
else if (TYPE(h) == P_LBTREE)
indx = NUM_ENT(h) - 2;
else
indx = NUM_ENT(h) - 1;
if (LEVEL(h) == LEAFLEVEL ||
(!LF_ISSET(SR_START) && LEVEL(h) == slevel)) {
if (LF_ISSET(SR_NEXT))
goto get_next;
goto found;
}
goto next;
}
/* Binary search of the page for key. */
DB_BINARY_SEARCH_FOR(base, lim, NUM_ENT(h), adjust) {
DB_BINARY_SEARCH_INCR(indx, base, lim, adjust);
if ((ret = __bam_cmp(dbc, key, h, indx,
func, &cmp)) != 0)
goto err;
if (cmp == 0) {
/* Equal key: finished if this is the stop level, otherwise */
/* descend through this entry. */
if (LEVEL(h) == LEAFLEVEL ||
(!LF_ISSET(SR_START) &&
LEVEL(h) == slevel)) {
if (LF_ISSET(SR_NEXT))
goto get_next;
goto found;
}
goto next;
}
if (cmp > 0)
DB_BINARY_SEARCH_SHIFT_BASE(indx, base,
lim, adjust);
}
/* No equal key on this page.  At the stop level the search ends */
/* here with *exactp == 0 and base as the resulting index. */
if (LEVEL(h) == LEAFLEVEL ||
(!LF_ISSET(SR_START) && LEVEL(h) == slevel)) {
*exactp = 0;
if (LF_ISSET(SR_EXACT)) {
ret = DB_NOTFOUND;
goto err;
}
/* SR_STK_ONLY: record the index only, then drop lock and page. */
if (LF_ISSET(SR_STK_ONLY)) {
BT_STK_NUM(env, cp, h, base, ret);
if ((t_ret =
__LPUT(dbc, lock)) != 0 && ret == 0)
ret = t_ret;
if ((t_ret = __memp_fput(mpf, dbc->thread_info,
h, dbc->priority)) != 0 && ret == 0)
ret = t_ret;
h = NULL;
if (ret != 0)
goto err;
goto done;
}
if (LF_ISSET(SR_NEXT)) {
/* SR_NEXT: move on to the entry after cp->sp->indx in the page */
/* saved at the bottom of the stack and descend from there with */
/* SR_MIN in place of SR_NEXT.  DB_NOTFOUND when the current */
/* page is the root or the saved page has no further entry. */
get_next:
if (PGNO(h) == root_pgno) {
ret = DB_NOTFOUND;
goto err;
}
indx = cp->sp->indx + 1;
if (indx == NUM_ENT(cp->sp->page)) {
ret = DB_NOTFOUND;
cp->csp++;
goto err;
}
LF_SET(SR_MIN);
LF_CLR(SR_NEXT);
set_stack = stack = 1;
if (LF_ISSET(SR_BOTH)) {
/* SR_BOTH: keep the current page on the stack as well, then */
/* continue with the child at indx of the saved page. */
cp->csp++;
BT_STK_PUSH(env,
cp, h, indx, lock, lock_mode, ret);
if (ret != 0)
goto err;
LOCK_INIT(lock);
h = cp->sp->page;
pg = GET_BINTERNAL(dbp, h, indx)->pgno;
level = LEVEL(h);
h = NULL;
goto lock_next;
} else {
/* Otherwise release the current page and lock, take the */
/* saved page and its lock back off the stack, and continue */
/* the descent from it. */
if ((ret = __LPUT(dbc, lock)) != 0)
goto err;
if ((ret = __memp_fput(mpf,
dbc->thread_info,
h, dbc->priority)) != 0)
goto err;
h = cp->sp->page;
cp->sp->page = NULL;
lock = cp->sp->lock;
LOCK_INIT(cp->sp->lock);
if ((ret = __bam_stkrel(dbc,
STK_NOLOCK)) != 0)
goto err;
goto next;
}
}
/* Enter the stop-level page with the insertion index.  For SR_DEL */
/* with an empty stack the first slot is skipped. */
if (LF_ISSET(SR_DEL) && cp->csp == cp->sp)
cp->csp++;
BT_STK_ENTER(env, cp, h, base, lock, lock_mode, ret);
if (ret != 0)
goto err;
goto done;
}
/* Internal page, no equal key: follow the entry before the insertion */
/* point (or the first entry when the insertion point is 0). */
indx = base > 0 ? base - O_INDX : base;
/* When a record number is wanted, add the record counts of all entries */
/* to the left of the one being followed. */
next: if (recnop != NULL)
for (i = 0; i < indx; ++i)
recno += GET_BINTERNAL(dbp, h, i)->nrecs;
pg = GET_BINTERNAL(dbp, h, indx)->pgno;
level = LEVEL(h);
if (LF_ISSET(SR_START) && slevel == level)
set_stack = stack = 1;
if (LF_ISSET(SR_STK_ONLY)) {
/* Index-only stack: record the index and release the page. */
if (slevel == LEVEL(h)) {
BT_STK_NUM(env, cp, h, indx, ret);
if ((t_ret = __memp_fput(mpf, dbc->thread_info,
h, dbc->priority)) != 0 && ret == 0)
ret = t_ret;
h = NULL;
if (ret != 0)
goto err;
goto done;
}
BT_STK_NUMPUSH(env, cp, h, indx, ret);
(void)__memp_fput(mpf,
dbc->thread_info, h, dbc->priority);
h = NULL;
} else if (stack) {
/* A stack is being kept: this page stays pinned on it. */
if (LF_ISSET(SR_PARENT) && slevel == level) {
BT_STK_ENTER(env,
cp, h, indx, lock, lock_mode, ret);
if (ret != 0)
goto err;
goto done;
}
/* SR_DEL and this page has more than one entry: discard the */
/* stack kept so far and restart it at this page (do_del). */
if (LF_ISSET(SR_DEL) && NUM_ENT(h) > 1) {
cp->csp--;
if ((ret = __bam_stkrel(dbc, STK_NOLOCK)) != 0)
goto err;
set_stack = stack = 0;
goto do_del;
}
BT_STK_PUSH(env,
cp, h, indx, lock, lock_mode, ret);
if (ret != 0)
goto err;
LOCK_INIT(lock);
/* Everything below a stacked page is write locked and dirtied. */
get_mode = DB_MPOOL_DIRTY;
lock_mode = DB_LOCK_WRITE;
getlock = 1;
goto lock_next;
} else {
/* No stack yet.  Start one at the child when the child is the */
/* parent of (or at) the requested level, or is a leaf. */
if ((LF_ISSET(SR_PARENT) &&
(u_int8_t)(slevel + 1) >= (level - 1)) ||
(level - 1) == LEAFLEVEL)
set_stack = 1;
/* Plain descent: keep this page pinned as parent_h until the */
/* child has been fetched. */
if (!LF_ISSET(SR_DEL | SR_NEXT)) {
parent_h = h;
goto lock_next;
}
/* SR_DEL through a page with a single entry: start the stack */
/* here, with no lock mode recorded for this entry. */
if ((LF_ISSET(SR_DEL) && NUM_ENT(h) == 1)) {
stack = set_stack = 1;
LF_SET(SR_WRITE);
cp->csp++;
BT_STK_PUSH(env, cp, h,
indx, lock, DB_LOCK_NG, ret);
if (ret != 0)
goto err;
LOCK_INIT(lock);
} else {
/* Replace whatever page is currently saved with this one.  For */
/* SR_NEXT at the last entry of this page the previously saved */
/* page is kept and this page goes in the following slot. */
do_del: if (cp->csp->page != NULL) {
if (LF_ISSET(SR_NEXT) &&
indx == NUM_ENT(h) - 1)
cp->csp++;
else if ((ret =
__bam_stkrel(dbc, STK_NOLOCK)) != 0)
goto err;
}
BT_STK_ENTER(env, cp,
h, indx, lock, lock_mode, ret);
if (ret != 0)
goto err;
LOCK_INIT(lock);
}
/* Lock (if required) the page about to be fetched.  h is cleared */
/* because its pin is now owned by the stack or by parent_h, or has */
/* already been released. */
lock_next: h = NULL;
if (set_stack && LF_ISSET(SR_WRITE)) {
lock_mode = DB_LOCK_WRITE;
get_mode = DB_MPOOL_DIRTY;
getlock = 1;
}
/* A lock obtained by waiting in an earlier pass (saved_lock) applies */
/* to this level: swap it in, and skip locking if it is for this page. */
if (level - 1 == saved_level) {
if ((ret = __LPUT(dbc, lock)) != 0)
goto err;
lock = saved_lock;
LOCK_INIT(saved_lock);
saved_level = MAXBTREELEVEL;
if (pg == saved_pg)
goto skip_lock;
}
if ((getlock || level - 1 == LEAFLEVEL) &&
(ret = __db_lget(dbc, LCK_COUPLE_ALWAYS,
pg, lock_mode, wait, &lock)) != 0) {
/* The lock request failed. */
if (LF_ISSET(SR_DEL | SR_NEXT) && !stack)
cp->csp++;
PERFMON6(env, race, bam_search, dbp->fname,
dbp->dname, ret, h, parent_h, flags);
if ((t_ret = __LPUT(dbc, lock)) != 0)
ret = t_ret;
if ((t_ret = __LPUT(dbc, saved_lock)) != 0 &&
ret == 0)
ret = t_ret;
/* Give up unless this was a no-wait request that reported */
/* "not granted" or "deadlock". */
if (wait == 0 || (ret != DB_LOCK_NOTGRANTED &&
ret != DB_LOCK_DEADLOCK))
goto err;
/* Release every page held, then wait for the lock with nothing */
/* pinned and keep it in saved_lock for the next pass. */
if (parent_h != NULL &&
(ret = __memp_fput(mpf, dbc->thread_info,
parent_h, dbc->priority)) != 0)
goto err;
parent_h = NULL;
BT_STK_POP(cp);
if ((ret = __bam_stkrel(dbc, STK_NOLOCK)) != 0)
goto err;
if ((ret = __db_lget(dbc,
0, pg, lock_mode, 0, &saved_lock)) != 0)
goto err;
/* For internal pages of sub-databases and sorted-duplicate */
/* btrees the saved lock is dropped without looking at the page. */
if (level - 1 > LEAFLEVEL &&
(F_ISSET(dbp, DB_AM_SUBDB) ||
(dbp->type == DB_BTREE &&
F_ISSET(dbp, DB_AM_DUPSORT))))
goto drop_lock;
/* Otherwise look at the page: if it is missing, at a different */
/* level, or of the wrong type for this tree, drop the lock too. */
if ((ret = __memp_fget(mpf, &pg,
dbc->thread_info, dbc->txn, 0, &h)) != 0 &&
ret != DB_PAGE_NOTFOUND)
goto err;
if (ret != 0 || LEVEL(h) != level - 1 ||
(LEVEL(h) == LEAFLEVEL ?
TYPE(h) != (dbc->dbtype == DB_BTREE ?
P_LBTREE : P_LRECNO) :
TYPE(h) != (dbc->dbtype == DB_BTREE ?
P_IBTREE : P_IRECNO))) {
drop_lock: ret = __LPUT(dbc, saved_lock);
if (ret != 0)
goto err;
pg = root_pgno;
saved_level = MAXBTREELEVEL;
}
if (h != NULL && (ret = __memp_fput(mpf,
dbc->thread_info, h, dbc->priority)) != 0)
goto err;
h = NULL;
/* Restore SR_NEXT if get_next replaced it with SR_MIN. */
if (was_next) {
LF_CLR(SR_MIN);
LF_SET(SR_NEXT);
}
/* Record which page and level the saved lock is for, then search */
/* again from the top. */
saved_pg = pg;
saved_level = level - 1;
goto retry;
}
skip_lock: stack = set_stack;
}
/* Fetch the next page, then release the parent that was kept pinned. */
if ((ret = __memp_fget(mpf, &pg,
dbc->thread_info, dbc->txn, get_mode, &h)) != 0)
goto err;
if (parent_h != NULL && (ret = __memp_fput(mpf,
dbc->thread_info, parent_h, dbc->priority)) != 0)
goto err;
parent_h = NULL;
}
/* The search ended on an entry at the stop level. */
found: *exactp = 1;
/* On a btree leaf with more than one pair, move to the last or first */
/* of a run of entries whose inp[] values are equal to this one's. */
if (TYPE(h) == P_LBTREE && NUM_ENT(h) > P_INDX) {
if (LF_ISSET(SR_DUPLAST))
while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
inp[indx] == inp[indx + P_INDX])
indx += P_INDX;
else if (LF_ISSET(SR_DUPFIRST))
while (indx > 0 &&
inp[indx] == inp[indx - P_INDX])
indx -= P_INDX;
}
DB_ASSERT(env, recnop == NULL || LF_ISSET(SR_DELNO));
/* SR_DELNO: step over entries flagged deleted within the same run */
/* (backwards for SR_DUPLAST, forwards otherwise); DB_NOTFOUND if the */
/* entry finally reached is still flagged deleted. */
if (LF_ISSET(SR_DELNO)) {
deloffset = TYPE(h) == P_LBTREE ? O_INDX : 0;
if (LF_ISSET(SR_DUPLAST))
while (B_DISSET(GET_BKEYDATA(dbp,
h, indx + deloffset)->type) && indx > 0 &&
inp[indx] == inp[indx - adjust])
indx -= adjust;
else
while (B_DISSET(GET_BKEYDATA(dbp,
h, indx + deloffset)->type) &&
indx < (db_indx_t)(NUM_ENT(h) - adjust) &&
inp[indx] == inp[indx + adjust])
indx += adjust;
if (B_DISSET(GET_BKEYDATA(dbp, h, indx + deloffset)->type)) {
ret = DB_NOTFOUND;
goto err;
}
/* Record number: the count accumulated on the way down, plus the */
/* entries before indx on this leaf that are not flagged deleted, */
/* plus one. */
if (recnop != NULL) {
DB_ASSERT(env, TYPE(h) == P_LBTREE);
for (i = 0; i < indx; i += P_INDX)
if (!B_DISSET(
GET_BKEYDATA(dbp, h, i + O_INDX)->type))
++recno;
*recnop = recno + 1;
}
}
/* Record the result: index only (page released) for SR_STK_ONLY, */
/* otherwise the page, index, lock and lock mode. */
if (LF_ISSET(SR_STK_ONLY)) {
BT_STK_NUM(env, cp, h, indx, ret);
if ((t_ret = __memp_fput(mpf,
dbc->thread_info, h, dbc->priority)) != 0 && ret == 0)
ret = t_ret;
h = NULL;
} else {
if (LF_ISSET(SR_DEL) && cp->csp == cp->sp)
cp->csp++;
BT_STK_ENTER(env, cp, h, indx, lock, lock_mode, ret);
}
if (ret != 0)
goto err;
cp->csp->lock = lock;
DB_ASSERT(env, parent_h == NULL);
/* Success exit: re-enable lock checking and release any saved lock. */
done:
if (F_ISSET(dbc, DBC_OPD))
LOCK_CHECK_ON(dbc->thread_info);
if ((ret = __LPUT(dbc, saved_lock)) != 0)
return (ret);
return (0);
/* Error exit: release the pages and locks held in locals, then the */
/* stack; the first error seen is the one returned. */
err: if (ret == 0)
ret = t_ret;
if (h != NULL && (t_ret = __memp_fput(mpf,
dbc->thread_info, h, dbc->priority)) != 0 && ret == 0)
ret = t_ret;
if (parent_h != NULL && (t_ret = __memp_fput(mpf,
dbc->thread_info, parent_h, dbc->priority)) != 0 && ret == 0)
ret = t_ret;
if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0)
ret = t_ret;
(void)__LPUT(dbc, saved_lock);
BT_STK_POP(cp);
(void)__bam_stkrel(dbc, 0);
if (F_ISSET(dbc, DBC_OPD))
LOCK_CHECK_ON(dbc->thread_info);
return (ret);
}
/*
 * __bam_stkrel --
 *	Release the pages and locks recorded on the cursor's page stack,
 *	walking from cp->sp up to and including cp->csp.
 *
 * dbc:		cursor whose stack is released.
 * flags:	STK_CLRDBC  - also clear cp->page / cp->lock when cp->page is
 *			      one of the stack pages;
 *		STK_PGONLY  - release page pins only; locks are left alone
 *			      and the stack is not cleared;
 *		STK_NOLOCK  - release a lock with __LPUT instead of __TLPUT
 *			      when it is a read lock or multiversion is off.
 *
 * Returns 0, or the first error reported by a page or lock release; the
 * walk continues after an error.
 */
int
__bam_stkrel(dbc, flags)
	DBC *dbc;
	u_int32_t flags;
{
	BTREE_CURSOR *cp;
	DB_MPOOLFILE *mpf;
	EPG *ent;
	int first_err, rc;

	DB_ASSERT(NULL, dbc != NULL);
	mpf = dbc->dbp->mpf;
	cp = (BTREE_CURSOR *)dbc->internal;
	first_err = 0;

	for (ent = cp->sp; ent <= cp->csp; ++ent) {
		/* Drop the page pin, if this entry still holds one. */
		if (ent->page != NULL) {
			if (LF_ISSET(STK_CLRDBC) && cp->page == ent->page) {
				cp->page = NULL;
				LOCK_INIT(cp->lock);
			}
			rc = __memp_fput(mpf,
			    dbc->thread_info, ent->page, dbc->priority);
			if (rc != 0 && first_err == 0)
				first_err = rc;
			ent->page = NULL;
		}

		/* Pages only: leave this entry's lock untouched. */
		if (LF_ISSET(STK_PGONLY))
			continue;

		/*
		 * __TLPUT is the default; __LPUT is used only when the
		 * caller passed STK_NOLOCK and the lock is a read lock or
		 * the file is not multiversion.
		 */
		if (!LF_ISSET(STK_NOLOCK) ||
		    (ent->lock.mode != DB_LOCK_READ &&
		    atomic_read(&mpf->mfp->multiversion) != 0))
			rc = __TLPUT(dbc, ent->lock);
		else
			rc = __LPUT(dbc, ent->lock);
		if (rc != 0 && first_err == 0)
			first_err = rc;
	}

	/* Everything has been released: reset the stack. */
	if (!LF_ISSET(STK_PGONLY))
		BT_STK_CLR(cp);

	return (first_err);
}
/*
 * __bam_stkgrow --
 *	Double the capacity of the cursor's page stack.
 *
 * env:	environment handle passed to the allocator.
 * cp:	btree cursor whose stack (sp .. esp) is replaced.
 *
 * The existing entries are copied into a newly allocated array of twice the
 * size.  The old array is freed unless it is the cursor's built-in
 * cp->stack.  On return cp->csp points at the first slot past the copied
 * entries.
 *
 * Returns 0 on success, or the allocator's error, in which case the cursor
 * is left unchanged.
 */
int
__bam_stkgrow(env, cp)
	ENV *env;
	BTREE_CURSOR *cp;
{
	EPG *grown;
	size_t new_cnt, old_cnt;
	int ret;

	old_cnt = cp->esp - cp->sp;
	new_cnt = old_cnt * 2;

	ret = __os_calloc(env, new_cnt, sizeof(EPG), &grown);
	if (ret != 0)
		return (ret);

	memcpy(grown, cp->sp, old_cnt * sizeof(EPG));

	/* Only heap-allocated stacks are freed, never the built-in one. */
	if (cp->sp != cp->stack)
		__os_free(env, cp->sp);

	cp->sp = grown;
	cp->csp = grown + old_cnt;
	cp->esp = grown + new_cnt;
	return (0);
}