#include "db_config.h"
#include "db_int.h"
#include "dbinc/db_page.h"
#include "dbinc/lock.h"
#include "dbinc/mp.h"
#include "dbinc/btree.h"
/*
 * Forward declarations for the file-local helpers used by __bam_split:
 * __bam_page splits a non-root page given its parent and itself,
 * __bam_psplit picks the split index and fills the two halves, and
 * __bam_root splits the root page.
 */
static int __bam_page __P((DBC *, EPG *, EPG *));
static int __bam_psplit __P((DBC *, EPG *, PAGE *, PAGE *, db_indx_t *));
static int __bam_root __P((DBC *, EPG *));
/*
 * __bam_split --
 *	Split the page that `arg' maps to, splitting ancestors first when the
 *	parent has no room for the new entry.
 *
 * dbc:		cursor; its BTREE_CURSOR stack (cp->csp) is filled in by the
 *		search calls below.
 * arg:		search argument; passed as the key to __bam_search when
 *		dbc->dbtype is DB_BTREE, otherwise cast to db_recno_t * and
 *		passed to __bam_rsearch.
 * root_pgnop:	optional out-parameter.  Inside the loop it is set to the root
 *		page number when the page found is the root, otherwise to the
 *		page number of its parent (csp[-1]).  If the loop is left
 *		because a search failed it is set to BAM_ROOT_PGNO(dbc).
 *
 * Returns 0 once the leaf level has been split (or no longer needs it),
 * otherwise the first error encountered.
 */
int
__bam_split(dbc, arg, root_pgnop)
	DBC *dbc;
	void *arg;
	db_pgno_t *root_pgnop;
{
	BTREE_CURSOR *cp;
	DB_LOCK metalock, next_lock;
	enum { UP, DOWN } dir;
	db_pgno_t pgno, next_pgno, root_pgno;
	int exact, level, ret;

	if (F_ISSET(dbc, DBC_OPD))
		LOCK_CHECK_OFF(dbc->thread_info);

	cp = (BTREE_CURSOR *)dbc->internal;

	/*
	 * Both locks are handed to __LPUT/__TLPUT unconditionally at the
	 * err/done label, including when __db_lget below fails, so both
	 * must start out in the "not held" state.
	 */
	LOCK_INIT(metalock);
	LOCK_INIT(next_lock);
	next_pgno = PGNO_INVALID;

	/* Write-lock the metadata page before searching the tree. */
	pgno = PGNO_BASE_MD;
	if ((ret = __db_lget(dbc,
	    0, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0)
		goto err;
	root_pgno = BAM_ROOT_PGNO(dbc);

	/*
	 * Start at the leaf level and move up one level each time the
	 * parent turns out to be full (DB_NEEDSPLIT); once a split succeeds
	 * above the leaf, reverse direction and work back down.
	 */
	for (dir = UP, level = LEAFLEVEL;; dir == UP ? ++level : --level) {
		/* Find the page at `level' together with its parent. */
retry:		if ((ret = (dbc->dbtype == DB_BTREE ?
		    __bam_search(dbc, PGNO_INVALID,
			arg, SR_WRPAIR, level, NULL, &exact) :
		    __bam_rsearch(dbc,
			(db_recno_t *)arg, SR_WRPAIR, level, &exact))) != 0)
			break;

		if (cp->csp[0].page->pgno == root_pgno) {
			/* Clamp level to the root's actual level. */
			level = cp->csp[0].page->level;
			if (root_pgnop != NULL)
				*root_pgnop = root_pgno;
		} else if (root_pgnop != NULL)
			*root_pgnop = cp->csp[-1].page->pgno;

		/*
		 * If the page already has room for two maximum-size on-page
		 * items there is nothing to split: release the stack and
		 * treat it as a successful split at this level.
		 */
		if (2 * B_MAXSIZEONPAGE(cp->ovflsize)
		    <= (db_indx_t)P_FREESPACE(dbc->dbp, cp->csp[0].page)) {
			if ((ret = __bam_stkrel(dbc, STK_NOLOCK)) != 0)
				goto err;
			goto no_split;
		}

		/*
		 * For a leaf with a successor, write-lock the next page.
		 * TRY_LOCK is given the retry label; ret is checked after it.
		 */
		if (ISLEAF(cp->csp->page) &&
		    (pgno = NEXT_PGNO(cp->csp->page)) != PGNO_INVALID) {
			TRY_LOCK(dbc, pgno,
			    next_pgno, next_lock, DB_LOCK_WRITE, retry);
			if (ret != 0)
				goto err;
		}

		/* Root pages and ordinary pages are split differently. */
		ret = cp->csp[0].page->pgno == root_pgno ?
		    __bam_root(dbc, &cp->csp[0]) :
		    __bam_page(dbc, &cp->csp[-1], &cp->csp[0]);
		BT_STK_CLR(cp);

		switch (ret) {
		case 0:
no_split:		/* A successful split at the leaf level finishes. */
			if (level == LEAFLEVEL)
				goto done;

			/* Otherwise head back down toward the leaf. */
			if (dir == UP)
				dir = DOWN;
			break;
		case DB_NEEDSPLIT:
			/* The parent was full: go (back) up a level. */
			if (dir == DOWN)
				dir = UP;
			break;
		default:
			goto err;
		}
	}

	/* Only reached when a search call failed. */
	if (root_pgnop != NULL)
		*root_pgnop = BAM_ROOT_PGNO(dbc);
err:
done:	(void)__LPUT(dbc, metalock);
	(void)__TLPUT(dbc, next_lock);

	if (F_ISSET(dbc, DBC_OPD))
		LOCK_CHECK_ON(dbc->thread_info);
	return (ret);
}
/*
 * __bam_root --
 *	Split the root page: move its entries onto two newly obtained pages
 *	and rebuild the root so that it references them.
 *
 * dbc:	cursor.
 * cp:	stack entry whose page is the root; cp->page and cp->lock are
 *	released before returning, on success and on every error path, and
 *	cp->page is set to NULL.
 *
 * Returns 0 on success, ENOSPC if the root's level is already
 * MAXBTREELEVEL or more, otherwise the first error from a failing call.
 */
static int
__bam_root(dbc, cp)
	DBC *dbc;
	EPG *cp;
{
	DB *dbp;
	DBT log_dbt, rootent[2];
	DB_LOCK llock, rlock;
	DB_LSN log_lsn;
	DB_MPOOLFILE *mpf;
	PAGE *lp, *rp;
	db_indx_t split;
	u_int32_t opflags;
	int ret, t_ret;

	dbp = dbc->dbp;
	mpf = dbp->mpf;
	lp = rp = NULL;
	LOCK_INIT(llock);
	LOCK_INIT(rlock);
	COMPQUIET(log_dbt.data, NULL);

	/*
	 * Refuse to add another level.  Leave through the common cleanup so
	 * the root page and its lock are released like on every other
	 * failure; the caller clears the cursor stack afterwards and would
	 * otherwise lose track of them.
	 */
	if (cp->page->level >= MAXBTREELEVEL) {
		__db_errx(dbp->env, DB_STR_A("1021",
		    "Too many btree levels: %d", "%d"), cp->page->level);
		ret = ENOSPC;
		goto err;
	}

	if ((ret = __memp_dirty(mpf,
	    &cp->page, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0)
		goto err;

	/* Obtain the two pages that will receive the root's entries. */
	if ((ret = __db_new(dbc, TYPE(cp->page), &llock, &lp)) != 0 ||
	    (ret = __db_new(dbc, TYPE(cp->page), &rlock, &rp)) != 0)
		goto err;

	/*
	 * Initialise them at the root's level and type.  Non-internal pages
	 * are linked to each other as prev/next siblings; internal pages
	 * get no sibling links.
	 */
	P_INIT(lp, dbp->pgsize, lp->pgno,
	    PGNO_INVALID, ISINTERNAL(cp->page) ? PGNO_INVALID : rp->pgno,
	    cp->page->level, TYPE(cp->page));
	P_INIT(rp, dbp->pgsize, rp->pgno,
	    ISINTERNAL(cp->page) ? PGNO_INVALID : lp->pgno, PGNO_INVALID,
	    cp->page->level, TYPE(cp->page));

	PERFMON5(env, alloc, btree_split,
	    dbp->fname, dbp->dname, lp->pgno, cp->page->pgno, lp->level);

	/* Distribute the root's entries over lp and rp. */
	if ((ret = __bam_psplit(dbc, cp, lp, rp, &split)) != 0)
		goto err;

	/* When logging, keep a copy of the root as it was before rewriting. */
	if (DBC_LOGGING(dbc)) {
		memset(&log_dbt, 0, sizeof(log_dbt));
		if ((ret =
		    __os_malloc(dbp->env, dbp->pgsize, &log_dbt.data)) != 0)
			goto err;
		log_dbt.size = dbp->pgsize;
		memcpy(log_dbt.data, cp->page, dbp->pgsize);
	}

	/* Rewrite the root page so that it references lp and rp. */
	if ((ret = (dbc->dbtype == DB_RECNO ?
	    __ram_root(dbc, cp->page, lp, rp) :
	    __bam_broot(dbc, cp->page, split, lp, rp))) != 0) {
		if (DBC_LOGGING(dbc))
			__os_free(dbp->env, log_dbt.data);
		goto err;
	}

	if (DBC_LOGGING(dbc)) {
		/* Describe the two new root entries for the log record. */
		memset(rootent, 0, sizeof(rootent));
		rootent[0].data = GET_BINTERNAL(dbp, cp->page, 0);
		rootent[1].data = GET_BINTERNAL(dbp, cp->page, 1);
		if (dbc->dbtype == DB_RECNO)
			rootent[0].size = rootent[1].size = RINTERNAL_SIZE;
		else {
			rootent[0].size = BINTERNAL_SIZE(
			    ((BINTERNAL *)rootent[0].data)->len);
			rootent[1].size = BINTERNAL_SIZE(
			    ((BINTERNAL *)rootent[1].data)->len);
		}
		ZERO_LSN(log_lsn);
		opflags = F_ISSET(
		    (BTREE_CURSOR *)dbc->internal, C_RECNUM) ? SPL_NRECS : 0;
		if (dbc->dbtype == DB_RECNO)
			opflags |= SPL_RECNO;
		ret = __bam_split_log(dbp, dbc->txn, &LSN(cp->page), 0,
		    OP_SET(opflags, cp->page), PGNO(lp), &LSN(lp),
		    PGNO(rp), &LSN(rp), (u_int32_t)NUM_ENT(lp),
		    PGNO_INVALID, &log_lsn, PGNO(cp->page),
		    &LSN(cp->page), 0, &log_dbt, &rootent[0], &rootent[1]);

		/* If logging failed, put the saved root image back. */
		if (ret != 0)
			memcpy(cp->page, log_dbt.data, dbp->pgsize);
		__os_free(dbp->env, log_dbt.data);

		if (ret != 0)
			goto err;
	} else
		LSN_NOT_LOGGED(LSN(cp->page));

	/* The new children carry the same LSN as the root. */
	LSN(lp) = LSN(cp->page);
	LSN(rp) = LSN(cp->page);

	ret = __bam_ca_split(dbc, cp->page->pgno, lp->pgno, rp->pgno, split, 1);

	/*
	 * Common exit for success and failure: release the root page, then
	 * the locks, then whichever new pages were obtained.  The first
	 * error seen is the one returned.
	 */
err:	if (cp->page != NULL && (t_ret = __memp_fput(mpf,
	    dbc->thread_info, cp->page, dbc->priority)) != 0 && ret == 0)
		ret = t_ret;
	cp->page = NULL;

	if ((t_ret = __TLPUT(dbc, llock)) != 0 && ret == 0)
		ret = t_ret;
	if ((t_ret = __TLPUT(dbc, rlock)) != 0 && ret == 0)
		ret = t_ret;
	if ((t_ret = __TLPUT(dbc, cp->lock)) != 0 && ret == 0)
		ret = t_ret;
	if (lp != NULL && (t_ret = __memp_fput(mpf,
	    dbc->thread_info, lp, dbc->priority)) != 0 && ret == 0)
		ret = t_ret;
	if (rp != NULL && (t_ret = __memp_fput(mpf,
	    dbc->thread_info, rp, dbc->priority)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);
}
/*
 * __bam_page --
 *	Split a non-root page.  The two halves are first built in a scratch
 *	buffer; the parent is checked for space and updated; the change is
 *	logged; only then are the halves copied over the original page
 *	(left half) and a newly obtained page (right half).
 *
 * dbc:	cursor.
 * pp:	stack entry for the parent page.
 * cp:	stack entry for the page being split.
 *
 * Returns 0 on success, DB_NEEDSPLIT when __bam_pinsert reports the parent
 * has no room, otherwise the error from the failing call.
 */
static int
__bam_page(dbc, pp, cp)
DBC *dbc;
EPG *pp, *cp;
{
BTREE_CURSOR *bc;
DB *dbp;
DBT log_dbt, rentry;
DB_LOCK rplock;
DB_LSN log_lsn;
DB_LSN save_lsn;
DB_MPOOLFILE *mpf;
PAGE *lp, *rp, *alloc_rp, *tp;
db_indx_t split;
u_int32_t opflags;
int ret, t_ret;
dbp = dbc->dbp;
mpf = dbp->mpf;
alloc_rp = lp = rp = tp = NULL;
LOCK_INIT(rplock);
ret = -1;
/*
 * One allocation holds both scratch pages: lp is the first pgsize bytes
 * and rp the second, so freeing lp frees both.
 */
if ((ret = __os_malloc(dbp->env, dbp->pgsize * 2, &lp)) != 0)
goto err;
/*
 * The scratch left page takes the original page's number and previous
 * link; its next link is a placeholder (0) until the right page's number
 * is known.  Internal pages get no sibling links.
 */
P_INIT(lp, dbp->pgsize, PGNO(cp->page),
ISINTERNAL(cp->page) ? PGNO_INVALID : PREV_PGNO(cp->page),
ISINTERNAL(cp->page) ? PGNO_INVALID : 0,
cp->page->level, TYPE(cp->page));
rp = (PAGE *)((u_int8_t *)lp + dbp->pgsize);
/*
 * The scratch right page gets a placeholder page number (0), points back
 * at the original page and forward at the original's next page.
 */
P_INIT(rp, dbp->pgsize, 0,
ISINTERNAL(cp->page) ? PGNO_INVALID : PGNO(cp->page),
ISINTERNAL(cp->page) ? PGNO_INVALID : NEXT_PGNO(cp->page),
cp->page->level, TYPE(cp->page));
/* Pick the split index and fill the two scratch pages. */
if ((ret = __bam_psplit(dbc, cp, lp, rp, &split)) != 0)
goto err;
/*
 * Space check only (BPI_SPACEONLY): fail with DB_NEEDSPLIT now, before
 * anything has been modified, if the parent cannot take the new entry.
 */
if ((ret = __bam_pinsert(dbc, pp, split, lp, rp, BPI_SPACEONLY)) != 0)
goto err;
/* Obtain the real right-hand page and its lock. */
if ((ret = __db_new(dbc, TYPE(cp->page), &rplock, &alloc_rp)) != 0)
goto err;
/*
 * For a leaf with a successor, fetch that page dirty (tp) so its previous
 * link can be redirected to the new right page further down.
 */
if (ISLEAF(cp->page) && NEXT_PGNO(cp->page) != PGNO_INVALID &&
(ret = __memp_fget(mpf, &NEXT_PGNO(cp->page),
dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &tp)) != 0)
goto err;
PERFMON5(env, alloc, btree_split, dbp->fname,
dbp->dname, cp->page->pgno, pp->page->pgno, cp->page->level);
/* Replace the placeholders now that the right page's number is known. */
PGNO(rp) = NEXT_PGNO(lp) = PGNO(alloc_rp);
DB_ASSERT(dbp->env, IS_DIRTY(cp->page));
DB_ASSERT(dbp->env, IS_DIRTY(pp->page));
bc = (BTREE_CURSOR *)dbc->internal;
/*
 * Real parent insert.  BPI_NOLOGGING is passed unless C_RECNUM is set on
 * the cursor; the split log record written below carries the parent
 * entry (rentry).
 */
if ((ret = __bam_pinsert(dbc,
pp, split, lp, rp, F_ISSET(bc, C_RECNUM) ? 0 : BPI_NOLOGGING)) != 0)
goto err;
if (DBC_LOGGING(dbc)) {
/* Log the pre-split page image and the new parent entry. */
memset(&log_dbt, 0, sizeof(log_dbt));
log_dbt.data = cp->page;
log_dbt.size = dbp->pgsize;
memset(&rentry, 0, sizeof(rentry));
rentry.data = GET_BINTERNAL(dbp, pp->page, pp->indx + 1);
opflags = F_ISSET(bc, C_RECNUM) ? SPL_NRECS : 0;
if (dbc->dbtype == DB_RECNO) {
opflags |= SPL_RECNO;
rentry.size = RINTERNAL_SIZE;
} else
rentry.size =
BINTERNAL_SIZE(((BINTERNAL *)rentry.data)->len);
/* With no next page, a zeroed LSN stands in for the next page's LSN. */
if (tp == NULL)
ZERO_LSN(log_lsn);
if ((ret = __bam_split_log(dbp, dbc->txn, &LSN(cp->page),
0, OP_SET(opflags, pp->page), PGNO(cp->page),
&LSN(cp->page), PGNO(alloc_rp), &LSN(alloc_rp),
(u_int32_t)NUM_ENT(lp), tp == NULL ? 0 : PGNO(tp),
tp == NULL ? &log_lsn : &LSN(tp), PGNO(pp->page),
&LSN(pp->page), pp->indx, &log_dbt, NULL, &rentry)) != 0) {
/*
 * Logging failed.  When C_RECNUM is not set the parent insert above was
 * done without logging, so remove that entry again here.
 */
if (F_ISSET(bc, C_RECNUM) == 0) {
t_ret = __db_ditem_nolog(dbc, pp->page,
pp->indx + 1, rentry.size);
DB_ASSERT(dbp->env, t_ret == 0);
}
goto err;
}
} else
LSN_NOT_LOGGED(LSN(cp->page));
/* Give every page touched by the split the split page's LSN. */
LSN(alloc_rp) = LSN(cp->page);
LSN(lp) = LSN(cp->page);
LSN(rp) = LSN(cp->page);
LSN(pp->page) = LSN(cp->page);
if (tp != NULL) {
/* Redirect the old successor's previous link to the new right page. */
PREV_PGNO(tp) = PGNO(rp);
LSN(tp) = LSN(cp->page);
}
/*
 * Copy the scratch right page into the real one in two pieces: the first
 * LOFFSET(dbp, rp) bytes, then everything from HOFFSET(rp) to the end of
 * the page.  The destination's lsn field is saved and restored around
 * the copy.
 */
save_lsn = alloc_rp->lsn;
memcpy(alloc_rp, rp, LOFFSET(dbp, rp));
memcpy((u_int8_t *)alloc_rp + HOFFSET(rp),
(u_int8_t *)rp + HOFFSET(rp), dbp->pgsize - HOFFSET(rp));
alloc_rp->lsn = save_lsn;
/* Same two-piece copy for the left half, over the original page. */
save_lsn = cp->page->lsn;
memcpy(cp->page, lp, LOFFSET(dbp, lp));
memcpy((u_int8_t *)cp->page + HOFFSET(lp),
(u_int8_t *)lp + HOFFSET(lp), dbp->pgsize - HOFFSET(lp));
cp->page->lsn = save_lsn;
if ((ret = __bam_ca_split(dbc,
PGNO(cp->page), PGNO(cp->page), PGNO(rp), split, 0)) != 0)
goto err;
/* Success: free the scratch buffer (lp and rp share one allocation). */
__os_free(dbp->env, lp);
/* Release the new page, its lock, the successor page and the stack. */
if ((t_ret = __memp_fput(mpf,
dbc->thread_info, alloc_rp, dbc->priority)) != 0 && ret == 0)
ret = t_ret;
if ((t_ret = __TLPUT(dbc, rplock)) != 0 && ret == 0)
ret = t_ret;
if (tp != NULL) {
if ((t_ret = __memp_fput(mpf,
dbc->thread_info, tp, dbc->priority)) != 0 && ret == 0)
ret = t_ret;
}
if ((t_ret = __bam_stkrel(dbc, STK_CLRDBC)) != 0 && ret == 0)
ret = t_ret;
return (ret);
/*
 * Failure: free the scratch buffer and put back every page obtained so
 * far, ignoring secondary errors so the original ret is returned.
 */
err: if (lp != NULL)
__os_free(dbp->env, lp);
if (alloc_rp != NULL)
(void)__memp_fput(mpf,
dbc->thread_info, alloc_rp, dbc->priority);
if (tp != NULL)
(void)__memp_fput(mpf, dbc->thread_info, tp, dbc->priority);
if (pp->page != NULL)
(void)__memp_fput(mpf,
dbc->thread_info, pp->page, dbc->priority);
/*
 * The parent lock goes through __LPUT only for DB_NEEDSPLIT on a file
 * whose multiversion counter is zero; otherwise through __TLPUT.
 */
if (ret == DB_NEEDSPLIT && atomic_read(&mpf->mfp->multiversion) == 0)
(void)__LPUT(dbc, pp->lock);
else
(void)__TLPUT(dbc, pp->lock);
(void)__memp_fput(mpf, dbc->thread_info, cp->page, dbc->priority);
/*
 * The new page's lock is released here only when there is no transaction;
 * the split page's lock only when there is no transaction or the failure
 * was DB_NEEDSPLIT.  In the remaining cases these locks are not released
 * in this function.
 */
if (dbc->txn == NULL)
(void)__LPUT(dbc, rplock);
if (dbc->txn == NULL || ret == DB_NEEDSPLIT)
(void)__LPUT(dbc, cp->lock);
return (ret);
}
int
__bam_broot(dbc, rootp, split, lp, rp)
DBC *dbc;
u_int32_t split;
PAGE *rootp, *lp, *rp;
{
BINTERNAL bi, bi0, *child_bi;
BKEYDATA *child_bk;
BOVERFLOW bo, *child_bo;
BTREE_CURSOR *cp;
DB *dbp;
DBT hdr, hdr0, data;
db_pgno_t root_pgno;
int ret;
dbp = dbc->dbp;
cp = (BTREE_CURSOR *)dbc->internal;
child_bo = NULL;
data.data = NULL;
memset(&bi, 0, sizeof(bi));
switch (TYPE(rootp)) {
case P_IBTREE:
child_bi = GET_BINTERNAL(dbp, rootp, split);
switch (B_TYPE(child_bi->type)) {
case B_KEYDATA:
bi.len = child_bi->len;
B_TSET(bi.type, B_KEYDATA);
bi.pgno = rp->pgno;
DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data));
if ((ret = __os_malloc(dbp->env,
child_bi->len, &data.data)) != 0)
return (ret);
memcpy(data.data, child_bi->data, child_bi->len);
data.size = child_bi->len;
break;
case B_OVERFLOW:
child_bo = (BOVERFLOW *)child_bi->data;
memset(&bo, 0, sizeof(bo));
bo.type = B_OVERFLOW;
bo.tlen = child_bo->tlen;
bo.pgno = child_bo->pgno;
bi.len = BOVERFLOW_SIZE;
B_TSET(bi.type, B_OVERFLOW);
bi.pgno = rp->pgno;
DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data));
DB_SET_DBT(data, &bo, BOVERFLOW_SIZE);
break;
case B_DUPLICATE:
default:
goto pgfmt;
}
break;
case P_LDUP:
case P_LBTREE:
child_bk = GET_BKEYDATA(dbp, rootp, split);
switch (B_TYPE(child_bk->type)) {
case B_KEYDATA:
bi.len = child_bk->len;
B_TSET(bi.type, B_KEYDATA);
bi.pgno = rp->pgno;
DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data));
if ((ret = __os_malloc(dbp->env,
child_bk->len, &data.data)) != 0)
return (ret);
memcpy(data.data, child_bk->data, child_bk->len);
data.size = child_bk->len;
break;
case B_OVERFLOW:
child_bo = (BOVERFLOW *)child_bk;
memset(&bo, 0, sizeof(bo));
bo.type = B_OVERFLOW;
bo.tlen = child_bo->tlen;
memset(&hdr, 0, sizeof(hdr));
if ((ret = __db_goff(dbc, &hdr, child_bo->tlen,
child_bo->pgno, &hdr.data, &hdr.size)) == 0)
ret = __db_poff(dbc, &hdr, &bo.pgno);
if (hdr.data != NULL)
__os_free(dbp->env, hdr.data);
if (ret != 0)
return (ret);
bi.len = BOVERFLOW_SIZE;
B_TSET(bi.type, B_OVERFLOW);
bi.pgno = rp->pgno;
DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data));
DB_SET_DBT(data, &bo, BOVERFLOW_SIZE);
break;
case B_DUPLICATE:
default:
goto pgfmt;
}
break;
default:
pgfmt: return (__db_pgfmt(dbp->env, rp->pgno));
}
root_pgno = BAM_ROOT_PGNO(dbc);
P_INIT(rootp, dbp->pgsize,
root_pgno, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IBTREE);
memset(&bi0, 0, sizeof(bi0));
B_TSET(bi0.type, B_KEYDATA);
bi0.pgno = lp->pgno;
if (F_ISSET(cp, C_RECNUM)) {
bi0.nrecs = __bam_total(dbp, lp);
RE_NREC_SET(rootp, bi0.nrecs);
bi.nrecs = __bam_total(dbp, rp);
RE_NREC_ADJ(rootp, bi.nrecs);
}
DB_SET_DBT(hdr0, &bi0, SSZA(BINTERNAL, data));
if ((ret = __db_pitem_nolog(dbc, rootp,
0, BINTERNAL_SIZE(0), &hdr0, NULL)) != 0)
goto err;
ret = __db_pitem_nolog(dbc, rootp, 1,
BINTERNAL_SIZE(data.size), &hdr, &data);
err: if (data.data != NULL && child_bo == NULL)
__os_free(dbp->env, data.data);
return (ret);
}
int
__ram_root(dbc, rootp, lp, rp)
DBC *dbc;
PAGE *rootp, *lp, *rp;
{
DB *dbp;
DBT hdr;
RINTERNAL ri;
db_pgno_t root_pgno;
int ret;
dbp = dbc->dbp;
root_pgno = BAM_ROOT_PGNO(dbc);
P_INIT(rootp, dbp->pgsize,
root_pgno, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IRECNO);
DB_SET_DBT(hdr, &ri, RINTERNAL_SIZE);
ri.pgno = lp->pgno;
ri.nrecs = __bam_total(dbp, lp);
if ((ret = __db_pitem_nolog(dbc,
rootp, 0, RINTERNAL_SIZE, &hdr, NULL)) != 0)
return (ret);
RE_NREC_SET(rootp, ri.nrecs);
ri.pgno = rp->pgno;
ri.nrecs = __bam_total(dbp, rp);
if ((ret = __db_pitem_nolog(dbc,
rootp, 1, RINTERNAL_SIZE, &hdr, NULL)) != 0)
return (ret);
RE_NREC_ADJ(rootp, ri.nrecs);
return (0);
}
/*
 * __bam_pinsert --
 *	Put the entry that references the right-hand child of a split into
 *	the parent page, directly after the parent's current index.
 *
 * dbc:		cursor.
 * parent:	stack entry for the parent page; the entry that follows it in
 *		the stack (parent + 1) is taken to be the child being split.
 * split:	index on the child page of the item supplying the new key.
 * lchild:	left page of the split; only consulted for prefix compression.
 * rchild:	right page of the split; its pgno goes into the new entry.
 * flags:	BPI_SPACEONLY	only check that the entry would fit;
 *		BPI_REPLACE	replace the entry at the target index instead
 *				of inserting a new one;
 *		BPI_NOLOGGING	insert with __db_pitem_nolog;
 *		BPI_NORECNUM	skip the record-count adjustment.
 *
 * Returns 0 on success (or when BPI_SPACEONLY is set and the entry fits),
 * DB_NEEDSPLIT when the parent has too little free space, the result of
 * __db_pgfmt() for an unexpected page or item type, otherwise the error
 * from a failing call.
 */
int
__bam_pinsert(dbc, parent, split, lchild, rchild, flags)
	DBC *dbc;
	EPG *parent;
	u_int32_t split;
	PAGE *lchild, *rchild;
	int flags;
{
	BINTERNAL bi, *child_bi;
	BKEYDATA *child_bk, *tmp_bk;
	BOVERFLOW bo, *child_bo;
	BTREE *t;
	BTREE_CURSOR *cp;
	DB *dbp;
	DBT a, b, hdr, data;
	EPG *child;
	PAGE *ppage;
	RINTERNAL ri;
	db_indx_t off;
	db_recno_t nrecs;
	size_t (*func) __P((DB *, const DBT *, const DBT *));
	int (*pitem) __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *));
	u_int32_t n, nbytes, nksize, oldsize, size;
	int ret;

	dbp = dbc->dbp;
	cp = (BTREE_CURSOR *)dbc->internal;
	t = dbp->bt_internal;
	ppage = parent->page;
	child = parent + 1;

	/* Records that moved to the right page; only counted when needed. */
	nrecs = F_ISSET(cp, C_RECNUM) &&
	    !LF_ISSET(BPI_SPACEONLY) ? __bam_total(dbp, rchild) : 0;

	/* The new entry goes immediately after the parent's current index. */
	off = parent->indx + O_INDX;

	/* When replacing, the old entry's space counts as available. */
	if (LF_ISSET(BPI_REPLACE))
		oldsize = TYPE(ppage) == P_IRECNO ? RINTERNAL_PSIZE :
		    BINTERNAL_PSIZE(GET_BINTERNAL(dbp, ppage, off)->len);
	else
		oldsize = 0;

	switch (TYPE(child->page)) {
	case P_IBTREE:
		child_bi = GET_BINTERNAL(dbp, child->page, split);
		nbytes = BINTERNAL_PSIZE(child_bi->len);

		if (P_FREESPACE(dbp, ppage) + oldsize < nbytes)
			return (DB_NEEDSPLIT);
		if (LF_ISSET(BPI_SPACEONLY))
			return (0);

		switch (B_TYPE(child_bi->type)) {
		case B_KEYDATA:
			/* Copy the child's key as-is. */
			memset(&bi, 0, sizeof(bi));
			bi.len = child_bi->len;
			B_TSET(bi.type, B_KEYDATA);
			bi.pgno = rchild->pgno;
			bi.nrecs = nrecs;
			DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data));
			DB_SET_DBT(data, child_bi->data, child_bi->len);
			size = BINTERNAL_SIZE(child_bi->len);
			break;
		case B_OVERFLOW:
			/* Reference the same overflow chain as the child. */
			child_bo = (BOVERFLOW *)child_bi->data;
			memset(&bo, 0, sizeof(bo));
			bo.type = B_OVERFLOW;
			bo.tlen = child_bo->tlen;
			bo.pgno = child_bo->pgno;
			/*
			 * Zero the header like every other branch does;
			 * otherwise bi.unused would be copied to the parent
			 * page uninitialised.
			 */
			memset(&bi, 0, sizeof(bi));
			bi.len = BOVERFLOW_SIZE;
			B_TSET(bi.type, B_OVERFLOW);
			bi.pgno = rchild->pgno;
			bi.nrecs = nrecs;
			DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data));
			DB_SET_DBT(data, &bo, BOVERFLOW_SIZE);
			size = BINTERNAL_SIZE(BOVERFLOW_SIZE);
			break;
		case B_DUPLICATE:
		default:
			goto pgfmt;
		}
		break;
	case P_LDUP:
	case P_LBTREE:
		child_bk = GET_BKEYDATA(dbp, child->page, split);
		switch (B_TYPE(child_bk->type)) {
		case B_KEYDATA:
			nbytes = BINTERNAL_PSIZE(child_bk->len);
			nksize = child_bk->len;

			/*
			 * Choose a prefix function: for off-page duplicate
			 * cursors only the default one, and only when the
			 * default comparison is in use; otherwise the one
			 * configured on the tree.
			 */
			if (F_ISSET(dbc, DBC_OPD)) {
				if (dbp->dup_compare == __bam_defcmp)
					func = __bam_defpfx;
				else
					func = NULL;
			} else
				func = t->bt_prefix;
			if (func == NULL)
				goto noprefix;

			/*
			 * Compare against the last key on the left page; the
			 * prefix function's result is used as the key length
			 * only when it makes the entry smaller.
			 */
			tmp_bk = GET_BKEYDATA(dbp, lchild, NUM_ENT(lchild) -
			    (TYPE(lchild) == P_LDUP ? O_INDX : P_INDX));
			if (B_TYPE(tmp_bk->type) != B_KEYDATA)
				goto noprefix;
			DB_INIT_DBT(a, tmp_bk->data, tmp_bk->len);
			DB_INIT_DBT(b, child_bk->data, child_bk->len);
			nksize = (u_int32_t)func(dbp, &a, &b);
			if ((n = BINTERNAL_PSIZE(nksize)) < nbytes)
				nbytes = n;
			else
				nksize = child_bk->len;

noprefix:		if (P_FREESPACE(dbp, ppage) + oldsize < nbytes)
				return (DB_NEEDSPLIT);
			if (LF_ISSET(BPI_SPACEONLY))
				return (0);

			memset(&bi, 0, sizeof(bi));
			bi.len = nksize;
			B_TSET(bi.type, B_KEYDATA);
			bi.pgno = rchild->pgno;
			bi.nrecs = nrecs;
			DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data));
			DB_SET_DBT(data, child_bk->data, nksize);
			size = BINTERNAL_SIZE(nksize);
			break;
		case B_OVERFLOW:
			nbytes = BINTERNAL_PSIZE(BOVERFLOW_SIZE);

			if (P_FREESPACE(dbp, ppage) + oldsize < nbytes)
				return (DB_NEEDSPLIT);
			if (LF_ISSET(BPI_SPACEONLY))
				return (0);

			/*
			 * Read the leaf's overflow key with __db_goff and
			 * store it again with __db_poff, so the parent entry
			 * gets its own page number in bo.pgno.  hdr is the
			 * scratch DBT for this and is re-pointed below.
			 */
			child_bo = (BOVERFLOW *)child_bk;
			memset(&bo, 0, sizeof(bo));
			bo.type = B_OVERFLOW;
			bo.tlen = child_bo->tlen;
			memset(&hdr, 0, sizeof(hdr));
			if ((ret = __db_goff(dbc, &hdr, child_bo->tlen,
			    child_bo->pgno, &hdr.data, &hdr.size)) == 0)
				ret = __db_poff(dbc, &hdr, &bo.pgno);

			if (hdr.data != NULL)
				__os_free(dbp->env, hdr.data);
			if (ret != 0)
				return (ret);

			memset(&bi, 0, sizeof(bi));
			bi.len = BOVERFLOW_SIZE;
			B_TSET(bi.type, B_OVERFLOW);
			bi.pgno = rchild->pgno;
			bi.nrecs = nrecs;
			DB_SET_DBT(hdr, &bi, SSZA(BINTERNAL, data));
			DB_SET_DBT(data, &bo, BOVERFLOW_SIZE);
			size = BINTERNAL_SIZE(BOVERFLOW_SIZE);
			break;
		case B_DUPLICATE:
		default:
			goto pgfmt;
		}
		break;
	case P_IRECNO:
	case P_LRECNO:
		nbytes = RINTERNAL_PSIZE;

		if (P_FREESPACE(dbp, ppage) + oldsize < nbytes)
			return (DB_NEEDSPLIT);
		if (LF_ISSET(BPI_SPACEONLY))
			return (0);

		/* Recno entries have no key bytes: header only. */
		DB_SET_DBT(hdr, &ri, RINTERNAL_SIZE);
		ri.pgno = rchild->pgno;
		ri.nrecs = nrecs;
		size = RINTERNAL_SIZE;
		data.size = 0;

		/*
		 * For recno a replace is done as delete-then-insert: drop
		 * the old entry and clear the flag so the insert path below
		 * is taken.
		 */
		if (LF_ISSET(BPI_REPLACE)) {
			if ((ret = __bam_ditem(dbc, ppage, off)) != 0)
				return (ret);
			LF_CLR(BPI_REPLACE);
		}
		break;
	default:
pgfmt:		return (__db_pgfmt(dbp->env, PGNO(child->page)));
	}

	if (LF_ISSET(BPI_REPLACE)) {
		DB_ASSERT(dbp->env, !LF_ISSET(BPI_NOLOGGING));
		if ((ret = __bam_irep(dbc, ppage, off, &hdr, &data)) != 0)
			return (ret);
	} else {
		if (LF_ISSET(BPI_NOLOGGING))
			pitem = __db_pitem_nolog;
		else
			pitem = __db_pitem;

		if ((ret = pitem(dbc, ppage,
		    off, size, &hdr, data.size != 0 ? &data : NULL)) != 0)
			return (ret);
	}

	/*
	 * The records now referenced by the new entry are subtracted from
	 * the entry at parent->indx, which previously accounted for them.
	 */
	if (F_ISSET(cp, C_RECNUM) && !LF_ISSET(BPI_NORECNUM)) {
		if (DBC_LOGGING(dbc)) {
			if ((ret = __bam_cadjust_log(dbp, dbc->txn,
			    &LSN(ppage), 0, PGNO(ppage), &LSN(ppage),
			    parent->indx, -(int32_t)nrecs, 0)) != 0)
				return (ret);
		} else
			LSN_NOT_LOGGED(LSN(ppage));

		if (dbc->dbtype == DB_RECNO)
			GET_RINTERNAL(dbp, ppage, parent->indx)->nrecs -= nrecs;
		else
			GET_BINTERNAL(dbp, ppage, parent->indx)->nrecs -= nrecs;
	}

	return (0);
}
/*
 * __bam_psplit --
 *	Choose the index at which to split cp->page, then copy entries
 *	[0, split) to lp and [split, NUM_ENT) to rp with __bam_copy.
 *
 * dbc:		cursor.
 * cp:		stack entry for the page being split; cp->indx biases the
 *		choice when the page is first or last in its sibling chain.
 * lp, rp:	destination pages, already initialised by the caller.
 * splitret:	out-parameter receiving the chosen split index.
 *
 * Returns 0 on success, the result of __db_pgfmt() for an unexpected page
 * type, or the error from __bam_copy.
 */
static int
__bam_psplit(dbc, cp, lp, rp, splitret)
DBC *dbc;
EPG *cp;
PAGE *lp, *rp;
db_indx_t *splitret;
{
DB *dbp;
PAGE *pp;
db_indx_t half, *inp, nbytes, off, splitp, top;
int adjust, cnt, iflag, isbigkey, ret;
dbp = dbc->dbp;
pp = cp->page;
inp = P_INP(dbp, pp);
/* Index step between candidate split points: P_INDX on P_LBTREE, else O_INDX. */
adjust = TYPE(pp) == P_LBTREE ? P_INDX : O_INDX;
/*
 * Shortcut: if the page has no next page and the cursor index is at or
 * past the last entry, split just before the last entry; if it has no
 * previous page and the cursor index is 0, split just after the first.
 * Either case skips the byte-count scan below.
 */
off = 0;
if (NEXT_PGNO(pp) == PGNO_INVALID && cp->indx >= NUM_ENT(pp) - adjust)
off = NUM_ENT(pp) - adjust;
else if (PREV_PGNO(pp) == PGNO_INVALID && cp->indx == 0)
off = adjust;
if (off != 0)
goto sort;
/*
 * Otherwise walk the entries from the start, summing their sizes, and stop
 * once half of the bytes between HOFFSET and the end of the page have been
 * counted (or the last candidate index is reached).
 */
top = NUM_ENT(pp) - adjust;
half = (dbp->pgsize - HOFFSET(pp)) / 2;
for (nbytes = 0, off = 0; off < top && nbytes < half; ++off)
switch (TYPE(pp)) {
case P_IBTREE:
if (B_TYPE(
GET_BINTERNAL(dbp, pp, off)->type) == B_KEYDATA)
nbytes += BINTERNAL_SIZE(
GET_BINTERNAL(dbp, pp, off)->len);
else
nbytes += BINTERNAL_SIZE(BOVERFLOW_SIZE);
break;
case P_LBTREE:
/* Count the first item of the pair, then fall into the shared code for the second. */
if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
B_KEYDATA)
nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp,
pp, off)->len);
else
nbytes += BOVERFLOW_SIZE;
++off;
/* FALLTHROUGH */
case P_LDUP:
case P_LRECNO:
if (B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
B_KEYDATA)
nbytes += BKEYDATA_SIZE(GET_BKEYDATA(dbp,
pp, off)->len);
else
nbytes += BOVERFLOW_SIZE;
break;
case P_IRECNO:
nbytes += RINTERNAL_SIZE;
break;
default:
return (__db_pgfmt(dbp->env, pp->pgno));
}
sort: splitp = off;
/*
 * On btree pages (internal, leaf, duplicate) note whether the item at the
 * tentative split point is something other than B_KEYDATA; recno pages
 * never are.
 */
switch (TYPE(pp)) {
case P_IBTREE:
iflag = 1;
isbigkey =
B_TYPE(GET_BINTERNAL(dbp, pp, off)->type) != B_KEYDATA;
break;
case P_LBTREE:
case P_LDUP:
iflag = 0;
isbigkey = B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) !=
B_KEYDATA;
break;
default:
iflag = isbigkey = 0;
}
/*
 * If so, look up to three steps forward and backward, alternating, for a
 * B_KEYDATA item and split there instead.  If none is found splitp is
 * left unchanged.
 * NOTE(review): the forward test falls through to GET_BKEYDATA even when
 * iflag is set, unlike the backward test which uses ?: -- presumably
 * harmless if both structures keep `type' at the same offset; confirm.
 */
if (isbigkey)
for (cnt = 1; cnt <= 3; ++cnt) {
off = splitp + cnt * adjust;
if (off < (db_indx_t)NUM_ENT(pp) &&
((iflag && B_TYPE(
GET_BINTERNAL(dbp, pp,off)->type) == B_KEYDATA) ||
B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
B_KEYDATA)) {
splitp = off;
break;
}
/* Not enough entries before splitp to step back this far. */
if (splitp <= (db_indx_t)(cnt * adjust))
continue;
off = splitp - cnt * adjust;
if (iflag ? B_TYPE(
GET_BINTERNAL(dbp, pp, off)->type) == B_KEYDATA :
B_TYPE(GET_BKEYDATA(dbp, pp, off)->type) ==
B_KEYDATA) {
splitp = off;
break;
}
}
/*
 * On P_LBTREE pages, entries that share the same index-array offset must
 * stay together: if the split point and the entry before it share an
 * offset, search outward in both directions for the nearest boundary
 * where the offset changes.  The loop has no iteration limit.
 * NOTE(review): reads inp[splitp - adjust]; assumes splitp >= adjust
 * here -- confirm against callers.
 */
if (TYPE(pp) == P_LBTREE &&
inp[splitp] == inp[splitp - adjust])
for (cnt = 1;; ++cnt) {
off = splitp + cnt * adjust;
if (off < NUM_ENT(pp) &&
inp[splitp] != inp[off]) {
splitp = off;
break;
}
if (splitp <= (db_indx_t)(cnt * adjust))
continue;
off = splitp - cnt * adjust;
if (inp[splitp] != inp[off]) {
/* Boundary found behind us: split just after the differing entry. */
splitp = off + adjust;
break;
}
}
/* Fill the left page with [0, splitp) and the right page with the rest. */
if ((ret = __bam_copy(dbp, pp, lp, 0, splitp)) != 0)
return (ret);
if ((ret = __bam_copy(dbp, pp, rp, splitp, NUM_ENT(pp))) != 0)
return (ret);
*splitret = splitp;
return (0);
}
int
__bam_copy(dbp, pp, cp, nxt, stop)
DB *dbp;
PAGE *pp, *cp;
u_int32_t nxt, stop;
{
BINTERNAL internal;
db_indx_t *cinp, nbytes, off, *pinp;
cinp = P_INP(dbp, cp);
pinp = P_INP(dbp, pp);
for (off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) {
switch (TYPE(pp)) {
case P_IBTREE:
if (off == 0 && nxt != 0)
nbytes = BINTERNAL_SIZE(0);
else if (B_TYPE(
GET_BINTERNAL(dbp, pp, nxt)->type) == B_KEYDATA)
nbytes = BINTERNAL_SIZE(
GET_BINTERNAL(dbp, pp, nxt)->len);
else
nbytes = BINTERNAL_SIZE(BOVERFLOW_SIZE);
break;
case P_LBTREE:
if (off != 0 && (nxt % P_INDX) == 0 &&
pinp[nxt] == pinp[nxt - P_INDX]) {
cinp[off] = cinp[off - P_INDX];
continue;
}
case P_LDUP:
case P_LRECNO:
if (B_TYPE(GET_BKEYDATA(dbp, pp, nxt)->type) ==
B_KEYDATA)
nbytes = BKEYDATA_SIZE(GET_BKEYDATA(dbp,
pp, nxt)->len);
else
nbytes = BOVERFLOW_SIZE;
break;
case P_IRECNO:
nbytes = RINTERNAL_SIZE;
break;
default:
return (__db_pgfmt(dbp->env, pp->pgno));
}
cinp[off] = HOFFSET(cp) -= nbytes;
if (off == 0 && nxt != 0 && TYPE(pp) == P_IBTREE) {
internal.len = 0;
UMRW_SET(internal.unused);
internal.type = B_KEYDATA;
internal.pgno = GET_BINTERNAL(dbp, pp, nxt)->pgno;
internal.nrecs = GET_BINTERNAL(dbp, pp, nxt)->nrecs;
memcpy(P_ENTRY(dbp, cp, off), &internal, nbytes);
}
else
memcpy(P_ENTRY(dbp, cp, off),
P_ENTRY(dbp, pp, nxt), nbytes);
}
return (0);
}