#include "postgres.h"
#include <limits.h>
#include "access/xact.h"
#include "common/hashfn.h"
#include "port/pg_bitutils.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "utils/dynahash.h"
#include "utils/memutils.h"
#define DEF_SEGSIZE 256
#define DEF_SEGSIZE_SHIFT 8
#define DEF_DIRSIZE 256
#define DEF_FFACTOR 1
#define NUM_FREELISTS 32
typedef HASHELEMENT *HASHBUCKET;
typedef HASHBUCKET *HASHSEGMENT;
typedef struct
{
slock_t mutex;
long nentries;
HASHELEMENT *freeList;
} FreeListData;
struct HASHHDR
{
FreeListData freeList[NUM_FREELISTS];
long dsize;
long nsegs;
uint32 max_bucket;
uint32 high_mask;
uint32 low_mask;
Size keysize;
Size entrysize;
long num_partitions;
long ffactor;
long max_dsize;
long ssize;
int sshift;
int nelem_alloc;
#ifdef HASH_STATISTICS
long accesses;
long collisions;
#endif
};
#define IS_PARTITIONED(hctl) ((hctl)->num_partitions != 0)
#define FREELIST_IDX(hctl, hashcode) \
(IS_PARTITIONED(hctl) ? (hashcode) % NUM_FREELISTS : 0)
struct HTAB
{
HASHHDR *hctl;
HASHSEGMENT *dir;
HashValueFunc hash;
HashCompareFunc match;
HashCopyFunc keycopy;
HashAllocFunc alloc;
MemoryContext hcxt;
char *tabname;
bool isshared;
bool isfixed;
bool frozen;
Size keysize;
long ssize;
int sshift;
};
#define ELEMENTKEY(helem) (((char *)(helem)) + MAXALIGN(sizeof(HASHELEMENT)))
#define ELEMENT_FROM_KEY(key) \
((HASHELEMENT *) (((char *) (key)) - MAXALIGN(sizeof(HASHELEMENT))))
#define MOD(x,y) ((x) & ((y)-1))
#ifdef HASH_STATISTICS
static long hash_accesses,
hash_collisions,
hash_expansions;
#endif
static void *DynaHashAlloc(Size size);
static HASHSEGMENT seg_alloc(HTAB *hashp);
static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx);
static bool dir_realloc(HTAB *hashp);
static bool expand_table(HTAB *hashp);
static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx);
static void hdefault(HTAB *hashp);
static int choose_nelem_alloc(Size entrysize);
static bool init_htab(HTAB *hashp, long nelem);
static void hash_corrupted(HTAB *hashp);
static long next_pow2_long(long num);
static int next_pow2_int(long num);
static void register_seq_scan(HTAB *hashp);
static void deregister_seq_scan(HTAB *hashp);
static bool has_seq_scans(HTAB *hashp);
static __thread MemoryContext CurrentDynaHashCxt = NULL;
static void *
DynaHashAlloc(Size size)
{
Assert(MemoryContextIsValid(CurrentDynaHashCxt));
return MemoryContextAlloc(CurrentDynaHashCxt, size);
}
#ifdef HASH_STATISTICS
#endif
#ifdef HASH_DEBUG
#endif
#ifdef HASH_STATISTICS
#endif
static inline uint32
calc_bucket(HASHHDR *hctl, uint32 hash_val)
{
uint32 bucket;
bucket = hash_val & hctl->high_mask;
if (bucket > hctl->max_bucket)
bucket = bucket & hctl->low_mask;
return bucket;
}
void *
hash_search(HTAB *hashp,
const void *keyPtr,
HASHACTION action,
bool *foundPtr)
{
return hash_search_with_hash_value(hashp,
keyPtr,
hashp->hash(keyPtr, hashp->keysize),
action,
foundPtr);
}
void *
hash_search_with_hash_value(HTAB *hashp,
const void *keyPtr,
uint32 hashvalue,
HASHACTION action,
bool *foundPtr)
{
HASHHDR *hctl = hashp->hctl;
int freelist_idx = FREELIST_IDX(hctl, hashvalue);
Size keysize;
uint32 bucket;
long segment_num;
long segment_ndx;
HASHSEGMENT segp;
HASHBUCKET currBucket;
HASHBUCKET *prevBucketPtr;
HashCompareFunc match;
#ifdef HASH_STATISTICS
hash_accesses++;
hctl->accesses++;
#endif
if (action == HASH_ENTER || action == HASH_ENTER_NULL)
{
if (!IS_PARTITIONED(hctl) && !hashp->frozen &&
hctl->freeList[0].nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
!has_seq_scans(hashp))
(void) expand_table(hashp);
}
bucket = calc_bucket(hctl, hashvalue);
segment_num = bucket >> hashp->sshift;
segment_ndx = MOD(bucket, hashp->ssize);
segp = hashp->dir[segment_num];
if (segp == NULL)
hash_corrupted(hashp);
prevBucketPtr = &segp[segment_ndx];
currBucket = *prevBucketPtr;
match = hashp->match;
keysize = hashp->keysize;
while (currBucket != NULL)
{
if (currBucket->hashvalue == hashvalue &&
match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
break;
prevBucketPtr = &(currBucket->link);
currBucket = *prevBucketPtr;
#ifdef HASH_STATISTICS
hash_collisions++;
hctl->collisions++;
#endif
}
if (foundPtr)
*foundPtr = (bool) (currBucket != NULL);
switch (action)
{
case HASH_FIND:
if (currBucket != NULL)
return (void *) ELEMENTKEY(currBucket);
return NULL;
case HASH_REMOVE:
if (currBucket != NULL)
{
if (IS_PARTITIONED(hctl))
SpinLockAcquire(&(hctl->freeList[freelist_idx].mutex));
Assert(hctl->freeList[freelist_idx].nentries > 0);
hctl->freeList[freelist_idx].nentries--;
*prevBucketPtr = currBucket->link;
currBucket->link = hctl->freeList[freelist_idx].freeList;
hctl->freeList[freelist_idx].freeList = currBucket;
if (IS_PARTITIONED(hctl))
SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
return (void *) ELEMENTKEY(currBucket);
}
return NULL;
case HASH_ENTER_NULL:
Assert(hashp->alloc != DynaHashAlloc);
case HASH_ENTER:
if (currBucket != NULL)
return (void *) ELEMENTKEY(currBucket);
if (hashp->frozen)
elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
hashp->tabname);
currBucket = get_hash_entry(hashp, freelist_idx);
if (currBucket == NULL)
{
if (action == HASH_ENTER_NULL)
return NULL;
if (hashp->isshared)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory")));
else
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
*prevBucketPtr = currBucket;
currBucket->link = NULL;
currBucket->hashvalue = hashvalue;
hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize);
return (void *) ELEMENTKEY(currBucket);
}
elog(ERROR, "unrecognized hash action code: %d", (int) action);
return NULL;
}
#ifdef HASH_STATISTICS
#endif
#ifdef HASH_STATISTICS
#endif
static HASHBUCKET
get_hash_entry(HTAB *hashp, int freelist_idx)
{
HASHHDR *hctl = hashp->hctl;
HASHBUCKET newElement;
for (;;)
{
if (IS_PARTITIONED(hctl))
SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
newElement = hctl->freeList[freelist_idx].freeList;
if (newElement != NULL)
break;
if (IS_PARTITIONED(hctl))
SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx))
{
int borrow_from_idx;
if (!IS_PARTITIONED(hctl))
return NULL;
borrow_from_idx = freelist_idx;
for (;;)
{
borrow_from_idx = (borrow_from_idx + 1) % NUM_FREELISTS;
if (borrow_from_idx == freelist_idx)
break;
SpinLockAcquire(&(hctl->freeList[borrow_from_idx].mutex));
newElement = hctl->freeList[borrow_from_idx].freeList;
if (newElement != NULL)
{
hctl->freeList[borrow_from_idx].freeList = newElement->link;
SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
hctl->freeList[freelist_idx].nentries++;
SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
return newElement;
}
SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
}
return NULL;
}
}
hctl->freeList[freelist_idx].freeList = newElement->link;
hctl->freeList[freelist_idx].nentries++;
if (IS_PARTITIONED(hctl))
SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
return newElement;
}
static bool
expand_table(HTAB *hashp)
{
HASHHDR *hctl = hashp->hctl;
HASHSEGMENT old_seg,
new_seg;
long old_bucket,
new_bucket;
long new_segnum,
new_segndx;
long old_segnum,
old_segndx;
HASHBUCKET *oldlink,
*newlink;
HASHBUCKET currElement,
nextElement;
Assert(!IS_PARTITIONED(hctl));
#ifdef HASH_STATISTICS
hash_expansions++;
#endif
new_bucket = hctl->max_bucket + 1;
new_segnum = new_bucket >> hashp->sshift;
new_segndx = MOD(new_bucket, hashp->ssize);
if (new_segnum >= hctl->nsegs)
{
if (new_segnum >= hctl->dsize)
if (!dir_realloc(hashp))
return false;
if (!(hashp->dir[new_segnum] = seg_alloc(hashp)))
return false;
hctl->nsegs++;
}
hctl->max_bucket++;
old_bucket = (new_bucket & hctl->low_mask);
if ((uint32) new_bucket > hctl->high_mask)
{
hctl->low_mask = hctl->high_mask;
hctl->high_mask = (uint32) new_bucket | hctl->low_mask;
}
old_segnum = old_bucket >> hashp->sshift;
old_segndx = MOD(old_bucket, hashp->ssize);
old_seg = hashp->dir[old_segnum];
new_seg = hashp->dir[new_segnum];
oldlink = &old_seg[old_segndx];
newlink = &new_seg[new_segndx];
for (currElement = *oldlink;
currElement != NULL;
currElement = nextElement)
{
nextElement = currElement->link;
if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
{
*oldlink = currElement;
oldlink = &currElement->link;
}
else
{
*newlink = currElement;
newlink = &currElement->link;
}
}
*oldlink = NULL;
*newlink = NULL;
return true;
}
static bool
dir_realloc(HTAB *hashp)
{
HASHSEGMENT *p;
HASHSEGMENT *old_p;
long new_dsize;
long old_dirsize;
long new_dirsize;
if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
return false;
new_dsize = hashp->hctl->dsize << 1;
old_dirsize = hashp->hctl->dsize * sizeof(HASHSEGMENT);
new_dirsize = new_dsize * sizeof(HASHSEGMENT);
old_p = hashp->dir;
CurrentDynaHashCxt = hashp->hcxt;
p = (HASHSEGMENT *) hashp->alloc((Size) new_dirsize);
if (p != NULL)
{
memcpy(p, old_p, old_dirsize);
MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);
hashp->dir = p;
hashp->hctl->dsize = new_dsize;
Assert(hashp->alloc == DynaHashAlloc);
pfree(old_p);
return true;
}
return false;
}
static HASHSEGMENT
seg_alloc(HTAB *hashp)
{
HASHSEGMENT segp;
CurrentDynaHashCxt = hashp->hcxt;
segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->ssize);
if (!segp)
return NULL;
MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->ssize);
return segp;
}
static bool
element_alloc(HTAB *hashp, int nelem, int freelist_idx)
{
HASHHDR *hctl = hashp->hctl;
Size elementSize;
HASHELEMENT *firstElement;
HASHELEMENT *tmpElement;
HASHELEMENT *prevElement;
int i;
if (hashp->isfixed)
return false;
elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
CurrentDynaHashCxt = hashp->hcxt;
firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize);
if (!firstElement)
return false;
prevElement = NULL;
tmpElement = firstElement;
for (i = 0; i < nelem; i++)
{
tmpElement->link = prevElement;
prevElement = tmpElement;
tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
}
if (IS_PARTITIONED(hctl))
SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
firstElement->link = hctl->freeList[freelist_idx].freeList;
hctl->freeList[freelist_idx].freeList = prevElement;
if (IS_PARTITIONED(hctl))
SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
return true;
}
static void
hash_corrupted(HTAB *hashp)
{
if (hashp->isshared)
elog(PANIC, "hash table \"%s\" corrupted", hashp->tabname);
else
elog(FATAL, "hash table \"%s\" corrupted", hashp->tabname);
}
#if SIZEOF_LONG < 8
#else
#endif
#define MAX_SEQ_SCANS 100
static __thread HTAB *seq_scan_tables[MAX_SEQ_SCANS];
static __thread int num_seq_scans = 0;
static bool
has_seq_scans(HTAB *hashp)
{
int i;
for (i = 0; i < num_seq_scans; i++)
{
if (seq_scan_tables[i] == hashp)
return true;
}
return false;
}