#ifdef __unix__
# include <unistd.h>
#endif
#if defined(_WIN32) || defined(WIN32)
# include <windows.h>
#endif
#include "gmpcompat.h"
#include "fmpz.h"
#if FLINT_USES_PTHREAD
# include <pthread.h>
# include <stdatomic.h>
#endif
/*
    Bookkeeping record stored at the start of every block and at the start
    of every page inside a block.

    count    number of mpz structs of the block that have been cleared for
             good; it is zeroed at the block start when the block is created
             and incremented each time one of its entries is released.
    thread   (pthread builds only) the thread that allocated the block.
    address  written in each page header; points to the start of the block,
             i.e. the pointer that is eventually passed to flint_free.
*/
typedef struct
{
#if FLINT_USES_PTHREAD
    /* Atomic because the release paths update it with atomic_fetch_add. */
    _Atomic(int) count;
    pthread_t thread;
#else
    int count;
#endif
    void * address;
} fmpz_block_header_s;
/* An mpz whose limb allocation exceeds this is shrunk back to MPZ_MIN_ALLOC
   limbs by _fmpz_clear_mpz before it is put back on the free list. */
#define FLINT_MPZ_MAX_CACHE_LIMBS 64
/* Refuse to build if the caching threshold is below the shrink target. */
#if FLINT_MPZ_MAX_CACHE_LIMBS < MPZ_MIN_ALLOC
# error
#endif
/* Number of page-sized chunks carved out of each block by _fmpz_new_mpz. */
#define PAGES_PER_BLOCK 16
/* Lower bound on the free-list capacity chosen when _fmpz_clear_mpz has to
   grow the list. */
#define MPZ_BLOCK 64
/* Free list of mpz structs available for reuse: the array of pointers, the
   number of entries currently stored in it, and its allocated capacity.
   NOTE(review): FLINT_TLS_PREFIX is defined elsewhere; presumably it makes
   these three variables thread-local -- confirm. */
FLINT_TLS_PREFIX mpz_ptr * mpz_free_arr = NULL;
FLINT_TLS_PREFIX ulong mpz_free_num = 0;
FLINT_TLS_PREFIX ulong mpz_free_alloc = 0;
/* The three values below are (re)computed by _fmpz_new_mpz every time it
   allocates a block. */
/* Page size as reported by flint_get_page_size. */
static slong flint_page_size;
/* Number of usable mpz structs per block (page headers excluded). */
static slong flint_mpz_structs_per_block;
/* ~(flint_page_size - 1): masks an address down to the start of its page. */
static slong flint_page_mask;
static slong flint_get_page_size(void)
{
#if defined(__unix__)
return sysconf(_SC_PAGESIZE);
#elif defined(_WIN32) || defined(WIN32)
SYSTEM_INFO si;
GetSystemInfo(&si);
return si.dwPageSize;
#else
return 4096;
#endif
}
/*
    Rounds ptr down to a multiple of size and then advances it by size.

    The result is therefore always strictly greater than ptr; a pointer that
    is already aligned is moved forward by a full size.  The masking only
    yields a multiple of size when size is a power of two.
*/
static inline
void * flint_align_ptr(void * ptr, slong size)
{
    slong rounded_down = (slong) ptr & ~(size - 1);

    return (void *) (rounded_down + size);
}
mpz_ptr _fmpz_new_mpz(void)
{
if (mpz_free_num == 0)
{
void * aligned_ptr, * ptr;
slong i, j, num, block_size, skip;
flint_page_size = flint_get_page_size();
block_size = PAGES_PER_BLOCK*flint_page_size;
flint_page_mask = ~(flint_page_size - 1);
ptr = flint_malloc(block_size + flint_page_size);
aligned_ptr = flint_align_ptr(ptr, flint_page_size);
((fmpz_block_header_s *) ptr)->count = 0;
#if FLINT_USES_PTHREAD
((fmpz_block_header_s *) ptr)->thread = pthread_self();
#endif
skip = (sizeof(fmpz_block_header_s) - 1)/sizeof(__mpz_struct) + 1;
num = flint_page_size/sizeof(__mpz_struct);
flint_mpz_structs_per_block = PAGES_PER_BLOCK*(num - skip);
for (i = 0; i < PAGES_PER_BLOCK; i++)
{
mpz_ptr page_ptr = (mpz_ptr) ((slong) aligned_ptr + i*flint_page_size);
((fmpz_block_header_s *) page_ptr)->address = ptr;
for (j = skip; j < num; j++)
{
mpz_init2(page_ptr + j, MPZ_MIN_ALLOC * FLINT_BITS);
if (mpz_free_num >= mpz_free_alloc)
{
mpz_free_alloc = FLINT_MAX(mpz_free_num + 1, 2 * mpz_free_alloc);
mpz_free_arr = flint_realloc(mpz_free_arr, mpz_free_alloc * sizeof(mpz_ptr));
}
mpz_free_arr[mpz_free_num++] = page_ptr + j;
}
}
}
return mpz_free_arr[--mpz_free_num];
}
/*
    Gives back the mpz referenced by the fmpz value f.

    The block that owns the mpz is located through the header at the start
    of its page.  If no entry of that block has been released yet and (in
    pthread builds) the block was allocated by the calling thread, the mpz
    is pushed onto the free list, after being shrunk to MPZ_MIN_ALLOC limbs
    when it holds more than FLINT_MPZ_MAX_CACHE_LIMBS.  Otherwise the mpz is
    cleared, the block's release count is incremented, and the block is
    freed once the count reaches flint_mpz_structs_per_block.
*/
void _fmpz_clear_mpz(fmpz f)
{
    mpz_ptr z = COEFF_TO_PTR(f);
    fmpz_block_header_s * page_header;
    fmpz_block_header_s * block;
    int reusable, released;

    FLINT_ASSERT(z->_mp_alloc >= MPZ_MIN_ALLOC);

    /* Page header -> block header. */
    page_header = (fmpz_block_header_s *) ((slong) z & flint_page_mask);
    block = (fmpz_block_header_s *) page_header->address;

#if FLINT_USES_PTHREAD
    reusable = (block->count == 0 && pthread_equal(block->thread, pthread_self()));
#else
    reusable = (block->count == 0);
#endif

    if (reusable)
    {
        /* Do not let oversized allocations linger in the cache. */
        if (z->_mp_alloc > FLINT_MPZ_MAX_CACHE_LIMBS)
            mpz_realloc(z, MPZ_MIN_ALLOC);

        if (mpz_free_num == mpz_free_alloc)
        {
            mpz_free_alloc = FLINT_MAX(MPZ_BLOCK, 2 * mpz_free_alloc);
            mpz_free_arr = flint_realloc(mpz_free_arr, mpz_free_alloc * sizeof(mpz_ptr));
        }

        mpz_free_arr[mpz_free_num++] = z;
        return;
    }

    /* Release for good and account for it in the owning block. */
    mpz_clear(z);
#if FLINT_USES_PTHREAD
    released = atomic_fetch_add(&(block->count), 1) + 1;
#else
    released = ++block->count;
#endif

    /* The last entry out frees the whole block. */
    if (released == flint_mpz_structs_per_block)
        flint_free(block);
}
void _fmpz_cleanup_mpz_content(void)
{
ulong i;
for (i = 0; i < mpz_free_num; i++)
{
int new_count;
fmpz_block_header_s * ptr;
mpz_clear(mpz_free_arr[i]);
ptr = (fmpz_block_header_s *)((slong) mpz_free_arr[i] & ~(flint_page_size - 1));
ptr = (fmpz_block_header_s *) ptr->address;
#if FLINT_USES_PTHREAD
new_count = atomic_fetch_add(&(ptr->count), 1) + 1;
#else
new_count = ++ptr->count;
#endif
if (new_count == flint_mpz_structs_per_block)
flint_free(ptr);
}
mpz_free_num = mpz_free_alloc = 0;
}
/*
    Tears down the mpz free list: first clears every cached mpz through
    _fmpz_cleanup_mpz_content, then frees the pointer array and resets it
    to NULL.
*/
void _fmpz_cleanup(void)
{
    /* Entries first: they are reached through the array freed below. */
    _fmpz_cleanup_mpz_content();

    flint_free(mpz_free_arr);
    mpz_free_arr = NULL;
}
/*
    Ensures that f refers to an mpz and returns a pointer to it.

    If f already refers to an mpz that one is returned unchanged.  Otherwise
    a fresh mpz is obtained from _fmpz_new_mpz and installed in f; the
    previous small value of f is not copied into it.
*/
mpz_ptr _fmpz_promote(fmpz_t f)
{
    mpz_ptr fresh;

    if (COEFF_IS_MPZ(*f))
        return COEFF_TO_PTR(*f);

    fresh = _fmpz_new_mpz();
    *f = PTR_TO_COEFF(fresh);

    return fresh;
}
/*
    Ensures that f refers to an mpz, keeping its value, and returns a
    pointer to that mpz.

    If f already refers to an mpz that one is returned unchanged.  Otherwise
    a fresh mpz is obtained from _fmpz_new_mpz, installed in f, and set to
    the small value f held before.
*/
mpz_ptr _fmpz_promote_val(fmpz_t f)
{
    fmpz small = *f;
    mpz_ptr fresh;

    if (COEFF_IS_MPZ(small))
        return COEFF_TO_PTR(small);

    fresh = _fmpz_new_mpz();
    *f = PTR_TO_COEFF(fresh);
    flint_mpz_set_si(fresh, small);

    return fresh;
}
/*
    Converts f, which must currently refer to an mpz, back to a small value
    when the mpz is zero or a single limb of magnitude at most COEFF_MAX.
    The mpz is handed to _fmpz_clear_mpz in that case; otherwise f is left
    untouched.
*/
void _fmpz_demote_val(fmpz_t f)
{
    mpz_ptr z = COEFF_TO_PTR(*f);
    int signed_limbs = z->_mp_size;

    switch (signed_limbs)
    {
        case 0:
            _fmpz_clear_mpz(*f);
            *f = 0;
            break;

        case 1:
        case -1:
        {
            /* Read the limb before the mpz is given back. */
            ulong magnitude = z->_mp_d[0];

            if (magnitude <= (ulong) COEFF_MAX)
            {
                _fmpz_clear_mpz(*f);
                /* signed_limbs is +1 or -1 here and supplies the sign. */
                *f = signed_limbs * (fmpz) magnitude;
            }
            break;
        }

        default:
            /* More than one limb: cannot be a small value. */
            break;
    }
}
/*
    Initialises f so that it refers to an mpz struct that is a shallow copy
    of z.

    f is first set to zero and promoted; the storage of the mpz obtained
    this way is released with mpz_clear, and the struct is then overwritten
    with the fields of z, so it shares z's limb data rather than owning a
    copy.
*/
void _fmpz_init_readonly_mpz(fmpz_t f, const mpz_t z)
{
    mpz_ptr shell;

    *f = WORD(0);
    shell = _fmpz_promote(f);

    /* Drop the limbs allocated for the shell before aliasing z. */
    mpz_clear(shell);
    *shell = *z;
}
/*
    Clears z with mpz_clear when its value is zero, or when it consists of
    a single limb whose magnitude does not exceed COEFF_MAX.  Any other z
    is left untouched.

    NOTE(review): presumably only such values correspond to an mpz that
    this code owns rather than a borrowed view -- confirm against the
    callers.
*/
void _fmpz_clear_readonly_mpz(mpz_t z)
{
    int signed_limbs = z->_mp_size;

    if (signed_limbs != 0)
    {
        if (signed_limbs != 1 && signed_limbs != -1)
            return;

        if (!(z->_mp_d[0] <= COEFF_MAX))
            return;
    }

    mpz_clear(z);
}