#include <iostream>
#include <chrono>
#include <time.h>
#include <stdio.h>
#ifdef _MSC_VER
#pragma warning( push )
#pragma warning( disable : 4996)
#endif
#include <vector>
#include <chrono>
#include <map>
#include "bm.h"
#include "bmalgo.h"
#include "bmserial.h"
#include "bmrandom.h"
#include "bmsparsevec.h"
#include "bmtimer.h"
/// Print the program banner, followed by an empty line, to standard error.
void show_help()
{
    static const char* const banner = "BitMagic benchmark (analytical) (c) 2017.";

    std::cerr << banner << std::endl;
    std::cerr << std::endl;
}
// ---------------------------------------------------------------------
// Benchmark configuration (mutable, namespace scope).
// ---------------------------------------------------------------------

// When true, main() prints the collected timing map after generation.
bool is_timing{false};

// Number of nation ids the supplier/customer generators distribute over.
unsigned nations_cnt{200};

// Nation ids 0 .. nations_top_cnt-1 are served first, from a randomly
// sampled half of all suppliers/customers.
unsigned nations_top_cnt{10};

// Sizes of the generated id spaces.
unsigned suppliers_cnt{100000};
unsigned customers_cnt{1500000};

// Five orders per customer; initialized after customers_cnt (same
// translation unit, declaration order).
unsigned orders_cnt{customers_cnt * 5};
/// Scan the command line for the help switch.
///
/// Walks argv[1] .. argv[argc-1]; on the first "-h" or "--help" the banner
/// is printed via show_help() and scanning stops.  Any other argument is
/// ignored.
///
/// @param argc  argument count as received by main()
/// @param argv  argument vector as received by main()
/// @return always 0, whether or not help was requested
int parse_args(int argc, char *argv[])
{
    for (int pos = 1; pos < argc; ++pos)
    {
        const std::string option(argv[pos]);
        if (option != "-h" && option != "--help")
        {
            continue;
        }
        show_help();
        return 0;
    }
    return 0;
}
// Bit-vector type used by every index in this file.
using TBVector = bm::bvector<>;

// In-memory indexes: key -> bit-vector.
using TIDMap   = std::map<unsigned, TBVector>;
using TID64Map = std::map<uint64_t, TBVector>;

// Byte-buffer indexes: key -> std::vector<char> holding a serialized
// bit-vector (written by the Serialize* helpers in this file).
using TIDSMap   = std::map<unsigned, std::vector<char> >;
using TID64SMap = std::map<uint64_t, std::vector<char> >;

// Duration map passed to bm::chrono_taker::print_duration_map() in main().
bm::chrono_taker::duration_map_type timing_map;
/// Supplier-side indexes.
///
/// Owns the heap-allocated `suppliers_total_bv` (allocated in the
/// constructor, released in the destructor).  Because the destructor
/// deletes a raw pointer, a member-wise copy would lead to a double delete;
/// copy construction and copy assignment are therefore disabled.
struct Suppliers
{
    Suppliers()
    {
        suppliers_total_bv = new TBVector(bm::BM_GAP);
    }

    ~Suppliers()
    {
        delete suppliers_total_bv;
    }

    // Non-copyable: see the ownership note above.
    Suppliers(const Suppliers&) = delete;
    Suppliers& operator=(const Suppliers&) = delete;

    /// Bit-vector of all supplier ids (GenerateSuppliersIdx sets
    /// 0 .. suppliers_cnt-1).
    TBVector* suppliers_total_bv;

    /// nation id -> bit-vector of supplier ids assigned to that nation.
    TIDMap    suppliers_nations_bvmap;
};
/// Customer-side indexes.
///
/// Owns the heap-allocated `customers_total_bv` (allocated in the
/// constructor, released in the destructor).  Because the destructor
/// deletes a raw pointer, a member-wise copy would lead to a double delete;
/// copy construction and copy assignment are therefore disabled.
struct Customer
{
    Customer()
    {
        customers_total_bv = new TBVector(bm::BM_GAP);
    }

    ~Customer()
    {
        delete customers_total_bv;
    }

    // Non-copyable: see the ownership note above.
    Customer(const Customer&) = delete;
    Customer& operator=(const Customer&) = delete;

    /// Bit-vector of all customer ids (GenerateCustomersIdx sets
    /// 0 .. customers_cnt-1).
    TBVector* customers_total_bv;

    /// nation id -> bit-vector of customer ids assigned to that nation.
    TIDMap    customers_nations_bvmap;
};
/// Order-side indexes.
///
/// Owns the heap-allocated `orders_total_bv` (allocated in the constructor,
/// released in the destructor).  Because the destructor deletes a raw
/// pointer, a member-wise copy would lead to a double delete; copy
/// construction and copy assignment are therefore disabled.
struct Order
{
    Order()
    {
        orders_total_bv = new TBVector(bm::BM_GAP);
    }

    ~Order()
    {
        delete orders_total_bv;
    }

    // Non-copyable: see the ownership note above.
    Order(const Order&) = delete;
    Order& operator=(const Order&) = delete;

    /// Bit-vector of all order ids (GenerateOrdersIdx sets
    /// 0 .. orders_cnt-1).
    TBVector* orders_total_bv;

    /// customer id -> serialized bit-vector of that customer's order ids.
    TIDSMap   orders_customer_smap;
};
/// Line-item indexes.
///
/// Each index exists twice: a `*_bvmap` of live bit-vectors that
/// GenerateLineItemIdx fills, and a `*_smap` of serialized buffers that
/// SerializeMergeIDMap folds the live vectors into (clearing the `*_bvmap`).
///
/// Owns the heap-allocated `lineitem_total_bv` (allocated in the
/// constructor, released in the destructor).  Because the destructor
/// deletes a raw pointer, a member-wise copy would lead to a double delete;
/// copy construction and copy assignment are therefore disabled.
struct LineItem
{
    LineItem()
    {
        lineitem_total_bv = new TBVector(bm::BM_GAP);
    }

    ~LineItem()
    {
        delete lineitem_total_bv;
    }

    // Non-copyable: see the ownership note above.
    LineItem(const LineItem&) = delete;
    LineItem& operator=(const LineItem&) = delete;

    /// Bit-vector with one bit per generated line-item id.
    TBVector* lineitem_total_bv;

    /// Ship date -> line items.  Key is (year << 32) | day, as built in
    /// GenerateLineItemIdx.
    TID64Map  lineitem_shipdate_bvmap;
    TID64SMap lineitem_shipdate_smap;

    /// Supplier id -> line items.
    TIDMap    lineitem_supplier_bvmap;
    TIDSMap   lineitem_supplier_smap;

    /// Order id -> line items.
    TIDMap    lineitem_order_bvmap;
    TIDSMap   lineitem_order_smap;
};
/// Call optimize() on every mapped value of an id -> bit-vector map.
///
/// @param id_map   map whose mapped values provide optimize() and
///                 optimize_gap_size()
/// @param opt_gap  when true, optimize_gap_size() is additionally called on
///                 each value, right after its optimize()
template<typename TM>
void OptimizeIDMap(TM& id_map, bool opt_gap = false)
{
    for (auto& entry : id_map)
    {
        auto& bv = entry.second;
        bv.optimize();
        if (!opt_gap)
        {
            continue;
        }
        bv.optimize_gap_size();
    }
}
static
size_t ComputeIndexSize(const TIDSMap& sm)
{
size_t s = 0;
for (TIDSMap::const_iterator it = sm.begin();
it != sm.end();
++it)
{
const std::vector<char>& v = it->second;
if (v.size()==0)
{
std::cerr << "Empty vector found!" << std::endl;
exit(1);
}
s += v.size();
} return s;
}
/// Optimize a bit-vector and serialize it into a tightly sized buffer.
///
/// The vector is first optimized in place with opt_compress (which is why
/// `bv` is taken by non-const reference); the statistics produced by that
/// call give the scratch size.  The bytes produced by the serializer are
/// then copied into `buf_vect`, which ends up holding exactly the
/// serialized length.
///
/// When DEBUG is defined, the result is deserialized again and compared to
/// `bv`; a mismatch prints a message and terminates with exit(1).
///
/// @param bvs            serializer to use
/// @param bv             bit-vector to serialize (optimized as a side effect)
/// @param temp_buf_vect  scratch buffer, resized to the worst-case size;
///                       reusable across calls
/// @param buf_vect       receives the serialized bytes (previous content is
///                       replaced)
static
void SerializeBVector(bm::serializer<TBVector>& bvs,
                      TBVector& bv,
                      std::vector<char>& temp_buf_vect,
                      std::vector<char>& buf_vect
                      )
{
    BM_DECLARE_TEMP_BLOCK(tb)
    bm::bvector<>::statistics st;
    bv.optimize(tb, bm::bvector<>::opt_compress, &st);

    temp_buf_vect.resize(st.max_serialize_mem);

    // data() stays well-defined on an empty vector, unlike &v[0].
    unsigned len = bvs.serialize(bv,
                                 reinterpret_cast<unsigned char*>(temp_buf_vect.data()),
                                 st.max_serialize_mem);

    // Keep only the bytes actually produced.  vector::assign replaces the
    // former resize + ::memcpy pair, so this function no longer depends on a
    // C string header the file never includes.
    buf_vect.assign(temp_buf_vect.begin(), temp_buf_vect.begin() + len);

#ifdef DEBUG
    TBVector bv1;
    bm::deserialize(bv1, reinterpret_cast<const unsigned char*>(buf_vect.data()));
    if (bv.compare(bv1) != 0)
    {
        std::cerr << "Deserialization check failed!" << std::endl;
        exit(1);
    }
#endif
}
/// Combine a previously serialized vector with `bv`, then serialize `bv`.
///
/// If `src_vect` is non-empty it is deserialized into `bv` before `bv` is
/// serialized into `buf_vect`.  (Judging by this function's name the
/// deserialization is expected to OR the stored bits into `bv` -- confirm
/// against the BitMagic documentation.)
///
/// `src_vect` and `buf_vect` may refer to the same vector: the source is
/// read completely before the destination is written.
///
/// @param bvs            serializer to use
/// @param bv             bit-vector to merge into and serialize
/// @param temp_buf_vect  scratch buffer for SerializeBVector
/// @param buf_vect       receives the serialized result
/// @param src_vect       earlier serialized content, or empty for none
static
void SerializeORBVector(bm::serializer<TBVector>& bvs,
                        TBVector& bv,
                        std::vector<char>& temp_buf_vect,
                        std::vector<char>& buf_vect,
                        const std::vector<char>& src_vect
                        )
{
    const bool has_previous = !src_vect.empty();
    if (has_previous)
    {
        bm::deserialize(bv, reinterpret_cast<const unsigned char*>(src_vect.data()));
    }
    SerializeBVector(bvs, bv, temp_buf_vect, buf_vect);
}
/// Fold every live bit-vector of `id_map` into its serialized counterpart.
///
/// For each key, the buffer stored in `id_smap` under the same key (created
/// empty if absent) is used both as the previous content and as the
/// destination of SerializeORBVector.  `id_map` is emptied afterwards.
///
/// @param bvs            serializer to use
/// @param temp_buf_vect  scratch buffer shared by all serializations
/// @param id_map         key -> live bit-vector; cleared on return
/// @param id_smap        key -> serialized buffer; updated in place
template<typename TM1, typename TM2>
void SerializeMergeIDMap(bm::serializer<TBVector>& bvs,
                         std::vector<char>& temp_buf_vect,
                         TM1& id_map,
                         TM2& id_smap)
{
    for (auto& entry : id_map)
    {
        // Same vector serves as source and destination of the merge.
        std::vector<char>& stored = id_smap[entry.first];
        SerializeORBVector(bvs, entry.second, temp_buf_vect, stored, stored);
    }
    id_map.clear();
}
/// Build the supplier indexes: the total supplier vector and the
/// nation -> suppliers map.
///
/// Steps:
///  1. mark ids 0 .. suppliers_cnt-1 in the total vector;
///  2. sample suppliers_cnt / 2 of them as the pool for the top nations and
///     hand each nation id below nations_top_cnt a sample of one tenth of
///     that pool;
///  3. spread the suppliers not assigned in step 2 evenly over nation ids
///     nations_top_cnt .. nations_cnt-1;
///  4. add whatever is still left over to one nation chosen with rand();
///  5. optimize all vectors of the map.
///
/// Progress figures are written to std::cout.
///
/// @param sup  supplier indexes to fill
void GenerateSuppliersIdx(Suppliers& sup)
{
    bm::random_subset<TBVector> sampler;

    TBVector& all_suppliers = *sup.suppliers_total_bv;
    all_suppliers.set_range(0, suppliers_cnt-1, true);

    // Pool for the top nations: a random half of all suppliers.
    TBVector top_pool;
    sampler.sample(top_pool, all_suppliers, suppliers_cnt / 2);
    unsigned per_top_nation = top_pool.count() / 10;

    TBVector top_assigned;
    for (unsigned nation_id = 0; nation_id < nations_top_cnt; ++nation_id)
    {
        TBVector nation_bv;
        sampler.sample(nation_bv, top_pool, per_top_nation);
        top_assigned |= nation_bv;
        top_pool -= nation_bv;
        sup.suppliers_nations_bvmap[nation_id] = nation_bv;

        if (!top_pool.any())
        {
            break;
        }
    }
    std::cout << "Assigned suppliers to Big10:" << top_assigned.count()
              << std::endl;

    // Everything the top nations did not take goes to the minor nations.
    TBVector leftover = all_suppliers - top_assigned;
    unsigned minor_nations = nations_cnt - nations_top_cnt;
    unsigned per_minor_nation = leftover.count() / minor_nations;
    std::cout << "Suppliers per minor nation: " << per_minor_nation << std::endl;

    for (unsigned nation_id = nations_top_cnt; nation_id < nations_cnt; ++nation_id)
    {
        TBVector nation_bv;
        sampler.sample(nation_bv, leftover, per_minor_nation);
        leftover -= nation_bv;
        sup.suppliers_nations_bvmap[nation_id] = nation_bv;

        if (!leftover.any())
        {
            break;
        }
    }

    // Remainder of the integer division: attach it to a random nation.
    if (leftover.any())
    {
        sup.suppliers_nations_bvmap[rand()%nations_cnt] |= leftover;
    }

    std::cout << "Nations suppliers index size = " << sup.suppliers_nations_bvmap.size()
              << std::endl;
    OptimizeIDMap(sup.suppliers_nations_bvmap);
}
/// Build the customer indexes: the total customer vector and the
/// nation -> customers map.
///
/// Steps:
///  1. mark ids 0 .. customers_cnt-1 in the total vector;
///  2. sample customers_cnt / 2 of them as the pool for the top nations and
///     hand each nation id below nations_top_cnt a sample of one tenth of
///     that pool;
///  3. spread the customers not assigned in step 2 evenly over nation ids
///     nations_top_cnt .. nations_cnt-1;
///  4. add whatever is still left over to one nation chosen with rand();
///  5. optimize all vectors of the map.
///
/// Progress figures are written to std::cout.
///
/// @param cust  customer indexes to fill
void GenerateCustomersIdx(Customer& cust)
{
    bm::random_subset<TBVector> sampler;

    TBVector& all_customers = *cust.customers_total_bv;
    all_customers.set_range(0, customers_cnt-1, true);

    // Pool for the top nations: a random half of all customers.
    TBVector top_pool;
    sampler.sample(top_pool, all_customers, customers_cnt / 2);
    unsigned per_top_nation = top_pool.count() / 10;

    TBVector top_assigned;
    for (unsigned nation_id = 0; nation_id < nations_top_cnt; ++nation_id)
    {
        TBVector nation_bv;
        sampler.sample(nation_bv, top_pool, per_top_nation);
        top_assigned |= nation_bv;
        top_pool -= nation_bv;
        cust.customers_nations_bvmap[nation_id] = nation_bv;

        if (!top_pool.any())
        {
            break;
        }
    }
    std::cout << "Assigned customers to Big10:" << top_assigned.count()
              << std::endl;

    // Everything the top nations did not take goes to the minor nations.
    TBVector leftover = all_customers - top_assigned;
    unsigned minor_nations = nations_cnt - nations_top_cnt;
    unsigned per_minor_nation = leftover.count() / minor_nations;
    std::cout << "Customers per minor nation: " << per_minor_nation << std::endl;

    for (unsigned nation_id = nations_top_cnt; nation_id < nations_cnt; ++nation_id)
    {
        TBVector nation_bv;
        sampler.sample(nation_bv, leftover, per_minor_nation);
        leftover -= nation_bv;
        cust.customers_nations_bvmap[nation_id] = nation_bv;

        if (!leftover.any())
        {
            break;
        }
    }

    // Remainder of the integer division: attach it to a random nation.
    if (leftover.any())
    {
        cust.customers_nations_bvmap[rand()%nations_cnt] |= leftover;
    }

    std::cout << "Nations customers index size = " << cust.customers_nations_bvmap.size()
              << std::endl;
    OptimizeIDMap(cust.customers_nations_bvmap);
}
/// Build the order indexes: the total order vector and the
/// customer -> serialized-orders map.
///
/// Ids 0 .. orders_cnt-1 are marked in the total vector.  Every customer id
/// then receives a random sample of orders_cnt / customers_cnt orders drawn
/// from the orders not handed out yet; the sample is serialized straight
/// into the customer's map entry (no intermediate copy).
///
/// When DEBUG is defined, each stored buffer is deserialized again and
/// compared with the sampled vector; a mismatch terminates with exit(1).
///
/// Progress and summary figures are written to std::cout.
///
/// @param ord   order indexes to fill
/// @param cust  currently not consulted; kept for the call signature
void GenerateOrdersIdx(Order& ord, const Customer& cust)
{
    (void)cust;

    bm::serializer<bm::bvector<> > bvs;
    bvs.byte_order_serialization(false);
    bvs.gap_length_serialization(false);
    bvs.set_compression_level(4);

    std::vector<char> temp_buf_vect;   // serialization scratch, reused per customer
    bm::random_subset<TBVector> rsub;

    TBVector& bv_ord = *ord.orders_total_bv;
    bv_ord.set_range(0, orders_cnt-1, true);

    // Guard the division: with no customers the loop below does not run and
    // the quotient is never used.
    const unsigned orders_per_cust =
        (customers_cnt != 0) ? (orders_cnt / customers_cnt) : 0;

    TBVector ord_bv(bm::BM_GAP, bm::gap_len_table_min<true>::_len, orders_cnt + 65535);
    TBVector undistr_ord_bv(bv_ord);   // orders not yet given to a customer

    for (unsigned cust_id = 0; cust_id < customers_cnt; ++cust_id)
    {
        ord_bv.clear();
        rsub.sample(ord_bv, undistr_ord_bv, orders_per_cust);
        undistr_ord_bv -= ord_bv;

        // Serialize directly into the map slot instead of filling a local
        // buffer and copying it.
        std::vector<char>& stored = ord.orders_customer_smap[cust_id];
        SerializeBVector(bvs, ord_bv, temp_buf_vect, stored);

#ifdef DEBUG
        TBVector bv1;
        bm::deserialize(bv1, reinterpret_cast<const unsigned char*>(stored.data()));
        if (bv1.compare(ord_bv) != 0)
        {
            std::cerr << "Deserialization check failed!" << std::endl;
            exit(1);
        }
#endif

        if ((cust_id % 10000) == 0)
        {
            std::cout << "\r" << cust_id << "/" << customers_cnt << " " << std::flush;
        }
    }

    // Orders still left in undistr_ord_bv (remainder of the integer
    // division) are not assigned to any customer.

    std::cout << std::endl;
    std::cout << "Orders count = " << ord.orders_total_bv->count() << std::endl;
    std::cout << "Orders customers index size = " << ord.orders_customer_smap.size()
              << std::endl;
    size_t buf_sum = ComputeIndexSize(ord.orders_customer_smap);
    std::cout << "Orders customers index mem.size = " << buf_sum
              << std::endl;
}
/// Build the line-item indexes (ship date, supplier, order).
///
/// For every order id 0 .. orders_cnt-1 between 1 and 6 line items are
/// generated (rand() % 7, with 0 bumped to 1).  Line-item ids are assigned
/// consecutively starting at 0.
///  - Orders with fewer than 4 items use one ship date and one supplier
///    for all of their items.
///  - Larger orders may, after each item (1-in-3 chance), advance the ship
///    day by one and pick a new supplier.
///
/// Ship-date keys are (year << 32) | day with year in 1994 .. 2003 at
/// order start.
///
/// Every 200000 orders (including order 0) and once more at the end, the
/// live bit-vector maps are merged into their serialized counterparts and
/// cleared.
///
/// Progress and summary figures are written to std::cout.
///
/// @param litem  line-item indexes to fill
/// @param ord    currently not consulted; kept for the call signature
/// @param cust   currently not consulted; kept for the call signature
void GenerateLineItemIdx(LineItem& litem, const Order& ord, const Customer& cust)
{
    (void)ord;
    (void)cust;

    bm::serializer<bm::bvector<> > bvs;
    bvs.byte_order_serialization(false);
    bvs.gap_length_serialization(false);
    bvs.set_compression_level(4);

    std::vector<char> temp_buf_vect;   // serialization scratch, reused

    // Merge all three live maps into their serialized maps (clears the
    // live maps).  Order of the three calls matches the original code.
    auto flush_live_maps = [&]()
    {
        SerializeMergeIDMap(bvs,
                            temp_buf_vect,
                            litem.lineitem_shipdate_bvmap,
                            litem.lineitem_shipdate_smap);
        SerializeMergeIDMap(bvs,
                            temp_buf_vect,
                            litem.lineitem_supplier_bvmap,
                            litem.lineitem_supplier_smap);
        SerializeMergeIDMap(bvs,
                            temp_buf_vect,
                            litem.lineitem_order_bvmap,
                            litem.lineitem_order_smap);
    };

    TBVector& bv_litem = *litem.lineitem_total_bv;
    unsigned li_id = 0;
    const unsigned year_from = 1994;

    for (unsigned order_id = 0; order_id < orders_cnt; ++order_id)
    {
        // NOTE: the sequence of rand() calls below is part of the generated
        // data set; do not reorder.
        unsigned li_cnt = rand() % 7;
        if (li_cnt == 0)
            li_cnt = 1;
        uint64_t order_year = year_from + (rand()%10);

        // Looked up per order: the live maps may have been cleared by the
        // flush at the end of the previous iteration.
        TBVector& bv_order = litem.lineitem_order_bvmap[order_id];

        // Both order sizes start from one random day and one random supplier.
        unsigned li_day = rand() % 365;
        unsigned supp_id = rand() % suppliers_cnt;

        if (li_cnt < 4)
        {
            // Small order: a single date and supplier for all items, so the
            // map lookups are done once.
            uint64_t li_date = (order_year << 32) | li_day;
            TBVector& bv_date = litem.lineitem_shipdate_bvmap[li_date];
            TBVector& bv_supp = litem.lineitem_supplier_bvmap[supp_id];
            for (unsigned j = 0; j < li_cnt; ++j)
            {
                bv_date[li_id] = true;
                bv_supp[li_id] = true;
                bv_litem[li_id] = true;
                bv_order[li_id] = true;
                ++li_id;
            }
        }
        else
        {
            // Large order: date and supplier may change between items, so
            // they are looked up per item.
            for (unsigned j = 0; j < li_cnt; ++j)
            {
                uint64_t li_date = (order_year << 32) | li_day;
                TBVector& bv_date = litem.lineitem_shipdate_bvmap[li_date];
                TBVector& bv_supp = litem.lineitem_supplier_bvmap[supp_id];
                bv_date[li_id] = true;
                bv_supp[li_id] = true;
                bv_litem[li_id] = true;
                bv_order[li_id] = true;

                if (rand()%3 == 0)
                {
                    ++li_day;
                    supp_id = rand() % suppliers_cnt;
                }
                // NOTE(review): initial days are 0..364 but the rollover
                // allows 365 and restarts at 1 -- left as is; confirm the
                // intended day numbering.
                if (li_day > 365)
                {
                    li_day = 1;
                    ++order_year;
                }
                ++li_id;
            }
        }

        if ((order_id % 200000) == 0)
        {
            std::cout << "\r" << order_id << "[OPT]" << std::flush;
            flush_live_maps();
            std::cout << "\r" << order_id << " " << std::flush;
        }
    }
    std::cout << std::endl;

    // Merge what accumulated since the last periodic flush.
    flush_live_maps();

    std::cout
        << "Lineitems count = " << litem.lineitem_total_bv->count() << std::endl
        << "Lineitems shipdate index size = " << litem.lineitem_shipdate_smap.size() << std::endl
        << "Lineitems supplier index size = " << litem.lineitem_supplier_smap.size() << std::endl
        << "Lineitems order index size = " << litem.lineitem_order_smap.size()
        << std::endl;
}
/// Program entry point: generate all benchmark indexes in sequence
/// (suppliers, customers, orders, line items), then wait for a character on
/// stdin.  If is_timing is set, the timing map is printed afterwards.
///
/// The command-line arguments are not inspected here.
///
/// @return 0 on success, 1 if a std::exception escaped the generation
int main(int argc, char *argv[])
{
    Suppliers supp;
    Customer  cust;
    Order     ord;
    LineItem  lineitem;

    int exit_code = 0;
    try
    {
        GenerateSuppliersIdx(supp);
        GenerateCustomersIdx(cust);
        GenerateOrdersIdx(ord, cust);
        GenerateLineItemIdx(lineitem, ord, cust);

        // Blocks until a character (or EOF) arrives on stdin.
        getchar();

        if (is_timing)
        {
            std::cout << std::endl << "Timings (ms):" << std::endl;
            bm::chrono_taker::print_duration_map(timing_map);
        }
    }
    catch (const std::exception& ex)
    {
        std::cerr << "Error:" << ex.what() << std::endl;
        exit_code = 1;
    }
    return exit_code;
}
#ifdef _MSC_VER
#pragma warning( pop )
#endif