#include <iostream>
#include <string>
#include <vector>
#include <memory>
#include <cassert>
#include <thread>
#include "bm.h"
#include "bmstrsparsevec.h"
#include "bmsparsevec_algo.h"
#include "bmtimer.h"
#include "bmdbg.h"
#include "bmundef.h"
using namespace std;

/// Bit-vector type used for search results and as the building block
/// of the string sparse vector below.
using bvector_type = bm::bvector<>;

/// Sparse vector of char strings built on top of bvector_type.
/// NOTE(review): the third template argument (8) is a size parameter of
/// bm::str_sparse_vector -- confirm its exact meaning in bmstrsparsevec.h.
using str_sv_type = bm::str_sparse_vector<char, bvector_type, 8>;
static
void GenerateTestData(std::vector<string>& str_coll,
str_sv_type& str_sv,
unsigned max_coll = 8000000,
unsigned repeat_factor=10)
{
auto bi(str_sv.get_back_inserter());
string str;
for (unsigned i = 10; i < max_coll; i+= (rand()&0xF))
{
switch (i & 0xF)
{
case 0: str = "AB"; break;
case 1: str = "GTx"; break;
case 2: str = "cnv"; break;
default: str = "AbY11"; break;
}
str.append(to_string(i));
for (unsigned k = 0; k < repeat_factor; ++k)
{
str_coll.emplace_back(str);
bi = str; }
} bi.flush();
}
/// Diagnostics switch: true by default, cleared by the "-nodiag"
/// command line argument (see parse_args).
bool is_diag = true;

/// Scan the command line arguments (argv[0] is skipped).
///
/// The only recognized option is "-nodiag", which sets is_diag to false;
/// every other argument is ignored.
///
/// @param argc number of entries in argv
/// @param argv argument strings as passed to main()
///
static
void parse_args(int argc, char *argv[])
{
    int pos = 1;
    while (pos < argc)
    {
        const std::string option(argv[pos]);
        ++pos;
        if (option == "-nodiag")
            is_diag = false;
    }
}
int main(int argc, char *argv[])
{
try
{
parse_args(argc, argv);
std::vector<string> str_coll;
str_sv_type str_sv0;
cout << "Generating the test data... " << flush;
GenerateTestData(str_coll, str_sv0);
str_sv_type str_sv1(str_sv0);
cout << "OK" << endl;
{
cout << "Remapping the data to create compressed vector " << flush;
BM_DECLARE_TEMP_BLOCK(tb)
str_sv0.remap();
str_sv0.optimize(tb);
cout << "OK" << endl;
}
if (is_diag)
{
cout << "\nStatistics on generated SV:" << endl;
bm::print_svector_stat(str_sv1);
cout << "\nStatistics on remapped/optimized SV:" << endl;
bm::print_svector_stat(str_sv0);
cout << endl << endl;
}
unsigned test_runs = 10000;
std::vector<string> str_test_coll;
for (bvector_type::size_type i = 0; i < test_runs; ++i)
{
bvector_type::size_type idx = (unsigned) rand() % test_runs;
if (idx >= test_runs)
idx = test_runs/2;
str_test_coll.push_back(str_coll[idx]);
}
assert(str_test_coll.size() == test_runs);
std::vector<unique_ptr<bvector_type> > res_vec1;
bm::sparse_vector_scanner<str_sv_type> scanner;
cout << "Running benchmark tests.." << endl;
for (int pass = 0; pass < 2; pass++)
{
cout << "PASS = " << pass << ((pass==0) ? " -- remap/optimized" : " -- NOT remapped") << endl;
res_vec1.resize(0);
const str_sv_type* str_sv = (pass==0) ? &str_sv0 : &str_sv1;
{
bm::chrono_taker tt("scanner<>::find_eq_str()", test_runs);
for (bvector_type::size_type i = 0; i < test_runs; ++i)
{
const string& str = str_test_coll[i];
bvector_type* bv_res(new bvector_type);
scanner.find_eq_str(*str_sv, str.c_str(), *bv_res);
res_vec1.emplace_back(unique_ptr<bvector_type>(bv_res));
} }
bm::sparse_vector_scanner<str_sv_type>::pipeline<> pipe1(*str_sv);
pipe1.options().batch_size = test_runs;
{
bm::chrono_taker tt("scanner::pipeline find_eq_str()", test_runs);
for (size_t i = 0; i < test_runs; ++i)
{
const string& str = str_test_coll[i];
pipe1.add(str.c_str());
}
pipe1.complete();
scanner.find_eq_str(pipe1);
}
bm::sparse_vector_scanner<str_sv_type>::pipeline<bm::agg_opt_only_counts> pipe2(*str_sv);
pipe1.options().batch_size = test_runs;
{
bm::chrono_taker tt("scanner::pipeline find_eq_str()-count()", test_runs);
for (size_t i = 0; i < test_runs; ++i)
{
const string& str = str_test_coll[i];
pipe2.add(str.c_str());
}
pipe2.complete();
scanner.find_eq_str(pipe2);
}
bvector_type bv_or_total;
{
auto& res_vect = pipe1.get_bv_res_vector(); auto& cnt_vect = pipe2.get_bv_count_vector();
assert(res_vect.size() == cnt_vect.size());
size_t res_sz = res_vect.size();
for (size_t i = 0; i < res_sz; ++i)
{
const bvector_type* bv1 = res_vec1[i].get();
const bvector_type* bv = res_vect[i];
assert(bv);
bool match = bv1->equal(*bv); assert(match);
auto c = cnt_vect[i];
auto cnt = bv->count();
(void)cnt; (void)c; assert(cnt == c);
bv_or_total |= *bv; }
}
typedef bm::agg_run_options<false, false> scanner_custom_opt;
bm::sparse_vector_scanner<str_sv_type>::pipeline<scanner_custom_opt> pipe3(*str_sv);
pipe1.options().batch_size = test_runs;
bvector_type bv_or;
pipe3.set_or_target(&bv_or);
{
bm::chrono_taker tt("scanner::pipeline find_eq_str()-OR()", test_runs);
for (size_t i = 0; i < test_runs; ++i)
{
const string& str = str_test_coll[i];
pipe3.add(str.c_str());
}
pipe3.complete();
scanner.find_eq_str(pipe3); }
bool match = bv_or.equal(bv_or_total);
if (!match)
{
cerr << "OR vector mismatch!" << endl;
exit(1);
}
cout << endl;
}
}
catch(std::exception& ex)
{
std::cerr << ex.what() << std::endl;
return 1;
}
return 0;
}