#include <getopt.h>
#include "common.h"
#include "codecfactory.h"
#include "maropuparser.h"
#include "util.h"
#include "ztimer.h"
#include "cpubenchmark.h"
#include "deltautil.h"
#include "stringutil.h"
using namespace std;
using namespace FastPFor;
// Long-option table handed to getopt_long(). Each entry maps a --name to the
// short-option character that main() switches on; the flag pointer is null so
// getopt_long() returns `val` directly.
static struct option long_options[] = {
    // name                   has_arg            flag     val
    { "codecs",               required_argument, nullptr, 'c' },  // list of schemes to benchmark
    { "minlength",            required_argument, nullptr, 'm' },  // skip arrays shorter than this
    { "maxlength",            required_argument, nullptr, 'M' },  // skip arrays longer than this
    { "nb",                   required_argument, nullptr, 'n' },  // maximum number of arrays to load
    { "splitlongarrays",      no_argument,       nullptr, 'S' },  // same as -S
    { "separatedeltatimes",   no_argument,       nullptr, 'D' },  // same as -D
    { nullptr,                0,                 nullptr, 0   }   // terminator required by getopt_long()
};
void message(const char * prog) {
cerr << " usage : " << prog << " scheme maropubinaryfile " << endl;
cerr << "By default, it assumes that the original data is made of "
"sorted distinct integers." << endl;
cerr << "Use the --codecs flag to specify the schemes." << endl;
cerr << " schemes include:" << endl;
vector < string > all = CODECFactory::allNames();
for (auto i = all.begin(); i != all.end(); ++i) {
cerr << *i << endl;
}
}
int main(int argc, char **argv) {
if (argc < 2) {
message(argv[0]);
return -1;
}
bool splitlongarrays = true;
bool separatedeltatimes = false;
size_t MINLENGTH = 1;
size_t MAXLENGTH = std::numeric_limits<uint32_t>::max();
size_t MAXCOUNTER = std::numeric_limits<std::size_t>::max();
vector < shared_ptr<IntegerCODEC> > tmp = CODECFactory::allSchemes(); vector<algostats> myalgos;
for (auto i = tmp.begin(); i != tmp.end(); ++i) {
myalgos.push_back(algostats(*i));
myalgos.push_back(algostats(*i, true)); }
int c;
while (1) {
int option_index = 0;
c = getopt_long(argc, argv, "DSc:n:m:M:", long_options, &option_index);
if (c == -1)
break;
switch (c) {
case 'D' :
cout<<"# Outputting separate delta and inverseDelta times."<<endl;
separatedeltatimes = true;
break;
case 'S' :
cout<<"#\n# disabling partition of big arrays. Performance may suffer.\n#"<<endl;
splitlongarrays = false;
break;
case 'm' :
istringstream ( optarg ) >> MINLENGTH;
cout<<"# MINLENGTH = "<<MINLENGTH<<endl;
break;
case 'M' :
istringstream ( optarg ) >> MAXLENGTH;
cout<<"# MAXLENGTH = "<<MAXLENGTH<<endl;
break;
case 'n' :
istringstream ( optarg ) >> MAXCOUNTER;
cout<<"# MAXCOUNTER = "<< MAXCOUNTER << endl;
break;
case 'c':
{ myalgos.clear();
string codecsstr(optarg);
if (codecsstr.compare("NONE") != 0) {
vector < string > codecslst = split(codecsstr, ",:;");
for (auto i = codecslst.begin(); i != codecslst.end(); ++i) {
cout << "# pretty name = " << *i << endl;
if(i->at(0) == '@') { string namewithoutprefix = i->substr(1,i->size()-1);
myalgos.push_back(
algostats(CODECFactory::getFromName(namewithoutprefix),true));
} else {
myalgos.push_back(
algostats(CODECFactory::getFromName(*i)));
}
cout << "# added '" << myalgos.back().name() << "'" << endl;
}
}
}
break;
default:
cerr << "unrecognized flag" << endl;
break;
}
}
if (argc - optind < 1) {
cerr << " you must specify a file " << endl;
return -1;
}
string filename = argv[optind];
cout << "# parsing " << filename << endl;
MaropuGapReader reader(filename);
vector<uint32_t, cacheallocator> rawdata;
reader.open();
size_t counter = 0;
const size_t MAXBLOCKSIZE = 104857600; while (counter < MAXCOUNTER) {
vector < vector<uint32_t, cacheallocator> > datas;
size_t datastotalsize = 0;
cout<<"# Filling up a block "<<endl;
while (reader.loadIntegers(rawdata)) {
if ((rawdata.size() < MINLENGTH) || (rawdata.size() > MAXLENGTH))
continue;
++counter;
datastotalsize += rawdata.size();
datas.push_back(rawdata);
if (counter >= MAXCOUNTER) {
cout << "#breaking early" << endl;
break;
}
if (datastotalsize >= MAXBLOCKSIZE)
break;
}
if(datastotalsize == 0) break;
cout<<"# read "<< std::setprecision(3) << static_cast<double>(datastotalsize) * 4.0 / (1024.0 * 1024.0) << " MB, " << datas.size() << " arrays."<<endl;
cout<<"# processing block"<<endl;
if(splitlongarrays) splitLongArrays(datas);
processparameters pp(true, false, false, false, true, separatedeltatimes);
Delta::process(myalgos, datas, pp); }
reader.close();
cout<<"# build summary..."<<endl;
summarize(myalgos);
}