use std::{fmt::Debug, time::Instant};
use clap::Parser;
use half::f16;
use kannolo::sparse_plain_quantizer::SparsePlainQuantizer;
use std::process;
use kannolo::SparseDataset;
use kannolo::{
hnsw::graph_index::GraphIndex, hnsw_utils::config_hnsw::ConfigHnsw, Dataset, DistanceType,
IndexSerializer,
};
// Command-line arguments of the index-building binary, parsed with clap's derive API.
// NOTE: plain `//` comments are used on purpose here: clap's derive macro turns `///`
// doc comments into help/about text, which would alter the `--help` output.
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct Args {
// Path of the input dataset; `main` hands it to `read_bin_file_f16`.
#[clap(short, long, value_parser)]
data_file: String,
// Path the built index is written to; `main` hands it to `IndexSerializer::save_index`.
#[clap(short, long, value_parser)]
output_file: String,
// Value `main` passes to `ConfigHnsw::num_neighbors` (HNSW "M"); defaults to 16.
#[clap(long, value_parser)]
#[arg(default_value_t = 16)]
m: usize,
// Value `main` passes to `ConfigHnsw::ef_construction`; defaults to 40.
#[clap(long, value_parser)]
#[arg(default_value_t = 40)]
efc: usize,
// Distance selector: `main` accepts "l2" (Euclidean) or "ip" (dot product) and
// rejects anything else; defaults to "ip".
#[clap(long, value_parser)]
#[arg(default_value_t = String::from("ip"))]
metric: String,
}
/// Builds an HNSW graph index over a sparse `f16` dataset and writes it to disk.
///
/// Steps, in order:
/// 1. parse the command line into [`Args`];
/// 2. map `--metric` (`"l2"` / `"ip"`) to a `DistanceType`;
/// 3. read the dataset from `--data-file`;
/// 4. build the index using rayon's current thread count, timing the build;
/// 5. serialize the index to `--output-file`.
///
/// The process prints a message to stderr and exits with status 1 when the
/// metric is unknown, the dataset cannot be read, or the index cannot be saved.
fn main() {
    let args: Args = Args::parse();

    let data_path = args.data_file;
    let num_neighbors = args.m;
    let ef_construction = args.efc;

    println!("Building Index with M: {num_neighbors}, ef_construction: {ef_construction}");

    let distance = match args.metric.as_str() {
        "l2" => DistanceType::Euclidean,
        "ip" => DistanceType::DotProduct,
        _ => {
            eprintln!("Error: Invalid distance type. Choose between 'l2' and 'ip'.");
            process::exit(1);
        }
    };

    let num_threads_construction = rayon::current_num_threads();

    let config = ConfigHnsw::new()
        .num_neighbors(num_neighbors)
        .ef_construction(ef_construction)
        .build();

    // The path comes straight from the user, so a read failure is an expected
    // condition: report it and exit instead of panicking through `unwrap()`.
    let dataset: SparseDataset<SparsePlainQuantizer<f16>> =
        SparseDataset::<SparsePlainQuantizer<f16>>::read_bin_file_f16(data_path.as_str(), None)
            .unwrap_or_else(|err| {
                eprintln!("Error: failed to read dataset from '{data_path}': {err:?}");
                process::exit(1)
            });

    let quantizer = SparsePlainQuantizer::<f16>::new(dataset.dim(), distance);

    // Only the graph construction is timed; serialization happens afterwards.
    let start_time = Instant::now();
    let index: GraphIndex<'_, SparseDataset<SparsePlainQuantizer<f16>>, SparsePlainQuantizer<f16>> =
        GraphIndex::from_dataset(&dataset, &config, quantizer, num_threads_construction);
    let duration = start_time.elapsed();

    println!("Time to build {} (before serializing)", duration.as_secs());

    // A failed save used to be discarded (`let _ = ...`), leaving the run looking
    // successful with no index on disk. Surface it and signal failure to the caller.
    if let Err(err) = IndexSerializer::save_index(&args.output_file, &index) {
        eprintln!(
            "Error: failed to save index to '{}': {err:?}",
            args.output_file
        );
        process::exit(1);
    }
}