use std::fs;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;
use std::time::{Duration, Instant};
use arroy::distances::DotProduct;
use arroy::{Database, Writer};
use clap::Parser;
use heed::{EnvFlags, EnvOpenOptions};
use rand::rngs::StdRng;
use rand::SeedableRng;
/// Default for the `map_size` command-line argument: 200 GiB, expressed in bytes.
const DEFAULT_MAP_SIZE: usize = 200 * 1024 * 1024 * 1024;
/// Command-line arguments of the importer.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
    /// Directory holding the LMDB environment (created if it does not exist).
    #[arg(default_value = "import.ary")]
    database: PathBuf,

    /// Size of the LMDB memory map, in bytes.
    #[arg(long, default_value_t = DEFAULT_MAP_SIZE)]
    map_size: usize,

    /// Number of dimensions of the imported vectors.
    #[arg(long, default_value_t = 768)]
    dimensions: usize,

    /// Open the LMDB environment with the WRITE_MAP flag.
    #[arg(long)]
    write_map: bool,

    // NOTE(review): presumably required when the input ids are not sorted in increasing
    // order — confirm against arroy's `append_item` documentation.
    /// Insert items with `add_item` instead of the default `append_item`.
    #[arg(long)]
    no_append: bool,

    /// Number of trees to build; the builder's default is used when omitted.
    #[arg(long)]
    n_trees: Option<usize>,

    /// Seed of the random number generator handed to the tree builder.
    #[arg(long, default_value_t = 42)]
    seed: u64,
}
/// Imports vectors read from standard input into an arroy database, then builds its trees.
///
/// Each input line is expected to look like `<id>,<v1>,<v2>,...`; the vector part may be
/// wrapped in square brackets. Lines starting with `===` are skipped. All writes happen in a
/// single write transaction that is committed once the trees are built.
///
/// # Errors
///
/// Returns an error if the database directory cannot be created, the environment or the
/// database cannot be opened, standard input cannot be read, a line is malformed, or arroy
/// rejects an insertion or the build.
fn main() -> Result<(), heed::BoxedError> {
    env_logger::init();

    let Cli { database, map_size, dimensions, write_map, no_append, n_trees, seed } = Cli::parse();

    let mut rng = StdRng::seed_from_u64(seed);
    let reader = BufReader::new(std::io::stdin());

    // Surface the real cause (permissions, path is a file, ...) instead of failing later
    // when the environment is opened.
    fs::create_dir_all(&database)?;

    let flags = if write_map { EnvFlags::WRITE_MAP } else { EnvFlags::empty() };
    let mut env_builder = EnvOpenOptions::new();
    env_builder.map_size(map_size);
    // SAFETY: the only flag that can be set here is WRITE_MAP, and only on explicit user
    // request. NOTE(review): confirm against heed's safety notes for `flags`.
    unsafe { env_builder.flags(flags) };
    // SAFETY: assumes this process opens the environment only once and that nothing else
    // modifies the underlying files while they are mapped — TODO confirm against heed's
    // safety notes for `open`.
    let env = unsafe { env_builder.open(&database) }?;

    let mut wtxn = env.write_txn()?;
    let database: Database<DotProduct> = env.create_database(&mut wtxn, None)?;
    let writer = Writer::<DotProduct>::new(database, 0, dimensions);

    let start = Instant::now();
    let mut insertion_time = Duration::default();
    // `u64` rather than the inferred `i32`: ids are `u32`, so the number of lines can
    // exceed `i32::MAX`.
    let mut count: u64 = 0;
    for (index, line) in reader.lines().enumerate() {
        let line = line?;
        if line.starts_with("===") {
            continue;
        }

        let (id, vector) =
            parse_line(&line).map_err(|err| format!("line {}: {err}", index + 1))?;

        // Only the calls into arroy are counted as insertion time.
        let insert_start = Instant::now();
        if no_append {
            writer.add_item(&mut wtxn, id, &vector)?;
        } else {
            writer.append_item(&mut wtxn, id, &vector)?;
        }
        insertion_time += insert_start.elapsed();
        count += 1;
    }

    // Total loop time minus the time spent inside arroy, i.e. reading and parsing only.
    println!("Took {:.2?} to parse the input", start.elapsed() - insertion_time);
    println!("Took {insertion_time:.2?} insert into arroy");
    println!("There are {count} vectors");
    println!();

    println!("Building the arroy internal trees...");
    let now = Instant::now();
    let mut builder = writer.builder(&mut rng);
    if let Some(n_trees) = n_trees {
        builder.n_trees(n_trees);
    }
    builder.build(&mut wtxn)?;
    wtxn.commit()?;
    println!("Took {:.2?} to build", now.elapsed());

    Ok(())
}

/// Splits one input line into its item id and its vector components.
///
/// The id is everything before the first comma; the rest is a comma-separated list of
/// floats, with surrounding whitespace and square brackets stripped.
fn parse_line(line: &str) -> Result<(u32, Vec<f32>), heed::BoxedError> {
    let (id, vector) = line
        .split_once(',')
        .ok_or_else(|| format!("expected `<id>,<vector>` but got {line:?}"))?;
    let id: u32 = id.parse()?;
    let vector = vector
        .trim_matches(|c: char| c.is_whitespace() || c == '[' || c == ']')
        .split(',')
        .map(|s| s.trim().parse::<f32>())
        .collect::<Result<Vec<_>, _>>()?;
    Ok((id, vector))
}