use std::env;
use std::fs;
use std::io::{BufWriter, Write};
use std::path::PathBuf;
use std::process;
use std::time::Instant;
use caps_sa::{ExtMemOpts, build_ext_mem, build_in_memory, build_in_memory_sample_sort};
struct Args {
input: PathBuf,
output: PathBuf,
ext_mem: bool,
in_mem_ss: bool,
subproblem_count: usize,
threads: Option<usize>,
}
fn parse_args() -> Args {
let argv: Vec<String> = env::args().collect();
let mut positional: Vec<String> = Vec::new();
let mut ext_mem = false;
let mut in_mem_ss = false;
let mut subproblem_count: usize = 0;
let mut threads: Option<usize> = None;
let mut i = 1;
while i < argv.len() {
match argv[i].as_str() {
"--ext-mem" => {
ext_mem = true;
i += 1;
}
"--in-mem-ss" => {
in_mem_ss = true;
i += 1;
}
"--subproblem-count" => {
subproblem_count = argv[i + 1]
.parse()
.expect("--subproblem-count expects a positive integer");
i += 2;
}
"--threads" => {
threads = Some(
argv[i + 1]
.parse()
.expect("--threads expects a positive integer"),
);
i += 2;
}
"--help" | "-h" => {
eprintln!(
"usage: caps_sa <input> <output> [--ext-mem | --in-mem-ss] \
[--subproblem-count N] [--threads N]"
);
process::exit(0);
}
_ => {
positional.push(argv[i].clone());
i += 1;
}
}
}
if positional.len() != 2 {
eprintln!(
"error: expected 2 positional args (input, output), got {}",
positional.len()
);
process::exit(2);
}
Args {
input: PathBuf::from(&positional[0]),
output: PathBuf::from(&positional[1]),
ext_mem,
in_mem_ss,
subproblem_count,
threads,
}
}
fn main() -> std::io::Result<()> {
let args = parse_args();
if let Some(t) = args.threads {
rayon::ThreadPoolBuilder::new()
.num_threads(t)
.build_global()
.expect("failed to configure rayon thread pool");
}
let read_start = Instant::now();
let text = fs::read(&args.input)?;
let read_elapsed = read_start.elapsed();
eprintln!(
"read: n={} bytes in {:.3}s",
text.len(),
read_elapsed.as_secs_f64()
);
let use_u32 = text.len() <= (u32::MAX as usize) + 1 && !args.ext_mem && !args.in_mem_ss;
let n_entries: usize;
let build_elapsed;
let write_elapsed;
if use_u32 {
let build_start = Instant::now();
let sa: Vec<u32> = build_in_memory(&text);
build_elapsed = build_start.elapsed();
n_entries = sa.len();
eprintln!(
"build: mode=in-mem(u32) n={n_entries} entries in {:.3}s",
build_elapsed.as_secs_f64()
);
let write_start = Instant::now();
let mut writer = BufWriter::new(fs::File::create(&args.output)?);
let mut buf = [0u8; 4];
for v in &sa {
buf.copy_from_slice(&v.to_le_bytes());
writer.write_all(&buf)?;
}
writer.flush()?;
write_elapsed = write_start.elapsed();
} else if args.ext_mem || args.in_mem_ss {
let opts = ExtMemOpts {
subproblem_count: args.subproblem_count,
..ExtMemOpts::default()
};
let writer = std::cell::RefCell::new(BufWriter::new(fs::File::create(&args.output)?));
let mut count = 0usize;
let mode_label = if args.ext_mem { "ext-mem" } else { "in-mem-ss" };
let build_start = Instant::now();
if args.ext_mem {
build_ext_mem(&text, &opts, |pos| {
count += 1;
writer.borrow_mut().write_all(&pos.to_le_bytes())?;
Ok(())
})?;
} else {
build_in_memory_sample_sort(&text, &opts, |pos| {
count += 1;
writer.borrow_mut().write_all(&pos.to_le_bytes())?;
Ok(())
})?;
}
build_elapsed = build_start.elapsed();
writer.borrow_mut().flush()?;
n_entries = count;
eprintln!(
"build: mode={mode_label}(u64,stream) n={n_entries} entries in {:.3}s",
build_elapsed.as_secs_f64()
);
write_elapsed = std::time::Duration::ZERO;
} else {
let _ = args.subproblem_count;
let build_start = Instant::now();
let sa: Vec<u64> = build_in_memory(&text);
build_elapsed = build_start.elapsed();
n_entries = sa.len();
eprintln!(
"build: mode=in-mem(u64) n={n_entries} entries in {:.3}s",
build_elapsed.as_secs_f64()
);
let write_start = Instant::now();
let mut writer = BufWriter::new(fs::File::create(&args.output)?);
let mut buf = [0u8; 8];
for v in &sa {
buf.copy_from_slice(&v.to_le_bytes());
writer.write_all(&buf)?;
}
writer.flush()?;
write_elapsed = write_start.elapsed();
}
eprintln!("write: {:.3}s", write_elapsed.as_secs_f64());
Ok(())
}