1#![doc = include_str!("../README.md")]
10#![deny(unstable_features)]
11#![deny(trivial_casts)]
12#![deny(unconditional_recursion)]
13#![deny(clippy::empty_loop)]
14#![deny(unreachable_code)]
15#![deny(unreachable_pub)]
16#![deny(unreachable_patterns)]
17#![deny(unused_macro_rules)]
18#![deny(unused_doc_comments)]
19#![allow(clippy::type_complexity)]
20
21use anyhow::{Context, Result, anyhow, bail, ensure};
22use clap::{Args, CommandFactory, Parser, Subcommand, ValueEnum};
23use common_traits::{AsBytes, FromBytes, ToBytes, UnsignedInt};
24use dsi_bitstream::dispatch::Codes;
25use epserde::deser::Deserialize;
26use epserde::ser::Serialize;
27use std::io::{BufRead, BufReader, BufWriter, Read, Write};
28use std::path::{Path, PathBuf};
29use std::time::Duration;
30use std::time::SystemTime;
31use sux::bits::BitFieldVec;
32use webgraph::prelude::CompFlags;
33use webgraph::utils::{Granularity, MemoryUsage};
34
/// Expands to the format string of the "sequential processing" warning.
///
/// The string contains a single `{}` placeholder, so it is meant to be used
/// as the format-string argument of a formatting or logging macro.
macro_rules! SEQ_PROC_WARN {
    () => {
        "Processing the graph sequentially: for parallel processing please build the Elias-Fano offsets list using 'webgraph build ef {}'"
    };
}
38
39#[cfg(not(any(feature = "le_bins", feature = "be_bins")))]
40compile_error!("At least one of the features `le_bins` or `be_bins` must be enabled.");
41
42pub mod build_info {
43 include!(concat!(env!("OUT_DIR"), "/built.rs"));
44
45 pub fn version_string() -> String {
46 format!(
47 "{}
48git info: {} {} {}
49build info: built on {} for {} with {}",
50 PKG_VERSION,
51 GIT_VERSION.unwrap_or(""),
52 GIT_COMMIT_HASH.unwrap_or(""),
53 match GIT_DIRTY {
54 None => "",
55 Some(true) => "(dirty)",
56 Some(false) => "(clean)",
57 },
58 BUILD_DATE,
59 TARGET,
60 RUSTC_VERSION
61 )
62 }
63}
64
65#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
66pub enum PrivCode {
73 Unary,
74 Gamma,
75 Delta,
76 Zeta1,
77 Zeta2,
78 Zeta3,
79 Zeta4,
80 Zeta5,
81 Zeta6,
82 Zeta7,
83 Pi1,
84 Pi2,
85 Pi3,
86 Pi4,
87}
88
89impl From<PrivCode> for Codes {
90 fn from(value: PrivCode) -> Self {
91 match value {
92 PrivCode::Unary => Codes::Unary,
93 PrivCode::Gamma => Codes::Gamma,
94 PrivCode::Delta => Codes::Delta,
95 PrivCode::Zeta1 => Codes::Zeta(1),
96 PrivCode::Zeta2 => Codes::Zeta(2),
97 PrivCode::Zeta3 => Codes::Zeta(3),
98 PrivCode::Zeta4 => Codes::Zeta(4),
99 PrivCode::Zeta5 => Codes::Zeta(5),
100 PrivCode::Zeta6 => Codes::Zeta(6),
101 PrivCode::Zeta7 => Codes::Zeta(7),
102 PrivCode::Pi1 => Codes::Pi(1),
103 PrivCode::Pi2 => Codes::Pi(2),
104 PrivCode::Pi3 => Codes::Pi(3),
105 PrivCode::Pi4 => Codes::Pi(4),
106 }
107 }
108}
109
110#[derive(Args, Debug)]
111pub struct ArcsArgs {
113 #[arg(long, default_value_t = '#')]
114 pub line_comment_symbol: char,
116
117 #[arg(long, default_value_t = 0)]
118 pub lines_to_skip: usize,
120
121 #[arg(long)]
122 pub max_arcs: Option<usize>,
125
126 #[arg(long, default_value_t = '\t')]
127 pub separator: char,
129
130 #[arg(long, default_value_t = 0)]
131 pub source_column: usize,
133
134 #[arg(long, default_value_t = 1)]
135 pub target_column: usize,
137
138 #[arg(long, default_value_t = false)]
139 pub labels: bool,
141}
142
143pub fn num_threads_parser(arg: &str) -> Result<usize> {
148 let num_threads = arg.parse::<usize>()?;
149 ensure!(num_threads > 0, "Number of threads must be greater than 0");
150 Ok(num_threads)
151}
152
153#[derive(Args, Debug)]
155pub struct NumThreadsArg {
156 #[arg(short = 'j', long, default_value_t = rayon::current_num_threads().max(1), value_parser = num_threads_parser)]
157 pub num_threads: usize,
159}
160
161#[derive(Args, Debug)]
163pub struct GranularityArgs {
164 #[arg(long, conflicts_with("node_granularity"))]
165 pub arc_granularity: Option<u64>,
168
169 #[arg(long, conflicts_with("arc_granularity"))]
170 pub node_granularity: Option<usize>,
173}
174
175impl GranularityArgs {
176 pub fn into_granularity(&self) -> Granularity {
177 match (self.arc_granularity, self.node_granularity) {
178 (Some(_), Some(_)) => unreachable!(),
179 (Some(arc_granularity), None) => Granularity::Arcs(arc_granularity),
180 (None, Some(node_granularity)) => Granularity::Nodes(node_granularity),
181 (None, None) => Granularity::default(),
182 }
183 }
184}
185
186#[derive(Args, Debug)]
188pub struct MemoryUsageArg {
189 #[clap(short = 'm', long = "memory-usage", value_parser = memory_usage_parser, default_value = "50%")]
190 pub memory_usage: MemoryUsage,
195}
196
197#[derive(Debug, Clone, Copy, ValueEnum)]
198pub enum FloatVectorFormat {
200 Java,
202 Epserde,
204 Ascii,
206 Json,
208}
209
210impl FloatVectorFormat {
211 pub fn store<F>(
219 &self,
220 path: impl AsRef<Path>,
221 values: &[F],
222 precision: Option<usize>,
223 ) -> Result<()>
224 where
225 F: ToBytes + core::fmt::Display + epserde::ser::Serialize + Copy + zmij::Float,
226 for<'a> &'a [F]: epserde::ser::Serialize,
227 {
228 create_parent_dir(&path)?;
229 let path_display = path.as_ref().display();
230 let file = std::fs::File::create(&path)
231 .with_context(|| format!("Could not create vector at {}", path_display))?;
232 let mut file = BufWriter::new(file);
233
234 match self {
235 FloatVectorFormat::Epserde => {
236 log::info!("Storing in ε-serde format at {}", path_display);
237 unsafe {
238 values
239 .serialize(&mut file)
240 .with_context(|| format!("Could not write vector to {}", path_display))
241 }?;
242 }
243 FloatVectorFormat::Java => {
244 log::info!("Storing in Java format at {}", path_display);
245 for word in values.iter() {
246 file.write_all(word.to_be_bytes().as_ref())
247 .with_context(|| format!("Could not write vector to {}", path_display))?;
248 }
249 }
250 FloatVectorFormat::Ascii => {
251 log::info!("Storing in ASCII format at {}", path_display);
252 let mut buf = zmij::Buffer::new();
253 for word in values.iter() {
254 match precision {
255 None => writeln!(file, "{}", buf.format(*word)),
256 Some(precision) => writeln!(file, "{word:.precision$}"),
257 }
258 .with_context(|| format!("Could not write vector to {}", path_display))?;
259 }
260 }
261 FloatVectorFormat::Json => {
262 log::info!("Storing in JSON format at {}", path_display);
263 let mut buf = zmij::Buffer::new();
264 write!(file, "[")?;
265 for word in values.iter().take(values.len().saturating_sub(1)) {
266 match precision {
267 None => write!(file, "{}, ", buf.format(*word)),
268 Some(precision) => write!(file, "{word:.precision$}, "),
269 }
270 .with_context(|| format!("Could not write vector to {}", path_display))?;
271 }
272 if let Some(last) = values.last() {
273 match precision {
274 None => write!(file, "{}", buf.format(*last)),
275 Some(precision) => write!(file, "{last:.precision$}"),
276 }
277 .with_context(|| format!("Could not write vector to {}", path_display))?;
278 }
279 write!(file, "]")?;
280 }
281 }
282
283 Ok(())
284 }
285
286 pub fn load<F>(&self, path: impl AsRef<Path>) -> Result<Vec<F>>
289 where
290 F: FromBytes + std::str::FromStr + Copy,
291 <F as AsBytes>::Bytes: for<'a> TryFrom<&'a [u8]>,
292 <F as std::str::FromStr>::Err: std::error::Error + Send + Sync + 'static,
293 Vec<F>: epserde::deser::Deserialize,
294 {
295 let path = path.as_ref();
296 let path_display = path.display();
297
298 match self {
299 FloatVectorFormat::Epserde => {
300 log::info!("Loading ε-serde format from {}", path_display);
301 Ok(unsafe {
302 <Vec<F>>::load_full(path)
303 .with_context(|| format!("Could not load vector from {}", path_display))?
304 })
305 }
306 FloatVectorFormat::Java => {
307 log::info!("Loading Java format from {}", path_display);
308 let file = std::fs::File::open(path)
309 .with_context(|| format!("Could not open {}", path_display))?;
310 let file_len = file.metadata()?.len() as usize;
311 let byte_size = size_of::<F>();
312 ensure!(
313 file_len % byte_size == 0,
314 "File size ({}) is not a multiple of {} bytes",
315 file_len,
316 byte_size
317 );
318 let n = file_len / byte_size;
319 let mut reader = BufReader::new(file);
320 let mut result = Vec::with_capacity(n);
321 let mut buf = vec![0u8; byte_size];
322 for i in 0..n {
323 reader.read_exact(&mut buf).with_context(|| {
324 format!("Could not read value at index {i} from {}", path_display)
325 })?;
326 let bytes = buf.as_slice().try_into().map_err(|_| {
327 anyhow!("Could not convert bytes at index {i} in {}", path_display)
328 })?;
329 result.push(F::from_be_bytes(bytes));
330 }
331 Ok(result)
332 }
333 FloatVectorFormat::Ascii => {
334 log::info!("Loading ASCII format from {}", path_display);
335 let file = std::fs::File::open(path)
336 .with_context(|| format!("Could not open {}", path_display))?;
337 let reader = BufReader::new(file);
338 reader
339 .lines()
340 .enumerate()
341 .filter(|(_, line)| line.as_ref().map_or(true, |l| !l.trim().is_empty()))
342 .map(|(i, line)| {
343 let line = line.with_context(|| {
344 format!("Error reading line {} of {}", i + 1, path_display)
345 })?;
346 line.trim().parse::<F>().map_err(|e| {
347 anyhow!("Error parsing line {} of {}: {}", i + 1, path_display, e)
348 })
349 })
350 .collect()
351 }
352 FloatVectorFormat::Json => {
353 log::info!("Loading JSON format from {}", path_display);
354 let file = std::fs::File::open(path)
355 .with_context(|| format!("Could not open {}", path_display))?;
356 let mut reader = BufReader::new(file);
357 let mut result = Vec::new();
358 let mut byte = [0u8; 1];
359
360 loop {
362 reader
363 .read_exact(&mut byte)
364 .with_context(|| format!("Unexpected end of file in {}", path_display))?;
365 match byte[0] {
366 b'[' => break,
367 b if b.is_ascii_whitespace() => continue,
368 _ => bail!("Expected '[' at start of JSON array in {}", path_display),
369 }
370 }
371
372 let mut token = String::new();
374 let mut index = 0usize;
375 loop {
376 reader
377 .read_exact(&mut byte)
378 .with_context(|| format!("Unexpected end of file in {}", path_display))?;
379 match byte[0] {
380 b']' => {
381 let trimmed = token.trim();
382 if !trimmed.is_empty() {
383 result.push(trimmed.parse::<F>().map_err(|e| {
384 anyhow!(
385 "Error parsing element {} of {}: {}",
386 index + 1,
387 path_display,
388 e
389 )
390 })?);
391 }
392 break;
393 }
394 b',' => {
395 let trimmed = token.trim();
396 result.push(trimmed.parse::<F>().map_err(|e| {
397 anyhow!(
398 "Error parsing element {} of {}: {}",
399 index + 1,
400 path_display,
401 e
402 )
403 })?);
404 token.clear();
405 index += 1;
406 }
407 c => {
408 token.push(c as char);
409 }
410 }
411 }
412 Ok(result)
413 }
414 }
415 }
416}
417
418#[derive(Debug, Clone, Copy, ValueEnum)]
419pub enum IntVectorFormat {
421 Java,
423 Epserde,
425 BitFieldVec,
429 Ascii,
431 Json,
433}
434
435impl IntVectorFormat {
436 pub fn store(&self, path: impl AsRef<Path>, data: &[u64], max: Option<u64>) -> Result<()> {
441 create_parent_dir(&path)?;
443
444 let mut file = std::fs::File::create(&path)
445 .with_context(|| format!("Could not create vector at {}", path.as_ref().display()))?;
446 let mut buf = BufWriter::new(&mut file);
447
448 debug_assert_eq!(
449 max,
450 max.map(|_| { data.iter().copied().max().unwrap_or(0) }),
451 "The wrong maximum value was provided for the vector"
452 );
453
454 match self {
455 IntVectorFormat::Epserde => {
456 log::info!("Storing in epserde format at {}", path.as_ref().display());
457 unsafe {
458 data.serialize(&mut buf).with_context(|| {
459 format!("Could not write vector to {}", path.as_ref().display())
460 })
461 }?;
462 }
463 IntVectorFormat::BitFieldVec => {
464 log::info!(
465 "Storing in BitFieldVec format at {}",
466 path.as_ref().display()
467 );
468 let max = max.unwrap_or_else(|| {
469 data.iter()
470 .copied()
471 .max()
472 .unwrap_or_else(|| panic!("Empty vector"))
473 });
474 let bit_width = max.len() as usize;
475 log::info!("Using {} bits per element", bit_width);
476 let mut bit_field_vec = <BitFieldVec<u64, _>>::with_capacity(bit_width, data.len());
477 bit_field_vec.extend(data.iter().copied());
478 unsafe {
479 bit_field_vec.store(&path).with_context(|| {
480 format!("Could not write vector to {}", path.as_ref().display())
481 })
482 }?;
483 }
484 IntVectorFormat::Java => {
485 log::info!("Storing in Java format at {}", path.as_ref().display());
486 for word in data.iter() {
487 buf.write_all(&word.to_be_bytes()).with_context(|| {
488 format!("Could not write vector to {}", path.as_ref().display())
489 })?;
490 }
491 }
492 IntVectorFormat::Ascii => {
493 log::info!("Storing in ASCII format at {}", path.as_ref().display());
494 for word in data.iter() {
495 writeln!(buf, "{}", word).with_context(|| {
496 format!("Could not write vector to {}", path.as_ref().display())
497 })?;
498 }
499 }
500 IntVectorFormat::Json => {
501 log::info!("Storing in JSON format at {}", path.as_ref().display());
502 write!(buf, "[")?;
503 for word in data.iter().take(data.len().saturating_sub(1)) {
504 write!(buf, "{}, ", word).with_context(|| {
505 format!("Could not write vector to {}", path.as_ref().display())
506 })?;
507 }
508 if let Some(last) = data.last() {
509 write!(buf, "{}", last).with_context(|| {
510 format!("Could not write vector to {}", path.as_ref().display())
511 })?;
512 }
513 write!(buf, "]")?;
514 }
515 };
516
517 Ok(())
518 }
519
520 #[cfg(target_pointer_width = "64")]
521 pub fn store_usizes(
528 &self,
529 path: impl AsRef<Path>,
530 data: &[usize],
531 max: Option<usize>,
532 ) -> Result<()> {
533 self.store(
534 path,
535 unsafe { core::mem::transmute::<&[usize], &[u64]>(data) },
536 max.map(|x| x as u64),
537 )
538 }
539}
540
541pub fn memory_usage_parser(arg: &str) -> anyhow::Result<MemoryUsage> {
549 const PREF_SYMS: [(&str, u64); 10] = [
550 ("ki", 1 << 10),
551 ("mi", 1 << 20),
552 ("gi", 1 << 30),
553 ("ti", 1 << 40),
554 ("pi", 1 << 50),
555 ("k", 1E3 as u64),
556 ("m", 1E6 as u64),
557 ("g", 1E9 as u64),
558 ("t", 1E12 as u64),
559 ("p", 1E15 as u64),
560 ];
561 let arg = arg.trim().to_ascii_lowercase();
562 ensure!(!arg.is_empty(), "empty string");
563
564 if arg.ends_with('%') {
565 let perc = arg[..arg.len() - 1].parse::<f64>()?;
566 ensure!((0.0..=100.0).contains(&perc), "percentage out of range");
567 return Ok(MemoryUsage::from_perc(perc));
568 }
569
570 let num_digits = arg
571 .chars()
572 .take_while(|c| c.is_ascii_digit() || *c == '.')
573 .count();
574
575 let number = arg[..num_digits].parse::<f64>()?;
576 let suffix = &arg[num_digits..].trim();
577
578 let prefix = suffix.strip_suffix('b').unwrap_or(suffix);
579 let multiplier = PREF_SYMS
580 .iter()
581 .find(|(x, _)| *x == prefix)
582 .map(|(_, m)| m)
583 .ok_or(anyhow!("invalid prefix symbol {}", suffix))?;
584
585 let value = (number * (*multiplier as f64)) as usize;
586 ensure!(value > 0, "batch size must be greater than zero");
587
588 if suffix.ends_with('b') {
589 Ok(MemoryUsage::MemorySize(value))
590 } else {
591 Ok(MemoryUsage::BatchSize(value))
592 }
593}
594
595#[derive(Args, Debug, Clone)]
596pub struct CompressArgs {
598 #[clap(short = 'E', long)]
600 pub endianness: Option<String>,
601
602 #[clap(short = 'w', long, default_value_t = 7)]
604 pub compression_window: usize,
605 #[clap(short = 'i', long, default_value_t = 4)]
607 pub min_interval_length: usize,
608 #[clap(short = 'r', long, default_value_t = 3)]
610 pub max_ref_count: isize,
611
612 #[arg(value_enum)]
613 #[clap(long, default_value = "gamma")]
614 pub outdegrees: PrivCode,
616
617 #[arg(value_enum)]
618 #[clap(long, default_value = "unary")]
619 pub references: PrivCode,
621
622 #[arg(value_enum)]
623 #[clap(long, default_value = "gamma")]
624 pub blocks: PrivCode,
626
627 #[arg(value_enum)]
628 #[clap(long, default_value = "zeta3")]
629 pub residuals: PrivCode,
631
632 #[clap(long)]
635 pub bvgraphz: bool,
636
637 #[clap(long, default_value = "10000")]
640 pub chunk_size: usize,
641}
642
643impl From<CompressArgs> for CompFlags {
644 fn from(value: CompressArgs) -> Self {
645 CompFlags {
646 outdegrees: value.outdegrees.into(),
647 references: value.references.into(),
648 blocks: value.blocks.into(),
649 intervals: PrivCode::Gamma.into(),
650 residuals: value.residuals.into(),
651 min_interval_length: value.min_interval_length,
652 compression_window: value.compression_window,
653 max_ref_count: match value.max_ref_count {
654 -1 => usize::MAX,
655 max_ref_count => {
656 assert!(
657 max_ref_count >= 0,
658 "max_ref_count cannot be negative, except for -1, which means infinite recursion depth, but got {}",
659 max_ref_count
660 );
661 value.max_ref_count as usize
662 }
663 },
664 }
665 }
666}
667
668pub fn get_thread_pool(num_threads: usize) -> rayon::ThreadPool {
670 rayon::ThreadPoolBuilder::new()
671 .num_threads(num_threads)
672 .build()
673 .expect("Failed to create thread pool")
674}
675
/// Returns `path` with `s` appended to its final component.
///
/// Unlike `Path::with_extension`, nothing is replaced and no dot is
/// inserted: `s` is concatenated verbatim to the file name.
///
/// # Panics
///
/// Panics if `path` has no file name. In debug builds it also panics if
/// `path` already has an extension.
pub fn append(path: impl AsRef<Path>, s: impl AsRef<str>) -> PathBuf {
    let base = path.as_ref();
    debug_assert!(base.extension().is_none());
    let mut name = base.file_name().unwrap().to_os_string();
    name.push(s.as_ref());
    base.with_file_name(name)
}
689
690pub fn create_parent_dir(file_path: impl AsRef<Path>) -> Result<()> {
692 if let Some(parent_dir) = file_path.as_ref().parent() {
694 std::fs::create_dir_all(parent_dir).with_context(|| {
695 format!(
696 "Failed to create the directory {:?}",
697 parent_dir.to_string_lossy()
698 )
699 })?;
700 }
701 Ok(())
702}
703
704fn parse_duration(value: &str) -> Result<Duration> {
714 if value.is_empty() {
715 bail!("Empty duration string, if you want every 0 milliseconds use `0`.");
716 }
717 let mut duration = Duration::from_secs(0);
718 let mut acc = String::new();
719 for c in value.chars() {
720 if c.is_ascii_digit() {
721 acc.push(c);
722 } else if c.is_whitespace() {
723 continue;
724 } else {
725 let dur = acc.parse::<u64>()?;
726 match c {
727 's' => duration += Duration::from_secs(dur),
728 'm' => duration += Duration::from_secs(dur * 60),
729 'h' => duration += Duration::from_secs(dur * 60 * 60),
730 'd' => duration += Duration::from_secs(dur * 60 * 60 * 24),
731 _ => return Err(anyhow!("Invalid duration suffix: {}", c)),
732 }
733 acc.clear();
734 }
735 }
736 if !acc.is_empty() {
737 let dur = acc.parse::<u64>()?;
738 duration += Duration::from_millis(dur);
739 }
740 Ok(duration)
741}
742
743pub fn init_env_logger() -> Result<()> {
746 use jiff::SpanRound;
747 use jiff::fmt::friendly::{Designator, Spacing, SpanPrinter};
748
749 let mut builder =
750 env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"));
751
752 let start = std::time::Instant::now();
753 let printer = SpanPrinter::new()
754 .spacing(Spacing::None)
755 .designator(Designator::Compact);
756 let span_round = SpanRound::new()
757 .largest(jiff::Unit::Day)
758 .smallest(jiff::Unit::Millisecond)
759 .days_are_24_hours();
760
761 builder.format(move |buf, record| {
762 let Ok(ts) = jiff::Timestamp::try_from(SystemTime::now()) else {
763 return Err(std::io::Error::other("Failed to get timestamp"));
764 };
765 let style = buf.default_level_style(record.level());
766 let elapsed = start.elapsed();
767 let span = jiff::Span::new()
768 .seconds(elapsed.as_secs() as i64)
769 .milliseconds(elapsed.subsec_millis() as i64);
770 let span = span.round(span_round).expect("Failed to round span");
771 writeln!(
772 buf,
773 "{} {} {style}{}{style:#} [{:?}] {} - {}",
774 ts.strftime("%F %T%.3f"),
775 printer.span_to_string(&span),
776 record.level(),
777 std::thread::current().id(),
778 record.target(),
779 record.args()
780 )
781 });
782 builder.init();
783 Ok(())
784}
785
786#[derive(Args, Debug)]
787pub struct GlobalArgs {
788 #[arg(long, value_parser = parse_duration, global=true, display_order = 1000)]
789 pub log_interval: Option<Duration>,
795}
796
797#[derive(Subcommand, Debug)]
798pub enum SubCommands {
799 #[command(subcommand)]
800 Analyze(analyze::SubCommands),
801 #[command(subcommand)]
802 Bench(bench::SubCommands),
803 #[command(subcommand)]
804 Build(build::SubCommands),
805 #[command(subcommand)]
806 Check(check::SubCommands),
807 #[command(subcommand)]
808 From(from::SubCommands),
809 #[command(subcommand)]
810 Perm(perm::SubCommands),
811 #[command(subcommand)]
812 Run(run::SubCommands),
813 #[command(subcommand)]
814 To(to::SubCommands),
815 #[command(subcommand)]
816 Transform(transform::SubCommands),
817}
818
819#[derive(Parser, Debug)]
820#[command(name = "webgraph", version=build_info::version_string())]
821#[doc = include_str!("common_env.txt")]
823pub struct Cli {
824 #[command(subcommand)]
825 pub command: SubCommands,
826 #[clap(flatten)]
827 pub args: GlobalArgs,
828}
829
830pub mod dist;
831pub mod rank;
832pub mod sccs;
833
834pub mod analyze;
835pub mod bench;
836pub mod build;
837pub mod check;
838pub mod from;
839pub mod perm;
840pub mod run;
841pub mod to;
842pub mod transform;
843
844pub fn cli_main<I, T>(args: I) -> Result<()>
846where
847 I: IntoIterator<Item = T>,
848 T: Into<std::ffi::OsString> + Clone,
849{
850 let start = std::time::Instant::now();
851 let cli = Cli::parse_from(args);
852 match cli.command {
853 SubCommands::Analyze(args) => {
854 analyze::main(cli.args, args)?;
855 }
856 SubCommands::Bench(args) => {
857 bench::main(cli.args, args)?;
858 }
859 SubCommands::Build(args) => {
860 build::main(cli.args, args, Cli::command())?;
861 }
862 SubCommands::Check(args) => {
863 check::main(cli.args, args)?;
864 }
865 SubCommands::From(args) => {
866 from::main(cli.args, args)?;
867 }
868 SubCommands::Perm(args) => {
869 perm::main(cli.args, args)?;
870 }
871 SubCommands::Run(args) => {
872 run::main(cli.args, args)?;
873 }
874 SubCommands::To(args) => {
875 to::main(cli.args, args)?;
876 }
877 SubCommands::Transform(args) => {
878 transform::main(cli.args, args)?;
879 }
880 }
881
882 log::info!(
883 "The command took {}",
884 pretty_print_elapsed(start.elapsed().as_secs_f64())
885 );
886
887 Ok(())
888}
889
/// Formats a number of seconds as a human-readable string.
///
/// Weeks, days, hours and minutes are listed only when non-zero; the
/// seconds are always printed with three decimal digits, followed by the
/// raw value in parentheses, e.g. `1 hour 1 minute 1.000 seconds (3661s)`.
fn pretty_print_elapsed(elapsed: f64) -> String {
    const MINUTE: u64 = 60;
    const HOUR: u64 = 60 * MINUTE;
    const DAY: u64 = 24 * HOUR;
    const WEEK: u64 = 7 * DAY;

    let whole_seconds = elapsed as u64;
    // Each unit divides the next larger one, so reducing modulo the larger
    // unit is all that is needed before dividing.
    let units = [
        (whole_seconds / WEEK, "week"),
        (whole_seconds % WEEK / DAY, "day"),
        (whole_seconds % DAY / HOUR, "hour"),
        (whole_seconds % HOUR / MINUTE, "minute"),
    ];

    let mut result = String::new();
    for (count, unit) in units {
        match count {
            0 => {}
            1 => result.push_str(&format!("1 {unit} ")),
            _ => result.push_str(&format!("{count} {unit}s ")),
        }
    }

    result.push_str(&format!("{:.3} seconds ({}s)", elapsed % 60.0, elapsed));
    result
}
927
928#[cfg(test)]
929mod tests {
930 use super::*;
931
932 mod float_vector_format {
933 use super::*;
934
935 #[test]
936 fn test_ascii_f64() {
937 let dir = tempfile::tempdir().unwrap();
938 let path = dir.path().join("test.txt");
939 let values: Vec<f64> = vec![1.5, 2.75, 3.0];
940 FloatVectorFormat::Ascii
941 .store(&path, &values, None)
942 .unwrap();
943 let content = std::fs::read_to_string(&path).unwrap();
944 for (line, expected) in content.lines().zip(&values) {
946 let parsed: f64 = line.trim().parse().unwrap();
947 assert!((parsed - expected).abs() < 1e-10);
948 }
949 assert_eq!(content.lines().count(), 3);
950 }
951
952 #[test]
953 fn test_ascii_f32() {
954 let dir = tempfile::tempdir().unwrap();
955 let path = dir.path().join("test.txt");
956 let values: Vec<f32> = vec![1.5, 2.75, 3.0];
957 FloatVectorFormat::Ascii
958 .store(&path, &values, None)
959 .unwrap();
960 let content = std::fs::read_to_string(&path).unwrap();
961 for (line, expected) in content.lines().zip(&values) {
962 let parsed: f32 = line.trim().parse().unwrap();
963 assert!((parsed - expected).abs() < 1e-6);
964 }
965 }
966
967 #[test]
968 fn test_ascii_with_precision() {
969 let dir = tempfile::tempdir().unwrap();
970 let path = dir.path().join("test.txt");
971 let values: Vec<f64> = vec![1.123456789, 2.987654321];
972 FloatVectorFormat::Ascii
973 .store(&path, &values, Some(3))
974 .unwrap();
975 let content = std::fs::read_to_string(&path).unwrap();
976 let lines: Vec<&str> = content.lines().collect();
977 assert_eq!(lines[0], "1.123");
978 assert_eq!(lines[1], "2.988");
979 }
980
981 #[test]
982 fn test_json_f64() {
983 let dir = tempfile::tempdir().unwrap();
984 let path = dir.path().join("test.json");
985 let values: Vec<f64> = vec![1.5, 2.75, 3.0];
986 FloatVectorFormat::Json.store(&path, &values, None).unwrap();
987 let content = std::fs::read_to_string(&path).unwrap();
988 let parsed: Vec<f64> = serde_json::from_str(&content).unwrap();
989 assert_eq!(parsed, values);
990 }
991
992 #[test]
993 fn test_json_with_precision() {
994 let dir = tempfile::tempdir().unwrap();
995 let path = dir.path().join("test.json");
996 let values: Vec<f64> = vec![1.123456789, 2.987654321];
997 FloatVectorFormat::Json
998 .store(&path, &values, Some(2))
999 .unwrap();
1000 let content = std::fs::read_to_string(&path).unwrap();
1001 assert_eq!(content, "[1.12, 2.99]");
1002 }
1003
1004 #[test]
1005 fn test_json_empty() {
1006 let dir = tempfile::tempdir().unwrap();
1007 let path = dir.path().join("test.json");
1008 let values: Vec<f64> = vec![];
1009 FloatVectorFormat::Json.store(&path, &values, None).unwrap();
1010 let content = std::fs::read_to_string(&path).unwrap();
1011 assert_eq!(content, "[]");
1012 }
1013
1014 #[test]
1015 fn test_json_single_element() {
1016 let dir = tempfile::tempdir().unwrap();
1017 let path = dir.path().join("test.json");
1018 let values: Vec<f64> = vec![42.0];
1019 FloatVectorFormat::Json.store(&path, &values, None).unwrap();
1020 let content = std::fs::read_to_string(&path).unwrap();
1021 let parsed: Vec<f64> = serde_json::from_str(&content).unwrap();
1022 assert_eq!(parsed, values);
1023 }
1024
1025 #[test]
1026 fn test_java_f64() {
1027 let dir = tempfile::tempdir().unwrap();
1028 let path = dir.path().join("test.bin");
1029 let values: Vec<f64> = vec![1.5, 2.75, 3.0];
1030 FloatVectorFormat::Java.store(&path, &values, None).unwrap();
1031 let bytes = std::fs::read(&path).unwrap();
1032 assert_eq!(bytes.len(), 3 * 8);
1033 for (i, expected) in values.iter().enumerate() {
1034 let chunk: [u8; 8] = bytes[i * 8..(i + 1) * 8].try_into().unwrap();
1035 let val = f64::from_be_bytes(chunk);
1036 assert_eq!(val, *expected);
1037 }
1038 }
1039
1040 #[test]
1041 fn test_java_f32() {
1042 let dir = tempfile::tempdir().unwrap();
1043 let path = dir.path().join("test.bin");
1044 let values: Vec<f32> = vec![1.5, 2.75, 3.0];
1045 FloatVectorFormat::Java.store(&path, &values, None).unwrap();
1046 let bytes = std::fs::read(&path).unwrap();
1047 assert_eq!(bytes.len(), 3 * 4);
1048 for (i, expected) in values.iter().enumerate() {
1049 let chunk: [u8; 4] = bytes[i * 4..(i + 1) * 4].try_into().unwrap();
1050 let val = f32::from_be_bytes(chunk);
1051 assert_eq!(val, *expected);
1052 }
1053 }
1054
1055 #[test]
1056 fn test_epserde_f64() {
1057 let dir = tempfile::tempdir().unwrap();
1058 let path = dir.path().join("test.bin");
1059 let values: Vec<f64> = vec![1.5, 2.75, 3.0];
1060 FloatVectorFormat::Epserde
1061 .store(&path, &values, None)
1062 .unwrap();
1063 let metadata = std::fs::metadata(&path).unwrap();
1065 assert!(metadata.len() > 0);
1066 }
1067
1068 #[test]
1069 fn test_ascii_empty() {
1070 let dir = tempfile::tempdir().unwrap();
1071 let path = dir.path().join("test.txt");
1072 let values: Vec<f64> = vec![];
1073 FloatVectorFormat::Ascii
1074 .store(&path, &values, None)
1075 .unwrap();
1076 let content = std::fs::read_to_string(&path).unwrap();
1077 assert!(content.is_empty());
1078 }
1079
1080 #[test]
1081 fn test_creates_parent_dirs() {
1082 let dir = tempfile::tempdir().unwrap();
1083 let path = dir.path().join("a").join("b").join("test.txt");
1084 let values: Vec<f64> = vec![1.0];
1085 FloatVectorFormat::Ascii
1086 .store(&path, &values, None)
1087 .unwrap();
1088 assert!(path.exists());
1089 }
1090
1091 #[test]
1092 fn test_roundtrip_ascii_f64() {
1093 let dir = tempfile::tempdir().unwrap();
1094 let path = dir.path().join("test.txt");
1095 let values: Vec<f64> = vec![1.5, 2.75, 3.0, 0.0, -1.25];
1096 FloatVectorFormat::Ascii
1097 .store(&path, &values, None)
1098 .unwrap();
1099 let loaded: Vec<f64> = FloatVectorFormat::Ascii.load(&path).unwrap();
1100 assert_eq!(loaded, values);
1101 }
1102
1103 #[test]
1104 fn test_roundtrip_json_f64() {
1105 let dir = tempfile::tempdir().unwrap();
1106 let path = dir.path().join("test.json");
1107 let values: Vec<f64> = vec![1.5, 2.75, 3.0, 0.0, -1.25];
1108 FloatVectorFormat::Json.store(&path, &values, None).unwrap();
1109 let loaded: Vec<f64> = FloatVectorFormat::Json.load(&path).unwrap();
1110 assert_eq!(loaded, values);
1111 }
1112
1113 #[test]
1114 fn test_roundtrip_java_f64() {
1115 let dir = tempfile::tempdir().unwrap();
1116 let path = dir.path().join("test.bin");
1117 let values: Vec<f64> = vec![1.5, 2.75, 3.0, 0.0, -1.25];
1118 FloatVectorFormat::Java.store(&path, &values, None).unwrap();
1119 let loaded: Vec<f64> = FloatVectorFormat::Java.load(&path).unwrap();
1120 assert_eq!(loaded, values);
1121 }
1122
1123 #[test]
1124 fn test_roundtrip_epserde_f64() {
1125 let dir = tempfile::tempdir().unwrap();
1126 let path = dir.path().join("test.bin");
1127 let values: Vec<f64> = vec![1.5, 2.75, 3.0, 0.0, -1.25];
1128 FloatVectorFormat::Epserde
1129 .store(&path, &values, None)
1130 .unwrap();
1131 let loaded: Vec<f64> = FloatVectorFormat::Epserde.load(&path).unwrap();
1132 assert_eq!(loaded, values);
1133 }
1134
1135 #[test]
1136 fn test_roundtrip_empty() {
1137 let dir = tempfile::tempdir().unwrap();
1138 for (fmt, ext) in [
1139 (FloatVectorFormat::Ascii, "txt"),
1140 (FloatVectorFormat::Json, "json"),
1141 (FloatVectorFormat::Java, "bin"),
1142 (FloatVectorFormat::Epserde, "eps"),
1143 ] {
1144 let path = dir.path().join(format!("empty.{ext}"));
1145 let values: Vec<f64> = vec![];
1146 fmt.store(&path, &values, None).unwrap();
1147 let loaded: Vec<f64> = fmt.load(&path).unwrap();
1148 assert_eq!(loaded, values, "roundtrip failed for {ext}");
1149 }
1150 }
1151
1152 #[test]
1153 fn test_roundtrip_f32() {
1154 let dir = tempfile::tempdir().unwrap();
1155 let values: Vec<f32> = vec![1.5, 2.75, 3.0, 0.0, -1.25];
1156 for (fmt, ext) in [
1157 (FloatVectorFormat::Ascii, "txt"),
1158 (FloatVectorFormat::Json, "json"),
1159 (FloatVectorFormat::Java, "bin"),
1160 (FloatVectorFormat::Epserde, "eps"),
1161 ] {
1162 let path = dir.path().join(format!("f32.{ext}"));
1163 fmt.store(&path, &values, None).unwrap();
1164 let loaded: Vec<f32> = fmt.load(&path).unwrap();
1165 assert_eq!(loaded, values, "f32 roundtrip failed for {ext}");
1166 }
1167 }
1168 }
1169
/// Tests for the files written by `IntVectorFormat::store` and
/// `IntVectorFormat::store_usizes`.
///
/// Every test writes into a fresh temporary directory and then inspects the
/// resulting file directly from disk.
mod int_vector_format {
    use super::*;

    /// The ASCII output must parse, line by line, back into the stored values.
    #[test]
    fn test_ascii() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("test.txt");
        let expected: Vec<u64> = vec![10, 20, 30];
        IntVectorFormat::Ascii.store(&file, &expected, None).unwrap();

        let text = std::fs::read_to_string(&file).unwrap();
        let mut parsed: Vec<u64> = Vec::new();
        for line in text.lines() {
            parsed.push(line.trim().parse().unwrap());
        }
        assert_eq!(parsed, expected);
    }

    /// Storing an empty vector as ASCII must produce an empty file.
    #[test]
    fn test_ascii_empty() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("test.txt");
        let nothing: Vec<u64> = vec![];
        IntVectorFormat::Ascii.store(&file, &nothing, None).unwrap();

        let text = std::fs::read_to_string(&file).unwrap();
        assert!(text.is_empty());
    }

    /// The JSON output must deserialize back into the stored values.
    #[test]
    fn test_json() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("test.json");
        let expected: Vec<u64> = vec![10, 20, 30];
        IntVectorFormat::Json.store(&file, &expected, None).unwrap();

        let text = std::fs::read_to_string(&file).unwrap();
        let decoded: Vec<u64> = serde_json::from_str(&text).unwrap();
        assert_eq!(decoded, expected);
    }

    /// Storing an empty vector as JSON must produce exactly `[]`.
    #[test]
    fn test_json_empty() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("test.json");
        let nothing: Vec<u64> = vec![];
        IntVectorFormat::Json.store(&file, &nothing, None).unwrap();

        let text = std::fs::read_to_string(&file).unwrap();
        assert_eq!(text, "[]");
    }

    /// A one-element vector must survive the JSON encoding unchanged.
    #[test]
    fn test_json_single_element() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("test.json");
        let expected: Vec<u64> = vec![42];
        IntVectorFormat::Json.store(&file, &expected, None).unwrap();

        let text = std::fs::read_to_string(&file).unwrap();
        let decoded: Vec<u64> = serde_json::from_str(&text).unwrap();
        assert_eq!(decoded, expected);
    }

    /// The Java output must hold one big-endian 64-bit word per stored value.
    #[test]
    fn test_java() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("test.bin");
        let expected: Vec<u64> = vec![1, 256, 65535];
        IntVectorFormat::Java.store(&file, &expected, None).unwrap();

        let raw = std::fs::read(&file).unwrap();
        assert_eq!(raw.len(), 3 * 8);
        // Walk the file one 8-byte word at a time, alongside the input.
        for (word, want) in raw.chunks_exact(8).zip(&expected) {
            let word: [u8; 8] = word.try_into().unwrap();
            let got = u64::from_be_bytes(word);
            assert_eq!(got, *want);
        }
    }

    /// Storing an empty vector in the Java format must produce an empty file.
    #[test]
    fn test_java_empty() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("test.bin");
        let nothing: Vec<u64> = vec![];
        IntVectorFormat::Java.store(&file, &nothing, None).unwrap();

        let raw = std::fs::read(&file).unwrap();
        assert!(raw.is_empty());
    }

    /// The ε-serde output must be a non-empty file.
    #[test]
    fn test_epserde() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("test.bin");
        let values: Vec<u64> = vec![10, 20, 30];
        IntVectorFormat::Epserde.store(&file, &values, None).unwrap();

        // Only the presence of some output is checked, not its layout.
        let size = std::fs::metadata(&file).unwrap().len();
        assert!(size > 0);
    }

    /// The `BitFieldVec` output, stored with an explicit third argument of
    /// `Some(15)`, must be a non-empty file.
    #[test]
    fn test_bitfieldvec() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("test.bin");
        let values: Vec<u64> = vec![1, 3, 7, 15];
        // NOTE(review): 15 is also the largest element of `values`; presumably
        // the third argument is the maximum value — confirm against `store`.
        let stored = IntVectorFormat::BitFieldVec.store(&file, &values, Some(15));
        stored.unwrap();

        let size = std::fs::metadata(&file).unwrap().len();
        assert!(size > 0);
    }

    /// Storing as `BitFieldVec` with `None` as third argument must still
    /// create the file.
    #[test]
    fn test_bitfieldvec_max_computed() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("test.bin");
        let values: Vec<u64> = vec![1, 3, 7, 15];
        // Judging by the test name, `None` presumably makes `store` derive the
        // maximum from the data — confirm against `store`.
        let stored = IntVectorFormat::BitFieldVec.store(&file, &values, None);
        stored.unwrap();

        assert!(file.exists());
    }

    /// Storing to a path under the not-yet-existing directories `a/b` must
    /// still produce the file.
    #[test]
    fn test_creates_parent_dirs() {
        let scratch = tempfile::tempdir().unwrap();
        let mut nested = scratch.path().to_path_buf();
        nested.push("a");
        nested.push("b");
        nested.push("test.txt");
        let values: Vec<u64> = vec![1];
        IntVectorFormat::Ascii.store(&nested, &values, None).unwrap();

        assert!(nested.exists());
    }

    /// `store_usizes` in ASCII format must write lines that parse back into
    /// the stored `usize` values.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_store_usizes() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("test.txt");
        let expected: Vec<usize> = vec![10, 20, 30];
        IntVectorFormat::Ascii.store_usizes(&file, &expected, None).unwrap();

        let text = std::fs::read_to_string(&file).unwrap();
        let mut parsed: Vec<usize> = Vec::new();
        for line in text.lines() {
            parsed.push(line.trim().parse().unwrap());
        }
        assert_eq!(parsed, expected);
    }

    /// `store_usizes` in Java format must write one big-endian 64-bit word
    /// per stored `usize` value.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_store_usizes_java() {
        let scratch = tempfile::tempdir().unwrap();
        let file = scratch.path().join("test.bin");
        let expected: Vec<usize> = vec![1, 256, 65535];
        IntVectorFormat::Java.store_usizes(&file, &expected, None).unwrap();

        let raw = std::fs::read(&file).unwrap();
        assert_eq!(raw.len(), 3 * 8);
        // Walk the file one 8-byte word at a time, alongside the input.
        for (word, want) in raw.chunks_exact(8).zip(&expected) {
            let word: [u8; 8] = word.try_into().unwrap();
            let got = u64::from_be_bytes(word) as usize;
            assert_eq!(got, *want);
        }
    }
}
1326}