1#![doc = include_str!("../README.md")]
10#![deny(unstable_features)]
11#![deny(trivial_casts)]
12#![deny(unconditional_recursion)]
13#![deny(clippy::empty_loop)]
14#![deny(unreachable_code)]
15#![deny(unreachable_pub)]
16#![deny(unreachable_patterns)]
17#![deny(unused_macro_rules)]
18#![deny(unused_doc_comments)]
19#![allow(clippy::type_complexity)]
20
21use anyhow::{Context, Result, anyhow, bail, ensure};
22use clap::{Args, CommandFactory, Parser, Subcommand, ValueEnum};
23use common_traits::{ToBytes, UnsignedInt};
24use dsi_bitstream::dispatch::Codes;
25use epserde::ser::Serialize;
26use std::io::{BufWriter, Write};
27use std::path::{Path, PathBuf};
28use std::time::Duration;
29use std::time::SystemTime;
30use sux::bits::BitFieldVec;
31use webgraph::prelude::CompFlags;
32use webgraph::utils::{Granularity, MemoryUsage};
33
// Warning text used when a graph has to be processed sequentially.
//
// This is a macro (rather than a `const`) so that it expands to a string
// literal and can therefore be used as the format string of `format!`-like
// macros; the single `{}` placeholder is filled in by the caller.
// NOTE(review): the placeholder is presumably the graph basename to pass to
// `webgraph build ef` — confirm at the call sites.
macro_rules! SEQ_PROC_WARN {
    () => {"Processing the graph sequentially: for parallel processing please build the Elias-Fano offsets list using 'webgraph build ef {}'"}
}
37
// Stop the build with an explicit message when neither the `le_bins` nor the
// `be_bins` feature is enabled.
#[cfg(not(any(feature = "le_bins", feature = "be_bins")))]
compile_error!("At least one of the features `le_bins` or `be_bins` must be enabled.");
40
41pub mod build_info {
42 include!(concat!(env!("OUT_DIR"), "/built.rs"));
43
44 pub fn version_string() -> String {
45 format!(
46 "{}
47git info: {} {} {}
48build info: built on {} for {} with {}",
49 PKG_VERSION,
50 GIT_VERSION.unwrap_or(""),
51 GIT_COMMIT_HASH.unwrap_or(""),
52 match GIT_DIRTY {
53 None => "",
54 Some(true) => "(dirty)",
55 Some(false) => "(clean)",
56 },
57 BUILD_DATE,
58 TARGET,
59 RUSTC_VERSION
60 )
61 }
62}
63
// Instantaneous codes that can be selected from the command line.
//
// Every variant is mapped to a `Codes` value by the `From<PrivCode> for Codes`
// implementation; the parametric ζ and π codes are exposed as one unit variant
// per supported parameter (ζ₁–ζ₇, π₁–π₄).
//
// Plain `//` comments are used on purpose: doc comments on a `ValueEnum`
// would become part of the generated command-line help.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
pub enum PrivCode {
    Unary,
    Gamma,
    Delta,
    Zeta1,
    Zeta2,
    Zeta3,
    Zeta4,
    Zeta5,
    Zeta6,
    Zeta7,
    Pi1,
    Pi2,
    Pi3,
    Pi4,
}
87
88impl From<PrivCode> for Codes {
89 fn from(value: PrivCode) -> Self {
90 match value {
91 PrivCode::Unary => Codes::Unary,
92 PrivCode::Gamma => Codes::Gamma,
93 PrivCode::Delta => Codes::Delta,
94 PrivCode::Zeta1 => Codes::Zeta(1),
95 PrivCode::Zeta2 => Codes::Zeta(2),
96 PrivCode::Zeta3 => Codes::Zeta(3),
97 PrivCode::Zeta4 => Codes::Zeta(4),
98 PrivCode::Zeta5 => Codes::Zeta(5),
99 PrivCode::Zeta6 => Codes::Zeta(6),
100 PrivCode::Zeta7 => Codes::Zeta(7),
101 PrivCode::Pi1 => Codes::Pi(1),
102 PrivCode::Pi2 => Codes::Pi(2),
103 PrivCode::Pi3 => Codes::Pi(3),
104 PrivCode::Pi4 => Codes::Pi(4),
105 }
106 }
107}
108
// Shared command-line options describing a textual list of arcs.
//
// Plain `//` comments are used on purpose: doc comments on `clap` items would
// become part of the generated command-line help.
// NOTE(review): the meaning of each option below is inferred from its name and
// default; confirm against the code that consumes this struct.
#[derive(Args, Debug)]
pub struct ArcsArgs {
    // `--line-comment-symbol`, defaults to `#`; presumably lines starting
    // with this character are ignored.
    #[arg(long, default_value_t = '#')]
    pub line_comment_symbol: char,

    // `--lines-to-skip`, defaults to 0; presumably the number of initial
    // lines (e.g., a header) to discard.
    #[arg(long, default_value_t = 0)]
    pub lines_to_skip: usize,

    // `--max-arcs`, optional; presumably an upper bound on the number of arcs
    // to read.
    #[arg(long)]
    pub max_arcs: Option<usize>,

    // `--separator`, defaults to a tab; presumably the column separator.
    #[arg(long, default_value_t = '\t')]
    pub separator: char,

    // `--source-column`, defaults to 0; presumably the index of the column
    // holding the source node.
    #[arg(long, default_value_t = 0)]
    pub source_column: usize,

    // `--target-column`, defaults to 1; presumably the index of the column
    // holding the target node.
    #[arg(long, default_value_t = 1)]
    pub target_column: usize,

    // `--labels`, defaults to false.
    #[arg(long, default_value_t = false)]
    pub labels: bool,
}
141
142pub fn num_threads_parser(arg: &str) -> Result<usize> {
147 let num_threads = arg.parse::<usize>()?;
148 ensure!(num_threads > 0, "Number of threads must be greater than 0");
149 Ok(num_threads)
150}
151
// Shared command-line option selecting the number of threads.
//
// Plain `//` comments are used on purpose: doc comments on `clap` items would
// become part of the generated command-line help.
#[derive(Args, Debug)]
pub struct NumThreadsArg {
    // `-j`/`--num-threads`: validated by `num_threads_parser` (must be > 0);
    // the default is the size of the current rayon thread pool, but at least 1.
    #[arg(short = 'j', long, default_value_t = rayon::current_num_threads().max(1), value_parser = num_threads_parser)]
    pub num_threads: usize,
}
159
// Shared command-line options selecting a granularity, expressed either in
// arcs or in nodes; the two options are mutually exclusive (enforced by
// `clap` through `conflicts_with`).
//
// Plain `//` comments are used on purpose: doc comments on `clap` items would
// become part of the generated command-line help.
#[derive(Args, Debug)]
pub struct GranularityArgs {
    // `--arc-granularity`: granularity as a number of arcs.
    #[arg(long, conflicts_with("node_granularity"))]
    pub arc_granularity: Option<u64>,

    // `--node-granularity`: granularity as a number of nodes.
    #[arg(long, conflicts_with("arc_granularity"))]
    pub node_granularity: Option<usize>,
}
173
174impl GranularityArgs {
175 pub fn into_granularity(&self) -> Granularity {
176 match (self.arc_granularity, self.node_granularity) {
177 (Some(_), Some(_)) => unreachable!(),
178 (Some(arc_granularity), None) => Granularity::Arcs(arc_granularity),
179 (None, Some(node_granularity)) => Granularity::Nodes(node_granularity),
180 (None, None) => Granularity::default(),
181 }
182 }
183}
184
// Shared command-line option selecting how much memory may be used.
//
// Plain `//` comments are used on purpose: doc comments on `clap` items would
// become part of the generated command-line help.
#[derive(Args, Debug)]
pub struct MemoryUsageArg {
    // `-m`/`--memory-usage`: parsed by `memory_usage_parser`; defaults to
    // `50%`.
    #[clap(short = 'm', long = "memory-usage", value_parser = memory_usage_parser, default_value = "50%")]
    pub memory_usage: MemoryUsage,
}
195
// On-disk formats supported by `FloatVectorFormat::store` for vectors of
// floating-point values.
//
// Plain `//` comments are used on purpose: doc comments on a `ValueEnum`
// would become part of the generated command-line help.
#[derive(Debug, Clone, Copy, ValueEnum)]
pub enum FloatVectorFormat {
    // Raw concatenation of the big-endian bytes of each value.
    Java,
    // ε-serde serialization of the slice.
    Epserde,
    // One value per line, as text.
    Ascii,
    // A single JSON array (`[a, b, c]`).
    Json,
}
208
209impl FloatVectorFormat {
210 pub fn store<F>(
217 &self,
218 path: impl AsRef<Path>,
219 values: &[F],
220 precision: Option<usize>,
221 ) -> Result<()>
222 where
223 F: ToBytes + core::fmt::Display + epserde::ser::Serialize + Copy,
224 for<'a> &'a [F]: epserde::ser::Serialize,
225 {
226 let precision = precision.unwrap_or(f64::DIGITS as usize);
227 create_parent_dir(&path)?;
228 let path_display = path.as_ref().display();
229 let mut file = std::fs::File::create(&path)
230 .with_context(|| format!("Could not create vector at {}", path_display))?;
231
232 match self {
233 FloatVectorFormat::Epserde => {
234 log::info!("Storing in ε-serde format at {}", path_display);
235 unsafe {
236 values
237 .serialize(&mut file)
238 .with_context(|| format!("Could not write vector to {}", path_display))
239 }?;
240 }
241 FloatVectorFormat::Java => {
242 log::info!("Storing in Java format at {}", path_display);
243 for word in values.iter() {
244 file.write_all(word.to_be_bytes().as_ref())
245 .with_context(|| format!("Could not write vector to {}", path_display))?;
246 }
247 }
248 FloatVectorFormat::Ascii => {
249 log::info!("Storing in ASCII format at {}", path_display);
250 for word in values.iter() {
251 writeln!(file, "{word:.precision$}")
252 .with_context(|| format!("Could not write vector to {}", path_display))?;
253 }
254 }
255 FloatVectorFormat::Json => {
256 log::info!("Storing in JSON format at {}", path_display);
257 write!(file, "[")?;
258 for word in values.iter().take(values.len().saturating_sub(1)) {
259 write!(file, "{word:.precision$}, ")
260 .with_context(|| format!("Could not write vector to {}", path_display))?;
261 }
262 if let Some(last) = values.last() {
263 write!(file, "{last:.precision$}")
264 .with_context(|| format!("Could not write vector to {}", path_display))?;
265 }
266 write!(file, "]")?;
267 }
268 }
269
270 Ok(())
271 }
272}
273
// On-disk formats supported by `IntVectorFormat::store` for vectors of
// unsigned integers.
//
// Plain `//` comments are used on purpose: doc comments on a `ValueEnum`
// would become part of the generated command-line help.
#[derive(Debug, Clone, Copy, ValueEnum)]
pub enum IntVectorFormat {
    // Raw concatenation of the big-endian bytes of each 64-bit value.
    Java,
    // ε-serde serialization of the slice.
    Epserde,
    // A `BitFieldVec` whose bit width is derived from the maximum value,
    // stored through its own `store` method.
    BitFieldVec,
    // One value per line, as text.
    Ascii,
    // A single JSON array (`[a, b, c]`).
    Json,
}
290
291impl IntVectorFormat {
292 pub fn store(&self, path: impl AsRef<Path>, data: &[u64], max: Option<u64>) -> Result<()> {
297 create_parent_dir(&path)?;
299
300 let mut file = std::fs::File::create(&path)
301 .with_context(|| format!("Could not create vector at {}", path.as_ref().display()))?;
302 let mut buf = BufWriter::new(&mut file);
303
304 debug_assert_eq!(
305 max,
306 max.map(|_| { data.iter().copied().max().unwrap_or(0) }),
307 "The wrong maximum value was provided for the vector"
308 );
309
310 match self {
311 IntVectorFormat::Epserde => {
312 log::info!("Storing in epserde format at {}", path.as_ref().display());
313 unsafe {
314 data.serialize(&mut buf).with_context(|| {
315 format!("Could not write vector to {}", path.as_ref().display())
316 })
317 }?;
318 }
319 IntVectorFormat::BitFieldVec => {
320 log::info!(
321 "Storing in BitFieldVec format at {}",
322 path.as_ref().display()
323 );
324 let max = max.unwrap_or_else(|| {
325 data.iter()
326 .copied()
327 .max()
328 .unwrap_or_else(|| panic!("Empty vector"))
329 });
330 let bit_width = max.len() as usize;
331 log::info!("Using {} bits per element", bit_width);
332 let mut bit_field_vec = <BitFieldVec<u64, _>>::with_capacity(bit_width, data.len());
333 bit_field_vec.extend(data.iter().copied());
334 unsafe {
335 bit_field_vec.store(&path).with_context(|| {
336 format!("Could not write vector to {}", path.as_ref().display())
337 })
338 }?;
339 }
340 IntVectorFormat::Java => {
341 log::info!("Storing in Java format at {}", path.as_ref().display());
342 for word in data.iter() {
343 buf.write_all(&word.to_be_bytes()).with_context(|| {
344 format!("Could not write vector to {}", path.as_ref().display())
345 })?;
346 }
347 }
348 IntVectorFormat::Ascii => {
349 log::info!("Storing in ASCII format at {}", path.as_ref().display());
350 for word in data.iter() {
351 writeln!(buf, "{}", word).with_context(|| {
352 format!("Could not write vector to {}", path.as_ref().display())
353 })?;
354 }
355 }
356 IntVectorFormat::Json => {
357 log::info!("Storing in JSON format at {}", path.as_ref().display());
358 write!(buf, "[")?;
359 for word in data.iter().take(data.len().saturating_sub(1)) {
360 write!(buf, "{}, ", word).with_context(|| {
361 format!("Could not write vector to {}", path.as_ref().display())
362 })?;
363 }
364 if let Some(last) = data.last() {
365 write!(buf, "{}", last).with_context(|| {
366 format!("Could not write vector to {}", path.as_ref().display())
367 })?;
368 }
369 write!(buf, "]")?;
370 }
371 };
372
373 Ok(())
374 }
375
376 #[cfg(target_pointer_width = "64")]
377 pub fn store_usizes(
384 &self,
385 path: impl AsRef<Path>,
386 data: &[usize],
387 max: Option<usize>,
388 ) -> Result<()> {
389 self.store(
390 path,
391 unsafe { core::mem::transmute::<&[usize], &[u64]>(data) },
392 max.map(|x| x as u64),
393 )
394 }
395}
396
397pub fn memory_usage_parser(arg: &str) -> anyhow::Result<MemoryUsage> {
405 const PREF_SYMS: [(&str, u64); 10] = [
406 ("ki", 1 << 10),
407 ("mi", 1 << 20),
408 ("gi", 1 << 30),
409 ("ti", 1 << 40),
410 ("pi", 1 << 50),
411 ("k", 1E3 as u64),
412 ("m", 1E6 as u64),
413 ("g", 1E9 as u64),
414 ("t", 1E12 as u64),
415 ("p", 1E15 as u64),
416 ];
417 let arg = arg.trim().to_ascii_lowercase();
418 ensure!(!arg.is_empty(), "empty string");
419
420 if arg.ends_with('%') {
421 let perc = arg[..arg.len() - 1].parse::<f64>()?;
422 ensure!((0.0..=100.0).contains(&perc), "percentage out of range");
423 return Ok(MemoryUsage::from_perc(perc));
424 }
425
426 let num_digits = arg
427 .chars()
428 .take_while(|c| c.is_ascii_digit() || *c == '.')
429 .count();
430
431 let number = arg[..num_digits].parse::<f64>()?;
432 let suffix = &arg[num_digits..].trim();
433
434 let prefix = suffix.strip_suffix('b').unwrap_or(suffix);
435 let multiplier = PREF_SYMS
436 .iter()
437 .find(|(x, _)| *x == prefix)
438 .map(|(_, m)| m)
439 .ok_or(anyhow!("invalid prefix symbol {}", suffix))?;
440
441 let value = (number * (*multiplier as f64)) as usize;
442 ensure!(value > 0, "batch size must be greater than zero");
443
444 if suffix.ends_with('b') {
445 Ok(MemoryUsage::MemorySize(value))
446 } else {
447 Ok(MemoryUsage::BatchSize(value))
448 }
449}
450
// Shared command-line options controlling graph compression; they are turned
// into `CompFlags` by the `From<CompressArgs> for CompFlags` implementation
// (which ignores `endianness`, `bvgraphz` and `chunk_size`).
//
// Plain `//` comments are used on purpose: doc comments on `clap` items would
// become part of the generated command-line help.
#[derive(Args, Debug, Clone)]
pub struct CompressArgs {
    // `-E`/`--endianness`, optional.
    // NOTE(review): accepted values and the default are decided by the
    // consumers of this struct — confirm there.
    #[clap(short = 'E', long)]
    pub endianness: Option<String>,

    // `-w`/`--compression-window`, defaults to 7.
    #[clap(short = 'w', long, default_value_t = 7)]
    pub compression_window: usize,
    // `-i`/`--min-interval-length`, defaults to 4.
    #[clap(short = 'i', long, default_value_t = 4)]
    pub min_interval_length: usize,
    // `-r`/`--max-ref-count`, defaults to 3; -1 is converted to `usize::MAX`
    // (no limit), any other negative value makes the conversion panic.
    #[clap(short = 'r', long, default_value_t = 3)]
    pub max_ref_count: isize,

    // `--outdegrees`: code for the outdegrees, defaults to gamma.
    #[arg(value_enum)]
    #[clap(long, default_value = "gamma")]
    pub outdegrees: PrivCode,

    // `--references`: code for the references, defaults to unary.
    #[arg(value_enum)]
    #[clap(long, default_value = "unary")]
    pub references: PrivCode,

    // `--blocks`: code for the blocks, defaults to gamma.
    #[arg(value_enum)]
    #[clap(long, default_value = "gamma")]
    pub blocks: PrivCode,

    // `--residuals`: code for the residuals, defaults to zeta3.
    #[arg(value_enum)]
    #[clap(long, default_value = "zeta3")]
    pub residuals: PrivCode,

    // `--bvgraphz` flag.
    // NOTE(review): semantics not visible in this file — confirm at the use
    // sites.
    #[clap(long)]
    pub bvgraphz: bool,

    // `--chunk-size`, defaults to 10000.
    // NOTE(review): semantics not visible in this file — confirm at the use
    // sites.
    #[clap(long, default_value = "10000")]
    pub chunk_size: usize,
}
498
499impl From<CompressArgs> for CompFlags {
500 fn from(value: CompressArgs) -> Self {
501 CompFlags {
502 outdegrees: value.outdegrees.into(),
503 references: value.references.into(),
504 blocks: value.blocks.into(),
505 intervals: PrivCode::Gamma.into(),
506 residuals: value.residuals.into(),
507 min_interval_length: value.min_interval_length,
508 compression_window: value.compression_window,
509 max_ref_count: match value.max_ref_count {
510 -1 => usize::MAX,
511 max_ref_count => {
512 assert!(
513 max_ref_count >= 0,
514 "max_ref_count cannot be negative, except for -1, which means infinite recursion depth, but got {}",
515 max_ref_count
516 );
517 value.max_ref_count as usize
518 }
519 },
520 }
521 }
522}
523
524pub fn get_thread_pool(num_threads: usize) -> rayon::ThreadPool {
526 rayon::ThreadPoolBuilder::new()
527 .num_threads(num_threads)
528 .build()
529 .expect("Failed to create thread pool")
530}
531
/// Returns a copy of `path` whose file name has `s` appended verbatim.
///
/// Unlike setting an extension, no separator is inserted: appending `".ef"`
/// to `dir/graph` yields `dir/graph.ef`, and appending `"-t"` yields
/// `dir/graph-t`.
///
/// # Panics
///
/// Panics if `path` has no file name; in debug builds it also panics if
/// `path` already has an extension.
pub fn append(path: impl AsRef<Path>, s: impl AsRef<str>) -> PathBuf {
    let base = path.as_ref();
    debug_assert!(base.extension().is_none());
    let mut new_name = base.file_name().unwrap().to_os_string();
    new_name.push(s.as_ref());
    base.with_file_name(new_name)
}
545
546pub fn create_parent_dir(file_path: impl AsRef<Path>) -> Result<()> {
548 if let Some(parent_dir) = file_path.as_ref().parent() {
550 std::fs::create_dir_all(parent_dir).with_context(|| {
551 format!(
552 "Failed to create the directory {:?}",
553 parent_dir.to_string_lossy()
554 )
555 })?;
556 }
557 Ok(())
558}
559
560fn parse_duration(value: &str) -> Result<Duration> {
570 if value.is_empty() {
571 bail!("Empty duration string, if you want every 0 milliseconds use `0`.");
572 }
573 let mut duration = Duration::from_secs(0);
574 let mut acc = String::new();
575 for c in value.chars() {
576 if c.is_ascii_digit() {
577 acc.push(c);
578 } else if c.is_whitespace() {
579 continue;
580 } else {
581 let dur = acc.parse::<u64>()?;
582 match c {
583 's' => duration += Duration::from_secs(dur),
584 'm' => duration += Duration::from_secs(dur * 60),
585 'h' => duration += Duration::from_secs(dur * 60 * 60),
586 'd' => duration += Duration::from_secs(dur * 60 * 60 * 24),
587 _ => return Err(anyhow!("Invalid duration suffix: {}", c)),
588 }
589 acc.clear();
590 }
591 }
592 if !acc.is_empty() {
593 let dur = acc.parse::<u64>()?;
594 duration += Duration::from_millis(dur);
595 }
596 Ok(duration)
597}
598
599pub fn init_env_logger() -> Result<()> {
602 use jiff::SpanRound;
603 use jiff::fmt::friendly::{Designator, Spacing, SpanPrinter};
604
605 let mut builder =
606 env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"));
607
608 let start = std::time::Instant::now();
609 let printer = SpanPrinter::new()
610 .spacing(Spacing::None)
611 .designator(Designator::Compact);
612 let span_round = SpanRound::new()
613 .largest(jiff::Unit::Day)
614 .smallest(jiff::Unit::Millisecond)
615 .days_are_24_hours();
616
617 builder.format(move |buf, record| {
618 let Ok(ts) = jiff::Timestamp::try_from(SystemTime::now()) else {
619 return Err(std::io::Error::other("Failed to get timestamp"));
620 };
621 let style = buf.default_level_style(record.level());
622 let elapsed = start.elapsed();
623 let span = jiff::Span::new()
624 .seconds(elapsed.as_secs() as i64)
625 .milliseconds(elapsed.subsec_millis() as i64);
626 let span = span.round(span_round).expect("Failed to round span");
627 writeln!(
628 buf,
629 "{} {} {style}{}{style:#} [{:?}] {} - {}",
630 ts.strftime("%F %T%.3f"),
631 printer.span_to_string(&span),
632 record.level(),
633 std::thread::current().id(),
634 record.target(),
635 record.args()
636 )
637 });
638 builder.init();
639 Ok(())
640}
641
// Options shared by all subcommands (flattened into `Cli`).
//
// Plain `//` comments are used on purpose: doc comments on `clap` items would
// become part of the generated command-line help.
#[derive(Args, Debug)]
pub struct GlobalArgs {
    // `--log-interval`: optional duration parsed by `parse_duration`; it is
    // declared global (accepted by every subcommand) and is displayed late in
    // the help (`display_order = 1000`).
    // NOTE(review): presumably the interval between progress-log lines —
    // confirm at the use sites.
    #[arg(long, value_parser = parse_duration, global=true, display_order = 1000)]
    pub log_interval: Option<Duration>,
}
652
// Top-level subcommands of the command-line interface.
//
// Each variant wraps the `SubCommands` type of the module with the same
// (lowercase) name, so every top-level command is itself a family of
// subcommands; `cli_main` dispatches each variant to the `main` function of
// that module.
//
// Plain `//` comments are used on purpose: doc comments on `clap` items would
// become part of the generated command-line help.
#[derive(Subcommand, Debug)]
pub enum SubCommands {
    #[command(subcommand)]
    Analyze(analyze::SubCommands),
    #[command(subcommand)]
    Bench(bench::SubCommands),
    #[command(subcommand)]
    Build(build::SubCommands),
    #[command(subcommand)]
    Check(check::SubCommands),
    #[command(subcommand)]
    From(from::SubCommands),
    #[command(subcommand)]
    Perm(perm::SubCommands),
    #[command(subcommand)]
    Run(run::SubCommands),
    #[command(subcommand)]
    To(to::SubCommands),
    #[command(subcommand)]
    Transform(transform::SubCommands),
}
674
// Root of the command-line interface: the `webgraph` command.
//
// The version string comes from `build_info::version_string`, and the
// documentation attribute is taken from the `common_env.txt` file.
//
// Plain `//` comments are used on purpose: doc comments on `clap` items would
// become part of the generated command-line help.
#[derive(Parser, Debug)]
#[command(name = "webgraph", version=build_info::version_string())]
#[doc = include_str!("common_env.txt")]
pub struct Cli {
    // The selected top-level subcommand.
    #[command(subcommand)]
    pub command: SubCommands,
    // Options shared by all subcommands.
    #[clap(flatten)]
    pub args: GlobalArgs,
}
685
// Additional public modules.
// NOTE(review): not referenced in this file; presumably shared by several
// subcommands — confirm.
pub mod dist;
pub mod sccs;

// One module per top-level subcommand: each provides the `SubCommands` type
// wrapped by the `SubCommands` enum of this file and the `main` function
// invoked by `cli_main`.
pub mod analyze;
pub mod bench;
pub mod build;
pub mod check;
pub mod from;
pub mod perm;
pub mod run;
pub mod to;
pub mod transform;
698
699pub fn cli_main<I, T>(args: I) -> Result<()>
701where
702 I: IntoIterator<Item = T>,
703 T: Into<std::ffi::OsString> + Clone,
704{
705 let start = std::time::Instant::now();
706 let cli = Cli::parse_from(args);
707 match cli.command {
708 SubCommands::Analyze(args) => {
709 analyze::main(cli.args, args)?;
710 }
711 SubCommands::Bench(args) => {
712 bench::main(cli.args, args)?;
713 }
714 SubCommands::Build(args) => {
715 build::main(cli.args, args, Cli::command())?;
716 }
717 SubCommands::Check(args) => {
718 check::main(cli.args, args)?;
719 }
720 SubCommands::From(args) => {
721 from::main(cli.args, args)?;
722 }
723 SubCommands::Perm(args) => {
724 perm::main(cli.args, args)?;
725 }
726 SubCommands::Run(args) => {
727 run::main(cli.args, args)?;
728 }
729 SubCommands::To(args) => {
730 to::main(cli.args, args)?;
731 }
732 SubCommands::Transform(args) => {
733 transform::main(cli.args, args)?;
734 }
735 }
736
737 log::info!(
738 "The command took {}",
739 pretty_print_elapsed(start.elapsed().as_secs_f64())
740 );
741
742 Ok(())
743}
744
/// Formats a number of seconds in a human-readable way.
///
/// The result lists the non-zero weeks, days, hours and minutes (with the
/// proper singular or plural form), followed by the seconds with three
/// decimal digits and, in parentheses, the whole duration in seconds; for
/// example, `1 minute 1.500 seconds (61.5s)`.
fn pretty_print_elapsed(elapsed: f64) -> String {
    // Units from the largest to the smallest, with their length in seconds.
    const UNITS: [(u64, &str); 4] = [
        (60 * 60 * 24 * 7, "week"),
        (60 * 60 * 24, "day"),
        (60 * 60, "hour"),
        (60, "minute"),
    ];

    let mut text = String::new();
    let mut remaining = elapsed as u64;
    for &(unit_secs, unit_name) in UNITS.iter() {
        let count = remaining / unit_secs;
        remaining %= unit_secs;
        if count == 1 {
            text.push_str(&format!("1 {} ", unit_name));
        } else if count > 1 {
            text.push_str(&format!("{} {}s ", count, unit_name));
        }
    }

    // The seconds keep their fractional part, hence the floating-point
    // remainder.
    text.push_str(&format!("{:.3} seconds ({}s)", elapsed % 60.0, elapsed));
    text
}
782
783#[cfg(test)]
784mod tests {
785 use super::*;
786
787 mod float_vector_format {
788 use super::*;
789
790 #[test]
791 fn test_ascii_f64() {
792 let dir = tempfile::tempdir().unwrap();
793 let path = dir.path().join("test.txt");
794 let values: Vec<f64> = vec![1.5, 2.75, 3.0];
795 FloatVectorFormat::Ascii
796 .store(&path, &values, None)
797 .unwrap();
798 let content = std::fs::read_to_string(&path).unwrap();
799 for (line, expected) in content.lines().zip(&values) {
801 let parsed: f64 = line.trim().parse().unwrap();
802 assert!((parsed - expected).abs() < 1e-10);
803 }
804 assert_eq!(content.lines().count(), 3);
805 }
806
807 #[test]
808 fn test_ascii_f32() {
809 let dir = tempfile::tempdir().unwrap();
810 let path = dir.path().join("test.txt");
811 let values: Vec<f32> = vec![1.5, 2.75, 3.0];
812 FloatVectorFormat::Ascii
813 .store(&path, &values, None)
814 .unwrap();
815 let content = std::fs::read_to_string(&path).unwrap();
816 for (line, expected) in content.lines().zip(&values) {
817 let parsed: f32 = line.trim().parse().unwrap();
818 assert!((parsed - expected).abs() < 1e-6);
819 }
820 }
821
822 #[test]
823 fn test_ascii_with_precision() {
824 let dir = tempfile::tempdir().unwrap();
825 let path = dir.path().join("test.txt");
826 let values: Vec<f64> = vec![1.123456789, 2.987654321];
827 FloatVectorFormat::Ascii
828 .store(&path, &values, Some(3))
829 .unwrap();
830 let content = std::fs::read_to_string(&path).unwrap();
831 let lines: Vec<&str> = content.lines().collect();
832 assert_eq!(lines[0], "1.123");
833 assert_eq!(lines[1], "2.988");
834 }
835
836 #[test]
837 fn test_json_f64() {
838 let dir = tempfile::tempdir().unwrap();
839 let path = dir.path().join("test.json");
840 let values: Vec<f64> = vec![1.5, 2.75, 3.0];
841 FloatVectorFormat::Json.store(&path, &values, None).unwrap();
842 let content = std::fs::read_to_string(&path).unwrap();
843 let parsed: Vec<f64> = serde_json::from_str(&content).unwrap();
844 assert_eq!(parsed, values);
845 }
846
847 #[test]
848 fn test_json_with_precision() {
849 let dir = tempfile::tempdir().unwrap();
850 let path = dir.path().join("test.json");
851 let values: Vec<f64> = vec![1.123456789, 2.987654321];
852 FloatVectorFormat::Json
853 .store(&path, &values, Some(2))
854 .unwrap();
855 let content = std::fs::read_to_string(&path).unwrap();
856 assert_eq!(content, "[1.12, 2.99]");
857 }
858
859 #[test]
860 fn test_json_empty() {
861 let dir = tempfile::tempdir().unwrap();
862 let path = dir.path().join("test.json");
863 let values: Vec<f64> = vec![];
864 FloatVectorFormat::Json.store(&path, &values, None).unwrap();
865 let content = std::fs::read_to_string(&path).unwrap();
866 assert_eq!(content, "[]");
867 }
868
869 #[test]
870 fn test_json_single_element() {
871 let dir = tempfile::tempdir().unwrap();
872 let path = dir.path().join("test.json");
873 let values: Vec<f64> = vec![42.0];
874 FloatVectorFormat::Json.store(&path, &values, None).unwrap();
875 let content = std::fs::read_to_string(&path).unwrap();
876 let parsed: Vec<f64> = serde_json::from_str(&content).unwrap();
877 assert_eq!(parsed, values);
878 }
879
880 #[test]
881 fn test_java_f64() {
882 let dir = tempfile::tempdir().unwrap();
883 let path = dir.path().join("test.bin");
884 let values: Vec<f64> = vec![1.5, 2.75, 3.0];
885 FloatVectorFormat::Java.store(&path, &values, None).unwrap();
886 let bytes = std::fs::read(&path).unwrap();
887 assert_eq!(bytes.len(), 3 * 8);
888 for (i, expected) in values.iter().enumerate() {
889 let chunk: [u8; 8] = bytes[i * 8..(i + 1) * 8].try_into().unwrap();
890 let val = f64::from_be_bytes(chunk);
891 assert_eq!(val, *expected);
892 }
893 }
894
895 #[test]
896 fn test_java_f32() {
897 let dir = tempfile::tempdir().unwrap();
898 let path = dir.path().join("test.bin");
899 let values: Vec<f32> = vec![1.5, 2.75, 3.0];
900 FloatVectorFormat::Java.store(&path, &values, None).unwrap();
901 let bytes = std::fs::read(&path).unwrap();
902 assert_eq!(bytes.len(), 3 * 4);
903 for (i, expected) in values.iter().enumerate() {
904 let chunk: [u8; 4] = bytes[i * 4..(i + 1) * 4].try_into().unwrap();
905 let val = f32::from_be_bytes(chunk);
906 assert_eq!(val, *expected);
907 }
908 }
909
910 #[test]
911 fn test_epserde_f64() {
912 let dir = tempfile::tempdir().unwrap();
913 let path = dir.path().join("test.bin");
914 let values: Vec<f64> = vec![1.5, 2.75, 3.0];
915 FloatVectorFormat::Epserde
916 .store(&path, &values, None)
917 .unwrap();
918 let metadata = std::fs::metadata(&path).unwrap();
920 assert!(metadata.len() > 0);
921 }
922
923 #[test]
924 fn test_ascii_empty() {
925 let dir = tempfile::tempdir().unwrap();
926 let path = dir.path().join("test.txt");
927 let values: Vec<f64> = vec![];
928 FloatVectorFormat::Ascii
929 .store(&path, &values, None)
930 .unwrap();
931 let content = std::fs::read_to_string(&path).unwrap();
932 assert!(content.is_empty());
933 }
934
935 #[test]
936 fn test_creates_parent_dirs() {
937 let dir = tempfile::tempdir().unwrap();
938 let path = dir.path().join("a").join("b").join("test.txt");
939 let values: Vec<f64> = vec![1.0];
940 FloatVectorFormat::Ascii
941 .store(&path, &values, None)
942 .unwrap();
943 assert!(path.exists());
944 }
945 }
946
947 mod int_vector_format {
948 use super::*;
949
950 #[test]
951 fn test_ascii() {
952 let dir = tempfile::tempdir().unwrap();
953 let path = dir.path().join("test.txt");
954 let data: Vec<u64> = vec![10, 20, 30];
955 IntVectorFormat::Ascii.store(&path, &data, None).unwrap();
956 let content = std::fs::read_to_string(&path).unwrap();
957 let lines: Vec<u64> = content.lines().map(|l| l.trim().parse().unwrap()).collect();
958 assert_eq!(lines, data);
959 }
960
961 #[test]
962 fn test_ascii_empty() {
963 let dir = tempfile::tempdir().unwrap();
964 let path = dir.path().join("test.txt");
965 let data: Vec<u64> = vec![];
966 IntVectorFormat::Ascii.store(&path, &data, None).unwrap();
967 let content = std::fs::read_to_string(&path).unwrap();
968 assert!(content.is_empty());
969 }
970
971 #[test]
972 fn test_json() {
973 let dir = tempfile::tempdir().unwrap();
974 let path = dir.path().join("test.json");
975 let data: Vec<u64> = vec![10, 20, 30];
976 IntVectorFormat::Json.store(&path, &data, None).unwrap();
977 let content = std::fs::read_to_string(&path).unwrap();
978 let parsed: Vec<u64> = serde_json::from_str(&content).unwrap();
979 assert_eq!(parsed, data);
980 }
981
982 #[test]
983 fn test_json_empty() {
984 let dir = tempfile::tempdir().unwrap();
985 let path = dir.path().join("test.json");
986 let data: Vec<u64> = vec![];
987 IntVectorFormat::Json.store(&path, &data, None).unwrap();
988 let content = std::fs::read_to_string(&path).unwrap();
989 assert_eq!(content, "[]");
990 }
991
992 #[test]
993 fn test_json_single_element() {
994 let dir = tempfile::tempdir().unwrap();
995 let path = dir.path().join("test.json");
996 let data: Vec<u64> = vec![42];
997 IntVectorFormat::Json.store(&path, &data, None).unwrap();
998 let content = std::fs::read_to_string(&path).unwrap();
999 let parsed: Vec<u64> = serde_json::from_str(&content).unwrap();
1000 assert_eq!(parsed, data);
1001 }
1002
1003 #[test]
1004 fn test_java() {
1005 let dir = tempfile::tempdir().unwrap();
1006 let path = dir.path().join("test.bin");
1007 let data: Vec<u64> = vec![1, 256, 65535];
1008 IntVectorFormat::Java.store(&path, &data, None).unwrap();
1009 let bytes = std::fs::read(&path).unwrap();
1010 assert_eq!(bytes.len(), 3 * 8);
1011 for (i, expected) in data.iter().enumerate() {
1012 let chunk: [u8; 8] = bytes[i * 8..(i + 1) * 8].try_into().unwrap();
1013 let val = u64::from_be_bytes(chunk);
1014 assert_eq!(val, *expected);
1015 }
1016 }
1017
1018 #[test]
1019 fn test_java_empty() {
1020 let dir = tempfile::tempdir().unwrap();
1021 let path = dir.path().join("test.bin");
1022 let data: Vec<u64> = vec![];
1023 IntVectorFormat::Java.store(&path, &data, None).unwrap();
1024 let bytes = std::fs::read(&path).unwrap();
1025 assert!(bytes.is_empty());
1026 }
1027
1028 #[test]
1029 fn test_epserde() {
1030 let dir = tempfile::tempdir().unwrap();
1031 let path = dir.path().join("test.bin");
1032 let data: Vec<u64> = vec![10, 20, 30];
1033 IntVectorFormat::Epserde.store(&path, &data, None).unwrap();
1034 let metadata = std::fs::metadata(&path).unwrap();
1035 assert!(metadata.len() > 0);
1036 }
1037
1038 #[test]
1039 fn test_bitfieldvec() {
1040 let dir = tempfile::tempdir().unwrap();
1041 let path = dir.path().join("test.bin");
1042 let data: Vec<u64> = vec![1, 3, 7, 15];
1043 IntVectorFormat::BitFieldVec
1044 .store(&path, &data, Some(15))
1045 .unwrap();
1046 let metadata = std::fs::metadata(&path).unwrap();
1047 assert!(metadata.len() > 0);
1048 }
1049
1050 #[test]
1051 fn test_bitfieldvec_max_computed() {
1052 let dir = tempfile::tempdir().unwrap();
1053 let path = dir.path().join("test.bin");
1054 let data: Vec<u64> = vec![1, 3, 7, 15];
1055 IntVectorFormat::BitFieldVec
1057 .store(&path, &data, None)
1058 .unwrap();
1059 assert!(path.exists());
1060 }
1061
1062 #[test]
1063 fn test_creates_parent_dirs() {
1064 let dir = tempfile::tempdir().unwrap();
1065 let path = dir.path().join("a").join("b").join("test.txt");
1066 let data: Vec<u64> = vec![1];
1067 IntVectorFormat::Ascii.store(&path, &data, None).unwrap();
1068 assert!(path.exists());
1069 }
1070
1071 #[cfg(target_pointer_width = "64")]
1072 #[test]
1073 fn test_store_usizes() {
1074 let dir = tempfile::tempdir().unwrap();
1075 let path = dir.path().join("test.txt");
1076 let data: Vec<usize> = vec![10, 20, 30];
1077 IntVectorFormat::Ascii
1078 .store_usizes(&path, &data, None)
1079 .unwrap();
1080 let content = std::fs::read_to_string(&path).unwrap();
1081 let lines: Vec<usize> = content.lines().map(|l| l.trim().parse().unwrap()).collect();
1082 assert_eq!(lines, data);
1083 }
1084
1085 #[cfg(target_pointer_width = "64")]
1086 #[test]
1087 fn test_store_usizes_java() {
1088 let dir = tempfile::tempdir().unwrap();
1089 let path = dir.path().join("test.bin");
1090 let data: Vec<usize> = vec![1, 256, 65535];
1091 IntVectorFormat::Java
1092 .store_usizes(&path, &data, None)
1093 .unwrap();
1094 let bytes = std::fs::read(&path).unwrap();
1095 assert_eq!(bytes.len(), 3 * 8);
1096 for (i, expected) in data.iter().enumerate() {
1097 let chunk: [u8; 8] = bytes[i * 8..(i + 1) * 8].try_into().unwrap();
1098 let val = u64::from_be_bytes(chunk) as usize;
1099 assert_eq!(val, *expected);
1100 }
1101 }
1102 }
1103}