webgraph_cli/perm/
comp.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
3 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use crate::{create_parent_dir, GlobalArgs};
9use anyhow::{ensure, Result};
10use clap::Parser;
11use dsi_progress_logger::prelude::*;
12use epserde::prelude::*;
13use mmap_rs::MmapFlags;
14use std::io::{BufWriter, Write};
15use std::path::PathBuf;
16use value_traits::slices::SliceByValue;
17use webgraph::prelude::*;
18
19#[derive(Parser, Debug)]
20#[command(name="comp", about = "Compose multiple permutations into a single one", long_about = None)]
21pub struct CliArgs {
22    /// The filename of the resulting permutation in binary big-endian format.
23    pub dst: PathBuf,
24
25    /// Filenames of the permutations in binary big-endian format to compose (in order of application).
26    pub perms: Vec<PathBuf>,
27
28    #[arg(short, long)]
29    /// Load and store permutations in ε-serde format.
30    pub epserde: bool,
31}
32
33pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
34    create_parent_dir(&args.dst)?;
35
36    let mut pl = ProgressLogger::default();
37    pl.display_memory(true).item_name("indices");
38
39    if let Some(duration) = global_args.log_interval {
40        pl.log_interval(duration);
41    }
42
43    if args.epserde {
44        let mut perm = Vec::new();
45        for path in args.perms {
46            let p = unsafe { <Vec<usize>>::mmap(&path, Flags::RANDOM_ACCESS) }?;
47            perm.push(p);
48        }
49        let mut merged = Vec::new();
50        // TODO: reduce the number of uncase
51        ensure!(
52            perm.iter()
53                .all(|p| p.uncase().len() == perm[0].uncase().len()),
54            "All permutations must have the same length"
55        );
56
57        pl.start("Combining permutations...");
58        for i in 0..perm[0].uncase().len() {
59            let mut v = i;
60            for p in &perm {
61                v = p.uncase()[v];
62            }
63            merged.push(v);
64            pl.light_update();
65        }
66        pl.done();
67        unsafe { merged.store(&args.dst) }?;
68    } else {
69        let mut writer = BufWriter::new(std::fs::File::create(&args.dst)?);
70        let mut perm = Vec::new();
71        for path in args.perms {
72            let p = JavaPermutation::mmap(&path, MmapFlags::RANDOM_ACCESS)?;
73            perm.push(p);
74        }
75
76        ensure!(
77            perm.iter()
78                .all(|p| p.as_ref().len() == perm[0].as_ref().len()),
79            "All permutations must have the same length"
80        );
81
82        pl.start("Combining permutations...");
83        for i in 0..perm[0].as_ref().len() {
84            let mut v = i;
85            for p in &perm {
86                v = p.index_value(v);
87            }
88            writer.write_all(&(v as u64).to_be_bytes())?;
89            pl.light_update();
90        }
91        pl.done();
92    }
93    Ok(())
94}