webgraph_cli/to/
endianness.rs

/*
 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
 *
 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
 */
6
use crate::{create_parent_dir, GlobalArgs};
use anyhow::{bail, Context, Result};
use clap::Parser;
use dsi_bitstream::prelude::*;
use dsi_progress_logger::prelude::*;
use log::info;
use std::fs::File;
use std::io::BufWriter;
use std::path::PathBuf;
use webgraph::prelude::*;
17
18#[derive(Parser, Debug)]
19#[command(name = "endianness", about = "Inverts the endianness of a BvGraph.", long_about = None)]
20pub struct CliArgs {
21    /// The basename of the source graph.
22    pub src: PathBuf,
23    /// The basename of the destination graph.
24    pub dst: PathBuf,
25}
26
/// Re-encodes the graph with basename `$args.src`, stored with endianness
/// `$src`, into a graph with basename `$args.dst` stored with endianness
/// `$dst`.
///
/// The expansion is a sequence of statements that uses `?`, so it must be
/// placed inside a function returning `anyhow::Result`.
///
/// Arguments:
/// - `$global_args`: expression with a `log_interval` field (an `Option`)
///   used to configure the progress logger;
/// - `$args`: expression with `src` and `dst` path fields (the basenames);
/// - `$src`, `$dst`: the source and destination endianness types.
///
/// Files read: the source properties file and the source graph. Files
/// written: the destination properties, graph and offsets files (the
/// extensions are the `PROPERTIES_EXTENSION`, `GRAPH_EXTENSION` and
/// `OFFSETS_EXTENSION` constants).
macro_rules! impl_convert {
    ($global_args:expr, $args:expr, $src:ty, $dst:ty) => {
        info!(
            "The source graph was {}-endian, converting to {}-endian",
            <$src>::NAME,
            <$dst>::NAME
        );

        let src_properties_path = $args.src.with_extension(PROPERTIES_EXTENSION);
        let (num_nodes, num_arcs, comp_flags) =
            parse_properties::<$src>(&src_properties_path)?;
        // `parse_properties` does not hand back the bitstream length, so the
        // properties file is parsed a second time to extract it.
        let f = File::open(&src_properties_path).with_context(|| {
            format!(
                "Cannot open property file {}",
                src_properties_path.display()
            )
        })?;
        let map = java_properties::read(std::io::BufReader::new(f)).with_context(|| {
            format!(
                "cannot parse {} as a java properties file",
                src_properties_path.display()
            )
        })?;
        let bitstream_len = map
            .get("length")
            .with_context(|| {
                format!(
                    "Missing 'length' property in {}",
                    src_properties_path.display()
                )
            })?
            .parse::<u64>()
            .with_context(|| {
                format!(
                    "Cannot parse length as u64 in {}",
                    src_properties_path.display()
                )
            })?;

        // Progress is reported once per node, so the expected number of
        // updates is the number of nodes (not the number of arcs).
        let mut pl = ProgressLogger::default();
        pl.display_memory(true)
            .item_name("node")
            .expected_updates(Some(num_nodes));

        if let Some(duration) = $global_args.log_interval {
            pl.log_interval(duration);
        }

        let seq_graph = BvGraphSeq::with_basename(&$args.src)
            .endianness::<$src>()
            .load()
            .with_context(|| format!("Could not load graph {}", $args.src.display()))?;

        // Write the properties of the converted graph next to the destination
        // graph. The source properties file must not be touched: it still
        // describes the (unchanged) source graph.
        let dst_properties_path = $args.dst.with_extension(PROPERTIES_EXTENSION);
        std::fs::write(
            &dst_properties_path,
            comp_flags.to_properties::<$dst>(num_nodes, num_arcs, bitstream_len)?,
        )
        .with_context(|| {
            format!(
                "Could not write properties to {}",
                dst_properties_path.display()
            )
        })?;

        // Build the encoder with the opposite endianness.
        // NOTE(review): this writer is moved into the encoder and is never
        // flushed explicitly here; presumably it is flushed on drop — confirm.
        let target_graph_path = $args.dst.with_extension(GRAPH_EXTENSION);
        let writer = <BufBitWriter<$dst, _>>::new(<WordAdapter<usize, _>>::new(BufWriter::new(
            File::create(&target_graph_path)
                .with_context(|| format!("Could not create {}", target_graph_path.display()))?,
        )));
        let encoder = <DynCodesEncoder<$dst, _>>::new(writer, &comp_flags)?;

        // The offsets are written big-endian regardless of `$dst`.
        let offsets_path = $args.dst.with_extension(OFFSETS_EXTENSION);
        let mut offsets_writer =
            <BufBitWriter<BE, _>>::new(<WordAdapter<usize, _>>::new(BufWriter::new(
                File::create(&offsets_path)
                    .with_context(|| format!("Could not create {}", offsets_path.display()))?,
            )));

        pl.start("Inverting endianness...");

        // Wrap the source decoder in a `Converter` (defined elsewhere) that
        // also owns the destination encoder; presumably it re-encodes every
        // value it decodes and accumulates the bits written in `offset`.
        let mut iter = seq_graph
            .offset_deg_iter()
            .map_decoder(move |decoder| Converter {
                decoder,
                encoder,
                offset: 0,
            });

        // Offsets are stored as gamma-coded deltas between consecutive
        // positions in the destination bitstream.
        // NOTE(review): the offset is sampled *after* `next_degree()` has been
        // called for the node; confirm that this produces the delta sequence
        // (in particular the leading entry) expected by offsets readers.
        let mut offset = 0;
        for _ in 0..num_nodes {
            iter.next_degree()?;
            let new_offset = iter.get_decoder().offset;
            offsets_writer
                .write_gamma((new_offset - offset) as u64)
                .context("Could not write gamma")?;
            offset = new_offset;
            pl.light_update();
        }
        // One more delta is written after the last node, for a total of
        // `num_nodes + 1` entries.
        let new_offset = iter.get_decoder().offset;
        offsets_writer
            .write_gamma((new_offset - offset) as u64)
            .context("Could not write gamma")?;
        pl.light_update();
        pl.done();
        offsets_writer.flush().context("Could not flush offsets")?;
    };
}
125
126pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
127    create_parent_dir(&args.dst)?;
128
129    match get_endianness(&args.src)?.as_str() {
130        #[cfg(feature = "be_bins")]
131        BE::NAME => {
132            impl_convert!(global_args, args, BE, LE);
133        }
134        #[cfg(feature = "le_bins")]
135        LE::NAME => {
136            impl_convert!(global_args, args, LE, BE);
137        }
138        e => panic!("Unknown endianness: {}", e),
139    };
140
141    Ok(())
142}