Skip to main content

webgraph_cli/to/
endianness.rs

1/*
2* SPDX-FileCopyrightText: 2023 Tommaso Fontana
3*
4* SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
5*/
6
7use crate::{GlobalArgs, create_parent_dir};
8use anyhow::{Context, Result};
9use clap::Parser;
10use dsi_bitstream::prelude::*;
11use dsi_progress_logger::prelude::*;
12use log::info;
13use std::path::PathBuf;
14use webgraph::prelude::*;
15
16#[derive(Parser, Debug)]
17#[command(name = "endianness", about = "Inverts the endianness of a BvGraph.", long_about = None)]
18pub struct CliArgs {
19    /// The basename of the source graph.
20    pub src: PathBuf,
21    /// The basename of the destination graph.
22    pub dst: PathBuf,
23}
24
/// Re-encodes the graph at `$args.src`, read as `$src`-endian, into a
/// `$dst`-endian graph at `$args.dst`.
///
/// # Arguments
///
/// * `$global_args` - expression with a `log_interval` field; when it is
///   `Some`, the value is used as the progress logger's log interval.
/// * `$args` - expression with `src` and `dst` basenames (see `CliArgs`).
/// * `$src` - endianness type used to read the source graph.
/// * `$dst` - endianness type used to write the destination graph.
///
/// # Output files
///
/// All of them are derived from `$args.dst` via `with_extension`:
///
/// * the properties file, built from the source's node count, arc count,
///   compression flags and `length` property;
/// * the graph bitstream, written with a `$dst`-endian bit writer;
/// * the offsets file, a sequence of `num_nodes + 1` gamma-coded deltas that
///   is always written with a `BE` bit writer, whatever `$dst` is.
///
/// # Errors
///
/// The expansion is a block evaluating to a `Result<()>` (the outcome of the
/// final flush of the offsets writer). It uses `?` internally, so it can only
/// be expanded inside a function returning a compatible `Result`.
macro_rules! impl_convert {
    ($global_args:expr, $args:expr, $src:ty, $dst:ty) => {{
        info!(
            "The source graph was {}-endian, converting to {}-endian",
            <$src>::NAME,
            <$dst>::NAME
        );

        let src_properties_path = $args.src.with_extension(PROPERTIES_EXTENSION);
        let dst_properties_path = $args.dst.with_extension(PROPERTIES_EXTENSION);
        let (num_nodes, num_arcs, comp_flags) = parse_properties::<$src>(&src_properties_path)?;

        // `parse_properties` does not hand back the bitstream length, so the
        // properties file is read a second time to extract `length`.
        let f = std::fs::File::open(&src_properties_path).with_context(|| {
            format!(
                "Cannot open property file {}",
                &src_properties_path.display()
            )
        })?;
        let map = java_properties::read(std::io::BufReader::new(f)).with_context(|| {
            format!(
                "cannot parse {} as a java properties file",
                &src_properties_path.display()
            )
        })?;
        let bitstream_len = map
            .get("length")
            .with_context(|| {
                format!(
                    "Missing 'length' property in {}",
                    &src_properties_path.display()
                )
            })?
            .parse::<u64>()
            .with_context(|| {
                format!(
                    "Cannot parse length as u64 in {}",
                    &src_properties_path.display()
                )
            })?;

        let mut pl = ProgressLogger::default();
        pl.display_memory(true)
            .item_name("node")
            .expected_updates(Some(num_nodes as usize));

        if let Some(duration) = $global_args.log_interval {
            pl.log_interval(duration);
        }

        let seq_graph = BvGraphSeq::with_basename(&$args.src)
            .endianness::<$src>()
            .load()
            .with_context(|| format!("Could not load graph {}", $args.src.display()))?;

        // The destination properties reuse the source's counts, flags and
        // bitstream length; only the endianness type parameter differs.
        std::fs::write(
            &dst_properties_path,
            comp_flags.to_properties::<$dst>(num_nodes, num_arcs, bitstream_len)?,
        )
        .with_context(|| {
            format!(
                "Could not write properties to {}",
                dst_properties_path.display()
            )
        })?;

        // Encoder for the destination graph, using the opposite endianness.
        let target_graph_path = $args.dst.with_extension(GRAPH_EXTENSION);
        let writer = buf_bit_writer::from_path::<$dst, usize>(&target_graph_path)
            .with_context(|| format!("Could not create {}", target_graph_path.display()))?;
        let encoder = <DynCodesEncoder<$dst, _>>::new(writer, &comp_flags)?;

        let offsets_path = $args.dst.with_extension(OFFSETS_EXTENSION);
        let mut offsets_writer = buf_bit_writer::from_path::<BE, usize>(&offsets_path)
            .with_context(|| format!("Could not create {}", offsets_path.display()))?;

        pl.start("Inverting endianness...");

        // Wrap the source decoder in a `Converter` that also owns the
        // destination encoder.
        // NOTE(review): this relies on `Converter` forwarding every decoded
        // code to `encoder` and keeping the number of bits written so far in
        // `offset` -- confirm against its definition.
        let mut iter = seq_graph
            .offset_deg_iter()
            .map_decoder(move |decoder| Converter {
                decoder,
                encoder,
                offset: 0,
            });

        // The position is sampled *before* each node is decoded, so the first
        // delta is 0 and every following delta is the size of the previous
        // node. Sampling it only after `next_degree()` shifted the whole
        // sequence by one entry and left a trailing delta that was always 0.
        let mut offset = 0;
        for _ in 0..num_nodes {
            let new_offset = iter.get_decoder().offset;
            offsets_writer
                .write_gamma((new_offset - offset) as u64)
                .context("Could not write gamma")?;
            offset = new_offset;
            iter.next_degree()?;
            pl.light_update();
        }
        // Closing entry: the size of the last node (or a lone 0 when the
        // graph has no nodes), for a total of `num_nodes + 1` deltas.
        let new_offset = iter.get_decoder().offset;
        offsets_writer
            .write_gamma((new_offset - offset) as u64)
            .context("Could not write gamma")?;
        // No progress update here: the logger counts nodes, and exactly
        // `num_nodes` of them have been reported by the loop above.
        pl.done();
        offsets_writer
            .flush()
            .context("Could not flush offsets")
            .map(|_| ())
    }};
}
131
132pub fn main(global_args: GlobalArgs, args: CliArgs) -> Result<()> {
133    create_parent_dir(&args.dst)?;
134
135    match get_endianness(&args.src)?.as_str() {
136        #[cfg(feature = "be_bins")]
137        BE::NAME => impl_convert!(global_args, args, BE, LE),
138        #[cfg(feature = "le_bins")]
139        LE::NAME => impl_convert!(global_args, args, LE, BE),
140        e => panic!("Unknown endianness: {}", e),
141    }
142}