use anyhow::{Result, bail, ensure};
use dsi_bitstream::dispatch::Codes;
use dsi_bitstream::traits::{BigEndian, Endianness, LittleEndian};
use std::collections::HashMap;
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "fuzz", derive(arbitrary::Arbitrary))]
pub struct CompFlags {
pub outdegrees: Codes,
pub references: Codes,
pub blocks: Codes,
pub intervals: Codes,
pub residuals: Codes,
pub min_interval_length: usize,
pub compression_window: usize,
pub max_ref_count: usize,
}
impl core::default::Default for CompFlags {
fn default() -> Self {
CompFlags {
outdegrees: Codes::Gamma,
references: Codes::Unary,
blocks: Codes::Gamma,
intervals: Codes::Gamma,
residuals: Codes::Zeta(3),
min_interval_length: 4,
compression_window: 7,
max_ref_count: 3,
}
}
}
const OLD_CODES: [Codes; 10] = [
Codes::Unary,
Codes::Gamma,
Codes::Delta,
Codes::Zeta(1),
Codes::Zeta(2),
Codes::Zeta(3),
Codes::Zeta(4),
Codes::Zeta(5),
Codes::Zeta(6),
Codes::Zeta(7),
];
impl CompFlags {
pub fn code_from_str(s: &str, k: usize) -> Option<Codes> {
match s.to_uppercase().as_str() {
"UNARY" => Some(Codes::Unary),
"GAMMA" => Some(Codes::Gamma),
"DELTA" => Some(Codes::Delta),
"ZETA" => Some(Codes::Zeta(k)),
"PI1" => Some(Codes::Pi(1)),
"PI2" => Some(Codes::Pi(2)),
"PI3" => Some(Codes::Pi(3)),
"PI4" => Some(Codes::Pi(4)),
"ZETA1" => Some(Codes::Zeta(1)),
"ZETA2" => Some(Codes::Zeta(2)),
"ZETA3" => Some(Codes::Zeta(3)),
"ZETA4" => Some(Codes::Zeta(4)),
"ZETA5" => Some(Codes::Zeta(5)),
"ZETA6" => Some(Codes::Zeta(6)),
"ZETA7" => Some(Codes::Zeta(7)),
_ => None,
}
}
pub fn code_to_str(c: Codes, version: usize) -> Option<&'static str> {
if version == 0 {
match c {
Codes::Unary => Some("UNARY"),
Codes::Gamma => Some("GAMMA"),
Codes::Delta => Some("DELTA"),
Codes::Zeta(_) => Some("ZETA"),
_ => unimplemented!("Code {:?} not supported", c),
}
} else {
match c {
Codes::Unary => Some("UNARY"),
Codes::Gamma => Some("GAMMA"),
Codes::Delta => Some("DELTA"),
Codes::Zeta(1) => Some("ZETA1"),
Codes::Zeta(2) => Some("ZETA2"),
Codes::Zeta(3) => Some("ZETA3"),
Codes::Zeta(4) => Some("ZETA4"),
Codes::Zeta(5) => Some("ZETA5"),
Codes::Zeta(6) => Some("ZETA6"),
Codes::Zeta(7) => Some("ZETA7"),
Codes::Pi(1) => Some("PI1"),
Codes::Pi(2) => Some("PI2"),
Codes::Pi(3) => Some("PI3"),
Codes::Pi(4) => Some("PI4"),
_ => unimplemented!("Code {:?} not supported", c),
}
}
}
fn contains_new_codes(&self) -> bool {
!OLD_CODES.contains(&self.outdegrees)
|| !OLD_CODES.contains(&self.references)
|| !OLD_CODES.contains(&self.blocks)
|| !OLD_CODES.contains(&self.intervals)
|| !OLD_CODES.contains(&self.residuals)
}
pub fn to_properties<E: Endianness>(
&self,
num_nodes: usize,
num_arcs: u64,
bitstream_len: u64,
) -> Result<String> {
let mut s = String::new();
s.push_str("#BVGraph properties\n");
s.push_str("graphclass=it.unimi.dsi.webgraph.BVGraph\n");
let version = (core::any::TypeId::of::<E>() != core::any::TypeId::of::<BigEndian>()
|| self.contains_new_codes()) as usize;
s.push_str(&format!("version={version}\n"));
s.push_str(&format!("endianness={}\n", E::NAME));
s.push_str(&format!("nodes={num_nodes}\n"));
s.push_str(&format!("arcs={num_arcs}\n"));
s.push_str(&format!("minintervallength={}\n", self.min_interval_length));
s.push_str(&format!("maxrefcount={}\n", self.max_ref_count));
s.push_str(&format!("windowsize={}\n", self.compression_window));
s.push_str(&format!(
"bitsperlink={}\n",
bitstream_len as f64 / num_arcs as f64
));
s.push_str(&format!(
"bitspernode={}\n",
bitstream_len as f64 / num_nodes as f64
));
s.push_str(&format!("length={bitstream_len}\n"));
fn stirling(n: u64) -> f64 {
let n = n as f64;
n * (n.ln() - 1.0) + 0.5 * (2.0 * std::f64::consts::PI * n).ln()
}
let n_squared = (num_nodes * num_nodes) as u64;
let theoretical_bound =
(stirling(n_squared) - stirling(num_arcs) - stirling(n_squared - num_arcs))
/ 2.0_f64.ln();
s.push_str(&format!(
"compratio={:.3}\n",
bitstream_len as f64 / theoretical_bound
));
s.push_str("compressionflags=");
let mut comp_flags = false;
if self.outdegrees != Codes::Gamma {
s.push_str(&format!(
"OUTDEGREES_{}|",
Self::code_to_str(self.outdegrees, version).unwrap()
));
comp_flags = true;
}
if self.references != Codes::Unary {
s.push_str(&format!(
"REFERENCES_{}|",
Self::code_to_str(self.references, version).unwrap()
));
comp_flags = true;
}
if self.blocks != Codes::Gamma {
s.push_str(&format!(
"BLOCKS_{}|",
Self::code_to_str(self.blocks, version).unwrap()
));
comp_flags = true;
}
if self.intervals != Codes::Gamma {
s.push_str(&format!(
"INTERVALS_{}|",
Self::code_to_str(self.intervals, version).unwrap()
));
comp_flags = true;
}
if (version == 0 && !matches!(self.residuals, Codes::Zeta(_)))
|| self.residuals != (Codes::Zeta(3))
{
s.push_str(&format!(
"RESIDUALS_{}|",
Self::code_to_str(self.residuals, version).unwrap()
));
comp_flags = true;
}
if comp_flags {
s.pop();
}
s.push('\n');
if version == 0 {
let mut k = None;
macro_rules! check_and_set_k {
($code:expr) => {
match $code {
Codes::Zeta(new_k) => {
if let Some(old_k) = k {
ensure!(old_k == new_k, "Only one value of k is supported")
}
k = Some(new_k)
}
_ => {}
}
};
}
check_and_set_k!(self.outdegrees);
check_and_set_k!(self.references);
check_and_set_k!(self.blocks);
check_and_set_k!(self.intervals);
check_and_set_k!(self.residuals);
s.push_str(&format!("zetak={}\n", k.unwrap_or(3)));
}
Ok(s)
}
pub fn from_properties<E: Endianness>(map: &HashMap<String, String>) -> Result<Self> {
let endianness = map
.get("endianness")
.map(|x| x.to_string())
.unwrap_or_else(|| BigEndian::NAME.to_string());
anyhow::ensure!(
endianness == E::NAME,
"Wrong endianness, got {} while expected {}",
endianness,
E::NAME
);
if core::any::TypeId::of::<E>() == core::any::TypeId::of::<LittleEndian>() {
anyhow::ensure!(
map.get("version").map(|x| x.parse::<u32>().unwrap()) == Some(1),
"Wrong version, got {} while expected 1",
map.get("version").unwrap_or(&"None".to_string())
);
}
let mut cf = CompFlags::default();
let mut k = 3;
if let Some(spec_k) = map.get("zetak") {
let spec_k = spec_k.parse::<usize>()?;
if !(1..=7).contains(&spec_k) {
bail!("Only ζ₁-ζ₇ are supported");
}
k = spec_k;
}
if let Some(comp_flags) = map.get("compressionflags") {
if !comp_flags.is_empty() {
for flag in comp_flags.split('|') {
let s: Vec<_> = flag.split('_').collect();
let code = CompFlags::code_from_str(s[1], k).unwrap();
match s[0] {
"OUTDEGREES" => cf.outdegrees = code,
"REFERENCES" => cf.references = code,
"BLOCKS" => cf.blocks = code,
"INTERVALS" => cf.intervals = code,
"RESIDUALS" => cf.residuals = code,
"OFFSETS" => {
ensure!(code == Codes::Gamma, "Only γ code is supported for offsets")
}
_ => bail!("Unknown compression flag {}", flag),
}
}
}
}
if let Some(compression_window) = map.get("windowsize") {
cf.compression_window = compression_window.parse()?;
}
if let Some(min_interval_length) = map.get("minintervallength") {
cf.min_interval_length = min_interval_length.parse()?;
}
if let Some(max_ref_count) = map.get("maxrefcount") {
cf.max_ref_count = max_ref_count.parse()?;
}
Ok(cf)
}
}