use crate::error::GraphError;
use scirs2_core::ndarray::Array2;
use std::fs::File;
use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
use std::mem;
use std::path::Path;
#[derive(Debug, Clone)]
pub struct CSRGraph {
n_nodes: usize,
n_edges: usize,
row_ptr: Vec<usize>,
col_idx: Vec<usize>,
weights: Vec<f64>,
}
impl CSRGraph {
pub fn from_edges(n_nodes: usize, edges: Vec<(usize, usize, f64)>) -> Result<Self, GraphError> {
let n_edges = edges.len();
let mut col_idx = Vec::with_capacity(n_edges);
let mut weights = Vec::with_capacity(n_edges);
let mut degree = vec![0; n_nodes];
for &(src, dst_, _) in &edges {
if src >= n_nodes {
return Err(GraphError::node_not_found_with_context(
src,
n_nodes,
"CSR graph construction",
));
}
if dst_ >= n_nodes {
return Err(GraphError::node_not_found_with_context(
dst_,
n_nodes,
"CSR graph construction",
));
}
degree[src] += 1;
}
let mut row_ptr = Vec::with_capacity(n_nodes + 1);
row_ptr.push(0);
for ° in °ree {
row_ptr.push(row_ptr.last().expect("Operation failed") + deg);
}
col_idx.resize(n_edges, 0);
weights.resize(n_edges, 0.0);
let mut current_pos = row_ptr.clone();
current_pos.pop();
for (src, dst, weight) in edges {
let pos = current_pos[src];
col_idx[pos] = dst;
weights[pos] = weight;
current_pos[src] += 1;
}
for node in 0..n_nodes {
let start = row_ptr[node];
let end = row_ptr[node + 1];
if end > start {
let mut pairs: Vec<(usize, f64)> = col_idx[start..end]
.iter()
.zip(&weights[start..end])
.map(|(&c, &w)| (c, w))
.collect();
pairs.sort_unstable_by_key(|&(col_, _)| col_);
for (i, (col, weight)) in pairs.into_iter().enumerate() {
col_idx[start + i] = col;
weights[start + i] = weight;
}
}
}
Ok(CSRGraph {
n_nodes,
n_edges,
row_ptr,
col_idx,
weights,
})
}
pub fn with_capacity(n_nodes: usize, estimated_edges: usize) -> Self {
CSRGraph {
n_nodes,
n_edges: 0,
row_ptr: vec![0; n_nodes + 1],
col_idx: Vec::with_capacity(estimated_edges),
weights: Vec::with_capacity(estimated_edges),
}
}
pub fn neighbors(&self, node: usize) -> impl Iterator<Item = (usize, f64)> + '_ {
let start = self.row_ptr[node];
let end = self.row_ptr[node + 1];
self.col_idx[start..end]
.iter()
.zip(&self.weights[start..end])
.map(|(&idx, &weight)| (idx, weight))
}
pub fn degree(&self, node: usize) -> usize {
self.row_ptr[node + 1] - self.row_ptr[node]
}
pub fn node_count(&self) -> usize {
self.n_nodes
}
pub fn memory_usage(&self) -> usize {
mem::size_of_val(&self.n_nodes)
+ mem::size_of_val(&self.n_edges)
+ mem::size_of_val(&self.row_ptr[..])
+ mem::size_of_val(&self.col_idx[..])
+ mem::size_of_val(&self.weights[..])
}
pub fn to_adjacency_matrix(&self) -> Array2<f64> {
let mut matrix = Array2::zeros((self.n_nodes, self.n_nodes));
for src in 0..self.n_nodes {
for (dst, weight) in self.neighbors(src) {
matrix[[src, dst]] = weight;
}
}
matrix
}
}
#[derive(Debug, Clone)]
pub struct BitPackedGraph {
n_nodes: usize,
bits: Vec<u64>,
directed: bool,
}
impl BitPackedGraph {
pub fn new(n_nodes: usize, directed: bool) -> Self {
let bits_needed = if directed {
n_nodes * n_nodes
} else {
n_nodes * (n_nodes + 1) / 2 };
let words_needed = bits_needed.div_ceil(64);
BitPackedGraph {
n_nodes,
bits: vec![0; words_needed],
directed,
}
}
fn bit_position(&self, from: usize, to: usize) -> Option<usize> {
if from >= self.n_nodes || to >= self.n_nodes {
return None;
}
if self.directed {
Some(from * self.n_nodes + to)
} else {
let (u, v) = if from <= to { (from, to) } else { (to, from) };
if u == 0 {
Some(v)
} else {
Some(u * (2 * self.n_nodes - u - 1) / 2 + v)
}
}
}
pub fn add_edge(&mut self, from: usize, to: usize) -> Result<(), GraphError> {
let bit_pos = self.bit_position(from, to).ok_or_else(|| {
GraphError::node_not_found_with_context(from, self.n_nodes, "add_edge operation")
})?;
let word_idx = bit_pos / 64;
let bit_idx = bit_pos % 64;
self.bits[word_idx] |= 1u64 << bit_idx;
Ok(())
}
pub fn has_edge(&self, from: usize, to: usize) -> bool {
if let Some(bit_pos) = self.bit_position(from, to) {
let word_idx = bit_pos / 64;
let bit_idx = bit_pos % 64;
(self.bits[word_idx] & (1u64 << bit_idx)) != 0
} else {
false
}
}
pub fn neighbors(&self, node: usize) -> Vec<usize> {
let mut neighbors = Vec::new();
if self.directed {
let start_bit = node * self.n_nodes;
let end_bit = start_bit + self.n_nodes;
let start_word = start_bit / 64;
let end_word = end_bit.div_ceil(64);
for word_idx in start_word..end_word {
if word_idx >= self.bits.len() {
break;
}
let mut word = self.bits[word_idx];
let word_start_bit = word_idx * 64;
if word_start_bit < start_bit {
let skip_bits = start_bit - word_start_bit;
word &= !((1u64 << skip_bits) - 1);
}
if word_start_bit + 64 > end_bit {
let keep_bits = end_bit - word_start_bit;
word &= (1u64 << keep_bits) - 1;
}
while word != 0 {
let bit_pos = word.trailing_zeros() as usize;
let global_bit = word_start_bit + bit_pos;
if global_bit >= start_bit && global_bit < end_bit {
let neighbor = global_bit - start_bit;
neighbors.push(neighbor);
}
word &= word - 1; }
}
} else {
for other in 0..self.n_nodes {
if self.has_edge(node, other) {
neighbors.push(other);
}
}
}
neighbors
}
pub fn degree(&self, node: usize) -> usize {
if node >= self.n_nodes {
return 0;
}
if self.directed {
let start_bit = node * self.n_nodes;
let end_bit = start_bit + self.n_nodes;
let start_word = start_bit / 64;
let end_word = end_bit.div_ceil(64);
let mut count = 0;
for word_idx in start_word..end_word {
if word_idx >= self.bits.len() {
break;
}
let mut word = self.bits[word_idx];
let word_start_bit = word_idx * 64;
if word_start_bit < start_bit {
let skip_bits = start_bit - word_start_bit;
word &= !((1u64 << skip_bits) - 1);
}
if word_start_bit + 64 > end_bit {
let keep_bits = end_bit - word_start_bit;
word &= (1u64 << keep_bits) - 1;
}
count += word.count_ones() as usize;
}
count
} else {
self.neighbors(node).len()
}
}
pub fn memory_usage(&self) -> usize {
mem::size_of_val(&self.n_nodes)
+ mem::size_of_val(&self.bits[..])
+ mem::size_of_val(&self.directed)
}
}
#[derive(Debug, Clone)]
pub struct CompressedAdjacencyList {
n_nodes: usize,
data: Vec<u8>,
offsets: Vec<usize>,
}
impl CompressedAdjacencyList {
pub fn from_adjacency(_adjlists: Vec<Vec<usize>>) -> Self {
let n_nodes = _adjlists.len();
let mut data = Vec::new();
let mut offsets = Vec::with_capacity(n_nodes + 1);
offsets.push(0);
for neighbors in _adjlists {
let _start_pos = data.len();
let mut sorted_neighbors = neighbors;
sorted_neighbors.sort_unstable();
Self::encode_varint(sorted_neighbors.len(), &mut data);
let mut prev = 0;
for &neighbor in &sorted_neighbors {
let delta = neighbor - prev;
Self::encode_varint(delta, &mut data);
prev = neighbor;
}
offsets.push(data.len());
}
CompressedAdjacencyList {
n_nodes,
data,
offsets,
}
}
fn encode_varint(mut value: usize, output: &mut Vec<u8>) {
while value >= 0x80 {
output.push((value & 0x7F) as u8 | 0x80);
value >>= 7;
}
output.push(value as u8);
}
fn decode_varint(data: &[u8], pos: &mut usize) -> usize {
let mut value = 0;
let mut shift = 0;
loop {
let byte = data[*pos];
*pos += 1;
value |= ((byte & 0x7F) as usize) << shift;
if byte & 0x80 == 0 {
break;
}
shift += 7;
}
value
}
pub fn neighbors(&self, node: usize) -> Vec<usize> {
if node >= self.n_nodes {
return Vec::new();
}
let start = self.offsets[node];
let end = self.offsets[node + 1];
let data_slice = &self.data[start..end];
let mut pos = 0;
let count = Self::decode_varint(data_slice, &mut pos);
let mut neighbors = Vec::with_capacity(count);
let mut current = 0;
for _ in 0..count {
let delta = Self::decode_varint(data_slice, &mut pos);
current += delta;
neighbors.push(current);
}
neighbors
}
pub fn memory_usage(&self) -> usize {
mem::size_of_val(&self.n_nodes)
+ mem::size_of_val(&self.data[..])
+ mem::size_of_val(&self.offsets[..])
}
}
pub enum HybridGraph {
CSR(CSRGraph),
BitPacked(BitPackedGraph),
Compressed(CompressedAdjacencyList),
}
impl HybridGraph {
pub fn auto_select(
n_nodes: usize,
edges: Vec<(usize, usize, Option<f64>)>,
directed: bool,
) -> Result<Self, GraphError> {
let n_edges = edges.len();
let density = n_edges as f64 / (n_nodes * n_nodes) as f64;
let all_unweighted = edges.iter().all(|(_, _, w)| w.is_none());
if all_unweighted && density > 0.1 {
let mut graph = BitPackedGraph::new(n_nodes, directed);
for (src, dst_, _) in edges {
graph.add_edge(src, dst_)?;
}
Ok(HybridGraph::BitPacked(graph))
} else if density < 0.01 {
let weighted_edges: Vec<(usize, usize, f64)> = edges
.into_iter()
.map(|(s, d, w)| (s, d, w.unwrap_or(1.0)))
.collect();
let graph = CSRGraph::from_edges(n_nodes, weighted_edges)?;
Ok(HybridGraph::CSR(graph))
} else {
let mut adj_lists = vec![Vec::new(); n_nodes];
for (src, dst_, _) in edges {
adj_lists[src].push(dst_);
if !directed {
adj_lists[dst_].push(src);
}
}
let graph = CompressedAdjacencyList::from_adjacency(adj_lists);
Ok(HybridGraph::Compressed(graph))
}
}
pub fn memory_usage(&self) -> usize {
match self {
HybridGraph::CSR(g) => g.memory_usage(),
HybridGraph::BitPacked(g) => g.memory_usage(),
HybridGraph::Compressed(g) => g.memory_usage(),
}
}
}
#[derive(Debug)]
pub struct MemmapGraph {
n_nodes: usize,
n_edges: usize,
file: File,
#[allow(dead_code)]
header_size: usize,
row_ptr_offset: usize,
col_idx_offset: usize,
weights_offset: usize,
}
impl MemmapGraph {
pub fn from_csr<P: AsRef<Path>>(csr: &CSRGraph, path: P) -> io::Result<Self> {
let mut file = File::create(&path)?;
let mut writer = BufWriter::new(&mut file);
writer.write_all(&csr.n_nodes.to_le_bytes())?;
writer.write_all(&csr.n_edges.to_le_bytes())?;
for &ptr in &csr.row_ptr {
writer.write_all(&ptr.to_le_bytes())?;
}
for &idx in &csr.col_idx {
writer.write_all(&idx.to_le_bytes())?;
}
for &weight in &csr.weights {
writer.write_all(&weight.to_le_bytes())?;
}
writer.flush()?;
drop(writer);
let file = File::open(path)?;
let header_size = 16; let row_ptr_offset = header_size;
let col_idx_offset = row_ptr_offset + (csr.n_nodes + 1) * 8;
let weights_offset = col_idx_offset + csr.n_edges * 8;
Ok(MemmapGraph {
n_nodes: csr.n_nodes,
n_edges: csr.n_edges,
file,
header_size,
row_ptr_offset,
col_idx_offset,
weights_offset,
})
}
pub fn from_file<P: AsRef<Path>>(path: P) -> io::Result<Self> {
let mut file = File::open(path)?;
let mut buffer = [0u8; 16];
file.read_exact(&mut buffer)?;
let n_nodes = usize::from_le_bytes([
buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], buffer[5], buffer[6], buffer[7],
]);
let n_edges = usize::from_le_bytes([
buffer[8], buffer[9], buffer[10], buffer[11], buffer[12], buffer[13], buffer[14],
buffer[15],
]);
let header_size = 16;
let row_ptr_offset = header_size;
let col_idx_offset = row_ptr_offset + (n_nodes + 1) * 8;
let weights_offset = col_idx_offset + n_edges * 8;
Ok(MemmapGraph {
n_nodes,
n_edges,
file,
header_size,
row_ptr_offset,
col_idx_offset,
weights_offset,
})
}
fn get_row_ptrs(&mut self, node: usize) -> io::Result<(usize, usize)> {
if node >= self.n_nodes {
return Ok((0, 0));
}
let mut buffer = [0u8; 16];
let offset = self.row_ptr_offset + node * 8;
self.file.seek(SeekFrom::Start(offset as u64))?;
self.file.read_exact(&mut buffer)?;
let start = usize::from_le_bytes([
buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], buffer[5], buffer[6], buffer[7],
]);
let end = usize::from_le_bytes([
buffer[8], buffer[9], buffer[10], buffer[11], buffer[12], buffer[13], buffer[14],
buffer[15],
]);
Ok((start, end))
}
pub fn neighbors(&mut self, node: usize) -> io::Result<Vec<(usize, f64)>> {
let (start, end) = self.get_row_ptrs(node)?;
let degree = end - start;
if degree == 0 {
return Ok(Vec::new());
}
let mut col_buffer = vec![0u8; degree * 8];
let col_offset = self.col_idx_offset + start * 8;
self.file.seek(SeekFrom::Start(col_offset as u64))?;
self.file.read_exact(&mut col_buffer)?;
let mut weight_buffer = vec![0u8; degree * 8];
let weight_offset = self.weights_offset + start * 8;
self.file.seek(SeekFrom::Start(weight_offset as u64))?;
self.file.read_exact(&mut weight_buffer)?;
let mut neighbors = Vec::with_capacity(degree);
for i in 0..degree {
let col_bytes = &col_buffer[i * 8..(i + 1) * 8];
let weight_bytes = &weight_buffer[i * 8..(i + 1) * 8];
let col_idx = usize::from_le_bytes([
col_bytes[0],
col_bytes[1],
col_bytes[2],
col_bytes[3],
col_bytes[4],
col_bytes[5],
col_bytes[6],
col_bytes[7],
]);
let weight = f64::from_le_bytes([
weight_bytes[0],
weight_bytes[1],
weight_bytes[2],
weight_bytes[3],
weight_bytes[4],
weight_bytes[5],
weight_bytes[6],
weight_bytes[7],
]);
neighbors.push((col_idx, weight));
}
Ok(neighbors)
}
pub fn degree(&mut self, node: usize) -> io::Result<usize> {
let (start, end) = self.get_row_ptrs(node)?;
Ok(end - start)
}
pub fn node_count(&self) -> usize {
self.n_nodes
}
pub fn edge_count(&self) -> usize {
self.n_edges
}
pub fn has_edge(&mut self, from: usize, to: usize) -> io::Result<bool> {
let neighbors = self.neighbors(from)?;
Ok(neighbors.iter().any(|&(neighbor_, _)| neighbor_ == to))
}
}
impl MemmapGraph {
pub fn batch_neighbors(&mut self, nodes: &[usize]) -> io::Result<Vec<Vec<(usize, f64)>>> {
let mut results = Vec::with_capacity(nodes.len());
let mut sorted_nodes: Vec<_> = nodes.iter().enumerate().collect();
sorted_nodes.sort_by_key(|(_, &node)| node);
for (_, &node) in sorted_nodes {
results.push(self.neighbors(node)?);
}
Ok(results)
}
pub fn stream_edges<F>(&mut self, mut callback: F) -> io::Result<()>
where
F: FnMut(usize, usize, f64) -> bool, {
for node in 0..self.n_nodes {
let neighbors = self.neighbors(node)?;
for (neighbor, weight) in neighbors {
if !callback(node, neighbor, weight) {
return Ok(());
}
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_csr_graph() {
let edges = vec![(0, 1, 1.0), (0, 2, 2.0), (1, 2, 3.0), (2, 3, 4.0)];
let graph = CSRGraph::from_edges(4, edges).expect("Operation failed");
assert_eq!(graph.degree(0), 2);
assert_eq!(graph.degree(3), 0);
let neighbors: Vec<_> = graph.neighbors(0).collect();
assert_eq!(neighbors, vec![(1, 1.0), (2, 2.0)]);
}
#[test]
fn test_bit_packed_graph() {
let mut graph = BitPackedGraph::new(4, false);
graph.add_edge(0, 1).expect("Operation failed");
graph.add_edge(1, 2).expect("Operation failed");
graph.add_edge(0, 3).expect("Operation failed");
assert!(graph.has_edge(0, 1));
assert!(graph.has_edge(1, 0)); assert!(!graph.has_edge(2, 3));
let neighbors = graph.neighbors(0);
assert!(neighbors.contains(&1));
assert!(neighbors.contains(&3));
}
#[test]
fn test_compressed_adjacency() {
let adj_lists = vec![
vec![1, 2, 5],
vec![0, 2],
vec![0, 1, 3],
vec![2],
vec![],
vec![0],
];
let graph = CompressedAdjacencyList::from_adjacency(adj_lists.clone());
for (node, expected) in adj_lists.iter().enumerate() {
let neighbors = graph.neighbors(node);
assert_eq!(&neighbors, expected);
}
let uncompressed_size = adj_lists
.iter()
.map(|list| list.len() * mem::size_of::<usize>())
.sum::<usize>();
let compressed_size = graph.memory_usage();
println!(
"Uncompressed: {} bytes, Compressed: {} bytes",
uncompressed_size, compressed_size
);
}
}