graph_csr/
lib.rs

1use std::io::{BufRead, BufReader, Read};
2
3use easy_mmap::{self, EasyMmap, EasyMmapBuilder};
4use rayon::prelude::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
5use reading::reader_to_iter;
6use util::ValidGraphType;
7
8mod reading;
9
10/// The generalized computational scheme for running algorithms
11pub mod compute;
12
13/// A collection of convenient functions and traits to be used across the crate.
14pub mod util;
15
16/// This structure holds a graph in the Compressed Sparse Row format for compression of data size.
17/// This graph is represented via Memory Mapping, allowing the graph to be loaded into memory as required.
18/// This makes it possible to load any-size graphs, even those that *do not* fit into memory!
19pub struct Graph<'a, N> {
20    nodes: EasyMmap<'a, usize>,
21    edges: EasyMmap<'a, N>,
22}
23
24impl<'a, N> Graph<'a, N>
25where
26    N: util::ValidGraphType,
27    N: 'a,
28{
29    /// Convenience method for reading an input stream in text format.
30    /// Each line should contain two numbers, separated by a space.
31    /// The graph will be converted to the underlying CSR representation, and stored in `folder_name`.
32    pub fn from_txt_adjacency_list<T>(
33        stream: T,
34        folder_name: &str,
35    ) -> Result<Graph<'a, N>, std::io::Error>
36    where
37        T: Read + Sized,
38    {
39        let reader = BufReader::new(stream);
40        let stream = reader.lines().map(|line| {
41            let line = line?;
42            let mut parts = line.split_whitespace();
43
44            let src = parts
45                .next()
46                .ok_or(std::io::ErrorKind::InvalidData)?
47                .parse::<N>()
48                .or(Err(std::io::ErrorKind::InvalidData))?;
49
50            let dst = parts
51                .next()
52                .ok_or(std::io::ErrorKind::InvalidData)?
53                .parse::<N>()
54                .or(Err(std::io::ErrorKind::InvalidData))?;
55
56            std::io::Result::Ok((src, dst))
57        });
58
59        Graph::from_adjacency_list(stream, folder_name)
60    }
61
62    /// Same as [from_txt_adjacency](Self::from_txt_adjacency_list), except this time it assumes the edge list to be in binary representation.
63    pub fn from_binary_adjancency<T>(
64        stream: T,
65        destination_folder_name: &str,
66    ) -> Result<Graph<'a, N>, std::io::Error>
67    where
68        T: Read + Sized,
69    {
70        Graph::from_adjacency_list(
71            reader_to_iter::<N, T>(stream).map(|x| std::io::Result::Ok(x)),
72            destination_folder_name,
73        )
74    }
75
76    /// Given a SORTED (by source) adjancency list file `source_file_name`, transforms this file
77    /// into the underlying binary representation in CSR and returns a version of the Graph in this format.
78    /// The graph will be stored in `folder_name`.
79    pub fn from_adjacency_list<T>(
80        stream: T,
81        folder_name: &str,
82    ) -> Result<Graph<'a, N>, std::io::Error>
83    where
84        T: Iterator<Item = std::io::Result<(N, N)>> + Sized,
85    {
86        reading::from_adjacency_list::<N, T>(stream, folder_name)?;
87
88        Self::load_graph(folder_name)
89    }
90
91    /// Loads a graph from the underlying representation and returns it as a `Graph` struct.
92    pub fn load_graph(graph_folder: &str) -> Result<Graph<'a, N>, std::io::Error> {
93        let nodes_file = reading::get_vertex_file(graph_folder)?;
94        let edges_file = reading::get_edge_file(graph_folder)?;
95
96        let nodes = EasyMmapBuilder::<usize>::new()
97            .capacity(
98                nodes_file
99                    .metadata()
100                    .expect("Failed to read metadata of vertex file")
101                    .len() as usize
102                    / std::mem::size_of::<usize>(),
103            )
104            .file(nodes_file)
105            .readable()
106            .build();
107
108        let edges = EasyMmapBuilder::<N>::new()
109            .capacity(
110                edges_file
111                    .metadata()
112                    .expect("Failed to read metadata of edge file")
113                    .len() as usize
114                    / std::mem::size_of::<N>(),
115            )
116            .file(edges_file)
117            .readable()
118            .build();
119
120        Ok(Graph { nodes, edges })
121    }
122
123    /// Returns an iterator over the edge list of each node.
124    pub fn iter(&'a self) -> impl Iterator<Item = &[N]> + 'a {
125        GraphIterator {
126            nodes: self.nodes.get_data_as_slice(),
127            edges: self.edges.get_data_as_slice(),
128            current_node: 0,
129        }
130    }
131
132    pub fn par_iter(&'a self) -> impl ParallelIterator<Item = (usize, &[N])> + 'a
133    where
134        N: Send + Sync,
135    {
136        GraphIterator {
137            nodes: self.nodes.get_data_as_slice(),
138            edges: self.edges.get_data_as_slice(),
139            current_node: 0,
140        }
141    }
142
143    #[inline]
144    #[allow(dead_code)]
145    fn iterate_nodes(&'a self) -> impl Iterator<Item = usize> + 'a {
146        self.nodes.iter().map(|x| *x)
147    }
148
149    #[inline]
150    #[allow(dead_code)]
151    fn iterate_edges(&'a self) -> impl Iterator<Item = N> + 'a {
152        self.edges.iter().map(|x| *x)
153    }
154
155    /// Returns the number of nodes existing in the graph
156    pub fn n_nodes(&self) -> usize {
157        self.nodes.len() - 1
158    }
159
160    /// Returns the number of edges existing in the graph
161    pub fn n_edges(&self) -> usize {
162        self.edges.len()
163    }
164}
165
166/// Iterates over a [Graph] struct and yields the outgoing edge lists of type `&[N]` for each node.
167pub struct GraphIterator<'a, N> {
168    nodes: &'a [usize],
169    edges: &'a [N],
170    current_node: usize,
171}
172
173impl<'a, N> Iterator for GraphIterator<'a, N>
174where
175    N: ValidGraphType,
176{
177    type Item = &'a [N];
178
179    fn next(&mut self) -> Option<Self::Item> {
180        if self.current_node >= self.nodes.len() - 1 {
181            return None;
182        };
183
184        let start = self.nodes[self.current_node];
185        let end = self.nodes[self.current_node + 1];
186
187        self.current_node += 1;
188
189        Some(&self.edges[start..end])
190    }
191}
192
193impl<'a, N> ParallelIterator for GraphIterator<'a, N>
194where
195    N: ValidGraphType + Send + Sync,
196{
197    type Item = (usize, &'a [N]);
198
199    fn drive_unindexed<C>(self, consumer: C) -> C::Result
200    where
201        C: rayon::iter::plumbing::UnindexedConsumer<Self::Item>,
202    {
203        self.nodes[..self.nodes.len() - 1]
204            .par_iter()
205            .enumerate()
206            .zip(self.nodes[1..].par_iter())
207            .map(|((idx, start), end)| {
208                let start = *start;
209                let end = *end;
210                (idx, &self.edges[start..end])
211            })
212            .drive_unindexed(consumer)
213    }
214}
215
216#[cfg(test)]
217mod tests {
218    use std::fs;
219    use std::io::{BufWriter, Write};
220
221    use super::*;
222
223    #[test]
224    fn parse_from_file() {
225        let edges = vec![(0u32, 1u32), (0, 2), (1, 5), (1, 2), (4, 7)];
226
227        let expected_nodes = vec![0usize, 2, 4, 4, 4, 5, 5, 5, 5];
228        let expected_edges = vec![1u32, 2, 5, 2, 7];
229
230        let source_file_name = format!("/tmp/tmp_src_{}", rand::random::<u32>());
231        let destination_folder_name = format!("/tmp/tmp_dst_{}", rand::random::<u32>());
232
233        let file = fs::OpenOptions::new()
234            .write(true)
235            .create(true)
236            .open(&source_file_name)
237            .unwrap();
238
239        // Write edges to file
240        let mut writer = BufWriter::new(&file);
241        for edge in edges {
242            let line = format!("{} {}\n", edge.0, edge.1);
243            writer.write(line.as_bytes()).unwrap();
244        }
245
246        drop(writer);
247
248        let file = fs::OpenOptions::new()
249            .read(true)
250            .open(&source_file_name)
251            .unwrap();
252
253        let graph = match Graph::<u32>::from_txt_adjacency_list(file, &destination_folder_name) {
254            Ok(graph) => graph,
255            Err(e) => panic!("{:?}", e),
256        };
257
258        // Check correctness
259        assert_eq!(
260            graph
261                .iterate_nodes()
262                .map(|x| x.clone())
263                .collect::<Vec<usize>>(),
264            expected_nodes
265        );
266        assert_eq!(
267            graph
268                .iterate_edges()
269                .map(|x| x.clone())
270                .collect::<Vec<u32>>(),
271            expected_edges
272        );
273    }
274
275    #[test]
276    fn parse_from_binary() {
277        let edges = vec![(0u32, 1u32), (0, 2), (1, 5), (1, 2), (4, 7)];
278        let expected_nodes = vec![0usize, 2, 4, 4, 4, 5, 5, 5, 5];
279        let expected_edges = vec![1u32, 2, 5, 2, 7];
280
281        let destionation_folder_name = format!("/tmp/tmp_dst_{}", rand::random::<u32>());
282
283        let edges_flatten = edges
284            .iter()
285            .flat_map(|x| vec![x.0, x.1])
286            .collect::<Vec<u32>>();
287
288        // Transform into slice of u8 for Read
289        let edges_flatten = unsafe {
290            std::slice::from_raw_parts(
291                edges_flatten.as_ptr() as *const u8,
292                edges_flatten.len() * std::mem::size_of::<u32>(),
293            )
294        };
295
296        let graph =
297            match Graph::<u32>::from_binary_adjancency(edges_flatten, &destionation_folder_name) {
298                Ok(graph) => graph,
299                Err(e) => panic!("{:?}", e),
300            };
301
302        // Check correctness
303        assert_eq!(
304            graph
305                .iterate_nodes()
306                .map(|x| x.clone())
307                .collect::<Vec<usize>>(),
308            expected_nodes
309        );
310
311        assert_eq!(
312            graph
313                .iterate_edges()
314                .map(|x| x.clone())
315                .collect::<Vec<u32>>(),
316            expected_edges
317        );
318    }
319
320    #[test]
321    fn parse_from_general_stream() {
322        let edges = vec![(0u32, 1u32), (0, 2), (1, 5), (1, 2), (4, 7)];
323
324        let expected_nodes = vec![0usize, 2, 4, 4, 4, 5, 5, 5, 5];
325        let expected_edges = vec![1u32, 2, 5, 2, 7];
326
327        let destination_folder_name = format!("/tmp/tmp_dst_{}", rand::random::<u32>());
328
329        // Read from string bytes stream
330        let graph = match Graph::<u32>::from_adjacency_list(
331            edges.iter().map(|x| Ok(x.clone())),
332            &destination_folder_name,
333        ) {
334            Ok(graph) => graph,
335            Err(e) => panic!("{:?}", e),
336        };
337
338        println!("Destionation folder: {}", destination_folder_name);
339
340        assert_eq!(
341            graph
342                .iterate_nodes()
343                .map(|x| x.clone())
344                .collect::<Vec<usize>>(),
345            expected_nodes
346        );
347        assert_eq!(
348            graph
349                .iterate_edges()
350                .map(|x| x.clone())
351                .collect::<Vec<u32>>(),
352            expected_edges
353        );
354    }
355
356    #[test]
357    fn load_u64_graph() {
358        let edges = vec![(0u64, 1u64), (0, 2), (1, 5), (1, 2), (4, 7)];
359
360        let expected_nodes = vec![0usize, 2, 4, 4, 4, 5, 5, 5, 5];
361        let expected_edges = vec![1u64, 2, 5, 2, 7];
362
363        let destination_folder_name = format!("/tmp/tmp_dst_{}", rand::random::<u32>());
364
365        let graph = match Graph::<u64>::from_adjacency_list(
366            edges.iter().map(|x| Ok(x.clone())),
367            &destination_folder_name,
368        ) {
369            Ok(graph) => graph,
370            Err(e) => panic!("{:?}", e),
371        };
372
373        assert_eq!(
374            graph
375                .iterate_nodes()
376                .map(|x| x.clone())
377                .collect::<Vec<usize>>(),
378            expected_nodes
379        );
380
381        assert_eq!(
382            graph
383                .iterate_edges()
384                .map(|x| x.clone())
385                .collect::<Vec<u64>>(),
386            expected_edges
387        );
388    }
389
390    #[test]
391    fn test_graph_load() {
392        let edges = vec![(0u32, 1u32), (0, 2), (1, 5), (1, 2), (4, 7)];
393        let expected_nodes = vec![0usize, 2, 4, 4, 4, 5, 5, 5, 5];
394        let expected_edges = vec![1u32, 2, 5, 2, 7];
395
396        let destination_folder_name = format!("/tmp/tmp_dst_{}", rand::random::<u32>());
397
398        match Graph::<u32>::from_adjacency_list(
399            edges.iter().map(|x| Ok(x.clone())),
400            &destination_folder_name,
401        ) {
402            Ok(_) => {}
403            Err(e) => panic!("{:?}", e),
404        };
405
406        // Load graph from memory
407        let graph = match Graph::<u32>::load_graph(&destination_folder_name) {
408            Ok(graph) => graph,
409            Err(e) => panic!("{:?}", e),
410        };
411
412        assert_eq!(
413            graph
414                .iterate_nodes()
415                .map(|x| x.clone())
416                .collect::<Vec<usize>>(),
417            expected_nodes
418        );
419
420        assert_eq!(
421            graph
422                .iterate_edges()
423                .map(|x| x.clone())
424                .collect::<Vec<u32>>(),
425            expected_edges
426        );
427    }
428
429    #[test]
430    fn iterate_graph() {
431        let edges = vec![(0u32, 1u32), (0, 2), (1, 5), (1, 2), (4, 7)];
432        let expected_res = vec![
433            (0usize, vec![1, 2]),
434            (1, vec![5, 2]),
435            (2, vec![]),
436            (3, vec![]),
437            (4, vec![7]),
438            (5, vec![]),
439            (6, vec![]),
440            (7, vec![]),
441        ];
442
443        let destination_folder_name = format!("/tmp/tmp_dst_{}", rand::random::<u32>());
444
445        let graph = match Graph::<u32>::from_adjacency_list(
446            edges.iter().map(|x| Ok(x.clone())),
447            &destination_folder_name,
448        ) {
449            Ok(g) => g,
450            Err(e) => panic!("{:?}", e),
451        };
452
453        assert_eq!(
454            graph
455                .iter()
456                .enumerate()
457                .map(|(i, edges)| (i, edges.to_vec()))
458                .collect::<Vec<(usize, Vec<u32>)>>(),
459            expected_res
460        );
461    }
462
463    #[test]
464    fn invalid() {}
465}