1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
//! Loads the Stack Exchange Super User (sx-superuser) dataset into a graph.
//! Source: https://snap.stanford.edu/data/sx-superuser.html
//!
//! This is a temporal network of interactions on the stack exchange web site Super User.
//! There are three different types of interactions represented by a directed edge (u, v, t):
//!
//! * user u answered user v's question at time t (in the graph sx-superuser-a2q)
//! * user u commented on user v's question at time t (in the graph sx-superuser-c2q)
//! * user u commented on user v's answer at time t (in the graph sx-superuser-c2a)
//!
//! The graph sx-superuser contains the union of these graphs. These graphs were constructed
//! from the Stack Exchange Data Dump. Node ID numbers correspond to the 'OwnerUserId' tag
//! in that data dump.
//!
//! **NOTE**: The dataset may take a while to download.
//!
//! ## Dataset statistics
//! Statistics for the combined `sx-superuser` graph:
//!
//! * Nodes: 194085
//! * Temporal edges: 1443339
//! * Edges in static graph: 924886
//! * Time span: 2773 days
//!
//! ## Source
//! Ashwin Paranjape, Austin R. Benson, and Jure Leskovec. "Motifs in Temporal Networks."
//! In Proceedings of the Tenth ACM International Conference on Web Search and Data Mining, 2017.
//!
//! ## Properties
//!
//! Header:  SRC DST UNIXTS
//!
//! where edges are separated by a new line and
//!
//! * SRC: id of the source node (a user)
//! * DST: id of the target node (a user)
//! * UNIXTS: Unix timestamp (seconds since the epoch)
//!
//! Example:
//! ```no_run
//! use raphtory_io::graph_loader::example::sx_superuser_graph::sx_superuser_graph;
//! use raphtory::db::graph::Graph;
//! use raphtory::db::view_api::*;
//!
//! let graph = sx_superuser_graph(1).unwrap();
//!
//! println!("The graph has {:?} vertices", graph.num_vertices());
//! println!("The graph has {:?} edges", graph.num_edges());
//! ```

use raphtory::db::graph::Graph;

use crate::graph_loader::{fetch_file, source::csv_loader::CsvLoader};
use serde::Deserialize;
use std::path::PathBuf;

/// One record of the SX SuperUser edge list: a directed, timestamped interaction
/// between two users.
///
/// Instances are produced through serde's `Deserialize`; the fields correspond to
/// the `SRC DST UNIXTS` columns described in the module documentation.
///
/// NOTE(review): presumably the loader matches columns to fields by position, in
/// which case the field order below must stay in sync with the file — confirm
/// against `CsvLoader`.
#[derive(Deserialize, std::fmt::Debug)]
pub struct TEdge {
    /// Id of the source node (a user).
    src_id: u64,
    /// Id of the target node (a user).
    dst_id: u64,
    /// Unix timestamp of the interaction (seconds since the epoch).
    time: i64,
}

/// Fetch the SX SuperUser dataset archive and report where it is stored.
///
/// Delegates to `fetch_file`, passing the archive name `sx-superuser.txt.gz`
/// and its SNAP download URL.
///
/// # Returns
/// - `Ok(PathBuf)` pointing at the SX SuperUser dataset file
///
/// # Errors
/// Whatever error `fetch_file` returns is handed back to the caller unchanged.
pub fn sx_superuser_file() -> Result<PathBuf, Box<dyn std::error::Error>> {
    let file_name = "sx-superuser.txt.gz";
    let url = "https://snap.stanford.edu/data/sx-superuser.txt.gz";
    // NOTE(review): the boolean flag and the trailing number are interpreted by
    // `fetch_file`; 600 is presumably a timeout in seconds — confirm there.
    fetch_file(file_name, true, url, 600)
}

/// Build a [`Graph`] from the SX SuperUser dataset.
///
/// The dataset file is obtained through [`sx_superuser_file`] and read as
/// space-delimited [`TEdge`] records. Each record is inserted with `add_edge`
/// using its `time`, `src_id` and `dst_id`, an empty list, and `None` for the
/// final argument.
///
/// # Arguments
///
/// * `shards` - The number of shards to use for the graph
///
/// # Returns
///
/// - `Ok(Graph)` once loading has finished, or the error produced while
///   fetching or loading the dataset file
///
/// # Panics
///
/// Panics with "Error: Unable to add edge" if `add_edge` fails for a record.
pub fn sx_superuser_graph(shards: usize) -> Result<Graph, Box<dyn std::error::Error>> {
    // Inserts a single parsed record; a failed insertion aborts with a panic.
    fn insert_row(row: TEdge, target: &Graph) {
        target
            .add_edge(row.time, row.src_id, row.dst_id, &vec![], None)
            .expect("Error: Unable to add edge");
    }

    let graph = Graph::new(shards);
    let dataset = sx_superuser_file()?;
    CsvLoader::new(dataset)
        .set_delimiter(" ")
        .load_into_graph(&graph, insert_row)?;
    Ok(graph)
}

#[cfg(test)]
mod sx_superuser_test {
    use crate::graph_loader::example::sx_superuser_graph::{sx_superuser_file, sx_superuser_graph};

    /// Fetching the dataset must yield a path that refers to a regular file.
    #[test]
    fn test_download_works() {
        let downloaded = sx_superuser_file().unwrap();
        assert!(downloaded.is_file());
    }

    /// Loading the dataset into a two-shard graph must not return an error.
    #[test]
    fn test_graph_loading_works() {
        let loaded = sx_superuser_graph(2);
        loaded.unwrap();
    }
}