use crate::{db::api::mutation::AdditionOps, graph_loader::fetch_file, prelude::*};
use chrono::*;
use itertools::Itertools;
use std::{
fs::File,
io::{self, BufRead},
path::{Path, PathBuf},
};
use tracing::error;
pub fn reddit_file(
timeout: u64,
test_file: Option<bool>,
) -> Result<PathBuf, Box<dyn std::error::Error>> {
match test_file {
Some(true) => fetch_file(
"reddit-title-test.tsv",
true,
"https://raw.githubusercontent.com/Raphtory/Data/main/reddit-title-test.tsv",
timeout,
),
_ => fetch_file(
"reddit-title.tsv",
true,
"http://web.archive.org/web/20201107005944/http://snap.stanford.edu/data/soc-redditHyperlinks-title.tsv",
timeout,
),
}
}
/// Opens `filename` and returns an iterator over its lines, read through a
/// buffered reader.
///
/// # Errors
///
/// Returns the `io::Error` from `File::open` when the file cannot be opened.
/// Errors hit while reading are yielded by the returned iterator itself.
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
where
    P: AsRef<Path>,
{
    File::open(filename).map(|handle| io::BufReader::new(handle).lines())
}
/// Fetches the Reddit hyperlinks file and loads it into a [`Graph`].
///
/// `timeout` and `test_file` are forwarded to [`reddit_file`] (the latter
/// wrapped in `Some`). If the file cannot be obtained, an empty graph is
/// returned instead of an error.
pub fn reddit_graph(timeout: u64, test_file: bool) -> Graph {
    match reddit_file(timeout, Some(test_file)) {
        Ok(tsv_path) => generate_reddit_graph(tsv_path),
        // Best effort: a failed fetch yields an empty graph.
        Err(_) => Graph::new(),
    }
}
pub fn generate_reddit_graph(path: PathBuf) -> Graph {
let g = Graph::new();
if let Ok(lines) = read_lines(path.as_path()) {
for reddit in lines.dropping(1).flatten() {
let reddit: Vec<&str> = reddit.split('\t').collect();
let src_id = &reddit[0];
let dst_id = &reddit[1];
let post_id = reddit[2].to_string();
match NaiveDateTime::parse_from_str(reddit[3], "%Y-%m-%d %H:%M:%S") {
Ok(time) => {
let time = time.and_utc().timestamp_millis() * 1000;
let post_label: i32 = reddit[4].parse::<i32>().unwrap();
let post_properties: Vec<f64> = reddit[5]
.split(',')
.map(|s| s.parse::<f64>().unwrap())
.collect();
let edge_properties = [
("post_label".to_string(), Prop::I32(post_label)),
("post_id".to_string(), Prop::str(post_id)),
("word_count".to_string(), Prop::F64(post_properties[7])),
("long_words".to_string(), Prop::F64(post_properties[9])),
("sentences".to_string(), Prop::F64(post_properties[13])),
("readability".to_string(), Prop::F64(post_properties[17])),
(
"positive_sentiment".to_string(),
Prop::F64(post_properties[18]),
),
(
"negative_sentiment".to_string(),
Prop::F64(post_properties[19]),
),
(
"compound_sentiment".to_string(),
Prop::F64(post_properties[20]),
),
];
g.add_node(time, *src_id, NO_PROPS, None)
.map_err(|err| error!("{:?}", err))
.ok();
g.add_node(time, *dst_id, NO_PROPS, None)
.map_err(|err| error!("{:?}", err))
.ok();
g.add_edge(time, *src_id, *dst_id, edge_properties, None)
.expect("Error: Unable to add edge");
}
Err(e) => {
error!("{}", e)
}
}
}
}
g
}
#[cfg(test)]
mod reddit_test {
    use crate::{
        db::api::view::*,
        graph_loader::reddit_hyperlinks::{reddit_file, reddit_graph},
    };

    /// The test dataset file can be resolved with a timeout of 100.
    #[test]
    fn check_data() {
        assert!(reddit_file(100, Some(true)).is_ok());
    }

    /// Loading the test dataset yields the expected node and edge counts.
    #[test]
    fn check_graph() {
        let loaded = reddit_graph(100, true);
        assert_eq!(loaded.count_nodes(), 16);
        assert_eq!(loaded.count_edges(), 9);
    }
}