multi_skill/data_processing/
mod.rs1mod cf_api;
2mod dataset;
3
4pub use dataset::{get_dataset_from_disk, CachedDataset, ClosureDataset, Dataset};
5use reqwest::blocking::Client;
6use serde::{Deserialize, Serialize};
7use std::path::Path;
8
/// Returns the default contest weight, `1.0`.
///
/// Kept as a named function because the serde `default` attribute on
/// `Contest::weight` refers to it by name.
fn one() -> f64 {
    1_f64
}
12
13fn is_one(&weight: &f64) -> bool {
14 weight == one()
15}
16
/// A single contest: descriptive metadata plus the list of participants.
#[derive(Serialize, Deserialize)]
pub struct Contest {
    /// Display name of the contest.
    pub name: String,
    /// URL associated with the contest, or `None` when there is none.
    pub url: Option<String>,
    /// Weight of the contest. When the field is missing from serialized input
    /// it takes the value of `one()`; it is omitted from serialized output
    /// whenever `is_one` returns true for it.
    #[serde(default = "one", skip_serializing_if = "is_one")]
    pub weight: f64,
    /// Time of the contest, in seconds.
    /// NOTE(review): presumably seconds since the Unix epoch — confirm.
    pub time_seconds: u64,
    /// One `(handle, lo, hi)` entry per contestant.
    /// NOTE(review): `lo` and `hi` look like the inclusive range of 0-based
    /// ranks the contestant is tied across — confirm against the producers.
    pub standings: Vec<(String, usize, usize)>,
}
32
33impl Contest {
34 pub fn new(index: usize) -> Self {
36 Self {
37 name: format!("Round #{}", index),
38 url: None,
39 weight: 1.,
40 time_seconds: index as u64 * 86_400,
41 standings: vec![],
42 }
43 }
44
45 pub fn find_contestant(&mut self, handle: &str) -> Option<usize> {
46 self.standings.iter().position(|x| x.0 == handle)
47 }
48
49 pub fn has_contestant(&mut self, handle: &str) -> bool {
51 self.find_contestant(handle).is_some()
52 }
53
54 pub fn remove_contestant(&mut self, handle: &str) -> Option<(String, usize, usize)> {
56 let pos = self.find_contestant(handle)?;
57 let contestant = self.standings.remove(pos);
58 for (_, lo, hi) in self.standings.iter_mut() {
59 if *hi >= pos {
60 *hi -= 1;
61 if *lo > pos {
62 *lo -= 1;
63 }
64 }
65 }
66 Some(contestant)
67 }
68
69 pub fn push_contestant(&mut self, handle: impl Into<String>) {
71 let place = self.standings.len();
72 self.standings.push((handle.into(), place, place));
73 }
74}
75
/// A compact description of a `Contest`: the same metadata fields, with the
/// standings reduced to a contestant count.
#[derive(Serialize, Deserialize)]
pub struct ContestSummary {
    /// Display name of the contest.
    pub name: String,
    /// URL associated with the contest, or `None` when there is none.
    pub url: Option<String>,
    /// Weight of the contest.
    pub weight: f64,
    /// Time of the contest, in seconds.
    /// NOTE(review): presumably seconds since the Unix epoch — confirm.
    pub time_seconds: u64,
    /// Number of entries in the contest's standings.
    pub num_contestants: usize,
}
85
86impl ContestSummary {
87 pub fn new(contest: &Contest) -> Self {
89 Self {
90 name: contest.name.clone(),
91 url: contest.url.clone(),
92 weight: contest.weight,
93 time_seconds: contest.time_seconds,
94 num_contestants: contest.standings.len(),
95 }
96 }
97}
98
99fn write_to_json<T: Serialize + ?Sized>(
100 value: &T,
101 path: impl AsRef<Path>,
102) -> Result<(), &'static str> {
103 let cached_json = serde_json::to_string_pretty(&value).map_err(|_| "Serialization error")?;
104 std::fs::write(path.as_ref(), cached_json).map_err(|_| "File writing error")
105}
106
107fn write_to_csv<T: Serialize>(values: &[T], path: impl AsRef<Path>) -> Result<(), &'static str> {
108 let file = std::fs::File::create(path.as_ref()).map_err(|_| "Output file not found")?;
109 let mut writer = csv::Writer::from_writer(file);
110 values
111 .iter()
112 .try_for_each(|val| writer.serialize(val))
113 .map_err(|_| "Failed to serialize row")
114}
115
116pub fn write_slice_to_file<T: Serialize>(values: &[T], path: impl AsRef<Path>) {
117 let path = path.as_ref();
118 let write_res = match path.extension().and_then(|s| s.to_str()) {
119 Some("json") => write_to_json(values, path),
120 Some("csv") => write_to_csv(values, path),
121 _ => Err("Invalid or missing filename extension"),
122 };
123 match write_res {
124 Ok(()) => println!("Successfully wrote to {:?}", path),
125 Err(msg) => eprintln!("WARNING: failed write to {:?} because {}", path, msg),
126 };
127}
128
129pub fn get_dataset_from_codeforces_api(
131 contest_id_file: impl AsRef<std::path::Path>,
132) -> impl Dataset<Item = Contest> {
133 let client = Client::new();
134 let contests_json =
135 std::fs::read_to_string(contest_id_file).expect("Failed to read contest IDs from file");
136 let contest_ids: Vec<usize> = serde_json::from_str(&contests_json)
137 .expect("Failed to parse JSON contest IDs as a Vec<usize>");
138
139 dataset::ClosureDataset::new(contest_ids.len(), move |i| {
140 cf_api::fetch_cf_contest(&client, contest_ids[i])
141 })
142}
143
144pub fn get_dataset_by_name(
147 dataset_name: &str,
148) -> Result<Box<dyn Dataset<Item = Contest> + Send + Sync>, String> {
149 const CF_IDS: &str = "../data/codeforces/contest_ids.json";
150
151 let dataset_dir = format!("../cache/{}", dataset_name);
152 Ok(if dataset_name == "codeforces" {
153 Box::new(get_dataset_from_codeforces_api(CF_IDS).cached(dataset_dir))
154 } else {
155 Box::new(get_dataset_from_disk(dataset_dir))
156 })
157}
158
#[cfg(test)]
mod test {
    use super::*;

    /// Checks the first contest of the "codeforces" dataset against known
    /// values: its weight, its number of contestants, and its winner.
    /// NOTE(review): this goes through `get_dataset_by_name`, so it presumably
    /// needs either a populated cache or network access — confirm.
    #[test]
    fn test_codeforces_data() {
        let codeforces = get_dataset_by_name("codeforces").unwrap();
        let opening_contest = codeforces.get(0);
        let (winner_handle, winner_lo, winner_hi) = &opening_contest.standings[0];

        assert_eq!(opening_contest.weight, 1.);
        assert_eq!(opening_contest.standings.len(), 66);
        assert_eq!(winner_handle.as_str(), "vepifanov");
        assert_eq!(*winner_lo, 0);
        assert_eq!(*winner_hi, 0);
    }
}