multi_skill/data_processing/
mod.rs

1mod cf_api;
2mod dataset;
3
4pub use dataset::{get_dataset_from_disk, CachedDataset, ClosureDataset, Dataset};
5use reqwest::blocking::Client;
6use serde::{Deserialize, Serialize};
7use std::path::Path;
8
/// Default contest weight, referenced by the serde `default` attribute on `Contest::weight`.
fn one() -> f64 {
    1.0_f64
}
12
13fn is_one(&weight: &f64) -> bool {
14    weight == one()
15}
16
17/// Represents the outcome of a contest.
18#[derive(Serialize, Deserialize)]
19pub struct Contest {
20    /// A human-readable title for the contest.
21    pub name: String,
22    /// The source URL, if any.
23    pub url: Option<String>,
24    /// The relative weight of a contest, default is 1.
25    #[serde(default = "one", skip_serializing_if = "is_one")]
26    pub weight: f64,
27    /// The number of seconds from the Unix Epoch to the end of the contest.
28    pub time_seconds: u64,
29    /// The list of standings, containing a name and the enclosing range of ties.
30    pub standings: Vec<(String, usize, usize)>,
31}
32
33impl Contest {
34    /// Create a contest with empty standings, useful for testing.
35    pub fn new(index: usize) -> Self {
36        Self {
37            name: format!("Round #{}", index),
38            url: None,
39            weight: 1.,
40            time_seconds: index as u64 * 86_400,
41            standings: vec![],
42        }
43    }
44
45    pub fn find_contestant(&mut self, handle: &str) -> Option<usize> {
46        self.standings.iter().position(|x| x.0 == handle)
47    }
48
49    /// Detect if a given contestant exists
50    pub fn has_contestant(&mut self, handle: &str) -> bool {
51        self.find_contestant(handle).is_some()
52    }
53
54    /// Remove a contestant with the given handle, and return it if it exists.
55    pub fn remove_contestant(&mut self, handle: &str) -> Option<(String, usize, usize)> {
56        let pos = self.find_contestant(handle)?;
57        let contestant = self.standings.remove(pos);
58        for (_, lo, hi) in self.standings.iter_mut() {
59            if *hi >= pos {
60                *hi -= 1;
61                if *lo > pos {
62                    *lo -= 1;
63                }
64            }
65        }
66        Some(contestant)
67    }
68
69    /// Add a contestant with the given handle in last place.
70    pub fn push_contestant(&mut self, handle: impl Into<String>) {
71        let place = self.standings.len();
72        self.standings.push((handle.into(), place, place));
73    }
74}
75
76/// Compressed summary of a contest
77#[derive(Serialize, Deserialize)]
78pub struct ContestSummary {
79    pub name: String,
80    pub url: Option<String>,
81    pub weight: f64,
82    pub time_seconds: u64,
83    pub num_contestants: usize,
84}
85
86impl ContestSummary {
87    /// Returns a summary of the given contest, stripped of detailed standings
88    pub fn new(contest: &Contest) -> Self {
89        Self {
90            name: contest.name.clone(),
91            url: contest.url.clone(),
92            weight: contest.weight,
93            time_seconds: contest.time_seconds,
94            num_contestants: contest.standings.len(),
95        }
96    }
97}
98
99fn write_to_json<T: Serialize + ?Sized>(
100    value: &T,
101    path: impl AsRef<Path>,
102) -> Result<(), &'static str> {
103    let cached_json = serde_json::to_string_pretty(&value).map_err(|_| "Serialization error")?;
104    std::fs::write(path.as_ref(), cached_json).map_err(|_| "File writing error")
105}
106
107fn write_to_csv<T: Serialize>(values: &[T], path: impl AsRef<Path>) -> Result<(), &'static str> {
108    let file = std::fs::File::create(path.as_ref()).map_err(|_| "Output file not found")?;
109    let mut writer = csv::Writer::from_writer(file);
110    values
111        .iter()
112        .try_for_each(|val| writer.serialize(val))
113        .map_err(|_| "Failed to serialize row")
114}
115
116pub fn write_slice_to_file<T: Serialize>(values: &[T], path: impl AsRef<Path>) {
117    let path = path.as_ref();
118    let write_res = match path.extension().and_then(|s| s.to_str()) {
119        Some("json") => write_to_json(values, path),
120        Some("csv") => write_to_csv(values, path),
121        _ => Err("Invalid or missing filename extension"),
122    };
123    match write_res {
124        Ok(()) => println!("Successfully wrote to {:?}", path),
125        Err(msg) => eprintln!("WARNING: failed write to {:?} because {}", path, msg),
126    };
127}
128
129/// Helper function to get contest results from the Codeforces API.
130pub fn get_dataset_from_codeforces_api(
131    contest_id_file: impl AsRef<std::path::Path>,
132) -> impl Dataset<Item = Contest> {
133    let client = Client::new();
134    let contests_json =
135        std::fs::read_to_string(contest_id_file).expect("Failed to read contest IDs from file");
136    let contest_ids: Vec<usize> = serde_json::from_str(&contests_json)
137        .expect("Failed to parse JSON contest IDs as a Vec<usize>");
138
139    dataset::ClosureDataset::new(contest_ids.len(), move |i| {
140        cf_api::fetch_cf_contest(&client, contest_ids[i])
141    })
142}
143
144/// Helper function to get any named dataset.
145// TODO: actually throw errors when the directory is not found.
146pub fn get_dataset_by_name(
147    dataset_name: &str,
148) -> Result<Box<dyn Dataset<Item = Contest> + Send + Sync>, String> {
149    const CF_IDS: &str = "../data/codeforces/contest_ids.json";
150
151    let dataset_dir = format!("../cache/{}", dataset_name);
152    Ok(if dataset_name == "codeforces" {
153        Box::new(get_dataset_from_codeforces_api(CF_IDS).cached(dataset_dir))
154    } else {
155        Box::new(get_dataset_from_disk(dataset_dir))
156    })
157}
158
#[cfg(test)]
mod test {
    use super::*;

    /// Loads the first contest of the "codeforces" dataset and checks its
    /// weight, number of standings entries, and winner.
    #[test]
    fn test_codeforces_data() {
        let codeforces = get_dataset_by_name("codeforces").unwrap();
        let contest = codeforces.get(0);
        let (handle, lo, hi) = &contest.standings[0];

        assert_eq!(contest.weight, 1.);
        assert_eq!(contest.standings.len(), 66);
        assert_eq!(handle.as_str(), "vepifanov");
        assert_eq!(*lo, 0);
        assert_eq!(*hi, 0);
    }
}
175}