ahc_evaluation/evaluation/
mod.rs

1mod record;
2mod stop_watch;
3
4use std::{
5    fs::{create_dir_all, read_to_string, File},
6    io::Write,
7    process::{Command, Stdio},
8};
9
10use anyhow::{bail, ensure, Context};
11use indicatif::{ParallelProgressIterator, ProgressBar, ProgressStyle};
12use rayon::{iter::IntoParallelRefIterator, prelude::ParallelIterator};
13use regex::Regex;
14
15use crate::{
16    config::Config,
17    evaluation::{record::EvaluationRecord, stop_watch::Stopwatch},
18};
19
20pub use crate::evaluation::record::{show_statistics, write_to_csv};
21
22/// Executes the submission code and the local tester for each seed and collect the score and the execution time.
23pub fn evaluate(config: &Config, seeds: &[usize]) -> anyhow::Result<Vec<EvaluationRecord>> {
24    // Style of progress bar.
25    let progress_style = ProgressStyle::template(
26        ProgressStyle::default_bar(),
27        "{prefix}\n{wide_bar} {pos:>3}/{len:3} {percent:>3}% [{elapsed_precise}<{eta_precise}]",
28    )
29    .with_context(|| "Failed to create progress bar style.")?;
30
31    // Progress bar during running of the local tester.
32    let progress_bar = ProgressBar::new(seeds.len() as u64);
33    progress_bar.set_style(progress_style);
34    progress_bar.set_prefix("Running...");
35
36    // Creates output directory.
37    create_dir_all(&config.path.output_dir)
38        .with_context(|| "Failed to create output directory.")?;
39
40    // Executes the local tester and retrieve evaluations.
41    seeds
42        .par_iter()
43        .progress_with(progress_bar)
44        .map(|&seed| {
45            if config.command.execute.integrated {
46                execute_integrated_process(config, seed)
47            } else {
48                execute_independent_processes(config, seed)
49            }
50        })
51        .collect::<Result<Vec<EvaluationRecord>, _>>()
52}
53
54/// Executes the submission code via the local tester.
55fn execute_integrated_process(config: &Config, seed: usize) -> anyhow::Result<EvaluationRecord> {
56    // Reads the input file.
57    let input_file_path = config.input_file_path(seed);
58    let input_text = read_to_string(&input_file_path)
59        .with_context(|| format!("Failed to read text from `{:?}`.", input_file_path))?;
60
61    // Executes the local tester as a child process.
62    let cmd_args = config.cmd_args_for_execute_tester(seed);
63    let process_handle = spawn_process(&cmd_args)?;
64
65    // Starts measuring execution time.
66    let stopwatch = Stopwatch::start();
67
68    // Writes the contents of the input file to the standard input.
69    process_handle
70        .stdin
71        .as_ref()
72        .unwrap()
73        .write_all(input_text.as_bytes())
74        .with_context(|| "Failed to input text.")?;
75
76    // Waits for process to terminate.
77    let output = process_handle.wait_with_output()?;
78
79    ensure!(
80        output.status.success(),
81        ExecuteCommandError {
82            seed,
83            cmd_args,
84            output
85        }
86    );
87
88    // Terminates measurement of execution time.
89    let execution_time = stopwatch.elapsed_time();
90
91    // Writes the contents of the standard output to the output file.
92    let output_file_path = config.output_file_path(seed);
93    File::create(&output_file_path)
94        .with_context(|| format!("Failed to create output file `{:?}`.", output_file_path))?
95        .write_all(&output.stdout)
96        .with_context(|| format!("Failed to write to output file {:?}.", output_file_path))?;
97
98    // Regular expression for retrieving a score.
99    let score_regex = Regex::new(r"\bScore *= *(?<score>[0-9]*)\b")
100        .with_context(|| "Failed to compile regular expression.")?;
101
102    let stderr = String::from_utf8(output.stderr.clone())?;
103
104    // Retrieve the score from the output of the local tester.
105    let Some(score) = score_regex
106        .captures(&stderr)
107        .and_then(|caps| caps["score"].parse::<i64>().ok())
108    else {
109        bail!(format!(
110            "
111Failed to retrieve score.
112
113Seed: {}
114
115---------------------------- Standard Error Output -----------------------------
116{:?}
117--------------------------------------------------------------------------------
118",
119            seed, output.stderr,
120        ));
121    };
122
123    Ok(EvaluationRecord {
124        seed,
125        score,
126        execution_time,
127    })
128}
129
130/// Executes the submission code and the local tester separately.
131fn execute_independent_processes(config: &Config, seed: usize) -> anyhow::Result<EvaluationRecord> {
132    // Reads the input file.
133    let input_file_path = config.input_file_path(seed);
134    let input_text = read_to_string(&input_file_path)
135        .with_context(|| format!("Failed to read text from `{:?}`.", input_file_path))?;
136
137    // Executes the submission code as a child process.
138    let cmd_args_for_execute_submission = &config.command.execute.submission;
139    let submission_process_handle = spawn_process(cmd_args_for_execute_submission)?;
140
141    // Starts measuring execution time.
142    let stopwatch = Stopwatch::start();
143
144    // Writes the contents of the input file to the standard input.
145    submission_process_handle
146        .stdin
147        .as_ref()
148        .unwrap()
149        .write_all(input_text.as_bytes())
150        .with_context(|| "Failed to input text.")?;
151
152    // Waits for process to terminate.
153    let submission_process_output =
154        submission_process_handle
155            .wait_with_output()
156            .with_context(|| {
157                format!(
158                    "
159Failed to execute the submission code.
160List of arguments: {:?}
161",
162                    cmd_args_for_execute_submission
163                )
164            })?;
165
166    // Terminates measurement of execution time.
167    let execution_time = stopwatch.elapsed_time();
168
169    ensure!(
170        submission_process_output.status.success(),
171        ExecuteCommandError {
172            seed,
173            cmd_args: cmd_args_for_execute_submission.to_owned(),
174            output: submission_process_output
175        }
176    );
177
178    // Writes the contents of the standard output to the output file.
179    let output_file_path = config.output_file_path(seed);
180    File::create(&output_file_path)
181        .with_context(|| format!("Failed to create output file `{:?}`.", output_file_path))?
182        .write_all(&submission_process_output.stdout)
183        .with_context(|| format!("Failed to write to output file {:?}.", output_file_path))?;
184
185    // Executes the local tester as a child process.
186    let cmd_args_for_execute_tester = config.cmd_args_for_execute_tester(seed);
187
188    // Waits for process to terminate.
189    let tester_process_output = spawn_process(&cmd_args_for_execute_tester)?
190        .wait_with_output()
191        .with_context(|| {
192            format!(
193                "
194Failed to execute the local tester.
195List of arguments: {:?}
196",
197                cmd_args_for_execute_tester
198            )
199        })?;
200
201    ensure!(
202        tester_process_output.status.success(),
203        ExecuteCommandError {
204            seed,
205            cmd_args: cmd_args_for_execute_submission.to_owned(),
206            output: tester_process_output
207        }
208    );
209
210    // Regular expression for retrieving a score.
211    let score_regex = Regex::new(r"\bScore *= *(?<score>[0-9]*)\b")
212        .with_context(|| "Failed to compile regular expression.")?;
213
214    // Closure for getting scores from strings.
215    let retrieve_score = |text: &str| {
216        score_regex
217            .captures(text)
218            .and_then(|caps| caps["score"].parse::<i64>().ok())
219    };
220
221    let stdout = String::from_utf8(tester_process_output.stdout.clone())?;
222    let stderr = String::from_utf8(tester_process_output.stderr.clone())?;
223
224    // Retrieve the score from the output of the local tester.
225    let Some(score) = retrieve_score(&stdout).or(retrieve_score(&stderr)) else {
226        bail!(format!(
227            "
228Failed to retrieve score.
229
230Seed: {}
231
232------------------------------- Standard Output --------------------------------
233{:?}
234--------------------------------------------------------------------------------
235
236---------------------------- Standard Error Output -----------------------------
237{:?}
238--------------------------------------------------------------------------------
239",
240            seed, tester_process_output.stdout, tester_process_output.stderr,
241        ));
242    };
243
244    Ok(EvaluationRecord {
245        seed,
246        score,
247        execution_time,
248    })
249}
250
251/// Spawns a child process that executes the specified command.
252fn spawn_process(cmd_args: &[String]) -> anyhow::Result<std::process::Child> {
253    let program = cmd_args
254        .first()
255        .with_context(|| "The execution command is empty.")?;
256
257    Command::new(program)
258        .args(&cmd_args[1..])
259        .stdin(Stdio::piped())
260        .stdout(Stdio::piped())
261        .stderr(Stdio::piped())
262        .spawn()
263        .with_context(|| {
264            format!(
265                "
266Failed to start the child process.
267List of arguments: {:?}
268",
269                cmd_args
270            )
271        })
272}
273
/// Error describing a child process that terminated unsuccessfully.
#[derive(Debug)]
pub struct ExecuteCommandError {
    /// Seed that was being evaluated when the process failed.
    pub seed: usize,
    /// Command line (program followed by its arguments) of the failed process.
    pub cmd_args: Vec<String>,
    /// Exit status and captured standard output / standard error of the failed process.
    pub output: std::process::Output,
}

impl std::fmt::Display for ExecuteCommandError {
    /// Writes a report containing the exit status, the seed, the command line and the captured
    /// output streams.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A failing process may emit arbitrary bytes. Lossy decoding keeps the formatting of
        // the error from panicking on invalid UTF-8 and avoids copying the buffers.
        let stdout = String::from_utf8_lossy(&self.output.stdout);
        let stderr = String::from_utf8_lossy(&self.output.stderr);

        write!(
            f,
            "
Failed to execute command.

Exit code: {}

Seed: {}

Command line arguments: {:?},

------------------------------- Standard Output --------------------------------
{}
--------------------------------------------------------------------------------

---------------------------- Standard Error Output -----------------------------
{}
--------------------------------------------------------------------------------
",
            self.output.status, self.seed, self.cmd_args, stdout, stderr,
        )
    }
}

impl std::error::Error for ExecuteCommandError {}
ahc_evaluation/evaluation/mod.rs

ahc_evaluation/evaluation/
mod.rs