use std::path::{Path, PathBuf};
use std::process::Command;
use anyhow::{anyhow, Context, Result};
/// Outcome of judging one answer: whether it got as far as compiling, how many
/// acceptance tests passed, and a numeric score with a human-readable summary.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Verdict {
/// `false` when the answer was rejected before any acceptance run
/// (see [`Verdict::rejected`]).
pub compiled: bool,
/// Number of acceptance tests counted as passed.
pub tests_passed: u32,
/// Number of acceptance tests counted in total; `0` when no counts are available.
pub tests_total: u32,
/// Numeric score for the answer; [`Verdict::rejected`] uses `0.0`.
pub score: f64,
/// Human-readable explanation of the verdict.
pub message: String,
}
impl Verdict {
pub fn rejected(message: impl Into<String>) -> Self {
Verdict {
compiled: false,
tests_passed: 0,
tests_total: 0,
score: 0.0,
message: message.into(),
}
}
pub fn accepted(&self) -> bool {
self.compiled && self.tests_total > 0 && self.tests_passed == self.tests_total
}
}
/// Something that can score a candidate answer and report the outcome as a [`Verdict`].
pub trait AnswerJudge {
/// Judges `answer` and returns its verdict.
///
/// The `Err` side carries an `anyhow` error; what counts as an error versus a
/// low-scoring verdict is up to the implementation.
fn judge(&self, answer: &str) -> Result<Verdict>;
}
/// How an answer is applied to the scratch copy of the crate before it is built.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum ApplyMode {
/// The answer text is written verbatim to `rel_path`, joined onto the scratch
/// crate root; missing parent directories are created first.
WriteFile { rel_path: String },
/// The answer text is piped to `git apply` run in the scratch crate root.
ApplyPatch,
}
/// Configuration for a [`CodegenJudge`]: which crate to judge, how the answer is
/// applied to it, and which command decides acceptance.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct CodegenSpec {
/// Directory of the crate under test; it must contain a `Cargo.toml` and is
/// copied to a scratch directory before each judgement.
pub crate_dir: PathBuf,
/// How the answer is applied to the scratch copy.
pub apply: ApplyMode,
/// Acceptance command as program followed by its arguments; an empty vector
/// is treated as [`default_accept_cmd`] when judging.
pub accept_cmd: Vec<String>,
}
impl CodegenSpec {
    /// Spec whose answers replace the file at `rel_path` inside `crate_dir`,
    /// accepted by the default command.
    pub fn write_file(crate_dir: impl Into<PathBuf>, rel_path: impl Into<String>) -> Self {
        let crate_dir = crate_dir.into();
        let apply = ApplyMode::WriteFile {
            rel_path: rel_path.into(),
        };
        Self {
            crate_dir,
            apply,
            accept_cmd: default_accept_cmd(),
        }
    }

    /// Spec whose answers are patches applied to `crate_dir`, accepted by the
    /// default command.
    pub fn apply_patch(crate_dir: impl Into<PathBuf>) -> Self {
        let crate_dir = crate_dir.into();
        Self {
            crate_dir,
            apply: ApplyMode::ApplyPatch,
            accept_cmd: default_accept_cmd(),
        }
    }

    /// Replaces the acceptance command; an empty `cmd` resets it to the default
    /// instead of storing an unrunnable empty command.
    pub fn with_accept_cmd(mut self, cmd: Vec<String>) -> Self {
        if cmd.is_empty() {
            self.accept_cmd = default_accept_cmd();
        } else {
            self.accept_cmd = cmd;
        }
        self
    }
}
/// Returns the acceptance command used when none is configured: `cargo test`,
/// as program name followed by its single argument.
pub fn default_accept_cmd() -> Vec<String> {
    Vec::from(["cargo", "test"].map(String::from))
}
/// Judge that applies an answer to a scratch copy of a crate, builds it, and
/// scores it by running the spec's acceptance command.
#[derive(Debug, Clone)]
pub struct CodegenJudge {
// Immutable configuration; exposed read-only through `spec()`.
spec: CodegenSpec,
}
impl CodegenJudge {
pub fn new(spec: CodegenSpec) -> Self {
Self { spec }
}
pub fn spec(&self) -> &CodegenSpec {
&self.spec
}
fn copy_crate(&self, dest: &Path) -> Result<()> {
let src = &self.spec.crate_dir;
if !src.join("Cargo.toml").is_file() {
return Err(anyhow!(
"judge target `{}` is not a crate (no Cargo.toml)",
src.display()
));
}
copy_dir_filtered(src, dest)
.with_context(|| format!("copy crate `{}` → scratch", src.display()))
}
fn apply_answer(&self, root: &Path, answer: &str) -> Result<Option<String>> {
match &self.spec.apply {
ApplyMode::WriteFile { rel_path } => {
let target = root.join(rel_path);
if let Some(parent) = target.parent() {
std::fs::create_dir_all(parent)
.with_context(|| format!("mkdir for {}", target.display()))?;
}
std::fs::write(&target, answer)
.with_context(|| format!("write answer → {}", target.display()))?;
Ok(None)
}
ApplyMode::ApplyPatch => {
use std::io::Write;
let mut child = Command::new("git")
.args(["apply", "--whitespace=nowarn", "-"])
.current_dir(root)
.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.context("spawn `git apply`")?;
child
.stdin
.take()
.ok_or_else(|| anyhow!("git apply: no stdin"))?
.write_all(answer.as_bytes())
.context("write patch to git apply stdin")?;
let out = child.wait_with_output().context("wait for git apply")?;
if out.status.success() {
Ok(None)
} else {
let err = String::from_utf8_lossy(&out.stderr);
Ok(Some(format!("patch did not apply: {}", err.trim())))
}
}
}
}
}
impl AnswerJudge for CodegenJudge {
    /// Judges `answer` in a fresh scratch copy of the crate.
    ///
    /// Steps: copy the crate, apply the answer, run `cargo build`, then run the
    /// acceptance command and score its output. An answer that does not apply
    /// or does not build yields a rejected verdict rather than an error.
    ///
    /// # Errors
    /// Fails when the scratch directory, the crate copy, or any of the spawned
    /// commands cannot be set up or run.
    fn judge(&self, answer: &str) -> Result<Verdict> {
        let scratch = tempfile::tempdir().context("create judge scratch dir")?;
        let root = scratch.path().join("crate");
        self.copy_crate(&root)?;

        let rejection = self.apply_answer(&root, answer)?;
        if let Some(reason) = rejection {
            return Ok(Verdict::rejected(reason));
        }

        // Build first so compile failures are reported separately from test failures.
        let mut build_cmd = Command::new("cargo");
        build_cmd.arg("build").current_dir(&root);
        let build_out = build_cmd
            .output()
            .context("run `cargo build` in scratch crate")?;
        if !build_out.status.success() {
            let diagnostics = String::from_utf8_lossy(&build_out.stderr);
            let reason = format!("did not compile: {}", last_error_line(&diagnostics));
            return Ok(Verdict::rejected(reason));
        }

        // An empty configured command falls back to the default.
        let configured = &self.spec.accept_cmd;
        let accept = if configured.is_empty() {
            default_accept_cmd()
        } else {
            configured.clone()
        };
        let (program, arguments) = accept.split_first().expect("accept_cmd is non-empty");
        let run = Command::new(program)
            .args(arguments)
            .current_dir(&root)
            .output()
            .with_context(|| format!("run acceptance command `{}`", accept.join(" ")))?;

        let out_text = String::from_utf8_lossy(&run.stdout);
        let err_text = String::from_utf8_lossy(&run.stderr);
        let (passed, total) = parse_cargo_test_counts(&out_text);
        Ok(score_from_run(
            run.status.success(),
            passed,
            total,
            &out_text,
            &err_text,
        ))
    }
}
/// Turns the outcome of an acceptance run into a [`Verdict`] (always `compiled: true`).
///
/// * `cmd_ok` — whether the acceptance command exited successfully.
/// * `passed` / `total` — test counts parsed from the command's output; `total == 0`
///   means no counts were found.
/// * `stdout` / `stderr` — captured output, used only to extract an error line for
///   the message when the command failed.
///
/// Scoring:
/// * counts available: `score = passed / total`;
/// * counts available, every counted test passed, but the command still failed
///   (e.g. something went wrong outside the counted tests): the failed command is
///   charged as one additional failed check, so the score stays below `1.0` and
///   [`Verdict::accepted`] is `false`;
/// * no counts, command succeeded: `1.0`;
/// * no counts, command failed: `0.5`.
pub fn score_from_run(
    cmd_ok: bool,
    passed: u32,
    total: u32,
    stdout: &str,
    stderr: &str,
) -> Verdict {
    if total > 0 {
        if !cmd_ok && passed >= total {
            // A non-zero exit must never produce an accepted verdict, even when
            // every test that reported a result passed.
            let tail = last_error_line(if stderr.trim().is_empty() { stdout } else { stderr });
            let tests_total = total.saturating_add(1);
            // `tests_total >= 1` here; the `min` keeps passed < total even at u32::MAX.
            let tests_passed = passed.min(tests_total - 1);
            return Verdict {
                compiled: true,
                tests_passed,
                tests_total,
                score: tests_passed as f64 / tests_total as f64,
                message: format!(
                    "compiled; {passed}/{total} acceptance tests passed but acceptance command failed: {tail}"
                ),
            };
        }
        let score = passed as f64 / total as f64;
        let message = if cmd_ok && passed == total {
            format!("compiled; all {total} acceptance tests passed")
        } else {
            format!("compiled; {passed}/{total} acceptance tests passed")
        };
        Verdict { compiled: true, tests_passed: passed, tests_total: total, score, message }
    } else if cmd_ok {
        Verdict {
            compiled: true,
            tests_passed: 0,
            tests_total: 0,
            score: 1.0,
            message: "compiled; acceptance command passed".to_string(),
        }
    } else {
        // Prefer stderr for the failure detail; fall back to stdout when stderr is blank.
        let tail = last_error_line(if stderr.trim().is_empty() { stdout } else { stderr });
        Verdict {
            compiled: true,
            tests_passed: 0,
            tests_total: 0,
            score: 0.5,
            message: format!("compiled but acceptance command failed: {tail}"),
        }
    }
}
/// Extracts `(passed, total)` test counts from `cargo test`-style output.
///
/// Every line that, after trimming, starts with `test result:` is scanned for
/// `<number> passed…` and `<number> failed…` token pairs; counts are summed over
/// all such lines and `total` is `passed + failed`. Returns `(0, 0)` when no
/// `test result:` line is present.
///
/// Sums saturate at `u32::MAX` instead of overflowing, since the text being
/// parsed is produced by the program under test.
pub fn parse_cargo_test_counts(stdout: &str) -> (u32, u32) {
    let mut passed = 0u32;
    let mut failed = 0u32;
    let mut saw_summary = false;

    let summaries = stdout
        .lines()
        .filter_map(|line| line.trim().strip_prefix("test result:"));
    for summary in summaries {
        saw_summary = true;
        let toks: Vec<&str> = summary.split_whitespace().collect();
        // Each count is a numeric token immediately followed by its label.
        for pair in toks.windows(2) {
            let n = match pair[0].parse::<u32>() {
                Ok(n) => n,
                Err(_) => continue,
            };
            let label = pair[1];
            if label.starts_with("passed") {
                passed = passed.saturating_add(n);
            } else if label.starts_with("failed") {
                failed = failed.saturating_add(n);
            }
        }
    }

    if saw_summary {
        (passed, passed.saturating_add(failed))
    } else {
        (0, 0)
    }
}
/// Picks the most informative single line from build/test output, truncated to
/// 200 characters.
///
/// Preference order, each searched from the end of `s` over trimmed lines:
/// 1. a diagnostic line (starts with `error` or contains `error[`) that is not
///    one of the trailing summary lines (`error: could not compile …`,
///    `error: aborting due to …`, `error: test failed …`), which would otherwise
///    always win the reverse search and hide the actual diagnostic;
/// 2. any diagnostic line, summaries included;
/// 3. the last non-empty line;
/// 4. the empty string.
fn last_error_line(s: &str) -> String {
    fn is_diagnostic(line: &str) -> bool {
        line.starts_with("error") || line.contains("error[")
    }
    fn is_summary(line: &str) -> bool {
        line.starts_with("error: could not compile")
            || line.starts_with("error: aborting due to")
            || line.starts_with("error: test failed")
    }

    let trimmed_rev = || s.lines().rev().map(str::trim);
    let pick = trimmed_rev()
        .find(|l| is_diagnostic(l) && !is_summary(l))
        .or_else(|| trimmed_rev().find(|l| is_diagnostic(l)))
        .or_else(|| trimmed_rev().find(|l| !l.is_empty()))
        .unwrap_or("");
    truncate(pick, 200)
}
/// Limits `s` to `n` characters (Unicode scalar values, not bytes).
///
/// Strings of at most `n` characters are returned unchanged; longer ones keep
/// their first `n - 1` characters (none when `n` is 0) followed by `…`.
fn truncate(s: &str, n: usize) -> String {
    // A character at index `n` exists exactly when `s` has more than `n` characters.
    if s.char_indices().nth(n).is_none() {
        return s.to_owned();
    }
    let keep = n.saturating_sub(1);
    // Byte offset where the kept prefix ends; always on a character boundary.
    let cut = s
        .char_indices()
        .nth(keep)
        .map_or(s.len(), |(offset, _)| offset);
    let mut shortened = String::with_capacity(cut + '…'.len_utf8());
    shortened.push_str(&s[..cut]);
    shortened.push('…');
    shortened
}
/// Recursively copies the directory `src` into `dst`, creating `dst` as needed.
///
/// Entries named `target` or `.git` are skipped at every level. Symlinks are
/// resolved with `canonicalize` and the resolved file or directory is copied in
/// their place; any I/O failure aborts the copy with an error.
fn copy_dir_filtered(src: &Path, dst: &Path) -> Result<()> {
    std::fs::create_dir_all(dst)?;
    for item in std::fs::read_dir(src)? {
        let item = item?;
        let file_name = item.file_name();
        if matches!(&*file_name.to_string_lossy(), "target" | ".git") {
            continue;
        }
        let destination = dst.join(&file_name);
        let kind = item.file_type()?;
        // `file_type` does not follow symlinks, so a link is resolved explicitly
        // and classified by what it points at.
        let (origin, descend) = if kind.is_symlink() {
            let real = std::fs::canonicalize(item.path())?;
            let real_is_dir = real.is_dir();
            (real, real_is_dir)
        } else {
            (item.path(), kind.is_dir())
        };
        if descend {
            copy_dir_filtered(&origin, &destination)?;
        } else {
            std::fs::copy(&origin, &destination)?;
        }
    }
    Ok(())
}
// Unit tests for the parsing/scoring helpers, plus end-to-end tests that run
// `CodegenJudge::judge` against a throwaway crate (these shell out to `cargo`,
// and the patch test to `git`).
#[cfg(test)]
mod tests {
use super::*;
/// Counts from several `test result:` lines are summed into one pair.
#[test]
fn parse_counts_sums_suites() {
let out = "\
running 2 tests
test result: ok. 2 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out
running 1 test
test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out
";
let (p, t) = parse_cargo_test_counts(out);
assert_eq!((p, t), (2, 3), "2 passed across suites, 3 total");
}
/// Exercises `score_from_run` with and without test counts, for both a
/// succeeding and a failing acceptance command.
#[test]
fn score_from_run_math() {
// All counted tests pass and the command succeeds.
let v = score_from_run(true, 3, 3, "", "");
assert_eq!(v.score, 1.0);
assert!(v.compiled && v.accepted());
// Partial pass: score is the pass ratio.
let v = score_from_run(false, 2, 3, "", "");
assert!((v.score - 2.0 / 3.0).abs() < 1e-9);
assert!(v.compiled && !v.accepted());
// No counts and a failing command.
let v = score_from_run(false, 0, 0, "", "boom");
assert_eq!(v.score, 0.5);
assert!(v.compiled && !v.accepted());
// No counts and a succeeding command.
let v = score_from_run(true, 0, 0, "", "");
assert_eq!(v.score, 1.0);
}
/// A rejected verdict has zero score, is not compiled, and keeps its message.
#[test]
fn rejected_verdict_is_zero() {
let v = Verdict::rejected("did not compile: error[E0599]");
assert_eq!(v.score, 0.0);
assert!(!v.compiled);
assert!(!v.accepted());
assert!(v.message.contains("E0599"));
}
/// Creates a temporary library crate named `judgee` whose `src/lib.rs` is `body`.
fn scratch_crate(body: &str) -> tempfile::TempDir {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
std::fs::write(
root.join("Cargo.toml"),
"[package]\nname = \"judgee\"\nversion = \"0.0.0\"\nedition = \"2021\"\n\n[lib]\npath = \"src/lib.rs\"\n",
)
.unwrap();
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("src/lib.rs"), body).unwrap();
dir
}
/// Test module appended to every answer: one test that always passes and one
/// that checks `add(2, 2) == 4`.
const ACCEPT_TESTS: &str = r#"
#[cfg(test)]
mod judge_tests {
use super::*;
#[test]
fn t_ok() { assert!(true); }
#[test]
fn t_target() { assert_eq!(add(2, 2), 4); }
}
"#;
/// Answer with a correct `add`, followed by the acceptance tests.
fn good_answer() -> String {
format!("pub fn add(a: i32, b: i32) -> i32 {{ a + b }}\n{ACCEPT_TESTS}")
}
/// Answer whose `add` returns a string literal, so it fails to type-check.
fn noncompiling_answer() -> String {
format!("pub fn add(_a: i32, _b: i32) -> i32 {{ \"nope\" }}\n{ACCEPT_TESTS}")
}
/// Answer that compiles but whose `add` is off by one, so `t_target` fails.
fn failing_answer() -> String {
format!("pub fn add(a: i32, b: i32) -> i32 {{ a + b + 1 }}\n{ACCEPT_TESTS}")
}
/// Judge in write-file mode that replaces `src/lib.rs` of the crate at `dir`.
fn judge_for(dir: &Path) -> CodegenJudge {
CodegenJudge::new(CodegenSpec::write_file(dir.to_path_buf(), "src/lib.rs"))
}
/// End-to-end: a correct answer compiles, passes both tests, and is accepted.
#[test]
fn known_good_answer_scores_one() {
let scratch = scratch_crate("pub fn add(_a: i32, _b: i32) -> i32 { 0 }\n");
let judge = judge_for(scratch.path());
let v = judge.judge(&good_answer()).unwrap();
assert!(v.compiled, "good answer compiles: {}", v.message);
assert_eq!(v.tests_total, 2, "two acceptance tests");
assert_eq!(v.tests_passed, 2);
assert_eq!(v.score, 1.0, "all pass → 1.0: {}", v.message);
assert!(v.accepted());
}
/// End-to-end: an answer that does not build is rejected with score 0.
#[test]
fn noncompiling_answer_scores_zero() {
let scratch = scratch_crate("pub fn add(_a: i32, _b: i32) -> i32 { 0 }\n");
let judge = judge_for(scratch.path());
let v = judge.judge(&noncompiling_answer()).unwrap();
assert!(!v.compiled, "type-error answer must not compile");
assert_eq!(v.score, 0.0, "non-compiling → 0.0: {}", v.message);
assert!(!v.accepted());
}
/// End-to-end: an answer that builds but fails one of two tests scores 0.5.
#[test]
fn compiling_but_failing_answer_scores_partial() {
let scratch = scratch_crate("pub fn add(_a: i32, _b: i32) -> i32 { 0 }\n");
let judge = judge_for(scratch.path());
let v = judge.judge(&failing_answer()).unwrap();
assert!(v.compiled, "answer compiles: {}", v.message);
assert_eq!(v.tests_total, 2);
assert_eq!(v.tests_passed, 1, "only t_ok passes; t_target fails");
assert!((v.score - 0.5).abs() < 1e-9, "1/2 tests → 0.5: {}", v.message);
assert!(!v.accepted());
}
/// End-to-end (patch mode): a patch against `nope.rs`, a file the scratch crate
/// does not contain, is rejected with score 0.
#[test]
fn patch_that_does_not_apply_scores_zero() {
let scratch = scratch_crate("pub fn add(_a: i32, _b: i32) -> i32 { 0 }\n");
let judge = CodegenJudge::new(CodegenSpec::apply_patch(scratch.path().to_path_buf()));
let bad = "--- a/nope.rs\n+++ b/nope.rs\n@@ -1 +1 @@\n-x\n+y\n";
let v = judge.judge(bad).unwrap();
assert_eq!(v.score, 0.0, "unappliable patch → 0.0: {}", v.message);
assert!(!v.compiled);
}
}