Skip to main content

swink_agent_eval/evaluators/code/
mod.rs

1//! Code-family evaluators (T077–T079 — code family).
2//!
3//! Public surface:
4//! * [`CodeExtractor`] + [`CodeExtractorStrategy`] — strategy object that lifts
5//!   code from an assistant response (markdown fence / regex / LLM). Shared by
6//!   every code evaluator so extraction logic lives in exactly one place.
7//! * [`CargoCheckEvaluator`] / [`ClippyEvaluator`] — deterministic evaluators
8//!   that shell out to `cargo check` / `cargo clippy` in a tempdir.
9//! * [`llm_judge::CodeLlmJudgeEvaluator`] — judge-backed evaluator using the
10//!   `code_llm_judge_v0` template.
11//!
12//! `SandboxedExecutionEvaluator` (T080–T083) lives in the [`sandbox`]
13//! submodule, which is built only with the `evaluator-sandbox` feature. When
14//! enabled it compiles regardless of platform, but its implementation forks: Unix
15//! uses POSIX `rlimit`s (module-scoped `#![allow(unsafe_code)]` per FR-049) and
16//! Windows returns [`crate::EvaluatorError::UnsupportedPlatform`] at evaluation time.
17
18pub mod cargo_check;
19pub mod clippy;
20pub mod extractor;
21pub(crate) mod harness;
22pub mod llm_judge;
23#[cfg(feature = "evaluator-sandbox")]
24pub mod sandbox;
25
26pub use cargo_check::CargoCheckEvaluator;
27pub use clippy::ClippyEvaluator;
28pub use extractor::{CodeExtractor, CodeExtractorStrategy};
29#[cfg(feature = "evaluator-sandbox")]
30pub use sandbox::{
31    SandboxLimits, SandboxOutcome, SandboxRunner, SandboxedExecutionEvaluator, ShellRunner,
32    run_sandboxed,
33};