Skip to main content

agentics_persistence/db/
evaluation_policy.rs

1use sqlx::{PgPool, Postgres, Row, Transaction};
2
3use chrono::{DateTime, Utc};
4
5use agentics_domain::models::challenge::{
6    ChallengeBundleSpec, ChallengeEligibilityType, ChallengeExecutionSpec,
7};
8use agentics_domain::models::evaluation::ScoringMode;
9use agentics_domain::models::ids::AgentId;
10use agentics_domain::models::names::{ChallengeName, TargetName};
11use agentics_domain::storage::StorageKey;
12use agentics_error::{Result, ServiceError};
13
14use super::challenges::{
15    ChallengeRecord, agent_is_shortlisted, challenge_has_shortlist, get_published_challenge,
16    localized_text_from_row,
17};
18use super::ids::challenge_name_from_row;
19
20/// Published challenge admission data needed by API preflight checks.
21#[derive(Debug, Clone)]
22pub struct PublishedChallengeAdmission {
23    pub challenge_name: ChallengeName,
24    pub validation_submission_limit: Option<i64>,
25    pub official_submission_limit: Option<i64>,
26}
27
28/// Verify that a published challenge accepts the requested evaluation mode and
29/// return the canonical challenge name plus challenge-scoped limits.
30///
31/// API handlers call this before storing uploaded artifacts so disabled
32/// validation does not consume storage; write paths repeat the same check before
33/// inserting queued work as the authoritative guard.
34pub async fn ensure_published_challenge_supports_eval_type(
35    pool: &PgPool,
36    challenge_name: &ChallengeName,
37    target: &TargetName,
38    eval_type: ScoringMode,
39    agent_id: &AgentId,
40) -> Result<PublishedChallengeAdmission> {
41    let challenge = get_published_challenge(pool, challenge_name).await?;
42    let challenge =
43        challenge.ok_or_else(|| ServiceError::BadRequest("challenge not found".to_string()))?;
44    let spec: ChallengeBundleSpec = serde_json::from_value(challenge.spec_json)
45        .map_err(|e| ServiceError::Internal(e.to_string()))?;
46    ensure_challenge_supports_eval_type(
47        pool,
48        &challenge.challenge_name,
49        &spec,
50        target,
51        eval_type,
52        agent_id,
53    )
54    .await?;
55    ensure_validation_uses_public_bundle(
56        eval_type,
57        &spec,
58        &challenge.bundle_key,
59        &challenge.public_bundle_key,
60    )?;
61    Ok(PublishedChallengeAdmission {
62        challenge_name: challenge.challenge_name,
63        validation_submission_limit: spec.validation_submission_limit,
64        official_submission_limit: spec.official_submission_limit,
65    })
66}
67
68/// Ensures challenge supports eval type before continuing.
69pub(super) async fn ensure_challenge_supports_eval_type(
70    pool: &PgPool,
71    challenge_name: &ChallengeName,
72    spec: &ChallengeBundleSpec,
73    target: &TargetName,
74    eval_type: ScoringMode,
75    agent_id: &AgentId,
76) -> Result<()> {
77    ensure_challenge_accepts_submissions(spec)?;
78    ensure_challenge_eligibility(pool, challenge_name, spec, agent_id).await?;
79    ensure_target_supports_eval_type(spec, target, eval_type)
80}
81
82/// Validate target and evaluation-mode support using a parsed challenge contract.
83fn ensure_target_supports_eval_type(
84    spec: &ChallengeBundleSpec,
85    target: &TargetName,
86    eval_type: ScoringMode,
87) -> Result<()> {
88    let target = spec.target(target).ok_or_else(|| {
89        ServiceError::BadRequest(format!("challenge does not support target `{target}`"))
90    })?;
91
92    if eval_type == ScoringMode::Validation && !target.validation_enabled {
93        return Err(ServiceError::BadRequest(
94            "validation pass is disabled for this challenge and target".to_string(),
95        ));
96    }
97    if eval_type == ScoringMode::Official && !execution_declares_official_runs(&spec.execution) {
98        return Err(ServiceError::BadRequest(
99            "challenge does not support official evaluation".to_string(),
100        ));
101    }
102
103    Ok(())
104}
105
106/// Return whether the challenge contract declares an official evaluation path.
107fn execution_declares_official_runs(execution: &ChallengeExecutionSpec) -> bool {
108    match execution {
109        ChallengeExecutionSpec::SeparatedEvaluator(spec) => {
110            spec.official_runs.is_some() || spec.official_evaluation_setup.is_some()
111        }
112        ChallengeExecutionSpec::PipedStdio(spec) => {
113            spec.official_session.is_some() || spec.official_evaluation_setup.is_some()
114        }
115        ChallengeExecutionSpec::CoexecutedBenchmark(_) => true,
116    }
117}
118
119/// Lock an active challenge row for an admission transaction.
120pub(super) async fn lock_active_challenge_for_admission_tx(
121    tx: &mut Transaction<'_, Postgres>,
122    challenge_name: &ChallengeName,
123) -> Result<ChallengeRecord> {
124    let row = sqlx::query(
125        r#"
126        SELECT challenge_name, title, summary, bundle_key, public_bundle_key, statement_key, spec_json, moltbook_discussion_url
127        FROM challenges
128        WHERE challenge_name = $1
129          AND status = 'active'
130          AND spec_json IS NOT NULL
131        FOR UPDATE
132        "#,
133    )
134    .bind(challenge_name.as_str())
135    .fetch_optional(&mut **tx)
136    .await?;
137
138    let row = row.ok_or_else(|| ServiceError::BadRequest("challenge not found".to_string()))?;
139    Ok(ChallengeRecord {
140        challenge_name: challenge_name_from_row(&row, "challenge_name")?,
141        title: row.try_get("title")?,
142        summary: localized_text_from_row(&row, "summary")?,
143        bundle_key: storage_key_from_row(&row, "bundle_key")?,
144        public_bundle_key: storage_key_from_row(&row, "public_bundle_key")?,
145        statement_key: storage_key_from_row(&row, "statement_key")?,
146        spec_json: row.try_get("spec_json")?,
147        moltbook_discussion_url: optional_moltbook_post_url_from_row(
148            &row,
149            "moltbook_discussion_url",
150        )?,
151    })
152}
153
154/// Authoritatively verify challenge admission while holding the challenge row lock.
155pub(super) async fn ensure_challenge_supports_eval_type_tx(
156    tx: &mut Transaction<'_, Postgres>,
157    challenge_name: &ChallengeName,
158    spec: &ChallengeBundleSpec,
159    target: &TargetName,
160    eval_type: ScoringMode,
161    agent_id: &AgentId,
162) -> Result<()> {
163    ensure_challenge_accepts_submissions(spec)?;
164    ensure_challenge_eligibility_tx(tx, challenge_name, spec, agent_id).await?;
165    ensure_target_supports_eval_type(spec, target, eval_type)
166}
167
168/// Reject validation when the stored public bundle aliases private benchmark data.
169pub(super) fn ensure_validation_uses_public_bundle(
170    eval_type: ScoringMode,
171    spec: &ChallengeBundleSpec,
172    bundle_key: &StorageKey,
173    public_bundle_key: &StorageKey,
174) -> Result<()> {
175    if eval_type == ScoringMode::Validation
176        && spec.datasets.private_benchmark_enabled
177        && bundle_key == public_bundle_key
178    {
179        return Err(ServiceError::BadRequest(
180            "validation is unavailable because this private-benchmark challenge does not have a distinct public bundle key"
181                .to_string(),
182        ));
183    }
184
185    Ok(())
186}
187
188/// Ensures challenge accepts submissions before continuing.
189fn ensure_challenge_accepts_submissions(spec: &ChallengeBundleSpec) -> Result<()> {
190    let now = Utc::now();
191    let starts_at = parse_required_challenge_time(&spec.starts_at, "starts_at")?;
192    if now < starts_at {
193        return Err(ServiceError::Forbidden(
194            "challenge has not started yet".to_string(),
195        ));
196    }
197    if let Some(closes_at) = parse_challenge_time(spec.closes_at.as_deref(), "closes_at")?
198        && now >= closes_at
199    {
200        return Err(ServiceError::Forbidden("challenge has closed".to_string()));
201    }
202    Ok(())
203}
204
205/// Parses required challenge time from persisted challenge policy.
206fn parse_required_challenge_time(value: &str, field: &str) -> Result<DateTime<Utc>> {
207    DateTime::parse_from_rfc3339(value)
208        .map(|date| date.with_timezone(&Utc))
209        .map_err(|e| ServiceError::Internal(format!("{field} is not valid RFC3339: {e}")))
210}
211
212/// Ensures challenge eligibility before continuing.
213async fn ensure_challenge_eligibility(
214    pool: &PgPool,
215    challenge_name: &ChallengeName,
216    spec: &ChallengeBundleSpec,
217    agent_id: &AgentId,
218) -> Result<()> {
219    match spec.eligibility.eligibility_type {
220        ChallengeEligibilityType::Open => Ok(()),
221        ChallengeEligibilityType::PrivateShortlist => {
222            if !challenge_has_shortlist(pool, challenge_name).await? {
223                return Err(ServiceError::Forbidden(
224                    "challenge requires a shortlist, but no shortlist has been uploaded yet"
225                        .to_string(),
226                ));
227            }
228            if !agent_is_shortlisted(pool, challenge_name, agent_id).await? {
229                return Err(ServiceError::Forbidden(
230                    "agent is not eligible for this challenge".to_string(),
231                ));
232            }
233            Ok(())
234        }
235    }
236}
237
238/// Ensures challenge eligibility inside an admission transaction.
239async fn ensure_challenge_eligibility_tx(
240    tx: &mut Transaction<'_, Postgres>,
241    challenge_name: &ChallengeName,
242    spec: &ChallengeBundleSpec,
243    agent_id: &AgentId,
244) -> Result<()> {
245    match spec.eligibility.eligibility_type {
246        ChallengeEligibilityType::Open => Ok(()),
247        ChallengeEligibilityType::PrivateShortlist => {
248            let has_shortlist = sqlx::query_scalar::<_, bool>(
249                r#"
250                SELECT EXISTS (
251                    SELECT 1
252                    FROM challenge_shortlisted_agents
253                    WHERE challenge_name = $1
254                )
255                "#,
256            )
257            .bind(challenge_name.as_str())
258            .fetch_one(&mut **tx)
259            .await?;
260            if !has_shortlist {
261                return Err(ServiceError::Forbidden(
262                    "challenge requires a shortlist, but no shortlist has been uploaded yet"
263                        .to_string(),
264                ));
265            }
266
267            let is_shortlisted = sqlx::query_scalar::<_, bool>(
268                r#"
269                SELECT EXISTS (
270                    SELECT 1
271                    FROM challenge_shortlisted_agents
272                    WHERE challenge_name = $1 AND agent_id = $2::uuid
273                )
274                "#,
275            )
276            .bind(challenge_name.as_str())
277            .bind(agent_id.as_str())
278            .fetch_one(&mut **tx)
279            .await?;
280            if !is_shortlisted {
281                return Err(ServiceError::Forbidden(
282                    "agent is not eligible for this challenge".to_string(),
283                ));
284            }
285            Ok(())
286        }
287    }
288}
289
290/// Read a storage key from a locked challenge row.
291fn storage_key_from_row(row: &sqlx::postgres::PgRow, column: &str) -> Result<StorageKey> {
292    let value: String = row.try_get(column)?;
293    StorageKey::try_new(&value)
294        .map_err(|e| ServiceError::Internal(format!("stored invalid {column}: {e}")))
295}
296
297/// Read an optional Moltbook post URL from a locked challenge row.
298fn optional_moltbook_post_url_from_row(
299    row: &sqlx::postgres::PgRow,
300    column: &str,
301) -> Result<Option<agentics_domain::models::urls::MoltbookPostUrl>> {
302    let value: Option<String> = row.try_get(column)?;
303    value
304        .map(agentics_domain::models::urls::MoltbookPostUrl::try_new)
305        .transpose()
306        .map_err(|e| ServiceError::Internal(format!("stored invalid {column}: {e}")))
307}
308
309/// Parses challenge time from an external boundary string.
310fn parse_challenge_time(value: Option<&str>, field: &str) -> Result<Option<DateTime<Utc>>> {
311    value
312        .map(|value| {
313            DateTime::parse_from_rfc3339(value)
314                .map(|date| date.with_timezone(&Utc))
315                .map_err(|e| ServiceError::Internal(format!("invalid challenge {field}: {e}")))
316        })
317        .transpose()
318}