Skip to main content

plan_archive/validate/
metadata.rs

1//! Validator for the per-plan `metadata.yaml` written into each archived
2//! plan folder at
3//! `plans/<host>/<org-or-group-path>/<repo>/<YYYY-MM-DD>-<slug>/metadata.yaml`.
4//!
5//! Schema (v1):
6//!
7//! ```yaml
8//! version: 1
9//! source:
10//!   host: github.com
11//!   org_or_group_path: graysurf
12//!   repo: agent-runtime-kit
13//!   branch: main
14//!   archive_commit: a2e8f227e...
15//!   original_path: docs/plans/2026-05-27-plan-archive-runtime-kit/
16//! captured_classification:
17//!   class: personal
18//!   primary_identity: graysurf
19//!   employer: null
20//!   retention: null
21//! refs:
22//!   issue: https://github.com/graysurf/agent-runtime-kit/issues/126
23//!   pr: https://github.com/graysurf/agent-runtime-kit/pull/127
24//! ```
25//!
26//! `captured_classification` may be omitted for pre-classification plans
27//! archived before the captured-classification field was introduced.
28//! Validators emit a `metadata-captured-classification-missing` warning
29//! in that case but still accept the file.
30
31use serde::{Deserialize, Serialize};
32use thiserror::Error;
33
34use super::{ValidationWarning, hosts::HostClass};
35
36const SUPPORTED_VERSION: u32 = 1;
37
/// Validated contents of a plan's `metadata.yaml`, as built by
/// [`validate_metadata_yaml`].
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct MetadataConfig {
    /// Schema version. Validation only succeeds when this equals
    /// `SUPPORTED_VERSION`; a file without a `version` key is assigned that
    /// value.
    pub version: u32,
    /// The file's `source` mapping; every field in it is required.
    pub source: MetadataSource,
    /// The file's `captured_classification` mapping, or `None` when the file
    /// omitted it (validation then emits a warning instead of failing).
    pub captured_classification: Option<MetadataClassification>,
    /// The file's `refs` mapping.
    pub refs: MetadataRefs,
}
45
/// The `source` mapping of `metadata.yaml`.
///
/// [`validate_metadata_yaml`] rejects the file unless every field here is
/// present and contains at least one non-whitespace character. Values are
/// stored exactly as deserialized (they are not trimmed).
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct MetadataSource {
    /// `source.host`, e.g. `github.com`.
    pub host: String,
    /// `source.org_or_group_path`, e.g. `graysurf` or `acme/platform/backend`.
    pub org_or_group_path: String,
    /// `source.repo`, e.g. `agent-runtime-kit`.
    pub repo: String,
    /// `source.branch`, e.g. `main`.
    pub branch: String,
    /// `source.archive_commit`. Only non-blankness is checked; the value is
    /// not verified to be a commit hash.
    pub archive_commit: String,
    /// `source.original_path`, e.g.
    /// `docs/plans/2026-05-27-plan-archive-runtime-kit/`.
    pub original_path: String,
}
55
/// The `captured_classification` mapping of `metadata.yaml` after validation.
///
/// Optional fields that are `None` are left out of the serialized output.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct MetadataClassification {
    /// Parsed from the `class` key, which must be `personal` or `employer`.
    pub class: HostClass,
    /// `employer` key. Validation requires it to be present and non-empty
    /// when `class` is `employer`; it is otherwise optional.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub employer: Option<String>,
    /// `retention` key, passed through unvalidated.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub retention: Option<String>,
    /// `primary_identity` key, passed through unvalidated.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub primary_identity: Option<String>,
}
66
/// The `refs` mapping of `metadata.yaml`.
///
/// [`validate_metadata_yaml`] rejects the file when all three keys are
/// absent. Keys that are `None` are left out of the serialized output.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct MetadataRefs {
    /// `refs.issue`, e.g. an issue URL.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub issue: Option<String>,
    /// `refs.pr`, e.g. a pull-request URL.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pr: Option<String>,
    /// `refs.mr`, e.g. a merge-request URL.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mr: Option<String>,
}
76
/// Payload of a successful validation; currently wraps only the parsed
/// [`MetadataConfig`].
// NOTE(review): presumably a separate struct so the serialized result has a
// `data.config` shape shared with sibling validators — confirm.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct MetadataValidationData {
    /// The validated `metadata.yaml` contents.
    pub config: MetadataConfig,
}
81
/// Successful result of [`validate_metadata_yaml`]: the parsed data plus any
/// non-fatal warnings collected along the way.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct MetadataValidation {
    /// The validated file contents.
    pub data: MetadataValidationData,
    /// Non-fatal findings. In this file the only warning emitted is
    /// `metadata-captured-classification-missing`.
    pub warnings: Vec<ValidationWarning>,
}
87
/// Reasons [`validate_metadata_yaml`] rejects a `metadata.yaml`.
///
/// Each variant maps to a stable string identifier via
/// [`MetadataValidationError::code`].
#[derive(Debug, Error)]
pub enum MetadataValidationError {
    /// The input was empty or contained only whitespace.
    #[error("metadata.yaml is empty")]
    Empty,
    /// The YAML deserializer rejected the input; the payload is the
    /// deserializer's error rendered as text.
    #[error("metadata.yaml could not be parsed as YAML: {0}")]
    Parse(String),
    /// The `version` key held a value other than `SUPPORTED_VERSION`.
    #[error("metadata.yaml version {found} is not supported (expected {SUPPORTED_VERSION})")]
    UnsupportedVersion { found: u32 },
    /// A required key was absent or blank; `field` is its dotted path
    /// (for example `source.repo`).
    #[error("metadata.yaml field `{field}` is missing or empty")]
    MissingRequiredField { field: String },
    /// None of `refs.issue`, `refs.pr`, `refs.mr` was supplied.
    #[error("metadata.yaml `refs`: at least one of `issue`, `pr`, `mr` must be present")]
    NoRefs,
    /// `captured_classification.class` was neither `personal` nor `employer`;
    /// `found` is the offending value.
    #[error(
        "metadata.yaml captured_classification.class `{found}` is not a recognised value (expected `personal` or `employer`)"
    )]
    UnknownClass { found: String },
    /// `captured_classification.class` was `employer` but no employer name
    /// was given.
    #[error(
        "metadata.yaml captured_classification.class `employer` requires a non-empty `employer` field"
    )]
    EmployerMissingName,
}
109
110impl MetadataValidationError {
111    pub fn code(&self) -> &'static str {
112        match self {
113            Self::Empty => "metadata-empty",
114            Self::Parse(_) => "metadata-parse-error",
115            Self::UnsupportedVersion { .. } => "metadata-unsupported-version",
116            Self::MissingRequiredField { .. } => "metadata-missing-required-field",
117            Self::NoRefs => "metadata-no-refs",
118            Self::UnknownClass { .. } => "metadata-unknown-class",
119            Self::EmployerMissingName => "metadata-employer-missing-name",
120        }
121    }
122}
123
124pub fn validate_metadata_yaml(input: &str) -> Result<MetadataValidation, MetadataValidationError> {
125    let trimmed = input.trim();
126    if trimmed.is_empty() {
127        return Err(MetadataValidationError::Empty);
128    }
129
130    let raw: RawMetadataFile = serde_yaml_ng::from_str(input)
131        .map_err(|err| MetadataValidationError::Parse(err.to_string()))?;
132
133    let version = raw.version.unwrap_or(SUPPORTED_VERSION);
134    if version != SUPPORTED_VERSION {
135        return Err(MetadataValidationError::UnsupportedVersion { found: version });
136    }
137
138    let raw_source = raw
139        .source
140        .ok_or_else(|| MetadataValidationError::MissingRequiredField {
141            field: "source".to_string(),
142        })?;
143
144    let source = MetadataSource {
145        host: require_field("source.host", raw_source.host)?,
146        org_or_group_path: require_field("source.org_or_group_path", raw_source.org_or_group_path)?,
147        repo: require_field("source.repo", raw_source.repo)?,
148        branch: require_field("source.branch", raw_source.branch)?,
149        archive_commit: require_field("source.archive_commit", raw_source.archive_commit)?,
150        original_path: require_field("source.original_path", raw_source.original_path)?,
151    };
152
153    let mut warnings = Vec::new();
154    let captured_classification = match raw.captured_classification {
155        None => {
156            warnings.push(ValidationWarning::new(
157                "metadata-captured-classification-missing",
158                "metadata.yaml omits captured_classification (pre-classification plan); resolve against current config/hosts.yaml at query time",
159            ));
160            None
161        }
162        Some(raw_cls) => Some(build_classification(raw_cls)?),
163    };
164
165    let raw_refs = raw.refs.unwrap_or_default();
166    if raw_refs.issue.is_none() && raw_refs.pr.is_none() && raw_refs.mr.is_none() {
167        return Err(MetadataValidationError::NoRefs);
168    }
169    let refs = MetadataRefs {
170        issue: raw_refs.issue,
171        pr: raw_refs.pr,
172        mr: raw_refs.mr,
173    };
174
175    Ok(MetadataValidation {
176        data: MetadataValidationData {
177            config: MetadataConfig {
178                version,
179                source,
180                captured_classification,
181                refs,
182            },
183        },
184        warnings,
185    })
186}
187
188fn require_field(name: &str, value: Option<String>) -> Result<String, MetadataValidationError> {
189    match value {
190        Some(v) if !v.trim().is_empty() => Ok(v),
191        _ => Err(MetadataValidationError::MissingRequiredField {
192            field: name.to_string(),
193        }),
194    }
195}
196
197fn build_classification(
198    raw: RawClassification,
199) -> Result<MetadataClassification, MetadataValidationError> {
200    let class = match raw.class.as_str() {
201        "personal" => HostClass::Personal,
202        "employer" => HostClass::Employer,
203        other => {
204            return Err(MetadataValidationError::UnknownClass {
205                found: other.to_string(),
206            });
207        }
208    };
209    if matches!(class, HostClass::Employer) && raw.employer.as_deref().is_none_or(str::is_empty) {
210        return Err(MetadataValidationError::EmployerMissingName);
211    }
212    Ok(MetadataClassification {
213        class,
214        employer: raw.employer,
215        retention: raw.retention,
216        primary_identity: raw.primary_identity,
217    })
218}
219
/// Direct deserialization target for the whole `metadata.yaml` document.
///
/// Every key is optional at this level so that a missing key reaches
/// `validate_metadata_yaml` and is reported with a specific error or warning
/// rather than as a generic deserialization failure.
#[derive(Debug, Deserialize)]
struct RawMetadataFile {
    /// `version` key; the validator substitutes `SUPPORTED_VERSION` when absent.
    #[serde(default)]
    version: Option<u32>,
    /// `source` mapping; the validator rejects the file when absent.
    #[serde(default)]
    source: Option<RawSource>,
    /// `captured_classification` mapping; absence only produces a warning.
    #[serde(default)]
    captured_classification: Option<RawClassification>,
    /// `refs` mapping; treated as an empty mapping when absent.
    #[serde(default)]
    refs: Option<RawRefs>,
    /// `schema` key: deserialized but never read in this file, hence the
    /// `dead_code` allowance.
    // NOTE(review): serde ignores unknown keys unless `deny_unknown_fields` is
    // set, so this field looks redundant — confirm why it is kept.
    #[allow(dead_code)]
    #[serde(default)]
    schema: Option<String>,
}
234
/// Direct deserialization target for the `source` mapping.
///
/// All fields are optional here; `validate_metadata_yaml` passes each one
/// through `require_field`, which turns an absent or blank value into a
/// `MissingRequiredField` error naming the key.
#[derive(Debug, Deserialize)]
struct RawSource {
    /// `source.host`
    #[serde(default)]
    host: Option<String>,
    /// `source.org_or_group_path`
    #[serde(default)]
    org_or_group_path: Option<String>,
    /// `source.repo`
    #[serde(default)]
    repo: Option<String>,
    /// `source.branch`
    #[serde(default)]
    branch: Option<String>,
    /// `source.archive_commit`
    #[serde(default)]
    archive_commit: Option<String>,
    /// `source.original_path`
    #[serde(default)]
    original_path: Option<String>,
}
250
/// Direct deserialization target for the `captured_classification` mapping.
///
/// Unlike the other raw structs, `class` is not optional: a mapping without
/// it fails deserialization, which `validate_metadata_yaml` reports as a
/// `Parse` error rather than a missing-field error.
#[derive(Debug, Deserialize)]
struct RawClassification {
    /// Raw `class` string; `build_classification` maps it onto `HostClass`.
    class: String,
    /// `employer` key; required by `build_classification` when `class` is
    /// `employer`.
    #[serde(default)]
    employer: Option<String>,
    /// `retention` key, carried through unvalidated.
    #[serde(default)]
    retention: Option<String>,
    /// `primary_identity` key, carried through unvalidated.
    #[serde(default)]
    primary_identity: Option<String>,
}
261
/// Direct deserialization target for the `refs` mapping.
///
/// Derives `Default` so that a file with no `refs` key at all can be handled
/// as an empty mapping (all three fields `None`).
#[derive(Debug, Default, Deserialize)]
struct RawRefs {
    /// `refs.issue`
    #[serde(default)]
    issue: Option<String>,
    /// `refs.pr`
    #[serde(default)]
    pr: Option<String>,
    /// `refs.mr`
    #[serde(default)]
    mr: Option<String>,
}
271
// Unit tests for `validate_metadata_yaml`: three full-document fixtures for
// the accepted shapes, then one inline document per rejection code.
#[cfg(test)]
mod tests {
    use super::*;

    // Fully populated document with `personal` classification and both an
    // issue and a PR ref.
    const GITHUB_PR: &str = r"
version: 1
source:
  host: github.com
  org_or_group_path: graysurf
  repo: agent-runtime-kit
  branch: main
  archive_commit: a2e8f227000000000000000000000000a2e8f227
  original_path: docs/plans/2026-05-27-plan-archive-runtime-kit/
captured_classification:
  class: personal
  primary_identity: graysurf
refs:
  issue: https://github.com/graysurf/agent-runtime-kit/issues/126
  pr: https://github.com/graysurf/agent-runtime-kit/pull/127
";

    // Document with `employer` classification, a nested group path, and an
    // MR ref instead of a PR ref.
    const GITLAB_MR: &str = r"
version: 1
source:
  host: gitlab.example.com
  org_or_group_path: acme/platform/backend
  repo: services
  branch: main
  archive_commit: deadbeefdeadbeefdeadbeefdeadbeefdeadbeef
  original_path: docs/plans/2026-04-10-some-plan/
captured_classification:
  class: employer
  employer: ExampleCorp
  retention: delete-on-termination
refs:
  issue: https://gitlab.example.com/acme/platform/backend/services/-/issues/42
  mr: https://gitlab.example.com/acme/platform/backend/services/-/merge_requests/99
";

    // Document without `captured_classification`; it must still validate,
    // with a warning.
    const ORPHAN_PLAN: &str = r"
version: 1
source:
  host: github.com
  org_or_group_path: graysurf
  repo: agent-runtime-kit
  branch: main
  archive_commit: c0ffee00c0ffee00c0ffee00c0ffee00c0ffee00
  original_path: docs/plans/2026-01-15-orphan-experiment/
refs:
  issue: https://github.com/graysurf/agent-runtime-kit/issues/9999
";

    // A complete document validates with no warnings and preserves its values.
    #[test]
    fn github_pr_validates() {
        let v = validate_metadata_yaml(GITHUB_PR).expect("validation");
        assert!(v.warnings.is_empty());
        assert_eq!(v.data.config.source.host, "github.com");
        assert!(v.data.config.captured_classification.is_some());
        assert_eq!(
            v.data.config.refs.pr.as_deref(),
            Some("https://github.com/graysurf/agent-runtime-kit/pull/127")
        );
    }

    // `class: employer` with a name is accepted and the `mr` ref is kept.
    #[test]
    fn gitlab_mr_validates() {
        let v = validate_metadata_yaml(GITLAB_MR).expect("validation");
        let cls = v.data.config.captured_classification.expect("captured");
        assert!(matches!(cls.class, HostClass::Employer));
        assert_eq!(cls.employer.as_deref(), Some("ExampleCorp"));
        assert!(v.data.config.refs.mr.is_some());
    }

    // Omitting `captured_classification` yields exactly one warning, not an
    // error.
    #[test]
    fn orphan_plan_warns_on_missing_classification() {
        let v = validate_metadata_yaml(ORPHAN_PLAN).expect("validation");
        assert!(v.data.config.captured_classification.is_none());
        assert_eq!(v.warnings.len(), 1);
        assert_eq!(
            v.warnings[0].code,
            "metadata-captured-classification-missing"
        );
    }

    #[test]
    fn missing_required_field_rejected() {
        // Omit `source.repo` while keeping the rest of the document
        // well-formed so the validator surfaces the missing-field code
        // rather than a YAML parse error.
        let input = r"
version: 1
source:
  host: github.com
  org_or_group_path: graysurf
  branch: main
  archive_commit: a2e8f227000000000000000000000000a2e8f227
  original_path: docs/plans/2026-05-27-something/
refs:
  issue: https://github.com/graysurf/r/issues/1
";
        let err = validate_metadata_yaml(input).expect_err("missing field");
        assert_eq!(err.code(), "metadata-missing-required-field");
    }

    // An explicitly empty `refs` mapping is rejected.
    #[test]
    fn no_refs_rejected() {
        let input = r"
version: 1
source:
  host: github.com
  org_or_group_path: graysurf
  repo: r
  branch: main
  archive_commit: deadbeefdeadbeefdeadbeefdeadbeefdeadbeef
  original_path: docs/plans/foo/
refs: {}
";
        let err = validate_metadata_yaml(input).expect_err("no refs");
        assert_eq!(err.code(), "metadata-no-refs");
    }

    // `class: employer` without an `employer` key is rejected.
    #[test]
    fn employer_missing_name_rejected() {
        let input = r"
version: 1
source:
  host: gitlab.example.com
  org_or_group_path: a/b
  repo: r
  branch: main
  archive_commit: deadbeefdeadbeefdeadbeefdeadbeefdeadbeef
  original_path: docs/plans/foo/
captured_classification:
  class: employer
refs:
  mr: https://gitlab.example.com/a/b/r/-/merge_requests/1
";
        let err = validate_metadata_yaml(input).expect_err("missing employer");
        assert_eq!(err.code(), "metadata-employer-missing-name");
    }

    // A `class` value outside `personal` / `employer` is rejected.
    #[test]
    fn unknown_class_rejected() {
        let input = r"
version: 1
source:
  host: github.com
  org_or_group_path: graysurf
  repo: r
  branch: main
  archive_commit: deadbeefdeadbeefdeadbeefdeadbeefdeadbeef
  original_path: docs/plans/foo/
captured_classification:
  class: visitor
refs:
  issue: https://github.com/graysurf/r/issues/1
";
        let err = validate_metadata_yaml(input).expect_err("unknown class");
        assert_eq!(err.code(), "metadata-unknown-class");
    }
}