Skip to main content

harn_vm/orchestration/crystallize/
bundle.rs

1//! Crystallization bundle: types, build/write/load/validate, shadow replay, and redaction helpers.
2
3use std::collections::{BTreeMap, BTreeSet};
4use std::path::{Component, Path, PathBuf};
5
6use serde::{Deserialize, Serialize};
7use serde_json::Value as JsonValue;
8
9use super::super::{now_rfc3339, ReplayTraceRun};
10use super::api::load_crystallization_trace;
11use super::shadow::{find_sequence_start, shadow_candidate};
12use super::types::{
13    CrystallizationApproval, CrystallizationArtifacts, CrystallizationReport,
14    CrystallizationSideEffect, CrystallizationTrace, PromotionApprovalRecord, PromotionCriteria,
15    PromotionDivergenceRecord, SavingsEstimate, SegmentKind, ShadowRunReport,
16    SkillInductionGateReceipt, WorkflowCandidate, WorkflowCandidateStep, BUNDLE_EVAL_PACK_FILE,
17    BUNDLE_FIXTURES_DIR, BUNDLE_MANIFEST_FILE, BUNDLE_REPORT_FILE, BUNDLE_SCHEMA,
18    BUNDLE_SCHEMA_VERSION, BUNDLE_SKILL_DIR, BUNDLE_SKILL_FILE, BUNDLE_SKILL_GATE_FILE,
19    BUNDLE_WORKFLOW_FILE, DEFAULT_ROLLOUT_POLICY, SKILL_GATE_RECEIPT_SCHEMA,
20};
21use crate::redact::{RedactionPolicy, REDACTED_PLACEHOLDER};
22use crate::skills::{parse_frontmatter, split_frontmatter};
23use crate::value::VmError;
24
// ===== Crystallization bundle =====
//
// A bundle is a directory layout that Harn writes and a cloud platform (or any
// other importer) reads without bespoke glue. The contract is:
//
//   bundle/
//     candidate.json        # versioned manifest documented below
//     workflow.harn         # generated/reviewable workflow code
//     report.json           # full mining/shadow/eval report
//     harn.eval.toml        # generated eval pack when available (optional)
//     <BUNDLE_SKILL_DIR>/   # generated skill markdown plus its replay gate
//                           # receipt (optional, only when a skill candidate
//                           # targets the selected workflow candidate)
//     fixtures/             # redacted replay fixtures referenced by the
//                           # report (optional, only when --bundle is used
//                           # with `harn crystallize` and traces were on disk)
//
// `candidate.json` is the authoritative manifest. It must include the
// `schema` and `schema_version` markers. Cloud importers MUST reject any
// bundle whose `schema` is not exactly `harn.crystallization.candidate.bundle`
// or whose `schema_version` is greater than the highest version they
// understand. Only the documented additive fields may be added without
// bumping `schema_version`.
45
/// Identifies the tool that produced a bundle.
///
/// Fields missing from serialized input fall back to the values of this
/// type's `Default` implementation (`#[serde(default)]`).
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleGenerator {
    /// Name of the generating tool.
    pub tool: String,
    /// Version string of the generating tool.
    pub version: String,
}
52
53impl Default for BundleGenerator {
54    fn default() -> Self {
55        Self {
56            tool: "harn".to_string(),
57            version: env!("CARGO_PKG_VERSION").to_string(),
58        }
59    }
60}
61
/// Manifest pointer to the generated workflow file together with the
/// package identity proposed for it.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleWorkflowRef {
    /// Relative path inside the bundle directory.
    pub path: String,
    /// Short identifier used in `pipeline NAME(...)`.
    pub name: String,
    /// Logical package name promotion uses to register the workflow.
    pub package_name: String,
    /// Initial workflow version proposed for promotion.
    pub package_version: String,
}
74
/// Provenance record for one trace that contributed to the candidate.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleSourceTrace {
    /// Identifier of the contributing trace.
    pub trace_id: String,
    /// Hash recorded for the trace's source. The builder in this file takes
    /// it from the trace, then from the matching candidate example, and
    /// leaves it empty when neither carries one.
    pub source_hash: String,
    /// Optional human-visible URL (PR, issue, run record path) for the
    /// trace. `None` when the trace was loaded from an in-memory store.
    pub source_url: Option<String>,
    /// Optional cloud-side receipt id when the trace was already promoted
    /// into a tenant receipt. Cloud importers use this to wire candidate
    /// evidence to existing receipts without round-tripping the raw payload.
    pub source_receipt_id: Option<String>,
    /// Relative path of the redacted fixture inside the bundle, if one
    /// was emitted.
    pub fixture_path: Option<String>,
}
91
/// Manifest view of a single workflow step.
///
/// Populated field-for-field from a `WorkflowCandidateStep` by
/// `BundleStep::from_candidate_step`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleStep {
    /// Step index as recorded on the candidate step.
    pub index: usize,
    /// Step kind label as recorded on the candidate step.
    pub kind: String,
    /// Step name as recorded on the candidate step.
    pub name: String,
    /// Segment classification; the bundle builder uses it to split steps
    /// into the manifest's deterministic and fuzzy lists.
    pub segment: SegmentKind,
    /// Parameter references recorded on the candidate step.
    pub parameter_refs: Vec<String>,
    /// Side effects recorded on the candidate step.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities recorded on the candidate step.
    pub capabilities: Vec<String>,
    /// Secrets recorded as required on the candidate step.
    pub required_secrets: Vec<String>,
    /// Approval attached to the candidate step, if any.
    pub approval: Option<CrystallizationApproval>,
    /// Reviewer-facing notes recorded on the candidate step.
    pub review_notes: Vec<String>,
}
106
107impl BundleStep {
108    fn from_candidate_step(step: &WorkflowCandidateStep) -> Self {
109        Self {
110            index: step.index,
111            kind: step.kind.clone(),
112            name: step.name.clone(),
113            segment: step.segment.clone(),
114            parameter_refs: step.parameter_refs.clone(),
115            side_effects: step.side_effects.clone(),
116            capabilities: step.capabilities.clone(),
117            required_secrets: step.required_secrets.clone(),
118            approval: step.approval.clone(),
119            review_notes: step.review_notes.clone(),
120        }
121    }
122}
123
/// Manifest pointer to the generated eval pack.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleEvalPackRef {
    /// Relative path of the eval pack inside the bundle directory.
    pub path: String,
    /// Optional external link the eval pack also lives at (e.g. a hosted
    /// `eval-pack://` URI when the bundle was promoted into a tenant).
    pub link: Option<String>,
}
133
/// Manifest pointer to the generated skill and its gate receipt.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleSkillRef {
    /// Relative path to the generated `SKILL.md` inside the bundle.
    pub path: String,
    /// Relative path to the replay/held-out gate receipt for the skill.
    pub gate_receipt_path: String,
    /// Name of the skill candidate.
    pub name: String,
    /// Identifier of the skill candidate.
    pub skill_candidate_id: String,
    /// Identifier of the workflow candidate the skill candidate targets.
    pub workflow_candidate_id: String,
}
145
/// Manifest entry describing one fixture file shipped in the bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleFixtureRef {
    /// Relative path of the fixture file inside the bundle directory.
    pub path: String,
    /// Identifier of the trace the fixture was produced from.
    pub trace_id: String,
    /// Source hash recorded for that trace (may be empty).
    pub source_hash: String,
    /// Whether the payload went through redaction before being attached.
    /// The builder in this file always sets this to `true`.
    pub redacted: bool,
}
154
/// Promotion metadata carried in the manifest: ownership, rollout policy,
/// and the evidence gathered for the candidate.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct BundlePromotion {
    /// Owner of the workflow. The bundle builder initializes this to the
    /// author so it is populated whenever an author is known.
    pub owner: Option<String>,
    /// Approver copied from the candidate's promotion metadata, if any.
    pub approver: Option<String>,
    /// Author copied from the candidate's promotion metadata, if any.
    pub author: Option<String>,
    /// Logical rollout strategy. Defaults to `shadow_then_canary`. Hosted
    /// surfaces may extend this enum but must keep existing values stable.
    pub rollout_policy: String,
    /// Rollback target copied from the candidate's promotion metadata.
    pub rollback_target: Option<String>,
    /// Timestamp taken when the bundle was built.
    pub created_at: String,
    /// Workflow version proposed for promotion.
    pub workflow_version: String,
    /// Package name proposed for promotion.
    pub package_name: String,
    /// Sample count copied from the candidate's promotion metadata.
    pub sample_count: usize,
    /// Confidence copied from the candidate's promotion metadata.
    pub confidence: f64,
    /// Shadow success count copied from the candidate's promotion metadata.
    pub shadow_success_count: usize,
    /// Shadow failure count copied from the candidate's promotion metadata.
    pub shadow_failure_count: usize,
    /// Divergence records copied from the candidate's promotion metadata.
    pub divergence_history: Vec<PromotionDivergenceRecord>,
    /// Approval records copied from the candidate's promotion metadata.
    pub approval_history: Vec<PromotionApprovalRecord>,
    /// Promotion criteria copied from the candidate's promotion metadata.
    pub criteria: PromotionCriteria,
    /// Savings estimate copied from the candidate's promotion metadata.
    pub estimated_time_token_savings: SavingsEstimate,
}
177
178impl Default for BundlePromotion {
179    fn default() -> Self {
180        Self {
181            owner: None,
182            approver: None,
183            author: None,
184            rollout_policy: DEFAULT_ROLLOUT_POLICY.to_string(),
185            rollback_target: None,
186            created_at: String::new(),
187            workflow_version: String::new(),
188            package_name: String::new(),
189            sample_count: 0,
190            confidence: 0.0,
191            shadow_success_count: 0,
192            shadow_failure_count: 0,
193            divergence_history: Vec::new(),
194            approval_history: Vec::new(),
195            criteria: PromotionCriteria::default(),
196            estimated_time_token_savings: SavingsEstimate::default(),
197        }
198    }
199}
200
/// Summary of the redaction applied to the bundle's fixtures.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleRedactionSummary {
    /// `true` when at least one fixture payload was emitted (and therefore
    /// redacted) by the bundle builder.
    pub applied: bool,
    /// Names of the redaction rules that were in effect.
    pub rules: Vec<String>,
    /// Human-readable description of what was redacted.
    pub summary: String,
    /// Number of fixture files copied into the bundle (0 when no fixture
    /// directory was emitted).
    pub fixture_count: usize,
}
211
/// The authoritative bundle manifest.
///
/// All fields are optional on input (`#[serde(default)]`), but the loader
/// in this file rejects manifests whose `schema` differs from
/// `BUNDLE_SCHEMA` or whose `schema_version` exceeds `BUNDLE_SCHEMA_VERSION`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundleManifest {
    /// Schema marker; must equal `BUNDLE_SCHEMA` for the manifest to load.
    pub schema: String,
    /// Schema version; must not exceed `BUNDLE_SCHEMA_VERSION` to load.
    pub schema_version: u32,
    /// Timestamp taken when the bundle was built.
    pub generated_at: String,
    /// Tool and version that produced the bundle.
    pub generator: BundleGenerator,
    /// Whether the bundle holds a candidate, a plan-only candidate, or a
    /// rejection record.
    pub kind: BundleKind,
    /// Id of the selected candidate; empty when none was selected.
    pub candidate_id: String,
    /// Stable key importers use to dedupe bundles across runs.
    pub external_key: String,
    /// Human-readable title of the bundle.
    pub title: String,
    /// Team supplied through `BundleOptions`, if any.
    pub team: Option<String>,
    /// Repository supplied through `BundleOptions`, if any.
    pub repo: Option<String>,
    /// Risk level, either supplied through `BundleOptions` or inferred.
    pub risk_level: String,
    /// Pointer to the generated workflow file and its package identity.
    pub workflow: BundleWorkflowRef,
    /// Source trace hashes copied from the candidate's promotion metadata.
    pub source_trace_hashes: Vec<String>,
    /// Provenance records for the traces that contributed fixtures/examples.
    pub source_traces: Vec<BundleSourceTrace>,
    /// Steps whose segment is `SegmentKind::Deterministic`.
    pub deterministic_steps: Vec<BundleStep>,
    /// All remaining (non-deterministic) steps.
    pub fuzzy_steps: Vec<BundleStep>,
    /// Side effects copied from the selected candidate.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities copied from the selected candidate.
    pub capabilities: Vec<String>,
    /// Required secrets copied from the selected candidate.
    pub required_secrets: Vec<String>,
    /// Savings estimate copied from the selected candidate.
    pub savings: SavingsEstimate,
    /// Shadow run report copied from the selected candidate.
    pub shadow: ShadowRunReport,
    /// Eval pack pointer; present only when eval pack content is non-blank.
    pub eval_pack: Option<BundleEvalPackRef>,
    /// Skill pointer; present when a skill candidate targets the selected
    /// workflow candidate.
    pub skill: Option<BundleSkillRef>,
    /// Fixture files shipped with the bundle.
    pub fixtures: Vec<BundleFixtureRef>,
    /// Promotion metadata and evidence.
    pub promotion: BundlePromotion,
    /// Summary of redaction applied to fixtures.
    pub redaction: BundleRedactionSummary,
    /// Confidence copied from the selected candidate (0.0 when none).
    pub confidence: f64,
    /// Rejection reasons flattened from every rejected candidate in the report.
    pub rejection_reasons: Vec<String>,
    /// Warnings copied from the report.
    pub warnings: Vec<String>,
}
245
/// Classification of a bundle. Serialized in `snake_case`
/// (`candidate`, `plan_only`, `rejected`); defaults to `Candidate`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum BundleKind {
    /// A normal candidate that passed shadow comparison and is ready for
    /// review and promotion.
    #[default]
    Candidate,
    /// A "plan-only" candidate: every step has a side-effect-free, in-process
    /// outcome (e.g. classify and write a receipt). Cloud importers can
    /// promote these without explicit external-side-effect approval.
    PlanOnly,
    /// No safe candidate was selected. The bundle still records what was
    /// attempted, the rejection reasons, and any rejected candidates so
    /// reviewers can debug or feed it back into mining.
    Rejected,
}
262
/// Caller-supplied overrides for bundle metadata.
///
/// For `external_key`, `title`, `risk_level`, and `rollout_policy`, a value
/// that is `None` or only whitespace is treated as unset and the builder
/// falls back to a derived or default value.
#[derive(Clone, Debug, Default)]
pub struct BundleOptions {
    /// Stable identifier downstream cloud importers use to dedupe bundles
    /// across runs (defaults to a sanitized workflow name).
    pub external_key: Option<String>,
    /// Bundle title; inferred from the candidate/workflow name when unset.
    pub title: Option<String>,
    /// Team recorded verbatim in the manifest.
    pub team: Option<String>,
    /// Repository recorded verbatim in the manifest.
    pub repo: Option<String>,
    /// Risk level; inferred from the selected candidate when unset.
    pub risk_level: Option<String>,
    /// Rollout policy; `DEFAULT_ROLLOUT_POLICY` when unset.
    pub rollout_policy: Option<String>,
}
274
/// In-memory form of a bundle: the manifest plus the content of every
/// file the writer may emit alongside it.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundle {
    /// Manifest describing the bundle.
    pub manifest: CrystallizationBundleManifest,
    /// Full crystallization report.
    pub report: CrystallizationReport,
    /// Generated workflow source code.
    pub harn_code: String,
    /// Eval pack TOML; the writer skips the file when this is blank.
    pub eval_pack_toml: String,
    /// Skill markdown; the writer skips the skill directory when this is blank.
    pub skill_markdown: String,
    /// Pretty-printed gate receipt JSON for the skill; the writer skips the
    /// file when this is blank.
    pub skill_gate_receipt_json: String,
    /// Redacted traces written out as fixture files.
    pub fixtures: Vec<CrystallizationTrace>,
}
286
/// Outcome of validating a bundle on disk: per-artifact pass flags plus
/// the list of problems that were found.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleValidation {
    /// Display form of the validated bundle directory.
    pub bundle_dir: String,
    /// Schema marker read from the manifest (empty if the manifest failed to load).
    pub schema: String,
    /// Schema version read from the manifest.
    pub schema_version: u32,
    /// Bundle kind read from the manifest.
    pub kind: BundleKind,
    /// Candidate id read from the manifest.
    pub candidate_id: String,
    /// The manifest loaded and passed its schema checks.
    pub manifest_ok: bool,
    /// The workflow file referenced by the manifest exists inside the bundle.
    pub workflow_ok: bool,
    /// The report file was read and decoded.
    pub report_ok: bool,
    /// The referenced eval pack exists, or the manifest references none.
    pub eval_pack_ok: bool,
    /// Result of the skill checks.
    pub skill_ok: bool,
    /// Result of the fixture checks.
    pub fixtures_ok: bool,
    /// Result of the redaction checks.
    pub redaction_ok: bool,
    /// Human-readable description of every problem encountered.
    pub problems: Vec<String>,
}
305
impl BundleValidation {
    /// Returns `true` when validation recorded no problems.
    ///
    /// Only `problems` is consulted; the individual `*_ok` flags are not.
    pub fn is_ok(&self) -> bool {
        self.problems.is_empty()
    }
}
311
312/// Build an in-memory bundle from already-mined artifacts. The traces
313/// passed here are the same normalized traces used to mine the candidate;
314/// they will be redacted before being attached as fixtures.
315pub fn build_crystallization_bundle(
316    artifacts: CrystallizationArtifacts,
317    traces: &[CrystallizationTrace],
318    options: BundleOptions,
319) -> Result<CrystallizationBundle, VmError> {
320    let CrystallizationArtifacts {
321        report,
322        harn_code,
323        eval_pack_toml,
324    } = artifacts;
325
326    let (selected, kind) = match report
327        .selected_candidate_id
328        .as_deref()
329        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
330    {
331        Some(candidate) => {
332            let kind = if candidate_is_plan_only(candidate) {
333                BundleKind::PlanOnly
334            } else {
335                BundleKind::Candidate
336            };
337            (Some(candidate), kind)
338        }
339        None => (None, BundleKind::Rejected),
340    };
341
342    let workflow_name = selected
343        .map(|candidate| candidate.name.clone())
344        .unwrap_or_else(|| "crystallized_workflow".to_string());
345    let package_name = selected
346        .map(|candidate| candidate.promotion.package_name.clone())
347        .unwrap_or_else(|| workflow_name.replace('_', "-"));
348    let workflow_version = selected
349        .map(|candidate| candidate.promotion.version.clone())
350        .unwrap_or_else(|| "0.0.0".to_string());
351
352    let manifest_workflow = BundleWorkflowRef {
353        path: BUNDLE_WORKFLOW_FILE.to_string(),
354        name: workflow_name.clone(),
355        package_name: package_name.clone(),
356        package_version: workflow_version.clone(),
357    };
358
359    let external_key = options
360        .external_key
361        .clone()
362        .filter(|key| !key.trim().is_empty())
363        .unwrap_or_else(|| sanitize_external_key(&workflow_name));
364    let title = options
365        .title
366        .clone()
367        .filter(|title| !title.trim().is_empty())
368        .unwrap_or_else(|| infer_bundle_title(selected, &workflow_name));
369    let risk_level = options
370        .risk_level
371        .clone()
372        .filter(|risk| !risk.trim().is_empty())
373        .unwrap_or_else(|| infer_risk_level(selected));
374    let rollout_policy = options
375        .rollout_policy
376        .clone()
377        .filter(|policy| !policy.trim().is_empty())
378        .unwrap_or_else(|| DEFAULT_ROLLOUT_POLICY.to_string());
379
380    let (deterministic_steps, fuzzy_steps) = match selected {
381        Some(candidate) => candidate
382            .steps
383            .iter()
384            .map(BundleStep::from_candidate_step)
385            .partition::<Vec<_>, _>(|step| step.segment == SegmentKind::Deterministic),
386        None => (Vec::new(), Vec::new()),
387    };
388
389    let source_trace_hashes = selected
390        .map(|candidate| candidate.promotion.source_trace_hashes.clone())
391        .unwrap_or_default();
392
393    let mut source_traces = Vec::new();
394    let mut fixture_refs = Vec::new();
395    let mut fixture_payloads = Vec::new();
396    if let Some(candidate) = selected {
397        let mut fixture_trace_ids = BTreeSet::new();
398        for example in &candidate.examples {
399            fixture_trace_ids.insert(example.trace_id.clone());
400        }
401        for trace in traces {
402            if find_sequence_start(trace, &candidate.sequence_signature).is_some() {
403                fixture_trace_ids.insert(trace.id.clone());
404            }
405        }
406        for trace_id in fixture_trace_ids {
407            let trace = traces.iter().find(|trace| trace.id == trace_id);
408            let source_hash = trace
409                .and_then(|trace| trace.source_hash.clone())
410                .or_else(|| {
411                    candidate
412                        .examples
413                        .iter()
414                        .find(|example| example.trace_id == trace_id)
415                        .map(|example| example.source_hash.clone())
416                })
417                .unwrap_or_default();
418            let fixture_relative = trace.map(|trace| {
419                format!(
420                    "{BUNDLE_FIXTURES_DIR}/{}.json",
421                    sanitize_fixture_name(&trace.id)
422                )
423            });
424            source_traces.push(BundleSourceTrace {
425                trace_id: trace_id.clone(),
426                source_hash: source_hash.clone(),
427                source_url: trace.and_then(|trace| trace.source.clone()),
428                source_receipt_id: trace
429                    .and_then(|trace| trace.metadata.get("source_receipt_id"))
430                    .and_then(|value| value.as_str().map(str::to_string)),
431                fixture_path: fixture_relative.clone(),
432            });
433            if let (Some(trace), Some(fixture_path)) = (trace, fixture_relative.clone()) {
434                let mut redacted = trace.clone();
435                redact_trace_for_bundle(&mut redacted);
436                fixture_refs.push(BundleFixtureRef {
437                    path: fixture_path,
438                    trace_id: trace.id.clone(),
439                    source_hash,
440                    redacted: true,
441                });
442                fixture_payloads.push(redacted);
443            }
444        }
445    }
446
447    // Owner defaults to author so cloud importers always have a populated
448    // ownership pointer, but stays separate from `author` so reviewers can
449    // assign a different owner in the manifest before promotion.
450    let author = selected.and_then(|candidate| candidate.promotion.author.clone());
451    let promotion = BundlePromotion {
452        owner: author.clone(),
453        approver: selected.and_then(|candidate| candidate.promotion.approver.clone()),
454        author,
455        rollout_policy,
456        rollback_target: selected.and_then(|candidate| candidate.promotion.rollback_target.clone()),
457        created_at: now_rfc3339(),
458        workflow_version,
459        package_name,
460        sample_count: selected
461            .map(|candidate| candidate.promotion.sample_count)
462            .unwrap_or_default(),
463        confidence: selected
464            .map(|candidate| candidate.promotion.confidence)
465            .unwrap_or_default(),
466        shadow_success_count: selected
467            .map(|candidate| candidate.promotion.shadow_success_count)
468            .unwrap_or_default(),
469        shadow_failure_count: selected
470            .map(|candidate| candidate.promotion.shadow_failure_count)
471            .unwrap_or_default(),
472        divergence_history: selected
473            .map(|candidate| candidate.promotion.divergence_history.clone())
474            .unwrap_or_default(),
475        approval_history: selected
476            .map(|candidate| candidate.promotion.approval_history.clone())
477            .unwrap_or_default(),
478        criteria: selected
479            .map(|candidate| candidate.promotion.criteria.clone())
480            .unwrap_or_default(),
481        estimated_time_token_savings: selected
482            .map(|candidate| candidate.promotion.estimated_time_token_savings.clone())
483            .unwrap_or_default(),
484    };
485
486    let redaction = BundleRedactionSummary {
487        applied: !fixture_payloads.is_empty(),
488        rules: vec![
489            "sensitive_keys".to_string(),
490            "secret_value_heuristic".to_string(),
491        ],
492        summary: if fixture_payloads.is_empty() {
493            "no fixtures emitted".to_string()
494        } else {
495            "fixture payloads scrubbed of secret-like values and sensitive keys before write"
496                .to_string()
497        },
498        fixture_count: fixture_payloads.len(),
499    };
500
501    let eval_pack = if eval_pack_toml.trim().is_empty() {
502        None
503    } else {
504        Some(BundleEvalPackRef {
505            path: BUNDLE_EVAL_PACK_FILE.to_string(),
506            link: selected
507                .and_then(|candidate| candidate.promotion.eval_pack_link.clone())
508                .filter(|link| !link.trim().is_empty()),
509        })
510    };
511    let selected_skill = selected.and_then(|candidate| {
512        report
513            .skill_candidates
514            .iter()
515            .find(|skill| skill.workflow_candidate_id == candidate.id)
516    });
517    let skill = selected_skill.map(|skill| BundleSkillRef {
518        path: format!("{BUNDLE_SKILL_DIR}/{BUNDLE_SKILL_FILE}"),
519        gate_receipt_path: format!("{BUNDLE_SKILL_DIR}/{BUNDLE_SKILL_GATE_FILE}"),
520        name: skill.name.clone(),
521        skill_candidate_id: skill.id.clone(),
522        workflow_candidate_id: skill.workflow_candidate_id.clone(),
523    });
524    let skill_markdown = selected_skill
525        .map(|skill| skill.skill_markdown.clone())
526        .unwrap_or_default();
527    let skill_gate_receipt_json = selected_skill
528        .and_then(|skill| serde_json::to_string_pretty(&skill.replay_gate.receipt).ok())
529        .unwrap_or_default();
530
531    let manifest = CrystallizationBundleManifest {
532        schema: BUNDLE_SCHEMA.to_string(),
533        schema_version: BUNDLE_SCHEMA_VERSION,
534        generated_at: now_rfc3339(),
535        generator: BundleGenerator::default(),
536        kind,
537        candidate_id: selected
538            .map(|candidate| candidate.id.clone())
539            .unwrap_or_default(),
540        external_key,
541        title,
542        team: options.team,
543        repo: options.repo,
544        risk_level,
545        workflow: manifest_workflow,
546        source_trace_hashes,
547        source_traces,
548        deterministic_steps,
549        fuzzy_steps,
550        side_effects: selected
551            .map(|candidate| candidate.side_effects.clone())
552            .unwrap_or_default(),
553        capabilities: selected
554            .map(|candidate| candidate.capabilities.clone())
555            .unwrap_or_default(),
556        required_secrets: selected
557            .map(|candidate| candidate.required_secrets.clone())
558            .unwrap_or_default(),
559        savings: selected
560            .map(|candidate| candidate.savings.clone())
561            .unwrap_or_default(),
562        shadow: selected
563            .map(|candidate| candidate.shadow.clone())
564            .unwrap_or_default(),
565        eval_pack,
566        skill,
567        fixtures: fixture_refs,
568        promotion,
569        redaction,
570        confidence: selected
571            .map(|candidate| candidate.confidence)
572            .unwrap_or(0.0),
573        rejection_reasons: report
574            .rejected_candidates
575            .iter()
576            .flat_map(|candidate| candidate.rejection_reasons.iter().cloned())
577            .collect(),
578        warnings: report.warnings.clone(),
579    };
580
581    Ok(CrystallizationBundle {
582        manifest,
583        report,
584        harn_code,
585        eval_pack_toml,
586        skill_markdown,
587        skill_gate_receipt_json,
588        fixtures: fixture_payloads,
589    })
590}
591
592/// Write a bundle to a directory. Creates the directory if it does not
593/// already exist. Returns the manifest with `generated_at` and any
594/// runtime-resolved metadata filled in.
595pub fn write_crystallization_bundle(
596    bundle: &CrystallizationBundle,
597    bundle_dir: &Path,
598) -> Result<CrystallizationBundleManifest, VmError> {
599    std::fs::create_dir_all(bundle_dir).map_err(|error| {
600        VmError::Runtime(format!(
601            "failed to create bundle dir {}: {error}",
602            bundle_dir.display()
603        ))
604    })?;
605    write_bytes(
606        &bundle_dir.join(BUNDLE_WORKFLOW_FILE),
607        bundle.harn_code.as_bytes(),
608    )?;
609    let report_json = serde_json::to_vec_pretty(&bundle.report)
610        .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
611    write_bytes(&bundle_dir.join(BUNDLE_REPORT_FILE), &report_json)?;
612
613    if !bundle.eval_pack_toml.trim().is_empty() {
614        write_bytes(
615            &bundle_dir.join(BUNDLE_EVAL_PACK_FILE),
616            bundle.eval_pack_toml.as_bytes(),
617        )?;
618    }
619
620    if !bundle.skill_markdown.trim().is_empty() {
621        let skill_dir = bundle_dir.join(BUNDLE_SKILL_DIR);
622        std::fs::create_dir_all(&skill_dir).map_err(|error| {
623            VmError::Runtime(format!(
624                "failed to create skill dir {}: {error}",
625                skill_dir.display()
626            ))
627        })?;
628        write_bytes(
629            &skill_dir.join(BUNDLE_SKILL_FILE),
630            bundle.skill_markdown.as_bytes(),
631        )?;
632        if !bundle.skill_gate_receipt_json.trim().is_empty() {
633            write_bytes(
634                &skill_dir.join(BUNDLE_SKILL_GATE_FILE),
635                bundle.skill_gate_receipt_json.as_bytes(),
636            )?;
637        }
638    }
639
640    if !bundle.fixtures.is_empty() {
641        let fixtures_dir = bundle_dir.join(BUNDLE_FIXTURES_DIR);
642        std::fs::create_dir_all(&fixtures_dir).map_err(|error| {
643            VmError::Runtime(format!(
644                "failed to create fixtures dir {}: {error}",
645                fixtures_dir.display()
646            ))
647        })?;
648        for trace in &bundle.fixtures {
649            let path = fixtures_dir.join(format!("{}.json", sanitize_fixture_name(&trace.id)));
650            let payload = serde_json::to_vec_pretty(trace).map_err(|error| {
651                VmError::Runtime(format!("failed to encode fixture {}: {error}", trace.id))
652            })?;
653            write_bytes(&path, &payload)?;
654        }
655    }
656
657    let manifest_json = serde_json::to_vec_pretty(&bundle.manifest)
658        .map_err(|error| VmError::Runtime(format!("failed to encode manifest JSON: {error}")))?;
659    write_bytes(&bundle_dir.join(BUNDLE_MANIFEST_FILE), &manifest_json)?;
660    Ok(bundle.manifest.clone())
661}
662
663/// Read a bundle manifest from disk. Verifies the schema marker but does
664/// not cross-check workflow/report/eval-pack sibling files; for a richer
665/// check use [`validate_crystallization_bundle`].
666pub fn load_crystallization_bundle_manifest(
667    bundle_dir: &Path,
668) -> Result<CrystallizationBundleManifest, VmError> {
669    let manifest_path = bundle_dir.join(BUNDLE_MANIFEST_FILE);
670    let bytes = std::fs::read(&manifest_path).map_err(|error| {
671        VmError::Runtime(format!(
672            "failed to read bundle manifest {}: {error}",
673            manifest_path.display()
674        ))
675    })?;
676    let manifest: CrystallizationBundleManifest =
677        serde_json::from_slice(&bytes).map_err(|error| {
678            VmError::Runtime(format!(
679                "failed to decode bundle manifest {}: {error}",
680                manifest_path.display()
681            ))
682        })?;
683    if manifest.schema != BUNDLE_SCHEMA {
684        return Err(VmError::Runtime(format!(
685            "bundle {} has unrecognized schema {:?} (expected {})",
686            bundle_dir.display(),
687            manifest.schema,
688            BUNDLE_SCHEMA
689        )));
690    }
691    if manifest.schema_version > BUNDLE_SCHEMA_VERSION {
692        return Err(VmError::Runtime(format!(
693            "bundle {} schema_version {} is newer than supported {}",
694            bundle_dir.display(),
695            manifest.schema_version,
696            BUNDLE_SCHEMA_VERSION
697        )));
698    }
699    Ok(manifest)
700}
701
702fn resolve_bundle_manifest_path(
703    bundle_dir: &Path,
704    relative_path: &str,
705    label: &str,
706) -> Result<PathBuf, String> {
707    let path = Path::new(relative_path);
708    if relative_path.trim().is_empty()
709        || path.is_absolute()
710        || path.components().any(|component| {
711            matches!(
712                component,
713                Component::ParentDir | Component::Prefix(_) | Component::RootDir
714            )
715        })
716        || has_windows_rooted_or_drive_relative_prefix(relative_path)
717    {
718        return Err(format!(
719            "manifest {label} path {relative_path:?} must stay inside the bundle"
720        ));
721    }
722    Ok(bundle_dir.join(path))
723}
724
/// Returns `true` when `path` begins like a Windows rooted or
/// drive-qualified path: a leading `/` or `\`, or an ASCII letter followed
/// by `:` (for example `C:foo` or `c:\foo`).
///
/// The check is textual and platform-independent. It inspects the leading
/// bytes directly, so no normalized copy of the string is allocated.
fn has_windows_rooted_or_drive_relative_prefix(path: &str) -> bool {
    match path.as_bytes() {
        // Rooted with either separator style.
        [b'/' | b'\\', ..] => true,
        // Drive-qualified, whether absolute (`C:\x`) or drive-relative (`C:x`).
        [drive, b':', ..] => drive.is_ascii_alphabetic(),
        _ => false,
    }
}
731
732/// Read every fixture trace referenced by the bundle manifest. Returns
733/// the manifest plus loaded traces, in the order they appear in the
734/// manifest. Fixtures with `path: None` are skipped.
735pub fn load_crystallization_bundle(
736    bundle_dir: &Path,
737) -> Result<(CrystallizationBundleManifest, Vec<CrystallizationTrace>), VmError> {
738    let manifest = load_crystallization_bundle_manifest(bundle_dir)?;
739    let mut traces = Vec::new();
740    for fixture in &manifest.fixtures {
741        let path = resolve_bundle_manifest_path(bundle_dir, &fixture.path, "fixture")
742            .map_err(VmError::Runtime)?;
743        traces.push(load_crystallization_trace(&path)?);
744    }
745    Ok((manifest, traces))
746}
747
748/// Validate a bundle directory layout and contents. Cheap enough to call
749/// from a CLI smoke command; performs no live side effects.
750pub fn validate_crystallization_bundle(bundle_dir: &Path) -> Result<BundleValidation, VmError> {
751    let mut validation = BundleValidation {
752        bundle_dir: bundle_dir.display().to_string(),
753        ..BundleValidation::default()
754    };
755    let manifest = match load_crystallization_bundle_manifest(bundle_dir) {
756        Ok(manifest) => manifest,
757        Err(error) => {
758            validation.problems.push(error.to_string());
759            return Ok(validation);
760        }
761    };
762    validation.manifest_ok = true;
763    validation.schema = manifest.schema.clone();
764    validation.schema_version = manifest.schema_version;
765    validation.kind = manifest.kind.clone();
766    validation.candidate_id = manifest.candidate_id.clone();
767
768    match resolve_bundle_manifest_path(bundle_dir, &manifest.workflow.path, "workflow") {
769        Ok(workflow_path) if workflow_path.exists() => {
770            validation.workflow_ok = true;
771        }
772        Ok(workflow_path) => {
773            validation
774                .problems
775                .push(format!("missing workflow file {}", workflow_path.display()));
776        }
777        Err(problem) => validation.problems.push(problem),
778    }
779
780    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
781    match std::fs::read(&report_path) {
782        Ok(bytes) => match serde_json::from_slice::<CrystallizationReport>(&bytes) {
783            Ok(report) => {
784                validation.report_ok = true;
785                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
786                    && manifest.candidate_id.is_empty()
787                {
788                    validation
789                        .problems
790                        .push("manifest is non-rejected but has empty candidate_id".to_string());
791                }
792                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
793                    && report.selected_candidate_id.as_deref() != Some(&manifest.candidate_id)
794                {
795                    validation.problems.push(format!(
796                        "report selected_candidate_id {:?} does not match manifest candidate_id {}",
797                        report.selected_candidate_id, manifest.candidate_id
798                    ));
799                }
800            }
801            Err(error) => {
802                validation
803                    .problems
804                    .push(format!("invalid report.json: {error}"));
805            }
806        },
807        Err(error) => {
808            validation.problems.push(format!(
809                "missing report file {}: {error}",
810                report_path.display()
811            ));
812        }
813    }
814
815    if let Some(eval_pack) = &manifest.eval_pack {
816        match resolve_bundle_manifest_path(bundle_dir, &eval_pack.path, "eval_pack") {
817            Ok(path) if path.exists() => {
818                validation.eval_pack_ok = true;
819            }
820            Ok(path) => {
821                validation.problems.push(format!(
822                    "manifest references eval pack {} but file is missing",
823                    path.display()
824                ));
825            }
826            Err(problem) => validation.problems.push(problem),
827        }
828    } else {
829        validation.eval_pack_ok = true;
830    }
831
832    if let Some(skill) = &manifest.skill {
833        let mut skill_problem = false;
834        match resolve_bundle_manifest_path(bundle_dir, &skill.path, "skill") {
835            Ok(path) if path.exists() => match std::fs::read_to_string(&path) {
836                Ok(source) => {
837                    let (frontmatter, _) = split_frontmatter(&source);
838                    match parse_frontmatter(frontmatter) {
839                        Ok(parsed) => {
840                            if parsed.manifest.name.trim().is_empty() {
841                                validation
842                                    .problems
843                                    .push("skill SKILL.md is missing frontmatter name".to_string());
844                                skill_problem = true;
845                            } else if parsed.manifest.name != skill.name {
846                                validation.problems.push(format!(
847                                    "skill SKILL.md name {} does not match manifest skill name {}",
848                                    parsed.manifest.name, skill.name
849                                ));
850                                skill_problem = true;
851                            }
852                            if parsed.manifest.short.trim().is_empty() {
853                                validation.problems.push(
854                                    "skill SKILL.md is missing required short card".to_string(),
855                                );
856                                skill_problem = true;
857                            }
858                        }
859                        Err(error) => {
860                            validation
861                                .problems
862                                .push(format!("invalid skill SKILL.md frontmatter: {error}"));
863                            skill_problem = true;
864                        }
865                    }
866                }
867                Err(error) => {
868                    validation.problems.push(format!(
869                        "failed to read skill file {}: {error}",
870                        path.display()
871                    ));
872                    skill_problem = true;
873                }
874            },
875            Ok(path) => {
876                validation.problems.push(format!(
877                    "manifest references skill {} but file is missing",
878                    path.display()
879                ));
880                skill_problem = true;
881            }
882            Err(problem) => {
883                validation.problems.push(problem);
884                skill_problem = true;
885            }
886        }
887        match resolve_bundle_manifest_path(bundle_dir, &skill.gate_receipt_path, "skill gate") {
888            Ok(path) if path.exists() => match std::fs::read_to_string(&path) {
889                Ok(source) => match serde_json::from_str::<SkillInductionGateReceipt>(&source) {
890                    Ok(receipt) => {
891                        if receipt.type_name != SKILL_GATE_RECEIPT_SCHEMA {
892                            validation.problems.push(format!(
893                                "skill gate receipt has unexpected type {}",
894                                receipt.type_name
895                            ));
896                            skill_problem = true;
897                        }
898                        if receipt.skill_candidate_id != skill.skill_candidate_id {
899                            validation.problems.push(format!(
900                                "skill gate receipt candidate id {} does not match manifest {}",
901                                receipt.skill_candidate_id, skill.skill_candidate_id
902                            ));
903                            skill_problem = true;
904                        }
905                        if receipt.workflow_candidate_id != skill.workflow_candidate_id {
906                            validation.problems.push(format!(
907                                "skill gate receipt workflow id {} does not match manifest {}",
908                                receipt.workflow_candidate_id, skill.workflow_candidate_id
909                            ));
910                            skill_problem = true;
911                        }
912                        if !receipt.accepted {
913                            validation
914                                .problems
915                                .push("skill gate receipt is not accepted".to_string());
916                            skill_problem = true;
917                        }
918                    }
919                    Err(error) => {
920                        validation
921                            .problems
922                            .push(format!("invalid skill gate receipt JSON: {error}"));
923                        skill_problem = true;
924                    }
925                },
926                Err(error) => {
927                    validation.problems.push(format!(
928                        "failed to read skill gate receipt {}: {error}",
929                        path.display()
930                    ));
931                    skill_problem = true;
932                }
933            },
934            Ok(path) => {
935                validation.problems.push(format!(
936                    "manifest references skill gate receipt {} but file is missing",
937                    path.display()
938                ));
939                skill_problem = true;
940            }
941            Err(problem) => {
942                validation.problems.push(problem);
943                skill_problem = true;
944            }
945        }
946        validation.skill_ok = !skill_problem;
947    } else {
948        validation.skill_ok = true;
949    }
950
951    let mut fixtures_problem = false;
952    for fixture in &manifest.fixtures {
953        let path = match resolve_bundle_manifest_path(bundle_dir, &fixture.path, "fixture") {
954            Ok(path) => path,
955            Err(problem) => {
956                validation.problems.push(problem);
957                fixtures_problem = true;
958                continue;
959            }
960        };
961        if !path.exists() {
962            validation
963                .problems
964                .push(format!("missing fixture {}", path.display()));
965            fixtures_problem = true;
966            continue;
967        }
968        if !fixture.redacted {
969            validation.problems.push(format!(
970                "fixture {} is not marked redacted; bundle must not ship raw private payloads",
971                fixture.path
972            ));
973            fixtures_problem = true;
974        }
975    }
976    validation.fixtures_ok = !fixtures_problem;
977
978    if !manifest.redaction.applied && !manifest.fixtures.is_empty() {
979        validation
980            .problems
981            .push("redaction.applied is false but bundle includes fixtures".to_string());
982    } else {
983        validation.redaction_ok = true;
984    }
985    if !manifest
986        .required_secrets
987        .iter()
988        .all(|secret| secret_id_looks_logical(secret))
989    {
990        validation.problems.push(
991            "required_secrets contains a non-logical id (looks like a raw secret)".to_string(),
992        );
993    }
994
995    Ok(validation)
996}
997
998/// Replay shadow comparison from a bundle: re-runs the deterministic
999/// shadow check in-process against the bundle's redacted fixtures, with
1000/// no live side effects. Returns the manifest and the freshly computed
1001/// `ShadowRunReport`. The returned report is suitable for cloud import or
1002/// for asserting determinism in CI.
1003pub fn shadow_replay_bundle(
1004    bundle_dir: &Path,
1005) -> Result<(CrystallizationBundleManifest, ShadowRunReport), VmError> {
1006    let (manifest, traces) = load_crystallization_bundle(bundle_dir)?;
1007    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
1008    let bytes = std::fs::read(&report_path).map_err(|error| {
1009        VmError::Runtime(format!(
1010            "failed to read bundle report {}: {error}",
1011            report_path.display()
1012        ))
1013    })?;
1014    let report: CrystallizationReport = serde_json::from_slice(&bytes).map_err(|error| {
1015        VmError::Runtime(format!(
1016            "failed to decode bundle report {}: {error}",
1017            report_path.display()
1018        ))
1019    })?;
1020    let candidate = report
1021        .selected_candidate_id
1022        .as_deref()
1023        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
1024        .ok_or_else(|| {
1025            VmError::Runtime(format!(
1026                "bundle {} has no selected candidate to replay",
1027                bundle_dir.display()
1028            ))
1029        })?;
1030    let shadow = shadow_candidate(candidate, &traces);
1031    Ok((manifest, shadow))
1032}
1033
1034fn write_bytes(path: &Path, bytes: &[u8]) -> Result<(), VmError> {
1035    crate::atomic_io::atomic_write(path, bytes)
1036        .map_err(|error| VmError::Runtime(format!("failed to write {}: {error}", path.display())))
1037}
1038
/// Reduces `raw` to a name made only of ASCII letters, digits, `-` and `_`.
///
/// Every other character becomes `_`, leading and trailing underscores are
/// stripped, and `"trace"` is returned when nothing is left.
fn sanitize_fixture_name(raw: &str) -> String {
    let mut cleaned = String::with_capacity(raw.len());
    for ch in raw.chars() {
        let allowed = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_');
        cleaned.push(if allowed { ch } else { '_' });
    }
    match cleaned.trim_matches('_') {
        "" => "trace".to_string(),
        core => core.to_string(),
    }
}
1056
/// Builds a lowercase, dash-separated key from `raw`.
///
/// Runs of ASCII letters and digits are kept (lowercased) and joined with a
/// single `-`; everything else acts purely as a separator, so the result
/// never starts or ends with a dash. Falls back to `"crystallized-workflow"`
/// when `raw` contains no ASCII alphanumeric character.
fn sanitize_external_key(raw: &str) -> String {
    let key = raw
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .map(|segment| segment.to_ascii_lowercase())
        .collect::<Vec<_>>()
        .join("-");
    if key.is_empty() {
        "crystallized-workflow".to_string()
    } else {
        key
    }
}
1077
1078fn infer_bundle_title(candidate: Option<&WorkflowCandidate>, fallback_name: &str) -> String {
1079    if let Some(candidate) = candidate {
1080        format!(
1081            "{} ({} step{})",
1082            candidate.name,
1083            candidate.steps.len(),
1084            if candidate.steps.len() == 1 { "" } else { "s" }
1085        )
1086    } else {
1087        format!("rejected: {fallback_name}")
1088    }
1089}
1090
1091fn infer_risk_level(candidate: Option<&WorkflowCandidate>) -> String {
1092    let Some(candidate) = candidate else {
1093        return "high".to_string();
1094    };
1095    let touches_external = candidate.side_effects.iter().any(side_effect_is_external);
1096    let needs_secret = !candidate.required_secrets.is_empty();
1097    if touches_external && needs_secret {
1098        "high".to_string()
1099    } else if touches_external || needs_secret {
1100        "medium".to_string()
1101    } else {
1102        "low".to_string()
1103    }
1104}
1105
1106fn side_effect_is_external(effect: &CrystallizationSideEffect) -> bool {
1107    let kind = effect.kind.to_ascii_lowercase();
1108    if kind.is_empty() {
1109        return false;
1110    }
1111    // Plan-only side effects stay inside Harn's own data plane: they
1112    // write receipts, append to the in-process event log, or stash plans.
1113    // None of those touch tenant-external systems.
1114    let internal = kind.contains("receipt")
1115        || kind.contains("event_log")
1116        || kind.contains("memo")
1117        || kind.contains("plan");
1118    if internal {
1119        return false;
1120    }
1121    kind.contains("post")
1122        || kind.contains("write")
1123        || kind.contains("publish")
1124        || kind.contains("delete")
1125        || kind.contains("send")
1126}
1127
1128fn candidate_is_plan_only(candidate: &WorkflowCandidate) -> bool {
1129    if candidate.steps.is_empty() {
1130        return false;
1131    }
1132    candidate.side_effects.iter().all(|effect| {
1133        let kind = effect.kind.to_ascii_lowercase();
1134        // Plan-only side effects stay inside Harn's own data plane: receipt
1135        // writes, in-memory event-log appends, file-only mutations, etc.
1136        kind.is_empty()
1137            || kind.contains("receipt")
1138            || kind.contains("event_log")
1139            || kind.contains("memo")
1140            || kind.contains("plan")
1141            || (kind.contains("file") && !kind.contains("publish"))
1142    })
1143}
1144
1145pub(super) fn redact_trace_for_bundle(trace: &mut CrystallizationTrace) {
1146    let policy = RedactionPolicy::default();
1147    for action in &mut trace.actions {
1148        policy.redact_json_in_place(&mut action.inputs);
1149        if let Some(output) = action.output.as_mut() {
1150            policy.redact_json_in_place(output);
1151        }
1152        if let Some(observed) = action.observed_output.as_mut() {
1153            policy.redact_json_in_place(observed);
1154        }
1155        redact_bundle_map(&mut action.parameters, &policy);
1156        redact_bundle_map(&mut action.metadata, &policy);
1157    }
1158    redact_bundle_map(&mut trace.metadata, &policy);
1159    if let Some(run) = trace.replay_run.as_mut() {
1160        redact_replay_run_for_bundle(run, &policy);
1161    }
1162}
1163
1164fn redact_bundle_map(map: &mut BTreeMap<String, JsonValue>, policy: &RedactionPolicy) {
1165    for (key, value) in map {
1166        if policy.field_is_sensitive(key) {
1167            *value = JsonValue::String(REDACTED_PLACEHOLDER.to_string());
1168        } else {
1169            policy.redact_json_in_place(value);
1170        }
1171    }
1172}
1173
1174fn redact_replay_run_for_bundle(run: &mut ReplayTraceRun, policy: &RedactionPolicy) {
1175    for value in run
1176        .event_log_entries
1177        .iter_mut()
1178        .chain(run.trigger_firings.iter_mut())
1179        .chain(run.llm_interactions.iter_mut())
1180        .chain(run.protocol_interactions.iter_mut())
1181        .chain(run.approval_interactions.iter_mut())
1182        .chain(run.effect_receipts.iter_mut())
1183        .chain(run.agent_transcript_deltas.iter_mut())
1184        .chain(run.final_artifacts.iter_mut())
1185        .chain(run.policy_decisions.iter_mut())
1186    {
1187        policy.redact_json_in_place(value);
1188    }
1189}
1190
1191fn secret_id_looks_logical(value: &str) -> bool {
1192    let trimmed = value.trim();
1193    !trimmed.is_empty() && !RedactionPolicy::default().looks_like_secret_value(trimmed)
1194}