Skip to main content

harn_vm/orchestration/crystallize/
bundle.rs

1//! Crystallization bundle: types, build/write/load/validate, shadow replay, and redaction helpers.
2
3use std::collections::BTreeSet;
4use std::path::{Component, Path, PathBuf};
5
6use serde::{Deserialize, Serialize};
7use serde_json::Value as JsonValue;
8
9use super::super::{now_rfc3339, ReplayTraceRun};
10use super::api::load_crystallization_trace;
11use super::shadow::{find_sequence_start, shadow_candidate};
12use super::types::{
13    CrystallizationApproval, CrystallizationArtifacts, CrystallizationReport,
14    CrystallizationSideEffect, CrystallizationTrace, PromotionApprovalRecord, PromotionCriteria,
15    PromotionDivergenceRecord, SavingsEstimate, SegmentKind, ShadowRunReport,
16    SkillInductionGateReceipt, WorkflowCandidate, WorkflowCandidateStep, BUNDLE_EVAL_PACK_FILE,
17    BUNDLE_FIXTURES_DIR, BUNDLE_MANIFEST_FILE, BUNDLE_REPORT_FILE, BUNDLE_SCHEMA,
18    BUNDLE_SCHEMA_VERSION, BUNDLE_SKILL_DIR, BUNDLE_SKILL_FILE, BUNDLE_SKILL_GATE_FILE,
19    BUNDLE_WORKFLOW_FILE, DEFAULT_ROLLOUT_POLICY, SKILL_GATE_RECEIPT_SCHEMA,
20};
21use crate::skills::{parse_frontmatter, split_frontmatter};
22use crate::value::VmError;
23
// ===== Crystallization bundle =====
//
// A bundle is a directory layout that Harn writes and Harn Cloud (or any
// other importer) reads without bespoke glue. The contract is:
//
//   bundle/
//     candidate.json        # versioned manifest documented below
//     workflow.harn         # generated/reviewable workflow code
//     report.json           # full mining/shadow/eval report
//     harn.eval.toml        # generated eval pack when available (optional)
//     <BUNDLE_SKILL_DIR>/   # generated SKILL.md plus its replay gate receipt
//                           # (optional, only when a skill candidate matches
//                           # the selected workflow candidate)
//     fixtures/             # redacted replay fixtures referenced by the
//                           # report (optional, only when --bundle is used
//                           # with `harn crystallize` and traces were on disk)
//
// `candidate.json` is the authoritative manifest. It must include the
// `schema` and `schema_version` markers. Cloud importers MUST reject any
// bundle whose `schema` is not exactly `harn.crystallization.candidate.bundle`
// or whose `schema_version` is greater than the highest version they
// understand. Only the documented additive fields may be added without
// bumping `schema_version`.
44
45#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
46#[serde(default)]
47pub struct BundleGenerator {
48    pub tool: String,
49    pub version: String,
50}
51
52impl Default for BundleGenerator {
53    fn default() -> Self {
54        Self {
55            tool: "harn".to_string(),
56            version: env!("CARGO_PKG_VERSION").to_string(),
57        }
58    }
59}
60
61#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
62#[serde(default)]
63pub struct BundleWorkflowRef {
64    /// Relative path inside the bundle directory.
65    pub path: String,
66    /// Short identifier used in `pipeline NAME(...)`.
67    pub name: String,
68    /// Logical package name promotion uses to register the workflow.
69    pub package_name: String,
70    /// Initial workflow version proposed for promotion.
71    pub package_version: String,
72}
73
74#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
75#[serde(default)]
76pub struct BundleSourceTrace {
77    pub trace_id: String,
78    pub source_hash: String,
79    /// Optional human-visible URL (PR, issue, run record path) for the
80    /// trace. `None` when the trace was loaded from an in-memory store.
81    pub source_url: Option<String>,
82    /// Optional cloud-side receipt id when the trace was already promoted
83    /// into a tenant receipt. Cloud importers use this to wire candidate
84    /// evidence to existing receipts without round-tripping the raw payload.
85    pub source_receipt_id: Option<String>,
86    /// Relative path of the redacted fixture inside the bundle, if one
87    /// was emitted.
88    pub fixture_path: Option<String>,
89}
90
91#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
92#[serde(default)]
93pub struct BundleStep {
94    pub index: usize,
95    pub kind: String,
96    pub name: String,
97    pub segment: SegmentKind,
98    pub parameter_refs: Vec<String>,
99    pub side_effects: Vec<CrystallizationSideEffect>,
100    pub capabilities: Vec<String>,
101    pub required_secrets: Vec<String>,
102    pub approval: Option<CrystallizationApproval>,
103    pub review_notes: Vec<String>,
104}
105
106impl BundleStep {
107    fn from_candidate_step(step: &WorkflowCandidateStep) -> Self {
108        Self {
109            index: step.index,
110            kind: step.kind.clone(),
111            name: step.name.clone(),
112            segment: step.segment.clone(),
113            parameter_refs: step.parameter_refs.clone(),
114            side_effects: step.side_effects.clone(),
115            capabilities: step.capabilities.clone(),
116            required_secrets: step.required_secrets.clone(),
117            approval: step.approval.clone(),
118            review_notes: step.review_notes.clone(),
119        }
120    }
121}
122
123#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
124#[serde(default)]
125pub struct BundleEvalPackRef {
126    /// Relative path of the eval pack inside the bundle directory.
127    pub path: String,
128    /// Optional external link the eval pack also lives at (e.g. a hosted
129    /// `eval-pack://` URI when the bundle was promoted into a tenant).
130    pub link: Option<String>,
131}
132
133#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
134#[serde(default)]
135pub struct BundleSkillRef {
136    /// Relative path to the generated `SKILL.md` inside the bundle.
137    pub path: String,
138    /// Relative path to the replay/held-out gate receipt for the skill.
139    pub gate_receipt_path: String,
140    pub name: String,
141    pub skill_candidate_id: String,
142    pub workflow_candidate_id: String,
143}
144
145#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
146#[serde(default)]
147pub struct BundleFixtureRef {
148    pub path: String,
149    pub trace_id: String,
150    pub source_hash: String,
151    pub redacted: bool,
152}
153
154#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
155#[serde(default)]
156pub struct BundlePromotion {
157    pub owner: Option<String>,
158    pub approver: Option<String>,
159    pub author: Option<String>,
160    /// Logical rollout strategy. Defaults to `shadow_then_canary`. Hosted
161    /// surfaces may extend this enum but must keep existing values stable.
162    pub rollout_policy: String,
163    pub rollback_target: Option<String>,
164    pub created_at: String,
165    pub workflow_version: String,
166    pub package_name: String,
167    pub sample_count: usize,
168    pub confidence: f64,
169    pub shadow_success_count: usize,
170    pub shadow_failure_count: usize,
171    pub divergence_history: Vec<PromotionDivergenceRecord>,
172    pub approval_history: Vec<PromotionApprovalRecord>,
173    pub criteria: PromotionCriteria,
174    pub estimated_time_token_savings: SavingsEstimate,
175}
176
177impl Default for BundlePromotion {
178    fn default() -> Self {
179        Self {
180            owner: None,
181            approver: None,
182            author: None,
183            rollout_policy: DEFAULT_ROLLOUT_POLICY.to_string(),
184            rollback_target: None,
185            created_at: String::new(),
186            workflow_version: String::new(),
187            package_name: String::new(),
188            sample_count: 0,
189            confidence: 0.0,
190            shadow_success_count: 0,
191            shadow_failure_count: 0,
192            divergence_history: Vec::new(),
193            approval_history: Vec::new(),
194            criteria: PromotionCriteria::default(),
195            estimated_time_token_savings: SavingsEstimate::default(),
196        }
197    }
198}
199
200#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
201#[serde(default)]
202pub struct BundleRedactionSummary {
203    pub applied: bool,
204    pub rules: Vec<String>,
205    pub summary: String,
206    /// Number of fixture files copied into the bundle (0 when no fixture
207    /// directory was emitted).
208    pub fixture_count: usize,
209}
210
211#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
212#[serde(default)]
213pub struct CrystallizationBundleManifest {
214    pub schema: String,
215    pub schema_version: u32,
216    pub generated_at: String,
217    pub generator: BundleGenerator,
218    pub kind: BundleKind,
219    pub candidate_id: String,
220    pub external_key: String,
221    pub title: String,
222    pub team: Option<String>,
223    pub repo: Option<String>,
224    pub risk_level: String,
225    pub workflow: BundleWorkflowRef,
226    pub source_trace_hashes: Vec<String>,
227    pub source_traces: Vec<BundleSourceTrace>,
228    pub deterministic_steps: Vec<BundleStep>,
229    pub fuzzy_steps: Vec<BundleStep>,
230    pub side_effects: Vec<CrystallizationSideEffect>,
231    pub capabilities: Vec<String>,
232    pub required_secrets: Vec<String>,
233    pub savings: SavingsEstimate,
234    pub shadow: ShadowRunReport,
235    pub eval_pack: Option<BundleEvalPackRef>,
236    pub skill: Option<BundleSkillRef>,
237    pub fixtures: Vec<BundleFixtureRef>,
238    pub promotion: BundlePromotion,
239    pub redaction: BundleRedactionSummary,
240    pub confidence: f64,
241    pub rejection_reasons: Vec<String>,
242    pub warnings: Vec<String>,
243}
244
245#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
246#[serde(rename_all = "snake_case")]
247pub enum BundleKind {
248    /// A normal candidate that passed shadow comparison and is ready for
249    /// review and promotion.
250    #[default]
251    Candidate,
252    /// A "plan-only" candidate: every step has a side-effect-free, in-process
253    /// outcome (e.g. classify and write a receipt). Cloud importers can
254    /// promote these without explicit external-side-effect approval.
255    PlanOnly,
256    /// No safe candidate was selected. The bundle still records what was
257    /// attempted, the rejection reasons, and any rejected candidates so
258    /// reviewers can debug or feed it back into mining.
259    Rejected,
260}
261
/// Caller-supplied overrides applied when building a bundle.
#[derive(Debug, Clone, Default)]
pub struct BundleOptions {
    /// Stable identifier that downstream cloud importers use to dedupe
    /// bundles across runs (defaults to a sanitized workflow name).
    pub external_key: Option<String>,
    /// Title override.
    pub title: Option<String>,
    /// Owning team.
    pub team: Option<String>,
    /// Repository.
    pub repo: Option<String>,
    /// Risk level override.
    pub risk_level: Option<String>,
    /// Rollout policy override.
    pub rollout_policy: Option<String>,
}
273
274#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
275#[serde(default)]
276pub struct CrystallizationBundle {
277    pub manifest: CrystallizationBundleManifest,
278    pub report: CrystallizationReport,
279    pub harn_code: String,
280    pub eval_pack_toml: String,
281    pub skill_markdown: String,
282    pub skill_gate_receipt_json: String,
283    pub fixtures: Vec<CrystallizationTrace>,
284}
285
286/// Errors surfaced when validating a bundle on disk.
287#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
288#[serde(default)]
289pub struct BundleValidation {
290    pub bundle_dir: String,
291    pub schema: String,
292    pub schema_version: u32,
293    pub kind: BundleKind,
294    pub candidate_id: String,
295    pub manifest_ok: bool,
296    pub workflow_ok: bool,
297    pub report_ok: bool,
298    pub eval_pack_ok: bool,
299    pub skill_ok: bool,
300    pub fixtures_ok: bool,
301    pub redaction_ok: bool,
302    pub problems: Vec<String>,
303}
304
305impl BundleValidation {
306    pub fn is_ok(&self) -> bool {
307        self.problems.is_empty()
308    }
309}
310
311/// Build an in-memory bundle from already-mined artifacts. The traces
312/// passed here are the same normalized traces used to mine the candidate;
313/// they will be redacted before being attached as fixtures.
314pub fn build_crystallization_bundle(
315    artifacts: CrystallizationArtifacts,
316    traces: &[CrystallizationTrace],
317    options: BundleOptions,
318) -> Result<CrystallizationBundle, VmError> {
319    let CrystallizationArtifacts {
320        report,
321        harn_code,
322        eval_pack_toml,
323    } = artifacts;
324
325    let (selected, kind) = match report
326        .selected_candidate_id
327        .as_deref()
328        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
329    {
330        Some(candidate) => {
331            let kind = if candidate_is_plan_only(candidate) {
332                BundleKind::PlanOnly
333            } else {
334                BundleKind::Candidate
335            };
336            (Some(candidate), kind)
337        }
338        None => (None, BundleKind::Rejected),
339    };
340
341    let workflow_name = selected
342        .map(|candidate| candidate.name.clone())
343        .unwrap_or_else(|| "crystallized_workflow".to_string());
344    let package_name = selected
345        .map(|candidate| candidate.promotion.package_name.clone())
346        .unwrap_or_else(|| workflow_name.replace('_', "-"));
347    let workflow_version = selected
348        .map(|candidate| candidate.promotion.version.clone())
349        .unwrap_or_else(|| "0.0.0".to_string());
350
351    let manifest_workflow = BundleWorkflowRef {
352        path: BUNDLE_WORKFLOW_FILE.to_string(),
353        name: workflow_name.clone(),
354        package_name: package_name.clone(),
355        package_version: workflow_version.clone(),
356    };
357
358    let external_key = options
359        .external_key
360        .clone()
361        .filter(|key| !key.trim().is_empty())
362        .unwrap_or_else(|| sanitize_external_key(&workflow_name));
363    let title = options
364        .title
365        .clone()
366        .filter(|title| !title.trim().is_empty())
367        .unwrap_or_else(|| infer_bundle_title(selected, &workflow_name));
368    let risk_level = options
369        .risk_level
370        .clone()
371        .filter(|risk| !risk.trim().is_empty())
372        .unwrap_or_else(|| infer_risk_level(selected));
373    let rollout_policy = options
374        .rollout_policy
375        .clone()
376        .filter(|policy| !policy.trim().is_empty())
377        .unwrap_or_else(|| DEFAULT_ROLLOUT_POLICY.to_string());
378
379    let (deterministic_steps, fuzzy_steps) = match selected {
380        Some(candidate) => candidate
381            .steps
382            .iter()
383            .map(BundleStep::from_candidate_step)
384            .partition::<Vec<_>, _>(|step| step.segment == SegmentKind::Deterministic),
385        None => (Vec::new(), Vec::new()),
386    };
387
388    let source_trace_hashes = selected
389        .map(|candidate| candidate.promotion.source_trace_hashes.clone())
390        .unwrap_or_default();
391
392    let mut source_traces = Vec::new();
393    let mut fixture_refs = Vec::new();
394    let mut fixture_payloads = Vec::new();
395    if let Some(candidate) = selected {
396        let mut fixture_trace_ids = BTreeSet::new();
397        for example in &candidate.examples {
398            fixture_trace_ids.insert(example.trace_id.clone());
399        }
400        for trace in traces {
401            if find_sequence_start(trace, &candidate.sequence_signature).is_some() {
402                fixture_trace_ids.insert(trace.id.clone());
403            }
404        }
405        for trace_id in fixture_trace_ids {
406            let trace = traces.iter().find(|trace| trace.id == trace_id);
407            let source_hash = trace
408                .and_then(|trace| trace.source_hash.clone())
409                .or_else(|| {
410                    candidate
411                        .examples
412                        .iter()
413                        .find(|example| example.trace_id == trace_id)
414                        .map(|example| example.source_hash.clone())
415                })
416                .unwrap_or_default();
417            let fixture_relative = trace.map(|trace| {
418                format!(
419                    "{BUNDLE_FIXTURES_DIR}/{}.json",
420                    sanitize_fixture_name(&trace.id)
421                )
422            });
423            source_traces.push(BundleSourceTrace {
424                trace_id: trace_id.clone(),
425                source_hash: source_hash.clone(),
426                source_url: trace.and_then(|trace| trace.source.clone()),
427                source_receipt_id: trace
428                    .and_then(|trace| trace.metadata.get("source_receipt_id"))
429                    .and_then(|value| value.as_str().map(str::to_string)),
430                fixture_path: fixture_relative.clone(),
431            });
432            if let (Some(trace), Some(fixture_path)) = (trace, fixture_relative.clone()) {
433                let mut redacted = trace.clone();
434                redact_trace_for_bundle(&mut redacted);
435                fixture_refs.push(BundleFixtureRef {
436                    path: fixture_path,
437                    trace_id: trace.id.clone(),
438                    source_hash,
439                    redacted: true,
440                });
441                fixture_payloads.push(redacted);
442            }
443        }
444    }
445
446    // Owner defaults to author so cloud importers always have a populated
447    // ownership pointer, but stays separate from `author` so reviewers can
448    // assign a different owner in the manifest before promotion.
449    let author = selected.and_then(|candidate| candidate.promotion.author.clone());
450    let promotion = BundlePromotion {
451        owner: author.clone(),
452        approver: selected.and_then(|candidate| candidate.promotion.approver.clone()),
453        author,
454        rollout_policy,
455        rollback_target: selected.and_then(|candidate| candidate.promotion.rollback_target.clone()),
456        created_at: now_rfc3339(),
457        workflow_version,
458        package_name,
459        sample_count: selected
460            .map(|candidate| candidate.promotion.sample_count)
461            .unwrap_or_default(),
462        confidence: selected
463            .map(|candidate| candidate.promotion.confidence)
464            .unwrap_or_default(),
465        shadow_success_count: selected
466            .map(|candidate| candidate.promotion.shadow_success_count)
467            .unwrap_or_default(),
468        shadow_failure_count: selected
469            .map(|candidate| candidate.promotion.shadow_failure_count)
470            .unwrap_or_default(),
471        divergence_history: selected
472            .map(|candidate| candidate.promotion.divergence_history.clone())
473            .unwrap_or_default(),
474        approval_history: selected
475            .map(|candidate| candidate.promotion.approval_history.clone())
476            .unwrap_or_default(),
477        criteria: selected
478            .map(|candidate| candidate.promotion.criteria.clone())
479            .unwrap_or_default(),
480        estimated_time_token_savings: selected
481            .map(|candidate| candidate.promotion.estimated_time_token_savings.clone())
482            .unwrap_or_default(),
483    };
484
485    let redaction = BundleRedactionSummary {
486        applied: !fixture_payloads.is_empty(),
487        rules: vec![
488            "sensitive_keys".to_string(),
489            "secret_value_heuristic".to_string(),
490        ],
491        summary: if fixture_payloads.is_empty() {
492            "no fixtures emitted".to_string()
493        } else {
494            "fixture payloads scrubbed of secret-like values and sensitive keys before write"
495                .to_string()
496        },
497        fixture_count: fixture_payloads.len(),
498    };
499
500    let eval_pack = if eval_pack_toml.trim().is_empty() {
501        None
502    } else {
503        Some(BundleEvalPackRef {
504            path: BUNDLE_EVAL_PACK_FILE.to_string(),
505            link: selected
506                .and_then(|candidate| candidate.promotion.eval_pack_link.clone())
507                .filter(|link| !link.trim().is_empty()),
508        })
509    };
510    let selected_skill = selected.and_then(|candidate| {
511        report
512            .skill_candidates
513            .iter()
514            .find(|skill| skill.workflow_candidate_id == candidate.id)
515    });
516    let skill = selected_skill.map(|skill| BundleSkillRef {
517        path: format!("{BUNDLE_SKILL_DIR}/{BUNDLE_SKILL_FILE}"),
518        gate_receipt_path: format!("{BUNDLE_SKILL_DIR}/{BUNDLE_SKILL_GATE_FILE}"),
519        name: skill.name.clone(),
520        skill_candidate_id: skill.id.clone(),
521        workflow_candidate_id: skill.workflow_candidate_id.clone(),
522    });
523    let skill_markdown = selected_skill
524        .map(|skill| skill.skill_markdown.clone())
525        .unwrap_or_default();
526    let skill_gate_receipt_json = selected_skill
527        .and_then(|skill| serde_json::to_string_pretty(&skill.replay_gate.receipt).ok())
528        .unwrap_or_default();
529
530    let manifest = CrystallizationBundleManifest {
531        schema: BUNDLE_SCHEMA.to_string(),
532        schema_version: BUNDLE_SCHEMA_VERSION,
533        generated_at: now_rfc3339(),
534        generator: BundleGenerator::default(),
535        kind,
536        candidate_id: selected
537            .map(|candidate| candidate.id.clone())
538            .unwrap_or_default(),
539        external_key,
540        title,
541        team: options.team,
542        repo: options.repo,
543        risk_level,
544        workflow: manifest_workflow,
545        source_trace_hashes,
546        source_traces,
547        deterministic_steps,
548        fuzzy_steps,
549        side_effects: selected
550            .map(|candidate| candidate.side_effects.clone())
551            .unwrap_or_default(),
552        capabilities: selected
553            .map(|candidate| candidate.capabilities.clone())
554            .unwrap_or_default(),
555        required_secrets: selected
556            .map(|candidate| candidate.required_secrets.clone())
557            .unwrap_or_default(),
558        savings: selected
559            .map(|candidate| candidate.savings.clone())
560            .unwrap_or_default(),
561        shadow: selected
562            .map(|candidate| candidate.shadow.clone())
563            .unwrap_or_default(),
564        eval_pack,
565        skill,
566        fixtures: fixture_refs,
567        promotion,
568        redaction,
569        confidence: selected
570            .map(|candidate| candidate.confidence)
571            .unwrap_or(0.0),
572        rejection_reasons: report
573            .rejected_candidates
574            .iter()
575            .flat_map(|candidate| candidate.rejection_reasons.iter().cloned())
576            .collect(),
577        warnings: report.warnings.clone(),
578    };
579
580    Ok(CrystallizationBundle {
581        manifest,
582        report,
583        harn_code,
584        eval_pack_toml,
585        skill_markdown,
586        skill_gate_receipt_json,
587        fixtures: fixture_payloads,
588    })
589}
590
591/// Write a bundle to a directory. Creates the directory if it does not
592/// already exist. Returns the manifest with `generated_at` and any
593/// runtime-resolved metadata filled in.
594pub fn write_crystallization_bundle(
595    bundle: &CrystallizationBundle,
596    bundle_dir: &Path,
597) -> Result<CrystallizationBundleManifest, VmError> {
598    std::fs::create_dir_all(bundle_dir).map_err(|error| {
599        VmError::Runtime(format!(
600            "failed to create bundle dir {}: {error}",
601            bundle_dir.display()
602        ))
603    })?;
604    write_bytes(
605        &bundle_dir.join(BUNDLE_WORKFLOW_FILE),
606        bundle.harn_code.as_bytes(),
607    )?;
608    let report_json = serde_json::to_vec_pretty(&bundle.report)
609        .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
610    write_bytes(&bundle_dir.join(BUNDLE_REPORT_FILE), &report_json)?;
611
612    if !bundle.eval_pack_toml.trim().is_empty() {
613        write_bytes(
614            &bundle_dir.join(BUNDLE_EVAL_PACK_FILE),
615            bundle.eval_pack_toml.as_bytes(),
616        )?;
617    }
618
619    if !bundle.skill_markdown.trim().is_empty() {
620        let skill_dir = bundle_dir.join(BUNDLE_SKILL_DIR);
621        std::fs::create_dir_all(&skill_dir).map_err(|error| {
622            VmError::Runtime(format!(
623                "failed to create skill dir {}: {error}",
624                skill_dir.display()
625            ))
626        })?;
627        write_bytes(
628            &skill_dir.join(BUNDLE_SKILL_FILE),
629            bundle.skill_markdown.as_bytes(),
630        )?;
631        if !bundle.skill_gate_receipt_json.trim().is_empty() {
632            write_bytes(
633                &skill_dir.join(BUNDLE_SKILL_GATE_FILE),
634                bundle.skill_gate_receipt_json.as_bytes(),
635            )?;
636        }
637    }
638
639    if !bundle.fixtures.is_empty() {
640        let fixtures_dir = bundle_dir.join(BUNDLE_FIXTURES_DIR);
641        std::fs::create_dir_all(&fixtures_dir).map_err(|error| {
642            VmError::Runtime(format!(
643                "failed to create fixtures dir {}: {error}",
644                fixtures_dir.display()
645            ))
646        })?;
647        for trace in &bundle.fixtures {
648            let path = fixtures_dir.join(format!("{}.json", sanitize_fixture_name(&trace.id)));
649            let payload = serde_json::to_vec_pretty(trace).map_err(|error| {
650                VmError::Runtime(format!("failed to encode fixture {}: {error}", trace.id))
651            })?;
652            write_bytes(&path, &payload)?;
653        }
654    }
655
656    let manifest_json = serde_json::to_vec_pretty(&bundle.manifest)
657        .map_err(|error| VmError::Runtime(format!("failed to encode manifest JSON: {error}")))?;
658    write_bytes(&bundle_dir.join(BUNDLE_MANIFEST_FILE), &manifest_json)?;
659    Ok(bundle.manifest.clone())
660}
661
662/// Read a bundle manifest from disk. Verifies the schema marker but does
663/// not cross-check workflow/report/eval-pack sibling files; for a richer
664/// check use [`validate_crystallization_bundle`].
665pub fn load_crystallization_bundle_manifest(
666    bundle_dir: &Path,
667) -> Result<CrystallizationBundleManifest, VmError> {
668    let manifest_path = bundle_dir.join(BUNDLE_MANIFEST_FILE);
669    let bytes = std::fs::read(&manifest_path).map_err(|error| {
670        VmError::Runtime(format!(
671            "failed to read bundle manifest {}: {error}",
672            manifest_path.display()
673        ))
674    })?;
675    let manifest: CrystallizationBundleManifest =
676        serde_json::from_slice(&bytes).map_err(|error| {
677            VmError::Runtime(format!(
678                "failed to decode bundle manifest {}: {error}",
679                manifest_path.display()
680            ))
681        })?;
682    if manifest.schema != BUNDLE_SCHEMA {
683        return Err(VmError::Runtime(format!(
684            "bundle {} has unrecognized schema {:?} (expected {})",
685            bundle_dir.display(),
686            manifest.schema,
687            BUNDLE_SCHEMA
688        )));
689    }
690    if manifest.schema_version > BUNDLE_SCHEMA_VERSION {
691        return Err(VmError::Runtime(format!(
692            "bundle {} schema_version {} is newer than supported {}",
693            bundle_dir.display(),
694            manifest.schema_version,
695            BUNDLE_SCHEMA_VERSION
696        )));
697    }
698    Ok(manifest)
699}
700
701fn resolve_bundle_manifest_path(
702    bundle_dir: &Path,
703    relative_path: &str,
704    label: &str,
705) -> Result<PathBuf, String> {
706    let path = Path::new(relative_path);
707    if relative_path.trim().is_empty()
708        || path.is_absolute()
709        || path.components().any(|component| {
710            matches!(
711                component,
712                Component::ParentDir | Component::Prefix(_) | Component::RootDir
713            )
714        })
715        || has_windows_rooted_or_drive_relative_prefix(relative_path)
716    {
717        return Err(format!(
718            "manifest {label} path {relative_path:?} must stay inside the bundle"
719        ));
720    }
721    Ok(bundle_dir.join(path))
722}
723
/// Returns `true` when `path` begins like a rooted or drive-qualified
/// Windows path, regardless of the host platform.
///
/// Two shapes are detected: a leading `/` or `\` (rooted, including UNC
/// prefixes such as `\\server`), and an ASCII letter followed by `:`
/// (drive-absolute `C:\x` as well as drive-relative `C:x`).
///
/// Works directly on the bytes of `path`, so no normalized copy of the
/// string is allocated.
fn has_windows_rooted_or_drive_relative_prefix(path: &str) -> bool {
    match path.as_bytes() {
        // Either separator at the start means "rooted".
        [b'/' | b'\\', ..] => true,
        // `<letter>:` is a drive prefix, whatever follows it.
        [drive, b':', ..] => drive.is_ascii_alphabetic(),
        _ => false,
    }
}
730
731/// Read every fixture trace referenced by the bundle manifest. Returns
732/// the manifest plus loaded traces, in the order they appear in the
733/// manifest. Fixtures with `path: None` are skipped.
734pub fn load_crystallization_bundle(
735    bundle_dir: &Path,
736) -> Result<(CrystallizationBundleManifest, Vec<CrystallizationTrace>), VmError> {
737    let manifest = load_crystallization_bundle_manifest(bundle_dir)?;
738    let mut traces = Vec::new();
739    for fixture in &manifest.fixtures {
740        let path = resolve_bundle_manifest_path(bundle_dir, &fixture.path, "fixture")
741            .map_err(VmError::Runtime)?;
742        traces.push(load_crystallization_trace(&path)?);
743    }
744    Ok((manifest, traces))
745}
746
747/// Validate a bundle directory layout and contents. Cheap enough to call
748/// from a CLI smoke command; performs no live side effects.
749pub fn validate_crystallization_bundle(bundle_dir: &Path) -> Result<BundleValidation, VmError> {
750    let mut validation = BundleValidation {
751        bundle_dir: bundle_dir.display().to_string(),
752        ..BundleValidation::default()
753    };
754    let manifest = match load_crystallization_bundle_manifest(bundle_dir) {
755        Ok(manifest) => manifest,
756        Err(error) => {
757            validation.problems.push(error.to_string());
758            return Ok(validation);
759        }
760    };
761    validation.manifest_ok = true;
762    validation.schema = manifest.schema.clone();
763    validation.schema_version = manifest.schema_version;
764    validation.kind = manifest.kind.clone();
765    validation.candidate_id = manifest.candidate_id.clone();
766
767    match resolve_bundle_manifest_path(bundle_dir, &manifest.workflow.path, "workflow") {
768        Ok(workflow_path) if workflow_path.exists() => {
769            validation.workflow_ok = true;
770        }
771        Ok(workflow_path) => {
772            validation
773                .problems
774                .push(format!("missing workflow file {}", workflow_path.display()));
775        }
776        Err(problem) => validation.problems.push(problem),
777    }
778
779    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
780    match std::fs::read(&report_path) {
781        Ok(bytes) => match serde_json::from_slice::<CrystallizationReport>(&bytes) {
782            Ok(report) => {
783                validation.report_ok = true;
784                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
785                    && manifest.candidate_id.is_empty()
786                {
787                    validation
788                        .problems
789                        .push("manifest is non-rejected but has empty candidate_id".to_string());
790                }
791                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
792                    && report.selected_candidate_id.as_deref() != Some(&manifest.candidate_id)
793                {
794                    validation.problems.push(format!(
795                        "report selected_candidate_id {:?} does not match manifest candidate_id {}",
796                        report.selected_candidate_id, manifest.candidate_id
797                    ));
798                }
799            }
800            Err(error) => {
801                validation
802                    .problems
803                    .push(format!("invalid report.json: {error}"));
804            }
805        },
806        Err(error) => {
807            validation.problems.push(format!(
808                "missing report file {}: {error}",
809                report_path.display()
810            ));
811        }
812    }
813
814    if let Some(eval_pack) = &manifest.eval_pack {
815        match resolve_bundle_manifest_path(bundle_dir, &eval_pack.path, "eval_pack") {
816            Ok(path) if path.exists() => {
817                validation.eval_pack_ok = true;
818            }
819            Ok(path) => {
820                validation.problems.push(format!(
821                    "manifest references eval pack {} but file is missing",
822                    path.display()
823                ));
824            }
825            Err(problem) => validation.problems.push(problem),
826        }
827    } else {
828        validation.eval_pack_ok = true;
829    }
830
831    if let Some(skill) = &manifest.skill {
832        let mut skill_problem = false;
833        match resolve_bundle_manifest_path(bundle_dir, &skill.path, "skill") {
834            Ok(path) if path.exists() => match std::fs::read_to_string(&path) {
835                Ok(source) => {
836                    let (frontmatter, _) = split_frontmatter(&source);
837                    match parse_frontmatter(frontmatter) {
838                        Ok(parsed) => {
839                            if parsed.manifest.name.trim().is_empty() {
840                                validation
841                                    .problems
842                                    .push("skill SKILL.md is missing frontmatter name".to_string());
843                                skill_problem = true;
844                            } else if parsed.manifest.name != skill.name {
845                                validation.problems.push(format!(
846                                    "skill SKILL.md name {} does not match manifest skill name {}",
847                                    parsed.manifest.name, skill.name
848                                ));
849                                skill_problem = true;
850                            }
851                            if parsed.manifest.short.trim().is_empty() {
852                                validation.problems.push(
853                                    "skill SKILL.md is missing required short card".to_string(),
854                                );
855                                skill_problem = true;
856                            }
857                        }
858                        Err(error) => {
859                            validation
860                                .problems
861                                .push(format!("invalid skill SKILL.md frontmatter: {error}"));
862                            skill_problem = true;
863                        }
864                    }
865                }
866                Err(error) => {
867                    validation.problems.push(format!(
868                        "failed to read skill file {}: {error}",
869                        path.display()
870                    ));
871                    skill_problem = true;
872                }
873            },
874            Ok(path) => {
875                validation.problems.push(format!(
876                    "manifest references skill {} but file is missing",
877                    path.display()
878                ));
879                skill_problem = true;
880            }
881            Err(problem) => {
882                validation.problems.push(problem);
883                skill_problem = true;
884            }
885        }
886        match resolve_bundle_manifest_path(bundle_dir, &skill.gate_receipt_path, "skill gate") {
887            Ok(path) if path.exists() => match std::fs::read_to_string(&path) {
888                Ok(source) => match serde_json::from_str::<SkillInductionGateReceipt>(&source) {
889                    Ok(receipt) => {
890                        if receipt.type_name != SKILL_GATE_RECEIPT_SCHEMA {
891                            validation.problems.push(format!(
892                                "skill gate receipt has unexpected type {}",
893                                receipt.type_name
894                            ));
895                            skill_problem = true;
896                        }
897                        if receipt.skill_candidate_id != skill.skill_candidate_id {
898                            validation.problems.push(format!(
899                                "skill gate receipt candidate id {} does not match manifest {}",
900                                receipt.skill_candidate_id, skill.skill_candidate_id
901                            ));
902                            skill_problem = true;
903                        }
904                        if receipt.workflow_candidate_id != skill.workflow_candidate_id {
905                            validation.problems.push(format!(
906                                "skill gate receipt workflow id {} does not match manifest {}",
907                                receipt.workflow_candidate_id, skill.workflow_candidate_id
908                            ));
909                            skill_problem = true;
910                        }
911                        if !receipt.accepted {
912                            validation
913                                .problems
914                                .push("skill gate receipt is not accepted".to_string());
915                            skill_problem = true;
916                        }
917                    }
918                    Err(error) => {
919                        validation
920                            .problems
921                            .push(format!("invalid skill gate receipt JSON: {error}"));
922                        skill_problem = true;
923                    }
924                },
925                Err(error) => {
926                    validation.problems.push(format!(
927                        "failed to read skill gate receipt {}: {error}",
928                        path.display()
929                    ));
930                    skill_problem = true;
931                }
932            },
933            Ok(path) => {
934                validation.problems.push(format!(
935                    "manifest references skill gate receipt {} but file is missing",
936                    path.display()
937                ));
938                skill_problem = true;
939            }
940            Err(problem) => {
941                validation.problems.push(problem);
942                skill_problem = true;
943            }
944        }
945        validation.skill_ok = !skill_problem;
946    } else {
947        validation.skill_ok = true;
948    }
949
950    let mut fixtures_problem = false;
951    for fixture in &manifest.fixtures {
952        let path = match resolve_bundle_manifest_path(bundle_dir, &fixture.path, "fixture") {
953            Ok(path) => path,
954            Err(problem) => {
955                validation.problems.push(problem);
956                fixtures_problem = true;
957                continue;
958            }
959        };
960        if !path.exists() {
961            validation
962                .problems
963                .push(format!("missing fixture {}", path.display()));
964            fixtures_problem = true;
965            continue;
966        }
967        if !fixture.redacted {
968            validation.problems.push(format!(
969                "fixture {} is not marked redacted; bundle must not ship raw private payloads",
970                fixture.path
971            ));
972            fixtures_problem = true;
973        }
974    }
975    validation.fixtures_ok = !fixtures_problem;
976
977    if !manifest.redaction.applied && !manifest.fixtures.is_empty() {
978        validation
979            .problems
980            .push("redaction.applied is false but bundle includes fixtures".to_string());
981    } else {
982        validation.redaction_ok = true;
983    }
984    if !manifest
985        .required_secrets
986        .iter()
987        .all(|secret| secret_id_looks_logical(secret))
988    {
989        validation.problems.push(
990            "required_secrets contains a non-logical id (looks like a raw secret)".to_string(),
991        );
992    }
993
994    Ok(validation)
995}
996
997/// Replay shadow comparison from a bundle: re-runs the deterministic
998/// shadow check in-process against the bundle's redacted fixtures, with
999/// no live side effects. Returns the manifest and the freshly computed
1000/// `ShadowRunReport`. The returned report is suitable for cloud import or
1001/// for asserting determinism in CI.
1002pub fn shadow_replay_bundle(
1003    bundle_dir: &Path,
1004) -> Result<(CrystallizationBundleManifest, ShadowRunReport), VmError> {
1005    let (manifest, traces) = load_crystallization_bundle(bundle_dir)?;
1006    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
1007    let bytes = std::fs::read(&report_path).map_err(|error| {
1008        VmError::Runtime(format!(
1009            "failed to read bundle report {}: {error}",
1010            report_path.display()
1011        ))
1012    })?;
1013    let report: CrystallizationReport = serde_json::from_slice(&bytes).map_err(|error| {
1014        VmError::Runtime(format!(
1015            "failed to decode bundle report {}: {error}",
1016            report_path.display()
1017        ))
1018    })?;
1019    let candidate = report
1020        .selected_candidate_id
1021        .as_deref()
1022        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
1023        .ok_or_else(|| {
1024            VmError::Runtime(format!(
1025                "bundle {} has no selected candidate to replay",
1026                bundle_dir.display()
1027            ))
1028        })?;
1029    let shadow = shadow_candidate(candidate, &traces);
1030    Ok((manifest, shadow))
1031}
1032
1033fn write_bytes(path: &Path, bytes: &[u8]) -> Result<(), VmError> {
1034    crate::atomic_io::atomic_write(path, bytes)
1035        .map_err(|error| VmError::Runtime(format!("failed to write {}: {error}", path.display())))
1036}
1037
/// Turn an arbitrary string into a fixture-name-safe token.
///
/// Every character other than an ASCII letter, ASCII digit, `-`, or `_`
/// is replaced by `_`; leading and trailing underscores are then
/// stripped. If nothing remains, the literal `"trace"` is returned.
fn sanitize_fixture_name(raw: &str) -> String {
    let mut mapped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        let keep = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_');
        mapped.push(if keep { ch } else { '_' });
    }
    match mapped.trim_matches('_') {
        "" => "trace".to_string(),
        core => core.to_string(),
    }
}
1055
/// Build a lowercase, dash-separated key from an arbitrary string.
///
/// Runs of ASCII letters and digits are lowercased and joined with a
/// single `-`; every other character acts only as a separator, so the
/// result never starts or ends with a dash and never contains two
/// dashes in a row. When the input has no ASCII alphanumerics at all,
/// the literal `"crystallized-workflow"` is returned.
fn sanitize_external_key(raw: &str) -> String {
    let slug = raw
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|piece| !piece.is_empty())
        .map(str::to_ascii_lowercase)
        .collect::<Vec<_>>()
        .join("-");
    if slug.is_empty() {
        return "crystallized-workflow".to_string();
    }
    slug
}
1076
1077fn infer_bundle_title(candidate: Option<&WorkflowCandidate>, fallback_name: &str) -> String {
1078    if let Some(candidate) = candidate {
1079        format!(
1080            "{} ({} step{})",
1081            candidate.name,
1082            candidate.steps.len(),
1083            if candidate.steps.len() == 1 { "" } else { "s" }
1084        )
1085    } else {
1086        format!("rejected: {fallback_name}")
1087    }
1088}
1089
1090fn infer_risk_level(candidate: Option<&WorkflowCandidate>) -> String {
1091    let Some(candidate) = candidate else {
1092        return "high".to_string();
1093    };
1094    let touches_external = candidate.side_effects.iter().any(side_effect_is_external);
1095    let needs_secret = !candidate.required_secrets.is_empty();
1096    if touches_external && needs_secret {
1097        "high".to_string()
1098    } else if touches_external || needs_secret {
1099        "medium".to_string()
1100    } else {
1101        "low".to_string()
1102    }
1103}
1104
1105fn side_effect_is_external(effect: &CrystallizationSideEffect) -> bool {
1106    let kind = effect.kind.to_ascii_lowercase();
1107    if kind.is_empty() {
1108        return false;
1109    }
1110    // Plan-only side effects stay inside Harn's own data plane: they
1111    // write receipts, append to the in-process event log, or stash plans.
1112    // None of those touch tenant-external systems.
1113    let internal = kind.contains("receipt")
1114        || kind.contains("event_log")
1115        || kind.contains("memo")
1116        || kind.contains("plan");
1117    if internal {
1118        return false;
1119    }
1120    kind.contains("post")
1121        || kind.contains("write")
1122        || kind.contains("publish")
1123        || kind.contains("delete")
1124        || kind.contains("send")
1125}
1126
1127fn candidate_is_plan_only(candidate: &WorkflowCandidate) -> bool {
1128    if candidate.steps.is_empty() {
1129        return false;
1130    }
1131    candidate.side_effects.iter().all(|effect| {
1132        let kind = effect.kind.to_ascii_lowercase();
1133        // Plan-only side effects stay inside Harn's own data plane: receipt
1134        // writes, in-memory event-log appends, file-only mutations, etc.
1135        kind.is_empty()
1136            || kind.contains("receipt")
1137            || kind.contains("event_log")
1138            || kind.contains("memo")
1139            || kind.contains("plan")
1140            || (kind.contains("file") && !kind.contains("publish"))
1141    })
1142}
1143
1144pub(super) fn redact_trace_for_bundle(trace: &mut CrystallizationTrace) {
1145    for action in &mut trace.actions {
1146        redact_bundle_value(&mut action.inputs);
1147        if let Some(output) = action.output.as_mut() {
1148            redact_bundle_value(output);
1149        }
1150        if let Some(observed) = action.observed_output.as_mut() {
1151            redact_bundle_value(observed);
1152        }
1153        for value in action.parameters.values_mut() {
1154            redact_bundle_value(value);
1155        }
1156        for (_, value) in action.metadata.iter_mut() {
1157            redact_bundle_value(value);
1158        }
1159    }
1160    for (_, value) in trace.metadata.iter_mut() {
1161        redact_bundle_value(value);
1162    }
1163    if let Some(run) = trace.replay_run.as_mut() {
1164        redact_replay_run_for_bundle(run);
1165    }
1166}
1167
1168fn redact_replay_run_for_bundle(run: &mut ReplayTraceRun) {
1169    for value in run
1170        .event_log_entries
1171        .iter_mut()
1172        .chain(run.trigger_firings.iter_mut())
1173        .chain(run.llm_interactions.iter_mut())
1174        .chain(run.protocol_interactions.iter_mut())
1175        .chain(run.approval_interactions.iter_mut())
1176        .chain(run.effect_receipts.iter_mut())
1177        .chain(run.agent_transcript_deltas.iter_mut())
1178        .chain(run.final_artifacts.iter_mut())
1179        .chain(run.policy_decisions.iter_mut())
1180    {
1181        redact_bundle_value(value);
1182    }
1183}
1184
1185fn redact_bundle_value(value: &mut JsonValue) {
1186    match value {
1187        JsonValue::String(text) if looks_like_secret_value(text) => {
1188            *text = "[redacted]".to_string();
1189        }
1190        JsonValue::Array(items) => {
1191            for item in items {
1192                redact_bundle_value(item);
1193            }
1194        }
1195        JsonValue::Object(map) => {
1196            for (key, child) in map.iter_mut() {
1197                if is_sensitive_bundle_key(key) {
1198                    *child = JsonValue::String("[redacted]".to_string());
1199                } else {
1200                    redact_bundle_value(child);
1201                }
1202            }
1203        }
1204        _ => {}
1205    }
1206}
1207
/// Report whether a JSON object key names a field whose value must be
/// redacted wholesale, regardless of what the value looks like.
///
/// Matching is ASCII case-insensitive. A key is sensitive when it
/// contains any of the substring markers below, or when it is exactly
/// one of the listed HTTP credential header names. Header names are
/// matched exactly so that related but harmless keys (for example
/// `authorization_url` or `cookies`) are not redacted.
fn is_sensitive_bundle_key(key: &str) -> bool {
    // `api-key` covers hyphenated spellings such as the `X-API-Key`
    // header, which the underscore and joined spellings do not match.
    const SUBSTRING_MARKERS: [&str; 6] = [
        "secret", "token", "password", "api_key", "api-key", "apikey",
    ];
    // `proxy-authorization` carries credentials just like `authorization`.
    const EXACT_HEADERS: [&str; 4] = [
        "authorization",
        "proxy-authorization",
        "cookie",
        "set-cookie",
    ];

    let lower = key.to_ascii_lowercase();
    SUBSTRING_MARKERS
        .iter()
        .any(|marker| lower.contains(*marker))
        || EXACT_HEADERS.contains(&lower.as_str())
}
1219
/// Heuristically decide whether a string value looks like a raw secret.
///
/// After trimming surrounding whitespace, the value is flagged when it
/// starts with one of the known prefixes below, or when it is longer
/// than 48 bytes and consists solely of ASCII letters, ASCII digits,
/// `_`, and `-`.
fn looks_like_secret_value(value: &str) -> bool {
    const KNOWN_PREFIXES: [&str; 6] = ["sk-", "ghp_", "ghs_", "xoxb-", "xoxp-", "AKIA"];

    let candidate = value.trim();
    if KNOWN_PREFIXES
        .iter()
        .any(|prefix| candidate.starts_with(*prefix))
    {
        return true;
    }

    // Any non-ASCII character is made of bytes >= 0x80, which fail the
    // check below, so a byte-wise scan accepts exactly the same strings
    // as a char-wise one.
    candidate.len() > 48
        && candidate
            .bytes()
            .all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-'))
}
1233
1234fn secret_id_looks_logical(value: &str) -> bool {
1235    !looks_like_secret_value(value) && !value.trim().is_empty()
1236}