Skip to main content

harn_vm/orchestration/crystallize/
bundle.rs

1//! Crystallization bundle: types, build/write/load/validate, shadow replay, and redaction helpers.
2
3use std::collections::BTreeSet;
4use std::path::Path;
5
6use serde::{Deserialize, Serialize};
7use serde_json::Value as JsonValue;
8
9use super::super::{now_rfc3339, ReplayTraceRun};
10use super::api::load_crystallization_trace;
11use super::shadow::{find_sequence_start, shadow_candidate};
12use super::types::{
13    CrystallizationApproval, CrystallizationArtifacts, CrystallizationReport,
14    CrystallizationSideEffect, CrystallizationTrace, PromotionApprovalRecord, PromotionCriteria,
15    PromotionDivergenceRecord, SavingsEstimate, SegmentKind, ShadowRunReport, WorkflowCandidate,
16    WorkflowCandidateStep, BUNDLE_EVAL_PACK_FILE, BUNDLE_FIXTURES_DIR, BUNDLE_MANIFEST_FILE,
17    BUNDLE_REPORT_FILE, BUNDLE_SCHEMA, BUNDLE_SCHEMA_VERSION, BUNDLE_WORKFLOW_FILE,
18    DEFAULT_ROLLOUT_POLICY,
19};
20use crate::value::VmError;
21
22// ===== Crystallization bundle =====
23//
24// A bundle is a directory layout that Harn writes and Harn Cloud (or any
25// other importer) reads without bespoke glue. The contract is:
26//
27//   bundle/
28//     candidate.json        # versioned manifest documented below
29//     workflow.harn         # generated/reviewable workflow code
30//     report.json           # full mining/shadow/eval report
31//     harn.eval.toml        # generated eval pack when available (optional)
32//     fixtures/             # redacted replay fixtures referenced by the
33//                           # report (optional, only when --bundle is used
34//                           # with `harn crystallize` and traces were on disk)
35//
36// `candidate.json` is the authoritative manifest. It must include the
37// `schema` and `schema_version` markers. Cloud importers MUST reject any
38// bundle whose `schema` is not exactly `harn.crystallization.candidate.bundle`
39// or whose `schema_version` is greater than the highest version they
40// understand. Only the documented additive fields may be added without
41// bumping `schema_version`.
42
/// Identifies the tool and tool version that produced a bundle.
///
/// `#[serde(default)]` lets a serialized value omit either field; an omitted
/// field takes the value supplied by this type's `Default` implementation.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleGenerator {
    /// Name of the generating tool.
    pub tool: String,
    /// Version string of the generating tool.
    pub version: String,
}
49
50impl Default for BundleGenerator {
51    fn default() -> Self {
52        Self {
53            tool: "harn".to_string(),
54            version: env!("CARGO_PKG_VERSION").to_string(),
55        }
56    }
57}
58
/// Manifest pointer to the generated workflow file plus the naming data
/// used when the workflow is promoted.
///
/// Every field is optional on input (`#[serde(default)]`); a missing field
/// deserializes to an empty string.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleWorkflowRef {
    /// Relative path inside the bundle directory.
    pub path: String,
    /// Short identifier used in `pipeline NAME(...)`.
    pub name: String,
    /// Logical package name promotion uses to register the workflow.
    pub package_name: String,
    /// Initial workflow version proposed for promotion.
    pub package_version: String,
}
71
/// Provenance record for one trace that contributed to the candidate.
///
/// Every field is optional on input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleSourceTrace {
    /// Identifier of the source trace.
    pub trace_id: String,
    /// Source hash recorded for the trace; an empty string when none is known.
    pub source_hash: String,
    /// Optional human-visible URL (PR, issue, run record path) for the
    /// trace. `None` when the trace was loaded from an in-memory store.
    pub source_url: Option<String>,
    /// Optional cloud-side receipt id when the trace was already promoted
    /// into a tenant receipt. Cloud importers use this to wire candidate
    /// evidence to existing receipts without round-tripping the raw payload.
    pub source_receipt_id: Option<String>,
    /// Relative path of the redacted fixture inside the bundle, if one
    /// was emitted.
    pub fixture_path: Option<String>,
}
88
/// Manifest view of a single step of the selected workflow candidate.
///
/// Each field mirrors the same-named field of `WorkflowCandidateStep`
/// (see `BundleStep::from_candidate_step`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct BundleStep {
    /// Step index as recorded on the candidate step.
    pub index: usize,
    /// Step kind label copied from the candidate step.
    pub kind: String,
    /// Step name copied from the candidate step.
    pub name: String,
    /// Segment classification; the bundle builder uses it to split steps
    /// into the manifest's deterministic and fuzzy lists.
    pub segment: SegmentKind,
    /// Parameter references copied from the candidate step.
    pub parameter_refs: Vec<String>,
    /// Side effects attributed to this step.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities listed for this step.
    pub capabilities: Vec<String>,
    /// Secret identifiers listed for this step.
    pub required_secrets: Vec<String>,
    /// Approval data attached to the step, if any.
    pub approval: Option<CrystallizationApproval>,
    /// Free-form reviewer notes copied from the candidate step.
    pub review_notes: Vec<String>,
}
103
104impl BundleStep {
105    fn from_candidate_step(step: &WorkflowCandidateStep) -> Self {
106        Self {
107            index: step.index,
108            kind: step.kind.clone(),
109            name: step.name.clone(),
110            segment: step.segment.clone(),
111            parameter_refs: step.parameter_refs.clone(),
112            side_effects: step.side_effects.clone(),
113            capabilities: step.capabilities.clone(),
114            required_secrets: step.required_secrets.clone(),
115            approval: step.approval.clone(),
116            review_notes: step.review_notes.clone(),
117        }
118    }
119}
120
/// Manifest pointer to the generated eval pack.
///
/// The manifest carries this only when an eval pack was generated; see the
/// `eval_pack` field of `CrystallizationBundleManifest`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleEvalPackRef {
    /// Relative path of the eval pack inside the bundle directory.
    pub path: String,
    /// Optional external link the eval pack also lives at (e.g. a hosted
    /// `eval-pack://` URI when the bundle was promoted into a tenant).
    pub link: Option<String>,
}
130
/// Manifest entry describing one replay fixture file shipped in the bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleFixtureRef {
    /// Path of the fixture file, joined onto the bundle directory when the
    /// bundle is loaded or validated.
    pub path: String,
    /// Identifier of the trace the fixture was produced from.
    pub trace_id: String,
    /// Source hash recorded for that trace; may be empty.
    pub source_hash: String,
    /// Whether the fixture payload was redacted. Bundle validation reports
    /// a problem for any fixture where this is `false`.
    pub redacted: bool,
}
139
/// Promotion metadata carried in the manifest: ownership, rollout policy,
/// and the evidence counters copied from the selected candidate.
///
/// Missing fields deserialize to the values of this type's `Default`
/// implementation (`#[serde(default)]`).
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct BundlePromotion {
    /// Owner of the workflow; the bundle builder initializes it to the author.
    pub owner: Option<String>,
    /// Approver recorded on the candidate, if any.
    pub approver: Option<String>,
    /// Author recorded on the candidate, if any.
    pub author: Option<String>,
    /// Logical rollout strategy. Defaults to `shadow_then_canary`. Hosted
    /// surfaces may extend this enum but must keep existing values stable.
    pub rollout_policy: String,
    /// Rollback target recorded on the candidate, if any.
    pub rollback_target: Option<String>,
    /// Timestamp string taken from `now_rfc3339()` when the bundle is built.
    pub created_at: String,
    /// Workflow version proposed for promotion.
    pub workflow_version: String,
    /// Logical package name the workflow is registered under.
    pub package_name: String,
    /// Sample count copied from the candidate's promotion data.
    pub sample_count: usize,
    /// Confidence value copied from the candidate's promotion data.
    pub confidence: f64,
    /// Shadow success counter copied from the candidate's promotion data.
    pub shadow_success_count: usize,
    /// Shadow failure counter copied from the candidate's promotion data.
    pub shadow_failure_count: usize,
    /// Divergence records copied from the candidate's promotion data.
    pub divergence_history: Vec<PromotionDivergenceRecord>,
    /// Approval records copied from the candidate's promotion data.
    pub approval_history: Vec<PromotionApprovalRecord>,
    /// Promotion criteria copied from the candidate's promotion data.
    pub criteria: PromotionCriteria,
    /// Savings estimate copied from the candidate's promotion data.
    pub estimated_time_token_savings: SavingsEstimate,
}
162
163impl Default for BundlePromotion {
164    fn default() -> Self {
165        Self {
166            owner: None,
167            approver: None,
168            author: None,
169            rollout_policy: DEFAULT_ROLLOUT_POLICY.to_string(),
170            rollback_target: None,
171            created_at: String::new(),
172            workflow_version: String::new(),
173            package_name: String::new(),
174            sample_count: 0,
175            confidence: 0.0,
176            shadow_success_count: 0,
177            shadow_failure_count: 0,
178            divergence_history: Vec::new(),
179            approval_history: Vec::new(),
180            criteria: PromotionCriteria::default(),
181            estimated_time_token_savings: SavingsEstimate::default(),
182        }
183    }
184}
185
/// Summary of the redaction pass applied to fixture payloads.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleRedactionSummary {
    /// Whether redaction was applied. Bundle validation reports a problem
    /// when this is `false` while the manifest lists fixtures.
    pub applied: bool,
    /// Names of the redaction rules recorded for the bundle.
    pub rules: Vec<String>,
    /// Human-readable description of what the redaction pass did.
    pub summary: String,
    /// Number of fixture files copied into the bundle (0 when no fixture
    /// directory was emitted).
    pub fixture_count: usize,
}
196
/// In-memory form of the bundle manifest file (`BUNDLE_MANIFEST_FILE`).
///
/// All fields are optional on input (`#[serde(default)]`), so a manifest
/// with a missing `schema` deserializes with an empty string and is then
/// rejected by the loader's schema check.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundleManifest {
    /// Schema marker; the loader rejects anything other than `BUNDLE_SCHEMA`.
    pub schema: String,
    /// Schema version; the loader rejects values above `BUNDLE_SCHEMA_VERSION`.
    pub schema_version: u32,
    /// Timestamp string taken from `now_rfc3339()` when the bundle is built.
    pub generated_at: String,
    /// Tool and version that produced the bundle.
    pub generator: BundleGenerator,
    /// Whether the bundle holds a candidate, a plan-only candidate, or a
    /// rejection record.
    pub kind: BundleKind,
    /// Id of the selected candidate; empty when no candidate was selected.
    pub candidate_id: String,
    /// Stable key importers use to dedupe bundles across runs.
    pub external_key: String,
    /// Human-readable title for the bundle.
    pub title: String,
    /// Team label passed through from `BundleOptions`.
    pub team: Option<String>,
    /// Repository label passed through from `BundleOptions`.
    pub repo: Option<String>,
    /// Risk level label for the bundle.
    pub risk_level: String,
    /// Pointer to the generated workflow file and its package naming.
    pub workflow: BundleWorkflowRef,
    /// Source trace hashes copied from the candidate's promotion data.
    pub source_trace_hashes: Vec<String>,
    /// Provenance records for the traces that back the candidate.
    pub source_traces: Vec<BundleSourceTrace>,
    /// Candidate steps whose segment is `SegmentKind::Deterministic`.
    pub deterministic_steps: Vec<BundleStep>,
    /// Candidate steps with any other segment kind.
    pub fuzzy_steps: Vec<BundleStep>,
    /// Side effects copied from the selected candidate.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities copied from the selected candidate.
    pub capabilities: Vec<String>,
    /// Secret identifiers copied from the selected candidate. Bundle
    /// validation reports a problem when an entry looks like a raw secret.
    pub required_secrets: Vec<String>,
    /// Savings estimate copied from the selected candidate.
    pub savings: SavingsEstimate,
    /// Shadow run report copied from the selected candidate.
    pub shadow: ShadowRunReport,
    /// Pointer to the eval pack; `None` when no eval pack text was generated.
    pub eval_pack: Option<BundleEvalPackRef>,
    /// One entry per fixture file shipped with the bundle.
    pub fixtures: Vec<BundleFixtureRef>,
    /// Promotion metadata (ownership, rollout policy, evidence counters).
    pub promotion: BundlePromotion,
    /// Summary of the redaction applied to fixtures.
    pub redaction: BundleRedactionSummary,
    /// Confidence of the selected candidate; `0.0` when none was selected.
    pub confidence: f64,
    /// Rejection reasons gathered from every rejected candidate in the report.
    pub rejection_reasons: Vec<String>,
    /// Warnings copied from the report.
    pub warnings: Vec<String>,
}
229
/// Classification of a bundle's contents.
///
/// Serialized in snake_case (`candidate`, `plan_only`, `rejected`) via
/// `#[serde(rename_all = "snake_case")]`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum BundleKind {
    /// A normal candidate that passed shadow comparison and is ready for
    /// review and promotion.
    #[default]
    Candidate,
    /// A "plan-only" candidate: every step has a side-effect-free, in-process
    /// outcome (e.g. classify and write a receipt). Cloud importers can
    /// promote these without explicit external-side-effect approval.
    PlanOnly,
    /// No safe candidate was selected. The bundle still records what was
    /// attempted, the rejection reasons, and any rejected candidates so
    /// reviewers can debug or feed it back into mining.
    Rejected,
}
246
/// Caller-supplied overrides applied when building a bundle.
///
/// For `external_key`, `title`, `risk_level`, and `rollout_policy`, the
/// bundle builder treats a `None` or whitespace-only value as unset and
/// substitutes an inferred or default value. `team` and `repo` are copied
/// into the manifest unchanged.
#[derive(Clone, Debug, Default)]
pub struct BundleOptions {
    /// Stable identifier downstream cloud importers use to dedupe bundles
    /// across runs (defaults to a sanitized workflow name).
    pub external_key: Option<String>,
    /// Title override; inferred from the candidate when unset.
    pub title: Option<String>,
    /// Team label recorded in the manifest.
    pub team: Option<String>,
    /// Repository label recorded in the manifest.
    pub repo: Option<String>,
    /// Risk level override; inferred from the candidate when unset.
    pub risk_level: Option<String>,
    /// Rollout policy override; `DEFAULT_ROLLOUT_POLICY` when unset.
    pub rollout_policy: Option<String>,
}
258
/// A complete bundle held in memory: the manifest plus the payloads that
/// are written next to it in the bundle directory.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundle {
    /// Manifest written to `BUNDLE_MANIFEST_FILE`.
    pub manifest: CrystallizationBundleManifest,
    /// Full report written to `BUNDLE_REPORT_FILE`.
    pub report: CrystallizationReport,
    /// Generated workflow source written to `BUNDLE_WORKFLOW_FILE`.
    pub harn_code: String,
    /// Generated eval pack text; the eval pack file is written only when
    /// this is not blank.
    pub eval_pack_toml: String,
    /// Redacted traces written as one JSON file each under
    /// `BUNDLE_FIXTURES_DIR`.
    pub fixtures: Vec<CrystallizationTrace>,
}
268
/// Result of validating a bundle on disk: per-area pass flags plus the
/// list of problems found. An empty `problems` list means the bundle passed.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleValidation {
    /// Display form of the validated bundle directory.
    pub bundle_dir: String,
    /// Schema marker read from the manifest (empty if it failed to load).
    pub schema: String,
    /// Schema version read from the manifest.
    pub schema_version: u32,
    /// Bundle kind read from the manifest.
    pub kind: BundleKind,
    /// Candidate id read from the manifest.
    pub candidate_id: String,
    /// The manifest was read, decoded, and passed the schema checks.
    pub manifest_ok: bool,
    /// The workflow file referenced by the manifest is present.
    pub workflow_ok: bool,
    /// The report file was read and decoded.
    pub report_ok: bool,
    /// The manifest references no eval pack, or the referenced file is present.
    pub eval_pack_ok: bool,
    /// Every listed fixture is present and marked redacted.
    pub fixtures_ok: bool,
    /// The redaction summary is consistent with the presence of fixtures.
    pub redaction_ok: bool,
    /// Human-readable description of each problem found.
    pub problems: Vec<String>,
}
286
287impl BundleValidation {
288    pub fn is_ok(&self) -> bool {
289        self.problems.is_empty()
290    }
291}
292
293/// Build an in-memory bundle from already-mined artifacts. The traces
294/// passed here are the same normalized traces used to mine the candidate;
295/// they will be redacted before being attached as fixtures.
296pub fn build_crystallization_bundle(
297    artifacts: CrystallizationArtifacts,
298    traces: &[CrystallizationTrace],
299    options: BundleOptions,
300) -> Result<CrystallizationBundle, VmError> {
301    let CrystallizationArtifacts {
302        report,
303        harn_code,
304        eval_pack_toml,
305    } = artifacts;
306
307    let (selected, kind) = match report
308        .selected_candidate_id
309        .as_deref()
310        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
311    {
312        Some(candidate) => {
313            let kind = if candidate_is_plan_only(candidate) {
314                BundleKind::PlanOnly
315            } else {
316                BundleKind::Candidate
317            };
318            (Some(candidate), kind)
319        }
320        None => (None, BundleKind::Rejected),
321    };
322
323    let workflow_name = selected
324        .map(|candidate| candidate.name.clone())
325        .unwrap_or_else(|| "crystallized_workflow".to_string());
326    let package_name = selected
327        .map(|candidate| candidate.promotion.package_name.clone())
328        .unwrap_or_else(|| workflow_name.replace('_', "-"));
329    let workflow_version = selected
330        .map(|candidate| candidate.promotion.version.clone())
331        .unwrap_or_else(|| "0.0.0".to_string());
332
333    let manifest_workflow = BundleWorkflowRef {
334        path: BUNDLE_WORKFLOW_FILE.to_string(),
335        name: workflow_name.clone(),
336        package_name: package_name.clone(),
337        package_version: workflow_version.clone(),
338    };
339
340    let external_key = options
341        .external_key
342        .clone()
343        .filter(|key| !key.trim().is_empty())
344        .unwrap_or_else(|| sanitize_external_key(&workflow_name));
345    let title = options
346        .title
347        .clone()
348        .filter(|title| !title.trim().is_empty())
349        .unwrap_or_else(|| infer_bundle_title(selected, &workflow_name));
350    let risk_level = options
351        .risk_level
352        .clone()
353        .filter(|risk| !risk.trim().is_empty())
354        .unwrap_or_else(|| infer_risk_level(selected));
355    let rollout_policy = options
356        .rollout_policy
357        .clone()
358        .filter(|policy| !policy.trim().is_empty())
359        .unwrap_or_else(|| DEFAULT_ROLLOUT_POLICY.to_string());
360
361    let (deterministic_steps, fuzzy_steps) = match selected {
362        Some(candidate) => candidate
363            .steps
364            .iter()
365            .map(BundleStep::from_candidate_step)
366            .partition::<Vec<_>, _>(|step| step.segment == SegmentKind::Deterministic),
367        None => (Vec::new(), Vec::new()),
368    };
369
370    let source_trace_hashes = selected
371        .map(|candidate| candidate.promotion.source_trace_hashes.clone())
372        .unwrap_or_default();
373
374    let mut source_traces = Vec::new();
375    let mut fixture_refs = Vec::new();
376    let mut fixture_payloads = Vec::new();
377    if let Some(candidate) = selected {
378        let mut fixture_trace_ids = BTreeSet::new();
379        for example in &candidate.examples {
380            fixture_trace_ids.insert(example.trace_id.clone());
381        }
382        for trace in traces {
383            if find_sequence_start(trace, &candidate.sequence_signature).is_some() {
384                fixture_trace_ids.insert(trace.id.clone());
385            }
386        }
387        for trace_id in fixture_trace_ids {
388            let trace = traces.iter().find(|trace| trace.id == trace_id);
389            let source_hash = trace
390                .and_then(|trace| trace.source_hash.clone())
391                .or_else(|| {
392                    candidate
393                        .examples
394                        .iter()
395                        .find(|example| example.trace_id == trace_id)
396                        .map(|example| example.source_hash.clone())
397                })
398                .unwrap_or_default();
399            let fixture_relative = trace.map(|trace| {
400                format!(
401                    "{BUNDLE_FIXTURES_DIR}/{}.json",
402                    sanitize_fixture_name(&trace.id)
403                )
404            });
405            source_traces.push(BundleSourceTrace {
406                trace_id: trace_id.clone(),
407                source_hash: source_hash.clone(),
408                source_url: trace.and_then(|trace| trace.source.clone()),
409                source_receipt_id: trace
410                    .and_then(|trace| trace.metadata.get("source_receipt_id"))
411                    .and_then(|value| value.as_str().map(str::to_string)),
412                fixture_path: fixture_relative.clone(),
413            });
414            if let (Some(trace), Some(fixture_path)) = (trace, fixture_relative.clone()) {
415                let mut redacted = trace.clone();
416                redact_trace_for_bundle(&mut redacted);
417                fixture_refs.push(BundleFixtureRef {
418                    path: fixture_path,
419                    trace_id: trace.id.clone(),
420                    source_hash,
421                    redacted: true,
422                });
423                fixture_payloads.push(redacted);
424            }
425        }
426    }
427
428    // Owner defaults to author so cloud importers always have a populated
429    // ownership pointer, but stays separate from `author` so reviewers can
430    // assign a different owner in the manifest before promotion.
431    let author = selected.and_then(|candidate| candidate.promotion.author.clone());
432    let promotion = BundlePromotion {
433        owner: author.clone(),
434        approver: selected.and_then(|candidate| candidate.promotion.approver.clone()),
435        author,
436        rollout_policy,
437        rollback_target: selected.and_then(|candidate| candidate.promotion.rollback_target.clone()),
438        created_at: now_rfc3339(),
439        workflow_version,
440        package_name: package_name.clone(),
441        sample_count: selected
442            .map(|candidate| candidate.promotion.sample_count)
443            .unwrap_or_default(),
444        confidence: selected
445            .map(|candidate| candidate.promotion.confidence)
446            .unwrap_or_default(),
447        shadow_success_count: selected
448            .map(|candidate| candidate.promotion.shadow_success_count)
449            .unwrap_or_default(),
450        shadow_failure_count: selected
451            .map(|candidate| candidate.promotion.shadow_failure_count)
452            .unwrap_or_default(),
453        divergence_history: selected
454            .map(|candidate| candidate.promotion.divergence_history.clone())
455            .unwrap_or_default(),
456        approval_history: selected
457            .map(|candidate| candidate.promotion.approval_history.clone())
458            .unwrap_or_default(),
459        criteria: selected
460            .map(|candidate| candidate.promotion.criteria.clone())
461            .unwrap_or_default(),
462        estimated_time_token_savings: selected
463            .map(|candidate| candidate.promotion.estimated_time_token_savings.clone())
464            .unwrap_or_default(),
465    };
466
467    let redaction = BundleRedactionSummary {
468        applied: !fixture_payloads.is_empty(),
469        rules: vec![
470            "sensitive_keys".to_string(),
471            "secret_value_heuristic".to_string(),
472        ],
473        summary: if fixture_payloads.is_empty() {
474            "no fixtures emitted".to_string()
475        } else {
476            "fixture payloads scrubbed of secret-like values and sensitive keys before write"
477                .to_string()
478        },
479        fixture_count: fixture_payloads.len(),
480    };
481
482    let eval_pack = if eval_pack_toml.trim().is_empty() {
483        None
484    } else {
485        Some(BundleEvalPackRef {
486            path: BUNDLE_EVAL_PACK_FILE.to_string(),
487            link: selected
488                .and_then(|candidate| candidate.promotion.eval_pack_link.clone())
489                .filter(|link| !link.trim().is_empty()),
490        })
491    };
492
493    let manifest = CrystallizationBundleManifest {
494        schema: BUNDLE_SCHEMA.to_string(),
495        schema_version: BUNDLE_SCHEMA_VERSION,
496        generated_at: now_rfc3339(),
497        generator: BundleGenerator::default(),
498        kind,
499        candidate_id: selected
500            .map(|candidate| candidate.id.clone())
501            .unwrap_or_default(),
502        external_key,
503        title,
504        team: options.team,
505        repo: options.repo,
506        risk_level,
507        workflow: manifest_workflow,
508        source_trace_hashes,
509        source_traces,
510        deterministic_steps,
511        fuzzy_steps,
512        side_effects: selected
513            .map(|candidate| candidate.side_effects.clone())
514            .unwrap_or_default(),
515        capabilities: selected
516            .map(|candidate| candidate.capabilities.clone())
517            .unwrap_or_default(),
518        required_secrets: selected
519            .map(|candidate| candidate.required_secrets.clone())
520            .unwrap_or_default(),
521        savings: selected
522            .map(|candidate| candidate.savings.clone())
523            .unwrap_or_default(),
524        shadow: selected
525            .map(|candidate| candidate.shadow.clone())
526            .unwrap_or_default(),
527        eval_pack,
528        fixtures: fixture_refs,
529        promotion,
530        redaction,
531        confidence: selected
532            .map(|candidate| candidate.confidence)
533            .unwrap_or(0.0),
534        rejection_reasons: report
535            .rejected_candidates
536            .iter()
537            .flat_map(|candidate| candidate.rejection_reasons.iter().cloned())
538            .collect(),
539        warnings: report.warnings.clone(),
540    };
541
542    Ok(CrystallizationBundle {
543        manifest,
544        report,
545        harn_code,
546        eval_pack_toml,
547        fixtures: fixture_payloads,
548    })
549}
550
551/// Write a bundle to a directory. Creates the directory if it does not
552/// already exist. Returns the manifest with `generated_at` and any
553/// runtime-resolved metadata filled in.
554pub fn write_crystallization_bundle(
555    bundle: &CrystallizationBundle,
556    bundle_dir: &Path,
557) -> Result<CrystallizationBundleManifest, VmError> {
558    std::fs::create_dir_all(bundle_dir).map_err(|error| {
559        VmError::Runtime(format!(
560            "failed to create bundle dir {}: {error}",
561            bundle_dir.display()
562        ))
563    })?;
564    write_bytes(
565        &bundle_dir.join(BUNDLE_WORKFLOW_FILE),
566        bundle.harn_code.as_bytes(),
567    )?;
568    let report_json = serde_json::to_vec_pretty(&bundle.report)
569        .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
570    write_bytes(&bundle_dir.join(BUNDLE_REPORT_FILE), &report_json)?;
571
572    if !bundle.eval_pack_toml.trim().is_empty() {
573        write_bytes(
574            &bundle_dir.join(BUNDLE_EVAL_PACK_FILE),
575            bundle.eval_pack_toml.as_bytes(),
576        )?;
577    }
578
579    if !bundle.fixtures.is_empty() {
580        let fixtures_dir = bundle_dir.join(BUNDLE_FIXTURES_DIR);
581        std::fs::create_dir_all(&fixtures_dir).map_err(|error| {
582            VmError::Runtime(format!(
583                "failed to create fixtures dir {}: {error}",
584                fixtures_dir.display()
585            ))
586        })?;
587        for trace in &bundle.fixtures {
588            let path = fixtures_dir.join(format!("{}.json", sanitize_fixture_name(&trace.id)));
589            let payload = serde_json::to_vec_pretty(trace).map_err(|error| {
590                VmError::Runtime(format!("failed to encode fixture {}: {error}", trace.id))
591            })?;
592            write_bytes(&path, &payload)?;
593        }
594    }
595
596    let manifest_json = serde_json::to_vec_pretty(&bundle.manifest)
597        .map_err(|error| VmError::Runtime(format!("failed to encode manifest JSON: {error}")))?;
598    write_bytes(&bundle_dir.join(BUNDLE_MANIFEST_FILE), &manifest_json)?;
599    Ok(bundle.manifest.clone())
600}
601
602/// Read a bundle manifest from disk. Verifies the schema marker but does
603/// not cross-check workflow/report/eval-pack sibling files; for a richer
604/// check use [`validate_crystallization_bundle`].
605pub fn load_crystallization_bundle_manifest(
606    bundle_dir: &Path,
607) -> Result<CrystallizationBundleManifest, VmError> {
608    let manifest_path = bundle_dir.join(BUNDLE_MANIFEST_FILE);
609    let bytes = std::fs::read(&manifest_path).map_err(|error| {
610        VmError::Runtime(format!(
611            "failed to read bundle manifest {}: {error}",
612            manifest_path.display()
613        ))
614    })?;
615    let manifest: CrystallizationBundleManifest =
616        serde_json::from_slice(&bytes).map_err(|error| {
617            VmError::Runtime(format!(
618                "failed to decode bundle manifest {}: {error}",
619                manifest_path.display()
620            ))
621        })?;
622    if manifest.schema != BUNDLE_SCHEMA {
623        return Err(VmError::Runtime(format!(
624            "bundle {} has unrecognized schema {:?} (expected {})",
625            bundle_dir.display(),
626            manifest.schema,
627            BUNDLE_SCHEMA
628        )));
629    }
630    if manifest.schema_version > BUNDLE_SCHEMA_VERSION {
631        return Err(VmError::Runtime(format!(
632            "bundle {} schema_version {} is newer than supported {}",
633            bundle_dir.display(),
634            manifest.schema_version,
635            BUNDLE_SCHEMA_VERSION
636        )));
637    }
638    Ok(manifest)
639}
640
641/// Read every fixture trace referenced by the bundle manifest. Returns
642/// the manifest plus loaded traces, in the order they appear in the
643/// manifest. Fixtures with `path: None` are skipped.
644pub fn load_crystallization_bundle(
645    bundle_dir: &Path,
646) -> Result<(CrystallizationBundleManifest, Vec<CrystallizationTrace>), VmError> {
647    let manifest = load_crystallization_bundle_manifest(bundle_dir)?;
648    let mut traces = Vec::new();
649    for fixture in &manifest.fixtures {
650        let path = bundle_dir.join(&fixture.path);
651        traces.push(load_crystallization_trace(&path)?);
652    }
653    Ok((manifest, traces))
654}
655
656/// Validate a bundle directory layout and contents. Cheap enough to call
657/// from a CLI smoke command; performs no live side effects.
658pub fn validate_crystallization_bundle(bundle_dir: &Path) -> Result<BundleValidation, VmError> {
659    let mut validation = BundleValidation {
660        bundle_dir: bundle_dir.display().to_string(),
661        ..BundleValidation::default()
662    };
663    let manifest = match load_crystallization_bundle_manifest(bundle_dir) {
664        Ok(manifest) => manifest,
665        Err(error) => {
666            validation.problems.push(error.to_string());
667            return Ok(validation);
668        }
669    };
670    validation.manifest_ok = true;
671    validation.schema = manifest.schema.clone();
672    validation.schema_version = manifest.schema_version;
673    validation.kind = manifest.kind.clone();
674    validation.candidate_id = manifest.candidate_id.clone();
675
676    let workflow_path = bundle_dir.join(&manifest.workflow.path);
677    if workflow_path.exists() {
678        validation.workflow_ok = true;
679    } else {
680        validation
681            .problems
682            .push(format!("missing workflow file {}", workflow_path.display()));
683    }
684
685    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
686    match std::fs::read(&report_path) {
687        Ok(bytes) => match serde_json::from_slice::<CrystallizationReport>(&bytes) {
688            Ok(report) => {
689                validation.report_ok = true;
690                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
691                    && manifest.candidate_id.is_empty()
692                {
693                    validation
694                        .problems
695                        .push("manifest is non-rejected but has empty candidate_id".to_string());
696                }
697                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
698                    && report.selected_candidate_id.as_deref() != Some(&manifest.candidate_id)
699                {
700                    validation.problems.push(format!(
701                        "report selected_candidate_id {:?} does not match manifest candidate_id {}",
702                        report.selected_candidate_id, manifest.candidate_id
703                    ));
704                }
705            }
706            Err(error) => {
707                validation
708                    .problems
709                    .push(format!("invalid report.json: {error}"));
710            }
711        },
712        Err(error) => {
713            validation.problems.push(format!(
714                "missing report file {}: {error}",
715                report_path.display()
716            ));
717        }
718    }
719
720    if let Some(eval_pack) = &manifest.eval_pack {
721        let path = bundle_dir.join(&eval_pack.path);
722        if path.exists() {
723            validation.eval_pack_ok = true;
724        } else {
725            validation.problems.push(format!(
726                "manifest references eval pack {} but file is missing",
727                path.display()
728            ));
729        }
730    } else {
731        validation.eval_pack_ok = true;
732    }
733
734    let mut fixtures_problem = false;
735    for fixture in &manifest.fixtures {
736        let path = bundle_dir.join(&fixture.path);
737        if !path.exists() {
738            validation
739                .problems
740                .push(format!("missing fixture {}", path.display()));
741            fixtures_problem = true;
742            continue;
743        }
744        if !fixture.redacted {
745            validation.problems.push(format!(
746                "fixture {} is not marked redacted; bundle must not ship raw private payloads",
747                fixture.path
748            ));
749            fixtures_problem = true;
750        }
751    }
752    validation.fixtures_ok = !fixtures_problem;
753
754    if !manifest.redaction.applied && !manifest.fixtures.is_empty() {
755        validation
756            .problems
757            .push("redaction.applied is false but bundle includes fixtures".to_string());
758    } else {
759        validation.redaction_ok = true;
760    }
761    if !manifest
762        .required_secrets
763        .iter()
764        .all(|secret| secret_id_looks_logical(secret))
765    {
766        validation.problems.push(
767            "required_secrets contains a non-logical id (looks like a raw secret)".to_string(),
768        );
769    }
770
771    Ok(validation)
772}
773
774/// Replay shadow comparison from a bundle: re-runs the deterministic
775/// shadow check in-process against the bundle's redacted fixtures, with
776/// no live side effects. Returns the manifest and the freshly computed
777/// `ShadowRunReport`. The returned report is suitable for cloud import or
778/// for asserting determinism in CI.
779pub fn shadow_replay_bundle(
780    bundle_dir: &Path,
781) -> Result<(CrystallizationBundleManifest, ShadowRunReport), VmError> {
782    let (manifest, traces) = load_crystallization_bundle(bundle_dir)?;
783    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
784    let bytes = std::fs::read(&report_path).map_err(|error| {
785        VmError::Runtime(format!(
786            "failed to read bundle report {}: {error}",
787            report_path.display()
788        ))
789    })?;
790    let report: CrystallizationReport = serde_json::from_slice(&bytes).map_err(|error| {
791        VmError::Runtime(format!(
792            "failed to decode bundle report {}: {error}",
793            report_path.display()
794        ))
795    })?;
796    let candidate = report
797        .selected_candidate_id
798        .as_deref()
799        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
800        .ok_or_else(|| {
801            VmError::Runtime(format!(
802                "bundle {} has no selected candidate to replay",
803                bundle_dir.display()
804            ))
805        })?;
806    let shadow = shadow_candidate(candidate, &traces);
807    Ok((manifest, shadow))
808}
809
810fn write_bytes(path: &Path, bytes: &[u8]) -> Result<(), VmError> {
811    crate::atomic_io::atomic_write(path, bytes)
812        .map_err(|error| VmError::Runtime(format!("failed to write {}: {error}", path.display())))
813}
814
/// Turn an arbitrary string into a fixture-name-safe token.
///
/// Every character other than an ASCII letter, ASCII digit, `-`, or `_` is
/// replaced with `_`; leading and trailing underscores are then stripped.
/// If nothing remains, the literal name `trace` is returned.
fn sanitize_fixture_name(raw: &str) -> String {
    let mut mapped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        let allowed = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_');
        mapped.push(if allowed { ch } else { '_' });
    }
    match mapped.trim_matches('_') {
        "" => "trace".to_string(),
        core => core.to_string(),
    }
}
832
/// Build a lowercase, dash-separated key from an arbitrary string.
///
/// Runs of ASCII letters and digits are kept (lowercased) and joined with a
/// single `-`; every other character acts purely as a separator, so the
/// result never starts or ends with a dash and never contains two dashes in
/// a row. When the input holds no ASCII alphanumerics at all, the fallback
/// key `crystallized-workflow` is returned.
fn sanitize_external_key(raw: &str) -> String {
    let slug = raw
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .map(str::to_ascii_lowercase)
        .collect::<Vec<_>>()
        .join("-");
    if slug.is_empty() {
        return "crystallized-workflow".to_string();
    }
    slug
}
853
854fn infer_bundle_title(candidate: Option<&WorkflowCandidate>, fallback_name: &str) -> String {
855    if let Some(candidate) = candidate {
856        format!(
857            "{} ({} step{})",
858            candidate.name,
859            candidate.steps.len(),
860            if candidate.steps.len() == 1 { "" } else { "s" }
861        )
862    } else {
863        format!("rejected: {fallback_name}")
864    }
865}
866
867fn infer_risk_level(candidate: Option<&WorkflowCandidate>) -> String {
868    let Some(candidate) = candidate else {
869        return "high".to_string();
870    };
871    let touches_external = candidate.side_effects.iter().any(side_effect_is_external);
872    let needs_secret = !candidate.required_secrets.is_empty();
873    if touches_external && needs_secret {
874        "high".to_string()
875    } else if touches_external || needs_secret {
876        "medium".to_string()
877    } else {
878        "low".to_string()
879    }
880}
881
882fn side_effect_is_external(effect: &CrystallizationSideEffect) -> bool {
883    let kind = effect.kind.to_ascii_lowercase();
884    if kind.is_empty() {
885        return false;
886    }
887    // Plan-only side effects stay inside Harn's own data plane: they
888    // write receipts, append to the in-process event log, or stash plans.
889    // None of those touch tenant-external systems.
890    let internal = kind.contains("receipt")
891        || kind.contains("event_log")
892        || kind.contains("memo")
893        || kind.contains("plan");
894    if internal {
895        return false;
896    }
897    kind.contains("post")
898        || kind.contains("write")
899        || kind.contains("publish")
900        || kind.contains("delete")
901        || kind.contains("send")
902}
903
904fn candidate_is_plan_only(candidate: &WorkflowCandidate) -> bool {
905    if candidate.steps.is_empty() {
906        return false;
907    }
908    candidate.side_effects.iter().all(|effect| {
909        let kind = effect.kind.to_ascii_lowercase();
910        // Plan-only side effects stay inside Harn's own data plane: receipt
911        // writes, in-memory event-log appends, file-only mutations, etc.
912        kind.is_empty()
913            || kind.contains("receipt")
914            || kind.contains("event_log")
915            || kind.contains("memo")
916            || kind.contains("plan")
917            || (kind.contains("file") && !kind.contains("publish"))
918    })
919}
920
921pub(super) fn redact_trace_for_bundle(trace: &mut CrystallizationTrace) {
922    for action in &mut trace.actions {
923        redact_bundle_value(&mut action.inputs);
924        if let Some(output) = action.output.as_mut() {
925            redact_bundle_value(output);
926        }
927        if let Some(observed) = action.observed_output.as_mut() {
928            redact_bundle_value(observed);
929        }
930        for value in action.parameters.values_mut() {
931            redact_bundle_value(value);
932        }
933        for (_, value) in action.metadata.iter_mut() {
934            redact_bundle_value(value);
935        }
936    }
937    for (_, value) in trace.metadata.iter_mut() {
938        redact_bundle_value(value);
939    }
940    if let Some(run) = trace.replay_run.as_mut() {
941        redact_replay_run_for_bundle(run);
942    }
943}
944
945fn redact_replay_run_for_bundle(run: &mut ReplayTraceRun) {
946    for value in run
947        .event_log_entries
948        .iter_mut()
949        .chain(run.trigger_firings.iter_mut())
950        .chain(run.llm_interactions.iter_mut())
951        .chain(run.protocol_interactions.iter_mut())
952        .chain(run.approval_interactions.iter_mut())
953        .chain(run.effect_receipts.iter_mut())
954        .chain(run.agent_transcript_deltas.iter_mut())
955        .chain(run.final_artifacts.iter_mut())
956        .chain(run.policy_decisions.iter_mut())
957    {
958        redact_bundle_value(value);
959    }
960}
961
962fn redact_bundle_value(value: &mut JsonValue) {
963    match value {
964        JsonValue::String(text) if looks_like_secret_value(text) => {
965            *text = "[redacted]".to_string();
966        }
967        JsonValue::Array(items) => {
968            for item in items {
969                redact_bundle_value(item);
970            }
971        }
972        JsonValue::Object(map) => {
973            for (key, child) in map.iter_mut() {
974                if is_sensitive_bundle_key(key) {
975                    *child = JsonValue::String("[redacted]".to_string());
976                } else {
977                    redact_bundle_value(child);
978                }
979            }
980        }
981        _ => {}
982    }
983}
984
/// Decide whether an object key names a value that must be redacted.
///
/// The comparison is ASCII case-insensitive. A key is sensitive when it
/// *contains* one of the credential fragments (`secret`, `token`,
/// `password`, or any of the `api_key` / `api-key` / `apikey` spellings), or
/// when it is *exactly* one of the credential-bearing HTTP header names
/// (`authorization`, `proxy-authorization`, `cookie`, `set-cookie`).
///
/// The hyphenated `api-key` fragment covers header-style keys such as
/// `X-API-Key`, and `proxy-authorization` covers the proxy counterpart of
/// `Authorization`; without them those credentials would only be redacted
/// if their value happened to match a known token shape.
fn is_sensitive_bundle_key(key: &str) -> bool {
    // Substring matches: these fragments mark a credential wherever they
    // appear in the key (e.g. `client_secret`, `access_token`, `x-api-key`).
    const SENSITIVE_FRAGMENTS: [&str; 6] = [
        "secret", "token", "password", "api_key", "api-key", "apikey",
    ];
    // Exact matches only, so unrelated keys such as `cookie_count` are kept.
    const SENSITIVE_HEADERS: [&str; 4] = [
        "authorization",
        "proxy-authorization",
        "cookie",
        "set-cookie",
    ];

    let lower = key.to_ascii_lowercase();
    SENSITIVE_FRAGMENTS
        .iter()
        .any(|fragment| lower.contains(*fragment))
        || SENSITIVE_HEADERS.contains(&lower.as_str())
}
996
/// Heuristically decide whether a string looks like a raw credential.
///
/// After trimming surrounding whitespace, the value is flagged when it
/// starts with one of the known prefixes (`sk-`, `ghp_`, `ghs_`, `xoxb-`,
/// `xoxp-`, `AKIA`; matching is case-sensitive), or when it is longer than
/// 48 bytes and consists solely of ASCII letters, ASCII digits, `_`, and
/// `-`.
fn looks_like_secret_value(value: &str) -> bool {
    const KNOWN_PREFIXES: [&str; 6] = ["sk-", "ghp_", "ghs_", "xoxb-", "xoxp-", "AKIA"];

    let candidate = value.trim();
    if KNOWN_PREFIXES
        .iter()
        .any(|prefix| candidate.starts_with(*prefix))
    {
        return true;
    }

    // Long opaque blobs: any non-ASCII byte fails the check below, so the
    // byte-wise scan agrees with a char-wise one.
    candidate.len() > 48
        && candidate
            .bytes()
            .all(|byte| byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'-')
}
1010
1011fn secret_id_looks_logical(value: &str) -> bool {
1012    !looks_like_secret_value(value) && !value.trim().is_empty()
1013}