Skip to main content

harn_vm/orchestration/
crystallize.rs

1//! Workflow crystallization primitives.
2//!
3//! This module keeps the first mining pass deliberately conservative: it
4//! accepts ordered trace actions, finds a repeated contiguous action sequence,
5//! extracts scalar parameters from fields that vary across examples, rejects
6//! divergent side effects, and emits a reviewable Harn skeleton plus shadow
7//! comparison metadata. Hosted surfaces can layer richer mining on top of this
8//! stable IR without changing the CLI contract.
9
10use std::collections::{BTreeMap, BTreeSet};
11use std::fmt::Write as _;
12use std::path::{Path, PathBuf};
13
14use serde::{Deserialize, Serialize};
15use serde_json::{json, Value as JsonValue};
16use sha2::{Digest, Sha256};
17
18use super::{
19    new_id, now_rfc3339, run_replay_oracle_trace, ReplayAllowlistRule, ReplayExpectation,
20    ReplayFixture, ReplayOracleReport, ReplayOracleTrace, ReplayTraceRun, RunRecord,
21};
22use crate::value::VmError;
23
/// Version reported in `CrystallizationInputFormat::version` of emitted reports.
const TRACE_SCHEMA_VERSION: u32 = 1;
/// Lower bound on the number of traces `crystallize_traces` requires; a
/// smaller `CrystallizeOptions::min_examples` is raised to this value.
const DEFAULT_MIN_EXAMPLES: usize = 2;
/// NOTE(review): presumably the fallback promotion confidence threshold used
/// when `CrystallizeOptions::promotion_min_confidence` is not set — confirm
/// against `refresh_promotion_metadata`.
const DEFAULT_PROMOTION_MIN_CONFIDENCE: f64 = 0.80;
27
28/// Stable schema marker for `candidate.json` inside a crystallization
29/// bundle. Cloud importers and other downstream consumers should refuse
30/// bundles whose `schema` field is anything else.
31pub const BUNDLE_SCHEMA: &str = "harn.crystallization.candidate.bundle";
32/// Versioned schema number for the bundle manifest. Cloud importers and
33/// other consumers should refuse bundles whose `schema_version` is newer
34/// than the highest version they understand.
35pub const BUNDLE_SCHEMA_VERSION: u32 = 1;
/// Conventional file names inside a crystallization bundle directory.
///
/// Bundle manifest file; this is the `candidate.json` that
/// [`BUNDLE_SCHEMA`] and [`BUNDLE_SCHEMA_VERSION`] describe.
pub const BUNDLE_MANIFEST_FILE: &str = "candidate.json";
/// Report file name. NOTE(review): presumably a serialized
/// [`CrystallizationReport`] — confirm against the bundle writer.
pub const BUNDLE_REPORT_FILE: &str = "report.json";
/// File name for the generated Harn workflow source.
pub const BUNDLE_WORKFLOW_FILE: &str = "workflow.harn";
/// File name for the generated eval pack (TOML).
pub const BUNDLE_EVAL_PACK_FILE: &str = "harn.eval.toml";
/// Name of the fixtures subdirectory (a directory, not a file).
pub const BUNDLE_FIXTURES_DIR: &str = "fixtures";
42
43/// Default rollout policy applied when a bundle is emitted without one
44/// explicitly configured. Hosted promotion surfaces can override it.
45const DEFAULT_ROLLOUT_POLICY: &str = "shadow_then_canary";
46
/// One recorded run, expressed as an ordered list of actions, that the
/// crystallizer can mine.
///
/// Because of `#[serde(default)]`, any field missing from the serialized
/// form deserializes to its `Default` value. Emitted reports advertise `id`
/// and `actions` as the required fields of the input format.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationTrace {
    pub version: u32,
    pub id: String,
    // Filled with the file path by `load_crystallization_trace` when absent.
    pub source: Option<String>,
    // Filled with a hash of the file content by `load_crystallization_trace`
    // when absent.
    pub source_hash: Option<String>,
    pub workflow_id: Option<String>,
    pub started_at: Option<String>,
    pub finished_at: Option<String>,
    pub flow: Option<CrystallizationFlowRef>,
    // Ordered actions; each becomes one step in the single-trace path.
    pub actions: Vec<CrystallizationAction>,
    pub replay_run: Option<ReplayTraceRun>,
    pub replay_allowlist: Vec<ReplayAllowlistRule>,
    pub usage: CrystallizationUsage,
    pub metadata: BTreeMap<String, JsonValue>,
}
64
/// Optional provenance references attached to a trace
/// (`CrystallizationTrace::flow`).
///
/// All fields default when missing from the serialized form.
/// NOTE(review): the exact meaning of atom/slice ids is defined by the Flow
/// subsystem, not by this module — confirm before relying on them.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CrystallizationFlowRef {
    pub trace_id: Option<String>,
    pub agent_run_id: Option<String>,
    pub transcript_ref: Option<String>,
    pub atom_ids: Vec<String>,
    pub slice_ids: Vec<String>,
}
74
/// A single recorded action inside a [`CrystallizationTrace`].
///
/// All fields default when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationAction {
    pub id: String,
    // Free-form kind tag. This module special-cases "model_call",
    // "shell_failure" and "agent_recovery_advice".
    pub kind: String,
    pub name: String,
    pub timestamp: Option<String>,
    pub inputs: JsonValue,
    pub output: Option<JsonValue>,
    // Preferred over `output` when deriving a step's expected output.
    pub observed_output: Option<JsonValue>,
    // Scalar entries become workflow parameters in the single-trace path.
    pub parameters: BTreeMap<String, JsonValue>,
    pub side_effects: Vec<CrystallizationSideEffect>,
    pub capabilities: Vec<String>,
    pub required_secrets: Vec<String>,
    pub approval: Option<CrystallizationApproval>,
    pub cost: CrystallizationCost,
    pub duration_ms: Option<i64>,
    pub deterministic: Option<bool>,
    // `Some(true)` (or `kind == "model_call"`) marks the step as fuzzy.
    pub fuzzy: Option<bool>,
    // `review_notes_for_action` reads the "success" (bool) and
    // "recovery_hint" (string) keys from here.
    pub metadata: BTreeMap<String, JsonValue>,
}
96
/// A side effect recorded for an action and carried over unchanged onto the
/// corresponding workflow step and candidate.
///
/// All fields default when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationSideEffect {
    pub kind: String,
    pub target: String,
    pub capability: Option<String>,
    pub mutation: Option<String>,
    pub metadata: BTreeMap<String, JsonValue>,
}
106
/// Approval information attached to an action; copied onto the matching
/// step and collected into `WorkflowCandidate::approval_points`.
///
/// All fields default when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CrystallizationApproval {
    pub prompt: Option<String>,
    pub approver: Option<String>,
    pub required: bool,
    pub boundary: Option<String>,
}
115
/// Cost figures recorded for a single action
/// (`CrystallizationAction::cost`).
///
/// All fields default (zero / `None`) when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationCost {
    pub model: Option<String>,
    pub model_calls: i64,
    pub input_tokens: i64,
    pub output_tokens: i64,
    pub total_cost_usd: f64,
    pub wall_ms: i64,
}
126
/// Usage figures recorded for a whole trace
/// (`CrystallizationTrace::usage`).
///
/// All fields default to zero when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationUsage {
    pub model_calls: i64,
    pub input_tokens: i64,
    pub output_tokens: i64,
    pub total_cost_usd: f64,
    pub wall_ms: i64,
}
136
/// Classification of a workflow step. Serialized in `snake_case`
/// (`"deterministic"` / `"fuzzy"`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SegmentKind {
    /// Default classification.
    #[default]
    Deterministic,
    /// Assigned in the single-trace path when the action is flagged `fuzzy`
    /// or its kind is `"model_call"`.
    Fuzzy,
}
144
/// Index pair locating one occurrence of a sequence. The single-trace path
/// passes `(0, 0)`.
/// NOTE(review): presumably `(trace index, start action index)` — confirm
/// against `mine_candidates`.
type SequenceExample = (usize, usize);
/// A sequence of strings paired with its occurrences.
/// NOTE(review): the strings are presumably action signatures — confirm
/// against `mine_candidates`.
type RepeatedSequence = (Vec<String>, Vec<SequenceExample>);
147
/// A parameter exposed by a workflow candidate.
///
/// All fields default when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowCandidateParameter {
    // In the single-trace path this is the sanitized parameter key.
    pub name: String,
    // Where the values were observed, e.g. `steps[0].parameters.<key>`.
    pub source_paths: Vec<String>,
    // Sample values rendered as strings (the single-trace path keeps at
    // most five).
    pub examples: Vec<String>,
    pub required: bool,
}
156
/// One step of a workflow candidate, derived from a trace action.
///
/// All fields default when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowCandidateStep {
    // 1-based position in the single-trace path (action index + 1).
    pub index: usize,
    pub kind: String,
    pub name: String,
    pub segment: SegmentKind,
    // Names of the candidate parameters this step uses.
    pub parameter_refs: Vec<String>,
    pub constants: BTreeMap<String, JsonValue>,
    pub preconditions: Vec<String>,
    pub side_effects: Vec<CrystallizationSideEffect>,
    pub capabilities: Vec<String>,
    pub required_secrets: Vec<String>,
    pub approval: Option<CrystallizationApproval>,
    // The action's `observed_output` when present, otherwise its `output`.
    pub expected_output: Option<JsonValue>,
    pub review_notes: Vec<String>,
}
174
/// Reference to the span of a source trace that backs a candidate.
///
/// All fields default when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowCandidateExample {
    pub trace_id: String,
    // Empty string when the source trace carried no hash.
    pub source_hash: String,
    // Index of the first covered action (0 in the single-trace path).
    pub start_index: usize,
    // Ids of the covered actions, in trace order.
    pub action_ids: Vec<String>,
}
183
/// Promotion bookkeeping attached to a workflow candidate.
///
/// All fields default when missing from the serialized form. The
/// single-trace builder fills the identity fields (hashes, author,
/// approver, timestamps, version, package name, capabilities, secrets,
/// rollback target, eval pack link) and leaves the rest at their defaults.
/// NOTE(review): the remaining fields are presumably populated by
/// `refresh_promotion_metadata` — confirm.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct PromotionMetadata {
    pub source_trace_hashes: Vec<String>,
    pub author: Option<String>,
    pub approver: Option<String>,
    pub created_at: String,
    pub version: String,
    pub package_name: String,
    pub capability_set: Vec<String>,
    pub secrets_required: Vec<String>,
    pub rollback_target: Option<String>,
    pub eval_pack_link: Option<String>,
    pub sample_count: usize,
    pub confidence: f64,
    pub shadow_success_count: usize,
    pub shadow_failure_count: usize,
    pub divergence_history: Vec<PromotionDivergenceRecord>,
    pub approval_history: Vec<PromotionApprovalRecord>,
    pub criteria: PromotionCriteria,
    pub estimated_time_token_savings: SavingsEstimate,
}
206
/// Promotion gate settings together with the evaluated `status` and the
/// `reasons` behind it.
///
/// All fields default when missing from the serialized form; the default
/// `status` is [`PromotionStatus::Blocked`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct PromotionCriteria {
    pub min_examples: usize,
    pub min_confidence: f64,
    pub requires_shadow_pass: bool,
    pub requires_no_rejections: bool,
    pub requires_human_approval: bool,
    pub approval_reason: Option<String>,
    pub status: PromotionStatus,
    pub reasons: Vec<String>,
}
219
/// Promotion state of a candidate. Serialized in `snake_case`
/// (`"blocked"`, `"needs_approval"`, `"ready"`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PromotionStatus {
    /// Default state.
    #[default]
    Blocked,
    NeedsApproval,
    Ready,
}
228
/// One recorded divergence, kept in
/// `PromotionMetadata::divergence_history`.
///
/// All fields default when missing from the serialized form.
/// NOTE(review): `left`/`right` presumably hold the two compared values —
/// confirm which side is expected and which is observed.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct PromotionDivergenceRecord {
    pub trace_id: String,
    pub path: Option<String>,
    pub message: String,
    pub left: Option<JsonValue>,
    pub right: Option<JsonValue>,
}
238
/// One approval decision, kept in `PromotionMetadata::approval_history`.
///
/// All fields default when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct PromotionApprovalRecord {
    pub actor: String,
    pub decision: String,
    pub recorded_at: String,
    pub reason: Option<String>,
}
247
/// Estimated savings from running a crystallized workflow instead of the
/// original trace; produced by `estimate_savings`.
///
/// All fields default to zero when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct SavingsEstimate {
    pub model_calls_avoided: i64,
    pub input_tokens_avoided: i64,
    pub output_tokens_avoided: i64,
    pub estimated_cost_usd_avoided: f64,
    pub wall_ms_avoided: i64,
    pub cpu_runtime_cost_usd: f64,
    pub remaining_model_calls: i64,
}
259
/// Per-trace outcome of a shadow comparison; collected in
/// `ShadowRunReport::traces`.
///
/// All fields default when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ShadowTraceResult {
    pub trace_id: String,
    pub source_hash: String,
    pub pass: bool,
    pub details: Vec<String>,
    pub compared_receipts: usize,
    pub replay_oracle: Option<ReplayOracleReport>,
}
270
/// Result of shadow-comparing a candidate against a pool of traces.
///
/// All fields default when missing from the serialized form, so the default
/// report has `pass == false`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ShadowRunReport {
    pub pass: bool,
    pub compared_traces: usize,
    // When `pass` is false these messages are appended to the candidate's
    // `rejection_reasons`.
    pub failures: Vec<String>,
    pub traces: Vec<ShadowTraceResult>,
}
279
/// Key describing what a candidate's examples have in common; produced by
/// `cluster_key_for_candidate`.
///
/// All fields default when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowClusterKey {
    pub goal: Option<String>,
    pub tool_sequence: Vec<String>,
    pub touched_artifact_types: Vec<String>,
    pub success_criteria: Vec<String>,
}
288
/// A proposed workflow extracted from one or more traces, together with the
/// evidence and review metadata needed to decide whether to promote it.
///
/// All fields default when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowCandidate {
    pub id: String,
    pub name: String,
    // Used to rank accepted candidates (higher first).
    pub confidence: f64,
    pub cluster_key: WorkflowClusterKey,
    // One signature string per step's source action.
    pub sequence_signature: Vec<String>,
    pub parameters: Vec<WorkflowCandidateParameter>,
    pub steps: Vec<WorkflowCandidateStep>,
    pub examples: Vec<WorkflowCandidateExample>,
    // Union of the per-step capabilities.
    pub capabilities: Vec<String>,
    // Union of the per-step required secrets.
    pub required_secrets: Vec<String>,
    pub approval_points: Vec<CrystallizationApproval>,
    pub side_effects: Vec<CrystallizationSideEffect>,
    pub expected_outputs: Vec<JsonValue>,
    pub expected_receipts: Vec<JsonValue>,
    pub warnings: Vec<String>,
    // Non-empty means the candidate is reported as rejected.
    pub rejection_reasons: Vec<String>,
    pub promotion: PromotionMetadata,
    pub savings: SavingsEstimate,
    pub shadow: ShadowRunReport,
}
312
impl WorkflowCandidate {
    /// Returns `true` when no rejection reasons have been recorded for this
    /// candidate.
    pub fn is_safe_to_propose(&self) -> bool {
        self.rejection_reasons.is_empty()
    }
}
318
/// Top-level result of a crystallization run: accepted and rejected
/// candidates plus a description of the input format.
///
/// All fields default when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationReport {
    pub version: u32,
    pub generated_at: String,
    pub source_trace_count: usize,
    pub excluded_trace_count: usize,
    // Id of the first accepted candidate, if any.
    pub selected_candidate_id: Option<String>,
    // Accepted candidates (those with no rejection reasons).
    pub candidates: Vec<WorkflowCandidate>,
    pub rejected_candidates: Vec<WorkflowCandidate>,
    pub warnings: Vec<String>,
    pub input_format: CrystallizationInputFormat,
    // Set by `write_crystallization_artifacts` once the workflow is written.
    pub harn_code_path: Option<String>,
    pub eval_pack_path: Option<String>,
    /// Optional plain-language explanation of which steps are safe to
    /// automate vs. which still require human/agent review. Populated by
    /// the release-fixture ingest path so reviewers can inspect a
    /// candidate without re-deriving the deterministic/agentic split.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub segment_summary: Option<SegmentSummary>,
    /// Optional summary of how shell/tool failures were represented and
    /// whether failure context was fed back into a model. Populated by
    /// the release-fixture ingest path.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub recovery_summary: Option<RecoveryFeedbackSummary>,
}
345
/// Plain-language summary of the deterministic/agentic split for a
/// candidate. Designed to be human-readable in `report.json` without a
/// reviewer needing to walk the step list manually.
///
/// This module only passes the summary through into the report; the
/// contents are supplied by the caller.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct SegmentSummary {
    pub deterministic_count: usize,
    pub agentic_count: usize,
    // NOTE(review): presumably step names or descriptions — confirm against
    // the ingest path that builds this summary.
    pub safe_to_automate: Vec<String>,
    pub requires_human_review: Vec<String>,
    pub plain_language: String,
}
358
/// Summary of how shell/tool failures were represented in the source
/// trace and whether the failure context was fed back into the model.
/// Lets reviewers see at a glance whether recovery was advisory only or
/// whether the workflow attempted to repair itself.
///
/// This module only passes the summary through into the report; the
/// contents are supplied by the caller.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RecoveryFeedbackSummary {
    pub shell_failures_seen: usize,
    pub recovery_advice_runs: usize,
    /// `true` when at least one recovery action observed by the source
    /// trace fed the failing-step context into a model loop (vs. just
    /// recording the failure for human review).
    pub failures_fed_into_agent: bool,
    pub failed_steps: Vec<String>,
    /// Plain-language explanation of how recovery was represented.
    pub representation: String,
}
376
/// Description of the trace input format, embedded in every report.
///
/// All fields default when missing from the serialized form.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CrystallizationInputFormat {
    // The builders in this module emit "harn.crystallization.trace".
    pub name: String,
    pub version: u32,
    // Field names a trace must provide (emitted as "id" and "actions").
    pub required_fields: Vec<String>,
    // Human-readable list of what the format preserves.
    pub preserved_fields: Vec<String>,
}
385
/// Caller-supplied knobs for a crystallization run.
#[derive(Clone, Debug, Default)]
pub struct CrystallizeOptions {
    // Raised to at least `DEFAULT_MIN_EXAMPLES` by `crystallize_traces`.
    pub min_examples: usize,
    // Extra traces added to the shadow-comparison pool (not mined).
    pub shadow_traces: Vec<CrystallizationTrace>,
    pub promotion_min_confidence: f64,
    // Overrides the inferred workflow name when set.
    pub workflow_name: Option<String>,
    // Overrides the package name; the single-trace path otherwise derives it
    // from the workflow name with '_' replaced by '-'.
    pub package_name: Option<String>,
    pub author: Option<String>,
    pub approver: Option<String>,
    pub eval_pack_link: Option<String>,
}
397
/// Everything a crystallization run produces, held in memory until written
/// out.
#[derive(Clone, Debug, Default)]
pub struct CrystallizationArtifacts {
    pub report: CrystallizationReport,
    // Generated workflow source, or a rejection stub when nothing was
    // accepted.
    pub harn_code: String,
    // Generated eval pack; empty when no candidate was selected.
    pub eval_pack_toml: String,
}
404
405pub fn crystallize_traces(
406    traces: Vec<CrystallizationTrace>,
407    options: CrystallizeOptions,
408) -> Result<CrystallizationArtifacts, VmError> {
409    let min_examples = options.min_examples.max(DEFAULT_MIN_EXAMPLES);
410    if traces.len() < min_examples {
411        return Err(VmError::Runtime(format!(
412            "crystallize requires at least {min_examples} traces, got {}",
413            traces.len()
414        )));
415    }
416
417    let normalized_all = traces.into_iter().map(normalize_trace).collect::<Vec<_>>();
418    let (normalized, excluded_mining) = partition_mineable_traces(normalized_all);
419    if normalized.len() < min_examples {
420        return Err(VmError::Runtime(format!(
421            "crystallize requires at least {min_examples} eligible traces, got {} (excluded {})",
422            normalized.len(),
423            excluded_mining.len()
424        )));
425    }
426    let (shadow_traces, excluded_shadow) = partition_mineable_traces(
427        options
428            .shadow_traces
429            .iter()
430            .cloned()
431            .map(normalize_trace)
432            .collect(),
433    );
434    let mut shadow_pool = normalized.clone();
435    shadow_pool.extend(shadow_traces);
436
437    let mut candidates = mine_candidates(&normalized, min_examples, &options);
438    let mut rejected_candidates = Vec::new();
439    for candidate in &mut candidates {
440        candidate.shadow = shadow_candidate(candidate, &shadow_pool);
441        if !candidate.shadow.pass {
442            candidate
443                .rejection_reasons
444                .extend(candidate.shadow.failures.clone());
445        }
446        refresh_promotion_metadata(candidate, min_examples, &options);
447    }
448
449    let mut accepted = Vec::new();
450    for candidate in candidates {
451        if candidate.is_safe_to_propose() {
452            accepted.push(candidate);
453        } else {
454            rejected_candidates.push(candidate);
455        }
456    }
457    accepted.sort_by(|left, right| {
458        right
459            .confidence
460            .partial_cmp(&left.confidence)
461            .unwrap_or(std::cmp::Ordering::Equal)
462            .then_with(|| right.steps.len().cmp(&left.steps.len()))
463    });
464
465    let selected = accepted.first();
466    let harn_code = selected
467        .map(generate_harn_code)
468        .unwrap_or_else(|| rejected_workflow_stub(&rejected_candidates));
469    let eval_pack_toml = selected.map(generate_eval_pack).unwrap_or_default();
470
471    let report = CrystallizationReport {
472        version: 1,
473        generated_at: now_rfc3339(),
474        source_trace_count: normalized.len(),
475        excluded_trace_count: excluded_mining.len() + excluded_shadow.len(),
476        selected_candidate_id: selected.map(|candidate| candidate.id.clone()),
477        candidates: accepted,
478        rejected_candidates,
479        warnings: Vec::new(),
480        input_format: CrystallizationInputFormat {
481            name: "harn.crystallization.trace".to_string(),
482            version: TRACE_SCHEMA_VERSION,
483            required_fields: vec!["id".to_string(), "actions".to_string()],
484            preserved_fields: vec![
485                "ordered actions".to_string(),
486                "tool calls".to_string(),
487                "model calls".to_string(),
488                "human approvals".to_string(),
489                "file mutations".to_string(),
490                "external API calls".to_string(),
491                "observed outputs".to_string(),
492                "costs".to_string(),
493                "timestamps".to_string(),
494                "source hashes".to_string(),
495                "Flow provenance references".to_string(),
496            ],
497        },
498        harn_code_path: None,
499        eval_pack_path: None,
500        segment_summary: None,
501        recovery_summary: None,
502    };
503
504    Ok(CrystallizationArtifacts {
505        report,
506        harn_code,
507        eval_pack_toml,
508    })
509}
510
511/// Synthesize a single-trace crystallization candidate without going
512/// through repeated-sequence mining. Used by the release-fixture ingest
513/// path where the trace is the workflow: there is exactly one example
514/// and the deterministic/agentic split comes from the source events
515/// directly. The resulting [`CrystallizationArtifacts`] is bundle-ready
516/// (`build_crystallization_bundle`) and validates with the existing
517/// validator.
518///
519/// `extra_parameters` and `extra_metadata` let callers seed parameters
520/// (e.g., release identity fields like `current_version` /
521/// `next_version`) and metadata (segment / recovery summaries) that the
522/// trace alone does not surface.
523pub fn synthesize_candidate_from_trace(
524    trace: CrystallizationTrace,
525    options: CrystallizeOptions,
526    extra_parameters: Vec<WorkflowCandidateParameter>,
527    segment_summary: Option<SegmentSummary>,
528    recovery_summary: Option<RecoveryFeedbackSummary>,
529) -> Result<CrystallizationArtifacts, VmError> {
530    if trace.actions.is_empty() {
531        return Err(VmError::Runtime(
532            "synthesize_candidate_from_trace requires a trace with at least one action".to_string(),
533        ));
534    }
535    let normalized = normalize_trace(trace);
536    let mut candidate = build_single_trace_candidate(&normalized, &options, extra_parameters);
537    candidate.shadow = shadow_candidate(&candidate, std::slice::from_ref(&normalized));
538    if !candidate.shadow.pass {
539        candidate
540            .rejection_reasons
541            .extend(candidate.shadow.failures.clone());
542    }
543    refresh_promotion_metadata(&mut candidate, 1, &options);
544    let (accepted, rejected) = if candidate.is_safe_to_propose() {
545        (vec![candidate], Vec::new())
546    } else {
547        (Vec::new(), vec![candidate])
548    };
549
550    let selected = accepted.first();
551    let harn_code = selected
552        .map(generate_harn_code)
553        .unwrap_or_else(|| rejected_workflow_stub(&rejected));
554    let eval_pack_toml = selected.map(generate_eval_pack).unwrap_or_default();
555    let selected_id = selected.map(|candidate| candidate.id.clone());
556
557    let report = CrystallizationReport {
558        version: 1,
559        generated_at: now_rfc3339(),
560        source_trace_count: 1,
561        excluded_trace_count: 0,
562        selected_candidate_id: selected_id,
563        candidates: accepted,
564        rejected_candidates: rejected,
565        warnings: Vec::new(),
566        input_format: CrystallizationInputFormat {
567            name: "harn.crystallization.trace".to_string(),
568            version: TRACE_SCHEMA_VERSION,
569            required_fields: vec!["id".to_string(), "actions".to_string()],
570            preserved_fields: vec![
571                "ordered actions".to_string(),
572                "deterministic events".to_string(),
573                "agentic events".to_string(),
574                "tool/shell observations".to_string(),
575                "recovery advice".to_string(),
576                "release metadata".to_string(),
577                "source hashes".to_string(),
578            ],
579        },
580        harn_code_path: None,
581        eval_pack_path: None,
582        segment_summary,
583        recovery_summary,
584    };
585
586    Ok(CrystallizationArtifacts {
587        report,
588        harn_code,
589        eval_pack_toml,
590    })
591}
592
593/// Build a single-example candidate directly from a normalized trace.
594/// This is the single-trace analog of `mine_candidates`: every action
595/// becomes a step, every action's `fuzzy`/`model_call` flag drives the
596/// segment kind, and parameters are taken from each action's
597/// `parameters` map (plus any caller-supplied `extra_parameters`).
598fn build_single_trace_candidate(
599    trace: &CrystallizationTrace,
600    options: &CrystallizeOptions,
601    extra_parameters: Vec<WorkflowCandidateParameter>,
602) -> WorkflowCandidate {
603    let mut steps = Vec::with_capacity(trace.actions.len());
604    let mut parameter_values: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
605    let mut parameter_paths: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
606    let mut warnings = Vec::new();
607    let sequence: Vec<String> = trace.actions.iter().map(action_signature).collect();
608
609    for (idx, action) in trace.actions.iter().enumerate() {
610        let mut parameter_refs = BTreeSet::new();
611        for (name, value) in &action.parameters {
612            if is_scalar(value) {
613                let ident = sanitize_identifier(name);
614                parameter_values
615                    .entry(ident.clone())
616                    .or_default()
617                    .insert(json_scalar_string(value));
618                parameter_paths
619                    .entry(ident.clone())
620                    .or_default()
621                    .insert(format!("steps[{idx}].parameters.{name}"));
622                parameter_refs.insert(ident);
623            }
624        }
625
626        let fuzzy = action.fuzzy.unwrap_or(false) || action.kind == "model_call";
627        if fuzzy {
628            warnings.push(format!(
629                "step {} '{}' remains fuzzy and requires review/LLM handling",
630                idx + 1,
631                action.name
632            ));
633        }
634
635        steps.push(WorkflowCandidateStep {
636            index: idx + 1,
637            kind: action.kind.clone(),
638            name: action.name.clone(),
639            segment: if fuzzy {
640                SegmentKind::Fuzzy
641            } else {
642                SegmentKind::Deterministic
643            },
644            parameter_refs: parameter_refs.into_iter().collect(),
645            constants: constants_for_action(action),
646            preconditions: preconditions_for_action(action),
647            side_effects: action.side_effects.clone(),
648            capabilities: sorted_strings(action.capabilities.iter().cloned()),
649            required_secrets: sorted_strings(action.required_secrets.iter().cloned()),
650            approval: action.approval.clone(),
651            expected_output: action
652                .observed_output
653                .clone()
654                .or_else(|| action.output.clone()),
655            review_notes: review_notes_for_action(action),
656        });
657    }
658
659    let mut parameters: Vec<WorkflowCandidateParameter> = parameter_values
660        .iter()
661        .map(|(name, values)| WorkflowCandidateParameter {
662            name: name.clone(),
663            source_paths: parameter_paths
664                .get(name)
665                .map(|paths| paths.iter().cloned().collect())
666                .unwrap_or_default(),
667            examples: values.iter().take(5).cloned().collect(),
668            required: true,
669        })
670        .collect();
671    // Caller-supplied extras (e.g. release metadata fields) take
672    // precedence and are appended in stable order. De-dupe by name so a
673    // caller-supplied parameter overrides any same-name parameter
674    // discovered from action parameters.
675    let extra_names: BTreeSet<String> = extra_parameters.iter().map(|p| p.name.clone()).collect();
676    parameters.retain(|p| !extra_names.contains(&p.name));
677    parameters.extend(extra_parameters);
678    parameters.sort_by(|left, right| left.name.cmp(&right.name));
679
680    let example_refs = vec![WorkflowCandidateExample {
681        trace_id: trace.id.clone(),
682        source_hash: trace.source_hash.clone().unwrap_or_default(),
683        start_index: 0,
684        action_ids: trace
685            .actions
686            .iter()
687            .map(|action| action.id.clone())
688            .collect(),
689    }];
690
691    let capabilities = sorted_strings(
692        steps
693            .iter()
694            .flat_map(|step| step.capabilities.iter().cloned()),
695    );
696    let required_secrets = sorted_strings(
697        steps
698            .iter()
699            .flat_map(|step| step.required_secrets.iter().cloned()),
700    );
701    let approval_points = steps
702        .iter()
703        .filter_map(|step| step.approval.clone())
704        .collect::<Vec<_>>();
705    let side_effects = sorted_side_effects(
706        steps
707            .iter()
708            .flat_map(|step| step.side_effects.iter().cloned())
709            .collect(),
710    );
711    let expected_outputs = steps
712        .iter()
713        .filter_map(|step| step.expected_output.clone())
714        .collect::<Vec<_>>();
715    let expected_receipts = expected_receipts_for_examples(std::slice::from_ref(trace), &[(0, 0)]);
716
717    let savings = estimate_savings(std::slice::from_ref(trace), &[(0, 0)], &steps);
718    let confidence = confidence_for(&[(0, 0)], 1, &steps, true);
719    let name = options
720        .workflow_name
721        .clone()
722        .unwrap_or_else(|| infer_workflow_name(&steps));
723    let package_name = options
724        .package_name
725        .clone()
726        .unwrap_or_else(|| name.replace('_', "-"));
727
728    WorkflowCandidate {
729        id: stable_candidate_id(&sequence, &example_refs),
730        name,
731        confidence,
732        cluster_key: cluster_key_for_candidate(std::slice::from_ref(trace), &[(0, 0)], &steps),
733        sequence_signature: sequence,
734        parameters,
735        steps,
736        examples: example_refs.clone(),
737        capabilities: capabilities.clone(),
738        required_secrets: required_secrets.clone(),
739        approval_points,
740        side_effects,
741        expected_outputs,
742        expected_receipts,
743        warnings,
744        rejection_reasons: Vec::new(),
745        promotion: PromotionMetadata {
746            source_trace_hashes: example_refs
747                .iter()
748                .map(|example| example.source_hash.clone())
749                .collect(),
750            author: options.author.clone(),
751            approver: options.approver.clone(),
752            created_at: now_rfc3339(),
753            version: "0.1.0".to_string(),
754            package_name,
755            capability_set: capabilities,
756            secrets_required: required_secrets,
757            rollback_target: Some("keep source trace and previous package version".to_string()),
758            eval_pack_link: options.eval_pack_link.clone(),
759            ..PromotionMetadata::default()
760        },
761        savings,
762        shadow: ShadowRunReport::default(),
763    }
764}
765
766fn review_notes_for_action(action: &CrystallizationAction) -> Vec<String> {
767    let mut notes = Vec::new();
768    if action.kind == "shell_failure"
769        || matches!(
770            action
771                .metadata
772                .get("success")
773                .and_then(|value| value.as_bool()),
774            Some(false)
775        )
776    {
777        notes.push(format!(
778            "shell/tool step '{}' failed in the source trace; reviewer should confirm \
779             whether deterministic recovery is possible before promotion.",
780            action.name
781        ));
782    }
783    if let Some(hint) = action
784        .metadata
785        .get("recovery_hint")
786        .and_then(|value| value.as_str())
787        .filter(|hint| !hint.trim().is_empty())
788    {
789        notes.push(format!("recovery hint from source trace: {hint}"));
790    }
791    if action.kind == "agent_recovery_advice" {
792        notes.push(
793            "agent-authored recovery advice; treat as advisory, never as deterministic truth."
794                .to_string(),
795        );
796    }
797    notes
798}
799
800pub fn load_crystallization_traces_from_dir(
801    dir: &Path,
802) -> Result<Vec<CrystallizationTrace>, VmError> {
803    let mut paths = Vec::new();
804    collect_json_paths(dir, &mut paths)?;
805    if paths.is_empty() {
806        return Err(VmError::Runtime(format!(
807            "no .json trace files found under {}",
808            dir.display()
809        )));
810    }
811    paths.sort();
812    paths
813        .iter()
814        .map(|path| load_crystallization_trace(path))
815        .collect()
816}
817
818pub fn load_crystallization_trace(path: &Path) -> Result<CrystallizationTrace, VmError> {
819    let content = std::fs::read_to_string(path).map_err(|error| {
820        VmError::Runtime(format!(
821            "failed to read crystallization trace {}: {error}",
822            path.display()
823        ))
824    })?;
825    let value: JsonValue = serde_json::from_str(&content).map_err(|error| {
826        VmError::Runtime(format!(
827            "failed to parse crystallization trace {}: {error}",
828            path.display()
829        ))
830    })?;
831
832    let mut trace = if value.get("actions").is_some() {
833        serde_json::from_value::<CrystallizationTrace>(value.clone()).map_err(|error| {
834            VmError::Runtime(format!(
835                "failed to decode crystallization trace {}: {error}",
836                path.display()
837            ))
838        })?
839    } else if value.get("stages").is_some() || value.get("_type") == Some(&json!("workflow_run")) {
840        let run: RunRecord = serde_json::from_value(value.clone()).map_err(|error| {
841            VmError::Runtime(format!(
842                "failed to decode run record {} as crystallization input: {error}",
843                path.display()
844            ))
845        })?;
846        trace_from_run_record(run)
847    } else {
848        return Err(VmError::Runtime(format!(
849            "{} is neither a crystallization trace nor a workflow run record",
850            path.display()
851        )));
852    };
853    if trace.source.is_none() {
854        trace.source = Some(path.display().to_string());
855    }
856    if trace.source_hash.is_none() {
857        trace.source_hash = Some(hash_bytes(content.as_bytes()));
858    }
859    Ok(normalize_trace(trace))
860}
861
862pub fn write_crystallization_artifacts(
863    mut artifacts: CrystallizationArtifacts,
864    workflow_path: &Path,
865    report_path: &Path,
866    eval_pack_path: Option<&Path>,
867) -> Result<CrystallizationReport, VmError> {
868    crate::atomic_io::atomic_write(workflow_path, artifacts.harn_code.as_bytes()).map_err(
869        |error| {
870            VmError::Runtime(format!(
871                "failed to write generated workflow {}: {error}",
872                workflow_path.display()
873            ))
874        },
875    )?;
876
877    artifacts.report.harn_code_path = Some(workflow_path.display().to_string());
878    if let Some(path) = eval_pack_path {
879        if !artifacts.eval_pack_toml.trim().is_empty() {
880            crate::atomic_io::atomic_write(path, artifacts.eval_pack_toml.as_bytes()).map_err(
881                |error| {
882                    VmError::Runtime(format!(
883                        "failed to write eval pack {}: {error}",
884                        path.display()
885                    ))
886                },
887            )?;
888            artifacts.report.eval_pack_path = Some(path.display().to_string());
889            if let Some(candidate) = artifacts.report.candidates.first_mut() {
890                candidate.promotion.eval_pack_link = Some(path.display().to_string());
891            }
892        }
893    }
894
895    let report_json = serde_json::to_string_pretty(&artifacts.report)
896        .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
897    crate::atomic_io::atomic_write(report_path, report_json.as_bytes()).map_err(|error| {
898        VmError::Runtime(format!(
899            "failed to write crystallization report {}: {error}",
900            report_path.display()
901        ))
902    })?;
903    Ok(artifacts.report)
904}
905
906fn collect_json_paths(dir: &Path, out: &mut Vec<PathBuf>) -> Result<(), VmError> {
907    let entries = std::fs::read_dir(dir).map_err(|error| {
908        VmError::Runtime(format!(
909            "failed to read crystallization trace dir {}: {error}",
910            dir.display()
911        ))
912    })?;
913    for entry in entries {
914        let entry = entry.map_err(|error| {
915            VmError::Runtime(format!(
916                "failed to read entry in trace dir {}: {error}",
917                dir.display()
918            ))
919        })?;
920        let path = entry.path();
921        if path.is_dir() {
922            collect_json_paths(&path, out)?;
923        } else if path.extension().and_then(|ext| ext.to_str()) == Some("json") {
924            out.push(path);
925        }
926    }
927    Ok(())
928}
929
930fn trace_from_run_record(run: RunRecord) -> CrystallizationTrace {
931    let mut actions = Vec::new();
932    for stage in &run.stages {
933        actions.push(CrystallizationAction {
934            id: stage.id.clone(),
935            kind: if stage.kind.is_empty() {
936                "stage".to_string()
937            } else {
938                stage.kind.clone()
939            },
940            name: stage.node_id.clone(),
941            timestamp: Some(stage.started_at.clone()),
942            output: stage.visible_text.as_ref().map(|text| json!(text)),
943            observed_output: stage.visible_text.as_ref().map(|text| json!(text)),
944            duration_ms: stage.usage.as_ref().map(|usage| usage.total_duration_ms),
945            cost: stage
946                .usage
947                .as_ref()
948                .map(|usage| CrystallizationCost {
949                    model_calls: usage.call_count,
950                    input_tokens: usage.input_tokens,
951                    output_tokens: usage.output_tokens,
952                    total_cost_usd: usage.total_cost,
953                    wall_ms: usage.total_duration_ms,
954                    model: usage.models.first().cloned(),
955                })
956                .unwrap_or_default(),
957            deterministic: Some(
958                stage
959                    .usage
960                    .as_ref()
961                    .map(|usage| usage.call_count == 0)
962                    .unwrap_or(true),
963            ),
964            fuzzy: Some(
965                stage
966                    .usage
967                    .as_ref()
968                    .is_some_and(|usage| usage.call_count > 0),
969            ),
970            metadata: stage.metadata.clone(),
971            ..CrystallizationAction::default()
972        });
973    }
974    for tool in &run.tool_recordings {
975        actions.push(CrystallizationAction {
976            id: tool.tool_use_id.clone(),
977            kind: "tool_call".to_string(),
978            name: tool.tool_name.clone(),
979            timestamp: Some(tool.timestamp.clone()),
980            output: Some(json!(tool.result)),
981            observed_output: Some(json!(tool.result)),
982            duration_ms: Some(tool.duration_ms as i64),
983            deterministic: Some(!tool.is_rejected),
984            fuzzy: Some(false),
985            metadata: BTreeMap::from([
986                ("args_hash".to_string(), json!(tool.args_hash)),
987                ("iteration".to_string(), json!(tool.iteration)),
988                ("is_rejected".to_string(), json!(tool.is_rejected)),
989            ]),
990            ..CrystallizationAction::default()
991        });
992    }
993    for question in &run.hitl_questions {
994        actions.push(CrystallizationAction {
995            id: question.request_id.clone(),
996            kind: "human_approval".to_string(),
997            name: question.agent.clone(),
998            timestamp: Some(question.asked_at.clone()),
999            approval: Some(CrystallizationApproval {
1000                prompt: Some(question.prompt.clone()),
1001                required: true,
1002                boundary: Some("hitl".to_string()),
1003                ..CrystallizationApproval::default()
1004            }),
1005            deterministic: Some(false),
1006            fuzzy: Some(false),
1007            metadata: question
1008                .trace_id
1009                .as_ref()
1010                .map(|trace_id| BTreeMap::from([("trace_id".to_string(), json!(trace_id))]))
1011                .unwrap_or_default(),
1012            ..CrystallizationAction::default()
1013        });
1014    }
1015    actions.sort_by(|left, right| left.timestamp.cmp(&right.timestamp));
1016
1017    CrystallizationTrace {
1018        version: TRACE_SCHEMA_VERSION,
1019        id: run.id.clone(),
1020        workflow_id: Some(run.workflow_id.clone()),
1021        started_at: Some(run.started_at.clone()),
1022        finished_at: run.finished_at.clone(),
1023        actions,
1024        replay_run: run.replay_fixture.as_ref().map(replay_run_from_fixture),
1025        replay_allowlist: default_trace_replay_allowlist(),
1026        usage: run
1027            .usage
1028            .map(|usage| CrystallizationUsage {
1029                model_calls: usage.call_count,
1030                input_tokens: usage.input_tokens,
1031                output_tokens: usage.output_tokens,
1032                total_cost_usd: usage.total_cost,
1033                wall_ms: usage.total_duration_ms,
1034            })
1035            .unwrap_or_default(),
1036        metadata: run.metadata.clone(),
1037        ..CrystallizationTrace::default()
1038    }
1039}
1040
1041fn replay_run_from_fixture(fixture: &ReplayFixture) -> ReplayTraceRun {
1042    ReplayTraceRun {
1043        run_id: fixture.source_run_id.clone(),
1044        final_artifacts: fixture
1045            .stage_assertions
1046            .iter()
1047            .map(|assertion| {
1048                json!({
1049                    "node_id": assertion.node_id.clone(),
1050                    "expected_status": assertion.expected_status.clone(),
1051                    "expected_outcome": assertion.expected_outcome.clone(),
1052                    "expected_branch": assertion.expected_branch.clone(),
1053                    "required_artifact_kinds": assertion.required_artifact_kinds.clone(),
1054                    "visible_text_contains": assertion.visible_text_contains.clone(),
1055                })
1056            })
1057            .collect(),
1058        policy_decisions: vec![json!({
1059            "workflow_id": fixture.workflow_id.clone(),
1060            "expected_status": fixture.expected_status.clone(),
1061            "eval_kind": fixture.eval_kind.clone(),
1062        })],
1063        ..ReplayTraceRun::default()
1064    }
1065}
1066
1067fn default_trace_replay_allowlist() -> Vec<ReplayAllowlistRule> {
1068    vec![ReplayAllowlistRule {
1069        path: "/run_id".to_string(),
1070        reason: "run ids are allocated per execution".to_string(),
1071        replacement: None,
1072    }]
1073}
1074
1075fn normalize_trace(mut trace: CrystallizationTrace) -> CrystallizationTrace {
1076    if trace.version == 0 {
1077        trace.version = TRACE_SCHEMA_VERSION;
1078    }
1079    if trace.id.trim().is_empty() {
1080        trace.id = new_id("trace");
1081    }
1082    if trace.source_hash.is_none() {
1083        let payload = serde_json::to_vec(&trace.actions).unwrap_or_default();
1084        trace.source_hash = Some(hash_bytes(&payload));
1085    }
1086    for (idx, action) in trace.actions.iter_mut().enumerate() {
1087        if action.id.trim().is_empty() {
1088            action.id = format!("action_{}", idx + 1);
1089        }
1090        if action.kind.trim().is_empty() {
1091            action.kind = "action".to_string();
1092        }
1093        if action.name.trim().is_empty() {
1094            action.name = action.kind.clone();
1095        }
1096        action.capabilities.sort();
1097        action.capabilities.dedup();
1098        action.required_secrets.sort();
1099        action.required_secrets.dedup();
1100        action.side_effects = sorted_side_effects(std::mem::take(&mut action.side_effects));
1101        if action.cost.model_calls == 0 && action.kind == "model_call" {
1102            action.cost.model_calls = 1;
1103        }
1104    }
1105    if trace.usage == CrystallizationUsage::default() {
1106        for action in &trace.actions {
1107            trace.usage.model_calls += action.cost.model_calls;
1108            trace.usage.input_tokens += action.cost.input_tokens;
1109            trace.usage.output_tokens += action.cost.output_tokens;
1110            trace.usage.total_cost_usd += action.cost.total_cost_usd;
1111            trace.usage.wall_ms += action.cost.wall_ms + action.duration_ms.unwrap_or_default();
1112        }
1113    }
1114    trace
1115}
1116
1117fn partition_mineable_traces(
1118    traces: Vec<CrystallizationTrace>,
1119) -> (Vec<CrystallizationTrace>, Vec<CrystallizationTrace>) {
1120    traces
1121        .into_iter()
1122        .partition(|trace| trace_is_mineable(trace).is_ok())
1123}
1124
1125fn trace_is_mineable(trace: &CrystallizationTrace) -> Result<(), String> {
1126    if metadata_has_unresolved_policy_violation(&trace.metadata) {
1127        return Err("trace has unresolved policy violations".to_string());
1128    }
1129    for action in &trace.actions {
1130        if metadata_has_unresolved_policy_violation(&action.metadata) {
1131            return Err(format!(
1132                "action {} has unresolved policy violations",
1133                action.id
1134            ));
1135        }
1136        if action_has_nondeterministic_side_effect(action) {
1137            return Err(format!(
1138                "action {} has nondeterministic side effects",
1139                action.id
1140            ));
1141        }
1142    }
1143    Ok(())
1144}
1145
1146fn metadata_has_unresolved_policy_violation(metadata: &BTreeMap<String, JsonValue>) -> bool {
1147    for (key, value) in metadata {
1148        let lower = key.to_ascii_lowercase();
1149        if lower.contains("unresolved_policy_violation")
1150            || lower == "policy_violation_unresolved"
1151            || lower == "policy_violation"
1152        {
1153            if value.as_bool() == Some(true) {
1154                return true;
1155            }
1156            if value
1157                .as_str()
1158                .is_some_and(|text| text.eq_ignore_ascii_case("unresolved"))
1159            {
1160                return true;
1161            }
1162            if value.as_array().is_some_and(|items| !items.is_empty()) {
1163                return true;
1164            }
1165        }
1166        if lower == "policy_status"
1167            && value
1168                .as_str()
1169                .is_some_and(|text| text.eq_ignore_ascii_case("unresolved"))
1170        {
1171            return true;
1172        }
1173    }
1174    false
1175}
1176
1177fn action_has_nondeterministic_side_effect(action: &CrystallizationAction) -> bool {
1178    if action.side_effects.is_empty() {
1179        return false;
1180    }
1181    if action
1182        .metadata
1183        .get("nondeterministic_side_effect")
1184        .and_then(JsonValue::as_bool)
1185        == Some(true)
1186    {
1187        return true;
1188    }
1189    if action.deterministic == Some(false) && action.fuzzy.unwrap_or(false) {
1190        return true;
1191    }
1192    action.side_effects.iter().any(|effect| {
1193        effect
1194            .metadata
1195            .get("nondeterministic")
1196            .and_then(JsonValue::as_bool)
1197            == Some(true)
1198    })
1199}
1200
1201fn mine_candidates(
1202    traces: &[CrystallizationTrace],
1203    min_examples: usize,
1204    options: &CrystallizeOptions,
1205) -> Vec<WorkflowCandidate> {
1206    let signatures = traces
1207        .iter()
1208        .map(|trace| {
1209            trace
1210                .actions
1211                .iter()
1212                .map(action_signature)
1213                .collect::<Vec<_>>()
1214        })
1215        .collect::<Vec<_>>();
1216    let Some((sequence, examples)) = best_repeated_sequence(&signatures, min_examples) else {
1217        return Vec::new();
1218    };
1219
1220    let mut example_refs = Vec::new();
1221    for (trace_index, start_index) in &examples {
1222        let trace = &traces[*trace_index];
1223        example_refs.push(WorkflowCandidateExample {
1224            trace_id: trace.id.clone(),
1225            source_hash: trace.source_hash.clone().unwrap_or_default(),
1226            start_index: *start_index,
1227            action_ids: trace.actions[*start_index..*start_index + sequence.len()]
1228                .iter()
1229                .map(|action| action.id.clone())
1230                .collect(),
1231        });
1232    }
1233
1234    let mut steps = Vec::new();
1235    let mut parameter_values: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
1236    let mut parameter_paths: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
1237    let mut rejection_reasons = Vec::new();
1238    let mut warnings = Vec::new();
1239
1240    for step_index in 0..sequence.len() {
1241        let actions = examples
1242            .iter()
1243            .map(|(trace_index, start_index)| {
1244                &traces[*trace_index].actions[*start_index + step_index]
1245            })
1246            .collect::<Vec<_>>();
1247        let first = actions[0];
1248        if !compatible_side_effects(&actions) {
1249            rejection_reasons.push(format!(
1250                "step {} '{}' has divergent side effects across examples",
1251                step_index + 1,
1252                first.name
1253            ));
1254        }
1255
1256        let mut parameter_refs = BTreeSet::new();
1257        for action in &actions {
1258            for (name, value) in &action.parameters {
1259                if is_scalar(value) {
1260                    parameter_values
1261                        .entry(sanitize_identifier(name))
1262                        .or_default()
1263                        .insert(json_scalar_string(value));
1264                    parameter_paths
1265                        .entry(sanitize_identifier(name))
1266                        .or_default()
1267                        .insert(format!("steps[{step_index}].parameters.{name}"));
1268                    parameter_refs.insert(sanitize_identifier(name));
1269                }
1270            }
1271        }
1272        collect_varying_parameters(
1273            &actions,
1274            "inputs",
1275            |action| &action.inputs,
1276            &mut parameter_values,
1277            &mut parameter_paths,
1278            &mut parameter_refs,
1279        );
1280
1281        let fuzzy = first.fuzzy.unwrap_or(false)
1282            || first.kind == "model_call"
1283            || actions.iter().any(|action| action.fuzzy.unwrap_or(false));
1284        if fuzzy {
1285            warnings.push(format!(
1286                "step {} '{}' remains fuzzy and requires review/LLM handling",
1287                step_index + 1,
1288                first.name
1289            ));
1290        }
1291
1292        steps.push(WorkflowCandidateStep {
1293            index: step_index + 1,
1294            kind: first.kind.clone(),
1295            name: first.name.clone(),
1296            segment: if fuzzy {
1297                SegmentKind::Fuzzy
1298            } else {
1299                SegmentKind::Deterministic
1300            },
1301            parameter_refs: parameter_refs.into_iter().collect(),
1302            constants: constants_for_action(first),
1303            preconditions: preconditions_for_action(first),
1304            side_effects: first.side_effects.clone(),
1305            capabilities: sorted_strings(first.capabilities.iter().cloned()),
1306            required_secrets: sorted_strings(first.required_secrets.iter().cloned()),
1307            approval: first.approval.clone(),
1308            expected_output: stable_expected_output(&actions),
1309            review_notes: Vec::new(),
1310        });
1311    }
1312
1313    let parameters = parameter_values
1314        .iter()
1315        .map(|(name, values)| WorkflowCandidateParameter {
1316            name: name.clone(),
1317            source_paths: parameter_paths
1318                .get(name)
1319                .map(|paths| paths.iter().cloned().collect())
1320                .unwrap_or_default(),
1321            examples: values.iter().take(5).cloned().collect(),
1322            required: true,
1323        })
1324        .collect::<Vec<_>>();
1325
1326    let capabilities = sorted_strings(
1327        steps
1328            .iter()
1329            .flat_map(|step| step.capabilities.iter().cloned()),
1330    );
1331    let required_secrets = sorted_strings(
1332        steps
1333            .iter()
1334            .flat_map(|step| step.required_secrets.iter().cloned()),
1335    );
1336    let approval_points = steps
1337        .iter()
1338        .filter_map(|step| step.approval.clone())
1339        .collect::<Vec<_>>();
1340    let side_effects = sorted_side_effects(
1341        steps
1342            .iter()
1343            .flat_map(|step| step.side_effects.iter().cloned())
1344            .collect(),
1345    );
1346    let expected_outputs = steps
1347        .iter()
1348        .filter_map(|step| step.expected_output.clone())
1349        .collect::<Vec<_>>();
1350    let expected_receipts = expected_receipts_for_examples(traces, &examples);
1351    let savings = estimate_savings(traces, &examples, &steps);
1352    let confidence = confidence_for(
1353        &examples,
1354        traces.len(),
1355        &steps,
1356        rejection_reasons.is_empty(),
1357    );
1358    let name = options
1359        .workflow_name
1360        .clone()
1361        .unwrap_or_else(|| infer_workflow_name(&steps));
1362    let package_name = options
1363        .package_name
1364        .clone()
1365        .unwrap_or_else(|| name.replace('_', "-"));
1366
1367    vec![WorkflowCandidate {
1368        id: stable_candidate_id(&sequence, &example_refs),
1369        name,
1370        confidence,
1371        cluster_key: cluster_key_for_candidate(traces, &examples, &steps),
1372        sequence_signature: sequence,
1373        parameters,
1374        steps,
1375        examples: example_refs.clone(),
1376        capabilities: capabilities.clone(),
1377        required_secrets: required_secrets.clone(),
1378        approval_points,
1379        side_effects,
1380        expected_outputs,
1381        expected_receipts,
1382        warnings,
1383        rejection_reasons,
1384        promotion: PromotionMetadata {
1385            source_trace_hashes: example_refs
1386                .iter()
1387                .map(|example| example.source_hash.clone())
1388                .collect(),
1389            author: options.author.clone(),
1390            approver: options.approver.clone(),
1391            created_at: now_rfc3339(),
1392            version: "0.1.0".to_string(),
1393            package_name,
1394            capability_set: capabilities,
1395            secrets_required: required_secrets,
1396            rollback_target: Some("keep source traces and previous package version".to_string()),
1397            eval_pack_link: options.eval_pack_link.clone(),
1398            ..PromotionMetadata::default()
1399        },
1400        savings,
1401        shadow: ShadowRunReport::default(),
1402    }]
1403}
1404
1405fn best_repeated_sequence(
1406    signatures: &[Vec<String>],
1407    min_examples: usize,
1408) -> Option<RepeatedSequence> {
1409    let mut occurrences: BTreeMap<Vec<String>, Vec<(usize, usize)>> = BTreeMap::new();
1410    for (trace_index, trace_signatures) in signatures.iter().enumerate() {
1411        for start in 0..trace_signatures.len() {
1412            let max_len = (trace_signatures.len() - start).min(12);
1413            for len in 2..=max_len {
1414                let sequence = trace_signatures[start..start + len].to_vec();
1415                occurrences
1416                    .entry(sequence)
1417                    .or_default()
1418                    .push((trace_index, start));
1419            }
1420        }
1421    }
1422
1423    occurrences
1424        .into_iter()
1425        .filter_map(|(sequence, positions)| {
1426            let mut seen = BTreeSet::new();
1427            let mut examples = Vec::new();
1428            for (trace_index, start) in positions {
1429                if seen.insert(trace_index) {
1430                    examples.push((trace_index, start));
1431                }
1432            }
1433            if examples.len() >= min_examples {
1434                Some((sequence, examples))
1435            } else {
1436                None
1437            }
1438        })
1439        .max_by(
1440            |(left_sequence, left_examples), (right_sequence, right_examples)| {
1441                left_examples
1442                    .len()
1443                    .cmp(&right_examples.len())
1444                    .then_with(|| left_sequence.len().cmp(&right_sequence.len()))
1445            },
1446        )
1447}
1448
1449fn action_signature(action: &CrystallizationAction) -> String {
1450    let mut parameter_keys = action.parameters.keys().cloned().collect::<Vec<_>>();
1451    parameter_keys.sort();
1452    format!(
1453        "{}:{}:{}",
1454        action.kind,
1455        action.name,
1456        parameter_keys.join(",")
1457    )
1458}
1459
1460fn compatible_side_effects(actions: &[&CrystallizationAction]) -> bool {
1461    let first = sorted_side_effects(actions[0].side_effects.clone());
1462    actions
1463        .iter()
1464        .skip(1)
1465        .all(|action| sorted_side_effects(action.side_effects.clone()) == first)
1466}
1467
1468fn collect_varying_parameters(
1469    actions: &[&CrystallizationAction],
1470    root: &str,
1471    value_for: impl Fn(&CrystallizationAction) -> &JsonValue,
1472    parameter_values: &mut BTreeMap<String, BTreeSet<String>>,
1473    parameter_paths: &mut BTreeMap<String, BTreeSet<String>>,
1474    parameter_refs: &mut BTreeSet<String>,
1475) {
1476    let mut paths = BTreeMap::<String, Vec<JsonValue>>::new();
1477    for action in actions {
1478        collect_scalar_paths(value_for(action), root, &mut paths);
1479    }
1480    for (path, values) in paths {
1481        if values.len() != actions.len() {
1482            continue;
1483        }
1484        let unique = values
1485            .iter()
1486            .map(json_scalar_string)
1487            .collect::<BTreeSet<_>>();
1488        if unique.len() < 2 {
1489            continue;
1490        }
1491        let name = parameter_name_for_path(&path);
1492        parameter_values
1493            .entry(name.clone())
1494            .or_default()
1495            .extend(unique);
1496        parameter_paths
1497            .entry(name.clone())
1498            .or_default()
1499            .insert(path);
1500        parameter_refs.insert(name);
1501    }
1502}
1503
1504fn collect_scalar_paths(
1505    value: &JsonValue,
1506    prefix: &str,
1507    out: &mut BTreeMap<String, Vec<JsonValue>>,
1508) {
1509    match value {
1510        JsonValue::Object(map) => {
1511            for (key, child) in map {
1512                collect_scalar_paths(child, &format!("{prefix}.{key}"), out);
1513            }
1514        }
1515        JsonValue::Array(items) => {
1516            for (idx, child) in items.iter().enumerate() {
1517                collect_scalar_paths(child, &format!("{prefix}[{idx}]"), out);
1518            }
1519        }
1520        _ if is_scalar(value) => {
1521            out.entry(prefix.to_string())
1522                .or_default()
1523                .push(value.clone());
1524        }
1525        _ => {}
1526    }
1527}
1528
1529fn parameter_name_for_path(path: &str) -> String {
1530    let lower = path.to_ascii_lowercase();
1531    for (needle, name) in [
1532        ("version", "version"),
1533        ("repo_path", "repo_path"),
1534        ("repo", "repo_path"),
1535        ("branch_name", "branch_name"),
1536        ("branch", "branch_name"),
1537        ("release_target", "release_target"),
1538        ("target", "release_target"),
1539    ] {
1540        if lower.contains(needle) {
1541            return name.to_string();
1542        }
1543    }
1544    let tail = path
1545        .rsplit(['.', '['])
1546        .next()
1547        .unwrap_or("param")
1548        .trim_end_matches(']');
1549    sanitize_identifier(tail)
1550}
1551
1552fn constants_for_action(action: &CrystallizationAction) -> BTreeMap<String, JsonValue> {
1553    let mut constants = BTreeMap::new();
1554    constants.insert("kind".to_string(), json!(action.kind));
1555    constants.insert("name".to_string(), json!(action.name));
1556    if action.deterministic.unwrap_or(false) {
1557        constants.insert("deterministic".to_string(), json!(true));
1558    }
1559    constants
1560}
1561
1562fn preconditions_for_action(action: &CrystallizationAction) -> Vec<String> {
1563    let mut out = Vec::new();
1564    for capability in &action.capabilities {
1565        out.push(format!("capability '{capability}' is available"));
1566    }
1567    for secret in &action.required_secrets {
1568        out.push(format!("secret '{secret}' is configured"));
1569    }
1570    if let Some(approval) = &action.approval {
1571        if approval.required {
1572            out.push("human approval boundary is preserved".to_string());
1573        }
1574    }
1575    out
1576}
1577
1578fn stable_expected_output(actions: &[&CrystallizationAction]) -> Option<JsonValue> {
1579    let first = actions[0]
1580        .observed_output
1581        .as_ref()
1582        .or(actions[0].output.as_ref())?;
1583    if actions
1584        .iter()
1585        .all(|action| action.observed_output.as_ref().or(action.output.as_ref()) == Some(first))
1586    {
1587        Some(first.clone())
1588    } else {
1589        None
1590    }
1591}
1592
1593fn expected_receipts_for_examples(
1594    traces: &[CrystallizationTrace],
1595    examples: &[(usize, usize)],
1596) -> Vec<JsonValue> {
1597    examples
1598        .iter()
1599        .find_map(|(trace_index, _)| {
1600            traces
1601                .get(*trace_index)
1602                .and_then(|trace| trace.replay_run.as_ref())
1603                .map(|run| run.effect_receipts.clone())
1604                .filter(|receipts| !receipts.is_empty())
1605        })
1606        .unwrap_or_default()
1607}
1608
1609fn cluster_key_for_candidate(
1610    traces: &[CrystallizationTrace],
1611    examples: &[(usize, usize)],
1612    steps: &[WorkflowCandidateStep],
1613) -> WorkflowClusterKey {
1614    let goal = examples.iter().find_map(|(trace_index, _)| {
1615        traces.get(*trace_index).and_then(|trace| {
1616            trace
1617                .metadata
1618                .get("goal")
1619                .and_then(JsonValue::as_str)
1620                .map(str::to_string)
1621                .or_else(|| trace.workflow_id.clone())
1622        })
1623    });
1624    let tool_sequence = steps
1625        .iter()
1626        .filter(|step| step.kind == "tool_call" || step.kind == "file_mutation")
1627        .map(|step| step.name.clone())
1628        .collect::<Vec<_>>();
1629    let touched_artifact_types = sorted_strings(
1630        steps
1631            .iter()
1632            .flat_map(|step| step.side_effects.iter().map(artifact_type_for_side_effect)),
1633    );
1634    let success_criteria = sorted_strings(examples.iter().filter_map(|(trace_index, _)| {
1635        traces
1636            .get(*trace_index)
1637            .and_then(|trace| trace.metadata.get("success_criteria"))
1638            .and_then(JsonValue::as_str)
1639            .map(str::to_string)
1640    }));
1641
1642    WorkflowClusterKey {
1643        goal,
1644        tool_sequence,
1645        touched_artifact_types,
1646        success_criteria,
1647    }
1648}
1649
1650fn artifact_type_for_side_effect(effect: &CrystallizationSideEffect) -> String {
1651    if let Some(kind) = effect
1652        .metadata
1653        .get("artifact_type")
1654        .and_then(JsonValue::as_str)
1655        .filter(|kind| !kind.trim().is_empty())
1656    {
1657        return kind.to_string();
1658    }
1659    let lower_kind = effect.kind.to_ascii_lowercase();
1660    if lower_kind.contains("git") {
1661        "git".to_string()
1662    } else if lower_kind.contains("file") {
1663        Path::new(&effect.target)
1664            .extension()
1665            .and_then(|ext| ext.to_str())
1666            .map(|ext| format!("file:{ext}"))
1667            .unwrap_or_else(|| "file".to_string())
1668    } else if lower_kind.contains("receipt") {
1669        "receipt".to_string()
1670    } else if lower_kind.contains("publish") {
1671        "package_publish".to_string()
1672    } else if !lower_kind.is_empty() {
1673        lower_kind
1674    } else {
1675        "unknown".to_string()
1676    }
1677}
1678
1679fn refresh_promotion_metadata(
1680    candidate: &mut WorkflowCandidate,
1681    min_examples: usize,
1682    options: &CrystallizeOptions,
1683) {
1684    let min_confidence = if options.promotion_min_confidence > 0.0 {
1685        options.promotion_min_confidence
1686    } else {
1687        DEFAULT_PROMOTION_MIN_CONFIDENCE
1688    };
1689    let shadow_success_count = candidate
1690        .shadow
1691        .traces
1692        .iter()
1693        .filter(|trace| trace.pass)
1694        .count();
1695    let shadow_failure_count = candidate
1696        .shadow
1697        .traces
1698        .len()
1699        .saturating_sub(shadow_success_count);
1700    let sample_count = candidate
1701        .shadow
1702        .compared_traces
1703        .max(candidate.examples.len());
1704    let divergence_history = candidate
1705        .shadow
1706        .traces
1707        .iter()
1708        .filter(|trace| !trace.pass)
1709        .flat_map(divergence_records_for_shadow_trace)
1710        .collect::<Vec<_>>();
1711    let source_trace_hashes = sorted_strings(
1712        candidate
1713            .promotion
1714            .source_trace_hashes
1715            .iter()
1716            .cloned()
1717            .chain(
1718                candidate
1719                    .shadow
1720                    .traces
1721                    .iter()
1722                    .map(|trace| trace.source_hash.clone()),
1723            ),
1724    );
1725    let approval_history = options
1726        .approver
1727        .as_ref()
1728        .filter(|approver| !approver.trim().is_empty())
1729        .map(|approver| {
1730            vec![PromotionApprovalRecord {
1731                actor: approver.clone(),
1732                decision: "approved_for_shadow_promotion".to_string(),
1733                recorded_at: now_rfc3339(),
1734                reason: Some("approver supplied in crystallization options".to_string()),
1735            }]
1736        })
1737        .unwrap_or_default();
1738    let mut criteria = PromotionCriteria {
1739        min_examples,
1740        min_confidence,
1741        requires_shadow_pass: true,
1742        requires_no_rejections: true,
1743        requires_human_approval: true,
1744        approval_reason: Some(
1745            "workflow candidates start in shadow mode and require explicit human approval before promotion".to_string(),
1746        ),
1747        ..PromotionCriteria::default()
1748    };
1749    if sample_count < min_examples {
1750        criteria.reasons.push(format!(
1751            "sample count {} is below required minimum {min_examples}",
1752            sample_count
1753        ));
1754    }
1755    if candidate.confidence < min_confidence {
1756        criteria.reasons.push(format!(
1757            "confidence {:.2} is below required minimum {:.2}",
1758            candidate.confidence, min_confidence
1759        ));
1760    }
1761    if !candidate.shadow.pass {
1762        criteria
1763            .reasons
1764            .push("shadow comparison did not pass".to_string());
1765    }
1766    if !candidate.rejection_reasons.is_empty() {
1767        criteria
1768            .reasons
1769            .push("candidate has rejection reasons".to_string());
1770    }
1771    if approval_history.is_empty() {
1772        criteria
1773            .reasons
1774            .push("human approval has not been recorded".to_string());
1775    }
1776    criteria.status = if !candidate.rejection_reasons.is_empty()
1777        || !candidate.shadow.pass
1778        || sample_count < min_examples
1779        || candidate.confidence < min_confidence
1780    {
1781        PromotionStatus::Blocked
1782    } else if approval_history.is_empty() {
1783        PromotionStatus::NeedsApproval
1784    } else {
1785        PromotionStatus::Ready
1786    };
1787
1788    candidate.promotion.sample_count = sample_count;
1789    candidate.promotion.source_trace_hashes = source_trace_hashes;
1790    candidate.promotion.confidence = candidate.confidence;
1791    candidate.promotion.shadow_success_count = shadow_success_count;
1792    candidate.promotion.shadow_failure_count = shadow_failure_count;
1793    candidate.promotion.divergence_history = divergence_history;
1794    candidate.promotion.approval_history = approval_history;
1795    candidate.promotion.criteria = criteria;
1796    candidate.promotion.estimated_time_token_savings = candidate.savings.clone();
1797}
1798
1799fn divergence_records_for_shadow_trace(
1800    trace: &ShadowTraceResult,
1801) -> Vec<PromotionDivergenceRecord> {
1802    if let Some(report) = &trace.replay_oracle {
1803        if let Some(divergence) = &report.divergence {
1804            return vec![PromotionDivergenceRecord {
1805                trace_id: trace.trace_id.clone(),
1806                path: Some(divergence.path.clone()),
1807                message: divergence.message.clone(),
1808                left: Some(divergence.left.clone()),
1809                right: Some(divergence.right.clone()),
1810            }];
1811        }
1812    }
1813    trace
1814        .details
1815        .iter()
1816        .map(|detail| PromotionDivergenceRecord {
1817            trace_id: trace.trace_id.clone(),
1818            path: None,
1819            message: detail.clone(),
1820            left: None,
1821            right: None,
1822        })
1823        .collect()
1824}
1825
1826fn shadow_candidate(
1827    candidate: &WorkflowCandidate,
1828    traces: &[CrystallizationTrace],
1829) -> ShadowRunReport {
1830    let mut failures = Vec::new();
1831    let mut results = Vec::new();
1832    let mut compared_trace_ids = BTreeSet::new();
1833    for example in &candidate.examples {
1834        let Some(trace) = traces.iter().find(|trace| trace.id == example.trace_id) else {
1835            failures.push(format!("missing source trace {}", example.trace_id));
1836            continue;
1837        };
1838        compared_trace_ids.insert(trace.id.clone());
1839        let result = shadow_trace_result(candidate, trace, example.start_index);
1840        if !result.pass {
1841            failures.push(format!("trace {} failed shadow comparison", trace.id));
1842        }
1843        results.push(result);
1844    }
1845
1846    for trace in traces {
1847        if compared_trace_ids.contains(&trace.id) {
1848            continue;
1849        }
1850        if let Some(start_index) = find_sequence_start(trace, &candidate.sequence_signature) {
1851            compared_trace_ids.insert(trace.id.clone());
1852            let result = shadow_trace_result(candidate, trace, start_index);
1853            if !result.pass {
1854                failures.push(format!("trace {} failed shadow comparison", trace.id));
1855            }
1856            results.push(result);
1857        }
1858    }
1859
1860    ShadowRunReport {
1861        pass: failures.is_empty(),
1862        compared_traces: results.len(),
1863        failures,
1864        traces: results,
1865    }
1866}
1867
1868fn find_sequence_start(trace: &CrystallizationTrace, sequence: &[String]) -> Option<usize> {
1869    if sequence.is_empty() || trace.actions.len() < sequence.len() {
1870        return None;
1871    }
1872    trace
1873        .actions
1874        .windows(sequence.len())
1875        .position(|window| window.iter().map(action_signature).collect::<Vec<_>>() == sequence)
1876}
1877
1878fn shadow_trace_result(
1879    candidate: &WorkflowCandidate,
1880    trace: &CrystallizationTrace,
1881    start_index: usize,
1882) -> ShadowTraceResult {
1883    let mut details = Vec::new();
1884    let end = start_index + candidate.steps.len();
1885    if end > trace.actions.len() {
1886        details.push("candidate sequence extends past trace action list".to_string());
1887    } else {
1888        let signatures = trace.actions[start_index..end]
1889            .iter()
1890            .map(action_signature)
1891            .collect::<Vec<_>>();
1892        if signatures != candidate.sequence_signature {
1893            details.push("action sequence signature changed".to_string());
1894        }
1895        for (offset, step) in candidate.steps.iter().enumerate() {
1896            let action = &trace.actions[start_index + offset];
1897            if sorted_side_effects(action.side_effects.clone()) != step.side_effects {
1898                details.push(format!(
1899                    "step {} side effects differ for action {}",
1900                    step.index, action.id
1901                ));
1902            }
1903            if action.approval.as_ref().map(|approval| approval.required)
1904                != step.approval.as_ref().map(|approval| approval.required)
1905            {
1906                details.push(format!("step {} approval boundary differs", step.index));
1907            }
1908            if step.segment == SegmentKind::Deterministic {
1909                if let Some(expected) = &step.expected_output {
1910                    let actual = action.observed_output.as_ref().or(action.output.as_ref());
1911                    if actual != Some(expected) {
1912                        details.push(format!("step {} deterministic output differs", step.index));
1913                    }
1914                }
1915            }
1916        }
1917    }
1918
1919    let (replay_oracle, compared_receipts) = replay_oracle_for_shadow(candidate, trace);
1920    if let Some(report) = &replay_oracle {
1921        if !report.passed {
1922            if let Some(divergence) = &report.divergence {
1923                details.push(format!(
1924                    "receipt replay diverged at {}: {}",
1925                    divergence.path, divergence.message
1926                ));
1927            } else {
1928                details.push("receipt replay oracle did not pass".to_string());
1929            }
1930        }
1931    }
1932
1933    ShadowTraceResult {
1934        trace_id: trace.id.clone(),
1935        source_hash: trace.source_hash.clone().unwrap_or_default(),
1936        pass: details.is_empty(),
1937        details,
1938        compared_receipts,
1939        replay_oracle,
1940    }
1941}
1942
1943fn replay_oracle_for_shadow(
1944    candidate: &WorkflowCandidate,
1945    trace: &CrystallizationTrace,
1946) -> (Option<ReplayOracleReport>, usize) {
1947    let Some(first_run) = trace.replay_run.as_ref() else {
1948        return (None, 0);
1949    };
1950    if first_run.effect_receipts.is_empty() && candidate.expected_receipts.is_empty() {
1951        return (None, 0);
1952    }
1953    let mut second_run = first_run.clone();
1954    second_run.run_id = format!("shadow_{}_{}", candidate.id, trace.id);
1955    second_run.effect_receipts = candidate.expected_receipts.clone();
1956    let compared_receipts = first_run
1957        .effect_receipts
1958        .len()
1959        .max(second_run.effect_receipts.len());
1960    let oracle_trace = ReplayOracleTrace {
1961        name: format!("{}_shadow_receipts_{}", candidate.name, trace.id),
1962        description: Some(
1963            "crystallization shadow receipt comparison using the replay oracle".to_string(),
1964        ),
1965        expect: ReplayExpectation::Match,
1966        allowlist: trace.replay_allowlist.clone(),
1967        first_run: first_run.clone(),
1968        second_run,
1969        ..ReplayOracleTrace::default()
1970    };
1971    let oracle_name = oracle_trace.name.clone();
1972    let second_run_counts = oracle_trace.second_run.counts();
1973    let report = match run_replay_oracle_trace(&oracle_trace) {
1974        Ok(report) => report,
1975        Err(error) => ReplayOracleReport {
1976            name: oracle_name,
1977            expectation: ReplayExpectation::Match,
1978            passed: false,
1979            first_run_counts: first_run.counts(),
1980            second_run_counts,
1981            protocol_fixture_refs: Vec::new(),
1982            divergence: Some(super::ReplayDivergence {
1983                path: "/".to_string(),
1984                left: JsonValue::Null,
1985                right: JsonValue::Null,
1986                message: error.to_string(),
1987            }),
1988        },
1989    };
1990    (Some(report), compared_receipts)
1991}
1992
1993fn estimate_savings(
1994    traces: &[CrystallizationTrace],
1995    examples: &[(usize, usize)],
1996    steps: &[WorkflowCandidateStep],
1997) -> SavingsEstimate {
1998    let mut estimate = SavingsEstimate::default();
1999    for (trace_index, start_index) in examples {
2000        let trace = &traces[*trace_index];
2001        for action in &trace.actions[*start_index..*start_index + steps.len()] {
2002            if action.kind == "model_call" || action.fuzzy.unwrap_or(false) {
2003                estimate.remaining_model_calls += action.cost.model_calls.max(1);
2004            } else {
2005                estimate.model_calls_avoided += action.cost.model_calls;
2006                estimate.input_tokens_avoided += action.cost.input_tokens;
2007                estimate.output_tokens_avoided += action.cost.output_tokens;
2008                estimate.estimated_cost_usd_avoided += action.cost.total_cost_usd;
2009                estimate.wall_ms_avoided +=
2010                    action.cost.wall_ms + action.duration_ms.unwrap_or_default();
2011            }
2012        }
2013    }
2014    estimate.cpu_runtime_cost_usd = 0.0;
2015    estimate
2016}
2017
2018fn confidence_for(
2019    examples: &[(usize, usize)],
2020    trace_count: usize,
2021    steps: &[WorkflowCandidateStep],
2022    safe: bool,
2023) -> f64 {
2024    if !safe || trace_count == 0 {
2025        return 0.0;
2026    }
2027    let coverage = examples.len() as f64 / trace_count as f64;
2028    let deterministic = steps
2029        .iter()
2030        .filter(|step| step.segment == SegmentKind::Deterministic)
2031        .count() as f64
2032        / steps.len().max(1) as f64;
2033    ((coverage * 0.65) + (deterministic * 0.35)).min(0.99)
2034}
2035
2036fn infer_workflow_name(steps: &[WorkflowCandidateStep]) -> String {
2037    let names = steps
2038        .iter()
2039        .map(|step| step.name.to_ascii_lowercase())
2040        .collect::<Vec<_>>()
2041        .join("_");
2042    if names.contains("version") || names.contains("release") {
2043        "crystallized_version_bump".to_string()
2044    } else {
2045        "crystallized_workflow".to_string()
2046    }
2047}
2048
2049pub fn generate_harn_code(candidate: &WorkflowCandidate) -> String {
2050    let mut out = String::new();
2051    let params = if candidate.parameters.is_empty() {
2052        "task".to_string()
2053    } else {
2054        candidate
2055            .parameters
2056            .iter()
2057            .map(|parameter| parameter.name.as_str())
2058            .collect::<Vec<_>>()
2059            .join(", ")
2060    };
2061    writeln!(out, "/**").unwrap();
2062    writeln!(
2063        out,
2064        " * Generated by harn crystallize. Review before promotion."
2065    )
2066    .unwrap();
2067    writeln!(out, " * Candidate: {}", candidate.id).unwrap();
2068    writeln!(
2069        out,
2070        " * Source trace hashes: {}",
2071        candidate.promotion.source_trace_hashes.join(", ")
2072    )
2073    .unwrap();
2074    writeln!(
2075        out,
2076        " * Capabilities: {}",
2077        if candidate.capabilities.is_empty() {
2078            "none".to_string()
2079        } else {
2080            candidate.capabilities.join(", ")
2081        }
2082    )
2083    .unwrap();
2084    writeln!(
2085        out,
2086        " * Required secrets: {}",
2087        if candidate.required_secrets.is_empty() {
2088            "none".to_string()
2089        } else {
2090            candidate.required_secrets.join(", ")
2091        }
2092    )
2093    .unwrap();
2094    writeln!(out, " */").unwrap();
2095    writeln!(out, "pipeline {}({}) {{", candidate.name, params).unwrap();
2096    writeln!(out, "  let review_warnings = []").unwrap();
2097    for step in &candidate.steps {
2098        writeln!(out, "  // Step {}: {} {}", step.index, step.kind, step.name).unwrap();
2099        for side_effect in &step.side_effects {
2100            writeln!(
2101                out,
2102                "  // side_effect: {} {}",
2103                side_effect.kind, side_effect.target
2104            )
2105            .unwrap();
2106        }
2107        if let Some(approval) = &step.approval {
2108            if approval.required {
2109                writeln!(
2110                    out,
2111                    "  // approval_required: {}",
2112                    approval
2113                        .boundary
2114                        .clone()
2115                        .unwrap_or_else(|| "human_review".to_string())
2116                )
2117                .unwrap();
2118            }
2119        }
2120        if step.segment == SegmentKind::Fuzzy {
2121            writeln!(
2122                out,
2123                "  // TODO: fuzzy segment still needs LLM/reviewer handling before deterministic promotion."
2124            )
2125            .unwrap();
2126            writeln!(
2127                out,
2128                "  review_warnings.push(\"fuzzy step: {}\")",
2129                escape_harn_string(&step.name)
2130            )
2131            .unwrap();
2132        }
2133        writeln!(
2134            out,
2135            "  log(\"crystallized step {}: {}\")",
2136            step.index,
2137            escape_harn_string(&step.name)
2138        )
2139        .unwrap();
2140    }
2141    writeln!(
2142        out,
2143        "  return {{status: \"shadow_ready\", candidate_id: \"{}\", review_warnings: review_warnings}}",
2144        escape_harn_string(&candidate.id)
2145    )
2146    .unwrap();
2147    writeln!(out, "}}").unwrap();
2148    out
2149}
2150
2151fn rejected_workflow_stub(rejected: &[WorkflowCandidate]) -> String {
2152    let mut out = String::new();
2153    writeln!(
2154        out,
2155        "// Generated by harn crystallize. No safe candidate was proposed."
2156    )
2157    .unwrap();
2158    writeln!(out, "pipeline crystallized_workflow(task) {{").unwrap();
2159    writeln!(out, "  log(\"no safe crystallization candidate\")").unwrap();
2160    writeln!(
2161        out,
2162        "  return {{status: \"rejected\", rejected_candidates: {}}}",
2163        rejected.len()
2164    )
2165    .unwrap();
2166    writeln!(out, "}}").unwrap();
2167    out
2168}
2169
2170pub fn generate_eval_pack(candidate: &WorkflowCandidate) -> String {
2171    let mut out = String::new();
2172    writeln!(out, "version = 1").unwrap();
2173    writeln!(
2174        out,
2175        "id = \"{}-crystallization\"",
2176        candidate.promotion.package_name
2177    )
2178    .unwrap();
2179    writeln!(
2180        out,
2181        "name = \"{} crystallization shadow evals\"",
2182        candidate.name
2183    )
2184    .unwrap();
2185    writeln!(out).unwrap();
2186    writeln!(out, "[package]").unwrap();
2187    writeln!(out, "name = \"{}\"", candidate.promotion.package_name).unwrap();
2188    writeln!(out, "version = \"{}\"", candidate.promotion.version).unwrap();
2189    writeln!(out).unwrap();
2190    for example in &candidate.examples {
2191        writeln!(out, "[[fixtures]]").unwrap();
2192        writeln!(out, "id = \"{}\"", example.trace_id).unwrap();
2193        writeln!(out, "kind = \"jsonl-trace\"").unwrap();
2194        writeln!(out, "trace_id = \"{}\"", example.trace_id).unwrap();
2195        writeln!(out).unwrap();
2196    }
2197    writeln!(out, "[[rubrics]]").unwrap();
2198    writeln!(out, "id = \"shadow-determinism\"").unwrap();
2199    writeln!(out, "kind = \"deterministic\"").unwrap();
2200    writeln!(out).unwrap();
2201    writeln!(out, "[[rubrics.assertions]]").unwrap();
2202    writeln!(out, "kind = \"crystallization-shadow\"").unwrap();
2203    writeln!(
2204        out,
2205        "expected = {{ candidate_id = \"{}\", pass = true }}",
2206        candidate.id
2207    )
2208    .unwrap();
2209    writeln!(out).unwrap();
2210    writeln!(out, "[[cases]]").unwrap();
2211    writeln!(out, "id = \"{}-shadow\"", candidate.name).unwrap();
2212    writeln!(out, "name = \"{} shadow replay\"", candidate.name).unwrap();
2213    writeln!(out, "rubrics = [\"shadow-determinism\"]").unwrap();
2214    writeln!(out, "severity = \"blocking\"").unwrap();
2215    out
2216}
2217
2218fn stable_candidate_id(sequence: &[String], examples: &[WorkflowCandidateExample]) -> String {
2219    let mut hasher = Sha256::new();
2220    for item in sequence {
2221        hasher.update(item.as_bytes());
2222        hasher.update([0]);
2223    }
2224    for example in examples {
2225        hasher.update(example.source_hash.as_bytes());
2226        hasher.update([0]);
2227    }
2228    format!("candidate_{}", hex_prefix(hasher.finalize().as_slice(), 16))
2229}
2230
2231fn hash_bytes(bytes: &[u8]) -> String {
2232    let mut hasher = Sha256::new();
2233    hasher.update(bytes);
2234    format!("sha256:{}", hex::encode(hasher.finalize()))
2235}
2236
2237fn hex_prefix(bytes: &[u8], chars: usize) -> String {
2238    hex::encode(bytes).chars().take(chars).collect::<String>()
2239}
2240
/// Collects `items` into a sorted, de-duplicated list, dropping entries that
/// are empty or whitespace-only.
fn sorted_strings(items: impl Iterator<Item = String>) -> Vec<String> {
    let unique: BTreeSet<String> = items
        .filter(|item| !item.trim().is_empty())
        .collect();
    unique.into_iter().collect()
}
2246
2247fn sorted_side_effects(items: Vec<CrystallizationSideEffect>) -> Vec<CrystallizationSideEffect> {
2248    let mut items = items
2249        .into_iter()
2250        .filter(|item| !item.kind.trim().is_empty() || !item.target.trim().is_empty())
2251        .collect::<Vec<_>>();
2252    items.sort_by_key(side_effect_sort_key);
2253    items.dedup_by(|left, right| side_effect_sort_key(left) == side_effect_sort_key(right));
2254    items
2255}
2256
2257fn side_effect_sort_key(item: &CrystallizationSideEffect) -> String {
2258    format!(
2259        "{}\x1f{}\x1f{}\x1f{}",
2260        item.kind,
2261        item.target,
2262        item.capability.clone().unwrap_or_default(),
2263        item.mutation.clone().unwrap_or_default()
2264    )
2265}
2266
2267fn is_scalar(value: &JsonValue) -> bool {
2268    matches!(
2269        value,
2270        JsonValue::String(_) | JsonValue::Number(_) | JsonValue::Bool(_) | JsonValue::Null
2271    )
2272}
2273
2274fn json_scalar_string(value: &JsonValue) -> String {
2275    match value {
2276        JsonValue::String(value) => value.clone(),
2277        other => other.to_string(),
2278    }
2279}
2280
/// Turns arbitrary text into a lowercase identifier.
///
/// ASCII letters, digits and `_` are kept (letters lowercased); every run of
/// other characters collapses into a single `_`. Leading and trailing
/// underscores are trimmed. An empty result becomes `param`, and a result
/// that would start with a digit is prefixed with `_`.
///
/// The digit guard is applied after trimming: guarding before the trim (as
/// an earlier version did) let `trim_matches('_')` strip the guard again, so
/// `"1abc"` came back as the digit-leading `"1abc"`.
fn sanitize_identifier(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    for ch in raw.chars() {
        if ch.is_ascii_alphanumeric() || ch == '_' {
            out.push(ch.to_ascii_lowercase());
        } else if !out.ends_with('_') {
            out.push('_');
        }
    }

    let trimmed = out.trim_matches('_');
    if trimmed.is_empty() {
        "param".to_string()
    } else if trimmed.starts_with(|ch: char| ch.is_ascii_digit()) {
        format!("_{trimmed}")
    } else {
        trimmed.to_string()
    }
}
2300
/// Escapes `value` for embedding inside a double-quoted string literal in
/// generated Harn code.
///
/// Backslashes and double quotes are backslash-escaped. Line feeds and
/// carriage returns are written as `\n` and `\r` so a name containing a line
/// break cannot split the generated literal across lines.
// NOTE(review): assumes Harn string literals accept `\n` and `\r` escapes;
// confirm against the Harn lexer.
fn escape_harn_string(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            other => escaped.push(other),
        }
    }
    escaped
}
2304
2305// ===== Crystallization bundle =====
2306//
2307// A bundle is a directory layout that Harn writes and Harn Cloud (or any
2308// other importer) reads without bespoke glue. The contract is:
2309//
2310//   bundle/
2311//     candidate.json        # versioned manifest documented below
2312//     workflow.harn         # generated/reviewable workflow code
2313//     report.json           # full mining/shadow/eval report
2314//     harn.eval.toml        # generated eval pack when available (optional)
2315//     fixtures/             # redacted replay fixtures referenced by the
2316//                           # report (optional, only when --bundle is used
2317//                           # with `harn crystallize` and traces were on disk)
2318//
2319// `candidate.json` is the authoritative manifest. It must include the
2320// `schema` and `schema_version` markers. Cloud importers MUST reject any
2321// bundle whose `schema` is not exactly `harn.crystallization.candidate.bundle`
2322// or whose `schema_version` is greater than the highest version they
2323// understand. Only the documented additive fields may be added without
2324// bumping `schema_version`.
2325
/// Identifies the tool, and the version of that tool, that generated a bundle.
///
/// `#[serde(default)]` fills any field missing from the input with the value
/// from this type's `Default` impl.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleGenerator {
    /// Name of the generating tool; the `Default` impl uses `"harn"`.
    pub tool: String,
    /// Version of the generating tool; the `Default` impl uses this crate's
    /// `CARGO_PKG_VERSION`.
    pub version: String,
}
2332
2333impl Default for BundleGenerator {
2334    fn default() -> Self {
2335        Self {
2336            tool: "harn".to_string(),
2337            version: env!("CARGO_PKG_VERSION").to_string(),
2338        }
2339    }
2340}
2341
/// Manifest entry describing the generated workflow file and the package
/// identity proposed for it.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleWorkflowRef {
    /// Relative path inside the bundle directory.
    pub path: String,
    /// Short identifier used in `pipeline NAME(...)`.
    pub name: String,
    /// Logical package name promotion uses to register the workflow.
    pub package_name: String,
    /// Initial workflow version proposed for promotion.
    pub package_version: String,
}
2354
/// Manifest entry for one source trace behind a candidate, with optional
/// links back to where the trace came from.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleSourceTrace {
    /// Identifier of the source trace.
    pub trace_id: String,
    /// Hash recorded for the source trace.
    // NOTE(review): presumably the `sha256:`-prefixed form produced by
    // `hash_bytes`; confirm against the code that fills this field.
    pub source_hash: String,
    /// Optional human-visible URL (PR, issue, run record path) for the
    /// trace. `None` when the trace was loaded from an in-memory store.
    pub source_url: Option<String>,
    /// Optional cloud-side receipt id when the trace was already promoted
    /// into a tenant receipt. Cloud importers use this to wire candidate
    /// evidence to existing receipts without round-tripping the raw payload.
    pub source_receipt_id: Option<String>,
    /// Relative path of the redacted fixture inside the bundle, if one
    /// was emitted.
    pub fixture_path: Option<String>,
}
2371
/// Manifest view of a single workflow step.
///
/// `BundleStep::from_candidate_step` fills every field by cloning the
/// same-named field of a `WorkflowCandidateStep`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct BundleStep {
    /// Step index copied from the candidate step.
    pub index: usize,
    /// Step kind copied from the candidate step.
    pub kind: String,
    /// Step name copied from the candidate step.
    pub name: String,
    /// Whether the step is a deterministic or a fuzzy segment.
    pub segment: SegmentKind,
    /// Parameter references copied from the candidate step.
    pub parameter_refs: Vec<String>,
    /// Side effects recorded for the step.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities recorded for the step.
    pub capabilities: Vec<String>,
    /// Secrets recorded as required for the step.
    pub required_secrets: Vec<String>,
    /// Approval metadata for the step, if any.
    pub approval: Option<CrystallizationApproval>,
    /// Review notes copied from the candidate step.
    pub review_notes: Vec<String>,
}
2386
2387impl BundleStep {
2388    fn from_candidate_step(step: &WorkflowCandidateStep) -> Self {
2389        Self {
2390            index: step.index,
2391            kind: step.kind.clone(),
2392            name: step.name.clone(),
2393            segment: step.segment.clone(),
2394            parameter_refs: step.parameter_refs.clone(),
2395            side_effects: step.side_effects.clone(),
2396            capabilities: step.capabilities.clone(),
2397            required_secrets: step.required_secrets.clone(),
2398            approval: step.approval.clone(),
2399            review_notes: step.review_notes.clone(),
2400        }
2401    }
2402}
2403
/// Manifest entry pointing at the bundle's eval pack.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleEvalPackRef {
    /// Relative path of the eval pack inside the bundle directory.
    pub path: String,
    /// Optional external link the eval pack also lives at (e.g. a hosted
    /// `eval-pack://` URI when the bundle was promoted into a tenant).
    pub link: Option<String>,
}
2413
/// Manifest entry for one fixture file listed in the bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleFixtureRef {
    /// Location of the fixture file.
    // NOTE(review): presumably relative to the bundle directory, like the
    // other manifest paths; confirm against the bundle writer.
    pub path: String,
    /// Identifier of the trace the fixture belongs to.
    pub trace_id: String,
    /// Hash recorded for that trace.
    pub source_hash: String,
    /// Redaction flag for this fixture.
    // NOTE(review): presumably `true` once redaction has been applied to the
    // fixture; confirm against the bundle writer.
    pub redacted: bool,
}
2422
/// Promotion metadata carried in the bundle manifest.
///
/// `#[serde(default)]` fills any field missing from the input with the value
/// from this type's `Default` impl.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct BundlePromotion {
    /// Owner of the workflow, when known.
    pub owner: Option<String>,
    /// Approver of the promotion, when known.
    pub approver: Option<String>,
    /// Author of the workflow, when known.
    pub author: Option<String>,
    /// Logical rollout strategy. Defaults to `shadow_then_canary`. Hosted
    /// surfaces may extend this enum but must keep existing values stable.
    pub rollout_policy: String,
    /// Rollback target, when one is recorded.
    pub rollback_target: Option<String>,
    /// Creation timestamp as text; empty in the `Default` value.
    pub created_at: String,
    /// Workflow version proposed for promotion.
    pub workflow_version: String,
    /// Package name the workflow is promoted under.
    pub package_name: String,
    /// Number of samples backing the candidate.
    pub sample_count: usize,
    /// Confidence score of the candidate.
    pub confidence: f64,
    /// Number of shadow traces that passed.
    pub shadow_success_count: usize,
    /// Number of shadow traces that did not pass.
    pub shadow_failure_count: usize,
    /// Divergence records collected from shadow traces.
    pub divergence_history: Vec<PromotionDivergenceRecord>,
    /// Recorded approval decisions.
    pub approval_history: Vec<PromotionApprovalRecord>,
    /// Promotion criteria, including status and blocking reasons.
    pub criteria: PromotionCriteria,
    /// Savings estimate attached to the promotion.
    pub estimated_time_token_savings: SavingsEstimate,
}
2445
2446impl Default for BundlePromotion {
2447    fn default() -> Self {
2448        Self {
2449            owner: None,
2450            approver: None,
2451            author: None,
2452            rollout_policy: DEFAULT_ROLLOUT_POLICY.to_string(),
2453            rollback_target: None,
2454            created_at: String::new(),
2455            workflow_version: String::new(),
2456            package_name: String::new(),
2457            sample_count: 0,
2458            confidence: 0.0,
2459            shadow_success_count: 0,
2460            shadow_failure_count: 0,
2461            divergence_history: Vec::new(),
2462            approval_history: Vec::new(),
2463            criteria: PromotionCriteria::default(),
2464            estimated_time_token_savings: SavingsEstimate::default(),
2465        }
2466    }
2467}
2468
/// Summary of the redaction recorded for a bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleRedactionSummary {
    /// Whether redaction was applied.
    pub applied: bool,
    /// Redaction rules listed for the bundle.
    // NOTE(review): presumably rule names or identifiers; confirm against
    // the code that fills this list.
    pub rules: Vec<String>,
    /// Free-text summary of the redaction.
    pub summary: String,
    /// Number of fixture files copied into the bundle (0 when no fixture
    /// directory was emitted).
    pub fixture_count: usize,
}
2479
/// The `candidate.json` manifest of a crystallization bundle.
///
/// Importers are expected to check `schema` and `schema_version` before
/// trusting any other field. `#[serde(default)]` fills fields missing from
/// the input with their default values.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundleManifest {
    /// Schema marker; importers must reject anything other than
    /// `harn.crystallization.candidate.bundle`.
    pub schema: String,
    /// Manifest schema version; importers must reject versions newer than
    /// the highest one they understand.
    pub schema_version: u32,
    /// Generation timestamp as text.
    pub generated_at: String,
    /// Tool and version that generated the bundle.
    pub generator: BundleGenerator,
    /// Whether this is a normal, plan-only, or rejected bundle.
    pub kind: BundleKind,
    /// Identifier of the candidate the bundle describes.
    pub candidate_id: String,
    /// Key stored for the bundle.
    // NOTE(review): presumably the dedupe key described on
    // `BundleOptions::external_key`; confirm against the bundle builder.
    pub external_key: String,
    /// Title of the bundle.
    pub title: String,
    /// Team associated with the bundle, when given.
    pub team: Option<String>,
    /// Repository associated with the bundle, when given.
    pub repo: Option<String>,
    /// Risk level label.
    pub risk_level: String,
    /// Reference to the generated workflow file and its package identity.
    pub workflow: BundleWorkflowRef,
    /// Hashes of the source traces.
    pub source_trace_hashes: Vec<String>,
    /// Per-trace provenance entries.
    pub source_traces: Vec<BundleSourceTrace>,
    /// Steps listed as deterministic.
    pub deterministic_steps: Vec<BundleStep>,
    /// Steps listed as fuzzy.
    pub fuzzy_steps: Vec<BundleStep>,
    /// Side effects recorded for the candidate.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities recorded for the candidate.
    pub capabilities: Vec<String>,
    /// Secrets recorded as required for the candidate.
    pub required_secrets: Vec<String>,
    /// Savings estimate for the candidate.
    pub savings: SavingsEstimate,
    /// Shadow comparison report.
    pub shadow: ShadowRunReport,
    /// Reference to the eval pack, when one is present.
    pub eval_pack: Option<BundleEvalPackRef>,
    /// Fixture files listed in the bundle.
    pub fixtures: Vec<BundleFixtureRef>,
    /// Promotion metadata.
    pub promotion: BundlePromotion,
    /// Redaction summary.
    pub redaction: BundleRedactionSummary,
    /// Confidence score of the candidate.
    pub confidence: f64,
    /// Reasons the candidate was rejected, if any.
    pub rejection_reasons: Vec<String>,
    /// Warnings recorded for the bundle.
    pub warnings: Vec<String>,
}
2512
/// Classification of a bundle. Serialized in `snake_case` (`candidate`,
/// `plan_only`, `rejected`); `Candidate` is the default.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum BundleKind {
    /// A normal candidate that passed shadow comparison and is ready for
    /// review and promotion.
    #[default]
    Candidate,
    /// A "plan-only" candidate: every step has a side-effect-free, in-process
    /// outcome (e.g. classify and write a receipt). Cloud importers can
    /// promote these without explicit external-side-effect approval.
    PlanOnly,
    /// No safe candidate was selected. The bundle still records what was
    /// attempted, the rejection reasons, and any rejected candidates so
    /// reviewers can debug or feed it back into mining.
    Rejected,
}
2529
/// Caller-supplied overrides used when building a bundle.
///
/// For `external_key`, `title`, `risk_level` and `rollout_policy`, a value
/// that is `None` or blank after trimming is treated as unset by
/// `build_crystallization_bundle`, which then falls back to a default.
#[derive(Clone, Debug, Default)]
pub struct BundleOptions {
    /// Stable identifier downstream cloud importers use to dedupe bundles
    /// across runs (defaults to a sanitized workflow name).
    pub external_key: Option<String>,
    /// Display title; inferred from the candidate when unset.
    pub title: Option<String>,
    /// Owning team, copied into the manifest as-is.
    pub team: Option<String>,
    /// Repository, copied into the manifest as-is.
    pub repo: Option<String>,
    /// Risk level label; inferred from the candidate when unset.
    pub risk_level: Option<String>,
    /// Rollout policy; falls back to `DEFAULT_ROLLOUT_POLICY` when unset.
    pub rollout_policy: Option<String>,
}
2541
/// In-memory crystallization bundle: the manifest plus every payload that
/// gets written next to it in the bundle directory.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundle {
    /// Manifest describing the bundle contents.
    pub manifest: CrystallizationBundleManifest,
    /// Full crystallization report the bundle was derived from.
    pub report: CrystallizationReport,
    /// Source text of the generated Harn workflow.
    pub harn_code: String,
    /// Eval pack TOML; blank when no eval pack should be written.
    pub eval_pack_toml: String,
    /// Redacted traces that are written out as fixture files.
    pub fixtures: Vec<CrystallizationTrace>,
}
2551
/// Outcome of validating a bundle on disk: identifying fields echoed from
/// the manifest, one pass/fail flag per check, and the list of problems
/// found.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleValidation {
    /// Display form of the directory that was validated.
    pub bundle_dir: String,
    /// Schema marker read from the manifest (empty if it failed to load).
    pub schema: String,
    /// Schema version read from the manifest.
    pub schema_version: u32,
    /// Bundle kind read from the manifest.
    pub kind: BundleKind,
    /// Candidate id read from the manifest.
    pub candidate_id: String,
    /// The manifest was read, decoded and accepted by the schema checks.
    pub manifest_ok: bool,
    /// The workflow file referenced by the manifest was found.
    pub workflow_ok: bool,
    /// The report file was read and decoded.
    pub report_ok: bool,
    /// The manifest references no eval pack, or the referenced file was found.
    pub eval_pack_ok: bool,
    /// Every fixture listed in the manifest was found and is marked redacted.
    pub fixtures_ok: bool,
    /// False only when fixtures are listed but redaction is not marked as
    /// applied.
    pub redaction_ok: bool,
    /// Human-readable descriptions of every problem found.
    pub problems: Vec<String>,
}
2569
2570impl BundleValidation {
2571    pub fn is_ok(&self) -> bool {
2572        self.problems.is_empty()
2573    }
2574}
2575
2576/// Build an in-memory bundle from already-mined artifacts. The traces
2577/// passed here are the same normalized traces used to mine the candidate;
2578/// they will be redacted before being attached as fixtures.
2579pub fn build_crystallization_bundle(
2580    artifacts: CrystallizationArtifacts,
2581    traces: &[CrystallizationTrace],
2582    options: BundleOptions,
2583) -> Result<CrystallizationBundle, VmError> {
2584    let CrystallizationArtifacts {
2585        report,
2586        harn_code,
2587        eval_pack_toml,
2588    } = artifacts;
2589
2590    let (selected, kind) = match report
2591        .selected_candidate_id
2592        .as_deref()
2593        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
2594    {
2595        Some(candidate) => {
2596            let kind = if candidate_is_plan_only(candidate) {
2597                BundleKind::PlanOnly
2598            } else {
2599                BundleKind::Candidate
2600            };
2601            (Some(candidate), kind)
2602        }
2603        None => (None, BundleKind::Rejected),
2604    };
2605
2606    let workflow_name = selected
2607        .map(|candidate| candidate.name.clone())
2608        .unwrap_or_else(|| "crystallized_workflow".to_string());
2609    let package_name = selected
2610        .map(|candidate| candidate.promotion.package_name.clone())
2611        .unwrap_or_else(|| workflow_name.replace('_', "-"));
2612    let workflow_version = selected
2613        .map(|candidate| candidate.promotion.version.clone())
2614        .unwrap_or_else(|| "0.0.0".to_string());
2615
2616    let manifest_workflow = BundleWorkflowRef {
2617        path: BUNDLE_WORKFLOW_FILE.to_string(),
2618        name: workflow_name.clone(),
2619        package_name: package_name.clone(),
2620        package_version: workflow_version.clone(),
2621    };
2622
2623    let external_key = options
2624        .external_key
2625        .clone()
2626        .filter(|key| !key.trim().is_empty())
2627        .unwrap_or_else(|| sanitize_external_key(&workflow_name));
2628    let title = options
2629        .title
2630        .clone()
2631        .filter(|title| !title.trim().is_empty())
2632        .unwrap_or_else(|| infer_bundle_title(selected, &workflow_name));
2633    let risk_level = options
2634        .risk_level
2635        .clone()
2636        .filter(|risk| !risk.trim().is_empty())
2637        .unwrap_or_else(|| infer_risk_level(selected));
2638    let rollout_policy = options
2639        .rollout_policy
2640        .clone()
2641        .filter(|policy| !policy.trim().is_empty())
2642        .unwrap_or_else(|| DEFAULT_ROLLOUT_POLICY.to_string());
2643
2644    let (deterministic_steps, fuzzy_steps) = match selected {
2645        Some(candidate) => candidate
2646            .steps
2647            .iter()
2648            .map(BundleStep::from_candidate_step)
2649            .partition::<Vec<_>, _>(|step| step.segment == SegmentKind::Deterministic),
2650        None => (Vec::new(), Vec::new()),
2651    };
2652
2653    let source_trace_hashes = selected
2654        .map(|candidate| candidate.promotion.source_trace_hashes.clone())
2655        .unwrap_or_default();
2656
2657    let mut source_traces = Vec::new();
2658    let mut fixture_refs = Vec::new();
2659    let mut fixture_payloads = Vec::new();
2660    if let Some(candidate) = selected {
2661        let mut fixture_trace_ids = BTreeSet::new();
2662        for example in &candidate.examples {
2663            fixture_trace_ids.insert(example.trace_id.clone());
2664        }
2665        for trace in traces {
2666            if find_sequence_start(trace, &candidate.sequence_signature).is_some() {
2667                fixture_trace_ids.insert(trace.id.clone());
2668            }
2669        }
2670        for trace_id in fixture_trace_ids {
2671            let trace = traces.iter().find(|trace| trace.id == trace_id);
2672            let source_hash = trace
2673                .and_then(|trace| trace.source_hash.clone())
2674                .or_else(|| {
2675                    candidate
2676                        .examples
2677                        .iter()
2678                        .find(|example| example.trace_id == trace_id)
2679                        .map(|example| example.source_hash.clone())
2680                })
2681                .unwrap_or_default();
2682            let fixture_relative = trace.map(|trace| {
2683                format!(
2684                    "{BUNDLE_FIXTURES_DIR}/{}.json",
2685                    sanitize_fixture_name(&trace.id)
2686                )
2687            });
2688            source_traces.push(BundleSourceTrace {
2689                trace_id: trace_id.clone(),
2690                source_hash: source_hash.clone(),
2691                source_url: trace.and_then(|trace| trace.source.clone()),
2692                source_receipt_id: trace
2693                    .and_then(|trace| trace.metadata.get("source_receipt_id"))
2694                    .and_then(|value| value.as_str().map(str::to_string)),
2695                fixture_path: fixture_relative.clone(),
2696            });
2697            if let (Some(trace), Some(fixture_path)) = (trace, fixture_relative.clone()) {
2698                let mut redacted = trace.clone();
2699                redact_trace_for_bundle(&mut redacted);
2700                fixture_refs.push(BundleFixtureRef {
2701                    path: fixture_path,
2702                    trace_id: trace.id.clone(),
2703                    source_hash,
2704                    redacted: true,
2705                });
2706                fixture_payloads.push(redacted);
2707            }
2708        }
2709    }
2710
2711    // Owner defaults to author so cloud importers always have a populated
2712    // ownership pointer, but stays separate from `author` so reviewers can
2713    // assign a different owner in the manifest before promotion.
2714    let author = selected.and_then(|candidate| candidate.promotion.author.clone());
2715    let promotion = BundlePromotion {
2716        owner: author.clone(),
2717        approver: selected.and_then(|candidate| candidate.promotion.approver.clone()),
2718        author,
2719        rollout_policy,
2720        rollback_target: selected.and_then(|candidate| candidate.promotion.rollback_target.clone()),
2721        created_at: now_rfc3339(),
2722        workflow_version,
2723        package_name: package_name.clone(),
2724        sample_count: selected
2725            .map(|candidate| candidate.promotion.sample_count)
2726            .unwrap_or_default(),
2727        confidence: selected
2728            .map(|candidate| candidate.promotion.confidence)
2729            .unwrap_or_default(),
2730        shadow_success_count: selected
2731            .map(|candidate| candidate.promotion.shadow_success_count)
2732            .unwrap_or_default(),
2733        shadow_failure_count: selected
2734            .map(|candidate| candidate.promotion.shadow_failure_count)
2735            .unwrap_or_default(),
2736        divergence_history: selected
2737            .map(|candidate| candidate.promotion.divergence_history.clone())
2738            .unwrap_or_default(),
2739        approval_history: selected
2740            .map(|candidate| candidate.promotion.approval_history.clone())
2741            .unwrap_or_default(),
2742        criteria: selected
2743            .map(|candidate| candidate.promotion.criteria.clone())
2744            .unwrap_or_default(),
2745        estimated_time_token_savings: selected
2746            .map(|candidate| candidate.promotion.estimated_time_token_savings.clone())
2747            .unwrap_or_default(),
2748    };
2749
2750    let redaction = BundleRedactionSummary {
2751        applied: !fixture_payloads.is_empty(),
2752        rules: vec![
2753            "sensitive_keys".to_string(),
2754            "secret_value_heuristic".to_string(),
2755        ],
2756        summary: if fixture_payloads.is_empty() {
2757            "no fixtures emitted".to_string()
2758        } else {
2759            "fixture payloads scrubbed of secret-like values and sensitive keys before write"
2760                .to_string()
2761        },
2762        fixture_count: fixture_payloads.len(),
2763    };
2764
2765    let eval_pack = if eval_pack_toml.trim().is_empty() {
2766        None
2767    } else {
2768        Some(BundleEvalPackRef {
2769            path: BUNDLE_EVAL_PACK_FILE.to_string(),
2770            link: selected
2771                .and_then(|candidate| candidate.promotion.eval_pack_link.clone())
2772                .filter(|link| !link.trim().is_empty()),
2773        })
2774    };
2775
2776    let manifest = CrystallizationBundleManifest {
2777        schema: BUNDLE_SCHEMA.to_string(),
2778        schema_version: BUNDLE_SCHEMA_VERSION,
2779        generated_at: now_rfc3339(),
2780        generator: BundleGenerator::default(),
2781        kind,
2782        candidate_id: selected
2783            .map(|candidate| candidate.id.clone())
2784            .unwrap_or_default(),
2785        external_key,
2786        title,
2787        team: options.team,
2788        repo: options.repo,
2789        risk_level,
2790        workflow: manifest_workflow,
2791        source_trace_hashes,
2792        source_traces,
2793        deterministic_steps,
2794        fuzzy_steps,
2795        side_effects: selected
2796            .map(|candidate| candidate.side_effects.clone())
2797            .unwrap_or_default(),
2798        capabilities: selected
2799            .map(|candidate| candidate.capabilities.clone())
2800            .unwrap_or_default(),
2801        required_secrets: selected
2802            .map(|candidate| candidate.required_secrets.clone())
2803            .unwrap_or_default(),
2804        savings: selected
2805            .map(|candidate| candidate.savings.clone())
2806            .unwrap_or_default(),
2807        shadow: selected
2808            .map(|candidate| candidate.shadow.clone())
2809            .unwrap_or_default(),
2810        eval_pack,
2811        fixtures: fixture_refs,
2812        promotion,
2813        redaction,
2814        confidence: selected
2815            .map(|candidate| candidate.confidence)
2816            .unwrap_or(0.0),
2817        rejection_reasons: report
2818            .rejected_candidates
2819            .iter()
2820            .flat_map(|candidate| candidate.rejection_reasons.iter().cloned())
2821            .collect(),
2822        warnings: report.warnings.clone(),
2823    };
2824
2825    Ok(CrystallizationBundle {
2826        manifest,
2827        report,
2828        harn_code,
2829        eval_pack_toml,
2830        fixtures: fixture_payloads,
2831    })
2832}
2833
2834/// Write a bundle to a directory. Creates the directory if it does not
2835/// already exist. Returns the manifest with `generated_at` and any
2836/// runtime-resolved metadata filled in.
2837pub fn write_crystallization_bundle(
2838    bundle: &CrystallizationBundle,
2839    bundle_dir: &Path,
2840) -> Result<CrystallizationBundleManifest, VmError> {
2841    std::fs::create_dir_all(bundle_dir).map_err(|error| {
2842        VmError::Runtime(format!(
2843            "failed to create bundle dir {}: {error}",
2844            bundle_dir.display()
2845        ))
2846    })?;
2847    write_bytes(
2848        &bundle_dir.join(BUNDLE_WORKFLOW_FILE),
2849        bundle.harn_code.as_bytes(),
2850    )?;
2851    let report_json = serde_json::to_vec_pretty(&bundle.report)
2852        .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
2853    write_bytes(&bundle_dir.join(BUNDLE_REPORT_FILE), &report_json)?;
2854
2855    if !bundle.eval_pack_toml.trim().is_empty() {
2856        write_bytes(
2857            &bundle_dir.join(BUNDLE_EVAL_PACK_FILE),
2858            bundle.eval_pack_toml.as_bytes(),
2859        )?;
2860    }
2861
2862    if !bundle.fixtures.is_empty() {
2863        let fixtures_dir = bundle_dir.join(BUNDLE_FIXTURES_DIR);
2864        std::fs::create_dir_all(&fixtures_dir).map_err(|error| {
2865            VmError::Runtime(format!(
2866                "failed to create fixtures dir {}: {error}",
2867                fixtures_dir.display()
2868            ))
2869        })?;
2870        for trace in &bundle.fixtures {
2871            let path = fixtures_dir.join(format!("{}.json", sanitize_fixture_name(&trace.id)));
2872            let payload = serde_json::to_vec_pretty(trace).map_err(|error| {
2873                VmError::Runtime(format!("failed to encode fixture {}: {error}", trace.id))
2874            })?;
2875            write_bytes(&path, &payload)?;
2876        }
2877    }
2878
2879    let manifest_json = serde_json::to_vec_pretty(&bundle.manifest)
2880        .map_err(|error| VmError::Runtime(format!("failed to encode manifest JSON: {error}")))?;
2881    write_bytes(&bundle_dir.join(BUNDLE_MANIFEST_FILE), &manifest_json)?;
2882    Ok(bundle.manifest.clone())
2883}
2884
2885/// Read a bundle manifest from disk. Verifies the schema marker but does
2886/// not cross-check workflow/report/eval-pack sibling files; for a richer
2887/// check use [`validate_crystallization_bundle`].
2888pub fn load_crystallization_bundle_manifest(
2889    bundle_dir: &Path,
2890) -> Result<CrystallizationBundleManifest, VmError> {
2891    let manifest_path = bundle_dir.join(BUNDLE_MANIFEST_FILE);
2892    let bytes = std::fs::read(&manifest_path).map_err(|error| {
2893        VmError::Runtime(format!(
2894            "failed to read bundle manifest {}: {error}",
2895            manifest_path.display()
2896        ))
2897    })?;
2898    let manifest: CrystallizationBundleManifest =
2899        serde_json::from_slice(&bytes).map_err(|error| {
2900            VmError::Runtime(format!(
2901                "failed to decode bundle manifest {}: {error}",
2902                manifest_path.display()
2903            ))
2904        })?;
2905    if manifest.schema != BUNDLE_SCHEMA {
2906        return Err(VmError::Runtime(format!(
2907            "bundle {} has unrecognized schema {:?} (expected {})",
2908            bundle_dir.display(),
2909            manifest.schema,
2910            BUNDLE_SCHEMA
2911        )));
2912    }
2913    if manifest.schema_version > BUNDLE_SCHEMA_VERSION {
2914        return Err(VmError::Runtime(format!(
2915            "bundle {} schema_version {} is newer than supported {}",
2916            bundle_dir.display(),
2917            manifest.schema_version,
2918            BUNDLE_SCHEMA_VERSION
2919        )));
2920    }
2921    Ok(manifest)
2922}
2923
2924/// Read every fixture trace referenced by the bundle manifest. Returns
2925/// the manifest plus loaded traces, in the order they appear in the
2926/// manifest. Fixtures with `path: None` are skipped.
2927pub fn load_crystallization_bundle(
2928    bundle_dir: &Path,
2929) -> Result<(CrystallizationBundleManifest, Vec<CrystallizationTrace>), VmError> {
2930    let manifest = load_crystallization_bundle_manifest(bundle_dir)?;
2931    let mut traces = Vec::new();
2932    for fixture in &manifest.fixtures {
2933        let path = bundle_dir.join(&fixture.path);
2934        traces.push(load_crystallization_trace(&path)?);
2935    }
2936    Ok((manifest, traces))
2937}
2938
2939/// Validate a bundle directory layout and contents. Cheap enough to call
2940/// from a CLI smoke command; performs no live side effects.
2941pub fn validate_crystallization_bundle(bundle_dir: &Path) -> Result<BundleValidation, VmError> {
2942    let mut validation = BundleValidation {
2943        bundle_dir: bundle_dir.display().to_string(),
2944        ..BundleValidation::default()
2945    };
2946    let manifest = match load_crystallization_bundle_manifest(bundle_dir) {
2947        Ok(manifest) => manifest,
2948        Err(error) => {
2949            validation.problems.push(error.to_string());
2950            return Ok(validation);
2951        }
2952    };
2953    validation.manifest_ok = true;
2954    validation.schema = manifest.schema.clone();
2955    validation.schema_version = manifest.schema_version;
2956    validation.kind = manifest.kind.clone();
2957    validation.candidate_id = manifest.candidate_id.clone();
2958
2959    let workflow_path = bundle_dir.join(&manifest.workflow.path);
2960    if workflow_path.exists() {
2961        validation.workflow_ok = true;
2962    } else {
2963        validation
2964            .problems
2965            .push(format!("missing workflow file {}", workflow_path.display()));
2966    }
2967
2968    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
2969    match std::fs::read(&report_path) {
2970        Ok(bytes) => match serde_json::from_slice::<CrystallizationReport>(&bytes) {
2971            Ok(report) => {
2972                validation.report_ok = true;
2973                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
2974                    && manifest.candidate_id.is_empty()
2975                {
2976                    validation
2977                        .problems
2978                        .push("manifest is non-rejected but has empty candidate_id".to_string());
2979                }
2980                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
2981                    && report.selected_candidate_id.as_deref() != Some(&manifest.candidate_id)
2982                {
2983                    validation.problems.push(format!(
2984                        "report selected_candidate_id {:?} does not match manifest candidate_id {}",
2985                        report.selected_candidate_id, manifest.candidate_id
2986                    ));
2987                }
2988            }
2989            Err(error) => {
2990                validation
2991                    .problems
2992                    .push(format!("invalid report.json: {error}"));
2993            }
2994        },
2995        Err(error) => {
2996            validation.problems.push(format!(
2997                "missing report file {}: {error}",
2998                report_path.display()
2999            ));
3000        }
3001    }
3002
3003    if let Some(eval_pack) = &manifest.eval_pack {
3004        let path = bundle_dir.join(&eval_pack.path);
3005        if path.exists() {
3006            validation.eval_pack_ok = true;
3007        } else {
3008            validation.problems.push(format!(
3009                "manifest references eval pack {} but file is missing",
3010                path.display()
3011            ));
3012        }
3013    } else {
3014        validation.eval_pack_ok = true;
3015    }
3016
3017    let mut fixtures_problem = false;
3018    for fixture in &manifest.fixtures {
3019        let path = bundle_dir.join(&fixture.path);
3020        if !path.exists() {
3021            validation
3022                .problems
3023                .push(format!("missing fixture {}", path.display()));
3024            fixtures_problem = true;
3025            continue;
3026        }
3027        if !fixture.redacted {
3028            validation.problems.push(format!(
3029                "fixture {} is not marked redacted; bundle must not ship raw private payloads",
3030                fixture.path
3031            ));
3032            fixtures_problem = true;
3033        }
3034    }
3035    validation.fixtures_ok = !fixtures_problem;
3036
3037    if !manifest.redaction.applied && !manifest.fixtures.is_empty() {
3038        validation
3039            .problems
3040            .push("redaction.applied is false but bundle includes fixtures".to_string());
3041    } else {
3042        validation.redaction_ok = true;
3043    }
3044    if !manifest
3045        .required_secrets
3046        .iter()
3047        .all(|secret| secret_id_looks_logical(secret))
3048    {
3049        validation.problems.push(
3050            "required_secrets contains a non-logical id (looks like a raw secret)".to_string(),
3051        );
3052    }
3053
3054    Ok(validation)
3055}
3056
3057/// Replay shadow comparison from a bundle: re-runs the deterministic
3058/// shadow check in-process against the bundle's redacted fixtures, with
3059/// no live side effects. Returns the manifest and the freshly computed
3060/// `ShadowRunReport`. The returned report is suitable for cloud import or
3061/// for asserting determinism in CI.
3062pub fn shadow_replay_bundle(
3063    bundle_dir: &Path,
3064) -> Result<(CrystallizationBundleManifest, ShadowRunReport), VmError> {
3065    let (manifest, traces) = load_crystallization_bundle(bundle_dir)?;
3066    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
3067    let bytes = std::fs::read(&report_path).map_err(|error| {
3068        VmError::Runtime(format!(
3069            "failed to read bundle report {}: {error}",
3070            report_path.display()
3071        ))
3072    })?;
3073    let report: CrystallizationReport = serde_json::from_slice(&bytes).map_err(|error| {
3074        VmError::Runtime(format!(
3075            "failed to decode bundle report {}: {error}",
3076            report_path.display()
3077        ))
3078    })?;
3079    let candidate = report
3080        .selected_candidate_id
3081        .as_deref()
3082        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
3083        .ok_or_else(|| {
3084            VmError::Runtime(format!(
3085                "bundle {} has no selected candidate to replay",
3086                bundle_dir.display()
3087            ))
3088        })?;
3089    let shadow = shadow_candidate(candidate, &traces);
3090    Ok((manifest, shadow))
3091}
3092
3093fn write_bytes(path: &Path, bytes: &[u8]) -> Result<(), VmError> {
3094    crate::atomic_io::atomic_write(path, bytes)
3095        .map_err(|error| VmError::Runtime(format!("failed to write {}: {error}", path.display())))
3096}
3097
/// Turn a trace id into a file-name stem.
///
/// Every character other than an ASCII letter, ASCII digit, `-` or `_` is
/// replaced with `_`; leading and trailing underscores are then stripped.
/// If nothing remains, the stem `"trace"` is used.
fn sanitize_fixture_name(raw: &str) -> String {
    let mut cleaned = String::with_capacity(raw.len());
    for ch in raw.chars() {
        let keep = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_');
        cleaned.push(if keep { ch } else { '_' });
    }
    match cleaned.trim_matches('_') {
        "" => "trace".to_string(),
        stem => stem.to_string(),
    }
}
3115
/// Derive a lowercase, dash-separated key from `raw`.
///
/// Runs of ASCII letters and digits are kept (lowercased) and joined with a
/// single `-`; every other character acts as a separator. Leading and
/// trailing separators produce no dash. If `raw` contains no ASCII
/// alphanumeric character, `"crystallized-workflow"` is returned.
fn sanitize_external_key(raw: &str) -> String {
    let words: Vec<String> = raw
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(str::to_ascii_lowercase)
        .collect();
    if words.is_empty() {
        return "crystallized-workflow".to_string();
    }
    words.join("-")
}
3136
3137fn infer_bundle_title(candidate: Option<&WorkflowCandidate>, fallback_name: &str) -> String {
3138    if let Some(candidate) = candidate {
3139        format!(
3140            "{} ({} step{})",
3141            candidate.name,
3142            candidate.steps.len(),
3143            if candidate.steps.len() == 1 { "" } else { "s" }
3144        )
3145    } else {
3146        format!("rejected: {fallback_name}")
3147    }
3148}
3149
3150fn infer_risk_level(candidate: Option<&WorkflowCandidate>) -> String {
3151    let Some(candidate) = candidate else {
3152        return "high".to_string();
3153    };
3154    let touches_external = candidate.side_effects.iter().any(side_effect_is_external);
3155    let needs_secret = !candidate.required_secrets.is_empty();
3156    if touches_external && needs_secret {
3157        "high".to_string()
3158    } else if touches_external || needs_secret {
3159        "medium".to_string()
3160    } else {
3161        "low".to_string()
3162    }
3163}
3164
3165fn side_effect_is_external(effect: &CrystallizationSideEffect) -> bool {
3166    let kind = effect.kind.to_ascii_lowercase();
3167    if kind.is_empty() {
3168        return false;
3169    }
3170    // Plan-only side effects stay inside Harn's own data plane: they
3171    // write receipts, append to the in-process event log, or stash plans.
3172    // None of those touch tenant-external systems.
3173    let internal = kind.contains("receipt")
3174        || kind.contains("event_log")
3175        || kind.contains("memo")
3176        || kind.contains("plan");
3177    if internal {
3178        return false;
3179    }
3180    kind.contains("post")
3181        || kind.contains("write")
3182        || kind.contains("publish")
3183        || kind.contains("delete")
3184        || kind.contains("send")
3185}
3186
3187fn candidate_is_plan_only(candidate: &WorkflowCandidate) -> bool {
3188    if candidate.steps.is_empty() {
3189        return false;
3190    }
3191    candidate.side_effects.iter().all(|effect| {
3192        let kind = effect.kind.to_ascii_lowercase();
3193        // Plan-only side effects stay inside Harn's own data plane: receipt
3194        // writes, in-memory event-log appends, file-only mutations, etc.
3195        kind.is_empty()
3196            || kind.contains("receipt")
3197            || kind.contains("event_log")
3198            || kind.contains("memo")
3199            || kind.contains("plan")
3200            || (kind.contains("file") && !kind.contains("publish"))
3201    })
3202}
3203
3204fn redact_trace_for_bundle(trace: &mut CrystallizationTrace) {
3205    for action in &mut trace.actions {
3206        redact_bundle_value(&mut action.inputs);
3207        if let Some(output) = action.output.as_mut() {
3208            redact_bundle_value(output);
3209        }
3210        if let Some(observed) = action.observed_output.as_mut() {
3211            redact_bundle_value(observed);
3212        }
3213        for value in action.parameters.values_mut() {
3214            redact_bundle_value(value);
3215        }
3216        for (_, value) in action.metadata.iter_mut() {
3217            redact_bundle_value(value);
3218        }
3219    }
3220    for (_, value) in trace.metadata.iter_mut() {
3221        redact_bundle_value(value);
3222    }
3223    if let Some(run) = trace.replay_run.as_mut() {
3224        redact_replay_run_for_bundle(run);
3225    }
3226}
3227
3228fn redact_replay_run_for_bundle(run: &mut ReplayTraceRun) {
3229    for value in run
3230        .event_log_entries
3231        .iter_mut()
3232        .chain(run.trigger_firings.iter_mut())
3233        .chain(run.llm_interactions.iter_mut())
3234        .chain(run.protocol_interactions.iter_mut())
3235        .chain(run.approval_interactions.iter_mut())
3236        .chain(run.effect_receipts.iter_mut())
3237        .chain(run.agent_transcript_deltas.iter_mut())
3238        .chain(run.final_artifacts.iter_mut())
3239        .chain(run.policy_decisions.iter_mut())
3240    {
3241        redact_bundle_value(value);
3242    }
3243}
3244
3245fn redact_bundle_value(value: &mut JsonValue) {
3246    match value {
3247        JsonValue::String(text) if looks_like_secret_value(text) => {
3248            *text = "[redacted]".to_string();
3249        }
3250        JsonValue::Array(items) => {
3251            for item in items {
3252                redact_bundle_value(item);
3253            }
3254        }
3255        JsonValue::Object(map) => {
3256            for (key, child) in map.iter_mut() {
3257                if is_sensitive_bundle_key(key) {
3258                    *child = JsonValue::String("[redacted]".to_string());
3259                } else {
3260                    redact_bundle_value(child);
3261                }
3262            }
3263        }
3264        _ => {}
3265    }
3266}
3267
/// Decide whether an object key names a value that must be redacted.
///
/// Matching is ASCII case-insensitive. A key is sensitive when it contains
/// one of the credential fragments (`secret`, `token`, `password`,
/// `api_key`, `api-key`, `apikey`) or is exactly one of the credential
/// header names (`authorization`, `proxy-authorization`, `cookie`,
/// `set-cookie`).
///
/// The hyphenated `api-key` fragment and `proxy-authorization` are included
/// so that HTTP-header style keys such as `X-Api-Key` do not slip through
/// the filter.
fn is_sensitive_bundle_key(key: &str) -> bool {
    // Substrings that mark a key as sensitive wherever they appear.
    const SENSITIVE_FRAGMENTS: [&str; 6] = [
        "secret", "token", "password", "api_key", "api-key", "apikey",
    ];
    // Header names that are matched exactly; `cookies`, for example, is
    // not treated as sensitive.
    const SENSITIVE_HEADERS: [&str; 4] = [
        "authorization",
        "proxy-authorization",
        "cookie",
        "set-cookie",
    ];

    let lower = key.to_ascii_lowercase();
    SENSITIVE_FRAGMENTS
        .iter()
        .any(|fragment| lower.contains(*fragment))
        || SENSITIVE_HEADERS.iter().any(|header| lower == *header)
}
3279
/// Heuristically decide whether a string value looks like a raw secret.
///
/// After trimming surrounding whitespace, a value is treated as a secret
/// when it starts with one of a fixed set of credential prefixes (matched
/// case-sensitively), or when it is longer than 48 bytes and consists only
/// of ASCII letters, ASCII digits, `_` and `-`.
fn looks_like_secret_value(value: &str) -> bool {
    const KNOWN_PREFIXES: [&str; 6] = ["sk-", "ghp_", "ghs_", "xoxb-", "xoxp-", "AKIA"];

    let candidate = value.trim();
    if KNOWN_PREFIXES
        .iter()
        .any(|prefix| candidate.starts_with(*prefix))
    {
        return true;
    }

    // Long opaque runs with no whitespace or punctuation.
    candidate.len() > 48
        && candidate
            .bytes()
            .all(|byte| byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'-')
}
3293
3294fn secret_id_looks_logical(value: &str) -> bool {
3295    !looks_like_secret_value(value) && !value.trim().is_empty()
3296}
3297
3298#[cfg(test)]
3299mod tests {
3300    use super::*;
3301
3302    fn version_trace(
3303        id: &str,
3304        version: &str,
3305        side_target: &str,
3306        fuzzy: bool,
3307    ) -> CrystallizationTrace {
3308        CrystallizationTrace {
3309            id: id.to_string(),
3310            actions: vec![
3311                CrystallizationAction {
3312                    id: format!("{id}-branch"),
3313                    kind: "tool_call".to_string(),
3314                    name: "git.checkout_branch".to_string(),
3315                    parameters: BTreeMap::from([
3316                        ("repo_path".to_string(), json!(format!("/tmp/{id}"))),
3317                        (
3318                            "branch_name".to_string(),
3319                            json!(format!("release-{version}")),
3320                        ),
3321                    ]),
3322                    side_effects: vec![CrystallizationSideEffect {
3323                        kind: "git_ref".to_string(),
3324                        target: side_target.to_string(),
3325                        capability: Some("git.write".to_string()),
3326                        ..CrystallizationSideEffect::default()
3327                    }],
3328                    capabilities: vec!["git.write".to_string()],
3329                    deterministic: Some(true),
3330                    duration_ms: Some(20),
3331                    ..CrystallizationAction::default()
3332                },
3333                CrystallizationAction {
3334                    id: format!("{id}-manifest"),
3335                    kind: "file_mutation".to_string(),
3336                    name: "update_manifest_version".to_string(),
3337                    inputs: json!({"version": version, "path": "harn.toml"}),
3338                    parameters: BTreeMap::from([("version".to_string(), json!(version))]),
3339                    side_effects: vec![CrystallizationSideEffect {
3340                        kind: "file_write".to_string(),
3341                        target: "harn.toml".to_string(),
3342                        capability: Some("fs.write".to_string()),
3343                        ..CrystallizationSideEffect::default()
3344                    }],
3345                    capabilities: vec!["fs.write".to_string()],
3346                    deterministic: Some(true),
3347                    ..CrystallizationAction::default()
3348                },
3349                CrystallizationAction {
3350                    id: format!("{id}-release"),
3351                    kind: if fuzzy { "model_call" } else { "tool_call" }.to_string(),
3352                    name: "prepare_release_notes".to_string(),
3353                    inputs: json!({"release_target": "crates.io", "version": version}),
3354                    parameters: BTreeMap::from([
3355                        ("release_target".to_string(), json!("crates.io")),
3356                        ("version".to_string(), json!(version)),
3357                    ]),
3358                    fuzzy: Some(fuzzy),
3359                    deterministic: Some(!fuzzy),
3360                    cost: CrystallizationCost {
3361                        model_calls: if fuzzy { 1 } else { 0 },
3362                        input_tokens: if fuzzy { 1200 } else { 0 },
3363                        output_tokens: if fuzzy { 250 } else { 0 },
3364                        total_cost_usd: if fuzzy { 0.01 } else { 0.0 },
3365                        wall_ms: 3000,
3366                        ..CrystallizationCost::default()
3367                    },
3368                    ..CrystallizationAction::default()
3369                },
3370            ],
3371            ..CrystallizationTrace::default()
3372        }
3373    }
3374
3375    #[test]
3376    fn crystallizes_repeated_version_bump_with_parameters() {
3377        let traces = (0..5)
3378            .map(|idx| {
3379                version_trace(
3380                    &format!("trace_{idx}"),
3381                    &format!("0.7.{idx}"),
3382                    "release-branch",
3383                    false,
3384                )
3385            })
3386            .collect::<Vec<_>>();
3387
3388        let artifacts = crystallize_traces(
3389            traces,
3390            CrystallizeOptions {
3391                workflow_name: Some("version_bump".to_string()),
3392                ..CrystallizeOptions::default()
3393            },
3394        )
3395        .unwrap();
3396
3397        let candidate = &artifacts.report.candidates[0];
3398        assert!(candidate.rejection_reasons.is_empty());
3399        assert!(candidate.shadow.pass);
3400        assert_eq!(candidate.examples.len(), 5);
3401        let params = candidate
3402            .parameters
3403            .iter()
3404            .map(|param| param.name.as_str())
3405            .collect::<BTreeSet<_>>();
3406        assert!(params.contains("version"));
3407        assert!(params.contains("repo_path"));
3408        assert!(params.contains("branch_name"));
3409        assert!(artifacts.harn_code.contains("pipeline version_bump("));
3410        assert!(artifacts.eval_pack_toml.contains("crystallization-shadow"));
3411    }
3412
3413    #[test]
3414    fn rejects_divergent_side_effects() {
3415        let traces = vec![
3416            version_trace("trace_a", "0.7.1", "release-branch", false),
3417            version_trace("trace_b", "0.7.2", "main", false),
3418            version_trace("trace_c", "0.7.3", "release-branch", false),
3419        ];
3420
3421        let artifacts = crystallize_traces(traces, CrystallizeOptions::default()).unwrap();
3422
3423        assert!(artifacts.report.candidates.is_empty());
3424        assert_eq!(artifacts.report.rejected_candidates.len(), 1);
3425        assert!(artifacts.report.rejected_candidates[0].rejection_reasons[0]
3426            .contains("divergent side effects"));
3427    }
3428
3429    #[test]
3430    fn preserves_remaining_fuzzy_segment() {
3431        let traces = (0..3)
3432            .map(|idx| {
3433                version_trace(
3434                    &format!("trace_{idx}"),
3435                    &format!("0.8.{idx}"),
3436                    "release-branch",
3437                    true,
3438                )
3439            })
3440            .collect::<Vec<_>>();
3441
3442        let artifacts = crystallize_traces(traces, CrystallizeOptions::default()).unwrap();
3443        let candidate = &artifacts.report.candidates[0];
3444
3445        assert!(candidate
3446            .steps
3447            .iter()
3448            .any(|step| step.segment == SegmentKind::Fuzzy));
3449        assert!(candidate.savings.remaining_model_calls > 0);
3450        assert!(artifacts.harn_code.contains("TODO: fuzzy segment"));
3451    }
3452
3453    #[test]
3454    fn excludes_unresolved_policy_violations_from_mining() {
3455        let mut traces = version_traces(2);
3456        let mut excluded = version_trace("trace_policy_blocked", "0.7.99", "release-branch", false);
3457        excluded
3458            .metadata
3459            .insert("unresolved_policy_violation".to_string(), json!(true));
3460        traces.push(excluded);
3461
3462        let artifacts = crystallize_traces(
3463            traces,
3464            CrystallizeOptions {
3465                min_examples: 2,
3466                ..CrystallizeOptions::default()
3467            },
3468        )
3469        .unwrap();
3470
3471        assert_eq!(artifacts.report.source_trace_count, 2);
3472        assert_eq!(artifacts.report.excluded_trace_count, 1);
3473        assert!(artifacts.report.selected_candidate_id.is_some());
3474    }
3475
3476    fn plan_only_trace(id: &str, suffix: &str) -> CrystallizationTrace {
3477        CrystallizationTrace {
3478            id: id.to_string(),
3479            actions: vec![
3480                CrystallizationAction {
3481                    id: format!("{id}-classify"),
3482                    kind: "tool_call".to_string(),
3483                    name: "classify_issue".to_string(),
3484                    parameters: BTreeMap::from([
3485                        ("issue_id".to_string(), json!(format!("HAR-{suffix}"))),
3486                        ("team_key".to_string(), json!("HAR")),
3487                    ]),
3488                    capabilities: vec!["linear.read".to_string()],
3489                    deterministic: Some(true),
3490                    duration_ms: Some(15),
3491                    ..CrystallizationAction::default()
3492                },
3493                CrystallizationAction {
3494                    id: format!("{id}-receipt"),
3495                    kind: "receipt_write".to_string(),
3496                    name: "emit_receipt".to_string(),
3497                    inputs: json!({"summary": format!("plan only #{suffix}"), "kind": "plan"}),
3498                    parameters: BTreeMap::from([
3499                        ("kind".to_string(), json!("plan")),
3500                        ("summary".to_string(), json!(format!("plan only #{suffix}"))),
3501                    ]),
3502                    side_effects: vec![CrystallizationSideEffect {
3503                        kind: "receipt_write".to_string(),
3504                        target: "tenant_event_log".to_string(),
3505                        capability: Some("receipt.write".to_string()),
3506                        ..CrystallizationSideEffect::default()
3507                    }],
3508                    capabilities: vec!["receipt.write".to_string()],
3509                    deterministic: Some(true),
3510                    duration_ms: Some(5),
3511                    ..CrystallizationAction::default()
3512                },
3513            ],
3514            ..CrystallizationTrace::default()
3515        }
3516    }
3517
3518    fn version_traces(count: usize) -> Vec<CrystallizationTrace> {
3519        (0..count)
3520            .map(|idx| {
3521                version_trace(
3522                    &format!("trace_{idx}"),
3523                    &format!("0.7.{idx}"),
3524                    "release-branch",
3525                    false,
3526                )
3527            })
3528            .collect()
3529    }
3530
3531    #[test]
3532    fn build_bundle_assembles_versioned_manifest() {
3533        let traces = version_traces(5);
3534        let artifacts = crystallize_traces(
3535            traces.clone(),
3536            CrystallizeOptions {
3537                workflow_name: Some("version_bump".to_string()),
3538                package_name: Some("release-workflows".to_string()),
3539                author: Some("ops@example.com".to_string()),
3540                approver: Some("lead@example.com".to_string()),
3541                eval_pack_link: Some("eval-pack://release-workflows/v1".to_string()),
3542                ..CrystallizeOptions::default()
3543            },
3544        )
3545        .unwrap();
3546
3547        let bundle = build_crystallization_bundle(
3548            artifacts,
3549            &traces,
3550            BundleOptions {
3551                team: Some("platform".to_string()),
3552                repo: Some("burin-labs/harn".to_string()),
3553                ..BundleOptions::default()
3554            },
3555        )
3556        .unwrap();
3557
3558        let manifest = &bundle.manifest;
3559        assert_eq!(manifest.schema, BUNDLE_SCHEMA);
3560        assert_eq!(manifest.schema_version, BUNDLE_SCHEMA_VERSION);
3561        assert_eq!(manifest.kind, BundleKind::Candidate);
3562        assert!(!manifest.candidate_id.is_empty());
3563        assert_eq!(manifest.workflow.name, "version_bump");
3564        assert_eq!(manifest.workflow.package_name, "release-workflows");
3565        assert_eq!(manifest.workflow.path, BUNDLE_WORKFLOW_FILE);
3566        assert_eq!(manifest.team.as_deref(), Some("platform"));
3567        assert_eq!(manifest.repo.as_deref(), Some("burin-labs/harn"));
3568        assert_eq!(manifest.external_key, "version-bump");
3569        assert_eq!(manifest.promotion.rollout_policy, "shadow_then_canary");
3570        assert_eq!(
3571            manifest.promotion.author.as_deref(),
3572            Some("ops@example.com")
3573        );
3574        assert_eq!(
3575            manifest.promotion.approver.as_deref(),
3576            Some("lead@example.com")
3577        );
3578        assert_eq!(manifest.promotion.workflow_version, "0.1.0");
3579        assert!(manifest.deterministic_steps.len() + manifest.fuzzy_steps.len() > 0);
3580        assert_eq!(manifest.source_traces.len(), traces.len());
3581        assert_eq!(manifest.fixtures.len(), traces.len());
3582        assert!(manifest.fixtures.iter().all(|fixture| fixture.redacted));
3583        assert!(manifest.redaction.applied);
3584        assert!(manifest.redaction.fixture_count > 0);
3585        assert!(manifest
3586            .eval_pack
3587            .as_ref()
3588            .is_some_and(|eval| eval.path == BUNDLE_EVAL_PACK_FILE));
3589        assert!(manifest
3590            .required_secrets
3591            .iter()
3592            .all(|secret| !secret.is_empty()));
3593    }
3594
3595    #[test]
3596    fn write_bundle_round_trips_through_disk() {
3597        let traces = version_traces(5);
3598        let artifacts = crystallize_traces(
3599            traces.clone(),
3600            CrystallizeOptions {
3601                workflow_name: Some("version_bump".to_string()),
3602                ..CrystallizeOptions::default()
3603            },
3604        )
3605        .unwrap();
3606        let bundle =
3607            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3608
3609        let dir = tempfile::tempdir().unwrap();
3610        let written = write_crystallization_bundle(&bundle, dir.path()).unwrap();
3611        assert_eq!(written.candidate_id, bundle.manifest.candidate_id);
3612
3613        // Files exist on disk.
3614        for relative in [
3615            BUNDLE_MANIFEST_FILE,
3616            BUNDLE_REPORT_FILE,
3617            BUNDLE_WORKFLOW_FILE,
3618            BUNDLE_EVAL_PACK_FILE,
3619        ] {
3620            assert!(dir.path().join(relative).exists(), "missing {relative}");
3621        }
3622        let fixtures_dir = dir.path().join(BUNDLE_FIXTURES_DIR);
3623        assert!(fixtures_dir.is_dir());
3624        assert_eq!(
3625            std::fs::read_dir(&fixtures_dir).unwrap().count(),
3626            traces.len()
3627        );
3628
3629        // Manifest round-trips.
3630        let (loaded_manifest, loaded_traces) = load_crystallization_bundle(dir.path()).unwrap();
3631        assert_eq!(loaded_manifest, bundle.manifest);
3632        assert_eq!(loaded_traces.len(), traces.len());
3633
3634        // Validation passes.
3635        let validation = validate_crystallization_bundle(dir.path()).unwrap();
3636        assert!(
3637            validation.problems.is_empty(),
3638            "unexpected problems: {:?}",
3639            validation.problems
3640        );
3641        assert!(validation.is_ok());
3642        assert!(validation.workflow_ok && validation.report_ok);
3643        assert!(validation.fixtures_ok && validation.redaction_ok);
3644
3645        // Shadow replay matches the persisted shadow report.
3646        let (replay_manifest, shadow) = shadow_replay_bundle(dir.path()).unwrap();
3647        assert_eq!(replay_manifest.candidate_id, bundle.manifest.candidate_id);
3648        assert!(shadow.pass, "shadow should still pass");
3649        assert_eq!(shadow.compared_traces, traces.len());
3650    }
3651
3652    #[test]
3653    fn validate_rejects_bundle_with_missing_workflow() {
3654        let traces = version_traces(3);
3655        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
3656        let bundle =
3657            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3658
3659        let dir = tempfile::tempdir().unwrap();
3660        write_crystallization_bundle(&bundle, dir.path()).unwrap();
3661        std::fs::remove_file(dir.path().join(BUNDLE_WORKFLOW_FILE)).unwrap();
3662
3663        let validation = validate_crystallization_bundle(dir.path()).unwrap();
3664        assert!(!validation.is_ok());
3665        assert!(validation
3666            .problems
3667            .iter()
3668            .any(|problem| problem.contains("missing workflow file")));
3669    }
3670
3671    #[test]
3672    fn validate_rejects_bundle_with_unredacted_fixture() {
3673        let traces = version_traces(3);
3674        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
3675        let mut bundle =
3676            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3677        // Force a fixture to claim it is unredacted; this must trip
3678        // validation so a malicious or careless producer cannot ship raw
3679        // private payloads under the bundle contract.
3680        bundle.manifest.fixtures[0].redacted = false;
3681        let dir = tempfile::tempdir().unwrap();
3682        write_crystallization_bundle(&bundle, dir.path()).unwrap();
3683
3684        let validation = validate_crystallization_bundle(dir.path()).unwrap();
3685        assert!(!validation.is_ok());
3686        assert!(validation
3687            .problems
3688            .iter()
3689            .any(|problem| problem.contains("not marked redacted")));
3690    }
3691
3692    #[test]
3693    fn validate_rejects_unsupported_schema_version() {
3694        let traces = version_traces(3);
3695        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
3696        let mut bundle =
3697            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3698        bundle.manifest.schema_version = BUNDLE_SCHEMA_VERSION + 1;
3699        let dir = tempfile::tempdir().unwrap();
3700        write_crystallization_bundle(&bundle, dir.path()).unwrap();
3701
3702        let validation = validate_crystallization_bundle(dir.path()).unwrap();
3703        assert!(!validation.is_ok());
3704        assert!(validation
3705            .problems
3706            .iter()
3707            .any(|problem| problem.contains("schema_version")));
3708    }
3709
3710    #[test]
3711    fn redacts_secret_like_values_in_fixtures() {
3712        // Build secret-shaped strings at runtime so we exercise the
3713        // redaction prefixes (`xoxb-`, `ghp_`, `sk-`) without checking in
3714        // source that looks like a real credential to secret scanners.
3715        let slack_prefix = format!("{}{}", "xo", "xb-");
3716        let github_prefix = format!("{}{}", "gh", "p_");
3717        let openai_prefix = "sk-".to_string();
3718        let pad = "A".repeat(48);
3719        let slack_secret = format!("{slack_prefix}1234567890-{pad}");
3720        let github_secret = format!("{github_prefix}{pad}");
3721        let openai_secret = format!("{openai_prefix}{pad}");
3722
3723        let mut secret_action = CrystallizationAction {
3724            id: "secret".to_string(),
3725            kind: "tool_call".to_string(),
3726            name: "post_release_to_slack".to_string(),
3727            parameters: BTreeMap::from([
3728                ("slack_token".to_string(), json!(slack_secret)),
3729                ("channel".to_string(), json!("#releases")),
3730            ]),
3731            inputs: json!({
3732                "authorization": format!("Bearer {github_secret}"),
3733                "version": "0.7.1",
3734            }),
3735            ..CrystallizationAction::default()
3736        };
3737        secret_action
3738            .metadata
3739            .insert("api_key".to_string(), json!(openai_secret));
3740
3741        let mut trace = CrystallizationTrace {
3742            id: "trace_secret".to_string(),
3743            actions: vec![secret_action],
3744            ..CrystallizationTrace::default()
3745        };
3746        redact_trace_for_bundle(&mut trace);
3747        let action = &trace.actions[0];
3748        assert_eq!(
3749            action.parameters.get("slack_token"),
3750            Some(&json!("[redacted]"))
3751        );
3752        assert_eq!(action.parameters.get("channel"), Some(&json!("#releases")));
3753        let inputs = action.inputs.as_object().unwrap();
3754        assert_eq!(inputs.get("authorization"), Some(&json!("[redacted]")));
3755        assert_eq!(inputs.get("version"), Some(&json!("0.7.1")));
3756        assert_eq!(action.metadata.get("api_key"), Some(&json!("[redacted]")));
3757    }
3758
3759    #[test]
3760    fn plan_only_fixture_yields_plan_only_kind() {
3761        let traces = (0..3)
3762            .map(|idx| plan_only_trace(&format!("plan_{idx}"), &format!("{idx}")))
3763            .collect::<Vec<_>>();
3764        let artifacts = crystallize_traces(
3765            traces.clone(),
3766            CrystallizeOptions {
3767                workflow_name: Some("plan_only_triage".to_string()),
3768                ..CrystallizeOptions::default()
3769            },
3770        )
3771        .unwrap();
3772        let bundle =
3773            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3774        assert_eq!(bundle.manifest.kind, BundleKind::PlanOnly);
3775        assert_eq!(bundle.manifest.risk_level, "low");
3776    }
3777
3778    #[test]
3779    fn rejected_bundle_has_rejected_kind() {
3780        let traces = vec![
3781            version_trace("trace_a", "0.7.1", "release-branch", false),
3782            version_trace("trace_b", "0.7.2", "main", false),
3783            version_trace("trace_c", "0.7.3", "release-branch", false),
3784        ];
3785        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
3786        let bundle =
3787            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3788        assert_eq!(bundle.manifest.kind, BundleKind::Rejected);
3789        assert!(bundle.manifest.candidate_id.is_empty());
3790        assert!(!bundle.manifest.rejection_reasons.is_empty());
3791        assert!(bundle.fixtures.is_empty());
3792    }
3793
3794    #[test]
3795    fn validate_round_trips_rejected_bundle() {
3796        let traces = vec![
3797            version_trace("trace_a", "0.7.1", "release-branch", false),
3798            version_trace("trace_b", "0.7.2", "main", false),
3799            version_trace("trace_c", "0.7.3", "release-branch", false),
3800        ];
3801        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
3802        let bundle =
3803            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3804        let dir = tempfile::tempdir().unwrap();
3805        write_crystallization_bundle(&bundle, dir.path()).unwrap();
3806
3807        let validation = validate_crystallization_bundle(dir.path()).unwrap();
3808        assert!(validation.is_ok(), "{:?}", validation.problems);
3809        assert_eq!(validation.kind, BundleKind::Rejected);
3810    }
3811
3812    #[test]
3813    fn shadow_replay_fails_when_fixture_diverges() {
3814        let traces = version_traces(3);
3815        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
3816        let bundle =
3817            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3818        let dir = tempfile::tempdir().unwrap();
3819        write_crystallization_bundle(&bundle, dir.path()).unwrap();
3820
3821        // Tamper with one redacted fixture so its action list no longer
3822        // matches the candidate signature; the replay must fail without
3823        // panicking.
3824        let fixture_dir = dir.path().join(BUNDLE_FIXTURES_DIR);
3825        let some_fixture = std::fs::read_dir(&fixture_dir)
3826            .unwrap()
3827            .next()
3828            .unwrap()
3829            .unwrap()
3830            .path();
3831        let mut tampered: CrystallizationTrace =
3832            serde_json::from_slice(&std::fs::read(&some_fixture).unwrap()).unwrap();
3833        tampered.actions.truncate(1);
3834        std::fs::write(&some_fixture, serde_json::to_vec_pretty(&tampered).unwrap()).unwrap();
3835
3836        let (_, shadow) = shadow_replay_bundle(dir.path()).unwrap();
3837        assert!(!shadow.pass);
3838        assert!(!shadow.failures.is_empty());
3839    }
3840}