Skip to main content

harn_vm/orchestration/
crystallize.rs

1//! Workflow crystallization primitives.
2//!
3//! This module keeps the first mining pass deliberately conservative: it
4//! accepts ordered trace actions, finds a repeated contiguous action sequence,
5//! extracts scalar parameters from fields that vary across examples, rejects
6//! divergent side effects, and emits a reviewable Harn skeleton plus shadow
7//! comparison metadata. Hosted surfaces can layer richer mining on top of this
8//! stable IR without changing the CLI contract.
9
10use std::collections::{BTreeMap, BTreeSet};
11use std::fmt::Write as _;
12use std::path::{Path, PathBuf};
13
14use serde::{Deserialize, Serialize};
15use serde_json::{json, Value as JsonValue};
16use sha2::{Digest, Sha256};
17
18use super::{new_id, now_rfc3339, RunRecord};
19use crate::value::VmError;
20
/// Version number reported as `input_format.version` in generated reports.
const TRACE_SCHEMA_VERSION: u32 = 1;
/// Floor applied to `CrystallizeOptions::min_examples` by `crystallize_traces`:
/// mining never runs with fewer than this many traces.
const DEFAULT_MIN_EXAMPLES: usize = 2;
23
/// Stable schema marker for `candidate.json` inside a crystallization
/// bundle. Cloud importers and other downstream consumers should refuse
/// bundles whose `schema` field is anything else.
pub const BUNDLE_SCHEMA: &str = "harn.crystallization.candidate.bundle";
/// Versioned schema number for the bundle manifest. Cloud importers and
/// other consumers should refuse bundles whose `schema_version` is newer
/// than the highest version they understand.
pub const BUNDLE_SCHEMA_VERSION: u32 = 1;
/// Conventional file names inside a crystallization bundle directory.
/// This one names the bundle manifest.
pub const BUNDLE_MANIFEST_FILE: &str = "candidate.json";
/// Conventional file name of the report inside a bundle directory.
pub const BUNDLE_REPORT_FILE: &str = "report.json";
/// Conventional file name of the generated workflow inside a bundle directory.
pub const BUNDLE_WORKFLOW_FILE: &str = "workflow.harn";
/// Conventional file name of the eval pack inside a bundle directory.
pub const BUNDLE_EVAL_PACK_FILE: &str = "harn.eval.toml";
/// Conventional name of the fixtures subdirectory inside a bundle directory.
pub const BUNDLE_FIXTURES_DIR: &str = "fixtures";

/// Default rollout policy applied when a bundle is emitted without one
/// explicitly configured. Hosted promotion surfaces can override it.
const DEFAULT_ROLLOUT_POLICY: &str = "shadow_then_canary";
42
/// One recorded run used as input to crystallization.
///
/// `#[serde(default)]` means any field missing from the input JSON is filled
/// with its `Default` value instead of failing to decode.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationTrace {
    pub version: u32,
    /// Trace identifier; copied into `WorkflowCandidateExample::trace_id`.
    pub id: String,
    /// Origin of the trace. `load_crystallization_trace` sets this to the file
    /// path when the input leaves it unset.
    pub source: Option<String>,
    /// Hash identifying the source. `load_crystallization_trace` sets this to a
    /// hash of the raw file contents when the input leaves it unset.
    pub source_hash: Option<String>,
    pub workflow_id: Option<String>,
    pub started_at: Option<String>,
    pub finished_at: Option<String>,
    pub flow: Option<CrystallizationFlowRef>,
    /// Ordered actions; the single-trace path turns each action into one step.
    pub actions: Vec<CrystallizationAction>,
    pub usage: CrystallizationUsage,
    pub metadata: BTreeMap<String, JsonValue>,
}
58
/// Flow provenance references carried by a trace (`CrystallizationTrace::flow`).
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CrystallizationFlowRef {
    pub trace_id: Option<String>,
    pub agent_run_id: Option<String>,
    pub transcript_ref: Option<String>,
    pub atom_ids: Vec<String>,
    pub slice_ids: Vec<String>,
}
68
/// A single ordered action inside a [`CrystallizationTrace`].
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationAction {
    /// Action identifier; collected into `WorkflowCandidateExample::action_ids`.
    pub id: String,
    /// Action kind. The single-trace path treats `"model_call"` as fuzzy, and
    /// `"shell_failure"` / `"agent_recovery_advice"` add review notes.
    pub kind: String,
    pub name: String,
    pub timestamp: Option<String>,
    pub inputs: JsonValue,
    /// Used as a step's expected output only when `observed_output` is `None`.
    pub output: Option<JsonValue>,
    /// Preferred over `output` when deriving a step's expected output.
    pub observed_output: Option<JsonValue>,
    /// Named values; scalar entries become workflow parameters in the
    /// single-trace path.
    pub parameters: BTreeMap<String, JsonValue>,
    pub side_effects: Vec<CrystallizationSideEffect>,
    pub capabilities: Vec<String>,
    pub required_secrets: Vec<String>,
    pub approval: Option<CrystallizationApproval>,
    pub cost: CrystallizationCost,
    pub duration_ms: Option<i64>,
    pub deterministic: Option<bool>,
    /// `Some(true)` marks the resulting step as `SegmentKind::Fuzzy`.
    pub fuzzy: Option<bool>,
    /// Free-form metadata. `review_notes_for_action` reads the `success`
    /// (bool) and `recovery_hint` (string) keys.
    pub metadata: BTreeMap<String, JsonValue>,
}
90
/// A side effect recorded on an action and carried through to workflow steps
/// and candidates.
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationSideEffect {
    pub kind: String,
    pub target: String,
    pub capability: Option<String>,
    pub mutation: Option<String>,
    pub metadata: BTreeMap<String, JsonValue>,
}
100
/// Approval information attached to an action; steps copy it verbatim and
/// candidates collect it into `approval_points`.
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CrystallizationApproval {
    pub prompt: Option<String>,
    pub approver: Option<String>,
    pub required: bool,
    pub boundary: Option<String>,
}
109
/// Cost figures recorded for a single action (`CrystallizationAction::cost`).
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationCost {
    pub model: Option<String>,
    pub model_calls: i64,
    pub input_tokens: i64,
    pub output_tokens: i64,
    pub total_cost_usd: f64,
    pub wall_ms: i64,
}
120
/// Usage figures attached to a whole trace (`CrystallizationTrace::usage`).
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationUsage {
    pub model_calls: i64,
    pub input_tokens: i64,
    pub output_tokens: i64,
    pub total_cost_usd: f64,
    pub wall_ms: i64,
}
130
/// Classification of a workflow step.
///
/// Serialized in snake_case (`"deterministic"` / `"fuzzy"`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SegmentKind {
    /// The default classification.
    #[default]
    Deterministic,
    /// Assigned by the single-trace path when an action sets `fuzzy` to `true`
    /// or has kind `"model_call"`.
    Fuzzy,
}
138
/// One occurrence of a mined sequence.
/// NOTE(review): presumably `(trace index, start offset)`; the helpers that
/// consume it are not visible here, so confirm the element order.
type SequenceExample = (usize, usize);
/// A repeated sequence together with its occurrences.
/// NOTE(review): presumably the action signatures paired with each
/// [`SequenceExample`]; confirm against the mining helpers.
type RepeatedSequence = (Vec<String>, Vec<SequenceExample>);
141
/// A parameter exposed by a workflow candidate.
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowCandidateParameter {
    /// Parameter name. Parameters discovered by the single-trace path use the
    /// sanitized form of the action parameter key.
    pub name: String,
    /// Where the value was observed; the single-trace path records entries of
    /// the form `steps[<idx>].parameters.<name>`.
    pub source_paths: Vec<String>,
    /// Sample values; the single-trace path keeps at most five.
    pub examples: Vec<String>,
    pub required: bool,
}
150
/// One step of a workflow candidate, derived from a trace action.
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowCandidateStep {
    /// Step number; the single-trace path numbers steps starting at 1.
    pub index: usize,
    pub kind: String,
    pub name: String,
    pub segment: SegmentKind,
    /// Names of the candidate parameters this step refers to.
    pub parameter_refs: Vec<String>,
    pub constants: BTreeMap<String, JsonValue>,
    pub preconditions: Vec<String>,
    pub side_effects: Vec<CrystallizationSideEffect>,
    pub capabilities: Vec<String>,
    pub required_secrets: Vec<String>,
    pub approval: Option<CrystallizationApproval>,
    /// The action's `observed_output`, falling back to its `output`.
    pub expected_output: Option<JsonValue>,
    /// Reviewer-facing notes (see `review_notes_for_action`).
    pub review_notes: Vec<String>,
}
168
/// Reference to the trace span that backs a workflow candidate.
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowCandidateExample {
    pub trace_id: String,
    /// The trace's `source_hash`; the single-trace path stores an empty string
    /// when the trace has none.
    pub source_hash: String,
    /// Offset of the first action; the single-trace path always records 0.
    pub start_index: usize,
    pub action_ids: Vec<String>,
}
177
/// Promotion bookkeeping attached to a workflow candidate.
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct PromotionMetadata {
    /// `source_hash` of every example backing the candidate.
    pub source_trace_hashes: Vec<String>,
    /// Copied from `CrystallizeOptions::author`.
    pub author: Option<String>,
    /// Copied from `CrystallizeOptions::approver`.
    pub approver: Option<String>,
    pub created_at: String,
    pub version: String,
    /// `CrystallizeOptions::package_name`, or in the single-trace path the
    /// workflow name with `_` replaced by `-` when no package name is given.
    pub package_name: String,
    pub capability_set: Vec<String>,
    pub secrets_required: Vec<String>,
    pub rollback_target: Option<String>,
    /// Copied from `CrystallizeOptions::eval_pack_link`;
    /// `write_crystallization_artifacts` overwrites it on the first accepted
    /// candidate with the written eval pack path.
    pub eval_pack_link: Option<String>,
}
192
/// Estimated savings stored on a candidate (`WorkflowCandidate::savings`).
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct SavingsEstimate {
    pub model_calls_avoided: i64,
    pub input_tokens_avoided: i64,
    pub output_tokens_avoided: i64,
    pub estimated_cost_usd_avoided: f64,
    pub wall_ms_avoided: i64,
    pub cpu_runtime_cost_usd: f64,
    pub remaining_model_calls: i64,
}
204
/// Per-trace entry of a [`ShadowRunReport`].
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ShadowTraceResult {
    pub trace_id: String,
    pub pass: bool,
    pub details: Vec<String>,
}
212
/// Result of shadow-comparing a candidate against its source traces.
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ShadowRunReport {
    /// When `false`, callers append `failures` to the candidate's
    /// `rejection_reasons`.
    pub pass: bool,
    pub compared_traces: usize,
    pub failures: Vec<String>,
    pub traces: Vec<ShadowTraceResult>,
}
221
/// A proposed workflow together with the evidence and metadata needed to
/// review it.
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowCandidate {
    pub id: String,
    pub name: String,
    /// Primary ranking key: `crystallize_traces` sorts accepted candidates by
    /// descending confidence, then by descending step count.
    pub confidence: f64,
    pub sequence_signature: Vec<String>,
    pub parameters: Vec<WorkflowCandidateParameter>,
    pub steps: Vec<WorkflowCandidateStep>,
    pub examples: Vec<WorkflowCandidateExample>,
    pub capabilities: Vec<String>,
    pub required_secrets: Vec<String>,
    pub approval_points: Vec<CrystallizationApproval>,
    pub side_effects: Vec<CrystallizationSideEffect>,
    pub expected_outputs: Vec<JsonValue>,
    pub warnings: Vec<String>,
    /// Non-empty when the candidate must not be proposed; shadow failures are
    /// appended here.
    pub rejection_reasons: Vec<String>,
    pub promotion: PromotionMetadata,
    pub savings: SavingsEstimate,
    pub shadow: ShadowRunReport,
}
243
impl WorkflowCandidate {
    /// Returns `true` when no rejection reasons have been recorded for this
    /// candidate.
    pub fn is_safe_to_propose(&self) -> bool {
        self.rejection_reasons.is_empty()
    }
}
249
/// Report produced by a crystallization run; serialized as pretty-printed
/// JSON by `write_crystallization_artifacts`.
///
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationReport {
    pub version: u32,
    pub generated_at: String,
    pub source_trace_count: usize,
    /// Id of the first accepted candidate, if any.
    pub selected_candidate_id: Option<String>,
    /// Accepted candidates (those with no rejection reasons).
    pub candidates: Vec<WorkflowCandidate>,
    /// Candidates that recorded at least one rejection reason.
    pub rejected_candidates: Vec<WorkflowCandidate>,
    pub warnings: Vec<String>,
    pub input_format: CrystallizationInputFormat,
    /// Set by `write_crystallization_artifacts` to the workflow file path.
    pub harn_code_path: Option<String>,
    /// Set by `write_crystallization_artifacts` when an eval pack is written.
    pub eval_pack_path: Option<String>,
    /// Optional plain-language explanation of which steps are safe to
    /// automate vs. which still require human/agent review. Populated by
    /// the release-fixture ingest path so reviewers can inspect a
    /// candidate without re-deriving the deterministic/agentic split.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub segment_summary: Option<SegmentSummary>,
    /// Optional summary of how shell/tool failures were represented and
    /// whether failure context was fed back into a model. Populated by
    /// the release-fixture ingest path.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub recovery_summary: Option<RecoveryFeedbackSummary>,
}
275
/// Plain-language summary of the deterministic/agentic split for a
/// candidate. Designed to be human-readable in `report.json` without a
/// reviewer needing to walk the step list manually.
///
/// All fields default when absent from the input (`#[serde(default)]`).
/// The summary is supplied by the caller of
/// `synthesize_candidate_from_trace`; nothing in this module computes it.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct SegmentSummary {
    pub deterministic_count: usize,
    pub agentic_count: usize,
    pub safe_to_automate: Vec<String>,
    pub requires_human_review: Vec<String>,
    pub plain_language: String,
}
288
/// Summary of how shell/tool failures were represented in the source
/// trace and whether the failure context was fed back into the model.
/// Lets reviewers see at a glance whether recovery was advisory only or
/// whether the workflow attempted to repair itself.
///
/// All fields default when absent from the input (`#[serde(default)]`).
/// The summary is supplied by the caller of
/// `synthesize_candidate_from_trace`; nothing in this module computes it.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RecoveryFeedbackSummary {
    pub shell_failures_seen: usize,
    pub recovery_advice_runs: usize,
    /// `true` when at least one recovery action observed by the source
    /// trace fed the failing-step context into a model loop (vs. just
    /// recording the failure for human review).
    pub failures_fed_into_agent: bool,
    pub failed_steps: Vec<String>,
    /// Plain-language explanation of how recovery was represented.
    pub representation: String,
}
306
/// Description of the input format a report was generated from.
///
/// Both report builders in this module set `name` to
/// `"harn.crystallization.trace"` and list `id` and `actions` as required.
/// All fields default when absent from the input (`#[serde(default)]`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CrystallizationInputFormat {
    pub name: String,
    pub version: u32,
    pub required_fields: Vec<String>,
    pub preserved_fields: Vec<String>,
}
315
/// Caller-supplied knobs for a crystallization run.
#[derive(Clone, Debug, Default)]
pub struct CrystallizeOptions {
    /// Minimum number of traces required; `crystallize_traces` raises it to
    /// at least `DEFAULT_MIN_EXAMPLES`.
    pub min_examples: usize,
    /// Overrides the inferred workflow name when set.
    pub workflow_name: Option<String>,
    /// Overrides the derived package name when set.
    pub package_name: Option<String>,
    /// Copied into `PromotionMetadata::author`.
    pub author: Option<String>,
    /// Copied into `PromotionMetadata::approver`.
    pub approver: Option<String>,
    /// Copied into `PromotionMetadata::eval_pack_link`.
    pub eval_pack_link: Option<String>,
}
325
/// In-memory output of a crystallization run, ready to be written to disk by
/// `write_crystallization_artifacts`.
#[derive(Clone, Debug, Default)]
pub struct CrystallizationArtifacts {
    pub report: CrystallizationReport,
    /// Generated workflow source, or a rejection stub when no candidate was
    /// accepted.
    pub harn_code: String,
    /// Eval pack TOML for the selected candidate; empty when no candidate was
    /// accepted.
    pub eval_pack_toml: String,
}
332
333pub fn crystallize_traces(
334    traces: Vec<CrystallizationTrace>,
335    options: CrystallizeOptions,
336) -> Result<CrystallizationArtifacts, VmError> {
337    let min_examples = options.min_examples.max(DEFAULT_MIN_EXAMPLES);
338    if traces.len() < min_examples {
339        return Err(VmError::Runtime(format!(
340            "crystallize requires at least {min_examples} traces, got {}",
341            traces.len()
342        )));
343    }
344
345    let normalized = traces.into_iter().map(normalize_trace).collect::<Vec<_>>();
346    let mut candidates = mine_candidates(&normalized, min_examples, &options);
347    let mut rejected_candidates = Vec::new();
348    for candidate in &mut candidates {
349        candidate.shadow = shadow_candidate(candidate, &normalized);
350        if !candidate.shadow.pass {
351            candidate
352                .rejection_reasons
353                .extend(candidate.shadow.failures.clone());
354        }
355    }
356
357    let mut accepted = Vec::new();
358    for candidate in candidates {
359        if candidate.is_safe_to_propose() {
360            accepted.push(candidate);
361        } else {
362            rejected_candidates.push(candidate);
363        }
364    }
365    accepted.sort_by(|left, right| {
366        right
367            .confidence
368            .partial_cmp(&left.confidence)
369            .unwrap_or(std::cmp::Ordering::Equal)
370            .then_with(|| right.steps.len().cmp(&left.steps.len()))
371    });
372
373    let selected = accepted.first();
374    let harn_code = selected
375        .map(generate_harn_code)
376        .unwrap_or_else(|| rejected_workflow_stub(&rejected_candidates));
377    let eval_pack_toml = selected.map(generate_eval_pack).unwrap_or_default();
378
379    let report = CrystallizationReport {
380        version: 1,
381        generated_at: now_rfc3339(),
382        source_trace_count: normalized.len(),
383        selected_candidate_id: selected.map(|candidate| candidate.id.clone()),
384        candidates: accepted,
385        rejected_candidates,
386        warnings: Vec::new(),
387        input_format: CrystallizationInputFormat {
388            name: "harn.crystallization.trace".to_string(),
389            version: TRACE_SCHEMA_VERSION,
390            required_fields: vec!["id".to_string(), "actions".to_string()],
391            preserved_fields: vec![
392                "ordered actions".to_string(),
393                "tool calls".to_string(),
394                "model calls".to_string(),
395                "human approvals".to_string(),
396                "file mutations".to_string(),
397                "external API calls".to_string(),
398                "observed outputs".to_string(),
399                "costs".to_string(),
400                "timestamps".to_string(),
401                "source hashes".to_string(),
402                "Flow provenance references".to_string(),
403            ],
404        },
405        harn_code_path: None,
406        eval_pack_path: None,
407        segment_summary: None,
408        recovery_summary: None,
409    };
410
411    Ok(CrystallizationArtifacts {
412        report,
413        harn_code,
414        eval_pack_toml,
415    })
416}
417
418/// Synthesize a single-trace crystallization candidate without going
419/// through repeated-sequence mining. Used by the release-fixture ingest
420/// path where the trace is the workflow: there is exactly one example
421/// and the deterministic/agentic split comes from the source events
422/// directly. The resulting [`CrystallizationArtifacts`] is bundle-ready
423/// (`build_crystallization_bundle`) and validates with the existing
424/// validator.
425///
426/// `extra_parameters` and `extra_metadata` let callers seed parameters
427/// (e.g., release identity fields like `current_version` /
428/// `next_version`) and metadata (segment / recovery summaries) that the
429/// trace alone does not surface.
430pub fn synthesize_candidate_from_trace(
431    trace: CrystallizationTrace,
432    options: CrystallizeOptions,
433    extra_parameters: Vec<WorkflowCandidateParameter>,
434    segment_summary: Option<SegmentSummary>,
435    recovery_summary: Option<RecoveryFeedbackSummary>,
436) -> Result<CrystallizationArtifacts, VmError> {
437    if trace.actions.is_empty() {
438        return Err(VmError::Runtime(
439            "synthesize_candidate_from_trace requires a trace with at least one action".to_string(),
440        ));
441    }
442    let normalized = normalize_trace(trace);
443    let mut candidate = build_single_trace_candidate(&normalized, &options, extra_parameters);
444    candidate.shadow = shadow_candidate(&candidate, std::slice::from_ref(&normalized));
445    if !candidate.shadow.pass {
446        candidate
447            .rejection_reasons
448            .extend(candidate.shadow.failures.clone());
449    }
450    let (accepted, rejected) = if candidate.is_safe_to_propose() {
451        (vec![candidate], Vec::new())
452    } else {
453        (Vec::new(), vec![candidate])
454    };
455
456    let selected = accepted.first();
457    let harn_code = selected
458        .map(generate_harn_code)
459        .unwrap_or_else(|| rejected_workflow_stub(&rejected));
460    let eval_pack_toml = selected.map(generate_eval_pack).unwrap_or_default();
461    let selected_id = selected.map(|candidate| candidate.id.clone());
462
463    let report = CrystallizationReport {
464        version: 1,
465        generated_at: now_rfc3339(),
466        source_trace_count: 1,
467        selected_candidate_id: selected_id,
468        candidates: accepted,
469        rejected_candidates: rejected,
470        warnings: Vec::new(),
471        input_format: CrystallizationInputFormat {
472            name: "harn.crystallization.trace".to_string(),
473            version: TRACE_SCHEMA_VERSION,
474            required_fields: vec!["id".to_string(), "actions".to_string()],
475            preserved_fields: vec![
476                "ordered actions".to_string(),
477                "deterministic events".to_string(),
478                "agentic events".to_string(),
479                "tool/shell observations".to_string(),
480                "recovery advice".to_string(),
481                "release metadata".to_string(),
482                "source hashes".to_string(),
483            ],
484        },
485        harn_code_path: None,
486        eval_pack_path: None,
487        segment_summary,
488        recovery_summary,
489    };
490
491    Ok(CrystallizationArtifacts {
492        report,
493        harn_code,
494        eval_pack_toml,
495    })
496}
497
498/// Build a single-example candidate directly from a normalized trace.
499/// This is the single-trace analog of `mine_candidates`: every action
500/// becomes a step, every action's `fuzzy`/`model_call` flag drives the
501/// segment kind, and parameters are taken from each action's
502/// `parameters` map (plus any caller-supplied `extra_parameters`).
503fn build_single_trace_candidate(
504    trace: &CrystallizationTrace,
505    options: &CrystallizeOptions,
506    extra_parameters: Vec<WorkflowCandidateParameter>,
507) -> WorkflowCandidate {
508    let mut steps = Vec::with_capacity(trace.actions.len());
509    let mut parameter_values: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
510    let mut parameter_paths: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
511    let mut warnings = Vec::new();
512    let sequence: Vec<String> = trace.actions.iter().map(action_signature).collect();
513
514    for (idx, action) in trace.actions.iter().enumerate() {
515        let mut parameter_refs = BTreeSet::new();
516        for (name, value) in &action.parameters {
517            if is_scalar(value) {
518                let ident = sanitize_identifier(name);
519                parameter_values
520                    .entry(ident.clone())
521                    .or_default()
522                    .insert(json_scalar_string(value));
523                parameter_paths
524                    .entry(ident.clone())
525                    .or_default()
526                    .insert(format!("steps[{idx}].parameters.{name}"));
527                parameter_refs.insert(ident);
528            }
529        }
530
531        let fuzzy = action.fuzzy.unwrap_or(false) || action.kind == "model_call";
532        if fuzzy {
533            warnings.push(format!(
534                "step {} '{}' remains fuzzy and requires review/LLM handling",
535                idx + 1,
536                action.name
537            ));
538        }
539
540        steps.push(WorkflowCandidateStep {
541            index: idx + 1,
542            kind: action.kind.clone(),
543            name: action.name.clone(),
544            segment: if fuzzy {
545                SegmentKind::Fuzzy
546            } else {
547                SegmentKind::Deterministic
548            },
549            parameter_refs: parameter_refs.into_iter().collect(),
550            constants: constants_for_action(action),
551            preconditions: preconditions_for_action(action),
552            side_effects: action.side_effects.clone(),
553            capabilities: sorted_strings(action.capabilities.iter().cloned()),
554            required_secrets: sorted_strings(action.required_secrets.iter().cloned()),
555            approval: action.approval.clone(),
556            expected_output: action
557                .observed_output
558                .clone()
559                .or_else(|| action.output.clone()),
560            review_notes: review_notes_for_action(action),
561        });
562    }
563
564    let mut parameters: Vec<WorkflowCandidateParameter> = parameter_values
565        .iter()
566        .map(|(name, values)| WorkflowCandidateParameter {
567            name: name.clone(),
568            source_paths: parameter_paths
569                .get(name)
570                .map(|paths| paths.iter().cloned().collect())
571                .unwrap_or_default(),
572            examples: values.iter().take(5).cloned().collect(),
573            required: true,
574        })
575        .collect();
576    // Caller-supplied extras (e.g. release metadata fields) take
577    // precedence and are appended in stable order. De-dupe by name so a
578    // caller-supplied parameter overrides any same-name parameter
579    // discovered from action parameters.
580    let extra_names: BTreeSet<String> = extra_parameters.iter().map(|p| p.name.clone()).collect();
581    parameters.retain(|p| !extra_names.contains(&p.name));
582    parameters.extend(extra_parameters);
583    parameters.sort_by(|left, right| left.name.cmp(&right.name));
584
585    let example_refs = vec![WorkflowCandidateExample {
586        trace_id: trace.id.clone(),
587        source_hash: trace.source_hash.clone().unwrap_or_default(),
588        start_index: 0,
589        action_ids: trace
590            .actions
591            .iter()
592            .map(|action| action.id.clone())
593            .collect(),
594    }];
595
596    let capabilities = sorted_strings(
597        steps
598            .iter()
599            .flat_map(|step| step.capabilities.iter().cloned()),
600    );
601    let required_secrets = sorted_strings(
602        steps
603            .iter()
604            .flat_map(|step| step.required_secrets.iter().cloned()),
605    );
606    let approval_points = steps
607        .iter()
608        .filter_map(|step| step.approval.clone())
609        .collect::<Vec<_>>();
610    let side_effects = sorted_side_effects(
611        steps
612            .iter()
613            .flat_map(|step| step.side_effects.iter().cloned())
614            .collect(),
615    );
616    let expected_outputs = steps
617        .iter()
618        .filter_map(|step| step.expected_output.clone())
619        .collect::<Vec<_>>();
620
621    let savings = estimate_savings(std::slice::from_ref(trace), &[(0, 0)], &steps);
622    let confidence = confidence_for(&[(0, 0)], 1, &steps, true);
623    let name = options
624        .workflow_name
625        .clone()
626        .unwrap_or_else(|| infer_workflow_name(&steps));
627    let package_name = options
628        .package_name
629        .clone()
630        .unwrap_or_else(|| name.replace('_', "-"));
631
632    WorkflowCandidate {
633        id: stable_candidate_id(&sequence, &example_refs),
634        name,
635        confidence,
636        sequence_signature: sequence,
637        parameters,
638        steps,
639        examples: example_refs.clone(),
640        capabilities: capabilities.clone(),
641        required_secrets: required_secrets.clone(),
642        approval_points,
643        side_effects,
644        expected_outputs,
645        warnings,
646        rejection_reasons: Vec::new(),
647        promotion: PromotionMetadata {
648            source_trace_hashes: example_refs
649                .iter()
650                .map(|example| example.source_hash.clone())
651                .collect(),
652            author: options.author.clone(),
653            approver: options.approver.clone(),
654            created_at: now_rfc3339(),
655            version: "0.1.0".to_string(),
656            package_name,
657            capability_set: capabilities,
658            secrets_required: required_secrets,
659            rollback_target: Some("keep source trace and previous package version".to_string()),
660            eval_pack_link: options.eval_pack_link.clone(),
661        },
662        savings,
663        shadow: ShadowRunReport::default(),
664    }
665}
666
667fn review_notes_for_action(action: &CrystallizationAction) -> Vec<String> {
668    let mut notes = Vec::new();
669    if action.kind == "shell_failure"
670        || matches!(
671            action
672                .metadata
673                .get("success")
674                .and_then(|value| value.as_bool()),
675            Some(false)
676        )
677    {
678        notes.push(format!(
679            "shell/tool step '{}' failed in the source trace; reviewer should confirm \
680             whether deterministic recovery is possible before promotion.",
681            action.name
682        ));
683    }
684    if let Some(hint) = action
685        .metadata
686        .get("recovery_hint")
687        .and_then(|value| value.as_str())
688        .filter(|hint| !hint.trim().is_empty())
689    {
690        notes.push(format!("recovery hint from source trace: {hint}"));
691    }
692    if action.kind == "agent_recovery_advice" {
693        notes.push(
694            "agent-authored recovery advice; treat as advisory, never as deterministic truth."
695                .to_string(),
696        );
697    }
698    notes
699}
700
701pub fn load_crystallization_traces_from_dir(
702    dir: &Path,
703) -> Result<Vec<CrystallizationTrace>, VmError> {
704    let mut paths = Vec::new();
705    collect_json_paths(dir, &mut paths)?;
706    if paths.is_empty() {
707        return Err(VmError::Runtime(format!(
708            "no .json trace files found under {}",
709            dir.display()
710        )));
711    }
712    paths.sort();
713    paths
714        .iter()
715        .map(|path| load_crystallization_trace(path))
716        .collect()
717}
718
719pub fn load_crystallization_trace(path: &Path) -> Result<CrystallizationTrace, VmError> {
720    let content = std::fs::read_to_string(path).map_err(|error| {
721        VmError::Runtime(format!(
722            "failed to read crystallization trace {}: {error}",
723            path.display()
724        ))
725    })?;
726    let value: JsonValue = serde_json::from_str(&content).map_err(|error| {
727        VmError::Runtime(format!(
728            "failed to parse crystallization trace {}: {error}",
729            path.display()
730        ))
731    })?;
732
733    let mut trace = if value.get("actions").is_some() {
734        serde_json::from_value::<CrystallizationTrace>(value.clone()).map_err(|error| {
735            VmError::Runtime(format!(
736                "failed to decode crystallization trace {}: {error}",
737                path.display()
738            ))
739        })?
740    } else if value.get("stages").is_some() || value.get("_type") == Some(&json!("workflow_run")) {
741        let run: RunRecord = serde_json::from_value(value.clone()).map_err(|error| {
742            VmError::Runtime(format!(
743                "failed to decode run record {} as crystallization input: {error}",
744                path.display()
745            ))
746        })?;
747        trace_from_run_record(run)
748    } else {
749        return Err(VmError::Runtime(format!(
750            "{} is neither a crystallization trace nor a workflow run record",
751            path.display()
752        )));
753    };
754    if trace.source.is_none() {
755        trace.source = Some(path.display().to_string());
756    }
757    if trace.source_hash.is_none() {
758        trace.source_hash = Some(hash_bytes(content.as_bytes()));
759    }
760    Ok(normalize_trace(trace))
761}
762
763pub fn write_crystallization_artifacts(
764    mut artifacts: CrystallizationArtifacts,
765    workflow_path: &Path,
766    report_path: &Path,
767    eval_pack_path: Option<&Path>,
768) -> Result<CrystallizationReport, VmError> {
769    crate::atomic_io::atomic_write(workflow_path, artifacts.harn_code.as_bytes()).map_err(
770        |error| {
771            VmError::Runtime(format!(
772                "failed to write generated workflow {}: {error}",
773                workflow_path.display()
774            ))
775        },
776    )?;
777
778    artifacts.report.harn_code_path = Some(workflow_path.display().to_string());
779    if let Some(path) = eval_pack_path {
780        if !artifacts.eval_pack_toml.trim().is_empty() {
781            crate::atomic_io::atomic_write(path, artifacts.eval_pack_toml.as_bytes()).map_err(
782                |error| {
783                    VmError::Runtime(format!(
784                        "failed to write eval pack {}: {error}",
785                        path.display()
786                    ))
787                },
788            )?;
789            artifacts.report.eval_pack_path = Some(path.display().to_string());
790            if let Some(candidate) = artifacts.report.candidates.first_mut() {
791                candidate.promotion.eval_pack_link = Some(path.display().to_string());
792            }
793        }
794    }
795
796    let report_json = serde_json::to_string_pretty(&artifacts.report)
797        .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
798    crate::atomic_io::atomic_write(report_path, report_json.as_bytes()).map_err(|error| {
799        VmError::Runtime(format!(
800            "failed to write crystallization report {}: {error}",
801            report_path.display()
802        ))
803    })?;
804    Ok(artifacts.report)
805}
806
807fn collect_json_paths(dir: &Path, out: &mut Vec<PathBuf>) -> Result<(), VmError> {
808    let entries = std::fs::read_dir(dir).map_err(|error| {
809        VmError::Runtime(format!(
810            "failed to read crystallization trace dir {}: {error}",
811            dir.display()
812        ))
813    })?;
814    for entry in entries {
815        let entry = entry.map_err(|error| {
816            VmError::Runtime(format!(
817                "failed to read entry in trace dir {}: {error}",
818                dir.display()
819            ))
820        })?;
821        let path = entry.path();
822        if path.is_dir() {
823            collect_json_paths(&path, out)?;
824        } else if path.extension().and_then(|ext| ext.to_str()) == Some("json") {
825            out.push(path);
826        }
827    }
828    Ok(())
829}
830
831fn trace_from_run_record(run: RunRecord) -> CrystallizationTrace {
832    let mut actions = Vec::new();
833    for stage in &run.stages {
834        actions.push(CrystallizationAction {
835            id: stage.id.clone(),
836            kind: if stage.kind.is_empty() {
837                "stage".to_string()
838            } else {
839                stage.kind.clone()
840            },
841            name: stage.node_id.clone(),
842            timestamp: Some(stage.started_at.clone()),
843            output: stage.visible_text.as_ref().map(|text| json!(text)),
844            observed_output: stage.visible_text.as_ref().map(|text| json!(text)),
845            duration_ms: stage.usage.as_ref().map(|usage| usage.total_duration_ms),
846            cost: stage
847                .usage
848                .as_ref()
849                .map(|usage| CrystallizationCost {
850                    model_calls: usage.call_count,
851                    input_tokens: usage.input_tokens,
852                    output_tokens: usage.output_tokens,
853                    total_cost_usd: usage.total_cost,
854                    wall_ms: usage.total_duration_ms,
855                    model: usage.models.first().cloned(),
856                })
857                .unwrap_or_default(),
858            deterministic: Some(
859                stage
860                    .usage
861                    .as_ref()
862                    .map(|usage| usage.call_count == 0)
863                    .unwrap_or(true),
864            ),
865            fuzzy: Some(
866                stage
867                    .usage
868                    .as_ref()
869                    .is_some_and(|usage| usage.call_count > 0),
870            ),
871            metadata: stage.metadata.clone(),
872            ..CrystallizationAction::default()
873        });
874    }
875    for tool in &run.tool_recordings {
876        actions.push(CrystallizationAction {
877            id: tool.tool_use_id.clone(),
878            kind: "tool_call".to_string(),
879            name: tool.tool_name.clone(),
880            timestamp: Some(tool.timestamp.clone()),
881            output: Some(json!(tool.result)),
882            observed_output: Some(json!(tool.result)),
883            duration_ms: Some(tool.duration_ms as i64),
884            deterministic: Some(!tool.is_rejected),
885            fuzzy: Some(false),
886            metadata: BTreeMap::from([
887                ("args_hash".to_string(), json!(tool.args_hash)),
888                ("iteration".to_string(), json!(tool.iteration)),
889                ("is_rejected".to_string(), json!(tool.is_rejected)),
890            ]),
891            ..CrystallizationAction::default()
892        });
893    }
894    for question in &run.hitl_questions {
895        actions.push(CrystallizationAction {
896            id: question.request_id.clone(),
897            kind: "human_approval".to_string(),
898            name: question.agent.clone(),
899            timestamp: Some(question.asked_at.clone()),
900            approval: Some(CrystallizationApproval {
901                prompt: Some(question.prompt.clone()),
902                required: true,
903                boundary: Some("hitl".to_string()),
904                ..CrystallizationApproval::default()
905            }),
906            deterministic: Some(false),
907            fuzzy: Some(false),
908            metadata: question
909                .trace_id
910                .as_ref()
911                .map(|trace_id| BTreeMap::from([("trace_id".to_string(), json!(trace_id))]))
912                .unwrap_or_default(),
913            ..CrystallizationAction::default()
914        });
915    }
916    actions.sort_by(|left, right| left.timestamp.cmp(&right.timestamp));
917
918    CrystallizationTrace {
919        version: TRACE_SCHEMA_VERSION,
920        id: run.id.clone(),
921        workflow_id: Some(run.workflow_id.clone()),
922        started_at: Some(run.started_at.clone()),
923        finished_at: run.finished_at.clone(),
924        actions,
925        usage: run
926            .usage
927            .map(|usage| CrystallizationUsage {
928                model_calls: usage.call_count,
929                input_tokens: usage.input_tokens,
930                output_tokens: usage.output_tokens,
931                total_cost_usd: usage.total_cost,
932                wall_ms: usage.total_duration_ms,
933            })
934            .unwrap_or_default(),
935        metadata: run.metadata.clone(),
936        ..CrystallizationTrace::default()
937    }
938}
939
940fn normalize_trace(mut trace: CrystallizationTrace) -> CrystallizationTrace {
941    if trace.version == 0 {
942        trace.version = TRACE_SCHEMA_VERSION;
943    }
944    if trace.id.trim().is_empty() {
945        trace.id = new_id("trace");
946    }
947    if trace.source_hash.is_none() {
948        let payload = serde_json::to_vec(&trace.actions).unwrap_or_default();
949        trace.source_hash = Some(hash_bytes(&payload));
950    }
951    for (idx, action) in trace.actions.iter_mut().enumerate() {
952        if action.id.trim().is_empty() {
953            action.id = format!("action_{}", idx + 1);
954        }
955        if action.kind.trim().is_empty() {
956            action.kind = "action".to_string();
957        }
958        if action.name.trim().is_empty() {
959            action.name = action.kind.clone();
960        }
961        action.capabilities.sort();
962        action.capabilities.dedup();
963        action.required_secrets.sort();
964        action.required_secrets.dedup();
965        action.side_effects = sorted_side_effects(std::mem::take(&mut action.side_effects));
966        if action.cost.model_calls == 0 && action.kind == "model_call" {
967            action.cost.model_calls = 1;
968        }
969    }
970    if trace.usage == CrystallizationUsage::default() {
971        for action in &trace.actions {
972            trace.usage.model_calls += action.cost.model_calls;
973            trace.usage.input_tokens += action.cost.input_tokens;
974            trace.usage.output_tokens += action.cost.output_tokens;
975            trace.usage.total_cost_usd += action.cost.total_cost_usd;
976            trace.usage.wall_ms += action.cost.wall_ms + action.duration_ms.unwrap_or_default();
977        }
978    }
979    trace
980}
981
982fn mine_candidates(
983    traces: &[CrystallizationTrace],
984    min_examples: usize,
985    options: &CrystallizeOptions,
986) -> Vec<WorkflowCandidate> {
987    let signatures = traces
988        .iter()
989        .map(|trace| {
990            trace
991                .actions
992                .iter()
993                .map(action_signature)
994                .collect::<Vec<_>>()
995        })
996        .collect::<Vec<_>>();
997    let Some((sequence, examples)) = best_repeated_sequence(&signatures, min_examples) else {
998        return Vec::new();
999    };
1000
1001    let mut example_refs = Vec::new();
1002    for (trace_index, start_index) in &examples {
1003        let trace = &traces[*trace_index];
1004        example_refs.push(WorkflowCandidateExample {
1005            trace_id: trace.id.clone(),
1006            source_hash: trace.source_hash.clone().unwrap_or_default(),
1007            start_index: *start_index,
1008            action_ids: trace.actions[*start_index..*start_index + sequence.len()]
1009                .iter()
1010                .map(|action| action.id.clone())
1011                .collect(),
1012        });
1013    }
1014
1015    let mut steps = Vec::new();
1016    let mut parameter_values: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
1017    let mut parameter_paths: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
1018    let mut rejection_reasons = Vec::new();
1019    let mut warnings = Vec::new();
1020
1021    for step_index in 0..sequence.len() {
1022        let actions = examples
1023            .iter()
1024            .map(|(trace_index, start_index)| {
1025                &traces[*trace_index].actions[*start_index + step_index]
1026            })
1027            .collect::<Vec<_>>();
1028        let first = actions[0];
1029        if !compatible_side_effects(&actions) {
1030            rejection_reasons.push(format!(
1031                "step {} '{}' has divergent side effects across examples",
1032                step_index + 1,
1033                first.name
1034            ));
1035        }
1036
1037        let mut parameter_refs = BTreeSet::new();
1038        for action in &actions {
1039            for (name, value) in &action.parameters {
1040                if is_scalar(value) {
1041                    parameter_values
1042                        .entry(sanitize_identifier(name))
1043                        .or_default()
1044                        .insert(json_scalar_string(value));
1045                    parameter_paths
1046                        .entry(sanitize_identifier(name))
1047                        .or_default()
1048                        .insert(format!("steps[{step_index}].parameters.{name}"));
1049                    parameter_refs.insert(sanitize_identifier(name));
1050                }
1051            }
1052        }
1053        collect_varying_parameters(
1054            &actions,
1055            "inputs",
1056            |action| &action.inputs,
1057            &mut parameter_values,
1058            &mut parameter_paths,
1059            &mut parameter_refs,
1060        );
1061
1062        let fuzzy = first.fuzzy.unwrap_or(false)
1063            || first.kind == "model_call"
1064            || actions.iter().any(|action| action.fuzzy.unwrap_or(false));
1065        if fuzzy {
1066            warnings.push(format!(
1067                "step {} '{}' remains fuzzy and requires review/LLM handling",
1068                step_index + 1,
1069                first.name
1070            ));
1071        }
1072
1073        steps.push(WorkflowCandidateStep {
1074            index: step_index + 1,
1075            kind: first.kind.clone(),
1076            name: first.name.clone(),
1077            segment: if fuzzy {
1078                SegmentKind::Fuzzy
1079            } else {
1080                SegmentKind::Deterministic
1081            },
1082            parameter_refs: parameter_refs.into_iter().collect(),
1083            constants: constants_for_action(first),
1084            preconditions: preconditions_for_action(first),
1085            side_effects: first.side_effects.clone(),
1086            capabilities: sorted_strings(first.capabilities.iter().cloned()),
1087            required_secrets: sorted_strings(first.required_secrets.iter().cloned()),
1088            approval: first.approval.clone(),
1089            expected_output: stable_expected_output(&actions),
1090            review_notes: Vec::new(),
1091        });
1092    }
1093
1094    let parameters = parameter_values
1095        .iter()
1096        .map(|(name, values)| WorkflowCandidateParameter {
1097            name: name.clone(),
1098            source_paths: parameter_paths
1099                .get(name)
1100                .map(|paths| paths.iter().cloned().collect())
1101                .unwrap_or_default(),
1102            examples: values.iter().take(5).cloned().collect(),
1103            required: true,
1104        })
1105        .collect::<Vec<_>>();
1106
1107    let capabilities = sorted_strings(
1108        steps
1109            .iter()
1110            .flat_map(|step| step.capabilities.iter().cloned()),
1111    );
1112    let required_secrets = sorted_strings(
1113        steps
1114            .iter()
1115            .flat_map(|step| step.required_secrets.iter().cloned()),
1116    );
1117    let approval_points = steps
1118        .iter()
1119        .filter_map(|step| step.approval.clone())
1120        .collect::<Vec<_>>();
1121    let side_effects = sorted_side_effects(
1122        steps
1123            .iter()
1124            .flat_map(|step| step.side_effects.iter().cloned())
1125            .collect(),
1126    );
1127    let expected_outputs = steps
1128        .iter()
1129        .filter_map(|step| step.expected_output.clone())
1130        .collect::<Vec<_>>();
1131    let savings = estimate_savings(traces, &examples, &steps);
1132    let confidence = confidence_for(
1133        &examples,
1134        traces.len(),
1135        &steps,
1136        rejection_reasons.is_empty(),
1137    );
1138    let name = options
1139        .workflow_name
1140        .clone()
1141        .unwrap_or_else(|| infer_workflow_name(&steps));
1142    let package_name = options
1143        .package_name
1144        .clone()
1145        .unwrap_or_else(|| name.replace('_', "-"));
1146
1147    vec![WorkflowCandidate {
1148        id: stable_candidate_id(&sequence, &example_refs),
1149        name,
1150        confidence,
1151        sequence_signature: sequence,
1152        parameters,
1153        steps,
1154        examples: example_refs.clone(),
1155        capabilities: capabilities.clone(),
1156        required_secrets: required_secrets.clone(),
1157        approval_points,
1158        side_effects,
1159        expected_outputs,
1160        warnings,
1161        rejection_reasons,
1162        promotion: PromotionMetadata {
1163            source_trace_hashes: example_refs
1164                .iter()
1165                .map(|example| example.source_hash.clone())
1166                .collect(),
1167            author: options.author.clone(),
1168            approver: options.approver.clone(),
1169            created_at: now_rfc3339(),
1170            version: "0.1.0".to_string(),
1171            package_name,
1172            capability_set: capabilities,
1173            secrets_required: required_secrets,
1174            rollback_target: Some("keep source traces and previous package version".to_string()),
1175            eval_pack_link: options.eval_pack_link.clone(),
1176        },
1177        savings,
1178        shadow: ShadowRunReport::default(),
1179    }]
1180}
1181
1182fn best_repeated_sequence(
1183    signatures: &[Vec<String>],
1184    min_examples: usize,
1185) -> Option<RepeatedSequence> {
1186    let mut occurrences: BTreeMap<Vec<String>, Vec<(usize, usize)>> = BTreeMap::new();
1187    for (trace_index, trace_signatures) in signatures.iter().enumerate() {
1188        for start in 0..trace_signatures.len() {
1189            let max_len = (trace_signatures.len() - start).min(12);
1190            for len in 2..=max_len {
1191                let sequence = trace_signatures[start..start + len].to_vec();
1192                occurrences
1193                    .entry(sequence)
1194                    .or_default()
1195                    .push((trace_index, start));
1196            }
1197        }
1198    }
1199
1200    occurrences
1201        .into_iter()
1202        .filter_map(|(sequence, positions)| {
1203            let mut seen = BTreeSet::new();
1204            let mut examples = Vec::new();
1205            for (trace_index, start) in positions {
1206                if seen.insert(trace_index) {
1207                    examples.push((trace_index, start));
1208                }
1209            }
1210            if examples.len() >= min_examples {
1211                Some((sequence, examples))
1212            } else {
1213                None
1214            }
1215        })
1216        .max_by(
1217            |(left_sequence, left_examples), (right_sequence, right_examples)| {
1218                left_examples
1219                    .len()
1220                    .cmp(&right_examples.len())
1221                    .then_with(|| left_sequence.len().cmp(&right_sequence.len()))
1222            },
1223        )
1224}
1225
1226fn action_signature(action: &CrystallizationAction) -> String {
1227    let mut parameter_keys = action.parameters.keys().cloned().collect::<Vec<_>>();
1228    parameter_keys.sort();
1229    format!(
1230        "{}:{}:{}",
1231        action.kind,
1232        action.name,
1233        parameter_keys.join(",")
1234    )
1235}
1236
1237fn compatible_side_effects(actions: &[&CrystallizationAction]) -> bool {
1238    let first = sorted_side_effects(actions[0].side_effects.clone());
1239    actions
1240        .iter()
1241        .skip(1)
1242        .all(|action| sorted_side_effects(action.side_effects.clone()) == first)
1243}
1244
1245fn collect_varying_parameters(
1246    actions: &[&CrystallizationAction],
1247    root: &str,
1248    value_for: impl Fn(&CrystallizationAction) -> &JsonValue,
1249    parameter_values: &mut BTreeMap<String, BTreeSet<String>>,
1250    parameter_paths: &mut BTreeMap<String, BTreeSet<String>>,
1251    parameter_refs: &mut BTreeSet<String>,
1252) {
1253    let mut paths = BTreeMap::<String, Vec<JsonValue>>::new();
1254    for action in actions {
1255        collect_scalar_paths(value_for(action), root, &mut paths);
1256    }
1257    for (path, values) in paths {
1258        if values.len() != actions.len() {
1259            continue;
1260        }
1261        let unique = values
1262            .iter()
1263            .map(json_scalar_string)
1264            .collect::<BTreeSet<_>>();
1265        if unique.len() < 2 {
1266            continue;
1267        }
1268        let name = parameter_name_for_path(&path);
1269        parameter_values
1270            .entry(name.clone())
1271            .or_default()
1272            .extend(unique);
1273        parameter_paths
1274            .entry(name.clone())
1275            .or_default()
1276            .insert(path);
1277        parameter_refs.insert(name);
1278    }
1279}
1280
1281fn collect_scalar_paths(
1282    value: &JsonValue,
1283    prefix: &str,
1284    out: &mut BTreeMap<String, Vec<JsonValue>>,
1285) {
1286    match value {
1287        JsonValue::Object(map) => {
1288            for (key, child) in map {
1289                collect_scalar_paths(child, &format!("{prefix}.{key}"), out);
1290            }
1291        }
1292        JsonValue::Array(items) => {
1293            for (idx, child) in items.iter().enumerate() {
1294                collect_scalar_paths(child, &format!("{prefix}[{idx}]"), out);
1295            }
1296        }
1297        _ if is_scalar(value) => {
1298            out.entry(prefix.to_string())
1299                .or_default()
1300                .push(value.clone());
1301        }
1302        _ => {}
1303    }
1304}
1305
1306fn parameter_name_for_path(path: &str) -> String {
1307    let lower = path.to_ascii_lowercase();
1308    for (needle, name) in [
1309        ("version", "version"),
1310        ("repo_path", "repo_path"),
1311        ("repo", "repo_path"),
1312        ("branch_name", "branch_name"),
1313        ("branch", "branch_name"),
1314        ("release_target", "release_target"),
1315        ("target", "release_target"),
1316    ] {
1317        if lower.contains(needle) {
1318            return name.to_string();
1319        }
1320    }
1321    let tail = path
1322        .rsplit(['.', '['])
1323        .next()
1324        .unwrap_or("param")
1325        .trim_end_matches(']');
1326    sanitize_identifier(tail)
1327}
1328
1329fn constants_for_action(action: &CrystallizationAction) -> BTreeMap<String, JsonValue> {
1330    let mut constants = BTreeMap::new();
1331    constants.insert("kind".to_string(), json!(action.kind));
1332    constants.insert("name".to_string(), json!(action.name));
1333    if action.deterministic.unwrap_or(false) {
1334        constants.insert("deterministic".to_string(), json!(true));
1335    }
1336    constants
1337}
1338
1339fn preconditions_for_action(action: &CrystallizationAction) -> Vec<String> {
1340    let mut out = Vec::new();
1341    for capability in &action.capabilities {
1342        out.push(format!("capability '{capability}' is available"));
1343    }
1344    for secret in &action.required_secrets {
1345        out.push(format!("secret '{secret}' is configured"));
1346    }
1347    if let Some(approval) = &action.approval {
1348        if approval.required {
1349            out.push("human approval boundary is preserved".to_string());
1350        }
1351    }
1352    out
1353}
1354
1355fn stable_expected_output(actions: &[&CrystallizationAction]) -> Option<JsonValue> {
1356    let first = actions[0]
1357        .observed_output
1358        .as_ref()
1359        .or(actions[0].output.as_ref())?;
1360    if actions
1361        .iter()
1362        .all(|action| action.observed_output.as_ref().or(action.output.as_ref()) == Some(first))
1363    {
1364        Some(first.clone())
1365    } else {
1366        None
1367    }
1368}
1369
1370fn shadow_candidate(
1371    candidate: &WorkflowCandidate,
1372    traces: &[CrystallizationTrace],
1373) -> ShadowRunReport {
1374    let mut failures = Vec::new();
1375    let mut results = Vec::new();
1376    for example in &candidate.examples {
1377        let Some(trace) = traces.iter().find(|trace| trace.id == example.trace_id) else {
1378            failures.push(format!("missing source trace {}", example.trace_id));
1379            continue;
1380        };
1381        let mut details = Vec::new();
1382        let end = example.start_index + candidate.steps.len();
1383        if end > trace.actions.len() {
1384            details.push("candidate sequence extends past trace action list".to_string());
1385        } else {
1386            let signatures = trace.actions[example.start_index..end]
1387                .iter()
1388                .map(action_signature)
1389                .collect::<Vec<_>>();
1390            if signatures != candidate.sequence_signature {
1391                details.push("action sequence signature changed".to_string());
1392            }
1393            for (offset, step) in candidate.steps.iter().enumerate() {
1394                let action = &trace.actions[example.start_index + offset];
1395                if sorted_side_effects(action.side_effects.clone()) != step.side_effects {
1396                    details.push(format!(
1397                        "step {} side effects differ for action {}",
1398                        step.index, action.id
1399                    ));
1400                }
1401                if action.approval.as_ref().map(|approval| approval.required)
1402                    != step.approval.as_ref().map(|approval| approval.required)
1403                {
1404                    details.push(format!("step {} approval boundary differs", step.index));
1405                }
1406                if step.segment == SegmentKind::Deterministic {
1407                    if let Some(expected) = &step.expected_output {
1408                        let actual = action.observed_output.as_ref().or(action.output.as_ref());
1409                        if actual != Some(expected) {
1410                            details
1411                                .push(format!("step {} deterministic output differs", step.index));
1412                        }
1413                    }
1414                }
1415            }
1416        }
1417        let pass = details.is_empty();
1418        if !pass {
1419            failures.push(format!("trace {} failed shadow comparison", trace.id));
1420        }
1421        results.push(ShadowTraceResult {
1422            trace_id: trace.id.clone(),
1423            pass,
1424            details,
1425        });
1426    }
1427    ShadowRunReport {
1428        pass: failures.is_empty(),
1429        compared_traces: results.len(),
1430        failures,
1431        traces: results,
1432    }
1433}
1434
1435fn estimate_savings(
1436    traces: &[CrystallizationTrace],
1437    examples: &[(usize, usize)],
1438    steps: &[WorkflowCandidateStep],
1439) -> SavingsEstimate {
1440    let mut estimate = SavingsEstimate::default();
1441    for (trace_index, start_index) in examples {
1442        let trace = &traces[*trace_index];
1443        for action in &trace.actions[*start_index..*start_index + steps.len()] {
1444            if action.kind == "model_call" || action.fuzzy.unwrap_or(false) {
1445                estimate.remaining_model_calls += action.cost.model_calls.max(1);
1446            } else {
1447                estimate.model_calls_avoided += action.cost.model_calls;
1448                estimate.input_tokens_avoided += action.cost.input_tokens;
1449                estimate.output_tokens_avoided += action.cost.output_tokens;
1450                estimate.estimated_cost_usd_avoided += action.cost.total_cost_usd;
1451                estimate.wall_ms_avoided +=
1452                    action.cost.wall_ms + action.duration_ms.unwrap_or_default();
1453            }
1454        }
1455    }
1456    estimate.cpu_runtime_cost_usd = 0.0;
1457    estimate
1458}
1459
1460fn confidence_for(
1461    examples: &[(usize, usize)],
1462    trace_count: usize,
1463    steps: &[WorkflowCandidateStep],
1464    safe: bool,
1465) -> f64 {
1466    if !safe || trace_count == 0 {
1467        return 0.0;
1468    }
1469    let coverage = examples.len() as f64 / trace_count as f64;
1470    let deterministic = steps
1471        .iter()
1472        .filter(|step| step.segment == SegmentKind::Deterministic)
1473        .count() as f64
1474        / steps.len().max(1) as f64;
1475    ((coverage * 0.65) + (deterministic * 0.35)).min(0.99)
1476}
1477
1478fn infer_workflow_name(steps: &[WorkflowCandidateStep]) -> String {
1479    let names = steps
1480        .iter()
1481        .map(|step| step.name.to_ascii_lowercase())
1482        .collect::<Vec<_>>()
1483        .join("_");
1484    if names.contains("version") || names.contains("release") {
1485        "crystallized_version_bump".to_string()
1486    } else {
1487        "crystallized_workflow".to_string()
1488    }
1489}
1490
1491pub fn generate_harn_code(candidate: &WorkflowCandidate) -> String {
1492    let mut out = String::new();
1493    let params = if candidate.parameters.is_empty() {
1494        "task".to_string()
1495    } else {
1496        candidate
1497            .parameters
1498            .iter()
1499            .map(|parameter| parameter.name.as_str())
1500            .collect::<Vec<_>>()
1501            .join(", ")
1502    };
1503    writeln!(out, "/**").unwrap();
1504    writeln!(
1505        out,
1506        " * Generated by harn crystallize. Review before promotion."
1507    )
1508    .unwrap();
1509    writeln!(out, " * Candidate: {}", candidate.id).unwrap();
1510    writeln!(
1511        out,
1512        " * Source trace hashes: {}",
1513        candidate.promotion.source_trace_hashes.join(", ")
1514    )
1515    .unwrap();
1516    writeln!(
1517        out,
1518        " * Capabilities: {}",
1519        if candidate.capabilities.is_empty() {
1520            "none".to_string()
1521        } else {
1522            candidate.capabilities.join(", ")
1523        }
1524    )
1525    .unwrap();
1526    writeln!(
1527        out,
1528        " * Required secrets: {}",
1529        if candidate.required_secrets.is_empty() {
1530            "none".to_string()
1531        } else {
1532            candidate.required_secrets.join(", ")
1533        }
1534    )
1535    .unwrap();
1536    writeln!(out, " */").unwrap();
1537    writeln!(out, "pipeline {}({}) {{", candidate.name, params).unwrap();
1538    writeln!(out, "  let review_warnings = []").unwrap();
1539    for step in &candidate.steps {
1540        writeln!(out, "  // Step {}: {} {}", step.index, step.kind, step.name).unwrap();
1541        for side_effect in &step.side_effects {
1542            writeln!(
1543                out,
1544                "  // side_effect: {} {}",
1545                side_effect.kind, side_effect.target
1546            )
1547            .unwrap();
1548        }
1549        if let Some(approval) = &step.approval {
1550            if approval.required {
1551                writeln!(
1552                    out,
1553                    "  // approval_required: {}",
1554                    approval
1555                        .boundary
1556                        .clone()
1557                        .unwrap_or_else(|| "human_review".to_string())
1558                )
1559                .unwrap();
1560            }
1561        }
1562        if step.segment == SegmentKind::Fuzzy {
1563            writeln!(
1564                out,
1565                "  // TODO: fuzzy segment still needs LLM/reviewer handling before deterministic promotion."
1566            )
1567            .unwrap();
1568            writeln!(
1569                out,
1570                "  review_warnings.push(\"fuzzy step: {}\")",
1571                escape_harn_string(&step.name)
1572            )
1573            .unwrap();
1574        }
1575        writeln!(
1576            out,
1577            "  log(\"crystallized step {}: {}\")",
1578            step.index,
1579            escape_harn_string(&step.name)
1580        )
1581        .unwrap();
1582    }
1583    writeln!(
1584        out,
1585        "  return {{status: \"shadow_ready\", candidate_id: \"{}\", review_warnings: review_warnings}}",
1586        escape_harn_string(&candidate.id)
1587    )
1588    .unwrap();
1589    writeln!(out, "}}").unwrap();
1590    out
1591}
1592
1593fn rejected_workflow_stub(rejected: &[WorkflowCandidate]) -> String {
1594    let mut out = String::new();
1595    writeln!(
1596        out,
1597        "// Generated by harn crystallize. No safe candidate was proposed."
1598    )
1599    .unwrap();
1600    writeln!(out, "pipeline crystallized_workflow(task) {{").unwrap();
1601    writeln!(out, "  log(\"no safe crystallization candidate\")").unwrap();
1602    writeln!(
1603        out,
1604        "  return {{status: \"rejected\", rejected_candidates: {}}}",
1605        rejected.len()
1606    )
1607    .unwrap();
1608    writeln!(out, "}}").unwrap();
1609    out
1610}
1611
1612pub fn generate_eval_pack(candidate: &WorkflowCandidate) -> String {
1613    let mut out = String::new();
1614    writeln!(out, "version = 1").unwrap();
1615    writeln!(
1616        out,
1617        "id = \"{}-crystallization\"",
1618        candidate.promotion.package_name
1619    )
1620    .unwrap();
1621    writeln!(
1622        out,
1623        "name = \"{} crystallization shadow evals\"",
1624        candidate.name
1625    )
1626    .unwrap();
1627    writeln!(out).unwrap();
1628    writeln!(out, "[package]").unwrap();
1629    writeln!(out, "name = \"{}\"", candidate.promotion.package_name).unwrap();
1630    writeln!(out, "version = \"{}\"", candidate.promotion.version).unwrap();
1631    writeln!(out).unwrap();
1632    for example in &candidate.examples {
1633        writeln!(out, "[[fixtures]]").unwrap();
1634        writeln!(out, "id = \"{}\"", example.trace_id).unwrap();
1635        writeln!(out, "kind = \"jsonl-trace\"").unwrap();
1636        writeln!(out, "trace_id = \"{}\"", example.trace_id).unwrap();
1637        writeln!(out).unwrap();
1638    }
1639    writeln!(out, "[[rubrics]]").unwrap();
1640    writeln!(out, "id = \"shadow-determinism\"").unwrap();
1641    writeln!(out, "kind = \"deterministic\"").unwrap();
1642    writeln!(out).unwrap();
1643    writeln!(out, "[[rubrics.assertions]]").unwrap();
1644    writeln!(out, "kind = \"crystallization-shadow\"").unwrap();
1645    writeln!(
1646        out,
1647        "expected = {{ candidate_id = \"{}\", pass = true }}",
1648        candidate.id
1649    )
1650    .unwrap();
1651    writeln!(out).unwrap();
1652    writeln!(out, "[[cases]]").unwrap();
1653    writeln!(out, "id = \"{}-shadow\"", candidate.name).unwrap();
1654    writeln!(out, "name = \"{} shadow replay\"", candidate.name).unwrap();
1655    writeln!(out, "rubrics = [\"shadow-determinism\"]").unwrap();
1656    writeln!(out, "severity = \"blocking\"").unwrap();
1657    out
1658}
1659
1660fn stable_candidate_id(sequence: &[String], examples: &[WorkflowCandidateExample]) -> String {
1661    let mut hasher = Sha256::new();
1662    for item in sequence {
1663        hasher.update(item.as_bytes());
1664        hasher.update([0]);
1665    }
1666    for example in examples {
1667        hasher.update(example.source_hash.as_bytes());
1668        hasher.update([0]);
1669    }
1670    format!("candidate_{}", hex_prefix(hasher.finalize().as_slice(), 16))
1671}
1672
1673fn hash_bytes(bytes: &[u8]) -> String {
1674    let mut hasher = Sha256::new();
1675    hasher.update(bytes);
1676    format!("sha256:{}", hex::encode(hasher.finalize()))
1677}
1678
1679fn hex_prefix(bytes: &[u8], chars: usize) -> String {
1680    hex::encode(bytes).chars().take(chars).collect::<String>()
1681}
1682
/// Collects `items` into a sorted, de-duplicated list, dropping entries that
/// are empty or contain only whitespace. Surviving entries are not trimmed.
fn sorted_strings(items: impl Iterator<Item = String>) -> Vec<String> {
    items
        .filter(|item| !item.trim().is_empty())
        .collect::<BTreeSet<_>>()
        .into_iter()
        .collect()
}
1688
1689fn sorted_side_effects(items: Vec<CrystallizationSideEffect>) -> Vec<CrystallizationSideEffect> {
1690    let mut items = items
1691        .into_iter()
1692        .filter(|item| !item.kind.trim().is_empty() || !item.target.trim().is_empty())
1693        .collect::<Vec<_>>();
1694    items.sort_by_key(side_effect_sort_key);
1695    items.dedup_by(|left, right| side_effect_sort_key(left) == side_effect_sort_key(right));
1696    items
1697}
1698
1699fn side_effect_sort_key(item: &CrystallizationSideEffect) -> String {
1700    format!(
1701        "{}\x1f{}\x1f{}\x1f{}",
1702        item.kind,
1703        item.target,
1704        item.capability.clone().unwrap_or_default(),
1705        item.mutation.clone().unwrap_or_default()
1706    )
1707}
1708
1709fn is_scalar(value: &JsonValue) -> bool {
1710    matches!(
1711        value,
1712        JsonValue::String(_) | JsonValue::Number(_) | JsonValue::Bool(_) | JsonValue::Null
1713    )
1714}
1715
1716fn json_scalar_string(value: &JsonValue) -> String {
1717    match value {
1718        JsonValue::String(value) => value.clone(),
1719        other => other.to_string(),
1720    }
1721}
1722
/// Converts arbitrary text into a lowercase identifier.
///
/// ASCII letters, digits, and `_` are kept (letters lowercased); every other
/// character becomes `_`, without producing a second `_` directly after an
/// existing one. Leading and trailing underscores are then stripped. When
/// nothing remains the result is `"param"`.
///
/// If the cleaned text starts with a digit it is prefixed with `_` so the
/// result never begins with a digit. The guard runs *after* trimming: a `_`
/// pushed up front would be stripped again, and it would also miss inputs
/// whose first digit follows stripped punctuation (e.g. `"-1abc"`).
fn sanitize_identifier(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    for ch in raw.chars() {
        if ch.is_ascii_alphanumeric() || ch == '_' {
            out.push(ch.to_ascii_lowercase());
        } else if !out.ends_with('_') {
            out.push('_');
        }
    }
    let trimmed = out.trim_matches('_');
    if trimmed.is_empty() {
        "param".to_string()
    } else if trimmed.starts_with(|ch: char| ch.is_ascii_digit()) {
        format!("_{trimmed}")
    } else {
        trimmed.to_string()
    }
}
1742
/// Escapes `value` for embedding inside a double-quoted string literal by
/// prefixing every backslash and double quote with a backslash.
fn escape_harn_string(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        if matches!(ch, '\\' | '"') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
1746
1747// ===== Crystallization bundle =====
1748//
1749// A bundle is a directory layout that Harn writes and Harn Cloud (or any
1750// other importer) reads without bespoke glue. The contract is:
1751//
1752//   bundle/
1753//     candidate.json        # versioned manifest documented below
1754//     workflow.harn         # generated/reviewable workflow code
1755//     report.json           # full mining/shadow/eval report
1756//     harn.eval.toml        # generated eval pack when available (optional)
1757//     fixtures/             # redacted replay fixtures referenced by the
1758//                           # report (optional, only when --bundle is used
1759//                           # with `harn crystallize` and traces were on disk)
1760//
1761// `candidate.json` is the authoritative manifest. It must include the
1762// `schema` and `schema_version` markers. Cloud importers MUST reject any
1763// bundle whose `schema` is not exactly `harn.crystallization.candidate.bundle`
1764// or whose `schema_version` is greater than the highest version they
1765// understand. Only the documented additive fields may be added without
1766// bumping `schema_version`.
1767
/// Identifies the tool that produced a bundle manifest. Stored in the
/// manifest's `generator` field.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleGenerator {
    /// Name of the producing tool (`"harn"` by default).
    pub tool: String,
    /// Version string of the producing tool.
    pub version: String,
}
1774
1775impl Default for BundleGenerator {
1776    fn default() -> Self {
1777        Self {
1778            tool: "harn".to_string(),
1779            version: env!("CARGO_PKG_VERSION").to_string(),
1780        }
1781    }
1782}
1783
/// Points at the generated workflow file inside a bundle and records the
/// naming and versioning proposed for promotion.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleWorkflowRef {
    /// Relative path inside the bundle directory.
    pub path: String,
    /// Short identifier used in `pipeline NAME(...)`.
    pub name: String,
    /// Logical package name promotion uses to register the workflow.
    pub package_name: String,
    /// Initial workflow version proposed for promotion.
    pub package_version: String,
}
1796
/// Provenance record for one source trace backing the selected candidate.
/// One entry is emitted per candidate example.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleSourceTrace {
    /// Id of the trace the candidate example was mined from.
    pub trace_id: String,
    /// Source hash recorded on the candidate example.
    pub source_hash: String,
    /// Optional human-visible URL (PR, issue, run record path) for the
    /// trace. `None` when the trace was loaded from an in-memory store.
    pub source_url: Option<String>,
    /// Optional cloud-side receipt id when the trace was already promoted
    /// into a tenant receipt. Cloud importers use this to wire candidate
    /// evidence to existing receipts without round-tripping the raw payload.
    pub source_receipt_id: Option<String>,
    /// Relative path of the redacted fixture inside the bundle, if one
    /// was emitted.
    pub fixture_path: Option<String>,
}
1813
/// One workflow step as recorded in the bundle manifest. Built as a
/// field-for-field copy of a `WorkflowCandidateStep`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct BundleStep {
    /// Step index copied from the candidate step.
    pub index: usize,
    pub kind: String,
    pub name: String,
    /// Decides whether the step is listed under the manifest's
    /// `deterministic_steps` or `fuzzy_steps`.
    pub segment: SegmentKind,
    pub parameter_refs: Vec<String>,
    pub side_effects: Vec<CrystallizationSideEffect>,
    pub capabilities: Vec<String>,
    pub required_secrets: Vec<String>,
    pub approval: Option<CrystallizationApproval>,
    pub review_notes: Vec<String>,
}
1828
1829impl BundleStep {
1830    fn from_candidate_step(step: &WorkflowCandidateStep) -> Self {
1831        Self {
1832            index: step.index,
1833            kind: step.kind.clone(),
1834            name: step.name.clone(),
1835            segment: step.segment.clone(),
1836            parameter_refs: step.parameter_refs.clone(),
1837            side_effects: step.side_effects.clone(),
1838            capabilities: step.capabilities.clone(),
1839            required_secrets: step.required_secrets.clone(),
1840            approval: step.approval.clone(),
1841            review_notes: step.review_notes.clone(),
1842        }
1843    }
1844}
1845
/// Reference to the eval pack shipped with a bundle. The manifest omits it
/// when no eval pack content was generated.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleEvalPackRef {
    /// Relative path of the eval pack inside the bundle directory.
    pub path: String,
    /// Optional external link the eval pack also lives at (e.g. a hosted
    /// `eval-pack://` URI when the bundle was promoted into a tenant).
    pub link: Option<String>,
}
1855
/// Manifest entry describing one fixture file shipped inside the bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleFixtureRef {
    /// Path of the fixture file, resolved against the bundle directory.
    pub path: String,
    /// Id of the trace the fixture was produced from.
    pub trace_id: String,
    /// Source hash recorded for that trace.
    pub source_hash: String,
    /// Whether the payload was redacted before being written. Bundle
    /// validation reports a problem for any fixture where this is `false`.
    pub redacted: bool,
}
1864
/// Ownership and rollout metadata attached to a bundle for promotion review.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundlePromotion {
    /// Owner of the promoted workflow; the bundle builder seeds it from
    /// `author`.
    pub owner: Option<String>,
    pub approver: Option<String>,
    pub author: Option<String>,
    /// Logical rollout strategy. Defaults to `shadow_then_canary`. Hosted
    /// surfaces may extend this enum but must keep existing values stable.
    pub rollout_policy: String,
    pub rollback_target: Option<String>,
    /// Timestamp string recorded when the bundle was built.
    pub created_at: String,
    pub workflow_version: String,
    pub package_name: String,
}
1879
1880impl Default for BundlePromotion {
1881    fn default() -> Self {
1882        Self {
1883            owner: None,
1884            approver: None,
1885            author: None,
1886            rollout_policy: DEFAULT_ROLLOUT_POLICY.to_string(),
1887            rollback_target: None,
1888            created_at: String::new(),
1889            workflow_version: String::new(),
1890            package_name: String::new(),
1891        }
1892    }
1893}
1894
/// Summary of the redaction pass applied to fixture payloads.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleRedactionSummary {
    /// Whether redaction ran. Validation reports a problem when this is
    /// `false` while the manifest still lists fixtures.
    pub applied: bool,
    /// Names of the redaction rules that were applied.
    pub rules: Vec<String>,
    /// Human-readable description of what was done.
    pub summary: String,
    /// Number of fixture files copied into the bundle (0 when no fixture
    /// directory was emitted).
    pub fixture_count: usize,
}
1905
/// The `candidate.json` manifest of a crystallization bundle.
///
/// Loading rejects manifests whose `schema` differs from `BUNDLE_SCHEMA` or
/// whose `schema_version` is newer than `BUNDLE_SCHEMA_VERSION`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundleManifest {
    /// Schema marker; must equal `BUNDLE_SCHEMA`.
    pub schema: String,
    /// Manifest schema version; must not exceed `BUNDLE_SCHEMA_VERSION`.
    pub schema_version: u32,
    /// Timestamp string recorded when the manifest was built.
    pub generated_at: String,
    pub generator: BundleGenerator,
    pub kind: BundleKind,
    /// Id of the selected candidate; empty when no candidate was selected.
    pub candidate_id: String,
    pub external_key: String,
    pub title: String,
    pub team: Option<String>,
    pub repo: Option<String>,
    pub risk_level: String,
    pub workflow: BundleWorkflowRef,
    pub source_trace_hashes: Vec<String>,
    pub source_traces: Vec<BundleSourceTrace>,
    /// Candidate steps whose segment is `SegmentKind::Deterministic`.
    pub deterministic_steps: Vec<BundleStep>,
    /// Candidate steps with any other segment kind.
    pub fuzzy_steps: Vec<BundleStep>,
    pub side_effects: Vec<CrystallizationSideEffect>,
    pub capabilities: Vec<String>,
    /// Secret ids the workflow needs. Validation reports a problem when an
    /// entry looks like a raw secret rather than a logical id.
    pub required_secrets: Vec<String>,
    pub savings: SavingsEstimate,
    pub shadow: ShadowRunReport,
    /// Present only when eval pack content was generated.
    pub eval_pack: Option<BundleEvalPackRef>,
    pub fixtures: Vec<BundleFixtureRef>,
    pub promotion: BundlePromotion,
    pub redaction: BundleRedactionSummary,
    pub confidence: f64,
    /// Rejection reasons gathered from every rejected candidate in the report.
    pub rejection_reasons: Vec<String>,
    pub warnings: Vec<String>,
}
1938
/// Classification of a bundle, serialized in `snake_case`
/// (`candidate`, `plan_only`, `rejected`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum BundleKind {
    /// A normal candidate that passed shadow comparison and is ready for
    /// review and promotion.
    #[default]
    Candidate,
    /// A "plan-only" candidate: every step has a side-effect-free, in-process
    /// outcome (e.g. classify and write a receipt). Cloud importers can
    /// promote these without explicit external-side-effect approval.
    PlanOnly,
    /// No safe candidate was selected. The bundle still records what was
    /// attempted, the rejection reasons, and any rejected candidates so
    /// reviewers can debug or feed it back into mining.
    Rejected,
}
1955
/// Caller-supplied overrides used when building a bundle. For the string
/// overrides with a fallback (`external_key`, `title`, `risk_level`,
/// `rollout_policy`), a blank value is treated the same as `None`.
#[derive(Clone, Debug, Default)]
pub struct BundleOptions {
    /// Stable identifier downstream cloud importers use to dedupe bundles
    /// across runs (defaults to a sanitized workflow name).
    pub external_key: Option<String>,
    /// Manifest title; inferred from the candidate when unset.
    pub title: Option<String>,
    /// Copied into the manifest as given.
    pub team: Option<String>,
    /// Copied into the manifest as given.
    pub repo: Option<String>,
    /// Manifest risk level; inferred from the candidate when unset.
    pub risk_level: Option<String>,
    /// Rollout policy; falls back to `DEFAULT_ROLLOUT_POLICY` when unset.
    pub rollout_policy: Option<String>,
}
1967
/// In-memory form of a crystallization bundle: the manifest plus the content
/// of every file that gets written into the bundle directory.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundle {
    /// Written as `candidate.json`.
    pub manifest: CrystallizationBundleManifest,
    /// Written as `report.json`.
    pub report: CrystallizationReport,
    /// Written as `workflow.harn`.
    pub harn_code: String,
    /// Written as `harn.eval.toml`; the file is skipped when this is blank.
    pub eval_pack_toml: String,
    /// Redacted traces, each written as one JSON file under `fixtures/`.
    pub fixtures: Vec<CrystallizationTrace>,
}
1977
/// Outcome of validating a bundle on disk: identifying fields echoed from
/// the manifest, one flag per checked area, and the list of problems found.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleValidation {
    /// Display form of the validated directory.
    pub bundle_dir: String,
    // The next four fields are copied from the manifest once it loads; they
    // keep their default values when the manifest could not be loaded.
    pub schema: String,
    pub schema_version: u32,
    pub kind: BundleKind,
    pub candidate_id: String,
    /// The manifest was read, decoded, and passed the schema checks.
    pub manifest_ok: bool,
    /// The workflow file referenced by the manifest exists.
    pub workflow_ok: bool,
    /// `report.json` was read and decoded.
    pub report_ok: bool,
    /// The referenced eval pack exists, or the manifest references none.
    pub eval_pack_ok: bool,
    /// Every listed fixture exists and is marked redacted.
    pub fixtures_ok: bool,
    /// The redaction summary is consistent with the fixture list.
    pub redaction_ok: bool,
    /// Human-readable description of every problem found.
    pub problems: Vec<String>,
}

impl BundleValidation {
    /// Returns `true` when validation recorded no problems.
    pub fn is_ok(&self) -> bool {
        self.problems.is_empty()
    }
}
2001
2002/// Build an in-memory bundle from already-mined artifacts. The traces
2003/// passed here are the same normalized traces used to mine the candidate;
2004/// they will be redacted before being attached as fixtures.
2005pub fn build_crystallization_bundle(
2006    artifacts: CrystallizationArtifacts,
2007    traces: &[CrystallizationTrace],
2008    options: BundleOptions,
2009) -> Result<CrystallizationBundle, VmError> {
2010    let CrystallizationArtifacts {
2011        report,
2012        harn_code,
2013        eval_pack_toml,
2014    } = artifacts;
2015
2016    let (selected, kind) = match report
2017        .selected_candidate_id
2018        .as_deref()
2019        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
2020    {
2021        Some(candidate) => {
2022            let kind = if candidate_is_plan_only(candidate) {
2023                BundleKind::PlanOnly
2024            } else {
2025                BundleKind::Candidate
2026            };
2027            (Some(candidate), kind)
2028        }
2029        None => (None, BundleKind::Rejected),
2030    };
2031
2032    let workflow_name = selected
2033        .map(|candidate| candidate.name.clone())
2034        .unwrap_or_else(|| "crystallized_workflow".to_string());
2035    let package_name = selected
2036        .map(|candidate| candidate.promotion.package_name.clone())
2037        .unwrap_or_else(|| workflow_name.replace('_', "-"));
2038    let workflow_version = selected
2039        .map(|candidate| candidate.promotion.version.clone())
2040        .unwrap_or_else(|| "0.0.0".to_string());
2041
2042    let manifest_workflow = BundleWorkflowRef {
2043        path: BUNDLE_WORKFLOW_FILE.to_string(),
2044        name: workflow_name.clone(),
2045        package_name: package_name.clone(),
2046        package_version: workflow_version.clone(),
2047    };
2048
2049    let external_key = options
2050        .external_key
2051        .clone()
2052        .filter(|key| !key.trim().is_empty())
2053        .unwrap_or_else(|| sanitize_external_key(&workflow_name));
2054    let title = options
2055        .title
2056        .clone()
2057        .filter(|title| !title.trim().is_empty())
2058        .unwrap_or_else(|| infer_bundle_title(selected, &workflow_name));
2059    let risk_level = options
2060        .risk_level
2061        .clone()
2062        .filter(|risk| !risk.trim().is_empty())
2063        .unwrap_or_else(|| infer_risk_level(selected));
2064    let rollout_policy = options
2065        .rollout_policy
2066        .clone()
2067        .filter(|policy| !policy.trim().is_empty())
2068        .unwrap_or_else(|| DEFAULT_ROLLOUT_POLICY.to_string());
2069
2070    let (deterministic_steps, fuzzy_steps) = match selected {
2071        Some(candidate) => candidate
2072            .steps
2073            .iter()
2074            .map(BundleStep::from_candidate_step)
2075            .partition::<Vec<_>, _>(|step| step.segment == SegmentKind::Deterministic),
2076        None => (Vec::new(), Vec::new()),
2077    };
2078
2079    let source_trace_hashes = selected
2080        .map(|candidate| candidate.promotion.source_trace_hashes.clone())
2081        .unwrap_or_default();
2082
2083    let mut source_traces = Vec::new();
2084    let mut fixture_refs = Vec::new();
2085    let mut fixture_payloads = Vec::new();
2086    if let Some(candidate) = selected {
2087        for example in &candidate.examples {
2088            let trace = traces.iter().find(|trace| trace.id == example.trace_id);
2089            let fixture_relative = trace.map(|trace| {
2090                format!(
2091                    "{BUNDLE_FIXTURES_DIR}/{}.json",
2092                    sanitize_fixture_name(&trace.id)
2093                )
2094            });
2095            source_traces.push(BundleSourceTrace {
2096                trace_id: example.trace_id.clone(),
2097                source_hash: example.source_hash.clone(),
2098                source_url: trace.and_then(|trace| trace.source.clone()),
2099                source_receipt_id: trace
2100                    .and_then(|trace| trace.metadata.get("source_receipt_id"))
2101                    .and_then(|value| value.as_str().map(str::to_string)),
2102                fixture_path: fixture_relative.clone(),
2103            });
2104            if let (Some(trace), Some(fixture_path)) = (trace, fixture_relative.clone()) {
2105                let mut redacted = trace.clone();
2106                redact_trace_for_bundle(&mut redacted);
2107                fixture_refs.push(BundleFixtureRef {
2108                    path: fixture_path,
2109                    trace_id: trace.id.clone(),
2110                    source_hash: trace.source_hash.clone().unwrap_or_default(),
2111                    redacted: true,
2112                });
2113                fixture_payloads.push(redacted);
2114            }
2115        }
2116    }
2117
2118    // Owner defaults to author so cloud importers always have a populated
2119    // ownership pointer, but stays separate from `author` so reviewers can
2120    // assign a different owner in the manifest before promotion.
2121    let author = selected.and_then(|candidate| candidate.promotion.author.clone());
2122    let promotion = BundlePromotion {
2123        owner: author.clone(),
2124        approver: selected.and_then(|candidate| candidate.promotion.approver.clone()),
2125        author,
2126        rollout_policy,
2127        rollback_target: selected.and_then(|candidate| candidate.promotion.rollback_target.clone()),
2128        created_at: now_rfc3339(),
2129        workflow_version,
2130        package_name: package_name.clone(),
2131    };
2132
2133    let redaction = BundleRedactionSummary {
2134        applied: !fixture_payloads.is_empty(),
2135        rules: vec![
2136            "sensitive_keys".to_string(),
2137            "secret_value_heuristic".to_string(),
2138        ],
2139        summary: if fixture_payloads.is_empty() {
2140            "no fixtures emitted".to_string()
2141        } else {
2142            "fixture payloads scrubbed of secret-like values and sensitive keys before write"
2143                .to_string()
2144        },
2145        fixture_count: fixture_payloads.len(),
2146    };
2147
2148    let eval_pack = if eval_pack_toml.trim().is_empty() {
2149        None
2150    } else {
2151        Some(BundleEvalPackRef {
2152            path: BUNDLE_EVAL_PACK_FILE.to_string(),
2153            link: selected
2154                .and_then(|candidate| candidate.promotion.eval_pack_link.clone())
2155                .filter(|link| !link.trim().is_empty()),
2156        })
2157    };
2158
2159    let manifest = CrystallizationBundleManifest {
2160        schema: BUNDLE_SCHEMA.to_string(),
2161        schema_version: BUNDLE_SCHEMA_VERSION,
2162        generated_at: now_rfc3339(),
2163        generator: BundleGenerator::default(),
2164        kind,
2165        candidate_id: selected
2166            .map(|candidate| candidate.id.clone())
2167            .unwrap_or_default(),
2168        external_key,
2169        title,
2170        team: options.team,
2171        repo: options.repo,
2172        risk_level,
2173        workflow: manifest_workflow,
2174        source_trace_hashes,
2175        source_traces,
2176        deterministic_steps,
2177        fuzzy_steps,
2178        side_effects: selected
2179            .map(|candidate| candidate.side_effects.clone())
2180            .unwrap_or_default(),
2181        capabilities: selected
2182            .map(|candidate| candidate.capabilities.clone())
2183            .unwrap_or_default(),
2184        required_secrets: selected
2185            .map(|candidate| candidate.required_secrets.clone())
2186            .unwrap_or_default(),
2187        savings: selected
2188            .map(|candidate| candidate.savings.clone())
2189            .unwrap_or_default(),
2190        shadow: selected
2191            .map(|candidate| candidate.shadow.clone())
2192            .unwrap_or_default(),
2193        eval_pack,
2194        fixtures: fixture_refs,
2195        promotion,
2196        redaction,
2197        confidence: selected
2198            .map(|candidate| candidate.confidence)
2199            .unwrap_or(0.0),
2200        rejection_reasons: report
2201            .rejected_candidates
2202            .iter()
2203            .flat_map(|candidate| candidate.rejection_reasons.iter().cloned())
2204            .collect(),
2205        warnings: report.warnings.clone(),
2206    };
2207
2208    Ok(CrystallizationBundle {
2209        manifest,
2210        report,
2211        harn_code,
2212        eval_pack_toml,
2213        fixtures: fixture_payloads,
2214    })
2215}
2216
2217/// Write a bundle to a directory. Creates the directory if it does not
2218/// already exist. Returns the manifest with `generated_at` and any
2219/// runtime-resolved metadata filled in.
2220pub fn write_crystallization_bundle(
2221    bundle: &CrystallizationBundle,
2222    bundle_dir: &Path,
2223) -> Result<CrystallizationBundleManifest, VmError> {
2224    std::fs::create_dir_all(bundle_dir).map_err(|error| {
2225        VmError::Runtime(format!(
2226            "failed to create bundle dir {}: {error}",
2227            bundle_dir.display()
2228        ))
2229    })?;
2230    write_bytes(
2231        &bundle_dir.join(BUNDLE_WORKFLOW_FILE),
2232        bundle.harn_code.as_bytes(),
2233    )?;
2234    let report_json = serde_json::to_vec_pretty(&bundle.report)
2235        .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
2236    write_bytes(&bundle_dir.join(BUNDLE_REPORT_FILE), &report_json)?;
2237
2238    if !bundle.eval_pack_toml.trim().is_empty() {
2239        write_bytes(
2240            &bundle_dir.join(BUNDLE_EVAL_PACK_FILE),
2241            bundle.eval_pack_toml.as_bytes(),
2242        )?;
2243    }
2244
2245    if !bundle.fixtures.is_empty() {
2246        let fixtures_dir = bundle_dir.join(BUNDLE_FIXTURES_DIR);
2247        std::fs::create_dir_all(&fixtures_dir).map_err(|error| {
2248            VmError::Runtime(format!(
2249                "failed to create fixtures dir {}: {error}",
2250                fixtures_dir.display()
2251            ))
2252        })?;
2253        for trace in &bundle.fixtures {
2254            let path = fixtures_dir.join(format!("{}.json", sanitize_fixture_name(&trace.id)));
2255            let payload = serde_json::to_vec_pretty(trace).map_err(|error| {
2256                VmError::Runtime(format!("failed to encode fixture {}: {error}", trace.id))
2257            })?;
2258            write_bytes(&path, &payload)?;
2259        }
2260    }
2261
2262    let manifest_json = serde_json::to_vec_pretty(&bundle.manifest)
2263        .map_err(|error| VmError::Runtime(format!("failed to encode manifest JSON: {error}")))?;
2264    write_bytes(&bundle_dir.join(BUNDLE_MANIFEST_FILE), &manifest_json)?;
2265    Ok(bundle.manifest.clone())
2266}
2267
2268/// Read a bundle manifest from disk. Verifies the schema marker but does
2269/// not cross-check workflow/report/eval-pack sibling files; for a richer
2270/// check use [`validate_crystallization_bundle`].
2271pub fn load_crystallization_bundle_manifest(
2272    bundle_dir: &Path,
2273) -> Result<CrystallizationBundleManifest, VmError> {
2274    let manifest_path = bundle_dir.join(BUNDLE_MANIFEST_FILE);
2275    let bytes = std::fs::read(&manifest_path).map_err(|error| {
2276        VmError::Runtime(format!(
2277            "failed to read bundle manifest {}: {error}",
2278            manifest_path.display()
2279        ))
2280    })?;
2281    let manifest: CrystallizationBundleManifest =
2282        serde_json::from_slice(&bytes).map_err(|error| {
2283            VmError::Runtime(format!(
2284                "failed to decode bundle manifest {}: {error}",
2285                manifest_path.display()
2286            ))
2287        })?;
2288    if manifest.schema != BUNDLE_SCHEMA {
2289        return Err(VmError::Runtime(format!(
2290            "bundle {} has unrecognized schema {:?} (expected {})",
2291            bundle_dir.display(),
2292            manifest.schema,
2293            BUNDLE_SCHEMA
2294        )));
2295    }
2296    if manifest.schema_version > BUNDLE_SCHEMA_VERSION {
2297        return Err(VmError::Runtime(format!(
2298            "bundle {} schema_version {} is newer than supported {}",
2299            bundle_dir.display(),
2300            manifest.schema_version,
2301            BUNDLE_SCHEMA_VERSION
2302        )));
2303    }
2304    Ok(manifest)
2305}
2306
2307/// Read every fixture trace referenced by the bundle manifest. Returns
2308/// the manifest plus loaded traces, in the order they appear in the
2309/// manifest. Fixtures with `path: None` are skipped.
2310pub fn load_crystallization_bundle(
2311    bundle_dir: &Path,
2312) -> Result<(CrystallizationBundleManifest, Vec<CrystallizationTrace>), VmError> {
2313    let manifest = load_crystallization_bundle_manifest(bundle_dir)?;
2314    let mut traces = Vec::new();
2315    for fixture in &manifest.fixtures {
2316        let path = bundle_dir.join(&fixture.path);
2317        traces.push(load_crystallization_trace(&path)?);
2318    }
2319    Ok((manifest, traces))
2320}
2321
2322/// Validate a bundle directory layout and contents. Cheap enough to call
2323/// from a CLI smoke command; performs no live side effects.
2324pub fn validate_crystallization_bundle(bundle_dir: &Path) -> Result<BundleValidation, VmError> {
2325    let mut validation = BundleValidation {
2326        bundle_dir: bundle_dir.display().to_string(),
2327        ..BundleValidation::default()
2328    };
2329    let manifest = match load_crystallization_bundle_manifest(bundle_dir) {
2330        Ok(manifest) => manifest,
2331        Err(error) => {
2332            validation.problems.push(error.to_string());
2333            return Ok(validation);
2334        }
2335    };
2336    validation.manifest_ok = true;
2337    validation.schema = manifest.schema.clone();
2338    validation.schema_version = manifest.schema_version;
2339    validation.kind = manifest.kind.clone();
2340    validation.candidate_id = manifest.candidate_id.clone();
2341
2342    let workflow_path = bundle_dir.join(&manifest.workflow.path);
2343    if workflow_path.exists() {
2344        validation.workflow_ok = true;
2345    } else {
2346        validation
2347            .problems
2348            .push(format!("missing workflow file {}", workflow_path.display()));
2349    }
2350
2351    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
2352    match std::fs::read(&report_path) {
2353        Ok(bytes) => match serde_json::from_slice::<CrystallizationReport>(&bytes) {
2354            Ok(report) => {
2355                validation.report_ok = true;
2356                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
2357                    && manifest.candidate_id.is_empty()
2358                {
2359                    validation
2360                        .problems
2361                        .push("manifest is non-rejected but has empty candidate_id".to_string());
2362                }
2363                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
2364                    && report.selected_candidate_id.as_deref() != Some(&manifest.candidate_id)
2365                {
2366                    validation.problems.push(format!(
2367                        "report selected_candidate_id {:?} does not match manifest candidate_id {}",
2368                        report.selected_candidate_id, manifest.candidate_id
2369                    ));
2370                }
2371            }
2372            Err(error) => {
2373                validation
2374                    .problems
2375                    .push(format!("invalid report.json: {error}"));
2376            }
2377        },
2378        Err(error) => {
2379            validation.problems.push(format!(
2380                "missing report file {}: {error}",
2381                report_path.display()
2382            ));
2383        }
2384    }
2385
2386    if let Some(eval_pack) = &manifest.eval_pack {
2387        let path = bundle_dir.join(&eval_pack.path);
2388        if path.exists() {
2389            validation.eval_pack_ok = true;
2390        } else {
2391            validation.problems.push(format!(
2392                "manifest references eval pack {} but file is missing",
2393                path.display()
2394            ));
2395        }
2396    } else {
2397        validation.eval_pack_ok = true;
2398    }
2399
2400    let mut fixtures_problem = false;
2401    for fixture in &manifest.fixtures {
2402        let path = bundle_dir.join(&fixture.path);
2403        if !path.exists() {
2404            validation
2405                .problems
2406                .push(format!("missing fixture {}", path.display()));
2407            fixtures_problem = true;
2408            continue;
2409        }
2410        if !fixture.redacted {
2411            validation.problems.push(format!(
2412                "fixture {} is not marked redacted; bundle must not ship raw private payloads",
2413                fixture.path
2414            ));
2415            fixtures_problem = true;
2416        }
2417    }
2418    validation.fixtures_ok = !fixtures_problem;
2419
2420    if !manifest.redaction.applied && !manifest.fixtures.is_empty() {
2421        validation
2422            .problems
2423            .push("redaction.applied is false but bundle includes fixtures".to_string());
2424    } else {
2425        validation.redaction_ok = true;
2426    }
2427    if !manifest
2428        .required_secrets
2429        .iter()
2430        .all(|secret| secret_id_looks_logical(secret))
2431    {
2432        validation.problems.push(
2433            "required_secrets contains a non-logical id (looks like a raw secret)".to_string(),
2434        );
2435    }
2436
2437    Ok(validation)
2438}
2439
2440/// Replay shadow comparison from a bundle: re-runs the deterministic
2441/// shadow check in-process against the bundle's redacted fixtures, with
2442/// no live side effects. Returns the manifest and the freshly computed
2443/// `ShadowRunReport`. The returned report is suitable for cloud import or
2444/// for asserting determinism in CI.
2445pub fn shadow_replay_bundle(
2446    bundle_dir: &Path,
2447) -> Result<(CrystallizationBundleManifest, ShadowRunReport), VmError> {
2448    let (manifest, traces) = load_crystallization_bundle(bundle_dir)?;
2449    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
2450    let bytes = std::fs::read(&report_path).map_err(|error| {
2451        VmError::Runtime(format!(
2452            "failed to read bundle report {}: {error}",
2453            report_path.display()
2454        ))
2455    })?;
2456    let report: CrystallizationReport = serde_json::from_slice(&bytes).map_err(|error| {
2457        VmError::Runtime(format!(
2458            "failed to decode bundle report {}: {error}",
2459            report_path.display()
2460        ))
2461    })?;
2462    let candidate = report
2463        .selected_candidate_id
2464        .as_deref()
2465        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
2466        .ok_or_else(|| {
2467            VmError::Runtime(format!(
2468                "bundle {} has no selected candidate to replay",
2469                bundle_dir.display()
2470            ))
2471        })?;
2472    let shadow = shadow_candidate(candidate, &traces);
2473    Ok((manifest, shadow))
2474}
2475
2476fn write_bytes(path: &Path, bytes: &[u8]) -> Result<(), VmError> {
2477    crate::atomic_io::atomic_write(path, bytes)
2478        .map_err(|error| VmError::Runtime(format!("failed to write {}: {error}", path.display())))
2479}
2480
/// Turn an arbitrary identifier into a file-name-safe fixture stem.
///
/// Every character other than an ASCII letter, ASCII digit, `-` or `_` is
/// replaced with `_`, then leading and trailing underscores are stripped.
/// If nothing is left the stem falls back to `"trace"`.
fn sanitize_fixture_name(raw: &str) -> String {
    let mut cleaned = String::with_capacity(raw.len());
    for ch in raw.chars() {
        let keep = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_');
        cleaned.push(if keep { ch } else { '_' });
    }
    match cleaned.trim_matches('_') {
        "" => "trace".to_string(),
        stem => stem.to_string(),
    }
}
2498
/// Derive a lowercase, dash-separated key from free-form text.
///
/// Runs of ASCII letters and digits are lowercased and joined with a single
/// `-`; every other character acts as a separator, so the result never
/// starts or ends with a dash and never contains two dashes in a row. Input
/// with no ASCII alphanumerics yields `"crystallized-workflow"`.
fn sanitize_external_key(raw: &str) -> String {
    let slug = raw
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .map(str::to_ascii_lowercase)
        .collect::<Vec<_>>()
        .join("-");
    if slug.is_empty() {
        "crystallized-workflow".to_string()
    } else {
        slug
    }
}
2519
2520fn infer_bundle_title(candidate: Option<&WorkflowCandidate>, fallback_name: &str) -> String {
2521    if let Some(candidate) = candidate {
2522        format!(
2523            "{} ({} step{})",
2524            candidate.name,
2525            candidate.steps.len(),
2526            if candidate.steps.len() == 1 { "" } else { "s" }
2527        )
2528    } else {
2529        format!("rejected: {fallback_name}")
2530    }
2531}
2532
2533fn infer_risk_level(candidate: Option<&WorkflowCandidate>) -> String {
2534    let Some(candidate) = candidate else {
2535        return "high".to_string();
2536    };
2537    let touches_external = candidate.side_effects.iter().any(side_effect_is_external);
2538    let needs_secret = !candidate.required_secrets.is_empty();
2539    if touches_external && needs_secret {
2540        "high".to_string()
2541    } else if touches_external || needs_secret {
2542        "medium".to_string()
2543    } else {
2544        "low".to_string()
2545    }
2546}
2547
2548fn side_effect_is_external(effect: &CrystallizationSideEffect) -> bool {
2549    let kind = effect.kind.to_ascii_lowercase();
2550    if kind.is_empty() {
2551        return false;
2552    }
2553    // Plan-only side effects stay inside Harn's own data plane: they
2554    // write receipts, append to the in-process event log, or stash plans.
2555    // None of those touch tenant-external systems.
2556    let internal = kind.contains("receipt")
2557        || kind.contains("event_log")
2558        || kind.contains("memo")
2559        || kind.contains("plan");
2560    if internal {
2561        return false;
2562    }
2563    kind.contains("post")
2564        || kind.contains("write")
2565        || kind.contains("publish")
2566        || kind.contains("delete")
2567        || kind.contains("send")
2568}
2569
2570fn candidate_is_plan_only(candidate: &WorkflowCandidate) -> bool {
2571    if candidate.steps.is_empty() {
2572        return false;
2573    }
2574    candidate.side_effects.iter().all(|effect| {
2575        let kind = effect.kind.to_ascii_lowercase();
2576        // Plan-only side effects stay inside Harn's own data plane: receipt
2577        // writes, in-memory event-log appends, file-only mutations, etc.
2578        kind.is_empty()
2579            || kind.contains("receipt")
2580            || kind.contains("event_log")
2581            || kind.contains("memo")
2582            || kind.contains("plan")
2583            || (kind.contains("file") && !kind.contains("publish"))
2584    })
2585}
2586
2587fn redact_trace_for_bundle(trace: &mut CrystallizationTrace) {
2588    for action in &mut trace.actions {
2589        redact_bundle_value(&mut action.inputs);
2590        if let Some(output) = action.output.as_mut() {
2591            redact_bundle_value(output);
2592        }
2593        if let Some(observed) = action.observed_output.as_mut() {
2594            redact_bundle_value(observed);
2595        }
2596        for value in action.parameters.values_mut() {
2597            redact_bundle_value(value);
2598        }
2599        for (_, value) in action.metadata.iter_mut() {
2600            redact_bundle_value(value);
2601        }
2602    }
2603    for (_, value) in trace.metadata.iter_mut() {
2604        redact_bundle_value(value);
2605    }
2606}
2607
2608fn redact_bundle_value(value: &mut JsonValue) {
2609    match value {
2610        JsonValue::String(text) if looks_like_secret_value(text) => {
2611            *text = "[redacted]".to_string();
2612        }
2613        JsonValue::Array(items) => {
2614            for item in items {
2615                redact_bundle_value(item);
2616            }
2617        }
2618        JsonValue::Object(map) => {
2619            for (key, child) in map.iter_mut() {
2620                if is_sensitive_bundle_key(key) {
2621                    *child = JsonValue::String("[redacted]".to_string());
2622                } else {
2623                    redact_bundle_value(child);
2624                }
2625            }
2626        }
2627        _ => {}
2628    }
2629}
2630
/// Report whether a JSON/map key names a field that must always be redacted.
///
/// The comparison is ASCII case-insensitive. A key is sensitive when it
/// contains one of the credential fragments (`secret`, `token`, `password`,
/// or an API-key spelling: `api_key`, `api-key`, `apikey`), or when it is
/// exactly one of the credential-carrying HTTP headers (`authorization`,
/// `proxy-authorization`, `cookie`, `set-cookie`).
///
/// The hyphenated `api-key` fragment covers header-style keys such as
/// `X-Api-Key`, which the underscore spelling alone does not match.
fn is_sensitive_bundle_key(key: &str) -> bool {
    const SENSITIVE_FRAGMENTS: &[&str] = &[
        "secret", "token", "password", "api_key", "api-key", "apikey",
    ];
    const SENSITIVE_HEADERS: &[&str] = &[
        "authorization",
        "proxy-authorization",
        "cookie",
        "set-cookie",
    ];

    let lower = key.to_ascii_lowercase();
    SENSITIVE_FRAGMENTS
        .iter()
        .any(|fragment| lower.contains(*fragment))
        || SENSITIVE_HEADERS.contains(&lower.as_str())
}
2642
/// Heuristically decide whether a string is a raw credential.
///
/// After trimming surrounding whitespace, a value is flagged when it starts
/// with one of the known token prefixes (`sk-`, `ghp_`, `ghs_`, `xoxb-`,
/// `xoxp-`, `AKIA`), or when it is longer than 48 bytes and consists only of
/// ASCII letters, ASCII digits, `_` and `-`.
fn looks_like_secret_value(value: &str) -> bool {
    const KNOWN_PREFIXES: [&str; 6] = ["sk-", "ghp_", "ghs_", "xoxb-", "xoxp-", "AKIA"];

    let candidate = value.trim();
    if KNOWN_PREFIXES
        .iter()
        .any(|prefix| candidate.starts_with(*prefix))
    {
        return true;
    }
    // Long unbroken runs of token-alphabet characters are treated as opaque
    // credentials even without a recognizable prefix.
    candidate.len() > 48
        && candidate
            .bytes()
            .all(|byte| byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'-')
}
2656
2657fn secret_id_looks_logical(value: &str) -> bool {
2658    !looks_like_secret_value(value) && !value.trim().is_empty()
2659}
2660
#[cfg(test)]
mod tests {
    use super::*;

    /// Options that only pin the emitted workflow name.
    fn named_workflow(name: &str) -> CrystallizeOptions {
        CrystallizeOptions {
            workflow_name: Some(name.to_string()),
            ..CrystallizeOptions::default()
        }
    }

    /// Build a three-action release trace: branch checkout, manifest version
    /// bump, and release-notes preparation. `side_target` is the target of
    /// the checkout's side effect; `fuzzy` turns the last action into a
    /// model call with non-zero model cost.
    fn version_trace(
        id: &str,
        version: &str,
        side_target: &str,
        fuzzy: bool,
    ) -> CrystallizationTrace {
        let checkout = CrystallizationAction {
            id: format!("{id}-branch"),
            kind: String::from("tool_call"),
            name: String::from("git.checkout_branch"),
            parameters: BTreeMap::from([
                (String::from("repo_path"), json!(format!("/tmp/{id}"))),
                (
                    String::from("branch_name"),
                    json!(format!("release-{version}")),
                ),
            ]),
            side_effects: vec![CrystallizationSideEffect {
                kind: String::from("git_ref"),
                target: side_target.to_string(),
                capability: Some(String::from("git.write")),
                ..CrystallizationSideEffect::default()
            }],
            capabilities: vec![String::from("git.write")],
            deterministic: Some(true),
            duration_ms: Some(20),
            ..CrystallizationAction::default()
        };

        let manifest_bump = CrystallizationAction {
            id: format!("{id}-manifest"),
            kind: String::from("file_mutation"),
            name: String::from("update_manifest_version"),
            inputs: json!({"version": version, "path": "harn.toml"}),
            parameters: BTreeMap::from([(String::from("version"), json!(version))]),
            side_effects: vec![CrystallizationSideEffect {
                kind: String::from("file_write"),
                target: String::from("harn.toml"),
                capability: Some(String::from("fs.write")),
                ..CrystallizationSideEffect::default()
            }],
            capabilities: vec![String::from("fs.write")],
            deterministic: Some(true),
            ..CrystallizationAction::default()
        };

        let release_kind = if fuzzy { "model_call" } else { "tool_call" };
        let release_notes = CrystallizationAction {
            id: format!("{id}-release"),
            kind: release_kind.to_string(),
            name: String::from("prepare_release_notes"),
            inputs: json!({"release_target": "crates.io", "version": version}),
            parameters: BTreeMap::from([
                (String::from("release_target"), json!("crates.io")),
                (String::from("version"), json!(version)),
            ]),
            fuzzy: Some(fuzzy),
            deterministic: Some(!fuzzy),
            cost: CrystallizationCost {
                model_calls: if fuzzy { 1 } else { 0 },
                input_tokens: if fuzzy { 1200 } else { 0 },
                output_tokens: if fuzzy { 250 } else { 0 },
                total_cost_usd: if fuzzy { 0.01 } else { 0.0 },
                wall_ms: 3000,
                ..CrystallizationCost::default()
            },
            ..CrystallizationAction::default()
        };

        CrystallizationTrace {
            id: id.to_string(),
            actions: vec![checkout, manifest_bump, release_notes],
            ..CrystallizationTrace::default()
        }
    }

    /// Build a two-action trace whose only side effect is a receipt write.
    fn plan_only_trace(id: &str, suffix: &str) -> CrystallizationTrace {
        let classify = CrystallizationAction {
            id: format!("{id}-classify"),
            kind: String::from("tool_call"),
            name: String::from("classify_issue"),
            parameters: BTreeMap::from([
                (String::from("issue_id"), json!(format!("HAR-{suffix}"))),
                (String::from("team_key"), json!("HAR")),
            ]),
            capabilities: vec![String::from("linear.read")],
            deterministic: Some(true),
            duration_ms: Some(15),
            ..CrystallizationAction::default()
        };

        let receipt = CrystallizationAction {
            id: format!("{id}-receipt"),
            kind: String::from("receipt_write"),
            name: String::from("emit_receipt"),
            inputs: json!({"summary": format!("plan only #{suffix}"), "kind": "plan"}),
            parameters: BTreeMap::from([
                (String::from("kind"), json!("plan")),
                (
                    String::from("summary"),
                    json!(format!("plan only #{suffix}")),
                ),
            ]),
            side_effects: vec![CrystallizationSideEffect {
                kind: String::from("receipt_write"),
                target: String::from("tenant_event_log"),
                capability: Some(String::from("receipt.write")),
                ..CrystallizationSideEffect::default()
            }],
            capabilities: vec![String::from("receipt.write")],
            deterministic: Some(true),
            duration_ms: Some(5),
            ..CrystallizationAction::default()
        };

        CrystallizationTrace {
            id: id.to_string(),
            actions: vec![classify, receipt],
            ..CrystallizationTrace::default()
        }
    }

    /// `count` deterministic release traces that differ only in id and
    /// version, all targeting `release-branch`.
    fn version_traces(count: usize) -> Vec<CrystallizationTrace> {
        let mut traces = Vec::with_capacity(count);
        for idx in 0..count {
            traces.push(version_trace(
                &format!("trace_{idx}"),
                &format!("0.7.{idx}"),
                "release-branch",
                false,
            ));
        }
        traces
    }

    /// Three release traces where the middle one targets `main` instead of
    /// `release-branch`, so the checkout side effects disagree.
    fn divergent_traces() -> Vec<CrystallizationTrace> {
        [
            ("trace_a", "0.7.1", "release-branch"),
            ("trace_b", "0.7.2", "main"),
            ("trace_c", "0.7.3", "release-branch"),
        ]
        .iter()
        .map(|&(id, version, target)| version_trace(id, version, target, false))
        .collect()
    }

    #[test]
    fn crystallizes_repeated_version_bump_with_parameters() {
        let mined = crystallize_traces(version_traces(5), named_workflow("version_bump")).unwrap();

        let candidate = &mined.report.candidates[0];
        assert!(candidate.rejection_reasons.is_empty());
        assert!(candidate.shadow.pass);
        assert_eq!(candidate.examples.len(), 5);

        let mut param_names = BTreeSet::new();
        for param in &candidate.parameters {
            param_names.insert(param.name.as_str());
        }
        assert!(param_names.contains("version"));
        assert!(param_names.contains("repo_path"));
        assert!(param_names.contains("branch_name"));

        assert!(mined.harn_code.contains("pipeline version_bump("));
        assert!(mined.eval_pack_toml.contains("crystallization-shadow"));
    }

    #[test]
    fn rejects_divergent_side_effects() {
        let mined = crystallize_traces(divergent_traces(), CrystallizeOptions::default()).unwrap();
        let report = &mined.report;

        assert!(report.candidates.is_empty());
        assert_eq!(report.rejected_candidates.len(), 1);
        assert!(report.rejected_candidates[0].rejection_reasons[0]
            .contains("divergent side effects"));
    }

    #[test]
    fn preserves_remaining_fuzzy_segment() {
        let mut traces = Vec::new();
        for idx in 0..3 {
            traces.push(version_trace(
                &format!("trace_{idx}"),
                &format!("0.8.{idx}"),
                "release-branch",
                true,
            ));
        }

        let mined = crystallize_traces(traces, CrystallizeOptions::default()).unwrap();
        let candidate = &mined.report.candidates[0];

        let has_fuzzy_step = candidate
            .steps
            .iter()
            .any(|step| step.segment == SegmentKind::Fuzzy);
        assert!(has_fuzzy_step);
        assert!(candidate.savings.remaining_model_calls > 0);
        assert!(mined.harn_code.contains("TODO: fuzzy segment"));
    }

    #[test]
    fn build_bundle_assembles_versioned_manifest() {
        let traces = version_traces(5);
        let crystallize_options = CrystallizeOptions {
            workflow_name: Some("version_bump".to_string()),
            package_name: Some("release-workflows".to_string()),
            author: Some("ops@example.com".to_string()),
            approver: Some("lead@example.com".to_string()),
            eval_pack_link: Some("eval-pack://release-workflows/v1".to_string()),
            ..CrystallizeOptions::default()
        };
        let mined = crystallize_traces(traces.clone(), crystallize_options).unwrap();

        let bundle_options = BundleOptions {
            team: Some("platform".to_string()),
            repo: Some("burin-labs/harn".to_string()),
            ..BundleOptions::default()
        };
        let bundle = build_crystallization_bundle(mined, &traces, bundle_options).unwrap();
        let manifest = &bundle.manifest;

        // Schema identity and bundle kind.
        assert_eq!(manifest.schema, BUNDLE_SCHEMA);
        assert_eq!(manifest.schema_version, BUNDLE_SCHEMA_VERSION);
        assert_eq!(manifest.kind, BundleKind::Candidate);
        assert!(!manifest.candidate_id.is_empty());

        // Workflow identity and ownership.
        assert_eq!(manifest.workflow.name, "version_bump");
        assert_eq!(manifest.workflow.package_name, "release-workflows");
        assert_eq!(manifest.workflow.path, BUNDLE_WORKFLOW_FILE);
        assert_eq!(manifest.team.as_deref(), Some("platform"));
        assert_eq!(manifest.repo.as_deref(), Some("burin-labs/harn"));
        assert_eq!(manifest.external_key, "version-bump");

        // Promotion metadata.
        assert_eq!(manifest.promotion.rollout_policy, "shadow_then_canary");
        assert_eq!(
            manifest.promotion.author.as_deref(),
            Some("ops@example.com")
        );
        assert_eq!(
            manifest.promotion.approver.as_deref(),
            Some("lead@example.com")
        );
        assert_eq!(manifest.promotion.workflow_version, "0.1.0");

        // Steps, traces, fixtures and redaction bookkeeping.
        assert!(manifest.deterministic_steps.len() + manifest.fuzzy_steps.len() > 0);
        assert_eq!(manifest.source_traces.len(), traces.len());
        assert_eq!(manifest.fixtures.len(), traces.len());
        assert!(manifest.fixtures.iter().all(|fixture| fixture.redacted));
        assert!(manifest.redaction.applied);
        assert!(manifest.redaction.fixture_count > 0);
        assert!(manifest
            .eval_pack
            .as_ref()
            .is_some_and(|eval| eval.path == BUNDLE_EVAL_PACK_FILE));
        assert!(manifest
            .required_secrets
            .iter()
            .all(|secret| !secret.is_empty()));
    }

    #[test]
    fn write_bundle_round_trips_through_disk() {
        let traces = version_traces(5);
        let mined = crystallize_traces(traces.clone(), named_workflow("version_bump")).unwrap();
        let bundle = build_crystallization_bundle(mined, &traces, BundleOptions::default()).unwrap();

        let scratch = tempfile::tempdir().unwrap();
        let root = scratch.path();
        let written = write_crystallization_bundle(&bundle, root).unwrap();
        assert_eq!(written.candidate_id, bundle.manifest.candidate_id);

        // Files exist on disk.
        for relative in [
            BUNDLE_MANIFEST_FILE,
            BUNDLE_REPORT_FILE,
            BUNDLE_WORKFLOW_FILE,
            BUNDLE_EVAL_PACK_FILE,
        ] {
            assert!(root.join(relative).exists(), "missing {relative}");
        }
        let fixtures_dir = root.join(BUNDLE_FIXTURES_DIR);
        assert!(fixtures_dir.is_dir());
        let fixture_files = std::fs::read_dir(&fixtures_dir).unwrap().count();
        assert_eq!(fixture_files, traces.len());

        // Manifest round-trips.
        let (loaded_manifest, loaded_traces) = load_crystallization_bundle(root).unwrap();
        assert_eq!(loaded_manifest, bundle.manifest);
        assert_eq!(loaded_traces.len(), traces.len());

        // Validation passes.
        let validation = validate_crystallization_bundle(root).unwrap();
        assert!(
            validation.problems.is_empty(),
            "unexpected problems: {:?}",
            validation.problems
        );
        assert!(validation.is_ok());
        assert!(validation.workflow_ok && validation.report_ok);
        assert!(validation.fixtures_ok && validation.redaction_ok);

        // Shadow replay matches the persisted shadow report.
        let (replay_manifest, shadow) = shadow_replay_bundle(root).unwrap();
        assert_eq!(replay_manifest.candidate_id, bundle.manifest.candidate_id);
        assert!(shadow.pass, "shadow should still pass");
        assert_eq!(shadow.compared_traces, traces.len());
    }

    #[test]
    fn validate_rejects_bundle_with_missing_workflow() {
        let traces = version_traces(3);
        let mined = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
        let bundle = build_crystallization_bundle(mined, &traces, BundleOptions::default()).unwrap();

        let scratch = tempfile::tempdir().unwrap();
        write_crystallization_bundle(&bundle, scratch.path()).unwrap();
        std::fs::remove_file(scratch.path().join(BUNDLE_WORKFLOW_FILE)).unwrap();

        let validation = validate_crystallization_bundle(scratch.path()).unwrap();
        assert!(!validation.is_ok());
        let reported = validation
            .problems
            .iter()
            .any(|problem| problem.contains("missing workflow file"));
        assert!(reported);
    }

    #[test]
    fn validate_rejects_bundle_with_unredacted_fixture() {
        let traces = version_traces(3);
        let mined = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
        let mut bundle =
            build_crystallization_bundle(mined, &traces, BundleOptions::default()).unwrap();
        // Force a fixture to claim it is unredacted; this must trip
        // validation so a malicious or careless producer cannot ship raw
        // private payloads under the bundle contract.
        bundle.manifest.fixtures[0].redacted = false;

        let scratch = tempfile::tempdir().unwrap();
        write_crystallization_bundle(&bundle, scratch.path()).unwrap();

        let validation = validate_crystallization_bundle(scratch.path()).unwrap();
        assert!(!validation.is_ok());
        let reported = validation
            .problems
            .iter()
            .any(|problem| problem.contains("not marked redacted"));
        assert!(reported);
    }

    #[test]
    fn validate_rejects_unsupported_schema_version() {
        let traces = version_traces(3);
        let mined = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
        let mut bundle =
            build_crystallization_bundle(mined, &traces, BundleOptions::default()).unwrap();
        bundle.manifest.schema_version = BUNDLE_SCHEMA_VERSION + 1;

        let scratch = tempfile::tempdir().unwrap();
        write_crystallization_bundle(&bundle, scratch.path()).unwrap();

        let validation = validate_crystallization_bundle(scratch.path()).unwrap();
        assert!(!validation.is_ok());
        let reported = validation
            .problems
            .iter()
            .any(|problem| problem.contains("schema_version"));
        assert!(reported);
    }

    #[test]
    fn redacts_secret_like_values_in_fixtures() {
        // Build secret-shaped strings at runtime so we exercise the
        // redaction prefixes (`xoxb-`, `ghp_`, `sk-`) without checking in
        // source that looks like a real credential to secret scanners.
        let slack_prefix = format!("{}{}", "xo", "xb-");
        let github_prefix = format!("{}{}", "gh", "p_");
        let openai_prefix = "sk-".to_string();
        let pad = "A".repeat(48);
        let slack_secret = format!("{slack_prefix}1234567890-{pad}");
        let github_secret = format!("{github_prefix}{pad}");
        let openai_secret = format!("{openai_prefix}{pad}");

        let mut secret_action = CrystallizationAction {
            id: String::from("secret"),
            kind: String::from("tool_call"),
            name: String::from("post_release_to_slack"),
            parameters: BTreeMap::from([
                (String::from("slack_token"), json!(slack_secret)),
                (String::from("channel"), json!("#releases")),
            ]),
            inputs: json!({
                "authorization": format!("Bearer {github_secret}"),
                "version": "0.7.1",
            }),
            ..CrystallizationAction::default()
        };
        secret_action
            .metadata
            .insert(String::from("api_key"), json!(openai_secret));

        let mut trace = CrystallizationTrace {
            id: String::from("trace_secret"),
            actions: vec![secret_action],
            ..CrystallizationTrace::default()
        };
        redact_trace_for_bundle(&mut trace);

        let redacted = json!("[redacted]");
        let action = &trace.actions[0];
        assert_eq!(action.parameters.get("slack_token"), Some(&redacted));
        assert_eq!(action.parameters.get("channel"), Some(&json!("#releases")));
        let inputs = action.inputs.as_object().unwrap();
        assert_eq!(inputs.get("authorization"), Some(&redacted));
        assert_eq!(inputs.get("version"), Some(&json!("0.7.1")));
        assert_eq!(action.metadata.get("api_key"), Some(&redacted));
    }

    #[test]
    fn plan_only_fixture_yields_plan_only_kind() {
        let mut traces = Vec::new();
        for idx in 0..3 {
            traces.push(plan_only_trace(&format!("plan_{idx}"), &format!("{idx}")));
        }
        let mined = crystallize_traces(traces.clone(), named_workflow("plan_only_triage")).unwrap();
        let bundle = build_crystallization_bundle(mined, &traces, BundleOptions::default()).unwrap();

        assert_eq!(bundle.manifest.kind, BundleKind::PlanOnly);
        assert_eq!(bundle.manifest.risk_level, "low");
    }

    #[test]
    fn rejected_bundle_has_rejected_kind() {
        let traces = divergent_traces();
        let mined = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
        let bundle = build_crystallization_bundle(mined, &traces, BundleOptions::default()).unwrap();

        assert_eq!(bundle.manifest.kind, BundleKind::Rejected);
        assert!(bundle.manifest.candidate_id.is_empty());
        assert!(!bundle.manifest.rejection_reasons.is_empty());
        assert!(bundle.fixtures.is_empty());
    }

    #[test]
    fn validate_round_trips_rejected_bundle() {
        let traces = divergent_traces();
        let mined = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
        let bundle = build_crystallization_bundle(mined, &traces, BundleOptions::default()).unwrap();

        let scratch = tempfile::tempdir().unwrap();
        write_crystallization_bundle(&bundle, scratch.path()).unwrap();

        let validation = validate_crystallization_bundle(scratch.path()).unwrap();
        assert!(validation.is_ok(), "{:?}", validation.problems);
        assert_eq!(validation.kind, BundleKind::Rejected);
    }

    #[test]
    fn shadow_replay_fails_when_fixture_diverges() {
        let traces = version_traces(3);
        let mined = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
        let bundle = build_crystallization_bundle(mined, &traces, BundleOptions::default()).unwrap();

        let scratch = tempfile::tempdir().unwrap();
        write_crystallization_bundle(&bundle, scratch.path()).unwrap();

        // Tamper with one redacted fixture so its action list no longer
        // matches the candidate signature; the replay must fail without
        // panicking.
        let fixture_dir = scratch.path().join(BUNDLE_FIXTURES_DIR);
        let first_entry = std::fs::read_dir(&fixture_dir)
            .unwrap()
            .next()
            .unwrap()
            .unwrap();
        let fixture_path = first_entry.path();
        let fixture_bytes = std::fs::read(&fixture_path).unwrap();
        let mut tampered: CrystallizationTrace = serde_json::from_slice(&fixture_bytes).unwrap();
        tampered.actions.truncate(1);
        let tampered_bytes = serde_json::to_vec_pretty(&tampered).unwrap();
        std::fs::write(&fixture_path, tampered_bytes).unwrap();

        let (_, shadow) = shadow_replay_bundle(scratch.path()).unwrap();
        assert!(!shadow.pass);
        assert!(!shadow.failures.is_empty());
    }
}