Skip to main content

harn_vm/orchestration/
crystallize.rs

1//! Workflow crystallization primitives.
2//!
3//! This module keeps the first mining pass deliberately conservative: it
4//! accepts ordered trace actions, finds a repeated contiguous action sequence,
5//! extracts scalar parameters from fields that vary across examples, rejects
6//! divergent side effects, and emits a reviewable Harn skeleton plus shadow
7//! comparison metadata. Hosted surfaces can layer richer mining on top of this
8//! stable IR without changing the CLI contract.
9
10use std::collections::{BTreeMap, BTreeSet};
11use std::fmt::Write as _;
12use std::path::{Path, PathBuf};
13
14use serde::{Deserialize, Serialize};
15use serde_json::{json, Value as JsonValue};
16use sha2::{Digest, Sha256};
17
18use super::{new_id, now_rfc3339, RunRecord};
19use crate::value::VmError;
20
/// Schema version written onto normalized traces that carry no version and
/// advertised in the report's input-format descriptor.
const TRACE_SCHEMA_VERSION: u32 = 1;
/// Floor applied to the caller-supplied `min_examples` option.
const DEFAULT_MIN_EXAMPLES: usize = 2;

/// Stable schema marker for `candidate.json` inside a crystallization
/// bundle. Cloud importers and other downstream consumers should refuse
/// bundles whose `schema` field is anything else.
pub const BUNDLE_SCHEMA: &str = "harn.crystallization.candidate.bundle";
/// Versioned schema number for the bundle manifest. Cloud importers and
/// other consumers should refuse bundles whose `schema_version` is newer
/// than the highest version they understand.
pub const BUNDLE_SCHEMA_VERSION: u32 = 1;
/// Conventional file name of the bundle manifest inside a crystallization
/// bundle directory.
pub const BUNDLE_MANIFEST_FILE: &str = "candidate.json";
/// Conventional file name of the crystallization report inside a bundle.
pub const BUNDLE_REPORT_FILE: &str = "report.json";
/// Conventional file name of the generated workflow inside a bundle.
pub const BUNDLE_WORKFLOW_FILE: &str = "workflow.harn";
/// Conventional file name of the eval pack inside a bundle.
pub const BUNDLE_EVAL_PACK_FILE: &str = "harn.eval.toml";
/// Conventional name of the fixtures directory inside a bundle.
pub const BUNDLE_FIXTURES_DIR: &str = "fixtures";

/// Default rollout policy applied when a bundle is emitted without one
/// explicitly configured. Hosted promotion surfaces can override it.
const DEFAULT_ROLLOUT_POLICY: &str = "shadow_then_canary";
42
43#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
44#[serde(default)]
45pub struct CrystallizationTrace {
46    pub version: u32,
47    pub id: String,
48    pub source: Option<String>,
49    pub source_hash: Option<String>,
50    pub workflow_id: Option<String>,
51    pub started_at: Option<String>,
52    pub finished_at: Option<String>,
53    pub flow: Option<CrystallizationFlowRef>,
54    pub actions: Vec<CrystallizationAction>,
55    pub usage: CrystallizationUsage,
56    pub metadata: BTreeMap<String, JsonValue>,
57}
58
59#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
60#[serde(default)]
61pub struct CrystallizationFlowRef {
62    pub trace_id: Option<String>,
63    pub agent_run_id: Option<String>,
64    pub transcript_ref: Option<String>,
65    pub atom_ids: Vec<String>,
66    pub slice_ids: Vec<String>,
67}
68
69#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
70#[serde(default)]
71pub struct CrystallizationAction {
72    pub id: String,
73    pub kind: String,
74    pub name: String,
75    pub timestamp: Option<String>,
76    pub inputs: JsonValue,
77    pub output: Option<JsonValue>,
78    pub observed_output: Option<JsonValue>,
79    pub parameters: BTreeMap<String, JsonValue>,
80    pub side_effects: Vec<CrystallizationSideEffect>,
81    pub capabilities: Vec<String>,
82    pub required_secrets: Vec<String>,
83    pub approval: Option<CrystallizationApproval>,
84    pub cost: CrystallizationCost,
85    pub duration_ms: Option<i64>,
86    pub deterministic: Option<bool>,
87    pub fuzzy: Option<bool>,
88    pub metadata: BTreeMap<String, JsonValue>,
89}
90
91#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
92#[serde(default)]
93pub struct CrystallizationSideEffect {
94    pub kind: String,
95    pub target: String,
96    pub capability: Option<String>,
97    pub mutation: Option<String>,
98    pub metadata: BTreeMap<String, JsonValue>,
99}
100
101#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
102#[serde(default)]
103pub struct CrystallizationApproval {
104    pub prompt: Option<String>,
105    pub approver: Option<String>,
106    pub required: bool,
107    pub boundary: Option<String>,
108}
109
110#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
111#[serde(default)]
112pub struct CrystallizationCost {
113    pub model: Option<String>,
114    pub model_calls: i64,
115    pub input_tokens: i64,
116    pub output_tokens: i64,
117    pub total_cost_usd: f64,
118    pub wall_ms: i64,
119}
120
121#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
122#[serde(default)]
123pub struct CrystallizationUsage {
124    pub model_calls: i64,
125    pub input_tokens: i64,
126    pub output_tokens: i64,
127    pub total_cost_usd: f64,
128    pub wall_ms: i64,
129}
130
131#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
132#[serde(rename_all = "snake_case")]
133pub enum SegmentKind {
134    #[default]
135    Deterministic,
136    Fuzzy,
137}
138
/// Location of one occurrence of a sequence: `(trace index, start index of
/// the first action within that trace)`.
type SequenceExample = (usize, usize);
/// A repeated action-signature sequence together with its occurrences.
type RepeatedSequence = (Vec<String>, Vec<SequenceExample>);
141
142#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
143#[serde(default)]
144pub struct WorkflowCandidateParameter {
145    pub name: String,
146    pub source_paths: Vec<String>,
147    pub examples: Vec<String>,
148    pub required: bool,
149}
150
151#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
152#[serde(default)]
153pub struct WorkflowCandidateStep {
154    pub index: usize,
155    pub kind: String,
156    pub name: String,
157    pub segment: SegmentKind,
158    pub parameter_refs: Vec<String>,
159    pub constants: BTreeMap<String, JsonValue>,
160    pub preconditions: Vec<String>,
161    pub side_effects: Vec<CrystallizationSideEffect>,
162    pub capabilities: Vec<String>,
163    pub required_secrets: Vec<String>,
164    pub approval: Option<CrystallizationApproval>,
165    pub expected_output: Option<JsonValue>,
166    pub review_notes: Vec<String>,
167}
168
169#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
170#[serde(default)]
171pub struct WorkflowCandidateExample {
172    pub trace_id: String,
173    pub source_hash: String,
174    pub start_index: usize,
175    pub action_ids: Vec<String>,
176}
177
178#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
179#[serde(default)]
180pub struct PromotionMetadata {
181    pub source_trace_hashes: Vec<String>,
182    pub author: Option<String>,
183    pub approver: Option<String>,
184    pub created_at: String,
185    pub version: String,
186    pub package_name: String,
187    pub capability_set: Vec<String>,
188    pub secrets_required: Vec<String>,
189    pub rollback_target: Option<String>,
190    pub eval_pack_link: Option<String>,
191}
192
193#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
194#[serde(default)]
195pub struct SavingsEstimate {
196    pub model_calls_avoided: i64,
197    pub input_tokens_avoided: i64,
198    pub output_tokens_avoided: i64,
199    pub estimated_cost_usd_avoided: f64,
200    pub wall_ms_avoided: i64,
201    pub cpu_runtime_cost_usd: f64,
202    pub remaining_model_calls: i64,
203}
204
205#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
206#[serde(default)]
207pub struct ShadowTraceResult {
208    pub trace_id: String,
209    pub pass: bool,
210    pub details: Vec<String>,
211}
212
213#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
214#[serde(default)]
215pub struct ShadowRunReport {
216    pub pass: bool,
217    pub compared_traces: usize,
218    pub failures: Vec<String>,
219    pub traces: Vec<ShadowTraceResult>,
220}
221
222#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
223#[serde(default)]
224pub struct WorkflowCandidate {
225    pub id: String,
226    pub name: String,
227    pub confidence: f64,
228    pub sequence_signature: Vec<String>,
229    pub parameters: Vec<WorkflowCandidateParameter>,
230    pub steps: Vec<WorkflowCandidateStep>,
231    pub examples: Vec<WorkflowCandidateExample>,
232    pub capabilities: Vec<String>,
233    pub required_secrets: Vec<String>,
234    pub approval_points: Vec<CrystallizationApproval>,
235    pub side_effects: Vec<CrystallizationSideEffect>,
236    pub expected_outputs: Vec<JsonValue>,
237    pub warnings: Vec<String>,
238    pub rejection_reasons: Vec<String>,
239    pub promotion: PromotionMetadata,
240    pub savings: SavingsEstimate,
241    pub shadow: ShadowRunReport,
242}
243
244impl WorkflowCandidate {
245    pub fn is_safe_to_propose(&self) -> bool {
246        self.rejection_reasons.is_empty()
247    }
248}
249
250#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
251#[serde(default)]
252pub struct CrystallizationReport {
253    pub version: u32,
254    pub generated_at: String,
255    pub source_trace_count: usize,
256    pub selected_candidate_id: Option<String>,
257    pub candidates: Vec<WorkflowCandidate>,
258    pub rejected_candidates: Vec<WorkflowCandidate>,
259    pub warnings: Vec<String>,
260    pub input_format: CrystallizationInputFormat,
261    pub harn_code_path: Option<String>,
262    pub eval_pack_path: Option<String>,
263}
264
265#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
266#[serde(default)]
267pub struct CrystallizationInputFormat {
268    pub name: String,
269    pub version: u32,
270    pub required_fields: Vec<String>,
271    pub preserved_fields: Vec<String>,
272}
273
/// Caller-supplied knobs for `crystallize_traces`.
#[derive(Debug, Clone, Default)]
pub struct CrystallizeOptions {
    /// Minimum number of examples; values below `DEFAULT_MIN_EXAMPLES` are
    /// raised to it.
    pub min_examples: usize,
    /// Overrides the inferred workflow name.
    pub workflow_name: Option<String>,
    /// Overrides the package name derived from the workflow name.
    pub package_name: Option<String>,
    /// Author recorded in the promotion metadata.
    pub author: Option<String>,
    /// Approver recorded in the promotion metadata.
    pub approver: Option<String>,
    /// Eval pack link recorded in the promotion metadata.
    pub eval_pack_link: Option<String>,
}
283
284#[derive(Clone, Debug, Default)]
285pub struct CrystallizationArtifacts {
286    pub report: CrystallizationReport,
287    pub harn_code: String,
288    pub eval_pack_toml: String,
289}
290
291pub fn crystallize_traces(
292    traces: Vec<CrystallizationTrace>,
293    options: CrystallizeOptions,
294) -> Result<CrystallizationArtifacts, VmError> {
295    let min_examples = options.min_examples.max(DEFAULT_MIN_EXAMPLES);
296    if traces.len() < min_examples {
297        return Err(VmError::Runtime(format!(
298            "crystallize requires at least {min_examples} traces, got {}",
299            traces.len()
300        )));
301    }
302
303    let normalized = traces.into_iter().map(normalize_trace).collect::<Vec<_>>();
304    let mut candidates = mine_candidates(&normalized, min_examples, &options);
305    let mut rejected_candidates = Vec::new();
306    for candidate in &mut candidates {
307        candidate.shadow = shadow_candidate(candidate, &normalized);
308        if !candidate.shadow.pass {
309            candidate
310                .rejection_reasons
311                .extend(candidate.shadow.failures.clone());
312        }
313    }
314
315    let mut accepted = Vec::new();
316    for candidate in candidates {
317        if candidate.is_safe_to_propose() {
318            accepted.push(candidate);
319        } else {
320            rejected_candidates.push(candidate);
321        }
322    }
323    accepted.sort_by(|left, right| {
324        right
325            .confidence
326            .partial_cmp(&left.confidence)
327            .unwrap_or(std::cmp::Ordering::Equal)
328            .then_with(|| right.steps.len().cmp(&left.steps.len()))
329    });
330
331    let selected = accepted.first();
332    let harn_code = selected
333        .map(generate_harn_code)
334        .unwrap_or_else(|| rejected_workflow_stub(&rejected_candidates));
335    let eval_pack_toml = selected.map(generate_eval_pack).unwrap_or_default();
336
337    let report = CrystallizationReport {
338        version: 1,
339        generated_at: now_rfc3339(),
340        source_trace_count: normalized.len(),
341        selected_candidate_id: selected.map(|candidate| candidate.id.clone()),
342        candidates: accepted,
343        rejected_candidates,
344        warnings: Vec::new(),
345        input_format: CrystallizationInputFormat {
346            name: "harn.crystallization.trace".to_string(),
347            version: TRACE_SCHEMA_VERSION,
348            required_fields: vec!["id".to_string(), "actions".to_string()],
349            preserved_fields: vec![
350                "ordered actions".to_string(),
351                "tool calls".to_string(),
352                "model calls".to_string(),
353                "human approvals".to_string(),
354                "file mutations".to_string(),
355                "external API calls".to_string(),
356                "observed outputs".to_string(),
357                "costs".to_string(),
358                "timestamps".to_string(),
359                "source hashes".to_string(),
360                "Flow provenance references".to_string(),
361            ],
362        },
363        harn_code_path: None,
364        eval_pack_path: None,
365    };
366
367    Ok(CrystallizationArtifacts {
368        report,
369        harn_code,
370        eval_pack_toml,
371    })
372}
373
374pub fn load_crystallization_traces_from_dir(
375    dir: &Path,
376) -> Result<Vec<CrystallizationTrace>, VmError> {
377    let mut paths = Vec::new();
378    collect_json_paths(dir, &mut paths)?;
379    if paths.is_empty() {
380        return Err(VmError::Runtime(format!(
381            "no .json trace files found under {}",
382            dir.display()
383        )));
384    }
385    paths.sort();
386    paths
387        .iter()
388        .map(|path| load_crystallization_trace(path))
389        .collect()
390}
391
392pub fn load_crystallization_trace(path: &Path) -> Result<CrystallizationTrace, VmError> {
393    let content = std::fs::read_to_string(path).map_err(|error| {
394        VmError::Runtime(format!(
395            "failed to read crystallization trace {}: {error}",
396            path.display()
397        ))
398    })?;
399    let value: JsonValue = serde_json::from_str(&content).map_err(|error| {
400        VmError::Runtime(format!(
401            "failed to parse crystallization trace {}: {error}",
402            path.display()
403        ))
404    })?;
405
406    let mut trace = if value.get("actions").is_some() {
407        serde_json::from_value::<CrystallizationTrace>(value.clone()).map_err(|error| {
408            VmError::Runtime(format!(
409                "failed to decode crystallization trace {}: {error}",
410                path.display()
411            ))
412        })?
413    } else if value.get("stages").is_some() || value.get("_type") == Some(&json!("workflow_run")) {
414        let run: RunRecord = serde_json::from_value(value.clone()).map_err(|error| {
415            VmError::Runtime(format!(
416                "failed to decode run record {} as crystallization input: {error}",
417                path.display()
418            ))
419        })?;
420        trace_from_run_record(run)
421    } else {
422        return Err(VmError::Runtime(format!(
423            "{} is neither a crystallization trace nor a workflow run record",
424            path.display()
425        )));
426    };
427    if trace.source.is_none() {
428        trace.source = Some(path.display().to_string());
429    }
430    if trace.source_hash.is_none() {
431        trace.source_hash = Some(hash_bytes(content.as_bytes()));
432    }
433    Ok(normalize_trace(trace))
434}
435
436pub fn write_crystallization_artifacts(
437    mut artifacts: CrystallizationArtifacts,
438    workflow_path: &Path,
439    report_path: &Path,
440    eval_pack_path: Option<&Path>,
441) -> Result<CrystallizationReport, VmError> {
442    if let Some(parent) = workflow_path
443        .parent()
444        .filter(|path| !path.as_os_str().is_empty())
445    {
446        std::fs::create_dir_all(parent).map_err(|error| {
447            VmError::Runtime(format!(
448                "failed to create workflow output dir {}: {error}",
449                parent.display()
450            ))
451        })?;
452    }
453    std::fs::write(workflow_path, &artifacts.harn_code).map_err(|error| {
454        VmError::Runtime(format!(
455            "failed to write generated workflow {}: {error}",
456            workflow_path.display()
457        ))
458    })?;
459
460    artifacts.report.harn_code_path = Some(workflow_path.display().to_string());
461    if let Some(path) = eval_pack_path {
462        if !artifacts.eval_pack_toml.trim().is_empty() {
463            if let Some(parent) = path.parent().filter(|path| !path.as_os_str().is_empty()) {
464                std::fs::create_dir_all(parent).map_err(|error| {
465                    VmError::Runtime(format!(
466                        "failed to create eval pack output dir {}: {error}",
467                        parent.display()
468                    ))
469                })?;
470            }
471            std::fs::write(path, &artifacts.eval_pack_toml).map_err(|error| {
472                VmError::Runtime(format!(
473                    "failed to write eval pack {}: {error}",
474                    path.display()
475                ))
476            })?;
477            artifacts.report.eval_pack_path = Some(path.display().to_string());
478            if let Some(candidate) = artifacts.report.candidates.first_mut() {
479                candidate.promotion.eval_pack_link = Some(path.display().to_string());
480            }
481        }
482    }
483
484    if let Some(parent) = report_path
485        .parent()
486        .filter(|path| !path.as_os_str().is_empty())
487    {
488        std::fs::create_dir_all(parent).map_err(|error| {
489            VmError::Runtime(format!(
490                "failed to create report output dir {}: {error}",
491                parent.display()
492            ))
493        })?;
494    }
495    let report_json = serde_json::to_string_pretty(&artifacts.report)
496        .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
497    std::fs::write(report_path, report_json).map_err(|error| {
498        VmError::Runtime(format!(
499            "failed to write crystallization report {}: {error}",
500            report_path.display()
501        ))
502    })?;
503    Ok(artifacts.report)
504}
505
506fn collect_json_paths(dir: &Path, out: &mut Vec<PathBuf>) -> Result<(), VmError> {
507    let entries = std::fs::read_dir(dir).map_err(|error| {
508        VmError::Runtime(format!(
509            "failed to read crystallization trace dir {}: {error}",
510            dir.display()
511        ))
512    })?;
513    for entry in entries {
514        let entry = entry.map_err(|error| {
515            VmError::Runtime(format!(
516                "failed to read entry in trace dir {}: {error}",
517                dir.display()
518            ))
519        })?;
520        let path = entry.path();
521        if path.is_dir() {
522            collect_json_paths(&path, out)?;
523        } else if path.extension().and_then(|ext| ext.to_str()) == Some("json") {
524            out.push(path);
525        }
526    }
527    Ok(())
528}
529
530fn trace_from_run_record(run: RunRecord) -> CrystallizationTrace {
531    let mut actions = Vec::new();
532    for stage in &run.stages {
533        actions.push(CrystallizationAction {
534            id: stage.id.clone(),
535            kind: if stage.kind.is_empty() {
536                "stage".to_string()
537            } else {
538                stage.kind.clone()
539            },
540            name: stage.node_id.clone(),
541            timestamp: Some(stage.started_at.clone()),
542            output: stage.visible_text.as_ref().map(|text| json!(text)),
543            observed_output: stage.visible_text.as_ref().map(|text| json!(text)),
544            duration_ms: stage.usage.as_ref().map(|usage| usage.total_duration_ms),
545            cost: stage
546                .usage
547                .as_ref()
548                .map(|usage| CrystallizationCost {
549                    model_calls: usage.call_count,
550                    input_tokens: usage.input_tokens,
551                    output_tokens: usage.output_tokens,
552                    total_cost_usd: usage.total_cost,
553                    wall_ms: usage.total_duration_ms,
554                    model: usage.models.first().cloned(),
555                })
556                .unwrap_or_default(),
557            deterministic: Some(
558                stage
559                    .usage
560                    .as_ref()
561                    .map(|usage| usage.call_count == 0)
562                    .unwrap_or(true),
563            ),
564            fuzzy: Some(
565                stage
566                    .usage
567                    .as_ref()
568                    .is_some_and(|usage| usage.call_count > 0),
569            ),
570            metadata: stage.metadata.clone(),
571            ..CrystallizationAction::default()
572        });
573    }
574    for tool in &run.tool_recordings {
575        actions.push(CrystallizationAction {
576            id: tool.tool_use_id.clone(),
577            kind: "tool_call".to_string(),
578            name: tool.tool_name.clone(),
579            timestamp: Some(tool.timestamp.clone()),
580            output: Some(json!(tool.result)),
581            observed_output: Some(json!(tool.result)),
582            duration_ms: Some(tool.duration_ms as i64),
583            deterministic: Some(!tool.is_rejected),
584            fuzzy: Some(false),
585            metadata: BTreeMap::from([
586                ("args_hash".to_string(), json!(tool.args_hash)),
587                ("iteration".to_string(), json!(tool.iteration)),
588                ("is_rejected".to_string(), json!(tool.is_rejected)),
589            ]),
590            ..CrystallizationAction::default()
591        });
592    }
593    for question in &run.hitl_questions {
594        actions.push(CrystallizationAction {
595            id: question.request_id.clone(),
596            kind: "human_approval".to_string(),
597            name: question.agent.clone(),
598            timestamp: Some(question.asked_at.clone()),
599            approval: Some(CrystallizationApproval {
600                prompt: Some(question.prompt.clone()),
601                required: true,
602                boundary: Some("hitl".to_string()),
603                ..CrystallizationApproval::default()
604            }),
605            deterministic: Some(false),
606            fuzzy: Some(false),
607            metadata: question
608                .trace_id
609                .as_ref()
610                .map(|trace_id| BTreeMap::from([("trace_id".to_string(), json!(trace_id))]))
611                .unwrap_or_default(),
612            ..CrystallizationAction::default()
613        });
614    }
615    actions.sort_by(|left, right| left.timestamp.cmp(&right.timestamp));
616
617    CrystallizationTrace {
618        version: TRACE_SCHEMA_VERSION,
619        id: run.id.clone(),
620        workflow_id: Some(run.workflow_id.clone()),
621        started_at: Some(run.started_at.clone()),
622        finished_at: run.finished_at.clone(),
623        actions,
624        usage: run
625            .usage
626            .map(|usage| CrystallizationUsage {
627                model_calls: usage.call_count,
628                input_tokens: usage.input_tokens,
629                output_tokens: usage.output_tokens,
630                total_cost_usd: usage.total_cost,
631                wall_ms: usage.total_duration_ms,
632            })
633            .unwrap_or_default(),
634        metadata: run.metadata.clone(),
635        ..CrystallizationTrace::default()
636    }
637}
638
639fn normalize_trace(mut trace: CrystallizationTrace) -> CrystallizationTrace {
640    if trace.version == 0 {
641        trace.version = TRACE_SCHEMA_VERSION;
642    }
643    if trace.id.trim().is_empty() {
644        trace.id = new_id("trace");
645    }
646    if trace.source_hash.is_none() {
647        let payload = serde_json::to_vec(&trace.actions).unwrap_or_default();
648        trace.source_hash = Some(hash_bytes(&payload));
649    }
650    for (idx, action) in trace.actions.iter_mut().enumerate() {
651        if action.id.trim().is_empty() {
652            action.id = format!("action_{}", idx + 1);
653        }
654        if action.kind.trim().is_empty() {
655            action.kind = "action".to_string();
656        }
657        if action.name.trim().is_empty() {
658            action.name = action.kind.clone();
659        }
660        action.capabilities.sort();
661        action.capabilities.dedup();
662        action.required_secrets.sort();
663        action.required_secrets.dedup();
664        action.side_effects = sorted_side_effects(std::mem::take(&mut action.side_effects));
665        if action.cost.model_calls == 0 && action.kind == "model_call" {
666            action.cost.model_calls = 1;
667        }
668    }
669    if trace.usage == CrystallizationUsage::default() {
670        for action in &trace.actions {
671            trace.usage.model_calls += action.cost.model_calls;
672            trace.usage.input_tokens += action.cost.input_tokens;
673            trace.usage.output_tokens += action.cost.output_tokens;
674            trace.usage.total_cost_usd += action.cost.total_cost_usd;
675            trace.usage.wall_ms += action.cost.wall_ms + action.duration_ms.unwrap_or_default();
676        }
677    }
678    trace
679}
680
681fn mine_candidates(
682    traces: &[CrystallizationTrace],
683    min_examples: usize,
684    options: &CrystallizeOptions,
685) -> Vec<WorkflowCandidate> {
686    let signatures = traces
687        .iter()
688        .map(|trace| {
689            trace
690                .actions
691                .iter()
692                .map(action_signature)
693                .collect::<Vec<_>>()
694        })
695        .collect::<Vec<_>>();
696    let Some((sequence, examples)) = best_repeated_sequence(&signatures, min_examples) else {
697        return Vec::new();
698    };
699
700    let mut example_refs = Vec::new();
701    for (trace_index, start_index) in &examples {
702        let trace = &traces[*trace_index];
703        example_refs.push(WorkflowCandidateExample {
704            trace_id: trace.id.clone(),
705            source_hash: trace.source_hash.clone().unwrap_or_default(),
706            start_index: *start_index,
707            action_ids: trace.actions[*start_index..*start_index + sequence.len()]
708                .iter()
709                .map(|action| action.id.clone())
710                .collect(),
711        });
712    }
713
714    let mut steps = Vec::new();
715    let mut parameter_values: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
716    let mut parameter_paths: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
717    let mut rejection_reasons = Vec::new();
718    let mut warnings = Vec::new();
719
720    for step_index in 0..sequence.len() {
721        let actions = examples
722            .iter()
723            .map(|(trace_index, start_index)| {
724                &traces[*trace_index].actions[*start_index + step_index]
725            })
726            .collect::<Vec<_>>();
727        let first = actions[0];
728        if !compatible_side_effects(&actions) {
729            rejection_reasons.push(format!(
730                "step {} '{}' has divergent side effects across examples",
731                step_index + 1,
732                first.name
733            ));
734        }
735
736        let mut parameter_refs = BTreeSet::new();
737        for action in &actions {
738            for (name, value) in &action.parameters {
739                if is_scalar(value) {
740                    parameter_values
741                        .entry(sanitize_identifier(name))
742                        .or_default()
743                        .insert(json_scalar_string(value));
744                    parameter_paths
745                        .entry(sanitize_identifier(name))
746                        .or_default()
747                        .insert(format!("steps[{step_index}].parameters.{name}"));
748                    parameter_refs.insert(sanitize_identifier(name));
749                }
750            }
751        }
752        collect_varying_parameters(
753            &actions,
754            "inputs",
755            |action| &action.inputs,
756            &mut parameter_values,
757            &mut parameter_paths,
758            &mut parameter_refs,
759        );
760
761        let fuzzy = first.fuzzy.unwrap_or(false)
762            || first.kind == "model_call"
763            || actions.iter().any(|action| action.fuzzy.unwrap_or(false));
764        if fuzzy {
765            warnings.push(format!(
766                "step {} '{}' remains fuzzy and requires review/LLM handling",
767                step_index + 1,
768                first.name
769            ));
770        }
771
772        steps.push(WorkflowCandidateStep {
773            index: step_index + 1,
774            kind: first.kind.clone(),
775            name: first.name.clone(),
776            segment: if fuzzy {
777                SegmentKind::Fuzzy
778            } else {
779                SegmentKind::Deterministic
780            },
781            parameter_refs: parameter_refs.into_iter().collect(),
782            constants: constants_for_action(first),
783            preconditions: preconditions_for_action(first),
784            side_effects: first.side_effects.clone(),
785            capabilities: sorted_strings(first.capabilities.iter().cloned()),
786            required_secrets: sorted_strings(first.required_secrets.iter().cloned()),
787            approval: first.approval.clone(),
788            expected_output: stable_expected_output(&actions),
789            review_notes: Vec::new(),
790        });
791    }
792
793    let parameters = parameter_values
794        .iter()
795        .map(|(name, values)| WorkflowCandidateParameter {
796            name: name.clone(),
797            source_paths: parameter_paths
798                .get(name)
799                .map(|paths| paths.iter().cloned().collect())
800                .unwrap_or_default(),
801            examples: values.iter().take(5).cloned().collect(),
802            required: true,
803        })
804        .collect::<Vec<_>>();
805
806    let capabilities = sorted_strings(
807        steps
808            .iter()
809            .flat_map(|step| step.capabilities.iter().cloned()),
810    );
811    let required_secrets = sorted_strings(
812        steps
813            .iter()
814            .flat_map(|step| step.required_secrets.iter().cloned()),
815    );
816    let approval_points = steps
817        .iter()
818        .filter_map(|step| step.approval.clone())
819        .collect::<Vec<_>>();
820    let side_effects = sorted_side_effects(
821        steps
822            .iter()
823            .flat_map(|step| step.side_effects.iter().cloned())
824            .collect(),
825    );
826    let expected_outputs = steps
827        .iter()
828        .filter_map(|step| step.expected_output.clone())
829        .collect::<Vec<_>>();
830    let savings = estimate_savings(traces, &examples, &steps);
831    let confidence = confidence_for(
832        &examples,
833        traces.len(),
834        &steps,
835        rejection_reasons.is_empty(),
836    );
837    let name = options
838        .workflow_name
839        .clone()
840        .unwrap_or_else(|| infer_workflow_name(&steps));
841    let package_name = options
842        .package_name
843        .clone()
844        .unwrap_or_else(|| name.replace('_', "-"));
845
846    vec![WorkflowCandidate {
847        id: stable_candidate_id(&sequence, &example_refs),
848        name,
849        confidence,
850        sequence_signature: sequence,
851        parameters,
852        steps,
853        examples: example_refs.clone(),
854        capabilities: capabilities.clone(),
855        required_secrets: required_secrets.clone(),
856        approval_points,
857        side_effects,
858        expected_outputs,
859        warnings,
860        rejection_reasons,
861        promotion: PromotionMetadata {
862            source_trace_hashes: example_refs
863                .iter()
864                .map(|example| example.source_hash.clone())
865                .collect(),
866            author: options.author.clone(),
867            approver: options.approver.clone(),
868            created_at: now_rfc3339(),
869            version: "0.1.0".to_string(),
870            package_name,
871            capability_set: capabilities,
872            secrets_required: required_secrets,
873            rollback_target: Some("keep source traces and previous package version".to_string()),
874            eval_pack_link: options.eval_pack_link.clone(),
875        },
876        savings,
877        shadow: ShadowRunReport::default(),
878    }]
879}
880
881fn best_repeated_sequence(
882    signatures: &[Vec<String>],
883    min_examples: usize,
884) -> Option<RepeatedSequence> {
885    let mut occurrences: BTreeMap<Vec<String>, Vec<(usize, usize)>> = BTreeMap::new();
886    for (trace_index, trace_signatures) in signatures.iter().enumerate() {
887        for start in 0..trace_signatures.len() {
888            let max_len = (trace_signatures.len() - start).min(12);
889            for len in 2..=max_len {
890                let sequence = trace_signatures[start..start + len].to_vec();
891                occurrences
892                    .entry(sequence)
893                    .or_default()
894                    .push((trace_index, start));
895            }
896        }
897    }
898
899    occurrences
900        .into_iter()
901        .filter_map(|(sequence, positions)| {
902            let mut seen = BTreeSet::new();
903            let mut examples = Vec::new();
904            for (trace_index, start) in positions {
905                if seen.insert(trace_index) {
906                    examples.push((trace_index, start));
907                }
908            }
909            if examples.len() >= min_examples {
910                Some((sequence, examples))
911            } else {
912                None
913            }
914        })
915        .max_by(
916            |(left_sequence, left_examples), (right_sequence, right_examples)| {
917                left_examples
918                    .len()
919                    .cmp(&right_examples.len())
920                    .then_with(|| left_sequence.len().cmp(&right_sequence.len()))
921            },
922        )
923}
924
925fn action_signature(action: &CrystallizationAction) -> String {
926    let mut parameter_keys = action.parameters.keys().cloned().collect::<Vec<_>>();
927    parameter_keys.sort();
928    format!(
929        "{}:{}:{}",
930        action.kind,
931        action.name,
932        parameter_keys.join(",")
933    )
934}
935
936fn compatible_side_effects(actions: &[&CrystallizationAction]) -> bool {
937    let first = sorted_side_effects(actions[0].side_effects.clone());
938    actions
939        .iter()
940        .skip(1)
941        .all(|action| sorted_side_effects(action.side_effects.clone()) == first)
942}
943
944fn collect_varying_parameters(
945    actions: &[&CrystallizationAction],
946    root: &str,
947    value_for: impl Fn(&CrystallizationAction) -> &JsonValue,
948    parameter_values: &mut BTreeMap<String, BTreeSet<String>>,
949    parameter_paths: &mut BTreeMap<String, BTreeSet<String>>,
950    parameter_refs: &mut BTreeSet<String>,
951) {
952    let mut paths = BTreeMap::<String, Vec<JsonValue>>::new();
953    for action in actions {
954        collect_scalar_paths(value_for(action), root, &mut paths);
955    }
956    for (path, values) in paths {
957        if values.len() != actions.len() {
958            continue;
959        }
960        let unique = values
961            .iter()
962            .map(json_scalar_string)
963            .collect::<BTreeSet<_>>();
964        if unique.len() < 2 {
965            continue;
966        }
967        let name = parameter_name_for_path(&path);
968        parameter_values
969            .entry(name.clone())
970            .or_default()
971            .extend(unique);
972        parameter_paths
973            .entry(name.clone())
974            .or_default()
975            .insert(path);
976        parameter_refs.insert(name);
977    }
978}
979
980fn collect_scalar_paths(
981    value: &JsonValue,
982    prefix: &str,
983    out: &mut BTreeMap<String, Vec<JsonValue>>,
984) {
985    match value {
986        JsonValue::Object(map) => {
987            for (key, child) in map {
988                collect_scalar_paths(child, &format!("{prefix}.{key}"), out);
989            }
990        }
991        JsonValue::Array(items) => {
992            for (idx, child) in items.iter().enumerate() {
993                collect_scalar_paths(child, &format!("{prefix}[{idx}]"), out);
994            }
995        }
996        _ if is_scalar(value) => {
997            out.entry(prefix.to_string())
998                .or_default()
999                .push(value.clone());
1000        }
1001        _ => {}
1002    }
1003}
1004
1005fn parameter_name_for_path(path: &str) -> String {
1006    let lower = path.to_ascii_lowercase();
1007    for (needle, name) in [
1008        ("version", "version"),
1009        ("repo_path", "repo_path"),
1010        ("repo", "repo_path"),
1011        ("branch_name", "branch_name"),
1012        ("branch", "branch_name"),
1013        ("release_target", "release_target"),
1014        ("target", "release_target"),
1015    ] {
1016        if lower.contains(needle) {
1017            return name.to_string();
1018        }
1019    }
1020    let tail = path
1021        .rsplit(['.', '['])
1022        .next()
1023        .unwrap_or("param")
1024        .trim_end_matches(']');
1025    sanitize_identifier(tail)
1026}
1027
1028fn constants_for_action(action: &CrystallizationAction) -> BTreeMap<String, JsonValue> {
1029    let mut constants = BTreeMap::new();
1030    constants.insert("kind".to_string(), json!(action.kind));
1031    constants.insert("name".to_string(), json!(action.name));
1032    if action.deterministic.unwrap_or(false) {
1033        constants.insert("deterministic".to_string(), json!(true));
1034    }
1035    constants
1036}
1037
1038fn preconditions_for_action(action: &CrystallizationAction) -> Vec<String> {
1039    let mut out = Vec::new();
1040    for capability in &action.capabilities {
1041        out.push(format!("capability '{capability}' is available"));
1042    }
1043    for secret in &action.required_secrets {
1044        out.push(format!("secret '{secret}' is configured"));
1045    }
1046    if let Some(approval) = &action.approval {
1047        if approval.required {
1048            out.push("human approval boundary is preserved".to_string());
1049        }
1050    }
1051    out
1052}
1053
1054fn stable_expected_output(actions: &[&CrystallizationAction]) -> Option<JsonValue> {
1055    let first = actions[0]
1056        .observed_output
1057        .as_ref()
1058        .or(actions[0].output.as_ref())?;
1059    if actions
1060        .iter()
1061        .all(|action| action.observed_output.as_ref().or(action.output.as_ref()) == Some(first))
1062    {
1063        Some(first.clone())
1064    } else {
1065        None
1066    }
1067}
1068
1069fn shadow_candidate(
1070    candidate: &WorkflowCandidate,
1071    traces: &[CrystallizationTrace],
1072) -> ShadowRunReport {
1073    let mut failures = Vec::new();
1074    let mut results = Vec::new();
1075    for example in &candidate.examples {
1076        let Some(trace) = traces.iter().find(|trace| trace.id == example.trace_id) else {
1077            failures.push(format!("missing source trace {}", example.trace_id));
1078            continue;
1079        };
1080        let mut details = Vec::new();
1081        let end = example.start_index + candidate.steps.len();
1082        if end > trace.actions.len() {
1083            details.push("candidate sequence extends past trace action list".to_string());
1084        } else {
1085            let signatures = trace.actions[example.start_index..end]
1086                .iter()
1087                .map(action_signature)
1088                .collect::<Vec<_>>();
1089            if signatures != candidate.sequence_signature {
1090                details.push("action sequence signature changed".to_string());
1091            }
1092            for (offset, step) in candidate.steps.iter().enumerate() {
1093                let action = &trace.actions[example.start_index + offset];
1094                if sorted_side_effects(action.side_effects.clone()) != step.side_effects {
1095                    details.push(format!(
1096                        "step {} side effects differ for action {}",
1097                        step.index, action.id
1098                    ));
1099                }
1100                if action.approval.as_ref().map(|approval| approval.required)
1101                    != step.approval.as_ref().map(|approval| approval.required)
1102                {
1103                    details.push(format!("step {} approval boundary differs", step.index));
1104                }
1105                if step.segment == SegmentKind::Deterministic {
1106                    if let Some(expected) = &step.expected_output {
1107                        let actual = action.observed_output.as_ref().or(action.output.as_ref());
1108                        if actual != Some(expected) {
1109                            details
1110                                .push(format!("step {} deterministic output differs", step.index));
1111                        }
1112                    }
1113                }
1114            }
1115        }
1116        let pass = details.is_empty();
1117        if !pass {
1118            failures.push(format!("trace {} failed shadow comparison", trace.id));
1119        }
1120        results.push(ShadowTraceResult {
1121            trace_id: trace.id.clone(),
1122            pass,
1123            details,
1124        });
1125    }
1126    ShadowRunReport {
1127        pass: failures.is_empty(),
1128        compared_traces: results.len(),
1129        failures,
1130        traces: results,
1131    }
1132}
1133
1134fn estimate_savings(
1135    traces: &[CrystallizationTrace],
1136    examples: &[(usize, usize)],
1137    steps: &[WorkflowCandidateStep],
1138) -> SavingsEstimate {
1139    let mut estimate = SavingsEstimate::default();
1140    for (trace_index, start_index) in examples {
1141        let trace = &traces[*trace_index];
1142        for action in &trace.actions[*start_index..*start_index + steps.len()] {
1143            if action.kind == "model_call" || action.fuzzy.unwrap_or(false) {
1144                estimate.remaining_model_calls += action.cost.model_calls.max(1);
1145            } else {
1146                estimate.model_calls_avoided += action.cost.model_calls;
1147                estimate.input_tokens_avoided += action.cost.input_tokens;
1148                estimate.output_tokens_avoided += action.cost.output_tokens;
1149                estimate.estimated_cost_usd_avoided += action.cost.total_cost_usd;
1150                estimate.wall_ms_avoided +=
1151                    action.cost.wall_ms + action.duration_ms.unwrap_or_default();
1152            }
1153        }
1154    }
1155    estimate.cpu_runtime_cost_usd = 0.0;
1156    estimate
1157}
1158
1159fn confidence_for(
1160    examples: &[(usize, usize)],
1161    trace_count: usize,
1162    steps: &[WorkflowCandidateStep],
1163    safe: bool,
1164) -> f64 {
1165    if !safe || trace_count == 0 {
1166        return 0.0;
1167    }
1168    let coverage = examples.len() as f64 / trace_count as f64;
1169    let deterministic = steps
1170        .iter()
1171        .filter(|step| step.segment == SegmentKind::Deterministic)
1172        .count() as f64
1173        / steps.len().max(1) as f64;
1174    ((coverage * 0.65) + (deterministic * 0.35)).min(0.99)
1175}
1176
1177fn infer_workflow_name(steps: &[WorkflowCandidateStep]) -> String {
1178    let names = steps
1179        .iter()
1180        .map(|step| step.name.to_ascii_lowercase())
1181        .collect::<Vec<_>>()
1182        .join("_");
1183    if names.contains("version") || names.contains("release") {
1184        "crystallized_version_bump".to_string()
1185    } else {
1186        "crystallized_workflow".to_string()
1187    }
1188}
1189
1190pub fn generate_harn_code(candidate: &WorkflowCandidate) -> String {
1191    let mut out = String::new();
1192    let params = if candidate.parameters.is_empty() {
1193        "task".to_string()
1194    } else {
1195        candidate
1196            .parameters
1197            .iter()
1198            .map(|parameter| parameter.name.as_str())
1199            .collect::<Vec<_>>()
1200            .join(", ")
1201    };
1202    writeln!(out, "/**").unwrap();
1203    writeln!(
1204        out,
1205        " * Generated by harn crystallize. Review before promotion."
1206    )
1207    .unwrap();
1208    writeln!(out, " * Candidate: {}", candidate.id).unwrap();
1209    writeln!(
1210        out,
1211        " * Source trace hashes: {}",
1212        candidate.promotion.source_trace_hashes.join(", ")
1213    )
1214    .unwrap();
1215    writeln!(
1216        out,
1217        " * Capabilities: {}",
1218        if candidate.capabilities.is_empty() {
1219            "none".to_string()
1220        } else {
1221            candidate.capabilities.join(", ")
1222        }
1223    )
1224    .unwrap();
1225    writeln!(
1226        out,
1227        " * Required secrets: {}",
1228        if candidate.required_secrets.is_empty() {
1229            "none".to_string()
1230        } else {
1231            candidate.required_secrets.join(", ")
1232        }
1233    )
1234    .unwrap();
1235    writeln!(out, " */").unwrap();
1236    writeln!(out, "pipeline {}({}) {{", candidate.name, params).unwrap();
1237    writeln!(out, "  let review_warnings = []").unwrap();
1238    for step in &candidate.steps {
1239        writeln!(out, "  // Step {}: {} {}", step.index, step.kind, step.name).unwrap();
1240        for side_effect in &step.side_effects {
1241            writeln!(
1242                out,
1243                "  // side_effect: {} {}",
1244                side_effect.kind, side_effect.target
1245            )
1246            .unwrap();
1247        }
1248        if let Some(approval) = &step.approval {
1249            if approval.required {
1250                writeln!(
1251                    out,
1252                    "  // approval_required: {}",
1253                    approval
1254                        .boundary
1255                        .clone()
1256                        .unwrap_or_else(|| "human_review".to_string())
1257                )
1258                .unwrap();
1259            }
1260        }
1261        if step.segment == SegmentKind::Fuzzy {
1262            writeln!(
1263                out,
1264                "  // TODO: fuzzy segment still needs LLM/reviewer handling before deterministic promotion."
1265            )
1266            .unwrap();
1267            writeln!(
1268                out,
1269                "  review_warnings.push(\"fuzzy step: {}\")",
1270                escape_harn_string(&step.name)
1271            )
1272            .unwrap();
1273        }
1274        writeln!(
1275            out,
1276            "  log(\"crystallized step {}: {}\")",
1277            step.index,
1278            escape_harn_string(&step.name)
1279        )
1280        .unwrap();
1281    }
1282    writeln!(
1283        out,
1284        "  return {{status: \"shadow_ready\", candidate_id: \"{}\", review_warnings: review_warnings}}",
1285        escape_harn_string(&candidate.id)
1286    )
1287    .unwrap();
1288    writeln!(out, "}}").unwrap();
1289    out
1290}
1291
1292fn rejected_workflow_stub(rejected: &[WorkflowCandidate]) -> String {
1293    let mut out = String::new();
1294    writeln!(
1295        out,
1296        "// Generated by harn crystallize. No safe candidate was proposed."
1297    )
1298    .unwrap();
1299    writeln!(out, "pipeline crystallized_workflow(task) {{").unwrap();
1300    writeln!(out, "  log(\"no safe crystallization candidate\")").unwrap();
1301    writeln!(
1302        out,
1303        "  return {{status: \"rejected\", rejected_candidates: {}}}",
1304        rejected.len()
1305    )
1306    .unwrap();
1307    writeln!(out, "}}").unwrap();
1308    out
1309}
1310
1311pub fn generate_eval_pack(candidate: &WorkflowCandidate) -> String {
1312    let mut out = String::new();
1313    writeln!(out, "version = 1").unwrap();
1314    writeln!(
1315        out,
1316        "id = \"{}-crystallization\"",
1317        candidate.promotion.package_name
1318    )
1319    .unwrap();
1320    writeln!(
1321        out,
1322        "name = \"{} crystallization shadow evals\"",
1323        candidate.name
1324    )
1325    .unwrap();
1326    writeln!(out).unwrap();
1327    writeln!(out, "[package]").unwrap();
1328    writeln!(out, "name = \"{}\"", candidate.promotion.package_name).unwrap();
1329    writeln!(out, "version = \"{}\"", candidate.promotion.version).unwrap();
1330    writeln!(out).unwrap();
1331    for example in &candidate.examples {
1332        writeln!(out, "[[fixtures]]").unwrap();
1333        writeln!(out, "id = \"{}\"", example.trace_id).unwrap();
1334        writeln!(out, "kind = \"jsonl-trace\"").unwrap();
1335        writeln!(out, "trace_id = \"{}\"", example.trace_id).unwrap();
1336        writeln!(out).unwrap();
1337    }
1338    writeln!(out, "[[rubrics]]").unwrap();
1339    writeln!(out, "id = \"shadow-determinism\"").unwrap();
1340    writeln!(out, "kind = \"deterministic\"").unwrap();
1341    writeln!(out).unwrap();
1342    writeln!(out, "[[rubrics.assertions]]").unwrap();
1343    writeln!(out, "kind = \"crystallization-shadow\"").unwrap();
1344    writeln!(
1345        out,
1346        "expected = {{ candidate_id = \"{}\", pass = true }}",
1347        candidate.id
1348    )
1349    .unwrap();
1350    writeln!(out).unwrap();
1351    writeln!(out, "[[cases]]").unwrap();
1352    writeln!(out, "id = \"{}-shadow\"", candidate.name).unwrap();
1353    writeln!(out, "name = \"{} shadow replay\"", candidate.name).unwrap();
1354    writeln!(out, "rubrics = [\"shadow-determinism\"]").unwrap();
1355    writeln!(out, "severity = \"blocking\"").unwrap();
1356    out
1357}
1358
1359fn stable_candidate_id(sequence: &[String], examples: &[WorkflowCandidateExample]) -> String {
1360    let mut hasher = Sha256::new();
1361    for item in sequence {
1362        hasher.update(item.as_bytes());
1363        hasher.update([0]);
1364    }
1365    for example in examples {
1366        hasher.update(example.source_hash.as_bytes());
1367        hasher.update([0]);
1368    }
1369    format!("candidate_{}", hex_prefix(hasher.finalize().as_slice(), 16))
1370}
1371
1372fn hash_bytes(bytes: &[u8]) -> String {
1373    let mut hasher = Sha256::new();
1374    hasher.update(bytes);
1375    format!("sha256:{}", hex::encode(hasher.finalize()))
1376}
1377
1378fn hex_prefix(bytes: &[u8], chars: usize) -> String {
1379    hex::encode(bytes).chars().take(chars).collect::<String>()
1380}
1381
/// Collects `items` into a sorted, de-duplicated list, dropping entries that
/// are empty or contain only whitespace.
fn sorted_strings(items: impl Iterator<Item = String>) -> Vec<String> {
    let unique: BTreeSet<String> = items
        .filter(|item| !item.trim().is_empty())
        .collect();
    Vec::from_iter(unique)
}
1387
1388fn sorted_side_effects(items: Vec<CrystallizationSideEffect>) -> Vec<CrystallizationSideEffect> {
1389    let mut items = items
1390        .into_iter()
1391        .filter(|item| !item.kind.trim().is_empty() || !item.target.trim().is_empty())
1392        .collect::<Vec<_>>();
1393    items.sort_by_key(side_effect_sort_key);
1394    items.dedup_by(|left, right| side_effect_sort_key(left) == side_effect_sort_key(right));
1395    items
1396}
1397
1398fn side_effect_sort_key(item: &CrystallizationSideEffect) -> String {
1399    format!(
1400        "{}\x1f{}\x1f{}\x1f{}",
1401        item.kind,
1402        item.target,
1403        item.capability.clone().unwrap_or_default(),
1404        item.mutation.clone().unwrap_or_default()
1405    )
1406}
1407
1408fn is_scalar(value: &JsonValue) -> bool {
1409    matches!(
1410        value,
1411        JsonValue::String(_) | JsonValue::Number(_) | JsonValue::Bool(_) | JsonValue::Null
1412    )
1413}
1414
1415fn json_scalar_string(value: &JsonValue) -> String {
1416    match value {
1417        JsonValue::String(value) => value.clone(),
1418        other => other.to_string(),
1419    }
1420}
1421
/// Turns arbitrary text into a lowercase identifier.
///
/// ASCII letters, digits and `_` are kept (letters lowercased); every run of
/// other characters becomes a single `_`. Leading and trailing underscores
/// are then trimmed. An empty result becomes `param`.
///
/// A result that would start with a digit is prefixed with `_`. The guard
/// has to run after trimming: adding the underscore beforehand would be
/// stripped again by the trim, leaving identifiers such as `0` or `9lives`.
fn sanitize_identifier(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    for ch in raw.chars() {
        if ch.is_ascii_alphanumeric() || ch == '_' {
            out.push(ch.to_ascii_lowercase());
        } else if !out.ends_with('_') {
            out.push('_');
        }
    }

    let trimmed = out.trim_matches('_');
    match trimmed.chars().next() {
        None => "param".to_string(),
        Some(first) if first.is_ascii_digit() => format!("_{trimmed}"),
        Some(_) => trimmed.to_string(),
    }
}
1441
/// Escapes `value` for embedding in a double-quoted Harn string literal by
/// prefixing every backslash and double quote with a backslash. All other
/// characters are copied unchanged.
fn escape_harn_string(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        if ch == '\\' || ch == '"' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
1445
1446// ===== Crystallization bundle =====
1447//
1448// A bundle is a directory layout that Harn writes and Harn Cloud (or any
1449// other importer) reads without bespoke glue. The contract is:
1450//
1451//   bundle/
1452//     candidate.json        # versioned manifest documented below
1453//     workflow.harn         # generated/reviewable workflow code
1454//     report.json           # full mining/shadow/eval report
1455//     harn.eval.toml        # generated eval pack when available (optional)
1456//     fixtures/             # redacted replay fixtures referenced by the
1457//                           # report (optional, only when --bundle is used
1458//                           # with `harn crystallize` and traces were on disk)
1459//
1460// `candidate.json` is the authoritative manifest. It must include the
1461// `schema` and `schema_version` markers. Cloud importers MUST reject any
1462// bundle whose `schema` is not exactly `harn.crystallization.candidate.bundle`
1463// or whose `schema_version` is greater than the highest version they
1464// understand. Only the documented additive fields may be added without
1465// bumping `schema_version`.
1466
/// Identifies the tool that generated a bundle.
///
/// The `Default` implementation reports `harn` together with this crate's
/// `CARGO_PKG_VERSION`; `#[serde(default)]` applies those values to any
/// field missing from a deserialized manifest.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleGenerator {
    /// Name of the generating tool.
    pub tool: String,
    /// Version string of the generating tool.
    pub version: String,
}
1473
1474impl Default for BundleGenerator {
1475    fn default() -> Self {
1476        Self {
1477            tool: "harn".to_string(),
1478            version: env!("CARGO_PKG_VERSION").to_string(),
1479        }
1480    }
1481}
1482
/// Points at the generated workflow file of a bundle and records the name
/// and package identity it is proposed under.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleWorkflowRef {
    /// Relative path inside the bundle directory.
    pub path: String,
    /// Short identifier used in `pipeline NAME(...)`.
    pub name: String,
    /// Logical package name promotion uses to register the workflow.
    pub package_name: String,
    /// Initial workflow version proposed for promotion.
    pub package_version: String,
}
1495
/// Manifest entry describing one source trace that backs a candidate.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleSourceTrace {
    /// Identifier of the source trace.
    pub trace_id: String,
    /// Hash recorded for the source trace.
    pub source_hash: String,
    /// Optional human-visible URL (PR, issue, run record path) for the
    /// trace. `None` when the trace was loaded from an in-memory store.
    pub source_url: Option<String>,
    /// Optional cloud-side receipt id when the trace was already promoted
    /// into a tenant receipt. Cloud importers use this to wire candidate
    /// evidence to existing receipts without round-tripping the raw payload.
    pub source_receipt_id: Option<String>,
    /// Relative path of the redacted fixture inside the bundle, if one
    /// was emitted.
    pub fixture_path: Option<String>,
}
1512
/// Manifest view of a single workflow step.
///
/// Built from a `WorkflowCandidateStep` by `BundleStep::from_candidate_step`,
/// which clones exactly the fields listed here.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct BundleStep {
    /// Index of the step as recorded on the candidate step.
    pub index: usize,
    /// Action kind of the step.
    pub kind: String,
    /// Action name of the step.
    pub name: String,
    /// Segment classification of the step (e.g. deterministic or fuzzy).
    pub segment: SegmentKind,
    /// Names of the workflow parameters this step refers to.
    pub parameter_refs: Vec<String>,
    /// Side effects recorded for the step.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities recorded for the step.
    pub capabilities: Vec<String>,
    /// Secrets recorded as required for the step.
    pub required_secrets: Vec<String>,
    /// Approval information attached to the step, if any.
    pub approval: Option<CrystallizationApproval>,
    /// Free-form notes for reviewers.
    pub review_notes: Vec<String>,
}
1527
1528impl BundleStep {
1529    fn from_candidate_step(step: &WorkflowCandidateStep) -> Self {
1530        Self {
1531            index: step.index,
1532            kind: step.kind.clone(),
1533            name: step.name.clone(),
1534            segment: step.segment.clone(),
1535            parameter_refs: step.parameter_refs.clone(),
1536            side_effects: step.side_effects.clone(),
1537            capabilities: step.capabilities.clone(),
1538            required_secrets: step.required_secrets.clone(),
1539            approval: step.approval.clone(),
1540            review_notes: step.review_notes.clone(),
1541        }
1542    }
1543}
1544
/// Reference to the eval pack that accompanies a bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleEvalPackRef {
    /// Relative path of the eval pack inside the bundle directory.
    pub path: String,
    /// Optional external link the eval pack also lives at (e.g. a hosted
    /// `eval-pack://` URI when the bundle was promoted into a tenant).
    pub link: Option<String>,
}
1554
/// Manifest entry for one fixture file stored in a bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleFixtureRef {
    /// Path of the fixture file.
    // NOTE(review): presumably relative to the bundle directory like the
    // other manifest paths; confirm against the bundle writer.
    pub path: String,
    /// Identifier of the trace the fixture was produced from.
    pub trace_id: String,
    /// Hash recorded for the source trace.
    pub source_hash: String,
    /// `true` when the fixture is marked as redacted.
    pub redacted: bool,
}
1563
/// Promotion metadata recorded in a bundle manifest.
///
/// The `Default` implementation leaves everything empty except
/// `rollout_policy`, which is set to `DEFAULT_ROLLOUT_POLICY`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundlePromotion {
    /// Owner of the promotion, when known.
    pub owner: Option<String>,
    /// Approver of the promotion, when known.
    pub approver: Option<String>,
    /// Author of the candidate, when known.
    pub author: Option<String>,
    /// Logical rollout strategy. Defaults to `shadow_then_canary`. Hosted
    /// surfaces may extend this enum but must keep existing values stable.
    pub rollout_policy: String,
    /// Description of what to roll back to, if one was recorded.
    pub rollback_target: Option<String>,
    /// Creation timestamp as a string.
    pub created_at: String,
    /// Version proposed for the workflow.
    pub workflow_version: String,
    /// Package name the workflow is proposed under.
    pub package_name: String,
}
1578
1579impl Default for BundlePromotion {
1580    fn default() -> Self {
1581        Self {
1582            owner: None,
1583            approver: None,
1584            author: None,
1585            rollout_policy: DEFAULT_ROLLOUT_POLICY.to_string(),
1586            rollback_target: None,
1587            created_at: String::new(),
1588            workflow_version: String::new(),
1589            package_name: String::new(),
1590        }
1591    }
1592}
1593
/// Summary of the redaction recorded for a bundle's fixtures.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleRedactionSummary {
    /// `true` when redaction was applied.
    pub applied: bool,
    /// Names of the redaction rules that were used.
    pub rules: Vec<String>,
    /// Human-readable description of the redaction.
    pub summary: String,
    /// Number of fixture files copied into the bundle (0 when no fixture
    /// directory was emitted).
    pub fixture_count: usize,
}
1604
/// The `candidate.json` manifest of a crystallization bundle.
///
/// `schema` and `schema_version` are the markers importers check before
/// reading anything else; see `BUNDLE_SCHEMA` and `BUNDLE_SCHEMA_VERSION`.
/// `#[serde(default)]` fills any field missing from the input with its
/// default value when deserializing.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundleManifest {
    /// Schema marker of the manifest.
    pub schema: String,
    /// Version number of the manifest schema.
    pub schema_version: u32,
    /// Timestamp string recording when the manifest was generated.
    pub generated_at: String,
    /// Tool and version that produced the bundle.
    pub generator: BundleGenerator,
    /// Whether the bundle holds a candidate, a plan-only candidate, or a
    /// rejection record.
    pub kind: BundleKind,
    /// Identifier of the candidate the bundle describes.
    pub candidate_id: String,
    /// Key importers can use to dedupe bundles across runs.
    pub external_key: String,
    /// Human-readable title of the bundle.
    pub title: String,
    /// Optional team label.
    pub team: Option<String>,
    /// Optional repository label.
    pub repo: Option<String>,
    /// Risk level label.
    pub risk_level: String,
    /// Reference to the generated workflow file.
    pub workflow: BundleWorkflowRef,
    /// Hashes of the source traces.
    pub source_trace_hashes: Vec<String>,
    /// Per-trace source metadata.
    pub source_traces: Vec<BundleSourceTrace>,
    /// Steps listed as deterministic.
    pub deterministic_steps: Vec<BundleStep>,
    /// Steps listed as fuzzy.
    pub fuzzy_steps: Vec<BundleStep>,
    /// Side effects recorded for the candidate.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities recorded for the candidate.
    pub capabilities: Vec<String>,
    /// Secrets recorded as required by the candidate.
    pub required_secrets: Vec<String>,
    /// Savings estimate for the candidate.
    pub savings: SavingsEstimate,
    /// Shadow comparison report for the candidate.
    pub shadow: ShadowRunReport,
    /// Reference to the eval pack, when one is part of the bundle.
    pub eval_pack: Option<BundleEvalPackRef>,
    /// Fixture files stored in the bundle.
    pub fixtures: Vec<BundleFixtureRef>,
    /// Promotion metadata.
    pub promotion: BundlePromotion,
    /// Redaction summary for the fixtures.
    pub redaction: BundleRedactionSummary,
    /// Confidence score of the candidate.
    pub confidence: f64,
    /// Reasons the candidate was rejected, if any.
    pub rejection_reasons: Vec<String>,
    /// Warnings raised while producing the candidate.
    pub warnings: Vec<String>,
}
1637
/// Classifies what a bundle contains.
///
/// Because of `rename_all = "snake_case"` the variants are serialized as
/// `candidate`, `plan_only` and `rejected`. `Candidate` is the default.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum BundleKind {
    /// A normal candidate that passed shadow comparison and is ready for
    /// review and promotion.
    #[default]
    Candidate,
    /// A "plan-only" candidate: every step has a side-effect-free, in-process
    /// outcome (e.g. classify and write a receipt). Cloud importers can
    /// promote these without explicit external-side-effect approval.
    PlanOnly,
    /// No safe candidate was selected. The bundle still records what was
    /// attempted, the rejection reasons, and any rejected candidates so
    /// reviewers can debug or feed it back into mining.
    Rejected,
}
1654
/// Caller-supplied overrides used when building a bundle.
///
/// Every field is optional. For `external_key`, `title`, `risk_level` and
/// `rollout_policy`, `build_crystallization_bundle` treats a blank
/// (whitespace-only) value as if the option had not been set.
#[derive(Clone, Debug, Default)]
pub struct BundleOptions {
    /// Stable identifier downstream cloud importers use to dedupe bundles
    /// across runs (defaults to a sanitized workflow name).
    pub external_key: Option<String>,
    /// Title for the bundle; inferred from the selected candidate and
    /// workflow name when unset.
    pub title: Option<String>,
    /// Optional team label.
    pub team: Option<String>,
    /// Optional repository label.
    pub repo: Option<String>,
    /// Risk level label; inferred from the selected candidate when unset.
    pub risk_level: Option<String>,
    /// Rollout policy; falls back to `DEFAULT_ROLLOUT_POLICY` when unset.
    pub rollout_policy: Option<String>,
}
1666
/// In-memory form of a crystallization bundle: the manifest plus the
/// artifacts it refers to.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundle {
    /// The bundle manifest (`candidate.json`).
    pub manifest: CrystallizationBundleManifest,
    /// The crystallization report.
    pub report: CrystallizationReport,
    /// Generated Harn workflow source.
    pub harn_code: String,
    /// Generated eval pack as TOML text.
    pub eval_pack_toml: String,
    /// Traces attached to the bundle as fixtures.
    pub fixtures: Vec<CrystallizationTrace>,
}
1676
/// Errors surfaced when validating a bundle on disk.
///
/// Carries identifying information about the bundle, one flag per checked
/// area, and the list of problems found. `is_ok` looks only at `problems`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleValidation {
    /// Directory of the bundle that was validated.
    pub bundle_dir: String,
    /// Schema marker of the bundle.
    pub schema: String,
    /// Schema version of the bundle.
    pub schema_version: u32,
    /// Bundle kind.
    pub kind: BundleKind,
    /// Candidate id of the bundle.
    pub candidate_id: String,
    /// Result flag for the manifest check.
    pub manifest_ok: bool,
    /// Result flag for the workflow file check.
    pub workflow_ok: bool,
    /// Result flag for the report check.
    pub report_ok: bool,
    /// Result flag for the eval pack check.
    pub eval_pack_ok: bool,
    /// Result flag for the fixtures check.
    pub fixtures_ok: bool,
    /// Result flag for the redaction check.
    pub redaction_ok: bool,
    /// Descriptions of every problem found; empty when validation succeeded.
    pub problems: Vec<String>,
}
1694
impl BundleValidation {
    /// Returns `true` when no problems were recorded. The individual
    /// `*_ok` flags are not consulted.
    pub fn is_ok(&self) -> bool {
        self.problems.is_empty()
    }
}
1700
/// Build an in-memory bundle from already-mined artifacts. The traces
/// passed here are the same normalized traces used to mine the candidate;
/// they will be redacted before being attached as fixtures.
///
/// The bundle kind is `Rejected` when the report has no selected candidate
/// (or its id is not among `report.candidates`), `PlanOnly` when
/// `candidate_is_plan_only` holds for the selected candidate, and
/// `Candidate` otherwise. Blank (whitespace-only) values in `options` are
/// treated as unset and replaced with inferred defaults.
///
/// Every path through the current body returns `Ok`.
pub fn build_crystallization_bundle(
    artifacts: CrystallizationArtifacts,
    traces: &[CrystallizationTrace],
    options: BundleOptions,
) -> Result<CrystallizationBundle, VmError> {
    let CrystallizationArtifacts {
        report,
        harn_code,
        eval_pack_toml,
    } = artifacts;

    // Resolve the selected candidate (if any) and derive the bundle kind.
    let (selected, kind) = match report
        .selected_candidate_id
        .as_deref()
        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
    {
        Some(candidate) => {
            let kind = if candidate_is_plan_only(candidate) {
                BundleKind::PlanOnly
            } else {
                BundleKind::Candidate
            };
            (Some(candidate), kind)
        }
        None => (None, BundleKind::Rejected),
    };

    // Workflow identity, with placeholder values for rejected bundles.
    let workflow_name = selected
        .map(|candidate| candidate.name.clone())
        .unwrap_or_else(|| "crystallized_workflow".to_string());
    let package_name = selected
        .map(|candidate| candidate.promotion.package_name.clone())
        .unwrap_or_else(|| workflow_name.replace('_', "-"));
    let workflow_version = selected
        .map(|candidate| candidate.promotion.version.clone())
        .unwrap_or_else(|| "0.0.0".to_string());

    let manifest_workflow = BundleWorkflowRef {
        path: BUNDLE_WORKFLOW_FILE.to_string(),
        name: workflow_name.clone(),
        package_name: package_name.clone(),
        package_version: workflow_version.clone(),
    };

    // Caller-supplied options win unless they are blank; otherwise fall
    // back to values inferred from the candidate or to module defaults.
    let external_key = options
        .external_key
        .clone()
        .filter(|key| !key.trim().is_empty())
        .unwrap_or_else(|| sanitize_external_key(&workflow_name));
    let title = options
        .title
        .clone()
        .filter(|title| !title.trim().is_empty())
        .unwrap_or_else(|| infer_bundle_title(selected, &workflow_name));
    let risk_level = options
        .risk_level
        .clone()
        .filter(|risk| !risk.trim().is_empty())
        .unwrap_or_else(|| infer_risk_level(selected));
    let rollout_policy = options
        .rollout_policy
        .clone()
        .filter(|policy| !policy.trim().is_empty())
        .unwrap_or_else(|| DEFAULT_ROLLOUT_POLICY.to_string());

    // Split the candidate's steps by segment kind; everything that is not
    // `Deterministic` lands in the fuzzy list.
    let (deterministic_steps, fuzzy_steps) = match selected {
        Some(candidate) => candidate
            .steps
            .iter()
            .map(BundleStep::from_candidate_step)
            .partition::<Vec<_>, _>(|step| step.segment == SegmentKind::Deterministic),
        None => (Vec::new(), Vec::new()),
    };

    let source_trace_hashes = selected
        .map(|candidate| candidate.promotion.source_trace_hashes.clone())
        .unwrap_or_default();

    // One `source_traces` entry per candidate example. A fixture (reference
    // plus redacted payload) is only emitted when the example's trace id is
    // found in `traces`; otherwise the entry carries no fixture path.
    let mut source_traces = Vec::new();
    let mut fixture_refs = Vec::new();
    let mut fixture_payloads = Vec::new();
    if let Some(candidate) = selected {
        for example in &candidate.examples {
            let trace = traces.iter().find(|trace| trace.id == example.trace_id);
            let fixture_relative = trace.map(|trace| {
                format!(
                    "{BUNDLE_FIXTURES_DIR}/{}.json",
                    sanitize_fixture_name(&trace.id)
                )
            });
            source_traces.push(BundleSourceTrace {
                trace_id: example.trace_id.clone(),
                source_hash: example.source_hash.clone(),
                source_url: trace.and_then(|trace| trace.source.clone()),
                source_receipt_id: trace
                    .and_then(|trace| trace.metadata.get("source_receipt_id"))
                    .and_then(|value| value.as_str().map(str::to_string)),
                fixture_path: fixture_relative.clone(),
            });
            if let (Some(trace), Some(fixture_path)) = (trace, fixture_relative.clone()) {
                // Redact a copy so the caller's trace is left untouched.
                let mut redacted = trace.clone();
                redact_trace_for_bundle(&mut redacted);
                fixture_refs.push(BundleFixtureRef {
                    path: fixture_path,
                    trace_id: trace.id.clone(),
                    source_hash: trace.source_hash.clone().unwrap_or_default(),
                    redacted: true,
                });
                fixture_payloads.push(redacted);
            }
        }
    }

    // Owner defaults to author so cloud importers always have a populated
    // ownership pointer, but stays separate from `author` so reviewers can
    // assign a different owner in the manifest before promotion.
    let author = selected.and_then(|candidate| candidate.promotion.author.clone());
    let promotion = BundlePromotion {
        owner: author.clone(),
        approver: selected.and_then(|candidate| candidate.promotion.approver.clone()),
        author,
        rollout_policy,
        rollback_target: selected.and_then(|candidate| candidate.promotion.rollback_target.clone()),
        created_at: now_rfc3339(),
        workflow_version,
        package_name: package_name.clone(),
    };

    // `applied` reflects whether any fixture was actually emitted; the rule
    // names are listed either way.
    let redaction = BundleRedactionSummary {
        applied: !fixture_payloads.is_empty(),
        rules: vec![
            "sensitive_keys".to_string(),
            "secret_value_heuristic".to_string(),
        ],
        summary: if fixture_payloads.is_empty() {
            "no fixtures emitted".to_string()
        } else {
            "fixture payloads scrubbed of secret-like values and sensitive keys before write"
                .to_string()
        },
        fixture_count: fixture_payloads.len(),
    };

    // The manifest only references an eval pack when there is TOML to write.
    let eval_pack = if eval_pack_toml.trim().is_empty() {
        None
    } else {
        Some(BundleEvalPackRef {
            path: BUNDLE_EVAL_PACK_FILE.to_string(),
            link: selected
                .and_then(|candidate| candidate.promotion.eval_pack_link.clone())
                .filter(|link| !link.trim().is_empty()),
        })
    };

    // Candidate-derived fields fall back to their defaults for rejected
    // bundles (no selected candidate).
    let manifest = CrystallizationBundleManifest {
        schema: BUNDLE_SCHEMA.to_string(),
        schema_version: BUNDLE_SCHEMA_VERSION,
        generated_at: now_rfc3339(),
        generator: BundleGenerator::default(),
        kind,
        candidate_id: selected
            .map(|candidate| candidate.id.clone())
            .unwrap_or_default(),
        external_key,
        title,
        team: options.team,
        repo: options.repo,
        risk_level,
        workflow: manifest_workflow,
        source_trace_hashes,
        source_traces,
        deterministic_steps,
        fuzzy_steps,
        side_effects: selected
            .map(|candidate| candidate.side_effects.clone())
            .unwrap_or_default(),
        capabilities: selected
            .map(|candidate| candidate.capabilities.clone())
            .unwrap_or_default(),
        required_secrets: selected
            .map(|candidate| candidate.required_secrets.clone())
            .unwrap_or_default(),
        savings: selected
            .map(|candidate| candidate.savings.clone())
            .unwrap_or_default(),
        shadow: selected
            .map(|candidate| candidate.shadow.clone())
            .unwrap_or_default(),
        eval_pack,
        fixtures: fixture_refs,
        promotion,
        redaction,
        confidence: selected
            .map(|candidate| candidate.confidence)
            .unwrap_or(0.0),
        // Flattened reasons from every rejected candidate in the report.
        rejection_reasons: report
            .rejected_candidates
            .iter()
            .flat_map(|candidate| candidate.rejection_reasons.iter().cloned())
            .collect(),
        warnings: report.warnings.clone(),
    };

    Ok(CrystallizationBundle {
        manifest,
        report,
        harn_code,
        eval_pack_toml,
        fixtures: fixture_payloads,
    })
}
1915
1916/// Write a bundle to a directory. Creates the directory if it does not
1917/// already exist. Returns the manifest with `generated_at` and any
1918/// runtime-resolved metadata filled in.
1919pub fn write_crystallization_bundle(
1920    bundle: &CrystallizationBundle,
1921    bundle_dir: &Path,
1922) -> Result<CrystallizationBundleManifest, VmError> {
1923    std::fs::create_dir_all(bundle_dir).map_err(|error| {
1924        VmError::Runtime(format!(
1925            "failed to create bundle dir {}: {error}",
1926            bundle_dir.display()
1927        ))
1928    })?;
1929    write_bytes(
1930        &bundle_dir.join(BUNDLE_WORKFLOW_FILE),
1931        bundle.harn_code.as_bytes(),
1932    )?;
1933    let report_json = serde_json::to_vec_pretty(&bundle.report)
1934        .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
1935    write_bytes(&bundle_dir.join(BUNDLE_REPORT_FILE), &report_json)?;
1936
1937    if !bundle.eval_pack_toml.trim().is_empty() {
1938        write_bytes(
1939            &bundle_dir.join(BUNDLE_EVAL_PACK_FILE),
1940            bundle.eval_pack_toml.as_bytes(),
1941        )?;
1942    }
1943
1944    if !bundle.fixtures.is_empty() {
1945        let fixtures_dir = bundle_dir.join(BUNDLE_FIXTURES_DIR);
1946        std::fs::create_dir_all(&fixtures_dir).map_err(|error| {
1947            VmError::Runtime(format!(
1948                "failed to create fixtures dir {}: {error}",
1949                fixtures_dir.display()
1950            ))
1951        })?;
1952        for trace in &bundle.fixtures {
1953            let path = fixtures_dir.join(format!("{}.json", sanitize_fixture_name(&trace.id)));
1954            let payload = serde_json::to_vec_pretty(trace).map_err(|error| {
1955                VmError::Runtime(format!("failed to encode fixture {}: {error}", trace.id))
1956            })?;
1957            write_bytes(&path, &payload)?;
1958        }
1959    }
1960
1961    let manifest_json = serde_json::to_vec_pretty(&bundle.manifest)
1962        .map_err(|error| VmError::Runtime(format!("failed to encode manifest JSON: {error}")))?;
1963    write_bytes(&bundle_dir.join(BUNDLE_MANIFEST_FILE), &manifest_json)?;
1964    Ok(bundle.manifest.clone())
1965}
1966
1967/// Read a bundle manifest from disk. Verifies the schema marker but does
1968/// not cross-check workflow/report/eval-pack sibling files; for a richer
1969/// check use [`validate_crystallization_bundle`].
1970pub fn load_crystallization_bundle_manifest(
1971    bundle_dir: &Path,
1972) -> Result<CrystallizationBundleManifest, VmError> {
1973    let manifest_path = bundle_dir.join(BUNDLE_MANIFEST_FILE);
1974    let bytes = std::fs::read(&manifest_path).map_err(|error| {
1975        VmError::Runtime(format!(
1976            "failed to read bundle manifest {}: {error}",
1977            manifest_path.display()
1978        ))
1979    })?;
1980    let manifest: CrystallizationBundleManifest =
1981        serde_json::from_slice(&bytes).map_err(|error| {
1982            VmError::Runtime(format!(
1983                "failed to decode bundle manifest {}: {error}",
1984                manifest_path.display()
1985            ))
1986        })?;
1987    if manifest.schema != BUNDLE_SCHEMA {
1988        return Err(VmError::Runtime(format!(
1989            "bundle {} has unrecognized schema {:?} (expected {})",
1990            bundle_dir.display(),
1991            manifest.schema,
1992            BUNDLE_SCHEMA
1993        )));
1994    }
1995    if manifest.schema_version > BUNDLE_SCHEMA_VERSION {
1996        return Err(VmError::Runtime(format!(
1997            "bundle {} schema_version {} is newer than supported {}",
1998            bundle_dir.display(),
1999            manifest.schema_version,
2000            BUNDLE_SCHEMA_VERSION
2001        )));
2002    }
2003    Ok(manifest)
2004}
2005
2006/// Read every fixture trace referenced by the bundle manifest. Returns
2007/// the manifest plus loaded traces, in the order they appear in the
2008/// manifest. Fixtures with `path: None` are skipped.
2009pub fn load_crystallization_bundle(
2010    bundle_dir: &Path,
2011) -> Result<(CrystallizationBundleManifest, Vec<CrystallizationTrace>), VmError> {
2012    let manifest = load_crystallization_bundle_manifest(bundle_dir)?;
2013    let mut traces = Vec::new();
2014    for fixture in &manifest.fixtures {
2015        let path = bundle_dir.join(&fixture.path);
2016        traces.push(load_crystallization_trace(&path)?);
2017    }
2018    Ok((manifest, traces))
2019}
2020
2021/// Validate a bundle directory layout and contents. Cheap enough to call
2022/// from a CLI smoke command; performs no live side effects.
2023pub fn validate_crystallization_bundle(bundle_dir: &Path) -> Result<BundleValidation, VmError> {
2024    let mut validation = BundleValidation {
2025        bundle_dir: bundle_dir.display().to_string(),
2026        ..BundleValidation::default()
2027    };
2028    let manifest = match load_crystallization_bundle_manifest(bundle_dir) {
2029        Ok(manifest) => manifest,
2030        Err(error) => {
2031            validation.problems.push(error.to_string());
2032            return Ok(validation);
2033        }
2034    };
2035    validation.manifest_ok = true;
2036    validation.schema = manifest.schema.clone();
2037    validation.schema_version = manifest.schema_version;
2038    validation.kind = manifest.kind.clone();
2039    validation.candidate_id = manifest.candidate_id.clone();
2040
2041    let workflow_path = bundle_dir.join(&manifest.workflow.path);
2042    if workflow_path.exists() {
2043        validation.workflow_ok = true;
2044    } else {
2045        validation
2046            .problems
2047            .push(format!("missing workflow file {}", workflow_path.display()));
2048    }
2049
2050    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
2051    match std::fs::read(&report_path) {
2052        Ok(bytes) => match serde_json::from_slice::<CrystallizationReport>(&bytes) {
2053            Ok(report) => {
2054                validation.report_ok = true;
2055                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
2056                    && manifest.candidate_id.is_empty()
2057                {
2058                    validation
2059                        .problems
2060                        .push("manifest is non-rejected but has empty candidate_id".to_string());
2061                }
2062                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
2063                    && report.selected_candidate_id.as_deref() != Some(&manifest.candidate_id)
2064                {
2065                    validation.problems.push(format!(
2066                        "report selected_candidate_id {:?} does not match manifest candidate_id {}",
2067                        report.selected_candidate_id, manifest.candidate_id
2068                    ));
2069                }
2070            }
2071            Err(error) => {
2072                validation
2073                    .problems
2074                    .push(format!("invalid report.json: {error}"));
2075            }
2076        },
2077        Err(error) => {
2078            validation.problems.push(format!(
2079                "missing report file {}: {error}",
2080                report_path.display()
2081            ));
2082        }
2083    }
2084
2085    if let Some(eval_pack) = &manifest.eval_pack {
2086        let path = bundle_dir.join(&eval_pack.path);
2087        if path.exists() {
2088            validation.eval_pack_ok = true;
2089        } else {
2090            validation.problems.push(format!(
2091                "manifest references eval pack {} but file is missing",
2092                path.display()
2093            ));
2094        }
2095    } else {
2096        validation.eval_pack_ok = true;
2097    }
2098
2099    let mut fixtures_problem = false;
2100    for fixture in &manifest.fixtures {
2101        let path = bundle_dir.join(&fixture.path);
2102        if !path.exists() {
2103            validation
2104                .problems
2105                .push(format!("missing fixture {}", path.display()));
2106            fixtures_problem = true;
2107            continue;
2108        }
2109        if !fixture.redacted {
2110            validation.problems.push(format!(
2111                "fixture {} is not marked redacted; bundle must not ship raw private payloads",
2112                fixture.path
2113            ));
2114            fixtures_problem = true;
2115        }
2116    }
2117    validation.fixtures_ok = !fixtures_problem;
2118
2119    if !manifest.redaction.applied && !manifest.fixtures.is_empty() {
2120        validation
2121            .problems
2122            .push("redaction.applied is false but bundle includes fixtures".to_string());
2123    } else {
2124        validation.redaction_ok = true;
2125    }
2126    if !manifest
2127        .required_secrets
2128        .iter()
2129        .all(|secret| secret_id_looks_logical(secret))
2130    {
2131        validation.problems.push(
2132            "required_secrets contains a non-logical id (looks like a raw secret)".to_string(),
2133        );
2134    }
2135
2136    Ok(validation)
2137}
2138
2139/// Replay shadow comparison from a bundle: re-runs the deterministic
2140/// shadow check in-process against the bundle's redacted fixtures, with
2141/// no live side effects. Returns the manifest and the freshly computed
2142/// `ShadowRunReport`. The returned report is suitable for cloud import or
2143/// for asserting determinism in CI.
2144pub fn shadow_replay_bundle(
2145    bundle_dir: &Path,
2146) -> Result<(CrystallizationBundleManifest, ShadowRunReport), VmError> {
2147    let (manifest, traces) = load_crystallization_bundle(bundle_dir)?;
2148    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
2149    let bytes = std::fs::read(&report_path).map_err(|error| {
2150        VmError::Runtime(format!(
2151            "failed to read bundle report {}: {error}",
2152            report_path.display()
2153        ))
2154    })?;
2155    let report: CrystallizationReport = serde_json::from_slice(&bytes).map_err(|error| {
2156        VmError::Runtime(format!(
2157            "failed to decode bundle report {}: {error}",
2158            report_path.display()
2159        ))
2160    })?;
2161    let candidate = report
2162        .selected_candidate_id
2163        .as_deref()
2164        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
2165        .ok_or_else(|| {
2166            VmError::Runtime(format!(
2167                "bundle {} has no selected candidate to replay",
2168                bundle_dir.display()
2169            ))
2170        })?;
2171    let shadow = shadow_candidate(candidate, &traces);
2172    Ok((manifest, shadow))
2173}
2174
2175fn write_bytes(path: &Path, bytes: &[u8]) -> Result<(), VmError> {
2176    if let Some(parent) = path
2177        .parent()
2178        .filter(|parent| !parent.as_os_str().is_empty())
2179    {
2180        std::fs::create_dir_all(parent).map_err(|error| {
2181            VmError::Runtime(format!(
2182                "failed to create parent dir {}: {error}",
2183                parent.display()
2184            ))
2185        })?;
2186    }
2187    std::fs::write(path, bytes)
2188        .map_err(|error| VmError::Runtime(format!("failed to write {}: {error}", path.display())))
2189}
2190
/// Turn a trace id into a file-name stem: every character other than an
/// ASCII letter, digit, `-` or `_` becomes `_`, then leading and trailing
/// underscores are stripped. Falls back to `"trace"` when nothing is left.
fn sanitize_fixture_name(raw: &str) -> String {
    let mut cleaned = String::with_capacity(raw.len());
    for ch in raw.chars() {
        let keep = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_');
        cleaned.push(if keep { ch } else { '_' });
    }
    match cleaned.trim_matches('_') {
        "" => "trace".to_string(),
        core => core.to_string(),
    }
}
2208
/// Derive a lowercase, dash-separated key from `raw`: runs of ASCII
/// alphanumerics are kept (lowercased) and each run of other characters
/// between them collapses to a single `-`. Falls back to
/// `"crystallized-workflow"` when `raw` has no ASCII alphanumerics.
fn sanitize_external_key(raw: &str) -> String {
    let words: Vec<String> = raw
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty())
        .map(str::to_ascii_lowercase)
        .collect();
    if words.is_empty() {
        return "crystallized-workflow".to_string();
    }
    words.join("-")
}
2229
2230fn infer_bundle_title(candidate: Option<&WorkflowCandidate>, fallback_name: &str) -> String {
2231    if let Some(candidate) = candidate {
2232        format!(
2233            "{} ({} step{})",
2234            candidate.name,
2235            candidate.steps.len(),
2236            if candidate.steps.len() == 1 { "" } else { "s" }
2237        )
2238    } else {
2239        format!("rejected: {fallback_name}")
2240    }
2241}
2242
2243fn infer_risk_level(candidate: Option<&WorkflowCandidate>) -> String {
2244    let Some(candidate) = candidate else {
2245        return "high".to_string();
2246    };
2247    let touches_external = candidate.side_effects.iter().any(side_effect_is_external);
2248    let needs_secret = !candidate.required_secrets.is_empty();
2249    if touches_external && needs_secret {
2250        "high".to_string()
2251    } else if touches_external || needs_secret {
2252        "medium".to_string()
2253    } else {
2254        "low".to_string()
2255    }
2256}
2257
2258fn side_effect_is_external(effect: &CrystallizationSideEffect) -> bool {
2259    let kind = effect.kind.to_ascii_lowercase();
2260    if kind.is_empty() {
2261        return false;
2262    }
2263    // Plan-only side effects stay inside Harn's own data plane: they
2264    // write receipts, append to the in-process event log, or stash plans.
2265    // None of those touch tenant-external systems.
2266    let internal = kind.contains("receipt")
2267        || kind.contains("event_log")
2268        || kind.contains("memo")
2269        || kind.contains("plan");
2270    if internal {
2271        return false;
2272    }
2273    kind.contains("post")
2274        || kind.contains("write")
2275        || kind.contains("publish")
2276        || kind.contains("delete")
2277        || kind.contains("send")
2278}
2279
2280fn candidate_is_plan_only(candidate: &WorkflowCandidate) -> bool {
2281    if candidate.steps.is_empty() {
2282        return false;
2283    }
2284    candidate.side_effects.iter().all(|effect| {
2285        let kind = effect.kind.to_ascii_lowercase();
2286        // Plan-only side effects stay inside Harn's own data plane: receipt
2287        // writes, in-memory event-log appends, file-only mutations, etc.
2288        kind.is_empty()
2289            || kind.contains("receipt")
2290            || kind.contains("event_log")
2291            || kind.contains("memo")
2292            || kind.contains("plan")
2293            || (kind.contains("file") && !kind.contains("publish"))
2294    })
2295}
2296
2297fn redact_trace_for_bundle(trace: &mut CrystallizationTrace) {
2298    for action in &mut trace.actions {
2299        redact_bundle_value(&mut action.inputs);
2300        if let Some(output) = action.output.as_mut() {
2301            redact_bundle_value(output);
2302        }
2303        if let Some(observed) = action.observed_output.as_mut() {
2304            redact_bundle_value(observed);
2305        }
2306        for value in action.parameters.values_mut() {
2307            redact_bundle_value(value);
2308        }
2309        for (_, value) in action.metadata.iter_mut() {
2310            redact_bundle_value(value);
2311        }
2312    }
2313    for (_, value) in trace.metadata.iter_mut() {
2314        redact_bundle_value(value);
2315    }
2316}
2317
2318fn redact_bundle_value(value: &mut JsonValue) {
2319    match value {
2320        JsonValue::String(text) if looks_like_secret_value(text) => {
2321            *text = "[redacted]".to_string();
2322        }
2323        JsonValue::Array(items) => {
2324            for item in items {
2325                redact_bundle_value(item);
2326            }
2327        }
2328        JsonValue::Object(map) => {
2329            for (key, child) in map.iter_mut() {
2330                if is_sensitive_bundle_key(key) {
2331                    *child = JsonValue::String("[redacted]".to_string());
2332                } else {
2333                    redact_bundle_value(child);
2334                }
2335            }
2336        }
2337        _ => {}
2338    }
2339}
2340
/// Decide whether a JSON object key names a value that must be redacted.
///
/// Matching is case-insensitive and treats `-` like `_`, so header-style
/// spellings such as `X-Api-Key` or `Set-Cookie` are caught by the same
/// rules as their snake_case forms. A key is sensitive when it contains
/// `secret`, `token`, `password`, `api_key` or `apikey`, or when it is
/// exactly `authorization`, `cookie` or `set_cookie`.
fn is_sensitive_bundle_key(key: &str) -> bool {
    const SENSITIVE_FRAGMENTS: &[&str] = &["secret", "token", "password", "api_key", "apikey"];

    // Normalizing hyphens only adds matches: none of the fragments contain
    // a hyphen, so every key that matched before still matches.
    let normalized = key.to_ascii_lowercase().replace('-', "_");

    SENSITIVE_FRAGMENTS
        .iter()
        .any(|fragment| normalized.contains(*fragment))
        || matches!(
            normalized.as_str(),
            "authorization" | "cookie" | "set_cookie"
        )
}
2352
/// Heuristic for raw secret material. After trimming whitespace, a value
/// is flagged when it starts with one of a fixed set of credential
/// prefixes, or when it is longer than 48 bytes and consists solely of
/// ASCII letters, digits, `_` and `-`.
fn looks_like_secret_value(value: &str) -> bool {
    const KNOWN_PREFIXES: &[&str] = &["sk-", "ghp_", "ghs_", "xoxb-", "xoxp-", "AKIA"];

    let candidate = value.trim();
    if KNOWN_PREFIXES
        .iter()
        .any(|prefix| candidate.starts_with(*prefix))
    {
        return true;
    }

    // Non-ASCII characters encode to bytes >= 0x80, which fail this check
    // just as the characters themselves would.
    let opaque_charset = candidate
        .bytes()
        .all(|byte| byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'-');
    candidate.len() > 48 && opaque_charset
}
2366
2367fn secret_id_looks_logical(value: &str) -> bool {
2368    !looks_like_secret_value(value) && !value.trim().is_empty()
2369}
2370
2371#[cfg(test)]
2372mod tests {
2373    use super::*;
2374
2375    fn version_trace(
2376        id: &str,
2377        version: &str,
2378        side_target: &str,
2379        fuzzy: bool,
2380    ) -> CrystallizationTrace {
2381        CrystallizationTrace {
2382            id: id.to_string(),
2383            actions: vec![
2384                CrystallizationAction {
2385                    id: format!("{id}-branch"),
2386                    kind: "tool_call".to_string(),
2387                    name: "git.checkout_branch".to_string(),
2388                    parameters: BTreeMap::from([
2389                        ("repo_path".to_string(), json!(format!("/tmp/{id}"))),
2390                        (
2391                            "branch_name".to_string(),
2392                            json!(format!("release-{version}")),
2393                        ),
2394                    ]),
2395                    side_effects: vec![CrystallizationSideEffect {
2396                        kind: "git_ref".to_string(),
2397                        target: side_target.to_string(),
2398                        capability: Some("git.write".to_string()),
2399                        ..CrystallizationSideEffect::default()
2400                    }],
2401                    capabilities: vec!["git.write".to_string()],
2402                    deterministic: Some(true),
2403                    duration_ms: Some(20),
2404                    ..CrystallizationAction::default()
2405                },
2406                CrystallizationAction {
2407                    id: format!("{id}-manifest"),
2408                    kind: "file_mutation".to_string(),
2409                    name: "update_manifest_version".to_string(),
2410                    inputs: json!({"version": version, "path": "harn.toml"}),
2411                    parameters: BTreeMap::from([("version".to_string(), json!(version))]),
2412                    side_effects: vec![CrystallizationSideEffect {
2413                        kind: "file_write".to_string(),
2414                        target: "harn.toml".to_string(),
2415                        capability: Some("fs.write".to_string()),
2416                        ..CrystallizationSideEffect::default()
2417                    }],
2418                    capabilities: vec!["fs.write".to_string()],
2419                    deterministic: Some(true),
2420                    ..CrystallizationAction::default()
2421                },
2422                CrystallizationAction {
2423                    id: format!("{id}-release"),
2424                    kind: if fuzzy { "model_call" } else { "tool_call" }.to_string(),
2425                    name: "prepare_release_notes".to_string(),
2426                    inputs: json!({"release_target": "crates.io", "version": version}),
2427                    parameters: BTreeMap::from([
2428                        ("release_target".to_string(), json!("crates.io")),
2429                        ("version".to_string(), json!(version)),
2430                    ]),
2431                    fuzzy: Some(fuzzy),
2432                    deterministic: Some(!fuzzy),
2433                    cost: CrystallizationCost {
2434                        model_calls: if fuzzy { 1 } else { 0 },
2435                        input_tokens: if fuzzy { 1200 } else { 0 },
2436                        output_tokens: if fuzzy { 250 } else { 0 },
2437                        total_cost_usd: if fuzzy { 0.01 } else { 0.0 },
2438                        wall_ms: 3000,
2439                        ..CrystallizationCost::default()
2440                    },
2441                    ..CrystallizationAction::default()
2442                },
2443            ],
2444            ..CrystallizationTrace::default()
2445        }
2446    }
2447
2448    #[test]
2449    fn crystallizes_repeated_version_bump_with_parameters() {
2450        let traces = (0..5)
2451            .map(|idx| {
2452                version_trace(
2453                    &format!("trace_{idx}"),
2454                    &format!("0.7.{idx}"),
2455                    "release-branch",
2456                    false,
2457                )
2458            })
2459            .collect::<Vec<_>>();
2460
2461        let artifacts = crystallize_traces(
2462            traces,
2463            CrystallizeOptions {
2464                workflow_name: Some("version_bump".to_string()),
2465                ..CrystallizeOptions::default()
2466            },
2467        )
2468        .unwrap();
2469
2470        let candidate = &artifacts.report.candidates[0];
2471        assert!(candidate.rejection_reasons.is_empty());
2472        assert!(candidate.shadow.pass);
2473        assert_eq!(candidate.examples.len(), 5);
2474        let params = candidate
2475            .parameters
2476            .iter()
2477            .map(|param| param.name.as_str())
2478            .collect::<BTreeSet<_>>();
2479        assert!(params.contains("version"));
2480        assert!(params.contains("repo_path"));
2481        assert!(params.contains("branch_name"));
2482        assert!(artifacts.harn_code.contains("pipeline version_bump("));
2483        assert!(artifacts.eval_pack_toml.contains("crystallization-shadow"));
2484    }
2485
2486    #[test]
2487    fn rejects_divergent_side_effects() {
2488        let traces = vec![
2489            version_trace("trace_a", "0.7.1", "release-branch", false),
2490            version_trace("trace_b", "0.7.2", "main", false),
2491            version_trace("trace_c", "0.7.3", "release-branch", false),
2492        ];
2493
2494        let artifacts = crystallize_traces(traces, CrystallizeOptions::default()).unwrap();
2495
2496        assert!(artifacts.report.candidates.is_empty());
2497        assert_eq!(artifacts.report.rejected_candidates.len(), 1);
2498        assert!(artifacts.report.rejected_candidates[0].rejection_reasons[0]
2499            .contains("divergent side effects"));
2500    }
2501
2502    #[test]
2503    fn preserves_remaining_fuzzy_segment() {
2504        let traces = (0..3)
2505            .map(|idx| {
2506                version_trace(
2507                    &format!("trace_{idx}"),
2508                    &format!("0.8.{idx}"),
2509                    "release-branch",
2510                    true,
2511                )
2512            })
2513            .collect::<Vec<_>>();
2514
2515        let artifacts = crystallize_traces(traces, CrystallizeOptions::default()).unwrap();
2516        let candidate = &artifacts.report.candidates[0];
2517
2518        assert!(candidate
2519            .steps
2520            .iter()
2521            .any(|step| step.segment == SegmentKind::Fuzzy));
2522        assert!(candidate.savings.remaining_model_calls > 0);
2523        assert!(artifacts.harn_code.contains("TODO: fuzzy segment"));
2524    }
2525
2526    fn plan_only_trace(id: &str, suffix: &str) -> CrystallizationTrace {
2527        CrystallizationTrace {
2528            id: id.to_string(),
2529            actions: vec![
2530                CrystallizationAction {
2531                    id: format!("{id}-classify"),
2532                    kind: "tool_call".to_string(),
2533                    name: "classify_issue".to_string(),
2534                    parameters: BTreeMap::from([
2535                        ("issue_id".to_string(), json!(format!("HAR-{suffix}"))),
2536                        ("team_key".to_string(), json!("HAR")),
2537                    ]),
2538                    capabilities: vec!["linear.read".to_string()],
2539                    deterministic: Some(true),
2540                    duration_ms: Some(15),
2541                    ..CrystallizationAction::default()
2542                },
2543                CrystallizationAction {
2544                    id: format!("{id}-receipt"),
2545                    kind: "receipt_write".to_string(),
2546                    name: "emit_receipt".to_string(),
2547                    inputs: json!({"summary": format!("plan only #{suffix}"), "kind": "plan"}),
2548                    parameters: BTreeMap::from([
2549                        ("kind".to_string(), json!("plan")),
2550                        ("summary".to_string(), json!(format!("plan only #{suffix}"))),
2551                    ]),
2552                    side_effects: vec![CrystallizationSideEffect {
2553                        kind: "receipt_write".to_string(),
2554                        target: "tenant_event_log".to_string(),
2555                        capability: Some("receipt.write".to_string()),
2556                        ..CrystallizationSideEffect::default()
2557                    }],
2558                    capabilities: vec!["receipt.write".to_string()],
2559                    deterministic: Some(true),
2560                    duration_ms: Some(5),
2561                    ..CrystallizationAction::default()
2562                },
2563            ],
2564            ..CrystallizationTrace::default()
2565        }
2566    }
2567
2568    fn version_traces(count: usize) -> Vec<CrystallizationTrace> {
2569        (0..count)
2570            .map(|idx| {
2571                version_trace(
2572                    &format!("trace_{idx}"),
2573                    &format!("0.7.{idx}"),
2574                    "release-branch",
2575                    false,
2576                )
2577            })
2578            .collect()
2579    }
2580
2581    #[test]
2582    fn build_bundle_assembles_versioned_manifest() {
2583        let traces = version_traces(5);
2584        let artifacts = crystallize_traces(
2585            traces.clone(),
2586            CrystallizeOptions {
2587                workflow_name: Some("version_bump".to_string()),
2588                package_name: Some("release-workflows".to_string()),
2589                author: Some("ops@example.com".to_string()),
2590                approver: Some("lead@example.com".to_string()),
2591                eval_pack_link: Some("eval-pack://release-workflows/v1".to_string()),
2592                ..CrystallizeOptions::default()
2593            },
2594        )
2595        .unwrap();
2596
2597        let bundle = build_crystallization_bundle(
2598            artifacts,
2599            &traces,
2600            BundleOptions {
2601                team: Some("platform".to_string()),
2602                repo: Some("burin-labs/harn".to_string()),
2603                ..BundleOptions::default()
2604            },
2605        )
2606        .unwrap();
2607
2608        let manifest = &bundle.manifest;
2609        assert_eq!(manifest.schema, BUNDLE_SCHEMA);
2610        assert_eq!(manifest.schema_version, BUNDLE_SCHEMA_VERSION);
2611        assert_eq!(manifest.kind, BundleKind::Candidate);
2612        assert!(!manifest.candidate_id.is_empty());
2613        assert_eq!(manifest.workflow.name, "version_bump");
2614        assert_eq!(manifest.workflow.package_name, "release-workflows");
2615        assert_eq!(manifest.workflow.path, BUNDLE_WORKFLOW_FILE);
2616        assert_eq!(manifest.team.as_deref(), Some("platform"));
2617        assert_eq!(manifest.repo.as_deref(), Some("burin-labs/harn"));
2618        assert_eq!(manifest.external_key, "version-bump");
2619        assert_eq!(manifest.promotion.rollout_policy, "shadow_then_canary");
2620        assert_eq!(
2621            manifest.promotion.author.as_deref(),
2622            Some("ops@example.com")
2623        );
2624        assert_eq!(
2625            manifest.promotion.approver.as_deref(),
2626            Some("lead@example.com")
2627        );
2628        assert_eq!(manifest.promotion.workflow_version, "0.1.0");
2629        assert!(manifest.deterministic_steps.len() + manifest.fuzzy_steps.len() > 0);
2630        assert_eq!(manifest.source_traces.len(), traces.len());
2631        assert_eq!(manifest.fixtures.len(), traces.len());
2632        assert!(manifest.fixtures.iter().all(|fixture| fixture.redacted));
2633        assert!(manifest.redaction.applied);
2634        assert!(manifest.redaction.fixture_count > 0);
2635        assert!(manifest
2636            .eval_pack
2637            .as_ref()
2638            .is_some_and(|eval| eval.path == BUNDLE_EVAL_PACK_FILE));
2639        assert!(manifest
2640            .required_secrets
2641            .iter()
2642            .all(|secret| !secret.is_empty()));
2643    }
2644
2645    #[test]
2646    fn write_bundle_round_trips_through_disk() {
2647        let traces = version_traces(5);
2648        let artifacts = crystallize_traces(
2649            traces.clone(),
2650            CrystallizeOptions {
2651                workflow_name: Some("version_bump".to_string()),
2652                ..CrystallizeOptions::default()
2653            },
2654        )
2655        .unwrap();
2656        let bundle =
2657            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2658
2659        let dir = tempfile::tempdir().unwrap();
2660        let written = write_crystallization_bundle(&bundle, dir.path()).unwrap();
2661        assert_eq!(written.candidate_id, bundle.manifest.candidate_id);
2662
2663        // Files exist on disk.
2664        for relative in [
2665            BUNDLE_MANIFEST_FILE,
2666            BUNDLE_REPORT_FILE,
2667            BUNDLE_WORKFLOW_FILE,
2668            BUNDLE_EVAL_PACK_FILE,
2669        ] {
2670            assert!(dir.path().join(relative).exists(), "missing {relative}");
2671        }
2672        let fixtures_dir = dir.path().join(BUNDLE_FIXTURES_DIR);
2673        assert!(fixtures_dir.is_dir());
2674        assert_eq!(
2675            std::fs::read_dir(&fixtures_dir).unwrap().count(),
2676            traces.len()
2677        );
2678
2679        // Manifest round-trips.
2680        let (loaded_manifest, loaded_traces) = load_crystallization_bundle(dir.path()).unwrap();
2681        assert_eq!(loaded_manifest, bundle.manifest);
2682        assert_eq!(loaded_traces.len(), traces.len());
2683
2684        // Validation passes.
2685        let validation = validate_crystallization_bundle(dir.path()).unwrap();
2686        assert!(
2687            validation.problems.is_empty(),
2688            "unexpected problems: {:?}",
2689            validation.problems
2690        );
2691        assert!(validation.is_ok());
2692        assert!(validation.workflow_ok && validation.report_ok);
2693        assert!(validation.fixtures_ok && validation.redaction_ok);
2694
2695        // Shadow replay matches the persisted shadow report.
2696        let (replay_manifest, shadow) = shadow_replay_bundle(dir.path()).unwrap();
2697        assert_eq!(replay_manifest.candidate_id, bundle.manifest.candidate_id);
2698        assert!(shadow.pass, "shadow should still pass");
2699        assert_eq!(shadow.compared_traces, traces.len());
2700    }
2701
2702    #[test]
2703    fn validate_rejects_bundle_with_missing_workflow() {
2704        let traces = version_traces(3);
2705        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
2706        let bundle =
2707            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2708
2709        let dir = tempfile::tempdir().unwrap();
2710        write_crystallization_bundle(&bundle, dir.path()).unwrap();
2711        std::fs::remove_file(dir.path().join(BUNDLE_WORKFLOW_FILE)).unwrap();
2712
2713        let validation = validate_crystallization_bundle(dir.path()).unwrap();
2714        assert!(!validation.is_ok());
2715        assert!(validation
2716            .problems
2717            .iter()
2718            .any(|problem| problem.contains("missing workflow file")));
2719    }
2720
2721    #[test]
2722    fn validate_rejects_bundle_with_unredacted_fixture() {
2723        let traces = version_traces(3);
2724        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
2725        let mut bundle =
2726            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2727        // Force a fixture to claim it is unredacted; this must trip
2728        // validation so a malicious or careless producer cannot ship raw
2729        // private payloads under the bundle contract.
2730        bundle.manifest.fixtures[0].redacted = false;
2731        let dir = tempfile::tempdir().unwrap();
2732        write_crystallization_bundle(&bundle, dir.path()).unwrap();
2733
2734        let validation = validate_crystallization_bundle(dir.path()).unwrap();
2735        assert!(!validation.is_ok());
2736        assert!(validation
2737            .problems
2738            .iter()
2739            .any(|problem| problem.contains("not marked redacted")));
2740    }
2741
2742    #[test]
2743    fn validate_rejects_unsupported_schema_version() {
2744        let traces = version_traces(3);
2745        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
2746        let mut bundle =
2747            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2748        bundle.manifest.schema_version = BUNDLE_SCHEMA_VERSION + 1;
2749        let dir = tempfile::tempdir().unwrap();
2750        write_crystallization_bundle(&bundle, dir.path()).unwrap();
2751
2752        let validation = validate_crystallization_bundle(dir.path()).unwrap();
2753        assert!(!validation.is_ok());
2754        assert!(validation
2755            .problems
2756            .iter()
2757            .any(|problem| problem.contains("schema_version")));
2758    }
2759
2760    #[test]
2761    fn redacts_secret_like_values_in_fixtures() {
2762        // Build secret-shaped strings at runtime so we exercise the
2763        // redaction prefixes (`xoxb-`, `ghp_`, `sk-`) without checking in
2764        // source that looks like a real credential to secret scanners.
2765        let slack_prefix = format!("{}{}", "xo", "xb-");
2766        let github_prefix = format!("{}{}", "gh", "p_");
2767        let openai_prefix = "sk-".to_string();
2768        let pad = "A".repeat(48);
2769        let slack_secret = format!("{slack_prefix}1234567890-{pad}");
2770        let github_secret = format!("{github_prefix}{pad}");
2771        let openai_secret = format!("{openai_prefix}{pad}");
2772
2773        let mut secret_action = CrystallizationAction {
2774            id: "secret".to_string(),
2775            kind: "tool_call".to_string(),
2776            name: "post_release_to_slack".to_string(),
2777            parameters: BTreeMap::from([
2778                ("slack_token".to_string(), json!(slack_secret)),
2779                ("channel".to_string(), json!("#releases")),
2780            ]),
2781            inputs: json!({
2782                "authorization": format!("Bearer {github_secret}"),
2783                "version": "0.7.1",
2784            }),
2785            ..CrystallizationAction::default()
2786        };
2787        secret_action
2788            .metadata
2789            .insert("api_key".to_string(), json!(openai_secret));
2790
2791        let mut trace = CrystallizationTrace {
2792            id: "trace_secret".to_string(),
2793            actions: vec![secret_action],
2794            ..CrystallizationTrace::default()
2795        };
2796        redact_trace_for_bundle(&mut trace);
2797        let action = &trace.actions[0];
2798        assert_eq!(
2799            action.parameters.get("slack_token"),
2800            Some(&json!("[redacted]"))
2801        );
2802        assert_eq!(action.parameters.get("channel"), Some(&json!("#releases")));
2803        let inputs = action.inputs.as_object().unwrap();
2804        assert_eq!(inputs.get("authorization"), Some(&json!("[redacted]")));
2805        assert_eq!(inputs.get("version"), Some(&json!("0.7.1")));
2806        assert_eq!(action.metadata.get("api_key"), Some(&json!("[redacted]")));
2807    }
2808
2809    #[test]
2810    fn plan_only_fixture_yields_plan_only_kind() {
2811        let traces = (0..3)
2812            .map(|idx| plan_only_trace(&format!("plan_{idx}"), &format!("{idx}")))
2813            .collect::<Vec<_>>();
2814        let artifacts = crystallize_traces(
2815            traces.clone(),
2816            CrystallizeOptions {
2817                workflow_name: Some("plan_only_triage".to_string()),
2818                ..CrystallizeOptions::default()
2819            },
2820        )
2821        .unwrap();
2822        let bundle =
2823            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2824        assert_eq!(bundle.manifest.kind, BundleKind::PlanOnly);
2825        assert_eq!(bundle.manifest.risk_level, "low");
2826    }
2827
2828    #[test]
2829    fn rejected_bundle_has_rejected_kind() {
2830        let traces = vec![
2831            version_trace("trace_a", "0.7.1", "release-branch", false),
2832            version_trace("trace_b", "0.7.2", "main", false),
2833            version_trace("trace_c", "0.7.3", "release-branch", false),
2834        ];
2835        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
2836        let bundle =
2837            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2838        assert_eq!(bundle.manifest.kind, BundleKind::Rejected);
2839        assert!(bundle.manifest.candidate_id.is_empty());
2840        assert!(!bundle.manifest.rejection_reasons.is_empty());
2841        assert!(bundle.fixtures.is_empty());
2842    }
2843
2844    #[test]
2845    fn validate_round_trips_rejected_bundle() {
2846        let traces = vec![
2847            version_trace("trace_a", "0.7.1", "release-branch", false),
2848            version_trace("trace_b", "0.7.2", "main", false),
2849            version_trace("trace_c", "0.7.3", "release-branch", false),
2850        ];
2851        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
2852        let bundle =
2853            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2854        let dir = tempfile::tempdir().unwrap();
2855        write_crystallization_bundle(&bundle, dir.path()).unwrap();
2856
2857        let validation = validate_crystallization_bundle(dir.path()).unwrap();
2858        assert!(validation.is_ok(), "{:?}", validation.problems);
2859        assert_eq!(validation.kind, BundleKind::Rejected);
2860    }
2861
2862    #[test]
2863    fn shadow_replay_fails_when_fixture_diverges() {
2864        let traces = version_traces(3);
2865        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
2866        let bundle =
2867            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2868        let dir = tempfile::tempdir().unwrap();
2869        write_crystallization_bundle(&bundle, dir.path()).unwrap();
2870
2871        // Tamper with one redacted fixture so its action list no longer
2872        // matches the candidate signature; the replay must fail without
2873        // panicking.
2874        let fixture_dir = dir.path().join(BUNDLE_FIXTURES_DIR);
2875        let some_fixture = std::fs::read_dir(&fixture_dir)
2876            .unwrap()
2877            .next()
2878            .unwrap()
2879            .unwrap()
2880            .path();
2881        let mut tampered: CrystallizationTrace =
2882            serde_json::from_slice(&std::fs::read(&some_fixture).unwrap()).unwrap();
2883        tampered.actions.truncate(1);
2884        std::fs::write(&some_fixture, serde_json::to_vec_pretty(&tampered).unwrap()).unwrap();
2885
2886        let (_, shadow) = shadow_replay_bundle(dir.path()).unwrap();
2887        assert!(!shadow.pass);
2888        assert!(!shadow.failures.is_empty());
2889    }
2890}