Skip to main content

harn_vm/orchestration/
crystallize.rs

1//! Workflow crystallization primitives.
2//!
3//! This module keeps the first mining pass deliberately conservative: it
4//! accepts ordered trace actions, finds a repeated contiguous action sequence,
5//! extracts scalar parameters from fields that vary across examples, rejects
6//! divergent side effects, and emits a reviewable Harn skeleton plus shadow
7//! comparison metadata. Hosted surfaces can layer richer mining on top of this
8//! stable IR without changing the CLI contract.
9
10use std::collections::{BTreeMap, BTreeSet};
11use std::fmt::Write as _;
12use std::path::{Path, PathBuf};
13
14use serde::{Deserialize, Serialize};
15use serde_json::{json, Value as JsonValue};
16use sha2::{Digest, Sha256};
17
18use super::{new_id, now_rfc3339, RunRecord};
19use crate::value::VmError;
20
/// Trace schema version: stamped onto traces whose `version` is 0 and
/// reported as the accepted input-format version.
21const TRACE_SCHEMA_VERSION: u32 = 1;
/// Lower bound on the number of traces/examples required for mining;
/// `crystallize_traces` raises any smaller requested value to this.
22const DEFAULT_MIN_EXAMPLES: usize = 2;
23
24/// Stable schema marker for `candidate.json` inside a crystallization
25/// bundle. Cloud importers and other downstream consumers should refuse
26/// bundles whose `schema` field is anything else.
27pub const BUNDLE_SCHEMA: &str = "harn.crystallization.candidate.bundle";
28/// Versioned schema number for the bundle manifest. Cloud importers and
29/// other consumers should refuse bundles whose `schema_version` is newer
30/// than the highest version they understand.
31pub const BUNDLE_SCHEMA_VERSION: u32 = 1;
32/// Conventional file names inside a crystallization bundle directory.
33pub const BUNDLE_MANIFEST_FILE: &str = "candidate.json";
/// Conventional name of the report JSON file inside a bundle directory.
34pub const BUNDLE_REPORT_FILE: &str = "report.json";
/// Conventional name of the generated workflow source inside a bundle directory.
35pub const BUNDLE_WORKFLOW_FILE: &str = "workflow.harn";
/// Conventional name of the eval pack TOML inside a bundle directory.
36pub const BUNDLE_EVAL_PACK_FILE: &str = "harn.eval.toml";
/// Conventional name of the fixtures sub-directory inside a bundle directory.
37pub const BUNDLE_FIXTURES_DIR: &str = "fixtures";
38
39/// Default rollout policy applied when a bundle is emitted without one
40/// explicitly configured. Hosted promotion surfaces can override it.
41const DEFAULT_ROLLOUT_POLICY: &str = "shadow_then_canary";
42
/// One recorded run expressed as an ordered list of actions; the input unit
/// for crystallization.
///
/// Every field may be omitted on the wire (`#[serde(default)]`).
/// `normalize_trace` fills in `version`, `id`, `source_hash`, and `usage`
/// when they are missing or empty.
43#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
44#[serde(default)]
45pub struct CrystallizationTrace {
    /// Trace schema version; 0 is treated as "unset" and replaced during normalization.
46    pub version: u32,
    /// Trace identifier; a fresh id is generated during normalization when blank.
47    pub id: String,
    /// Where the trace came from; the loader sets this to the file path when absent.
48    pub source: Option<String>,
    /// Hash of the source; the loader hashes the raw file contents, and
    /// normalization falls back to hashing the serialized actions.
49    pub source_hash: Option<String>,
50    pub workflow_id: Option<String>,
51    pub started_at: Option<String>,
52    pub finished_at: Option<String>,
53    pub flow: Option<CrystallizationFlowRef>,
    /// Actions in execution order; mining looks for repeated contiguous runs of these.
54    pub actions: Vec<CrystallizationAction>,
    /// Trace-level totals; summed from per-action costs when left at the default.
55    pub usage: CrystallizationUsage,
56    pub metadata: BTreeMap<String, JsonValue>,
57}
58
/// Optional provenance identifiers carried alongside a trace (the report
/// lists "Flow provenance references" among the preserved input fields).
///
/// None of these fields are read by the code visible in this section of the
/// file; they are carried through serialization as-is.
59#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
60#[serde(default)]
61pub struct CrystallizationFlowRef {
62    pub trace_id: Option<String>,
63    pub agent_run_id: Option<String>,
64    pub transcript_ref: Option<String>,
65    pub atom_ids: Vec<String>,
66    pub slice_ids: Vec<String>,
67}
68
/// A single step inside a trace, e.g. a stage, tool call, model call, or
/// human approval (the `kind` strings used by the run-record converter and
/// the normalizer in this file).
///
/// All fields default when omitted; `normalize_trace` back-fills `id`,
/// `kind`, and `name`, and sorts/dedups the capability and secret lists.
69#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
70#[serde(default)]
71pub struct CrystallizationAction {
    /// Action identifier; normalized to `action_<position>` (1-based) when blank.
72    pub id: String,
    /// Action category; normalized to `"action"` when blank.
73    pub kind: String,
    /// Action name; normalized to the value of `kind` when blank.
74    pub name: String,
75    pub timestamp: Option<String>,
    /// Raw inputs; mining compares these across examples to find varying parameters.
76    pub inputs: JsonValue,
77    pub output: Option<JsonValue>,
78    pub observed_output: Option<JsonValue>,
    /// Explicitly declared parameters; scalar values become candidate parameters.
79    pub parameters: BTreeMap<String, JsonValue>,
    /// Side effects of the action; must be compatible across examples or the
    /// candidate is rejected.
80    pub side_effects: Vec<CrystallizationSideEffect>,
81    pub capabilities: Vec<String>,
82    pub required_secrets: Vec<String>,
83    pub approval: Option<CrystallizationApproval>,
    /// Per-action model usage; `model_calls` is forced to 1 for `model_call`
    /// actions that report 0.
84    pub cost: CrystallizationCost,
85    pub duration_ms: Option<i64>,
86    pub deterministic: Option<bool>,
    /// When `Some(true)` in any example, the mined step is marked fuzzy.
87    pub fuzzy: Option<bool>,
88    pub metadata: BTreeMap<String, JsonValue>,
89}
90
/// Describes one side effect attributed to an action.
///
/// Mining rejects a candidate step when the side effects of its example
/// actions are not compatible with each other; the comparison itself lives in
/// a helper outside this section.
91#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
92#[serde(default)]
93pub struct CrystallizationSideEffect {
94    pub kind: String,
95    pub target: String,
96    pub capability: Option<String>,
97    pub mutation: Option<String>,
98    pub metadata: BTreeMap<String, JsonValue>,
99}
100
/// Human-approval information attached to an action.
///
/// The run-record converter creates one of these per HITL question with
/// `required: true` and `boundary: Some("hitl")`, leaving `approver` unset.
101#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
102#[serde(default)]
103pub struct CrystallizationApproval {
104    pub prompt: Option<String>,
105    pub approver: Option<String>,
106    pub required: bool,
107    pub boundary: Option<String>,
108}
109
/// Model usage and timing recorded for a single action.
///
/// `normalize_trace` sums these per-action values into the trace-level
/// [`CrystallizationUsage`] when the trace carries no usage of its own.
110#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
111#[serde(default)]
112pub struct CrystallizationCost {
113    pub model: Option<String>,
114    pub model_calls: i64,
115    pub input_tokens: i64,
116    pub output_tokens: i64,
117    pub total_cost_usd: f64,
118    pub wall_ms: i64,
119}
120
/// Trace-level usage totals.
///
/// A value equal to `Default::default()` (all zeros) is treated as "not
/// provided" by `normalize_trace`, which then derives it from action costs.
121#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
122#[serde(default)]
123pub struct CrystallizationUsage {
124    pub model_calls: i64,
125    pub input_tokens: i64,
126    pub output_tokens: i64,
127    pub total_cost_usd: f64,
128    pub wall_ms: i64,
129}
130
/// Classification of a mined workflow step, serialized in `snake_case`.
///
/// Mining marks a step `Fuzzy` when any example action is flagged fuzzy or
/// the representative action is a `model_call`; otherwise `Deterministic`.
131#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
132#[serde(rename_all = "snake_case")]
133pub enum SegmentKind {
134    #[default]
135    Deterministic,
136    Fuzzy,
137}
138
/// `(trace_index, start_index)`: the trace a match was found in and the
/// offset of its first action within that trace.
139type SequenceExample = (usize, usize);
/// A repeated action-signature sequence together with one match per trace
/// that contains it.
140type RepeatedSequence = (Vec<String>, Vec<SequenceExample>);
141
/// A parameter extracted for a candidate workflow.
142#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
143#[serde(default)]
144pub struct WorkflowCandidateParameter {
    /// Parameter name after identifier sanitization.
145    pub name: String,
    /// Locations the value was observed at, e.g. `steps[0].parameters.<name>`.
146    pub source_paths: Vec<String>,
    /// Up to five distinct observed values, rendered as strings.
147    pub examples: Vec<String>,
    /// Mining currently always sets this to `true`.
148    pub required: bool,
149}
150
/// One step of a candidate workflow, derived from the actions found at the
/// same offset in every example.
///
/// Apart from `parameter_refs`, `segment`, and `expected_output` (which look
/// at all examples), mining copies the fields from the first example's action.
151#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
152#[serde(default)]
153pub struct WorkflowCandidateStep {
    /// 1-based position of the step within the candidate.
154    pub index: usize,
155    pub kind: String,
156    pub name: String,
157    pub segment: SegmentKind,
    /// Sanitized names of the candidate parameters this step uses.
158    pub parameter_refs: Vec<String>,
159    pub constants: BTreeMap<String, JsonValue>,
160    pub preconditions: Vec<String>,
161    pub side_effects: Vec<CrystallizationSideEffect>,
162    pub capabilities: Vec<String>,
163    pub required_secrets: Vec<String>,
164    pub approval: Option<CrystallizationApproval>,
165    pub expected_output: Option<JsonValue>,
    /// Left empty by mining.
166    pub review_notes: Vec<String>,
167}
168
/// Points at one occurrence of a candidate's action sequence inside a
/// source trace.
169#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
170#[serde(default)]
171pub struct WorkflowCandidateExample {
172    pub trace_id: String,
    /// The trace's `source_hash`, or an empty string when it has none.
173    pub source_hash: String,
    /// Offset of the first matched action within the trace's `actions`.
174    pub start_index: usize,
    /// Ids of the matched actions, in order.
175    pub action_ids: Vec<String>,
176}
177
/// Metadata attached to a candidate for later promotion/packaging.
///
/// Mining fills this from the examples, the aggregated step requirements,
/// and the caller's [`CrystallizeOptions`].
178#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
179#[serde(default)]
180pub struct PromotionMetadata {
    /// `source_hash` of every example trace, in example order.
181    pub source_trace_hashes: Vec<String>,
182    pub author: Option<String>,
183    pub approver: Option<String>,
184    pub created_at: String,
    /// Mining emits `"0.1.0"`.
185    pub version: String,
    /// Caller-supplied, or the workflow name with `_` replaced by `-`.
186    pub package_name: String,
187    pub capability_set: Vec<String>,
188    pub secrets_required: Vec<String>,
189    pub rollback_target: Option<String>,
    /// Caller-supplied link; overwritten with the eval pack path for the
    /// first candidate when artifacts are written with an eval pack.
190    pub eval_pack_link: Option<String>,
191}
192
/// Estimated savings from replacing the traced behaviour with the candidate
/// workflow.
///
/// Produced by `estimate_savings`, which is outside this section; the exact
/// meaning of each figure should be confirmed against that helper.
193#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
194#[serde(default)]
195pub struct SavingsEstimate {
196    pub model_calls_avoided: i64,
197    pub input_tokens_avoided: i64,
198    pub output_tokens_avoided: i64,
199    pub estimated_cost_usd_avoided: f64,
200    pub wall_ms_avoided: i64,
201    pub cpu_runtime_cost_usd: f64,
202    pub remaining_model_calls: i64,
203}
204
/// Outcome of shadow-comparing a candidate against one trace.
///
/// Populated by `shadow_candidate`, which is outside this section.
205#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
206#[serde(default)]
207pub struct ShadowTraceResult {
208    pub trace_id: String,
209    pub pass: bool,
210    pub details: Vec<String>,
211}
212
/// Aggregate result of shadow-comparing a candidate against the source traces.
///
/// When `pass` is false, `crystallize_traces` appends `failures` to the
/// candidate's rejection reasons.
213#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
214#[serde(default)]
215pub struct ShadowRunReport {
216    pub pass: bool,
217    pub compared_traces: usize,
218    pub failures: Vec<String>,
219    pub traces: Vec<ShadowTraceResult>,
220}
221
/// A workflow mined from a repeated action sequence, together with the
/// evidence and metadata needed to review it.
222#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
223#[serde(default)]
224pub struct WorkflowCandidate {
225    pub id: String,
226    pub name: String,
    /// Ranking score; accepted candidates are sorted by this, highest first.
227    pub confidence: f64,
    /// The action signatures that make up the repeated sequence.
228    pub sequence_signature: Vec<String>,
229    pub parameters: Vec<WorkflowCandidateParameter>,
230    pub steps: Vec<WorkflowCandidateStep>,
    /// One entry per trace in which the sequence was found.
231    pub examples: Vec<WorkflowCandidateExample>,
    /// Capabilities collected from all steps.
232    pub capabilities: Vec<String>,
    /// Secrets collected from all steps.
233    pub required_secrets: Vec<String>,
234    pub approval_points: Vec<CrystallizationApproval>,
235    pub side_effects: Vec<CrystallizationSideEffect>,
236    pub expected_outputs: Vec<JsonValue>,
    /// Non-blocking review notes (e.g. steps that remain fuzzy).
237    pub warnings: Vec<String>,
    /// Blocking problems; any entry here makes the candidate unsafe to propose.
238    pub rejection_reasons: Vec<String>,
239    pub promotion: PromotionMetadata,
240    pub savings: SavingsEstimate,
241    pub shadow: ShadowRunReport,
242}
243
244impl WorkflowCandidate {
    /// Returns `true` when no rejection reason has been recorded.
245    pub fn is_safe_to_propose(&self) -> bool {
246        self.rejection_reasons.is_empty()
247    }
248}
249
/// Summary of one crystallization run: accepted and rejected candidates
/// plus a description of the accepted input format.
250#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
251#[serde(default)]
252pub struct CrystallizationReport {
    /// Report format version; `crystallize_traces` emits 1.
253    pub version: u32,
254    pub generated_at: String,
255    pub source_trace_count: usize,
    /// Id of the top-ranked accepted candidate, if any was accepted.
256    pub selected_candidate_id: Option<String>,
    /// Accepted candidates, best first.
257    pub candidates: Vec<WorkflowCandidate>,
258    pub rejected_candidates: Vec<WorkflowCandidate>,
259    pub warnings: Vec<String>,
260    pub input_format: CrystallizationInputFormat,
    /// Set when the artifacts are written to disk; `None` before that.
261    pub harn_code_path: Option<String>,
    /// Set when an eval pack is written to disk; `None` otherwise.
262    pub eval_pack_path: Option<String>,
263}
264
/// Self-description of the trace input format, embedded in every report.
265#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
266#[serde(default)]
267pub struct CrystallizationInputFormat {
268    pub name: String,
269    pub version: u32,
    /// Field names a trace must provide.
270    pub required_fields: Vec<String>,
    /// Human-readable list of the information the format preserves.
271    pub preserved_fields: Vec<String>,
272}
273
/// Caller-supplied knobs for `crystallize_traces`.
274#[derive(Clone, Debug, Default)]
275pub struct CrystallizeOptions {
    /// Minimum number of traces/examples; values below `DEFAULT_MIN_EXAMPLES`
    /// are raised to it.
276    pub min_examples: usize,
    /// Overrides the inferred workflow name.
277    pub workflow_name: Option<String>,
    /// Overrides the package name derived from the workflow name.
278    pub package_name: Option<String>,
279    pub author: Option<String>,
280    pub approver: Option<String>,
281    pub eval_pack_link: Option<String>,
282}
283
/// In-memory output of `crystallize_traces`, ready to be written to disk.
284#[derive(Clone, Debug, Default)]
285pub struct CrystallizationArtifacts {
286    pub report: CrystallizationReport,
    /// Generated workflow source, or a stub when no candidate was accepted.
287    pub harn_code: String,
    /// Generated eval pack TOML; empty when no candidate was accepted.
288    pub eval_pack_toml: String,
289}
290
291pub fn crystallize_traces(
292    traces: Vec<CrystallizationTrace>,
293    options: CrystallizeOptions,
294) -> Result<CrystallizationArtifacts, VmError> {
295    let min_examples = options.min_examples.max(DEFAULT_MIN_EXAMPLES);
296    if traces.len() < min_examples {
297        return Err(VmError::Runtime(format!(
298            "crystallize requires at least {min_examples} traces, got {}",
299            traces.len()
300        )));
301    }
302
303    let normalized = traces.into_iter().map(normalize_trace).collect::<Vec<_>>();
304    let mut candidates = mine_candidates(&normalized, min_examples, &options);
305    let mut rejected_candidates = Vec::new();
306    for candidate in &mut candidates {
307        candidate.shadow = shadow_candidate(candidate, &normalized);
308        if !candidate.shadow.pass {
309            candidate
310                .rejection_reasons
311                .extend(candidate.shadow.failures.clone());
312        }
313    }
314
315    let mut accepted = Vec::new();
316    for candidate in candidates {
317        if candidate.is_safe_to_propose() {
318            accepted.push(candidate);
319        } else {
320            rejected_candidates.push(candidate);
321        }
322    }
323    accepted.sort_by(|left, right| {
324        right
325            .confidence
326            .partial_cmp(&left.confidence)
327            .unwrap_or(std::cmp::Ordering::Equal)
328            .then_with(|| right.steps.len().cmp(&left.steps.len()))
329    });
330
331    let selected = accepted.first();
332    let harn_code = selected
333        .map(generate_harn_code)
334        .unwrap_or_else(|| rejected_workflow_stub(&rejected_candidates));
335    let eval_pack_toml = selected.map(generate_eval_pack).unwrap_or_default();
336
337    let report = CrystallizationReport {
338        version: 1,
339        generated_at: now_rfc3339(),
340        source_trace_count: normalized.len(),
341        selected_candidate_id: selected.map(|candidate| candidate.id.clone()),
342        candidates: accepted,
343        rejected_candidates,
344        warnings: Vec::new(),
345        input_format: CrystallizationInputFormat {
346            name: "harn.crystallization.trace".to_string(),
347            version: TRACE_SCHEMA_VERSION,
348            required_fields: vec!["id".to_string(), "actions".to_string()],
349            preserved_fields: vec![
350                "ordered actions".to_string(),
351                "tool calls".to_string(),
352                "model calls".to_string(),
353                "human approvals".to_string(),
354                "file mutations".to_string(),
355                "external API calls".to_string(),
356                "observed outputs".to_string(),
357                "costs".to_string(),
358                "timestamps".to_string(),
359                "source hashes".to_string(),
360                "Flow provenance references".to_string(),
361            ],
362        },
363        harn_code_path: None,
364        eval_pack_path: None,
365    };
366
367    Ok(CrystallizationArtifacts {
368        report,
369        harn_code,
370        eval_pack_toml,
371    })
372}
373
374pub fn load_crystallization_traces_from_dir(
375    dir: &Path,
376) -> Result<Vec<CrystallizationTrace>, VmError> {
377    let mut paths = Vec::new();
378    collect_json_paths(dir, &mut paths)?;
379    if paths.is_empty() {
380        return Err(VmError::Runtime(format!(
381            "no .json trace files found under {}",
382            dir.display()
383        )));
384    }
385    paths.sort();
386    paths
387        .iter()
388        .map(|path| load_crystallization_trace(path))
389        .collect()
390}
391
392pub fn load_crystallization_trace(path: &Path) -> Result<CrystallizationTrace, VmError> {
393    let content = std::fs::read_to_string(path).map_err(|error| {
394        VmError::Runtime(format!(
395            "failed to read crystallization trace {}: {error}",
396            path.display()
397        ))
398    })?;
399    let value: JsonValue = serde_json::from_str(&content).map_err(|error| {
400        VmError::Runtime(format!(
401            "failed to parse crystallization trace {}: {error}",
402            path.display()
403        ))
404    })?;
405
406    let mut trace = if value.get("actions").is_some() {
407        serde_json::from_value::<CrystallizationTrace>(value.clone()).map_err(|error| {
408            VmError::Runtime(format!(
409                "failed to decode crystallization trace {}: {error}",
410                path.display()
411            ))
412        })?
413    } else if value.get("stages").is_some() || value.get("_type") == Some(&json!("workflow_run")) {
414        let run: RunRecord = serde_json::from_value(value.clone()).map_err(|error| {
415            VmError::Runtime(format!(
416                "failed to decode run record {} as crystallization input: {error}",
417                path.display()
418            ))
419        })?;
420        trace_from_run_record(run)
421    } else {
422        return Err(VmError::Runtime(format!(
423            "{} is neither a crystallization trace nor a workflow run record",
424            path.display()
425        )));
426    };
427    if trace.source.is_none() {
428        trace.source = Some(path.display().to_string());
429    }
430    if trace.source_hash.is_none() {
431        trace.source_hash = Some(hash_bytes(content.as_bytes()));
432    }
433    Ok(normalize_trace(trace))
434}
435
436pub fn write_crystallization_artifacts(
437    mut artifacts: CrystallizationArtifacts,
438    workflow_path: &Path,
439    report_path: &Path,
440    eval_pack_path: Option<&Path>,
441) -> Result<CrystallizationReport, VmError> {
442    crate::atomic_io::atomic_write(workflow_path, artifacts.harn_code.as_bytes()).map_err(
443        |error| {
444            VmError::Runtime(format!(
445                "failed to write generated workflow {}: {error}",
446                workflow_path.display()
447            ))
448        },
449    )?;
450
451    artifacts.report.harn_code_path = Some(workflow_path.display().to_string());
452    if let Some(path) = eval_pack_path {
453        if !artifacts.eval_pack_toml.trim().is_empty() {
454            crate::atomic_io::atomic_write(path, artifacts.eval_pack_toml.as_bytes()).map_err(
455                |error| {
456                    VmError::Runtime(format!(
457                        "failed to write eval pack {}: {error}",
458                        path.display()
459                    ))
460                },
461            )?;
462            artifacts.report.eval_pack_path = Some(path.display().to_string());
463            if let Some(candidate) = artifacts.report.candidates.first_mut() {
464                candidate.promotion.eval_pack_link = Some(path.display().to_string());
465            }
466        }
467    }
468
469    let report_json = serde_json::to_string_pretty(&artifacts.report)
470        .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
471    crate::atomic_io::atomic_write(report_path, report_json.as_bytes()).map_err(|error| {
472        VmError::Runtime(format!(
473            "failed to write crystallization report {}: {error}",
474            report_path.display()
475        ))
476    })?;
477    Ok(artifacts.report)
478}
479
480fn collect_json_paths(dir: &Path, out: &mut Vec<PathBuf>) -> Result<(), VmError> {
481    let entries = std::fs::read_dir(dir).map_err(|error| {
482        VmError::Runtime(format!(
483            "failed to read crystallization trace dir {}: {error}",
484            dir.display()
485        ))
486    })?;
487    for entry in entries {
488        let entry = entry.map_err(|error| {
489            VmError::Runtime(format!(
490                "failed to read entry in trace dir {}: {error}",
491                dir.display()
492            ))
493        })?;
494        let path = entry.path();
495        if path.is_dir() {
496            collect_json_paths(&path, out)?;
497        } else if path.extension().and_then(|ext| ext.to_str()) == Some("json") {
498            out.push(path);
499        }
500    }
501    Ok(())
502}
503
/// Converts a workflow `RunRecord` into a [`CrystallizationTrace`].
///
/// One action is emitted per stage, per tool recording, and per HITL
/// question; the combined list is then ordered by timestamp. `source`,
/// `source_hash`, and `flow` are left at their defaults for the caller or the
/// normalizer to fill in.
504fn trace_from_run_record(run: RunRecord) -> CrystallizationTrace {
505    let mut actions = Vec::new();
    // Stages: kind falls back to "stage"; usage (when present) becomes the action cost.
506    for stage in &run.stages {
507        actions.push(CrystallizationAction {
508            id: stage.id.clone(),
509            kind: if stage.kind.is_empty() {
510                "stage".to_string()
511            } else {
512                stage.kind.clone()
513            },
514            name: stage.node_id.clone(),
515            timestamp: Some(stage.started_at.clone()),
516            output: stage.visible_text.as_ref().map(|text| json!(text)),
517            observed_output: stage.visible_text.as_ref().map(|text| json!(text)),
518            duration_ms: stage.usage.as_ref().map(|usage| usage.total_duration_ms),
519            cost: stage
520                .usage
521                .as_ref()
522                .map(|usage| CrystallizationCost {
523                    model_calls: usage.call_count,
524                    input_tokens: usage.input_tokens,
525                    output_tokens: usage.output_tokens,
526                    total_cost_usd: usage.total_cost,
527                    wall_ms: usage.total_duration_ms,
528                    model: usage.models.first().cloned(),
529                })
530                .unwrap_or_default(),
            // A stage counts as deterministic when it recorded no model calls
            // (or recorded no usage at all) ...
531            deterministic: Some(
532                stage
533                    .usage
534                    .as_ref()
535                    .map(|usage| usage.call_count == 0)
536                    .unwrap_or(true),
537            ),
            // ... and as fuzzy when it recorded at least one model call.
538            fuzzy: Some(
539                stage
540                    .usage
541                    .as_ref()
542                    .is_some_and(|usage| usage.call_count > 0),
543            ),
544            metadata: stage.metadata.clone(),
545            ..CrystallizationAction::default()
546        });
547    }
    // Tool recordings: always kind "tool_call", never fuzzy, deterministic unless rejected.
548    for tool in &run.tool_recordings {
549        actions.push(CrystallizationAction {
550            id: tool.tool_use_id.clone(),
551            kind: "tool_call".to_string(),
552            name: tool.tool_name.clone(),
553            timestamp: Some(tool.timestamp.clone()),
554            output: Some(json!(tool.result)),
555            observed_output: Some(json!(tool.result)),
            // NOTE(review): `as i64` can wrap if `duration_ms` is an unsigned
            // 64-bit value above i64::MAX — confirm the field type.
556            duration_ms: Some(tool.duration_ms as i64),
557            deterministic: Some(!tool.is_rejected),
558            fuzzy: Some(false),
559            metadata: BTreeMap::from([
560                ("args_hash".to_string(), json!(tool.args_hash)),
561                ("iteration".to_string(), json!(tool.iteration)),
562                ("is_rejected".to_string(), json!(tool.is_rejected)),
563            ]),
564            ..CrystallizationAction::default()
565        });
566    }
    // HITL questions: recorded as required approvals at the "hitl" boundary;
    // marked neither deterministic nor fuzzy.
567    for question in &run.hitl_questions {
568        actions.push(CrystallizationAction {
569            id: question.request_id.clone(),
570            kind: "human_approval".to_string(),
571            name: question.agent.clone(),
572            timestamp: Some(question.asked_at.clone()),
573            approval: Some(CrystallizationApproval {
574                prompt: Some(question.prompt.clone()),
575                required: true,
576                boundary: Some("hitl".to_string()),
577                ..CrystallizationApproval::default()
578            }),
579            deterministic: Some(false),
580            fuzzy: Some(false),
581            metadata: question
582                .trace_id
583                .as_ref()
584                .map(|trace_id| BTreeMap::from([("trace_id".to_string(), json!(trace_id))]))
585                .unwrap_or_default(),
586            ..CrystallizationAction::default()
587        });
588    }
    // Interleave the three sources by timestamp. The comparison is on the
    // timestamp strings themselves — assumes a lexicographically sortable
    // format such as RFC 3339; TODO confirm. `sort_by` is stable, so actions
    // with equal timestamps keep the stage → tool → question insertion order.
589    actions.sort_by(|left, right| left.timestamp.cmp(&right.timestamp));
590
591    CrystallizationTrace {
592        version: TRACE_SCHEMA_VERSION,
593        id: run.id.clone(),
594        workflow_id: Some(run.workflow_id.clone()),
595        started_at: Some(run.started_at.clone()),
596        finished_at: run.finished_at.clone(),
597        actions,
        // Run-level usage, when present, is carried over as the trace totals.
598        usage: run
599            .usage
600            .map(|usage| CrystallizationUsage {
601                model_calls: usage.call_count,
602                input_tokens: usage.input_tokens,
603                output_tokens: usage.output_tokens,
604                total_cost_usd: usage.total_cost,
605                wall_ms: usage.total_duration_ms,
606            })
607            .unwrap_or_default(),
608        metadata: run.metadata.clone(),
609        ..CrystallizationTrace::default()
610    }
611}
612
613fn normalize_trace(mut trace: CrystallizationTrace) -> CrystallizationTrace {
614    if trace.version == 0 {
615        trace.version = TRACE_SCHEMA_VERSION;
616    }
617    if trace.id.trim().is_empty() {
618        trace.id = new_id("trace");
619    }
620    if trace.source_hash.is_none() {
621        let payload = serde_json::to_vec(&trace.actions).unwrap_or_default();
622        trace.source_hash = Some(hash_bytes(&payload));
623    }
624    for (idx, action) in trace.actions.iter_mut().enumerate() {
625        if action.id.trim().is_empty() {
626            action.id = format!("action_{}", idx + 1);
627        }
628        if action.kind.trim().is_empty() {
629            action.kind = "action".to_string();
630        }
631        if action.name.trim().is_empty() {
632            action.name = action.kind.clone();
633        }
634        action.capabilities.sort();
635        action.capabilities.dedup();
636        action.required_secrets.sort();
637        action.required_secrets.dedup();
638        action.side_effects = sorted_side_effects(std::mem::take(&mut action.side_effects));
639        if action.cost.model_calls == 0 && action.kind == "model_call" {
640            action.cost.model_calls = 1;
641        }
642    }
643    if trace.usage == CrystallizationUsage::default() {
644        for action in &trace.actions {
645            trace.usage.model_calls += action.cost.model_calls;
646            trace.usage.input_tokens += action.cost.input_tokens;
647            trace.usage.output_tokens += action.cost.output_tokens;
648            trace.usage.total_cost_usd += action.cost.total_cost_usd;
649            trace.usage.wall_ms += action.cost.wall_ms + action.duration_ms.unwrap_or_default();
650        }
651    }
652    trace
653}
654
/// Mines at most one workflow candidate from the normalized traces.
///
/// The traces are reduced to per-action signatures, the best repeated
/// contiguous sequence is selected, and a candidate is assembled from the
/// actions found at each offset of that sequence in every example trace.
/// Returns an empty vector when no sequence is repeated in at least
/// `min_examples` traces.
///
/// The returned candidate's `shadow` report is left at its default; the
/// caller fills it in.
655fn mine_candidates(
656    traces: &[CrystallizationTrace],
657    min_examples: usize,
658    options: &CrystallizeOptions,
659) -> Vec<WorkflowCandidate> {
660    let signatures = traces
661        .iter()
662        .map(|trace| {
663            trace
664                .actions
665                .iter()
666                .map(action_signature)
667                .collect::<Vec<_>>()
668        })
669        .collect::<Vec<_>>();
670    let Some((sequence, examples)) = best_repeated_sequence(&signatures, min_examples) else {
671        return Vec::new();
672    };
673
    // Record where the sequence occurs in each example trace.
674    let mut example_refs = Vec::new();
675    for (trace_index, start_index) in &examples {
676        let trace = &traces[*trace_index];
677        example_refs.push(WorkflowCandidateExample {
678            trace_id: trace.id.clone(),
679            source_hash: trace.source_hash.clone().unwrap_or_default(),
680            start_index: *start_index,
681            action_ids: trace.actions[*start_index..*start_index + sequence.len()]
682                .iter()
683                .map(|action| action.id.clone())
684                .collect(),
685        });
686    }
687
688    let mut steps = Vec::new();
    // Candidate-wide parameter bookkeeping, keyed by sanitized parameter name.
689    let mut parameter_values: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
690    let mut parameter_paths: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
691    let mut rejection_reasons = Vec::new();
692    let mut warnings = Vec::new();
693
694    for step_index in 0..sequence.len() {
        // The action at this offset in every example; `first` (from the first
        // example) is the representative used for the step's descriptive fields.
695        let actions = examples
696            .iter()
697            .map(|(trace_index, start_index)| {
698                &traces[*trace_index].actions[*start_index + step_index]
699            })
700            .collect::<Vec<_>>();
701        let first = actions[0];
702        if !compatible_side_effects(&actions) {
703            rejection_reasons.push(format!(
704                "step {} '{}' has divergent side effects across examples",
705                step_index + 1,
706                first.name
707            ));
708        }
709
        // Explicitly declared scalar parameters always become candidate parameters.
710        let mut parameter_refs = BTreeSet::new();
711        for action in &actions {
712            for (name, value) in &action.parameters {
713                if is_scalar(value) {
714                    parameter_values
715                        .entry(sanitize_identifier(name))
716                        .or_default()
717                        .insert(json_scalar_string(value));
718                    parameter_paths
719                        .entry(sanitize_identifier(name))
720                        .or_default()
721                        .insert(format!("steps[{step_index}].parameters.{name}"));
722                    parameter_refs.insert(sanitize_identifier(name));
723                }
724            }
725        }
        // Further parameters are derived from the actions' `inputs` by a helper
        // defined outside this section.
726        collect_varying_parameters(
727            &actions,
728            "inputs",
729            |action| &action.inputs,
730            &mut parameter_values,
731            &mut parameter_paths,
732            &mut parameter_refs,
733        );
734
        // Fuzzy when any example is flagged fuzzy or the representative is a model call.
735        let fuzzy = first.fuzzy.unwrap_or(false)
736            || first.kind == "model_call"
737            || actions.iter().any(|action| action.fuzzy.unwrap_or(false));
738        if fuzzy {
739            warnings.push(format!(
740                "step {} '{}' remains fuzzy and requires review/LLM handling",
741                step_index + 1,
742                first.name
743            ));
744        }
745
746        steps.push(WorkflowCandidateStep {
            // Step numbers are 1-based; the `steps[..]` source paths above are 0-based.
747            index: step_index + 1,
748            kind: first.kind.clone(),
749            name: first.name.clone(),
750            segment: if fuzzy {
751                SegmentKind::Fuzzy
752            } else {
753                SegmentKind::Deterministic
754            },
755            parameter_refs: parameter_refs.into_iter().collect(),
756            constants: constants_for_action(first),
757            preconditions: preconditions_for_action(first),
758            side_effects: first.side_effects.clone(),
759            capabilities: sorted_strings(first.capabilities.iter().cloned()),
760            required_secrets: sorted_strings(first.required_secrets.iter().cloned()),
761            approval: first.approval.clone(),
762            expected_output: stable_expected_output(&actions),
763            review_notes: Vec::new(),
764        });
765    }
766
    // Every collected parameter is reported as required, with up to five
    // example values.
767    let parameters = parameter_values
768        .iter()
769        .map(|(name, values)| WorkflowCandidateParameter {
770            name: name.clone(),
771            source_paths: parameter_paths
772                .get(name)
773                .map(|paths| paths.iter().cloned().collect())
774                .unwrap_or_default(),
775            examples: values.iter().take(5).cloned().collect(),
776            required: true,
777        })
778        .collect::<Vec<_>>();
779
    // Candidate-level aggregates are collected from the per-step values.
780    let capabilities = sorted_strings(
781        steps
782            .iter()
783            .flat_map(|step| step.capabilities.iter().cloned()),
784    );
785    let required_secrets = sorted_strings(
786        steps
787            .iter()
788            .flat_map(|step| step.required_secrets.iter().cloned()),
789    );
790    let approval_points = steps
791        .iter()
792        .filter_map(|step| step.approval.clone())
793        .collect::<Vec<_>>();
794    let side_effects = sorted_side_effects(
795        steps
796            .iter()
797            .flat_map(|step| step.side_effects.iter().cloned())
798            .collect(),
799    );
800    let expected_outputs = steps
801        .iter()
802        .filter_map(|step| step.expected_output.clone())
803        .collect::<Vec<_>>();
804    let savings = estimate_savings(traces, &examples, &steps);
805    let confidence = confidence_for(
806        &examples,
807        traces.len(),
808        &steps,
809        rejection_reasons.is_empty(),
810    );
    // Caller-supplied names win; otherwise the name is inferred from the steps
    // and the package name is the workflow name with `_` replaced by `-`.
811    let name = options
812        .workflow_name
813        .clone()
814        .unwrap_or_else(|| infer_workflow_name(&steps));
815    let package_name = options
816        .package_name
817        .clone()
818        .unwrap_or_else(|| name.replace('_', "-"));
819
820    vec![WorkflowCandidate {
821        id: stable_candidate_id(&sequence, &example_refs),
822        name,
823        confidence,
824        sequence_signature: sequence,
825        parameters,
826        steps,
827        examples: example_refs.clone(),
828        capabilities: capabilities.clone(),
829        required_secrets: required_secrets.clone(),
830        approval_points,
831        side_effects,
832        expected_outputs,
833        warnings,
834        rejection_reasons,
835        promotion: PromotionMetadata {
836            source_trace_hashes: example_refs
837                .iter()
838                .map(|example| example.source_hash.clone())
839                .collect(),
840            author: options.author.clone(),
841            approver: options.approver.clone(),
842            created_at: now_rfc3339(),
843            version: "0.1.0".to_string(),
844            package_name,
845            capability_set: capabilities,
846            secrets_required: required_secrets,
847            rollback_target: Some("keep source traces and previous package version".to_string()),
848            eval_pack_link: options.eval_pack_link.clone(),
849        },
850        savings,
        // Filled in by the caller after the shadow comparison.
851        shadow: ShadowRunReport::default(),
852    }]
853}
854
855fn best_repeated_sequence(
856    signatures: &[Vec<String>],
857    min_examples: usize,
858) -> Option<RepeatedSequence> {
859    let mut occurrences: BTreeMap<Vec<String>, Vec<(usize, usize)>> = BTreeMap::new();
860    for (trace_index, trace_signatures) in signatures.iter().enumerate() {
861        for start in 0..trace_signatures.len() {
862            let max_len = (trace_signatures.len() - start).min(12);
863            for len in 2..=max_len {
864                let sequence = trace_signatures[start..start + len].to_vec();
865                occurrences
866                    .entry(sequence)
867                    .or_default()
868                    .push((trace_index, start));
869            }
870        }
871    }
872
873    occurrences
874        .into_iter()
875        .filter_map(|(sequence, positions)| {
876            let mut seen = BTreeSet::new();
877            let mut examples = Vec::new();
878            for (trace_index, start) in positions {
879                if seen.insert(trace_index) {
880                    examples.push((trace_index, start));
881                }
882            }
883            if examples.len() >= min_examples {
884                Some((sequence, examples))
885            } else {
886                None
887            }
888        })
889        .max_by(
890            |(left_sequence, left_examples), (right_sequence, right_examples)| {
891                left_examples
892                    .len()
893                    .cmp(&right_examples.len())
894                    .then_with(|| left_sequence.len().cmp(&right_sequence.len()))
895            },
896        )
897}
898
899fn action_signature(action: &CrystallizationAction) -> String {
900    let mut parameter_keys = action.parameters.keys().cloned().collect::<Vec<_>>();
901    parameter_keys.sort();
902    format!(
903        "{}:{}:{}",
904        action.kind,
905        action.name,
906        parameter_keys.join(",")
907    )
908}
909
910fn compatible_side_effects(actions: &[&CrystallizationAction]) -> bool {
911    let first = sorted_side_effects(actions[0].side_effects.clone());
912    actions
913        .iter()
914        .skip(1)
915        .all(|action| sorted_side_effects(action.side_effects.clone()) == first)
916}
917
918fn collect_varying_parameters(
919    actions: &[&CrystallizationAction],
920    root: &str,
921    value_for: impl Fn(&CrystallizationAction) -> &JsonValue,
922    parameter_values: &mut BTreeMap<String, BTreeSet<String>>,
923    parameter_paths: &mut BTreeMap<String, BTreeSet<String>>,
924    parameter_refs: &mut BTreeSet<String>,
925) {
926    let mut paths = BTreeMap::<String, Vec<JsonValue>>::new();
927    for action in actions {
928        collect_scalar_paths(value_for(action), root, &mut paths);
929    }
930    for (path, values) in paths {
931        if values.len() != actions.len() {
932            continue;
933        }
934        let unique = values
935            .iter()
936            .map(json_scalar_string)
937            .collect::<BTreeSet<_>>();
938        if unique.len() < 2 {
939            continue;
940        }
941        let name = parameter_name_for_path(&path);
942        parameter_values
943            .entry(name.clone())
944            .or_default()
945            .extend(unique);
946        parameter_paths
947            .entry(name.clone())
948            .or_default()
949            .insert(path);
950        parameter_refs.insert(name);
951    }
952}
953
954fn collect_scalar_paths(
955    value: &JsonValue,
956    prefix: &str,
957    out: &mut BTreeMap<String, Vec<JsonValue>>,
958) {
959    match value {
960        JsonValue::Object(map) => {
961            for (key, child) in map {
962                collect_scalar_paths(child, &format!("{prefix}.{key}"), out);
963            }
964        }
965        JsonValue::Array(items) => {
966            for (idx, child) in items.iter().enumerate() {
967                collect_scalar_paths(child, &format!("{prefix}[{idx}]"), out);
968            }
969        }
970        _ if is_scalar(value) => {
971            out.entry(prefix.to_string())
972                .or_default()
973                .push(value.clone());
974        }
975        _ => {}
976    }
977}
978
979fn parameter_name_for_path(path: &str) -> String {
980    let lower = path.to_ascii_lowercase();
981    for (needle, name) in [
982        ("version", "version"),
983        ("repo_path", "repo_path"),
984        ("repo", "repo_path"),
985        ("branch_name", "branch_name"),
986        ("branch", "branch_name"),
987        ("release_target", "release_target"),
988        ("target", "release_target"),
989    ] {
990        if lower.contains(needle) {
991            return name.to_string();
992        }
993    }
994    let tail = path
995        .rsplit(['.', '['])
996        .next()
997        .unwrap_or("param")
998        .trim_end_matches(']');
999    sanitize_identifier(tail)
1000}
1001
1002fn constants_for_action(action: &CrystallizationAction) -> BTreeMap<String, JsonValue> {
1003    let mut constants = BTreeMap::new();
1004    constants.insert("kind".to_string(), json!(action.kind));
1005    constants.insert("name".to_string(), json!(action.name));
1006    if action.deterministic.unwrap_or(false) {
1007        constants.insert("deterministic".to_string(), json!(true));
1008    }
1009    constants
1010}
1011
1012fn preconditions_for_action(action: &CrystallizationAction) -> Vec<String> {
1013    let mut out = Vec::new();
1014    for capability in &action.capabilities {
1015        out.push(format!("capability '{capability}' is available"));
1016    }
1017    for secret in &action.required_secrets {
1018        out.push(format!("secret '{secret}' is configured"));
1019    }
1020    if let Some(approval) = &action.approval {
1021        if approval.required {
1022            out.push("human approval boundary is preserved".to_string());
1023        }
1024    }
1025    out
1026}
1027
1028fn stable_expected_output(actions: &[&CrystallizationAction]) -> Option<JsonValue> {
1029    let first = actions[0]
1030        .observed_output
1031        .as_ref()
1032        .or(actions[0].output.as_ref())?;
1033    if actions
1034        .iter()
1035        .all(|action| action.observed_output.as_ref().or(action.output.as_ref()) == Some(first))
1036    {
1037        Some(first.clone())
1038    } else {
1039        None
1040    }
1041}
1042
1043fn shadow_candidate(
1044    candidate: &WorkflowCandidate,
1045    traces: &[CrystallizationTrace],
1046) -> ShadowRunReport {
1047    let mut failures = Vec::new();
1048    let mut results = Vec::new();
1049    for example in &candidate.examples {
1050        let Some(trace) = traces.iter().find(|trace| trace.id == example.trace_id) else {
1051            failures.push(format!("missing source trace {}", example.trace_id));
1052            continue;
1053        };
1054        let mut details = Vec::new();
1055        let end = example.start_index + candidate.steps.len();
1056        if end > trace.actions.len() {
1057            details.push("candidate sequence extends past trace action list".to_string());
1058        } else {
1059            let signatures = trace.actions[example.start_index..end]
1060                .iter()
1061                .map(action_signature)
1062                .collect::<Vec<_>>();
1063            if signatures != candidate.sequence_signature {
1064                details.push("action sequence signature changed".to_string());
1065            }
1066            for (offset, step) in candidate.steps.iter().enumerate() {
1067                let action = &trace.actions[example.start_index + offset];
1068                if sorted_side_effects(action.side_effects.clone()) != step.side_effects {
1069                    details.push(format!(
1070                        "step {} side effects differ for action {}",
1071                        step.index, action.id
1072                    ));
1073                }
1074                if action.approval.as_ref().map(|approval| approval.required)
1075                    != step.approval.as_ref().map(|approval| approval.required)
1076                {
1077                    details.push(format!("step {} approval boundary differs", step.index));
1078                }
1079                if step.segment == SegmentKind::Deterministic {
1080                    if let Some(expected) = &step.expected_output {
1081                        let actual = action.observed_output.as_ref().or(action.output.as_ref());
1082                        if actual != Some(expected) {
1083                            details
1084                                .push(format!("step {} deterministic output differs", step.index));
1085                        }
1086                    }
1087                }
1088            }
1089        }
1090        let pass = details.is_empty();
1091        if !pass {
1092            failures.push(format!("trace {} failed shadow comparison", trace.id));
1093        }
1094        results.push(ShadowTraceResult {
1095            trace_id: trace.id.clone(),
1096            pass,
1097            details,
1098        });
1099    }
1100    ShadowRunReport {
1101        pass: failures.is_empty(),
1102        compared_traces: results.len(),
1103        failures,
1104        traces: results,
1105    }
1106}
1107
1108fn estimate_savings(
1109    traces: &[CrystallizationTrace],
1110    examples: &[(usize, usize)],
1111    steps: &[WorkflowCandidateStep],
1112) -> SavingsEstimate {
1113    let mut estimate = SavingsEstimate::default();
1114    for (trace_index, start_index) in examples {
1115        let trace = &traces[*trace_index];
1116        for action in &trace.actions[*start_index..*start_index + steps.len()] {
1117            if action.kind == "model_call" || action.fuzzy.unwrap_or(false) {
1118                estimate.remaining_model_calls += action.cost.model_calls.max(1);
1119            } else {
1120                estimate.model_calls_avoided += action.cost.model_calls;
1121                estimate.input_tokens_avoided += action.cost.input_tokens;
1122                estimate.output_tokens_avoided += action.cost.output_tokens;
1123                estimate.estimated_cost_usd_avoided += action.cost.total_cost_usd;
1124                estimate.wall_ms_avoided +=
1125                    action.cost.wall_ms + action.duration_ms.unwrap_or_default();
1126            }
1127        }
1128    }
1129    estimate.cpu_runtime_cost_usd = 0.0;
1130    estimate
1131}
1132
1133fn confidence_for(
1134    examples: &[(usize, usize)],
1135    trace_count: usize,
1136    steps: &[WorkflowCandidateStep],
1137    safe: bool,
1138) -> f64 {
1139    if !safe || trace_count == 0 {
1140        return 0.0;
1141    }
1142    let coverage = examples.len() as f64 / trace_count as f64;
1143    let deterministic = steps
1144        .iter()
1145        .filter(|step| step.segment == SegmentKind::Deterministic)
1146        .count() as f64
1147        / steps.len().max(1) as f64;
1148    ((coverage * 0.65) + (deterministic * 0.35)).min(0.99)
1149}
1150
1151fn infer_workflow_name(steps: &[WorkflowCandidateStep]) -> String {
1152    let names = steps
1153        .iter()
1154        .map(|step| step.name.to_ascii_lowercase())
1155        .collect::<Vec<_>>()
1156        .join("_");
1157    if names.contains("version") || names.contains("release") {
1158        "crystallized_version_bump".to_string()
1159    } else {
1160        "crystallized_workflow".to_string()
1161    }
1162}
1163
1164pub fn generate_harn_code(candidate: &WorkflowCandidate) -> String {
1165    let mut out = String::new();
1166    let params = if candidate.parameters.is_empty() {
1167        "task".to_string()
1168    } else {
1169        candidate
1170            .parameters
1171            .iter()
1172            .map(|parameter| parameter.name.as_str())
1173            .collect::<Vec<_>>()
1174            .join(", ")
1175    };
1176    writeln!(out, "/**").unwrap();
1177    writeln!(
1178        out,
1179        " * Generated by harn crystallize. Review before promotion."
1180    )
1181    .unwrap();
1182    writeln!(out, " * Candidate: {}", candidate.id).unwrap();
1183    writeln!(
1184        out,
1185        " * Source trace hashes: {}",
1186        candidate.promotion.source_trace_hashes.join(", ")
1187    )
1188    .unwrap();
1189    writeln!(
1190        out,
1191        " * Capabilities: {}",
1192        if candidate.capabilities.is_empty() {
1193            "none".to_string()
1194        } else {
1195            candidate.capabilities.join(", ")
1196        }
1197    )
1198    .unwrap();
1199    writeln!(
1200        out,
1201        " * Required secrets: {}",
1202        if candidate.required_secrets.is_empty() {
1203            "none".to_string()
1204        } else {
1205            candidate.required_secrets.join(", ")
1206        }
1207    )
1208    .unwrap();
1209    writeln!(out, " */").unwrap();
1210    writeln!(out, "pipeline {}({}) {{", candidate.name, params).unwrap();
1211    writeln!(out, "  let review_warnings = []").unwrap();
1212    for step in &candidate.steps {
1213        writeln!(out, "  // Step {}: {} {}", step.index, step.kind, step.name).unwrap();
1214        for side_effect in &step.side_effects {
1215            writeln!(
1216                out,
1217                "  // side_effect: {} {}",
1218                side_effect.kind, side_effect.target
1219            )
1220            .unwrap();
1221        }
1222        if let Some(approval) = &step.approval {
1223            if approval.required {
1224                writeln!(
1225                    out,
1226                    "  // approval_required: {}",
1227                    approval
1228                        .boundary
1229                        .clone()
1230                        .unwrap_or_else(|| "human_review".to_string())
1231                )
1232                .unwrap();
1233            }
1234        }
1235        if step.segment == SegmentKind::Fuzzy {
1236            writeln!(
1237                out,
1238                "  // TODO: fuzzy segment still needs LLM/reviewer handling before deterministic promotion."
1239            )
1240            .unwrap();
1241            writeln!(
1242                out,
1243                "  review_warnings.push(\"fuzzy step: {}\")",
1244                escape_harn_string(&step.name)
1245            )
1246            .unwrap();
1247        }
1248        writeln!(
1249            out,
1250            "  log(\"crystallized step {}: {}\")",
1251            step.index,
1252            escape_harn_string(&step.name)
1253        )
1254        .unwrap();
1255    }
1256    writeln!(
1257        out,
1258        "  return {{status: \"shadow_ready\", candidate_id: \"{}\", review_warnings: review_warnings}}",
1259        escape_harn_string(&candidate.id)
1260    )
1261    .unwrap();
1262    writeln!(out, "}}").unwrap();
1263    out
1264}
1265
1266fn rejected_workflow_stub(rejected: &[WorkflowCandidate]) -> String {
1267    let mut out = String::new();
1268    writeln!(
1269        out,
1270        "// Generated by harn crystallize. No safe candidate was proposed."
1271    )
1272    .unwrap();
1273    writeln!(out, "pipeline crystallized_workflow(task) {{").unwrap();
1274    writeln!(out, "  log(\"no safe crystallization candidate\")").unwrap();
1275    writeln!(
1276        out,
1277        "  return {{status: \"rejected\", rejected_candidates: {}}}",
1278        rejected.len()
1279    )
1280    .unwrap();
1281    writeln!(out, "}}").unwrap();
1282    out
1283}
1284
1285pub fn generate_eval_pack(candidate: &WorkflowCandidate) -> String {
1286    let mut out = String::new();
1287    writeln!(out, "version = 1").unwrap();
1288    writeln!(
1289        out,
1290        "id = \"{}-crystallization\"",
1291        candidate.promotion.package_name
1292    )
1293    .unwrap();
1294    writeln!(
1295        out,
1296        "name = \"{} crystallization shadow evals\"",
1297        candidate.name
1298    )
1299    .unwrap();
1300    writeln!(out).unwrap();
1301    writeln!(out, "[package]").unwrap();
1302    writeln!(out, "name = \"{}\"", candidate.promotion.package_name).unwrap();
1303    writeln!(out, "version = \"{}\"", candidate.promotion.version).unwrap();
1304    writeln!(out).unwrap();
1305    for example in &candidate.examples {
1306        writeln!(out, "[[fixtures]]").unwrap();
1307        writeln!(out, "id = \"{}\"", example.trace_id).unwrap();
1308        writeln!(out, "kind = \"jsonl-trace\"").unwrap();
1309        writeln!(out, "trace_id = \"{}\"", example.trace_id).unwrap();
1310        writeln!(out).unwrap();
1311    }
1312    writeln!(out, "[[rubrics]]").unwrap();
1313    writeln!(out, "id = \"shadow-determinism\"").unwrap();
1314    writeln!(out, "kind = \"deterministic\"").unwrap();
1315    writeln!(out).unwrap();
1316    writeln!(out, "[[rubrics.assertions]]").unwrap();
1317    writeln!(out, "kind = \"crystallization-shadow\"").unwrap();
1318    writeln!(
1319        out,
1320        "expected = {{ candidate_id = \"{}\", pass = true }}",
1321        candidate.id
1322    )
1323    .unwrap();
1324    writeln!(out).unwrap();
1325    writeln!(out, "[[cases]]").unwrap();
1326    writeln!(out, "id = \"{}-shadow\"", candidate.name).unwrap();
1327    writeln!(out, "name = \"{} shadow replay\"", candidate.name).unwrap();
1328    writeln!(out, "rubrics = [\"shadow-determinism\"]").unwrap();
1329    writeln!(out, "severity = \"blocking\"").unwrap();
1330    out
1331}
1332
1333fn stable_candidate_id(sequence: &[String], examples: &[WorkflowCandidateExample]) -> String {
1334    let mut hasher = Sha256::new();
1335    for item in sequence {
1336        hasher.update(item.as_bytes());
1337        hasher.update([0]);
1338    }
1339    for example in examples {
1340        hasher.update(example.source_hash.as_bytes());
1341        hasher.update([0]);
1342    }
1343    format!("candidate_{}", hex_prefix(hasher.finalize().as_slice(), 16))
1344}
1345
1346fn hash_bytes(bytes: &[u8]) -> String {
1347    let mut hasher = Sha256::new();
1348    hasher.update(bytes);
1349    format!("sha256:{}", hex::encode(hasher.finalize()))
1350}
1351
1352fn hex_prefix(bytes: &[u8], chars: usize) -> String {
1353    hex::encode(bytes).chars().take(chars).collect::<String>()
1354}
1355
/// Returns the distinct, non-blank items in ascending order.
///
/// Items that are empty or whitespace-only are dropped; the remaining items
/// are kept exactly as given (no trimming).
fn sorted_strings(items: impl Iterator<Item = String>) -> Vec<String> {
    let unique: BTreeSet<String> = items
        .filter(|item| !item.trim().is_empty())
        .collect();
    unique.into_iter().collect()
}
1361
1362fn sorted_side_effects(items: Vec<CrystallizationSideEffect>) -> Vec<CrystallizationSideEffect> {
1363    let mut items = items
1364        .into_iter()
1365        .filter(|item| !item.kind.trim().is_empty() || !item.target.trim().is_empty())
1366        .collect::<Vec<_>>();
1367    items.sort_by_key(side_effect_sort_key);
1368    items.dedup_by(|left, right| side_effect_sort_key(left) == side_effect_sort_key(right));
1369    items
1370}
1371
1372fn side_effect_sort_key(item: &CrystallizationSideEffect) -> String {
1373    format!(
1374        "{}\x1f{}\x1f{}\x1f{}",
1375        item.kind,
1376        item.target,
1377        item.capability.clone().unwrap_or_default(),
1378        item.mutation.clone().unwrap_or_default()
1379    )
1380}
1381
1382fn is_scalar(value: &JsonValue) -> bool {
1383    matches!(
1384        value,
1385        JsonValue::String(_) | JsonValue::Number(_) | JsonValue::Bool(_) | JsonValue::Null
1386    )
1387}
1388
1389fn json_scalar_string(value: &JsonValue) -> String {
1390    match value {
1391        JsonValue::String(value) => value.clone(),
1392        other => other.to_string(),
1393    }
1394}
1395
/// Normalizes `raw` into a lowercase ASCII identifier.
///
/// ASCII letters, digits, and `_` are kept (letters lowercased); each run of
/// other characters collapses into a single `_`; leading and trailing
/// underscores are trimmed. An empty result becomes `"param"`. A result that
/// starts with a digit is prefixed with `_` so it stays a valid identifier
/// (for example an array-index path tail such as `0` becomes `_0`).
fn sanitize_identifier(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    for ch in raw.chars() {
        if ch.is_ascii_alphanumeric() || ch == '_' {
            out.push(ch.to_ascii_lowercase());
        } else if !out.ends_with('_') {
            out.push('_');
        }
    }

    let trimmed = out.trim_matches('_');
    match trimmed.chars().next() {
        None => "param".to_string(),
        // The digit guard must run after trimming: an underscore inserted
        // earlier would be stripped again by `trim_matches`.
        Some(first) if first.is_ascii_digit() => format!("_{trimmed}"),
        Some(_) => trimmed.to_string(),
    }
}
1415
/// Escapes backslashes and double quotes for a double-quoted Harn string.
fn escape_harn_string(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        if matches!(ch, '\\' | '"') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
1419
1420// ===== Crystallization bundle =====
1421//
1422// A bundle is a directory layout that Harn writes and Harn Cloud (or any
1423// other importer) reads without bespoke glue. The contract is:
1424//
1425//   bundle/
1426//     candidate.json        # versioned manifest documented below
1427//     workflow.harn         # generated/reviewable workflow code
1428//     report.json           # full mining/shadow/eval report
1429//     harn.eval.toml        # generated eval pack when available (optional)
1430//     fixtures/             # redacted replay fixtures referenced by the
1431//                           # report (optional, only when --bundle is used
1432//                           # with `harn crystallize` and traces were on disk)
1433//
1434// `candidate.json` is the authoritative manifest. It must include the
1435// `schema` and `schema_version` markers. Cloud importers MUST reject any
1436// bundle whose `schema` is not exactly `harn.crystallization.candidate.bundle`
1437// or whose `schema_version` is greater than the highest version they
1438// understand. Only the documented additive fields may be added without
1439// bumping `schema_version`.
1440
/// Identifies the tool that produced a bundle manifest.
///
/// Fields missing during deserialization fall back to the `Default` impl.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleGenerator {
    /// Name of the generating tool (`"harn"` by default).
    pub tool: String,
    /// Version of the generating tool (this crate's version by default).
    pub version: String,
}

impl Default for BundleGenerator {
    /// Describes this build: tool `harn` at the crate version captured at
    /// compile time from `CARGO_PKG_VERSION`.
    fn default() -> Self {
        Self {
            tool: "harn".to_string(),
            version: env!("CARGO_PKG_VERSION").to_string(),
        }
    }
}
1456
/// Manifest entry describing the generated workflow file and the package
/// identity proposed for it.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleWorkflowRef {
    /// Relative path inside the bundle directory.
    pub path: String,
    /// Short identifier used in `pipeline NAME(...)`.
    pub name: String,
    /// Logical package name promotion uses to register the workflow.
    pub package_name: String,
    /// Initial workflow version proposed for promotion.
    pub package_version: String,
}
1469
/// Manifest entry for one source trace that backs the candidate.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleSourceTrace {
    /// Identifier of the source trace.
    pub trace_id: String,
    /// Hash recorded for the source trace.
    pub source_hash: String,
    /// Optional human-visible URL (PR, issue, run record path) for the
    /// trace. `None` when the trace was loaded from an in-memory store.
    pub source_url: Option<String>,
    /// Optional cloud-side receipt id when the trace was already promoted
    /// into a tenant receipt. Cloud importers use this to wire candidate
    /// evidence to existing receipts without round-tripping the raw payload.
    pub source_receipt_id: Option<String>,
    /// Relative path of the redacted fixture inside the bundle, if one
    /// was emitted.
    pub fixture_path: Option<String>,
}
1486
1487#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
1488#[serde(default)]
1489pub struct BundleStep {
1490    pub index: usize,
1491    pub kind: String,
1492    pub name: String,
1493    pub segment: SegmentKind,
1494    pub parameter_refs: Vec<String>,
1495    pub side_effects: Vec<CrystallizationSideEffect>,
1496    pub capabilities: Vec<String>,
1497    pub required_secrets: Vec<String>,
1498    pub approval: Option<CrystallizationApproval>,
1499    pub review_notes: Vec<String>,
1500}
1501
1502impl BundleStep {
1503    fn from_candidate_step(step: &WorkflowCandidateStep) -> Self {
1504        Self {
1505            index: step.index,
1506            kind: step.kind.clone(),
1507            name: step.name.clone(),
1508            segment: step.segment.clone(),
1509            parameter_refs: step.parameter_refs.clone(),
1510            side_effects: step.side_effects.clone(),
1511            capabilities: step.capabilities.clone(),
1512            required_secrets: step.required_secrets.clone(),
1513            approval: step.approval.clone(),
1514            review_notes: step.review_notes.clone(),
1515        }
1516    }
1517}
1518
/// Manifest entry pointing at the eval pack shipped with the bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleEvalPackRef {
    /// Relative path of the eval pack inside the bundle directory.
    pub path: String,
    /// Optional external link the eval pack also lives at (e.g. a hosted
    /// `eval-pack://` URI when the bundle was promoted into a tenant).
    pub link: Option<String>,
}
1528
/// Manifest entry for one fixture file listed in the bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleFixtureRef {
    /// Path of the fixture file (presumably relative to the bundle
    /// directory, like the other manifest paths; confirm at the writer).
    pub path: String,
    /// Identifier of the trace the fixture was produced from.
    pub trace_id: String,
    /// Hash recorded for the source trace.
    pub source_hash: String,
    /// Whether the fixture is marked as redacted.
    pub redacted: bool,
}
1537
/// Promotion metadata recorded in the bundle manifest.
///
/// Fields missing during deserialization fall back to the `Default` impl.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundlePromotion {
    /// Owner of the promotion, if known.
    pub owner: Option<String>,
    /// Approver of the promotion, if known.
    pub approver: Option<String>,
    /// Author of the candidate, if known.
    pub author: Option<String>,
    /// Logical rollout strategy. Defaults to `shadow_then_canary`. Hosted
    /// surfaces may extend this enum but must keep existing values stable.
    pub rollout_policy: String,
    /// Description of what to roll back to, if recorded.
    pub rollback_target: Option<String>,
    /// Creation timestamp, stored as a string.
    pub created_at: String,
    /// Version proposed for the workflow.
    pub workflow_version: String,
    /// Package name proposed for the workflow.
    pub package_name: String,
}

impl Default for BundlePromotion {
    /// Empty promotion metadata with the default rollout policy.
    fn default() -> Self {
        Self {
            owner: None,
            approver: None,
            author: None,
            rollout_policy: DEFAULT_ROLLOUT_POLICY.to_string(),
            rollback_target: None,
            created_at: String::new(),
            workflow_version: String::new(),
            package_name: String::new(),
        }
    }
}
1567
/// Summary of the redaction recorded for a bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleRedactionSummary {
    /// Whether redaction was applied.
    pub applied: bool,
    /// Names of the redaction rules recorded for the bundle.
    pub rules: Vec<String>,
    /// Human-readable summary text.
    pub summary: String,
    /// Number of fixture files copied into the bundle (0 when no fixture
    /// directory was emitted).
    pub fixture_count: usize,
}
1578
/// Contents of `candidate.json`, the authoritative manifest of a
/// crystallization bundle.
///
/// Importers are expected to check `schema` and `schema_version` before
/// trusting the remaining fields.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundleManifest {
    /// Schema marker; expected to equal `BUNDLE_SCHEMA`.
    pub schema: String,
    /// Schema version; see `BUNDLE_SCHEMA_VERSION`.
    pub schema_version: u32,
    /// Generation timestamp, stored as a string.
    pub generated_at: String,
    /// Tool and version that produced the bundle.
    pub generator: BundleGenerator,
    /// Whether the bundle holds a candidate, a plan-only candidate, or a
    /// rejection.
    pub kind: BundleKind,
    /// Identifier of the selected candidate.
    pub candidate_id: String,
    /// Stable key importers can use to dedupe bundles across runs.
    pub external_key: String,
    /// Human-readable title of the bundle.
    pub title: String,
    /// Optional team label.
    pub team: Option<String>,
    /// Optional repository label.
    pub repo: Option<String>,
    /// Risk level label.
    pub risk_level: String,
    /// Reference to the generated workflow file and package identity.
    pub workflow: BundleWorkflowRef,
    /// Hashes of the traces the candidate was mined from.
    pub source_trace_hashes: Vec<String>,
    /// Per-trace source details.
    pub source_traces: Vec<BundleSourceTrace>,
    /// Steps whose segment is deterministic.
    pub deterministic_steps: Vec<BundleStep>,
    /// Steps whose segment is not deterministic.
    pub fuzzy_steps: Vec<BundleStep>,
    /// Side effects recorded for the candidate.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities recorded for the candidate.
    pub capabilities: Vec<String>,
    /// Secrets recorded for the candidate.
    pub required_secrets: Vec<String>,
    /// Estimated savings.
    pub savings: SavingsEstimate,
    /// Shadow comparison report.
    pub shadow: ShadowRunReport,
    /// Eval pack reference, when an eval pack is part of the bundle.
    pub eval_pack: Option<BundleEvalPackRef>,
    /// Fixture files listed for the bundle.
    pub fixtures: Vec<BundleFixtureRef>,
    /// Promotion metadata.
    pub promotion: BundlePromotion,
    /// Redaction summary.
    pub redaction: BundleRedactionSummary,
    /// Confidence score of the candidate.
    pub confidence: f64,
    /// Reasons the candidate was rejected, if any.
    pub rejection_reasons: Vec<String>,
    /// Warnings for reviewers.
    pub warnings: Vec<String>,
}
1611
/// Classification of a bundle, serialized in `snake_case`
/// (`candidate`, `plan_only`, `rejected`). Defaults to `Candidate`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum BundleKind {
    /// A normal candidate that passed shadow comparison and is ready for
    /// review and promotion.
    #[default]
    Candidate,
    /// A "plan-only" candidate: every step has a side-effect-free, in-process
    /// outcome (e.g. classify and write a receipt). Cloud importers can
    /// promote these without explicit external-side-effect approval.
    PlanOnly,
    /// No safe candidate was selected. The bundle still records what was
    /// attempted, the rejection reasons, and any rejected candidates so
    /// reviewers can debug or feed it back into mining.
    Rejected,
}
1628
/// Caller-supplied overrides used when building a bundle.
///
/// For `external_key`, `title`, `risk_level`, and `rollout_policy`, a value
/// that is `None` or blank is replaced by a default when the bundle is
/// built.
#[derive(Clone, Debug, Default)]
pub struct BundleOptions {
    /// Stable identifier downstream cloud importers use to dedupe bundles
    /// across runs (defaults to a sanitized workflow name).
    pub external_key: Option<String>,
    /// Bundle title; inferred from the selected candidate when unset.
    pub title: Option<String>,
    /// Optional team label (presumably copied into the manifest; confirm in
    /// `build_crystallization_bundle`).
    pub team: Option<String>,
    /// Optional repository label (presumably copied into the manifest;
    /// confirm in `build_crystallization_bundle`).
    pub repo: Option<String>,
    /// Risk level label; inferred from the selected candidate when unset.
    pub risk_level: Option<String>,
    /// Rollout policy; defaults to `shadow_then_canary` when unset.
    pub rollout_policy: Option<String>,
}
1640
/// In-memory form of a crystallization bundle: the manifest plus the
/// artifacts that accompany it.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundle {
    /// Manifest corresponding to `candidate.json`.
    pub manifest: CrystallizationBundleManifest,
    /// Full crystallization report.
    pub report: CrystallizationReport,
    /// Generated workflow source code.
    pub harn_code: String,
    /// Generated eval pack as TOML text.
    pub eval_pack_toml: String,
    /// Traces attached to the bundle as fixtures.
    pub fixtures: Vec<CrystallizationTrace>,
}
1650
/// Result of validating a bundle on disk: identifying fields read from the
/// bundle, one status flag per checked artifact, and the list of problems
/// found.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleValidation {
    /// Directory of the bundle that was validated.
    pub bundle_dir: String,
    /// Schema marker recorded for the bundle.
    pub schema: String,
    /// Schema version recorded for the bundle.
    pub schema_version: u32,
    /// Bundle kind recorded for the bundle.
    pub kind: BundleKind,
    /// Candidate id recorded for the bundle.
    pub candidate_id: String,
    /// Status flag for the manifest.
    pub manifest_ok: bool,
    /// Status flag for the workflow file.
    pub workflow_ok: bool,
    /// Status flag for the report.
    pub report_ok: bool,
    /// Status flag for the eval pack.
    pub eval_pack_ok: bool,
    /// Status flag for the fixtures.
    pub fixtures_ok: bool,
    /// Status flag for redaction.
    pub redaction_ok: bool,
    /// Human-readable descriptions of every problem found.
    pub problems: Vec<String>,
}

impl BundleValidation {
    /// Returns `true` when no problems were recorded. Only `problems` is
    /// consulted; the individual `*_ok` flags are not checked here.
    pub fn is_ok(&self) -> bool {
        self.problems.is_empty()
    }
}
1674
1675/// Build an in-memory bundle from already-mined artifacts. The traces
1676/// passed here are the same normalized traces used to mine the candidate;
1677/// they will be redacted before being attached as fixtures.
1678pub fn build_crystallization_bundle(
1679    artifacts: CrystallizationArtifacts,
1680    traces: &[CrystallizationTrace],
1681    options: BundleOptions,
1682) -> Result<CrystallizationBundle, VmError> {
1683    let CrystallizationArtifacts {
1684        report,
1685        harn_code,
1686        eval_pack_toml,
1687    } = artifacts;
1688
1689    let (selected, kind) = match report
1690        .selected_candidate_id
1691        .as_deref()
1692        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
1693    {
1694        Some(candidate) => {
1695            let kind = if candidate_is_plan_only(candidate) {
1696                BundleKind::PlanOnly
1697            } else {
1698                BundleKind::Candidate
1699            };
1700            (Some(candidate), kind)
1701        }
1702        None => (None, BundleKind::Rejected),
1703    };
1704
1705    let workflow_name = selected
1706        .map(|candidate| candidate.name.clone())
1707        .unwrap_or_else(|| "crystallized_workflow".to_string());
1708    let package_name = selected
1709        .map(|candidate| candidate.promotion.package_name.clone())
1710        .unwrap_or_else(|| workflow_name.replace('_', "-"));
1711    let workflow_version = selected
1712        .map(|candidate| candidate.promotion.version.clone())
1713        .unwrap_or_else(|| "0.0.0".to_string());
1714
1715    let manifest_workflow = BundleWorkflowRef {
1716        path: BUNDLE_WORKFLOW_FILE.to_string(),
1717        name: workflow_name.clone(),
1718        package_name: package_name.clone(),
1719        package_version: workflow_version.clone(),
1720    };
1721
1722    let external_key = options
1723        .external_key
1724        .clone()
1725        .filter(|key| !key.trim().is_empty())
1726        .unwrap_or_else(|| sanitize_external_key(&workflow_name));
1727    let title = options
1728        .title
1729        .clone()
1730        .filter(|title| !title.trim().is_empty())
1731        .unwrap_or_else(|| infer_bundle_title(selected, &workflow_name));
1732    let risk_level = options
1733        .risk_level
1734        .clone()
1735        .filter(|risk| !risk.trim().is_empty())
1736        .unwrap_or_else(|| infer_risk_level(selected));
1737    let rollout_policy = options
1738        .rollout_policy
1739        .clone()
1740        .filter(|policy| !policy.trim().is_empty())
1741        .unwrap_or_else(|| DEFAULT_ROLLOUT_POLICY.to_string());
1742
1743    let (deterministic_steps, fuzzy_steps) = match selected {
1744        Some(candidate) => candidate
1745            .steps
1746            .iter()
1747            .map(BundleStep::from_candidate_step)
1748            .partition::<Vec<_>, _>(|step| step.segment == SegmentKind::Deterministic),
1749        None => (Vec::new(), Vec::new()),
1750    };
1751
1752    let source_trace_hashes = selected
1753        .map(|candidate| candidate.promotion.source_trace_hashes.clone())
1754        .unwrap_or_default();
1755
1756    let mut source_traces = Vec::new();
1757    let mut fixture_refs = Vec::new();
1758    let mut fixture_payloads = Vec::new();
1759    if let Some(candidate) = selected {
1760        for example in &candidate.examples {
1761            let trace = traces.iter().find(|trace| trace.id == example.trace_id);
1762            let fixture_relative = trace.map(|trace| {
1763                format!(
1764                    "{BUNDLE_FIXTURES_DIR}/{}.json",
1765                    sanitize_fixture_name(&trace.id)
1766                )
1767            });
1768            source_traces.push(BundleSourceTrace {
1769                trace_id: example.trace_id.clone(),
1770                source_hash: example.source_hash.clone(),
1771                source_url: trace.and_then(|trace| trace.source.clone()),
1772                source_receipt_id: trace
1773                    .and_then(|trace| trace.metadata.get("source_receipt_id"))
1774                    .and_then(|value| value.as_str().map(str::to_string)),
1775                fixture_path: fixture_relative.clone(),
1776            });
1777            if let (Some(trace), Some(fixture_path)) = (trace, fixture_relative.clone()) {
1778                let mut redacted = trace.clone();
1779                redact_trace_for_bundle(&mut redacted);
1780                fixture_refs.push(BundleFixtureRef {
1781                    path: fixture_path,
1782                    trace_id: trace.id.clone(),
1783                    source_hash: trace.source_hash.clone().unwrap_or_default(),
1784                    redacted: true,
1785                });
1786                fixture_payloads.push(redacted);
1787            }
1788        }
1789    }
1790
1791    // Owner defaults to author so cloud importers always have a populated
1792    // ownership pointer, but stays separate from `author` so reviewers can
1793    // assign a different owner in the manifest before promotion.
1794    let author = selected.and_then(|candidate| candidate.promotion.author.clone());
1795    let promotion = BundlePromotion {
1796        owner: author.clone(),
1797        approver: selected.and_then(|candidate| candidate.promotion.approver.clone()),
1798        author,
1799        rollout_policy,
1800        rollback_target: selected.and_then(|candidate| candidate.promotion.rollback_target.clone()),
1801        created_at: now_rfc3339(),
1802        workflow_version,
1803        package_name: package_name.clone(),
1804    };
1805
1806    let redaction = BundleRedactionSummary {
1807        applied: !fixture_payloads.is_empty(),
1808        rules: vec![
1809            "sensitive_keys".to_string(),
1810            "secret_value_heuristic".to_string(),
1811        ],
1812        summary: if fixture_payloads.is_empty() {
1813            "no fixtures emitted".to_string()
1814        } else {
1815            "fixture payloads scrubbed of secret-like values and sensitive keys before write"
1816                .to_string()
1817        },
1818        fixture_count: fixture_payloads.len(),
1819    };
1820
1821    let eval_pack = if eval_pack_toml.trim().is_empty() {
1822        None
1823    } else {
1824        Some(BundleEvalPackRef {
1825            path: BUNDLE_EVAL_PACK_FILE.to_string(),
1826            link: selected
1827                .and_then(|candidate| candidate.promotion.eval_pack_link.clone())
1828                .filter(|link| !link.trim().is_empty()),
1829        })
1830    };
1831
1832    let manifest = CrystallizationBundleManifest {
1833        schema: BUNDLE_SCHEMA.to_string(),
1834        schema_version: BUNDLE_SCHEMA_VERSION,
1835        generated_at: now_rfc3339(),
1836        generator: BundleGenerator::default(),
1837        kind,
1838        candidate_id: selected
1839            .map(|candidate| candidate.id.clone())
1840            .unwrap_or_default(),
1841        external_key,
1842        title,
1843        team: options.team,
1844        repo: options.repo,
1845        risk_level,
1846        workflow: manifest_workflow,
1847        source_trace_hashes,
1848        source_traces,
1849        deterministic_steps,
1850        fuzzy_steps,
1851        side_effects: selected
1852            .map(|candidate| candidate.side_effects.clone())
1853            .unwrap_or_default(),
1854        capabilities: selected
1855            .map(|candidate| candidate.capabilities.clone())
1856            .unwrap_or_default(),
1857        required_secrets: selected
1858            .map(|candidate| candidate.required_secrets.clone())
1859            .unwrap_or_default(),
1860        savings: selected
1861            .map(|candidate| candidate.savings.clone())
1862            .unwrap_or_default(),
1863        shadow: selected
1864            .map(|candidate| candidate.shadow.clone())
1865            .unwrap_or_default(),
1866        eval_pack,
1867        fixtures: fixture_refs,
1868        promotion,
1869        redaction,
1870        confidence: selected
1871            .map(|candidate| candidate.confidence)
1872            .unwrap_or(0.0),
1873        rejection_reasons: report
1874            .rejected_candidates
1875            .iter()
1876            .flat_map(|candidate| candidate.rejection_reasons.iter().cloned())
1877            .collect(),
1878        warnings: report.warnings.clone(),
1879    };
1880
1881    Ok(CrystallizationBundle {
1882        manifest,
1883        report,
1884        harn_code,
1885        eval_pack_toml,
1886        fixtures: fixture_payloads,
1887    })
1888}
1889
1890/// Write a bundle to a directory. Creates the directory if it does not
1891/// already exist. Returns the manifest with `generated_at` and any
1892/// runtime-resolved metadata filled in.
1893pub fn write_crystallization_bundle(
1894    bundle: &CrystallizationBundle,
1895    bundle_dir: &Path,
1896) -> Result<CrystallizationBundleManifest, VmError> {
1897    std::fs::create_dir_all(bundle_dir).map_err(|error| {
1898        VmError::Runtime(format!(
1899            "failed to create bundle dir {}: {error}",
1900            bundle_dir.display()
1901        ))
1902    })?;
1903    write_bytes(
1904        &bundle_dir.join(BUNDLE_WORKFLOW_FILE),
1905        bundle.harn_code.as_bytes(),
1906    )?;
1907    let report_json = serde_json::to_vec_pretty(&bundle.report)
1908        .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
1909    write_bytes(&bundle_dir.join(BUNDLE_REPORT_FILE), &report_json)?;
1910
1911    if !bundle.eval_pack_toml.trim().is_empty() {
1912        write_bytes(
1913            &bundle_dir.join(BUNDLE_EVAL_PACK_FILE),
1914            bundle.eval_pack_toml.as_bytes(),
1915        )?;
1916    }
1917
1918    if !bundle.fixtures.is_empty() {
1919        let fixtures_dir = bundle_dir.join(BUNDLE_FIXTURES_DIR);
1920        std::fs::create_dir_all(&fixtures_dir).map_err(|error| {
1921            VmError::Runtime(format!(
1922                "failed to create fixtures dir {}: {error}",
1923                fixtures_dir.display()
1924            ))
1925        })?;
1926        for trace in &bundle.fixtures {
1927            let path = fixtures_dir.join(format!("{}.json", sanitize_fixture_name(&trace.id)));
1928            let payload = serde_json::to_vec_pretty(trace).map_err(|error| {
1929                VmError::Runtime(format!("failed to encode fixture {}: {error}", trace.id))
1930            })?;
1931            write_bytes(&path, &payload)?;
1932        }
1933    }
1934
1935    let manifest_json = serde_json::to_vec_pretty(&bundle.manifest)
1936        .map_err(|error| VmError::Runtime(format!("failed to encode manifest JSON: {error}")))?;
1937    write_bytes(&bundle_dir.join(BUNDLE_MANIFEST_FILE), &manifest_json)?;
1938    Ok(bundle.manifest.clone())
1939}
1940
1941/// Read a bundle manifest from disk. Verifies the schema marker but does
1942/// not cross-check workflow/report/eval-pack sibling files; for a richer
1943/// check use [`validate_crystallization_bundle`].
1944pub fn load_crystallization_bundle_manifest(
1945    bundle_dir: &Path,
1946) -> Result<CrystallizationBundleManifest, VmError> {
1947    let manifest_path = bundle_dir.join(BUNDLE_MANIFEST_FILE);
1948    let bytes = std::fs::read(&manifest_path).map_err(|error| {
1949        VmError::Runtime(format!(
1950            "failed to read bundle manifest {}: {error}",
1951            manifest_path.display()
1952        ))
1953    })?;
1954    let manifest: CrystallizationBundleManifest =
1955        serde_json::from_slice(&bytes).map_err(|error| {
1956            VmError::Runtime(format!(
1957                "failed to decode bundle manifest {}: {error}",
1958                manifest_path.display()
1959            ))
1960        })?;
1961    if manifest.schema != BUNDLE_SCHEMA {
1962        return Err(VmError::Runtime(format!(
1963            "bundle {} has unrecognized schema {:?} (expected {})",
1964            bundle_dir.display(),
1965            manifest.schema,
1966            BUNDLE_SCHEMA
1967        )));
1968    }
1969    if manifest.schema_version > BUNDLE_SCHEMA_VERSION {
1970        return Err(VmError::Runtime(format!(
1971            "bundle {} schema_version {} is newer than supported {}",
1972            bundle_dir.display(),
1973            manifest.schema_version,
1974            BUNDLE_SCHEMA_VERSION
1975        )));
1976    }
1977    Ok(manifest)
1978}
1979
1980/// Read every fixture trace referenced by the bundle manifest. Returns
1981/// the manifest plus loaded traces, in the order they appear in the
1982/// manifest. Fixtures with `path: None` are skipped.
1983pub fn load_crystallization_bundle(
1984    bundle_dir: &Path,
1985) -> Result<(CrystallizationBundleManifest, Vec<CrystallizationTrace>), VmError> {
1986    let manifest = load_crystallization_bundle_manifest(bundle_dir)?;
1987    let mut traces = Vec::new();
1988    for fixture in &manifest.fixtures {
1989        let path = bundle_dir.join(&fixture.path);
1990        traces.push(load_crystallization_trace(&path)?);
1991    }
1992    Ok((manifest, traces))
1993}
1994
1995/// Validate a bundle directory layout and contents. Cheap enough to call
1996/// from a CLI smoke command; performs no live side effects.
1997pub fn validate_crystallization_bundle(bundle_dir: &Path) -> Result<BundleValidation, VmError> {
1998    let mut validation = BundleValidation {
1999        bundle_dir: bundle_dir.display().to_string(),
2000        ..BundleValidation::default()
2001    };
2002    let manifest = match load_crystallization_bundle_manifest(bundle_dir) {
2003        Ok(manifest) => manifest,
2004        Err(error) => {
2005            validation.problems.push(error.to_string());
2006            return Ok(validation);
2007        }
2008    };
2009    validation.manifest_ok = true;
2010    validation.schema = manifest.schema.clone();
2011    validation.schema_version = manifest.schema_version;
2012    validation.kind = manifest.kind.clone();
2013    validation.candidate_id = manifest.candidate_id.clone();
2014
2015    let workflow_path = bundle_dir.join(&manifest.workflow.path);
2016    if workflow_path.exists() {
2017        validation.workflow_ok = true;
2018    } else {
2019        validation
2020            .problems
2021            .push(format!("missing workflow file {}", workflow_path.display()));
2022    }
2023
2024    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
2025    match std::fs::read(&report_path) {
2026        Ok(bytes) => match serde_json::from_slice::<CrystallizationReport>(&bytes) {
2027            Ok(report) => {
2028                validation.report_ok = true;
2029                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
2030                    && manifest.candidate_id.is_empty()
2031                {
2032                    validation
2033                        .problems
2034                        .push("manifest is non-rejected but has empty candidate_id".to_string());
2035                }
2036                if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
2037                    && report.selected_candidate_id.as_deref() != Some(&manifest.candidate_id)
2038                {
2039                    validation.problems.push(format!(
2040                        "report selected_candidate_id {:?} does not match manifest candidate_id {}",
2041                        report.selected_candidate_id, manifest.candidate_id
2042                    ));
2043                }
2044            }
2045            Err(error) => {
2046                validation
2047                    .problems
2048                    .push(format!("invalid report.json: {error}"));
2049            }
2050        },
2051        Err(error) => {
2052            validation.problems.push(format!(
2053                "missing report file {}: {error}",
2054                report_path.display()
2055            ));
2056        }
2057    }
2058
2059    if let Some(eval_pack) = &manifest.eval_pack {
2060        let path = bundle_dir.join(&eval_pack.path);
2061        if path.exists() {
2062            validation.eval_pack_ok = true;
2063        } else {
2064            validation.problems.push(format!(
2065                "manifest references eval pack {} but file is missing",
2066                path.display()
2067            ));
2068        }
2069    } else {
2070        validation.eval_pack_ok = true;
2071    }
2072
2073    let mut fixtures_problem = false;
2074    for fixture in &manifest.fixtures {
2075        let path = bundle_dir.join(&fixture.path);
2076        if !path.exists() {
2077            validation
2078                .problems
2079                .push(format!("missing fixture {}", path.display()));
2080            fixtures_problem = true;
2081            continue;
2082        }
2083        if !fixture.redacted {
2084            validation.problems.push(format!(
2085                "fixture {} is not marked redacted; bundle must not ship raw private payloads",
2086                fixture.path
2087            ));
2088            fixtures_problem = true;
2089        }
2090    }
2091    validation.fixtures_ok = !fixtures_problem;
2092
2093    if !manifest.redaction.applied && !manifest.fixtures.is_empty() {
2094        validation
2095            .problems
2096            .push("redaction.applied is false but bundle includes fixtures".to_string());
2097    } else {
2098        validation.redaction_ok = true;
2099    }
2100    if !manifest
2101        .required_secrets
2102        .iter()
2103        .all(|secret| secret_id_looks_logical(secret))
2104    {
2105        validation.problems.push(
2106            "required_secrets contains a non-logical id (looks like a raw secret)".to_string(),
2107        );
2108    }
2109
2110    Ok(validation)
2111}
2112
2113/// Replay shadow comparison from a bundle: re-runs the deterministic
2114/// shadow check in-process against the bundle's redacted fixtures, with
2115/// no live side effects. Returns the manifest and the freshly computed
2116/// `ShadowRunReport`. The returned report is suitable for cloud import or
2117/// for asserting determinism in CI.
2118pub fn shadow_replay_bundle(
2119    bundle_dir: &Path,
2120) -> Result<(CrystallizationBundleManifest, ShadowRunReport), VmError> {
2121    let (manifest, traces) = load_crystallization_bundle(bundle_dir)?;
2122    let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
2123    let bytes = std::fs::read(&report_path).map_err(|error| {
2124        VmError::Runtime(format!(
2125            "failed to read bundle report {}: {error}",
2126            report_path.display()
2127        ))
2128    })?;
2129    let report: CrystallizationReport = serde_json::from_slice(&bytes).map_err(|error| {
2130        VmError::Runtime(format!(
2131            "failed to decode bundle report {}: {error}",
2132            report_path.display()
2133        ))
2134    })?;
2135    let candidate = report
2136        .selected_candidate_id
2137        .as_deref()
2138        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
2139        .ok_or_else(|| {
2140            VmError::Runtime(format!(
2141                "bundle {} has no selected candidate to replay",
2142                bundle_dir.display()
2143            ))
2144        })?;
2145    let shadow = shadow_candidate(candidate, &traces);
2146    Ok((manifest, shadow))
2147}
2148
2149fn write_bytes(path: &Path, bytes: &[u8]) -> Result<(), VmError> {
2150    crate::atomic_io::atomic_write(path, bytes)
2151        .map_err(|error| VmError::Runtime(format!("failed to write {}: {error}", path.display())))
2152}
2153
/// Turn a trace id into a file-name stem.
///
/// ASCII letters, digits, `-` and `_` are kept; every other character
/// becomes `_`. Leading and trailing underscores are then stripped, and if
/// nothing is left the stem falls back to `"trace"`.
fn sanitize_fixture_name(raw: &str) -> String {
    let mut mapped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        let keep = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_');
        mapped.push(if keep { ch } else { '_' });
    }
    match mapped.trim_matches('_') {
        "" => "trace".to_string(),
        stem => stem.to_string(),
    }
}
2171
/// Derive a lowercase, dash-separated key from an arbitrary name.
///
/// Runs of ASCII letters and digits are lowercased and joined with single
/// dashes; every other character only acts as a separator, so the result
/// never starts or ends with a dash. If the input holds no ASCII
/// alphanumerics at all, `"crystallized-workflow"` is returned.
fn sanitize_external_key(raw: &str) -> String {
    let segments: Vec<String> = raw
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .map(str::to_ascii_lowercase)
        .collect();
    if segments.is_empty() {
        return "crystallized-workflow".to_string();
    }
    segments.join("-")
}
2192
2193fn infer_bundle_title(candidate: Option<&WorkflowCandidate>, fallback_name: &str) -> String {
2194    if let Some(candidate) = candidate {
2195        format!(
2196            "{} ({} step{})",
2197            candidate.name,
2198            candidate.steps.len(),
2199            if candidate.steps.len() == 1 { "" } else { "s" }
2200        )
2201    } else {
2202        format!("rejected: {fallback_name}")
2203    }
2204}
2205
2206fn infer_risk_level(candidate: Option<&WorkflowCandidate>) -> String {
2207    let Some(candidate) = candidate else {
2208        return "high".to_string();
2209    };
2210    let touches_external = candidate.side_effects.iter().any(side_effect_is_external);
2211    let needs_secret = !candidate.required_secrets.is_empty();
2212    if touches_external && needs_secret {
2213        "high".to_string()
2214    } else if touches_external || needs_secret {
2215        "medium".to_string()
2216    } else {
2217        "low".to_string()
2218    }
2219}
2220
2221fn side_effect_is_external(effect: &CrystallizationSideEffect) -> bool {
2222    let kind = effect.kind.to_ascii_lowercase();
2223    if kind.is_empty() {
2224        return false;
2225    }
2226    // Plan-only side effects stay inside Harn's own data plane: they
2227    // write receipts, append to the in-process event log, or stash plans.
2228    // None of those touch tenant-external systems.
2229    let internal = kind.contains("receipt")
2230        || kind.contains("event_log")
2231        || kind.contains("memo")
2232        || kind.contains("plan");
2233    if internal {
2234        return false;
2235    }
2236    kind.contains("post")
2237        || kind.contains("write")
2238        || kind.contains("publish")
2239        || kind.contains("delete")
2240        || kind.contains("send")
2241}
2242
2243fn candidate_is_plan_only(candidate: &WorkflowCandidate) -> bool {
2244    if candidate.steps.is_empty() {
2245        return false;
2246    }
2247    candidate.side_effects.iter().all(|effect| {
2248        let kind = effect.kind.to_ascii_lowercase();
2249        // Plan-only side effects stay inside Harn's own data plane: receipt
2250        // writes, in-memory event-log appends, file-only mutations, etc.
2251        kind.is_empty()
2252            || kind.contains("receipt")
2253            || kind.contains("event_log")
2254            || kind.contains("memo")
2255            || kind.contains("plan")
2256            || (kind.contains("file") && !kind.contains("publish"))
2257    })
2258}
2259
2260fn redact_trace_for_bundle(trace: &mut CrystallizationTrace) {
2261    for action in &mut trace.actions {
2262        redact_bundle_value(&mut action.inputs);
2263        if let Some(output) = action.output.as_mut() {
2264            redact_bundle_value(output);
2265        }
2266        if let Some(observed) = action.observed_output.as_mut() {
2267            redact_bundle_value(observed);
2268        }
2269        for value in action.parameters.values_mut() {
2270            redact_bundle_value(value);
2271        }
2272        for (_, value) in action.metadata.iter_mut() {
2273            redact_bundle_value(value);
2274        }
2275    }
2276    for (_, value) in trace.metadata.iter_mut() {
2277        redact_bundle_value(value);
2278    }
2279}
2280
2281fn redact_bundle_value(value: &mut JsonValue) {
2282    match value {
2283        JsonValue::String(text) if looks_like_secret_value(text) => {
2284            *text = "[redacted]".to_string();
2285        }
2286        JsonValue::Array(items) => {
2287            for item in items {
2288                redact_bundle_value(item);
2289            }
2290        }
2291        JsonValue::Object(map) => {
2292            for (key, child) in map.iter_mut() {
2293                if is_sensitive_bundle_key(key) {
2294                    *child = JsonValue::String("[redacted]".to_string());
2295                } else {
2296                    redact_bundle_value(child);
2297                }
2298            }
2299        }
2300        _ => {}
2301    }
2302}
2303
/// Decide whether an object key names a value that must be redacted.
///
/// Matching is ASCII case-insensitive. A key is sensitive when it contains
/// one of the credential fragments below, or when it is exactly one of the
/// credential-bearing HTTP header names.
///
/// The hyphenated spellings `api-key` (as in the `x-api-key` header) and
/// `proxy-authorization` are covered alongside `api_key` / `apikey` and
/// `authorization`, so header-style keys are not left to the value
/// heuristic alone.
fn is_sensitive_bundle_key(key: &str) -> bool {
    // Substrings that mark a key as sensitive wherever they appear.
    const SENSITIVE_FRAGMENTS: [&str; 6] = [
        "secret", "token", "password", "api_key", "api-key", "apikey",
    ];
    // Header names that are only sensitive as an exact match.
    const SENSITIVE_NAMES: [&str; 4] = [
        "authorization",
        "proxy-authorization",
        "cookie",
        "set-cookie",
    ];

    let lower = key.to_ascii_lowercase();
    SENSITIVE_FRAGMENTS
        .iter()
        .any(|fragment| lower.contains(*fragment))
        || SENSITIVE_NAMES.contains(&lower.as_str())
}
2315
/// Heuristically decide whether a string value looks like a raw secret.
///
/// After trimming surrounding whitespace, a value is flagged when it
/// starts with one of the known credential prefixes below, or when it is
/// longer than 48 bytes and made up solely of ASCII letters, digits, `_`
/// and `-`.
fn looks_like_secret_value(value: &str) -> bool {
    const KNOWN_PREFIXES: [&str; 6] = ["sk-", "ghp_", "ghs_", "xoxb-", "xoxp-", "AKIA"];

    let candidate = value.trim();
    if KNOWN_PREFIXES
        .iter()
        .any(|prefix| candidate.starts_with(*prefix))
    {
        return true;
    }

    // Long unbroken opaque tokens are treated as secrets even without a
    // recognizable prefix.
    candidate.len() > 48
        && candidate
            .bytes()
            .all(|byte| byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'-')
}
2329
2330fn secret_id_looks_logical(value: &str) -> bool {
2331    !looks_like_secret_value(value) && !value.trim().is_empty()
2332}
2333
2334#[cfg(test)]
2335mod tests {
2336    use super::*;
2337
2338    fn version_trace(
2339        id: &str,
2340        version: &str,
2341        side_target: &str,
2342        fuzzy: bool,
2343    ) -> CrystallizationTrace {
2344        CrystallizationTrace {
2345            id: id.to_string(),
2346            actions: vec![
2347                CrystallizationAction {
2348                    id: format!("{id}-branch"),
2349                    kind: "tool_call".to_string(),
2350                    name: "git.checkout_branch".to_string(),
2351                    parameters: BTreeMap::from([
2352                        ("repo_path".to_string(), json!(format!("/tmp/{id}"))),
2353                        (
2354                            "branch_name".to_string(),
2355                            json!(format!("release-{version}")),
2356                        ),
2357                    ]),
2358                    side_effects: vec![CrystallizationSideEffect {
2359                        kind: "git_ref".to_string(),
2360                        target: side_target.to_string(),
2361                        capability: Some("git.write".to_string()),
2362                        ..CrystallizationSideEffect::default()
2363                    }],
2364                    capabilities: vec!["git.write".to_string()],
2365                    deterministic: Some(true),
2366                    duration_ms: Some(20),
2367                    ..CrystallizationAction::default()
2368                },
2369                CrystallizationAction {
2370                    id: format!("{id}-manifest"),
2371                    kind: "file_mutation".to_string(),
2372                    name: "update_manifest_version".to_string(),
2373                    inputs: json!({"version": version, "path": "harn.toml"}),
2374                    parameters: BTreeMap::from([("version".to_string(), json!(version))]),
2375                    side_effects: vec![CrystallizationSideEffect {
2376                        kind: "file_write".to_string(),
2377                        target: "harn.toml".to_string(),
2378                        capability: Some("fs.write".to_string()),
2379                        ..CrystallizationSideEffect::default()
2380                    }],
2381                    capabilities: vec!["fs.write".to_string()],
2382                    deterministic: Some(true),
2383                    ..CrystallizationAction::default()
2384                },
2385                CrystallizationAction {
2386                    id: format!("{id}-release"),
2387                    kind: if fuzzy { "model_call" } else { "tool_call" }.to_string(),
2388                    name: "prepare_release_notes".to_string(),
2389                    inputs: json!({"release_target": "crates.io", "version": version}),
2390                    parameters: BTreeMap::from([
2391                        ("release_target".to_string(), json!("crates.io")),
2392                        ("version".to_string(), json!(version)),
2393                    ]),
2394                    fuzzy: Some(fuzzy),
2395                    deterministic: Some(!fuzzy),
2396                    cost: CrystallizationCost {
2397                        model_calls: if fuzzy { 1 } else { 0 },
2398                        input_tokens: if fuzzy { 1200 } else { 0 },
2399                        output_tokens: if fuzzy { 250 } else { 0 },
2400                        total_cost_usd: if fuzzy { 0.01 } else { 0.0 },
2401                        wall_ms: 3000,
2402                        ..CrystallizationCost::default()
2403                    },
2404                    ..CrystallizationAction::default()
2405                },
2406            ],
2407            ..CrystallizationTrace::default()
2408        }
2409    }
2410
2411    #[test]
2412    fn crystallizes_repeated_version_bump_with_parameters() {
2413        let traces = (0..5)
2414            .map(|idx| {
2415                version_trace(
2416                    &format!("trace_{idx}"),
2417                    &format!("0.7.{idx}"),
2418                    "release-branch",
2419                    false,
2420                )
2421            })
2422            .collect::<Vec<_>>();
2423
2424        let artifacts = crystallize_traces(
2425            traces,
2426            CrystallizeOptions {
2427                workflow_name: Some("version_bump".to_string()),
2428                ..CrystallizeOptions::default()
2429            },
2430        )
2431        .unwrap();
2432
2433        let candidate = &artifacts.report.candidates[0];
2434        assert!(candidate.rejection_reasons.is_empty());
2435        assert!(candidate.shadow.pass);
2436        assert_eq!(candidate.examples.len(), 5);
2437        let params = candidate
2438            .parameters
2439            .iter()
2440            .map(|param| param.name.as_str())
2441            .collect::<BTreeSet<_>>();
2442        assert!(params.contains("version"));
2443        assert!(params.contains("repo_path"));
2444        assert!(params.contains("branch_name"));
2445        assert!(artifacts.harn_code.contains("pipeline version_bump("));
2446        assert!(artifacts.eval_pack_toml.contains("crystallization-shadow"));
2447    }
2448
2449    #[test]
2450    fn rejects_divergent_side_effects() {
2451        let traces = vec![
2452            version_trace("trace_a", "0.7.1", "release-branch", false),
2453            version_trace("trace_b", "0.7.2", "main", false),
2454            version_trace("trace_c", "0.7.3", "release-branch", false),
2455        ];
2456
2457        let artifacts = crystallize_traces(traces, CrystallizeOptions::default()).unwrap();
2458
2459        assert!(artifacts.report.candidates.is_empty());
2460        assert_eq!(artifacts.report.rejected_candidates.len(), 1);
2461        assert!(artifacts.report.rejected_candidates[0].rejection_reasons[0]
2462            .contains("divergent side effects"));
2463    }
2464
2465    #[test]
2466    fn preserves_remaining_fuzzy_segment() {
2467        let traces = (0..3)
2468            .map(|idx| {
2469                version_trace(
2470                    &format!("trace_{idx}"),
2471                    &format!("0.8.{idx}"),
2472                    "release-branch",
2473                    true,
2474                )
2475            })
2476            .collect::<Vec<_>>();
2477
2478        let artifacts = crystallize_traces(traces, CrystallizeOptions::default()).unwrap();
2479        let candidate = &artifacts.report.candidates[0];
2480
2481        assert!(candidate
2482            .steps
2483            .iter()
2484            .any(|step| step.segment == SegmentKind::Fuzzy));
2485        assert!(candidate.savings.remaining_model_calls > 0);
2486        assert!(artifacts.harn_code.contains("TODO: fuzzy segment"));
2487    }
2488
2489    fn plan_only_trace(id: &str, suffix: &str) -> CrystallizationTrace {
2490        CrystallizationTrace {
2491            id: id.to_string(),
2492            actions: vec![
2493                CrystallizationAction {
2494                    id: format!("{id}-classify"),
2495                    kind: "tool_call".to_string(),
2496                    name: "classify_issue".to_string(),
2497                    parameters: BTreeMap::from([
2498                        ("issue_id".to_string(), json!(format!("HAR-{suffix}"))),
2499                        ("team_key".to_string(), json!("HAR")),
2500                    ]),
2501                    capabilities: vec!["linear.read".to_string()],
2502                    deterministic: Some(true),
2503                    duration_ms: Some(15),
2504                    ..CrystallizationAction::default()
2505                },
2506                CrystallizationAction {
2507                    id: format!("{id}-receipt"),
2508                    kind: "receipt_write".to_string(),
2509                    name: "emit_receipt".to_string(),
2510                    inputs: json!({"summary": format!("plan only #{suffix}"), "kind": "plan"}),
2511                    parameters: BTreeMap::from([
2512                        ("kind".to_string(), json!("plan")),
2513                        ("summary".to_string(), json!(format!("plan only #{suffix}"))),
2514                    ]),
2515                    side_effects: vec![CrystallizationSideEffect {
2516                        kind: "receipt_write".to_string(),
2517                        target: "tenant_event_log".to_string(),
2518                        capability: Some("receipt.write".to_string()),
2519                        ..CrystallizationSideEffect::default()
2520                    }],
2521                    capabilities: vec!["receipt.write".to_string()],
2522                    deterministic: Some(true),
2523                    duration_ms: Some(5),
2524                    ..CrystallizationAction::default()
2525                },
2526            ],
2527            ..CrystallizationTrace::default()
2528        }
2529    }
2530
2531    fn version_traces(count: usize) -> Vec<CrystallizationTrace> {
2532        (0..count)
2533            .map(|idx| {
2534                version_trace(
2535                    &format!("trace_{idx}"),
2536                    &format!("0.7.{idx}"),
2537                    "release-branch",
2538                    false,
2539                )
2540            })
2541            .collect()
2542    }
2543
2544    #[test]
2545    fn build_bundle_assembles_versioned_manifest() {
2546        let traces = version_traces(5);
2547        let artifacts = crystallize_traces(
2548            traces.clone(),
2549            CrystallizeOptions {
2550                workflow_name: Some("version_bump".to_string()),
2551                package_name: Some("release-workflows".to_string()),
2552                author: Some("ops@example.com".to_string()),
2553                approver: Some("lead@example.com".to_string()),
2554                eval_pack_link: Some("eval-pack://release-workflows/v1".to_string()),
2555                ..CrystallizeOptions::default()
2556            },
2557        )
2558        .unwrap();
2559
2560        let bundle = build_crystallization_bundle(
2561            artifacts,
2562            &traces,
2563            BundleOptions {
2564                team: Some("platform".to_string()),
2565                repo: Some("burin-labs/harn".to_string()),
2566                ..BundleOptions::default()
2567            },
2568        )
2569        .unwrap();
2570
2571        let manifest = &bundle.manifest;
2572        assert_eq!(manifest.schema, BUNDLE_SCHEMA);
2573        assert_eq!(manifest.schema_version, BUNDLE_SCHEMA_VERSION);
2574        assert_eq!(manifest.kind, BundleKind::Candidate);
2575        assert!(!manifest.candidate_id.is_empty());
2576        assert_eq!(manifest.workflow.name, "version_bump");
2577        assert_eq!(manifest.workflow.package_name, "release-workflows");
2578        assert_eq!(manifest.workflow.path, BUNDLE_WORKFLOW_FILE);
2579        assert_eq!(manifest.team.as_deref(), Some("platform"));
2580        assert_eq!(manifest.repo.as_deref(), Some("burin-labs/harn"));
2581        assert_eq!(manifest.external_key, "version-bump");
2582        assert_eq!(manifest.promotion.rollout_policy, "shadow_then_canary");
2583        assert_eq!(
2584            manifest.promotion.author.as_deref(),
2585            Some("ops@example.com")
2586        );
2587        assert_eq!(
2588            manifest.promotion.approver.as_deref(),
2589            Some("lead@example.com")
2590        );
2591        assert_eq!(manifest.promotion.workflow_version, "0.1.0");
2592        assert!(manifest.deterministic_steps.len() + manifest.fuzzy_steps.len() > 0);
2593        assert_eq!(manifest.source_traces.len(), traces.len());
2594        assert_eq!(manifest.fixtures.len(), traces.len());
2595        assert!(manifest.fixtures.iter().all(|fixture| fixture.redacted));
2596        assert!(manifest.redaction.applied);
2597        assert!(manifest.redaction.fixture_count > 0);
2598        assert!(manifest
2599            .eval_pack
2600            .as_ref()
2601            .is_some_and(|eval| eval.path == BUNDLE_EVAL_PACK_FILE));
2602        assert!(manifest
2603            .required_secrets
2604            .iter()
2605            .all(|secret| !secret.is_empty()));
2606    }
2607
2608    #[test]
2609    fn write_bundle_round_trips_through_disk() {
2610        let traces = version_traces(5);
2611        let artifacts = crystallize_traces(
2612            traces.clone(),
2613            CrystallizeOptions {
2614                workflow_name: Some("version_bump".to_string()),
2615                ..CrystallizeOptions::default()
2616            },
2617        )
2618        .unwrap();
2619        let bundle =
2620            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2621
2622        let dir = tempfile::tempdir().unwrap();
2623        let written = write_crystallization_bundle(&bundle, dir.path()).unwrap();
2624        assert_eq!(written.candidate_id, bundle.manifest.candidate_id);
2625
2626        // Files exist on disk.
2627        for relative in [
2628            BUNDLE_MANIFEST_FILE,
2629            BUNDLE_REPORT_FILE,
2630            BUNDLE_WORKFLOW_FILE,
2631            BUNDLE_EVAL_PACK_FILE,
2632        ] {
2633            assert!(dir.path().join(relative).exists(), "missing {relative}");
2634        }
2635        let fixtures_dir = dir.path().join(BUNDLE_FIXTURES_DIR);
2636        assert!(fixtures_dir.is_dir());
2637        assert_eq!(
2638            std::fs::read_dir(&fixtures_dir).unwrap().count(),
2639            traces.len()
2640        );
2641
2642        // Manifest round-trips.
2643        let (loaded_manifest, loaded_traces) = load_crystallization_bundle(dir.path()).unwrap();
2644        assert_eq!(loaded_manifest, bundle.manifest);
2645        assert_eq!(loaded_traces.len(), traces.len());
2646
2647        // Validation passes.
2648        let validation = validate_crystallization_bundle(dir.path()).unwrap();
2649        assert!(
2650            validation.problems.is_empty(),
2651            "unexpected problems: {:?}",
2652            validation.problems
2653        );
2654        assert!(validation.is_ok());
2655        assert!(validation.workflow_ok && validation.report_ok);
2656        assert!(validation.fixtures_ok && validation.redaction_ok);
2657
2658        // Shadow replay matches the persisted shadow report.
2659        let (replay_manifest, shadow) = shadow_replay_bundle(dir.path()).unwrap();
2660        assert_eq!(replay_manifest.candidate_id, bundle.manifest.candidate_id);
2661        assert!(shadow.pass, "shadow should still pass");
2662        assert_eq!(shadow.compared_traces, traces.len());
2663    }
2664
2665    #[test]
2666    fn validate_rejects_bundle_with_missing_workflow() {
2667        let traces = version_traces(3);
2668        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
2669        let bundle =
2670            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2671
2672        let dir = tempfile::tempdir().unwrap();
2673        write_crystallization_bundle(&bundle, dir.path()).unwrap();
2674        std::fs::remove_file(dir.path().join(BUNDLE_WORKFLOW_FILE)).unwrap();
2675
2676        let validation = validate_crystallization_bundle(dir.path()).unwrap();
2677        assert!(!validation.is_ok());
2678        assert!(validation
2679            .problems
2680            .iter()
2681            .any(|problem| problem.contains("missing workflow file")));
2682    }
2683
2684    #[test]
2685    fn validate_rejects_bundle_with_unredacted_fixture() {
2686        let traces = version_traces(3);
2687        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
2688        let mut bundle =
2689            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2690        // Force a fixture to claim it is unredacted; this must trip
2691        // validation so a malicious or careless producer cannot ship raw
2692        // private payloads under the bundle contract.
2693        bundle.manifest.fixtures[0].redacted = false;
2694        let dir = tempfile::tempdir().unwrap();
2695        write_crystallization_bundle(&bundle, dir.path()).unwrap();
2696
2697        let validation = validate_crystallization_bundle(dir.path()).unwrap();
2698        assert!(!validation.is_ok());
2699        assert!(validation
2700            .problems
2701            .iter()
2702            .any(|problem| problem.contains("not marked redacted")));
2703    }
2704
2705    #[test]
2706    fn validate_rejects_unsupported_schema_version() {
2707        let traces = version_traces(3);
2708        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
2709        let mut bundle =
2710            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2711        bundle.manifest.schema_version = BUNDLE_SCHEMA_VERSION + 1;
2712        let dir = tempfile::tempdir().unwrap();
2713        write_crystallization_bundle(&bundle, dir.path()).unwrap();
2714
2715        let validation = validate_crystallization_bundle(dir.path()).unwrap();
2716        assert!(!validation.is_ok());
2717        assert!(validation
2718            .problems
2719            .iter()
2720            .any(|problem| problem.contains("schema_version")));
2721    }
2722
2723    #[test]
2724    fn redacts_secret_like_values_in_fixtures() {
2725        // Build secret-shaped strings at runtime so we exercise the
2726        // redaction prefixes (`xoxb-`, `ghp_`, `sk-`) without checking in
2727        // source that looks like a real credential to secret scanners.
2728        let slack_prefix = format!("{}{}", "xo", "xb-");
2729        let github_prefix = format!("{}{}", "gh", "p_");
2730        let openai_prefix = "sk-".to_string();
2731        let pad = "A".repeat(48);
2732        let slack_secret = format!("{slack_prefix}1234567890-{pad}");
2733        let github_secret = format!("{github_prefix}{pad}");
2734        let openai_secret = format!("{openai_prefix}{pad}");
2735
2736        let mut secret_action = CrystallizationAction {
2737            id: "secret".to_string(),
2738            kind: "tool_call".to_string(),
2739            name: "post_release_to_slack".to_string(),
2740            parameters: BTreeMap::from([
2741                ("slack_token".to_string(), json!(slack_secret)),
2742                ("channel".to_string(), json!("#releases")),
2743            ]),
2744            inputs: json!({
2745                "authorization": format!("Bearer {github_secret}"),
2746                "version": "0.7.1",
2747            }),
2748            ..CrystallizationAction::default()
2749        };
2750        secret_action
2751            .metadata
2752            .insert("api_key".to_string(), json!(openai_secret));
2753
2754        let mut trace = CrystallizationTrace {
2755            id: "trace_secret".to_string(),
2756            actions: vec![secret_action],
2757            ..CrystallizationTrace::default()
2758        };
2759        redact_trace_for_bundle(&mut trace);
2760        let action = &trace.actions[0];
2761        assert_eq!(
2762            action.parameters.get("slack_token"),
2763            Some(&json!("[redacted]"))
2764        );
2765        assert_eq!(action.parameters.get("channel"), Some(&json!("#releases")));
2766        let inputs = action.inputs.as_object().unwrap();
2767        assert_eq!(inputs.get("authorization"), Some(&json!("[redacted]")));
2768        assert_eq!(inputs.get("version"), Some(&json!("0.7.1")));
2769        assert_eq!(action.metadata.get("api_key"), Some(&json!("[redacted]")));
2770    }
2771
2772    #[test]
2773    fn plan_only_fixture_yields_plan_only_kind() {
2774        let traces = (0..3)
2775            .map(|idx| plan_only_trace(&format!("plan_{idx}"), &format!("{idx}")))
2776            .collect::<Vec<_>>();
2777        let artifacts = crystallize_traces(
2778            traces.clone(),
2779            CrystallizeOptions {
2780                workflow_name: Some("plan_only_triage".to_string()),
2781                ..CrystallizeOptions::default()
2782            },
2783        )
2784        .unwrap();
2785        let bundle =
2786            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2787        assert_eq!(bundle.manifest.kind, BundleKind::PlanOnly);
2788        assert_eq!(bundle.manifest.risk_level, "low");
2789    }
2790
2791    #[test]
2792    fn rejected_bundle_has_rejected_kind() {
2793        let traces = vec![
2794            version_trace("trace_a", "0.7.1", "release-branch", false),
2795            version_trace("trace_b", "0.7.2", "main", false),
2796            version_trace("trace_c", "0.7.3", "release-branch", false),
2797        ];
2798        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
2799        let bundle =
2800            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2801        assert_eq!(bundle.manifest.kind, BundleKind::Rejected);
2802        assert!(bundle.manifest.candidate_id.is_empty());
2803        assert!(!bundle.manifest.rejection_reasons.is_empty());
2804        assert!(bundle.fixtures.is_empty());
2805    }
2806
2807    #[test]
2808    fn validate_round_trips_rejected_bundle() {
2809        let traces = vec![
2810            version_trace("trace_a", "0.7.1", "release-branch", false),
2811            version_trace("trace_b", "0.7.2", "main", false),
2812            version_trace("trace_c", "0.7.3", "release-branch", false),
2813        ];
2814        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
2815        let bundle =
2816            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2817        let dir = tempfile::tempdir().unwrap();
2818        write_crystallization_bundle(&bundle, dir.path()).unwrap();
2819
2820        let validation = validate_crystallization_bundle(dir.path()).unwrap();
2821        assert!(validation.is_ok(), "{:?}", validation.problems);
2822        assert_eq!(validation.kind, BundleKind::Rejected);
2823    }
2824
2825    #[test]
2826    fn shadow_replay_fails_when_fixture_diverges() {
2827        let traces = version_traces(3);
2828        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
2829        let bundle =
2830            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
2831        let dir = tempfile::tempdir().unwrap();
2832        write_crystallization_bundle(&bundle, dir.path()).unwrap();
2833
2834        // Tamper with one redacted fixture so its action list no longer
2835        // matches the candidate signature; the replay must fail without
2836        // panicking.
2837        let fixture_dir = dir.path().join(BUNDLE_FIXTURES_DIR);
2838        let some_fixture = std::fs::read_dir(&fixture_dir)
2839            .unwrap()
2840            .next()
2841            .unwrap()
2842            .unwrap()
2843            .path();
2844        let mut tampered: CrystallizationTrace =
2845            serde_json::from_slice(&std::fs::read(&some_fixture).unwrap()).unwrap();
2846        tampered.actions.truncate(1);
2847        std::fs::write(&some_fixture, serde_json::to_vec_pretty(&tampered).unwrap()).unwrap();
2848
2849        let (_, shadow) = shadow_replay_bundle(dir.path()).unwrap();
2850        assert!(!shadow.pass);
2851        assert!(!shadow.failures.is_empty());
2852    }
2853}