1use std::collections::{BTreeMap, BTreeSet};
11use std::fmt::Write as _;
12use std::path::{Path, PathBuf};
13
14use serde::{Deserialize, Serialize};
15use serde_json::{json, Value as JsonValue};
16use sha2::{Digest, Sha256};
17
18use super::{new_id, now_rfc3339, RunRecord};
19use crate::value::VmError;
20
/// Schema version stamped onto traces whose `version` is 0 and reported as the
/// accepted input-format version.
const TRACE_SCHEMA_VERSION: u32 = 1;
/// Lower bound on the number of traces required by `crystallize_traces`; a
/// smaller `CrystallizeOptions::min_examples` is raised to this value.
const DEFAULT_MIN_EXAMPLES: usize = 2;

/// Schema identifier for a candidate bundle.
// NOTE(review): the BUNDLE_* constants are not referenced in this part of the
// file; the descriptions below are inferred from their names and values.
pub const BUNDLE_SCHEMA: &str = "harn.crystallization.candidate.bundle";
/// Version of the bundle schema.
pub const BUNDLE_SCHEMA_VERSION: u32 = 1;
/// File name of the bundle manifest (presumably relative to the bundle root).
pub const BUNDLE_MANIFEST_FILE: &str = "candidate.json";
/// File name of the crystallization report inside a bundle.
pub const BUNDLE_REPORT_FILE: &str = "report.json";
/// File name of the generated workflow source inside a bundle.
pub const BUNDLE_WORKFLOW_FILE: &str = "workflow.harn";
/// File name of the generated eval pack inside a bundle.
pub const BUNDLE_EVAL_PACK_FILE: &str = "harn.eval.toml";
/// Directory name holding bundle fixtures.
pub const BUNDLE_FIXTURES_DIR: &str = "fixtures";

/// Default rollout policy label.
// NOTE(review): not used in the visible portion of the file — confirm where it is applied.
const DEFAULT_ROLLOUT_POLICY: &str = "shadow_then_canary";
42
/// One recorded run, expressed as an ordered list of actions, used as input to
/// crystallization.
///
/// Every field is optional on the wire (`#[serde(default)]`); `normalize_trace`
/// fills in `version`, `id`, `source_hash` and `usage` when they are missing.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationTrace {
    /// Trace schema version; `0` is replaced by `TRACE_SCHEMA_VERSION` during normalization.
    pub version: u32,
    /// Trace identifier; a blank id is replaced by a freshly generated one.
    pub id: String,
    /// Where the trace came from; the loader sets this to the file path when absent.
    pub source: Option<String>,
    /// Hash of the trace source; derived from the file bytes on load, or from the
    /// serialized actions during normalization, when absent.
    pub source_hash: Option<String>,
    /// Workflow the run belonged to (copied from the run record when converted from one).
    pub workflow_id: Option<String>,
    /// Start time of the run, as a string.
    pub started_at: Option<String>,
    /// End time of the run, as a string.
    pub finished_at: Option<String>,
    /// Optional provenance reference for the trace.
    pub flow: Option<CrystallizationFlowRef>,
    /// Ordered actions observed in the run.
    pub actions: Vec<CrystallizationAction>,
    /// Aggregate usage; when left entirely at its default it is summed from the actions.
    pub usage: CrystallizationUsage,
    /// Free-form metadata carried along with the trace.
    pub metadata: BTreeMap<String, JsonValue>,
}
58
/// Provenance references attached to a trace via `CrystallizationTrace::flow`.
// NOTE(review): the fields are not read in the visible portion of the file; the
// meaning of each id is presumably defined by the Flow subsystem — confirm.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CrystallizationFlowRef {
    pub trace_id: Option<String>,
    pub agent_run_id: Option<String>,
    pub transcript_ref: Option<String>,
    pub atom_ids: Vec<String>,
    pub slice_ids: Vec<String>,
}
68
/// A single observed action within a trace (a stage, tool call, model call,
/// human approval, ...).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationAction {
    /// Action identifier; a blank id becomes `action_<position>` during normalization.
    pub id: String,
    /// Action kind, e.g. `"model_call"`, `"tool_call"`, `"human_approval"`,
    /// `"shell_failure"`, `"agent_recovery_advice"`; blank becomes `"action"`.
    pub kind: String,
    /// Human-readable name; a blank name falls back to `kind`.
    pub name: String,
    /// When the action happened, as a string; run-record conversion sorts actions by this value.
    pub timestamp: Option<String>,
    /// Inputs given to the action.
    pub inputs: JsonValue,
    /// Output reported by the action.
    pub output: Option<JsonValue>,
    /// Output actually observed; preferred over `output` when a single-trace
    /// candidate records its expected output.
    pub observed_output: Option<JsonValue>,
    /// Named parameters; scalar values become workflow candidate parameters.
    pub parameters: BTreeMap<String, JsonValue>,
    /// Side effects performed by the action (kept sorted by normalization).
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities the action needs (sorted and de-duplicated by normalization).
    pub capabilities: Vec<String>,
    /// Secrets the action needs (sorted and de-duplicated by normalization).
    pub required_secrets: Vec<String>,
    /// Approval gate attached to the action, if any.
    pub approval: Option<CrystallizationApproval>,
    /// Cost attributed to the action.
    pub cost: CrystallizationCost,
    /// Duration of the action in milliseconds.
    pub duration_ms: Option<i64>,
    /// Whether the action was recorded as deterministic.
    pub deterministic: Option<bool>,
    /// `Some(true)` marks the resulting step as fuzzy (as does `kind == "model_call"`).
    pub fuzzy: Option<bool>,
    /// Free-form metadata; the `"success"` and `"recovery_hint"` keys are read
    /// when review notes are generated.
    pub metadata: BTreeMap<String, JsonValue>,
}
90
/// A side effect performed by an action, carried through to candidate steps
/// and to the candidate's aggregated side-effect list.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationSideEffect {
    /// Kind of side effect.
    pub kind: String,
    /// What the side effect acted upon.
    pub target: String,
    /// Capability associated with the side effect, if recorded.
    pub capability: Option<String>,
    /// Description of the mutation, if recorded.
    pub mutation: Option<String>,
    /// Free-form metadata.
    pub metadata: BTreeMap<String, JsonValue>,
}
100
/// A human-approval point attached to an action or candidate step.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CrystallizationApproval {
    /// Prompt shown to the approver.
    pub prompt: Option<String>,
    /// Who approved, if recorded.
    pub approver: Option<String>,
    /// Whether approval is required; set to `true` for questions converted from a run record.
    pub required: bool,
    /// Approval boundary label; run-record conversion uses `"hitl"`.
    pub boundary: Option<String>,
}
109
/// Cost attributed to a single action.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationCost {
    /// Model used; run-record conversion takes the first model listed in the stage usage.
    pub model: Option<String>,
    /// Number of model calls; normalization forces at least 1 for `"model_call"` actions.
    pub model_calls: i64,
    /// Input tokens consumed.
    pub input_tokens: i64,
    /// Output tokens produced.
    pub output_tokens: i64,
    /// Total cost in USD.
    pub total_cost_usd: f64,
    /// Wall-clock time in milliseconds.
    pub wall_ms: i64,
}
120
/// Aggregate usage for a whole trace.
///
/// When a trace arrives with this left entirely at its default, normalization
/// fills it by summing the per-action costs.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationUsage {
    /// Total number of model calls.
    pub model_calls: i64,
    /// Total input tokens.
    pub input_tokens: i64,
    /// Total output tokens.
    pub output_tokens: i64,
    /// Total cost in USD.
    pub total_cost_usd: f64,
    /// Total wall-clock time in milliseconds.
    pub wall_ms: i64,
}
130
/// Classification of a candidate step; serialized in `snake_case`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SegmentKind {
    /// Step not flagged as fuzzy (the default).
    #[default]
    Deterministic,
    /// Step flagged as fuzzy: a model call, or an action recorded with `fuzzy == Some(true)`.
    Fuzzy,
}
138
/// One occurrence of a repeated sequence: `(trace index, start index within that trace)`.
type SequenceExample = (usize, usize);
/// A repeated action-signature sequence together with the occurrences found for it.
// NOTE(review): pairing inferred from how the mined `(sequence, examples)` tuple is
// destructured later in the file — confirm against the mining helper.
type RepeatedSequence = (Vec<String>, Vec<SequenceExample>);
141
/// A parameter of a workflow candidate.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowCandidateParameter {
    /// Parameter name; mined parameters use the sanitized identifier of the action parameter.
    pub name: String,
    /// Paths the value was observed at, formatted like `steps[<index>].parameters.<name>`.
    pub source_paths: Vec<String>,
    /// Example values seen in the traces (mined parameters keep at most five).
    pub examples: Vec<String>,
    /// Whether the parameter must be supplied; mined parameters are always required.
    pub required: bool,
}
150
/// One step of a workflow candidate, derived from the action(s) at that position.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowCandidateStep {
    /// 1-based position of the step within the candidate.
    pub index: usize,
    /// Kind copied from the source action.
    pub kind: String,
    /// Name copied from the source action.
    pub name: String,
    /// Whether the step is deterministic or fuzzy.
    pub segment: SegmentKind,
    /// Sanitized names of the candidate parameters this step uses.
    pub parameter_refs: Vec<String>,
    /// Constant values derived from the source action.
    pub constants: BTreeMap<String, JsonValue>,
    /// Preconditions derived from the source action.
    pub preconditions: Vec<String>,
    /// Side effects copied from the source action.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities required by the step.
    pub capabilities: Vec<String>,
    /// Secrets required by the step.
    pub required_secrets: Vec<String>,
    /// Approval gate copied from the source action, if any.
    pub approval: Option<CrystallizationApproval>,
    /// Output the step is expected to produce, when one could be determined.
    pub expected_output: Option<JsonValue>,
    /// Notes for a human reviewer (e.g. about failed or advisory source steps).
    pub review_notes: Vec<String>,
}
168
/// Points at the slice of a source trace that exemplifies a candidate.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowCandidateExample {
    /// Id of the source trace.
    pub trace_id: String,
    /// Source hash of that trace (empty when the trace had none).
    pub source_hash: String,
    /// Index of the first matching action within the trace.
    pub start_index: usize,
    /// Ids of the actions covered by the example, in order.
    pub action_ids: Vec<String>,
}
177
/// Metadata recorded for promoting a candidate into a package.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct PromotionMetadata {
    /// Source hashes of the traces the candidate was derived from.
    pub source_trace_hashes: Vec<String>,
    /// Author, copied from `CrystallizeOptions::author`.
    pub author: Option<String>,
    /// Approver, copied from `CrystallizeOptions::approver`.
    pub approver: Option<String>,
    /// Creation timestamp produced by `now_rfc3339()`.
    pub created_at: String,
    /// Package version string.
    pub version: String,
    /// Package name; defaults to the workflow name with `_` replaced by `-`.
    pub package_name: String,
    /// Sorted set of capabilities required by the candidate.
    pub capability_set: Vec<String>,
    /// Sorted set of secrets required by the candidate.
    pub secrets_required: Vec<String>,
    /// Description of what to fall back to if the promotion is rolled back.
    pub rollback_target: Option<String>,
    /// Link to the eval pack; overwritten with the written eval-pack path for
    /// the first accepted candidate when artifacts are saved.
    pub eval_pack_link: Option<String>,
}
192
/// Estimated savings from running the crystallized workflow instead of the
/// original runs, as produced by `estimate_savings`.
// NOTE(review): the estimation formula lives outside the visible portion of the
// file; field meanings below follow the field names only.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct SavingsEstimate {
    pub model_calls_avoided: i64,
    pub input_tokens_avoided: i64,
    pub output_tokens_avoided: i64,
    pub estimated_cost_usd_avoided: f64,
    pub wall_ms_avoided: i64,
    pub cpu_runtime_cost_usd: f64,
    pub remaining_model_calls: i64,
}
204
/// Result of shadow-comparing a candidate against one trace.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ShadowTraceResult {
    /// Id of the compared trace.
    pub trace_id: String,
    /// Whether the comparison passed for this trace.
    pub pass: bool,
    /// Detail messages for the comparison.
    pub details: Vec<String>,
}
212
/// Outcome of shadow-running a candidate against its source traces.
///
/// When `pass` is false the crystallization entry points append `failures` to
/// the candidate's rejection reasons.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct ShadowRunReport {
    /// Overall pass/fail verdict.
    pub pass: bool,
    /// Number of traces compared.
    pub compared_traces: usize,
    /// Failure messages.
    pub failures: Vec<String>,
    /// Per-trace results.
    pub traces: Vec<ShadowTraceResult>,
}
221
/// A proposed workflow extracted from one or more traces, together with the
/// evidence and safety information needed to review it.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowCandidate {
    /// Candidate id derived from the sequence signature and examples.
    pub id: String,
    /// Workflow name (explicit option or inferred from the steps).
    pub name: String,
    /// Confidence score; accepted candidates are ranked by it, highest first.
    pub confidence: f64,
    /// Action signatures of the steps, in order.
    pub sequence_signature: Vec<String>,
    /// Parameters of the workflow.
    pub parameters: Vec<WorkflowCandidateParameter>,
    /// Ordered steps.
    pub steps: Vec<WorkflowCandidateStep>,
    /// Trace slices the candidate was derived from.
    pub examples: Vec<WorkflowCandidateExample>,
    /// Sorted union of the steps' capabilities.
    pub capabilities: Vec<String>,
    /// Sorted union of the steps' required secrets.
    pub required_secrets: Vec<String>,
    /// Approval gates collected from the steps.
    pub approval_points: Vec<CrystallizationApproval>,
    /// Side effects collected from the steps.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Expected outputs collected from the steps that have one.
    pub expected_outputs: Vec<JsonValue>,
    /// Non-blocking warnings (e.g. steps that remain fuzzy).
    pub warnings: Vec<String>,
    /// Reasons the candidate must not be proposed; empty means it is safe to propose.
    pub rejection_reasons: Vec<String>,
    /// Promotion metadata.
    pub promotion: PromotionMetadata,
    /// Estimated savings.
    pub savings: SavingsEstimate,
    /// Result of the shadow run against the source traces.
    pub shadow: ShadowRunReport,
}
243
244impl WorkflowCandidate {
245 pub fn is_safe_to_propose(&self) -> bool {
246 self.rejection_reasons.is_empty()
247 }
248}
249
/// Report describing one crystallization run: what was accepted, what was
/// rejected, and where the generated artifacts were written.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationReport {
    /// Report version.
    pub version: u32,
    /// Generation timestamp produced by `now_rfc3339()`.
    pub generated_at: String,
    /// Number of source traces considered.
    pub source_trace_count: usize,
    /// Id of the first accepted candidate, if any.
    pub selected_candidate_id: Option<String>,
    /// Accepted candidates.
    pub candidates: Vec<WorkflowCandidate>,
    /// Candidates that carry at least one rejection reason.
    pub rejected_candidates: Vec<WorkflowCandidate>,
    /// Report-level warnings.
    pub warnings: Vec<String>,
    /// Description of the accepted input format.
    pub input_format: CrystallizationInputFormat,
    /// Path the generated workflow was written to (set when artifacts are saved).
    pub harn_code_path: Option<String>,
    /// Path the eval pack was written to (set when a non-empty eval pack is saved).
    pub eval_pack_path: Option<String>,
    /// Optional segment summary; omitted from the serialized report when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub segment_summary: Option<SegmentSummary>,
    /// Optional recovery summary; omitted from the serialized report when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub recovery_summary: Option<RecoveryFeedbackSummary>,
}
275
/// Caller-supplied summary of deterministic vs. agentic segments, passed
/// through unchanged into the report by `synthesize_candidate_from_trace`.
// NOTE(review): the fields are populated outside the visible portion of the
// file; their exact semantics should be confirmed with the producer.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct SegmentSummary {
    pub deterministic_count: usize,
    pub agentic_count: usize,
    pub safe_to_automate: Vec<String>,
    pub requires_human_review: Vec<String>,
    pub plain_language: String,
}
288
/// Caller-supplied summary of failures and recovery advice, passed through
/// unchanged into the report by `synthesize_candidate_from_trace`.
// NOTE(review): the fields are populated outside the visible portion of the
// file; their exact semantics should be confirmed with the producer.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RecoveryFeedbackSummary {
    pub shell_failures_seen: usize,
    pub recovery_advice_runs: usize,
    pub failures_fed_into_agent: bool,
    pub failed_steps: Vec<String>,
    pub representation: String,
}
306
/// Describes the input format a report was produced from.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CrystallizationInputFormat {
    /// Format name; the entry points in this file use `"harn.crystallization.trace"`.
    pub name: String,
    /// Format version.
    pub version: u32,
    /// Names of the fields an input must provide.
    pub required_fields: Vec<String>,
    /// Human-readable list of the information preserved from the input.
    pub preserved_fields: Vec<String>,
}
315
/// Caller-tunable options for crystallization.
#[derive(Clone, Debug, Default)]
pub struct CrystallizeOptions {
    /// Minimum number of example traces; values below `DEFAULT_MIN_EXAMPLES` are raised to it.
    pub min_examples: usize,
    /// Explicit workflow name; when `None` the name is inferred from the steps.
    pub workflow_name: Option<String>,
    /// Explicit package name; when `None` it is the workflow name with `_` replaced by `-`.
    pub package_name: Option<String>,
    /// Author recorded in the promotion metadata.
    pub author: Option<String>,
    /// Approver recorded in the promotion metadata.
    pub approver: Option<String>,
    /// Eval-pack link recorded in the promotion metadata.
    pub eval_pack_link: Option<String>,
}
325
/// In-memory result of a crystallization run, before anything is written to disk.
#[derive(Clone, Debug, Default)]
pub struct CrystallizationArtifacts {
    /// The crystallization report.
    pub report: CrystallizationReport,
    /// Generated workflow source, or a stub when no candidate was accepted.
    pub harn_code: String,
    /// Generated eval pack; empty when no candidate was accepted.
    pub eval_pack_toml: String,
}
332
333pub fn crystallize_traces(
334 traces: Vec<CrystallizationTrace>,
335 options: CrystallizeOptions,
336) -> Result<CrystallizationArtifacts, VmError> {
337 let min_examples = options.min_examples.max(DEFAULT_MIN_EXAMPLES);
338 if traces.len() < min_examples {
339 return Err(VmError::Runtime(format!(
340 "crystallize requires at least {min_examples} traces, got {}",
341 traces.len()
342 )));
343 }
344
345 let normalized = traces.into_iter().map(normalize_trace).collect::<Vec<_>>();
346 let mut candidates = mine_candidates(&normalized, min_examples, &options);
347 let mut rejected_candidates = Vec::new();
348 for candidate in &mut candidates {
349 candidate.shadow = shadow_candidate(candidate, &normalized);
350 if !candidate.shadow.pass {
351 candidate
352 .rejection_reasons
353 .extend(candidate.shadow.failures.clone());
354 }
355 }
356
357 let mut accepted = Vec::new();
358 for candidate in candidates {
359 if candidate.is_safe_to_propose() {
360 accepted.push(candidate);
361 } else {
362 rejected_candidates.push(candidate);
363 }
364 }
365 accepted.sort_by(|left, right| {
366 right
367 .confidence
368 .partial_cmp(&left.confidence)
369 .unwrap_or(std::cmp::Ordering::Equal)
370 .then_with(|| right.steps.len().cmp(&left.steps.len()))
371 });
372
373 let selected = accepted.first();
374 let harn_code = selected
375 .map(generate_harn_code)
376 .unwrap_or_else(|| rejected_workflow_stub(&rejected_candidates));
377 let eval_pack_toml = selected.map(generate_eval_pack).unwrap_or_default();
378
379 let report = CrystallizationReport {
380 version: 1,
381 generated_at: now_rfc3339(),
382 source_trace_count: normalized.len(),
383 selected_candidate_id: selected.map(|candidate| candidate.id.clone()),
384 candidates: accepted,
385 rejected_candidates,
386 warnings: Vec::new(),
387 input_format: CrystallizationInputFormat {
388 name: "harn.crystallization.trace".to_string(),
389 version: TRACE_SCHEMA_VERSION,
390 required_fields: vec!["id".to_string(), "actions".to_string()],
391 preserved_fields: vec![
392 "ordered actions".to_string(),
393 "tool calls".to_string(),
394 "model calls".to_string(),
395 "human approvals".to_string(),
396 "file mutations".to_string(),
397 "external API calls".to_string(),
398 "observed outputs".to_string(),
399 "costs".to_string(),
400 "timestamps".to_string(),
401 "source hashes".to_string(),
402 "Flow provenance references".to_string(),
403 ],
404 },
405 harn_code_path: None,
406 eval_pack_path: None,
407 segment_summary: None,
408 recovery_summary: None,
409 };
410
411 Ok(CrystallizationArtifacts {
412 report,
413 harn_code,
414 eval_pack_toml,
415 })
416}
417
418pub fn synthesize_candidate_from_trace(
431 trace: CrystallizationTrace,
432 options: CrystallizeOptions,
433 extra_parameters: Vec<WorkflowCandidateParameter>,
434 segment_summary: Option<SegmentSummary>,
435 recovery_summary: Option<RecoveryFeedbackSummary>,
436) -> Result<CrystallizationArtifacts, VmError> {
437 if trace.actions.is_empty() {
438 return Err(VmError::Runtime(
439 "synthesize_candidate_from_trace requires a trace with at least one action".to_string(),
440 ));
441 }
442 let normalized = normalize_trace(trace);
443 let mut candidate = build_single_trace_candidate(&normalized, &options, extra_parameters);
444 candidate.shadow = shadow_candidate(&candidate, std::slice::from_ref(&normalized));
445 if !candidate.shadow.pass {
446 candidate
447 .rejection_reasons
448 .extend(candidate.shadow.failures.clone());
449 }
450 let (accepted, rejected) = if candidate.is_safe_to_propose() {
451 (vec![candidate], Vec::new())
452 } else {
453 (Vec::new(), vec![candidate])
454 };
455
456 let selected = accepted.first();
457 let harn_code = selected
458 .map(generate_harn_code)
459 .unwrap_or_else(|| rejected_workflow_stub(&rejected));
460 let eval_pack_toml = selected.map(generate_eval_pack).unwrap_or_default();
461 let selected_id = selected.map(|candidate| candidate.id.clone());
462
463 let report = CrystallizationReport {
464 version: 1,
465 generated_at: now_rfc3339(),
466 source_trace_count: 1,
467 selected_candidate_id: selected_id,
468 candidates: accepted,
469 rejected_candidates: rejected,
470 warnings: Vec::new(),
471 input_format: CrystallizationInputFormat {
472 name: "harn.crystallization.trace".to_string(),
473 version: TRACE_SCHEMA_VERSION,
474 required_fields: vec!["id".to_string(), "actions".to_string()],
475 preserved_fields: vec![
476 "ordered actions".to_string(),
477 "deterministic events".to_string(),
478 "agentic events".to_string(),
479 "tool/shell observations".to_string(),
480 "recovery advice".to_string(),
481 "release metadata".to_string(),
482 "source hashes".to_string(),
483 ],
484 },
485 harn_code_path: None,
486 eval_pack_path: None,
487 segment_summary,
488 recovery_summary,
489 };
490
491 Ok(CrystallizationArtifacts {
492 report,
493 harn_code,
494 eval_pack_toml,
495 })
496}
497
498fn build_single_trace_candidate(
504 trace: &CrystallizationTrace,
505 options: &CrystallizeOptions,
506 extra_parameters: Vec<WorkflowCandidateParameter>,
507) -> WorkflowCandidate {
508 let mut steps = Vec::with_capacity(trace.actions.len());
509 let mut parameter_values: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
510 let mut parameter_paths: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
511 let mut warnings = Vec::new();
512 let sequence: Vec<String> = trace.actions.iter().map(action_signature).collect();
513
514 for (idx, action) in trace.actions.iter().enumerate() {
515 let mut parameter_refs = BTreeSet::new();
516 for (name, value) in &action.parameters {
517 if is_scalar(value) {
518 let ident = sanitize_identifier(name);
519 parameter_values
520 .entry(ident.clone())
521 .or_default()
522 .insert(json_scalar_string(value));
523 parameter_paths
524 .entry(ident.clone())
525 .or_default()
526 .insert(format!("steps[{idx}].parameters.{name}"));
527 parameter_refs.insert(ident);
528 }
529 }
530
531 let fuzzy = action.fuzzy.unwrap_or(false) || action.kind == "model_call";
532 if fuzzy {
533 warnings.push(format!(
534 "step {} '{}' remains fuzzy and requires review/LLM handling",
535 idx + 1,
536 action.name
537 ));
538 }
539
540 steps.push(WorkflowCandidateStep {
541 index: idx + 1,
542 kind: action.kind.clone(),
543 name: action.name.clone(),
544 segment: if fuzzy {
545 SegmentKind::Fuzzy
546 } else {
547 SegmentKind::Deterministic
548 },
549 parameter_refs: parameter_refs.into_iter().collect(),
550 constants: constants_for_action(action),
551 preconditions: preconditions_for_action(action),
552 side_effects: action.side_effects.clone(),
553 capabilities: sorted_strings(action.capabilities.iter().cloned()),
554 required_secrets: sorted_strings(action.required_secrets.iter().cloned()),
555 approval: action.approval.clone(),
556 expected_output: action
557 .observed_output
558 .clone()
559 .or_else(|| action.output.clone()),
560 review_notes: review_notes_for_action(action),
561 });
562 }
563
564 let mut parameters: Vec<WorkflowCandidateParameter> = parameter_values
565 .iter()
566 .map(|(name, values)| WorkflowCandidateParameter {
567 name: name.clone(),
568 source_paths: parameter_paths
569 .get(name)
570 .map(|paths| paths.iter().cloned().collect())
571 .unwrap_or_default(),
572 examples: values.iter().take(5).cloned().collect(),
573 required: true,
574 })
575 .collect();
576 let extra_names: BTreeSet<String> = extra_parameters.iter().map(|p| p.name.clone()).collect();
581 parameters.retain(|p| !extra_names.contains(&p.name));
582 parameters.extend(extra_parameters);
583 parameters.sort_by(|left, right| left.name.cmp(&right.name));
584
585 let example_refs = vec![WorkflowCandidateExample {
586 trace_id: trace.id.clone(),
587 source_hash: trace.source_hash.clone().unwrap_or_default(),
588 start_index: 0,
589 action_ids: trace
590 .actions
591 .iter()
592 .map(|action| action.id.clone())
593 .collect(),
594 }];
595
596 let capabilities = sorted_strings(
597 steps
598 .iter()
599 .flat_map(|step| step.capabilities.iter().cloned()),
600 );
601 let required_secrets = sorted_strings(
602 steps
603 .iter()
604 .flat_map(|step| step.required_secrets.iter().cloned()),
605 );
606 let approval_points = steps
607 .iter()
608 .filter_map(|step| step.approval.clone())
609 .collect::<Vec<_>>();
610 let side_effects = sorted_side_effects(
611 steps
612 .iter()
613 .flat_map(|step| step.side_effects.iter().cloned())
614 .collect(),
615 );
616 let expected_outputs = steps
617 .iter()
618 .filter_map(|step| step.expected_output.clone())
619 .collect::<Vec<_>>();
620
621 let savings = estimate_savings(std::slice::from_ref(trace), &[(0, 0)], &steps);
622 let confidence = confidence_for(&[(0, 0)], 1, &steps, true);
623 let name = options
624 .workflow_name
625 .clone()
626 .unwrap_or_else(|| infer_workflow_name(&steps));
627 let package_name = options
628 .package_name
629 .clone()
630 .unwrap_or_else(|| name.replace('_', "-"));
631
632 WorkflowCandidate {
633 id: stable_candidate_id(&sequence, &example_refs),
634 name,
635 confidence,
636 sequence_signature: sequence,
637 parameters,
638 steps,
639 examples: example_refs.clone(),
640 capabilities: capabilities.clone(),
641 required_secrets: required_secrets.clone(),
642 approval_points,
643 side_effects,
644 expected_outputs,
645 warnings,
646 rejection_reasons: Vec::new(),
647 promotion: PromotionMetadata {
648 source_trace_hashes: example_refs
649 .iter()
650 .map(|example| example.source_hash.clone())
651 .collect(),
652 author: options.author.clone(),
653 approver: options.approver.clone(),
654 created_at: now_rfc3339(),
655 version: "0.1.0".to_string(),
656 package_name,
657 capability_set: capabilities,
658 secrets_required: required_secrets,
659 rollback_target: Some("keep source trace and previous package version".to_string()),
660 eval_pack_link: options.eval_pack_link.clone(),
661 },
662 savings,
663 shadow: ShadowRunReport::default(),
664 }
665}
666
667fn review_notes_for_action(action: &CrystallizationAction) -> Vec<String> {
668 let mut notes = Vec::new();
669 if action.kind == "shell_failure"
670 || matches!(
671 action
672 .metadata
673 .get("success")
674 .and_then(|value| value.as_bool()),
675 Some(false)
676 )
677 {
678 notes.push(format!(
679 "shell/tool step '{}' failed in the source trace; reviewer should confirm \
680 whether deterministic recovery is possible before promotion.",
681 action.name
682 ));
683 }
684 if let Some(hint) = action
685 .metadata
686 .get("recovery_hint")
687 .and_then(|value| value.as_str())
688 .filter(|hint| !hint.trim().is_empty())
689 {
690 notes.push(format!("recovery hint from source trace: {hint}"));
691 }
692 if action.kind == "agent_recovery_advice" {
693 notes.push(
694 "agent-authored recovery advice; treat as advisory, never as deterministic truth."
695 .to_string(),
696 );
697 }
698 notes
699}
700
701pub fn load_crystallization_traces_from_dir(
702 dir: &Path,
703) -> Result<Vec<CrystallizationTrace>, VmError> {
704 let mut paths = Vec::new();
705 collect_json_paths(dir, &mut paths)?;
706 if paths.is_empty() {
707 return Err(VmError::Runtime(format!(
708 "no .json trace files found under {}",
709 dir.display()
710 )));
711 }
712 paths.sort();
713 paths
714 .iter()
715 .map(|path| load_crystallization_trace(path))
716 .collect()
717}
718
719pub fn load_crystallization_trace(path: &Path) -> Result<CrystallizationTrace, VmError> {
720 let content = std::fs::read_to_string(path).map_err(|error| {
721 VmError::Runtime(format!(
722 "failed to read crystallization trace {}: {error}",
723 path.display()
724 ))
725 })?;
726 let value: JsonValue = serde_json::from_str(&content).map_err(|error| {
727 VmError::Runtime(format!(
728 "failed to parse crystallization trace {}: {error}",
729 path.display()
730 ))
731 })?;
732
733 let mut trace = if value.get("actions").is_some() {
734 serde_json::from_value::<CrystallizationTrace>(value.clone()).map_err(|error| {
735 VmError::Runtime(format!(
736 "failed to decode crystallization trace {}: {error}",
737 path.display()
738 ))
739 })?
740 } else if value.get("stages").is_some() || value.get("_type") == Some(&json!("workflow_run")) {
741 let run: RunRecord = serde_json::from_value(value.clone()).map_err(|error| {
742 VmError::Runtime(format!(
743 "failed to decode run record {} as crystallization input: {error}",
744 path.display()
745 ))
746 })?;
747 trace_from_run_record(run)
748 } else {
749 return Err(VmError::Runtime(format!(
750 "{} is neither a crystallization trace nor a workflow run record",
751 path.display()
752 )));
753 };
754 if trace.source.is_none() {
755 trace.source = Some(path.display().to_string());
756 }
757 if trace.source_hash.is_none() {
758 trace.source_hash = Some(hash_bytes(content.as_bytes()));
759 }
760 Ok(normalize_trace(trace))
761}
762
763pub fn write_crystallization_artifacts(
764 mut artifacts: CrystallizationArtifacts,
765 workflow_path: &Path,
766 report_path: &Path,
767 eval_pack_path: Option<&Path>,
768) -> Result<CrystallizationReport, VmError> {
769 crate::atomic_io::atomic_write(workflow_path, artifacts.harn_code.as_bytes()).map_err(
770 |error| {
771 VmError::Runtime(format!(
772 "failed to write generated workflow {}: {error}",
773 workflow_path.display()
774 ))
775 },
776 )?;
777
778 artifacts.report.harn_code_path = Some(workflow_path.display().to_string());
779 if let Some(path) = eval_pack_path {
780 if !artifacts.eval_pack_toml.trim().is_empty() {
781 crate::atomic_io::atomic_write(path, artifacts.eval_pack_toml.as_bytes()).map_err(
782 |error| {
783 VmError::Runtime(format!(
784 "failed to write eval pack {}: {error}",
785 path.display()
786 ))
787 },
788 )?;
789 artifacts.report.eval_pack_path = Some(path.display().to_string());
790 if let Some(candidate) = artifacts.report.candidates.first_mut() {
791 candidate.promotion.eval_pack_link = Some(path.display().to_string());
792 }
793 }
794 }
795
796 let report_json = serde_json::to_string_pretty(&artifacts.report)
797 .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
798 crate::atomic_io::atomic_write(report_path, report_json.as_bytes()).map_err(|error| {
799 VmError::Runtime(format!(
800 "failed to write crystallization report {}: {error}",
801 report_path.display()
802 ))
803 })?;
804 Ok(artifacts.report)
805}
806
807fn collect_json_paths(dir: &Path, out: &mut Vec<PathBuf>) -> Result<(), VmError> {
808 let entries = std::fs::read_dir(dir).map_err(|error| {
809 VmError::Runtime(format!(
810 "failed to read crystallization trace dir {}: {error}",
811 dir.display()
812 ))
813 })?;
814 for entry in entries {
815 let entry = entry.map_err(|error| {
816 VmError::Runtime(format!(
817 "failed to read entry in trace dir {}: {error}",
818 dir.display()
819 ))
820 })?;
821 let path = entry.path();
822 if path.is_dir() {
823 collect_json_paths(&path, out)?;
824 } else if path.extension().and_then(|ext| ext.to_str()) == Some("json") {
825 out.push(path);
826 }
827 }
828 Ok(())
829}
830
831fn trace_from_run_record(run: RunRecord) -> CrystallizationTrace {
832 let mut actions = Vec::new();
833 for stage in &run.stages {
834 actions.push(CrystallizationAction {
835 id: stage.id.clone(),
836 kind: if stage.kind.is_empty() {
837 "stage".to_string()
838 } else {
839 stage.kind.clone()
840 },
841 name: stage.node_id.clone(),
842 timestamp: Some(stage.started_at.clone()),
843 output: stage.visible_text.as_ref().map(|text| json!(text)),
844 observed_output: stage.visible_text.as_ref().map(|text| json!(text)),
845 duration_ms: stage.usage.as_ref().map(|usage| usage.total_duration_ms),
846 cost: stage
847 .usage
848 .as_ref()
849 .map(|usage| CrystallizationCost {
850 model_calls: usage.call_count,
851 input_tokens: usage.input_tokens,
852 output_tokens: usage.output_tokens,
853 total_cost_usd: usage.total_cost,
854 wall_ms: usage.total_duration_ms,
855 model: usage.models.first().cloned(),
856 })
857 .unwrap_or_default(),
858 deterministic: Some(
859 stage
860 .usage
861 .as_ref()
862 .map(|usage| usage.call_count == 0)
863 .unwrap_or(true),
864 ),
865 fuzzy: Some(
866 stage
867 .usage
868 .as_ref()
869 .is_some_and(|usage| usage.call_count > 0),
870 ),
871 metadata: stage.metadata.clone(),
872 ..CrystallizationAction::default()
873 });
874 }
875 for tool in &run.tool_recordings {
876 actions.push(CrystallizationAction {
877 id: tool.tool_use_id.clone(),
878 kind: "tool_call".to_string(),
879 name: tool.tool_name.clone(),
880 timestamp: Some(tool.timestamp.clone()),
881 output: Some(json!(tool.result)),
882 observed_output: Some(json!(tool.result)),
883 duration_ms: Some(tool.duration_ms as i64),
884 deterministic: Some(!tool.is_rejected),
885 fuzzy: Some(false),
886 metadata: BTreeMap::from([
887 ("args_hash".to_string(), json!(tool.args_hash)),
888 ("iteration".to_string(), json!(tool.iteration)),
889 ("is_rejected".to_string(), json!(tool.is_rejected)),
890 ]),
891 ..CrystallizationAction::default()
892 });
893 }
894 for question in &run.hitl_questions {
895 actions.push(CrystallizationAction {
896 id: question.request_id.clone(),
897 kind: "human_approval".to_string(),
898 name: question.agent.clone(),
899 timestamp: Some(question.asked_at.clone()),
900 approval: Some(CrystallizationApproval {
901 prompt: Some(question.prompt.clone()),
902 required: true,
903 boundary: Some("hitl".to_string()),
904 ..CrystallizationApproval::default()
905 }),
906 deterministic: Some(false),
907 fuzzy: Some(false),
908 metadata: question
909 .trace_id
910 .as_ref()
911 .map(|trace_id| BTreeMap::from([("trace_id".to_string(), json!(trace_id))]))
912 .unwrap_or_default(),
913 ..CrystallizationAction::default()
914 });
915 }
916 actions.sort_by(|left, right| left.timestamp.cmp(&right.timestamp));
917
918 CrystallizationTrace {
919 version: TRACE_SCHEMA_VERSION,
920 id: run.id.clone(),
921 workflow_id: Some(run.workflow_id.clone()),
922 started_at: Some(run.started_at.clone()),
923 finished_at: run.finished_at.clone(),
924 actions,
925 usage: run
926 .usage
927 .map(|usage| CrystallizationUsage {
928 model_calls: usage.call_count,
929 input_tokens: usage.input_tokens,
930 output_tokens: usage.output_tokens,
931 total_cost_usd: usage.total_cost,
932 wall_ms: usage.total_duration_ms,
933 })
934 .unwrap_or_default(),
935 metadata: run.metadata.clone(),
936 ..CrystallizationTrace::default()
937 }
938}
939
940fn normalize_trace(mut trace: CrystallizationTrace) -> CrystallizationTrace {
941 if trace.version == 0 {
942 trace.version = TRACE_SCHEMA_VERSION;
943 }
944 if trace.id.trim().is_empty() {
945 trace.id = new_id("trace");
946 }
947 if trace.source_hash.is_none() {
948 let payload = serde_json::to_vec(&trace.actions).unwrap_or_default();
949 trace.source_hash = Some(hash_bytes(&payload));
950 }
951 for (idx, action) in trace.actions.iter_mut().enumerate() {
952 if action.id.trim().is_empty() {
953 action.id = format!("action_{}", idx + 1);
954 }
955 if action.kind.trim().is_empty() {
956 action.kind = "action".to_string();
957 }
958 if action.name.trim().is_empty() {
959 action.name = action.kind.clone();
960 }
961 action.capabilities.sort();
962 action.capabilities.dedup();
963 action.required_secrets.sort();
964 action.required_secrets.dedup();
965 action.side_effects = sorted_side_effects(std::mem::take(&mut action.side_effects));
966 if action.cost.model_calls == 0 && action.kind == "model_call" {
967 action.cost.model_calls = 1;
968 }
969 }
970 if trace.usage == CrystallizationUsage::default() {
971 for action in &trace.actions {
972 trace.usage.model_calls += action.cost.model_calls;
973 trace.usage.input_tokens += action.cost.input_tokens;
974 trace.usage.output_tokens += action.cost.output_tokens;
975 trace.usage.total_cost_usd += action.cost.total_cost_usd;
976 trace.usage.wall_ms += action.cost.wall_ms + action.duration_ms.unwrap_or_default();
977 }
978 }
979 trace
980}
981
982fn mine_candidates(
983 traces: &[CrystallizationTrace],
984 min_examples: usize,
985 options: &CrystallizeOptions,
986) -> Vec<WorkflowCandidate> {
987 let signatures = traces
988 .iter()
989 .map(|trace| {
990 trace
991 .actions
992 .iter()
993 .map(action_signature)
994 .collect::<Vec<_>>()
995 })
996 .collect::<Vec<_>>();
997 let Some((sequence, examples)) = best_repeated_sequence(&signatures, min_examples) else {
998 return Vec::new();
999 };
1000
1001 let mut example_refs = Vec::new();
1002 for (trace_index, start_index) in &examples {
1003 let trace = &traces[*trace_index];
1004 example_refs.push(WorkflowCandidateExample {
1005 trace_id: trace.id.clone(),
1006 source_hash: trace.source_hash.clone().unwrap_or_default(),
1007 start_index: *start_index,
1008 action_ids: trace.actions[*start_index..*start_index + sequence.len()]
1009 .iter()
1010 .map(|action| action.id.clone())
1011 .collect(),
1012 });
1013 }
1014
1015 let mut steps = Vec::new();
1016 let mut parameter_values: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
1017 let mut parameter_paths: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
1018 let mut rejection_reasons = Vec::new();
1019 let mut warnings = Vec::new();
1020
1021 for step_index in 0..sequence.len() {
1022 let actions = examples
1023 .iter()
1024 .map(|(trace_index, start_index)| {
1025 &traces[*trace_index].actions[*start_index + step_index]
1026 })
1027 .collect::<Vec<_>>();
1028 let first = actions[0];
1029 if !compatible_side_effects(&actions) {
1030 rejection_reasons.push(format!(
1031 "step {} '{}' has divergent side effects across examples",
1032 step_index + 1,
1033 first.name
1034 ));
1035 }
1036
1037 let mut parameter_refs = BTreeSet::new();
1038 for action in &actions {
1039 for (name, value) in &action.parameters {
1040 if is_scalar(value) {
1041 parameter_values
1042 .entry(sanitize_identifier(name))
1043 .or_default()
1044 .insert(json_scalar_string(value));
1045 parameter_paths
1046 .entry(sanitize_identifier(name))
1047 .or_default()
1048 .insert(format!("steps[{step_index}].parameters.{name}"));
1049 parameter_refs.insert(sanitize_identifier(name));
1050 }
1051 }
1052 }
1053 collect_varying_parameters(
1054 &actions,
1055 "inputs",
1056 |action| &action.inputs,
1057 &mut parameter_values,
1058 &mut parameter_paths,
1059 &mut parameter_refs,
1060 );
1061
1062 let fuzzy = first.fuzzy.unwrap_or(false)
1063 || first.kind == "model_call"
1064 || actions.iter().any(|action| action.fuzzy.unwrap_or(false));
1065 if fuzzy {
1066 warnings.push(format!(
1067 "step {} '{}' remains fuzzy and requires review/LLM handling",
1068 step_index + 1,
1069 first.name
1070 ));
1071 }
1072
1073 steps.push(WorkflowCandidateStep {
1074 index: step_index + 1,
1075 kind: first.kind.clone(),
1076 name: first.name.clone(),
1077 segment: if fuzzy {
1078 SegmentKind::Fuzzy
1079 } else {
1080 SegmentKind::Deterministic
1081 },
1082 parameter_refs: parameter_refs.into_iter().collect(),
1083 constants: constants_for_action(first),
1084 preconditions: preconditions_for_action(first),
1085 side_effects: first.side_effects.clone(),
1086 capabilities: sorted_strings(first.capabilities.iter().cloned()),
1087 required_secrets: sorted_strings(first.required_secrets.iter().cloned()),
1088 approval: first.approval.clone(),
1089 expected_output: stable_expected_output(&actions),
1090 review_notes: Vec::new(),
1091 });
1092 }
1093
1094 let parameters = parameter_values
1095 .iter()
1096 .map(|(name, values)| WorkflowCandidateParameter {
1097 name: name.clone(),
1098 source_paths: parameter_paths
1099 .get(name)
1100 .map(|paths| paths.iter().cloned().collect())
1101 .unwrap_or_default(),
1102 examples: values.iter().take(5).cloned().collect(),
1103 required: true,
1104 })
1105 .collect::<Vec<_>>();
1106
1107 let capabilities = sorted_strings(
1108 steps
1109 .iter()
1110 .flat_map(|step| step.capabilities.iter().cloned()),
1111 );
1112 let required_secrets = sorted_strings(
1113 steps
1114 .iter()
1115 .flat_map(|step| step.required_secrets.iter().cloned()),
1116 );
1117 let approval_points = steps
1118 .iter()
1119 .filter_map(|step| step.approval.clone())
1120 .collect::<Vec<_>>();
1121 let side_effects = sorted_side_effects(
1122 steps
1123 .iter()
1124 .flat_map(|step| step.side_effects.iter().cloned())
1125 .collect(),
1126 );
1127 let expected_outputs = steps
1128 .iter()
1129 .filter_map(|step| step.expected_output.clone())
1130 .collect::<Vec<_>>();
1131 let savings = estimate_savings(traces, &examples, &steps);
1132 let confidence = confidence_for(
1133 &examples,
1134 traces.len(),
1135 &steps,
1136 rejection_reasons.is_empty(),
1137 );
1138 let name = options
1139 .workflow_name
1140 .clone()
1141 .unwrap_or_else(|| infer_workflow_name(&steps));
1142 let package_name = options
1143 .package_name
1144 .clone()
1145 .unwrap_or_else(|| name.replace('_', "-"));
1146
1147 vec![WorkflowCandidate {
1148 id: stable_candidate_id(&sequence, &example_refs),
1149 name,
1150 confidence,
1151 sequence_signature: sequence,
1152 parameters,
1153 steps,
1154 examples: example_refs.clone(),
1155 capabilities: capabilities.clone(),
1156 required_secrets: required_secrets.clone(),
1157 approval_points,
1158 side_effects,
1159 expected_outputs,
1160 warnings,
1161 rejection_reasons,
1162 promotion: PromotionMetadata {
1163 source_trace_hashes: example_refs
1164 .iter()
1165 .map(|example| example.source_hash.clone())
1166 .collect(),
1167 author: options.author.clone(),
1168 approver: options.approver.clone(),
1169 created_at: now_rfc3339(),
1170 version: "0.1.0".to_string(),
1171 package_name,
1172 capability_set: capabilities,
1173 secrets_required: required_secrets,
1174 rollback_target: Some("keep source traces and previous package version".to_string()),
1175 eval_pack_link: options.eval_pack_link.clone(),
1176 },
1177 savings,
1178 shadow: ShadowRunReport::default(),
1179 }]
1180}
1181
1182fn best_repeated_sequence(
1183 signatures: &[Vec<String>],
1184 min_examples: usize,
1185) -> Option<RepeatedSequence> {
1186 let mut occurrences: BTreeMap<Vec<String>, Vec<(usize, usize)>> = BTreeMap::new();
1187 for (trace_index, trace_signatures) in signatures.iter().enumerate() {
1188 for start in 0..trace_signatures.len() {
1189 let max_len = (trace_signatures.len() - start).min(12);
1190 for len in 2..=max_len {
1191 let sequence = trace_signatures[start..start + len].to_vec();
1192 occurrences
1193 .entry(sequence)
1194 .or_default()
1195 .push((trace_index, start));
1196 }
1197 }
1198 }
1199
1200 occurrences
1201 .into_iter()
1202 .filter_map(|(sequence, positions)| {
1203 let mut seen = BTreeSet::new();
1204 let mut examples = Vec::new();
1205 for (trace_index, start) in positions {
1206 if seen.insert(trace_index) {
1207 examples.push((trace_index, start));
1208 }
1209 }
1210 if examples.len() >= min_examples {
1211 Some((sequence, examples))
1212 } else {
1213 None
1214 }
1215 })
1216 .max_by(
1217 |(left_sequence, left_examples), (right_sequence, right_examples)| {
1218 left_examples
1219 .len()
1220 .cmp(&right_examples.len())
1221 .then_with(|| left_sequence.len().cmp(&right_sequence.len()))
1222 },
1223 )
1224}
1225
1226fn action_signature(action: &CrystallizationAction) -> String {
1227 let mut parameter_keys = action.parameters.keys().cloned().collect::<Vec<_>>();
1228 parameter_keys.sort();
1229 format!(
1230 "{}:{}:{}",
1231 action.kind,
1232 action.name,
1233 parameter_keys.join(",")
1234 )
1235}
1236
1237fn compatible_side_effects(actions: &[&CrystallizationAction]) -> bool {
1238 let first = sorted_side_effects(actions[0].side_effects.clone());
1239 actions
1240 .iter()
1241 .skip(1)
1242 .all(|action| sorted_side_effects(action.side_effects.clone()) == first)
1243}
1244
1245fn collect_varying_parameters(
1246 actions: &[&CrystallizationAction],
1247 root: &str,
1248 value_for: impl Fn(&CrystallizationAction) -> &JsonValue,
1249 parameter_values: &mut BTreeMap<String, BTreeSet<String>>,
1250 parameter_paths: &mut BTreeMap<String, BTreeSet<String>>,
1251 parameter_refs: &mut BTreeSet<String>,
1252) {
1253 let mut paths = BTreeMap::<String, Vec<JsonValue>>::new();
1254 for action in actions {
1255 collect_scalar_paths(value_for(action), root, &mut paths);
1256 }
1257 for (path, values) in paths {
1258 if values.len() != actions.len() {
1259 continue;
1260 }
1261 let unique = values
1262 .iter()
1263 .map(json_scalar_string)
1264 .collect::<BTreeSet<_>>();
1265 if unique.len() < 2 {
1266 continue;
1267 }
1268 let name = parameter_name_for_path(&path);
1269 parameter_values
1270 .entry(name.clone())
1271 .or_default()
1272 .extend(unique);
1273 parameter_paths
1274 .entry(name.clone())
1275 .or_default()
1276 .insert(path);
1277 parameter_refs.insert(name);
1278 }
1279}
1280
1281fn collect_scalar_paths(
1282 value: &JsonValue,
1283 prefix: &str,
1284 out: &mut BTreeMap<String, Vec<JsonValue>>,
1285) {
1286 match value {
1287 JsonValue::Object(map) => {
1288 for (key, child) in map {
1289 collect_scalar_paths(child, &format!("{prefix}.{key}"), out);
1290 }
1291 }
1292 JsonValue::Array(items) => {
1293 for (idx, child) in items.iter().enumerate() {
1294 collect_scalar_paths(child, &format!("{prefix}[{idx}]"), out);
1295 }
1296 }
1297 _ if is_scalar(value) => {
1298 out.entry(prefix.to_string())
1299 .or_default()
1300 .push(value.clone());
1301 }
1302 _ => {}
1303 }
1304}
1305
1306fn parameter_name_for_path(path: &str) -> String {
1307 let lower = path.to_ascii_lowercase();
1308 for (needle, name) in [
1309 ("version", "version"),
1310 ("repo_path", "repo_path"),
1311 ("repo", "repo_path"),
1312 ("branch_name", "branch_name"),
1313 ("branch", "branch_name"),
1314 ("release_target", "release_target"),
1315 ("target", "release_target"),
1316 ] {
1317 if lower.contains(needle) {
1318 return name.to_string();
1319 }
1320 }
1321 let tail = path
1322 .rsplit(['.', '['])
1323 .next()
1324 .unwrap_or("param")
1325 .trim_end_matches(']');
1326 sanitize_identifier(tail)
1327}
1328
1329fn constants_for_action(action: &CrystallizationAction) -> BTreeMap<String, JsonValue> {
1330 let mut constants = BTreeMap::new();
1331 constants.insert("kind".to_string(), json!(action.kind));
1332 constants.insert("name".to_string(), json!(action.name));
1333 if action.deterministic.unwrap_or(false) {
1334 constants.insert("deterministic".to_string(), json!(true));
1335 }
1336 constants
1337}
1338
1339fn preconditions_for_action(action: &CrystallizationAction) -> Vec<String> {
1340 let mut out = Vec::new();
1341 for capability in &action.capabilities {
1342 out.push(format!("capability '{capability}' is available"));
1343 }
1344 for secret in &action.required_secrets {
1345 out.push(format!("secret '{secret}' is configured"));
1346 }
1347 if let Some(approval) = &action.approval {
1348 if approval.required {
1349 out.push("human approval boundary is preserved".to_string());
1350 }
1351 }
1352 out
1353}
1354
1355fn stable_expected_output(actions: &[&CrystallizationAction]) -> Option<JsonValue> {
1356 let first = actions[0]
1357 .observed_output
1358 .as_ref()
1359 .or(actions[0].output.as_ref())?;
1360 if actions
1361 .iter()
1362 .all(|action| action.observed_output.as_ref().or(action.output.as_ref()) == Some(first))
1363 {
1364 Some(first.clone())
1365 } else {
1366 None
1367 }
1368}
1369
1370fn shadow_candidate(
1371 candidate: &WorkflowCandidate,
1372 traces: &[CrystallizationTrace],
1373) -> ShadowRunReport {
1374 let mut failures = Vec::new();
1375 let mut results = Vec::new();
1376 for example in &candidate.examples {
1377 let Some(trace) = traces.iter().find(|trace| trace.id == example.trace_id) else {
1378 failures.push(format!("missing source trace {}", example.trace_id));
1379 continue;
1380 };
1381 let mut details = Vec::new();
1382 let end = example.start_index + candidate.steps.len();
1383 if end > trace.actions.len() {
1384 details.push("candidate sequence extends past trace action list".to_string());
1385 } else {
1386 let signatures = trace.actions[example.start_index..end]
1387 .iter()
1388 .map(action_signature)
1389 .collect::<Vec<_>>();
1390 if signatures != candidate.sequence_signature {
1391 details.push("action sequence signature changed".to_string());
1392 }
1393 for (offset, step) in candidate.steps.iter().enumerate() {
1394 let action = &trace.actions[example.start_index + offset];
1395 if sorted_side_effects(action.side_effects.clone()) != step.side_effects {
1396 details.push(format!(
1397 "step {} side effects differ for action {}",
1398 step.index, action.id
1399 ));
1400 }
1401 if action.approval.as_ref().map(|approval| approval.required)
1402 != step.approval.as_ref().map(|approval| approval.required)
1403 {
1404 details.push(format!("step {} approval boundary differs", step.index));
1405 }
1406 if step.segment == SegmentKind::Deterministic {
1407 if let Some(expected) = &step.expected_output {
1408 let actual = action.observed_output.as_ref().or(action.output.as_ref());
1409 if actual != Some(expected) {
1410 details
1411 .push(format!("step {} deterministic output differs", step.index));
1412 }
1413 }
1414 }
1415 }
1416 }
1417 let pass = details.is_empty();
1418 if !pass {
1419 failures.push(format!("trace {} failed shadow comparison", trace.id));
1420 }
1421 results.push(ShadowTraceResult {
1422 trace_id: trace.id.clone(),
1423 pass,
1424 details,
1425 });
1426 }
1427 ShadowRunReport {
1428 pass: failures.is_empty(),
1429 compared_traces: results.len(),
1430 failures,
1431 traces: results,
1432 }
1433}
1434
1435fn estimate_savings(
1436 traces: &[CrystallizationTrace],
1437 examples: &[(usize, usize)],
1438 steps: &[WorkflowCandidateStep],
1439) -> SavingsEstimate {
1440 let mut estimate = SavingsEstimate::default();
1441 for (trace_index, start_index) in examples {
1442 let trace = &traces[*trace_index];
1443 for action in &trace.actions[*start_index..*start_index + steps.len()] {
1444 if action.kind == "model_call" || action.fuzzy.unwrap_or(false) {
1445 estimate.remaining_model_calls += action.cost.model_calls.max(1);
1446 } else {
1447 estimate.model_calls_avoided += action.cost.model_calls;
1448 estimate.input_tokens_avoided += action.cost.input_tokens;
1449 estimate.output_tokens_avoided += action.cost.output_tokens;
1450 estimate.estimated_cost_usd_avoided += action.cost.total_cost_usd;
1451 estimate.wall_ms_avoided +=
1452 action.cost.wall_ms + action.duration_ms.unwrap_or_default();
1453 }
1454 }
1455 }
1456 estimate.cpu_runtime_cost_usd = 0.0;
1457 estimate
1458}
1459
1460fn confidence_for(
1461 examples: &[(usize, usize)],
1462 trace_count: usize,
1463 steps: &[WorkflowCandidateStep],
1464 safe: bool,
1465) -> f64 {
1466 if !safe || trace_count == 0 {
1467 return 0.0;
1468 }
1469 let coverage = examples.len() as f64 / trace_count as f64;
1470 let deterministic = steps
1471 .iter()
1472 .filter(|step| step.segment == SegmentKind::Deterministic)
1473 .count() as f64
1474 / steps.len().max(1) as f64;
1475 ((coverage * 0.65) + (deterministic * 0.35)).min(0.99)
1476}
1477
1478fn infer_workflow_name(steps: &[WorkflowCandidateStep]) -> String {
1479 let names = steps
1480 .iter()
1481 .map(|step| step.name.to_ascii_lowercase())
1482 .collect::<Vec<_>>()
1483 .join("_");
1484 if names.contains("version") || names.contains("release") {
1485 "crystallized_version_bump".to_string()
1486 } else {
1487 "crystallized_workflow".to_string()
1488 }
1489}
1490
1491pub fn generate_harn_code(candidate: &WorkflowCandidate) -> String {
1492 let mut out = String::new();
1493 let params = if candidate.parameters.is_empty() {
1494 "task".to_string()
1495 } else {
1496 candidate
1497 .parameters
1498 .iter()
1499 .map(|parameter| parameter.name.as_str())
1500 .collect::<Vec<_>>()
1501 .join(", ")
1502 };
1503 writeln!(out, "/**").unwrap();
1504 writeln!(
1505 out,
1506 " * Generated by harn crystallize. Review before promotion."
1507 )
1508 .unwrap();
1509 writeln!(out, " * Candidate: {}", candidate.id).unwrap();
1510 writeln!(
1511 out,
1512 " * Source trace hashes: {}",
1513 candidate.promotion.source_trace_hashes.join(", ")
1514 )
1515 .unwrap();
1516 writeln!(
1517 out,
1518 " * Capabilities: {}",
1519 if candidate.capabilities.is_empty() {
1520 "none".to_string()
1521 } else {
1522 candidate.capabilities.join(", ")
1523 }
1524 )
1525 .unwrap();
1526 writeln!(
1527 out,
1528 " * Required secrets: {}",
1529 if candidate.required_secrets.is_empty() {
1530 "none".to_string()
1531 } else {
1532 candidate.required_secrets.join(", ")
1533 }
1534 )
1535 .unwrap();
1536 writeln!(out, " */").unwrap();
1537 writeln!(out, "pipeline {}({}) {{", candidate.name, params).unwrap();
1538 writeln!(out, " let review_warnings = []").unwrap();
1539 for step in &candidate.steps {
1540 writeln!(out, " // Step {}: {} {}", step.index, step.kind, step.name).unwrap();
1541 for side_effect in &step.side_effects {
1542 writeln!(
1543 out,
1544 " // side_effect: {} {}",
1545 side_effect.kind, side_effect.target
1546 )
1547 .unwrap();
1548 }
1549 if let Some(approval) = &step.approval {
1550 if approval.required {
1551 writeln!(
1552 out,
1553 " // approval_required: {}",
1554 approval
1555 .boundary
1556 .clone()
1557 .unwrap_or_else(|| "human_review".to_string())
1558 )
1559 .unwrap();
1560 }
1561 }
1562 if step.segment == SegmentKind::Fuzzy {
1563 writeln!(
1564 out,
1565 " // TODO: fuzzy segment still needs LLM/reviewer handling before deterministic promotion."
1566 )
1567 .unwrap();
1568 writeln!(
1569 out,
1570 " review_warnings.push(\"fuzzy step: {}\")",
1571 escape_harn_string(&step.name)
1572 )
1573 .unwrap();
1574 }
1575 writeln!(
1576 out,
1577 " log(\"crystallized step {}: {}\")",
1578 step.index,
1579 escape_harn_string(&step.name)
1580 )
1581 .unwrap();
1582 }
1583 writeln!(
1584 out,
1585 " return {{status: \"shadow_ready\", candidate_id: \"{}\", review_warnings: review_warnings}}",
1586 escape_harn_string(&candidate.id)
1587 )
1588 .unwrap();
1589 writeln!(out, "}}").unwrap();
1590 out
1591}
1592
1593fn rejected_workflow_stub(rejected: &[WorkflowCandidate]) -> String {
1594 let mut out = String::new();
1595 writeln!(
1596 out,
1597 "// Generated by harn crystallize. No safe candidate was proposed."
1598 )
1599 .unwrap();
1600 writeln!(out, "pipeline crystallized_workflow(task) {{").unwrap();
1601 writeln!(out, " log(\"no safe crystallization candidate\")").unwrap();
1602 writeln!(
1603 out,
1604 " return {{status: \"rejected\", rejected_candidates: {}}}",
1605 rejected.len()
1606 )
1607 .unwrap();
1608 writeln!(out, "}}").unwrap();
1609 out
1610}
1611
1612pub fn generate_eval_pack(candidate: &WorkflowCandidate) -> String {
1613 let mut out = String::new();
1614 writeln!(out, "version = 1").unwrap();
1615 writeln!(
1616 out,
1617 "id = \"{}-crystallization\"",
1618 candidate.promotion.package_name
1619 )
1620 .unwrap();
1621 writeln!(
1622 out,
1623 "name = \"{} crystallization shadow evals\"",
1624 candidate.name
1625 )
1626 .unwrap();
1627 writeln!(out).unwrap();
1628 writeln!(out, "[package]").unwrap();
1629 writeln!(out, "name = \"{}\"", candidate.promotion.package_name).unwrap();
1630 writeln!(out, "version = \"{}\"", candidate.promotion.version).unwrap();
1631 writeln!(out).unwrap();
1632 for example in &candidate.examples {
1633 writeln!(out, "[[fixtures]]").unwrap();
1634 writeln!(out, "id = \"{}\"", example.trace_id).unwrap();
1635 writeln!(out, "kind = \"jsonl-trace\"").unwrap();
1636 writeln!(out, "trace_id = \"{}\"", example.trace_id).unwrap();
1637 writeln!(out).unwrap();
1638 }
1639 writeln!(out, "[[rubrics]]").unwrap();
1640 writeln!(out, "id = \"shadow-determinism\"").unwrap();
1641 writeln!(out, "kind = \"deterministic\"").unwrap();
1642 writeln!(out).unwrap();
1643 writeln!(out, "[[rubrics.assertions]]").unwrap();
1644 writeln!(out, "kind = \"crystallization-shadow\"").unwrap();
1645 writeln!(
1646 out,
1647 "expected = {{ candidate_id = \"{}\", pass = true }}",
1648 candidate.id
1649 )
1650 .unwrap();
1651 writeln!(out).unwrap();
1652 writeln!(out, "[[cases]]").unwrap();
1653 writeln!(out, "id = \"{}-shadow\"", candidate.name).unwrap();
1654 writeln!(out, "name = \"{} shadow replay\"", candidate.name).unwrap();
1655 writeln!(out, "rubrics = [\"shadow-determinism\"]").unwrap();
1656 writeln!(out, "severity = \"blocking\"").unwrap();
1657 out
1658}
1659
1660fn stable_candidate_id(sequence: &[String], examples: &[WorkflowCandidateExample]) -> String {
1661 let mut hasher = Sha256::new();
1662 for item in sequence {
1663 hasher.update(item.as_bytes());
1664 hasher.update([0]);
1665 }
1666 for example in examples {
1667 hasher.update(example.source_hash.as_bytes());
1668 hasher.update([0]);
1669 }
1670 format!("candidate_{}", hex_prefix(hasher.finalize().as_slice(), 16))
1671}
1672
1673fn hash_bytes(bytes: &[u8]) -> String {
1674 let mut hasher = Sha256::new();
1675 hasher.update(bytes);
1676 format!("sha256:{}", hex::encode(hasher.finalize()))
1677}
1678
1679fn hex_prefix(bytes: &[u8], chars: usize) -> String {
1680 hex::encode(bytes).chars().take(chars).collect::<String>()
1681}
1682
/// Returns the distinct, non-blank items in ascending order. Items that are
/// empty or whitespace-only are dropped; kept items are not trimmed.
fn sorted_strings(items: impl Iterator<Item = String>) -> Vec<String> {
    items
        .filter(|item| !item.trim().is_empty())
        .collect::<BTreeSet<_>>()
        .into_iter()
        .collect()
}
1688
1689fn sorted_side_effects(items: Vec<CrystallizationSideEffect>) -> Vec<CrystallizationSideEffect> {
1690 let mut items = items
1691 .into_iter()
1692 .filter(|item| !item.kind.trim().is_empty() || !item.target.trim().is_empty())
1693 .collect::<Vec<_>>();
1694 items.sort_by_key(side_effect_sort_key);
1695 items.dedup_by(|left, right| side_effect_sort_key(left) == side_effect_sort_key(right));
1696 items
1697}
1698
1699fn side_effect_sort_key(item: &CrystallizationSideEffect) -> String {
1700 format!(
1701 "{}\x1f{}\x1f{}\x1f{}",
1702 item.kind,
1703 item.target,
1704 item.capability.clone().unwrap_or_default(),
1705 item.mutation.clone().unwrap_or_default()
1706 )
1707}
1708
1709fn is_scalar(value: &JsonValue) -> bool {
1710 matches!(
1711 value,
1712 JsonValue::String(_) | JsonValue::Number(_) | JsonValue::Bool(_) | JsonValue::Null
1713 )
1714}
1715
1716fn json_scalar_string(value: &JsonValue) -> String {
1717 match value {
1718 JsonValue::String(value) => value.clone(),
1719 other => other.to_string(),
1720 }
1721}
1722
/// Turns arbitrary text into a lowercase identifier.
///
/// ASCII letters, digits and `_` are kept (letters lowercased); every other
/// character becomes `_`, without producing two `_` in a row from such
/// replacements. Leading and trailing `_` are stripped. An empty result is
/// replaced by `param`, and a result starting with a digit is prefixed with
/// `_` so that it remains a valid identifier.
///
/// The digit guard is applied after trimming: adding the `_` before the trim
/// would just have it stripped off again.
fn sanitize_identifier(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    for ch in raw.chars() {
        if ch.is_ascii_alphanumeric() || ch == '_' {
            out.push(ch.to_ascii_lowercase());
        } else if !out.ends_with('_') {
            out.push('_');
        }
    }

    let trimmed = out.trim_matches('_');
    match trimmed.chars().next() {
        None => "param".to_string(),
        Some(first) if first.is_ascii_digit() => format!("_{trimmed}"),
        Some(_) => trimmed.to_string(),
    }
}
1742
/// Escapes backslashes and double quotes so `value` can be placed inside a
/// double-quoted string literal in generated Harn code.
fn escape_harn_string(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        if ch == '\\' || ch == '"' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
1746
/// Identifies the program that produced a bundle manifest.
///
/// Missing fields are filled from `Default` when deserializing
/// (`#[serde(default)]`).
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleGenerator {
    /// Name of the generating tool.
    pub tool: String,
    /// Version string of the generating tool.
    pub version: String,
}
1774
1775impl Default for BundleGenerator {
1776 fn default() -> Self {
1777 Self {
1778 tool: "harn".to_string(),
1779 version: env!("CARGO_PKG_VERSION").to_string(),
1780 }
1781 }
1782}
1783
/// Describes the workflow shipped in a bundle: where its source file lives
/// and which package/version it is published as.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleWorkflowRef {
    /// Path of the workflow file (the bundle builder stores
    /// `BUNDLE_WORKFLOW_FILE` here).
    pub path: String,
    /// Workflow (pipeline) name.
    pub name: String,
    /// Name of the package the workflow belongs to.
    pub package_name: String,
    /// Version of that package.
    pub package_version: String,
}
1796
/// Provenance record for one source trace a bundled candidate was derived
/// from.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleSourceTrace {
    /// Id of the source trace.
    pub trace_id: String,
    /// Hash recorded for the trace on the candidate's example.
    pub source_hash: String,
    /// The trace's `source` value, when the trace was available at build time.
    pub source_url: Option<String>,
    /// String value of the trace's `source_receipt_id` metadata entry, if any.
    pub source_receipt_id: Option<String>,
    /// Path of the fixture file written for this trace, when the trace was
    /// available at build time.
    pub fixture_path: Option<String>,
}
1813
/// Manifest view of one workflow candidate step. It carries a subset of the
/// candidate step's fields (see `BundleStep::from_candidate_step`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct BundleStep {
    /// Step index as recorded on the candidate step.
    pub index: usize,
    /// Action kind of the step.
    pub kind: String,
    /// Action name of the step.
    pub name: String,
    /// Whether the step is deterministic or fuzzy.
    pub segment: SegmentKind,
    /// Names of the workflow parameters the step refers to.
    pub parameter_refs: Vec<String>,
    /// Side effects attributed to the step.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities the step needs.
    pub capabilities: Vec<String>,
    /// Secrets the step needs.
    pub required_secrets: Vec<String>,
    /// Approval information attached to the step, if any.
    pub approval: Option<CrystallizationApproval>,
    /// Free-form notes for reviewers.
    pub review_notes: Vec<String>,
}
1828
1829impl BundleStep {
1830 fn from_candidate_step(step: &WorkflowCandidateStep) -> Self {
1831 Self {
1832 index: step.index,
1833 kind: step.kind.clone(),
1834 name: step.name.clone(),
1835 segment: step.segment.clone(),
1836 parameter_refs: step.parameter_refs.clone(),
1837 side_effects: step.side_effects.clone(),
1838 capabilities: step.capabilities.clone(),
1839 required_secrets: step.required_secrets.clone(),
1840 approval: step.approval.clone(),
1841 review_notes: step.review_notes.clone(),
1842 }
1843 }
1844}
1845
/// Reference to the eval pack that accompanies a bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleEvalPackRef {
    /// Path of the eval pack file (the bundle builder stores
    /// `BUNDLE_EVAL_PACK_FILE` here).
    pub path: String,
    /// Optional link taken from the candidate's promotion metadata; blank
    /// links are stored as `None` by the bundle builder.
    pub link: Option<String>,
}
1855
/// Manifest entry for one trace fixture stored with a bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleFixtureRef {
    /// Path of the fixture file.
    pub path: String,
    /// Id of the trace the fixture was produced from.
    pub trace_id: String,
    /// The trace's own `source_hash`, or an empty string when it has none.
    pub source_hash: String,
    /// Whether the fixture payload went through redaction.
    pub redacted: bool,
}
1864
/// Promotion metadata recorded in a bundle manifest.
///
/// Missing fields are filled from `Default` when deserializing
/// (`#[serde(default)]`), which notably supplies the default rollout policy.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundlePromotion {
    /// Owner of the promotion (the bundle builder copies the author here).
    pub owner: Option<String>,
    /// Approver, if one was named.
    pub approver: Option<String>,
    /// Author, if one was named.
    pub author: Option<String>,
    /// Name of the rollout policy to apply.
    pub rollout_policy: String,
    /// Description of what to roll back to, if known.
    pub rollback_target: Option<String>,
    /// Creation timestamp (the bundle builder stores `now_rfc3339()` here).
    pub created_at: String,
    /// Version of the workflow being promoted.
    pub workflow_version: String,
    /// Name of the package being promoted.
    pub package_name: String,
}
1879
1880impl Default for BundlePromotion {
1881 fn default() -> Self {
1882 Self {
1883 owner: None,
1884 approver: None,
1885 author: None,
1886 rollout_policy: DEFAULT_ROLLOUT_POLICY.to_string(),
1887 rollback_target: None,
1888 created_at: String::new(),
1889 workflow_version: String::new(),
1890 package_name: String::new(),
1891 }
1892 }
1893}
1894
/// Summary of the redaction performed on a bundle's fixtures.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleRedactionSummary {
    /// Whether redaction was applied (the bundle builder sets this to true
    /// exactly when at least one fixture was emitted).
    pub applied: bool,
    /// Names of the redaction rules.
    pub rules: Vec<String>,
    /// Human-readable description of what was done.
    pub summary: String,
    /// Number of fixtures emitted.
    pub fixture_count: usize,
}
1905
/// Top-level manifest of a crystallization bundle.
///
/// Built by `build_crystallization_bundle`; when no candidate was selected
/// the candidate-derived fields hold their empty/default values.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundleManifest {
    /// Schema identifier (`BUNDLE_SCHEMA` for bundles built here).
    pub schema: String,
    /// Schema version (`BUNDLE_SCHEMA_VERSION` for bundles built here).
    pub schema_version: u32,
    /// Timestamp at which the manifest was generated.
    pub generated_at: String,
    /// Tool that generated the bundle.
    pub generator: BundleGenerator,
    /// Whether the bundle holds a candidate, a plan-only candidate, or a
    /// rejection.
    pub kind: BundleKind,
    /// Id of the selected candidate; empty when none was selected.
    pub candidate_id: String,
    /// Caller-supplied key, or one derived from the workflow name.
    pub external_key: String,
    /// Display title of the bundle.
    pub title: String,
    /// Team, as supplied through the bundle options.
    pub team: Option<String>,
    /// Repository, as supplied through the bundle options.
    pub repo: Option<String>,
    /// Risk level, supplied or inferred from the candidate.
    pub risk_level: String,
    /// Reference to the bundled workflow file and package.
    pub workflow: BundleWorkflowRef,
    /// Hashes of the source traces, from the candidate's promotion metadata.
    pub source_trace_hashes: Vec<String>,
    /// One provenance record per candidate example.
    pub source_traces: Vec<BundleSourceTrace>,
    /// Steps whose segment is deterministic.
    pub deterministic_steps: Vec<BundleStep>,
    /// All remaining (non-deterministic) steps.
    pub fuzzy_steps: Vec<BundleStep>,
    /// Side effects of the candidate.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities the candidate needs.
    pub capabilities: Vec<String>,
    /// Secrets the candidate needs.
    pub required_secrets: Vec<String>,
    /// Savings estimate of the candidate.
    pub savings: SavingsEstimate,
    /// Shadow-run report of the candidate.
    pub shadow: ShadowRunReport,
    /// Eval pack reference; `None` when the eval pack text is blank.
    pub eval_pack: Option<BundleEvalPackRef>,
    /// References to the fixture files emitted with the bundle.
    pub fixtures: Vec<BundleFixtureRef>,
    /// Promotion metadata.
    pub promotion: BundlePromotion,
    /// Summary of fixture redaction.
    pub redaction: BundleRedactionSummary,
    /// Confidence of the selected candidate; 0.0 when none was selected.
    pub confidence: f64,
    /// Rejection reasons collected from the report's rejected candidates.
    pub rejection_reasons: Vec<String>,
    /// Warnings copied from the report.
    pub warnings: Vec<String>,
}
1938
/// Classifies what a bundle contains. Serialized in snake_case
/// (`candidate`, `plan_only`, `rejected`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum BundleKind {
    /// A selected candidate that is not plan-only. This is the default.
    #[default]
    Candidate,
    /// A selected candidate that `candidate_is_plan_only` classified as
    /// plan-only.
    PlanOnly,
    /// No selected candidate could be found in the report.
    Rejected,
}
1955
/// Caller-supplied overrides for `build_crystallization_bundle`.
///
/// For `external_key`, `title`, `risk_level` and `rollout_policy`, a value
/// that is `None` or blank is replaced by an inferred or default value.
/// `team` and `repo` are copied into the manifest as given.
#[derive(Clone, Debug, Default)]
pub struct BundleOptions {
    /// Key for the bundle; defaults to one derived from the workflow name.
    pub external_key: Option<String>,
    /// Display title; inferred when absent.
    pub title: Option<String>,
    /// Team to record in the manifest.
    pub team: Option<String>,
    /// Repository to record in the manifest.
    pub repo: Option<String>,
    /// Risk level; inferred from the candidate when absent.
    pub risk_level: Option<String>,
    /// Rollout policy; defaults to `DEFAULT_ROLLOUT_POLICY`.
    pub rollout_policy: Option<String>,
}
1967
/// In-memory form of a crystallization bundle: the manifest plus the payloads
/// it refers to.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundle {
    /// Bundle manifest.
    pub manifest: CrystallizationBundleManifest,
    /// Crystallization report the bundle was built from.
    pub report: CrystallizationReport,
    /// Source text of the generated workflow.
    pub harn_code: String,
    /// Eval pack as TOML text; may be blank.
    pub eval_pack_toml: String,
    /// Redacted copies of the source traces, in the same order as the
    /// manifest's fixture references.
    pub fixtures: Vec<CrystallizationTrace>,
}
1977
/// Outcome of validating a bundle directory.
///
/// NOTE(review): the code that fills this struct is outside this section;
/// the per-field notes below are inferred from the field names — confirm
/// against the validator.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleValidation {
    /// Directory that was validated.
    pub bundle_dir: String,
    /// Schema identifier found in the manifest.
    pub schema: String,
    /// Schema version found in the manifest.
    pub schema_version: u32,
    /// Bundle kind found in the manifest.
    pub kind: BundleKind,
    /// Candidate id found in the manifest.
    pub candidate_id: String,
    /// Presumably true when the manifest checks passed.
    pub manifest_ok: bool,
    /// Presumably true when the workflow file checks passed.
    pub workflow_ok: bool,
    /// Presumably true when the report checks passed.
    pub report_ok: bool,
    /// Presumably true when the eval pack checks passed.
    pub eval_pack_ok: bool,
    /// Presumably true when the fixture checks passed.
    pub fixtures_ok: bool,
    /// Presumably true when the redaction checks passed.
    pub redaction_ok: bool,
    /// Problems found; validation is considered OK only when this is empty
    /// (see `is_ok`).
    pub problems: Vec<String>,
}
1995
impl BundleValidation {
    /// Returns true when no problems were recorded. Only `problems` is
    /// consulted; the individual `*_ok` flags are not.
    pub fn is_ok(&self) -> bool {
        self.problems.is_empty()
    }
}
2001
/// Assembles an in-memory bundle (manifest, report, workflow source, eval
/// pack and redacted fixtures) from crystallization artifacts.
///
/// The report's selected candidate decides the bundle kind: plan-only or
/// regular candidate when it is found among `report.candidates`, rejected
/// otherwise. For a rejected bundle all candidate-derived manifest fields
/// keep their empty/default values.
///
/// `traces` is used to look up each candidate example by trace id; traces
/// that are found are cloned, redacted and attached as fixtures. `options`
/// supplies optional overrides (blank values are treated as absent).
///
/// As written, this function always returns `Ok`.
pub fn build_crystallization_bundle(
    artifacts: CrystallizationArtifacts,
    traces: &[CrystallizationTrace],
    options: BundleOptions,
) -> Result<CrystallizationBundle, VmError> {
    let CrystallizationArtifacts {
        report,
        harn_code,
        eval_pack_toml,
    } = artifacts;

    // Resolve the selected candidate; a missing id or an id that matches no
    // candidate both result in a rejected bundle.
    let (selected, kind) = match report
        .selected_candidate_id
        .as_deref()
        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
    {
        Some(candidate) => {
            let kind = if candidate_is_plan_only(candidate) {
                BundleKind::PlanOnly
            } else {
                BundleKind::Candidate
            };
            (Some(candidate), kind)
        }
        None => (None, BundleKind::Rejected),
    };

    // Naming falls back to generic values when nothing was selected.
    let workflow_name = selected
        .map(|candidate| candidate.name.clone())
        .unwrap_or_else(|| "crystallized_workflow".to_string());
    let package_name = selected
        .map(|candidate| candidate.promotion.package_name.clone())
        .unwrap_or_else(|| workflow_name.replace('_', "-"));
    let workflow_version = selected
        .map(|candidate| candidate.promotion.version.clone())
        .unwrap_or_else(|| "0.0.0".to_string());

    let manifest_workflow = BundleWorkflowRef {
        path: BUNDLE_WORKFLOW_FILE.to_string(),
        name: workflow_name.clone(),
        package_name: package_name.clone(),
        package_version: workflow_version.clone(),
    };

    // Caller overrides win unless they are blank.
    let external_key = options
        .external_key
        .clone()
        .filter(|key| !key.trim().is_empty())
        .unwrap_or_else(|| sanitize_external_key(&workflow_name));
    let title = options
        .title
        .clone()
        .filter(|title| !title.trim().is_empty())
        .unwrap_or_else(|| infer_bundle_title(selected, &workflow_name));
    let risk_level = options
        .risk_level
        .clone()
        .filter(|risk| !risk.trim().is_empty())
        .unwrap_or_else(|| infer_risk_level(selected));
    let rollout_policy = options
        .rollout_policy
        .clone()
        .filter(|policy| !policy.trim().is_empty())
        .unwrap_or_else(|| DEFAULT_ROLLOUT_POLICY.to_string());

    // Split steps by segment; anything not deterministic lands in the fuzzy
    // list.
    let (deterministic_steps, fuzzy_steps) = match selected {
        Some(candidate) => candidate
            .steps
            .iter()
            .map(BundleStep::from_candidate_step)
            .partition::<Vec<_>, _>(|step| step.segment == SegmentKind::Deterministic),
        None => (Vec::new(), Vec::new()),
    };

    let source_trace_hashes = selected
        .map(|candidate| candidate.promotion.source_trace_hashes.clone())
        .unwrap_or_default();

    // Every example gets a provenance record; only examples whose trace is
    // present in `traces` also get a fixture.
    let mut source_traces = Vec::new();
    let mut fixture_refs = Vec::new();
    let mut fixture_payloads = Vec::new();
    if let Some(candidate) = selected {
        for example in &candidate.examples {
            let trace = traces.iter().find(|trace| trace.id == example.trace_id);
            let fixture_relative = trace.map(|trace| {
                format!(
                    "{BUNDLE_FIXTURES_DIR}/{}.json",
                    sanitize_fixture_name(&trace.id)
                )
            });
            source_traces.push(BundleSourceTrace {
                trace_id: example.trace_id.clone(),
                source_hash: example.source_hash.clone(),
                source_url: trace.and_then(|trace| trace.source.clone()),
                source_receipt_id: trace
                    .and_then(|trace| trace.metadata.get("source_receipt_id"))
                    .and_then(|value| value.as_str().map(str::to_string)),
                fixture_path: fixture_relative.clone(),
            });
            if let (Some(trace), Some(fixture_path)) = (trace, fixture_relative.clone()) {
                // Redact a copy; the caller's trace is left untouched.
                let mut redacted = trace.clone();
                redact_trace_for_bundle(&mut redacted);
                fixture_refs.push(BundleFixtureRef {
                    path: fixture_path,
                    trace_id: trace.id.clone(),
                    source_hash: trace.source_hash.clone().unwrap_or_default(),
                    redacted: true,
                });
                fixture_payloads.push(redacted);
            }
        }
    }

    // The candidate's author doubles as the promotion owner.
    let author = selected.and_then(|candidate| candidate.promotion.author.clone());
    let promotion = BundlePromotion {
        owner: author.clone(),
        approver: selected.and_then(|candidate| candidate.promotion.approver.clone()),
        author,
        rollout_policy,
        rollback_target: selected.and_then(|candidate| candidate.promotion.rollback_target.clone()),
        created_at: now_rfc3339(),
        workflow_version,
        package_name: package_name.clone(),
    };

    let redaction = BundleRedactionSummary {
        applied: !fixture_payloads.is_empty(),
        rules: vec![
            "sensitive_keys".to_string(),
            "secret_value_heuristic".to_string(),
        ],
        summary: if fixture_payloads.is_empty() {
            "no fixtures emitted".to_string()
        } else {
            "fixture payloads scrubbed of secret-like values and sensitive keys before write"
                .to_string()
        },
        fixture_count: fixture_payloads.len(),
    };

    // A blank eval pack is not referenced from the manifest.
    let eval_pack = if eval_pack_toml.trim().is_empty() {
        None
    } else {
        Some(BundleEvalPackRef {
            path: BUNDLE_EVAL_PACK_FILE.to_string(),
            link: selected
                .and_then(|candidate| candidate.promotion.eval_pack_link.clone())
                .filter(|link| !link.trim().is_empty()),
        })
    };

    let manifest = CrystallizationBundleManifest {
        schema: BUNDLE_SCHEMA.to_string(),
        schema_version: BUNDLE_SCHEMA_VERSION,
        generated_at: now_rfc3339(),
        generator: BundleGenerator::default(),
        kind,
        candidate_id: selected
            .map(|candidate| candidate.id.clone())
            .unwrap_or_default(),
        external_key,
        title,
        team: options.team,
        repo: options.repo,
        risk_level,
        workflow: manifest_workflow,
        source_trace_hashes,
        source_traces,
        deterministic_steps,
        fuzzy_steps,
        side_effects: selected
            .map(|candidate| candidate.side_effects.clone())
            .unwrap_or_default(),
        capabilities: selected
            .map(|candidate| candidate.capabilities.clone())
            .unwrap_or_default(),
        required_secrets: selected
            .map(|candidate| candidate.required_secrets.clone())
            .unwrap_or_default(),
        savings: selected
            .map(|candidate| candidate.savings.clone())
            .unwrap_or_default(),
        shadow: selected
            .map(|candidate| candidate.shadow.clone())
            .unwrap_or_default(),
        eval_pack,
        fixtures: fixture_refs,
        promotion,
        redaction,
        confidence: selected
            .map(|candidate| candidate.confidence)
            .unwrap_or(0.0),
        // Reasons come from the rejected candidates, not from the selected
        // one.
        rejection_reasons: report
            .rejected_candidates
            .iter()
            .flat_map(|candidate| candidate.rejection_reasons.iter().cloned())
            .collect(),
        warnings: report.warnings.clone(),
    };

    Ok(CrystallizationBundle {
        manifest,
        report,
        harn_code,
        eval_pack_toml,
        fixtures: fixture_payloads,
    })
}
2216
2217pub fn write_crystallization_bundle(
2221 bundle: &CrystallizationBundle,
2222 bundle_dir: &Path,
2223) -> Result<CrystallizationBundleManifest, VmError> {
2224 std::fs::create_dir_all(bundle_dir).map_err(|error| {
2225 VmError::Runtime(format!(
2226 "failed to create bundle dir {}: {error}",
2227 bundle_dir.display()
2228 ))
2229 })?;
2230 write_bytes(
2231 &bundle_dir.join(BUNDLE_WORKFLOW_FILE),
2232 bundle.harn_code.as_bytes(),
2233 )?;
2234 let report_json = serde_json::to_vec_pretty(&bundle.report)
2235 .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
2236 write_bytes(&bundle_dir.join(BUNDLE_REPORT_FILE), &report_json)?;
2237
2238 if !bundle.eval_pack_toml.trim().is_empty() {
2239 write_bytes(
2240 &bundle_dir.join(BUNDLE_EVAL_PACK_FILE),
2241 bundle.eval_pack_toml.as_bytes(),
2242 )?;
2243 }
2244
2245 if !bundle.fixtures.is_empty() {
2246 let fixtures_dir = bundle_dir.join(BUNDLE_FIXTURES_DIR);
2247 std::fs::create_dir_all(&fixtures_dir).map_err(|error| {
2248 VmError::Runtime(format!(
2249 "failed to create fixtures dir {}: {error}",
2250 fixtures_dir.display()
2251 ))
2252 })?;
2253 for trace in &bundle.fixtures {
2254 let path = fixtures_dir.join(format!("{}.json", sanitize_fixture_name(&trace.id)));
2255 let payload = serde_json::to_vec_pretty(trace).map_err(|error| {
2256 VmError::Runtime(format!("failed to encode fixture {}: {error}", trace.id))
2257 })?;
2258 write_bytes(&path, &payload)?;
2259 }
2260 }
2261
2262 let manifest_json = serde_json::to_vec_pretty(&bundle.manifest)
2263 .map_err(|error| VmError::Runtime(format!("failed to encode manifest JSON: {error}")))?;
2264 write_bytes(&bundle_dir.join(BUNDLE_MANIFEST_FILE), &manifest_json)?;
2265 Ok(bundle.manifest.clone())
2266}
2267
2268pub fn load_crystallization_bundle_manifest(
2272 bundle_dir: &Path,
2273) -> Result<CrystallizationBundleManifest, VmError> {
2274 let manifest_path = bundle_dir.join(BUNDLE_MANIFEST_FILE);
2275 let bytes = std::fs::read(&manifest_path).map_err(|error| {
2276 VmError::Runtime(format!(
2277 "failed to read bundle manifest {}: {error}",
2278 manifest_path.display()
2279 ))
2280 })?;
2281 let manifest: CrystallizationBundleManifest =
2282 serde_json::from_slice(&bytes).map_err(|error| {
2283 VmError::Runtime(format!(
2284 "failed to decode bundle manifest {}: {error}",
2285 manifest_path.display()
2286 ))
2287 })?;
2288 if manifest.schema != BUNDLE_SCHEMA {
2289 return Err(VmError::Runtime(format!(
2290 "bundle {} has unrecognized schema {:?} (expected {})",
2291 bundle_dir.display(),
2292 manifest.schema,
2293 BUNDLE_SCHEMA
2294 )));
2295 }
2296 if manifest.schema_version > BUNDLE_SCHEMA_VERSION {
2297 return Err(VmError::Runtime(format!(
2298 "bundle {} schema_version {} is newer than supported {}",
2299 bundle_dir.display(),
2300 manifest.schema_version,
2301 BUNDLE_SCHEMA_VERSION
2302 )));
2303 }
2304 Ok(manifest)
2305}
2306
2307pub fn load_crystallization_bundle(
2311 bundle_dir: &Path,
2312) -> Result<(CrystallizationBundleManifest, Vec<CrystallizationTrace>), VmError> {
2313 let manifest = load_crystallization_bundle_manifest(bundle_dir)?;
2314 let mut traces = Vec::new();
2315 for fixture in &manifest.fixtures {
2316 let path = bundle_dir.join(&fixture.path);
2317 traces.push(load_crystallization_trace(&path)?);
2318 }
2319 Ok((manifest, traces))
2320}
2321
2322pub fn validate_crystallization_bundle(bundle_dir: &Path) -> Result<BundleValidation, VmError> {
2325 let mut validation = BundleValidation {
2326 bundle_dir: bundle_dir.display().to_string(),
2327 ..BundleValidation::default()
2328 };
2329 let manifest = match load_crystallization_bundle_manifest(bundle_dir) {
2330 Ok(manifest) => manifest,
2331 Err(error) => {
2332 validation.problems.push(error.to_string());
2333 return Ok(validation);
2334 }
2335 };
2336 validation.manifest_ok = true;
2337 validation.schema = manifest.schema.clone();
2338 validation.schema_version = manifest.schema_version;
2339 validation.kind = manifest.kind.clone();
2340 validation.candidate_id = manifest.candidate_id.clone();
2341
2342 let workflow_path = bundle_dir.join(&manifest.workflow.path);
2343 if workflow_path.exists() {
2344 validation.workflow_ok = true;
2345 } else {
2346 validation
2347 .problems
2348 .push(format!("missing workflow file {}", workflow_path.display()));
2349 }
2350
2351 let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
2352 match std::fs::read(&report_path) {
2353 Ok(bytes) => match serde_json::from_slice::<CrystallizationReport>(&bytes) {
2354 Ok(report) => {
2355 validation.report_ok = true;
2356 if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
2357 && manifest.candidate_id.is_empty()
2358 {
2359 validation
2360 .problems
2361 .push("manifest is non-rejected but has empty candidate_id".to_string());
2362 }
2363 if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
2364 && report.selected_candidate_id.as_deref() != Some(&manifest.candidate_id)
2365 {
2366 validation.problems.push(format!(
2367 "report selected_candidate_id {:?} does not match manifest candidate_id {}",
2368 report.selected_candidate_id, manifest.candidate_id
2369 ));
2370 }
2371 }
2372 Err(error) => {
2373 validation
2374 .problems
2375 .push(format!("invalid report.json: {error}"));
2376 }
2377 },
2378 Err(error) => {
2379 validation.problems.push(format!(
2380 "missing report file {}: {error}",
2381 report_path.display()
2382 ));
2383 }
2384 }
2385
2386 if let Some(eval_pack) = &manifest.eval_pack {
2387 let path = bundle_dir.join(&eval_pack.path);
2388 if path.exists() {
2389 validation.eval_pack_ok = true;
2390 } else {
2391 validation.problems.push(format!(
2392 "manifest references eval pack {} but file is missing",
2393 path.display()
2394 ));
2395 }
2396 } else {
2397 validation.eval_pack_ok = true;
2398 }
2399
2400 let mut fixtures_problem = false;
2401 for fixture in &manifest.fixtures {
2402 let path = bundle_dir.join(&fixture.path);
2403 if !path.exists() {
2404 validation
2405 .problems
2406 .push(format!("missing fixture {}", path.display()));
2407 fixtures_problem = true;
2408 continue;
2409 }
2410 if !fixture.redacted {
2411 validation.problems.push(format!(
2412 "fixture {} is not marked redacted; bundle must not ship raw private payloads",
2413 fixture.path
2414 ));
2415 fixtures_problem = true;
2416 }
2417 }
2418 validation.fixtures_ok = !fixtures_problem;
2419
2420 if !manifest.redaction.applied && !manifest.fixtures.is_empty() {
2421 validation
2422 .problems
2423 .push("redaction.applied is false but bundle includes fixtures".to_string());
2424 } else {
2425 validation.redaction_ok = true;
2426 }
2427 if !manifest
2428 .required_secrets
2429 .iter()
2430 .all(|secret| secret_id_looks_logical(secret))
2431 {
2432 validation.problems.push(
2433 "required_secrets contains a non-logical id (looks like a raw secret)".to_string(),
2434 );
2435 }
2436
2437 Ok(validation)
2438}
2439
2440pub fn shadow_replay_bundle(
2446 bundle_dir: &Path,
2447) -> Result<(CrystallizationBundleManifest, ShadowRunReport), VmError> {
2448 let (manifest, traces) = load_crystallization_bundle(bundle_dir)?;
2449 let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
2450 let bytes = std::fs::read(&report_path).map_err(|error| {
2451 VmError::Runtime(format!(
2452 "failed to read bundle report {}: {error}",
2453 report_path.display()
2454 ))
2455 })?;
2456 let report: CrystallizationReport = serde_json::from_slice(&bytes).map_err(|error| {
2457 VmError::Runtime(format!(
2458 "failed to decode bundle report {}: {error}",
2459 report_path.display()
2460 ))
2461 })?;
2462 let candidate = report
2463 .selected_candidate_id
2464 .as_deref()
2465 .and_then(|id| report.candidates.iter().find(|c| c.id == id))
2466 .ok_or_else(|| {
2467 VmError::Runtime(format!(
2468 "bundle {} has no selected candidate to replay",
2469 bundle_dir.display()
2470 ))
2471 })?;
2472 let shadow = shadow_candidate(candidate, &traces);
2473 Ok((manifest, shadow))
2474}
2475
2476fn write_bytes(path: &Path, bytes: &[u8]) -> Result<(), VmError> {
2477 crate::atomic_io::atomic_write(path, bytes)
2478 .map_err(|error| VmError::Runtime(format!("failed to write {}: {error}", path.display())))
2479}
2480
/// Turns a trace id into a file-name stem.
///
/// Every character other than an ASCII letter, ASCII digit, `-` or `_` is
/// replaced by `_`; leading and trailing underscores are then removed. When
/// nothing remains, the stem falls back to `"trace"`.
fn sanitize_fixture_name(raw: &str) -> String {
    let mut cleaned = String::with_capacity(raw.len());
    for ch in raw.chars() {
        let keep = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_');
        cleaned.push(if keep { ch } else { '_' });
    }
    match cleaned.trim_matches('_') {
        "" => "trace".to_string(),
        core => core.to_string(),
    }
}
2498
/// Builds a lowercase, dash-separated key from `raw`.
///
/// Runs of ASCII letters and digits are kept (lowercased) and joined with a
/// single `-`; every other character acts purely as a separator, so the
/// result never starts or ends with a dash and never contains two in a row.
/// When no letters or digits are present the key falls back to
/// `"crystallized-workflow"`.
fn sanitize_external_key(raw: &str) -> String {
    let slug = raw
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .map(str::to_ascii_lowercase)
        .collect::<Vec<_>>()
        .join("-");
    if slug.is_empty() {
        "crystallized-workflow".to_string()
    } else {
        slug
    }
}
2519
2520fn infer_bundle_title(candidate: Option<&WorkflowCandidate>, fallback_name: &str) -> String {
2521 if let Some(candidate) = candidate {
2522 format!(
2523 "{} ({} step{})",
2524 candidate.name,
2525 candidate.steps.len(),
2526 if candidate.steps.len() == 1 { "" } else { "s" }
2527 )
2528 } else {
2529 format!("rejected: {fallback_name}")
2530 }
2531}
2532
2533fn infer_risk_level(candidate: Option<&WorkflowCandidate>) -> String {
2534 let Some(candidate) = candidate else {
2535 return "high".to_string();
2536 };
2537 let touches_external = candidate.side_effects.iter().any(side_effect_is_external);
2538 let needs_secret = !candidate.required_secrets.is_empty();
2539 if touches_external && needs_secret {
2540 "high".to_string()
2541 } else if touches_external || needs_secret {
2542 "medium".to_string()
2543 } else {
2544 "low".to_string()
2545 }
2546}
2547
2548fn side_effect_is_external(effect: &CrystallizationSideEffect) -> bool {
2549 let kind = effect.kind.to_ascii_lowercase();
2550 if kind.is_empty() {
2551 return false;
2552 }
2553 let internal = kind.contains("receipt")
2557 || kind.contains("event_log")
2558 || kind.contains("memo")
2559 || kind.contains("plan");
2560 if internal {
2561 return false;
2562 }
2563 kind.contains("post")
2564 || kind.contains("write")
2565 || kind.contains("publish")
2566 || kind.contains("delete")
2567 || kind.contains("send")
2568}
2569
2570fn candidate_is_plan_only(candidate: &WorkflowCandidate) -> bool {
2571 if candidate.steps.is_empty() {
2572 return false;
2573 }
2574 candidate.side_effects.iter().all(|effect| {
2575 let kind = effect.kind.to_ascii_lowercase();
2576 kind.is_empty()
2579 || kind.contains("receipt")
2580 || kind.contains("event_log")
2581 || kind.contains("memo")
2582 || kind.contains("plan")
2583 || (kind.contains("file") && !kind.contains("publish"))
2584 })
2585}
2586
2587fn redact_trace_for_bundle(trace: &mut CrystallizationTrace) {
2588 for action in &mut trace.actions {
2589 redact_bundle_value(&mut action.inputs);
2590 if let Some(output) = action.output.as_mut() {
2591 redact_bundle_value(output);
2592 }
2593 if let Some(observed) = action.observed_output.as_mut() {
2594 redact_bundle_value(observed);
2595 }
2596 for value in action.parameters.values_mut() {
2597 redact_bundle_value(value);
2598 }
2599 for (_, value) in action.metadata.iter_mut() {
2600 redact_bundle_value(value);
2601 }
2602 }
2603 for (_, value) in trace.metadata.iter_mut() {
2604 redact_bundle_value(value);
2605 }
2606}
2607
2608fn redact_bundle_value(value: &mut JsonValue) {
2609 match value {
2610 JsonValue::String(text) if looks_like_secret_value(text) => {
2611 *text = "[redacted]".to_string();
2612 }
2613 JsonValue::Array(items) => {
2614 for item in items {
2615 redact_bundle_value(item);
2616 }
2617 }
2618 JsonValue::Object(map) => {
2619 for (key, child) in map.iter_mut() {
2620 if is_sensitive_bundle_key(key) {
2621 *child = JsonValue::String("[redacted]".to_string());
2622 } else {
2623 redact_bundle_value(child);
2624 }
2625 }
2626 }
2627 _ => {}
2628 }
2629}
2630
/// Reports whether a map/object key names something that must be redacted.
///
/// The key is lowercased (ASCII) and hyphens are treated as underscores, so
/// header-style spellings (`X-Api-Key`, `Set-Cookie`, `Proxy-Authorization`)
/// and snake_case spellings (`api_key`, `set_cookie`) are handled alike.
///
/// A key is sensitive when the normalized form contains `secret`, `token`,
/// `password`, `api_key` or `apikey`, or when it is exactly `authorization`,
/// `proxy_authorization`, `cookie` or `set_cookie`.
fn is_sensitive_bundle_key(key: &str) -> bool {
    const FRAGMENTS: [&str; 5] = ["secret", "token", "password", "api_key", "apikey"];
    const EXACT: [&str; 4] = [
        "authorization",
        "proxy_authorization",
        "cookie",
        "set_cookie",
    ];

    // Normalizing `-` to `_` keeps every previously matched key matched while
    // also catching hyphenated header names such as `x-api-key`.
    let normalized: String = key
        .chars()
        .map(|ch| {
            if ch == '-' {
                '_'
            } else {
                ch.to_ascii_lowercase()
            }
        })
        .collect();

    FRAGMENTS
        .iter()
        .any(|fragment| normalized.contains(*fragment))
        || EXACT.contains(&normalized.as_str())
}
2642
/// Heuristically decides whether a string value looks like a raw credential.
///
/// After trimming surrounding whitespace, the value is flagged when it starts
/// with one of the known prefixes (`sk-`, `ghp_`, `ghs_`, `xoxb-`, `xoxp-`,
/// `AKIA`), or when it is longer than 48 bytes and consists solely of ASCII
/// letters, ASCII digits, `_` and `-`.
fn looks_like_secret_value(value: &str) -> bool {
    const KNOWN_PREFIXES: [&str; 6] = ["sk-", "ghp_", "ghs_", "xoxb-", "xoxp-", "AKIA"];

    let candidate = value.trim();
    if KNOWN_PREFIXES
        .iter()
        .any(|prefix| candidate.starts_with(*prefix))
    {
        return true;
    }

    // Long opaque blobs: any non-ASCII byte disqualifies the value.
    candidate.len() > 48
        && candidate
            .bytes()
            .all(|byte| byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'-')
}
2656
2657fn secret_id_looks_logical(value: &str) -> bool {
2658 !looks_like_secret_value(value) && !value.trim().is_empty()
2659}
2660
// Unit tests for trace crystallization and for building, writing, loading,
// validating and shadow-replaying candidate bundles.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a three-action trace: a branch checkout, a manifest version
    /// update and a release-notes step.
    ///
    /// `version` is threaded through the action inputs/parameters,
    /// `side_target` becomes the target of the checkout's `git_ref` side
    /// effect, and `fuzzy` switches the release-notes step between a
    /// `model_call` with non-zero model cost and a `tool_call` with zero
    /// model cost.
    fn version_trace(
        id: &str,
        version: &str,
        side_target: &str,
        fuzzy: bool,
    ) -> CrystallizationTrace {
        CrystallizationTrace {
            id: id.to_string(),
            actions: vec![
                CrystallizationAction {
                    id: format!("{id}-branch"),
                    kind: "tool_call".to_string(),
                    name: "git.checkout_branch".to_string(),
                    parameters: BTreeMap::from([
                        ("repo_path".to_string(), json!(format!("/tmp/{id}"))),
                        (
                            "branch_name".to_string(),
                            json!(format!("release-{version}")),
                        ),
                    ]),
                    side_effects: vec![CrystallizationSideEffect {
                        kind: "git_ref".to_string(),
                        target: side_target.to_string(),
                        capability: Some("git.write".to_string()),
                        ..CrystallizationSideEffect::default()
                    }],
                    capabilities: vec!["git.write".to_string()],
                    deterministic: Some(true),
                    duration_ms: Some(20),
                    ..CrystallizationAction::default()
                },
                CrystallizationAction {
                    id: format!("{id}-manifest"),
                    kind: "file_mutation".to_string(),
                    name: "update_manifest_version".to_string(),
                    inputs: json!({"version": version, "path": "harn.toml"}),
                    parameters: BTreeMap::from([("version".to_string(), json!(version))]),
                    side_effects: vec![CrystallizationSideEffect {
                        kind: "file_write".to_string(),
                        target: "harn.toml".to_string(),
                        capability: Some("fs.write".to_string()),
                        ..CrystallizationSideEffect::default()
                    }],
                    capabilities: vec!["fs.write".to_string()],
                    deterministic: Some(true),
                    ..CrystallizationAction::default()
                },
                CrystallizationAction {
                    id: format!("{id}-release"),
                    kind: if fuzzy { "model_call" } else { "tool_call" }.to_string(),
                    name: "prepare_release_notes".to_string(),
                    inputs: json!({"release_target": "crates.io", "version": version}),
                    parameters: BTreeMap::from([
                        ("release_target".to_string(), json!("crates.io")),
                        ("version".to_string(), json!(version)),
                    ]),
                    fuzzy: Some(fuzzy),
                    deterministic: Some(!fuzzy),
                    cost: CrystallizationCost {
                        model_calls: if fuzzy { 1 } else { 0 },
                        input_tokens: if fuzzy { 1200 } else { 0 },
                        output_tokens: if fuzzy { 250 } else { 0 },
                        total_cost_usd: if fuzzy { 0.01 } else { 0.0 },
                        wall_ms: 3000,
                        ..CrystallizationCost::default()
                    },
                    ..CrystallizationAction::default()
                },
            ],
            ..CrystallizationTrace::default()
        }
    }

    /// Five traces that differ only in id and version should crystallize into
    /// one accepted candidate whose parameters include the varying fields.
    #[test]
    fn crystallizes_repeated_version_bump_with_parameters() {
        let traces = (0..5)
            .map(|idx| {
                version_trace(
                    &format!("trace_{idx}"),
                    &format!("0.7.{idx}"),
                    "release-branch",
                    false,
                )
            })
            .collect::<Vec<_>>();

        let artifacts = crystallize_traces(
            traces,
            CrystallizeOptions {
                workflow_name: Some("version_bump".to_string()),
                ..CrystallizeOptions::default()
            },
        )
        .unwrap();

        let candidate = &artifacts.report.candidates[0];
        assert!(candidate.rejection_reasons.is_empty());
        assert!(candidate.shadow.pass);
        assert_eq!(candidate.examples.len(), 5);
        let params = candidate
            .parameters
            .iter()
            .map(|param| param.name.as_str())
            .collect::<BTreeSet<_>>();
        assert!(params.contains("version"));
        assert!(params.contains("repo_path"));
        assert!(params.contains("branch_name"));
        assert!(artifacts.harn_code.contains("pipeline version_bump("));
        assert!(artifacts.eval_pack_toml.contains("crystallization-shadow"));
    }

    /// One trace targets `main` instead of `release-branch`; the run must
    /// produce no accepted candidate and one rejection that mentions
    /// divergent side effects.
    #[test]
    fn rejects_divergent_side_effects() {
        let traces = vec![
            version_trace("trace_a", "0.7.1", "release-branch", false),
            version_trace("trace_b", "0.7.2", "main", false),
            version_trace("trace_c", "0.7.3", "release-branch", false),
        ];

        let artifacts = crystallize_traces(traces, CrystallizeOptions::default()).unwrap();

        assert!(artifacts.report.candidates.is_empty());
        assert_eq!(artifacts.report.rejected_candidates.len(), 1);
        assert!(artifacts.report.rejected_candidates[0].rejection_reasons[0]
            .contains("divergent side effects"));
    }

    /// With the release-notes step marked fuzzy, the candidate must keep a
    /// fuzzy segment, report remaining model calls, and emit a TODO marker in
    /// the generated code.
    #[test]
    fn preserves_remaining_fuzzy_segment() {
        let traces = (0..3)
            .map(|idx| {
                version_trace(
                    &format!("trace_{idx}"),
                    &format!("0.8.{idx}"),
                    "release-branch",
                    true,
                )
            })
            .collect::<Vec<_>>();

        let artifacts = crystallize_traces(traces, CrystallizeOptions::default()).unwrap();
        let candidate = &artifacts.report.candidates[0];

        assert!(candidate
            .steps
            .iter()
            .any(|step| step.segment == SegmentKind::Fuzzy));
        assert!(candidate.savings.remaining_model_calls > 0);
        assert!(artifacts.harn_code.contains("TODO: fuzzy segment"));
    }

    /// Builds a two-action trace (classify an issue, then emit a receipt)
    /// whose only side effect has kind `receipt_write`.
    fn plan_only_trace(id: &str, suffix: &str) -> CrystallizationTrace {
        CrystallizationTrace {
            id: id.to_string(),
            actions: vec![
                CrystallizationAction {
                    id: format!("{id}-classify"),
                    kind: "tool_call".to_string(),
                    name: "classify_issue".to_string(),
                    parameters: BTreeMap::from([
                        ("issue_id".to_string(), json!(format!("HAR-{suffix}"))),
                        ("team_key".to_string(), json!("HAR")),
                    ]),
                    capabilities: vec!["linear.read".to_string()],
                    deterministic: Some(true),
                    duration_ms: Some(15),
                    ..CrystallizationAction::default()
                },
                CrystallizationAction {
                    id: format!("{id}-receipt"),
                    kind: "receipt_write".to_string(),
                    name: "emit_receipt".to_string(),
                    inputs: json!({"summary": format!("plan only #{suffix}"), "kind": "plan"}),
                    parameters: BTreeMap::from([
                        ("kind".to_string(), json!("plan")),
                        ("summary".to_string(), json!(format!("plan only #{suffix}"))),
                    ]),
                    side_effects: vec![CrystallizationSideEffect {
                        kind: "receipt_write".to_string(),
                        target: "tenant_event_log".to_string(),
                        capability: Some("receipt.write".to_string()),
                        ..CrystallizationSideEffect::default()
                    }],
                    capabilities: vec!["receipt.write".to_string()],
                    deterministic: Some(true),
                    duration_ms: Some(5),
                    ..CrystallizationAction::default()
                },
            ],
            ..CrystallizationTrace::default()
        }
    }

    /// Returns `count` non-fuzzy version-bump traces named `trace_<idx>` with
    /// versions `0.7.<idx>`, all targeting `release-branch`.
    fn version_traces(count: usize) -> Vec<CrystallizationTrace> {
        (0..count)
            .map(|idx| {
                version_trace(
                    &format!("trace_{idx}"),
                    &format!("0.7.{idx}"),
                    "release-branch",
                    false,
                )
            })
            .collect()
    }

    /// Checks the manifest fields produced when a bundle is built from an
    /// accepted candidate with explicit crystallize and bundle options.
    #[test]
    fn build_bundle_assembles_versioned_manifest() {
        let traces = version_traces(5);
        let artifacts = crystallize_traces(
            traces.clone(),
            CrystallizeOptions {
                workflow_name: Some("version_bump".to_string()),
                package_name: Some("release-workflows".to_string()),
                author: Some("ops@example.com".to_string()),
                approver: Some("lead@example.com".to_string()),
                eval_pack_link: Some("eval-pack://release-workflows/v1".to_string()),
                ..CrystallizeOptions::default()
            },
        )
        .unwrap();

        let bundle = build_crystallization_bundle(
            artifacts,
            &traces,
            BundleOptions {
                team: Some("platform".to_string()),
                repo: Some("burin-labs/harn".to_string()),
                ..BundleOptions::default()
            },
        )
        .unwrap();

        let manifest = &bundle.manifest;
        assert_eq!(manifest.schema, BUNDLE_SCHEMA);
        assert_eq!(manifest.schema_version, BUNDLE_SCHEMA_VERSION);
        assert_eq!(manifest.kind, BundleKind::Candidate);
        assert!(!manifest.candidate_id.is_empty());
        assert_eq!(manifest.workflow.name, "version_bump");
        assert_eq!(manifest.workflow.package_name, "release-workflows");
        assert_eq!(manifest.workflow.path, BUNDLE_WORKFLOW_FILE);
        assert_eq!(manifest.team.as_deref(), Some("platform"));
        assert_eq!(manifest.repo.as_deref(), Some("burin-labs/harn"));
        assert_eq!(manifest.external_key, "version-bump");
        assert_eq!(manifest.promotion.rollout_policy, "shadow_then_canary");
        assert_eq!(
            manifest.promotion.author.as_deref(),
            Some("ops@example.com")
        );
        assert_eq!(
            manifest.promotion.approver.as_deref(),
            Some("lead@example.com")
        );
        assert_eq!(manifest.promotion.workflow_version, "0.1.0");
        assert!(manifest.deterministic_steps.len() + manifest.fuzzy_steps.len() > 0);
        assert_eq!(manifest.source_traces.len(), traces.len());
        assert_eq!(manifest.fixtures.len(), traces.len());
        assert!(manifest.fixtures.iter().all(|fixture| fixture.redacted));
        assert!(manifest.redaction.applied);
        assert!(manifest.redaction.fixture_count > 0);
        assert!(manifest
            .eval_pack
            .as_ref()
            .is_some_and(|eval| eval.path == BUNDLE_EVAL_PACK_FILE));
        assert!(manifest
            .required_secrets
            .iter()
            .all(|secret| !secret.is_empty()));
    }

    /// Writes a bundle to a temp dir, then exercises load, validate and
    /// shadow replay against what landed on disk.
    #[test]
    fn write_bundle_round_trips_through_disk() {
        let traces = version_traces(5);
        let artifacts = crystallize_traces(
            traces.clone(),
            CrystallizeOptions {
                workflow_name: Some("version_bump".to_string()),
                ..CrystallizeOptions::default()
            },
        )
        .unwrap();
        let bundle =
            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();

        let dir = tempfile::tempdir().unwrap();
        let written = write_crystallization_bundle(&bundle, dir.path()).unwrap();
        assert_eq!(written.candidate_id, bundle.manifest.candidate_id);

        // Every top-level bundle file must exist after the write.
        for relative in [
            BUNDLE_MANIFEST_FILE,
            BUNDLE_REPORT_FILE,
            BUNDLE_WORKFLOW_FILE,
            BUNDLE_EVAL_PACK_FILE,
        ] {
            assert!(dir.path().join(relative).exists(), "missing {relative}");
        }
        let fixtures_dir = dir.path().join(BUNDLE_FIXTURES_DIR);
        assert!(fixtures_dir.is_dir());
        assert_eq!(
            std::fs::read_dir(&fixtures_dir).unwrap().count(),
            traces.len()
        );

        // Loading must give back the same manifest and one trace per fixture.
        let (loaded_manifest, loaded_traces) = load_crystallization_bundle(dir.path()).unwrap();
        assert_eq!(loaded_manifest, bundle.manifest);
        assert_eq!(loaded_traces.len(), traces.len());

        // A freshly written bundle must validate with no problems.
        let validation = validate_crystallization_bundle(dir.path()).unwrap();
        assert!(
            validation.problems.is_empty(),
            "unexpected problems: {:?}",
            validation.problems
        );
        assert!(validation.is_ok());
        assert!(validation.workflow_ok && validation.report_ok);
        assert!(validation.fixtures_ok && validation.redaction_ok);

        // Replaying the written fixtures must still pass and cover every trace.
        let (replay_manifest, shadow) = shadow_replay_bundle(dir.path()).unwrap();
        assert_eq!(replay_manifest.candidate_id, bundle.manifest.candidate_id);
        assert!(shadow.pass, "shadow should still pass");
        assert_eq!(shadow.compared_traces, traces.len());
    }

    /// Deleting the workflow file after the write must surface a
    /// "missing workflow file" problem.
    #[test]
    fn validate_rejects_bundle_with_missing_workflow() {
        let traces = version_traces(3);
        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
        let bundle =
            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();

        let dir = tempfile::tempdir().unwrap();
        write_crystallization_bundle(&bundle, dir.path()).unwrap();
        std::fs::remove_file(dir.path().join(BUNDLE_WORKFLOW_FILE)).unwrap();

        let validation = validate_crystallization_bundle(dir.path()).unwrap();
        assert!(!validation.is_ok());
        assert!(validation
            .problems
            .iter()
            .any(|problem| problem.contains("missing workflow file")));
    }

    /// Clearing the `redacted` flag on one fixture reference must make
    /// validation fail with a "not marked redacted" problem.
    #[test]
    fn validate_rejects_bundle_with_unredacted_fixture() {
        let traces = version_traces(3);
        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
        let mut bundle =
            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
        // Tamper with the in-memory manifest before it is written to disk.
        bundle.manifest.fixtures[0].redacted = false;
        let dir = tempfile::tempdir().unwrap();
        write_crystallization_bundle(&bundle, dir.path()).unwrap();

        let validation = validate_crystallization_bundle(dir.path()).unwrap();
        assert!(!validation.is_ok());
        assert!(validation
            .problems
            .iter()
            .any(|problem| problem.contains("not marked redacted")));
    }

    /// A manifest whose `schema_version` is one above the supported version
    /// must be reported as a problem mentioning `schema_version`.
    #[test]
    fn validate_rejects_unsupported_schema_version() {
        let traces = version_traces(3);
        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
        let mut bundle =
            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
        bundle.manifest.schema_version = BUNDLE_SCHEMA_VERSION + 1;
        let dir = tempfile::tempdir().unwrap();
        write_crystallization_bundle(&bundle, dir.path()).unwrap();

        let validation = validate_crystallization_bundle(dir.path()).unwrap();
        assert!(!validation.is_ok());
        assert!(validation
            .problems
            .iter()
            .any(|problem| problem.contains("schema_version")));
    }

    /// Secret-looking values in parameters, inputs and metadata must be
    /// replaced by `"[redacted]"` while ordinary values are left alone.
    #[test]
    fn redacts_secret_like_values_in_fixtures() {
        // NOTE(review): the prefixes are assembled from halves, presumably so
        // the literal token prefixes never appear verbatim in this source
        // file (e.g. to keep secret scanners quiet) — confirm.
        let slack_prefix = format!("{}{}", "xo", "xb-");
        let github_prefix = format!("{}{}", "gh", "p_");
        let openai_prefix = "sk-".to_string();
        let pad = "A".repeat(48);
        let slack_secret = format!("{slack_prefix}1234567890-{pad}");
        let github_secret = format!("{github_prefix}{pad}");
        let openai_secret = format!("{openai_prefix}{pad}");

        let mut secret_action = CrystallizationAction {
            id: "secret".to_string(),
            kind: "tool_call".to_string(),
            name: "post_release_to_slack".to_string(),
            parameters: BTreeMap::from([
                ("slack_token".to_string(), json!(slack_secret)),
                ("channel".to_string(), json!("#releases")),
            ]),
            inputs: json!({
                "authorization": format!("Bearer {github_secret}"),
                "version": "0.7.1",
            }),
            ..CrystallizationAction::default()
        };
        secret_action
            .metadata
            .insert("api_key".to_string(), json!(openai_secret));

        let mut trace = CrystallizationTrace {
            id: "trace_secret".to_string(),
            actions: vec![secret_action],
            ..CrystallizationTrace::default()
        };
        redact_trace_for_bundle(&mut trace);
        let action = &trace.actions[0];
        assert_eq!(
            action.parameters.get("slack_token"),
            Some(&json!("[redacted]"))
        );
        assert_eq!(action.parameters.get("channel"), Some(&json!("#releases")));
        let inputs = action.inputs.as_object().unwrap();
        assert_eq!(inputs.get("authorization"), Some(&json!("[redacted]")));
        assert_eq!(inputs.get("version"), Some(&json!("0.7.1")));
        assert_eq!(action.metadata.get("api_key"), Some(&json!("[redacted]")));
    }

    /// Traces whose only side effect is a receipt write must yield a
    /// `PlanOnly` bundle with `"low"` risk.
    #[test]
    fn plan_only_fixture_yields_plan_only_kind() {
        let traces = (0..3)
            .map(|idx| plan_only_trace(&format!("plan_{idx}"), &format!("{idx}")))
            .collect::<Vec<_>>();
        let artifacts = crystallize_traces(
            traces.clone(),
            CrystallizeOptions {
                workflow_name: Some("plan_only_triage".to_string()),
                ..CrystallizeOptions::default()
            },
        )
        .unwrap();
        let bundle =
            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
        assert_eq!(bundle.manifest.kind, BundleKind::PlanOnly);
        assert_eq!(bundle.manifest.risk_level, "low");
    }

    /// Divergent traces must yield a `Rejected` bundle with no candidate id,
    /// at least one rejection reason, and no fixtures.
    #[test]
    fn rejected_bundle_has_rejected_kind() {
        let traces = vec![
            version_trace("trace_a", "0.7.1", "release-branch", false),
            version_trace("trace_b", "0.7.2", "main", false),
            version_trace("trace_c", "0.7.3", "release-branch", false),
        ];
        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
        let bundle =
            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
        assert_eq!(bundle.manifest.kind, BundleKind::Rejected);
        assert!(bundle.manifest.candidate_id.is_empty());
        assert!(!bundle.manifest.rejection_reasons.is_empty());
        assert!(bundle.fixtures.is_empty());
    }

    /// A rejected bundle written to disk must still validate cleanly and
    /// report the `Rejected` kind.
    #[test]
    fn validate_round_trips_rejected_bundle() {
        let traces = vec![
            version_trace("trace_a", "0.7.1", "release-branch", false),
            version_trace("trace_b", "0.7.2", "main", false),
            version_trace("trace_c", "0.7.3", "release-branch", false),
        ];
        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
        let bundle =
            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
        let dir = tempfile::tempdir().unwrap();
        write_crystallization_bundle(&bundle, dir.path()).unwrap();

        let validation = validate_crystallization_bundle(dir.path()).unwrap();
        assert!(validation.is_ok(), "{:?}", validation.problems);
        assert_eq!(validation.kind, BundleKind::Rejected);
    }

    /// Truncating one on-disk fixture to a single action must make the
    /// shadow replay fail and report at least one failure.
    #[test]
    fn shadow_replay_fails_when_fixture_diverges() {
        let traces = version_traces(3);
        let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
        let bundle =
            build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
        let dir = tempfile::tempdir().unwrap();
        write_crystallization_bundle(&bundle, dir.path()).unwrap();

        let fixture_dir = dir.path().join(BUNDLE_FIXTURES_DIR);
        // Take whichever fixture the directory listing yields first and
        // rewrite it with only its first action.
        let some_fixture = std::fs::read_dir(&fixture_dir)
            .unwrap()
            .next()
            .unwrap()
            .unwrap()
            .path();
        let mut tampered: CrystallizationTrace =
            serde_json::from_slice(&std::fs::read(&some_fixture).unwrap()).unwrap();
        tampered.actions.truncate(1);
        std::fs::write(&some_fixture, serde_json::to_vec_pretty(&tampered).unwrap()).unwrap();

        let (_, shadow) = shadow_replay_bundle(dir.path()).unwrap();
        assert!(!shadow.pass);
        assert!(!shadow.failures.is_empty());
    }
}