1use std::collections::{BTreeMap, BTreeSet};
11use std::fmt::Write as _;
12use std::path::{Path, PathBuf};
13
14use serde::{Deserialize, Serialize};
15use serde_json::{json, Value as JsonValue};
16use sha2::{Digest, Sha256};
17
18use super::{
19 new_id, now_rfc3339, run_replay_oracle_trace, ReplayAllowlistRule, ReplayExpectation,
20 ReplayFixture, ReplayOracleReport, ReplayOracleTrace, ReplayTraceRun, RunRecord,
21};
22use crate::value::VmError;
23
/// Schema version stamped onto traces that arrive with `version == 0` and
/// reported as the version of the crystallization input format.
const TRACE_SCHEMA_VERSION: u32 = 1;
/// Lower bound applied to `CrystallizeOptions::min_examples` by `crystallize_traces`.
const DEFAULT_MIN_EXAMPLES: usize = 2;
/// NOTE(review): presumably the fallback for
/// `CrystallizeOptions::promotion_min_confidence`; it is not referenced in this
/// part of the file — confirm against the promotion logic.
const DEFAULT_PROMOTION_MIN_CONFIDENCE: f64 = 0.80;

/// Schema identifier for a candidate bundle.
/// NOTE(review): the bundle writer/reader is outside this section — confirm usage.
pub const BUNDLE_SCHEMA: &str = "harn.crystallization.candidate.bundle";
/// Version number paired with [`BUNDLE_SCHEMA`].
pub const BUNDLE_SCHEMA_VERSION: u32 = 1;
/// File name for the bundle manifest (presumably relative to the bundle root — confirm).
pub const BUNDLE_MANIFEST_FILE: &str = "candidate.json";
/// File name for the crystallization report inside a bundle.
pub const BUNDLE_REPORT_FILE: &str = "report.json";
/// File name for the generated workflow source inside a bundle.
pub const BUNDLE_WORKFLOW_FILE: &str = "workflow.harn";
/// File name for the generated eval pack inside a bundle.
pub const BUNDLE_EVAL_PACK_FILE: &str = "harn.eval.toml";
/// Directory name for fixtures inside a bundle.
pub const BUNDLE_FIXTURES_DIR: &str = "fixtures";

/// NOTE(review): presumably the rollout policy recorded when none is supplied;
/// not referenced in this part of the file — confirm.
const DEFAULT_ROLLOUT_POLICY: &str = "shadow_then_canary";
46
/// One recorded execution used as crystallization input: an ordered list of
/// actions plus provenance, replay data and aggregate usage.
///
/// Every field is optional on the wire (`#[serde(default)]`); `normalize_trace`
/// fills in the blanks described on the individual fields.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationTrace {
    /// Trace schema version; `0` is replaced with `TRACE_SCHEMA_VERSION` during normalization.
    pub version: u32,
    /// Trace identifier; a blank id is replaced with a freshly generated one.
    pub id: String,
    /// Where the trace came from; the file loader sets this to the file path when absent.
    pub source: Option<String>,
    /// Content hash; the file loader hashes the raw file contents when absent,
    /// otherwise normalization hashes the serialized `actions`.
    pub source_hash: Option<String>,
    /// Workflow id, copied from the run record when the trace is derived from one.
    pub workflow_id: Option<String>,
    /// Start timestamp, copied from the run record when available.
    pub started_at: Option<String>,
    /// Finish timestamp, copied from the run record when available.
    pub finished_at: Option<String>,
    /// Optional Flow provenance references for this trace.
    pub flow: Option<CrystallizationFlowRef>,
    /// The recorded actions, in order.
    pub actions: Vec<CrystallizationAction>,
    /// Replay data; derived from the run record's replay fixture when present.
    pub replay_run: Option<ReplayTraceRun>,
    /// Allowlist rules attached to the trace for replay comparison.
    pub replay_allowlist: Vec<ReplayAllowlistRule>,
    /// Aggregate usage; when left at its default it is summed from the actions.
    pub usage: CrystallizationUsage,
    /// Free-form metadata carried along with the trace.
    pub metadata: BTreeMap<String, JsonValue>,
}
64
/// References that tie a trace back to its Flow provenance.
///
/// NOTE(review): none of these fields are read in this part of the file; the
/// descriptions below follow the field names only — confirm against producers.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CrystallizationFlowRef {
    /// Identifier of the originating trace, if known.
    pub trace_id: Option<String>,
    /// Identifier of the agent run, if known.
    pub agent_run_id: Option<String>,
    /// Reference to the transcript, if known.
    pub transcript_ref: Option<String>,
    /// Referenced atom ids.
    pub atom_ids: Vec<String>,
    /// Referenced slice ids.
    pub slice_ids: Vec<String>,
}
74
/// A single recorded step inside a [`CrystallizationTrace`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationAction {
    /// Action id; a blank id becomes `action_<n>` (1-based position) during normalization.
    pub id: String,
    /// Action kind; blank becomes `"action"`. Kinds handled specially in this file include
    /// `"model_call"`, `"shell_failure"` and `"agent_recovery_advice"`.
    pub kind: String,
    /// Display name; blank falls back to `kind` during normalization.
    pub name: String,
    /// When the action happened; used to order actions derived from a run record.
    pub timestamp: Option<String>,
    /// Recorded inputs of the action.
    pub inputs: JsonValue,
    /// Recorded output; used as a step's expected output when `observed_output` is absent.
    pub output: Option<JsonValue>,
    /// Observed output; preferred over `output` when building a step's expected output.
    pub observed_output: Option<JsonValue>,
    /// Named parameters; scalar entries become workflow parameters for single-trace candidates.
    pub parameters: BTreeMap<String, JsonValue>,
    /// Side effects attributed to the action (sorted during normalization).
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities the action used (sorted and de-duplicated during normalization).
    pub capabilities: Vec<String>,
    /// Secrets the action needed (sorted and de-duplicated during normalization).
    pub required_secrets: Vec<String>,
    /// Approval attached to the action, if any.
    pub approval: Option<CrystallizationApproval>,
    /// Cost figures; `model_calls` is bumped from 0 to 1 for `"model_call"` actions.
    pub cost: CrystallizationCost,
    /// Duration in milliseconds, if recorded.
    pub duration_ms: Option<i64>,
    /// Whether the action was recorded as deterministic.
    pub deterministic: Option<bool>,
    /// Whether the action was recorded as fuzzy; `Some(true)` marks the resulting step fuzzy.
    pub fuzzy: Option<bool>,
    /// Free-form metadata; `"success"` and `"recovery_hint"` are read when building review notes.
    pub metadata: BTreeMap<String, JsonValue>,
}
96
/// A side effect attributed to an action.
///
/// NOTE(review): field meanings below follow the names; the code in this
/// section only copies and sorts these values — confirm against producers.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationSideEffect {
    /// Kind of side effect.
    pub kind: String,
    /// What the side effect touched.
    pub target: String,
    /// Capability associated with the side effect, if any.
    pub capability: Option<String>,
    /// Description of the mutation, if any.
    pub mutation: Option<String>,
    /// Free-form metadata.
    pub metadata: BTreeMap<String, JsonValue>,
}
106
/// An approval attached to an action or workflow step.
///
/// Run-record HITL questions are converted into approvals with `required = true`
/// and `boundary = "hitl"`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CrystallizationApproval {
    /// Prompt shown when asking for approval.
    pub prompt: Option<String>,
    /// Who approved, if recorded.
    pub approver: Option<String>,
    /// Whether the approval is mandatory.
    pub required: bool,
    /// Boundary label for the approval (e.g. `"hitl"`).
    pub boundary: Option<String>,
}
115
/// Cost figures recorded for a single action.
///
/// `normalize_trace` sums these into [`CrystallizationUsage`] when the trace-level
/// usage was left at its default.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationCost {
    /// Model name; for run-record stages this is the first entry of the stage's model list.
    pub model: Option<String>,
    /// Number of model calls.
    pub model_calls: i64,
    /// Input token count.
    pub input_tokens: i64,
    /// Output token count.
    pub output_tokens: i64,
    /// Total cost in USD.
    pub total_cost_usd: f64,
    /// Wall-clock time in milliseconds.
    pub wall_ms: i64,
}
126
/// Aggregate usage for a whole trace.
///
/// Copied from the run record's usage when available; otherwise accumulated
/// from the per-action costs during normalization.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationUsage {
    /// Total number of model calls.
    pub model_calls: i64,
    /// Total input tokens.
    pub input_tokens: i64,
    /// Total output tokens.
    pub output_tokens: i64,
    /// Total cost in USD.
    pub total_cost_usd: f64,
    /// Total wall-clock time in milliseconds.
    pub wall_ms: i64,
}
136
/// Classification of a workflow step; serialized in `snake_case`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SegmentKind {
    /// The default classification.
    #[default]
    Deterministic,
    /// Assigned to steps whose action is flagged fuzzy or has kind `"model_call"`.
    Fuzzy,
}
144
/// A pair of indices locating one example of a sequence.
/// NOTE(review): presumably `(trace index, start action index)` — single-trace
/// candidates use `(0, 0)`; confirm against the mining code.
type SequenceExample = (usize, usize);
/// A sequence signature paired with the examples where it was found.
type RepeatedSequence = (Vec<String>, Vec<SequenceExample>);
147
/// A parameter exposed by a workflow candidate.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowCandidateParameter {
    /// Parameter name; single-trace candidates use the sanitized action parameter name.
    pub name: String,
    /// Where the parameter was observed, e.g. `steps[0].parameters.<name>`.
    pub source_paths: Vec<String>,
    /// Example values; single-trace candidates keep at most five.
    pub examples: Vec<String>,
    /// Whether the parameter must be supplied.
    pub required: bool,
}
156
/// One step of a workflow candidate, derived from a recorded action.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowCandidateStep {
    /// Step position; single-trace candidates number steps from 1.
    pub index: usize,
    /// Kind copied from the source action.
    pub kind: String,
    /// Name copied from the source action.
    pub name: String,
    /// Whether the step is deterministic or fuzzy.
    pub segment: SegmentKind,
    /// Names of the candidate parameters this step refers to.
    pub parameter_refs: Vec<String>,
    /// Constant values attached to the step.
    pub constants: BTreeMap<String, JsonValue>,
    /// Preconditions attached to the step.
    pub preconditions: Vec<String>,
    /// Side effects copied from the source action.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities required by the step.
    pub capabilities: Vec<String>,
    /// Secrets required by the step.
    pub required_secrets: Vec<String>,
    /// Approval attached to the step, if any.
    pub approval: Option<CrystallizationApproval>,
    /// Expected output: the action's observed output, falling back to its output.
    pub expected_output: Option<JsonValue>,
    /// Notes for the human reviewer (failures, recovery hints, advisory steps).
    pub review_notes: Vec<String>,
}
174
/// Points at one occurrence of a candidate's action sequence inside a source trace.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowCandidateExample {
    /// Id of the source trace.
    pub trace_id: String,
    /// Hash of the source trace (empty when the trace carries none).
    pub source_hash: String,
    /// Index of the first action of the occurrence; `0` for single-trace candidates.
    pub start_index: usize,
    /// Ids of the actions that make up the occurrence.
    pub action_ids: Vec<String>,
}
183
/// Bookkeeping attached to a candidate for promotion into a package.
///
/// `build_single_trace_candidate` fills the provenance/packaging fields and
/// leaves the rest at their defaults.
/// NOTE(review): the remaining fields are presumably filled by
/// `refresh_promotion_metadata`, which is outside this section — confirm.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct PromotionMetadata {
    /// Source hashes of the example traces.
    pub source_trace_hashes: Vec<String>,
    /// Author, copied from the crystallize options.
    pub author: Option<String>,
    /// Approver, copied from the crystallize options.
    pub approver: Option<String>,
    /// Creation timestamp (RFC 3339).
    pub created_at: String,
    /// Package version; single-trace candidates start at `"0.1.0"`.
    pub version: String,
    /// Package name; defaults to the workflow name with `_` replaced by `-`.
    pub package_name: String,
    /// Capabilities required by the candidate.
    pub capability_set: Vec<String>,
    /// Secrets required by the candidate.
    pub secrets_required: Vec<String>,
    /// Human-readable rollback target.
    pub rollback_target: Option<String>,
    /// Link to the eval pack; overwritten with the written file path when an eval pack is saved.
    pub eval_pack_link: Option<String>,
    /// Number of samples backing the candidate.
    pub sample_count: usize,
    /// Confidence recorded for promotion.
    pub confidence: f64,
    /// Number of successful shadow comparisons.
    pub shadow_success_count: usize,
    /// Number of failed shadow comparisons.
    pub shadow_failure_count: usize,
    /// Divergences recorded over time.
    pub divergence_history: Vec<PromotionDivergenceRecord>,
    /// Approval decisions recorded over time.
    pub approval_history: Vec<PromotionApprovalRecord>,
    /// Promotion gate settings and status.
    pub criteria: PromotionCriteria,
    /// Estimated savings from running the crystallized workflow.
    pub estimated_time_token_savings: SavingsEstimate,
}
206
/// Promotion gate settings together with the resulting status.
///
/// NOTE(review): populated outside this section; the field descriptions follow
/// the field names — confirm against the promotion logic.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct PromotionCriteria {
    /// Minimum number of examples required.
    pub min_examples: usize,
    /// Minimum confidence required.
    pub min_confidence: f64,
    /// Whether a passing shadow run is required.
    pub requires_shadow_pass: bool,
    /// Whether the candidate must have no rejection reasons.
    pub requires_no_rejections: bool,
    /// Whether a human must approve the promotion.
    pub requires_human_approval: bool,
    /// Why approval is required, if it is.
    pub approval_reason: Option<String>,
    /// Resulting promotion status.
    pub status: PromotionStatus,
    /// Reasons explaining the status.
    pub reasons: Vec<String>,
}
219
/// Promotion readiness of a candidate; serialized in `snake_case`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PromotionStatus {
    /// The default status.
    #[default]
    Blocked,
    /// Serialized as `needs_approval`.
    NeedsApproval,
    /// Serialized as `ready`.
    Ready,
}
228
/// One recorded divergence for a candidate.
///
/// NOTE(review): produced outside this section; descriptions follow the field
/// names — confirm against the shadow/replay comparison code.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct PromotionDivergenceRecord {
    /// Trace in which the divergence was seen.
    pub trace_id: String,
    /// Location of the divergence, if known.
    pub path: Option<String>,
    /// Human-readable description.
    pub message: String,
    /// One side of the comparison, if captured.
    pub left: Option<JsonValue>,
    /// The other side of the comparison, if captured.
    pub right: Option<JsonValue>,
}
238
/// One recorded approval decision for a candidate.
///
/// NOTE(review): produced outside this section; descriptions follow the field names.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct PromotionApprovalRecord {
    /// Who made the decision.
    pub actor: String,
    /// The decision that was taken.
    pub decision: String,
    /// When the decision was recorded.
    pub recorded_at: String,
    /// Optional justification.
    pub reason: Option<String>,
}
247
/// Estimated savings of running a crystallized workflow instead of the original trace.
///
/// NOTE(review): computed by `estimate_savings`, which is outside this section;
/// descriptions follow the field names — confirm units against that function.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct SavingsEstimate {
    /// Model calls that no longer need to happen.
    pub model_calls_avoided: i64,
    /// Input tokens saved.
    pub input_tokens_avoided: i64,
    /// Output tokens saved.
    pub output_tokens_avoided: i64,
    /// Estimated cost saved, in USD.
    pub estimated_cost_usd_avoided: f64,
    /// Wall-clock milliseconds saved.
    pub wall_ms_avoided: i64,
    /// Estimated CPU runtime cost, in USD.
    pub cpu_runtime_cost_usd: f64,
    /// Model calls that still remain.
    pub remaining_model_calls: i64,
}
259
/// Outcome of shadow-comparing a candidate against one trace.
///
/// NOTE(review): produced by `shadow_candidate`, which is outside this section.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ShadowTraceResult {
    /// Id of the compared trace.
    pub trace_id: String,
    /// Source hash of the compared trace.
    pub source_hash: String,
    /// Whether the comparison passed.
    pub pass: bool,
    /// Detail messages for the comparison.
    pub details: Vec<String>,
    /// Number of receipts compared.
    pub compared_receipts: usize,
    /// Replay oracle report, when one was produced.
    pub replay_oracle: Option<ReplayOracleReport>,
}
270
/// Aggregate result of shadow-comparing a candidate against a pool of traces.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ShadowRunReport {
    /// Overall verdict; when `false`, `failures` are appended to the candidate's rejection reasons.
    pub pass: bool,
    /// Number of traces compared.
    pub compared_traces: usize,
    /// Failure messages collected during the run.
    pub failures: Vec<String>,
    /// Per-trace results.
    pub traces: Vec<ShadowTraceResult>,
}
279
/// Key describing the cluster a candidate belongs to.
///
/// NOTE(review): built by `cluster_key_for_candidate`, which is outside this
/// section; descriptions follow the field names.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct WorkflowClusterKey {
    /// Goal shared by the clustered traces, if known.
    pub goal: Option<String>,
    /// Sequence of tools used.
    pub tool_sequence: Vec<String>,
    /// Types of artifacts touched.
    pub touched_artifact_types: Vec<String>,
    /// Success criteria for the workflow.
    pub success_criteria: Vec<String>,
}
288
/// A workflow proposed from one or more traces, together with everything
/// needed to review, shadow-test and promote it.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct WorkflowCandidate {
    /// Candidate id derived from the sequence signature and the examples.
    pub id: String,
    /// Workflow name (caller-supplied or inferred from the steps).
    pub name: String,
    /// Confidence score; accepted candidates are ranked by it in descending order.
    pub confidence: f64,
    /// Cluster key of the candidate.
    pub cluster_key: WorkflowClusterKey,
    /// Signature of each action in the sequence.
    pub sequence_signature: Vec<String>,
    /// Parameters exposed by the workflow.
    pub parameters: Vec<WorkflowCandidateParameter>,
    /// Ordered steps of the workflow.
    pub steps: Vec<WorkflowCandidateStep>,
    /// Occurrences of the sequence in the source traces.
    pub examples: Vec<WorkflowCandidateExample>,
    /// Capabilities required across all steps.
    pub capabilities: Vec<String>,
    /// Secrets required across all steps.
    pub required_secrets: Vec<String>,
    /// Approvals collected from the steps.
    pub approval_points: Vec<CrystallizationApproval>,
    /// Side effects collected from the steps.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Expected outputs collected from the steps.
    pub expected_outputs: Vec<JsonValue>,
    /// Expected receipts derived from the examples.
    pub expected_receipts: Vec<JsonValue>,
    /// Non-blocking warnings (e.g. steps that remain fuzzy).
    pub warnings: Vec<String>,
    /// Blocking reasons; a candidate with any entry here is not proposed.
    pub rejection_reasons: Vec<String>,
    /// Promotion bookkeeping.
    pub promotion: PromotionMetadata,
    /// Estimated savings.
    pub savings: SavingsEstimate,
    /// Result of the shadow run for this candidate.
    pub shadow: ShadowRunReport,
}
312
impl WorkflowCandidate {
    /// Returns `true` when the candidate has no rejection reasons.
    ///
    /// Callers in this file use it to split candidates into accepted and rejected.
    pub fn is_safe_to_propose(&self) -> bool {
        self.rejection_reasons.is_empty()
    }
}
318
/// Report describing the outcome of a crystallization run.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationReport {
    /// Report format version (the producers in this file write `1`).
    pub version: u32,
    /// Generation timestamp (RFC 3339).
    pub generated_at: String,
    /// Number of traces that were eligible for mining.
    pub source_trace_count: usize,
    /// Number of traces excluded from mining and from the shadow pool.
    pub excluded_trace_count: usize,
    /// Id of the top-ranked accepted candidate, if any.
    pub selected_candidate_id: Option<String>,
    /// Accepted candidates, best first.
    pub candidates: Vec<WorkflowCandidate>,
    /// Candidates that carry rejection reasons.
    pub rejected_candidates: Vec<WorkflowCandidate>,
    /// Report-level warnings.
    pub warnings: Vec<String>,
    /// Description of the accepted input format.
    pub input_format: CrystallizationInputFormat,
    /// Path of the written workflow file; set by `write_crystallization_artifacts`.
    pub harn_code_path: Option<String>,
    /// Path of the written eval pack; set only when an eval pack was actually written.
    pub eval_pack_path: Option<String>,
    /// Segment summary supplied by the single-trace synthesis path; omitted from JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub segment_summary: Option<SegmentSummary>,
    /// Recovery summary supplied by the single-trace synthesis path; omitted from JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub recovery_summary: Option<RecoveryFeedbackSummary>,
}
345
/// Caller-supplied summary of how a trace splits into deterministic and agentic parts.
///
/// NOTE(review): this file only passes the value through into the report;
/// descriptions follow the field names.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct SegmentSummary {
    /// Number of deterministic segments.
    pub deterministic_count: usize,
    /// Number of agentic segments.
    pub agentic_count: usize,
    /// Items considered safe to automate.
    pub safe_to_automate: Vec<String>,
    /// Items that need a human reviewer.
    pub requires_human_review: Vec<String>,
    /// Plain-language rendering of the summary.
    pub plain_language: String,
}
358
/// Caller-supplied summary of failures and recovery advice seen in a trace.
///
/// NOTE(review): this file only passes the value through into the report;
/// descriptions follow the field names.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct RecoveryFeedbackSummary {
    /// Number of shell failures observed.
    pub shell_failures_seen: usize,
    /// Number of recovery-advice runs observed.
    pub recovery_advice_runs: usize,
    /// Whether failures were fed back into the agent.
    pub failures_fed_into_agent: bool,
    /// Names of the steps that failed.
    pub failed_steps: Vec<String>,
    /// How the feedback is represented.
    pub representation: String,
}
376
/// Describes the trace input format in a [`CrystallizationReport`].
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct CrystallizationInputFormat {
    /// Format name (the producers in this file write `"harn.crystallization.trace"`).
    pub name: String,
    /// Format version (the producers in this file write `TRACE_SCHEMA_VERSION`).
    pub version: u32,
    /// Fields a trace must provide.
    pub required_fields: Vec<String>,
    /// Human-readable list of information the format preserves.
    pub preserved_fields: Vec<String>,
}
385
/// Caller-tunable options for crystallization.
#[derive(Clone, Debug, Default)]
pub struct CrystallizeOptions {
    /// Requested minimum number of example traces; `crystallize_traces` raises it to at
    /// least `DEFAULT_MIN_EXAMPLES`.
    pub min_examples: usize,
    /// Extra traces added to the shadow pool (after normalization and eligibility filtering).
    pub shadow_traces: Vec<CrystallizationTrace>,
    /// Minimum confidence for promotion.
    /// NOTE(review): consumed outside this section — confirm how `0.0` is treated.
    pub promotion_min_confidence: f64,
    /// Overrides the inferred workflow name.
    pub workflow_name: Option<String>,
    /// Overrides the package name (default: workflow name with `_` replaced by `-`).
    pub package_name: Option<String>,
    /// Author recorded in the promotion metadata.
    pub author: Option<String>,
    /// Approver recorded in the promotion metadata.
    pub approver: Option<String>,
    /// Eval pack link recorded in the promotion metadata.
    pub eval_pack_link: Option<String>,
}
397
/// In-memory outputs of a crystallization run, ready to be written to disk.
#[derive(Clone, Debug, Default)]
pub struct CrystallizationArtifacts {
    /// The report describing candidates and rejections.
    pub report: CrystallizationReport,
    /// Generated workflow source for the selected candidate, or a stub built from the
    /// rejected candidates when nothing was selected.
    pub harn_code: String,
    /// Generated eval pack for the selected candidate; empty when nothing was selected.
    pub eval_pack_toml: String,
}
404
405pub fn crystallize_traces(
406 traces: Vec<CrystallizationTrace>,
407 options: CrystallizeOptions,
408) -> Result<CrystallizationArtifacts, VmError> {
409 let min_examples = options.min_examples.max(DEFAULT_MIN_EXAMPLES);
410 if traces.len() < min_examples {
411 return Err(VmError::Runtime(format!(
412 "crystallize requires at least {min_examples} traces, got {}",
413 traces.len()
414 )));
415 }
416
417 let normalized_all = traces.into_iter().map(normalize_trace).collect::<Vec<_>>();
418 let (normalized, excluded_mining) = partition_mineable_traces(normalized_all);
419 if normalized.len() < min_examples {
420 return Err(VmError::Runtime(format!(
421 "crystallize requires at least {min_examples} eligible traces, got {} (excluded {})",
422 normalized.len(),
423 excluded_mining.len()
424 )));
425 }
426 let (shadow_traces, excluded_shadow) = partition_mineable_traces(
427 options
428 .shadow_traces
429 .iter()
430 .cloned()
431 .map(normalize_trace)
432 .collect(),
433 );
434 let mut shadow_pool = normalized.clone();
435 shadow_pool.extend(shadow_traces);
436
437 let mut candidates = mine_candidates(&normalized, min_examples, &options);
438 let mut rejected_candidates = Vec::new();
439 for candidate in &mut candidates {
440 candidate.shadow = shadow_candidate(candidate, &shadow_pool);
441 if !candidate.shadow.pass {
442 candidate
443 .rejection_reasons
444 .extend(candidate.shadow.failures.clone());
445 }
446 refresh_promotion_metadata(candidate, min_examples, &options);
447 }
448
449 let mut accepted = Vec::new();
450 for candidate in candidates {
451 if candidate.is_safe_to_propose() {
452 accepted.push(candidate);
453 } else {
454 rejected_candidates.push(candidate);
455 }
456 }
457 accepted.sort_by(|left, right| {
458 right
459 .confidence
460 .partial_cmp(&left.confidence)
461 .unwrap_or(std::cmp::Ordering::Equal)
462 .then_with(|| right.steps.len().cmp(&left.steps.len()))
463 });
464
465 let selected = accepted.first();
466 let harn_code = selected
467 .map(generate_harn_code)
468 .unwrap_or_else(|| rejected_workflow_stub(&rejected_candidates));
469 let eval_pack_toml = selected.map(generate_eval_pack).unwrap_or_default();
470
471 let report = CrystallizationReport {
472 version: 1,
473 generated_at: now_rfc3339(),
474 source_trace_count: normalized.len(),
475 excluded_trace_count: excluded_mining.len() + excluded_shadow.len(),
476 selected_candidate_id: selected.map(|candidate| candidate.id.clone()),
477 candidates: accepted,
478 rejected_candidates,
479 warnings: Vec::new(),
480 input_format: CrystallizationInputFormat {
481 name: "harn.crystallization.trace".to_string(),
482 version: TRACE_SCHEMA_VERSION,
483 required_fields: vec!["id".to_string(), "actions".to_string()],
484 preserved_fields: vec![
485 "ordered actions".to_string(),
486 "tool calls".to_string(),
487 "model calls".to_string(),
488 "human approvals".to_string(),
489 "file mutations".to_string(),
490 "external API calls".to_string(),
491 "observed outputs".to_string(),
492 "costs".to_string(),
493 "timestamps".to_string(),
494 "source hashes".to_string(),
495 "Flow provenance references".to_string(),
496 ],
497 },
498 harn_code_path: None,
499 eval_pack_path: None,
500 segment_summary: None,
501 recovery_summary: None,
502 };
503
504 Ok(CrystallizationArtifacts {
505 report,
506 harn_code,
507 eval_pack_toml,
508 })
509}
510
511pub fn synthesize_candidate_from_trace(
524 trace: CrystallizationTrace,
525 options: CrystallizeOptions,
526 extra_parameters: Vec<WorkflowCandidateParameter>,
527 segment_summary: Option<SegmentSummary>,
528 recovery_summary: Option<RecoveryFeedbackSummary>,
529) -> Result<CrystallizationArtifacts, VmError> {
530 if trace.actions.is_empty() {
531 return Err(VmError::Runtime(
532 "synthesize_candidate_from_trace requires a trace with at least one action".to_string(),
533 ));
534 }
535 let normalized = normalize_trace(trace);
536 let mut candidate = build_single_trace_candidate(&normalized, &options, extra_parameters);
537 candidate.shadow = shadow_candidate(&candidate, std::slice::from_ref(&normalized));
538 if !candidate.shadow.pass {
539 candidate
540 .rejection_reasons
541 .extend(candidate.shadow.failures.clone());
542 }
543 refresh_promotion_metadata(&mut candidate, 1, &options);
544 let (accepted, rejected) = if candidate.is_safe_to_propose() {
545 (vec![candidate], Vec::new())
546 } else {
547 (Vec::new(), vec![candidate])
548 };
549
550 let selected = accepted.first();
551 let harn_code = selected
552 .map(generate_harn_code)
553 .unwrap_or_else(|| rejected_workflow_stub(&rejected));
554 let eval_pack_toml = selected.map(generate_eval_pack).unwrap_or_default();
555 let selected_id = selected.map(|candidate| candidate.id.clone());
556
557 let report = CrystallizationReport {
558 version: 1,
559 generated_at: now_rfc3339(),
560 source_trace_count: 1,
561 excluded_trace_count: 0,
562 selected_candidate_id: selected_id,
563 candidates: accepted,
564 rejected_candidates: rejected,
565 warnings: Vec::new(),
566 input_format: CrystallizationInputFormat {
567 name: "harn.crystallization.trace".to_string(),
568 version: TRACE_SCHEMA_VERSION,
569 required_fields: vec!["id".to_string(), "actions".to_string()],
570 preserved_fields: vec![
571 "ordered actions".to_string(),
572 "deterministic events".to_string(),
573 "agentic events".to_string(),
574 "tool/shell observations".to_string(),
575 "recovery advice".to_string(),
576 "release metadata".to_string(),
577 "source hashes".to_string(),
578 ],
579 },
580 harn_code_path: None,
581 eval_pack_path: None,
582 segment_summary,
583 recovery_summary,
584 };
585
586 Ok(CrystallizationArtifacts {
587 report,
588 harn_code,
589 eval_pack_toml,
590 })
591}
592
593fn build_single_trace_candidate(
599 trace: &CrystallizationTrace,
600 options: &CrystallizeOptions,
601 extra_parameters: Vec<WorkflowCandidateParameter>,
602) -> WorkflowCandidate {
603 let mut steps = Vec::with_capacity(trace.actions.len());
604 let mut parameter_values: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
605 let mut parameter_paths: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
606 let mut warnings = Vec::new();
607 let sequence: Vec<String> = trace.actions.iter().map(action_signature).collect();
608
609 for (idx, action) in trace.actions.iter().enumerate() {
610 let mut parameter_refs = BTreeSet::new();
611 for (name, value) in &action.parameters {
612 if is_scalar(value) {
613 let ident = sanitize_identifier(name);
614 parameter_values
615 .entry(ident.clone())
616 .or_default()
617 .insert(json_scalar_string(value));
618 parameter_paths
619 .entry(ident.clone())
620 .or_default()
621 .insert(format!("steps[{idx}].parameters.{name}"));
622 parameter_refs.insert(ident);
623 }
624 }
625
626 let fuzzy = action.fuzzy.unwrap_or(false) || action.kind == "model_call";
627 if fuzzy {
628 warnings.push(format!(
629 "step {} '{}' remains fuzzy and requires review/LLM handling",
630 idx + 1,
631 action.name
632 ));
633 }
634
635 steps.push(WorkflowCandidateStep {
636 index: idx + 1,
637 kind: action.kind.clone(),
638 name: action.name.clone(),
639 segment: if fuzzy {
640 SegmentKind::Fuzzy
641 } else {
642 SegmentKind::Deterministic
643 },
644 parameter_refs: parameter_refs.into_iter().collect(),
645 constants: constants_for_action(action),
646 preconditions: preconditions_for_action(action),
647 side_effects: action.side_effects.clone(),
648 capabilities: sorted_strings(action.capabilities.iter().cloned()),
649 required_secrets: sorted_strings(action.required_secrets.iter().cloned()),
650 approval: action.approval.clone(),
651 expected_output: action
652 .observed_output
653 .clone()
654 .or_else(|| action.output.clone()),
655 review_notes: review_notes_for_action(action),
656 });
657 }
658
659 let mut parameters: Vec<WorkflowCandidateParameter> = parameter_values
660 .iter()
661 .map(|(name, values)| WorkflowCandidateParameter {
662 name: name.clone(),
663 source_paths: parameter_paths
664 .get(name)
665 .map(|paths| paths.iter().cloned().collect())
666 .unwrap_or_default(),
667 examples: values.iter().take(5).cloned().collect(),
668 required: true,
669 })
670 .collect();
671 let extra_names: BTreeSet<String> = extra_parameters.iter().map(|p| p.name.clone()).collect();
676 parameters.retain(|p| !extra_names.contains(&p.name));
677 parameters.extend(extra_parameters);
678 parameters.sort_by(|left, right| left.name.cmp(&right.name));
679
680 let example_refs = vec![WorkflowCandidateExample {
681 trace_id: trace.id.clone(),
682 source_hash: trace.source_hash.clone().unwrap_or_default(),
683 start_index: 0,
684 action_ids: trace
685 .actions
686 .iter()
687 .map(|action| action.id.clone())
688 .collect(),
689 }];
690
691 let capabilities = sorted_strings(
692 steps
693 .iter()
694 .flat_map(|step| step.capabilities.iter().cloned()),
695 );
696 let required_secrets = sorted_strings(
697 steps
698 .iter()
699 .flat_map(|step| step.required_secrets.iter().cloned()),
700 );
701 let approval_points = steps
702 .iter()
703 .filter_map(|step| step.approval.clone())
704 .collect::<Vec<_>>();
705 let side_effects = sorted_side_effects(
706 steps
707 .iter()
708 .flat_map(|step| step.side_effects.iter().cloned())
709 .collect(),
710 );
711 let expected_outputs = steps
712 .iter()
713 .filter_map(|step| step.expected_output.clone())
714 .collect::<Vec<_>>();
715 let expected_receipts = expected_receipts_for_examples(std::slice::from_ref(trace), &[(0, 0)]);
716
717 let savings = estimate_savings(std::slice::from_ref(trace), &[(0, 0)], &steps);
718 let confidence = confidence_for(&[(0, 0)], 1, &steps, true);
719 let name = options
720 .workflow_name
721 .clone()
722 .unwrap_or_else(|| infer_workflow_name(&steps));
723 let package_name = options
724 .package_name
725 .clone()
726 .unwrap_or_else(|| name.replace('_', "-"));
727
728 WorkflowCandidate {
729 id: stable_candidate_id(&sequence, &example_refs),
730 name,
731 confidence,
732 cluster_key: cluster_key_for_candidate(std::slice::from_ref(trace), &[(0, 0)], &steps),
733 sequence_signature: sequence,
734 parameters,
735 steps,
736 examples: example_refs.clone(),
737 capabilities: capabilities.clone(),
738 required_secrets: required_secrets.clone(),
739 approval_points,
740 side_effects,
741 expected_outputs,
742 expected_receipts,
743 warnings,
744 rejection_reasons: Vec::new(),
745 promotion: PromotionMetadata {
746 source_trace_hashes: example_refs
747 .iter()
748 .map(|example| example.source_hash.clone())
749 .collect(),
750 author: options.author.clone(),
751 approver: options.approver.clone(),
752 created_at: now_rfc3339(),
753 version: "0.1.0".to_string(),
754 package_name,
755 capability_set: capabilities,
756 secrets_required: required_secrets,
757 rollback_target: Some("keep source trace and previous package version".to_string()),
758 eval_pack_link: options.eval_pack_link.clone(),
759 ..PromotionMetadata::default()
760 },
761 savings,
762 shadow: ShadowRunReport::default(),
763 }
764}
765
766fn review_notes_for_action(action: &CrystallizationAction) -> Vec<String> {
767 let mut notes = Vec::new();
768 if action.kind == "shell_failure"
769 || matches!(
770 action
771 .metadata
772 .get("success")
773 .and_then(|value| value.as_bool()),
774 Some(false)
775 )
776 {
777 notes.push(format!(
778 "shell/tool step '{}' failed in the source trace; reviewer should confirm \
779 whether deterministic recovery is possible before promotion.",
780 action.name
781 ));
782 }
783 if let Some(hint) = action
784 .metadata
785 .get("recovery_hint")
786 .and_then(|value| value.as_str())
787 .filter(|hint| !hint.trim().is_empty())
788 {
789 notes.push(format!("recovery hint from source trace: {hint}"));
790 }
791 if action.kind == "agent_recovery_advice" {
792 notes.push(
793 "agent-authored recovery advice; treat as advisory, never as deterministic truth."
794 .to_string(),
795 );
796 }
797 notes
798}
799
800pub fn load_crystallization_traces_from_dir(
801 dir: &Path,
802) -> Result<Vec<CrystallizationTrace>, VmError> {
803 let mut paths = Vec::new();
804 collect_json_paths(dir, &mut paths)?;
805 if paths.is_empty() {
806 return Err(VmError::Runtime(format!(
807 "no .json trace files found under {}",
808 dir.display()
809 )));
810 }
811 paths.sort();
812 paths
813 .iter()
814 .map(|path| load_crystallization_trace(path))
815 .collect()
816}
817
818pub fn load_crystallization_trace(path: &Path) -> Result<CrystallizationTrace, VmError> {
819 let content = std::fs::read_to_string(path).map_err(|error| {
820 VmError::Runtime(format!(
821 "failed to read crystallization trace {}: {error}",
822 path.display()
823 ))
824 })?;
825 let value: JsonValue = serde_json::from_str(&content).map_err(|error| {
826 VmError::Runtime(format!(
827 "failed to parse crystallization trace {}: {error}",
828 path.display()
829 ))
830 })?;
831
832 let mut trace = if value.get("actions").is_some() {
833 serde_json::from_value::<CrystallizationTrace>(value.clone()).map_err(|error| {
834 VmError::Runtime(format!(
835 "failed to decode crystallization trace {}: {error}",
836 path.display()
837 ))
838 })?
839 } else if value.get("stages").is_some() || value.get("_type") == Some(&json!("workflow_run")) {
840 let run: RunRecord = serde_json::from_value(value.clone()).map_err(|error| {
841 VmError::Runtime(format!(
842 "failed to decode run record {} as crystallization input: {error}",
843 path.display()
844 ))
845 })?;
846 trace_from_run_record(run)
847 } else {
848 return Err(VmError::Runtime(format!(
849 "{} is neither a crystallization trace nor a workflow run record",
850 path.display()
851 )));
852 };
853 if trace.source.is_none() {
854 trace.source = Some(path.display().to_string());
855 }
856 if trace.source_hash.is_none() {
857 trace.source_hash = Some(hash_bytes(content.as_bytes()));
858 }
859 Ok(normalize_trace(trace))
860}
861
862pub fn write_crystallization_artifacts(
863 mut artifacts: CrystallizationArtifacts,
864 workflow_path: &Path,
865 report_path: &Path,
866 eval_pack_path: Option<&Path>,
867) -> Result<CrystallizationReport, VmError> {
868 crate::atomic_io::atomic_write(workflow_path, artifacts.harn_code.as_bytes()).map_err(
869 |error| {
870 VmError::Runtime(format!(
871 "failed to write generated workflow {}: {error}",
872 workflow_path.display()
873 ))
874 },
875 )?;
876
877 artifacts.report.harn_code_path = Some(workflow_path.display().to_string());
878 if let Some(path) = eval_pack_path {
879 if !artifacts.eval_pack_toml.trim().is_empty() {
880 crate::atomic_io::atomic_write(path, artifacts.eval_pack_toml.as_bytes()).map_err(
881 |error| {
882 VmError::Runtime(format!(
883 "failed to write eval pack {}: {error}",
884 path.display()
885 ))
886 },
887 )?;
888 artifacts.report.eval_pack_path = Some(path.display().to_string());
889 if let Some(candidate) = artifacts.report.candidates.first_mut() {
890 candidate.promotion.eval_pack_link = Some(path.display().to_string());
891 }
892 }
893 }
894
895 let report_json = serde_json::to_string_pretty(&artifacts.report)
896 .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
897 crate::atomic_io::atomic_write(report_path, report_json.as_bytes()).map_err(|error| {
898 VmError::Runtime(format!(
899 "failed to write crystallization report {}: {error}",
900 report_path.display()
901 ))
902 })?;
903 Ok(artifacts.report)
904}
905
906fn collect_json_paths(dir: &Path, out: &mut Vec<PathBuf>) -> Result<(), VmError> {
907 let entries = std::fs::read_dir(dir).map_err(|error| {
908 VmError::Runtime(format!(
909 "failed to read crystallization trace dir {}: {error}",
910 dir.display()
911 ))
912 })?;
913 for entry in entries {
914 let entry = entry.map_err(|error| {
915 VmError::Runtime(format!(
916 "failed to read entry in trace dir {}: {error}",
917 dir.display()
918 ))
919 })?;
920 let path = entry.path();
921 if path.is_dir() {
922 collect_json_paths(&path, out)?;
923 } else if path.extension().and_then(|ext| ext.to_str()) == Some("json") {
924 out.push(path);
925 }
926 }
927 Ok(())
928}
929
930fn trace_from_run_record(run: RunRecord) -> CrystallizationTrace {
931 let mut actions = Vec::new();
932 for stage in &run.stages {
933 actions.push(CrystallizationAction {
934 id: stage.id.clone(),
935 kind: if stage.kind.is_empty() {
936 "stage".to_string()
937 } else {
938 stage.kind.clone()
939 },
940 name: stage.node_id.clone(),
941 timestamp: Some(stage.started_at.clone()),
942 output: stage.visible_text.as_ref().map(|text| json!(text)),
943 observed_output: stage.visible_text.as_ref().map(|text| json!(text)),
944 duration_ms: stage.usage.as_ref().map(|usage| usage.total_duration_ms),
945 cost: stage
946 .usage
947 .as_ref()
948 .map(|usage| CrystallizationCost {
949 model_calls: usage.call_count,
950 input_tokens: usage.input_tokens,
951 output_tokens: usage.output_tokens,
952 total_cost_usd: usage.total_cost,
953 wall_ms: usage.total_duration_ms,
954 model: usage.models.first().cloned(),
955 })
956 .unwrap_or_default(),
957 deterministic: Some(
958 stage
959 .usage
960 .as_ref()
961 .map(|usage| usage.call_count == 0)
962 .unwrap_or(true),
963 ),
964 fuzzy: Some(
965 stage
966 .usage
967 .as_ref()
968 .is_some_and(|usage| usage.call_count > 0),
969 ),
970 metadata: stage.metadata.clone(),
971 ..CrystallizationAction::default()
972 });
973 }
974 for tool in &run.tool_recordings {
975 actions.push(CrystallizationAction {
976 id: tool.tool_use_id.clone(),
977 kind: "tool_call".to_string(),
978 name: tool.tool_name.clone(),
979 timestamp: Some(tool.timestamp.clone()),
980 output: Some(json!(tool.result)),
981 observed_output: Some(json!(tool.result)),
982 duration_ms: Some(tool.duration_ms as i64),
983 deterministic: Some(!tool.is_rejected),
984 fuzzy: Some(false),
985 metadata: BTreeMap::from([
986 ("args_hash".to_string(), json!(tool.args_hash)),
987 ("iteration".to_string(), json!(tool.iteration)),
988 ("is_rejected".to_string(), json!(tool.is_rejected)),
989 ]),
990 ..CrystallizationAction::default()
991 });
992 }
993 for question in &run.hitl_questions {
994 actions.push(CrystallizationAction {
995 id: question.request_id.clone(),
996 kind: "human_approval".to_string(),
997 name: question.agent.clone(),
998 timestamp: Some(question.asked_at.clone()),
999 approval: Some(CrystallizationApproval {
1000 prompt: Some(question.prompt.clone()),
1001 required: true,
1002 boundary: Some("hitl".to_string()),
1003 ..CrystallizationApproval::default()
1004 }),
1005 deterministic: Some(false),
1006 fuzzy: Some(false),
1007 metadata: question
1008 .trace_id
1009 .as_ref()
1010 .map(|trace_id| BTreeMap::from([("trace_id".to_string(), json!(trace_id))]))
1011 .unwrap_or_default(),
1012 ..CrystallizationAction::default()
1013 });
1014 }
1015 actions.sort_by(|left, right| left.timestamp.cmp(&right.timestamp));
1016
1017 CrystallizationTrace {
1018 version: TRACE_SCHEMA_VERSION,
1019 id: run.id.clone(),
1020 workflow_id: Some(run.workflow_id.clone()),
1021 started_at: Some(run.started_at.clone()),
1022 finished_at: run.finished_at.clone(),
1023 actions,
1024 replay_run: run.replay_fixture.as_ref().map(replay_run_from_fixture),
1025 replay_allowlist: default_trace_replay_allowlist(),
1026 usage: run
1027 .usage
1028 .map(|usage| CrystallizationUsage {
1029 model_calls: usage.call_count,
1030 input_tokens: usage.input_tokens,
1031 output_tokens: usage.output_tokens,
1032 total_cost_usd: usage.total_cost,
1033 wall_ms: usage.total_duration_ms,
1034 })
1035 .unwrap_or_default(),
1036 metadata: run.metadata.clone(),
1037 ..CrystallizationTrace::default()
1038 }
1039}
1040
1041fn replay_run_from_fixture(fixture: &ReplayFixture) -> ReplayTraceRun {
1042 ReplayTraceRun {
1043 run_id: fixture.source_run_id.clone(),
1044 final_artifacts: fixture
1045 .stage_assertions
1046 .iter()
1047 .map(|assertion| {
1048 json!({
1049 "node_id": assertion.node_id.clone(),
1050 "expected_status": assertion.expected_status.clone(),
1051 "expected_outcome": assertion.expected_outcome.clone(),
1052 "expected_branch": assertion.expected_branch.clone(),
1053 "required_artifact_kinds": assertion.required_artifact_kinds.clone(),
1054 "visible_text_contains": assertion.visible_text_contains.clone(),
1055 })
1056 })
1057 .collect(),
1058 policy_decisions: vec![json!({
1059 "workflow_id": fixture.workflow_id.clone(),
1060 "expected_status": fixture.expected_status.clone(),
1061 "eval_kind": fixture.eval_kind.clone(),
1062 })],
1063 ..ReplayTraceRun::default()
1064 }
1065}
1066
1067fn default_trace_replay_allowlist() -> Vec<ReplayAllowlistRule> {
1068 vec![ReplayAllowlistRule {
1069 path: "/run_id".to_string(),
1070 reason: "run ids are allocated per execution".to_string(),
1071 replacement: None,
1072 }]
1073}
1074
1075fn normalize_trace(mut trace: CrystallizationTrace) -> CrystallizationTrace {
1076 if trace.version == 0 {
1077 trace.version = TRACE_SCHEMA_VERSION;
1078 }
1079 if trace.id.trim().is_empty() {
1080 trace.id = new_id("trace");
1081 }
1082 if trace.source_hash.is_none() {
1083 let payload = serde_json::to_vec(&trace.actions).unwrap_or_default();
1084 trace.source_hash = Some(hash_bytes(&payload));
1085 }
1086 for (idx, action) in trace.actions.iter_mut().enumerate() {
1087 if action.id.trim().is_empty() {
1088 action.id = format!("action_{}", idx + 1);
1089 }
1090 if action.kind.trim().is_empty() {
1091 action.kind = "action".to_string();
1092 }
1093 if action.name.trim().is_empty() {
1094 action.name = action.kind.clone();
1095 }
1096 action.capabilities.sort();
1097 action.capabilities.dedup();
1098 action.required_secrets.sort();
1099 action.required_secrets.dedup();
1100 action.side_effects = sorted_side_effects(std::mem::take(&mut action.side_effects));
1101 if action.cost.model_calls == 0 && action.kind == "model_call" {
1102 action.cost.model_calls = 1;
1103 }
1104 }
1105 if trace.usage == CrystallizationUsage::default() {
1106 for action in &trace.actions {
1107 trace.usage.model_calls += action.cost.model_calls;
1108 trace.usage.input_tokens += action.cost.input_tokens;
1109 trace.usage.output_tokens += action.cost.output_tokens;
1110 trace.usage.total_cost_usd += action.cost.total_cost_usd;
1111 trace.usage.wall_ms += action.cost.wall_ms + action.duration_ms.unwrap_or_default();
1112 }
1113 }
1114 trace
1115}
1116
1117fn partition_mineable_traces(
1118 traces: Vec<CrystallizationTrace>,
1119) -> (Vec<CrystallizationTrace>, Vec<CrystallizationTrace>) {
1120 traces
1121 .into_iter()
1122 .partition(|trace| trace_is_mineable(trace).is_ok())
1123}
1124
1125fn trace_is_mineable(trace: &CrystallizationTrace) -> Result<(), String> {
1126 if metadata_has_unresolved_policy_violation(&trace.metadata) {
1127 return Err("trace has unresolved policy violations".to_string());
1128 }
1129 for action in &trace.actions {
1130 if metadata_has_unresolved_policy_violation(&action.metadata) {
1131 return Err(format!(
1132 "action {} has unresolved policy violations",
1133 action.id
1134 ));
1135 }
1136 if action_has_nondeterministic_side_effect(action) {
1137 return Err(format!(
1138 "action {} has nondeterministic side effects",
1139 action.id
1140 ));
1141 }
1142 }
1143 Ok(())
1144}
1145
1146fn metadata_has_unresolved_policy_violation(metadata: &BTreeMap<String, JsonValue>) -> bool {
1147 for (key, value) in metadata {
1148 let lower = key.to_ascii_lowercase();
1149 if lower.contains("unresolved_policy_violation")
1150 || lower == "policy_violation_unresolved"
1151 || lower == "policy_violation"
1152 {
1153 if value.as_bool() == Some(true) {
1154 return true;
1155 }
1156 if value
1157 .as_str()
1158 .is_some_and(|text| text.eq_ignore_ascii_case("unresolved"))
1159 {
1160 return true;
1161 }
1162 if value.as_array().is_some_and(|items| !items.is_empty()) {
1163 return true;
1164 }
1165 }
1166 if lower == "policy_status"
1167 && value
1168 .as_str()
1169 .is_some_and(|text| text.eq_ignore_ascii_case("unresolved"))
1170 {
1171 return true;
1172 }
1173 }
1174 false
1175}
1176
1177fn action_has_nondeterministic_side_effect(action: &CrystallizationAction) -> bool {
1178 if action.side_effects.is_empty() {
1179 return false;
1180 }
1181 if action
1182 .metadata
1183 .get("nondeterministic_side_effect")
1184 .and_then(JsonValue::as_bool)
1185 == Some(true)
1186 {
1187 return true;
1188 }
1189 if action.deterministic == Some(false) && action.fuzzy.unwrap_or(false) {
1190 return true;
1191 }
1192 action.side_effects.iter().any(|effect| {
1193 effect
1194 .metadata
1195 .get("nondeterministic")
1196 .and_then(JsonValue::as_bool)
1197 == Some(true)
1198 })
1199}
1200
1201fn mine_candidates(
1202 traces: &[CrystallizationTrace],
1203 min_examples: usize,
1204 options: &CrystallizeOptions,
1205) -> Vec<WorkflowCandidate> {
1206 let signatures = traces
1207 .iter()
1208 .map(|trace| {
1209 trace
1210 .actions
1211 .iter()
1212 .map(action_signature)
1213 .collect::<Vec<_>>()
1214 })
1215 .collect::<Vec<_>>();
1216 let Some((sequence, examples)) = best_repeated_sequence(&signatures, min_examples) else {
1217 return Vec::new();
1218 };
1219
1220 let mut example_refs = Vec::new();
1221 for (trace_index, start_index) in &examples {
1222 let trace = &traces[*trace_index];
1223 example_refs.push(WorkflowCandidateExample {
1224 trace_id: trace.id.clone(),
1225 source_hash: trace.source_hash.clone().unwrap_or_default(),
1226 start_index: *start_index,
1227 action_ids: trace.actions[*start_index..*start_index + sequence.len()]
1228 .iter()
1229 .map(|action| action.id.clone())
1230 .collect(),
1231 });
1232 }
1233
1234 let mut steps = Vec::new();
1235 let mut parameter_values: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
1236 let mut parameter_paths: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
1237 let mut rejection_reasons = Vec::new();
1238 let mut warnings = Vec::new();
1239
1240 for step_index in 0..sequence.len() {
1241 let actions = examples
1242 .iter()
1243 .map(|(trace_index, start_index)| {
1244 &traces[*trace_index].actions[*start_index + step_index]
1245 })
1246 .collect::<Vec<_>>();
1247 let first = actions[0];
1248 if !compatible_side_effects(&actions) {
1249 rejection_reasons.push(format!(
1250 "step {} '{}' has divergent side effects across examples",
1251 step_index + 1,
1252 first.name
1253 ));
1254 }
1255
1256 let mut parameter_refs = BTreeSet::new();
1257 for action in &actions {
1258 for (name, value) in &action.parameters {
1259 if is_scalar(value) {
1260 parameter_values
1261 .entry(sanitize_identifier(name))
1262 .or_default()
1263 .insert(json_scalar_string(value));
1264 parameter_paths
1265 .entry(sanitize_identifier(name))
1266 .or_default()
1267 .insert(format!("steps[{step_index}].parameters.{name}"));
1268 parameter_refs.insert(sanitize_identifier(name));
1269 }
1270 }
1271 }
1272 collect_varying_parameters(
1273 &actions,
1274 "inputs",
1275 |action| &action.inputs,
1276 &mut parameter_values,
1277 &mut parameter_paths,
1278 &mut parameter_refs,
1279 );
1280
1281 let fuzzy = first.fuzzy.unwrap_or(false)
1282 || first.kind == "model_call"
1283 || actions.iter().any(|action| action.fuzzy.unwrap_or(false));
1284 if fuzzy {
1285 warnings.push(format!(
1286 "step {} '{}' remains fuzzy and requires review/LLM handling",
1287 step_index + 1,
1288 first.name
1289 ));
1290 }
1291
1292 steps.push(WorkflowCandidateStep {
1293 index: step_index + 1,
1294 kind: first.kind.clone(),
1295 name: first.name.clone(),
1296 segment: if fuzzy {
1297 SegmentKind::Fuzzy
1298 } else {
1299 SegmentKind::Deterministic
1300 },
1301 parameter_refs: parameter_refs.into_iter().collect(),
1302 constants: constants_for_action(first),
1303 preconditions: preconditions_for_action(first),
1304 side_effects: first.side_effects.clone(),
1305 capabilities: sorted_strings(first.capabilities.iter().cloned()),
1306 required_secrets: sorted_strings(first.required_secrets.iter().cloned()),
1307 approval: first.approval.clone(),
1308 expected_output: stable_expected_output(&actions),
1309 review_notes: Vec::new(),
1310 });
1311 }
1312
1313 let parameters = parameter_values
1314 .iter()
1315 .map(|(name, values)| WorkflowCandidateParameter {
1316 name: name.clone(),
1317 source_paths: parameter_paths
1318 .get(name)
1319 .map(|paths| paths.iter().cloned().collect())
1320 .unwrap_or_default(),
1321 examples: values.iter().take(5).cloned().collect(),
1322 required: true,
1323 })
1324 .collect::<Vec<_>>();
1325
1326 let capabilities = sorted_strings(
1327 steps
1328 .iter()
1329 .flat_map(|step| step.capabilities.iter().cloned()),
1330 );
1331 let required_secrets = sorted_strings(
1332 steps
1333 .iter()
1334 .flat_map(|step| step.required_secrets.iter().cloned()),
1335 );
1336 let approval_points = steps
1337 .iter()
1338 .filter_map(|step| step.approval.clone())
1339 .collect::<Vec<_>>();
1340 let side_effects = sorted_side_effects(
1341 steps
1342 .iter()
1343 .flat_map(|step| step.side_effects.iter().cloned())
1344 .collect(),
1345 );
1346 let expected_outputs = steps
1347 .iter()
1348 .filter_map(|step| step.expected_output.clone())
1349 .collect::<Vec<_>>();
1350 let expected_receipts = expected_receipts_for_examples(traces, &examples);
1351 let savings = estimate_savings(traces, &examples, &steps);
1352 let confidence = confidence_for(
1353 &examples,
1354 traces.len(),
1355 &steps,
1356 rejection_reasons.is_empty(),
1357 );
1358 let name = options
1359 .workflow_name
1360 .clone()
1361 .unwrap_or_else(|| infer_workflow_name(&steps));
1362 let package_name = options
1363 .package_name
1364 .clone()
1365 .unwrap_or_else(|| name.replace('_', "-"));
1366
1367 vec![WorkflowCandidate {
1368 id: stable_candidate_id(&sequence, &example_refs),
1369 name,
1370 confidence,
1371 cluster_key: cluster_key_for_candidate(traces, &examples, &steps),
1372 sequence_signature: sequence,
1373 parameters,
1374 steps,
1375 examples: example_refs.clone(),
1376 capabilities: capabilities.clone(),
1377 required_secrets: required_secrets.clone(),
1378 approval_points,
1379 side_effects,
1380 expected_outputs,
1381 expected_receipts,
1382 warnings,
1383 rejection_reasons,
1384 promotion: PromotionMetadata {
1385 source_trace_hashes: example_refs
1386 .iter()
1387 .map(|example| example.source_hash.clone())
1388 .collect(),
1389 author: options.author.clone(),
1390 approver: options.approver.clone(),
1391 created_at: now_rfc3339(),
1392 version: "0.1.0".to_string(),
1393 package_name,
1394 capability_set: capabilities,
1395 secrets_required: required_secrets,
1396 rollback_target: Some("keep source traces and previous package version".to_string()),
1397 eval_pack_link: options.eval_pack_link.clone(),
1398 ..PromotionMetadata::default()
1399 },
1400 savings,
1401 shadow: ShadowRunReport::default(),
1402 }]
1403}
1404
1405fn best_repeated_sequence(
1406 signatures: &[Vec<String>],
1407 min_examples: usize,
1408) -> Option<RepeatedSequence> {
1409 let mut occurrences: BTreeMap<Vec<String>, Vec<(usize, usize)>> = BTreeMap::new();
1410 for (trace_index, trace_signatures) in signatures.iter().enumerate() {
1411 for start in 0..trace_signatures.len() {
1412 let max_len = (trace_signatures.len() - start).min(12);
1413 for len in 2..=max_len {
1414 let sequence = trace_signatures[start..start + len].to_vec();
1415 occurrences
1416 .entry(sequence)
1417 .or_default()
1418 .push((trace_index, start));
1419 }
1420 }
1421 }
1422
1423 occurrences
1424 .into_iter()
1425 .filter_map(|(sequence, positions)| {
1426 let mut seen = BTreeSet::new();
1427 let mut examples = Vec::new();
1428 for (trace_index, start) in positions {
1429 if seen.insert(trace_index) {
1430 examples.push((trace_index, start));
1431 }
1432 }
1433 if examples.len() >= min_examples {
1434 Some((sequence, examples))
1435 } else {
1436 None
1437 }
1438 })
1439 .max_by(
1440 |(left_sequence, left_examples), (right_sequence, right_examples)| {
1441 left_examples
1442 .len()
1443 .cmp(&right_examples.len())
1444 .then_with(|| left_sequence.len().cmp(&right_sequence.len()))
1445 },
1446 )
1447}
1448
1449fn action_signature(action: &CrystallizationAction) -> String {
1450 let mut parameter_keys = action.parameters.keys().cloned().collect::<Vec<_>>();
1451 parameter_keys.sort();
1452 format!(
1453 "{}:{}:{}",
1454 action.kind,
1455 action.name,
1456 parameter_keys.join(",")
1457 )
1458}
1459
1460fn compatible_side_effects(actions: &[&CrystallizationAction]) -> bool {
1461 let first = sorted_side_effects(actions[0].side_effects.clone());
1462 actions
1463 .iter()
1464 .skip(1)
1465 .all(|action| sorted_side_effects(action.side_effects.clone()) == first)
1466}
1467
1468fn collect_varying_parameters(
1469 actions: &[&CrystallizationAction],
1470 root: &str,
1471 value_for: impl Fn(&CrystallizationAction) -> &JsonValue,
1472 parameter_values: &mut BTreeMap<String, BTreeSet<String>>,
1473 parameter_paths: &mut BTreeMap<String, BTreeSet<String>>,
1474 parameter_refs: &mut BTreeSet<String>,
1475) {
1476 let mut paths = BTreeMap::<String, Vec<JsonValue>>::new();
1477 for action in actions {
1478 collect_scalar_paths(value_for(action), root, &mut paths);
1479 }
1480 for (path, values) in paths {
1481 if values.len() != actions.len() {
1482 continue;
1483 }
1484 let unique = values
1485 .iter()
1486 .map(json_scalar_string)
1487 .collect::<BTreeSet<_>>();
1488 if unique.len() < 2 {
1489 continue;
1490 }
1491 let name = parameter_name_for_path(&path);
1492 parameter_values
1493 .entry(name.clone())
1494 .or_default()
1495 .extend(unique);
1496 parameter_paths
1497 .entry(name.clone())
1498 .or_default()
1499 .insert(path);
1500 parameter_refs.insert(name);
1501 }
1502}
1503
1504fn collect_scalar_paths(
1505 value: &JsonValue,
1506 prefix: &str,
1507 out: &mut BTreeMap<String, Vec<JsonValue>>,
1508) {
1509 match value {
1510 JsonValue::Object(map) => {
1511 for (key, child) in map {
1512 collect_scalar_paths(child, &format!("{prefix}.{key}"), out);
1513 }
1514 }
1515 JsonValue::Array(items) => {
1516 for (idx, child) in items.iter().enumerate() {
1517 collect_scalar_paths(child, &format!("{prefix}[{idx}]"), out);
1518 }
1519 }
1520 _ if is_scalar(value) => {
1521 out.entry(prefix.to_string())
1522 .or_default()
1523 .push(value.clone());
1524 }
1525 _ => {}
1526 }
1527}
1528
1529fn parameter_name_for_path(path: &str) -> String {
1530 let lower = path.to_ascii_lowercase();
1531 for (needle, name) in [
1532 ("version", "version"),
1533 ("repo_path", "repo_path"),
1534 ("repo", "repo_path"),
1535 ("branch_name", "branch_name"),
1536 ("branch", "branch_name"),
1537 ("release_target", "release_target"),
1538 ("target", "release_target"),
1539 ] {
1540 if lower.contains(needle) {
1541 return name.to_string();
1542 }
1543 }
1544 let tail = path
1545 .rsplit(['.', '['])
1546 .next()
1547 .unwrap_or("param")
1548 .trim_end_matches(']');
1549 sanitize_identifier(tail)
1550}
1551
1552fn constants_for_action(action: &CrystallizationAction) -> BTreeMap<String, JsonValue> {
1553 let mut constants = BTreeMap::new();
1554 constants.insert("kind".to_string(), json!(action.kind));
1555 constants.insert("name".to_string(), json!(action.name));
1556 if action.deterministic.unwrap_or(false) {
1557 constants.insert("deterministic".to_string(), json!(true));
1558 }
1559 constants
1560}
1561
1562fn preconditions_for_action(action: &CrystallizationAction) -> Vec<String> {
1563 let mut out = Vec::new();
1564 for capability in &action.capabilities {
1565 out.push(format!("capability '{capability}' is available"));
1566 }
1567 for secret in &action.required_secrets {
1568 out.push(format!("secret '{secret}' is configured"));
1569 }
1570 if let Some(approval) = &action.approval {
1571 if approval.required {
1572 out.push("human approval boundary is preserved".to_string());
1573 }
1574 }
1575 out
1576}
1577
1578fn stable_expected_output(actions: &[&CrystallizationAction]) -> Option<JsonValue> {
1579 let first = actions[0]
1580 .observed_output
1581 .as_ref()
1582 .or(actions[0].output.as_ref())?;
1583 if actions
1584 .iter()
1585 .all(|action| action.observed_output.as_ref().or(action.output.as_ref()) == Some(first))
1586 {
1587 Some(first.clone())
1588 } else {
1589 None
1590 }
1591}
1592
1593fn expected_receipts_for_examples(
1594 traces: &[CrystallizationTrace],
1595 examples: &[(usize, usize)],
1596) -> Vec<JsonValue> {
1597 examples
1598 .iter()
1599 .find_map(|(trace_index, _)| {
1600 traces
1601 .get(*trace_index)
1602 .and_then(|trace| trace.replay_run.as_ref())
1603 .map(|run| run.effect_receipts.clone())
1604 .filter(|receipts| !receipts.is_empty())
1605 })
1606 .unwrap_or_default()
1607}
1608
1609fn cluster_key_for_candidate(
1610 traces: &[CrystallizationTrace],
1611 examples: &[(usize, usize)],
1612 steps: &[WorkflowCandidateStep],
1613) -> WorkflowClusterKey {
1614 let goal = examples.iter().find_map(|(trace_index, _)| {
1615 traces.get(*trace_index).and_then(|trace| {
1616 trace
1617 .metadata
1618 .get("goal")
1619 .and_then(JsonValue::as_str)
1620 .map(str::to_string)
1621 .or_else(|| trace.workflow_id.clone())
1622 })
1623 });
1624 let tool_sequence = steps
1625 .iter()
1626 .filter(|step| step.kind == "tool_call" || step.kind == "file_mutation")
1627 .map(|step| step.name.clone())
1628 .collect::<Vec<_>>();
1629 let touched_artifact_types = sorted_strings(
1630 steps
1631 .iter()
1632 .flat_map(|step| step.side_effects.iter().map(artifact_type_for_side_effect)),
1633 );
1634 let success_criteria = sorted_strings(examples.iter().filter_map(|(trace_index, _)| {
1635 traces
1636 .get(*trace_index)
1637 .and_then(|trace| trace.metadata.get("success_criteria"))
1638 .and_then(JsonValue::as_str)
1639 .map(str::to_string)
1640 }));
1641
1642 WorkflowClusterKey {
1643 goal,
1644 tool_sequence,
1645 touched_artifact_types,
1646 success_criteria,
1647 }
1648}
1649
1650fn artifact_type_for_side_effect(effect: &CrystallizationSideEffect) -> String {
1651 if let Some(kind) = effect
1652 .metadata
1653 .get("artifact_type")
1654 .and_then(JsonValue::as_str)
1655 .filter(|kind| !kind.trim().is_empty())
1656 {
1657 return kind.to_string();
1658 }
1659 let lower_kind = effect.kind.to_ascii_lowercase();
1660 if lower_kind.contains("git") {
1661 "git".to_string()
1662 } else if lower_kind.contains("file") {
1663 Path::new(&effect.target)
1664 .extension()
1665 .and_then(|ext| ext.to_str())
1666 .map(|ext| format!("file:{ext}"))
1667 .unwrap_or_else(|| "file".to_string())
1668 } else if lower_kind.contains("receipt") {
1669 "receipt".to_string()
1670 } else if lower_kind.contains("publish") {
1671 "package_publish".to_string()
1672 } else if !lower_kind.is_empty() {
1673 lower_kind
1674 } else {
1675 "unknown".to_string()
1676 }
1677}
1678
1679fn refresh_promotion_metadata(
1680 candidate: &mut WorkflowCandidate,
1681 min_examples: usize,
1682 options: &CrystallizeOptions,
1683) {
1684 let min_confidence = if options.promotion_min_confidence > 0.0 {
1685 options.promotion_min_confidence
1686 } else {
1687 DEFAULT_PROMOTION_MIN_CONFIDENCE
1688 };
1689 let shadow_success_count = candidate
1690 .shadow
1691 .traces
1692 .iter()
1693 .filter(|trace| trace.pass)
1694 .count();
1695 let shadow_failure_count = candidate
1696 .shadow
1697 .traces
1698 .len()
1699 .saturating_sub(shadow_success_count);
1700 let sample_count = candidate
1701 .shadow
1702 .compared_traces
1703 .max(candidate.examples.len());
1704 let divergence_history = candidate
1705 .shadow
1706 .traces
1707 .iter()
1708 .filter(|trace| !trace.pass)
1709 .flat_map(divergence_records_for_shadow_trace)
1710 .collect::<Vec<_>>();
1711 let source_trace_hashes = sorted_strings(
1712 candidate
1713 .promotion
1714 .source_trace_hashes
1715 .iter()
1716 .cloned()
1717 .chain(
1718 candidate
1719 .shadow
1720 .traces
1721 .iter()
1722 .map(|trace| trace.source_hash.clone()),
1723 ),
1724 );
1725 let approval_history = options
1726 .approver
1727 .as_ref()
1728 .filter(|approver| !approver.trim().is_empty())
1729 .map(|approver| {
1730 vec![PromotionApprovalRecord {
1731 actor: approver.clone(),
1732 decision: "approved_for_shadow_promotion".to_string(),
1733 recorded_at: now_rfc3339(),
1734 reason: Some("approver supplied in crystallization options".to_string()),
1735 }]
1736 })
1737 .unwrap_or_default();
1738 let mut criteria = PromotionCriteria {
1739 min_examples,
1740 min_confidence,
1741 requires_shadow_pass: true,
1742 requires_no_rejections: true,
1743 requires_human_approval: true,
1744 approval_reason: Some(
1745 "workflow candidates start in shadow mode and require explicit human approval before promotion".to_string(),
1746 ),
1747 ..PromotionCriteria::default()
1748 };
1749 if sample_count < min_examples {
1750 criteria.reasons.push(format!(
1751 "sample count {} is below required minimum {min_examples}",
1752 sample_count
1753 ));
1754 }
1755 if candidate.confidence < min_confidence {
1756 criteria.reasons.push(format!(
1757 "confidence {:.2} is below required minimum {:.2}",
1758 candidate.confidence, min_confidence
1759 ));
1760 }
1761 if !candidate.shadow.pass {
1762 criteria
1763 .reasons
1764 .push("shadow comparison did not pass".to_string());
1765 }
1766 if !candidate.rejection_reasons.is_empty() {
1767 criteria
1768 .reasons
1769 .push("candidate has rejection reasons".to_string());
1770 }
1771 if approval_history.is_empty() {
1772 criteria
1773 .reasons
1774 .push("human approval has not been recorded".to_string());
1775 }
1776 criteria.status = if !candidate.rejection_reasons.is_empty()
1777 || !candidate.shadow.pass
1778 || sample_count < min_examples
1779 || candidate.confidence < min_confidence
1780 {
1781 PromotionStatus::Blocked
1782 } else if approval_history.is_empty() {
1783 PromotionStatus::NeedsApproval
1784 } else {
1785 PromotionStatus::Ready
1786 };
1787
1788 candidate.promotion.sample_count = sample_count;
1789 candidate.promotion.source_trace_hashes = source_trace_hashes;
1790 candidate.promotion.confidence = candidate.confidence;
1791 candidate.promotion.shadow_success_count = shadow_success_count;
1792 candidate.promotion.shadow_failure_count = shadow_failure_count;
1793 candidate.promotion.divergence_history = divergence_history;
1794 candidate.promotion.approval_history = approval_history;
1795 candidate.promotion.criteria = criteria;
1796 candidate.promotion.estimated_time_token_savings = candidate.savings.clone();
1797}
1798
1799fn divergence_records_for_shadow_trace(
1800 trace: &ShadowTraceResult,
1801) -> Vec<PromotionDivergenceRecord> {
1802 if let Some(report) = &trace.replay_oracle {
1803 if let Some(divergence) = &report.divergence {
1804 return vec![PromotionDivergenceRecord {
1805 trace_id: trace.trace_id.clone(),
1806 path: Some(divergence.path.clone()),
1807 message: divergence.message.clone(),
1808 left: Some(divergence.left.clone()),
1809 right: Some(divergence.right.clone()),
1810 }];
1811 }
1812 }
1813 trace
1814 .details
1815 .iter()
1816 .map(|detail| PromotionDivergenceRecord {
1817 trace_id: trace.trace_id.clone(),
1818 path: None,
1819 message: detail.clone(),
1820 left: None,
1821 right: None,
1822 })
1823 .collect()
1824}
1825
1826fn shadow_candidate(
1827 candidate: &WorkflowCandidate,
1828 traces: &[CrystallizationTrace],
1829) -> ShadowRunReport {
1830 let mut failures = Vec::new();
1831 let mut results = Vec::new();
1832 let mut compared_trace_ids = BTreeSet::new();
1833 for example in &candidate.examples {
1834 let Some(trace) = traces.iter().find(|trace| trace.id == example.trace_id) else {
1835 failures.push(format!("missing source trace {}", example.trace_id));
1836 continue;
1837 };
1838 compared_trace_ids.insert(trace.id.clone());
1839 let result = shadow_trace_result(candidate, trace, example.start_index);
1840 if !result.pass {
1841 failures.push(format!("trace {} failed shadow comparison", trace.id));
1842 }
1843 results.push(result);
1844 }
1845
1846 for trace in traces {
1847 if compared_trace_ids.contains(&trace.id) {
1848 continue;
1849 }
1850 if let Some(start_index) = find_sequence_start(trace, &candidate.sequence_signature) {
1851 compared_trace_ids.insert(trace.id.clone());
1852 let result = shadow_trace_result(candidate, trace, start_index);
1853 if !result.pass {
1854 failures.push(format!("trace {} failed shadow comparison", trace.id));
1855 }
1856 results.push(result);
1857 }
1858 }
1859
1860 ShadowRunReport {
1861 pass: failures.is_empty(),
1862 compared_traces: results.len(),
1863 failures,
1864 traces: results,
1865 }
1866}
1867
1868fn find_sequence_start(trace: &CrystallizationTrace, sequence: &[String]) -> Option<usize> {
1869 if sequence.is_empty() || trace.actions.len() < sequence.len() {
1870 return None;
1871 }
1872 trace
1873 .actions
1874 .windows(sequence.len())
1875 .position(|window| window.iter().map(action_signature).collect::<Vec<_>>() == sequence)
1876}
1877
1878fn shadow_trace_result(
1879 candidate: &WorkflowCandidate,
1880 trace: &CrystallizationTrace,
1881 start_index: usize,
1882) -> ShadowTraceResult {
1883 let mut details = Vec::new();
1884 let end = start_index + candidate.steps.len();
1885 if end > trace.actions.len() {
1886 details.push("candidate sequence extends past trace action list".to_string());
1887 } else {
1888 let signatures = trace.actions[start_index..end]
1889 .iter()
1890 .map(action_signature)
1891 .collect::<Vec<_>>();
1892 if signatures != candidate.sequence_signature {
1893 details.push("action sequence signature changed".to_string());
1894 }
1895 for (offset, step) in candidate.steps.iter().enumerate() {
1896 let action = &trace.actions[start_index + offset];
1897 if sorted_side_effects(action.side_effects.clone()) != step.side_effects {
1898 details.push(format!(
1899 "step {} side effects differ for action {}",
1900 step.index, action.id
1901 ));
1902 }
1903 if action.approval.as_ref().map(|approval| approval.required)
1904 != step.approval.as_ref().map(|approval| approval.required)
1905 {
1906 details.push(format!("step {} approval boundary differs", step.index));
1907 }
1908 if step.segment == SegmentKind::Deterministic {
1909 if let Some(expected) = &step.expected_output {
1910 let actual = action.observed_output.as_ref().or(action.output.as_ref());
1911 if actual != Some(expected) {
1912 details.push(format!("step {} deterministic output differs", step.index));
1913 }
1914 }
1915 }
1916 }
1917 }
1918
1919 let (replay_oracle, compared_receipts) = replay_oracle_for_shadow(candidate, trace);
1920 if let Some(report) = &replay_oracle {
1921 if !report.passed {
1922 if let Some(divergence) = &report.divergence {
1923 details.push(format!(
1924 "receipt replay diverged at {}: {}",
1925 divergence.path, divergence.message
1926 ));
1927 } else {
1928 details.push("receipt replay oracle did not pass".to_string());
1929 }
1930 }
1931 }
1932
1933 ShadowTraceResult {
1934 trace_id: trace.id.clone(),
1935 source_hash: trace.source_hash.clone().unwrap_or_default(),
1936 pass: details.is_empty(),
1937 details,
1938 compared_receipts,
1939 replay_oracle,
1940 }
1941}
1942
1943fn replay_oracle_for_shadow(
1944 candidate: &WorkflowCandidate,
1945 trace: &CrystallizationTrace,
1946) -> (Option<ReplayOracleReport>, usize) {
1947 let Some(first_run) = trace.replay_run.as_ref() else {
1948 return (None, 0);
1949 };
1950 if first_run.effect_receipts.is_empty() && candidate.expected_receipts.is_empty() {
1951 return (None, 0);
1952 }
1953 let mut second_run = first_run.clone();
1954 second_run.run_id = format!("shadow_{}_{}", candidate.id, trace.id);
1955 second_run.effect_receipts = candidate.expected_receipts.clone();
1956 let compared_receipts = first_run
1957 .effect_receipts
1958 .len()
1959 .max(second_run.effect_receipts.len());
1960 let oracle_trace = ReplayOracleTrace {
1961 name: format!("{}_shadow_receipts_{}", candidate.name, trace.id),
1962 description: Some(
1963 "crystallization shadow receipt comparison using the replay oracle".to_string(),
1964 ),
1965 expect: ReplayExpectation::Match,
1966 allowlist: trace.replay_allowlist.clone(),
1967 first_run: first_run.clone(),
1968 second_run,
1969 ..ReplayOracleTrace::default()
1970 };
1971 let oracle_name = oracle_trace.name.clone();
1972 let second_run_counts = oracle_trace.second_run.counts();
1973 let report = match run_replay_oracle_trace(&oracle_trace) {
1974 Ok(report) => report,
1975 Err(error) => ReplayOracleReport {
1976 name: oracle_name,
1977 expectation: ReplayExpectation::Match,
1978 passed: false,
1979 first_run_counts: first_run.counts(),
1980 second_run_counts,
1981 protocol_fixture_refs: Vec::new(),
1982 divergence: Some(super::ReplayDivergence {
1983 path: "/".to_string(),
1984 left: JsonValue::Null,
1985 right: JsonValue::Null,
1986 message: error.to_string(),
1987 }),
1988 },
1989 };
1990 (Some(report), compared_receipts)
1991}
1992
1993fn estimate_savings(
1994 traces: &[CrystallizationTrace],
1995 examples: &[(usize, usize)],
1996 steps: &[WorkflowCandidateStep],
1997) -> SavingsEstimate {
1998 let mut estimate = SavingsEstimate::default();
1999 for (trace_index, start_index) in examples {
2000 let trace = &traces[*trace_index];
2001 for action in &trace.actions[*start_index..*start_index + steps.len()] {
2002 if action.kind == "model_call" || action.fuzzy.unwrap_or(false) {
2003 estimate.remaining_model_calls += action.cost.model_calls.max(1);
2004 } else {
2005 estimate.model_calls_avoided += action.cost.model_calls;
2006 estimate.input_tokens_avoided += action.cost.input_tokens;
2007 estimate.output_tokens_avoided += action.cost.output_tokens;
2008 estimate.estimated_cost_usd_avoided += action.cost.total_cost_usd;
2009 estimate.wall_ms_avoided +=
2010 action.cost.wall_ms + action.duration_ms.unwrap_or_default();
2011 }
2012 }
2013 }
2014 estimate.cpu_runtime_cost_usd = 0.0;
2015 estimate
2016}
2017
2018fn confidence_for(
2019 examples: &[(usize, usize)],
2020 trace_count: usize,
2021 steps: &[WorkflowCandidateStep],
2022 safe: bool,
2023) -> f64 {
2024 if !safe || trace_count == 0 {
2025 return 0.0;
2026 }
2027 let coverage = examples.len() as f64 / trace_count as f64;
2028 let deterministic = steps
2029 .iter()
2030 .filter(|step| step.segment == SegmentKind::Deterministic)
2031 .count() as f64
2032 / steps.len().max(1) as f64;
2033 ((coverage * 0.65) + (deterministic * 0.35)).min(0.99)
2034}
2035
2036fn infer_workflow_name(steps: &[WorkflowCandidateStep]) -> String {
2037 let names = steps
2038 .iter()
2039 .map(|step| step.name.to_ascii_lowercase())
2040 .collect::<Vec<_>>()
2041 .join("_");
2042 if names.contains("version") || names.contains("release") {
2043 "crystallized_version_bump".to_string()
2044 } else {
2045 "crystallized_workflow".to_string()
2046 }
2047}
2048
2049pub fn generate_harn_code(candidate: &WorkflowCandidate) -> String {
2050 let mut out = String::new();
2051 let params = if candidate.parameters.is_empty() {
2052 "task".to_string()
2053 } else {
2054 candidate
2055 .parameters
2056 .iter()
2057 .map(|parameter| parameter.name.as_str())
2058 .collect::<Vec<_>>()
2059 .join(", ")
2060 };
2061 writeln!(out, "/**").unwrap();
2062 writeln!(
2063 out,
2064 " * Generated by harn crystallize. Review before promotion."
2065 )
2066 .unwrap();
2067 writeln!(out, " * Candidate: {}", candidate.id).unwrap();
2068 writeln!(
2069 out,
2070 " * Source trace hashes: {}",
2071 candidate.promotion.source_trace_hashes.join(", ")
2072 )
2073 .unwrap();
2074 writeln!(
2075 out,
2076 " * Capabilities: {}",
2077 if candidate.capabilities.is_empty() {
2078 "none".to_string()
2079 } else {
2080 candidate.capabilities.join(", ")
2081 }
2082 )
2083 .unwrap();
2084 writeln!(
2085 out,
2086 " * Required secrets: {}",
2087 if candidate.required_secrets.is_empty() {
2088 "none".to_string()
2089 } else {
2090 candidate.required_secrets.join(", ")
2091 }
2092 )
2093 .unwrap();
2094 writeln!(out, " */").unwrap();
2095 writeln!(out, "pipeline {}({}) {{", candidate.name, params).unwrap();
2096 writeln!(out, " let review_warnings = []").unwrap();
2097 for step in &candidate.steps {
2098 writeln!(out, " // Step {}: {} {}", step.index, step.kind, step.name).unwrap();
2099 for side_effect in &step.side_effects {
2100 writeln!(
2101 out,
2102 " // side_effect: {} {}",
2103 side_effect.kind, side_effect.target
2104 )
2105 .unwrap();
2106 }
2107 if let Some(approval) = &step.approval {
2108 if approval.required {
2109 writeln!(
2110 out,
2111 " // approval_required: {}",
2112 approval
2113 .boundary
2114 .clone()
2115 .unwrap_or_else(|| "human_review".to_string())
2116 )
2117 .unwrap();
2118 }
2119 }
2120 if step.segment == SegmentKind::Fuzzy {
2121 writeln!(
2122 out,
2123 " // TODO: fuzzy segment still needs LLM/reviewer handling before deterministic promotion."
2124 )
2125 .unwrap();
2126 writeln!(
2127 out,
2128 " review_warnings.push(\"fuzzy step: {}\")",
2129 escape_harn_string(&step.name)
2130 )
2131 .unwrap();
2132 }
2133 writeln!(
2134 out,
2135 " log(\"crystallized step {}: {}\")",
2136 step.index,
2137 escape_harn_string(&step.name)
2138 )
2139 .unwrap();
2140 }
2141 writeln!(
2142 out,
2143 " return {{status: \"shadow_ready\", candidate_id: \"{}\", review_warnings: review_warnings}}",
2144 escape_harn_string(&candidate.id)
2145 )
2146 .unwrap();
2147 writeln!(out, "}}").unwrap();
2148 out
2149}
2150
2151fn rejected_workflow_stub(rejected: &[WorkflowCandidate]) -> String {
2152 let mut out = String::new();
2153 writeln!(
2154 out,
2155 "// Generated by harn crystallize. No safe candidate was proposed."
2156 )
2157 .unwrap();
2158 writeln!(out, "pipeline crystallized_workflow(task) {{").unwrap();
2159 writeln!(out, " log(\"no safe crystallization candidate\")").unwrap();
2160 writeln!(
2161 out,
2162 " return {{status: \"rejected\", rejected_candidates: {}}}",
2163 rejected.len()
2164 )
2165 .unwrap();
2166 writeln!(out, "}}").unwrap();
2167 out
2168}
2169
2170pub fn generate_eval_pack(candidate: &WorkflowCandidate) -> String {
2171 let mut out = String::new();
2172 writeln!(out, "version = 1").unwrap();
2173 writeln!(
2174 out,
2175 "id = \"{}-crystallization\"",
2176 candidate.promotion.package_name
2177 )
2178 .unwrap();
2179 writeln!(
2180 out,
2181 "name = \"{} crystallization shadow evals\"",
2182 candidate.name
2183 )
2184 .unwrap();
2185 writeln!(out).unwrap();
2186 writeln!(out, "[package]").unwrap();
2187 writeln!(out, "name = \"{}\"", candidate.promotion.package_name).unwrap();
2188 writeln!(out, "version = \"{}\"", candidate.promotion.version).unwrap();
2189 writeln!(out).unwrap();
2190 for example in &candidate.examples {
2191 writeln!(out, "[[fixtures]]").unwrap();
2192 writeln!(out, "id = \"{}\"", example.trace_id).unwrap();
2193 writeln!(out, "kind = \"jsonl-trace\"").unwrap();
2194 writeln!(out, "trace_id = \"{}\"", example.trace_id).unwrap();
2195 writeln!(out).unwrap();
2196 }
2197 writeln!(out, "[[rubrics]]").unwrap();
2198 writeln!(out, "id = \"shadow-determinism\"").unwrap();
2199 writeln!(out, "kind = \"deterministic\"").unwrap();
2200 writeln!(out).unwrap();
2201 writeln!(out, "[[rubrics.assertions]]").unwrap();
2202 writeln!(out, "kind = \"crystallization-shadow\"").unwrap();
2203 writeln!(
2204 out,
2205 "expected = {{ candidate_id = \"{}\", pass = true }}",
2206 candidate.id
2207 )
2208 .unwrap();
2209 writeln!(out).unwrap();
2210 writeln!(out, "[[cases]]").unwrap();
2211 writeln!(out, "id = \"{}-shadow\"", candidate.name).unwrap();
2212 writeln!(out, "name = \"{} shadow replay\"", candidate.name).unwrap();
2213 writeln!(out, "rubrics = [\"shadow-determinism\"]").unwrap();
2214 writeln!(out, "severity = \"blocking\"").unwrap();
2215 out
2216}
2217
2218fn stable_candidate_id(sequence: &[String], examples: &[WorkflowCandidateExample]) -> String {
2219 let mut hasher = Sha256::new();
2220 for item in sequence {
2221 hasher.update(item.as_bytes());
2222 hasher.update([0]);
2223 }
2224 for example in examples {
2225 hasher.update(example.source_hash.as_bytes());
2226 hasher.update([0]);
2227 }
2228 format!("candidate_{}", hex_prefix(hasher.finalize().as_slice(), 16))
2229}
2230
2231fn hash_bytes(bytes: &[u8]) -> String {
2232 let mut hasher = Sha256::new();
2233 hasher.update(bytes);
2234 format!("sha256:{}", hex::encode(hasher.finalize()))
2235}
2236
2237fn hex_prefix(bytes: &[u8], chars: usize) -> String {
2238 hex::encode(bytes).chars().take(chars).collect::<String>()
2239}
2240
/// Collects `items` into a sorted, de-duplicated list, dropping blank (whitespace-only) entries.
fn sorted_strings(items: impl Iterator<Item = String>) -> Vec<String> {
    let unique: BTreeSet<String> = items
        .filter(|candidate| !candidate.trim().is_empty())
        .collect();
    Vec::from_iter(unique)
}
2246
2247fn sorted_side_effects(items: Vec<CrystallizationSideEffect>) -> Vec<CrystallizationSideEffect> {
2248 let mut items = items
2249 .into_iter()
2250 .filter(|item| !item.kind.trim().is_empty() || !item.target.trim().is_empty())
2251 .collect::<Vec<_>>();
2252 items.sort_by_key(side_effect_sort_key);
2253 items.dedup_by(|left, right| side_effect_sort_key(left) == side_effect_sort_key(right));
2254 items
2255}
2256
2257fn side_effect_sort_key(item: &CrystallizationSideEffect) -> String {
2258 format!(
2259 "{}\x1f{}\x1f{}\x1f{}",
2260 item.kind,
2261 item.target,
2262 item.capability.clone().unwrap_or_default(),
2263 item.mutation.clone().unwrap_or_default()
2264 )
2265}
2266
2267fn is_scalar(value: &JsonValue) -> bool {
2268 matches!(
2269 value,
2270 JsonValue::String(_) | JsonValue::Number(_) | JsonValue::Bool(_) | JsonValue::Null
2271 )
2272}
2273
2274fn json_scalar_string(value: &JsonValue) -> String {
2275 match value {
2276 JsonValue::String(value) => value.clone(),
2277 other => other.to_string(),
2278 }
2279}
2280
/// Converts arbitrary text into a lowercase, identifier-safe name.
///
/// ASCII letters, digits and `_` are kept (letters lowercased); each run of other characters
/// collapses to a single `_`. Leading and trailing underscores are trimmed. If nothing
/// remains the result is `"param"`. A result that would begin with a digit is prefixed with
/// `_` so it remains a valid identifier.
///
/// Previously the digit guard inserted the `_` *before* trimming, so the trim removed it
/// again and inputs such as `"1abc"` still produced `"1abc"`.
fn sanitize_identifier(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    for ch in raw.chars() {
        if ch.is_ascii_alphanumeric() || ch == '_' {
            out.push(ch.to_ascii_lowercase());
        } else if !out.ends_with('_') {
            out.push('_');
        }
    }
    let trimmed = out.trim_matches('_');
    match trimmed.chars().next() {
        None => "param".to_string(),
        // Apply the guard after trimming so the protective underscore survives.
        Some(first) if first.is_ascii_digit() => format!("_{trimmed}"),
        Some(_) => trimmed.to_string(),
    }
}
2300
/// Escapes `value` for embedding inside a double-quoted Harn string literal.
///
/// Backslashes and double quotes are backslash-escaped. Line feeds and carriage returns are
/// emitted as `\n` / `\r` so that a multi-line value (for example a step name taken from a
/// trace) cannot break the single-line `log("...")` statements this module generates.
// NOTE(review): assumes Harn string literals accept `\n` and `\r` escapes alongside the
// `\\` and `\"` escapes already relied upon here — confirm against the lexer.
fn escape_harn_string(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            other => escaped.push(other),
        }
    }
    escaped
}
2304
/// Identifies the tool that produced a bundle manifest.
///
/// With `#[serde(default)]`, fields missing from the serialized form fall back to the
/// [`Default`] implementation (tool `"harn"`, version taken from `CARGO_PKG_VERSION`).
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleGenerator {
    /// Name of the generating tool.
    pub tool: String,
    /// Version string of the generating tool.
    pub version: String,
}
2332
2333impl Default for BundleGenerator {
2334 fn default() -> Self {
2335 Self {
2336 tool: "harn".to_string(),
2337 version: env!("CARGO_PKG_VERSION").to_string(),
2338 }
2339 }
2340}
2341
/// Points from the manifest at the bundled workflow file and names its package.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleWorkflowRef {
    /// Workflow file path, joined onto the bundle directory when validating
    /// (`BUNDLE_WORKFLOW_FILE` for bundles built by this module).
    pub path: String,
    /// Workflow name (the selected candidate's name, or `crystallized_workflow` when none).
    pub name: String,
    /// Package name the workflow would be promoted under.
    pub package_name: String,
    /// Package version of the workflow (`0.0.0` when no candidate was selected).
    pub package_version: String,
}
2354
/// Provenance record for one trace that contributed to the bundled candidate.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleSourceTrace {
    /// Id of the contributing trace.
    pub trace_id: String,
    /// Source hash of the trace; empty when neither the trace nor a matching candidate
    /// example supplies one.
    pub source_hash: String,
    /// Copied from the trace's `source` field when the trace was available at build time.
    pub source_url: Option<String>,
    /// String value of the trace's `source_receipt_id` metadata entry, if present.
    pub source_receipt_id: Option<String>,
    /// Bundle-relative fixture path; `None` when the trace itself was not supplied and
    /// therefore no fixture was written.
    pub fixture_path: Option<String>,
}
2371
/// Manifest copy of one workflow candidate step (see `BundleStep::from_candidate_step`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct BundleStep {
    /// Step index as recorded on the candidate step.
    pub index: usize,
    /// Step kind label.
    pub kind: String,
    /// Step name.
    pub name: String,
    /// Whether the step is deterministic or fuzzy; the manifest partitions steps on this.
    pub segment: SegmentKind,
    /// Parameter references copied from the candidate step.
    pub parameter_refs: Vec<String>,
    /// Side effects attributed to this step.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Capabilities copied from the candidate step.
    pub capabilities: Vec<String>,
    /// Required secrets copied from the candidate step.
    pub required_secrets: Vec<String>,
    /// Approval information for the step, if any.
    pub approval: Option<CrystallizationApproval>,
    /// Review notes copied from the candidate step.
    pub review_notes: Vec<String>,
}
2386
2387impl BundleStep {
2388 fn from_candidate_step(step: &WorkflowCandidateStep) -> Self {
2389 Self {
2390 index: step.index,
2391 kind: step.kind.clone(),
2392 name: step.name.clone(),
2393 segment: step.segment.clone(),
2394 parameter_refs: step.parameter_refs.clone(),
2395 side_effects: step.side_effects.clone(),
2396 capabilities: step.capabilities.clone(),
2397 required_secrets: step.required_secrets.clone(),
2398 approval: step.approval.clone(),
2399 review_notes: step.review_notes.clone(),
2400 }
2401 }
2402}
2403
/// Points from the manifest at the bundled eval pack.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleEvalPackRef {
    /// Eval pack path, joined onto the bundle directory when validating
    /// (`BUNDLE_EVAL_PACK_FILE` for bundles built by this module).
    pub path: String,
    /// Non-blank `eval_pack_link` from the candidate's promotion metadata, if any.
    pub link: Option<String>,
}
2413
/// Manifest entry for one fixture file shipped inside the bundle.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleFixtureRef {
    /// Bundle-relative path of the fixture JSON file.
    pub path: String,
    /// Id of the trace the fixture was produced from.
    pub trace_id: String,
    /// Source hash recorded for that trace.
    pub source_hash: String,
    /// Whether the payload was redacted before writing; bundle validation reports a problem
    /// for any fixture where this is `false`.
    pub redacted: bool,
}
2422
/// Promotion metadata carried in the bundle manifest.
///
/// Most fields are copied from the selected candidate's promotion record; the [`Default`]
/// implementation differs from a derived one only in `rollout_policy`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct BundlePromotion {
    /// Owner of the workflow; the bundle builder sets this to the same value as `author`.
    pub owner: Option<String>,
    /// Approver recorded on the candidate, if any.
    pub approver: Option<String>,
    /// Author recorded on the candidate, if any.
    pub author: Option<String>,
    /// Rollout policy name; defaults to `DEFAULT_ROLLOUT_POLICY`.
    pub rollout_policy: String,
    /// Rollback target recorded on the candidate, if any.
    pub rollback_target: Option<String>,
    /// Timestamp taken from `now_rfc3339()` when the bundle was built.
    pub created_at: String,
    /// Version of the workflow being promoted.
    pub workflow_version: String,
    /// Package name of the workflow being promoted.
    pub package_name: String,
    /// Sample count copied from the candidate's promotion record.
    pub sample_count: usize,
    /// Confidence copied from the candidate's promotion record.
    pub confidence: f64,
    /// Shadow-run success count copied from the candidate's promotion record.
    pub shadow_success_count: usize,
    /// Shadow-run failure count copied from the candidate's promotion record.
    pub shadow_failure_count: usize,
    /// Divergence records copied from the candidate's promotion record.
    pub divergence_history: Vec<PromotionDivergenceRecord>,
    /// Approval records copied from the candidate's promotion record.
    pub approval_history: Vec<PromotionApprovalRecord>,
    /// Promotion criteria copied from the candidate's promotion record.
    pub criteria: PromotionCriteria,
    /// Savings estimate copied from the candidate's promotion record.
    pub estimated_time_token_savings: SavingsEstimate,
}
2445
2446impl Default for BundlePromotion {
2447 fn default() -> Self {
2448 Self {
2449 owner: None,
2450 approver: None,
2451 author: None,
2452 rollout_policy: DEFAULT_ROLLOUT_POLICY.to_string(),
2453 rollback_target: None,
2454 created_at: String::new(),
2455 workflow_version: String::new(),
2456 package_name: String::new(),
2457 sample_count: 0,
2458 confidence: 0.0,
2459 shadow_success_count: 0,
2460 shadow_failure_count: 0,
2461 divergence_history: Vec::new(),
2462 approval_history: Vec::new(),
2463 criteria: PromotionCriteria::default(),
2464 estimated_time_token_savings: SavingsEstimate::default(),
2465 }
2466 }
2467}
2468
/// Summary of the redaction pass applied to bundled fixtures.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleRedactionSummary {
    /// `true` when at least one fixture payload was emitted (and therefore redacted);
    /// validation flags a bundle that ships fixtures with this set to `false`.
    pub applied: bool,
    /// Names of the redaction rules that were in effect.
    pub rules: Vec<String>,
    /// Human-readable description of what was done.
    pub summary: String,
    /// Number of fixture payloads emitted.
    pub fixture_count: usize,
}
2479
/// Contents of the bundle manifest file (`BUNDLE_MANIFEST_FILE`).
///
/// Built by `build_crystallization_bundle`; `#[serde(default)]` lets fields missing from
/// the serialized form deserialize to their defaults.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundleManifest {
    /// Schema identifier; the loader rejects anything other than `BUNDLE_SCHEMA`.
    pub schema: String,
    /// Schema version; the loader rejects versions newer than `BUNDLE_SCHEMA_VERSION`.
    pub schema_version: u32,
    /// Timestamp taken from `now_rfc3339()` when the manifest was built.
    pub generated_at: String,
    /// Tool and version that generated the bundle.
    pub generator: BundleGenerator,
    /// Whether the bundle holds a candidate, a plan-only candidate, or a rejection.
    pub kind: BundleKind,
    /// Id of the selected candidate; empty when no candidate was selected.
    pub candidate_id: String,
    /// Caller-supplied key, or one derived from the workflow name when absent or blank.
    pub external_key: String,
    /// Caller-supplied title, or an inferred one when absent or blank.
    pub title: String,
    /// Team passed through from the bundle options.
    pub team: Option<String>,
    /// Repository passed through from the bundle options.
    pub repo: Option<String>,
    /// Caller-supplied risk level, or an inferred `low` / `medium` / `high`.
    pub risk_level: String,
    /// Reference to the bundled workflow file and its package identity.
    pub workflow: BundleWorkflowRef,
    /// Source trace hashes copied from the candidate's promotion record.
    pub source_trace_hashes: Vec<String>,
    /// Provenance for each contributing trace.
    pub source_traces: Vec<BundleSourceTrace>,
    /// Candidate steps whose segment is `SegmentKind::Deterministic`.
    pub deterministic_steps: Vec<BundleStep>,
    /// All remaining candidate steps.
    pub fuzzy_steps: Vec<BundleStep>,
    /// Candidate-level side effects.
    pub side_effects: Vec<CrystallizationSideEffect>,
    /// Candidate-level capabilities.
    pub capabilities: Vec<String>,
    /// Candidate-level required secret ids; validation expects logical ids, not raw secrets.
    pub required_secrets: Vec<String>,
    /// Savings estimate copied from the candidate.
    pub savings: SavingsEstimate,
    /// Shadow run report copied from the candidate.
    pub shadow: ShadowRunReport,
    /// Reference to the eval pack; `None` when the generated eval pack text was blank.
    pub eval_pack: Option<BundleEvalPackRef>,
    /// One entry per fixture file written into the bundle.
    pub fixtures: Vec<BundleFixtureRef>,
    /// Promotion metadata.
    pub promotion: BundlePromotion,
    /// Summary of fixture redaction.
    pub redaction: BundleRedactionSummary,
    /// Candidate confidence (`0.0` when no candidate was selected).
    pub confidence: f64,
    /// Rejection reasons flattened from every rejected candidate in the report.
    pub rejection_reasons: Vec<String>,
    /// Warnings copied from the report.
    pub warnings: Vec<String>,
}
2512
/// Classifies what a bundle contains. Serialized in `snake_case`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum BundleKind {
    /// A selected candidate that is not plan-only. This is the default variant.
    #[default]
    Candidate,
    /// A selected candidate for which `candidate_is_plan_only` returned `true`.
    PlanOnly,
    /// No candidate was selected from the report.
    Rejected,
}
2529
/// Caller-supplied overrides for `build_crystallization_bundle`.
///
/// For `external_key`, `title`, `risk_level` and `rollout_policy`, a `None` or blank
/// (whitespace-only) value makes the builder fall back to a derived or default value.
#[derive(Clone, Debug, Default)]
pub struct BundleOptions {
    /// Overrides the key otherwise derived from the workflow name.
    pub external_key: Option<String>,
    /// Overrides the inferred bundle title.
    pub title: Option<String>,
    /// Team recorded in the manifest as given.
    pub team: Option<String>,
    /// Repository recorded in the manifest as given.
    pub repo: Option<String>,
    /// Overrides the inferred risk level.
    pub risk_level: Option<String>,
    /// Overrides `DEFAULT_ROLLOUT_POLICY`.
    pub rollout_policy: Option<String>,
}
2541
/// In-memory bundle: the manifest plus every payload that
/// `write_crystallization_bundle` writes to disk.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct CrystallizationBundle {
    /// Manifest describing the bundle.
    pub manifest: CrystallizationBundleManifest,
    /// Full crystallization report, written as `BUNDLE_REPORT_FILE`.
    pub report: CrystallizationReport,
    /// Generated workflow source, written as `BUNDLE_WORKFLOW_FILE`.
    pub harn_code: String,
    /// Generated eval pack, written as `BUNDLE_EVAL_PACK_FILE` unless blank.
    pub eval_pack_toml: String,
    /// Redacted traces, each written as one JSON file under `BUNDLE_FIXTURES_DIR`.
    pub fixtures: Vec<CrystallizationTrace>,
}
2551
/// Result of `validate_crystallization_bundle`.
///
/// The `*_ok` flags report individual checks; `problems` collects a message for every
/// failed check, and [`BundleValidation::is_ok`] is true only when it is empty.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BundleValidation {
    /// Display form of the validated bundle directory.
    pub bundle_dir: String,
    /// Schema identifier read from the manifest (empty if the manifest failed to load).
    pub schema: String,
    /// Schema version read from the manifest.
    pub schema_version: u32,
    /// Bundle kind read from the manifest.
    pub kind: BundleKind,
    /// Candidate id read from the manifest.
    pub candidate_id: String,
    /// The manifest loaded and passed the schema checks.
    pub manifest_ok: bool,
    /// The workflow file referenced by the manifest exists.
    pub workflow_ok: bool,
    /// The report file was read and decoded.
    pub report_ok: bool,
    /// The eval pack exists, or the manifest references none.
    pub eval_pack_ok: bool,
    /// Every listed fixture exists and is marked redacted.
    pub fixtures_ok: bool,
    /// The redaction summary is consistent with the presence of fixtures.
    pub redaction_ok: bool,
    /// Human-readable description of each problem found.
    pub problems: Vec<String>,
}
2569
2570impl BundleValidation {
2571 pub fn is_ok(&self) -> bool {
2572 self.problems.is_empty()
2573 }
2574}
2575
/// Assembles an in-memory [`CrystallizationBundle`] from crystallization artifacts.
///
/// * `artifacts` - report, generated workflow code and eval pack text; moved into the bundle.
/// * `traces` - the traces the report was computed from; used to resolve provenance and to
///   produce redacted fixtures.
/// * `options` - caller overrides for manifest metadata.
///
/// The selected candidate is looked up in the report by `selected_candidate_id`. When there
/// is none, the bundle kind is `Rejected` and candidate-derived fields take default values.
///
/// As written, every path through this function returns `Ok`.
pub fn build_crystallization_bundle(
    artifacts: CrystallizationArtifacts,
    traces: &[CrystallizationTrace],
    options: BundleOptions,
) -> Result<CrystallizationBundle, VmError> {
    let CrystallizationArtifacts {
        report,
        harn_code,
        eval_pack_toml,
    } = artifacts;

    // Resolve the selected candidate (if any) and classify the bundle accordingly.
    let (selected, kind) = match report
        .selected_candidate_id
        .as_deref()
        .and_then(|id| report.candidates.iter().find(|c| c.id == id))
    {
        Some(candidate) => {
            let kind = if candidate_is_plan_only(candidate) {
                BundleKind::PlanOnly
            } else {
                BundleKind::Candidate
            };
            (Some(candidate), kind)
        }
        None => (None, BundleKind::Rejected),
    };

    // Workflow identity, with fixed fallbacks for the rejected case.
    let workflow_name = selected
        .map(|candidate| candidate.name.clone())
        .unwrap_or_else(|| "crystallized_workflow".to_string());
    let package_name = selected
        .map(|candidate| candidate.promotion.package_name.clone())
        .unwrap_or_else(|| workflow_name.replace('_', "-"));
    let workflow_version = selected
        .map(|candidate| candidate.promotion.version.clone())
        .unwrap_or_else(|| "0.0.0".to_string());

    let manifest_workflow = BundleWorkflowRef {
        path: BUNDLE_WORKFLOW_FILE.to_string(),
        name: workflow_name.clone(),
        package_name: package_name.clone(),
        package_version: workflow_version.clone(),
    };

    // Caller overrides win unless they are blank; otherwise derive or default each value.
    let external_key = options
        .external_key
        .clone()
        .filter(|key| !key.trim().is_empty())
        .unwrap_or_else(|| sanitize_external_key(&workflow_name));
    let title = options
        .title
        .clone()
        .filter(|title| !title.trim().is_empty())
        .unwrap_or_else(|| infer_bundle_title(selected, &workflow_name));
    let risk_level = options
        .risk_level
        .clone()
        .filter(|risk| !risk.trim().is_empty())
        .unwrap_or_else(|| infer_risk_level(selected));
    let rollout_policy = options
        .rollout_policy
        .clone()
        .filter(|policy| !policy.trim().is_empty())
        .unwrap_or_else(|| DEFAULT_ROLLOUT_POLICY.to_string());

    // Split the candidate's steps into deterministic ones and everything else.
    let (deterministic_steps, fuzzy_steps) = match selected {
        Some(candidate) => candidate
            .steps
            .iter()
            .map(BundleStep::from_candidate_step)
            .partition::<Vec<_>, _>(|step| step.segment == SegmentKind::Deterministic),
        None => (Vec::new(), Vec::new()),
    };

    let source_trace_hashes = selected
        .map(|candidate| candidate.promotion.source_trace_hashes.clone())
        .unwrap_or_default();

    let mut source_traces = Vec::new();
    let mut fixture_refs = Vec::new();
    let mut fixture_payloads = Vec::new();
    if let Some(candidate) = selected {
        // Contributing traces: every candidate example plus every supplied trace for which
        // `find_sequence_start` locates the candidate's sequence signature. The `BTreeSet`
        // de-duplicates ids and fixes the iteration order.
        let mut fixture_trace_ids = BTreeSet::new();
        for example in &candidate.examples {
            fixture_trace_ids.insert(example.trace_id.clone());
        }
        for trace in traces {
            if find_sequence_start(trace, &candidate.sequence_signature).is_some() {
                fixture_trace_ids.insert(trace.id.clone());
            }
        }
        for trace_id in fixture_trace_ids {
            // The trace may be absent from `traces` when the id came only from an example.
            let trace = traces.iter().find(|trace| trace.id == trace_id);
            // Prefer the trace's own hash, then the matching example's, then empty.
            let source_hash = trace
                .and_then(|trace| trace.source_hash.clone())
                .or_else(|| {
                    candidate
                        .examples
                        .iter()
                        .find(|example| example.trace_id == trace_id)
                        .map(|example| example.source_hash.clone())
                })
                .unwrap_or_default();
            let fixture_relative = trace.map(|trace| {
                format!(
                    "{BUNDLE_FIXTURES_DIR}/{}.json",
                    sanitize_fixture_name(&trace.id)
                )
            });
            source_traces.push(BundleSourceTrace {
                trace_id: trace_id.clone(),
                source_hash: source_hash.clone(),
                source_url: trace.and_then(|trace| trace.source.clone()),
                source_receipt_id: trace
                    .and_then(|trace| trace.metadata.get("source_receipt_id"))
                    .and_then(|value| value.as_str().map(str::to_string)),
                fixture_path: fixture_relative.clone(),
            });
            // Only traces that were actually supplied can be shipped as fixtures, and only
            // after a redaction pass over a private copy.
            if let (Some(trace), Some(fixture_path)) = (trace, fixture_relative.clone()) {
                let mut redacted = trace.clone();
                redact_trace_for_bundle(&mut redacted);
                fixture_refs.push(BundleFixtureRef {
                    path: fixture_path,
                    trace_id: trace.id.clone(),
                    source_hash,
                    redacted: true,
                });
                fixture_payloads.push(redacted);
            }
        }
    }

    let author = selected.and_then(|candidate| candidate.promotion.author.clone());
    let promotion = BundlePromotion {
        // The owner is taken to be the candidate's author.
        owner: author.clone(),
        approver: selected.and_then(|candidate| candidate.promotion.approver.clone()),
        author,
        rollout_policy,
        rollback_target: selected.and_then(|candidate| candidate.promotion.rollback_target.clone()),
        created_at: now_rfc3339(),
        workflow_version,
        package_name: package_name.clone(),
        sample_count: selected
            .map(|candidate| candidate.promotion.sample_count)
            .unwrap_or_default(),
        confidence: selected
            .map(|candidate| candidate.promotion.confidence)
            .unwrap_or_default(),
        shadow_success_count: selected
            .map(|candidate| candidate.promotion.shadow_success_count)
            .unwrap_or_default(),
        shadow_failure_count: selected
            .map(|candidate| candidate.promotion.shadow_failure_count)
            .unwrap_or_default(),
        divergence_history: selected
            .map(|candidate| candidate.promotion.divergence_history.clone())
            .unwrap_or_default(),
        approval_history: selected
            .map(|candidate| candidate.promotion.approval_history.clone())
            .unwrap_or_default(),
        criteria: selected
            .map(|candidate| candidate.promotion.criteria.clone())
            .unwrap_or_default(),
        estimated_time_token_savings: selected
            .map(|candidate| candidate.promotion.estimated_time_token_savings.clone())
            .unwrap_or_default(),
    };

    // Redaction counts as "applied" exactly when at least one fixture payload was produced.
    let redaction = BundleRedactionSummary {
        applied: !fixture_payloads.is_empty(),
        rules: vec![
            "sensitive_keys".to_string(),
            "secret_value_heuristic".to_string(),
        ],
        summary: if fixture_payloads.is_empty() {
            "no fixtures emitted".to_string()
        } else {
            "fixture payloads scrubbed of secret-like values and sensitive keys before write"
                .to_string()
        },
        fixture_count: fixture_payloads.len(),
    };

    // A blank eval pack is not written to disk, so the manifest must not reference one.
    let eval_pack = if eval_pack_toml.trim().is_empty() {
        None
    } else {
        Some(BundleEvalPackRef {
            path: BUNDLE_EVAL_PACK_FILE.to_string(),
            link: selected
                .and_then(|candidate| candidate.promotion.eval_pack_link.clone())
                .filter(|link| !link.trim().is_empty()),
        })
    };

    let manifest = CrystallizationBundleManifest {
        schema: BUNDLE_SCHEMA.to_string(),
        schema_version: BUNDLE_SCHEMA_VERSION,
        generated_at: now_rfc3339(),
        generator: BundleGenerator::default(),
        kind,
        candidate_id: selected
            .map(|candidate| candidate.id.clone())
            .unwrap_or_default(),
        external_key,
        title,
        team: options.team,
        repo: options.repo,
        risk_level,
        workflow: manifest_workflow,
        source_trace_hashes,
        source_traces,
        deterministic_steps,
        fuzzy_steps,
        side_effects: selected
            .map(|candidate| candidate.side_effects.clone())
            .unwrap_or_default(),
        capabilities: selected
            .map(|candidate| candidate.capabilities.clone())
            .unwrap_or_default(),
        required_secrets: selected
            .map(|candidate| candidate.required_secrets.clone())
            .unwrap_or_default(),
        savings: selected
            .map(|candidate| candidate.savings.clone())
            .unwrap_or_default(),
        shadow: selected
            .map(|candidate| candidate.shadow.clone())
            .unwrap_or_default(),
        eval_pack,
        fixtures: fixture_refs,
        promotion,
        redaction,
        confidence: selected
            .map(|candidate| candidate.confidence)
            .unwrap_or(0.0),
        // Reasons from all rejected candidates are flattened into a single list.
        rejection_reasons: report
            .rejected_candidates
            .iter()
            .flat_map(|candidate| candidate.rejection_reasons.iter().cloned())
            .collect(),
        warnings: report.warnings.clone(),
    };

    Ok(CrystallizationBundle {
        manifest,
        report,
        harn_code,
        eval_pack_toml,
        fixtures: fixture_payloads,
    })
}
2833
2834pub fn write_crystallization_bundle(
2838 bundle: &CrystallizationBundle,
2839 bundle_dir: &Path,
2840) -> Result<CrystallizationBundleManifest, VmError> {
2841 std::fs::create_dir_all(bundle_dir).map_err(|error| {
2842 VmError::Runtime(format!(
2843 "failed to create bundle dir {}: {error}",
2844 bundle_dir.display()
2845 ))
2846 })?;
2847 write_bytes(
2848 &bundle_dir.join(BUNDLE_WORKFLOW_FILE),
2849 bundle.harn_code.as_bytes(),
2850 )?;
2851 let report_json = serde_json::to_vec_pretty(&bundle.report)
2852 .map_err(|error| VmError::Runtime(format!("failed to encode report JSON: {error}")))?;
2853 write_bytes(&bundle_dir.join(BUNDLE_REPORT_FILE), &report_json)?;
2854
2855 if !bundle.eval_pack_toml.trim().is_empty() {
2856 write_bytes(
2857 &bundle_dir.join(BUNDLE_EVAL_PACK_FILE),
2858 bundle.eval_pack_toml.as_bytes(),
2859 )?;
2860 }
2861
2862 if !bundle.fixtures.is_empty() {
2863 let fixtures_dir = bundle_dir.join(BUNDLE_FIXTURES_DIR);
2864 std::fs::create_dir_all(&fixtures_dir).map_err(|error| {
2865 VmError::Runtime(format!(
2866 "failed to create fixtures dir {}: {error}",
2867 fixtures_dir.display()
2868 ))
2869 })?;
2870 for trace in &bundle.fixtures {
2871 let path = fixtures_dir.join(format!("{}.json", sanitize_fixture_name(&trace.id)));
2872 let payload = serde_json::to_vec_pretty(trace).map_err(|error| {
2873 VmError::Runtime(format!("failed to encode fixture {}: {error}", trace.id))
2874 })?;
2875 write_bytes(&path, &payload)?;
2876 }
2877 }
2878
2879 let manifest_json = serde_json::to_vec_pretty(&bundle.manifest)
2880 .map_err(|error| VmError::Runtime(format!("failed to encode manifest JSON: {error}")))?;
2881 write_bytes(&bundle_dir.join(BUNDLE_MANIFEST_FILE), &manifest_json)?;
2882 Ok(bundle.manifest.clone())
2883}
2884
2885pub fn load_crystallization_bundle_manifest(
2889 bundle_dir: &Path,
2890) -> Result<CrystallizationBundleManifest, VmError> {
2891 let manifest_path = bundle_dir.join(BUNDLE_MANIFEST_FILE);
2892 let bytes = std::fs::read(&manifest_path).map_err(|error| {
2893 VmError::Runtime(format!(
2894 "failed to read bundle manifest {}: {error}",
2895 manifest_path.display()
2896 ))
2897 })?;
2898 let manifest: CrystallizationBundleManifest =
2899 serde_json::from_slice(&bytes).map_err(|error| {
2900 VmError::Runtime(format!(
2901 "failed to decode bundle manifest {}: {error}",
2902 manifest_path.display()
2903 ))
2904 })?;
2905 if manifest.schema != BUNDLE_SCHEMA {
2906 return Err(VmError::Runtime(format!(
2907 "bundle {} has unrecognized schema {:?} (expected {})",
2908 bundle_dir.display(),
2909 manifest.schema,
2910 BUNDLE_SCHEMA
2911 )));
2912 }
2913 if manifest.schema_version > BUNDLE_SCHEMA_VERSION {
2914 return Err(VmError::Runtime(format!(
2915 "bundle {} schema_version {} is newer than supported {}",
2916 bundle_dir.display(),
2917 manifest.schema_version,
2918 BUNDLE_SCHEMA_VERSION
2919 )));
2920 }
2921 Ok(manifest)
2922}
2923
2924pub fn load_crystallization_bundle(
2928 bundle_dir: &Path,
2929) -> Result<(CrystallizationBundleManifest, Vec<CrystallizationTrace>), VmError> {
2930 let manifest = load_crystallization_bundle_manifest(bundle_dir)?;
2931 let mut traces = Vec::new();
2932 for fixture in &manifest.fixtures {
2933 let path = bundle_dir.join(&fixture.path);
2934 traces.push(load_crystallization_trace(&path)?);
2935 }
2936 Ok((manifest, traces))
2937}
2938
2939pub fn validate_crystallization_bundle(bundle_dir: &Path) -> Result<BundleValidation, VmError> {
2942 let mut validation = BundleValidation {
2943 bundle_dir: bundle_dir.display().to_string(),
2944 ..BundleValidation::default()
2945 };
2946 let manifest = match load_crystallization_bundle_manifest(bundle_dir) {
2947 Ok(manifest) => manifest,
2948 Err(error) => {
2949 validation.problems.push(error.to_string());
2950 return Ok(validation);
2951 }
2952 };
2953 validation.manifest_ok = true;
2954 validation.schema = manifest.schema.clone();
2955 validation.schema_version = manifest.schema_version;
2956 validation.kind = manifest.kind.clone();
2957 validation.candidate_id = manifest.candidate_id.clone();
2958
2959 let workflow_path = bundle_dir.join(&manifest.workflow.path);
2960 if workflow_path.exists() {
2961 validation.workflow_ok = true;
2962 } else {
2963 validation
2964 .problems
2965 .push(format!("missing workflow file {}", workflow_path.display()));
2966 }
2967
2968 let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
2969 match std::fs::read(&report_path) {
2970 Ok(bytes) => match serde_json::from_slice::<CrystallizationReport>(&bytes) {
2971 Ok(report) => {
2972 validation.report_ok = true;
2973 if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
2974 && manifest.candidate_id.is_empty()
2975 {
2976 validation
2977 .problems
2978 .push("manifest is non-rejected but has empty candidate_id".to_string());
2979 }
2980 if matches!(manifest.kind, BundleKind::Candidate | BundleKind::PlanOnly)
2981 && report.selected_candidate_id.as_deref() != Some(&manifest.candidate_id)
2982 {
2983 validation.problems.push(format!(
2984 "report selected_candidate_id {:?} does not match manifest candidate_id {}",
2985 report.selected_candidate_id, manifest.candidate_id
2986 ));
2987 }
2988 }
2989 Err(error) => {
2990 validation
2991 .problems
2992 .push(format!("invalid report.json: {error}"));
2993 }
2994 },
2995 Err(error) => {
2996 validation.problems.push(format!(
2997 "missing report file {}: {error}",
2998 report_path.display()
2999 ));
3000 }
3001 }
3002
3003 if let Some(eval_pack) = &manifest.eval_pack {
3004 let path = bundle_dir.join(&eval_pack.path);
3005 if path.exists() {
3006 validation.eval_pack_ok = true;
3007 } else {
3008 validation.problems.push(format!(
3009 "manifest references eval pack {} but file is missing",
3010 path.display()
3011 ));
3012 }
3013 } else {
3014 validation.eval_pack_ok = true;
3015 }
3016
3017 let mut fixtures_problem = false;
3018 for fixture in &manifest.fixtures {
3019 let path = bundle_dir.join(&fixture.path);
3020 if !path.exists() {
3021 validation
3022 .problems
3023 .push(format!("missing fixture {}", path.display()));
3024 fixtures_problem = true;
3025 continue;
3026 }
3027 if !fixture.redacted {
3028 validation.problems.push(format!(
3029 "fixture {} is not marked redacted; bundle must not ship raw private payloads",
3030 fixture.path
3031 ));
3032 fixtures_problem = true;
3033 }
3034 }
3035 validation.fixtures_ok = !fixtures_problem;
3036
3037 if !manifest.redaction.applied && !manifest.fixtures.is_empty() {
3038 validation
3039 .problems
3040 .push("redaction.applied is false but bundle includes fixtures".to_string());
3041 } else {
3042 validation.redaction_ok = true;
3043 }
3044 if !manifest
3045 .required_secrets
3046 .iter()
3047 .all(|secret| secret_id_looks_logical(secret))
3048 {
3049 validation.problems.push(
3050 "required_secrets contains a non-logical id (looks like a raw secret)".to_string(),
3051 );
3052 }
3053
3054 Ok(validation)
3055}
3056
3057pub fn shadow_replay_bundle(
3063 bundle_dir: &Path,
3064) -> Result<(CrystallizationBundleManifest, ShadowRunReport), VmError> {
3065 let (manifest, traces) = load_crystallization_bundle(bundle_dir)?;
3066 let report_path = bundle_dir.join(BUNDLE_REPORT_FILE);
3067 let bytes = std::fs::read(&report_path).map_err(|error| {
3068 VmError::Runtime(format!(
3069 "failed to read bundle report {}: {error}",
3070 report_path.display()
3071 ))
3072 })?;
3073 let report: CrystallizationReport = serde_json::from_slice(&bytes).map_err(|error| {
3074 VmError::Runtime(format!(
3075 "failed to decode bundle report {}: {error}",
3076 report_path.display()
3077 ))
3078 })?;
3079 let candidate = report
3080 .selected_candidate_id
3081 .as_deref()
3082 .and_then(|id| report.candidates.iter().find(|c| c.id == id))
3083 .ok_or_else(|| {
3084 VmError::Runtime(format!(
3085 "bundle {} has no selected candidate to replay",
3086 bundle_dir.display()
3087 ))
3088 })?;
3089 let shadow = shadow_candidate(candidate, &traces);
3090 Ok((manifest, shadow))
3091}
3092
3093fn write_bytes(path: &Path, bytes: &[u8]) -> Result<(), VmError> {
3094 crate::atomic_io::atomic_write(path, bytes)
3095 .map_err(|error| VmError::Runtime(format!("failed to write {}: {error}", path.display())))
3096}
3097
/// Turns a trace id into a file-name stem.
///
/// Every character other than an ASCII letter, digit, `-` or `_` becomes `_`; leading and
/// trailing underscores are then stripped. An empty result falls back to `"trace"`.
fn sanitize_fixture_name(raw: &str) -> String {
    let replaced: String = raw
        .chars()
        .map(|ch| match ch {
            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' => ch,
            _ => '_',
        })
        .collect();
    match replaced.trim_matches('_') {
        "" => String::from("trace"),
        stem => stem.to_owned(),
    }
}
3115
/// Derives a lowercase, dash-separated key from `raw`.
///
/// ASCII letters and digits are kept (lowercased); each run of other characters becomes a
/// single `-`, with no dash at either end. An empty result falls back to
/// `"crystallized-workflow"`.
fn sanitize_external_key(raw: &str) -> String {
    let mut key = String::new();
    for ch in raw.chars().map(|ch| ch.to_ascii_lowercase()) {
        if ch.is_ascii_alphanumeric() {
            key.push(ch);
            continue;
        }
        // Emit a separator only between kept characters, and never two in a row.
        if !key.is_empty() && !key.ends_with('-') {
            key.push('-');
        }
    }
    match key.trim_matches('-') {
        "" => String::from("crystallized-workflow"),
        core => core.to_owned(),
    }
}
3136
3137fn infer_bundle_title(candidate: Option<&WorkflowCandidate>, fallback_name: &str) -> String {
3138 if let Some(candidate) = candidate {
3139 format!(
3140 "{} ({} step{})",
3141 candidate.name,
3142 candidate.steps.len(),
3143 if candidate.steps.len() == 1 { "" } else { "s" }
3144 )
3145 } else {
3146 format!("rejected: {fallback_name}")
3147 }
3148}
3149
3150fn infer_risk_level(candidate: Option<&WorkflowCandidate>) -> String {
3151 let Some(candidate) = candidate else {
3152 return "high".to_string();
3153 };
3154 let touches_external = candidate.side_effects.iter().any(side_effect_is_external);
3155 let needs_secret = !candidate.required_secrets.is_empty();
3156 if touches_external && needs_secret {
3157 "high".to_string()
3158 } else if touches_external || needs_secret {
3159 "medium".to_string()
3160 } else {
3161 "low".to_string()
3162 }
3163}
3164
3165fn side_effect_is_external(effect: &CrystallizationSideEffect) -> bool {
3166 let kind = effect.kind.to_ascii_lowercase();
3167 if kind.is_empty() {
3168 return false;
3169 }
3170 let internal = kind.contains("receipt")
3174 || kind.contains("event_log")
3175 || kind.contains("memo")
3176 || kind.contains("plan");
3177 if internal {
3178 return false;
3179 }
3180 kind.contains("post")
3181 || kind.contains("write")
3182 || kind.contains("publish")
3183 || kind.contains("delete")
3184 || kind.contains("send")
3185}
3186
3187fn candidate_is_plan_only(candidate: &WorkflowCandidate) -> bool {
3188 if candidate.steps.is_empty() {
3189 return false;
3190 }
3191 candidate.side_effects.iter().all(|effect| {
3192 let kind = effect.kind.to_ascii_lowercase();
3193 kind.is_empty()
3196 || kind.contains("receipt")
3197 || kind.contains("event_log")
3198 || kind.contains("memo")
3199 || kind.contains("plan")
3200 || (kind.contains("file") && !kind.contains("publish"))
3201 })
3202}
3203
3204fn redact_trace_for_bundle(trace: &mut CrystallizationTrace) {
3205 for action in &mut trace.actions {
3206 redact_bundle_value(&mut action.inputs);
3207 if let Some(output) = action.output.as_mut() {
3208 redact_bundle_value(output);
3209 }
3210 if let Some(observed) = action.observed_output.as_mut() {
3211 redact_bundle_value(observed);
3212 }
3213 for value in action.parameters.values_mut() {
3214 redact_bundle_value(value);
3215 }
3216 for (_, value) in action.metadata.iter_mut() {
3217 redact_bundle_value(value);
3218 }
3219 }
3220 for (_, value) in trace.metadata.iter_mut() {
3221 redact_bundle_value(value);
3222 }
3223 if let Some(run) = trace.replay_run.as_mut() {
3224 redact_replay_run_for_bundle(run);
3225 }
3226}
3227
3228fn redact_replay_run_for_bundle(run: &mut ReplayTraceRun) {
3229 for value in run
3230 .event_log_entries
3231 .iter_mut()
3232 .chain(run.trigger_firings.iter_mut())
3233 .chain(run.llm_interactions.iter_mut())
3234 .chain(run.protocol_interactions.iter_mut())
3235 .chain(run.approval_interactions.iter_mut())
3236 .chain(run.effect_receipts.iter_mut())
3237 .chain(run.agent_transcript_deltas.iter_mut())
3238 .chain(run.final_artifacts.iter_mut())
3239 .chain(run.policy_decisions.iter_mut())
3240 {
3241 redact_bundle_value(value);
3242 }
3243}
3244
3245fn redact_bundle_value(value: &mut JsonValue) {
3246 match value {
3247 JsonValue::String(text) if looks_like_secret_value(text) => {
3248 *text = "[redacted]".to_string();
3249 }
3250 JsonValue::Array(items) => {
3251 for item in items {
3252 redact_bundle_value(item);
3253 }
3254 }
3255 JsonValue::Object(map) => {
3256 for (key, child) in map.iter_mut() {
3257 if is_sensitive_bundle_key(key) {
3258 *child = JsonValue::String("[redacted]".to_string());
3259 } else {
3260 redact_bundle_value(child);
3261 }
3262 }
3263 }
3264 _ => {}
3265 }
3266}
3267
/// Reports whether a JSON key / map key names a value that must be redacted.
///
/// The key is lower-cased and `-` is normalised to `_` before matching, so
/// header-style spellings such as `X-API-Key` or `api-key` are treated the
/// same as `api_key`. A key is sensitive when the normalised form
///
/// * contains `secret`, `token`, `password`, `api_key`, or `apikey`, or
/// * is exactly one of the credential-bearing HTTP header names
///   `authorization`, `proxy_authorization`, `cookie`, or `set_cookie`.
///
/// Every key matched before the normalisation was introduced is still
/// matched; the change only widens the set.
fn is_sensitive_bundle_key(key: &str) -> bool {
    const SENSITIVE_FRAGMENTS: [&str; 5] = ["secret", "token", "password", "api_key", "apikey"];
    const SENSITIVE_HEADERS: [&str; 4] = [
        "authorization",
        "proxy_authorization",
        "cookie",
        "set_cookie",
    ];

    let normalized = key.to_ascii_lowercase().replace('-', "_");
    SENSITIVE_FRAGMENTS
        .iter()
        .any(|fragment| normalized.contains(*fragment))
        || SENSITIVE_HEADERS.contains(&normalized.as_str())
}
3279
/// Heuristically reports whether a string value looks like a credential.
///
/// After trimming surrounding whitespace, the value is flagged when it
/// starts with one of the known token prefixes (`sk-`, `ghp_`, `ghs_`,
/// `xoxb-`, `xoxp-`, `AKIA`), or when it is longer than 48 bytes and
/// consists solely of ASCII letters, digits, `_`, and `-`.
fn looks_like_secret_value(value: &str) -> bool {
    const KNOWN_PREFIXES: [&str; 6] = ["sk-", "ghp_", "ghs_", "xoxb-", "xoxp-", "AKIA"];

    let candidate = value.trim();
    if KNOWN_PREFIXES
        .iter()
        .any(|prefix| candidate.starts_with(*prefix))
    {
        return true;
    }
    // Any non-ASCII character contributes a byte >= 0x80, which fails the
    // check below, so scanning bytes is equivalent to scanning chars here.
    candidate.len() > 48
        && candidate
            .bytes()
            .all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-'))
}
3293
3294fn secret_id_looks_logical(value: &str) -> bool {
3295 !looks_like_secret_value(value) && !value.trim().is_empty()
3296}
3297
3298#[cfg(test)]
3299mod tests {
3300 use super::*;
3301
3302 fn version_trace(
3303 id: &str,
3304 version: &str,
3305 side_target: &str,
3306 fuzzy: bool,
3307 ) -> CrystallizationTrace {
3308 CrystallizationTrace {
3309 id: id.to_string(),
3310 actions: vec![
3311 CrystallizationAction {
3312 id: format!("{id}-branch"),
3313 kind: "tool_call".to_string(),
3314 name: "git.checkout_branch".to_string(),
3315 parameters: BTreeMap::from([
3316 ("repo_path".to_string(), json!(format!("/tmp/{id}"))),
3317 (
3318 "branch_name".to_string(),
3319 json!(format!("release-{version}")),
3320 ),
3321 ]),
3322 side_effects: vec![CrystallizationSideEffect {
3323 kind: "git_ref".to_string(),
3324 target: side_target.to_string(),
3325 capability: Some("git.write".to_string()),
3326 ..CrystallizationSideEffect::default()
3327 }],
3328 capabilities: vec!["git.write".to_string()],
3329 deterministic: Some(true),
3330 duration_ms: Some(20),
3331 ..CrystallizationAction::default()
3332 },
3333 CrystallizationAction {
3334 id: format!("{id}-manifest"),
3335 kind: "file_mutation".to_string(),
3336 name: "update_manifest_version".to_string(),
3337 inputs: json!({"version": version, "path": "harn.toml"}),
3338 parameters: BTreeMap::from([("version".to_string(), json!(version))]),
3339 side_effects: vec![CrystallizationSideEffect {
3340 kind: "file_write".to_string(),
3341 target: "harn.toml".to_string(),
3342 capability: Some("fs.write".to_string()),
3343 ..CrystallizationSideEffect::default()
3344 }],
3345 capabilities: vec!["fs.write".to_string()],
3346 deterministic: Some(true),
3347 ..CrystallizationAction::default()
3348 },
3349 CrystallizationAction {
3350 id: format!("{id}-release"),
3351 kind: if fuzzy { "model_call" } else { "tool_call" }.to_string(),
3352 name: "prepare_release_notes".to_string(),
3353 inputs: json!({"release_target": "crates.io", "version": version}),
3354 parameters: BTreeMap::from([
3355 ("release_target".to_string(), json!("crates.io")),
3356 ("version".to_string(), json!(version)),
3357 ]),
3358 fuzzy: Some(fuzzy),
3359 deterministic: Some(!fuzzy),
3360 cost: CrystallizationCost {
3361 model_calls: if fuzzy { 1 } else { 0 },
3362 input_tokens: if fuzzy { 1200 } else { 0 },
3363 output_tokens: if fuzzy { 250 } else { 0 },
3364 total_cost_usd: if fuzzy { 0.01 } else { 0.0 },
3365 wall_ms: 3000,
3366 ..CrystallizationCost::default()
3367 },
3368 ..CrystallizationAction::default()
3369 },
3370 ],
3371 ..CrystallizationTrace::default()
3372 }
3373 }
3374
3375 #[test]
3376 fn crystallizes_repeated_version_bump_with_parameters() {
3377 let traces = (0..5)
3378 .map(|idx| {
3379 version_trace(
3380 &format!("trace_{idx}"),
3381 &format!("0.7.{idx}"),
3382 "release-branch",
3383 false,
3384 )
3385 })
3386 .collect::<Vec<_>>();
3387
3388 let artifacts = crystallize_traces(
3389 traces,
3390 CrystallizeOptions {
3391 workflow_name: Some("version_bump".to_string()),
3392 ..CrystallizeOptions::default()
3393 },
3394 )
3395 .unwrap();
3396
3397 let candidate = &artifacts.report.candidates[0];
3398 assert!(candidate.rejection_reasons.is_empty());
3399 assert!(candidate.shadow.pass);
3400 assert_eq!(candidate.examples.len(), 5);
3401 let params = candidate
3402 .parameters
3403 .iter()
3404 .map(|param| param.name.as_str())
3405 .collect::<BTreeSet<_>>();
3406 assert!(params.contains("version"));
3407 assert!(params.contains("repo_path"));
3408 assert!(params.contains("branch_name"));
3409 assert!(artifacts.harn_code.contains("pipeline version_bump("));
3410 assert!(artifacts.eval_pack_toml.contains("crystallization-shadow"));
3411 }
3412
3413 #[test]
3414 fn rejects_divergent_side_effects() {
3415 let traces = vec![
3416 version_trace("trace_a", "0.7.1", "release-branch", false),
3417 version_trace("trace_b", "0.7.2", "main", false),
3418 version_trace("trace_c", "0.7.3", "release-branch", false),
3419 ];
3420
3421 let artifacts = crystallize_traces(traces, CrystallizeOptions::default()).unwrap();
3422
3423 assert!(artifacts.report.candidates.is_empty());
3424 assert_eq!(artifacts.report.rejected_candidates.len(), 1);
3425 assert!(artifacts.report.rejected_candidates[0].rejection_reasons[0]
3426 .contains("divergent side effects"));
3427 }
3428
3429 #[test]
3430 fn preserves_remaining_fuzzy_segment() {
3431 let traces = (0..3)
3432 .map(|idx| {
3433 version_trace(
3434 &format!("trace_{idx}"),
3435 &format!("0.8.{idx}"),
3436 "release-branch",
3437 true,
3438 )
3439 })
3440 .collect::<Vec<_>>();
3441
3442 let artifacts = crystallize_traces(traces, CrystallizeOptions::default()).unwrap();
3443 let candidate = &artifacts.report.candidates[0];
3444
3445 assert!(candidate
3446 .steps
3447 .iter()
3448 .any(|step| step.segment == SegmentKind::Fuzzy));
3449 assert!(candidate.savings.remaining_model_calls > 0);
3450 assert!(artifacts.harn_code.contains("TODO: fuzzy segment"));
3451 }
3452
3453 #[test]
3454 fn excludes_unresolved_policy_violations_from_mining() {
3455 let mut traces = version_traces(2);
3456 let mut excluded = version_trace("trace_policy_blocked", "0.7.99", "release-branch", false);
3457 excluded
3458 .metadata
3459 .insert("unresolved_policy_violation".to_string(), json!(true));
3460 traces.push(excluded);
3461
3462 let artifacts = crystallize_traces(
3463 traces,
3464 CrystallizeOptions {
3465 min_examples: 2,
3466 ..CrystallizeOptions::default()
3467 },
3468 )
3469 .unwrap();
3470
3471 assert_eq!(artifacts.report.source_trace_count, 2);
3472 assert_eq!(artifacts.report.excluded_trace_count, 1);
3473 assert!(artifacts.report.selected_candidate_id.is_some());
3474 }
3475
3476 fn plan_only_trace(id: &str, suffix: &str) -> CrystallizationTrace {
3477 CrystallizationTrace {
3478 id: id.to_string(),
3479 actions: vec![
3480 CrystallizationAction {
3481 id: format!("{id}-classify"),
3482 kind: "tool_call".to_string(),
3483 name: "classify_issue".to_string(),
3484 parameters: BTreeMap::from([
3485 ("issue_id".to_string(), json!(format!("HAR-{suffix}"))),
3486 ("team_key".to_string(), json!("HAR")),
3487 ]),
3488 capabilities: vec!["linear.read".to_string()],
3489 deterministic: Some(true),
3490 duration_ms: Some(15),
3491 ..CrystallizationAction::default()
3492 },
3493 CrystallizationAction {
3494 id: format!("{id}-receipt"),
3495 kind: "receipt_write".to_string(),
3496 name: "emit_receipt".to_string(),
3497 inputs: json!({"summary": format!("plan only #{suffix}"), "kind": "plan"}),
3498 parameters: BTreeMap::from([
3499 ("kind".to_string(), json!("plan")),
3500 ("summary".to_string(), json!(format!("plan only #{suffix}"))),
3501 ]),
3502 side_effects: vec![CrystallizationSideEffect {
3503 kind: "receipt_write".to_string(),
3504 target: "tenant_event_log".to_string(),
3505 capability: Some("receipt.write".to_string()),
3506 ..CrystallizationSideEffect::default()
3507 }],
3508 capabilities: vec!["receipt.write".to_string()],
3509 deterministic: Some(true),
3510 duration_ms: Some(5),
3511 ..CrystallizationAction::default()
3512 },
3513 ],
3514 ..CrystallizationTrace::default()
3515 }
3516 }
3517
3518 fn version_traces(count: usize) -> Vec<CrystallizationTrace> {
3519 (0..count)
3520 .map(|idx| {
3521 version_trace(
3522 &format!("trace_{idx}"),
3523 &format!("0.7.{idx}"),
3524 "release-branch",
3525 false,
3526 )
3527 })
3528 .collect()
3529 }
3530
3531 #[test]
3532 fn build_bundle_assembles_versioned_manifest() {
3533 let traces = version_traces(5);
3534 let artifacts = crystallize_traces(
3535 traces.clone(),
3536 CrystallizeOptions {
3537 workflow_name: Some("version_bump".to_string()),
3538 package_name: Some("release-workflows".to_string()),
3539 author: Some("ops@example.com".to_string()),
3540 approver: Some("lead@example.com".to_string()),
3541 eval_pack_link: Some("eval-pack://release-workflows/v1".to_string()),
3542 ..CrystallizeOptions::default()
3543 },
3544 )
3545 .unwrap();
3546
3547 let bundle = build_crystallization_bundle(
3548 artifacts,
3549 &traces,
3550 BundleOptions {
3551 team: Some("platform".to_string()),
3552 repo: Some("burin-labs/harn".to_string()),
3553 ..BundleOptions::default()
3554 },
3555 )
3556 .unwrap();
3557
3558 let manifest = &bundle.manifest;
3559 assert_eq!(manifest.schema, BUNDLE_SCHEMA);
3560 assert_eq!(manifest.schema_version, BUNDLE_SCHEMA_VERSION);
3561 assert_eq!(manifest.kind, BundleKind::Candidate);
3562 assert!(!manifest.candidate_id.is_empty());
3563 assert_eq!(manifest.workflow.name, "version_bump");
3564 assert_eq!(manifest.workflow.package_name, "release-workflows");
3565 assert_eq!(manifest.workflow.path, BUNDLE_WORKFLOW_FILE);
3566 assert_eq!(manifest.team.as_deref(), Some("platform"));
3567 assert_eq!(manifest.repo.as_deref(), Some("burin-labs/harn"));
3568 assert_eq!(manifest.external_key, "version-bump");
3569 assert_eq!(manifest.promotion.rollout_policy, "shadow_then_canary");
3570 assert_eq!(
3571 manifest.promotion.author.as_deref(),
3572 Some("ops@example.com")
3573 );
3574 assert_eq!(
3575 manifest.promotion.approver.as_deref(),
3576 Some("lead@example.com")
3577 );
3578 assert_eq!(manifest.promotion.workflow_version, "0.1.0");
3579 assert!(manifest.deterministic_steps.len() + manifest.fuzzy_steps.len() > 0);
3580 assert_eq!(manifest.source_traces.len(), traces.len());
3581 assert_eq!(manifest.fixtures.len(), traces.len());
3582 assert!(manifest.fixtures.iter().all(|fixture| fixture.redacted));
3583 assert!(manifest.redaction.applied);
3584 assert!(manifest.redaction.fixture_count > 0);
3585 assert!(manifest
3586 .eval_pack
3587 .as_ref()
3588 .is_some_and(|eval| eval.path == BUNDLE_EVAL_PACK_FILE));
3589 assert!(manifest
3590 .required_secrets
3591 .iter()
3592 .all(|secret| !secret.is_empty()));
3593 }
3594
3595 #[test]
3596 fn write_bundle_round_trips_through_disk() {
3597 let traces = version_traces(5);
3598 let artifacts = crystallize_traces(
3599 traces.clone(),
3600 CrystallizeOptions {
3601 workflow_name: Some("version_bump".to_string()),
3602 ..CrystallizeOptions::default()
3603 },
3604 )
3605 .unwrap();
3606 let bundle =
3607 build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3608
3609 let dir = tempfile::tempdir().unwrap();
3610 let written = write_crystallization_bundle(&bundle, dir.path()).unwrap();
3611 assert_eq!(written.candidate_id, bundle.manifest.candidate_id);
3612
3613 for relative in [
3615 BUNDLE_MANIFEST_FILE,
3616 BUNDLE_REPORT_FILE,
3617 BUNDLE_WORKFLOW_FILE,
3618 BUNDLE_EVAL_PACK_FILE,
3619 ] {
3620 assert!(dir.path().join(relative).exists(), "missing {relative}");
3621 }
3622 let fixtures_dir = dir.path().join(BUNDLE_FIXTURES_DIR);
3623 assert!(fixtures_dir.is_dir());
3624 assert_eq!(
3625 std::fs::read_dir(&fixtures_dir).unwrap().count(),
3626 traces.len()
3627 );
3628
3629 let (loaded_manifest, loaded_traces) = load_crystallization_bundle(dir.path()).unwrap();
3631 assert_eq!(loaded_manifest, bundle.manifest);
3632 assert_eq!(loaded_traces.len(), traces.len());
3633
3634 let validation = validate_crystallization_bundle(dir.path()).unwrap();
3636 assert!(
3637 validation.problems.is_empty(),
3638 "unexpected problems: {:?}",
3639 validation.problems
3640 );
3641 assert!(validation.is_ok());
3642 assert!(validation.workflow_ok && validation.report_ok);
3643 assert!(validation.fixtures_ok && validation.redaction_ok);
3644
3645 let (replay_manifest, shadow) = shadow_replay_bundle(dir.path()).unwrap();
3647 assert_eq!(replay_manifest.candidate_id, bundle.manifest.candidate_id);
3648 assert!(shadow.pass, "shadow should still pass");
3649 assert_eq!(shadow.compared_traces, traces.len());
3650 }
3651
3652 #[test]
3653 fn validate_rejects_bundle_with_missing_workflow() {
3654 let traces = version_traces(3);
3655 let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
3656 let bundle =
3657 build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3658
3659 let dir = tempfile::tempdir().unwrap();
3660 write_crystallization_bundle(&bundle, dir.path()).unwrap();
3661 std::fs::remove_file(dir.path().join(BUNDLE_WORKFLOW_FILE)).unwrap();
3662
3663 let validation = validate_crystallization_bundle(dir.path()).unwrap();
3664 assert!(!validation.is_ok());
3665 assert!(validation
3666 .problems
3667 .iter()
3668 .any(|problem| problem.contains("missing workflow file")));
3669 }
3670
3671 #[test]
3672 fn validate_rejects_bundle_with_unredacted_fixture() {
3673 let traces = version_traces(3);
3674 let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
3675 let mut bundle =
3676 build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3677 bundle.manifest.fixtures[0].redacted = false;
3681 let dir = tempfile::tempdir().unwrap();
3682 write_crystallization_bundle(&bundle, dir.path()).unwrap();
3683
3684 let validation = validate_crystallization_bundle(dir.path()).unwrap();
3685 assert!(!validation.is_ok());
3686 assert!(validation
3687 .problems
3688 .iter()
3689 .any(|problem| problem.contains("not marked redacted")));
3690 }
3691
3692 #[test]
3693 fn validate_rejects_unsupported_schema_version() {
3694 let traces = version_traces(3);
3695 let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
3696 let mut bundle =
3697 build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3698 bundle.manifest.schema_version = BUNDLE_SCHEMA_VERSION + 1;
3699 let dir = tempfile::tempdir().unwrap();
3700 write_crystallization_bundle(&bundle, dir.path()).unwrap();
3701
3702 let validation = validate_crystallization_bundle(dir.path()).unwrap();
3703 assert!(!validation.is_ok());
3704 assert!(validation
3705 .problems
3706 .iter()
3707 .any(|problem| problem.contains("schema_version")));
3708 }
3709
3710 #[test]
3711 fn redacts_secret_like_values_in_fixtures() {
3712 let slack_prefix = format!("{}{}", "xo", "xb-");
3716 let github_prefix = format!("{}{}", "gh", "p_");
3717 let openai_prefix = "sk-".to_string();
3718 let pad = "A".repeat(48);
3719 let slack_secret = format!("{slack_prefix}1234567890-{pad}");
3720 let github_secret = format!("{github_prefix}{pad}");
3721 let openai_secret = format!("{openai_prefix}{pad}");
3722
3723 let mut secret_action = CrystallizationAction {
3724 id: "secret".to_string(),
3725 kind: "tool_call".to_string(),
3726 name: "post_release_to_slack".to_string(),
3727 parameters: BTreeMap::from([
3728 ("slack_token".to_string(), json!(slack_secret)),
3729 ("channel".to_string(), json!("#releases")),
3730 ]),
3731 inputs: json!({
3732 "authorization": format!("Bearer {github_secret}"),
3733 "version": "0.7.1",
3734 }),
3735 ..CrystallizationAction::default()
3736 };
3737 secret_action
3738 .metadata
3739 .insert("api_key".to_string(), json!(openai_secret));
3740
3741 let mut trace = CrystallizationTrace {
3742 id: "trace_secret".to_string(),
3743 actions: vec![secret_action],
3744 ..CrystallizationTrace::default()
3745 };
3746 redact_trace_for_bundle(&mut trace);
3747 let action = &trace.actions[0];
3748 assert_eq!(
3749 action.parameters.get("slack_token"),
3750 Some(&json!("[redacted]"))
3751 );
3752 assert_eq!(action.parameters.get("channel"), Some(&json!("#releases")));
3753 let inputs = action.inputs.as_object().unwrap();
3754 assert_eq!(inputs.get("authorization"), Some(&json!("[redacted]")));
3755 assert_eq!(inputs.get("version"), Some(&json!("0.7.1")));
3756 assert_eq!(action.metadata.get("api_key"), Some(&json!("[redacted]")));
3757 }
3758
3759 #[test]
3760 fn plan_only_fixture_yields_plan_only_kind() {
3761 let traces = (0..3)
3762 .map(|idx| plan_only_trace(&format!("plan_{idx}"), &format!("{idx}")))
3763 .collect::<Vec<_>>();
3764 let artifacts = crystallize_traces(
3765 traces.clone(),
3766 CrystallizeOptions {
3767 workflow_name: Some("plan_only_triage".to_string()),
3768 ..CrystallizeOptions::default()
3769 },
3770 )
3771 .unwrap();
3772 let bundle =
3773 build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3774 assert_eq!(bundle.manifest.kind, BundleKind::PlanOnly);
3775 assert_eq!(bundle.manifest.risk_level, "low");
3776 }
3777
3778 #[test]
3779 fn rejected_bundle_has_rejected_kind() {
3780 let traces = vec![
3781 version_trace("trace_a", "0.7.1", "release-branch", false),
3782 version_trace("trace_b", "0.7.2", "main", false),
3783 version_trace("trace_c", "0.7.3", "release-branch", false),
3784 ];
3785 let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
3786 let bundle =
3787 build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3788 assert_eq!(bundle.manifest.kind, BundleKind::Rejected);
3789 assert!(bundle.manifest.candidate_id.is_empty());
3790 assert!(!bundle.manifest.rejection_reasons.is_empty());
3791 assert!(bundle.fixtures.is_empty());
3792 }
3793
3794 #[test]
3795 fn validate_round_trips_rejected_bundle() {
3796 let traces = vec![
3797 version_trace("trace_a", "0.7.1", "release-branch", false),
3798 version_trace("trace_b", "0.7.2", "main", false),
3799 version_trace("trace_c", "0.7.3", "release-branch", false),
3800 ];
3801 let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
3802 let bundle =
3803 build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3804 let dir = tempfile::tempdir().unwrap();
3805 write_crystallization_bundle(&bundle, dir.path()).unwrap();
3806
3807 let validation = validate_crystallization_bundle(dir.path()).unwrap();
3808 assert!(validation.is_ok(), "{:?}", validation.problems);
3809 assert_eq!(validation.kind, BundleKind::Rejected);
3810 }
3811
3812 #[test]
3813 fn shadow_replay_fails_when_fixture_diverges() {
3814 let traces = version_traces(3);
3815 let artifacts = crystallize_traces(traces.clone(), CrystallizeOptions::default()).unwrap();
3816 let bundle =
3817 build_crystallization_bundle(artifacts, &traces, BundleOptions::default()).unwrap();
3818 let dir = tempfile::tempdir().unwrap();
3819 write_crystallization_bundle(&bundle, dir.path()).unwrap();
3820
3821 let fixture_dir = dir.path().join(BUNDLE_FIXTURES_DIR);
3825 let some_fixture = std::fs::read_dir(&fixture_dir)
3826 .unwrap()
3827 .next()
3828 .unwrap()
3829 .unwrap()
3830 .path();
3831 let mut tampered: CrystallizationTrace =
3832 serde_json::from_slice(&std::fs::read(&some_fixture).unwrap()).unwrap();
3833 tampered.actions.truncate(1);
3834 std::fs::write(&some_fixture, serde_json::to_vec_pretty(&tampered).unwrap()).unwrap();
3835
3836 let (_, shadow) = shadow_replay_bundle(dir.path()).unwrap();
3837 assert!(!shadow.pass);
3838 assert!(!shadow.failures.is_empty());
3839 }
3840}