1use std::collections::{BTreeMap, BTreeSet};
4
5use serde::{Deserialize, Serialize};
6
7use crate::stdlib::harn_entry::{call_harn_export_json, call_harn_export_typed};
8use crate::stdlib::xml::escape_xml_text;
9
10use super::{
11 handoff_artifact_record, handoff_from_json_value, microcompact_tool_output, new_id,
12 normalize_handoff_artifact_json, now_rfc3339, ContextPolicy, StageContract,
13 VerificationContract,
14};
15
16pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
18 let max_chars = max_tokens * 4;
19 if let Some(ref text) = artifact.text {
20 if text.len() > max_chars && max_chars >= 200 {
21 artifact.text = Some(microcompact_tool_output(text, max_chars));
22 artifact.estimated_tokens = Some(max_tokens);
23 }
24 }
25}
26
27pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
30 let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
31 artifacts.retain(|artifact| {
32 let text = artifact.text.as_deref().unwrap_or("");
33 if text.is_empty() {
34 return true;
35 }
36 let hash = {
37 use std::hash::{Hash, Hasher};
38 let mut hasher = std::collections::hash_map::DefaultHasher::new();
39 text.hash(&mut hasher);
40 hasher.finish()
41 };
42 seen_hashes.insert(hash)
43 });
44}
45
46pub fn select_artifacts_adaptive(
49 mut artifacts: Vec<ArtifactRecord>,
50 policy: &ContextPolicy,
51) -> Vec<ArtifactRecord> {
52 drop_stale_evidence_artifacts(&mut artifacts);
53 dedup_artifacts(&mut artifacts);
54
55 if let Some(max_tokens) = policy.max_tokens {
58 let count = artifacts.len().max(1);
59 let per_artifact_budget = max_tokens / count;
60 let cap = per_artifact_budget.max(500).min(max_tokens);
61 for artifact in &mut artifacts {
62 let est = artifact.estimated_tokens.unwrap_or(0);
63 if est > cap * 2 {
64 microcompact_artifact(artifact, cap);
65 }
66 }
67 }
68
69 select_artifacts(artifacts, policy)
70}
71
72fn metadata_string_list(artifact: &ArtifactRecord, key: &str) -> Vec<String> {
73 artifact
74 .metadata
75 .get(key)
76 .and_then(|value| value.as_array())
77 .map(|items| {
78 items
79 .iter()
80 .filter_map(|item| item.as_str())
81 .map(str::trim)
82 .filter(|value| !value.is_empty())
83 .map(ToOwned::to_owned)
84 .collect::<Vec<_>>()
85 })
86 .unwrap_or_default()
87}
88
89fn drop_stale_evidence_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
90 let fresh_changed_paths: BTreeSet<String> = artifacts
91 .iter()
92 .filter(|artifact| freshness_rank(artifact.freshness.as_deref()) >= 2)
93 .flat_map(|artifact| metadata_string_list(artifact, "changed_paths"))
94 .collect();
95 if fresh_changed_paths.is_empty() {
96 return;
97 }
98
99 artifacts.retain(|artifact| {
100 let evidence_paths = metadata_string_list(artifact, "evidence_paths");
101 if evidence_paths.is_empty() {
102 return true;
103 }
104 if freshness_rank(artifact.freshness.as_deref()) >= 2 {
105 return true;
106 }
107 !evidence_paths
108 .iter()
109 .any(|path| fresh_changed_paths.contains(path))
110 });
111}
112
/// Maps an artifact kind to its canonical spelling.
///
/// Canonical kinds are returned unchanged, the short aliases `file`,
/// `transcript`, `verification` and `test` are expanded, and an empty or
/// whitespace-only kind becomes `artifact`. Any other value is passed through
/// verbatim (no trimming or case folding is applied).
fn normalize_artifact_kind(kind: &str) -> String {
    const CANONICAL_KINDS: &[&str] = &[
        "resource",
        "handoff",
        "workspace_file",
        "editor_selection",
        "workspace_snapshot",
        "transcript_summary",
        "summary",
        "plan",
        "diff",
        "git_diff",
        "patch",
        "patch_set",
        "patch_proposal",
        "diff_review",
        "review_decision",
        "verification_bundle",
        "apply_intent",
        "verification_result",
        "test_result",
        "command_result",
        "provider_payload",
        "worker_result",
        "worker_notification",
        "artifact",
    ];

    if CANONICAL_KINDS.iter().any(|known| *known == kind) {
        return kind.to_owned();
    }

    let resolved = match kind {
        "file" => "workspace_file",
        "transcript" => "transcript_summary",
        "verification" => "verification_result",
        "test" => "test_result",
        blank if blank.trim().is_empty() => "artifact",
        unknown => unknown,
    };
    resolved.to_owned()
}
147
/// Returns the priority assigned to an artifact kind when none is set.
///
/// Kinds are grouped into tiers (verification/test results highest, command
/// results lowest among the named kinds); any kind not listed in a tier gets
/// 40.
fn default_artifact_priority(kind: &str) -> i64 {
    const FALLBACK: i64 = 40;
    const TIERS: &[(i64, &[&str])] = &[
        (100, &["verification_result", "test_result"]),
        (95, &["verification_bundle"]),
        (92, &["handoff"]),
        (
            90,
            &[
                "diff",
                "git_diff",
                "patch",
                "patch_set",
                "patch_proposal",
                "diff_review",
                "review_decision",
                "apply_intent",
            ],
        ),
        (80, &["plan"]),
        (
            70,
            &[
                "workspace_file",
                "workspace_snapshot",
                "editor_selection",
                "resource",
            ],
        ),
        (60, &["summary", "transcript_summary"]),
        (50, &["command_result"]),
    ];

    for (priority, kinds) in TIERS {
        if kinds.iter().any(|known| *known == kind) {
            return *priority;
        }
    }
    FALLBACK
}
162
/// Converts a freshness label into a sortable rank (higher is fresher).
///
/// `fresh` and `live` rank 3, `recent` ranks 2, `stale` ranks 0. A missing
/// label and every unrecognised label rank 1.
fn freshness_rank(value: Option<&str>) -> i64 {
    match value {
        Some("fresh") | Some("live") => 3,
        Some("recent") => 2,
        Some("stale") => 0,
        Some(_) | None => 1,
    }
}
171
/// A single unit of context (file, diff, test result, summary, ...) that can
/// be selected and rendered for a workflow stage.
///
/// `#[serde(default)]` means any field missing from the serialized form takes
/// its `Default` value; [`ArtifactRecord::normalize`] then fills the fields
/// that must not stay empty.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    /// Type tag, serialized under the key `_type`. `normalize` sets it to
    /// `"artifact"` when empty.
    #[serde(rename = "_type")]
    pub type_name: String,
    /// Identifier; `normalize` generates one when empty. Compared against
    /// `ContextPolicy::pinned_ids` during selection.
    pub id: String,
    /// Artifact kind; `normalize` canonicalizes it (e.g. `file` becomes
    /// `workspace_file`).
    pub kind: String,
    /// Optional heading used when rendering; rendering falls back to
    /// `"<kind> <id>"` when absent.
    pub title: Option<String>,
    /// Textual payload. Used for deduplication, token estimation and as the
    /// rendered body.
    pub text: Option<String>,
    /// Structured payload; rendered as JSON text when `text` is absent.
    pub data: Option<serde_json::Value>,
    /// Origin of the artifact; rendered as `unknown` when absent in the XML
    /// form.
    pub source: Option<String>,
    /// Creation timestamp as a string; `normalize` fills it from
    /// `now_rfc3339()` when empty. Compared as a plain string when the policy
    /// prefers recent artifacts.
    pub created_at: String,
    /// Freshness label. `fresh`/`live`, `recent` and `stale` are recognised
    /// by `freshness_rank`; anything else ranks as neutral.
    pub freshness: Option<String>,
    /// Selection priority (higher sorts first); `normalize` derives a default
    /// from `kind` when absent.
    pub priority: Option<i64>,
    /// Not read in this part of the file — presumably ids of the artifacts
    /// this one was derived from; TODO confirm against producers.
    pub lineage: Vec<String>,
    /// Relevance score used as a late tie-breaker during selection (higher
    /// sorts first).
    pub relevance: Option<f64>,
    /// Approximate token count; `normalize` derives it from the byte length
    /// of `text` divided by four (rounded up) when absent.
    pub estimated_tokens: Option<usize>,
    /// Workflow stage that produced the artifact; matched against
    /// `ContextPolicy::include_stages`.
    pub stage: Option<String>,
    /// Free-form metadata. The keys `changed_paths` and `evidence_paths` are
    /// read as string lists by the stale-evidence filter.
    pub metadata: BTreeMap<String, serde_json::Value>,
}
192
193impl ArtifactRecord {
194 pub fn redact_in_place(&mut self, policy: &crate::redact::RedactionPolicy) {
199 if let Some(text) = self.text.as_mut() {
200 let scrubbed = policy.redact_string(text);
201 if let std::borrow::Cow::Owned(replacement) = scrubbed {
202 *text = replacement;
203 }
204 }
205 if let Some(data) = self.data.as_mut() {
206 policy.redact_json_in_place(data);
207 }
208 for (key, value) in self.metadata.iter_mut() {
209 if policy.field_is_sensitive(key) {
210 *value = serde_json::Value::String(crate::redact::REDACTED_PLACEHOLDER.to_string());
211 } else {
212 policy.redact_json_in_place(value);
213 }
214 }
215 }
216
217 pub fn normalize(mut self) -> Self {
218 if self.type_name.is_empty() {
219 self.type_name = "artifact".to_string();
220 }
221 if self.id.is_empty() {
222 self.id = new_id("artifact");
223 }
224 if self.created_at.is_empty() {
225 self.created_at = now_rfc3339();
226 }
227 if self.kind.is_empty() {
228 self.kind = "artifact".to_string();
229 }
230 self.kind = normalize_artifact_kind(&self.kind);
231 if self.estimated_tokens.is_none() {
232 self.estimated_tokens = self
233 .text
234 .as_ref()
235 .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
236 }
237 if self.priority.is_none() {
238 self.priority = Some(default_artifact_priority(&self.kind));
239 }
240 self
241 }
242}
243
244pub fn select_artifacts(
245 mut artifacts: Vec<ArtifactRecord>,
246 policy: &ContextPolicy,
247) -> Vec<ArtifactRecord> {
248 artifacts.retain(|artifact| {
249 (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
250 && !policy.exclude_kinds.contains(&artifact.kind)
251 && (policy.include_stages.is_empty()
252 || artifact
253 .stage
254 .as_ref()
255 .is_some_and(|stage| policy.include_stages.contains(stage)))
256 });
257 artifacts.sort_by(|a, b| {
258 let b_pinned = policy.pinned_ids.contains(&b.id);
259 let a_pinned = policy.pinned_ids.contains(&a.id);
260 b_pinned
261 .cmp(&a_pinned)
262 .then_with(|| {
263 let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
264 let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
265 b_prio_kind.cmp(&a_prio_kind)
266 })
267 .then_with(|| {
268 b.priority
269 .unwrap_or_default()
270 .cmp(&a.priority.unwrap_or_default())
271 })
272 .then_with(|| {
273 if policy.prefer_fresh {
274 freshness_rank(b.freshness.as_deref())
275 .cmp(&freshness_rank(a.freshness.as_deref()))
276 } else {
277 std::cmp::Ordering::Equal
278 }
279 })
280 .then_with(|| {
281 if policy.prefer_recent {
282 b.created_at.cmp(&a.created_at)
283 } else {
284 std::cmp::Ordering::Equal
285 }
286 })
287 .then_with(|| {
288 b.relevance
289 .partial_cmp(&a.relevance)
290 .unwrap_or(std::cmp::Ordering::Equal)
291 })
292 .then_with(|| {
293 a.estimated_tokens
294 .unwrap_or(usize::MAX)
295 .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
296 })
297 });
298
299 let mut selected = Vec::new();
300 let mut used_tokens = 0usize;
301 let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
302 let effective_max_tokens = policy
303 .max_tokens
304 .map(|max| max.saturating_sub(reserve_tokens));
305 for artifact in artifacts {
306 if let Some(max_artifacts) = policy.max_artifacts {
307 if selected.len() >= max_artifacts {
308 break;
309 }
310 }
311 let next_tokens = artifact.estimated_tokens.unwrap_or(0);
312 if let Some(max_tokens) = effective_max_tokens {
313 if used_tokens + next_tokens > max_tokens {
314 continue;
315 }
316 }
317 used_tokens += next_tokens;
318 selected.push(artifact);
319 }
320 selected
321}
322
323pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
324 let mut parts = Vec::new();
325 for artifact in artifacts {
326 let title = artifact
327 .title
328 .clone()
329 .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
330 let body = artifact
331 .text
332 .clone()
333 .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
334 .unwrap_or_default();
335 match policy.render.as_deref() {
336 Some("json") => {
337 parts.push(
338 serde_json::json!({
339 "id": artifact.id,
340 "kind": artifact.kind,
341 "title": title,
342 "source": artifact.source,
343 "freshness": artifact.freshness,
344 "priority": artifact.priority,
345 "text": body,
346 })
347 .to_string(),
348 );
349 }
350 _ => parts.push(format!(
351 "<artifact>\n<title>{}</title>\n<kind>{}</kind>\n<source>{}</source>\n\
352<freshness>{}</freshness>\n<priority>{}</priority>\n<body>\n{}\n</body>\n</artifact>",
353 escape_xml_text(&title),
354 escape_xml_text(&artifact.kind),
355 escape_xml_text(
356 artifact
357 .source
358 .clone()
359 .unwrap_or_else(|| "unknown".to_string())
360 .as_str(),
361 ),
362 escape_xml_text(
363 artifact
364 .freshness
365 .clone()
366 .unwrap_or_else(|| "normal".to_string())
367 .as_str(),
368 ),
369 artifact.priority.unwrap_or_default(),
370 body
371 )),
372 }
373 }
374 parts.join("\n\n")
375}
376
/// Result of stage artifact selection, deserialized from the response of the
/// `workflow_select_stage_artifacts` export.
#[derive(Clone, Debug, Deserialize, PartialEq)]
pub struct SelectedWorkflowStageArtifacts {
    /// Artifacts chosen for the stage.
    pub artifacts: Vec<ArtifactRecord>,
    /// Context policy returned alongside the selection — presumably the
    /// effective policy after the input contract was applied; TODO confirm.
    pub context_policy: ContextPolicy,
}
382
383pub async fn select_workflow_stage_artifacts(
384 ctx: &crate::vm::AsyncBuiltinCtx,
385 artifacts: &[ArtifactRecord],
386 context_policy: &ContextPolicy,
387 input_contract: &StageContract,
388) -> Result<SelectedWorkflowStageArtifacts, crate::value::VmError> {
389 let payload = serde_json::json!({
390 "artifacts": artifacts,
391 "context_policy": context_policy,
392 "input_contract": input_contract,
393 });
394 let mut selected: SelectedWorkflowStageArtifacts = call_harn_export_typed(
395 ctx,
396 "std/workflow/context",
397 "workflow_select_stage_artifacts",
398 "workflow_select_stage_artifacts",
399 payload,
400 )
401 .await?;
402 selected.artifacts = selected
403 .artifacts
404 .into_iter()
405 .map(ArtifactRecord::normalize)
406 .collect();
407 Ok(selected)
408}
409
/// Strings produced by the `workflow_prepare_stage_prompt` export; each field
/// is read from the same-named string field of the export's response.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PreparedWorkflowStagePrompt {
    /// Value of the response's `prompt` field.
    pub prompt: String,
    /// Value of the response's `rendered_context` field.
    pub rendered_context: String,
    /// Value of the response's `rendered_verification` field.
    pub rendered_verification: String,
}
416
417pub async fn prepare_workflow_stage_prompt(
418 ctx: &crate::vm::AsyncBuiltinCtx,
419 task: &str,
420 task_label: Option<&str>,
421 artifacts: &[ArtifactRecord],
422 context_policy: &ContextPolicy,
423 rendered_context: Option<&str>,
424 verification_contracts: &[VerificationContract],
425) -> Result<PreparedWorkflowStagePrompt, crate::value::VmError> {
426 let payload = serde_json::json!({
427 "task": task,
428 "task_label": task_label,
429 "artifacts": artifacts,
430 "context_policy": context_policy,
431 "rendered_context": rendered_context,
432 "verification_contracts": verification_contracts,
433 });
434 let prepared = call_harn_export_json(
435 ctx,
436 "std/workflow/prompts",
437 "workflow_prepare_stage_prompt",
438 "workflow_prepare_stage_prompt",
439 payload,
440 )
441 .await?;
442 let prepared = prepared.as_object().ok_or_else(|| {
443 crate::value::VmError::Runtime(
444 "workflow_prepare_stage_prompt must return a dict".to_string(),
445 )
446 })?;
447 Ok(PreparedWorkflowStagePrompt {
448 prompt: workflow_prompt_string_field(prepared, "prompt")?,
449 rendered_context: workflow_prompt_string_field(prepared, "rendered_context")?,
450 rendered_verification: workflow_prompt_string_field(prepared, "rendered_verification")?,
451 })
452}
453
454fn workflow_prompt_string_field(
455 value: &serde_json::Map<String, serde_json::Value>,
456 field: &str,
457) -> Result<String, crate::value::VmError> {
458 value
459 .get(field)
460 .and_then(serde_json::Value::as_str)
461 .map(ToOwned::to_owned)
462 .ok_or_else(|| {
463 crate::value::VmError::Runtime(format!(
464 "workflow_prepare_stage_prompt must return string field '{field}'"
465 ))
466 })
467}
468
469pub fn normalize_artifact(
470 value: &crate::value::VmValue,
471) -> Result<ArtifactRecord, crate::value::VmError> {
472 let artifact: ArtifactRecord = super::parse_json_value(value)?;
473 let artifact = artifact.normalize();
474 if artifact.kind == "handoff" {
475 let json = serde_json::to_value(&artifact).map_err(|error| {
476 crate::value::VmError::Runtime(format!("artifact handoff encode error: {error}"))
477 })?;
478 let handoff = handoff_from_json_value(&json)
479 .or_else(|| {
480 artifact
481 .data
482 .as_ref()
483 .and_then(|data| normalize_handoff_artifact_json(data.clone()).ok())
484 })
485 .ok_or_else(|| {
486 crate::value::VmError::Runtime(
487 "artifact handoff data must contain a valid handoff payload".to_string(),
488 )
489 })?;
490 return Ok(handoff_artifact_record(&handoff, Some(&artifact)));
491 }
492 Ok(artifact)
493}