1use std::collections::{BTreeMap, BTreeSet};
4
5use serde::{Deserialize, Serialize};
6
7use crate::stdlib::harn_entry::{call_harn_export_json, call_harn_export_typed};
8
9use super::{
10 handoff_artifact_record, handoff_from_json_value, microcompact_tool_output, new_id,
11 normalize_handoff_artifact_json, now_rfc3339, ContextPolicy, StageContract,
12 VerificationContract,
13};
14
15pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
17 let max_chars = max_tokens * 4;
18 if let Some(ref text) = artifact.text {
19 if text.len() > max_chars && max_chars >= 200 {
20 artifact.text = Some(microcompact_tool_output(text, max_chars));
21 artifact.estimated_tokens = Some(max_tokens);
22 }
23 }
24}
25
26pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
29 let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
30 artifacts.retain(|artifact| {
31 let text = artifact.text.as_deref().unwrap_or("");
32 if text.is_empty() {
33 return true;
34 }
35 let hash = {
36 use std::hash::{Hash, Hasher};
37 let mut hasher = std::collections::hash_map::DefaultHasher::new();
38 text.hash(&mut hasher);
39 hasher.finish()
40 };
41 seen_hashes.insert(hash)
42 });
43}
44
45pub fn select_artifacts_adaptive(
48 mut artifacts: Vec<ArtifactRecord>,
49 policy: &ContextPolicy,
50) -> Vec<ArtifactRecord> {
51 drop_stale_evidence_artifacts(&mut artifacts);
52 dedup_artifacts(&mut artifacts);
53
54 if let Some(max_tokens) = policy.max_tokens {
57 let count = artifacts.len().max(1);
58 let per_artifact_budget = max_tokens / count;
59 let cap = per_artifact_budget.max(500).min(max_tokens);
60 for artifact in &mut artifacts {
61 let est = artifact.estimated_tokens.unwrap_or(0);
62 if est > cap * 2 {
63 microcompact_artifact(artifact, cap);
64 }
65 }
66 }
67
68 select_artifacts(artifacts, policy)
69}
70
71fn metadata_string_list(artifact: &ArtifactRecord, key: &str) -> Vec<String> {
72 artifact
73 .metadata
74 .get(key)
75 .and_then(|value| value.as_array())
76 .map(|items| {
77 items
78 .iter()
79 .filter_map(|item| item.as_str())
80 .map(str::trim)
81 .filter(|value| !value.is_empty())
82 .map(ToOwned::to_owned)
83 .collect::<Vec<_>>()
84 })
85 .unwrap_or_default()
86}
87
88fn drop_stale_evidence_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
89 let fresh_changed_paths: BTreeSet<String> = artifacts
90 .iter()
91 .filter(|artifact| freshness_rank(artifact.freshness.as_deref()) >= 2)
92 .flat_map(|artifact| metadata_string_list(artifact, "changed_paths"))
93 .collect();
94 if fresh_changed_paths.is_empty() {
95 return;
96 }
97
98 artifacts.retain(|artifact| {
99 let evidence_paths = metadata_string_list(artifact, "evidence_paths");
100 if evidence_paths.is_empty() {
101 return true;
102 }
103 if freshness_rank(artifact.freshness.as_deref()) >= 2 {
104 return true;
105 }
106 !evidence_paths
107 .iter()
108 .any(|path| fresh_changed_paths.contains(path))
109 });
110}
111
/// Maps an artifact kind onto its canonical spelling.
///
/// * Short aliases are expanded: `file` -> `workspace_file`,
///   `transcript` -> `transcript_summary`, `verification` ->
///   `verification_result`, `test` -> `test_result`.
/// * An empty or whitespace-only kind becomes `artifact`.
/// * Every other kind is returned unchanged. That covers the canonical kinds
///   (`resource`, `handoff`, `workspace_file`, `editor_selection`,
///   `workspace_snapshot`, `transcript_summary`, `summary`, `plan`, `diff`,
///   `git_diff`, `patch`, `patch_set`, `patch_proposal`, `diff_review`,
///   `review_decision`, `verification_bundle`, `apply_intent`,
///   `verification_result`, `test_result`, `command_result`,
///   `provider_payload`, `worker_result`, `worker_notification`, `artifact`)
///   as well as unrecognized custom kinds.
fn normalize_artifact_kind(kind: &str) -> String {
    let canonical = match kind {
        "file" => "workspace_file",
        "transcript" => "transcript_summary",
        "verification" => "verification_result",
        "test" => "test_result",
        blank if blank.trim().is_empty() => "artifact",
        passthrough => passthrough,
    };
    canonical.to_string()
}
146
/// Returns the default selection priority for an artifact kind.
///
/// Higher numbers sort earlier in [`select_artifacts`]. Kinds not listed in
/// the table below receive the fallback priority of 40.
fn default_artifact_priority(kind: &str) -> i64 {
    // Priority tiers, highest first. Each kind appears in exactly one tier.
    const TIERS: &[(&[&str], i64)] = &[
        (&["verification_result", "test_result"], 100),
        (&["verification_bundle"], 95),
        (&["handoff"], 92),
        (
            &[
                "diff",
                "git_diff",
                "patch",
                "patch_set",
                "patch_proposal",
                "diff_review",
                "review_decision",
                "apply_intent",
            ],
            90,
        ),
        (&["plan"], 80),
        (
            &[
                "workspace_file",
                "workspace_snapshot",
                "editor_selection",
                "resource",
            ],
            70,
        ),
        (&["summary", "transcript_summary"], 60),
        (&["command_result"], 50),
    ];

    for &(kinds, priority) in TIERS {
        if kinds.contains(&kind) {
            return priority;
        }
    }
    40
}
161
/// Converts a freshness label into a numeric rank (higher is fresher).
///
/// `fresh` and `live` rank 3, `recent` ranks 2, `stale` ranks 0. A missing
/// label or any other label ranks 1.
fn freshness_rank(value: Option<&str>) -> i64 {
    match value {
        Some("fresh" | "live") => 3,
        Some("recent") => 2,
        Some("stale") => 0,
        Some(_) | None => 1,
    }
}
170
/// A single artifact that can be selected into, and rendered as part of, a
/// prompt context.
///
/// `#[serde(default)]` means any field missing from the input is filled with
/// its `Default` value on deserialization; [`ArtifactRecord::normalize`] then
/// replaces the empty defaults with meaningful values.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    /// Record type tag, serialized under the key `_type`.
    #[serde(rename = "_type")]
    pub type_name: String,
    /// Artifact identifier; compared against the policy's pinned ids during selection.
    pub id: String,
    /// Artifact kind (see `normalize_artifact_kind` for the canonical spellings).
    pub kind: String,
    /// Optional display title; rendering falls back to "`kind` `id`" when absent.
    pub title: Option<String>,
    /// Optional textual body; used for deduplication and token estimation.
    pub text: Option<String>,
    /// Optional structured payload; rendered as the body when `text` is absent.
    pub data: Option<serde_json::Value>,
    /// Optional origin label; rendered as `unknown` when absent.
    pub source: Option<String>,
    /// Creation timestamp string; `normalize` fills it from `now_rfc3339()` when empty.
    pub created_at: String,
    /// Optional freshness label, interpreted by `freshness_rank`.
    pub freshness: Option<String>,
    /// Optional selection priority; higher values sort earlier in `select_artifacts`.
    pub priority: Option<i64>,
    /// NOTE(review): presumably the ids of the artifacts this one derives from —
    /// not read anywhere in this part of the file; confirm against callers.
    pub lineage: Vec<String>,
    /// Optional relevance score; higher values sort earlier as a late tie-breaker.
    pub relevance: Option<f64>,
    /// Optional token estimate; `normalize` derives it from the text length when absent.
    pub estimated_tokens: Option<usize>,
    /// Optional workflow stage name; matched against the policy's `include_stages`.
    pub stage: Option<String>,
    /// Free-form metadata. The keys `changed_paths` and `evidence_paths` are
    /// read as string lists when dropping stale evidence.
    pub metadata: BTreeMap<String, serde_json::Value>,
}
191
192impl ArtifactRecord {
193 pub fn redact_in_place(&mut self, policy: &crate::redact::RedactionPolicy) {
198 if let Some(text) = self.text.as_mut() {
199 let scrubbed = policy.redact_string(text);
200 if let std::borrow::Cow::Owned(replacement) = scrubbed {
201 *text = replacement;
202 }
203 }
204 if let Some(data) = self.data.as_mut() {
205 policy.redact_json_in_place(data);
206 }
207 for (key, value) in self.metadata.iter_mut() {
208 if policy.field_is_sensitive(key) {
209 *value = serde_json::Value::String(crate::redact::REDACTED_PLACEHOLDER.to_string());
210 } else {
211 policy.redact_json_in_place(value);
212 }
213 }
214 }
215
216 pub fn normalize(mut self) -> Self {
217 if self.type_name.is_empty() {
218 self.type_name = "artifact".to_string();
219 }
220 if self.id.is_empty() {
221 self.id = new_id("artifact");
222 }
223 if self.created_at.is_empty() {
224 self.created_at = now_rfc3339();
225 }
226 if self.kind.is_empty() {
227 self.kind = "artifact".to_string();
228 }
229 self.kind = normalize_artifact_kind(&self.kind);
230 if self.estimated_tokens.is_none() {
231 self.estimated_tokens = self
232 .text
233 .as_ref()
234 .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
235 }
236 if self.priority.is_none() {
237 self.priority = Some(default_artifact_priority(&self.kind));
238 }
239 self
240 }
241}
242
243pub fn select_artifacts(
244 mut artifacts: Vec<ArtifactRecord>,
245 policy: &ContextPolicy,
246) -> Vec<ArtifactRecord> {
247 artifacts.retain(|artifact| {
248 (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
249 && !policy.exclude_kinds.contains(&artifact.kind)
250 && (policy.include_stages.is_empty()
251 || artifact
252 .stage
253 .as_ref()
254 .is_some_and(|stage| policy.include_stages.contains(stage)))
255 });
256 artifacts.sort_by(|a, b| {
257 let b_pinned = policy.pinned_ids.contains(&b.id);
258 let a_pinned = policy.pinned_ids.contains(&a.id);
259 b_pinned
260 .cmp(&a_pinned)
261 .then_with(|| {
262 let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
263 let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
264 b_prio_kind.cmp(&a_prio_kind)
265 })
266 .then_with(|| {
267 b.priority
268 .unwrap_or_default()
269 .cmp(&a.priority.unwrap_or_default())
270 })
271 .then_with(|| {
272 if policy.prefer_fresh {
273 freshness_rank(b.freshness.as_deref())
274 .cmp(&freshness_rank(a.freshness.as_deref()))
275 } else {
276 std::cmp::Ordering::Equal
277 }
278 })
279 .then_with(|| {
280 if policy.prefer_recent {
281 b.created_at.cmp(&a.created_at)
282 } else {
283 std::cmp::Ordering::Equal
284 }
285 })
286 .then_with(|| {
287 b.relevance
288 .partial_cmp(&a.relevance)
289 .unwrap_or(std::cmp::Ordering::Equal)
290 })
291 .then_with(|| {
292 a.estimated_tokens
293 .unwrap_or(usize::MAX)
294 .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
295 })
296 });
297
298 let mut selected = Vec::new();
299 let mut used_tokens = 0usize;
300 let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
301 let effective_max_tokens = policy
302 .max_tokens
303 .map(|max| max.saturating_sub(reserve_tokens));
304 for artifact in artifacts {
305 if let Some(max_artifacts) = policy.max_artifacts {
306 if selected.len() >= max_artifacts {
307 break;
308 }
309 }
310 let next_tokens = artifact.estimated_tokens.unwrap_or(0);
311 if let Some(max_tokens) = effective_max_tokens {
312 if used_tokens + next_tokens > max_tokens {
313 continue;
314 }
315 }
316 used_tokens += next_tokens;
317 selected.push(artifact);
318 }
319 selected
320}
321
322pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
323 let mut parts = Vec::new();
324 for artifact in artifacts {
325 let title = artifact
326 .title
327 .clone()
328 .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
329 let body = artifact
330 .text
331 .clone()
332 .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
333 .unwrap_or_default();
334 match policy.render.as_deref() {
335 Some("json") => {
336 parts.push(
337 serde_json::json!({
338 "id": artifact.id,
339 "kind": artifact.kind,
340 "title": title,
341 "source": artifact.source,
342 "freshness": artifact.freshness,
343 "priority": artifact.priority,
344 "text": body,
345 })
346 .to_string(),
347 );
348 }
349 _ => parts.push(format!(
350 "<artifact>\n<title>{}</title>\n<kind>{}</kind>\n<source>{}</source>\n\
351<freshness>{}</freshness>\n<priority>{}</priority>\n<body>\n{}\n</body>\n</artifact>",
352 escape_prompt_text(&title),
353 escape_prompt_text(&artifact.kind),
354 escape_prompt_text(
355 artifact
356 .source
357 .clone()
358 .unwrap_or_else(|| "unknown".to_string())
359 .as_str(),
360 ),
361 escape_prompt_text(
362 artifact
363 .freshness
364 .clone()
365 .unwrap_or_else(|| "normal".to_string())
366 .as_str(),
367 ),
368 artifact.priority.unwrap_or_default(),
369 body
370 )),
371 }
372 }
373 parts.join("\n\n")
374}
375
/// Result of `select_workflow_stage_artifacts`, deserialized from the value
/// returned by the `workflow_select_stage_artifacts` export.
#[derive(Clone, Debug, Deserialize, PartialEq)]
pub struct SelectedWorkflowStageArtifacts {
    /// The artifacts chosen for the stage.
    pub artifacts: Vec<ArtifactRecord>,
    /// The context policy returned alongside the selection.
    /// NOTE(review): presumably the effective policy after the input contract
    /// is applied — confirm against the `std/workflow/context` module.
    pub context_policy: ContextPolicy,
}
381
382pub async fn select_workflow_stage_artifacts(
383 artifacts: &[ArtifactRecord],
384 context_policy: &ContextPolicy,
385 input_contract: &StageContract,
386) -> Result<SelectedWorkflowStageArtifacts, crate::value::VmError> {
387 let payload = serde_json::json!({
388 "artifacts": artifacts,
389 "context_policy": context_policy,
390 "input_contract": input_contract,
391 });
392 let mut selected: SelectedWorkflowStageArtifacts = call_harn_export_typed(
393 "std/workflow/context",
394 "workflow_select_stage_artifacts",
395 "workflow_select_stage_artifacts",
396 payload,
397 )
398 .await?;
399 selected.artifacts = selected
400 .artifacts
401 .into_iter()
402 .map(ArtifactRecord::normalize)
403 .collect();
404 Ok(selected)
405}
406
/// Result of `prepare_workflow_stage_prompt`: the three string fields read
/// from the dict returned by the `workflow_prepare_stage_prompt` export.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PreparedWorkflowStagePrompt {
    /// Value of the returned `prompt` field.
    pub prompt: String,
    /// Value of the returned `rendered_context` field.
    pub rendered_context: String,
    /// Value of the returned `rendered_verification` field.
    pub rendered_verification: String,
}
413
414pub async fn prepare_workflow_stage_prompt(
415 task: &str,
416 task_label: Option<&str>,
417 artifacts: &[ArtifactRecord],
418 context_policy: &ContextPolicy,
419 rendered_context: Option<&str>,
420 verification_contracts: &[VerificationContract],
421) -> Result<PreparedWorkflowStagePrompt, crate::value::VmError> {
422 let payload = serde_json::json!({
423 "task": task,
424 "task_label": task_label,
425 "artifacts": artifacts,
426 "context_policy": context_policy,
427 "rendered_context": rendered_context,
428 "verification_contracts": verification_contracts,
429 });
430 let prepared = call_harn_export_json(
431 "std/workflow/prompts",
432 "workflow_prepare_stage_prompt",
433 "workflow_prepare_stage_prompt",
434 payload,
435 )
436 .await?;
437 let prepared = prepared.as_object().ok_or_else(|| {
438 crate::value::VmError::Runtime(
439 "workflow_prepare_stage_prompt must return a dict".to_string(),
440 )
441 })?;
442 Ok(PreparedWorkflowStagePrompt {
443 prompt: workflow_prompt_string_field(prepared, "prompt")?,
444 rendered_context: workflow_prompt_string_field(prepared, "rendered_context")?,
445 rendered_verification: workflow_prompt_string_field(prepared, "rendered_verification")?,
446 })
447}
448
449fn workflow_prompt_string_field(
450 value: &serde_json::Map<String, serde_json::Value>,
451 field: &str,
452) -> Result<String, crate::value::VmError> {
453 value
454 .get(field)
455 .and_then(serde_json::Value::as_str)
456 .map(ToOwned::to_owned)
457 .ok_or_else(|| {
458 crate::value::VmError::Runtime(format!(
459 "workflow_prepare_stage_prompt must return string field '{field}'"
460 ))
461 })
462}
463
/// Escapes the markup-significant characters `&`, `<`, and `>` in `text`.
///
/// The result is safe to embed between the tags emitted by
/// `render_artifacts_context`: `&` becomes `&amp;`, `<` becomes `&lt;`, and
/// `>` becomes `&gt;`. All other characters are copied unchanged.
fn escape_prompt_text(text: &str) -> String {
    // One pass over the input, so an ampersand produced by an escape is never
    // escaped a second time and only one output buffer is allocated.
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
469
470pub fn normalize_artifact(
471 value: &crate::value::VmValue,
472) -> Result<ArtifactRecord, crate::value::VmError> {
473 let artifact: ArtifactRecord = super::parse_json_value(value)?;
474 let artifact = artifact.normalize();
475 if artifact.kind == "handoff" {
476 let json = serde_json::to_value(&artifact).map_err(|error| {
477 crate::value::VmError::Runtime(format!("artifact handoff encode error: {error}"))
478 })?;
479 let handoff = handoff_from_json_value(&json)
480 .or_else(|| {
481 artifact
482 .data
483 .as_ref()
484 .and_then(|data| normalize_handoff_artifact_json(data.clone()).ok())
485 })
486 .ok_or_else(|| {
487 crate::value::VmError::Runtime(
488 "artifact handoff data must contain a valid handoff payload".to_string(),
489 )
490 })?;
491 return Ok(handoff_artifact_record(&handoff, Some(&artifact)));
492 }
493 Ok(artifact)
494}