1use std::collections::{BTreeMap, BTreeSet};
4
5use serde::{Deserialize, Serialize};
6
7use crate::stdlib::harn_entry::{call_harn_export_json, call_harn_export_typed};
8
9use super::{
10 handoff_artifact_record, handoff_from_json_value, microcompact_tool_output, new_id,
11 normalize_handoff_artifact_json, now_rfc3339, ContextPolicy, StageContract,
12 VerificationContract,
13};
14
15pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
17 let max_chars = max_tokens * 4;
18 if let Some(ref text) = artifact.text {
19 if text.len() > max_chars && max_chars >= 200 {
20 artifact.text = Some(microcompact_tool_output(text, max_chars));
21 artifact.estimated_tokens = Some(max_tokens);
22 }
23 }
24}
25
26pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
29 let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
30 artifacts.retain(|artifact| {
31 let text = artifact.text.as_deref().unwrap_or("");
32 if text.is_empty() {
33 return true;
34 }
35 let hash = {
36 use std::hash::{Hash, Hasher};
37 let mut hasher = std::collections::hash_map::DefaultHasher::new();
38 text.hash(&mut hasher);
39 hasher.finish()
40 };
41 seen_hashes.insert(hash)
42 });
43}
44
45pub fn select_artifacts_adaptive(
48 mut artifacts: Vec<ArtifactRecord>,
49 policy: &ContextPolicy,
50) -> Vec<ArtifactRecord> {
51 drop_stale_evidence_artifacts(&mut artifacts);
52 dedup_artifacts(&mut artifacts);
53
54 if let Some(max_tokens) = policy.max_tokens {
57 let count = artifacts.len().max(1);
58 let per_artifact_budget = max_tokens / count;
59 let cap = per_artifact_budget.max(500).min(max_tokens);
60 for artifact in &mut artifacts {
61 let est = artifact.estimated_tokens.unwrap_or(0);
62 if est > cap * 2 {
63 microcompact_artifact(artifact, cap);
64 }
65 }
66 }
67
68 select_artifacts(artifacts, policy)
69}
70
71fn metadata_string_list(artifact: &ArtifactRecord, key: &str) -> Vec<String> {
72 artifact
73 .metadata
74 .get(key)
75 .and_then(|value| value.as_array())
76 .map(|items| {
77 items
78 .iter()
79 .filter_map(|item| item.as_str())
80 .map(str::trim)
81 .filter(|value| !value.is_empty())
82 .map(ToOwned::to_owned)
83 .collect::<Vec<_>>()
84 })
85 .unwrap_or_default()
86}
87
88fn drop_stale_evidence_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
89 let fresh_changed_paths: BTreeSet<String> = artifacts
90 .iter()
91 .filter(|artifact| freshness_rank(artifact.freshness.as_deref()) >= 2)
92 .flat_map(|artifact| metadata_string_list(artifact, "changed_paths"))
93 .collect();
94 if fresh_changed_paths.is_empty() {
95 return;
96 }
97
98 artifacts.retain(|artifact| {
99 let evidence_paths = metadata_string_list(artifact, "evidence_paths");
100 if evidence_paths.is_empty() {
101 return true;
102 }
103 if freshness_rank(artifact.freshness.as_deref()) >= 2 {
104 return true;
105 }
106 !evidence_paths
107 .iter()
108 .any(|path| fresh_changed_paths.contains(path))
109 });
110}
111
/// Maps an artifact kind to its canonical spelling.
///
/// * Short aliases are expanded: `file`, `transcript`, `verification`, `test`.
/// * A kind that is empty or whitespace-only becomes `artifact`.
/// * Everything else is returned unchanged (no trimming), including unknown
///   kinds.
fn normalize_artifact_kind(kind: &str) -> String {
    let canonical = match kind {
        "file" => "workspace_file",
        "transcript" => "transcript_summary",
        "verification" => "verification_result",
        "test" => "test_result",
        blank if blank.trim().is_empty() => "artifact",
        // Pass-through. The recognised canonical kinds are: resource, handoff,
        // workspace_file, editor_selection, workspace_snapshot,
        // transcript_summary, summary, plan, diff, git_diff, patch, patch_set,
        // patch_proposal, diff_review, review_decision, verification_bundle,
        // apply_intent, verification_result, test_result, command_result,
        // provider_payload, worker_result, worker_notification, artifact.
        // Unrecognised kinds are preserved as-is as well.
        passthrough => passthrough,
    };
    canonical.to_owned()
}
146
/// Returns the default selection priority for a canonical artifact kind.
///
/// Higher values sort earlier in `select_artifacts`. Kinds that are not
/// listed in any tier get the fallback priority of 40.
fn default_artifact_priority(kind: &str) -> i64 {
    const FALLBACK_PRIORITY: i64 = 40;
    const PRIORITY_TIERS: &[(&[&str], i64)] = &[
        (&["verification_result", "test_result"], 100),
        (&["verification_bundle"], 95),
        (&["handoff"], 92),
        (
            &[
                "diff",
                "git_diff",
                "patch",
                "patch_set",
                "patch_proposal",
                "diff_review",
                "review_decision",
                "apply_intent",
            ],
            90,
        ),
        (&["plan"], 80),
        (
            &[
                "workspace_file",
                "workspace_snapshot",
                "editor_selection",
                "resource",
            ],
            70,
        ),
        (&["summary", "transcript_summary"], 60),
        (&["command_result"], 50),
    ];

    PRIORITY_TIERS
        .iter()
        .find(|(kinds, _)| kinds.iter().any(|candidate| *candidate == kind))
        .map_or(FALLBACK_PRIORITY, |&(_, priority)| priority)
}
161
/// Converts a freshness label into a comparable rank (higher is fresher).
///
/// `fresh` and `live` rank 3, `recent` ranks 2, `stale` ranks 0. A missing
/// label and any other label (including the empty string) rank 1. Matching is
/// exact and case-sensitive.
fn freshness_rank(value: Option<&str>) -> i64 {
    match value {
        Some("fresh" | "live") => 3,
        Some("recent") => 2,
        Some("stale") => 0,
        Some(_) | None => 1,
    }
}
170
/// A single context artifact that can be selected and rendered into a prompt.
///
/// Because of `#[serde(default)]`, any field missing from the serialized form
/// takes its `Default` value on deserialization. `ArtifactRecord::normalize`
/// then fills in the empty or unset fields noted below.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    /// Record type tag, serialized under the key `_type`. `normalize` sets it
    /// to `"artifact"` when empty.
    #[serde(rename = "_type")]
    pub type_name: String,
    /// Identifier. `normalize` generates one with `new_id("artifact")` when empty.
    pub id: String,
    /// Artifact kind. `normalize` canonicalizes it via `normalize_artifact_kind`.
    pub kind: String,
    /// Optional title. Rendering falls back to `"<kind> <id>"` when absent.
    pub title: Option<String>,
    /// Optional textual body. Used for deduplication, compaction and rendering.
    pub text: Option<String>,
    /// Optional structured payload. Rendered as JSON text when `text` is absent.
    pub data: Option<serde_json::Value>,
    /// Optional origin label. Rendered as `unknown` when absent.
    pub source: Option<String>,
    /// Creation timestamp string. `normalize` fills it with `now_rfc3339()`
    /// when empty; selection compares it as a plain string.
    pub created_at: String,
    /// Optional freshness label. `freshness_rank` recognizes `fresh`, `live`,
    /// `recent` and `stale`.
    pub freshness: Option<String>,
    /// Selection priority (higher sorts first). `normalize` derives it from
    /// `kind` when unset.
    pub priority: Option<i64>,
    /// NOTE(review): presumably ids of the artifacts this one was derived
    /// from; not read anywhere in this part of the file — confirm.
    pub lineage: Vec<String>,
    /// Optional relevance score, used as a late tie-breaker in `select_artifacts`.
    pub relevance: Option<f64>,
    /// Estimated token cost. `normalize` derives it as the text byte length
    /// divided by 4, rounded up, when unset and `text` is present.
    pub estimated_tokens: Option<usize>,
    /// Optional stage name, matched against the policy's `include_stages`.
    pub stage: Option<String>,
    /// Free-form metadata. This file reads the `changed_paths` and
    /// `evidence_paths` entries as arrays of strings.
    pub metadata: BTreeMap<String, serde_json::Value>,
}
191
192impl ArtifactRecord {
193 pub fn normalize(mut self) -> Self {
194 if self.type_name.is_empty() {
195 self.type_name = "artifact".to_string();
196 }
197 if self.id.is_empty() {
198 self.id = new_id("artifact");
199 }
200 if self.created_at.is_empty() {
201 self.created_at = now_rfc3339();
202 }
203 if self.kind.is_empty() {
204 self.kind = "artifact".to_string();
205 }
206 self.kind = normalize_artifact_kind(&self.kind);
207 if self.estimated_tokens.is_none() {
208 self.estimated_tokens = self
209 .text
210 .as_ref()
211 .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
212 }
213 if self.priority.is_none() {
214 self.priority = Some(default_artifact_priority(&self.kind));
215 }
216 self
217 }
218}
219
220pub fn select_artifacts(
221 mut artifacts: Vec<ArtifactRecord>,
222 policy: &ContextPolicy,
223) -> Vec<ArtifactRecord> {
224 artifacts.retain(|artifact| {
225 (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
226 && !policy.exclude_kinds.contains(&artifact.kind)
227 && (policy.include_stages.is_empty()
228 || artifact
229 .stage
230 .as_ref()
231 .is_some_and(|stage| policy.include_stages.contains(stage)))
232 });
233 artifacts.sort_by(|a, b| {
234 let b_pinned = policy.pinned_ids.contains(&b.id);
235 let a_pinned = policy.pinned_ids.contains(&a.id);
236 b_pinned
237 .cmp(&a_pinned)
238 .then_with(|| {
239 let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
240 let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
241 b_prio_kind.cmp(&a_prio_kind)
242 })
243 .then_with(|| {
244 b.priority
245 .unwrap_or_default()
246 .cmp(&a.priority.unwrap_or_default())
247 })
248 .then_with(|| {
249 if policy.prefer_fresh {
250 freshness_rank(b.freshness.as_deref())
251 .cmp(&freshness_rank(a.freshness.as_deref()))
252 } else {
253 std::cmp::Ordering::Equal
254 }
255 })
256 .then_with(|| {
257 if policy.prefer_recent {
258 b.created_at.cmp(&a.created_at)
259 } else {
260 std::cmp::Ordering::Equal
261 }
262 })
263 .then_with(|| {
264 b.relevance
265 .partial_cmp(&a.relevance)
266 .unwrap_or(std::cmp::Ordering::Equal)
267 })
268 .then_with(|| {
269 a.estimated_tokens
270 .unwrap_or(usize::MAX)
271 .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
272 })
273 });
274
275 let mut selected = Vec::new();
276 let mut used_tokens = 0usize;
277 let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
278 let effective_max_tokens = policy
279 .max_tokens
280 .map(|max| max.saturating_sub(reserve_tokens));
281 for artifact in artifacts {
282 if let Some(max_artifacts) = policy.max_artifacts {
283 if selected.len() >= max_artifacts {
284 break;
285 }
286 }
287 let next_tokens = artifact.estimated_tokens.unwrap_or(0);
288 if let Some(max_tokens) = effective_max_tokens {
289 if used_tokens + next_tokens > max_tokens {
290 continue;
291 }
292 }
293 used_tokens += next_tokens;
294 selected.push(artifact);
295 }
296 selected
297}
298
299pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
300 let mut parts = Vec::new();
301 for artifact in artifacts {
302 let title = artifact
303 .title
304 .clone()
305 .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
306 let body = artifact
307 .text
308 .clone()
309 .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
310 .unwrap_or_default();
311 match policy.render.as_deref() {
312 Some("json") => {
313 parts.push(
314 serde_json::json!({
315 "id": artifact.id,
316 "kind": artifact.kind,
317 "title": title,
318 "source": artifact.source,
319 "freshness": artifact.freshness,
320 "priority": artifact.priority,
321 "text": body,
322 })
323 .to_string(),
324 );
325 }
326 _ => parts.push(format!(
327 "<artifact>\n<title>{}</title>\n<kind>{}</kind>\n<source>{}</source>\n\
328<freshness>{}</freshness>\n<priority>{}</priority>\n<body>\n{}\n</body>\n</artifact>",
329 escape_prompt_text(&title),
330 escape_prompt_text(&artifact.kind),
331 escape_prompt_text(
332 artifact
333 .source
334 .clone()
335 .unwrap_or_else(|| "unknown".to_string())
336 .as_str(),
337 ),
338 escape_prompt_text(
339 artifact
340 .freshness
341 .clone()
342 .unwrap_or_else(|| "normal".to_string())
343 .as_str(),
344 ),
345 artifact.priority.unwrap_or_default(),
346 body
347 )),
348 }
349 }
350 parts.join("\n\n")
351}
352
/// Result of `select_workflow_stage_artifacts`, deserialized from the response
/// of the `workflow_select_stage_artifacts` export.
#[derive(Clone, Debug, Deserialize, PartialEq)]
pub struct SelectedWorkflowStageArtifacts {
    /// Artifacts returned by the export; `select_workflow_stage_artifacts`
    /// normalizes each one before handing the result back.
    pub artifacts: Vec<ArtifactRecord>,
    /// Context policy returned alongside the artifacts.
    /// NOTE(review): presumably the effective policy after the stage's input
    /// contract has been applied — confirm against the export.
    pub context_policy: ContextPolicy,
}
358
359pub async fn select_workflow_stage_artifacts(
360 artifacts: &[ArtifactRecord],
361 context_policy: &ContextPolicy,
362 input_contract: &StageContract,
363) -> Result<SelectedWorkflowStageArtifacts, crate::value::VmError> {
364 let payload = serde_json::json!({
365 "artifacts": artifacts,
366 "context_policy": context_policy,
367 "input_contract": input_contract,
368 });
369 let mut selected: SelectedWorkflowStageArtifacts = call_harn_export_typed(
370 "std/workflow/context",
371 "workflow_select_stage_artifacts",
372 "workflow_select_stage_artifacts",
373 payload,
374 )
375 .await?;
376 selected.artifacts = selected
377 .artifacts
378 .into_iter()
379 .map(ArtifactRecord::normalize)
380 .collect();
381 Ok(selected)
382}
383
/// Result of `prepare_workflow_stage_prompt`; each field is copied from the
/// string field of the same name in the `workflow_prepare_stage_prompt`
/// export's response.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PreparedWorkflowStagePrompt {
    /// Value of the response's `prompt` field.
    pub prompt: String,
    /// Value of the response's `rendered_context` field.
    pub rendered_context: String,
    /// Value of the response's `rendered_verification` field.
    pub rendered_verification: String,
}
390
391pub async fn prepare_workflow_stage_prompt(
392 task: &str,
393 task_label: Option<&str>,
394 artifacts: &[ArtifactRecord],
395 context_policy: &ContextPolicy,
396 rendered_context: Option<&str>,
397 verification_contracts: &[VerificationContract],
398) -> Result<PreparedWorkflowStagePrompt, crate::value::VmError> {
399 let payload = serde_json::json!({
400 "task": task,
401 "task_label": task_label,
402 "artifacts": artifacts,
403 "context_policy": context_policy,
404 "rendered_context": rendered_context,
405 "verification_contracts": verification_contracts,
406 });
407 let prepared = call_harn_export_json(
408 "std/workflow/prompts",
409 "workflow_prepare_stage_prompt",
410 "workflow_prepare_stage_prompt",
411 payload,
412 )
413 .await?;
414 let prepared = prepared.as_object().ok_or_else(|| {
415 crate::value::VmError::Runtime(
416 "workflow_prepare_stage_prompt must return a dict".to_string(),
417 )
418 })?;
419 Ok(PreparedWorkflowStagePrompt {
420 prompt: workflow_prompt_string_field(prepared, "prompt")?,
421 rendered_context: workflow_prompt_string_field(prepared, "rendered_context")?,
422 rendered_verification: workflow_prompt_string_field(prepared, "rendered_verification")?,
423 })
424}
425
426fn workflow_prompt_string_field(
427 value: &serde_json::Map<String, serde_json::Value>,
428 field: &str,
429) -> Result<String, crate::value::VmError> {
430 value
431 .get(field)
432 .and_then(serde_json::Value::as_str)
433 .map(ToOwned::to_owned)
434 .ok_or_else(|| {
435 crate::value::VmError::Runtime(format!(
436 "workflow_prepare_stage_prompt must return string field '{field}'"
437 ))
438 })
439}
440
/// Escapes the markup-significant characters `&`, `<` and `>` so that `text`
/// can be embedded inside the `<artifact>` blocks without being read as tags.
///
/// `&` becomes `&amp;`, `<` becomes `&lt;` and `>` becomes `&gt;`; all other
/// characters are copied unchanged. The input is escaped in a single pass, so
/// the ampersands introduced by the replacements are never escaped again.
fn escape_prompt_text(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
446
447pub fn normalize_artifact(
448 value: &crate::value::VmValue,
449) -> Result<ArtifactRecord, crate::value::VmError> {
450 let artifact: ArtifactRecord = super::parse_json_value(value)?;
451 let artifact = artifact.normalize();
452 if artifact.kind == "handoff" {
453 let json = serde_json::to_value(&artifact).map_err(|error| {
454 crate::value::VmError::Runtime(format!("artifact handoff encode error: {error}"))
455 })?;
456 let handoff = handoff_from_json_value(&json)
457 .or_else(|| {
458 artifact
459 .data
460 .as_ref()
461 .and_then(|data| normalize_handoff_artifact_json(data.clone()).ok())
462 })
463 .ok_or_else(|| {
464 crate::value::VmError::Runtime(
465 "artifact handoff data must contain a valid handoff payload".to_string(),
466 )
467 })?;
468 return Ok(handoff_artifact_record(&handoff, Some(&artifact)));
469 }
470 Ok(artifact)
471}