Skip to main content

harn_vm/orchestration/
artifacts.rs

1//! Artifact types, normalization, selection, and context rendering.
2
3use std::collections::{BTreeMap, BTreeSet};
4
5use serde::{Deserialize, Serialize};
6
7use super::{microcompact_tool_output, new_id, now_rfc3339, ContextPolicy, VerificationContract};
8
9/// Snip an artifact's text to fit within a token budget.
10pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
11    let max_chars = max_tokens * 4;
12    if let Some(ref text) = artifact.text {
13        if text.len() > max_chars && max_chars >= 200 {
14            artifact.text = Some(microcompact_tool_output(text, max_chars));
15            artifact.estimated_tokens = Some(max_tokens);
16        }
17    }
18}
19
20/// Deduplicate artifacts by removing those with identical text content,
21/// keeping the one with higher priority.
22pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
23    let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
24    artifacts.retain(|artifact| {
25        let text = artifact.text.as_deref().unwrap_or("");
26        if text.is_empty() {
27            return true;
28        }
29        let hash = {
30            use std::hash::{Hash, Hasher};
31            let mut hasher = std::collections::hash_map::DefaultHasher::new();
32            text.hash(&mut hasher);
33            hasher.finish()
34        };
35        seen_hashes.insert(hash)
36    });
37}
38
39/// Enhanced artifact selection: dedup, microcompact oversized artifacts,
40/// then delegate to the standard `select_artifacts`.
41pub fn select_artifacts_adaptive(
42    mut artifacts: Vec<ArtifactRecord>,
43    policy: &ContextPolicy,
44) -> Vec<ArtifactRecord> {
45    dedup_artifacts(&mut artifacts);
46
47    // Cap individual artifacts to a fraction of the total budget, with a 500-token
48    // floor but never exceeding the total (so a single artifact can't overrun).
49    if let Some(max_tokens) = policy.max_tokens {
50        let count = artifacts.len().max(1);
51        let per_artifact_budget = max_tokens / count;
52        let cap = per_artifact_budget.max(500).min(max_tokens);
53        for artifact in &mut artifacts {
54            let est = artifact.estimated_tokens.unwrap_or(0);
55            if est > cap * 2 {
56                microcompact_artifact(artifact, cap);
57            }
58        }
59    }
60
61    select_artifacts(artifacts, policy)
62}
63
/// Map an artifact kind onto its canonical spelling.
///
/// Short aliases are expanded (`file`, `transcript`, `verification`, `test`),
/// an empty or whitespace-only kind becomes `artifact`, and every other kind,
/// recognized or not, is returned unchanged (no trimming is applied).
fn normalize_artifact_kind(kind: &str) -> String {
    let canonical = match kind {
        // Short aliases for canonical kinds.
        "file" => "workspace_file",
        "transcript" => "transcript_summary",
        "verification" => "verification_result",
        "test" => "test_result",
        // Canonical kinds, listed for reference; they pass through as-is.
        "resource"
        | "workspace_file"
        | "editor_selection"
        | "workspace_snapshot"
        | "transcript_summary"
        | "summary"
        | "plan"
        | "diff"
        | "git_diff"
        | "patch"
        | "patch_set"
        | "patch_proposal"
        | "diff_review"
        | "review_decision"
        | "verification_bundle"
        | "apply_intent"
        | "verification_result"
        | "test_result"
        | "command_result"
        | "provider_payload"
        | "worker_result"
        | "worker_notification"
        | "artifact" => kind,
        blank if blank.trim().is_empty() => "artifact",
        // Unknown kinds are preserved verbatim.
        unknown => unknown,
    };
    canonical.to_owned()
}
97
/// Default selection priority for an artifact kind (higher sorts first).
///
/// Kinds that appear in no tier fall back to `40`.
fn default_artifact_priority(kind: &str) -> i64 {
    // Priority tiers, highest first. Each kind appears in at most one tier.
    const TIERS: &[(&[&str], i64)] = &[
        (&["verification_result", "test_result"], 100),
        (&["verification_bundle"], 95),
        (
            &[
                "diff",
                "git_diff",
                "patch",
                "patch_set",
                "patch_proposal",
                "diff_review",
                "review_decision",
                "apply_intent",
            ],
            90,
        ),
        (&["plan"], 80),
        (
            &[
                "workspace_file",
                "workspace_snapshot",
                "editor_selection",
                "resource",
            ],
            70,
        ),
        (&["summary", "transcript_summary"], 60),
        (&["command_result"], 50),
    ];
    const FALLBACK: i64 = 40;

    TIERS
        .iter()
        .find(|tier| tier.0.iter().any(|known| *known == kind))
        .map_or(FALLBACK, |tier| tier.1)
}
111
/// Rank a freshness label for sorting; larger means fresher.
///
/// `fresh`/`live` rank 3, `recent` ranks 2, `stale` ranks 0. A missing label
/// or any other value ranks 1, i.e. between `stale` and `recent`.
fn freshness_rank(value: Option<&str>) -> i64 {
    match value {
        Some("fresh" | "live") => 3,
        Some("recent") => 2,
        Some("stale") => 0,
        Some(_) | None => 1,
    }
}
120
/// A single unit of context (file, diff, test result, summary, ...) that can
/// be selected and rendered into a prompt.
///
/// Because of `#[serde(default)]`, fields missing from the serialized form
/// are filled from `Default`; call `normalize` afterwards to replace the
/// empty defaults with usable values.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    /// Type tag, serialized as `_type`; `normalize` sets it to `"artifact"`
    /// when empty.
    #[serde(rename = "_type")]
    pub type_name: String,
    /// Identifier; `normalize` generates one when empty. Compared against
    /// the policy's pinned ids during selection.
    pub id: String,
    /// Artifact kind; `normalize` canonicalizes it. Drives kind filters and
    /// the default priority.
    pub kind: String,
    /// Optional title; rendering falls back to `"<kind> <id>"` when absent.
    pub title: Option<String>,
    /// Text content; used for deduplication, token estimation and as the
    /// rendered body.
    pub text: Option<String>,
    /// Structured payload; rendered (as JSON text) only when `text` is absent.
    pub data: Option<serde_json::Value>,
    /// Where the artifact came from; rendered as `unknown` when absent.
    pub source: Option<String>,
    /// Creation timestamp string; `normalize` fills it with the current time
    /// when empty. Compared as a plain string when recent artifacts are
    /// preferred.
    pub created_at: String,
    /// Freshness label; `fresh`, `live`, `recent` and `stale` are ranked
    /// specially during selection, anything else is treated as neutral.
    pub freshness: Option<String>,
    /// Selection priority (higher first); `normalize` derives a default from
    /// `kind` when absent.
    pub priority: Option<i64>,
    /// Lineage entries. Not read in this module.
    // NOTE(review): presumably ids of the artifacts this one derives from; confirm.
    pub lineage: Vec<String>,
    /// Relevance score; higher values are preferred as a late tie-breaker
    /// during selection.
    pub relevance: Option<f64>,
    /// Estimated token count; `normalize` derives it from the text length
    /// (one token per four bytes, rounded up) when absent.
    pub estimated_tokens: Option<usize>,
    /// Stage label, matched against the policy's stage filter.
    pub stage: Option<String>,
    /// Free-form extra fields. Not read in this module.
    pub metadata: BTreeMap<String, serde_json::Value>,
}
141
142impl ArtifactRecord {
143    pub fn normalize(mut self) -> Self {
144        if self.type_name.is_empty() {
145            self.type_name = "artifact".to_string();
146        }
147        if self.id.is_empty() {
148            self.id = new_id("artifact");
149        }
150        if self.created_at.is_empty() {
151            self.created_at = now_rfc3339();
152        }
153        if self.kind.is_empty() {
154            self.kind = "artifact".to_string();
155        }
156        self.kind = normalize_artifact_kind(&self.kind);
157        if self.estimated_tokens.is_none() {
158            self.estimated_tokens = self
159                .text
160                .as_ref()
161                .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
162        }
163        if self.priority.is_none() {
164            self.priority = Some(default_artifact_priority(&self.kind));
165        }
166        self
167    }
168}
169
170pub fn select_artifacts(
171    mut artifacts: Vec<ArtifactRecord>,
172    policy: &ContextPolicy,
173) -> Vec<ArtifactRecord> {
174    artifacts.retain(|artifact| {
175        (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
176            && !policy.exclude_kinds.contains(&artifact.kind)
177            && (policy.include_stages.is_empty()
178                || artifact
179                    .stage
180                    .as_ref()
181                    .is_some_and(|stage| policy.include_stages.contains(stage)))
182    });
183    artifacts.sort_by(|a, b| {
184        let b_pinned = policy.pinned_ids.contains(&b.id);
185        let a_pinned = policy.pinned_ids.contains(&a.id);
186        b_pinned
187            .cmp(&a_pinned)
188            .then_with(|| {
189                let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
190                let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
191                b_prio_kind.cmp(&a_prio_kind)
192            })
193            .then_with(|| {
194                b.priority
195                    .unwrap_or_default()
196                    .cmp(&a.priority.unwrap_or_default())
197            })
198            .then_with(|| {
199                if policy.prefer_fresh {
200                    freshness_rank(b.freshness.as_deref())
201                        .cmp(&freshness_rank(a.freshness.as_deref()))
202                } else {
203                    std::cmp::Ordering::Equal
204                }
205            })
206            .then_with(|| {
207                if policy.prefer_recent {
208                    b.created_at.cmp(&a.created_at)
209                } else {
210                    std::cmp::Ordering::Equal
211                }
212            })
213            .then_with(|| {
214                b.relevance
215                    .partial_cmp(&a.relevance)
216                    .unwrap_or(std::cmp::Ordering::Equal)
217            })
218            .then_with(|| {
219                a.estimated_tokens
220                    .unwrap_or(usize::MAX)
221                    .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
222            })
223    });
224
225    let mut selected = Vec::new();
226    let mut used_tokens = 0usize;
227    let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
228    let effective_max_tokens = policy
229        .max_tokens
230        .map(|max| max.saturating_sub(reserve_tokens));
231    for artifact in artifacts {
232        if let Some(max_artifacts) = policy.max_artifacts {
233            if selected.len() >= max_artifacts {
234                break;
235            }
236        }
237        let next_tokens = artifact.estimated_tokens.unwrap_or(0);
238        if let Some(max_tokens) = effective_max_tokens {
239            if used_tokens + next_tokens > max_tokens {
240                continue;
241            }
242        }
243        used_tokens += next_tokens;
244        selected.push(artifact);
245    }
246    selected
247}
248
249pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
250    let mut parts = Vec::new();
251    for artifact in artifacts {
252        let title = artifact
253            .title
254            .clone()
255            .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
256        let body = artifact
257            .text
258            .clone()
259            .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
260            .unwrap_or_default();
261        match policy.render.as_deref() {
262            Some("json") => {
263                parts.push(
264                    serde_json::json!({
265                        "id": artifact.id,
266                        "kind": artifact.kind,
267                        "title": title,
268                        "source": artifact.source,
269                        "freshness": artifact.freshness,
270                        "priority": artifact.priority,
271                        "text": body,
272                    })
273                    .to_string(),
274                );
275            }
276            _ => parts.push(format!(
277                "<artifact>\n<title>{}</title>\n<kind>{}</kind>\n<source>{}</source>\n\
278<freshness>{}</freshness>\n<priority>{}</priority>\n<body>\n{}\n</body>\n</artifact>",
279                escape_prompt_text(&title),
280                escape_prompt_text(&artifact.kind),
281                escape_prompt_text(
282                    artifact
283                        .source
284                        .clone()
285                        .unwrap_or_else(|| "unknown".to_string())
286                        .as_str(),
287                ),
288                escape_prompt_text(
289                    artifact
290                        .freshness
291                        .clone()
292                        .unwrap_or_else(|| "normal".to_string())
293                        .as_str(),
294                ),
295                artifact.priority.unwrap_or_default(),
296                body
297            )),
298        }
299    }
300    parts.join("\n\n")
301}
302
303pub fn render_workflow_prompt(
304    task: &str,
305    task_label: Option<&str>,
306    rendered_verification: &str,
307    rendered_context: &str,
308) -> String {
309    let label = task_label
310        .map(str::trim)
311        .filter(|value| !value.is_empty())
312        .unwrap_or("Task");
313    let mut prompt = format!(
314        "<workflow_task>\n<label>{}</label>\n<instructions>\n{}\n</instructions>\n</workflow_task>",
315        escape_prompt_text(label),
316        task.trim(),
317    );
318    let verification = rendered_verification.trim();
319    if !verification.is_empty() {
320        prompt.push_str("\n\n<workflow_verification>\n");
321        prompt.push_str(verification);
322        prompt.push_str("\n</workflow_verification>");
323    }
324    let context = rendered_context.trim();
325    if !context.is_empty() {
326        prompt.push_str("\n\n<workflow_context>\n");
327        prompt.push_str(context);
328        prompt.push_str("\n</workflow_context>");
329    }
330    prompt.push_str(
331        "\n\n<workflow_response_contract>\n\
332Respond to the workflow task above. Do not continue the trailing artifact text verbatim. \
333Keep commentary minimal and use the active tool-calling contract for concrete progress.\n\
334</workflow_response_contract>",
335    );
336    prompt
337}
338
339pub fn render_verification_context(contracts: &[VerificationContract]) -> String {
340    if contracts.is_empty() {
341        return String::new();
342    }
343
344    let mut out = String::from(
345        "Treat this verifier contract as the source of truth for exact identifiers, file paths, and required wiring. Prefer the exact strings below over guessed synonyms.\n",
346    );
347
348    for contract in contracts {
349        out.push_str("\n<contract>\n");
350        if let Some(source_node) = contract.source_node.as_deref() {
351            out.push_str("<source_node>");
352            out.push_str(&escape_prompt_text(source_node));
353            out.push_str("</source_node>\n");
354        }
355        if let Some(summary) = contract.summary.as_deref() {
356            out.push_str("<summary>");
357            out.push_str(&escape_prompt_text(summary));
358            out.push_str("</summary>\n");
359        }
360        if let Some(command) = contract.command.as_deref() {
361            out.push_str("<command>");
362            out.push_str(&escape_prompt_text(command));
363            out.push_str("</command>\n");
364        }
365        if let Some(expect_status) = contract.expect_status {
366            out.push_str("<expect_status>");
367            out.push_str(&expect_status.to_string());
368            out.push_str("</expect_status>\n");
369        }
370        if let Some(assert_text) = contract.assert_text.as_deref() {
371            out.push_str("<assert_text>");
372            out.push_str(&escape_prompt_text(assert_text));
373            out.push_str("</assert_text>\n");
374        }
375        if let Some(expect_text) = contract.expect_text.as_deref() {
376            out.push_str("<expect_text>");
377            out.push_str(&escape_prompt_text(expect_text));
378            out.push_str("</expect_text>\n");
379        }
380        if !contract.required_identifiers.is_empty() {
381            out.push_str("<required_identifiers>\n");
382            for value in &contract.required_identifiers {
383                out.push_str("- ");
384                out.push_str(&escape_prompt_text(value));
385                out.push('\n');
386            }
387            out.push_str("</required_identifiers>\n");
388        }
389        if !contract.required_paths.is_empty() {
390            out.push_str("<required_paths>\n");
391            for value in &contract.required_paths {
392                out.push_str("- ");
393                out.push_str(&escape_prompt_text(value));
394                out.push('\n');
395            }
396            out.push_str("</required_paths>\n");
397        }
398        if !contract.required_text.is_empty() {
399            out.push_str("<required_text>\n");
400            for value in &contract.required_text {
401                out.push_str("- ");
402                out.push_str(&escape_prompt_text(value));
403                out.push('\n');
404            }
405            out.push_str("</required_text>\n");
406        }
407        if !contract.checks.is_empty() {
408            out.push_str("<checks>\n");
409            for check in &contract.checks {
410                out.push_str("- ");
411                out.push_str(&escape_prompt_text(&check.kind));
412                out.push_str(": ");
413                out.push_str(&escape_prompt_text(&check.value));
414                if let Some(note) = check.note.as_deref() {
415                    out.push_str(" (");
416                    out.push_str(&escape_prompt_text(note));
417                    out.push(')');
418                }
419                out.push('\n');
420            }
421            out.push_str("</checks>\n");
422        }
423        if !contract.notes.is_empty() {
424            out.push_str("<notes>\n");
425            for note in &contract.notes {
426                out.push_str("- ");
427                out.push_str(&escape_prompt_text(note));
428                out.push('\n');
429            }
430            out.push_str("</notes>\n");
431        }
432        out.push_str("</contract>");
433    }
434
435    out
436}
437
/// Escape `&`, `<` and `>` so the text cannot be mistaken for prompt markup.
///
/// Quotes and all other characters are left unchanged.
fn escape_prompt_text(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
443
444pub fn normalize_artifact(
445    value: &crate::value::VmValue,
446) -> Result<ArtifactRecord, crate::value::VmError> {
447    let artifact: ArtifactRecord = super::parse_json_value(value)?;
448    Ok(artifact.normalize())
449}