1use std::collections::{BTreeMap, BTreeSet};
4
5use serde::{Deserialize, Serialize};
6
7use super::{microcompact_tool_output, new_id, now_rfc3339, ContextPolicy, VerificationContract};
8
9pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
11 let max_chars = max_tokens * 4;
12 if let Some(ref text) = artifact.text {
13 if text.len() > max_chars && max_chars >= 200 {
14 artifact.text = Some(microcompact_tool_output(text, max_chars));
15 artifact.estimated_tokens = Some(max_tokens);
16 }
17 }
18}
19
20pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
23 let mut seen_hashes: BTreeSet<u64> = BTreeSet::new();
24 artifacts.retain(|artifact| {
25 let text = artifact.text.as_deref().unwrap_or("");
26 if text.is_empty() {
27 return true;
28 }
29 let hash = {
30 use std::hash::{Hash, Hasher};
31 let mut hasher = std::collections::hash_map::DefaultHasher::new();
32 text.hash(&mut hasher);
33 hasher.finish()
34 };
35 seen_hashes.insert(hash)
36 });
37}
38
39pub fn select_artifacts_adaptive(
42 mut artifacts: Vec<ArtifactRecord>,
43 policy: &ContextPolicy,
44) -> Vec<ArtifactRecord> {
45 dedup_artifacts(&mut artifacts);
46
47 if let Some(max_tokens) = policy.max_tokens {
50 let count = artifacts.len().max(1);
51 let per_artifact_budget = max_tokens / count;
52 let cap = per_artifact_budget.max(500).min(max_tokens);
53 for artifact in &mut artifacts {
54 let est = artifact.estimated_tokens.unwrap_or(0);
55 if est > cap * 2 {
56 microcompact_artifact(artifact, cap);
57 }
58 }
59 }
60
61 select_artifacts(artifacts, policy)
62}
63
/// Maps an artifact kind to its canonical spelling.
///
/// Short aliases (`file`, `transcript`, `verification`, `test`) are expanded to
/// their canonical names, a blank (empty or whitespace-only) kind becomes
/// `"artifact"`, and every other kind, canonical or unknown, is returned
/// unchanged.
fn normalize_artifact_kind(kind: &str) -> String {
    let canonical = match kind {
        "file" => "workspace_file",
        "transcript" => "transcript_summary",
        "verification" => "verification_result",
        "test" => "test_result",
        blank if blank.trim().is_empty() => "artifact",
        // Canonical kinds and unrecognized kinds both pass through verbatim.
        unchanged => unchanged,
    };
    canonical.to_owned()
}
97
/// Returns the default ranking priority for a (normalized) artifact kind.
///
/// Higher values rank earlier. Kinds that are not listed in any tier get 40.
fn default_artifact_priority(kind: &str) -> i64 {
    // Each tier pairs a group of kinds with the priority they share.
    const TIERS: &[(&[&str], i64)] = &[
        (&["verification_result", "test_result"], 100),
        (&["verification_bundle"], 95),
        (
            &[
                "diff",
                "git_diff",
                "patch",
                "patch_set",
                "patch_proposal",
                "diff_review",
                "review_decision",
                "apply_intent",
            ],
            90,
        ),
        (&["plan"], 80),
        (
            &[
                "workspace_file",
                "workspace_snapshot",
                "editor_selection",
                "resource",
            ],
            70,
        ),
        (&["summary", "transcript_summary"], 60),
        (&["command_result"], 50),
    ];

    TIERS
        .iter()
        .find(|(kinds, _)| kinds.contains(&kind))
        .map_or(40, |&(_, priority)| priority)
}
111
/// Converts a freshness label into a sortable rank (higher is fresher).
///
/// `fresh` and `live` rank 3, `recent` ranks 2 and `stale` ranks 0. A missing
/// label or any other value gets the neutral rank 1.
fn freshness_rank(value: Option<&str>) -> i64 {
    match value {
        Some("fresh" | "live") => 3,
        Some("recent") => 2,
        Some("stale") => 0,
        Some(_) | None => 1,
    }
}
120
/// A single context artifact: a piece of text and/or structured data together
/// with the metadata used to filter, rank, budget and render it.
///
/// Because of `#[serde(default)]`, any field missing from the serialized form
/// falls back to its `Default` value; `ArtifactRecord::normalize` then fills in
/// the empty identifying fields and derived defaults.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
    /// Record type tag, serialized under the key `_type`. `normalize` sets it to
    /// `"artifact"` when empty.
    #[serde(rename = "_type")]
    pub type_name: String,
    /// Identifier of the artifact. `normalize` generates one when empty; it is
    /// matched against the policy's pinned ids during selection.
    pub id: String,
    /// Artifact kind. `normalize` canonicalizes it; it drives kind filtering,
    /// kind prioritization and the default priority.
    pub kind: String,
    /// Optional display title. Rendering falls back to `"<kind> <id>"`.
    pub title: Option<String>,
    /// Textual body. Used for deduplication, token estimation and rendering.
    pub text: Option<String>,
    /// Structured payload. Rendered in its JSON string form when `text` is absent.
    pub data: Option<serde_json::Value>,
    /// Where the artifact came from. Rendered as `unknown` in the tagged format
    /// when absent.
    pub source: Option<String>,
    /// Creation timestamp. `normalize` fills it from `now_rfc3339()` when empty;
    /// selection compares it as a string when recency is preferred.
    pub created_at: String,
    /// Freshness label. `fresh`/`live`, `recent` and `stale` are ranked
    /// specially during selection; anything else is neutral.
    pub freshness: Option<String>,
    /// Ranking priority (higher sorts first). `normalize` derives a default from
    /// `kind` when absent.
    pub priority: Option<i64>,
    /// NOTE(review): not read in this part of the file; presumably ids of the
    /// artifacts this one was derived from. Confirm against callers.
    pub lineage: Vec<String>,
    /// Relevance score; higher values sort earlier during selection.
    pub relevance: Option<f64>,
    /// Estimated size in tokens. `normalize` defaults it to the text's byte
    /// length divided by 4, rounded up.
    pub estimated_tokens: Option<usize>,
    /// Workflow stage label, matched against the policy's `include_stages`.
    pub stage: Option<String>,
    /// Free-form additional metadata; not interpreted in this part of the file.
    pub metadata: BTreeMap<String, serde_json::Value>,
}
141
142impl ArtifactRecord {
143 pub fn normalize(mut self) -> Self {
144 if self.type_name.is_empty() {
145 self.type_name = "artifact".to_string();
146 }
147 if self.id.is_empty() {
148 self.id = new_id("artifact");
149 }
150 if self.created_at.is_empty() {
151 self.created_at = now_rfc3339();
152 }
153 if self.kind.is_empty() {
154 self.kind = "artifact".to_string();
155 }
156 self.kind = normalize_artifact_kind(&self.kind);
157 if self.estimated_tokens.is_none() {
158 self.estimated_tokens = self
159 .text
160 .as_ref()
161 .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
162 }
163 if self.priority.is_none() {
164 self.priority = Some(default_artifact_priority(&self.kind));
165 }
166 self
167 }
168}
169
170pub fn select_artifacts(
171 mut artifacts: Vec<ArtifactRecord>,
172 policy: &ContextPolicy,
173) -> Vec<ArtifactRecord> {
174 artifacts.retain(|artifact| {
175 (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
176 && !policy.exclude_kinds.contains(&artifact.kind)
177 && (policy.include_stages.is_empty()
178 || artifact
179 .stage
180 .as_ref()
181 .is_some_and(|stage| policy.include_stages.contains(stage)))
182 });
183 artifacts.sort_by(|a, b| {
184 let b_pinned = policy.pinned_ids.contains(&b.id);
185 let a_pinned = policy.pinned_ids.contains(&a.id);
186 b_pinned
187 .cmp(&a_pinned)
188 .then_with(|| {
189 let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
190 let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
191 b_prio_kind.cmp(&a_prio_kind)
192 })
193 .then_with(|| {
194 b.priority
195 .unwrap_or_default()
196 .cmp(&a.priority.unwrap_or_default())
197 })
198 .then_with(|| {
199 if policy.prefer_fresh {
200 freshness_rank(b.freshness.as_deref())
201 .cmp(&freshness_rank(a.freshness.as_deref()))
202 } else {
203 std::cmp::Ordering::Equal
204 }
205 })
206 .then_with(|| {
207 if policy.prefer_recent {
208 b.created_at.cmp(&a.created_at)
209 } else {
210 std::cmp::Ordering::Equal
211 }
212 })
213 .then_with(|| {
214 b.relevance
215 .partial_cmp(&a.relevance)
216 .unwrap_or(std::cmp::Ordering::Equal)
217 })
218 .then_with(|| {
219 a.estimated_tokens
220 .unwrap_or(usize::MAX)
221 .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
222 })
223 });
224
225 let mut selected = Vec::new();
226 let mut used_tokens = 0usize;
227 let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
228 let effective_max_tokens = policy
229 .max_tokens
230 .map(|max| max.saturating_sub(reserve_tokens));
231 for artifact in artifacts {
232 if let Some(max_artifacts) = policy.max_artifacts {
233 if selected.len() >= max_artifacts {
234 break;
235 }
236 }
237 let next_tokens = artifact.estimated_tokens.unwrap_or(0);
238 if let Some(max_tokens) = effective_max_tokens {
239 if used_tokens + next_tokens > max_tokens {
240 continue;
241 }
242 }
243 used_tokens += next_tokens;
244 selected.push(artifact);
245 }
246 selected
247}
248
249pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
250 let mut parts = Vec::new();
251 for artifact in artifacts {
252 let title = artifact
253 .title
254 .clone()
255 .unwrap_or_else(|| format!("{} {}", artifact.kind, artifact.id));
256 let body = artifact
257 .text
258 .clone()
259 .or_else(|| artifact.data.as_ref().map(|v| v.to_string()))
260 .unwrap_or_default();
261 match policy.render.as_deref() {
262 Some("json") => {
263 parts.push(
264 serde_json::json!({
265 "id": artifact.id,
266 "kind": artifact.kind,
267 "title": title,
268 "source": artifact.source,
269 "freshness": artifact.freshness,
270 "priority": artifact.priority,
271 "text": body,
272 })
273 .to_string(),
274 );
275 }
276 _ => parts.push(format!(
277 "<artifact>\n<title>{}</title>\n<kind>{}</kind>\n<source>{}</source>\n\
278<freshness>{}</freshness>\n<priority>{}</priority>\n<body>\n{}\n</body>\n</artifact>",
279 escape_prompt_text(&title),
280 escape_prompt_text(&artifact.kind),
281 escape_prompt_text(
282 artifact
283 .source
284 .clone()
285 .unwrap_or_else(|| "unknown".to_string())
286 .as_str(),
287 ),
288 escape_prompt_text(
289 artifact
290 .freshness
291 .clone()
292 .unwrap_or_else(|| "normal".to_string())
293 .as_str(),
294 ),
295 artifact.priority.unwrap_or_default(),
296 body
297 )),
298 }
299 }
300 parts.join("\n\n")
301}
302
303pub fn render_workflow_prompt(
304 task: &str,
305 task_label: Option<&str>,
306 rendered_verification: &str,
307 rendered_context: &str,
308) -> String {
309 let label = task_label
310 .map(str::trim)
311 .filter(|value| !value.is_empty())
312 .unwrap_or("Task");
313 let mut prompt = format!(
314 "<workflow_task>\n<label>{}</label>\n<instructions>\n{}\n</instructions>\n</workflow_task>",
315 escape_prompt_text(label),
316 task.trim(),
317 );
318 let verification = rendered_verification.trim();
319 if !verification.is_empty() {
320 prompt.push_str("\n\n<workflow_verification>\n");
321 prompt.push_str(verification);
322 prompt.push_str("\n</workflow_verification>");
323 }
324 let context = rendered_context.trim();
325 if !context.is_empty() {
326 prompt.push_str("\n\n<workflow_context>\n");
327 prompt.push_str(context);
328 prompt.push_str("\n</workflow_context>");
329 }
330 prompt.push_str(
331 "\n\n<workflow_response_contract>\n\
332Respond to the workflow task above. Do not continue the trailing artifact text verbatim. \
333Keep commentary minimal and use the active tool-calling contract for concrete progress.\n\
334</workflow_response_contract>",
335 );
336 prompt
337}
338
339pub fn render_verification_context(contracts: &[VerificationContract]) -> String {
340 if contracts.is_empty() {
341 return String::new();
342 }
343
344 let mut out = String::from(
345 "Treat this verifier contract as the source of truth for exact identifiers, file paths, and required wiring. Prefer the exact strings below over guessed synonyms.\n",
346 );
347
348 for contract in contracts {
349 out.push_str("\n<contract>\n");
350 if let Some(source_node) = contract.source_node.as_deref() {
351 out.push_str("<source_node>");
352 out.push_str(&escape_prompt_text(source_node));
353 out.push_str("</source_node>\n");
354 }
355 if let Some(summary) = contract.summary.as_deref() {
356 out.push_str("<summary>");
357 out.push_str(&escape_prompt_text(summary));
358 out.push_str("</summary>\n");
359 }
360 if let Some(command) = contract.command.as_deref() {
361 out.push_str("<command>");
362 out.push_str(&escape_prompt_text(command));
363 out.push_str("</command>\n");
364 }
365 if let Some(expect_status) = contract.expect_status {
366 out.push_str("<expect_status>");
367 out.push_str(&expect_status.to_string());
368 out.push_str("</expect_status>\n");
369 }
370 if let Some(assert_text) = contract.assert_text.as_deref() {
371 out.push_str("<assert_text>");
372 out.push_str(&escape_prompt_text(assert_text));
373 out.push_str("</assert_text>\n");
374 }
375 if let Some(expect_text) = contract.expect_text.as_deref() {
376 out.push_str("<expect_text>");
377 out.push_str(&escape_prompt_text(expect_text));
378 out.push_str("</expect_text>\n");
379 }
380 if !contract.required_identifiers.is_empty() {
381 out.push_str("<required_identifiers>\n");
382 for value in &contract.required_identifiers {
383 out.push_str("- ");
384 out.push_str(&escape_prompt_text(value));
385 out.push('\n');
386 }
387 out.push_str("</required_identifiers>\n");
388 }
389 if !contract.required_paths.is_empty() {
390 out.push_str("<required_paths>\n");
391 for value in &contract.required_paths {
392 out.push_str("- ");
393 out.push_str(&escape_prompt_text(value));
394 out.push('\n');
395 }
396 out.push_str("</required_paths>\n");
397 }
398 if !contract.required_text.is_empty() {
399 out.push_str("<required_text>\n");
400 for value in &contract.required_text {
401 out.push_str("- ");
402 out.push_str(&escape_prompt_text(value));
403 out.push('\n');
404 }
405 out.push_str("</required_text>\n");
406 }
407 if !contract.checks.is_empty() {
408 out.push_str("<checks>\n");
409 for check in &contract.checks {
410 out.push_str("- ");
411 out.push_str(&escape_prompt_text(&check.kind));
412 out.push_str(": ");
413 out.push_str(&escape_prompt_text(&check.value));
414 if let Some(note) = check.note.as_deref() {
415 out.push_str(" (");
416 out.push_str(&escape_prompt_text(note));
417 out.push(')');
418 }
419 out.push('\n');
420 }
421 out.push_str("</checks>\n");
422 }
423 if !contract.notes.is_empty() {
424 out.push_str("<notes>\n");
425 for note in &contract.notes {
426 out.push_str("- ");
427 out.push_str(&escape_prompt_text(note));
428 out.push('\n');
429 }
430 out.push_str("</notes>\n");
431 }
432 out.push_str("</contract>");
433 }
434
435 out
436}
437
/// Escapes the characters that are significant in the XML-like prompt markup.
///
/// `&`, `<` and `>` are replaced by `&amp;`, `&lt;` and `&gt;`, so interpolated
/// values cannot open or close tags in the rendered prompt. Escaping is done in
/// a single pass, so an input that already contains an entity such as `&lt;`
/// has its ampersand escaped once (`&amp;lt;`) and never double-processed.
fn escape_prompt_text(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
443
444pub fn normalize_artifact(
445 value: &crate::value::VmValue,
446) -> Result<ArtifactRecord, crate::value::VmError> {
447 let artifact: ArtifactRecord = super::parse_json_value(value)?;
448 Ok(artifact.normalize())
449}