use std::collections::{BTreeMap, BTreeSet};
use serde::{Deserialize, Serialize};
use crate::stdlib::harn_entry::{call_harn_export_json, call_harn_export_typed};
use super::{
handoff_artifact_record, handoff_from_json_value, microcompact_tool_output, new_id,
normalize_handoff_artifact_json, now_rfc3339, ContextPolicy, StageContract,
VerificationContract,
};
pub fn microcompact_artifact(artifact: &mut ArtifactRecord, max_tokens: usize) {
let max_chars = max_tokens * 4;
if let Some(ref text) = artifact.text {
if text.len() > max_chars && max_chars >= 200 {
artifact.text = Some(microcompact_tool_output(text, max_chars));
artifact.estimated_tokens = Some(max_tokens);
}
}
}
/// Removes artifacts whose `text` duplicates that of an earlier artifact in the list.
///
/// Artifacts with no text (or empty text) are always kept. The first occurrence of each distinct
/// text is kept and relative order is preserved. Texts are compared exactly rather than through a
/// 64-bit hash, so two different texts can never be mistaken for duplicates.
pub fn dedup_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
    // Pass 1: decide per artifact whether it survives. The set only borrows the texts, so nothing
    // is cloned, and the borrow ends before the mutable pass below.
    let keep_flags: Vec<bool> = {
        let mut seen_texts: BTreeSet<&str> = BTreeSet::new();
        let flags = artifacts
            .iter()
            .map(|artifact| match artifact.text.as_deref() {
                None | Some("") => true,
                // `insert` returns `false` when the text was already present.
                Some(text) => seen_texts.insert(text),
            })
            .collect();
        flags
    };
    // Pass 2: `retain` visits each element exactly once in original order, so the flags line up.
    let mut keep_flags = keep_flags.into_iter();
    artifacts.retain(|_| keep_flags.next().unwrap_or(true));
}
/// Pre-processes `artifacts` and then delegates to [`select_artifacts`].
///
/// Steps, in order:
/// 1. drop stale evidence artifacts (`drop_stale_evidence_artifacts`);
/// 2. drop artifacts with duplicate text (`dedup_artifacts`);
/// 3. if `policy.max_tokens` is set, compact every artifact whose estimated token count is more
///    than twice the per-artifact cap;
/// 4. run the regular policy-driven selection.
///
/// The per-artifact cap is the budget split evenly across the remaining artifacts, raised to at
/// least 500 tokens and then limited to the total budget.
pub fn select_artifacts_adaptive(
    mut artifacts: Vec<ArtifactRecord>,
    policy: &ContextPolicy,
) -> Vec<ArtifactRecord> {
    drop_stale_evidence_artifacts(&mut artifacts);
    dedup_artifacts(&mut artifacts);
    if let Some(max_tokens) = policy.max_tokens {
        // `max(1)` avoids dividing by zero when no artifacts are left.
        let count = artifacts.len().max(1);
        let per_artifact_budget = max_tokens / count;
        let cap = per_artifact_budget.max(500).min(max_tokens);
        // Saturate so a very large budget cannot overflow the threshold.
        let compaction_threshold = cap.saturating_mul(2);
        for artifact in &mut artifacts {
            let estimated = artifact.estimated_tokens.unwrap_or(0);
            if estimated > compaction_threshold {
                microcompact_artifact(artifact, cap);
            }
        }
    }
    select_artifacts(artifacts, policy)
}
/// Reads `artifact.metadata[key]` as a list of strings.
///
/// Returns an empty vector when the key is missing or its value is not a JSON array. Non-string
/// elements are skipped; string elements are trimmed and dropped if they end up empty.
fn metadata_string_list(artifact: &ArtifactRecord, key: &str) -> Vec<String> {
    let Some(entries) = artifact.metadata.get(key).and_then(|value| value.as_array()) else {
        return Vec::new();
    };
    let mut strings = Vec::new();
    for entry in entries {
        let Some(raw) = entry.as_str() else {
            continue;
        };
        let trimmed = raw.trim();
        if !trimmed.is_empty() {
            strings.push(trimmed.to_owned());
        }
    }
    strings
}
/// Removes non-fresh artifacts whose evidence refers to a path that a fresh artifact changed.
///
/// "Fresh" means `freshness_rank` is at least 2. Fresh artifacts contribute their
/// `changed_paths` metadata; if none contribute any path, the list is left untouched. Otherwise
/// an artifact is removed only when it is not fresh itself and at least one of its
/// `evidence_paths` appears among the changed paths.
fn drop_stale_evidence_artifacts(artifacts: &mut Vec<ArtifactRecord>) {
    const FRESH_RANK: i64 = 2;
    let is_fresh =
        |artifact: &ArtifactRecord| freshness_rank(artifact.freshness.as_deref()) >= FRESH_RANK;

    let mut fresh_changed_paths: BTreeSet<String> = BTreeSet::new();
    for artifact in artifacts.iter() {
        if is_fresh(artifact) {
            fresh_changed_paths.extend(metadata_string_list(artifact, "changed_paths"));
        }
    }
    if fresh_changed_paths.is_empty() {
        return;
    }

    artifacts.retain(|artifact| {
        if is_fresh(artifact) {
            return true;
        }
        // An empty evidence list passes `all` trivially, so such artifacts are kept.
        metadata_string_list(artifact, "evidence_paths")
            .iter()
            .all(|path| !fresh_changed_paths.contains(path))
    });
}
/// Maps an artifact kind to its canonical spelling.
///
/// Canonical kinds are returned unchanged, the short aliases `file`, `transcript`,
/// `verification` and `test` are expanded, blank input becomes `artifact`, and any other value is
/// passed through verbatim (without trimming).
fn normalize_artifact_kind(kind: &str) -> String {
    /// Kinds that are already canonical.
    const CANONICAL_KINDS: &[&str] = &[
        "resource",
        "handoff",
        "workspace_file",
        "editor_selection",
        "workspace_snapshot",
        "transcript_summary",
        "summary",
        "plan",
        "diff",
        "git_diff",
        "patch",
        "patch_set",
        "patch_proposal",
        "diff_review",
        "review_decision",
        "verification_bundle",
        "apply_intent",
        "verification_result",
        "test_result",
        "command_result",
        "provider_payload",
        "worker_result",
        "worker_notification",
        "artifact",
    ];
    if CANONICAL_KINDS.contains(&kind) {
        return kind.to_owned();
    }
    let resolved = match kind {
        "file" => "workspace_file",
        "transcript" => "transcript_summary",
        "verification" => "verification_result",
        "test" => "test_result",
        blank if blank.trim().is_empty() => "artifact",
        custom => custom,
    };
    resolved.to_owned()
}
/// Returns the default ranking priority for an artifact kind (higher is more important).
///
/// Kinds that are not listed in any tier get priority 40.
fn default_artifact_priority(kind: &str) -> i64 {
    /// Priority tiers, highest first; each kind appears in exactly one tier.
    const PRIORITY_TIERS: &[(i64, &[&str])] = &[
        (100, &["verification_result", "test_result"]),
        (95, &["verification_bundle"]),
        (92, &["handoff"]),
        (
            90,
            &[
                "diff",
                "git_diff",
                "patch",
                "patch_set",
                "patch_proposal",
                "diff_review",
                "review_decision",
                "apply_intent",
            ],
        ),
        (80, &["plan"]),
        (
            70,
            &[
                "workspace_file",
                "workspace_snapshot",
                "editor_selection",
                "resource",
            ],
        ),
        (60, &["summary", "transcript_summary"]),
        (50, &["command_result"]),
    ];
    const FALLBACK_PRIORITY: i64 = 40;

    PRIORITY_TIERS
        .iter()
        .find(|(_, kinds)| kinds.contains(&kind))
        .map_or(FALLBACK_PRIORITY, |(priority, _)| *priority)
}
/// Converts a freshness label into a sortable rank (higher is fresher).
///
/// `fresh` and `live` rank 3, `recent` ranks 2, `stale` ranks 0; a missing or unrecognised label
/// ranks 1.
fn freshness_rank(value: Option<&str>) -> i64 {
    match value {
        Some("fresh" | "live") => 3,
        Some("recent") => 2,
        Some("stale") => 0,
        Some(_) | None => 1,
    }
}
/// A single piece of context (text and/or structured data) together with the attributes this
/// module uses to filter, rank, budget and render it.
///
/// Because of `#[serde(default)]`, any field missing from the serialized form is filled from
/// `Default`. [`ArtifactRecord::normalize`] then supplies `type_name`, `id`, `created_at`, `kind`,
/// `estimated_tokens` (only when `text` is present) and `priority` where they are still unset.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct ArtifactRecord {
/// Serialized under the key `_type`; `normalize` sets it to `"artifact"` when empty.
#[serde(rename = "_type")]
pub type_name: String,
/// Identifier; `normalize` generates one with `new_id("artifact")` when empty. Compared against
/// `policy.pinned_ids` during selection.
pub id: String,
/// Artifact kind; `normalize` canonicalises it via `normalize_artifact_kind`. Used for policy
/// include/exclude/prioritize checks and for the default priority.
pub kind: String,
/// Optional title; rendering falls back to `"<kind> <id>"` when absent.
pub title: Option<String>,
/// Textual body; preferred over `data` when rendering and the only field considered by
/// deduplication and compaction.
pub text: Option<String>,
/// Structured payload; rendered via `to_string()` when `text` is absent.
pub data: Option<serde_json::Value>,
/// Optional origin label; the tagged renderer prints `unknown` when absent.
pub source: Option<String>,
/// Creation timestamp; `normalize` fills it with `now_rfc3339()` when empty. Selection compares
/// it as a plain string when `policy.prefer_recent` is set.
pub created_at: String,
/// Freshness label; `freshness_rank` recognises `fresh`, `live`, `recent` and `stale`.
pub freshness: Option<String>,
/// Ranking priority (higher sorts first); `normalize` defaults it from the kind.
pub priority: Option<i64>,
/// Not interpreted by the code visible in this file.
pub lineage: Vec<String>,
/// Optional relevance score; a late tie-breaker in selection where higher sorts first.
pub relevance: Option<f64>,
/// Estimated token cost; `normalize` derives `ceil(text.len() / 4)` when unset and `text` is
/// present. Counted against the token budget during selection.
pub estimated_tokens: Option<usize>,
/// Optional stage name, matched against `policy.include_stages`.
pub stage: Option<String>,
/// Free-form metadata; this file reads the `changed_paths` and `evidence_paths` string arrays.
pub metadata: BTreeMap<String, serde_json::Value>,
}
impl ArtifactRecord {
    /// Fills in every defaultable field that is still empty and canonicalises `kind`.
    ///
    /// * `type_name` becomes `"artifact"` when empty.
    /// * `id` is generated with `new_id("artifact")` when empty.
    /// * `created_at` is set to `now_rfc3339()` when empty.
    /// * `kind` becomes `"artifact"` when empty and is then passed through
    ///   `normalize_artifact_kind`.
    /// * `estimated_tokens` defaults to `ceil(text.len() / 4)` when there is text; without text
    ///   it stays `None`.
    /// * `priority` defaults to `default_artifact_priority` of the canonical kind.
    pub fn normalize(mut self) -> Self {
        if self.type_name.is_empty() {
            self.type_name = String::from("artifact");
        }
        if self.id.is_empty() {
            self.id = new_id("artifact");
        }
        if self.created_at.is_empty() {
            self.created_at = now_rfc3339();
        }

        let raw_kind = if self.kind.is_empty() {
            "artifact"
        } else {
            self.kind.as_str()
        };
        self.kind = normalize_artifact_kind(raw_kind);

        // Roughly four bytes per token, rounded up.
        let derived_tokens = self
            .text
            .as_ref()
            .map(|text| ((text.len() as f64) / 4.0).ceil() as usize);
        self.estimated_tokens = self.estimated_tokens.or(derived_tokens);

        // Must run after the kind has been canonicalised.
        let fallback_priority = default_artifact_priority(&self.kind);
        self.priority = Some(self.priority.unwrap_or(fallback_priority));

        self
    }
}
/// Filters, ranks and budget-limits `artifacts` according to `policy`.
///
/// 1. **Filter** – keep an artifact when its kind is in `include_kinds` (or that list is empty),
///    its kind is not in `exclude_kinds`, and its stage is in `include_stages` (or that list is
///    empty). An artifact without a stage is dropped whenever `include_stages` is non-empty.
/// 2. **Rank** – stable sort by, in order: pinned id first, prioritised kind first, higher
///    priority, fresher (only with `prefer_fresh`), later `created_at` (only with
///    `prefer_recent`), higher relevance, and finally fewer estimated tokens.
/// 3. **Select** – walk the ranked list, stopping once `max_artifacts` is reached and skipping
///    (not stopping at) any artifact that would push the running token total above
///    `max_tokens - reserve_tokens`.
///
/// Token accounting uses checked arithmetic, so an artifact with an enormous `estimated_tokens`
/// is treated as over budget instead of wrapping around and slipping through.
pub fn select_artifacts(
    mut artifacts: Vec<ArtifactRecord>,
    policy: &ContextPolicy,
) -> Vec<ArtifactRecord> {
    artifacts.retain(|artifact| {
        (policy.include_kinds.is_empty() || policy.include_kinds.contains(&artifact.kind))
            && !policy.exclude_kinds.contains(&artifact.kind)
            && (policy.include_stages.is_empty()
                || artifact
                    .stage
                    .as_ref()
                    .is_some_and(|stage| policy.include_stages.contains(stage)))
    });
    // Every key compares `b` against `a` to get descending order, except the final token
    // tie-breaker, which is ascending.
    artifacts.sort_by(|a, b| {
        let b_pinned = policy.pinned_ids.contains(&b.id);
        let a_pinned = policy.pinned_ids.contains(&a.id);
        b_pinned
            .cmp(&a_pinned)
            .then_with(|| {
                let b_prio_kind = policy.prioritize_kinds.contains(&b.kind);
                let a_prio_kind = policy.prioritize_kinds.contains(&a.kind);
                b_prio_kind.cmp(&a_prio_kind)
            })
            .then_with(|| {
                b.priority
                    .unwrap_or_default()
                    .cmp(&a.priority.unwrap_or_default())
            })
            .then_with(|| {
                if policy.prefer_fresh {
                    freshness_rank(b.freshness.as_deref())
                        .cmp(&freshness_rank(a.freshness.as_deref()))
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                if policy.prefer_recent {
                    b.created_at.cmp(&a.created_at)
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .then_with(|| {
                // Incomparable values (NaN) are treated as equal.
                b.relevance
                    .partial_cmp(&a.relevance)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .then_with(|| {
                // Artifacts without an estimate sort after those with one.
                a.estimated_tokens
                    .unwrap_or(usize::MAX)
                    .cmp(&b.estimated_tokens.unwrap_or(usize::MAX))
            })
    });
    let mut selected = Vec::new();
    let mut used_tokens = 0usize;
    let reserve_tokens = policy.reserve_tokens.unwrap_or(0);
    let effective_max_tokens = policy
        .max_tokens
        .map(|max| max.saturating_sub(reserve_tokens));
    for artifact in artifacts {
        if let Some(max_artifacts) = policy.max_artifacts {
            if selected.len() >= max_artifacts {
                break;
            }
        }
        let next_tokens = artifact.estimated_tokens.unwrap_or(0);
        // `None` means the running total would overflow `usize`.
        let projected_tokens = used_tokens.checked_add(next_tokens);
        if let Some(max_tokens) = effective_max_tokens {
            let fits = projected_tokens.is_some_and(|total| total <= max_tokens);
            if !fits {
                // Skip rather than stop: a smaller artifact further down may still fit.
                continue;
            }
        }
        used_tokens = projected_tokens.unwrap_or(usize::MAX);
        selected.push(artifact);
    }
    selected
}
/// Renders `artifacts` into one string, with entries separated by a blank line.
///
/// With `policy.render == Some("json")` each artifact becomes a compact JSON object containing
/// `id`, `kind`, `title`, `source`, `freshness`, `priority` and `text`. Any other setting yields
/// an `<artifact>` tagged block; title, kind, source and freshness go through
/// `escape_prompt_text`, while the body is inserted verbatim.
///
/// The title falls back to `"<kind> <id>"`. The body is `text` if present, otherwise the
/// stringified `data`, otherwise empty.
pub fn render_artifacts_context(artifacts: &[ArtifactRecord], policy: &ContextPolicy) -> String {
    let as_json = matches!(policy.render.as_deref(), Some("json"));

    let rendered: Vec<String> = artifacts
        .iter()
        .map(|artifact| {
            let title = match &artifact.title {
                Some(explicit) => explicit.clone(),
                None => format!("{} {}", artifact.kind, artifact.id),
            };
            let body = match (&artifact.text, &artifact.data) {
                (Some(text), _) => text.clone(),
                (None, Some(data)) => data.to_string(),
                (None, None) => String::new(),
            };

            if as_json {
                return serde_json::json!({
                    "id": artifact.id,
                    "kind": artifact.kind,
                    "title": title,
                    "source": artifact.source,
                    "freshness": artifact.freshness,
                    "priority": artifact.priority,
                    "text": body,
                })
                .to_string();
            }

            let source = artifact.source.as_deref().unwrap_or("unknown");
            let freshness = artifact.freshness.as_deref().unwrap_or("normal");
            format!(
                "<artifact>\n<title>{}</title>\n<kind>{}</kind>\n<source>{}</source>\n\
                 <freshness>{}</freshness>\n<priority>{}</priority>\n<body>\n{}\n</body>\n</artifact>",
                escape_prompt_text(&title),
                escape_prompt_text(&artifact.kind),
                escape_prompt_text(source),
                escape_prompt_text(freshness),
                artifact.priority.unwrap_or_default(),
                body
            )
        })
        .collect();

    rendered.join("\n\n")
}
/// Result of [`select_workflow_stage_artifacts`], deserialized from the value returned by the
/// `workflow_select_stage_artifacts` export.
#[derive(Clone, Debug, Deserialize, PartialEq)]
pub struct SelectedWorkflowStageArtifacts {
/// Selected artifacts; `select_workflow_stage_artifacts` normalizes each one before returning.
pub artifacts: Vec<ArtifactRecord>,
/// Context policy as returned by the export (presumably the effective policy for the stage;
/// confirm against the export's implementation).
pub context_policy: ContextPolicy,
}
pub async fn select_workflow_stage_artifacts(
artifacts: &[ArtifactRecord],
context_policy: &ContextPolicy,
input_contract: &StageContract,
) -> Result<SelectedWorkflowStageArtifacts, crate::value::VmError> {
let payload = serde_json::json!({
"artifacts": artifacts,
"context_policy": context_policy,
"input_contract": input_contract,
});
let mut selected: SelectedWorkflowStageArtifacts = call_harn_export_typed(
"std/workflow/context",
"workflow_select_stage_artifacts",
"workflow_select_stage_artifacts",
payload,
)
.await?;
selected.artifacts = selected
.artifacts
.into_iter()
.map(ArtifactRecord::normalize)
.collect();
Ok(selected)
}
/// Result of [`prepare_workflow_stage_prompt`]: three string fields copied from the dict returned
/// by the `workflow_prepare_stage_prompt` export.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PreparedWorkflowStagePrompt {
/// Value of the export's `prompt` field.
pub prompt: String,
/// Value of the export's `rendered_context` field.
pub rendered_context: String,
/// Value of the export's `rendered_verification` field.
pub rendered_verification: String,
}
/// Asks the `workflow_prepare_stage_prompt` export of `std/workflow/prompts` to build a stage
/// prompt.
///
/// All arguments are forwarded to the export as one JSON object. The reply must be a dict with
/// string fields `prompt`, `rendered_context` and `rendered_verification`.
///
/// # Errors
///
/// Propagates any error from `call_harn_export_json`, and returns `VmError::Runtime` when the
/// reply is not a dict or one of the three fields is missing or not a string.
pub async fn prepare_workflow_stage_prompt(
    task: &str,
    task_label: Option<&str>,
    artifacts: &[ArtifactRecord],
    context_policy: &ContextPolicy,
    rendered_context: Option<&str>,
    verification_contracts: &[VerificationContract],
) -> Result<PreparedWorkflowStagePrompt, crate::value::VmError> {
    let request = serde_json::json!({
        "task": task,
        "task_label": task_label,
        "artifacts": artifacts,
        "context_policy": context_policy,
        "rendered_context": rendered_context,
        "verification_contracts": verification_contracts,
    });
    let response = call_harn_export_json(
        "std/workflow/prompts",
        "workflow_prepare_stage_prompt",
        "workflow_prepare_stage_prompt",
        request,
    )
    .await?;

    let Some(fields) = response.as_object() else {
        return Err(crate::value::VmError::Runtime(
            "workflow_prepare_stage_prompt must return a dict".to_string(),
        ));
    };

    // Fields are read in this order so the first missing one determines the error.
    let prompt = workflow_prompt_string_field(fields, "prompt")?;
    let context_text = workflow_prompt_string_field(fields, "rendered_context")?;
    let verification_text = workflow_prompt_string_field(fields, "rendered_verification")?;
    Ok(PreparedWorkflowStagePrompt {
        prompt,
        rendered_context: context_text,
        rendered_verification: verification_text,
    })
}
/// Extracts `field` from `value` as an owned string.
///
/// # Errors
///
/// Returns `VmError::Runtime` naming the field when it is missing or not a JSON string.
fn workflow_prompt_string_field(
    value: &serde_json::Map<String, serde_json::Value>,
    field: &str,
) -> Result<String, crate::value::VmError> {
    match value.get(field).and_then(serde_json::Value::as_str) {
        Some(text) => Ok(text.to_owned()),
        None => Err(crate::value::VmError::Runtime(format!(
            "workflow_prepare_stage_prompt must return string field '{field}'"
        ))),
    }
}
/// Escapes the characters that would otherwise be read as markup inside the `<artifact>` tags
/// emitted by `render_artifacts_context`.
///
/// `&` becomes `&amp;`, `<` becomes `&lt;` and `>` becomes `&gt;`; everything else is copied
/// unchanged. The text is processed in a single pass, so the `&` of a freshly written entity is
/// never escaped a second time.
fn escape_prompt_text(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Parses a VM value into a normalized [`ArtifactRecord`].
///
/// Artifacts whose kind is not `handoff` are returned right after normalization. For handoff
/// artifacts the payload is extracted first from the serialized artifact as a whole and, failing
/// that, from its `data` field; the record is then rebuilt with `handoff_artifact_record`.
///
/// # Errors
///
/// Propagates any error from `parse_json_value`, and returns `VmError::Runtime` when a handoff
/// artifact cannot be serialized or contains no valid handoff payload.
pub fn normalize_artifact(
    value: &crate::value::VmValue,
) -> Result<ArtifactRecord, crate::value::VmError> {
    let parsed: ArtifactRecord = super::parse_json_value(value)?;
    let artifact = parsed.normalize();
    if artifact.kind != "handoff" {
        return Ok(artifact);
    }

    let encoded = match serde_json::to_value(&artifact) {
        Ok(json) => json,
        Err(error) => {
            return Err(crate::value::VmError::Runtime(format!(
                "artifact handoff encode error: {error}"
            )));
        }
    };

    let handoff = match handoff_from_json_value(&encoded) {
        Some(direct) => direct,
        None => {
            // Fall back to the `data` payload; a normalization failure counts as "no payload".
            let from_data = artifact
                .data
                .as_ref()
                .and_then(|data| normalize_handoff_artifact_json(data.clone()).ok());
            match from_data {
                Some(recovered) => recovered,
                None => {
                    return Err(crate::value::VmError::Runtime(
                        "artifact handoff data must contain a valid handoff payload".to_string(),
                    ));
                }
            }
        }
    };
    Ok(handoff_artifact_record(&handoff, Some(&artifact)))
}