use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::tools::spec::ToolResult;
pub use zagens_core::workshop::WorkshopConfig;
/// Divisor used by `estimate_tokens`: one estimated token per this many
/// characters, rounded up.
const CHARS_PER_TOKEN_ESTIMATE: usize = 3;
/// Variable name recorded in `LargeOutputExternalRef::storage_var` and echoed
/// in the `raw_stored_in=` field of the synthesis header.
pub const WORKSHOP_LAST_TOOL_RESULT_VAR: &str = "last_tool_result";
/// Environment variable that, when set, replaces the default sessions base
/// directory used by `large_output_dir`.
const LARGE_OUTPUT_ROOT_ENV: &str = "DEEPSEEK_LARGE_OUTPUT_ROOT";
/// Value written into `LargeOutputPersistRecord::schema_version` when a blob
/// is persisted.
const LARGE_OUTPUT_PERSIST_SCHEMA_VERSION: u32 = 1;
/// Serializable handle identifying one stored raw tool output.
///
/// It is embedded as JSON in the `[workshop-ref: ...]` header line and inside
/// the persisted metadata record.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LargeOutputExternalRef {
/// Identifier of the form `lout_` followed by the first 8 characters of a
/// v4 UUID string when created through `LargeOutputExternalRef::new`.
pub ref_id: String,
/// Name of the tool that produced the output.
pub tool_name: String,
/// Size of the raw output as supplied by the caller of `new`
/// (`WorkshopVariables::store_raw` passes a `char` count, not a byte count).
pub char_count: usize,
/// Name of the variable holding the raw output; `new` always sets this to
/// `WORKSHOP_LAST_TOOL_RESULT_VAR`.
pub storage_var: String,
}
impl LargeOutputExternalRef {
    /// Builds a reference for `tool_name` with a freshly generated `ref_id`.
    ///
    /// The id is `lout_` plus the first 8 characters of a random v4 UUID's
    /// string form; `storage_var` is always `WORKSHOP_LAST_TOOL_RESULT_VAR`.
    #[must_use]
    pub fn new(tool_name: &str, char_count: usize) -> Self {
        let uuid_text = Uuid::new_v4().to_string();
        // The UUID string form is ASCII, so slicing at byte 8 is always on a
        // character boundary.
        let short_id = &uuid_text[..8];
        Self {
            ref_id: format!("lout_{short_id}"),
            tool_name: tool_name.to_owned(),
            char_count,
            storage_var: WORKSHOP_LAST_TOOL_RESULT_VAR.to_owned(),
        }
    }

    /// Serialises this reference as a single-line JSON string.
    ///
    /// Best-effort: if serialisation fails, the literal `{}` is returned
    /// instead of an error.
    #[must_use]
    pub fn to_json_line(&self) -> String {
        match serde_json::to_string(self) {
            Ok(line) => line,
            Err(_) => String::from("{}"),
        }
    }
}
/// Metadata record written as `<ref_id>.json` next to a persisted raw blob.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LargeOutputPersistRecord {
/// Format version of this record; `persist_large_output_blob` writes
/// `LARGE_OUTPUT_PERSIST_SCHEMA_VERSION`.
pub schema_version: u32,
/// The reference that identifies the stored output.
pub external_ref: LargeOutputExternalRef,
/// Session the blob was persisted under; used again to locate the raw file
/// in `load_raw_from_artifact_meta_path`.
pub session_id: String,
/// Byte length (`str::len`) of the raw output at persist time.
pub raw_bytes: usize,
}
/// Reports whether large outputs for `namespace` should be persisted.
///
/// Surrounding whitespace is ignored. Returns `false` when the trimmed
/// namespace is empty or exactly `"workspace"`, and `true` for anything else.
#[must_use]
pub fn should_persist_large_output_for_namespace(namespace: &str) -> bool {
    !matches!(namespace.trim(), "" | "workspace")
}
/// Returns `<sessions base>/<session_id>/large_outputs`.
///
/// The sessions base is the value of the `LARGE_OUTPUT_ROOT_ENV` environment
/// variable when it is set; otherwise it is whatever
/// `zagens_config::user_data_path_or_relative("sessions")` yields. The
/// directory is not created here.
#[must_use]
pub fn large_output_dir(session_id: &str) -> PathBuf {
    let mut dir = match std::env::var_os(LARGE_OUTPUT_ROOT_ENV) {
        Some(root) => PathBuf::from(root),
        None => zagens_config::user_data_path_or_relative("sessions"),
    };
    dir.push(session_id);
    dir.push("large_outputs");
    dir
}
/// Persists a raw tool output and its metadata record for `session_id`.
///
/// Two files are written into `large_output_dir(session_id)` (created if
/// missing): `<ref_id>.txt` holding `raw` verbatim and `<ref_id>.json`
/// holding a serialised [`LargeOutputPersistRecord`].
///
/// Returns the path of the metadata (`.json`) file.
///
/// # Errors
///
/// Returns an `InvalidData` error if the metadata record cannot be
/// serialised, or the underlying I/O error if the directory or either file
/// cannot be written.
pub fn persist_large_output_blob(
    session_id: &str,
    external_ref: &LargeOutputExternalRef,
    raw: &str,
) -> std::io::Result<PathBuf> {
    let record = LargeOutputPersistRecord {
        schema_version: LARGE_OUTPUT_PERSIST_SCHEMA_VERSION,
        external_ref: external_ref.clone(),
        session_id: session_id.to_string(),
        raw_bytes: raw.len(),
    };
    // Serialise before touching the filesystem so a serialisation failure is
    // reported as an error (not a panic) and leaves no orphaned raw blob.
    let encoded = serde_json::to_string(&record)
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;

    let dir = large_output_dir(session_id);
    std::fs::create_dir_all(&dir)?;

    let raw_path = dir.join(format!("{}.txt", external_ref.ref_id));
    std::fs::write(&raw_path, raw)?;

    let meta_path = dir.join(format!("{}.json", external_ref.ref_id));
    std::fs::write(&meta_path, encoded)?;
    Ok(meta_path)
}
pub fn load_large_output_persist_record(
session_id: &str,
ref_id: &str,
) -> std::io::Result<LargeOutputPersistRecord> {
let path = large_output_dir(session_id).join(format!("{ref_id}.json"));
let raw = std::fs::read_to_string(path)?;
serde_json::from_str(&raw).map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
}
/// Returns the path of the metadata file `<ref_id>.json` inside
/// `large_output_dir(session_id)`. The file is not required to exist.
#[must_use]
pub fn large_output_meta_path(session_id: &str, ref_id: &str) -> PathBuf {
    let mut path = large_output_dir(session_id);
    path.push(format!("{ref_id}.json"));
    path
}
/// Key under which `artifact_refs_from_tool_output` looks for the
/// large-output entry (an object with `meta_path` and/or `ref_id`) in a tool
/// result's metadata.
pub const LARGE_OUTPUT_METADATA_KEY: &str = "large_output";
/// Locates the persisted metadata file for a tool output, if one exists.
///
/// Candidates are tried in this order and the first that is an existing file
/// is returned as a single-element vector:
///
/// 1. `metadata[LARGE_OUTPUT_METADATA_KEY]["meta_path"]`, used as-is.
/// 2. `metadata[LARGE_OUTPUT_METADATA_KEY]["ref_id"]`, resolved against
///    `session_id` (skipped when `session_id` is `None`).
/// 3. The `[workshop-ref: ...]` line parsed out of `content`, resolved
///    against `session_id` (skipped when `session_id` is `None`).
///
/// Returns an empty vector when no candidate points at an existing file.
#[must_use]
pub fn artifact_refs_from_tool_output(
    session_id: Option<&str>,
    content: &str,
    metadata: Option<&serde_json::Value>,
) -> Vec<PathBuf> {
    // Keeps a candidate only when it names an existing regular file.
    let existing = |candidate: PathBuf| candidate.is_file().then_some(candidate);

    if let Some(entry) = metadata.and_then(|meta| meta.get(LARGE_OUTPUT_METADATA_KEY)) {
        let from_meta_path = entry
            .get("meta_path")
            .and_then(serde_json::Value::as_str)
            .map(PathBuf::from)
            .and_then(existing);
        if let Some(found) = from_meta_path {
            return vec![found];
        }

        let from_ref_id = entry
            .get("ref_id")
            .and_then(serde_json::Value::as_str)
            .zip(session_id)
            .map(|(ref_id, sid)| large_output_meta_path(sid, ref_id))
            .and_then(existing);
        if let Some(found) = from_ref_id {
            return vec![found];
        }
    }

    // Fall back to the reference embedded in the message text; this needs a
    // session id to resolve the on-disk location.
    session_id
        .and_then(|sid| {
            parse_workshop_ref_from_message(content)
                .map(|ext| large_output_meta_path(sid, &ext.ref_id))
        })
        .and_then(existing)
        .into_iter()
        .collect()
}
/// Loads the raw blob that the metadata file at `meta_path` describes.
///
/// The metadata is parsed as a [`LargeOutputPersistRecord`]; the raw file is
/// then read from `large_output_dir(record.session_id)/<ref_id>.txt`.
///
/// NOTE(review): the blob location is recomputed from the recorded session id
/// and the current environment, not from `meta_path`'s own directory, so a
/// changed root between persist and load would miss the blob — confirm this
/// is intended.
///
/// # Errors
///
/// Returns the I/O error from reading either file, or an `InvalidData` error
/// when the metadata is not a valid record.
pub fn load_raw_from_artifact_meta_path(meta_path: &Path) -> std::io::Result<String> {
    let meta_text = std::fs::read_to_string(meta_path)?;
    let record = serde_json::from_str::<LargeOutputPersistRecord>(&meta_text)
        .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))?;

    let blob_name = format!("{}.txt", record.external_ref.ref_id);
    let blob_path = large_output_dir(&record.session_id).join(blob_name);
    std::fs::read_to_string(blob_path)
}
/// Extracts the first parseable `[workshop-ref: {json}]` reference from
/// `message`.
///
/// Each line is trimmed and must start with `[workshop-ref: `; every trailing
/// `]` is stripped before the remainder is parsed as JSON. Lines that carry
/// the prefix but fail to parse are skipped. Returns `None` when no line
/// yields a reference.
#[must_use]
pub fn parse_workshop_ref_from_message(message: &str) -> Option<LargeOutputExternalRef> {
    for raw_line in message.lines() {
        let Some(rest) = raw_line.trim().strip_prefix("[workshop-ref: ") else {
            continue;
        };
        let payload = rest.trim_end_matches(']');
        if let Ok(parsed) = serde_json::from_str::<LargeOutputExternalRef>(payload) {
            return Some(parsed);
        }
    }
    None
}
/// Estimates the token count of `text` as its number of `char`s divided by
/// `CHARS_PER_TOKEN_ESTIMATE`, rounded up. Empty input yields `0`.
#[must_use]
pub fn estimate_tokens(text: &str) -> usize {
    text.chars().count().div_ceil(CHARS_PER_TOKEN_ESTIMATE)
}
/// Outcome of `LargeOutputRouter::route` for a single tool result.
#[derive(Debug, Clone, PartialEq)]
pub enum RouteDecision {
/// The result is not routed to synthesis.
PassThrough,
/// The result's estimated token count exceeds the tool's threshold.
Synthesise {
/// Estimate produced by `estimate_tokens` for the result content.
estimated_tokens: usize,
/// Threshold the estimate was compared against.
threshold: usize,
},
}
/// Decides, per tool result, whether the output should be synthesised, based
/// on thresholds supplied by a `WorkshopConfig`.
#[derive(Debug, Clone, Default)]
pub struct LargeOutputRouter {
/// Source of the per-tool thresholds (queried through `threshold_for`).
config: WorkshopConfig,
}
impl LargeOutputRouter {
    /// Creates a router that takes its thresholds from `config`.
    #[must_use]
    pub fn new(config: WorkshopConfig) -> Self {
        Self { config }
    }

    /// Decides how `result` from `tool_name` should be handled.
    ///
    /// Returns [`RouteDecision::PassThrough`] when `raw_bypass` is set, when
    /// the result is not successful, or when the estimated token count does
    /// not exceed the tool's threshold; otherwise returns
    /// [`RouteDecision::Synthesise`] carrying both numbers.
    #[must_use]
    pub fn route(&self, tool_name: &str, result: &ToolResult, raw_bypass: bool) -> RouteDecision {
        let eligible = result.success && !raw_bypass;
        if !eligible {
            return RouteDecision::PassThrough;
        }

        let threshold = self.config.threshold_for(tool_name);
        let estimated_tokens = estimate_tokens(&result.content);
        if estimated_tokens <= threshold {
            return RouteDecision::PassThrough;
        }
        RouteDecision::Synthesise {
            estimated_tokens,
            threshold,
        }
    }

    /// Builds the prompt asking a model to summarise `raw_output` from
    /// `tool_name`, quoting `estimated_tokens` and wrapping the raw text in
    /// `<raw_tool_output>` tags.
    // NOTE(review): no caller is visible in this file, hence the lint allowance.
    #[must_use]
    #[allow(dead_code)]
    pub fn synthesis_prompt(tool_name: &str, raw_output: &str, estimated_tokens: usize) -> String {
        format!(
            "You are a synthesis assistant. The tool `{tool_name}` produced {estimated_tokens} tokens \
             of output that is too large to include directly in the parent context.\n\n\
             Summarise the output below into a concise, faithful synthesis of ≤ 800 words. \
             Preserve key facts, numbers, file paths, error messages, and any actionable \
             information. Do NOT add commentary or interpretation beyond what is in the source.\n\n\
             <raw_tool_output>\n{raw_output}\n</raw_tool_output>"
        )
    }

    /// Prefixes `synthesis` with provenance headers.
    ///
    /// When `external_ref` is given, a `[workshop-ref: {json}]` line comes
    /// first; it is followed by the `[workshop-synthesis: ...]` line naming
    /// the tool, the token estimate, the threshold and the storage variable,
    /// then a blank line and the synthesis text.
    #[must_use]
    pub fn wrap_synthesis(
        tool_name: &str,
        synthesis: &str,
        estimated_tokens: usize,
        threshold: usize,
        external_ref: Option<&LargeOutputExternalRef>,
    ) -> String {
        let ref_line = match external_ref {
            Some(reference) => format!("[workshop-ref: {}]\n", reference.to_json_line()),
            None => String::new(),
        };
        format!(
            "{ref_line}[workshop-synthesis: tool={tool_name}, raw_tokens≈{estimated_tokens}, \
             threshold={threshold}, raw_stored_in={WORKSHOP_LAST_TOOL_RESULT_VAR}]\n\n{synthesis}"
        )
    }
}
/// Holds the most recent raw tool output together with its provenance.
///
/// Every field falls back to its default when absent during deserialisation.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct WorkshopVariables {
/// Raw content of the last stored tool output; empty when nothing is stored.
#[serde(default)]
pub last_tool_result: String,
/// Name of the tool that produced `last_tool_result`.
#[serde(default)]
pub last_tool_name: String,
/// Reference created by the last `store_raw` call; cleared by `take_raw`.
#[serde(default)]
pub last_output_ref: Option<LargeOutputExternalRef>,
}
impl WorkshopVariables {
    /// Stores `raw` as the latest tool output for `tool_name`, replacing any
    /// previous one, and returns the newly created reference (whose
    /// `char_count` is the number of `char`s in `raw`).
    pub fn store_raw(&mut self, tool_name: &str, raw: &str) -> LargeOutputExternalRef {
        let char_count = raw.chars().count();
        let created = LargeOutputExternalRef::new(tool_name, char_count);
        self.last_tool_name = tool_name.to_owned();
        self.last_tool_result = raw.to_owned();
        self.last_output_ref.insert(created).clone()
    }

    /// Removes and returns the stored output as `(tool_name, content)`.
    ///
    /// Returns `None` and leaves the state untouched when the stored content
    /// is empty; otherwise all three fields are reset.
    // NOTE(review): no non-test caller is visible in this file, hence the lint allowance.
    #[must_use]
    #[allow(dead_code)]
    pub fn take_raw(&mut self) -> Option<(String, String)> {
        if self.last_tool_result.is_empty() {
            return None;
        }
        self.last_output_ref = None;
        let name = std::mem::take(&mut self.last_tool_name);
        let content = std::mem::take(&mut self.last_tool_result);
        Some((name, content))
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Serialises the tests that redirect `LARGE_OUTPUT_ROOT_ENV`.
    ///
    /// The test harness runs tests on multiple threads and the environment is
    /// process-wide, so without this lock one test could persist into (or
    /// clear) another test's root directory mid-run.
    static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Points `LARGE_OUTPUT_ROOT_ENV` at a fresh temp dir for the lifetime of
    /// the guard and clears the variable on drop, including when a test
    /// panics.
    struct ScopedLargeOutputRoot {
        // Field order matters: the temp dir is removed before the lock is
        // released, so the next test never observes a half-deleted root.
        _tmp: tempfile::TempDir,
        _lock: std::sync::MutexGuard<'static, ()>,
    }

    impl ScopedLargeOutputRoot {
        fn new() -> Self {
            // A poisoned lock only means another env test failed; the guard
            // still provides the mutual exclusion needed here.
            let lock = ENV_LOCK
                .lock()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            let tmp = tempfile::tempdir().expect("tempdir");
            // SAFETY: all writers of this variable in this module hold
            // `ENV_LOCK`. Assumes no other thread in the test binary accesses
            // the environment through non-std APIs concurrently.
            unsafe { std::env::set_var(LARGE_OUTPUT_ROOT_ENV, tmp.path()) };
            Self {
                _tmp: tmp,
                _lock: lock,
            }
        }
    }

    impl Drop for ScopedLargeOutputRoot {
        fn drop(&mut self) {
            // SAFETY: `ENV_LOCK` is still held here (fields drop after this
            // body runs); same assumption as in `new`.
            unsafe { std::env::remove_var(LARGE_OUTPUT_ROOT_ENV) };
        }
    }

    fn make_result(content: &str) -> ToolResult {
        ToolResult::success(content.to_string())
    }

    #[test]
    fn pass_through_below_threshold() {
        let router = LargeOutputRouter::default();
        let small = "x".repeat(100);
        let result = make_result(&small);
        assert_eq!(
            router.route("read_file", &result, false),
            RouteDecision::PassThrough
        );
    }

    #[test]
    fn synthesise_above_threshold() {
        let router = LargeOutputRouter::default();
        let big = "a".repeat(13_000);
        let result = make_result(&big);
        assert!(matches!(
            router.route("read_file", &result, false),
            RouteDecision::Synthesise { .. }
        ));
    }

    #[test]
    fn raw_bypass_skips_routing() {
        let router = LargeOutputRouter::default();
        let big = "a".repeat(13_000);
        let result = make_result(&big);
        assert_eq!(
            router.route("exec_shell", &result, true),
            RouteDecision::PassThrough
        );
    }

    #[test]
    fn error_results_always_pass_through() {
        let router = LargeOutputRouter::default();
        let big = "error: ".repeat(2_000);
        let result = ToolResult::error(big);
        assert_eq!(
            router.route("exec_shell", &result, false),
            RouteDecision::PassThrough
        );
    }

    #[test]
    fn per_tool_threshold_override() {
        let mut per_tool = HashMap::new();
        per_tool.insert("grep_files".to_string(), 100);
        let config = WorkshopConfig {
            large_output_threshold_tokens: Some(4096),
            per_tool_thresholds: Some(per_tool),
        };
        let router = LargeOutputRouter::new(config);
        let medium = "b".repeat(400);
        let result = make_result(&medium);
        assert!(matches!(
            router.route("grep_files", &result, false),
            RouteDecision::Synthesise { .. }
        ));
        assert_eq!(
            router.route("read_file", &result, false),
            RouteDecision::PassThrough
        );
    }

    #[test]
    fn synthesise_at_one_megabyte_boundary() {
        let router = LargeOutputRouter::default();
        let one_mb_plus = "z".repeat(1_100_000);
        let result = make_result(&one_mb_plus);
        match router.route("read_file", &result, false) {
            RouteDecision::Synthesise {
                estimated_tokens,
                threshold,
            } => {
                assert!(estimated_tokens > threshold);
                assert!(
                    estimated_tokens >= 350_000,
                    "1.1MB should estimate well above threshold"
                );
            }
            RouteDecision::PassThrough => panic!("1.1 MB output must route to synthesis"),
        }
    }

    #[test]
    fn estimate_tokens_conservative() {
        assert_eq!(estimate_tokens("123456789"), 3);
        assert_eq!(estimate_tokens("1234567890"), 4);
        assert_eq!(estimate_tokens(""), 0);
    }

    #[test]
    fn workshop_variables_store_and_take() {
        let mut vars = WorkshopVariables::default();
        assert!(vars.take_raw().is_none());
        vars.store_raw("read_file", "raw content here");
        let taken = vars.take_raw().expect("should have content");
        assert_eq!(taken.0, "read_file");
        assert_eq!(taken.1, "raw content here");
        assert!(vars.last_output_ref.is_none());
        assert!(vars.take_raw().is_none());
    }

    #[test]
    fn store_raw_records_external_ref() {
        let mut vars = WorkshopVariables::default();
        let big = "y".repeat(10_000);
        let external_ref = vars.store_raw("grep_files", &big);
        assert!(external_ref.ref_id.starts_with("lout_"));
        assert_eq!(external_ref.tool_name, "grep_files");
        assert_eq!(external_ref.char_count, 10_000);
        assert_eq!(
            vars.last_output_ref.as_ref().map(|r| r.ref_id.as_str()),
            Some(external_ref.ref_id.as_str())
        );
    }

    #[test]
    fn wrap_synthesis_includes_provenance_header() {
        let external_ref = LargeOutputExternalRef::new("web_search", 5000);
        let wrapped = LargeOutputRouter::wrap_synthesis(
            "web_search",
            "key facts here",
            5000,
            4096,
            Some(&external_ref),
        );
        assert!(wrapped.contains("workshop-synthesis"));
        assert!(wrapped.contains("workshop-ref:"));
        assert!(wrapped.contains("web_search"));
        assert!(wrapped.contains("5000"));
        assert!(wrapped.contains("key facts here"));
        assert!(wrapped.contains(&external_ref.ref_id));
    }

    #[test]
    fn should_persist_skips_workspace_namespace() {
        assert!(!should_persist_large_output_for_namespace("workspace"));
        assert!(!should_persist_large_output_for_namespace(""));
        assert!(should_persist_large_output_for_namespace("sess_abc"));
    }

    #[test]
    fn large_output_persist_round_trip() {
        let _root = ScopedLargeOutputRoot::new();
        let session_id = "sess_test_roundtrip";
        let raw = "payload-".repeat(800);
        let external_ref = LargeOutputExternalRef::new("read_file", raw.chars().count());
        persist_large_output_blob(session_id, &external_ref, &raw).expect("persist");
        let wrapped = LargeOutputRouter::wrap_synthesis(
            "read_file",
            "summary",
            5000,
            4096,
            Some(&external_ref),
        );
        let parsed =
            parse_workshop_ref_from_message(&wrapped).expect("workshop-ref line in synthesis");
        assert_eq!(parsed.ref_id, external_ref.ref_id);
        let record =
            load_large_output_persist_record(session_id, &parsed.ref_id).expect("load meta");
        assert_eq!(record.schema_version, LARGE_OUTPUT_PERSIST_SCHEMA_VERSION);
        assert_eq!(record.session_id, session_id);
        assert_eq!(record.raw_bytes, raw.len());
        let raw_path = large_output_dir(session_id).join(format!("{}.txt", parsed.ref_id));
        assert_eq!(std::fs::read_to_string(raw_path).expect("raw blob"), raw);
    }

    #[test]
    fn artifact_refs_from_metadata_meta_path() {
        let _root = ScopedLargeOutputRoot::new();
        let session_id = "sess_meta_path";
        let raw = "blob".repeat(500);
        let external_ref = LargeOutputExternalRef::new("grep_files", raw.chars().count());
        let meta_path =
            persist_large_output_blob(session_id, &external_ref, &raw).expect("persist");
        let wrapped = LargeOutputRouter::wrap_synthesis(
            "grep_files",
            "summary",
            5000,
            4096,
            Some(&external_ref),
        );
        let metadata = serde_json::json!({
            LARGE_OUTPUT_METADATA_KEY: {
                "ref_id": external_ref.ref_id,
                "meta_path": meta_path.display().to_string(),
            }
        });
        let refs = artifact_refs_from_tool_output(None, &wrapped, Some(&metadata));
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0], meta_path);
        let loaded = load_raw_from_artifact_meta_path(&refs[0]).expect("load raw via meta");
        assert_eq!(loaded, raw);
    }

    #[test]
    fn artifact_refs_fallback_workshop_ref_with_session_id() {
        let _root = ScopedLargeOutputRoot::new();
        let session_id = "sess_workshop_fallback";
        let raw = "z".repeat(2000);
        let external_ref = LargeOutputExternalRef::new("read_file", raw.chars().count());
        persist_large_output_blob(session_id, &external_ref, &raw).expect("persist");
        let wrapped = LargeOutputRouter::wrap_synthesis(
            "read_file",
            "summary",
            5000,
            4096,
            Some(&external_ref),
        );
        let refs = artifact_refs_from_tool_output(Some(session_id), &wrapped, None);
        assert_eq!(refs.len(), 1);
        assert_eq!(
            load_raw_from_artifact_meta_path(&refs[0]).expect("load"),
            raw
        );
    }
}
use std::sync::Arc;
use tokio::sync::Mutex;
/// Newtype wrapping an optional, shared handle to [`WorkshopVariables`]
/// guarded by a `tokio::sync::Mutex`.
// NOTE(review): presumably `None` means no workshop state is attached — confirm with callers.
pub struct TuiWorkshopHost(pub Option<Arc<Mutex<WorkshopVariables>>>);
// Empty impl: nothing is overridden here, so any behaviour comes from the
// `WorkshopHost` trait's own defaults (the trait definition is not visible in this file).
impl zagens_core::engine::hosts::WorkshopHost for TuiWorkshopHost {}