harness_webfetch/
format.rs1use std::fs;
2use std::path::{Path, PathBuf};
3use url::Url;
4
5use crate::types::FetchMetadata;
6
7pub fn render_request_block(meta: &FetchMetadata) -> String {
8 let chain = meta.redirect_chain.join(" -> ");
9 format!(
10 "<request>\n <url>{}</url>\n <final_url>{}</final_url>\n <method>{}</method>\n <status>{}</status>\n <content_type>{}</content_type>\n <redirect_chain>{}</redirect_chain>\n</request>",
11 meta.url,
12 meta.final_url,
13 meta.method.as_str(),
14 meta.status,
15 meta.content_type,
16 chain,
17 )
18}
19
20pub struct FormatOkArgs<'a> {
21 pub meta: &'a FetchMetadata,
22 pub extract_hint: &'a str,
23 pub markdown: Option<&'a str>,
24 pub raw: Option<&'a str>,
25 pub log_path: Option<&'a str>,
26 pub byte_cap: bool,
27 pub total_bytes: usize,
28}
29
30pub fn format_ok_text(args: FormatOkArgs<'_>) -> String {
31 let header = render_request_block(args.meta);
32 let body_inner = match args.extract_hint {
33 "markdown" => args.markdown.unwrap_or("").to_string(),
34 "raw" => args.raw.unwrap_or("").to_string(),
35 "both" => format!(
36 "<markdown>\n{}\n</markdown>\n<raw_body>\n{}\n</raw_body>",
37 args.markdown.unwrap_or(""),
38 args.raw.unwrap_or(""),
39 ),
40 _ => String::new(),
41 };
42 let body_block = format!("<body extract=\"{}\">\n{}\n</body>", args.extract_hint, body_inner);
43
44 let hint = if args.byte_cap && args.log_path.is_some() {
45 format!(
46 "(Response exceeded inline cap; showing head+tail of {} bytes. Full response at {} — Read with offset/limit to paginate.)",
47 args.total_bytes,
48 args.log_path.unwrap(),
49 )
50 } else {
51 let original_host = host_of(&args.meta.url);
52 let final_host = host_of(&args.meta.final_url);
53 let warn = if args.meta.url != args.meta.final_url && original_host != final_host {
54 format!(
55 " (Final URL host differs from original: {} -> {}. Verify this is expected.)",
56 original_host, final_host
57 )
58 } else {
59 String::new()
60 };
61 let cache_tag = if args.meta.from_cache {
62 let age = args.meta.cache_age_sec.unwrap_or(0);
63 format!(" (Served from session cache; age {}s.)", age)
64 } else {
65 String::new()
66 };
67 let ct = if args.meta.content_type.is_empty() {
68 "unknown".to_string()
69 } else {
70 args.meta.content_type.clone()
71 };
72 format!(
73 "(Response complete. {} bytes total. Content-type: {}. Fetched in {}ms.{}{})",
74 args.total_bytes, ct, args.meta.fetched_ms, warn, cache_tag
75 )
76 };
77
78 format!("{}\n{}\n{}", header, body_block, hint)
79}
80
81pub struct FormatRedirectLoopArgs<'a> {
82 pub meta: &'a FetchMetadata,
83 pub max_redirects: u32,
84}
85
86pub fn format_redirect_loop_text(args: FormatRedirectLoopArgs<'_>) -> String {
87 let header = render_request_block(args.meta);
88 let chain = args.meta.redirect_chain.join(" -> ");
89 let hint = format!(
90 "(Redirect limit ({}) exceeded. Chain: {}. Set max_redirects higher OR pass the final URL directly.)",
91 args.max_redirects, chain
92 );
93 format!("{}\n{}", header, hint)
94}
95
96pub struct FormatHttpErrorArgs<'a> {
97 pub meta: &'a FetchMetadata,
98 pub body: &'a str,
99}
100
101pub fn format_http_error_text(args: FormatHttpErrorArgs<'_>) -> String {
102 let header = render_request_block(args.meta);
103 let body_block = format!("<body>\n{}\n</body>", args.body);
104 let hint = format!(
105 "(HTTP {}. {}. Retry or adjust the request per the body.)",
106 args.meta.status,
107 short_reason(args.meta.status),
108 );
109 format!("{}\n{}\n{}", header, body_block, hint)
110}
111
/// Maps an HTTP status code to a short reason phrase for error hints.
///
/// A handful of common codes get a dedicated phrase. Any other 4xx code is
/// reported as "Client error", any other code of 500 or above as
/// "Server error", and everything else as "Non-success status".
fn short_reason(status: u16) -> &'static str {
    match status {
        // Client-side codes with a dedicated phrase.
        400 => "Bad Request",
        401 => "Unauthorized — check auth headers",
        403 => "Forbidden — check permissions or auth",
        404 => "Not Found",
        408 => "Request Timeout",
        410 => "Gone",
        418 => "I'm a teapot",
        429 => "Too Many Requests — back off",
        // Server-side codes with a dedicated phrase.
        500 => "Internal Server Error",
        502 => "Bad Gateway",
        503 => "Service Unavailable",
        504 => "Gateway Timeout",
        // Generic fallbacks; the specific arms above take precedence.
        400..=499 => "Client error",
        500..=u16::MAX => "Server error",
        _ => "Non-success status",
    }
}
131
132pub fn host_of(url: &str) -> String {
133 Url::parse(url)
134 .ok()
135 .and_then(|u| u.host_str().map(|s| s.to_string()))
136 .unwrap_or_default()
137}
138
/// Inputs for [`spill_to_file`].
pub struct SpillArgs<'a> {
    /// The payload written to disk.
    pub bytes: &'a [u8],
    /// Base directory under which the per-session directory is created.
    pub dir: &'a Path,
    /// Name of the sub-directory of `dir` that receives the file.
    pub session_id: &'a str,
    /// Content type used to choose the file extension.
    pub content_type: &'a str,
}
147
148pub fn spill_to_file(args: SpillArgs<'_>) -> std::io::Result<PathBuf> {
149 let dir = args.dir.join(args.session_id);
150 fs::create_dir_all(&dir)?;
151 let ext = extension_for(args.content_type);
152 let filename = format!("{}.{}", uuid::Uuid::new_v4(), ext);
153 let path = dir.join(filename);
154 fs::write(&path, args.bytes)?;
155 Ok(path)
156}
157
/// Chooses a file extension for a content type.
///
/// Matching is case-insensitive (ASCII) and substring-based. The first rule
/// in the table whose needle occurs in the content type wins; `"bin"` is
/// returned when none matches.
fn extension_for(content_type: &str) -> &'static str {
    // Order matters: e.g. "application/xhtml+xml" must hit "xhtml" before
    // "xml", and "text/csv" must hit "csv" before the generic "text/".
    const RULES: &[(&str, &str)] = &[
        ("text/html", "html"),
        ("xhtml", "html"),
        ("json", "json"),
        ("xml", "xml"),
        ("csv", "csv"),
        ("markdown", "md"),
        ("text/", "txt"),
    ];

    let normalized = content_type.to_ascii_lowercase();
    RULES
        .iter()
        .find(|(needle, _)| normalized.contains(*needle))
        .map_or("bin", |&(_, ext)| ext)
}
176
/// Builds an inline preview of `bytes`, keeping only the first `head_bytes`
/// and the last `tail_bytes` when the input is longer than both combined.
///
/// * If `bytes.len() <= head_bytes + tail_bytes` the whole input is returned,
///   decoded lossily as UTF-8.
/// * Otherwise the result is the head, an elision marker stating how many
///   bytes were dropped and where the full response lives (`log_path`), and
///   the tail. Head and tail are each decoded lossily, so a cut that lands
///   inside a multi-byte sequence shows up as U+FFFD.
///
/// Never panics: a `head_bytes + tail_bytes` sum that would overflow `usize`
/// is treated as "larger than any input".
pub fn head_and_tail(
    bytes: &[u8],
    head_bytes: usize,
    tail_bytes: usize,
    log_path: &str,
) -> String {
    // A plain `head_bytes + tail_bytes` panics on overflow in debug builds
    // and wraps in release builds, where the slices below would then go out
    // of range. `checked_add` makes an overflowing window count as
    // "everything fits".
    let fits_inline = head_bytes
        .checked_add(tail_bytes)
        .map_or(true, |window| bytes.len() <= window);
    if fits_inline {
        return String::from_utf8_lossy(bytes).into_owned();
    }

    // Past the guard, head_bytes + tail_bytes < bytes.len(), so both slices
    // are in range and the subtraction cannot underflow.
    let head = String::from_utf8_lossy(&bytes[..head_bytes]);
    let tail = String::from_utf8_lossy(&bytes[bytes.len() - tail_bytes..]);
    let elided = bytes.len() - head_bytes - tail_bytes;
    format!(
        "{}\n\n... ({} bytes elided; full response at {}) ...\n\n{}",
        head, elided, log_path, tail
    )
}