// harness_webfetch/format.rs

use std::fs;
use std::path::{Path, PathBuf};
use url::Url;

use crate::types::FetchMetadata;
6
7pub fn render_request_block(meta: &FetchMetadata) -> String {
8    let chain = meta.redirect_chain.join(" -> ");
9    format!(
10        "<request>\n  <url>{}</url>\n  <final_url>{}</final_url>\n  <method>{}</method>\n  <status>{}</status>\n  <content_type>{}</content_type>\n  <redirect_chain>{}</redirect_chain>\n</request>",
11        meta.url,
12        meta.final_url,
13        meta.method.as_str(),
14        meta.status,
15        meta.content_type,
16        chain,
17    )
18}
19
20pub struct FormatOkArgs<'a> {
21    pub meta: &'a FetchMetadata,
22    pub extract_hint: &'a str,
23    pub markdown: Option<&'a str>,
24    pub raw: Option<&'a str>,
25    pub log_path: Option<&'a str>,
26    pub byte_cap: bool,
27    pub total_bytes: usize,
28}
29
30pub fn format_ok_text(args: FormatOkArgs<'_>) -> String {
31    let header = render_request_block(args.meta);
32    let body_inner = match args.extract_hint {
33        "markdown" => args.markdown.unwrap_or("").to_string(),
34        "raw" => args.raw.unwrap_or("").to_string(),
35        "both" => format!(
36            "<markdown>\n{}\n</markdown>\n<raw_body>\n{}\n</raw_body>",
37            args.markdown.unwrap_or(""),
38            args.raw.unwrap_or(""),
39        ),
40        _ => String::new(),
41    };
42    let body_block = format!("<body extract=\"{}\">\n{}\n</body>", args.extract_hint, body_inner);
43
44    let hint = if args.byte_cap && args.log_path.is_some() {
45        format!(
46            "(Response exceeded inline cap; showing head+tail of {} bytes. Full response at {} — Read with offset/limit to paginate.)",
47            args.total_bytes,
48            args.log_path.unwrap(),
49        )
50    } else {
51        let original_host = host_of(&args.meta.url);
52        let final_host = host_of(&args.meta.final_url);
53        let warn = if args.meta.url != args.meta.final_url && original_host != final_host {
54            format!(
55                " (Final URL host differs from original: {} -> {}. Verify this is expected.)",
56                original_host, final_host
57            )
58        } else {
59            String::new()
60        };
61        let cache_tag = if args.meta.from_cache {
62            let age = args.meta.cache_age_sec.unwrap_or(0);
63            format!(" (Served from session cache; age {}s.)", age)
64        } else {
65            String::new()
66        };
67        let ct = if args.meta.content_type.is_empty() {
68            "unknown".to_string()
69        } else {
70            args.meta.content_type.clone()
71        };
72        format!(
73            "(Response complete. {} bytes total. Content-type: {}. Fetched in {}ms.{}{})",
74            args.total_bytes, ct, args.meta.fetched_ms, warn, cache_tag
75        )
76    };
77
78    format!("{}\n{}\n{}", header, body_block, hint)
79}
80
81pub struct FormatRedirectLoopArgs<'a> {
82    pub meta: &'a FetchMetadata,
83    pub max_redirects: u32,
84}
85
86pub fn format_redirect_loop_text(args: FormatRedirectLoopArgs<'_>) -> String {
87    let header = render_request_block(args.meta);
88    let chain = args.meta.redirect_chain.join(" -> ");
89    let hint = format!(
90        "(Redirect limit ({}) exceeded. Chain: {}. Set max_redirects higher OR pass the final URL directly.)",
91        args.max_redirects, chain
92    );
93    format!("{}\n{}", header, hint)
94}
95
96pub struct FormatHttpErrorArgs<'a> {
97    pub meta: &'a FetchMetadata,
98    pub body: &'a str,
99}
100
101pub fn format_http_error_text(args: FormatHttpErrorArgs<'_>) -> String {
102    let header = render_request_block(args.meta);
103    let body_block = format!("<body>\n{}\n</body>", args.body);
104    let hint = format!(
105        "(HTTP {}. {}. Retry or adjust the request per the body.)",
106        args.meta.status,
107        short_reason(args.meta.status),
108    );
109    format!("{}\n{}\n{}", header, body_block, hint)
110}
111
/// Maps an HTTP status code to a short human-readable reason.
///
/// Common 4xx/5xx codes get a specific phrase (some with a remediation tip).
/// Any other 4xx is "Client error" and any other 5xx is "Server error".
/// Everything outside 400..=599, including out-of-range values such as 600,
/// falls back to "Non-success status".
fn short_reason(status: u16) -> &'static str {
    match status {
        400 => "Bad Request",
        401 => "Unauthorized — check auth headers",
        403 => "Forbidden — check permissions or auth",
        404 => "Not Found",
        408 => "Request Timeout",
        410 => "Gone",
        418 => "I'm a teapot",
        429 => "Too Many Requests — back off",
        500 => "Internal Server Error",
        502 => "Bad Gateway",
        503 => "Service Unavailable",
        504 => "Gateway Timeout",
        400..=499 => "Client error",
        // Bounded above: codes >= 600 are not server errors.
        500..=599 => "Server error",
        _ => "Non-success status",
    }
}
131
132pub fn host_of(url: &str) -> String {
133    Url::parse(url)
134        .ok()
135        .and_then(|u| u.host_str().map(|s| s.to_string()))
136        .unwrap_or_default()
137}
138
// ---- spill-to-file ----
140
/// Inputs for [`spill_to_file`].
///
/// All fields are borrows of standard-library types, so the bundle is cheap
/// to copy and printable for diagnostics.
#[derive(Debug, Clone, Copy)]
pub struct SpillArgs<'a> {
    /// Payload written to the spill file.
    pub bytes: &'a [u8],
    /// Base directory; the file is placed in a `session_id` subdirectory of it.
    pub dir: &'a Path,
    /// Name of the per-session subdirectory under `dir`.
    pub session_id: &'a str,
    /// Content type used to pick the file extension.
    pub content_type: &'a str,
}
147
148pub fn spill_to_file(args: SpillArgs<'_>) -> std::io::Result<PathBuf> {
149    let dir = args.dir.join(args.session_id);
150    fs::create_dir_all(&dir)?;
151    let ext = extension_for(args.content_type);
152    let filename = format!("{}.{}", uuid::Uuid::new_v4(), ext);
153    let path = dir.join(filename);
154    fs::write(&path, args.bytes)?;
155    Ok(path)
156}
157
/// Picks a file extension for a `Content-Type` value.
///
/// Only the media type itself is inspected: parameters after `;` (charset,
/// boundary, ...) are dropped and the comparison is case-insensitive.
/// Checks run in this order: HTML/XHTML, JSON, XML, CSV, Markdown, any other
/// `text/*`, and finally `"bin"` for everything else (including empty input).
///
/// XML is recognised by subtype (`xml` or a `+xml` suffix) rather than by
/// substring, so ZIP-based OOXML types such as
/// `application/vnd.openxmlformats-officedocument...` are not saved as `.xml`.
fn extension_for(content_type: &str) -> &'static str {
    let essence = content_type
        .split(';')
        .next()
        .unwrap_or("")
        .trim()
        .to_ascii_lowercase();
    // Text after the last '/', or the whole value when there is no slash.
    let subtype = essence.rsplit('/').next().unwrap_or("");

    if essence.contains("text/html") || essence.contains("xhtml") {
        "html"
    } else if essence.contains("json") {
        "json"
    } else if subtype == "xml" || subtype.ends_with("+xml") {
        "xml"
    } else if essence.contains("csv") {
        "csv"
    } else if essence.contains("markdown") {
        "md"
    } else if essence.contains("text/") {
        "txt"
    } else {
        "bin"
    }
}
176
/// Returns the head and tail of `bytes` joined by an elision marker.
///
/// If `bytes` is no longer than `head_bytes + tail_bytes`, the whole input is
/// returned (lossily decoded as UTF-8). Otherwise the result is at most the
/// first `head_bytes` and the last `tail_bytes`, separated by a marker that
/// states how many bytes were left out and where the full response is stored
/// (`log_path`).
///
/// Both cut points are moved (by up to three bytes) so that they do not fall
/// inside a multi-byte UTF-8 sequence. The head only ever shrinks and the tail
/// only ever shrinks, so the requested caps are still respected. The elided
/// count reflects the bytes actually omitted.
pub fn head_and_tail(
    bytes: &[u8],
    head_bytes: usize,
    tail_bytes: usize,
    log_path: &str,
) -> String {
    /// True for UTF-8 continuation bytes (`10xxxxxx`).
    fn is_continuation(byte: u8) -> bool {
        (byte & 0xC0) == 0x80
    }

    let len = bytes.len();
    // Saturating add: huge caps must mean "return everything", not overflow.
    if len <= head_bytes.saturating_add(tail_bytes) {
        return String::from_utf8_lossy(bytes).into_owned();
    }

    // From here on head_bytes + tail_bytes < len, so every index below is in range.

    // Walk the head cut backwards to the nearest character start. If no start
    // is found within three bytes (not UTF-8 text), keep the requested cut.
    let head_end = (head_bytes.saturating_sub(3)..=head_bytes)
        .rev()
        .find(|&i| !is_continuation(bytes[i]))
        .unwrap_or(head_bytes);

    // Walk the tail cut forwards to the nearest character start (or the end).
    let tail_cut = len - tail_bytes;
    let tail_start = (tail_cut..=len.min(tail_cut + 3))
        .find(|&i| i == len || !is_continuation(bytes[i]))
        .unwrap_or(tail_cut);

    let head = String::from_utf8_lossy(&bytes[..head_end]);
    let tail = String::from_utf8_lossy(&bytes[tail_start..]);
    let elided = tail_start - head_end;
    format!(
        "{}\n\n... ({} bytes elided; full response at {}) ...\n\n{}",
        head, elided, log_path, tail
    )
}