use defect_config::{FetchFormat, FetchToolConfig};
pub(super) fn render(
body: &[u8],
content_type: Option<&str>,
format: FetchFormat,
config: &FetchToolConfig,
) -> Result<String, String> {
if body.is_empty() {
return Ok(String::new());
}
let mime = content_type
.map(parse_main_type)
.unwrap_or(MainType::Unknown);
match format {
FetchFormat::Markdown => render_markdown(body, mime, content_type, config),
FetchFormat::Html => render_html(body, mime, content_type),
FetchFormat::Text => render_text(body, mime, content_type),
}
}
fn render_markdown(
body: &[u8],
mime: MainType,
raw_ct: Option<&str>,
config: &FetchToolConfig,
) -> Result<String, String> {
match mime {
MainType::Html => {
let html = body_as_str(body)?;
if !config.html_to_markdown {
return Ok(format!(
"<!-- html_to_markdown disabled in config; returning raw HTML -->\n{html}"
));
}
htmd::convert(html).map_err(|e| format!("html-to-markdown conversion failed: {e}"))
}
MainType::Text => Ok(body_as_str(body)?.to_string()),
MainType::Binary | MainType::Unknown => Err(format_unsupported("markdown", raw_ct)),
}
}
fn render_html(body: &[u8], mime: MainType, raw_ct: Option<&str>) -> Result<String, String> {
match mime {
MainType::Html => Ok(body_as_str(body)?.to_string()),
_ => Err(format!("not HTML: {}", raw_ct.unwrap_or("<unset>"))),
}
}
fn render_text(body: &[u8], mime: MainType, raw_ct: Option<&str>) -> Result<String, String> {
match mime {
MainType::Html => {
let html = body_as_str(body)?;
let md =
htmd::convert(html).map_err(|e| format!("html-to-text conversion failed: {e}"))?;
Ok(strip_markdown(&md))
}
MainType::Text => Ok(body_as_str(body)?.to_string()),
MainType::Binary | MainType::Unknown => Err(format!(
"binary content-type: {}",
raw_ct.unwrap_or("<unset>")
)),
}
}
fn body_as_str(body: &[u8]) -> Result<&str, String> {
std::str::from_utf8(body).map_err(|e| format!("response body is not valid UTF-8: {e}"))
}
fn format_unsupported(format: &str, raw_ct: Option<&str>) -> String {
format!(
"unsupported content-type for {format} format: {}",
raw_ct.unwrap_or("<unset>")
)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MainType {
Html,
Text,
Binary,
Unknown,
}
fn parse_main_type(content_type: &str) -> MainType {
let head = content_type
.split(';')
.next()
.unwrap_or("")
.trim()
.to_ascii_lowercase();
if head == "text/html" || head == "application/xhtml+xml" {
return MainType::Html;
}
if head.starts_with("text/") {
return MainType::Text;
}
if head == "application/json" || head == "application/xml" {
return MainType::Text;
}
if head.is_empty() {
return MainType::Unknown;
}
MainType::Binary
}
fn strip_markdown(md: &str) -> String {
let mut out = String::with_capacity(md.len());
let mut chars = md.chars().peekable();
while let Some(c) = chars.next() {
match c {
'#' | '*' | '_' | '`' | '>' => {
while chars.peek() == Some(&c) {
chars.next();
}
}
'[' => {
let mut text = String::new();
let mut closed_text = false;
for ch in chars.by_ref() {
if ch == ']' {
closed_text = true;
break;
}
text.push(ch);
}
out.push_str(&text);
if closed_text && chars.peek() == Some(&'(') {
chars.next();
for ch in chars.by_ref() {
if ch == ')' {
break;
}
}
}
}
_ => out.push(c),
}
}
out
}