use super::super::{McpToolCall, McpToolResult};
use anyhow::{anyhow, Result};
use html2text::from_read;
use reqwest;
use serde_json::{json, Value};
use std::io::Cursor;
use std::path::Path;
use tokio::fs as tokio_fs;
use url::Url;
pub async fn execute_read_html(call: &McpToolCall) -> Result<McpToolResult> {
let sources_value = match call.parameters.get("sources") {
Some(value) => value,
_ => {
return Ok(McpToolResult::error(
call.tool_name.clone(),
call.tool_id.clone(),
"Missing 'sources' parameter".to_string(),
))
}
};
match sources_value {
Value::String(source) => {
let trimmed = source.trim();
if trimmed.starts_with('[') && trimmed.ends_with(']') {
match serde_json::from_str::<Vec<String>>(trimmed) {
Ok(parsed_sources) => {
convert_multiple_html_to_md(call, &parsed_sources).await
}
Err(_) => {
convert_single_html_to_md(call, source).await
}
}
} else {
convert_single_html_to_md(call, source).await
}
}
Value::Array(sources) => {
let mut source_strings = Vec::new();
for source in sources {
match source.as_str() {
Some(s) => source_strings.push(s.to_string()),
None => {
return Ok(McpToolResult::error(
call.tool_name.clone(),
call.tool_id.clone(),
"Invalid source in array - all sources must be strings".to_string(),
))
}
}
}
convert_multiple_html_to_md(call, &source_strings).await
}
_ => Ok(McpToolResult::error(
call.tool_name.clone(),
call.tool_id.clone(),
"'sources' parameter must be a string or array of strings".to_string(),
)),
}
}
/// Converts one source and wraps the outcome in a successful tool result.
///
/// The result payload holds a one-element `conversions` list whose entry
/// records the source string, the source type reported by
/// `fetch_html_content`, the converted text, and that text's length in bytes.
///
/// # Errors
///
/// Any failure while fetching or converting is propagated as `Err`.
async fn convert_single_html_to_md(call: &McpToolCall, source: &str) -> Result<McpToolResult> {
    let (html, kind) = fetch_html_content(source).await?;
    let markdown = html_to_markdown(&html)?;
    let size = markdown.len();

    let conversion = json!({
        "source": source,
        "type": kind,
        "markdown": markdown,
        "size": size
    });
    let result = json!({
        "success": true,
        "conversions": [conversion],
        "count": 1
    });

    Ok(McpToolResult {
        tool_name: "read_html".to_string(),
        tool_id: call.tool_id.clone(),
        result,
    })
}
/// Converts every entry of `sources`, one after another, collecting successes
/// and failures side by side.
///
/// A source that cannot be fetched or converted does not abort the batch; a
/// message describing the failure is appended to the `failed` list instead.
/// The payload's `success` flag is true when at least one source converted,
/// and `count` is the number of successful conversions.
///
/// This function itself never returns `Err`.
async fn convert_multiple_html_to_md(
    call: &McpToolCall,
    sources: &[String],
) -> Result<McpToolResult> {
    let mut conversions = Vec::with_capacity(sources.len());
    let mut failures = Vec::new();

    for source in sources {
        // Step 1: fetch; on failure record it and move to the next source.
        let (html, kind) = match fetch_html_content(source).await {
            Ok(fetched) => fetched,
            Err(e) => {
                failures.push(format!("Failed to fetch {}: {}", source, e));
                continue;
            }
        };

        // Step 2: convert the fetched HTML.
        match html_to_markdown(&html) {
            Ok(markdown) => {
                let size = markdown.len();
                conversions.push(json!({
                    "source": source,
                    "type": kind,
                    "markdown": markdown,
                    "size": size
                }));
            }
            Err(e) => {
                failures.push(format!("Failed to convert {} to markdown: {}", source, e));
            }
        }
    }

    let any_converted = !conversions.is_empty();
    let converted_count = conversions.len();
    let result = json!({
        "success": any_converted,
        "conversions": conversions,
        "count": converted_count,
        "failed": failures
    });

    Ok(McpToolResult {
        tool_name: "read_html".to_string(),
        tool_id: call.tool_id.clone(),
        result,
    })
}
async fn fetch_html_content(source: &str) -> Result<(String, &'static str)> {
if let Ok(url) = Url::parse(source) {
if url.scheme() == "http" || url.scheme() == "https" {
let response = reqwest::get(source).await?;
if !response.status().is_success() {
return Err(anyhow!("HTTP error {}: {}", response.status(), source));
}
let html = response.text().await?;
Ok((html, "url"))
} else if url.scheme() == "file" {
let path = url
.to_file_path()
.map_err(|_| anyhow!("Invalid file URL: {}", source))?;
let html = tokio_fs::read_to_string(&path).await?;
Ok((html, "file"))
} else {
Err(anyhow!("Unsupported URL scheme: {}", url.scheme()))
}
} else {
let path = Path::new(source);
if !path.exists() {
return Err(anyhow!("File does not exist: {}", source));
}
if !path.is_file() {
return Err(anyhow!("Path is not a file: {}", source));
}
let html = tokio_fs::read_to_string(path).await?;
Ok((html, "file"))
}
}
fn html_to_markdown(html: &str) -> Result<String> {
from_read(Cursor::new(html), 180).map_err(|e| anyhow::anyhow!("HTML conversion error: {}", e))
}