mod mcp;
use clap::{Parser, Subcommand, ValueEnum};
use fetchkit::{FetchRequest, Tool, TOOL_LLMTXT};
use std::io::{self, Write};
// How `fetch` renders a successful response on stdout (see `run_fetch`).
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn
// `///` doc comments on these items into user-visible help text, which would
// change the CLI's output.
#[derive(Debug, Clone, Copy, Default, ValueEnum)]
enum OutputFormat {
// Markdown body preceded by a `---`-delimited metadata header. This is the
// variant returned by the derived `Default`.
#[default]
Md,
// The whole response serialized with `serde_json::to_string_pretty`.
Json,
}
// Top-level command-line interface parsed by clap in `main`.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros can turn
// `///` doc comments into user-visible help text.
#[derive(Parser, Debug)]
#[command(name = "fetchkit")]
#[command(author, version, about, long_about = None)]
struct Cli {
// Optional subcommand. When it is absent (and `--llmtxt` is not given),
// `main` prints a short usage message to stderr and exits with status 1.
#[command(subcommand)]
command: Option<Commands>,
// `--llmtxt`: print `TOOL_LLMTXT` to stdout and exit with status 0. `main`
// checks this flag before it looks at the subcommand.
#[arg(long)]
llmtxt: bool,
}
// Subcommands of the CLI. The option fields shared by both variants are
// forwarded unchanged to `build_tool`.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros can turn
// `///` doc comments into user-visible help text.
#[derive(Subcommand, Debug)]
enum Commands {
// `mcp`: builds a tool (with no user-agent override) and hands it to
// `mcp::run_server`.
Mcp {
// When set, `build_tool` calls `.hardened()` on the tool builder.
#[arg(long)]
hardened: bool,
// When set, `build_tool` calls `.use_env_proxy(true)` on the tool builder.
#[arg(long)]
allow_env_proxy: bool,
// Passed to `fetchkit::BotAuthConfig::from_base64_seed` when the `bot-auth`
// cargo feature is compiled in; without that feature, supplying it makes
// `build_tool` print an error and exit with status 1.
#[arg(long)]
bot_auth_key: Option<String>,
// Passed to `with_agent_fqdn` on the bot-auth config. Only consulted when
// `bot_auth_key` is also supplied.
#[arg(long)]
bot_auth_agent: Option<String>,
},
// `fetch <URL>`: performs a single fetch and prints the result to stdout.
Fetch {
// URL to fetch; no `#[arg]` attribute, so clap treats it as a positional
// argument.
url: String,
// `--output` / `-o`: output rendering, defaulting to `md`.
#[arg(long, short, default_value = "md")]
output: OutputFormat,
// When present, `build_tool` passes it to `.user_agent(..)` on the builder.
#[arg(long)]
user_agent: Option<String>,
// When set, `build_tool` calls `.hardened()` on the tool builder.
#[arg(long)]
hardened: bool,
// When set, `build_tool` calls `.use_env_proxy(true)` on the tool builder.
#[arg(long)]
allow_env_proxy: bool,
// Passed to `fetchkit::BotAuthConfig::from_base64_seed` when the `bot-auth`
// cargo feature is compiled in; without that feature, supplying it makes
// `build_tool` print an error and exit with status 1.
#[arg(long)]
bot_auth_key: Option<String>,
// Passed to `with_agent_fqdn` on the bot-auth config. Only consulted when
// `bot_auth_key` is also supplied.
#[arg(long)]
bot_auth_agent: Option<String>,
},
}
#[tokio::main]
async fn main() {
let cli = Cli::parse();
if cli.llmtxt {
writeln_safe(&TOOL_LLMTXT);
std::process::exit(0);
}
match cli.command {
Some(Commands::Mcp {
hardened,
allow_env_proxy,
bot_auth_key,
bot_auth_agent,
}) => {
mcp::run_server(build_tool(
None,
hardened,
allow_env_proxy,
bot_auth_key,
bot_auth_agent,
))
.await;
}
Some(Commands::Fetch {
url,
output,
user_agent,
hardened,
allow_env_proxy,
bot_auth_key,
bot_auth_agent,
}) => {
run_fetch(
&url,
output,
user_agent,
hardened,
allow_env_proxy,
bot_auth_key,
bot_auth_agent,
)
.await;
}
None => {
eprintln!("Usage: fetchkit fetch <URL>");
eprintln!(" or: fetchkit mcp");
eprintln!(" or: fetchkit --help");
std::process::exit(1);
}
}
}
/// Builds the fetch [`Tool`] from the command-line options.
///
/// Markdown conversion is always enabled on the builder. Each remaining
/// option only touches the builder when it is set:
///
/// * `user_agent` – forwarded to `.user_agent(..)`.
/// * `hardened` – calls `.hardened()`.
/// * `allow_env_proxy` – calls `.use_env_proxy(true)`.
/// * `bot_auth_key` / `bot_auth_agent` – only usable when the crate is
///   compiled with the `bot-auth` feature. The agent value is consulted
///   solely when a key is present.
///
/// The process exits with status 1 (after printing to stderr) when the key
/// cannot be turned into a bot-auth config, or when a key is supplied to a
/// binary built without the `bot-auth` feature.
fn build_tool(
    user_agent: Option<String>,
    hardened: bool,
    allow_env_proxy: bool,
    bot_auth_key: Option<String>,
    bot_auth_agent: Option<String>,
) -> Tool {
    let pending = Tool::builder().enable_markdown(true);
    let pending = if hardened { pending.hardened() } else { pending };
    let pending = if allow_env_proxy {
        pending.use_env_proxy(true)
    } else {
        pending
    };
    let pending = match user_agent {
        Some(ua) => pending.user_agent(ua),
        None => pending,
    };

    #[cfg(feature = "bot-auth")]
    let pending = match &bot_auth_key {
        None => pending,
        Some(key) => {
            let config = match fetchkit::BotAuthConfig::from_base64_seed(key) {
                Ok(config) => config,
                Err(e) => {
                    eprintln!("Error: {e}");
                    std::process::exit(1);
                }
            };
            let config = match &bot_auth_agent {
                Some(fqdn) => config.with_agent_fqdn(fqdn),
                None => config,
            };
            pending.bot_auth(config)
        }
    };

    // Without the feature there is nothing that could honour the key, so fail
    // loudly instead of silently sending unsigned requests.
    #[cfg(not(feature = "bot-auth"))]
    if bot_auth_key.is_some() {
        eprintln!("Error: --bot-auth-key requires the bot-auth feature (rebuild with --features bot-auth)");
        std::process::exit(1);
    }

    // Counts as a use of the parameter in builds where the feature-gated code
    // above is compiled out.
    let _ = bot_auth_agent;

    pending.build()
}
/// Fetches `url` once and prints the result to stdout in the requested
/// `output` format.
///
/// The request is always made with `.as_markdown()`; `output` only selects
/// how the response is rendered. The remaining parameters are forwarded to
/// [`build_tool`] unchanged.
///
/// A failed fetch or a JSON serialization failure is reported on stderr and
/// ends the process with status 1.
async fn run_fetch(
    url: &str,
    output: OutputFormat,
    user_agent: Option<String>,
    hardened: bool,
    allow_env_proxy: bool,
    bot_auth_key: Option<String>,
    bot_auth_agent: Option<String>,
) {
    let request = FetchRequest::new(url).as_markdown();
    let tool = build_tool(
        user_agent,
        hardened,
        allow_env_proxy,
        bot_auth_key,
        bot_auth_agent,
    );

    // Bail out first so the rendering below only deals with a response.
    let response = match tool.execute(request).await {
        Ok(response) => response,
        Err(e) => {
            eprintln!("Error: {}", e);
            std::process::exit(1);
        }
    };

    match output {
        OutputFormat::Md => print_md_with_frontmatter(&response),
        OutputFormat::Json => match serde_json::to_string_pretty(&response) {
            Ok(json) => writeln_safe(&json),
            Err(e) => {
                eprintln!("Error serializing response: {}", e);
                std::process::exit(1);
            }
        },
    }
}
/// Renders `response` with [`format_md_with_frontmatter`] and writes the
/// result, followed by a newline, to stdout via [`writeln_safe`].
fn print_md_with_frontmatter(response: &fetchkit::FetchResponse) {
    let document = format_md_with_frontmatter(response);
    writeln_safe(&document);
}
/// Renders `value` as a single-line, double-quoted YAML scalar.
///
/// The values passed in come from the network (URLs, header-derived fields),
/// so nothing in them may be able to terminate the scalar, start a new key,
/// or make a YAML reader reject the front matter.
///
/// Escaping rules:
///
/// * `"` and `\` are backslash-escaped.
/// * C0 control characters use the short forms `\b`, `\t`, `\n`, `\f`, `\r`
///   where they exist and `\u00XX` (lowercase hex) otherwise. This is the same
///   output JSON string escaping produces, so the result is also a valid JSON
///   string.
/// * DEL, the C1 control range (which includes NEL, U+0085), the line and
///   paragraph separators U+2028 / U+2029, and the non-characters U+FFFE /
///   U+FFFF are written as `\uXXXX`. JSON escaping leaves these raw, but YAML
///   1.1 readers reject most of them as non-printable and treat NEL, U+2028 and
///   U+2029 as line breaks.
///
/// Everything else, including non-ASCII text, is emitted verbatim.
fn yaml_quote(value: &str) -> String {
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for ch in value.chars() {
        match ch {
            '"' => quoted.push_str("\\\""),
            '\\' => quoted.push_str("\\\\"),
            '\u{08}' => quoted.push_str("\\b"),
            '\t' => quoted.push_str("\\t"),
            '\n' => quoted.push_str("\\n"),
            '\u{0c}' => quoted.push_str("\\f"),
            '\r' => quoted.push_str("\\r"),
            // Every code point matched here is below U+10000, so the
            // four-digit `\uXXXX` form is always sufficient.
            '\u{00}'..='\u{1f}'
            | '\u{7f}'..='\u{9f}'
            | '\u{2028}'
            | '\u{2029}'
            | '\u{fffe}'
            | '\u{ffff}' => {
                quoted.push_str(&format!("\\u{:04x}", ch as u32));
            }
            _ => quoted.push(ch),
        }
    }
    quoted.push('"');
    quoted
}
fn format_md_with_frontmatter(response: &fetchkit::FetchResponse) -> String {
let mut output = String::new();
output.push_str("---\n");
output.push_str(&format!("url: {}\n", yaml_quote(&response.url)));
output.push_str(&format!("status_code: {}\n", response.status_code));
if let Some(ref ct) = response.content_type {
output.push_str(&format!("source_content_type: {}\n", yaml_quote(ct)));
}
if let Some(size) = response.size {
output.push_str(&format!("source_size: {}\n", size));
}
if let Some(ref lm) = response.last_modified {
output.push_str(&format!("last_modified: {}\n", yaml_quote(lm)));
}
if let Some(ref filename) = response.filename {
output.push_str(&format!("filename: {}\n", yaml_quote(filename)));
}
if let Some(truncated) = response.truncated {
if truncated {
output.push_str("truncated: true\n");
}
}
output.push_str("---\n");
if let Some(ref content) = response.content {
output.push_str(content);
} else if let Some(ref err) = response.error {
output.push_str(err);
}
output
}
/// Writes `s` plus a trailing newline to stdout, ending the process on
/// failure instead of panicking.
///
/// A broken pipe (for example when the reader of a shell pipeline has already
/// exited) is treated as a normal end of output and exits with status 0. Any
/// other write error is reported on stderr and exits with status 1.
fn writeln_safe(s: &str) {
    let stdout = io::stdout();
    let mut out = stdout.lock();
    match writeln!(out, "{}", s) {
        Ok(()) => {}
        Err(e) if e.kind() == io::ErrorKind::BrokenPipe => std::process::exit(0),
        Err(e) => {
            eprintln!("Error writing to stdout: {}", e);
            std::process::exit(1);
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use fetchkit::FetchResponse;

    /// Fails the calling test unless every fragment occurs in `rendered`.
    fn assert_contains_all(rendered: &str, fragments: &[&str]) {
        for fragment in fragments {
            assert!(
                rendered.contains(fragment),
                "expected {:?} in rendered output {:?}",
                fragment,
                rendered
            );
        }
    }

    /// The always-present keys and the body appear around the delimiters.
    #[test]
    fn test_format_md_basic() {
        let rendered = format_md_with_frontmatter(&FetchResponse {
            url: "https://example.com".to_string(),
            status_code: 200,
            content_type: Some("text/html".to_string()),
            content: Some("# Hello World".to_string()),
            ..Default::default()
        });

        assert!(rendered.starts_with("---\n"));
        assert_contains_all(
            &rendered,
            &[
                "url: \"https://example.com\"\n",
                "status_code: 200\n",
                "source_content_type: \"text/html\"\n",
                "---\n# Hello World",
            ],
        );
    }

    /// Every optional metadata field is emitted when it is populated.
    #[test]
    fn test_format_md_with_all_fields() {
        let rendered = format_md_with_frontmatter(&FetchResponse {
            url: "https://example.com/page".to_string(),
            status_code: 200,
            content_type: Some("text/html".to_string()),
            size: Some(1234),
            last_modified: Some("Wed, 01 Jan 2025 00:00:00 GMT".to_string()),
            filename: Some("page.html".to_string()),
            truncated: Some(true),
            content: Some("Content here".to_string()),
            ..Default::default()
        });

        assert_contains_all(
            &rendered,
            &[
                "source_size: 1234\n",
                "last_modified: \"Wed, 01 Jan 2025 00:00:00 GMT\"\n",
                "filename: \"page.html\"\n",
                "truncated: true\n",
            ],
        );
    }

    /// With no content, the error text becomes the body, not a header key.
    #[test]
    fn test_format_md_error_as_body() {
        let rendered = format_md_with_frontmatter(&FetchResponse {
            url: "https://example.com/file.pdf".to_string(),
            status_code: 200,
            content_type: Some("application/pdf".to_string()),
            error: Some("Binary content not supported".to_string()),
            ..Default::default()
        });

        assert!(!rendered.contains("error:"));
        assert!(rendered.ends_with("---\nBinary content not supported"));
    }

    /// `truncated: Some(false)` produces no `truncated` key at all.
    #[test]
    fn test_format_md_truncated_false_omitted() {
        let rendered = format_md_with_frontmatter(&FetchResponse {
            url: "https://example.com".to_string(),
            status_code: 200,
            truncated: Some(false),
            content: Some("Content".to_string()),
            ..Default::default()
        });

        assert!(!rendered.contains("truncated"));
    }

    /// Untrusted values cannot inject extra keys or YAML syntax.
    #[test]
    fn test_format_md_quotes_untrusted_scalars() {
        let rendered = format_md_with_frontmatter(&FetchResponse {
            url: "https://example.com/a\nforged: true".to_string(),
            status_code: 200,
            filename: Some("*alias".to_string()),
            content: Some("ok".to_string()),
            ..Default::default()
        });

        assert_contains_all(
            &rendered,
            &[
                "url: \"https://example.com/a\\nforged: true\"\n",
                "filename: \"*alias\"\n",
            ],
        );
        assert!(!rendered.contains("\nforged: true\n"));
    }
}