fetchkit-cli 0.4.1

Command-line interface for the FetchKit web content fetching tool
//! FetchKit CLI - Command-line interface for fetching web content
//!
//! Provides the `fetchkit` binary with subcommands for fetching URLs
//! and running an MCP server.
//!
//! # Usage
//!
//! ```text
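//! fetchkit fetch <URL> [-o|--output md|json] [--user-agent <UA>] [--hardened]
//!                      [--allow-env-proxy] [--bot-auth-key <KEY>] [--bot-auth-agent <FQDN>]
//! fetchkit mcp [--hardened] [--allow-env-proxy] [--bot-auth-key <KEY>] [--bot-auth-agent <FQDN>]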
//! fetchkit --llmtxt
//! ```

mod mcp;

use clap::{Parser, Subcommand, ValueEnum};
use fetchkit::{FetchRequest, Tool, TOOL_LLMTXT};
use std::io::{self, Write};

// Selects how `run_fetch` renders a successful response. The `ValueEnum`
// derive makes the variants usable as values of the `--output` argument.
// NOTE(review): the `///` comments below are presumably surfaced by clap in
// `--help`, so they are left verbatim — confirm before rewording them.
/// Output format for fetch subcommand
#[derive(Debug, Clone, Copy, Default, ValueEnum)]
enum OutputFormat {
    /// Markdown with YAML frontmatter
    // Marked `#[default]`, in line with `default_value = "md"` on `--output`.
    #[default]
    Md,
    /// JSON format
    // Rendered via `serde_json::to_string_pretty` in `run_fetch`.
    Json,
}

// Top-level command-line definition parsed by `main` via `Cli::parse()`.
// NOTE(review): the `///` comments are presumably used by clap as help text,
// so they are left verbatim — confirm before rewording them.
/// FetchKit - AI-friendly web content fetching tool
#[derive(Parser, Debug)]
#[command(name = "fetchkit")]
#[command(author, version, about, long_about = None)]
struct Cli {
    // Optional so that `main` can print its own short usage text (and exit
    // with status 1) when no subcommand is supplied.
    #[command(subcommand)]
    command: Option<Commands>,

    /// Print full help with examples (llmtxt)
    // Checked in `main` before the subcommand: when set, `TOOL_LLMTXT` is
    // printed and the process exits with status 0.
    #[arg(long)]
    llmtxt: bool,
}

// Subcommands of the `fetchkit` binary. Both variants carry the same four
// policy / bot-auth flags, which `main` forwards to `build_tool`; `Fetch`
// additionally takes the URL, the output format and a custom User-Agent.
// NOTE(review): the `///` comments are presumably used by clap as help text,
// so they are left verbatim — confirm before rewording them.
#[derive(Subcommand, Debug)]
enum Commands {
    /// Run as MCP (Model Context Protocol) server over stdio
    // Dispatched to `mcp::run_server` with a tool built without a custom
    // User-Agent (`main` passes `None`).
    Mcp {
        /// Apply the hardened outbound policy profile
        #[arg(long)]
        hardened: bool,

        /// Allow HTTP_PROXY/HTTPS_PROXY/NO_PROXY from the environment
        #[arg(long)]
        allow_env_proxy: bool,

        /// Ed25519 secret key seed (base64url, 32 bytes) for Web Bot Auth signing
        // Only usable when the binary is built with the `bot-auth` feature;
        // otherwise `build_tool` reports an error and exits.
        #[arg(long)]
        bot_auth_key: Option<String>,

        /// Agent FQDN for Signature-Agent header (requires --bot-auth-key)
        // The "requires" relationship is not declared to clap here;
        // `build_tool` only reads this value when a key is present.
        #[arg(long)]
        bot_auth_agent: Option<String>,
    },
    /// Fetch URL and output as markdown with metadata frontmatter
    // Dispatched to `run_fetch`.
    Fetch {
        /// URL to fetch
        // Positional argument (no `#[arg(long)]`).
        url: String,

        /// Output format
        // Available as `--output` and in short form; defaults to `md`.
        #[arg(long, short, default_value = "md")]
        output: OutputFormat,

        /// Custom User-Agent
        #[arg(long)]
        user_agent: Option<String>,

        /// Apply the hardened outbound policy profile
        #[arg(long)]
        hardened: bool,

        /// Allow HTTP_PROXY/HTTPS_PROXY/NO_PROXY from the environment
        #[arg(long)]
        allow_env_proxy: bool,

        /// Ed25519 secret key seed (base64url, 32 bytes) for Web Bot Auth signing
        // Only usable when the binary is built with the `bot-auth` feature;
        // otherwise `build_tool` reports an error and exits.
        #[arg(long)]
        bot_auth_key: Option<String>,

        /// Agent FQDN for Signature-Agent header (requires --bot-auth-key)
        // The "requires" relationship is not declared to clap here;
        // `build_tool` only reads this value when a key is present.
        #[arg(long)]
        bot_auth_agent: Option<String>,
    },
}

#[tokio::main]
async fn main() {
    let cli = Cli::parse();

    // Handle --llmtxt flag
    if cli.llmtxt {
        writeln_safe(&TOOL_LLMTXT);
        std::process::exit(0);
    }

    match cli.command {
        Some(Commands::Mcp {
            hardened,
            allow_env_proxy,
            bot_auth_key,
            bot_auth_agent,
        }) => {
            mcp::run_server(build_tool(
                None,
                hardened,
                allow_env_proxy,
                bot_auth_key,
                bot_auth_agent,
            ))
            .await;
        }
        Some(Commands::Fetch {
            url,
            output,
            user_agent,
            hardened,
            allow_env_proxy,
            bot_auth_key,
            bot_auth_agent,
        }) => {
            run_fetch(
                &url,
                output,
                user_agent,
                hardened,
                allow_env_proxy,
                bot_auth_key,
                bot_auth_agent,
            )
            .await;
        }
        None => {
            eprintln!("Usage: fetchkit fetch <URL>");
            eprintln!("   or: fetchkit mcp");
            eprintln!("   or: fetchkit --help");
            std::process::exit(1);
        }
    }
}

/// Builds the [`Tool`] shared by the `fetch` and `mcp` subcommands.
///
/// Markdown conversion is always enabled on the builder. Every argument maps
/// to one optional builder setting:
///
/// * `user_agent` – custom User-Agent, applied only when `Some`.
/// * `hardened` – applies the builder's hardened profile when `true`.
/// * `allow_env_proxy` – enables use of the environment proxy settings when
///   `true`.
/// * `bot_auth_key` – base64 key seed for Web Bot Auth signing; honoured only
///   when the binary is compiled with the `bot-auth` feature.
/// * `bot_auth_agent` – agent FQDN attached to the bot-auth config; only read
///   when `bot_auth_key` is present.
///
/// # Exits
///
/// Terminates the process with status 1 when the key seed cannot be decoded,
/// or when a key is supplied but the `bot-auth` feature is not compiled in.
///
/// Supplying `bot_auth_agent` without `bot_auth_key` is not fatal, but a
/// warning is written to stderr because the agent value has no effect.
fn build_tool(
    user_agent: Option<String>,
    hardened: bool,
    allow_env_proxy: bool,
    bot_auth_key: Option<String>,
    bot_auth_agent: Option<String>,
) -> Tool {
    // `--bot-auth-agent` is documented as requiring `--bot-auth-key`. Report
    // the mismatch instead of silently dropping the flag, so nobody assumes
    // a Signature-Agent header is being sent when it is not. This also reads
    // `bot_auth_agent` in every feature configuration, so no dummy use is
    // needed to silence an unused-variable warning.
    if bot_auth_agent.is_some() && bot_auth_key.is_none() {
        eprintln!("Warning: --bot-auth-agent has no effect without --bot-auth-key");
    }

    let mut builder = Tool::builder().enable_markdown(true);

    if hardened {
        builder = builder.hardened();
    }

    if allow_env_proxy {
        builder = builder.use_env_proxy(true);
    }

    if let Some(ua) = user_agent {
        builder = builder.user_agent(ua);
    }

    #[cfg(feature = "bot-auth")]
    if let Some(ref key) = bot_auth_key {
        // An undecodable seed is a user error: report it and stop.
        let config = fetchkit::BotAuthConfig::from_base64_seed(key).unwrap_or_else(|e| {
            eprintln!("Error: {e}");
            std::process::exit(1);
        });
        let config = if let Some(ref fqdn) = bot_auth_agent {
            config.with_agent_fqdn(fqdn)
        } else {
            config
        };
        builder = builder.bot_auth(config);
    }

    // Without the feature there is no signing support at all, so a supplied
    // key must be rejected rather than ignored.
    #[cfg(not(feature = "bot-auth"))]
    if bot_auth_key.is_some() {
        eprintln!("Error: --bot-auth-key requires the bot-auth feature (rebuild with --features bot-auth)");
        std::process::exit(1);
    }

    builder.build()
}

/// Fetches `url` as markdown and prints the result to stdout.
///
/// The tool is configured through `build_tool` from the remaining arguments.
/// `output` selects between markdown with frontmatter and pretty-printed
/// JSON. A failed fetch or a JSON serialization failure is reported on
/// stderr and terminates the process with status 1.
async fn run_fetch(
    url: &str,
    output: OutputFormat,
    user_agent: Option<String>,
    hardened: bool,
    allow_env_proxy: bool,
    bot_auth_key: Option<String>,
    bot_auth_agent: Option<String>,
) {
    // The request always asks for markdown conversion.
    let request = FetchRequest::new(url).as_markdown();
    let tool = build_tool(
        user_agent,
        hardened,
        allow_env_proxy,
        bot_auth_key,
        bot_auth_agent,
    );

    // Bail out on a failed fetch before looking at the output format.
    let response = match tool.execute(request).await {
        Ok(fetched) => fetched,
        Err(e) => {
            eprintln!("Error: {}", e);
            std::process::exit(1);
        }
    };

    match output {
        OutputFormat::Json => match serde_json::to_string_pretty(&response) {
            Ok(json) => writeln_safe(&json),
            Err(e) => {
                eprintln!("Error serializing response: {}", e);
                std::process::exit(1);
            }
        },
        OutputFormat::Md => print_md_with_frontmatter(&response),
    }
}

/// Renders `response` as markdown with frontmatter and writes it to stdout,
/// followed by a newline.
fn print_md_with_frontmatter(response: &fetchkit::FetchResponse) {
    let document = format_md_with_frontmatter(response);
    writeln_safe(&document);
}

/// Quotes `value` for use as a frontmatter scalar by serializing it as a JSON
/// string literal (double-quoted, with special characters escaped).
///
/// Falls back to an empty quoted string (`""`) if serialization fails.
fn yaml_quote(value: &str) -> String {
    match serde_json::to_string(value) {
        Ok(quoted) => quoted,
        Err(_) => String::from("\"\""),
    }
}

/// Format response as markdown with YAML frontmatter
fn format_md_with_frontmatter(response: &fetchkit::FetchResponse) -> String {
    let mut output = String::new();

    // Build frontmatter
    output.push_str("---\n");
    output.push_str(&format!("url: {}\n", yaml_quote(&response.url)));
    output.push_str(&format!("status_code: {}\n", response.status_code));
    if let Some(ref ct) = response.content_type {
        output.push_str(&format!("source_content_type: {}\n", yaml_quote(ct)));
    }
    if let Some(size) = response.size {
        output.push_str(&format!("source_size: {}\n", size));
    }
    if let Some(ref lm) = response.last_modified {
        output.push_str(&format!("last_modified: {}\n", yaml_quote(lm)));
    }
    if let Some(ref filename) = response.filename {
        output.push_str(&format!("filename: {}\n", yaml_quote(filename)));
    }
    if let Some(truncated) = response.truncated {
        if truncated {
            output.push_str("truncated: true\n");
        }
    }
    output.push_str("---\n");

    // Append content, or error as body for unsupported content
    if let Some(ref content) = response.content {
        output.push_str(content);
    } else if let Some(ref err) = response.error {
        output.push_str(err);
    }

    output
}

/// Writes `s` plus a trailing newline to stdout.
///
/// A broken pipe ends the process quietly with status 0. Any other write
/// error is reported on stderr and ends the process with status 1.
fn writeln_safe(s: &str) {
    let mut out = io::stdout().lock();
    match writeln!(out, "{}", s) {
        Ok(()) => {}
        // The reader closed the pipe: nothing more to write.
        Err(err) if err.kind() == io::ErrorKind::BrokenPipe => std::process::exit(0),
        Err(err) => {
            eprintln!("Error writing to stdout: {}", err);
            std::process::exit(1);
        }
    }
}

// Unit tests for `format_md_with_frontmatter`. Each test builds a
// `FetchResponse` with only the relevant fields set (the rest come from
// `Default`) and checks the exact text of the rendered document.
#[cfg(test)]
mod tests {
    use super::*;
    use fetchkit::FetchResponse;

    // Minimal response: frontmatter holds the quoted url, the status code and
    // the quoted content type, and the content follows the closing `---`.
    #[test]
    fn test_format_md_basic() {
        let response = FetchResponse {
            url: "https://example.com".to_string(),
            status_code: 200,
            content_type: Some("text/html".to_string()),
            content: Some("# Hello World".to_string()),
            ..Default::default()
        };

        let output = format_md_with_frontmatter(&response);

        assert!(output.starts_with("---\n"));
        assert!(output.contains("url: \"https://example.com\"\n"));
        assert!(output.contains("status_code: 200\n"));
        assert!(output.contains("source_content_type: \"text/html\"\n"));
        assert!(output.contains("---\n# Hello World"));
    }

    // Every optional field set: each one must produce its frontmatter line.
    #[test]
    fn test_format_md_with_all_fields() {
        let response = FetchResponse {
            url: "https://example.com/page".to_string(),
            status_code: 200,
            content_type: Some("text/html".to_string()),
            size: Some(1234),
            last_modified: Some("Wed, 01 Jan 2025 00:00:00 GMT".to_string()),
            filename: Some("page.html".to_string()),
            truncated: Some(true),
            content: Some("Content here".to_string()),
            ..Default::default()
        };

        let output = format_md_with_frontmatter(&response);

        assert!(output.contains("source_size: 1234\n"));
        assert!(output.contains("last_modified: \"Wed, 01 Jan 2025 00:00:00 GMT\"\n"));
        assert!(output.contains("filename: \"page.html\"\n"));
        assert!(output.contains("truncated: true\n"));
    }

    // With no content, the error text becomes the document body.
    #[test]
    fn test_format_md_error_as_body() {
        let response = FetchResponse {
            url: "https://example.com/file.pdf".to_string(),
            status_code: 200,
            content_type: Some("application/pdf".to_string()),
            error: Some("Binary content not supported".to_string()),
            ..Default::default()
        };

        let output = format_md_with_frontmatter(&response);

        // Error should appear as body, not in frontmatter
        assert!(!output.contains("error:"));
        assert!(output.ends_with("---\nBinary content not supported"));
    }

    // `truncated: Some(false)` must not emit a `truncated` key at all.
    #[test]
    fn test_format_md_truncated_false_omitted() {
        let response = FetchResponse {
            url: "https://example.com".to_string(),
            status_code: 200,
            truncated: Some(false),
            content: Some("Content".to_string()),
            ..Default::default()
        };

        let output = format_md_with_frontmatter(&response);

        // truncated: false should not appear
        assert!(!output.contains("truncated"));
    }

    // Values taken from the response must be quoted and escaped, so that an
    // embedded newline cannot inject an extra frontmatter key and a leading
    // `*` stays inside a quoted string.
    #[test]
    fn test_format_md_quotes_untrusted_scalars() {
        let response = FetchResponse {
            url: "https://example.com/a\nforged: true".to_string(),
            status_code: 200,
            filename: Some("*alias".to_string()),
            content: Some("ok".to_string()),
            ..Default::default()
        };

        let output = format_md_with_frontmatter(&response);

        assert!(output.contains("url: \"https://example.com/a\\nforged: true\"\n"));
        assert!(output.contains("filename: \"*alias\"\n"));
        assert!(!output.contains("\nforged: true\n"));
    }
}