use std::path::PathBuf;
use clap::builder::NonEmptyStringValueParser;
use clap::{ArgAction, Args, Parser, Subcommand, ValueEnum, value_parser};
// Top-level command-line definition for the `servo-fetch` binary.
//
// NOTE: the comments in this block are deliberately plain `//` comments.
// clap's derive macros turn `///` doc comments into help text, so using
// `///` here would change the generated `--help` output.
#[derive(Parser, Debug)]
#[command(
name = "servo-fetch",
version,
about = "A browser engine in a binary — fetch, render, and extract web content."
)]
pub(crate) struct Cli {
// Optional subcommand; `None` when the binary is invoked without one.
#[command(subcommand)]
pub command: Option<Command>,
// Fetch options, flattened so they are accepted directly on the top-level
// command rather than under a subcommand.
#[command(flatten)]
pub fetch: FetchArgs,
// `-v` / `--verbose`: counted flag (the value is the number of occurrences).
// Global, and declared as conflicting with `--quiet`.
#[arg(short = 'v', long, action = ArgAction::Count, global = true, conflicts_with = "quiet")]
pub verbose: u8,
// `-q` / `--quiet`: global boolean flag.
#[arg(short = 'q', long, global = true)]
pub quiet: bool,
// `--allow-private-addresses`: global boolean flag, hidden from help output.
// NOTE(review): presumably lifts a restriction on fetching private/loopback
// addresses — confirm against the code that consumes this field.
#[arg(long = "allow-private-addresses", hide = true, global = true)]
pub allow_private_addresses: bool,
}
// Arguments of the default (no-subcommand) fetch mode; flattened into `Cli`.
//
// NOTE: plain `//` comments are used on purpose — clap's derive turns `///`
// doc comments on fields into help text, which would change `--help` output.
// Some cross-argument rules are not expressed here; the tests in this file
// exercise them through `crate::commands::fetch::validate_args`.
#[derive(Args, Debug)]
pub(crate) struct FetchArgs {
// Positional URL arguments (`num_args = 1..`).
#[arg(num_args = 1..)]
pub urls: Vec<String>,
// `--format FORMAT`: output format, defaults to `markdown`; declared as
// conflicting with `--js`.
#[arg(long, value_enum, value_name = "FORMAT", default_value_t = Format::Markdown,
conflicts_with_all = ["js"])]
pub format: Format,
// `--full-page`: boolean flag. The tests in this file expect `validate_args`
// to reject it unless `--format png` is also given.
#[arg(long)]
pub full_page: bool,
// `--js EXPR`: optional string.
// NOTE(review): presumably a JavaScript expression evaluated in the page — confirm.
#[arg(long, value_name = "EXPR")]
pub js: Option<String>,
// `-t` / `--timeout SECS`: defaults to 30; the parser rejects values below 1.
#[arg(short = 't', long, default_value_t = 30, value_parser = value_parser!(u64).range(1..), value_name = "SECS")]
pub timeout: u64,
// `--settle MS`: defaults to 0; the parser accepts 0..=10_000.
// NOTE(review): presumably an extra wait after page load — confirm.
#[arg(long, default_value_t = 0, value_parser = value_parser!(u64).range(0..=10_000), value_name = "MS")]
pub settle: u64,
// `--selector CSS`: optional; an empty string is rejected by the parser.
#[arg(long, value_name = "CSS", value_parser = NonEmptyStringValueParser::new())]
pub selector: Option<String>,
// `--user-agent UA`: optional string.
#[arg(long, value_name = "UA")]
pub user_agent: Option<String>,
// `--cookies FILE`: optional path.
#[arg(long, value_name = "FILE")]
pub cookies: Option<PathBuf>,
// `--schema FILE`: optional path; declared as conflicting with `--js`,
// `--selector` and `--format`.
#[arg(long, value_name = "FILE", conflicts_with_all = ["js", "selector", "format"])]
pub schema: Option<PathBuf>,
// `-o` / `--output FILE`: optional path; declared as conflicting with
// `--output-dir`.
#[arg(short = 'o', long, value_name = "FILE", conflicts_with_all = ["output_dir"])]
pub output: Option<PathBuf>,
// `--output-dir DIR`: optional path.
#[arg(long, value_name = "DIR")]
pub output_dir: Option<PathBuf>,
// `--visibility POLICY`: defaults to `moderate`; converted to a
// `servo_fetch::VisibilityPolicy` by `VisibilityArg::to_policy`.
#[arg(long, value_name = "POLICY", value_enum, default_value_t = VisibilityArg::Moderate)]
pub visibility: VisibilityArg,
}
// Values accepted by the `--visibility` option. Each variant is mapped to the
// `servo_fetch::VisibilityPolicy` constructor of the same name by `to_policy`.
// (Plain `//` comments: clap would turn `///` on variants into help text.)
#[derive(Debug, Clone, Copy, ValueEnum)]
pub(crate) enum VisibilityArg {
// Default value of `--visibility` on the fetch arguments.
Moderate,
Strict,
Off,
}
impl VisibilityArg {
pub(crate) fn to_policy(self) -> servo_fetch::VisibilityPolicy {
match self {
Self::Moderate => servo_fetch::VisibilityPolicy::moderate(),
Self::Strict => servo_fetch::VisibilityPolicy::strict(),
Self::Off => servo_fetch::VisibilityPolicy::off(),
}
}
}
// Values accepted by the top-level `--format` option. The tests in this file
// parse them as `markdown`, `json`, `html`, `text` and `png`.
// (Plain `//` comments: clap would turn `///` on variants into help text.)
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub(crate) enum Format {
// Default value of `--format` on the fetch arguments.
Markdown,
Json,
Html,
Text,
// The tests in this file expect extra restrictions for this format
// (single URL only, no `--selector`, no `--output-dir`).
Png,
}
// Values accepted by the `--format` option of the crawl subcommand. This is a
// smaller set than `Format`: only `markdown` and `json` are offered.
// (Plain `//` comments: clap would turn `///` on variants into help text.)
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub(crate) enum CrawlFormat {
// Default value of `--format` on the crawl arguments.
Markdown,
Json,
}
// Subcommands of the binary; each variant carries the argument struct parsed
// for that subcommand.
// (Plain `//` comments: clap would turn `///` on variants into subcommand
// descriptions in the help output.)
#[derive(Subcommand, Debug)]
pub(crate) enum Command {
// Takes an optional `--port`.
// NOTE(review): presumably starts an MCP server — confirm in the handler.
Mcp(McpArgs),
// Takes `--host` and `--port`.
// NOTE(review): presumably starts a network server — confirm in the handler.
Serve(ServeArgs),
// Takes a start URL plus limits, filters and output options.
Crawl(CrawlArgs),
// Takes a start URL plus a limit and include/exclude filters.
Map(MapArgs),
// Takes `--port`; the only variant for which `needs_servo_init` is false.
Healthcheck(HealthcheckArgs),
}
impl Command {
    /// Reports whether this subcommand requires Servo initialisation.
    ///
    /// Returns `false` for `Healthcheck` and `true` for every other
    /// subcommand.
    pub(crate) fn needs_servo_init(&self) -> bool {
        match self {
            Self::Healthcheck(_) => false,
            Self::Mcp(_) | Self::Serve(_) | Self::Crawl(_) | Self::Map(_) => true,
        }
    }
}
// Arguments of the `Command::Mcp` subcommand.
// (Plain `//` comments: clap would turn `///` on fields into help text.)
#[derive(Args, Debug)]
pub(crate) struct McpArgs {
// `--port PORT`: optional; `None` when the flag is not given.
// NOTE(review): what happens without a port is decided by the handler — confirm there.
#[arg(long, value_name = "PORT")]
pub port: Option<u16>,
}
// Arguments of the `Command::Serve` subcommand.
// (Plain `//` comments: clap would turn `///` on fields into help text.)
#[derive(Args, Debug)]
pub(crate) struct ServeArgs {
// `--host HOST`: defaults to `127.0.0.1`.
#[arg(long, value_name = "HOST", default_value = "127.0.0.1")]
pub host: String,
// `--port PORT`: defaults to 3000.
#[arg(long, value_name = "PORT", default_value_t = 3000)]
pub port: u16,
}
// Arguments of the `Command::Crawl` subcommand.
// (Plain `//` comments: clap would turn `///` on fields into help text.)
#[derive(Args, Debug)]
pub(crate) struct CrawlArgs {
// Positional URL (no `#[arg]` attribute, so clap treats it as a positional).
pub url: String,
// `--limit N`: defaults to 50.
// NOTE(review): presumably the maximum number of pages to crawl — confirm.
#[arg(long, default_value_t = 50, value_name = "N")]
pub limit: usize,
// `--max-depth N`: defaults to 3.
#[arg(long, default_value_t = 3, value_name = "N")]
pub max_depth: usize,
// `--include GLOB`: collected into a list.
#[arg(long, value_name = "GLOB")]
pub include: Vec<String>,
// `--exclude GLOB`: collected into a list.
#[arg(long, value_name = "GLOB")]
pub exclude: Vec<String>,
// `--format FORMAT`: defaults to `markdown`; only the `CrawlFormat` values
// are accepted here.
#[arg(long, value_enum, value_name = "FORMAT", default_value_t = CrawlFormat::Markdown)]
pub format: CrawlFormat,
// `--selector CSS`: optional; an empty string is rejected by the parser.
#[arg(long, value_name = "CSS", value_parser = NonEmptyStringValueParser::new())]
pub selector: Option<String>,
// `-t` / `--timeout SECS`: defaults to 30; the parser rejects values below 1.
#[arg(short = 't', long, default_value_t = 30, value_parser = value_parser!(u64).range(1..), value_name = "SECS")]
pub timeout: u64,
// `--settle MS`: defaults to 0; the parser accepts 0..=10_000.
#[arg(long, default_value_t = 0, value_parser = value_parser!(u64).range(0..=10_000), value_name = "MS")]
pub settle: u64,
// `--concurrency N`: defaults to 1; the parser accepts 1..=64.
#[arg(long, default_value_t = 1, value_parser = value_parser!(u64).range(1..=64), value_name = "N")]
pub concurrency: u64,
// `--delay-ms MS`: defaults to 500; the parser accepts 0..=60_000.
// NOTE(review): presumably a pause between requests — confirm.
#[arg(long, default_value_t = 500, value_parser = value_parser!(u64).range(0..=60_000), value_name = "MS")]
pub delay_ms: u64,
// `--user-agent UA`: optional string.
#[arg(long, value_name = "UA")]
pub user_agent: Option<String>,
// `--cookies FILE`: optional path.
#[arg(long, value_name = "FILE")]
pub cookies: Option<PathBuf>,
// `--output-dir DIR`: optional path.
#[arg(long, value_name = "DIR")]
pub output_dir: Option<PathBuf>,
}
// Arguments of the `Command::Map` subcommand.
// (Plain `//` comments: clap would turn `///` on fields into help text.)
#[derive(Args, Debug)]
pub(crate) struct MapArgs {
// Positional URL (no `#[arg]` attribute, so clap treats it as a positional).
pub url: String,
// `--limit N`: defaults to 5000.
#[arg(long, default_value_t = 5000, value_name = "N")]
pub limit: usize,
// `--include GLOB`: collected into a list.
#[arg(long, value_name = "GLOB")]
pub include: Vec<String>,
// `--exclude GLOB`: collected into a list.
#[arg(long, value_name = "GLOB")]
pub exclude: Vec<String>,
// `--json`: boolean flag.
// NOTE(review): presumably switches the output to JSON — confirm in the handler.
#[arg(long)]
pub json: bool,
// `--no-fallback`: boolean flag.
// NOTE(review): which fallback is disabled is not visible here — confirm in the handler.
#[arg(long)]
pub no_fallback: bool,
// `--user-agent UA`: optional string.
#[arg(long, value_name = "UA")]
pub user_agent: Option<String>,
// `-t` / `--timeout SECS`: defaults to 30; the parser rejects values below 1.
#[arg(short = 't', long, default_value_t = 30, value_parser = value_parser!(u64).range(1..), value_name = "SECS")]
pub timeout: u64,
}
// Arguments of the `Command::Healthcheck` subcommand.
// (Plain `//` comments: clap would turn `///` on fields into help text.)
#[derive(Args, Debug)]
pub(crate) struct HealthcheckArgs {
// `--port PORT`: defaults to 3000, the same default as `ServeArgs::port`.
#[arg(long, value_name = "PORT", default_value_t = 3000)]
pub port: u16,
}
#[cfg(test)]
mod tests {
    //! Tests for the CLI definition in this file.
    //!
    //! Two layers are covered: rejections made by the clap parser itself
    //! (checked through `ErrorKind`) and cross-argument rules enforced
    //! afterwards by `validate_args`.

    use clap::error::ErrorKind;

    use super::*;
    use crate::commands::fetch::validate_args;

    /// Parses `args` with `servo-fetch` prepended as the program name.
    fn parse(args: &[&str]) -> Result<Cli, clap::Error> {
        Cli::try_parse_from(std::iter::once("servo-fetch").chain(args.iter().copied()))
    }

    /// Returns the clap error kind produced for `args`; panics if parsing succeeds.
    #[track_caller]
    fn error_kind(args: &[&str]) -> ErrorKind {
        parse(args).unwrap_err().kind()
    }

    /// Asserts that `args` parse, but `validate_args` rejects them with a
    /// message containing `expected`.
    #[track_caller]
    fn assert_validation_err(args: &[&str], expected: &str) {
        let err = validate_args(&parse(args).unwrap().fetch).unwrap_err().to_string();
        assert!(err.contains(expected), "expected {expected:?}, got: {err}");
    }

    /// Asserts that `args` parse and are accepted by `validate_args`.
    #[track_caller]
    fn assert_valid(args: &[&str]) {
        let cli = parse(args).unwrap();
        validate_args(&cli.fetch).unwrap();
    }

    // `ValueEnum` is already in scope through `use super::*`, so the
    // `from_str` calls below need no extra import.
    #[test]
    fn format_from_str() {
        assert!(Format::from_str("markdown", true).is_ok());
        assert!(Format::from_str("json", true).is_ok());
        assert!(Format::from_str("html", true).is_ok());
        assert!(Format::from_str("text", true).is_ok());
        assert!(Format::from_str("png", true).is_ok());
        assert!(Format::from_str("xml", true).is_err());
    }

    #[test]
    fn crawl_format_from_str() {
        assert!(CrawlFormat::from_str("markdown", true).is_ok());
        assert!(CrawlFormat::from_str("json", true).is_ok());
        assert!(CrawlFormat::from_str("html", true).is_err());
    }

    #[test]
    fn settle_rejects_out_of_range() {
        assert_eq!(
            error_kind(&["--settle", "10001", "https://example.com"]),
            ErrorKind::ValueValidation,
        );
    }

    #[test]
    fn invalid_format_rejected() {
        assert_eq!(
            error_kind(&["--format", "xml", "https://example.com"]),
            ErrorKind::InvalidValue,
        );
    }

    #[test]
    fn full_page_requires_format_png() {
        assert_validation_err(
            &["--full-page", "https://example.com"],
            "--full-page requires --format png",
        );
    }

    #[test]
    fn full_page_with_format_png_is_allowed() {
        assert_valid(&["--full-page", "--format", "png", "-o", "out.png", "https://example.com"]);
    }

    #[test]
    fn schema_conflicts_with_selector() {
        assert_eq!(
            error_kind(&["--schema", "s.json", "--selector", "div", "https://example.com"]),
            ErrorKind::ArgumentConflict,
        );
    }

    #[test]
    fn schema_conflicts_with_format() {
        assert_eq!(
            error_kind(&["--schema", "s.json", "--format", "json", "https://example.com"]),
            ErrorKind::ArgumentConflict,
        );
    }

    #[test]
    fn format_png_conflicts_with_js() {
        assert_eq!(
            error_kind(&["--format", "png", "--js", "document.title", "https://example.com"]),
            ErrorKind::ArgumentConflict,
        );
    }

    #[test]
    fn format_png_conflicts_with_selector() {
        assert_validation_err(
            &["--format", "png", "--selector", "article", "https://example.com"],
            "--selector cannot be used with --format png",
        );
    }

    #[test]
    fn format_png_conflicts_with_schema() {
        assert_eq!(
            error_kind(&["--format", "png", "--schema", "s.json", "https://example.com"]),
            ErrorKind::ArgumentConflict,
        );
    }

    #[test]
    fn format_png_with_multi_urls_errors() {
        assert_validation_err(
            &["--format", "png", "https://example.com", "https://example.org"],
            "--format png only supports a single URL",
        );
    }

    #[test]
    fn format_png_conflicts_with_output_dir() {
        assert_validation_err(
            &["--format", "png", "--output-dir", "out", "https://example.com"],
            "--format png cannot be used with --output-dir",
        );
    }

    #[test]
    fn format_png_with_output_file_is_allowed() {
        assert_valid(&["--format", "png", "-o", "out.png", "https://example.com"]);
    }

    #[test]
    fn format_png_with_dash_output_is_allowed() {
        assert_valid(&["--format", "png", "-o", "-", "https://example.com"]);
    }

    #[test]
    fn output_conflicts_with_output_dir() {
        assert_eq!(
            error_kind(&["-o", "out.md", "--output-dir", "out", "https://example.com"]),
            ErrorKind::ArgumentConflict,
        );
    }

    // Parser-level check only: `validate_args` is intentionally not called here.
    #[test]
    fn output_with_js_is_allowed() {
        parse(&["-o", "out.txt", "--js", "document.title", "https://example.com"]).unwrap();
    }

    #[test]
    fn format_html_with_selector_errors() {
        assert_validation_err(
            &["--format", "html", "--selector", "article", "https://example.com"],
            "--selector cannot be used with --format html or text",
        );
    }

    #[test]
    fn format_html_with_multi_urls_errors() {
        assert_validation_err(
            &["--format", "html", "https://example.com", "https://example.org"],
            "cannot be used with multiple URLs",
        );
    }

    #[test]
    fn output_with_multi_urls_errors() {
        assert_validation_err(
            &["-o", "out.md", "https://example.com", "https://example.org"],
            "only valid with a single URL",
        );
    }
}