use clap::builder::NonEmptyStringValueParser;
use clap::{ArgAction, Args, Parser, Subcommand, ValueEnum, value_parser};
// Top-level command-line interface of the `servo-fetch` binary.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macros turn
// `///` doc comments into help text, so a doc comment on this struct or its
// fields would change the `--help` output.
#[derive(Parser)]
#[command(
    name = "servo-fetch",
    version,
    about = "A browser engine in a binary — fetch, render, and extract web content."
)]
pub(crate) struct Cli {
    // Optional subcommand (see `Command`).
    #[command(subcommand)]
    pub command: Option<Command>,

    // Fetch options, flattened so they are parsed directly on the top-level command.
    #[command(flatten)]
    pub fetch: FetchArgs,

    // `-v` / `--verbose`: counted flag (the value is the number of occurrences).
    // Global, and mutually exclusive with `--quiet`.
    #[arg(
        short = 'v',
        long,
        action = ArgAction::Count,
        global = true,
        conflicts_with = "quiet"
    )]
    pub verbose: u8,

    // `-q` / `--quiet`: global boolean flag.
    #[arg(
        short = 'q',
        long,
        global = true
    )]
    pub quiet: bool,

    // `--allow-private-addresses`: global boolean flag that is hidden from help output.
    #[arg(
        long = "allow-private-addresses",
        hide = true,
        global = true
    )]
    pub allow_private_addresses: bool,
}
// Arguments of the default (no-subcommand) fetch mode; flattened into `Cli`.
//
// NOTE: plain `//` comments are used on purpose. clap's derive turns `///` doc
// comments into help text, which would change the `--help` output.
#[derive(Args, Debug)]
pub(crate) struct FetchArgs {
    // Positional values; `num_args = 1..` demands at least one value whenever
    // the argument is given.
    #[arg(num_args = 1..)]
    pub urls: Vec<String>,

    // `--json`: boolean flag; cannot be combined with `--screenshot` or `--js`.
    #[arg(
        long,
        conflicts_with_all = ["screenshot", "js"]
    )]
    pub json: bool,

    // `--screenshot <FILE>`; cannot be combined with `--json` or `--js`.
    #[arg(
        long,
        value_name = "FILE",
        conflicts_with_all = ["json", "js"]
    )]
    pub screenshot: Option<String>,

    // `--full-page`: boolean flag that is only accepted together with `--screenshot`.
    #[arg(
        long,
        requires = "screenshot"
    )]
    pub full_page: bool,

    // `--js <EXPR>`; cannot be combined with `--json` or `--screenshot`.
    #[arg(
        long,
        value_name = "EXPR",
        conflicts_with_all = ["json", "screenshot"]
    )]
    pub js: Option<String>,

    // `-t` / `--timeout <SECS>`: defaults to 30; values below 1 are rejected.
    #[arg(
        short = 't',
        long,
        default_value_t = 30,
        value_parser = value_parser!(u64).range(1..),
        value_name = "SECS"
    )]
    pub timeout: u64,

    // `--settle <MS>`: defaults to 0; accepted range is 0 through 10_000 inclusive.
    #[arg(
        long,
        default_value_t = 0,
        value_parser = value_parser!(u64).range(0..=10_000),
        value_name = "MS"
    )]
    pub settle: u64,

    // `--selector <CSS>`: an empty string is rejected by the value parser.
    #[arg(
        long,
        value_name = "CSS",
        value_parser = NonEmptyStringValueParser::new()
    )]
    pub selector: Option<String>,

    // `--raw <MODE>`: one of the `RawMode` values; cannot be combined with
    // `--json`, `--screenshot`, `--js` or `--selector`.
    #[arg(
        long,
        value_name = "MODE",
        value_enum,
        conflicts_with_all = ["json", "screenshot", "js", "selector"]
    )]
    pub raw: Option<RawMode>,

    // `--user-agent <UA>`: optional string, no validation applied here.
    #[arg(
        long,
        value_name = "UA"
    )]
    pub user_agent: Option<String>,

    // `--schema <FILE>`: parsed as a path; cannot be combined with
    // `--screenshot`, `--js`, `--raw` or `--selector`.
    #[arg(
        long,
        value_name = "FILE",
        conflicts_with_all = ["screenshot", "js", "raw", "selector"]
    )]
    pub schema: Option<std::path::PathBuf>,

    // `--visibility <POLICY>`: one of the `VisibilityArg` values; defaults to
    // `VisibilityArg::Moderate`.
    #[arg(
        long,
        value_name = "POLICY",
        value_enum,
        default_value_t = VisibilityArg::Moderate
    )]
    pub visibility: VisibilityArg,
}
// Value accepted by `--visibility`; converted to a `servo_fetch::VisibilityPolicy`
// through `VisibilityArg::to_policy`.
//
// NOTE: plain `//` comments are used on purpose. With clap's `ValueEnum` derive,
// `///` doc comments on variants become help text for the possible values.
// Variant order is kept as-is because it determines the order of possible values.
#[derive(Clone, Copy, Debug, ValueEnum)]
pub(crate) enum VisibilityArg {
    // Default for `--visibility` (see `FetchArgs::visibility`).
    Moderate,
    Strict,
    Off,
}
impl VisibilityArg {
pub(crate) fn to_policy(self) -> servo_fetch::VisibilityPolicy {
match self {
Self::Moderate => servo_fetch::VisibilityPolicy::moderate(),
Self::Strict => servo_fetch::VisibilityPolicy::strict(),
Self::Off => servo_fetch::VisibilityPolicy::off(),
}
}
}
// Value accepted by `--raw`; parsed from the strings "html" and "text".
//
// NOTE: plain `//` comments are used on purpose. With clap's `ValueEnum` derive,
// `///` doc comments on variants become help text for the possible values.
#[derive(Clone, Copy, Debug, ValueEnum)]
pub(crate) enum RawMode {
    Html,
    Text,
}
// Subcommands of the binary; each variant wraps the argument struct of that
// subcommand.
//
// `Debug` is derived for consistency with the wrapped argument structs (all of
// which derive it), so a parsed command can be debug-printed.
//
// NOTE: plain `//` comments are used on purpose. clap's derive turns `///` doc
// comments on variants into subcommand help text.
#[derive(Debug, Subcommand)]
pub(crate) enum Command {
    // Arguments: `McpArgs`.
    Mcp(McpArgs),
    // Arguments: `ServeArgs`.
    Serve(ServeArgs),
    // Arguments: `CrawlArgs`.
    Crawl(CrawlArgs),
    // Arguments: `MapArgs`.
    Map(MapArgs),
}
// Arguments of the `Command::Mcp` subcommand.
//
// NOTE: plain `//` comments are used on purpose; `///` would become help text.
#[derive(Args, Debug)]
pub(crate) struct McpArgs {
    // `--port <PORT>`: optional 16-bit port number; `None` when not given.
    #[arg(
        long,
        value_name = "PORT"
    )]
    pub port: Option<u16>,
}
// Arguments of the `Command::Serve` subcommand.
//
// NOTE: plain `//` comments are used on purpose; `///` would become help text.
#[derive(Args, Debug)]
pub(crate) struct ServeArgs {
    // `--host <HOST>`: defaults to "127.0.0.1".
    #[arg(
        long,
        value_name = "HOST",
        default_value = "127.0.0.1"
    )]
    pub host: String,

    // `--port <PORT>`: 16-bit port number, defaults to 3000.
    #[arg(
        long,
        value_name = "PORT",
        default_value_t = 3000
    )]
    pub port: u16,
}
// Arguments of the `Command::Crawl` subcommand.
//
// NOTE: plain `//` comments are used on purpose. clap's derive turns `///` doc
// comments into help text, which would change the `--help` output.
#[derive(Args, Debug)]
pub(crate) struct CrawlArgs {
    // Required positional argument (no `#[arg]` attribute, non-optional type).
    pub url: String,

    // `--limit <N>`: defaults to 50.
    #[arg(
        long,
        default_value_t = 50,
        value_name = "N"
    )]
    pub limit: usize,

    // `--max-depth <N>`: defaults to 3.
    #[arg(
        long,
        default_value_t = 3,
        value_name = "N"
    )]
    pub max_depth: usize,

    // `--include <GLOB>`: collected into a list, so it may be given repeatedly.
    #[arg(
        long,
        value_name = "GLOB"
    )]
    pub include: Vec<String>,

    // `--exclude <GLOB>`: collected into a list, so it may be given repeatedly.
    #[arg(
        long,
        value_name = "GLOB"
    )]
    pub exclude: Vec<String>,

    // `--json`: boolean flag.
    #[arg(long)]
    pub json: bool,

    // `--selector <CSS>`: an empty string is rejected by the value parser.
    #[arg(
        long,
        value_name = "CSS",
        value_parser = NonEmptyStringValueParser::new()
    )]
    pub selector: Option<String>,

    // `-t` / `--timeout <SECS>`: defaults to 30; values below 1 are rejected.
    #[arg(
        short = 't',
        long,
        default_value_t = 30,
        value_parser = value_parser!(u64).range(1..),
        value_name = "SECS"
    )]
    pub timeout: u64,

    // `--settle <MS>`: defaults to 0; accepted range is 0 through 10_000 inclusive.
    #[arg(
        long,
        default_value_t = 0,
        value_parser = value_parser!(u64).range(0..=10_000),
        value_name = "MS"
    )]
    pub settle: u64,

    // `--concurrency <N>`: defaults to 1; accepted range is 1 through 64 inclusive.
    #[arg(
        long,
        default_value_t = 1,
        value_parser = value_parser!(u64).range(1..=64),
        value_name = "N"
    )]
    pub concurrency: u64,

    // `--delay-ms <MS>`: defaults to 500; accepted range is 0 through 60_000 inclusive.
    #[arg(
        long,
        default_value_t = 500,
        value_parser = value_parser!(u64).range(0..=60_000),
        value_name = "MS"
    )]
    pub delay_ms: u64,

    // `--user-agent <UA>`: optional string, no validation applied here.
    #[arg(
        long,
        value_name = "UA"
    )]
    pub user_agent: Option<String>,
}
// Arguments of the `Command::Map` subcommand.
//
// NOTE: plain `//` comments are used on purpose. clap's derive turns `///` doc
// comments into help text, which would change the `--help` output.
#[derive(Args, Debug)]
pub(crate) struct MapArgs {
    // Required positional argument (no `#[arg]` attribute, non-optional type).
    pub url: String,

    // `--limit <N>`: defaults to 5000.
    #[arg(
        long,
        default_value_t = 5000,
        value_name = "N"
    )]
    pub limit: usize,

    // `--include <GLOB>`: collected into a list, so it may be given repeatedly.
    #[arg(
        long,
        value_name = "GLOB"
    )]
    pub include: Vec<String>,

    // `--exclude <GLOB>`: collected into a list, so it may be given repeatedly.
    #[arg(
        long,
        value_name = "GLOB"
    )]
    pub exclude: Vec<String>,

    // `--json`: boolean flag.
    #[arg(long)]
    pub json: bool,

    // `--no-fallback`: boolean flag.
    #[arg(long)]
    pub no_fallback: bool,

    // `--user-agent <UA>`: optional string, no validation applied here.
    #[arg(
        long,
        value_name = "UA"
    )]
    pub user_agent: Option<String>,

    // `-t` / `--timeout <SECS>`: defaults to 30; values below 1 are rejected.
    #[arg(
        short = 't',
        long,
        default_value_t = 30,
        value_parser = value_parser!(u64).range(1..),
        value_name = "SECS"
    )]
    pub timeout: u64,
}
// Unit tests for the value enums declared in this file.
#[cfg(test)]
mod tests {
    // Brings `RawMode` and the `ValueEnum` trait (imported by the parent module)
    // into scope, so no separate trait import is needed inside the test.
    use super::*;

    /// `RawMode` parses its two spellings to the matching variant and rejects
    /// anything else.
    #[test]
    fn raw_mode_from_str() {
        // Pin the exact variant: a bare `is_ok()` would also pass if the two
        // names were mapped to the wrong variants.
        assert!(matches!(RawMode::from_str("html", true), Ok(RawMode::Html)));
        assert!(matches!(RawMode::from_str("text", true), Ok(RawMode::Text)));
        assert!(RawMode::from_str("xml", true).is_err());

        // The second argument is `ignore_case`: upper-case input is accepted
        // only when it is `true`.
        assert!(matches!(RawMode::from_str("HTML", true), Ok(RawMode::Html)));
        assert!(RawMode::from_str("HTML", false).is_err());
    }
}
// End-to-end argument-parsing tests that run the compiled `servo-fetch` binary.
#[cfg(test)]
mod cli_tests {
    use assert_cmd::Command;
    use predicates::prelude::*;

    /// Builds a command for the `servo-fetch` binary produced by Cargo.
    fn servo_fetch() -> Command {
        Command::cargo_bin("servo-fetch").expect("binary exists")
    }

    /// Runs the binary with `args` and asserts that it exits unsuccessfully
    /// with `needle` somewhere in its stderr output.
    fn assert_rejected(args: &[&str], needle: &str) {
        servo_fetch()
            .args(args)
            .assert()
            .failure()
            .stderr(predicate::str::contains(needle));
    }

    #[test]
    fn conflicting_json_and_screenshot() {
        assert_rejected(
            &["--json", "--screenshot", "out.png", "https://example.com"],
            "cannot be used with",
        );
    }

    #[test]
    fn settle_rejects_out_of_range() {
        // 10001 is one past the inclusive upper bound declared for `--settle`.
        assert_rejected(
            &["--settle", "10001", "https://example.com"],
            "invalid value",
        );
    }

    #[test]
    fn raw_conflicts_with_json() {
        assert_rejected(
            &["--raw", "html", "--json", "https://example.com"],
            "cannot be used with",
        );
    }

    #[test]
    fn full_page_requires_screenshot() {
        assert_rejected(&["--full-page", "https://example.com"], "--screenshot");
    }

    #[test]
    fn schema_conflicts_with_selector() {
        assert_rejected(
            &["--schema", "s.json", "--selector", "div", "https://example.com"],
            "cannot be used with",
        );
    }
}