mod commands;
mod teardown;
use clap::{Parser, Subcommand};
use commands::scrape::Format;
use teardown::{CmdError, finish, install_signal_teardown};
// Top-level command-line definition for the `crw` binary, parsed with clap's derive API.
//
// A subcommand and the bare positional URL are declared as mutually exclusive. When
// only a URL is given, `main` copies the scrape-related flags below into
// `commands::scrape::ScrapeArgs` and runs the scrape command.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macros turn `///`
// doc comments on the struct and its fields into help text, so switching to `///`
// would change the program's `--help` output.
#[derive(Parser)]
#[command(
name = "crw",
version,
about = "Web scraper for AI agents",
long_about = "Unified CLI for web scraping, crawling, search, and serving.\n\n\
The fastest web scraper built for AI agents and LLM data pipelines.\n\n\
Examples:\n \
crw example.com # Scrape URL (default mode)\n \
crw scrape example.com --format json\n \
crw search \"rust web scraper\" --json --fields title,url,snippet # LLM-ready JSON\n \
crw crawl example.com --depth 3 # BFS crawl\n \
crw map example.com # Discover URLs\n \
crw serve --port 3000 # Start REST API server\n \
crw mcp # Start MCP server\n \
crw browse # Start browser automation MCP\n \
crw setup # Interactive setup wizard",
after_help = "INSTALL:\n \
brew install us/crw/crw # macOS / Linux\n \
cargo install crw-cli # Any Rust toolchain\n \
curl -fsSL https://raw.githubusercontent.com/us/crw/main/install.sh | sh\n\n\
DOCS: https://docs.fastcrw.com · https://github.com/us/crw\n\
CLOUD: https://fastcrw.com (500 free credits, no monthly reset)\n\
SEARCH: `crw setup --local` boots a JSON-enabled SearXNG on 127.0.0.1:8080.\n\
\x20 Public instances (searx.be, priv.au, ...) usually block JSON requests.\n\
"
)]
struct Cli {
// Optional subcommand. When absent, `main` falls back to the positional URL,
// and prints the help text if that is missing too.
#[command(subcommand)]
command: Option<Commands>,
// Positional URL (shown as `URL` in usage); declared to conflict with `command`.
#[arg(value_name = "URL", conflicts_with = "command")]
url: Option<String>,
// --format (with a short flag): a `Format` value enum, defaulting to "markdown".
// Because of the default this should always be `Some` after parsing; `main`
// still falls back to `Format::Markdown` when it is `None`.
#[arg(short, long, value_enum, default_value = "markdown")]
format: Option<Format>,
// --output FILE (with a short flag): forwarded as the scrape `output` option.
#[arg(short, long, value_name = "FILE")]
output: Option<String>,
// --raw: boolean flag forwarded to the scrape command.
#[arg(long)]
raw: bool,
// --js: boolean flag forwarded to the scrape command
// (presumably enables JavaScript rendering — confirm in `commands::scrape`).
#[arg(long)]
js: bool,
// --css SELECTOR: forwarded to the scrape command.
#[arg(long, value_name = "SELECTOR")]
css: Option<String>,
// --xpath EXPR: forwarded to the scrape command.
#[arg(long, value_name = "EXPR")]
xpath: Option<String>,
// --proxy URL: forwarded to the scrape command.
#[arg(long, value_name = "URL")]
proxy: Option<String>,
// --stealth: boolean flag forwarded to the scrape command.
#[arg(long)]
stealth: bool,
// --summary: cannot be combined with --extract.
#[arg(long, conflicts_with = "extract")]
summary: bool,
// --prompt TEXT: only accepted together with --summary.
#[arg(long, value_name = "TEXT", requires = "summary")]
prompt: Option<String>,
// --extract SCHEMA: forwarded to the scrape command; mutually exclusive with --summary.
#[arg(long, value_name = "SCHEMA")]
extract: Option<String>,
// --llm-provider / --llm-key / --llm-model / --llm-base-url: forwarded unchanged
// to the scrape command's fields of the same names.
#[arg(long, value_name = "NAME")]
llm_provider: Option<String>,
#[arg(long, value_name = "KEY")]
llm_key: Option<String>,
#[arg(long, value_name = "MODEL")]
llm_model: Option<String>,
#[arg(long, value_name = "URL")]
llm_base_url: Option<String>,
// --reset: declared to conflict with both a subcommand and the positional URL.
// `main` handles it first by running the setup command with `reset: true`.
#[arg(long, conflicts_with_all = ["command", "url"])]
reset: bool,
// --yes: only accepted together with --reset; passed to setup as `yes`
// (presumably skips a confirmation prompt — confirm in `commands::setup`).
#[arg(long, requires = "reset")]
yes: bool,
}
// Subcommands of `crw`. Each variant wraps the argument struct of the module with
// the same name under `commands`, and `main` dispatches to that module's `run`.
// The one-line descriptions mirror the examples in the CLI's long help text.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros surface `///`
// doc comments on variants as subcommand help text, which would alter `--help`.
#[derive(Subcommand)]
enum Commands {
// Scrape a URL; also the default mode when only a bare URL is given.
Scrape(commands::scrape::ScrapeArgs),
// Run a search query.
Search(commands::search::SearchArgs),
// BFS crawl.
Crawl(commands::crawl::CrawlArgs),
// Discover URLs.
Map(commands::map::MapArgs),
// Start the REST API server.
Serve(commands::serve::ServeArgs),
// Start the MCP server.
Mcp(commands::mcp::McpArgs),
// Start the browser automation MCP.
Browse(commands::browse::BrowseArgs),
// Interactive setup wizard; the top-level `--reset` flag also routes here.
Setup(commands::setup::SetupArgs),
}
/// Entry point: parses the command line and dispatches to the selected command.
///
/// Dispatch order:
/// 1. `--reset` short-circuits into the setup command with `reset: true`, then returns.
/// 2. A subcommand runs the `run` function of its module under `commands`.
/// 3. A bare URL is scraped using the top-level flags of [`Cli`].
/// 4. With no input at all, the help text is printed.
///
/// Results of type `Result<(), CmdError>` are handed to [`finish`]. The return values
/// of `search`, `serve` and `setup` are not inspected here and count as success, and
/// `browse` reports its own error and exits with status 1.
#[tokio::main]
async fn main() {
    let cli = Cli::parse();

    // `--reset` is a shortcut for the setup command in reset mode; every other
    // setup option is left switched off and only `--yes` is carried over.
    if cli.reset {
        let args = commands::setup::SetupArgs {
            non_interactive: false,
            cloud: false,
            local: false,
            no_color: false,
            reset_shell: false,
            reset: true,
            yes: cli.yes,
        };
        commands::setup::run(args).await;
        return;
    }

    let result: Result<(), CmdError> = match cli.command {
        Some(Commands::Scrape(args)) => {
            // Signal teardown (see the `teardown` module) is registered before work starts.
            install_signal_teardown();
            commands::scrape::run(args).await
        }
        Some(Commands::Search(args)) => {
            commands::search::run(args).await;
            Ok(())
        }
        Some(Commands::Crawl(args)) => {
            install_signal_teardown();
            commands::crawl::run(args).await
        }
        Some(Commands::Map(args)) => {
            install_signal_teardown();
            commands::map::run(args).await
        }
        Some(Commands::Serve(args)) => {
            commands::serve::run(args).await;
            Ok(())
        }
        Some(Commands::Mcp(args)) => {
            install_signal_teardown();
            commands::mcp::run(args).await
        }
        Some(Commands::Browse(args)) => {
            // `browse` errors are printed and turned into exit status 1 right here
            // instead of being routed through `finish`.
            if let Err(e) = commands::browse::run(args).await {
                eprintln!("error: {e}");
                std::process::exit(1);
            }
            Ok(())
        }
        Some(Commands::Setup(args)) => {
            commands::setup::run(args).await;
            Ok(())
        }
        None => {
            if let Some(url) = cli.url {
                // Default mode: a bare URL behaves like `crw scrape URL`, built from
                // the top-level flags.
                install_signal_teardown();
                let args = commands::scrape::ScrapeArgs {
                    url,
                    format: cli.format.unwrap_or(Format::Markdown),
                    output: cli.output,
                    raw: cli.raw,
                    js: cli.js,
                    css: cli.css,
                    xpath: cli.xpath,
                    proxy: cli.proxy,
                    stealth: cli.stealth,
                    summary: cli.summary,
                    prompt: cli.prompt,
                    extract: cli.extract,
                    llm_provider: cli.llm_provider,
                    llm_key: cli.llm_key,
                    llm_model: cli.llm_model,
                    llm_base_url: cli.llm_base_url,
                };
                commands::scrape::run(args).await
            } else {
                use clap::CommandFactory;
                use std::io::Write;

                // Nothing to do: show the help text followed by a blank line.
                // Writing to stdout can fail (e.g. a closed pipe), so the error is
                // reported instead of panicking via `unwrap()` / `println!`.
                let printed = Cli::command()
                    .print_help()
                    .and_then(|()| writeln!(std::io::stdout()));
                if let Err(e) = printed {
                    eprintln!("error: {e}");
                    std::process::exit(1);
                }
                Ok(())
            }
        }
    };

    finish(result);
}