use std::path::PathBuf;
use std::process::ExitCode;
use std::sync::Arc;
use clap::{Parser, Subcommand};
// ASCII-art banner for the CLI. It is wired into clap through
// `before_help = HELP_BANNER` on `Cli`, so it is printed ahead of the help text.
// The raw string is emitted verbatim at runtime; do not reformat its contents.
const HELP_BANNER: &str = r#"
.--~~,__ ____
:-....,-------`~~'._.' / __ \____ _ _____ _____
`-,,, ,_ ;'~U' / /_/ / __ \ | / / _ \/ ___/
_,-' ,'`-__; '--. / _, _/ /_/ / |/ / __/ /
(_/'~~ ''''(; /_/ |_|\____/|___/\___/_/
"#;
// Top-level command line of the `rover` binary, parsed with clap's derive API.
//
// Plain `//` comments are used on purpose in this definition: clap's derive
// turns `///` doc comments into help text, which would change program output.
#[derive(Debug, Parser)]
#[command(
name = "rover",
version,
about = "Web fetch & prep for LLM agents",
before_help = HELP_BANNER,
)]
struct Cli {
// `--config <PATH>`: optional path, accepted at any position on the command
// line (`global = true`). `dispatch` forwards it to the selected runner.
#[arg(long, global = true)]
config: Option<PathBuf>,
// `--unsafe-disable-model-integrity-check`: only exists when the crate is
// built with the `local-inference` feature. When set, `main` exports
// `rover::model_integrity::DISABLE_ENV=1` before any subcommand runs.
#[cfg(feature = "local-inference")]
#[arg(long, global = true)]
unsafe_disable_model_integrity_check: bool,
// The subcommand to execute; see `Command`.
#[command(subcommand)]
command: Command,
}
// Subcommands of `rover`. Each variant is routed by `dispatch` to the runner
// named in the per-variant comment.
//
// Plain `//` comments are used on purpose: clap's derive would turn `///`
// doc comments into help text.
#[derive(Debug, Subcommand)]
enum Command {
// Routed to `rover::cli::mcp::run`. This is also the only subcommand for
// which `main` selects the more verbose default log filter.
Mcp(McpArgs),
// Routed to `rover::cli::fetch::run`.
Fetch(FetchArgs),
// Routed to `rover::cli::batch::run` using `rover::cli::task::Args` with
// `expect_kind: Some("batch_fetch")`. Takes the same arguments as `Task`.
Batch {
// Positional identifier, forwarded unchanged.
id: String,
// `--monitor` flag, forwarded unchanged.
#[arg(long)]
monitor: bool,
// `--cancel` flag, forwarded unchanged.
#[arg(long)]
cancel: bool,
// `--format <human|ndjson>`; defaults to `human`.
#[arg(long, value_enum, default_value_t = OutputFormat::Human)]
format: OutputFormat,
// `--from-event <N>`: optional signed 64-bit value, forwarded unchanged.
// NOTE(review): presumably an event cursor to resume from — confirm.
#[arg(long)]
from_event: Option<i64>,
},
// Routed to `rover::cli::task::run` with `expect_kind: None`.
Task {
// Positional identifier, forwarded unchanged.
id: String,
// `--monitor` flag, forwarded unchanged.
#[arg(long)]
monitor: bool,
// `--cancel` flag, forwarded unchanged.
#[arg(long)]
cancel: bool,
// `--format <human|ndjson>`; defaults to `human`.
#[arg(long, value_enum, default_value_t = OutputFormat::Human)]
format: OutputFormat,
// `--from-event <N>`: optional signed 64-bit value, forwarded unchanged.
#[arg(long)]
from_event: Option<i64>,
},
// Nested subcommands (`CacheCmd`), routed to `rover::cli::cache::run`.
#[command(subcommand)]
Cache(CacheCmd),
// Routed to `rover::cli::doctor::run` after the config has been loaded.
Doctor(DoctorArgs),
// Nested subcommands (`ConfigCmd`), routed to `rover::cli::config::show`
// or `rover::cli::config::set`.
#[command(subcommand)]
Config(ConfigCmd),
// Only compiled with the `local-inference` feature; routed to
// `rover::cli::model::run`.
#[cfg(feature = "local-inference")]
#[command(subcommand)]
Model(rover::cli::model::ModelCmd),
}
// Arguments of the `fetch` subcommand. `FetchArgs::into_runtime_args` copies
// every field, unchanged, into `rover::cli::fetch::Args`.
//
// Plain `//` comments are used on purpose: clap's derive would turn `///`
// doc comments into help text.
#[derive(Debug, clap::Args)]
struct FetchArgs {
// Required positional argument. Kept as a plain string here; no URL
// validation happens at the clap layer.
url: String,
// `--force-refresh` flag.
#[arg(long)]
force_refresh: bool,
// `--ignore-robots` flag.
#[arg(long)]
ignore_robots: bool,
// `--user-agent <STRING>`, optional.
#[arg(long)]
user_agent: Option<String>,
// `--timeout-secs <N>`, optional unsigned 64-bit value.
#[arg(long)]
timeout_secs: Option<u64>,
// `--rate-limit-rpm <N>`, optional.
// NOTE(review): "rpm" presumably means requests per minute — confirm.
#[arg(long)]
rate_limit_rpm: Option<u32>,
// `--per-host-concurrency <N>`, optional.
#[arg(long)]
per_host_concurrency: Option<u32>,
// `--global-concurrency <N>`, optional.
#[arg(long)]
global_concurrency: Option<u32>,
// `--max-retries <N>`, optional; clap rejects values that do not fit in a u8.
#[arg(long)]
max_retries: Option<u8>,
// `--max-tokens <N>`, optional.
#[arg(long)]
max_tokens: Option<usize>,
// `--summarize <JSON>`, optional. Stored as an unparsed string; the `JSON`
// placeholder is only what the help output shows.
// NOTE(review): presumably parsed as JSON by the fetch runner — confirm.
#[arg(long, value_name = "JSON")]
summarize: Option<String>,
}
// Arguments of the `mcp` subcommand. `McpArgs::into_runtime_args` copies every
// field, unchanged, into `rover::cli::mcp::Args`. The five options here have
// the same names and types as the matching options on `FetchArgs`.
//
// Plain `//` comments are used on purpose: clap's derive would turn `///`
// doc comments into help text.
#[derive(Debug, clap::Args)]
struct McpArgs {
// `--ignore-robots` flag.
#[arg(long)]
ignore_robots: bool,
// `--rate-limit-rpm <N>`, optional.
#[arg(long)]
rate_limit_rpm: Option<u32>,
// `--per-host-concurrency <N>`, optional.
#[arg(long)]
per_host_concurrency: Option<u32>,
// `--global-concurrency <N>`, optional.
#[arg(long)]
global_concurrency: Option<u32>,
// `--max-retries <N>`, optional; clap rejects values that do not fit in a u8.
#[arg(long)]
max_retries: Option<u8>,
}
// Value accepted by `--format` on the `task` and `batch` subcommands.
// This is the clap-facing twin of `rover::cli::task::OutputFormat`; the `From`
// impl in this file converts between the two variant by variant.
//
// Plain `//` comments are used on purpose: clap's derive would turn `///`
// doc comments into help text.
#[derive(Debug, Clone, Copy, clap::ValueEnum)]
enum OutputFormat {
// Default for `task` and `batch` (`default_value_t = OutputFormat::Human`).
Human,
// NOTE(review): presumably newline-delimited JSON — confirm in rover::cli::task.
Ndjson,
}
/// Converts the clap-facing [`OutputFormat`] into the runtime enum of the same
/// name, mapping each variant to its namesake.
impl From<OutputFormat> for rover::cli::task::OutputFormat {
    fn from(value: OutputFormat) -> Self {
        // The match is exhaustive on purpose: adding a CLI variant without a
        // runtime counterpart becomes a compile error.
        match value {
            OutputFormat::Ndjson => Self::Ndjson,
            OutputFormat::Human => Self::Human,
        }
    }
}
// Nested subcommands of `rover cache`. `CacheCmd::into_runtime_args` maps each
// variant onto the same-named variant of `rover::cli::cache::Args`.
//
// Plain `//` comments are used on purpose: clap's derive would turn `///`
// doc comments into help text.
#[derive(Debug, Subcommand)]
enum CacheCmd {
// `cache list [--limit N] [--offset N]`.
List {
// Defaults to 20 when not given.
#[arg(long, default_value_t = 20)]
limit: u64,
// Defaults to 0 when not given.
#[arg(long, default_value_t = 0)]
offset: u64,
},
// `cache get <URL>`: one required positional argument.
Get { url: String },
// `cache purge <PATTERN> [--all]`.
// NOTE(review): `pattern` is a required positional even when `--all` is
// passed — confirm that is intended.
Purge {
pattern: String,
#[arg(long)]
all: bool,
},
// `cache stats`: takes no arguments.
Stats,
}
impl CacheCmd {
    /// Consumes the parsed `cache` subcommand and produces the equivalent
    /// `rover::cli::cache::Args` variant, moving every field across unchanged.
    fn into_runtime_args(self) -> rover::cli::cache::Args {
        use rover::cli::cache::Args as RuntimeArgs;

        match self {
            Self::Stats => RuntimeArgs::Stats,
            Self::Get { url } => RuntimeArgs::Get { url },
            Self::List { limit, offset } => RuntimeArgs::List { limit, offset },
            Self::Purge { pattern, all } => RuntimeArgs::Purge { pattern, all },
        }
    }
}
// Nested subcommands of `rover config`.
//
// Plain `//` comments are used on purpose: clap's derive would turn `///`
// doc comments into help text.
#[derive(Debug, Subcommand)]
enum ConfigCmd {
// `config show`: `dispatch` calls `rover::cli::config::show` with the
// `--config` path.
Show,
// `config set <KEY> <VALUE>`: two required positionals, passed as strings
// to `rover::cli::config::set` together with the `--config` path.
Set { key: String, value: String },
}
// Arguments of the `doctor` subcommand.
//
// Plain `//` comments are used on purpose: clap's derive would turn `///`
// doc comments into help text.
#[derive(Debug, clap::Args)]
struct DoctorArgs {
// `--format <STRING>`; defaults to "human". Unlike `task` and `batch`, this
// is a free-form string rather than the `OutputFormat` value enum, so clap
// accepts any value here.
// NOTE(review): presumably validated inside rover::cli::doctor::run — confirm.
#[arg(long, default_value = "human")]
format: String,
}
/// Assembles a shared [`rover::summarizer::SummarizerService`] from the
/// resolved configuration.
///
/// Steps, in order:
/// 1. build the summarizer registry from `config` and `config.tokenizer.default`;
/// 2. construct the service from `db`, the registry and
///    `config.summarization.fallback_to_extractive`;
/// 3. build a guard from `config.prompt_injection` and attach it with
///    `with_guard`.
///
/// # Errors
///
/// Returns the registry-build error or the guard-construction error,
/// converted into `anyhow::Error`.
// NOTE(review): nothing in the code visible here calls this function, which is
// why the lint is silenced — confirm it is still needed.
#[allow(dead_code)]
async fn build_summarizer_service(
    db: rover::storage::Db,
    config: &rover::config::Config,
) -> anyhow::Result<Arc<rover::summarizer::SummarizerService>> {
    let registry = Arc::new(rover::summarizer::registry::build(
        config,
        config.tokenizer.default,
    )?);

    // The service is constructed before the guard so that failures surface in
    // the same order as the individual steps listed above.
    let service = rover::summarizer::SummarizerService::new(
        db,
        registry,
        config.summarization.fallback_to_extractive,
    );

    let guard = rover::guard::Guard::from_config(&config.prompt_injection)?;
    Ok(Arc::new(service.with_guard(Arc::new(guard))))
}
/// Process entry point.
///
/// Installs the TLS crypto provider, parses the command line, initialises
/// telemetry (with a more verbose default filter for `mcp`, `warn` for
/// everything else), applies the optional model-integrity override, then
/// builds a multi-threaded Tokio runtime and runs [`dispatch`] on it.
///
/// Returns the exit code produced by `dispatch`, or exit code 1 with a
/// `rover: ...` message on stderr if the Tokio runtime cannot be built.
fn main() -> ExitCode {
    rover::fetcher::client::install_ring_provider();
    let cli = Cli::parse();

    let default_filter = match &cli.command {
        Command::Mcp(_) => "info,rover=debug",
        _ => "warn",
    };
    rover::telemetry::init(default_filter);

    #[cfg(feature = "local-inference")]
    {
        if cli.unsafe_disable_model_integrity_check {
            // SAFETY: this runs before the Tokio runtime is built, so `main`
            // itself has not started any worker threads that could read the
            // environment concurrently.
            // NOTE(review): assumes `install_ring_provider` and
            // `telemetry::init` do not spawn threads that touch the
            // environment — confirm.
            unsafe { std::env::set_var(rover::model_integrity::DISABLE_ENV, "1") };
        }
        // Checked separately from the flag, so the warning is logged whenever
        // `check_disabled()` reports true, not only when the flag was passed.
        if rover::model_integrity::check_disabled() {
            tracing::warn!(
                target: "rover::model_integrity",
                "model integrity verification is DISABLED \
                 (--unsafe-disable-model-integrity-check / {}); cached model files will NOT be \
                 checked for tampering before loading",
                rover::model_integrity::DISABLE_ENV,
            );
        }
    }

    // Building the runtime can fail with an I/O error; report it the same way
    // `dispatch` reports every other failure instead of panicking.
    let runtime = match tokio::runtime::Builder::new_multi_thread()
        .enable_all()
        .build()
    {
        Ok(runtime) => runtime,
        Err(e) => {
            eprintln!("rover: starting tokio runtime: {e}");
            return ExitCode::from(1);
        }
    };
    runtime.block_on(dispatch(cli))
}
async fn dispatch(cli: Cli) -> ExitCode {
let result = match cli.command {
Command::Fetch(args) => {
rover::cli::fetch::run(args.into_runtime_args(), cli.config.as_deref()).await
}
Command::Cache(sub) => {
let args = sub.into_runtime_args();
rover::cli::cache::run(args, cli.config.as_deref()).await
}
Command::Mcp(args) => {
rover::cli::mcp::run(args.into_runtime_args(), cli.config.as_deref()).await
}
Command::Task {
id,
monitor,
cancel,
format,
from_event,
} => {
rover::cli::task::run(
rover::cli::task::Args {
id,
monitor,
cancel,
format: format.into(),
from_event,
expect_kind: None,
},
cli.config.as_deref(),
)
.await
}
Command::Batch {
id,
monitor,
cancel,
format,
from_event,
} => {
rover::cli::batch::run(
rover::cli::task::Args {
id,
monitor,
cancel,
format: format.into(),
from_event,
expect_kind: Some("batch_fetch"),
},
cli.config.as_deref(),
)
.await
}
Command::Doctor(args) => {
let cfg = match rover::config::load_resolved(cli.config.as_deref()) {
Ok(c) => c,
Err(e) => {
eprintln!("rover: loading config: {e}");
return ExitCode::from(1);
}
};
return match rover::cli::doctor::run(
rover::cli::doctor::Args {
format: args.format,
},
cfg,
)
.await
{
Ok(code) => ExitCode::from(code as u8),
Err(e) => {
eprintln!("rover: {e}");
ExitCode::from(1)
}
};
}
Command::Config(cmd) => match cmd {
ConfigCmd::Show => {
let res = rover::cli::config::show(rover::cli::config::ShowArgs {
config_path: cli.config.clone(),
});
return match res {
Ok(code) => ExitCode::from(code as u8),
Err(e) => {
eprintln!("rover: {e}");
ExitCode::from(1)
}
};
}
ConfigCmd::Set { key, value } => {
let res = rover::cli::config::set(rover::cli::config::SetArgs {
config_path: cli.config.clone(),
key,
value,
});
return match res {
Ok(code) => ExitCode::from(code as u8),
Err(e) => {
eprintln!("rover: {e}");
ExitCode::from(1)
}
};
}
},
#[cfg(feature = "local-inference")]
Command::Model(cmd) => rover::cli::model::run(cmd).await,
};
match result {
Ok(()) => ExitCode::SUCCESS,
Err(e) => {
eprintln!("rover: {e}");
ExitCode::from(1)
}
}
}
impl FetchArgs {
    /// Consumes the parsed `fetch` arguments and builds the runtime
    /// `rover::cli::fetch::Args`, moving every field across unchanged.
    fn into_runtime_args(self) -> rover::cli::fetch::Args {
        // Exhaustive destructuring: a field added to `FetchArgs` but not
        // forwarded below fails to compile instead of being dropped silently.
        let Self {
            url,
            force_refresh,
            ignore_robots,
            user_agent,
            timeout_secs,
            rate_limit_rpm,
            per_host_concurrency,
            global_concurrency,
            max_retries,
            max_tokens,
            summarize,
        } = self;

        rover::cli::fetch::Args {
            url,
            force_refresh,
            ignore_robots,
            user_agent,
            timeout_secs,
            rate_limit_rpm,
            per_host_concurrency,
            global_concurrency,
            max_retries,
            max_tokens,
            summarize,
        }
    }
}
impl McpArgs {
    /// Consumes the parsed `mcp` arguments and builds the runtime
    /// `rover::cli::mcp::Args`, moving every field across unchanged.
    fn into_runtime_args(self) -> rover::cli::mcp::Args {
        // Exhaustive destructuring: a field added to `McpArgs` but not
        // forwarded below fails to compile instead of being dropped silently.
        let Self {
            ignore_robots,
            rate_limit_rpm,
            per_host_concurrency,
            global_concurrency,
            max_retries,
        } = self;

        rover::cli::mcp::Args {
            ignore_robots,
            rate_limit_rpm,
            per_host_concurrency,
            global_concurrency,
            max_retries,
        }
    }
}