Skip to main content

argus_config/
cli.rs

1use clap::{Parser, Subcommand};
2
3#[derive(Debug, Parser)]
4#[command(name = "argus")]
5pub struct Cli {
6    #[command(subcommand)]
7    pub command: Command,
8}
9
10#[derive(Debug, Subcommand)]
11pub enum Command {
12    /// Run the crawler. With --redis-url, omit --seed-url to run as a consumer only.
13    Crawl(CrawlOpts),
14
15    /// Push URLs onto the Redis frontier and exit. Use to feed a shared queue.
16    Seed(SeedOpts),
17}
18
19#[derive(Debug, Parser)]
20pub struct CrawlOpts {
21    /// Starting URL. Required unless using --redis-url as consumer-only.
22    #[arg(long)]
23    pub seed_url: Option<String>,
24
25    #[arg(long, default_value_t = 32)]
26    pub global_concurrency: usize,
27
28    #[arg(long, default_value_t = 1)]
29    pub per_host_concurrency: usize,
30
31    #[arg(long, default_value_t = 500)]
32    pub per_host_delay_ms: u64,
33
34    #[arg(long, default_value_t = 2)]
35    pub max_depth: u16,
36
37    /// Redis URL for distributed mode. If set with no value, uses redis://127.0.0.1:6379/ (matches docker-compose).
38    #[arg(long, num_args = 0..=1, default_missing_value = "redis://127.0.0.1:6379/")]
39    pub redis_url: Option<String>,
40
41    /// When using Redis, use Redis for per-host rate limiting so all processes share the same limit.
42    #[arg(long)]
43    pub redis_rate_limit: bool,
44
45    /// Directory to persist fetch results (metadata JSON + body files). If unset, nothing is written to disk.
46    #[arg(long)]
47    pub storage_dir: Option<std::path::PathBuf>,
48}
49
50#[derive(Debug, Parser)]
51pub struct SeedOpts {
52    /// Redis URL. No value = redis://127.0.0.1:6379/
53    #[arg(long, num_args = 0..=1, default_missing_value = "redis://127.0.0.1:6379/")]
54    pub redis_url: Option<String>,
55
56    /// URLs to push onto the frontier.
57    #[arg(short, long, required = true)]
58    pub url: Vec<String>,
59}