criner_cli/
args.rs

1use std::path::PathBuf;
2
3fn parse_local_time(src: &str) -> Result<time::Time, time::error::Parse> {
4    time::Time::parse(
5        src,
6        &time::macros::format_description!("[hour repr:24 padding:none]:[minute padding:zero]"),
7    )
8}
9
10#[derive(Debug, clap::Parser)]
11#[clap(about = "Interact with crates.io from the command-line")]
12pub struct Args {
13    #[clap(subcommand)]
14    pub sub: Option<SubCommands>,
15}
16
17#[derive(Debug, clap::Parser)]
18pub enum SubCommands {
19    /// Mine crates.io in an incorruptible and resumable fashion
20    #[clap(display_order = 0)]
21    #[clap(disable_version_flag(true))]
22    Mine {
23        /// If set, no gui will be presented. Best with RUST_LOG=info to see basic information.
24        #[clap(long)]
25        no_gui: bool,
26
27        /// The amount of frames to show per second
28        #[clap(long, name = "frames-per-second", default_value = "6.0")]
29        fps: f32,
30
31        /// The amount of progress messages to keep in a ring buffer.
32        #[clap(short = 's', long, default_value = "100")]
33        progress_message_scrollback_buffer_size: usize,
34
35        /// If set, the crates-index database for additional metadata will not be downloaded.
36        ///
37        /// It costs a lot of initial processing time and IO when writing changes back to the database,
38        /// which isn't helpful while on a slow disk - right now it does so unconditionally and doesn't track
39        /// that the work was already done.
40        #[clap(long, short = 'D')]
41        no_db_download: bool,
42
43        /// The amount of IO-bound processors to run concurrently.
44        ///
45        /// A way to choose a value is to see which part of the I/O is actually the bottle neck.
46        /// Depending on that number, one should experiment with an amount of processors that saturate
47        /// either input or output.
48        /// Most commonly, these are bound to the input, as it is the network.
49        #[clap(long, alias = "io", value_name = "io", default_value = "10")]
50        io_bound_processors: u32,
51
52        /// The amount of CPU- and Output-bound processors to run concurrently.
53        ///
54        /// These will perform a computation followed by flushing its result to disk in the form
55        /// of multiple small files.
56        /// It's recommended to adjust that number to whatever can saturate the speed of writing to disk,
57        /// as these processors will yield when writing, allowing other processors to compute.
58        /// Computes are relatively inexpensive compared to the writes.
59        #[clap(long, alias = "cpu-o", value_name = "cpu-o", default_value = "20")]
60        cpu_o_bound_processors: u32,
61
62        /// The amount of CPU-bound processors to run concurrently.
63        ///
64        /// One can assume that one of these can occupy one core of a CPU.
65        /// However, they will not use a lot of IO, nor will they use much memory.
66        #[clap(long, alias = "cpu", value_name = "cpu", default_value = "4")]
67        cpu_bound_processors: u32,
68
69        /// Path to the possibly existing crates.io repository clone. If unset, it will be cloned to a temporary spot.
70        #[clap(short = 'c', long, name = "REPO")]
71        repository: Option<PathBuf>,
72
73        /// The amount of time we can take for the computation. Specified in humantime, like 10s, 5min, or 2h, or '3h 2min 2s'
74        #[clap(long, short = 't')]
75        time_limit: Option<humantime::Duration>,
76
77        /// The time between each fetch operation, specified in humantime, like 10s, 5min, or 2h, or '3h 2min 2s'
78        #[clap(long, short = 'f', default_value = "5min")]
79        fetch_every: humantime::Duration,
80
81        /// If set, the amount of times the fetch stage will run. If set to 0, it will never run.
82        #[clap(long, short = 'F')]
83        fetch_at_most: Option<usize>,
84
85        /// The time between each processing run, specified in humantime, like 10s, 5min, or 2h, or '3h 2min 2s'
86        #[clap(long, short = 'p', default_value = "5min")]
87        process_every: humantime::Duration,
88
89        /// If set, the amount of times the process stage will run. If set to 0, they will never run.
90        #[clap(long, short = 'P')]
91        process_at_most: Option<usize>,
92
93        /// The time between each reporting and processing run, specified in humantime, like 10s, 5min, or 2h, or '3h 2min 2s'
94        #[clap(long, short = 'r', default_value = "5min")]
95        report_every: humantime::Duration,
96
97        /// If set, the amount of times the reporting stage will run. If set to 0, they will never run.
98        #[clap(long, short = 'R')]
99        report_at_most: Option<usize>,
100
101        /// If set, declare at which local time to download the crates.io database and digest it.
102        ///
103        /// This job runs every 24h, as the database is updated that often.
104        /// If unset, the job starts right away.
105        /// Format is HH:MM, e.g. '14:30' for 2:30 pm or 03:15 for quarter past 3 in the morning.
106        #[clap(long, short = 'd', value_parser = parse_local_time)]
107        download_crates_io_database_every_24_hours_starting_at: Option<time::Time>,
108
109        /// If set, the reporting stage will only iterate over crates that match the given standard unix glob.
110        ///
111        /// moz* would match only crates starting with 'moz' for example.
112        #[clap(long, short = 'g')]
113        glob: Option<String>,
114
115        /// Path to the possibly existing database. It's used to persist all mining results.
116        #[clap(default_value = "criner.db")]
117        db_path: PathBuf,
118    },
119    /// Export all Criner data into a format friendly for exploration via SQL, best viewed with https://sqlitebrowser.org
120    ///
121    /// Criner stores binary blobs internally and migrates them on the fly, which is optimized for raw performance.
122    /// It's also impractical for exploring the data by hand, so the exported data will explode all types into
123    /// tables with each column being a field. Foreign key relations are set accordingly to allow joins.
124    /// Use this to get an overview of what's available, and possibly contribute a report generator which implements
125    /// a query using raw data and writes it into reports.
126    #[clap(display_order = 1)]
127    #[clap(disable_version_flag(true))]
128    Export {
129        /// The path to the source database in sqlite format
130        input_db_path: PathBuf,
131
132        /// Path to which to write the exported data. If it exists the operation will fail.
133        export_db_path: PathBuf,
134    },
135    #[cfg(feature = "migration")]
136    /// A special purpose command only to be executed in special circumstances
137    #[clap(display_order = 9)]
138    Migrate,
139}
140
141impl Default for SubCommands {
142    fn default() -> Self {
143        SubCommands::Mine {
144            no_gui: false,
145            fps: 6.0,
146            progress_message_scrollback_buffer_size: 100,
147            io_bound_processors: 5,
148            cpu_bound_processors: 2,
149            cpu_o_bound_processors: 10,
150            repository: None,
151            time_limit: None,
152            fetch_every: std::time::Duration::from_secs(60).into(),
153            fetch_at_most: None,
154            no_db_download: false,
155            process_every: std::time::Duration::from_secs(60).into(),
156            process_at_most: None,
157            download_crates_io_database_every_24_hours_starting_at: Some(
158                parse_local_time("3:00").expect("valid statically known time"),
159            ),
160            report_every: std::time::Duration::from_secs(60).into(),
161            report_at_most: None,
162            db_path: PathBuf::from("criner.db"),
163            glob: None,
164        }
165    }
166}