criner_cli/args.rs
1use std::path::PathBuf;
2
3fn parse_local_time(src: &str) -> Result<time::Time, time::error::Parse> {
4 time::Time::parse(
5 src,
6 &time::macros::format_description!("[hour repr:24 padding:none]:[minute padding:zero]"),
7 )
8}
9
10#[derive(Debug, clap::Parser)]
11#[clap(about = "Interact with crates.io from the command-line")]
12pub struct Args {
13 #[clap(subcommand)]
14 pub sub: Option<SubCommands>,
15}
16
17#[derive(Debug, clap::Parser)]
18pub enum SubCommands {
19 /// Mine crates.io in an incorruptible and resumable fashion
20 #[clap(display_order = 0)]
21 #[clap(disable_version_flag(true))]
22 Mine {
23 /// If set, no gui will be presented. Best with RUST_LOG=info to see basic information.
24 #[clap(long)]
25 no_gui: bool,
26
27 /// The amount of frames to show per second
28 #[clap(long, name = "frames-per-second", default_value = "6.0")]
29 fps: f32,
30
31 /// The amount of progress messages to keep in a ring buffer.
32 #[clap(short = 's', long, default_value = "100")]
33 progress_message_scrollback_buffer_size: usize,
34
35 /// If set, the crates-index database for additional metadata will not be downloaded.
36 ///
37 /// It costs a lot of initial processing time and IO when writing changes back to the database,
38 /// which isn't helpful while on a slow disk - right now it does so unconditionally and doesn't track
39 /// that the work was already done.
40 #[clap(long, short = 'D')]
41 no_db_download: bool,
42
43 /// The amount of IO-bound processors to run concurrently.
44 ///
45 /// A way to choose a value is to see which part of the I/O is actually the bottle neck.
46 /// Depending on that number, one should experiment with an amount of processors that saturate
47 /// either input or output.
48 /// Most commonly, these are bound to the input, as it is the network.
49 #[clap(long, alias = "io", value_name = "io", default_value = "10")]
50 io_bound_processors: u32,
51
52 /// The amount of CPU- and Output-bound processors to run concurrently.
53 ///
54 /// These will perform a computation followed by flushing its result to disk in the form
55 /// of multiple small files.
56 /// It's recommended to adjust that number to whatever can saturate the speed of writing to disk,
57 /// as these processors will yield when writing, allowing other processors to compute.
58 /// Computes are relatively inexpensive compared to the writes.
59 #[clap(long, alias = "cpu-o", value_name = "cpu-o", default_value = "20")]
60 cpu_o_bound_processors: u32,
61
62 /// The amount of CPU-bound processors to run concurrently.
63 ///
64 /// One can assume that one of these can occupy one core of a CPU.
65 /// However, they will not use a lot of IO, nor will they use much memory.
66 #[clap(long, alias = "cpu", value_name = "cpu", default_value = "4")]
67 cpu_bound_processors: u32,
68
69 /// Path to the possibly existing crates.io repository clone. If unset, it will be cloned to a temporary spot.
70 #[clap(short = 'c', long, name = "REPO")]
71 repository: Option<PathBuf>,
72
73 /// The amount of time we can take for the computation. Specified in humantime, like 10s, 5min, or 2h, or '3h 2min 2s'
74 #[clap(long, short = 't')]
75 time_limit: Option<humantime::Duration>,
76
77 /// The time between each fetch operation, specified in humantime, like 10s, 5min, or 2h, or '3h 2min 2s'
78 #[clap(long, short = 'f', default_value = "5min")]
79 fetch_every: humantime::Duration,
80
81 /// If set, the amount of times the fetch stage will run. If set to 0, it will never run.
82 #[clap(long, short = 'F')]
83 fetch_at_most: Option<usize>,
84
85 /// The time between each processing run, specified in humantime, like 10s, 5min, or 2h, or '3h 2min 2s'
86 #[clap(long, short = 'p', default_value = "5min")]
87 process_every: humantime::Duration,
88
89 /// If set, the amount of times the process stage will run. If set to 0, they will never run.
90 #[clap(long, short = 'P')]
91 process_at_most: Option<usize>,
92
93 /// The time between each reporting and processing run, specified in humantime, like 10s, 5min, or 2h, or '3h 2min 2s'
94 #[clap(long, short = 'r', default_value = "5min")]
95 report_every: humantime::Duration,
96
97 /// If set, the amount of times the reporting stage will run. If set to 0, they will never run.
98 #[clap(long, short = 'R')]
99 report_at_most: Option<usize>,
100
101 /// If set, declare at which local time to download the crates.io database and digest it.
102 ///
103 /// This job runs every 24h, as the database is updated that often.
104 /// If unset, the job starts right away.
105 /// Format is HH:MM, e.g. '14:30' for 2:30 pm or 03:15 for quarter past 3 in the morning.
106 #[clap(long, short = 'd', value_parser = parse_local_time)]
107 download_crates_io_database_every_24_hours_starting_at: Option<time::Time>,
108
109 /// If set, the reporting stage will only iterate over crates that match the given standard unix glob.
110 ///
111 /// moz* would match only crates starting with 'moz' for example.
112 #[clap(long, short = 'g')]
113 glob: Option<String>,
114
115 /// Path to the possibly existing database. It's used to persist all mining results.
116 #[clap(default_value = "criner.db")]
117 db_path: PathBuf,
118 },
119 /// Export all Criner data into a format friendly for exploration via SQL, best viewed with https://sqlitebrowser.org
120 ///
121 /// Criner stores binary blobs internally and migrates them on the fly, which is optimized for raw performance.
122 /// It's also impractical for exploring the data by hand, so the exported data will explode all types into
123 /// tables with each column being a field. Foreign key relations are set accordingly to allow joins.
124 /// Use this to get an overview of what's available, and possibly contribute a report generator which implements
125 /// a query using raw data and writes it into reports.
126 #[clap(display_order = 1)]
127 #[clap(disable_version_flag(true))]
128 Export {
129 /// The path to the source database in sqlite format
130 input_db_path: PathBuf,
131
132 /// Path to which to write the exported data. If it exists the operation will fail.
133 export_db_path: PathBuf,
134 },
135 #[cfg(feature = "migration")]
136 /// A special purpose command only to be executed in special circumstances
137 #[clap(display_order = 9)]
138 Migrate,
139}
140
141impl Default for SubCommands {
142 fn default() -> Self {
143 SubCommands::Mine {
144 no_gui: false,
145 fps: 6.0,
146 progress_message_scrollback_buffer_size: 100,
147 io_bound_processors: 5,
148 cpu_bound_processors: 2,
149 cpu_o_bound_processors: 10,
150 repository: None,
151 time_limit: None,
152 fetch_every: std::time::Duration::from_secs(60).into(),
153 fetch_at_most: None,
154 no_db_download: false,
155 process_every: std::time::Duration::from_secs(60).into(),
156 process_at_most: None,
157 download_crates_io_database_every_24_hours_starting_at: Some(
158 parse_local_time("3:00").expect("valid statically known time"),
159 ),
160 report_every: std::time::Duration::from_secs(60).into(),
161 report_at_most: None,
162 db_path: PathBuf::from("criner.db"),
163 glob: None,
164 }
165 }
166}