agave_validator/commands/exit/
mod.rs

1#[cfg(target_os = "linux")]
2use std::{io, thread, time::Duration};
3use {
4    crate::{
5        admin_rpc_service,
6        commands::{monitor, wait_for_restart_window, Error, FromClapArgMatches, Result},
7    },
8    clap::{value_t_or_exit, App, Arg, ArgMatches, SubCommand},
9    solana_clap_utils::{
10        hidden_unless_forced,
11        input_validators::{is_parsable, is_valid_percentage},
12    },
13    std::path::Path,
14};
15
/// Name under which this subcommand is registered with clap.
const COMMAND: &str = "exit";

/// Default value of `--min-idle-time`, in minutes. Stored as a string because
/// it is handed to clap as the argument's default value.
const DEFAULT_MIN_IDLE_TIME: &str = "10";
/// Default value of `--max-delinquent-stake`, as a percentage. Stored as a
/// string because it is handed to clap as the argument's default value.
const DEFAULT_MAX_DELINQUENT_STAKE: &str = "5";
20
/// Follow-up work the `exit` command performs once the exit request has been
/// sent to the validator.
#[derive(Clone, Debug, PartialEq)]
pub enum PostExitAction {
    /// Run the agave-validator monitor command indefinitely
    Monitor,
    /// Block until the exiting validator process has terminated
    Wait,
}
28
29#[derive(Debug, PartialEq)]
30pub struct ExitArgs {
31    pub force: bool,
32    pub post_exit_action: Option<PostExitAction>,
33    pub min_idle_time: usize,
34    pub max_delinquent_stake: u8,
35    pub skip_new_snapshot_check: bool,
36    pub skip_health_check: bool,
37}
38
39impl FromClapArgMatches for ExitArgs {
40    fn from_clap_arg_match(matches: &ArgMatches) -> Result<Self> {
41        let post_exit_action = if matches.is_present("monitor") {
42            Some(PostExitAction::Monitor)
43        } else if matches.is_present("no_wait_for_exit") {
44            None
45        } else {
46            Some(PostExitAction::Wait)
47        };
48
49        // Deprecated in v3.0.0
50        if matches.is_present("wait_for_exit") {
51            eprintln!(
52                "WARN: The --wait-for-exit flag has been deprecated, waiting for exit is now the \
53                 default behavior"
54            );
55        }
56        // Deprecated in v3.1.0
57        if matches.is_present("monitor") {
58            eprintln!(
59                "WARN: The --monitor flag has been deprecated, use \"agave-validator monitor\" \
60                 instead"
61            );
62        }
63
64        Ok(ExitArgs {
65            force: matches.is_present("force"),
66            post_exit_action,
67            min_idle_time: value_t_or_exit!(matches, "min_idle_time", usize),
68            max_delinquent_stake: value_t_or_exit!(matches, "max_delinquent_stake", u8),
69            skip_new_snapshot_check: matches.is_present("skip_new_snapshot_check"),
70            skip_health_check: matches.is_present("skip_health_check"),
71        })
72    }
73}
74
75pub fn command<'a>() -> App<'a, 'a> {
76    SubCommand::with_name(COMMAND)
77        .about("Send an exit request to the validator")
78        .arg(
79            Arg::with_name("force")
80                .short("f")
81                .long("force")
82                .takes_value(false)
83                .help(
84                    "Request the validator exit immediately instead of waiting for a restart \
85                     window",
86                ),
87        )
88        .arg(
89            Arg::with_name("monitor")
90                .short("m")
91                .long("monitor")
92                .takes_value(false)
93                .requires("no_wait_for_exit")
94                .hidden(hidden_unless_forced())
95                .help("Monitor the validator after sending the exit request"),
96        )
97        .arg(
98            Arg::with_name("wait_for_exit")
99                .long("wait-for-exit")
100                .conflicts_with("monitor")
101                .hidden(hidden_unless_forced())
102                .help("Wait for the validator to terminate after sending the exit request"),
103        )
104        .arg(
105            Arg::with_name("no_wait_for_exit")
106                .long("no-wait-for-exit")
107                .takes_value(false)
108                .conflicts_with("wait_for_exit")
109                .help("Do not wait for the validator to terminate after sending the exit request"),
110        )
111        .arg(
112            Arg::with_name("min_idle_time")
113                .long("min-idle-time")
114                .takes_value(true)
115                .validator(is_parsable::<usize>)
116                .value_name("MINUTES")
117                .default_value(DEFAULT_MIN_IDLE_TIME)
118                .help("Minimum time that the validator should not be leader before restarting"),
119        )
120        .arg(
121            Arg::with_name("max_delinquent_stake")
122                .long("max-delinquent-stake")
123                .takes_value(true)
124                .validator(is_valid_percentage)
125                .default_value(DEFAULT_MAX_DELINQUENT_STAKE)
126                .value_name("PERCENT")
127                .help("The maximum delinquent stake % permitted for an exit"),
128        )
129        .arg(
130            Arg::with_name("skip_new_snapshot_check")
131                .long("skip-new-snapshot-check")
132                .help("Skip check for a new snapshot"),
133        )
134        .arg(
135            Arg::with_name("skip_health_check")
136                .long("skip-health-check")
137                .help("Skip health check"),
138        )
139}
140
141pub fn execute(matches: &ArgMatches, ledger_path: &Path) -> Result<()> {
142    let exit_args = ExitArgs::from_clap_arg_match(matches)?;
143
144    if !exit_args.force {
145        wait_for_restart_window::wait_for_restart_window(
146            ledger_path,
147            None,
148            exit_args.min_idle_time,
149            exit_args.max_delinquent_stake,
150            exit_args.skip_new_snapshot_check,
151            exit_args.skip_health_check,
152        )?;
153    }
154
155    // Grab the pid from the process before initiating exit as the running
156    // validator will be unable to respond after exit has returned.
157    //
158    // Additionally, only check the pid() RPC call result if it will be used.
159    // In an upgrade scenario, it is possible that a binary that calls pid()
160    // will be initating exit against a process that doesn't support pid().
161    const WAIT_FOR_EXIT_UNSUPPORTED_ERROR: &str = "remote process exit cannot be waited on. \
162                                                   `--wait-for-exit` is not supported by the \
163                                                   remote process";
164    let post_exit_action = exit_args.post_exit_action.clone();
165    let validator_pid = admin_rpc_service::runtime().block_on(async move {
166        let admin_client = admin_rpc_service::connect(ledger_path).await?;
167        let validator_pid = match post_exit_action {
168            Some(PostExitAction::Wait) => admin_client
169                .pid()
170                .await
171                .map_err(|_err| Error::Dynamic(WAIT_FOR_EXIT_UNSUPPORTED_ERROR.into()))?,
172            _ => 0,
173        };
174        admin_client.exit().await?;
175
176        Ok::<u32, Error>(validator_pid)
177    })?;
178
179    println!("Exit request sent");
180
181    match exit_args.post_exit_action {
182        None => Ok(()),
183        Some(PostExitAction::Monitor) => monitor::execute(matches, ledger_path),
184        Some(PostExitAction::Wait) => poll_until_pid_terminates(validator_pid),
185    }?;
186
187    Ok(())
188}
189
190#[cfg(target_os = "linux")]
191fn poll_until_pid_terminates(pid: u32) -> Result<()> {
192    let pid = i32::try_from(pid)?;
193
194    println!("Waiting for agave-validator process {pid} to terminate");
195    loop {
196        // From man kill(2)
197        //
198        // If sig is 0, then no signal is sent, but existence and permission
199        // checks are still performed; this can be used to check for the
200        // existence of a process ID or process group ID that the caller is
201        // permitted to signal.
202        let result = unsafe {
203            libc::kill(pid, /*sig:*/ 0)
204        };
205        if result >= 0 {
206            // Give the process some time to exit before checking again
207            thread::sleep(Duration::from_millis(500));
208        } else {
209            let errno = io::Error::last_os_error()
210                .raw_os_error()
211                .ok_or(Error::Dynamic("unable to read raw os error".into()))?;
212            match errno {
213                libc::ESRCH => {
214                    println!("Done, agave-validator process {pid} has terminated");
215                    break;
216                }
217                libc::EINVAL => {
218                    // An invalid signal was specified, we only pass sig=0 so
219                    // this should not be possible
220                    Err(Error::Dynamic(
221                        format!("unexpected invalid signal error for kill({pid}, 0)").into(),
222                    ))?;
223                }
224                libc::EPERM => {
225                    Err(io::Error::from(io::ErrorKind::PermissionDenied))?;
226                }
227                unknown => {
228                    Err(Error::Dynamic(
229                        format!("unexpected errno for kill({pid}, 0): {unknown}").into(),
230                    ))?;
231                }
232            }
233        }
234    }
235
236    Ok(())
237}
238
/// Fallback for platforms other than Linux, where waiting for the validator
/// process to terminate is not implemented: always returns an error.
#[cfg(not(target_os = "linux"))]
fn poll_until_pid_terminates(_pid: u32) -> Result<()> {
    let reason = "Unable to wait for agave-validator process termination on this platform";
    Err(Error::Dynamic(reason.into()))
}
245
#[cfg(test)]
mod tests {
    use {super::*, crate::commands::tests::verify_args_struct_by_command};

    // Baseline used by the tests: the arguments expected from a bare `exit`
    // invocation, derived from the same default constants the command uses.
    impl Default for ExitArgs {
        fn default() -> Self {
            let min_idle_time: usize = DEFAULT_MIN_IDLE_TIME
                .parse()
                .expect("invalid DEFAULT_MIN_IDLE_TIME");
            let max_delinquent_stake: u8 = DEFAULT_MAX_DELINQUENT_STAKE
                .parse()
                .expect("invalid DEFAULT_MAX_DELINQUENT_STAKE");

            Self {
                force: false,
                post_exit_action: Some(PostExitAction::Wait),
                min_idle_time,
                max_delinquent_stake,
                skip_new_snapshot_check: false,
                skip_health_check: false,
            }
        }
    }

    /// Hands `cli_args` and `expected` to `verify_args_struct_by_command`
    /// together with a fresh `exit` subcommand definition.
    fn check_exit_args(cli_args: Vec<&'static str>, expected: ExitArgs) {
        verify_args_struct_by_command(command(), cli_args, expected);
    }

    #[test]
    fn verify_args_struct_by_command_exit_default() {
        check_exit_args(vec![COMMAND], ExitArgs::default());
    }

    #[test]
    fn verify_args_struct_by_command_exit_with_force() {
        let expected = ExitArgs {
            force: true,
            ..ExitArgs::default()
        };
        check_exit_args(vec![COMMAND, "--force"], expected);
    }

    #[test]
    fn verify_args_struct_by_command_exit_with_post_exit_action() {
        // (command line, post-exit action it must resolve to)
        let cases = vec![
            (
                vec![COMMAND, "--monitor", "--no-wait-for-exit"],
                Some(PostExitAction::Monitor),
            ),
            (vec![COMMAND, "--no-wait-for-exit"], None),
            (vec![COMMAND, "--wait-for-exit"], Some(PostExitAction::Wait)),
        ];

        for (cli_args, post_exit_action) in cases {
            let expected = ExitArgs {
                post_exit_action,
                ..ExitArgs::default()
            };
            check_exit_args(cli_args, expected);
        }
    }

    #[test]
    fn verify_args_struct_by_command_exit_with_min_idle_time() {
        let expected = ExitArgs {
            min_idle_time: 60,
            ..ExitArgs::default()
        };
        check_exit_args(vec![COMMAND, "--min-idle-time", "60"], expected);
    }

    #[test]
    fn verify_args_struct_by_command_exit_with_max_delinquent_stake() {
        let expected = ExitArgs {
            max_delinquent_stake: 10,
            ..ExitArgs::default()
        };
        check_exit_args(vec![COMMAND, "--max-delinquent-stake", "10"], expected);
    }

    #[test]
    fn verify_args_struct_by_command_exit_with_skip_new_snapshot_check() {
        let expected = ExitArgs {
            skip_new_snapshot_check: true,
            ..ExitArgs::default()
        };
        check_exit_args(vec![COMMAND, "--skip-new-snapshot-check"], expected);
    }

    #[test]
    fn verify_args_struct_by_command_exit_with_skip_health_check() {
        let expected = ExitArgs {
            skip_health_check: true,
            ..ExitArgs::default()
        };
        check_exit_args(vec![COMMAND, "--skip-health-check"], expected);
    }
}