gosh_remote/
cli.rs

1// [[file:../remote.note::3a532d42][3a532d42]]
2use super::*;
3use gut::cli::*;
4use gut::fs::*;
5
6pub use gut::prelude::*;
7// 3a532d42 ends here
8
9// [[file:../remote.note::bdfa3d68][bdfa3d68]]
/// Default file name used to share the scheduler address: it is the default
/// value of the `-w` option of both the client and the bootstrap subcommands.
const GOSH_SCHEDULER_FILE: &str = "gosh-remote-scheduler.lock";
11
12fn read_scheduler_address_from_lock_file(scheduler_address_file: &Path, timeout: f64) -> Result<String> {
13    debug!("reading scheduler address from file: {scheduler_address_file:?}");
14    LockFile::wait(scheduler_address_file, timeout)?;
15    let o = gut::fs::read_file(scheduler_address_file)?.trim().to_string();
16    Ok(o)
17}
18// bdfa3d68 ends here
19
20// [[file:../remote.note::512e88e7][512e88e7]]
21// use crate::remote::{Client, Server};
22
23/// The client side for running program concurrently distributed over multiple
24/// remote nodes
25#[derive(StructOpt)]
26struct ClientCli {
27    /// The remote execution service address, e.g. localhost:3031
28    #[structopt(long = "address", conflicts_with = "scheduler-address-file")]
29    scheduler_address: Option<String>,
30
31    /// The scheduler address to be read from file `scheduler_address_file`
32    #[structopt(short = 'w', default_value = GOSH_SCHEDULER_FILE)]
33    scheduler_address_file: PathBuf,
34
35    #[clap(subcommand)]
36    action: ClientAction,
37}
38
// Subcommands available on the client side. The `///` comments below are
// picked up by clap as help text, so maintainer notes use `//`.
#[derive(Subcommand)]
enum ClientAction {
    // Run a command line on the remote side; its arguments are declared by
    // `ClientRun`.
    Run(ClientRun),
    /// Request server to add a new node for remote computation.
    AddNode {
        /// The node to be added into node list for remote computation.
        node: String,
    },
    /// Request server to compute molecule from `mol_path`
    Compute {
        // Path of the molecule file, loaded with `Molecule::from_file`
        // before being sent to the server.
        mol_path: PathBuf,
    },
}
52
53#[derive(StructOpt)]
54/// request server to run a cmd
55struct ClientRun {
56    /// The cmd to run in remote session
57    cmd: String,
58
59    /// The working dir to run the cmd
60    #[structopt(long, default_value = ".")]
61    wrk_dir: PathBuf,
62}
63
64impl ClientCli {
65    async fn enter_main(self) -> Result<()> {
66        use crate::Client;
67        let scheduler_address = if let Some(a) = self.scheduler_address {
68            a
69        } else {
70            read_scheduler_address_from_lock_file(&self.scheduler_address_file, 2.0)?
71        };
72
73        let client = Client::connect(&scheduler_address);
74        match self.action {
75            ClientAction::Run(run) => {
76                let wrk_dir = run.wrk_dir.canonicalize()?;
77                let o = client.run_cmd(&run.cmd, &wrk_dir).await?;
78                println!("{o}");
79            }
80            ClientAction::AddNode { node } => {
81                client.add_node(&node).await?;
82            }
83
84            ClientAction::Compute { mol_path } => {
85                use gchemol::prelude::*;
86                use gchemol::Molecule;
87
88                let mol = Molecule::from_file(&mol_path)?;
89                let o = client.compute_molecule(&mol).await?;
90                println!("{o}");
91            }
92        }
93
94        Ok(())
95    }
96}
97// 512e88e7 ends here
98
99// [[file:../remote.note::674c2404][674c2404]]
100use base::LockFile;
101use server::Server;
102
// The role a server process plays. Derives clap's `ValueEnum`, so it can be
// used directly as a command line argument value (see the `mode` fields of
// `ServerCli` and `BootstrapCli`).
#[derive(Debug, Clone, ValueEnum)]
enum ServerMode {
    // Serve as the scheduler.
    AsScheduler,
    // Serve as a worker (or as a chemical model service when a model
    // directory is supplied).
    AsWorker,
}
108
109/// The server side for running program concurrently distributed over multiple remote nodes
110#[derive(Parser, Debug)]
111struct ServerCli {
112    /// Bind on the address for providing remote execution service
113    #[arg(long)]
114    address: String,
115
116    /// The server mode to start.
117    #[arg(value_enum)]
118    mode: ServerMode,
119
120    /// The block box model template directory. Setting this argument
121    /// will enable remote computation service for molecule, beyond
122    /// run simple command line.
123    #[arg(short = 't')]
124    bbm_dir: Option<PathBuf>,
125}
126
127impl ServerCli {
128    async fn enter_main(self) -> Result<()> {
129        let address = &self.address;
130        let server = Server::bind(address);
131        match self.mode {
132            ServerMode::AsScheduler => {
133                println!("Start scheduler serivce at {address:?}");
134                server.serve_as_scheduler().await;
135            }
136            ServerMode::AsWorker => {
137                if let Some(bbm_dir) = self.bbm_dir {
138                    use gosh_model::BlackBoxModel;
139
140                    println!("Start chemical model serivce at {address:?}");
141                    let bbm = BlackBoxModel::from_dir(bbm_dir)?;
142                    server.serve_as_chemical_model(bbm).await?;
143                } else {
144                    server.serve_as_worker().await?;
145                }
146            }
147        }
148
149        Ok(())
150    }
151
152    async fn run_as_scheduler(address: String) -> Result<()> {
153        let server = ServerCli {
154            address: address,
155            mode: ServerMode::AsScheduler,
156            bbm_dir: None,
157        };
158        server.enter_main().await?;
159        Ok(())
160    }
161
162    async fn run_as_worker(address: String) -> Result<()> {
163        let server = ServerCli {
164            address: address,
165            mode: ServerMode::AsWorker,
166            bbm_dir: None,
167        };
168        server.enter_main().await?;
169        Ok(())
170    }
171
172    async fn run_as_model(address: String, bbm_dir: PathBuf) -> Result<()> {
173        let server = ServerCli {
174            address: address,
175            mode: ServerMode::AsWorker,
176            bbm_dir: bbm_dir.into(),
177        };
178        server.enter_main().await?;
179        Ok(())
180    }
181}
182// 674c2404 ends here
183
184// [[file:../remote.note::001e63a1][001e63a1]]
185/// Start scheduler and worker services automatically when run in MPI
186/// environment (to be called with mpirun command)
187#[derive(Parser)]
188struct BootstrapCli {
189    /// The scheduler address will be wrote into `address_file`
190    #[arg(short = 'w', default_value = GOSH_SCHEDULER_FILE)]
191    address_file: PathBuf,
192
193    #[arg(long, default_value = "2.0")]
194    timeout: f64,
195
196    /// The black box model template directory, required for chemical
197    /// model computation service
198    #[arg(short = 't')]
199    bbm_dir: Option<PathBuf>,
200
201    /// The server mode to start.
202    #[arg(value_enum)]
203    mode: ServerMode,
204}
205
206impl BootstrapCli {
207    async fn enter_main(&self) -> Result<()> {
208        let node = hostname();
209        let address = default_server_address();
210        let address_file = self.address_file.to_owned();
211        let timeout = self.timeout;
212        let bbm_dir = self.bbm_dir.clone();
213        match self.mode {
214            ServerMode::AsScheduler => {
215                info!("install scheduler on {node}");
216                let _lock = LockFile::new(&address_file, &address)?;
217                ServerCli::run_as_scheduler(address).await?;
218            }
219            ServerMode::AsWorker => {
220                info!("install worker on {node}");
221                let o = read_scheduler_address_from_lock_file(&address_file, timeout)?;
222                // tell the scheduler add this worker
223                crate::Client::connect(o).add_node(&address).await?;
224                if let Some(bbm_dir) = bbm_dir {
225                    ServerCli::run_as_model(address, bbm_dir).await?;
226                } else {
227                    ServerCli::run_as_worker(address).await?;
228                }
229            }
230        }
231        Ok(())
232    }
233}
234
235fn default_server_address() -> String {
236    match get_free_tcp_address().expect("tcp address") {
237        std::net::SocketAddr::V4(addr) => addr.to_string(),
238        std::net::SocketAddr::V6(_) => panic!("IPV6 is not supported"),
239    }
240}
241// 001e63a1 ends here
242
243// [[file:../remote.note::5f9971ad][5f9971ad]]
244/// A helper program for running program concurrently distributed over multiple
245/// remote nodes
246#[derive(Parser)]
247#[clap(author, version, about)]
248struct Cli {
249    #[structopt(flatten)]
250    verbose: gut::cli::Verbosity,
251
252    #[clap(subcommand)]
253    command: RemoteCommand,
254}
255
// Top level subcommands; each variant wraps the argument struct whose
// `enter_main` is called by `remote_enter_main`.
#[derive(Subcommand)]
enum RemoteCommand {
    // Send a request to a running service.
    Client(ClientCli),
    // Start a scheduler or worker service on a given address.
    Server(ServerCli),
    // Start a scheduler or worker service on an automatically chosen address.
    Bootstrap(BootstrapCli),
}
262
263pub async fn remote_enter_main() -> Result<()> {
264    let args = Cli::parse();
265    args.verbose.setup_logger();
266
267    match args.command {
268        RemoteCommand::Client(client) => {
269            client.enter_main().await?;
270        }
271        RemoteCommand::Server(server) => {
272            debug!("Run VASP for interactive calculation ...");
273            server.enter_main().await?;
274        }
275        RemoteCommand::Bootstrap(bootstrap) => {
276            bootstrap.enter_main().await?;
277        }
278    }
279
280    Ok(())
281}
282// 5f9971ad ends here