chromedriver_launch/
chromedriver.rs

1// Copyright (C) 2025 Daniel Mueller <deso@posteo.net>
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4use std::collections::HashSet;
5use std::net::IpAddr;
6use std::net::Ipv4Addr;
7use std::net::SocketAddr;
8use std::path::Path;
9use std::path::PathBuf;
10use std::process::Child;
11use std::process::Command;
12use std::process::Stdio;
13use std::thread::sleep;
14use std::time::Duration;
15use std::time::Instant;
16
17use anyhow::bail;
18use anyhow::Context as _;
19use anyhow::Result;
20
21use crate::socket;
22use crate::tcp;
23
24
/// The name of the `chromedriver` binary.
///
/// This is also the binary path that a default-constructed [`Builder`]
/// starts out with.
const CHROME_DRIVER: &str = "chromedriver";
/// The timeout used when searching for a bound local port.
///
/// This is also the timeout value that a default-constructed
/// [`Builder`] starts out with.
const PORT_FIND_TIMEOUT: Duration = Duration::from_secs(30);
29
30
31fn find_localhost_port(pid: u32) -> Result<u16> {
32  let start = Instant::now();
33
34  // Wait for the driver process to bind to a local host address.
35  let port = loop {
36    let inodes = socket::socket_inodes(pid)?.collect::<Result<HashSet<_>>>()?;
37    let result = tcp::parse(pid)?.find(|result| match result {
38      Ok(entry) => {
39        if inodes.contains(&entry.inode) {
40          entry.addr == Ipv4Addr::LOCALHOST
41        } else {
42          false
43        }
44      },
45      Err(_) => true,
46    });
47    match result {
48      None => {
49        if start.elapsed() >= PORT_FIND_TIMEOUT {
50          bail!("failed to find local host port for process {pid}");
51        }
52        sleep(Duration::from_millis(1))
53      },
54      Some(result) => {
55        break result
56          .context("failed to find localhost proc tcp entry")?
57          .port
58      },
59    }
60  };
61
62  Ok(port)
63}
64
65
66/// A builder for configurable launch of a Chromedriver process.
67#[derive(Debug)]
68pub struct Builder {
69  /// The path to the `chromedriver` binary to use.
70  chromedriver: PathBuf,
71  /// The timeout to use waiting for `chromedriver` to start up
72  /// properly.
73  timeout: Duration,
74}
75
76impl Builder {
77  /// Set the Chromedriver to use.
78  pub fn set_chromedriver(mut self, chromedriver: impl AsRef<Path>) -> Self {
79    self.chromedriver = chromedriver.as_ref().to_path_buf();
80    self
81  }
82
83  /// Set the timeout to wait for Chromedriver to start up properly.
84  pub fn set_timeout(mut self, timeout: Duration) -> Self {
85    self.timeout = timeout;
86    self
87  }
88
89  /// Launch the Chromedriver process and wait for it to be fully
90  /// initialized and serving a webdriver service.
91  pub fn launch(self) -> Result<Chromedriver> {
92    let process = Command::new(CHROME_DRIVER)
93      .arg("--port=0")
94      .stdout(Stdio::piped())
95      .stderr(Stdio::piped())
96      .spawn()
97      .with_context(|| format!("failed to launch `{CHROME_DRIVER}` instance"))?;
98
99    let pid = process.id();
100    let port = find_localhost_port(pid)?;
101
102    let slf = Chromedriver { process, port };
103    Ok(slf)
104  }
105}
106
107impl Default for Builder {
108  fn default() -> Self {
109    Self {
110      chromedriver: PathBuf::from(CHROME_DRIVER),
111      timeout: PORT_FIND_TIMEOUT,
112    }
113  }
114}
115
116
/// A handle to a launched Chromedriver process.
///
/// The process is shut down when the handle is destroyed explicitly or
/// dropped.
#[derive(Debug)]
pub struct Chromedriver {
  /// The Chromedriver process.
  process: Child,
  /// The port on which the webdriver protocol is being served.
  port: u16,
}
125
126impl Chromedriver {
127  /// Launch a Chromedriver process and wait for it to be serving a
128  /// webdriver service.
129  pub fn launch() -> Result<Self> {
130    Self::builder().launch()
131  }
132
133  /// Create a [`Builder`] for configurable launch of a Chromedriver
134  /// process.
135  pub fn builder() -> Builder {
136    Builder::default()
137  }
138
139  /// Destroy the Chromedriver process, freeing up all resources.
140  #[inline]
141  fn destroy_impl(&mut self) -> Result<()> {
142    let () = self
143      .process
144      .kill()
145      .context("failed to shut down chromedriver process")?;
146    // Clean up the child to prevent any build up of zombie processes.
147    // The `kill()` should pretty much be immediate, so the `wait()`
148    // shouldn't be blocking for long. However, using `try_wait()`
149    // instead could probably be racy, as `kill()` will only deliver the
150    // signal, not ensure that it got processed to completion.
151    let _status = self.process.wait()?;
152    Ok(())
153  }
154
155  /// Destroy the Chromedriver process, freeing up all resources.
156  #[inline]
157  pub fn destroy(mut self) -> Result<()> {
158    self.destroy_impl()
159  }
160
161  /// Retrieve the socket address on which the webdriver service is
162  /// listening.
163  #[inline]
164  pub fn socket_addr(&self) -> SocketAddr {
165    SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), self.port)
166  }
167}
168
169impl Drop for Chromedriver {
170  fn drop(&mut self) {
171    let _result = self.destroy_impl();
172  }
173}
174
175
#[cfg(test)]
mod tests {
  use super::*;

  use std::net::TcpListener;
  use std::process;


  /// Make sure that the port of a listener bound on the loopback
  /// address is discovered for the current process.
  #[test]
  fn localhost_port_finding() {
    // Port 0 has the system pick a free port. The listener has to stay
    // alive until the lookup below is done.
    let server = TcpListener::bind("127.0.0.1:0").unwrap();
    let expected = server.local_addr().unwrap().port();

    let found = find_localhost_port(process::id()).unwrap();
    assert_eq!(found, expected);
  }
}
192}