chromedriver_launch/
chromedriver.rs

1// Copyright (C) 2025 Daniel Mueller <deso@posteo.net>
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4use std::collections::HashSet;
5use std::net::IpAddr;
6use std::net::Ipv4Addr;
7use std::net::SocketAddr;
8use std::path::Path;
9use std::path::PathBuf;
10use std::process::Child;
11use std::process::Command;
12use std::process::Stdio;
13use std::thread::sleep;
14use std::time::Duration;
15use std::time::Instant;
16
17use anyhow::bail;
18use anyhow::Context as _;
19use anyhow::Result;
20
21use crate::socket;
22use crate::tcp;
23
24
/// The name of the `chromedriver` binary.
///
/// This is the default `chromedriver` path of a [`Builder`].
const CHROME_DRIVER: &str = "chromedriver";
/// The timeout used when searching for a bound local port.
///
/// Doubles as the default start-up timeout of a [`Builder`].
const PORT_FIND_TIMEOUT: Duration = Duration::from_secs(30);
29
30
31fn find_localhost_port(pid: u32) -> Result<u16> {
32  let start = Instant::now();
33
34  // Wait for the driver process to bind to a local host address.
35  let port = loop {
36    let inodes = socket::socket_inodes(pid)?.collect::<Result<HashSet<_>>>()?;
37    let result = tcp::parse(pid)?.find(|result| match result {
38      Ok(entry) => {
39        if inodes.contains(&entry.inode) {
40          entry.addr == Ipv4Addr::LOCALHOST
41        } else {
42          false
43        }
44      },
45      Err(_) => true,
46    });
47    match result {
48      None => {
49        if start.elapsed() >= PORT_FIND_TIMEOUT {
50          bail!("failed to find local host port for process {pid}");
51        }
52        sleep(Duration::from_millis(1))
53      },
54      Some(result) => {
55        break result
56          .context("failed to find localhost proc tcp entry")?
57          .port
58      },
59    }
60  };
61
62  Ok(port)
63}
64
65
66/// A builder for configurable launch of a Chromedriver process.
67#[derive(Debug)]
68pub struct Builder {
69  /// The path to the `chromedriver` binary to use.
70  chromedriver: PathBuf,
71  /// The timeout to use waiting for `chromedriver` to start up
72  /// properly.
73  timeout: Duration,
74}
75
76impl Builder {
77  /// Set the Chromedriver to use.
78  pub fn set_chromedriver(mut self, chromedriver: impl AsRef<Path>) -> Self {
79    self.chromedriver = chromedriver.as_ref().to_path_buf();
80    self
81  }
82
83  /// Set the timeout to wait for Chromedriver to start up properly.
84  pub fn set_timeout(mut self, timeout: Duration) -> Self {
85    self.timeout = timeout;
86    self
87  }
88
89  /// Launch the Chromedriver process and wait for it to be fully
90  /// initialized and serving a webdriver service.
91  pub fn launch(self) -> Result<Chromedriver> {
92    let process = Command::new(CHROME_DRIVER)
93      .arg("--port=0")
94      .stdout(Stdio::piped())
95      .stderr(Stdio::piped())
96      .spawn()
97      .with_context(|| format!("failed to launch `{CHROME_DRIVER}` instance"))?;
98
99    let pid = process.id();
100    let port = find_localhost_port(pid)?;
101
102    let slf = Chromedriver { process, port };
103    Ok(slf)
104  }
105}
106
107impl Default for Builder {
108  fn default() -> Self {
109    Self {
110      chromedriver: PathBuf::from(CHROME_DRIVER),
111      timeout: PORT_FIND_TIMEOUT,
112    }
113  }
114}
115
116
/// A handle to a launched Chromedriver process serving the webdriver
/// protocol on a localhost port.
///
/// The process is shut down when the handle is dropped.
#[derive(Debug)]
pub struct Chromedriver {
  /// The Chromedriver process.
  process: Child,
  /// The port on which the webdriver protocol is being served.
  port: u16,
}
124
125impl Chromedriver {
126  /// Launch a Chromedriver process and wait for it to be serving a
127  /// webdriver service.
128  pub fn launch() -> Result<Self> {
129    Self::builder().launch()
130  }
131
132  /// Create a [`Builder`] for configurable launch of a Chromedriver
133  /// process.
134  pub fn builder() -> Builder {
135    Builder::default()
136  }
137
138  /// Destroy the Chromedriver process, freeing up all resources.
139  #[inline]
140  fn destroy_impl(&mut self) -> Result<()> {
141    self
142      .process
143      .kill()
144      .context("failed to shut down chromedriver process")
145  }
146
147  /// Destroy the Chromedriver process, freeing up all resources.
148  #[inline]
149  pub fn destroy(mut self) -> Result<()> {
150    self.destroy_impl()
151  }
152
153  /// Retrieve the socket address on which the webdriver service is
154  /// listening.
155  #[inline]
156  pub fn socket_addr(&self) -> SocketAddr {
157    SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), self.port)
158  }
159}
160
161impl Drop for Chromedriver {
162  fn drop(&mut self) {
163    let _result = self.destroy_impl();
164  }
165}
166
167
#[cfg(test)]
mod tests {
  use super::*;

  use std::net::TcpListener;
  use std::process;


  /// Make sure that a port bound on localhost by the current process
  /// is discovered.
  #[test]
  fn localhost_port_finding() {
    // Keep the listener alive for the duration of the lookup so that
    // the port stays bound.
    let server = TcpListener::bind("127.0.0.1:0").unwrap();
    let expected = server.local_addr().unwrap().port();

    let found = find_localhost_port(process::id()).unwrap();
    assert_eq!(found, expected);
  }
}