Skip to main content

bestool_canopy/
client.rs

1use std::{
2	fmt,
3	future::Future,
4	io::Write,
5	net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr},
6	sync::{Arc, Mutex, OnceLock},
7	time::{Duration, Instant},
8};
9
10use flate2::{Compression, write::GzEncoder};
11use hickory_resolver::{
12	ConnectionProvider, Resolver,
13	config::{ConnectionConfig, NameServerConfig, ResolverConfig},
14	net::runtime::TokioRuntimeProvider,
15};
16use miette::{IntoDiagnostic, Result, WrapErr};
17use rcgen::{CertificateParams, DistinguishedName, DnType, KeyPair};
18use reqwest::Url;
19use time::{Duration as TimeDuration, OffsetDateTime};
20use tokio::sync::RwLock;
21use tracing::debug;
22
23use crate::Redacted;
24
/// Default base URL of canopy's public API.
///
/// [`CanopyClient::new`] passes this as the `base_url`, which is only used on
/// the mTLS path.
pub const DEFAULT_CANOPY_URL: &str = "https://meta.tamanu.app";

/// Base URL for the tailscale-internal canopy endpoint.
///
/// On hosts that share the canopy tailnet, posting to this URL works without
/// mTLS — the tailscale identity is the auth.
pub const TAILSCALE_URL: &str = "https://canopy.tail53aef.ts.net";

/// Bare hostname of [`TAILSCALE_URL`].
///
/// A probe URL whose host equals this value is treated as canopy's own tailnet
/// endpoint and gets the interface gate, the probe cache, and the
/// `resolve_to_addrs` overrides; any other host is probed with plain
/// resolution. Keep it in sync with [`TAILSCALE_URL`].
const TAILSCALE_HOST: &str = "canopy.tail53aef.ts.net";

/// Hardcoded tailscale IPs for canopy, used when tailscale DNS
/// (100.100.100.100) is unreachable but the tailnet otherwise is.
const CANOPY_HARDCODED_V4: Ipv4Addr = Ipv4Addr::new(100, 99, 98, 97);
const CANOPY_HARDCODED_V6: Ipv6Addr =
	Ipv6Addr::new(0xfd7a, 0x115c, 0xa1e0, 0, 0, 0, 0x9337, 0xfb52);

/// How long renewed canopy certs are valid for, in days.
///
/// Set above [`CERT_RENEW_AFTER`] (six days of validity versus a five-day
/// renewal interval) so a single renewal failure doesn't immediately strand
/// the client.
const CERT_VALIDITY_DAYS: i64 = 6;

/// How long to wait between scheduled cert renewals (five days).
///
/// Renewal runs in a background task in the daemon; the legacy single-shot
/// alerts command builds the client once and exits well within this window.
pub const CERT_RENEW_AFTER: Duration = Duration::from_secs(5 * 24 * 60 * 60);

/// Timeout for the tailscale availability probe.
///
/// Applied as the request timeout of every client built by
/// `build_probe_client`.
const TAILSCALE_PROBE_TIMEOUT: Duration = Duration::from_secs(5);

/// Timeout for the tailscale DNS lookup (against 100.100.100.100).
///
/// Bounds the lookup so a wedged tailscale DNS server can't stall discovery;
/// on timeout we fall back to the hardcoded IPs, which are probed concurrently
/// anyway.
const DNS_LOOKUP_TIMEOUT: Duration = Duration::from_secs(2);

/// How long a tailnet-reachability discovery is trusted before re-probing.
///
/// Short enough that tailscale coming up or going down is picked up promptly,
/// long enough that a burst of client constructions in one process shares a
/// single discovery instead of each paying the probe cost.
const PROBE_CACHE_TTL: Duration = Duration::from_secs(60);

/// Factory producing the base [`reqwest::ClientBuilder`] for canopy's clients.
///
/// The caller supplies this so it owns cross-cutting client config
/// (`SSLKEYLOGFILE`, proxies, …). Canopy invokes it whenever it needs to build or
/// rebuild a client — at probe time, on mTLS cert renewal, and on reload — then
/// layers its own concerns (its [`user_agent`], mTLS identity, DNS overrides,
/// timeouts) on top.
pub type ClientBuilderFactory = Arc<dyn Fn() -> reqwest::ClientBuilder + Send + Sync>;
79
80/// A non-2xx response from a canopy endpoint.
81///
82/// The generated endpoint methods return this (wrapped in a [`miette::Report`])
83/// on any non-success status; downcast the report to it to branch on the code,
84/// e.g. [`TargetOutcome::from_result`](crate::TargetOutcome::from_result) maps a
85/// backup-target `412`/`409` to a dormant device.
86#[derive(Debug, Clone)]
87pub struct CanopyHttpError {
88	/// HTTP status returned by canopy.
89	pub status: reqwest::StatusCode,
90	/// The endpoint path that was called (mTLS-mode form, e.g. `/backup-target`).
91	pub path: String,
92	/// Response body, best-effort (empty if it couldn't be read).
93	pub body: String,
94}
95
96impl fmt::Display for CanopyHttpError {
97	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
98		write!(
99			f,
100			"canopy {} returned {}: {}",
101			self.path, self.status, self.body
102		)
103	}
104}
105
106impl std::error::Error for CanopyHttpError {}
107impl miette::Diagnostic for CanopyHttpError {}
108
109/// User-agent set on every canopy request, e.g.
110/// `bestool-canopy/0.5.0 (Linux 7.0.9 Arch Linux; x86_64)`.
111///
112/// Identifies this client crate and its version; the OS comment is detected at
113/// runtime and cached. The client sets this itself on top of the caller's
114/// [`ClientBuilderFactory`], so canopy traffic identifies the client library
115/// regardless of the calling binary.
116fn user_agent() -> &'static str {
117	static UA: OnceLock<String> = OnceLock::new();
118	UA.get_or_init(|| {
119		let os = sysinfo::System::long_os_version()
120			.or_else(sysinfo::System::name)
121			.unwrap_or_else(|| std::env::consts::OS.to_owned());
122		format!(
123			"bestool-canopy/{} ({os}; {})",
124			env!("CARGO_PKG_VERSION"),
125			sysinfo::System::cpu_arch(),
126		)
127	})
128}
129
130/// Probe the canopy tailnet endpoint, returning a client routed to it if
131/// reachable.
132///
133/// The returned client carries the same DNS / hardcoded-IP resolution override
134/// the reporting client uses and presents **no** client certificate — callers
135/// reaching canopy this way authenticate by tailnet identity. Returns `None`
136/// when the tailnet endpoint isn't reachable, so callers can fall back to
137/// public mTLS.
138pub async fn tailscale_client(make_builder: &ClientBuilderFactory) -> Option<reqwest::Client> {
139	let tailscale_url = TAILSCALE_URL
140		.parse()
141		.expect("default tailscale URL is valid");
142	probe_tailscale(&tailscale_url, make_builder, true).await
143}
144
/// HTTP client with auth configured for talking to a canopy server.
///
/// Tries two auth paths in order of preference:
/// 1. **Tailscale**: if the canopy tailnet endpoint is reachable, plain HTTPS
///    works (auth is implicit via tailscale identity).
/// 2. **mTLS**: a fresh self-signed cert from the device key, short-lived
///    ([`CERT_VALIDITY_DAYS`]); for long-running daemons, [`Self::renew`]
///    should tick on [`CERT_RENEW_AFTER`] to swap in a fresh cert before expiry.
///
/// [`Self::refresh`] re-probes tailscale and swaps modes on reload.
pub struct CanopyClient {
	/// Base URL for the mTLS path (canopy's public API, from the registration's
	/// `api_url`). Used only on the mTLS path. Fixed for the client's lifetime.
	base_url: Url,
	/// Base URL for the tailscale path (defaults to [`TAILSCALE_URL`]). Used only
	/// on the tailscale path. Fixed for the client's lifetime.
	tailscale_url: Url,
	/// Device key PEM, kept so [`Self::refresh`] and [`Self::renew`] can rebuild
	/// the mTLS client; `None` when the caller supplied no key.
	device_key: Option<Redacted<String>>,
	/// Produces the base client builder; see [`ClientBuilderFactory`].
	make_builder: ClientBuilderFactory,
	/// The auth path currently in use together with its HTTP client. Replaced
	/// under the write lock by [`Self::refresh`] and [`Self::renew`]; read by
	/// every request.
	state: RwLock<State>,
}
167
168enum State {
169	Tailscale(reqwest::Client),
170	Mtls(reqwest::Client),
171}
172
173impl State {
174	fn is_tailscale(&self) -> bool {
175		matches!(self, State::Tailscale(_))
176	}
177
178	fn http(&self) -> reqwest::Client {
179		match self {
180			State::Tailscale(http) | State::Mtls(http) => http.clone(),
181		}
182	}
183}
184
185impl fmt::Debug for CanopyClient {
186	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
187		f.debug_struct("CanopyClient").finish_non_exhaustive()
188	}
189}
190
191impl CanopyClient {
192	/// Build a canopy client against the default public ([`DEFAULT_CANOPY_URL`])
193	/// and tailscale ([`TAILSCALE_URL`]) endpoints. Use [`Self::with_urls`] to
194	/// override them.
195	///
196	/// Probes the tailscale endpoint first; if reachable, uses it. Otherwise, if
197	/// a device key PEM is provided, builds an mTLS client. Returns `Ok(None)` if
198	/// neither path is available.
199	///
200	/// `make_builder` supplies the base [`reqwest::ClientBuilder`] — see
201	/// [`ClientBuilderFactory`].
202	pub async fn new(
203		device_key_pem: Option<&str>,
204		make_builder: impl Fn() -> reqwest::ClientBuilder + Send + Sync + 'static,
205	) -> Result<Option<Self>> {
206		Self::with_urls(
207			DEFAULT_CANOPY_URL
208				.parse()
209				.expect("default canopy URL is valid"),
210			TAILSCALE_URL
211				.parse()
212				.expect("default tailscale URL is valid"),
213			device_key_pem,
214			make_builder,
215		)
216		.await
217	}
218
219	/// Build a canopy client against explicit endpoints.
220	///
221	/// `base_url` is canopy's public API URL (the registration's `api_url`),
222	/// used on the mTLS path; `tailscale_url` is the tailnet endpoint used on
223	/// the tailscale path. Both are fixed for the client's lifetime. See
224	/// [`Self::new`] for the other arguments and the default-endpoint form.
225	pub async fn with_urls(
226		base_url: Url,
227		tailscale_url: Url,
228		device_key_pem: Option<&str>,
229		make_builder: impl Fn() -> reqwest::ClientBuilder + Send + Sync + 'static,
230	) -> Result<Option<Self>> {
231		let device_key = device_key_pem.map(|s| Redacted(s.to_owned()));
232		let make_builder: ClientBuilderFactory = Arc::new(make_builder);
233
234		if let Some(http) = probe_tailscale(&tailscale_url, &make_builder, true).await {
235			debug!("canopy: tailscale endpoint reachable, preferring it");
236			return Ok(Some(Self {
237				base_url,
238				tailscale_url,
239				device_key,
240				make_builder,
241				state: RwLock::new(State::Tailscale(http)),
242			}));
243		}
244
245		if let Some(pem) = device_key_pem {
246			debug!("canopy: tailscale unreachable, falling back to mTLS");
247			let http = build_mtls_http(&make_builder, pem)?;
248			return Ok(Some(Self {
249				base_url,
250				tailscale_url,
251				device_key,
252				make_builder,
253				state: RwLock::new(State::Mtls(http)),
254			}));
255		}
256
257		Ok(None)
258	}
259
260	/// Returns true if the client is currently using the tailscale path.
261	pub async fn is_tailscale(&self) -> bool {
262		self.state.read().await.is_tailscale()
263	}
264
265	/// Re-probe tailscale and swap modes if the picture has changed.
266	///
267	/// Intended to be called when the daemon receives a reload signal.
268	pub async fn refresh(&self) -> Result<()> {
269		if let Some(http) = probe_tailscale(&self.tailscale_url, &self.make_builder, false).await {
270			let mut state = self.state.write().await;
271			if !state.is_tailscale() {
272				debug!("canopy refresh: switching to tailscale path");
273			}
274			*state = State::Tailscale(http);
275			return Ok(());
276		}
277
278		if let Some(pem) = &self.device_key {
279			let http = build_mtls_http(&self.make_builder, &pem.0)?;
280			let mut state = self.state.write().await;
281			if state.is_tailscale() {
282				debug!("canopy refresh: tailscale dropped, falling back to mTLS");
283			}
284			*state = State::Mtls(http);
285			return Ok(());
286		}
287
288		debug!("canopy refresh: no auth path available, keeping current state");
289		Ok(())
290	}
291
292	/// Rebuild the underlying HTTP client with a fresh certificate.
293	///
294	/// No-op in tailscale mode (no cert to rotate). In mTLS mode, atomically
295	/// replaces the live client; in-flight requests continue with the old
296	/// client until they complete.
297	pub async fn renew(&self) -> Result<()> {
298		let Some(pem) = &self.device_key else {
299			return Ok(());
300		};
301		let mut state = self.state.write().await;
302		if state.is_tailscale() {
303			return Ok(());
304		}
305		*state = State::Mtls(build_mtls_http(&self.make_builder, &pem.0)?);
306		Ok(())
307	}
308
309	/// Resolve the HTTP client + URL for `path` on the current auth path.
310	///
311	/// `path` is the mTLS-mode path (e.g. `/backup-target`); over tailscale the
312	/// same endpoint is mounted under `/public`, so this prepends it.
313	async fn endpoint_url(&self, path: &str) -> Result<(reqwest::Client, Url)> {
314		let state = self.state.read().await;
315		let url = match &*state {
316			State::Tailscale(_) => self
317				.tailscale_url
318				.join(&format!("/public{path}"))
319				.into_diagnostic()
320				.wrap_err_with(|| format!("building tailscale /public{path} URL"))?,
321			State::Mtls(_) => self
322				.base_url
323				.join(path)
324				.into_diagnostic()
325				.wrap_err_with(|| format!("building {path} URL"))?,
326		};
327		Ok((state.http(), url))
328	}
329
330	/// Send a request to `path` on the current auth path, gzipping the JSON body
331	/// when there is one.
332	///
333	/// A non-success status becomes a [`CanopyHttpError`] (downcast the returned
334	/// report to inspect the status — e.g. [`TargetOutcome::from_result`]). This
335	/// is the shared core behind the generated endpoint methods.
336	async fn send_call<B: serde::Serialize + ?Sized>(
337		&self,
338		method: reqwest::Method,
339		path: &str,
340		body: Option<&B>,
341	) -> Result<reqwest::Response> {
342		let (http, url) = self.endpoint_url(path).await?;
343		debug!(%url, %method, "canopy request");
344		let mut req = http.request(method, url);
345		if let Some(body) = body {
346			let raw = serde_json::to_vec(body)
347				.into_diagnostic()
348				.wrap_err_with(|| format!("serialising canopy {path} body"))?;
349			let compressed = gzip_bytes(&raw)
350				.into_diagnostic()
351				.wrap_err_with(|| format!("gzipping canopy {path} body"))?;
352			req = req
353				.header(reqwest::header::CONTENT_TYPE, "application/json")
354				.header(reqwest::header::CONTENT_ENCODING, "gzip")
355				.body(compressed);
356		}
357
358		let response = req
359			.send()
360			.await
361			.into_diagnostic()
362			.wrap_err_with(|| format!("calling canopy {path}"))?;
363
364		let status = response.status();
365		if !status.is_success() {
366			let body = response.text().await.unwrap_or_default();
367			return Err(miette::Report::new(CanopyHttpError {
368				status,
369				path: path.to_owned(),
370				body,
371			}));
372		}
373		Ok(response)
374	}
375
376	/// Call an endpoint and parse its JSON response. Backs the generated methods.
377	pub(crate) async fn call_json<B, R>(
378		&self,
379		method: reqwest::Method,
380		path: &str,
381		body: Option<&B>,
382	) -> Result<R>
383	where
384		B: serde::Serialize + ?Sized,
385		R: serde::de::DeserializeOwned,
386	{
387		let response = self.send_call(method, path, body).await?;
388		response
389			.json::<R>()
390			.await
391			.into_diagnostic()
392			.wrap_err_with(|| format!("parsing canopy {path} response"))
393	}
394
395	/// Call an endpoint that returns no body. Backs the generated methods.
396	pub(crate) async fn call_empty<B: serde::Serialize + ?Sized>(
397		&self,
398		method: reqwest::Method,
399		path: &str,
400		body: Option<&B>,
401	) -> Result<()> {
402		self.send_call(method, path, body).await.map(drop)
403	}
404
405	/// GET a path, routed via tailscale when available, returning the raw response.
406	///
407	/// Escape hatch behind the generated endpoint methods; needs the `raw-requests`
408	/// feature. In tailscale mode the request goes to `{tailscale_url}{tailscale_path}`
409	/// (typically `/public/...`); in mTLS mode to `{base_url}{mtls_path}`.
410	#[cfg(feature = "raw-requests")]
411	pub async fn get(&self, tailscale_path: &str, mtls_path: &str) -> Result<reqwest::Response> {
412		let (http, url) = {
413			let state = self.state.read().await;
414			let url = match &*state {
415				State::Tailscale(_) => self
416					.tailscale_url
417					.join(tailscale_path)
418					.into_diagnostic()
419					.wrap_err("building tailscale GET URL")?,
420				State::Mtls(_) => self
421					.base_url
422					.join(mtls_path)
423					.into_diagnostic()
424					.wrap_err("building mTLS GET URL")?,
425			};
426			(state.http(), url)
427		};
428
429		debug!(%url, "GET via canopy");
430		http.get(url)
431			.send()
432			.await
433			.into_diagnostic()
434			.wrap_err("GET via canopy")
435	}
436
437	/// Start a request to an arbitrary canopy endpoint on the current auth path.
438	///
439	/// Escape hatch behind the generated endpoint methods; needs the `raw-requests`
440	/// feature. `path` is the mTLS-mode path; over tailscale it's routed under
441	/// `/public`, the same convention the generated methods follow.
442	#[cfg(feature = "raw-requests")]
443	pub async fn request(
444		&self,
445		method: reqwest::Method,
446		path: &str,
447	) -> Result<reqwest::RequestBuilder> {
448		let (http, url) = self.endpoint_url(path).await?;
449		debug!(%url, %method, "arbitrary canopy request");
450		Ok(http.request(method, url))
451	}
452
453	/// Call an arbitrary canopy endpoint and parse its JSON response.
454	///
455	/// Escape hatch behind the generated endpoint methods; needs the `raw-requests`
456	/// feature. Prefer a generated method where one exists. When passing no body,
457	/// pin the inference with a turbofish, e.g. `None::<&()>`. The body is gzipped,
458	/// like every canopy request.
459	#[cfg(feature = "raw-requests")]
460	pub async fn request_json<Res: serde::de::DeserializeOwned>(
461		&self,
462		method: reqwest::Method,
463		path: &str,
464		body: Option<&(impl serde::Serialize + ?Sized)>,
465	) -> Result<Res> {
466		self.call_json(method, path, body).await
467	}
468}
469
470/// Probe the tailscale canopy endpoint, returning a configured `reqwest::Client`
471/// routed to it if reachable and `None` otherwise (so callers fall back to mTLS).
472///
473/// For canopy's own tailnet endpoint the work is short-circuited and shared:
474/// 1. **Gate** — if no tailscale interface is present on this host
475///    ([`tailscale_present`]), the tailnet is unreachable by definition, so
476///    skip all network I/O and return `None` immediately.
477/// 2. **Cache** — when `use_cache` is set, a discovery from the last
478///    [`PROBE_CACHE_TTL`] is reused instead of re-probing. `refresh` passes
479///    `false` to force a fresh discovery on reload.
480/// 3. **Discovery** — the tailscale-DNS-resolved probe and the hardcoded-IP
481///    probe run *concurrently* ([`discover_tailnet`]); the first success wins.
482///
483/// `GET /public/servers` is the probe target because:
484/// - it lives under `/public/...`, the only mount that accepts tagged-device
485///   tailscale callers (everything else 403s with `tagged-device-not-allowed`);
486/// - it's a `GET` with no body, no `VersionHeader` requirement, and no auth;
487/// - it's read-only, so probing it has no side effects.
488async fn probe_tailscale(
489	tailscale_url: &Url,
490	make_builder: &ClientBuilderFactory,
491	use_cache: bool,
492) -> Option<reqwest::Client> {
493	let host = tailscale_url.host_str()?;
494
495	// The gate, cache, and hardcoded-IP discovery below are specific to canopy's
496	// own tailnet endpoint; probe any other tailscale URL with plain resolution.
497	if host != TAILSCALE_HOST {
498		return probe_once(tailscale_url, host, &[], make_builder).await;
499	}
500
501	if use_cache && let Some(outcome) = cached_outcome() {
502		debug!("canopy: reusing cached tailnet reachability");
503		return match outcome {
504			TailnetOutcome::Unreachable => None,
505			TailnetOutcome::Reachable(addrs) => build_probe_client(host, &addrs, make_builder),
506		};
507	}
508
509	let discovered = discover_tailnet(tailscale_url, host, make_builder).await;
510	store_outcome(match &discovered {
511		Some((addrs, _)) => TailnetOutcome::Reachable(addrs.clone()),
512		None => TailnetOutcome::Unreachable,
513	});
514	discovered.map(|(_, client)| client)
515}
516
517/// Discover a reachable route to the canopy tailnet endpoint, or `None`.
518///
519/// Returns the addresses that worked alongside the client built for them, so
520/// the caller can both cache the route and reuse the client without rebuilding.
521async fn discover_tailnet(
522	tailscale_url: &Url,
523	host: &str,
524	make_builder: &ClientBuilderFactory,
525) -> Option<(Vec<SocketAddr>, reqwest::Client)> {
526	if !tailscale_present() {
527		debug!("canopy: no tailscale interface on this host; skipping tailnet probe");
528		return None;
529	}
530
531	let via_dns = async {
532		let addrs = resolve_via_tailscale_dns().await;
533		if addrs.is_empty() {
534			return None;
535		}
536		probe_once(tailscale_url, host, &addrs, make_builder)
537			.await
538			.map(|client| (addrs, client))
539	};
540
541	let via_hardcoded = async {
542		let addrs = vec![
543			SocketAddr::new(IpAddr::V4(CANOPY_HARDCODED_V4), 443),
544			SocketAddr::new(IpAddr::V6(CANOPY_HARDCODED_V6), 443),
545		];
546		probe_once(tailscale_url, host, &addrs, make_builder)
547			.await
548			.map(|client| (addrs, client))
549	};
550
551	race_first_some(via_dns, via_hardcoded).await
552}
553
554/// Resolve `canopy` via the tailscale DNS server (100.100.100.100), bounded by
555/// [`DNS_LOOKUP_TIMEOUT`]. Returns an empty vec on timeout or lookup failure.
556async fn resolve_via_tailscale_dns() -> Vec<SocketAddr> {
557	match tokio::time::timeout(DNS_LOOKUP_TIMEOUT, tailscale_resolver().lookup_ip("canopy")).await {
558		Ok(Ok(addrs)) => addrs.iter().map(|ip| SocketAddr::new(ip, 443)).collect(),
559		Ok(Err(err)) => {
560			debug!("canopy tailscale DNS lookup failed: {err}");
561			Vec::new()
562		}
563		Err(_) => {
564			debug!("canopy tailscale DNS lookup timed out");
565			Vec::new()
566		}
567	}
568}
569
570/// Build the probe client for `host`, resolving it to `addrs` when non-empty
571/// (the tailnet-discovery override); otherwise plain DNS is used.
572fn build_probe_client(
573	host: &str,
574	addrs: &[SocketAddr],
575	make_builder: &ClientBuilderFactory,
576) -> Option<reqwest::Client> {
577	let mut builder = make_builder()
578		.user_agent(user_agent())
579		.timeout(TAILSCALE_PROBE_TIMEOUT);
580	if !addrs.is_empty() {
581		builder = builder.resolve_to_addrs(host, addrs);
582	}
583	builder.build().ok()
584}
585
586/// Build a client for `addrs` and confirm `GET {tailscale_url}/public/servers`
587/// responds 2xx; return the client on success, `None` on any other outcome.
588async fn probe_once(
589	tailscale_url: &Url,
590	host: &str,
591	addrs: &[SocketAddr],
592	make_builder: &ClientBuilderFactory,
593) -> Option<reqwest::Client> {
594	let client = build_probe_client(host, addrs, make_builder)?;
595	let url = tailscale_url.join("/public/servers").ok()?;
596	match client.get(url).send().await {
597		Ok(resp) if resp.status().is_success() => Some(client),
598		Ok(resp) => {
599			debug!(status = %resp.status(), ?addrs, "canopy tailscale probe: unexpected status");
600			None
601		}
602		Err(err) => {
603			debug!(?addrs, "canopy tailscale probe failed: {err}");
604			None
605		}
606	}
607}
608
609/// Await two probes concurrently, resolving to the first that yields `Some`.
610///
611/// If the first to finish yields `None`, the other is awaited to completion.
612async fn race_first_some<T>(
613	a: impl Future<Output = Option<T>>,
614	b: impl Future<Output = Option<T>>,
615) -> Option<T> {
616	use futures::future::{Either, select};
617
618	let a = std::pin::pin!(a);
619	let b = std::pin::pin!(b);
620	match select(a, b).await {
621		Either::Left((Some(v), _)) => Some(v),
622		Either::Right((Some(v), _)) => Some(v),
623		Either::Left((None, rest)) => rest.await,
624		Either::Right((None, rest)) => rest.await,
625	}
626}
627
628/// Whether any local interface holds a tailscale-assigned address.
629///
630/// Tailscale hands out IPv4 from the `100.64.0.0/10` CGNAT range and IPv6 from
631/// its `fd7a:115c:a1e0::/48` ULA prefix. When neither is present the host isn't
632/// on the tailnet, so probing canopy's tailnet endpoint can only ever time out
633/// — the check lets us skip it and go straight to mTLS. A host that reaches the
634/// tailnet purely through a subnet router (no address of its own) is treated as
635/// absent and falls back to mTLS, which still works.
636fn tailscale_present() -> bool {
637	sysinfo::Networks::new_with_refreshed_list()
638		.values()
639		.flat_map(|net| net.ip_networks())
640		.any(|net| is_tailscale_addr(&net.addr))
641}
642
/// Whether `addr` falls in a range tailscale assigns from: IPv4
/// `100.64.0.0/10` (first octet 100, second octet 64–127) or IPv6
/// `fd7a:115c:a1e0::/48` (first three segments fixed).
fn is_tailscale_addr(addr: &IpAddr) -> bool {
	match *addr {
		IpAddr::V4(v4) => matches!(v4.octets(), [100, 64..=127, _, _]),
		IpAddr::V6(v6) => matches!(v6.segments(), [0xfd7a, 0x115c, 0xa1e0, ..]),
	}
}
655
/// Outcome of a tailnet-reachability discovery, cached for [`PROBE_CACHE_TTL`].
#[derive(Clone)]
enum TailnetOutcome {
	/// Reachable via these addresses (empty = plain DNS resolution worked).
	///
	/// The discovery in this file only ever stores a non-empty list (the
	/// DNS-resolved addresses or the hardcoded pair); an empty list is still
	/// handled when the client is rebuilt, by leaving resolution to plain DNS.
	Reachable(Vec<SocketAddr>),
	/// The last discovery found no working route to the tailnet endpoint.
	Unreachable,
}

/// A stored discovery outcome together with the moment it was recorded.
struct CachedProbe {
	/// When the outcome was stored; compared against [`PROBE_CACHE_TTL`].
	stored_at: Instant,
	/// What the discovery concluded.
	outcome: TailnetOutcome,
}
668
669fn probe_cache() -> &'static Mutex<Option<CachedProbe>> {
670	static CACHE: OnceLock<Mutex<Option<CachedProbe>>> = OnceLock::new();
671	CACHE.get_or_init(|| Mutex::new(None))
672}
673
674/// The cached outcome if one was stored within the last [`PROBE_CACHE_TTL`].
675fn cached_outcome() -> Option<TailnetOutcome> {
676	let guard = probe_cache().lock().expect("canopy probe cache poisoned");
677	let entry = guard.as_ref()?;
678	(entry.stored_at.elapsed() < PROBE_CACHE_TTL).then(|| entry.outcome.clone())
679}
680
681fn store_outcome(outcome: TailnetOutcome) {
682	*probe_cache().lock().expect("canopy probe cache poisoned") = Some(CachedProbe {
683		stored_at: Instant::now(),
684		outcome,
685	});
686}
687
688fn tailscale_resolver() -> Resolver<impl ConnectionProvider> {
689	Resolver::builder_with_config(
690		ResolverConfig::from_parts(
691			None,
692			vec!["tail53aef.ts.net.".parse().unwrap()],
693			vec![NameServerConfig::new(
694				"100.100.100.100".parse().unwrap(),
695				true,
696				vec![ConnectionConfig::udp()],
697			)],
698		),
699		TokioRuntimeProvider::default(),
700	)
701	.build()
702	.expect("tailscale resolver config is hardcoded and cannot fail to build")
703}
704
705fn gzip_bytes(bytes: &[u8]) -> std::io::Result<Vec<u8>> {
706	let mut encoder = GzEncoder::new(Vec::with_capacity(bytes.len() / 2), Compression::default());
707	encoder.write_all(bytes)?;
708	encoder.finish()
709}
710
711/// Build a short-lived self-signed client certificate from a P-256 device key
712/// PEM and wrap it as a reqwest mTLS [`Identity`].
713///
714/// Canopy identifies a device by its certificate's public key (SPKI), not by a
715/// CA chain, so a fresh self-signed cert from the device key is all that's
716/// needed. The same device key drives both the long-running canopy client here
717/// and the one-shot `canopy register` enrollment handshake, so they present the
718/// same identity to canopy.
719pub fn device_identity(device_key_pem: &str) -> Result<reqwest::Identity> {
720	let key_pair = KeyPair::from_pem(device_key_pem)
721		.into_diagnostic()
722		.wrap_err("parsing device key PEM")?;
723
724	let mut params = CertificateParams::new(vec!["device.local".into()])
725		.into_diagnostic()
726		.wrap_err("building certificate params")?;
727	params.distinguished_name = DistinguishedName::new();
728	params
729		.distinguished_name
730		.push(DnType::CommonName, "device.local");
731
732	let now = OffsetDateTime::now_utc();
733	params.not_before = now - TimeDuration::minutes(1);
734	params.not_after = now + TimeDuration::days(CERT_VALIDITY_DAYS);
735
736	let cert = params
737		.self_signed(&key_pair)
738		.into_diagnostic()
739		.wrap_err("self-signing certificate")?;
740
741	let mut combined = cert.pem();
742	combined.push('\n');
743	combined.push_str(&key_pair.serialize_pem());
744
745	reqwest::Identity::from_pem(combined.as_bytes())
746		.into_diagnostic()
747		.wrap_err("building reqwest TLS identity")
748}
749
750fn build_mtls_http(
751	make_builder: &ClientBuilderFactory,
752	device_key_pem: &str,
753) -> Result<reqwest::Client> {
754	let identity = device_identity(device_key_pem)?;
755
756	make_builder()
757		.user_agent(user_agent())
758		.identity(identity)
759		.use_rustls_tls()
760		.timeout(Duration::from_secs(30))
761		.build()
762		.into_diagnostic()
763		.wrap_err("building canopy HTTP client")
764}
765
766#[cfg(test)]
767mod tests {
768	use super::*;
769
770	const TEST_DEVICE_KEY: &str = "\
771-----BEGIN PRIVATE KEY-----
772MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgVvhzsYiidp38GYn1
773KxD5Wipc/h8lglVsy1UFZq/SZbGhRANCAAT2EsEq7xjeWVnim9XwdYXga/LBbppm
774fXLgamTYOa/w9n/Ta64fiYWmN54kEd0DgnflJDLtID321Zz6xswvK/VN
775-----END PRIVATE KEY-----";
776
777	fn test_factory() -> ClientBuilderFactory {
778		Arc::new(reqwest::Client::builder)
779	}
780
781	#[test]
782	fn build_mtls_http_from_p256_key() {
783		// Direct mTLS-path build, bypassing the async constructor / tailscale probe.
784		let result = build_mtls_http(&test_factory(), TEST_DEVICE_KEY);
785		assert!(result.is_ok(), "{:?}", result.err());
786	}
787
788	#[test]
789	fn build_mtls_http_fails_on_garbage_key() {
790		assert!(build_mtls_http(&test_factory(), "not a real PEM").is_err());
791	}
792
793	#[tokio::test]
794	async fn renew_with_mtls_state_swaps_in_fresh_client() {
795		// Construct an mTLS-state client directly (no network probe) and renew it.
796		let http = build_mtls_http(&test_factory(), TEST_DEVICE_KEY).unwrap();
797		let client = CanopyClient {
798			base_url: DEFAULT_CANOPY_URL.parse().unwrap(),
799			tailscale_url: TAILSCALE_URL.parse().unwrap(),
800			device_key: Some(Redacted(TEST_DEVICE_KEY.to_owned())),
801			make_builder: test_factory(),
802			state: RwLock::new(State::Mtls(http)),
803		};
804		client.renew().await.expect("renew should succeed");
805		assert!(!client.is_tailscale().await);
806	}
807
808	#[tokio::test]
809	async fn renew_is_noop_in_tailscale_mode() {
810		// Tailscale-state client with no device key — renew is a no-op.
811		let http = reqwest::Client::new();
812		let client = CanopyClient {
813			base_url: DEFAULT_CANOPY_URL.parse().unwrap(),
814			tailscale_url: TAILSCALE_URL.parse().unwrap(),
815			device_key: None,
816			make_builder: test_factory(),
817			state: RwLock::new(State::Tailscale(http)),
818		};
819		client.renew().await.expect("renew should be a no-op");
820		assert!(client.is_tailscale().await);
821	}
822
823	fn mtls_client_against(base: &str) -> CanopyClient {
824		let http = build_mtls_http(&test_factory(), TEST_DEVICE_KEY).unwrap();
825		CanopyClient {
826			base_url: base.parse().unwrap(),
827			tailscale_url: TAILSCALE_URL.parse().unwrap(),
828			device_key: Some(Redacted(TEST_DEVICE_KEY.to_owned())),
829			make_builder: test_factory(),
830			state: RwLock::new(State::Mtls(http)),
831		}
832	}
833
834	struct Captured {
835		request_line: String,
836		headers: String,
837		body: Vec<u8>,
838	}
839
840	/// Bind a loopback socket and answer exactly one HTTP request with
841	/// `response`, capturing the received request line, headers, and body.
842	fn serve_once(response: &'static str) -> (String, std::thread::JoinHandle<Captured>) {
843		use std::io::{Read, Write};
844		use std::net::TcpListener;
845
846		let listener = TcpListener::bind("127.0.0.1:0").unwrap();
847		let base = format!("http://{}", listener.local_addr().unwrap());
848		let handle = std::thread::spawn(move || {
849			let (mut stream, _) = listener.accept().unwrap();
850			let mut buf = Vec::new();
851			let mut chunk = [0u8; 1024];
852			let header_end = loop {
853				if let Some(pos) = buf.windows(4).position(|w| w == b"\r\n\r\n") {
854					break pos + 4;
855				}
856				let n = stream.read(&mut chunk).unwrap();
857				if n == 0 {
858					panic!("connection closed before headers were complete");
859				}
860				buf.extend_from_slice(&chunk[..n]);
861			};
862
863			let head = String::from_utf8_lossy(&buf[..header_end]).into_owned();
864			let content_length = head
865				.lines()
866				.find_map(|line| {
867					let (name, value) = line.split_once(':')?;
868					name.trim()
869						.eq_ignore_ascii_case("content-length")
870						.then(|| value.trim().parse::<usize>().ok())
871						.flatten()
872				})
873				.unwrap_or(0);
874
875			let mut body = buf[header_end..].to_vec();
876			while body.len() < content_length {
877				let n = stream.read(&mut chunk).unwrap();
878				if n == 0 {
879					break;
880				}
881				body.extend_from_slice(&chunk[..n]);
882			}
883
884			stream.write_all(response.as_bytes()).unwrap();
885			stream.flush().unwrap();
886
887			let mut lines = head.lines();
888			let request_line = lines.next().unwrap_or_default().to_owned();
889			let headers = lines.collect::<Vec<_>>().join("\n");
890			Captured {
891				request_line,
892				headers,
893				body,
894			}
895		});
896		(base, handle)
897	}
898
/// JSON shape that the canned success response in the `call_json` test is
/// deserialized into.
#[derive(Debug, serde::Deserialize, PartialEq)]
struct Echo {
	/// The `ok` flag from the response body.
	ok: bool,
	/// The `who` string from the response body.
	who: String,
}
904
905	#[tokio::test]
906	async fn call_json_gzips_body_sets_user_agent_and_parses_response() {
907		let (base, handle) = serve_once(
908			"HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: 26\r\n\r\n{\"ok\":true,\"who\":\"device\"}",
909		);
910		let client = mtls_client_against(&base);
911
912		let payload = serde_json::json!({ "hello": "world" });
913		let got: Echo = client
914			.call_json(reqwest::Method::POST, "/thing", Some(&payload))
915			.await
916			.expect("call_json should succeed");
917
918		assert_eq!(
919			got,
920			Echo {
921				ok: true,
922				who: "device".into()
923			}
924		);
925
926		let captured = handle.join().unwrap();
927		assert!(
928			captured.request_line.starts_with("POST /thing "),
929			"unexpected request line: {}",
930			captured.request_line
931		);
932		let headers = captured.headers.to_ascii_lowercase();
933		assert!(
934			headers.contains("user-agent: bestool-canopy/"),
935			"missing canopy user-agent in:\n{}",
936			captured.headers
937		);
938		assert!(
939			headers.contains("content-encoding: gzip"),
940			"body should be gzipped:\n{}",
941			captured.headers
942		);
943		// The body is gzipped on the wire; decompress before comparing.
944		use flate2::read::GzDecoder;
945		use std::io::Read as _;
946		let mut decoder = GzDecoder::new(&captured.body[..]);
947		let mut raw = Vec::new();
948		decoder
949			.read_to_end(&mut raw)
950			.expect("body should be valid gzip");
951		let sent: serde_json::Value = serde_json::from_slice(&raw).unwrap();
952		assert_eq!(sent, payload);
953	}
954
955	#[tokio::test]
956	async fn call_json_errors_on_non_success_with_body() {
957		let (base, handle) =
958			serve_once("HTTP/1.1 418 I'm a teapot\r\nContent-Length: 14\r\n\r\nno coffee here");
959		let client = mtls_client_against(&base);
960
961		let err = client
962			.call_json::<(), serde_json::Value>(reqwest::Method::GET, "/brew", None::<&()>)
963			.await
964			.expect_err("non-2xx should error");
965		let msg = err.to_string();
966		assert!(msg.contains("/brew"), "expected path in error: {msg}");
967		assert!(msg.contains("418"), "expected status in error: {msg}");
968		assert!(
969			msg.contains("no coffee here"),
970			"expected body text in error: {msg}"
971		);
972
973		handle.join().unwrap();
974	}
975
/// The user-agent must lead with `bestool-canopy/<version> `, contain an
/// opening parenthesis, end with a closing one, and mention the CPU
/// architecture reported by `sysinfo`.
#[test]
fn user_agent_identifies_the_crate_with_os_comment() {
	const EXPECTED_PREFIX: &str = concat!("bestool-canopy/", env!("CARGO_PKG_VERSION"), " ");

	let ua = user_agent();
	assert!(
		ua.starts_with(EXPECTED_PREFIX),
		"unexpected user-agent: {ua}"
	);

	let opens_comment = ua.contains('(');
	assert!(opens_comment, "expected OS comment in: {ua}");
	let closes_comment = ua.ends_with(')');
	assert!(closes_comment, "expected OS comment in: {ua}");

	let arch = sysinfo::System::cpu_arch();
	assert!(ua.contains(arch.as_str()), "expected arch in: {ua}");
}
990
/// IPv4 classification: addresses inside 100.64.0.0/10 count as tailscale,
/// everything else does not.
#[test]
fn tailscale_addr_classifies_cgnat_v4() {
	// Shorthand so each case reads as a bare dotted quad.
	let is_ts =
		|a: u8, b: u8, c: u8, d: u8| is_tailscale_addr(&IpAddr::V4(Ipv4Addr::new(a, b, c, d)));

	// Inside the range, including the hardcoded canopy address.
	assert!(is_ts(100, 64, 0, 1));
	assert!(is_ts(100, 127, 255, 255));
	assert!(is_tailscale_addr(&IpAddr::V4(CANOPY_HARDCODED_V4)));

	// One step outside 100.64.0.0/10 on either side.
	assert!(!is_ts(100, 63, 255, 255));
	assert!(!is_ts(100, 128, 0, 0));

	// Ordinary private/public v4 addresses are not tailscale either.
	assert!(!is_ts(10, 0, 0, 1));
	assert!(!is_ts(100, 0, 0, 1));
}
1009
/// IPv6 classification: addresses under the `fd7a:115c:a1e0` prefix count as
/// tailscale; a different ULA prefix and loopback do not.
#[test]
fn tailscale_addr_classifies_ula_v6() {
	let is_ts = |addr: Ipv6Addr| is_tailscale_addr(&IpAddr::V6(addr));

	assert!(is_ts(CANOPY_HARDCODED_V6));
	assert!(is_ts(Ipv6Addr::new(0xfd7a, 0x115c, 0xa1e0, 0, 0, 0, 0, 1)));

	// Same tail, but the leading group belongs to a different ULA prefix.
	assert!(!is_ts(Ipv6Addr::new(0xfd00, 0x115c, 0xa1e0, 0, 0, 0, 0, 1)));
	assert!(!is_ts(Ipv6Addr::LOCALHOST));
}
1022
/// A stored probe outcome is returned while fresh, and an entry older than
/// the cache TTL reads back as a miss.
#[test]
fn probe_cache_roundtrips_and_expires() {
	let canopy = SocketAddr::new(IpAddr::V4(CANOPY_HARDCODED_V4), 443);

	store_outcome(TailnetOutcome::Reachable(vec![canopy]));
	let cached = cached_outcome();
	if let Some(TailnetOutcome::Reachable(addrs)) = &cached {
		assert_eq!(*addrs, vec![canopy]);
	} else {
		panic!(
			"expected freshly stored Reachable, got {:?}",
			cached.is_some()
		);
	}

	// Fabricate an entry stored just over one TTL ago. A freshly started
	// process may lack the monotonic headroom to represent such an instant,
	// in which case the expiry half of the test is skipped.
	let age = PROBE_CACHE_TTL + Duration::from_secs(1);
	let Some(stale) = Instant::now().checked_sub(age) else {
		return;
	};

	// Single statement on purpose: the mutex guard is released before the
	// cache is read again below.
	*probe_cache().lock().unwrap() = Some(CachedProbe {
		stored_at: stale,
		outcome: TailnetOutcome::Unreachable,
	});
	assert!(cached_outcome().is_none());
}
1053
/// `gzip_bytes` output must start with the gzip magic bytes and inflate back
/// to the exact input.
#[test]
fn gzip_bytes_roundtrips() {
	use flate2::read::GzDecoder;
	use std::io::Read;

	/// Two-byte signature that opens every gzip stream.
	const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];

	let original = br#"{"health":[{"check":"x","result":"passed"}]}"#;
	let compressed = gzip_bytes(original).expect("gzip should succeed");
	assert!(
		compressed.starts_with(&GZIP_MAGIC),
		"expected gzip magic bytes"
	);

	let mut decompressed = Vec::new();
	GzDecoder::new(&compressed[..])
		.read_to_end(&mut decompressed)
		.unwrap();
	assert_eq!(decompressed, original);
}
1070}