Skip to main content

bestool_canopy/
registration.rs

//! Single, machine-bound, encrypted store for this host's canopy enrollment.
//!
//! Everything the agent needs to talk to canopy — the mTLS device key, the
//! server id, and (once enrolled) the device id and api url — lives in one
//! encrypted file:
//!
//! - Linux: `/etc/bestool/canopy-registration`
//! - Windows: `%ProgramData%\bestool\canopy-registration`
//!
//! Encryption reuses algae (the age/scrypt profile this workspace already uses
//! for `protect`/`reveal` and the enrollment ticket). The local file is keyed
//! by a passphrase derived from the host's machine id, so a cloned disk can't
//! reuse it on a different machine and the device key isn't at rest in
//! plaintext. The same format is used for `canopy export` blobs, keyed by an
//! operator passphrase instead — see [`encrypt_with_passphrase`].
//!
//! The machine-id binding is a deliberately weak, software-only measure. Where
//! a TPM is present it could augment this — sealing or deriving the unlock key
//! in hardware via [`machine_passphrase`] — while hosts without one keep using
//! the machine id, and neither the file format nor any consumer changes.
21
22#[cfg(unix)]
23use std::os::unix::fs::PermissionsExt as _;
24use std::{
25	fmt,
26	path::{Path, PathBuf},
27};
28
29use algae_cli::{
30	passphrases::Passphrase,
31	streams::{decrypt_stream, encrypt_stream},
32};
33use base64::{
34	Engine as _,
35	engine::general_purpose::{STANDARD_NO_PAD, URL_SAFE_NO_PAD},
36};
37use miette::{IntoDiagnostic as _, Result, WrapErr as _, miette};
38use serde::{Deserialize, Serialize};
39use tracing::{debug, info, warn};
40
/// Format tag that [`Registration::default`] writes into the `v` field.
const VERSION: &str = "registration-1";

/// Name of the environment variable that relocates the registration file's
/// base directory. Tests set it, and it is honoured for ad-hoc relocation;
/// whenever it is set, legacy migration is skipped.
const DIR_ENV: &str = "BESTOOL_CANOPY_DIR";

/// Context string for the blake3 KDF that turns the machine id into the file
/// passphrase. If the derivation ever changes, bump the version suffix.
const KDF_CONTEXT: &str = "bestool canopy-registration v1 (machine-id)";

/// Unix permission bits for the registration file. The group gets read access
/// so that unprivileged runs sharing the daemon's group (e.g. `bestool tamanu
/// doctor` run by hand) read the same registration rather than falling back to
/// the database and rewriting the legacy `/etc/tamanu` files.
#[cfg(unix)]
const REG_FILE_MODE: u32 = 0o640;

/// scrypt work factor for the registration file (`N = 2^REG_WORK_FACTOR`).
///
/// The machine passphrase is already a 256-bit blake3-derived key, so scrypt's
/// memory-hardness buys no extra protection. age's default calibrates to ~1
/// second of scrypt, which on a fast server means a 512MiB arena — enough to
/// blow through a service MemoryMax. 2^12 keeps the arena at 4MiB.
const REG_WORK_FACTOR: u8 = 12;
66
67/// This host's canopy enrollment state.
68///
69/// Every field is optional so a partially-provisioned or migrated host can
70/// still be represented; `canopy register` populates all of them.
71#[derive(Clone, Serialize, Deserialize)]
72pub struct Registration {
73	pub v: String,
74	#[serde(default, skip_serializing_if = "Option::is_none")]
75	pub server_id: Option<String>,
76	#[serde(default, skip_serializing_if = "Option::is_none")]
77	pub device_key: Option<String>,
78	#[serde(default, skip_serializing_if = "Option::is_none")]
79	pub device_id: Option<String>,
80	#[serde(default, skip_serializing_if = "Option::is_none")]
81	pub api_url: Option<String>,
82}
83
84impl Default for Registration {
85	fn default() -> Self {
86		Self {
87			v: VERSION.to_owned(),
88			server_id: None,
89			device_key: None,
90			device_id: None,
91			api_url: None,
92		}
93	}
94}
95
96impl fmt::Debug for Registration {
97	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
98		f.debug_struct("Registration")
99			.field("v", &self.v)
100			.field("server_id", &self.server_id)
101			.field(
102				"device_key",
103				&self.device_key.as_ref().map(|_| "<redacted>"),
104			)
105			.field("device_id", &self.device_id)
106			.field("api_url", &self.api_url)
107			.finish()
108	}
109}
110
111/// Default base directory for the registration file (honours [`DIR_ENV`]).
112///
113/// Uses the platform convention for machine-global state: `/etc` on Linux,
114/// `%ProgramData%` on Windows.
115pub fn default_dir() -> PathBuf {
116	if let Some(dir) = std::env::var_os(DIR_ENV) {
117		return PathBuf::from(dir);
118	}
119	#[cfg(windows)]
120	{
121		let base = std::env::var_os("ProgramData").unwrap_or_else(|| r"C:\ProgramData".into());
122		PathBuf::from(base).join("bestool")
123	}
124	#[cfg(not(windows))]
125	{
126		PathBuf::from("/etc/bestool")
127	}
128}
129
/// Path of the registration file inside `dir`.
fn registration_file(dir: &Path) -> PathBuf {
	let mut file = dir.to_path_buf();
	file.push("canopy-registration");
	file
}
133
// Legacy plaintext locations. They mirror bestool-tamanu's `standard_*` paths
// and are spelled out as literals because canopy can't depend on the tamanu
// crate.

/// Location of the legacy plaintext server-id file for this platform.
fn legacy_server_id_path() -> PathBuf {
	let location = if cfg!(windows) {
		r"C:\Tamanu\server-id"
	} else {
		"/etc/tamanu/server-id"
	};
	PathBuf::from(location)
}
143
/// Location of the legacy plaintext device-key PEM file for this platform.
fn legacy_device_key_path() -> PathBuf {
	let location = if cfg!(windows) {
		r"C:\Tamanu\device-key.pem"
	} else {
		"/etc/tamanu/device-key.pem"
	};
	PathBuf::from(location)
}
151
152/// Process-wide cache of a successfully loaded registration, so repeated
153/// reporting reads (e.g. the doctor tick) don't re-run scrypt each time. Only
154/// populated on a hit; a fresh enrollment is picked up on the next process
155/// start.
156static CACHE: tokio::sync::OnceCell<Registration> = tokio::sync::OnceCell::const_new();
157
158/// Load the registration from the default location.
159///
160/// If the file is absent, migrates from the legacy `/etc/tamanu` plaintext
161/// files (unless [`DIR_ENV`] is set). Returns `None` when there's nothing to
162/// load.
163pub async fn load() -> Result<Option<Registration>> {
164	if let Some(reg) = CACHE.get() {
165		return Ok(Some(reg.clone()));
166	}
167
168	let dir = default_dir();
169	let path = registration_file(&dir);
170	let reg = if path.exists() {
171		Some(read_and_decrypt(&path).await?)
172	} else if std::env::var_os(DIR_ENV).is_some() {
173		None
174	} else {
175		migrate_from_legacy(&dir).await?
176	};
177
178	if let Some(ref reg) = reg {
179		let _ = CACHE.set(reg.clone());
180	}
181	Ok(reg)
182}
183
184/// Load the registration from a specific directory, without legacy migration.
185pub async fn load_from(dir: &Path) -> Result<Option<Registration>> {
186	let path = registration_file(dir);
187	if path.exists() {
188		Ok(Some(read_and_decrypt(&path).await?))
189	} else {
190		Ok(None)
191	}
192}
193
194/// Encrypt and store the registration at the default location.
195pub async fn store(reg: &Registration) -> Result<()> {
196	store_in(&default_dir(), reg).await
197}
198
199/// Encrypt and store the registration in a specific directory.
200pub async fn store_in(dir: &Path, reg: &Registration) -> Result<()> {
201	tokio::fs::create_dir_all(dir)
202		.await
203		.into_diagnostic()
204		.wrap_err_with(|| format!("creating {}", dir.display()))?;
205	let plaintext = serde_json::to_vec(reg)
206		.into_diagnostic()
207		.wrap_err("serialising registration")?;
208	let ciphertext = encrypt_bytes(&plaintext, machine_passphrase()?)?;
209	write_atomic(&registration_file(dir), &ciphertext).await
210}
211
212/// Encrypt a registration under an operator passphrase, for `canopy export`.
213pub fn encrypt_with_passphrase(reg: &Registration, passphrase: Passphrase) -> Result<Vec<u8>> {
214	let plaintext = serde_json::to_vec(reg)
215		.into_diagnostic()
216		.wrap_err("serialising registration")?;
217	encrypt_bytes(&plaintext, passphrase)
218}
219
220/// Generate a fresh random passphrase for `canopy export`.
221///
222/// ~128 bits from a URL-safe base64 of 16 random bytes — enough entropy to make
223/// brute force infeasible, with no wordlist to bloat the binary.
224pub fn generate_passphrase() -> Result<String> {
225	let mut bytes = [0u8; 16];
226	getrandom::fill(&mut bytes).map_err(|e| miette!("generating passphrase: {e}"))?;
227	Ok(URL_SAFE_NO_PAD.encode(bytes))
228}
229
230/// Decrypt a registration from an operator passphrase, for `canopy import`.
231pub fn decrypt_with_passphrase(bytes: &[u8], passphrase: Passphrase) -> Result<Registration> {
232	let plaintext = decrypt_bytes(bytes, passphrase)?;
233	serde_json::from_slice(&plaintext)
234		.into_diagnostic()
235		.wrap_err("parsing registration")
236}
237
238async fn read_and_decrypt(path: &Path) -> Result<Registration> {
239	// Repair the mode of files written before group read was granted.
240	// Best-effort: only the owner can chmod, and unprivileged readers that get
241	// this far don't need to.
242	#[cfg(unix)]
243	if let Ok(meta) = tokio::fs::metadata(path).await
244		&& meta.permissions().mode() & 0o777 != REG_FILE_MODE
245	{
246		let _ =
247			tokio::fs::set_permissions(path, std::fs::Permissions::from_mode(REG_FILE_MODE)).await;
248	}
249
250	let bytes = tokio::fs::read(path)
251		.await
252		.into_diagnostic()
253		.wrap_err_with(|| format!("reading {}", path.display()))?;
254	let plaintext = decrypt_bytes(&bytes, machine_passphrase()?)
255		.wrap_err("decrypting registration (was this disk cloned from another machine?)")?;
256
257	// Files written before the work factor was fixed used age's calibrated
258	// default, which costs hundreds of MiB to decrypt on every load. Re-encrypt
259	// once with the cheap factor. Best-effort: unprivileged readers can't write
260	// here, and the owner will on its next load.
261	if scrypt_work_factor(&bytes).is_some_and(|log_n| log_n > REG_WORK_FACTOR) {
262		match encrypt_bytes(&plaintext, machine_passphrase()?) {
263			Ok(cheap) => match write_atomic(path, &cheap).await {
264				Ok(()) => {
265					info!(path = %path.display(), "re-encrypted registration with cheap work factor")
266				}
267				Err(err) => debug!(%err, "could not rewrite registration with cheap work factor"),
268			},
269			Err(err) => debug!(%err, "could not re-encrypt registration with cheap work factor"),
270		}
271	}
272
273	serde_json::from_slice(&plaintext)
274		.into_diagnostic()
275		.wrap_err("parsing registration")
276}
277
/// Read the scrypt work factor (log_n) out of an age file header.
///
/// Even in the binary format the header is ASCII text: a version line followed
/// by `-> scrypt <salt> <log_n>` for passphrase-encrypted files. Only the first
/// two lines are inspected. Returns `None` when no scrypt stanza is found there
/// or its work factor is missing or does not fit a `u8`.
fn scrypt_work_factor(ciphertext: &[u8]) -> Option<u8> {
	for raw in ciphertext.split(|&b| b == b'\n').take(2) {
		let Ok(line) = std::str::from_utf8(raw) else {
			continue;
		};
		let Some(stanza_args) = line.strip_prefix("-> scrypt ") else {
			continue;
		};

		// The first scrypt stanza decides the result: skip the salt, then
		// parse whatever follows it.
		let mut args = stanza_args.split_ascii_whitespace();
		args.next()?;
		return args.next()?.parse().ok();
	}
	None
}
291
292async fn migrate_from_legacy(dir: &Path) -> Result<Option<Registration>> {
293	let sid_path = legacy_server_id_path();
294	let key_path = legacy_device_key_path();
295	let server_id = read_trimmed(&sid_path);
296	let device_key = std::fs::read_to_string(&key_path)
297		.ok()
298		.filter(|s| !s.trim().is_empty());
299
300	if server_id.is_none() && device_key.is_none() {
301		return Ok(None);
302	}
303
304	let reg = Registration {
305		server_id,
306		device_key,
307		..Registration::default()
308	};
309	info!("migrating canopy registration from legacy /etc/tamanu files");
310
311	// Write the consolidated file, then prove it reads back from scratch before
312	// removing the only other copy of the device key. Any failure leaves the
313	// legacy files in place so the next run retries.
314	if let Err(err) = store_in(dir, &reg).await {
315		warn!(%err, "could not write consolidated registration; keeping legacy files");
316		return Ok(Some(reg));
317	}
318	match load_from(dir).await {
319		Ok(Some(roundtrip))
320			if roundtrip.server_id == reg.server_id && roundtrip.device_key == reg.device_key =>
321		{
322			delete_legacy(&sid_path, &key_path);
323		}
324		Ok(_) => warn!("registration did not round-trip; keeping legacy files"),
325		Err(err) => warn!(%err, "could not verify written registration; keeping legacy files"),
326	}
327
328	Ok(Some(reg))
329}
330
331fn delete_legacy(sid_path: &Path, key_path: &Path) {
332	for path in [sid_path, key_path] {
333		match std::fs::remove_file(path) {
334			Ok(()) => debug!(path = %path.display(), "removed migrated legacy file"),
335			Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
336			Err(err) => warn!(path = %path.display(), %err, "could not remove legacy file"),
337		}
338	}
339}
340
/// Read `path` as text and return it with surrounding whitespace removed.
/// Yields `None` when the file cannot be read or holds only whitespace.
fn read_trimmed(path: &Path) -> Option<String> {
	let contents = std::fs::read_to_string(path).ok()?;
	let trimmed = contents.trim();
	if trimmed.is_empty() {
		None
	} else {
		Some(trimmed.to_owned())
	}
}
347
348/// Build the passphrase that unlocks the local registration file from the
349/// host's machine id, read via the `machine-uid` crate (machine-id on Linux,
350/// MachineGuid on Windows, IOPlatformUUID on macOS). A TPM, where one is
351/// present, could augment this by sealing the key in hardware; hosts without a
352/// TPM keep using the machine id.
353fn machine_passphrase() -> Result<Passphrase> {
354	let id =
355		machine_uid::get().map_err(|err| miette!("could not read the host machine id: {err}"))?;
356	Ok(Passphrase::with_work_factor(
357		derive_passphrase(&id).into(),
358		REG_WORK_FACTOR,
359	))
360}
361
362fn derive_passphrase(machine_id: &str) -> String {
363	let key = blake3::derive_key(KDF_CONTEXT, machine_id.as_bytes());
364	STANDARD_NO_PAD.encode(key)
365}
366
367// algae's stream API takes `Box<dyn Identity>` (not `Send`), which would poison
368// the `Send` futures the reporting path requires. The payload is tiny and fully
369// in-memory (no tokio reactor needed), so we drive algae to completion on the
370// current thread with `block_on` inside a synchronous helper — nothing
371// non-`Send` is then held across an `.await` in the async callers.
372fn encrypt_bytes(plaintext: &[u8], passphrase: Passphrase) -> Result<Vec<u8>> {
373	futures::executor::block_on(async {
374		let mut out = futures::io::Cursor::new(Vec::new());
375		encrypt_stream(plaintext, &mut out, Box::new(passphrase))
376			.await
377			.wrap_err("encrypting registration")?;
378		Ok(out.into_inner())
379	})
380}
381
382fn decrypt_bytes(ciphertext: &[u8], passphrase: Passphrase) -> Result<Vec<u8>> {
383	futures::executor::block_on(async {
384		let reader = futures::io::Cursor::new(ciphertext.to_vec());
385		let mut out: Vec<u8> = Vec::new();
386		decrypt_stream(reader, &mut out, Box::new(passphrase))
387			.await
388			.wrap_err("decrypting registration")?;
389		Ok(out)
390	})
391}
392
393async fn write_atomic(path: &Path, bytes: &[u8]) -> Result<()> {
394	let tmp = path.with_extension("tmp");
395	let mut opts = tokio::fs::OpenOptions::new();
396	opts.write(true).create(true).truncate(true);
397	#[cfg(windows)]
398	{
399		const FILE_ATTRIBUTE_HIDDEN: u32 = 0x0000_0002;
400		opts.attributes(FILE_ATTRIBUTE_HIDDEN);
401	}
402	#[cfg(unix)]
403	{
404		opts.mode(REG_FILE_MODE);
405	}
406	let mut f = opts
407		.open(&tmp)
408		.await
409		.into_diagnostic()
410		.wrap_err_with(|| format!("creating {}", tmp.display()))?;
411	use tokio::io::AsyncWriteExt as _;
412	f.write_all(bytes).await.into_diagnostic()?;
413	f.sync_all().await.into_diagnostic()?;
414	drop(f);
415
416	// `mode()` only applies on creation and is filtered by the umask, so set
417	// the permissions explicitly to cover pre-existing tmp files and
418	// restrictive service umasks.
419	#[cfg(unix)]
420	tokio::fs::set_permissions(&tmp, std::fs::Permissions::from_mode(REG_FILE_MODE))
421		.await
422		.into_diagnostic()
423		.wrap_err_with(|| format!("setting permissions on {}", tmp.display()))?;
424
425	tokio::fs::rename(&tmp, path)
426		.await
427		.into_diagnostic()
428		.wrap_err_with(|| format!("renaming into {}", path.display()))
429}
430
#[cfg(test)]
mod tests {
	use super::*;

	/// Wrap a literal in a `Passphrase` with the default work factor.
	fn passphrase(s: &str) -> Passphrase {
		Passphrase::new(String::from(s).into())
	}

	/// A fully populated registration used as the fixture in every test.
	fn sample() -> Registration {
		let mut reg = Registration::default();
		reg.server_id = Some("7deb2793-0425-427e-8a19-7213946fa9be".into());
		reg.device_key =
			Some("-----BEGIN PRIVATE KEY-----\nMIG...\n-----END PRIVATE KEY-----\n".into());
		reg.device_id = Some("11111111-2222-3333-4444-555555555555".into());
		reg.api_url = Some("https://canopy.example/".into());
		reg
	}

	/// Permission bits (lowest nine) of the file at `path`.
	#[cfg(unix)]
	fn mode_of(path: &Path) -> u32 {
		std::fs::metadata(path).unwrap().permissions().mode() & 0o777
	}

	#[test]
	fn debug_redacts_device_key() {
		let rendered = format!("{:?}", sample());
		assert!(rendered.contains("<redacted>"), "{rendered}");
		assert!(!rendered.contains("BEGIN PRIVATE KEY"), "{rendered}");
	}

	#[test]
	fn passphrase_roundtrip() {
		let original = sample();
		let sealed = encrypt_with_passphrase(&original, passphrase("a-test-passphrase")).unwrap();
		let opened = decrypt_with_passphrase(&sealed, passphrase("a-test-passphrase")).unwrap();
		assert_eq!(opened.server_id, original.server_id);
		assert_eq!(opened.device_key, original.device_key);
		assert_eq!(opened.device_id, original.device_id);
		assert_eq!(opened.api_url, original.api_url);
	}

	#[test]
	fn passphrase_decrypt_rejects_wrong_passphrase() {
		let sealed = encrypt_with_passphrase(&sample(), passphrase("right-passphrase")).unwrap();
		let opened = decrypt_with_passphrase(&sealed, passphrase("wrong-passphrase"));
		assert!(opened.is_err());
	}

	#[test]
	fn derive_passphrase_is_stable_and_machine_specific() {
		let first = derive_passphrase("machine-aaaa");
		assert_eq!(first, derive_passphrase("machine-aaaa"));
		assert_ne!(first, derive_passphrase("machine-bbbb"));
	}

	#[tokio::test]
	async fn store_and_load_from_dir_roundtrip() {
		let dir = tempfile::tempdir().unwrap();
		assert!(load_from(dir.path()).await.unwrap().is_none());

		let original = sample();
		store_in(dir.path(), &original).await.unwrap();

		let loaded = load_from(dir.path()).await.unwrap().unwrap();
		assert_eq!(loaded.server_id, original.server_id);
		assert_eq!(loaded.device_key, original.device_key);

		// File must not contain the plaintext key.
		let needle = b"PRIVATE KEY";
		let on_disk = std::fs::read(registration_file(dir.path())).unwrap();
		assert!(
			!on_disk.windows(needle.len()).any(|w| w == needle),
			"registration file should be encrypted"
		);
	}

	#[tokio::test]
	async fn store_uses_cheap_work_factor() {
		let dir = tempfile::tempdir().unwrap();
		store_in(dir.path(), &sample()).await.unwrap();

		let on_disk = std::fs::read(registration_file(dir.path())).unwrap();
		assert_eq!(scrypt_work_factor(&on_disk), Some(REG_WORK_FACTOR));
	}

	#[tokio::test]
	async fn load_reencrypts_expensive_files() {
		let dir = tempfile::tempdir().unwrap();
		let file = registration_file(dir.path());
		let original = sample();

		// Simulate a file written before the work factor was fixed (one notch
		// up, to keep the test fast).
		let costly_factor = REG_WORK_FACTOR + 1;
		let machine_id = machine_uid::get().unwrap();
		let costly = Passphrase::with_work_factor(derive_passphrase(&machine_id).into(), costly_factor);
		let sealed = encrypt_bytes(&serde_json::to_vec(&original).unwrap(), costly).unwrap();
		write_atomic(&file, &sealed).await.unwrap();
		assert_eq!(scrypt_work_factor(&sealed), Some(costly_factor));

		let loaded = load_from(dir.path()).await.unwrap().unwrap();
		assert_eq!(loaded.server_id, original.server_id);
		assert_eq!(loaded.device_key, original.device_key);

		// The load must have rewritten the file with the cheap factor, and the
		// rewritten file must still load.
		let on_disk = std::fs::read(&file).unwrap();
		assert_eq!(scrypt_work_factor(&on_disk), Some(REG_WORK_FACTOR));
		let reloaded = load_from(dir.path()).await.unwrap().unwrap();
		assert_eq!(reloaded.server_id, original.server_id);
	}

	#[cfg(unix)]
	#[tokio::test]
	async fn store_writes_group_readable_file() {
		let dir = tempfile::tempdir().unwrap();
		store_in(dir.path(), &sample()).await.unwrap();

		let mode = mode_of(&registration_file(dir.path()));
		assert_eq!(
			mode, REG_FILE_MODE,
			"expected {REG_FILE_MODE:o}, got {mode:o}"
		);
	}

	#[cfg(unix)]
	#[tokio::test]
	async fn load_repairs_mode_of_old_files() {
		let dir = tempfile::tempdir().unwrap();
		store_in(dir.path(), &sample()).await.unwrap();

		// Tighten the mode to what older versions wrote, then load.
		let file = registration_file(dir.path());
		std::fs::set_permissions(&file, std::fs::Permissions::from_mode(0o600)).unwrap();

		load_from(dir.path()).await.unwrap().unwrap();
		let mode = mode_of(&file);
		assert_eq!(
			mode, REG_FILE_MODE,
			"expected {REG_FILE_MODE:o}, got {mode:o}"
		);
	}
}