Skip to main content

cloudillo_core/
acme.rs

1// SPDX-FileCopyrightText: Szilárd Hajba
2// SPDX-License-Identifier: LGPL-3.0-or-later
3
4//! ACME subsystem. Handles automatic certificate management using Let's Encrypt.
5
6use axum::extract::State;
7use axum::http::header::HeaderMap;
8use instant_acme::{self as acme, Account};
9use rustls::crypto::CryptoProvider;
10use rustls::sign::CertifiedKey;
11use rustls_pki_types::{CertificateDer, PrivateKeyDer, pem::PemObject};
12use std::sync::Arc;
13use x509_parser::parse_x509_certificate;
14
15use crate::dns::{DnsResolver, create_recursive_resolver, validate_domain_address};
16use crate::prelude::*;
17use crate::scheduler::{Task, TaskId};
18use crate::{ScheduleEmailFn, ScheduleEmailParams};
19use cloudillo_types::auth_adapter::{self, TenantCertRenewalRow};
20
21use async_trait::async_trait;
22use serde::{Deserialize, Serialize};
23
/// Result of one completed ACME order: the PEM-encoded private key and
/// certificate chain, plus the expiry read from the issued certificate.
///
/// NOTE(review): `Debug` is derived, so formatting this value with `{:?}`
/// would print the private key — avoid logging it.
#[derive(Debug)]
struct X509CertData {
	/// Private key returned by finalizing the order, as PEM text.
	private_key_pem: Box<str>,
	/// Certificate chain returned by the ACME server, as PEM text.
	certificate_pem: Box<str>,
	/// `notAfter` of the first certificate in the chain.
	expires_at: Timestamp,
}
30
/// Vars-table key for the persisted ACME account credentials. Stored under
/// `TnId(0)` (global), matching the convention used for other server-wide
/// secrets like `0:jwt_secret`. The stored value is the JSON serialization of
/// the account credentials.
const ACME_ACCOUNT_VAR: &str = "acme_account";
35
36/// Load the persisted ACME account, or create a new one and persist its
37/// credentials on first use. Without persistence we'd hit Let's Encrypt's
38/// per-IP account-creation rate limit on every renewal cycle and leak the
39/// account key into the log on every call.
40async fn get_or_create_acme_account(state: &App, acme_email: &str) -> ClResult<Account> {
41	match state.auth_adapter.read_var(TnId(0), ACME_ACCOUNT_VAR).await {
42		Ok(json) => {
43			let credentials: acme::AccountCredentials = serde_json::from_str(&json)
44				.map_err(|_| Error::Internal("corrupt ACME credentials in vars".into()))?;
45			Ok(Account::builder()?.from_credentials(credentials).await?)
46		}
47		Err(Error::NotFound) => {
48			info!("Creating new ACME account for {}", acme_email);
49			let contact = format!("mailto:{}", acme_email);
50			let (account, credentials) = Account::builder()?
51				.create(
52					&acme::NewAccount {
53						contact: &[&contact],
54						terms_of_service_agreed: true,
55						only_return_existing: false,
56					},
57					acme::LetsEncrypt::Production.url().to_owned(),
58					None,
59				)
60				.await?;
61			let json = serde_json::to_string(&credentials)?;
62			state.auth_adapter.update_var(TnId(0), ACME_ACCOUNT_VAR, &json).await?;
63			Ok(account)
64		}
65		Err(e) => Err(e),
66	}
67}
68
69pub async fn init(
70	state: App,
71	acme_email: &str,
72	id_tag: &str,
73	app_domain: Option<&str>,
74) -> ClResult<()> {
75	info!("ACME init {}", acme_email);
76	let account = get_or_create_acme_account(&state, acme_email).await?;
77
78	// Look up the actual tenant ID instead of hardcoding to 1
79	let tn_id = state.auth_adapter.read_tn_id(id_tag).await?;
80	renew_tenant(state, &account, id_tag, tn_id.0, app_domain).await?;
81
82	Ok(())
83}
84
85pub async fn renew_tenant<'a>(
86	state: App,
87	account: &'a acme::Account,
88	id_tag: &'a str,
89	tn_id: u32,
90	app_domain: Option<&'a str>,
91) -> ClResult<()> {
92	let mut domains: Vec<String> = vec!["cl-o.".to_string() + id_tag];
93	if let Some(app_domain) = app_domain {
94		domains.push(app_domain.to_string());
95	} else {
96		info!("cloudillo app domain: {}", &id_tag);
97		domains.push(id_tag.into());
98	}
99
100	let cert = renew_domains(&state, account, domains).await?;
101	info!("ACME cert {}", &cert.expires_at);
102	state
103		.auth_adapter
104		.create_cert(&auth_adapter::CertData {
105			tn_id: TnId(tn_id),
106			id_tag: id_tag.into(),
107			domain: app_domain.unwrap_or(id_tag).into(),
108			key: cert.private_key_pem,
109			cert: cert.certificate_pem,
110			expires_at: cert.expires_at,
111			last_renewal_attempt_at: None,
112			last_renewal_error: None,
113			failure_count: 0,
114			notified_at: None,
115		})
116		.await?;
117
118	Ok(())
119}
120
121async fn renew_domains<'a>(
122	state: &'a App,
123	account: &'a acme::Account,
124	domains: Vec<String>,
125) -> ClResult<X509CertData> {
126	// Track every identifier we actually inserted into acme_challenge_map so
127	// we can remove the exact same keys on cleanup. The ACME server is free
128	// to normalize identifiers (case, trailing dots) and using the input
129	// `domains` list for removal could miss them.
130	let mut inserted_identifiers: Vec<Box<str>> = Vec::new();
131	let result = renew_domains_inner(state, account, &domains, &mut inserted_identifiers).await;
132
133	// Always clean up challenges, on both success and failure paths.
134	if let Ok(mut map) = state.acme_challenge_map.write() {
135		for ident in &inserted_identifiers {
136			map.remove(ident.as_ref());
137		}
138	} else {
139		warn!("ACME: failed to access challenge map for cleanup");
140	}
141
142	result
143}
144
/// Run one ACME order for `domains` and return the issued key and certificate.
///
/// Steps, in order:
/// 1. Create an order with one DNS identifier per entry of `domains`.
/// 2. If the order starts out `Pending`: for every pending authorization,
///    publish the HTTP-01 key authorization in `state.acme_challenge_map`
///    (keyed by the identifier the challenge reports), mark the challenge
///    ready, then poll the order until it is ready or the retry policy gives up.
///    If it starts out `Ready`, this phase is skipped.
/// 3. Finalize the order, download the certificate chain and read `notAfter`
///    from the first PEM block of the chain.
/// 4. Build a `CertifiedKey` and install it in `state.certs` under every entry
///    of `domains`.
///
/// Every key written to the challenge map is also appended to
/// `inserted_identifiers`. This function never removes them; cleanup is left
/// to the caller (`renew_domains`).
///
/// # Errors
/// Returns an error when any ACME call, PEM/X.509 parsing step or lock access
/// fails, when no HTTP-01 challenge is offered, when the order does not become
/// `Ready` after polling, or when the order is created in a state other than
/// `Pending`/`Ready`.
async fn renew_domains_inner<'a>(
	state: &'a App,
	account: &'a acme::Account,
	domains: &'a [String],
	inserted_identifiers: &'a mut Vec<Box<str>>,
) -> ClResult<X509CertData> {
	info!("ACME {:?}", domains);
	let identifiers = domains
		.iter()
		.map(|domain| acme::Identifier::Dns(domain.clone()))
		.collect::<Vec<_>>();

	let mut order = account.new_order(&acme::NewOrder::new(identifiers.as_slice())).await?;

	debug!("ACME order {:#?}", order.state());

	let initial_status = order.state().status;
	// `Pending` is the normal first-time path. `Ready` can happen when LE has
	// already validated authorizations on a recent retry — finalize directly.
	// Anything else (Valid/Invalid/Processing) is unexpected and should fail.
	match initial_status {
		acme::OrderStatus::Pending => {
			let mut authorizations = order.authorizations();
			while let Some(result) = authorizations.next().await {
				let mut authz = result?;
				match authz.status {
					acme::AuthorizationStatus::Pending => {}
					acme::AuthorizationStatus::Valid => continue,
					status => {
						// Log unexpected status and continue - may be Deactivated, Expired, or Revoked
						warn!("Unexpected ACME authorization status: {:?}", status);
						continue;
					}
				}

				let mut challenge = authz
					.challenge(acme::ChallengeType::Http01)
					.ok_or(acme::Error::Str("no challenge"))?;
				let identifier: Box<str> = challenge.identifier().to_string().into_boxed_str();
				// Despite the name, `token` holds the key authorization string
				// taken from the challenge, i.e. the value `get_acme_challenge`
				// serves back for this identifier.
				let token: Box<str> = challenge.key_authorization().as_str().into();
				debug!("ACME challenge {} {}", identifier, token);
				// Publish the response before telling the server to validate.
				// The write guard is a temporary and is released at the end of
				// this statement, so it is not held across the await below.
				state
					.acme_challenge_map
					.write()
					.map_err(|_| {
						Error::ServiceUnavailable("failed to access ACME challenge map".into())
					})?
					.insert(identifier.clone(), token);
				// Remember the exact key so the caller can remove it later.
				inserted_identifiers.push(identifier);

				challenge.set_ready().await?;
			}

			info!("Start polling...");
			// Create a more patient retry policy for Let's Encrypt validation
			// Initial delay: 1s, backoff: 1.5x, timeout: 90s
			// This gives LE plenty of time to validate multiple domains
			let retry_policy = acme::RetryPolicy::new()
				.initial_delay(std::time::Duration::from_secs(1))
				.backoff(1.5)
				.timeout(std::time::Duration::from_secs(90));

			let status = order.poll_ready(&retry_policy).await?;

			if status != acme::OrderStatus::Ready {
				// Fetch authorization details to see validation errors
				let mut authorizations = order.authorizations();
				while let Some(result) = authorizations.next().await {
					// Diagnostics only: authorizations that fail to load are skipped.
					if let Ok(authz) = result {
						for challenge in &authz.challenges {
							if challenge.r#type == acme::ChallengeType::Http01
								&& let Some(ref err) = challenge.error
							{
								warn!(
									"ACME validation failed for {}: {}",
									authz.identifier(),
									err.detail.as_deref().unwrap_or("unknown error")
								);
							}
						}
					}
				}
				// `?` on an `Err` value: converts the ACME error and returns it.
				Err(acme::Error::Str("order not ready"))?;
			}
		}
		acme::OrderStatus::Ready => {
			info!("ACME order already Ready - skipping authorization phase");
		}
		other => {
			warn!("Unexpected ACME order status on creation: {:?}", other);
			return Err(Error::ConfigError("ACME initialization failed".into()));
		}
	}

	// Same policy values as the readiness poll above, rebuilt here because the
	// earlier one is local to the `Pending` arm.
	let retry_policy = acme::RetryPolicy::new()
		.initial_delay(std::time::Duration::from_secs(1))
		.backoff(1.5)
		.timeout(std::time::Duration::from_secs(90));

	info!("Finalizing...");
	let private_key_pem = order.finalize().await?;
	let cert_chain_pem = order.poll_certificate(&retry_policy).await?;
	info!("Got cert.");

	// Expiry is read from the first PEM block of the chain (expected to be the
	// leaf certificate — NOTE(review): confirm the server's chain ordering).
	let pem = &pem::parse(&cert_chain_pem)?;
	let cert_der = pem.contents();
	let (_, parsed_cert) = parse_x509_certificate(cert_der)?;
	let not_after = parsed_cert.validity().not_after;

	let certified_key = Arc::new(CertifiedKey::from_der(
		// NOTE: chain entries that fail to parse are silently dropped by
		// `filter_map(Result::ok)`.
		CertificateDer::pem_slice_iter(cert_chain_pem.as_bytes())
			.filter_map(Result::ok)
			.collect(),
		PrivateKeyDer::from_pem_slice(private_key_pem.as_bytes())?,
		CryptoProvider::get_default().ok_or(acme::Error::Str("no crypto provider"))?,
	)?);
	// Install the new key under every requested domain in the `certs` cache.
	for domain in domains {
		state
			.certs
			.write()
			.map_err(|_| Error::ServiceUnavailable("failed to access cert cache".into()))?
			.insert(domain.clone().into_boxed_str(), certified_key.clone());
	}

	let cert_data = X509CertData {
		private_key_pem: private_key_pem.into_boxed_str(),
		certificate_pem: cert_chain_pem.into_boxed_str(),
		expires_at: Timestamp(not_after.timestamp()),
	};

	Ok(cert_data)
}
277
278pub async fn get_acme_challenge(
279	State(state): State<App>,
280	headers: HeaderMap,
281) -> ClResult<Box<str>> {
282	let domain = headers
283		.get("host")
284		.ok_or(Error::ValidationError("missing host header".into()))?
285		.to_str()?;
286	info!("ACME challenge for domain {:?}", domain);
287
288	if let Some(token) = state
289		.acme_challenge_map
290		.read()
291		.map_err(|_| Error::ServiceUnavailable("failed to access ACME challenge map".into()))?
292		.get(domain)
293	{
294		debug!("ACME challenge served for {}", domain);
295		Ok(token.clone())
296	} else {
297		debug!("ACME challenge not found for {}", domain);
298		Err(Error::PermissionDenied)
299	}
300}
301
302/// Renew the TLS certificate for a single proxy site via ACME.
303///
304/// Loads the persisted ACME account (creating it on first use), generates the
305/// certificate, stores it in the auth adapter, and invalidates the cert cache.
306/// Called inline from proxy site creation and manual renewal endpoints, as
307/// well as from the periodic `CertRenewalTask`.
308pub async fn renew_proxy_site_cert(
309	app: &App,
310	acme_email: &str,
311	site_id: i64,
312	domain: &str,
313) -> ClResult<()> {
314	let account = get_or_create_acme_account(app, acme_email).await?;
315
316	let domains = vec![domain.to_string()];
317	let cert = renew_domains(app, &account, domains).await?;
318
319	app.auth_adapter
320		.update_proxy_site_cert(
321			site_id,
322			&cert.certificate_pem,
323			&cert.private_key_pem,
324			cert.expires_at,
325		)
326		.await?;
327
328	// Note: renew_domains() already inserts the fresh cert into app.certs cache,
329	// so no cache invalidation needed here.
330
331	info!(domain = %domain, "Proxy site certificate renewed successfully");
332	Ok(())
333}
334
335// Certificate Renewal Task
336// ========================
337
/// Certificate renewal task
///
/// Checks all tenants and proxy sites for missing or expiring certificates
/// and renews them.
/// Scheduled to run hourly via cron: "0 * * * *"
///
/// NOTE(review): other comments in this file call this task "daily" /
/// "nightly" — confirm the real schedule where the task is queued.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct CertRenewalTask {
	/// Number of days before expiration to trigger renewal (default: 30)
	pub renewal_days: u32,
	/// ACME email for account creation
	pub acme_email: String,
}
349
350impl CertRenewalTask {
351	/// Create new certificate renewal task
352	pub fn new(acme_email: String, renewal_days: u32) -> Self {
353		Self { renewal_days, acme_email }
354	}
355}
356
357#[async_trait]
358impl Task<App> for CertRenewalTask {
359	fn kind() -> &'static str {
360		"acme.cert_renewal"
361	}
362
363	fn kind_of(&self) -> &'static str {
364		Self::kind()
365	}
366
367	fn build(_id: TaskId, context: &str) -> ClResult<Arc<dyn Task<App>>> {
368		let task: CertRenewalTask = serde_json::from_str(context).map_err(|e| {
369			Error::ValidationError(format!("Failed to deserialize cert renewal task: {}", e))
370		})?;
371		Ok(Arc::new(task))
372	}
373
374	fn serialize(&self) -> String {
375		// Cannot fail: only String and u32 fields, no custom Serialize impl.
376		// Fallback to "null" so build() fails loudly rather than creating a
377		// corrupt task with default values.
378		serde_json::to_string(self).unwrap_or_else(|_| "null".to_string())
379	}
380
381	async fn run(&self, app: &App) -> ClResult<()> {
382		info!("Running certificate renewal check (renewal threshold: {} days)", self.renewal_days);
383
384		let tenants = app.auth_adapter.list_tenants_needing_cert_renewal(self.renewal_days).await?;
385		let proxy_sites = app
386			.auth_adapter
387			.list_proxy_sites_needing_cert_renewal(self.renewal_days)
388			.await?;
389
390		if tenants.is_empty() && proxy_sites.is_empty() {
391			info!("All certificates are valid");
392			return Ok(());
393		}
394
395		// Single resolver for the whole batch — same pattern as register.rs
396		let resolver = match create_recursive_resolver() {
397			Ok(r) => r,
398			Err(e) => {
399				error!(error = %e, "Cannot create DNS resolver; skipping renewal run");
400				return Ok(());
401			}
402		};
403
404		if !tenants.is_empty() {
405			info!("Found {} tenant(s) needing certificate renewal", tenants.len());
406			for row in tenants {
407				let app_domain: Option<&str> = None; // No custom domain support yet
408				let domains = build_domains_for_tenant(&row.id_tag, app_domain);
409
410				match check_domains_dns(&domains, &app.opts.local_address, &resolver).await {
411					Ok(()) => {}
412					Err(PreCheckError::Definitive(reason)) => {
413						warn!(
414							tn_id = %row.tn_id.0,
415							id_tag = %row.id_tag,
416							reason = %reason,
417							"Skipping ACME renewal: DNS pre-check failed"
418						);
419						handle_renewal_failure(app, &row, &reason).await;
420						continue;
421					}
422					Err(PreCheckError::Transient(reason)) => {
423						warn!(
424							tn_id = %row.tn_id.0,
425							id_tag = %row.id_tag,
426							reason = %reason,
427							"Skipping ACME renewal this run: transient DNS resolver error \
428							 (not counted as failure)"
429						);
430						continue;
431					}
432				}
433
434				info!("Renewing certificate for tenant: {} (tn_id={})", row.id_tag, row.tn_id.0);
435				match init(app.clone(), &self.acme_email, &row.id_tag, app_domain).await {
436					Ok(()) => {
437						info!(tn_id = %row.tn_id.0, id_tag = %row.id_tag,
438							"Certificate renewed successfully");
439						handle_renewal_success(app, &row, false).await;
440					}
441					Err(e) => {
442						let reason = format!("acme: {}", e);
443						error!(tn_id = %row.tn_id.0, id_tag = %row.id_tag, error = %reason,
444							"Failed to renew certificate");
445						handle_renewal_failure(app, &row, &reason).await;
446					}
447				}
448			}
449		}
450
451		if !proxy_sites.is_empty() {
452			info!("Found {} proxy site(s) needing certificate renewal", proxy_sites.len());
453
454			for site in proxy_sites {
455				let domains: Vec<String> = vec![site.domain.to_string()];
456				match check_domains_dns(&domains, &app.opts.local_address, &resolver).await {
457					Ok(()) => {}
458					Err(PreCheckError::Definitive(reason)) => {
459						warn!(
460							domain = %site.domain,
461							reason = %reason,
462							"Skipping ACME renewal for proxy site: DNS pre-check failed"
463						);
464						continue;
465					}
466					Err(PreCheckError::Transient(reason)) => {
467						warn!(
468							domain = %site.domain,
469							reason = %reason,
470							"Skipping ACME renewal for proxy site this run: transient DNS \
471							 resolver error"
472						);
473						continue;
474					}
475				}
476
477				info!(
478					"Renewing certificate for proxy site: {} (site_id={})",
479					site.domain, site.site_id
480				);
481
482				if let Err(e) =
483					renew_proxy_site_cert(app, &self.acme_email, site.site_id, &site.domain).await
484				{
485					error!(
486						domain = %site.domain,
487						error = %e,
488						"Failed to renew proxy site certificate"
489					);
490				}
491			}
492		}
493
494		info!("Certificate renewal check completed");
495		Ok(())
496	}
497}
498
/// One-shot retry of `acme::init` for a single tenant, scheduled by bootstrap
/// when the at-registration ACME attempt fails. Coordinated with the daily
/// `CertRenewalTask` only by the in-task `read_cert_by_tn_id` short-circuit:
/// each retry checks first whether a cert was installed since it was queued,
/// and if so exits without contacting the ACME directory.
///
/// Three of these are typically queued at 2 / 5 / 15-minute delays. They
/// survive process restart (unlike the previous `tokio::spawn` approach), and
/// the per-key dedup in the scheduler stops repeated bootstrap-failure events
/// from stacking duplicate retries on top of each other.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AcmeEarlyRetryTask {
	/// Tenant whose certificate is missing; used for the "cert already
	/// present" check and for recording the successful issuance.
	pub tn_id: TnId,
	/// Contact address passed to `init` (used if the ACME account must be created).
	pub acme_email: String,
	/// Identity tag of the tenant, passed to `init`.
	pub id_tag: String,
	/// Optional custom app domain, passed to `init`.
	pub app_domain: Option<String>,
}
516
517#[async_trait]
518impl Task<App> for AcmeEarlyRetryTask {
519	fn kind() -> &'static str {
520		"acme.early_retry"
521	}
522
523	fn kind_of(&self) -> &'static str {
524		Self::kind()
525	}
526
527	fn build(_id: TaskId, context: &str) -> ClResult<Arc<dyn Task<App>>> {
528		let task: AcmeEarlyRetryTask = serde_json::from_str(context).map_err(|e| {
529			Error::ValidationError(format!("Failed to deserialize early retry task: {}", e))
530		})?;
531		Ok(Arc::new(task))
532	}
533
534	fn serialize(&self) -> String {
535		// Same as CertRenewalTask::serialize — no fallible field types.
536		serde_json::to_string(self).unwrap_or_else(|_| "null".to_string())
537	}
538
539	async fn run(&self, app: &App) -> ClResult<()> {
540		// `read_cert_by_tn_id` filters `cert IS NOT NULL AND key IS NOT NULL`,
541		// so Ok(_) ⇒ cert installed (by an earlier retry or the daily renewal
542		// task) and we should stop. Err(NotFound) ⇒ proceed.
543		if app.auth_adapter.read_cert_by_tn_id(self.tn_id).await.is_ok() {
544			info!(id_tag = %self.id_tag,
545				"ACME early retry: cert already present, skipping");
546			return Ok(());
547		}
548		info!(id_tag = %self.id_tag, "ACME early retry attempt");
549		match init(app.clone(), &self.acme_email, &self.id_tag, self.app_domain.as_deref()).await {
550			Ok(()) => {
551				info!(id_tag = %self.id_tag, "ACME early retry succeeded");
552				let row = TenantCertRenewalRow {
553					tn_id: self.tn_id,
554					id_tag: self.id_tag.clone().into(),
555					expires_at: None,
556					failure_count: 0,
557					last_renewal_error: None,
558					notified_at: None,
559				};
560				handle_renewal_success(app, &row, true).await;
561				Ok(())
562			}
563			Err(e) => {
564				warn!(error = %e, id_tag = %self.id_tag, "ACME early retry failed");
565				// Surface the error so the scheduler records the failure, but
566				// the other queued retries (separate tasks) still fire.
567				Err(e)
568			}
569		}
570	}
571}
572
573/// Register ACME-related tasks with the scheduler
574///
575/// Must be called during app initialization before the scheduler starts loading tasks
576pub fn register_tasks(app: &App) -> ClResult<()> {
577	app.scheduler.register::<CertRenewalTask>()?;
578	app.scheduler.register::<AcmeEarlyRetryTask>()?;
579	Ok(())
580}
581
582// ============================================================================
583// DNS pre-check + renewal-failure tracking helpers
584// ============================================================================
585
/// Minimum gap (seconds) between renewal-failure notifications while the
/// certificate still has more than 7 days left: one week.
const RENEWAL_NOTIFY_LONG_INTERVAL_SECS: i64 = 7 * 86400;
/// Minimum gap (seconds) between renewal-failure notifications once the
/// certificate is within 7 days of expiry (or past it): one day.
const RENEWAL_NOTIFY_SHORT_INTERVAL_SECS: i64 = 86400;
588
/// List the host names a tenant certificate has to cover, in order:
/// `cl-o.<id_tag>` first, then the app domain (`app_domain` when given,
/// otherwise `id_tag` itself). Keeps in step with `renew_tenant`.
fn build_domains_for_tenant(id_tag: &str, app_domain: Option<&str>) -> Vec<String> {
	let api_domain = format!("cl-o.{}", id_tag);
	let app_host = match app_domain {
		Some(custom) => custom,
		None => id_tag,
	};
	vec![api_domain, app_host.to_string()]
}
596
/// Outcome of a DNS pre-check. Definitive failures (`"nodns"`, `"address"`)
/// are deterministic — the tenant's DNS is genuinely misconfigured — so they
/// escalate to suspension/notification. Transient failures (resolver network
/// errors, timeouts) are not the tenant's fault; the renewal is skipped this
/// run but failure_count / suspension state is left untouched so a flaky
/// resolver around expiry can't push a healthy tenant into Suspended.
enum PreCheckError {
	/// Carries the code from an `Error::ValidationError` returned by the
	/// domain validation.
	Definitive(String),
	/// Carries the display text of any other error returned by the domain
	/// validation.
	Transient(String),
}
607
608/// DNS pre-check for every domain in the list. Returns the error code
609/// (`"nodns"` or `"address"`, matching `register.rs` conventions) on the
610/// first definitive failure, or a transient error wrapping the underlying
611/// resolver error. If `local_address` is empty (e.g., local dev), the check
612/// is skipped — same as `register.rs` does.
613async fn check_domains_dns(
614	domains: &[String],
615	local_address: &[Box<str>],
616	resolver: &DnsResolver,
617) -> Result<(), PreCheckError> {
618	if local_address.is_empty() {
619		return Ok(());
620	}
621	for domain in domains {
622		match validate_domain_address(domain, local_address, resolver).await {
623			Ok(_) => {}
624			Err(Error::ValidationError(code)) => return Err(PreCheckError::Definitive(code)),
625			Err(e) => return Err(PreCheckError::Transient(format!("{}", e))),
626		}
627	}
628	Ok(())
629}
630
631pub async fn handle_renewal_success(
632	app: &App,
633	row: &TenantCertRenewalRow,
634	is_first_issuance: bool,
635) {
636	if let Err(e) = app.auth_adapter.record_cert_renewal_success(row.tn_id).await {
637		warn!(tn_id = %row.tn_id.0, id_tag = %row.id_tag, error = %e,
638			"Failed to record renewal success");
639	}
640	// Only flip status back to active when this row was previously suspended
641	// (i.e. its prior cert was past expiry). Calling `update_tenant_status('A')`
642	// unconditionally would bump `tenants.updated_at` on every nightly run for
643	// every healthy tenant.
644	let is_currently_expired = row.expires_at.is_some_and(|t| t.0 < Timestamp::now().0);
645	if is_currently_expired {
646		if let Err(e) = app.auth_adapter.update_tenant_status(row.tn_id, 'A').await {
647			warn!(tn_id = %row.tn_id.0, id_tag = %row.id_tag, error = %e,
648				"Failed to clear suspended status after renewal");
649		} else {
650			info!(tn_id = %row.tn_id.0, id_tag = %row.id_tag,
651				"Tenant un-suspended after successful cert renewal");
652		}
653	}
654
655	// First-issuance hook: only fire when the caller passed
656	// `is_first_issuance = true` (currently only the bootstrap synthetic-row
657	// paths). The daily renewal task passes `false` here, so a row whose
658	// `expires_at` is somehow NULL for a non-first reason will not re-fire
659	// the hook.
660	if is_first_issuance
661		&& let Ok(hook) = app.ext::<crate::OnFirstCertIssuedFn>()
662		&& let Err(e) = hook(app, row.tn_id, &row.id_tag).await
663	{
664		warn!(tn_id = %row.tn_id.0, id_tag = %row.id_tag, error = %e,
665			"on_first_cert_issued hook failed");
666	}
667}
668
669async fn handle_renewal_failure(app: &App, row: &TenantCertRenewalRow, reason: &str) {
670	// Always record the failure so we have a counter, even on the
671	// initial-bootstrap path (no cert yet). The adapter upserts the row.
672	if let Err(e) = app.auth_adapter.record_cert_renewal_failure(row.tn_id, reason).await {
673		warn!(tn_id = %row.tn_id.0, id_tag = %row.id_tag, error = %e,
674			"Failed to record renewal failure");
675	}
676
677	let now = Timestamp::now().0;
678
679	let (days_until_expiry, already_expired) = match row.expires_at {
680		Some(expires_at) => {
681			let days = (expires_at.0 - now) / 86400;
682			(days, days <= 0)
683		}
684		// No cert yet — treat as already-expired for suspension/notify cadence.
685		None => (0, true),
686	};
687
688	// Suspend the tenant once the cert is past expiry (or absent). Flipping an
689	// already-suspended tenant to 'S' is a no-op; we never downgrade here.
690	if already_expired && let Err(e) = app.auth_adapter.update_tenant_status(row.tn_id, 'S').await {
691		warn!(tn_id = %row.tn_id.0, id_tag = %row.id_tag, error = %e,
692			"Failed to mark tenant suspended");
693	}
694
695	let should_notify = should_notify(row, now, days_until_expiry);
696	if !should_notify {
697		return;
698	}
699
700	let expires_at = row.expires_at.unwrap_or(Timestamp(now));
701	if let Err(e) = schedule_renewal_failure_email(
702		app,
703		row,
704		reason,
705		expires_at,
706		days_until_expiry,
707		already_expired,
708	)
709	.await
710	{
711		warn!(tn_id = %row.tn_id.0, id_tag = %row.id_tag, error = %e,
712			"Failed to schedule renewal-failure email");
713		return;
714	}
715
716	if let Err(e) = app.auth_adapter.record_cert_renewal_notification(row.tn_id).await {
717		warn!(tn_id = %row.tn_id.0, id_tag = %row.id_tag, error = %e,
718			"Failed to stamp notified_at");
719	}
720}
721
722fn should_notify(row: &TenantCertRenewalRow, now: i64, days_until_expiry: i64) -> bool {
723	// First failure (no notification recorded yet): always notify.
724	let Some(last) = row.notified_at else {
725		return true;
726	};
727	let interval = if days_until_expiry <= 7 {
728		RENEWAL_NOTIFY_SHORT_INTERVAL_SECS
729	} else {
730		RENEWAL_NOTIFY_LONG_INTERVAL_SECS
731	};
732	now - last.0 >= interval
733}
734
735async fn schedule_renewal_failure_email(
736	app: &App,
737	row: &TenantCertRenewalRow,
738	reason: &str,
739	expires_at: Timestamp,
740	days_until_expiry: i64,
741	suspended: bool,
742) -> ClResult<()> {
743	let schedule_email = app.ext::<ScheduleEmailFn>()?;
744
745	// Tenant email lives on AuthProfile.
746	let profile = app.auth_adapter.read_tenant(&row.id_tag).await?;
747	let Some(email) = profile.email else {
748		warn!(tn_id = %row.tn_id.0, id_tag = %row.id_tag,
749			"Cannot send renewal-failure email: tenant has no email on file");
750		return Ok(());
751	};
752
753	// Pull the user's preferred language directly via the settings service —
754	// we can't depend on cloudillo-email here.
755	let lang = match app.settings.get(row.tn_id, "profile.lang").await {
756		Ok(Some(crate::settings::SettingValue::String(s))) => Some(s),
757		_ => None,
758	};
759
760	let base_id_tag = app.opts.base_id_tag.as_ref().map_or("cloudillo", AsRef::as_ref);
761	let local_address_str =
762		app.opts.local_address.iter().map(AsRef::as_ref).collect::<Vec<_>>().join(", ");
763	let domain_for_display = format!("cl-o.{}", row.id_tag);
764
765	let template_vars = serde_json::json!({
766		"idTag": row.id_tag.as_ref(),
767		"domain": domain_for_display,
768		"daysUntilExpiry": days_until_expiry,
769		"expiresAt": expires_at.to_iso_string(),
770		"errorReason": reason,
771		"suspended": suspended,
772		"localAddress": local_address_str,
773		"base_id_tag": base_id_tag,
774		"instance_name": "Cloudillo",
775	});
776
777	let params = ScheduleEmailParams {
778		to: email.to_string(),
779		template_name: "cert_renewal_failed".to_string(),
780		template_vars,
781		lang,
782		// Once-per-day key so we don't queue duplicate emails when the task
783		// runs multiple times before sending (failure_count + day stamp).
784		custom_key: Some(format!(
785			"cert-renewal-failed:{}:{}",
786			row.tn_id.0,
787			Timestamp::now().0 / 86400
788		)),
789		from_name_override: Some(format!("Cloudillo | {}", base_id_tag.to_uppercase())),
790	};
791
792	schedule_email(app, row.tn_id, params).await
793}
794
795// vim: ts=4