Skip to main content

greentic_start/
lib.rs

1use std::collections::BTreeSet;
2use std::path::PathBuf;
3
4use anyhow::{Context, anyhow};
5use clap::Parser;
6use clap::error::ErrorKind;
7
8mod admin_certs;
9mod admin_server;
10mod bin_resolver;
11mod bundle_config;
12mod bundle_ref;
13mod capabilities;
14mod cards;
15mod cli_args;
16mod cloudflared;
17mod component_qa_ops;
18pub mod config;
19mod demo_qa_bridge;
20mod dependency_resolver;
21mod deployment_routes;
22mod dev_store_path;
23mod discovery;
24mod doctor;
25mod domains;
26mod endpoint_admit;
27mod endpoint_resolver;
28mod event_router;
29mod extension_resolver;
30pub(crate) mod flow_log;
31mod gmap;
32mod http_ingress;
33mod http_routes;
34mod identify_payload;
35mod ingress;
36mod ingress_dispatch;
37mod ingress_types;
38mod messaging_app;
39mod messaging_dto;
40mod messaging_egress;
41mod ngrok;
42pub mod notifier;
43mod offers;
44mod onboard;
45mod operator_i18n;
46mod operator_log;
47#[doc(hidden)]
48pub mod perf_harness;
49mod port_utils;
50mod post_ingress_hooks;
51mod project;
52mod provider_auth;
53pub mod provider_config_envelope;
54mod qa_persist;
55mod revision_boot;
56mod revision_dispatcher;
57mod revision_drain;
58pub mod revision_health_gate;
59mod revision_pin;
60mod revision_reload;
61mod revision_serve;
62mod revision_webhook_register;
63mod rollout_telemetry;
64mod runner_exec;
65mod runner_host;
66mod runner_integration;
67pub mod runtime;
68mod runtime_config;
69mod runtime_refs_store;
70pub mod runtime_state;
71mod secret_name;
72mod secret_requirements;
73mod secret_value;
74mod secrets_backend;
75mod secrets_client;
76mod secrets_gate;
77mod secrets_manager;
78mod secrets_setup;
79mod services;
80mod setup_input;
81mod setup_to_formspec;
82mod startup_contract;
83mod state_layout;
84mod static_routes;
85mod subscription_updater;
86mod subscriptions_universal;
87pub mod supervisor;
88#[cfg(test)]
89mod test_fixtures;
90mod timer_scheduler;
91mod tunnel_prompt;
92mod warmup;
93mod webhook_secret_resolver;
94mod webhook_updater;
95#[doc(hidden)]
96pub mod ws_test_support;
97
98use cli_args::{
99    Cli, Command, normalize_args, restart_name, start_request_from_args, stop_request_from_args,
100};
101pub use cli_args::{
102    CloudflaredModeArg, NatsModeArg, NgrokModeArg, RestartTarget, StartRequest, StopRequest,
103};
104
/// Tenant name literal `"demo"`.
///
/// NOTE(review): presumably the tenant used when a demo run does not name
/// one — its use sites are outside this excerpt; confirm against callers.
const DEMO_DEFAULT_TENANT: &str = "demo";
/// Team name literal `"default"`.
///
/// NOTE(review): presumably the team used when a demo run does not name
/// one — its use sites are outside this excerpt; confirm against callers.
const DEMO_DEFAULT_TEAM: &str = "default";

/// Default environment id when nothing is set. Flipped from `"dev"` to
/// `"local"` as part of A4b — the `local` env is what `gtc setup` and
/// `gtc start` auto-create per A4.
///
/// This is also the value [`resolve_env`] falls back to when neither an
/// explicit override nor `$GREENTIC_ENV` is available.
pub const DEFAULT_ENV_ID: &str = "local";

/// Legacy env id this crate accepts via the compat alias. Resolved values
/// that match this string are remapped to [`DEFAULT_ENV_ID`] with a
/// once-per-process warning, unless the operator disables the alias.
pub const LEGACY_ENV_ID: &str = "dev";

/// Env-var that disables the [`LEGACY_ENV_ID`] → [`DEFAULT_ENV_ID`] compat
/// alias. Set to `1`, `true`, `yes`, or `on` (case-insensitive, surrounding
/// whitespace ignored) to make any resolved value of `dev` hard-fail — the
/// resolution panics with a remediation hint. Any other value, or an unset
/// variable, leaves the alias enabled.
pub const DISABLE_ALIAS_ENV_VAR: &str = "GREENTIC_DISABLE_DEV_ALIAS";
122
123/// Resolve the effective environment string.
124///
125/// Priority: explicit override > `$GREENTIC_ENV` > [`DEFAULT_ENV_ID`]
126/// (`"local"`). After resolution, applies the [`LEGACY_ENV_ID`] →
127/// [`DEFAULT_ENV_ID`] compat alias: any value of `dev` is remapped to
128/// `local` with a once-per-process `tracing::warn!` unless
129/// [`DISABLE_ALIAS_ENV_VAR`] is set, in which case the resolution panics
130/// with a remediation hint.
131///
132/// This is the canonical helper for the `runner_host`, `secrets_setup`,
133/// and `qa_persist` paths. Mirrors `greentic_setup::resolve_env` (A4b
134/// PR2 in `greentic-setup`). If the duplication ever proves load-bearing,
135/// fold both into a shared helper in `greentic-deployer::cli::bootstrap`
136/// or similar.
137pub fn resolve_env(override_env: Option<&str>) -> String {
138    let raw = override_env
139        .map(|v| v.to_string())
140        .or_else(|| std::env::var("GREENTIC_ENV").ok())
141        .unwrap_or_else(|| DEFAULT_ENV_ID.to_string());
142    compat_alias::apply_dev_alias(&raw)
143}
144
145mod compat_alias {
146    //! `dev` → `local` compatibility alias (A4b).
147    //!
148    //! Mirrors `greentic_setup::compat_alias`. Centralizing into a shared
149    //! crate is deferred until the duplication starts mattering — the
150    //! logic is ~30 lines and the two crates have distinct test surfaces.
151
152    use std::sync::atomic::{AtomicBool, Ordering};
153
154    use super::{DEFAULT_ENV_ID, DISABLE_ALIAS_ENV_VAR, LEGACY_ENV_ID};
155
156    static WARNED: AtomicBool = AtomicBool::new(false);
157
158    /// Apply the `dev` → `local` compat alias. Returns the remapped value
159    /// for any input equal to [`LEGACY_ENV_ID`]; returns the input
160    /// unchanged for any other value. Panics if the alias is disabled via
161    /// [`DISABLE_ALIAS_ENV_VAR`] and the input is the legacy id.
162    pub fn apply_dev_alias(env: &str) -> String {
163        if env != LEGACY_ENV_ID {
164            return env.to_string();
165        }
166        if alias_disabled() {
167            // Hard-fail expiry gate. The panic message is the remediation —
168            // tracing may not be wired in every binary that consumes
169            // `resolve_env`, and `process::exit()` bypasses test harnesses.
170            panic!(
171                "environment `{LEGACY_ENV_ID}` is no longer accepted (set via {DISABLE_ALIAS_ENV_VAR}=1). \
172                 Migrate to `{DEFAULT_ENV_ID}` via `gtc op env migrate-dev {DEFAULT_ENV_ID} --check` then `--apply`, \
173                 or pass `--env {DEFAULT_ENV_ID}` / unset $GREENTIC_ENV.",
174            );
175        }
176        if !WARNED.swap(true, Ordering::SeqCst) {
177            tracing::warn!(
178                target: "greentic_start::compat_alias",
179                legacy = LEGACY_ENV_ID,
180                target_env = DEFAULT_ENV_ID,
181                "env `{LEGACY_ENV_ID}` is deprecated; resolving as `{DEFAULT_ENV_ID}` for this process. \
182                 Plan the migration with `gtc op env migrate-dev {DEFAULT_ENV_ID} --check`; \
183                 set {DISABLE_ALIAS_ENV_VAR}=1 to hard-fail on `{LEGACY_ENV_ID}` in CI.",
184            );
185        }
186        DEFAULT_ENV_ID.to_string()
187    }
188
189    fn alias_disabled() -> bool {
190        std::env::var(DISABLE_ALIAS_ENV_VAR)
191            .ok()
192            .map(|v| {
193                let v = v.trim().to_ascii_lowercase();
194                matches!(v.as_str(), "1" | "true" | "yes" | "on")
195            })
196            .unwrap_or(false)
197    }
198
199    /// Reset the warning latch. Test-only so multiple `apply_dev_alias`
200    /// invocations can each verify the once-per-process behavior.
201    #[cfg(test)]
202    pub(super) fn reset_warning_latch_for_tests() {
203        WARNED.store(false, Ordering::SeqCst);
204    }
205}
206
/// Run a start request.
///
/// Public entry point that hands `request` straight to the crate-private
/// `run_start` and returns its result unchanged.
pub fn run_start_request(request: StartRequest) -> anyhow::Result<()> {
    run_start(request)
}
210
211pub fn run_restart_request(mut request: StartRequest) -> anyhow::Result<()> {
212    if request.restart.is_empty() {
213        request.restart.push(RestartTarget::All);
214    }
215    run_start(request)
216}
217
218pub fn run_stop_request(request: StopRequest) -> anyhow::Result<()> {
219    let state_dir = resolve_state_dir(request.state_dir, request.bundle.as_deref())?;
220    runtime::demo_down_runtime(&state_dir, &request.tenant, &request.team, false)
221}
222
223pub fn run_from_env() -> anyhow::Result<()> {
224    let raw_tail: Vec<String> = std::env::args().skip(1).collect();
225    let tunnel_explicit = raw_tail
226        .iter()
227        .any(|a| a.starts_with("--cloudflared") || a.starts_with("--ngrok"));
228    let args = normalize_args(raw_tail);
229    let cli = match Cli::try_parse_from(args) {
230        Ok(cli) => cli,
231        Err(err)
232            if matches!(
233                err.kind(),
234                ErrorKind::DisplayHelp | ErrorKind::DisplayVersion
235            ) =>
236        {
237            print!("{err}");
238            return Ok(());
239        }
240        Err(err) => return Err(err.into()),
241    };
242    if let Some(locale) = cli.locale.as_deref() {
243        operator_i18n::set_locale(locale);
244    }
245
246    match cli.command {
247        Command::Start(args) | Command::Up(args) => {
248            run_start_request(start_request_from_args(args, tunnel_explicit))
249        }
250        Command::Restart(args) => {
251            run_restart_request(start_request_from_args(args, tunnel_explicit))
252        }
253        Command::Stop(args) => run_stop_request(stop_request_from_args(args)),
254        Command::Warmup(args) => crate::warmup::run_warmup_request(crate::warmup::WarmupRequest {
255            bundle: args.bundle,
256            cache_dir: args.cache_dir,
257            strict: args.strict,
258        }),
259        Command::Doctor(args) => {
260            let has_errors = crate::doctor::run_doctor(args)?;
261            if has_errors {
262                std::process::exit(1);
263            }
264            Ok(())
265        }
266    }
267}
268
269fn run_start(mut request: StartRequest) -> anyhow::Result<()> {
270    // Disable provider-core-only mode in demo so WASM components can access secrets directly.
271    // Without this, the runner-host blocks secrets_store.get() calls from WASM.
272    // SAFETY: This is called early in single-threaded startup before spawning workers.
273    unsafe {
274        std::env::set_var("GREENTIC_PROVIDER_CORE_ONLY", "0");
275    }
276
277    // Set GREENTIC_ENV to the A4b default (`local`) if not already set.
278    // A4's `bootstrap_local_environment` (below) creates `~/.greentic/environments/local/`
279    // and downstream secret resolution keys off this env. If the user already exported
280    // `GREENTIC_ENV=dev`, the A4b compat alias inside `resolve_env` remaps it to
281    // `local` with a once-per-process warning until the alias is disabled.
282    // SAFETY: This is called early in single-threaded startup before spawning workers.
283    if std::env::var("GREENTIC_ENV").is_err() {
284        unsafe {
285            std::env::set_var("GREENTIC_ENV", DEFAULT_ENV_ID);
286        }
287    }
288
289    bootstrap_local_environment()?;
290
291    // N1.2: bundle-less cold start. When launched without `--bundle` / `--config`,
292    // boot from the env's persisted state regardless of whether bundles are
293    // attached yet. The listener always comes up so `/livez`, `/readyz`, and
294    // `/status` are reachable; a missing or empty `runtime-config.v1` produces a
295    // zero-revision activation that serves probes + 404s for unrouted paths until
296    // bundles are attached (hot-attach lands in N2). When the runtime-config is
297    // populated, this is the same B0/B2/B3 path as before: load + validate, build
298    // an embedded runner host, run requests through the revision dispatcher.
299    if request.bundle.is_none() && request.config.is_none() {
300        let env_id = resolve_env(None);
301        let rc = runtime_config::load_or_empty(&env_id)?;
302        let store_root = greentic_deployer::environment::LocalFsStore::default_root()
303            .context("cannot determine the default environment store root (no home directory)")?;
304        let env_dir = runtime_config::env_dir_in(&store_root, &env_id)?;
305
306        // Initialize operator.log under the env directory before any
307        // `operator_log::*` call on this path; otherwise every banner,
308        // listener log, and warning is silently dropped (the logger
309        // no-ops until `init`).
310        let log_level = if request.quiet {
311            operator_log::Level::Warn
312        } else if request.verbose {
313            operator_log::Level::Debug
314        } else {
315            operator_log::Level::Info
316        };
317        let log_dir = operator_log::init(env_dir.join("logs"), log_level)?;
318        let _trace_guard = init_trace_log(&log_dir);
319
320        // Activate with the env's own DevStore secrets backend rather than
321        // HostBuilder's default env-var backend (which rejects non-local
322        // envs). A later step refines this to the per-tenant/pack-declared
323        // backend once the serving context is resolved.
324        let secrets: crate::secrets_gate::DynSecretsManager =
325            std::sync::Arc::new(crate::secrets_client::SecretsClient::open(&env_dir)?);
326        // Clone for the runtime-config watcher's rebuild closure (N2.2):
327        // it needs the same secrets backend to rebuild activations after
328        // the deployer rewrites `runtime-config.json`. `DynSecretsManager`
329        // is `Arc<dyn ...>`, so this is a refcount bump.
330        let watcher_secrets = std::sync::Arc::clone(&secrets);
331
332        // Load the Environment so the bind address can layer on top of the
333        // persisted `host_config.listen_addr`. The same `Environment` is
334        // threaded into `activate_runtime_config` so the activation path
335        // does not re-read the file (and cannot see a different snapshot).
336        let env_store = greentic_deployer::environment::LocalFsStore::new(store_root.clone());
337        let env_typed = greentic_types::EnvId::new(&env_id)
338            .with_context(|| format!("invalid environment id `{env_id}`"))?;
339        let environment =
340            greentic_deployer::environment::EnvironmentStore::load(&env_store, &env_typed)
341                .with_context(|| format!("loading environment `{env_id}` for bundle-less boot"))?;
342
343        // C5: open the env's runtime.json snapshot once and share it across
344        // every activation rebuild + the `runtime://` resolver every loaded
345        // pack reaches. The store is `Arc`-shared so a single in-memory
346        // snapshot is hot-reloaded on every `runtime.json` write — the
347        // watcher below calls `store.reload()` without rebuilding the full
348        // activation. The resolver implements
349        // `greentic_runner_host::runtime_refs::RuntimeRefResolver`, lives
350        // for the env's lifetime, and is cloned (refcount bump) into each
351        // revision's load.
352        let runtime_refs_store =
353            crate::runtime_refs_store::EnvironmentRuntimeStore::open(&env_dir, env_typed.clone())
354                .with_context(|| format!("opening runtime.json snapshot for env `{env_id}`"))?;
355        // The same `Arc` flows into BOTH the cold-start activation AND each
356        // reload-rebuilt activation (via the watcher's `default_rebuild`) so
357        // every revision in the env resolves `runtime://` URIs through one
358        // store — a snapshot flip is visible to the next request.
359        let runtime_ref_resolver: std::sync::Arc<
360            dyn greentic_runner_host::runtime_refs::RuntimeRefResolver,
361        > = std::sync::Arc::new(crate::runtime_refs_store::StartRuntimeRefResolver::new(
362            std::sync::Arc::clone(&runtime_refs_store),
363        ));
364
365        let activation_rt = tokio::runtime::Builder::new_multi_thread()
366            .enable_all()
367            .build()
368            .context("building runtime for revision activation")?;
369        let activation = activation_rt.block_on(revision_boot::activate_runtime_config(
370            &store_root,
371            &rc,
372            secrets,
373            &environment,
374            std::sync::Arc::clone(&runtime_ref_resolver),
375        ))?;
376
377        // Execution bridge: serve the activated revisions over a slim HTTP
378        // loop. Each request resolves to a deployment, the dispatcher picks a
379        // revision, and the request runs against that revision's runtime via
380        // `RunnerHost::handle_activity_for_revision`. This is the generic-JSON
381        // vertical slice — provider webhook parsing, WebChat/WS, and static
382        // assets under revisions stay on the legacy `--bundle` ingress.
383        let revision_boot::RuntimeConfigActivation { host, routing } = activation;
384        let bind_addr = revision_serve::resolve_bind_addr(Some(&environment.host_config));
385        let activation = std::sync::Arc::new(revision_serve::Activation {
386            host: std::sync::Arc::new(host),
387            routing: std::sync::Arc::new(routing),
388        });
389        let server = revision_serve::RevisionServer::start(revision_serve::RevisionServeConfig {
390            bind_addr,
391            activation: std::sync::Arc::clone(&activation),
392        })
393        .context("starting the revision ingress server")?;
394        let listen = std::net::SocketAddr::new(bind_addr.ip(), server.actual_port());
395        let (deployment_count, revision_count) = server.counts();
396        let banner = if revision_count == 0 {
397            format!(
398                "no bundles attached to env `{}` — serving probes only on http://{listen} \
399                 (attach a bundle with `gtc op bundles add`)",
400                rc.env_id
401            )
402        } else {
403            format!(
404                "serving {revision_count} revision(s) for env `{}` across {deployment_count} \
405                 deployment(s) on http://{listen}",
406                rc.env_id
407            )
408        };
409        operator_log::info(module_path!(), banner.clone());
410        println!("\n{banner}. Press Ctrl+C to stop.");
411
412        // Phase D: auto-register provider webhooks for the served revisions.
413        // Gated on a public_base_url — the bundle-less boot runs no tunnel, so
414        // a persisted or env-var-supplied address is the only one we can hand
415        // to a provider. With none, registration is skipped (register manually).
416        // Detached: the server is already listening, and a slow or stuck
417        // provider API call must not delay the watcher spawn or Ctrl+C
418        // handling; each invocation is bounded by `SETUP_WEBHOOK_TIMEOUT`.
419        //
420        // Precedence (no tunnel on this path): env-store > env var. Delegated
421        // to the canonical helper in `startup_contract` so this path stays in
422        // lockstep with the reload path in `revision_webhook_register`.
423        let public_base_url = startup_contract::resolve_public_base_url(&environment)?;
424        if revision_count > 0 {
425            let boot_activation = std::sync::Arc::clone(&activation);
426            let boot_url = public_base_url.clone();
427            let boot_env = environment;
428            activation_rt.spawn(async move {
429                revision_webhook_register::register_new_model_webhooks(
430                    &boot_activation,
431                    &boot_env,
432                    boot_url.as_deref(),
433                )
434                .await;
435            });
436        }
437        // The server holds its own `Arc<Activation>`; release ours so a later
438        // reload can free the superseded activation after its drain window.
439        drop(activation);
440
441        // N2.2 + C5: spawn the unified env-dir watcher. Dispatches per
442        // debounced batch:
443        //   - `runtime-config.json` / `environment.json` → rebuild the
444        //     activation + swap into the `RevisionServer` (the N2.2 flow:
445        //     `gtc op bundles add`, `revisions stage/warm`, `traffic set`,
446        //     `op messaging endpoint *`).
447        //   - `runtime.json` → refresh the in-memory `EnvironmentRuntime`
448        //     snapshot the resolver reads (cheap; no activation rebuild).
449        //     The deployer re-emits discovered values on every apply, so
450        //     coupling them to a full rebuild would churn cookies/pins.
451        // The server `Arc` lets the worker thread call `server.reload()`
452        // while the main thread still owns the original handle for
453        // shutdown.
454        let server = std::sync::Arc::new(server);
455        let snapshot_store_for_watcher = std::sync::Arc::clone(&runtime_refs_store);
456        let watcher = revision_reload::spawn_runtime_config_watcher(
457            env_dir.clone(),
458            revision_reload::DEFAULT_DEBOUNCE,
459            // Drain window matches the cold-start expectation: in-flight
460            // requests against the previous activation get ~30s to finish
461            // before the old `RunnerHost` drops. Tuned for local-dev;
462            // remote/cloud is Phase D scope.
463            std::time::Duration::from_secs(30),
464            std::sync::Arc::clone(&server),
465            revision_reload::default_rebuild(
466                store_root.clone(),
467                env_id.clone(),
468                watcher_secrets,
469                std::sync::Arc::clone(&runtime_ref_resolver),
470                activation_rt.handle().clone(),
471            ),
472            // Each reload that actually changed config (hot-attached
473            // deployment, new endpoint) re-registers webhooks against the
474            // freshly-served activation — AFTER the swap, so the registered
475            // URL is live before the provider validates or delivers to it.
476            // The URL is resolved freshly from the reloaded environment.json
477            // (with env-var fallback), so `gtc op env set-public-url` takes
478            // effect on the next reload without a process restart.
479            // Idempotent for unchanged routes (same URL + secret_token).
480            revision_webhook_register::post_reload_registration(
481                store_root.clone(),
482                env_id.clone(),
483                activation_rt.handle().clone(),
484            ),
485            // C5 snapshot-reload arm: pure `store.reload()` call.
486            move || snapshot_store_for_watcher.reload(),
487        )
488        .context("spawning runtime-config watcher")?;
489
490        if let Err(err) = activation_rt.block_on(tokio::signal::ctrl_c()) {
491            operator_log::warn(
492                module_path!(),
493                format!("revision serving Ctrl+C listener error: {err}"),
494            );
495        }
496        // Drop the watcher before stopping the server so the watcher's
497        // worker can't call `server.reload()` on a server that's already
498        // half-torn-down. Snapshot reloads and activation rebuilds share
499        // the same watcher (one debouncer, two dispatch arms), so this
500        // single drop suffices.
501        drop(watcher);
502        // Recover sole ownership for `stop()`. Any in-flight drain task
503        // spawned by N2.1's `reload()` owns an `Arc<Activation>`, NOT an
504        // `Arc<RevisionServer>`, so this should always succeed today. If a
505        // future contributor introduces a second `Arc<RevisionServer>`
506        // holder without updating the shutdown sequence, fall back to a
507        // warn-and-leak: skipping `stop()` leaves the listener thread
508        // running until process exit (which is moments away), which is
509        // strictly better than bailing out and skipping any other
510        // shutdown work the caller may have layered above us.
511        match std::sync::Arc::try_unwrap(server) {
512            Ok(server) => server.stop()?,
513            Err(_arc) => {
514                operator_log::warn(
515                    module_path!(),
516                    "RevisionServer Arc still has consumers at shutdown — \
517                     skipping graceful stop(); the listener thread will be \
518                     terminated on process exit.",
519                );
520            }
521        }
522        return Ok(());
523    }
524
525    // Temporary process-level API key fallback disabled while debugging the
526    // adaptive card/runtime path. Keep this block for quick re-enable if we
527    // need to revisit local Ollama compatibility.
528    //
529    // for key in ["OPENAI_API_KEY", "OLLAMA_API_KEY", "API_KEY"] {
530    //     if std::env::var(key).is_err() {
531    //         unsafe {
532    //             std::env::set_var(key, "ollama-placeholder");
533    //         }
534    //     }
535    // }
536
537    let restart: BTreeSet<String> = request.restart.iter().map(restart_name).collect();
538    let log_level = if request.quiet {
539        operator_log::Level::Warn
540    } else if request.verbose {
541        operator_log::Level::Debug
542    } else {
543        operator_log::Level::Info
544    };
545
546    // Initialize operator.log before any fallible setup so startup failures (bad
547    // bundle.yaml, missing config, unreadable paths) leave an on-disk trace.
548    let early_log_dir = request.log_dir.clone().unwrap_or_else(|| {
549        request
550            .bundle
551            .as_deref()
552            .map(|b| PathBuf::from(b).join("logs"))
553            .unwrap_or_else(|| {
554                std::env::current_dir()
555                    .unwrap_or_else(|_| PathBuf::from("."))
556                    .join("logs")
557            })
558    });
559    let log_dir = operator_log::init(early_log_dir, log_level)?;
560
561    // Install a tracing subscriber that writes RUST_LOG-filtered events to
562    // <log_dir>/trace.log. Without this, every `tracing::*` call in greentic
563    // crates (notably greentic-runner-host) is dropped because no subscriber
564    // is registered. We bind the appender guard to a long-lived `static` so
565    // background tasks can flush throughout the process lifetime.
566    let _trace_guard = init_trace_log(&log_dir);
567
568    let demo_paths = match bundle_config::resolve_demo_paths(
569        request.config.clone(),
570        request.bundle.as_deref(),
571    ) {
572        Ok(paths) => paths,
573        Err(err) => {
574            operator_log::error(
575                module_path!(),
576                format!("resolve_demo_paths failed: {err:#}"),
577            );
578            return Err(err);
579        }
580    };
581    let config_path = demo_paths.config_path.clone();
582    let config_dir = demo_paths.root_dir.clone();
583    let state_dir = demo_paths.state_dir.clone();
584
585    crate::warmup::adopt_bundle_cache_dir(&config_dir);
586
587    let resolved_log_dir = config_dir.join("logs");
588    if request.log_dir.is_none() && resolved_log_dir != log_dir {
589        operator_log::warn(
590            module_path!(),
591            format!(
592                "operator.log is at {} but resolved bundle log dir is {}; future logs stay at the former",
593                log_dir.display(),
594                resolved_log_dir.display()
595            ),
596        );
597    }
598
599    // Initialize flow execution logger (writes to logs/flow.log)
600    match flow_log::init(&log_dir) {
601        Ok(path) => {
602            operator_log::info(
603                module_path!(),
604                format!("flow.log initialized at {}", path.display()),
605            );
606        }
607        Err(e) => {
608            operator_log::warn(module_path!(), format!("failed to init flow.log: {e}"));
609        }
610    }
611
612    let mut demo_config = bundle_config::load_runtime_demo_config(&demo_paths, &request)?;
613    apply_nats_overrides(&mut demo_config, &request);
614    let static_routes = startup_contract::inspect_bundle(&config_dir)?;
615    let configured_public_base_url = startup_contract::configured_public_base_url_from_env()?;
616    // Persisted public_base_url from `gtc op env set-public-url`. Sits between
617    // the tunnel-discovered URL (always wins) and the `PUBLIC_BASE_URL` env var
618    // in the precedence chain. Failing to read this is non-fatal: a corrupt env
619    // store should not block a foreground startup, so we log and fall back.
620    let env_store_public_base_url =
621        match startup_contract::configured_public_base_url_from_env_store(&resolve_env(None)) {
622            Ok(value) => value,
623            Err(err) => {
624                operator_log::warn(
625                    module_path!(),
626                    format!("failed to read env-store public_base_url, falling back: {err:#}"),
627                );
628                None
629            }
630        };
631    let tenant = demo_config.tenant.clone();
632    let team = demo_config.team.clone();
633    let runtime_paths =
634        runtime_state::RuntimePaths::new(state_dir.clone(), tenant.clone(), team.clone());
635    runtime_state::clear_stop_request(&runtime_paths)?;
636
637    // Apply tunnel configuration from setup answers (.greentic/tunnel.json),
638    // then fall back to deployer auto-detection, then interactive prompt.
639    // CLI flags (--cloudflared/--ngrok) always take precedence.
640    if !request.tunnel_explicit
641        && let Some(tunnel) = load_tunnel_config(&config_dir)
642    {
643        match tunnel.mode.as_deref() {
644            Some("cloudflared") => {
645                operator_log::info(
646                    module_path!(),
647                    "tunnel mode 'cloudflared' configured in setup answers",
648                );
649                request.cloudflared = CloudflaredModeArg::On;
650                request.tunnel_explicit = true;
651            }
652            Some("ngrok") => {
653                operator_log::info(
654                    module_path!(),
655                    "tunnel mode 'ngrok' configured in setup answers",
656                );
657                request.ngrok = NgrokModeArg::On;
658                request.tunnel_explicit = true;
659            }
660            Some("off") => {
661                operator_log::info(
662                    module_path!(),
663                    "tunnel mode 'off' configured in setup answers",
664                );
665                request.tunnel_explicit = true;
666            }
667            _ => {}
668        }
669    }
670
671    // Auto-enable cloudflared when no deployer packs are present in the bundle
672    // (i.e. local dev mode). External webhooks (Webex, Telegram, etc.) need a
673    // public URL to reach the local instance.
674    if !request.tunnel_explicit {
675        let has_deployer =
676            !greentic_setup::deployment_targets::discover_deployer_pack_candidates(&config_dir)
677                .unwrap_or_default()
678                .is_empty();
679        if !has_deployer {
680            operator_log::info(
681                module_path!(),
682                "no deployer packs detected; defaulting to cloudflared tunnel",
683            );
684            request.cloudflared = CloudflaredModeArg::On;
685            request.tunnel_explicit = true;
686        }
687    }
688
689    // If the user didn't explicitly set a tunnel flag, prompt for tunnel selection
690    tunnel_prompt::maybe_prompt_tunnel(&mut request);
691
692    // Mutual exclusivity: if ngrok is explicitly enabled, disable cloudflared
693    // This allows `--ngrok on` to work without needing `--cloudflared off`
694    let effective_cloudflared = match (&request.cloudflared, &request.ngrok) {
695        // ngrok explicitly enabled → disable cloudflared (unless cloudflared also explicitly set)
696        (CloudflaredModeArg::On, NgrokModeArg::On) => {
697            operator_log::info(
698                module_path!(),
699                "ngrok enabled, disabling cloudflared (use --cloudflared on --ngrok off to override)",
700            );
701            CloudflaredModeArg::Off
702        }
703        (mode, _) => *mode,
704    };
705
706    let cloudflared = match effective_cloudflared {
707        CloudflaredModeArg::Off => None,
708        CloudflaredModeArg::On => {
709            let explicit = request.cloudflared_binary.clone();
710            let binary = bin_resolver::resolve_binary(
711                "cloudflared",
712                &bin_resolver::ResolveCtx {
713                    config_dir: config_dir.clone(),
714                    explicit_path: explicit,
715                },
716            )?;
717            Some(cloudflared::CloudflaredConfig {
718                binary,
719                local_port: demo_config.services.gateway.port,
720                extra_args: Vec::new(),
721                restart: restart.contains("cloudflared"),
722            })
723        }
724    };
725
726    let ngrok = match request.ngrok {
727        NgrokModeArg::Off => None,
728        NgrokModeArg::On => {
729            let explicit = request.ngrok_binary.clone();
730            let binary = bin_resolver::resolve_binary(
731                "ngrok",
732                &bin_resolver::ResolveCtx {
733                    config_dir: config_dir.clone(),
734                    explicit_path: explicit,
735                },
736            )?;
737            Some(ngrok::NgrokConfig {
738                binary,
739                local_port: demo_config.services.gateway.port,
740                extra_args: Vec::new(),
741                restart: restart.contains("ngrok"),
742            })
743        }
744    };
745
746    let handles = runtime::demo_up_services(
747        &config_path,
748        &demo_config,
749        &static_routes,
750        configured_public_base_url,
751        env_store_public_base_url,
752        cloudflared,
753        ngrok,
754        &restart,
755        request.runner_binary.clone(),
756        &log_dir,
757        request.verbose,
758        request.no_browser,
759    )?;
760
761    let _admin_server = if request.admin {
762        let resolved_certs_dir = admin_certs::resolve_admin_certs_dir(
763            &config_dir,
764            &state_dir,
765            request.admin_certs_dir.as_deref(),
766        )?;
767        let admin_cert_refs = admin_certs::load_admin_cert_refs();
768        operator_log::info(
769            module_path!(),
770            format!(
771                "admin certs source={} path={}",
772                resolved_certs_dir.source.as_str(),
773                resolved_certs_dir.path.display()
774            ),
775        );
776        if !admin_cert_refs.is_empty() {
777            operator_log::info(
778                module_path!(),
779                format!("admin cert refs {}", admin_cert_refs.join(" ")),
780            );
781        }
782        let tls_config = greentic_setup::admin::AdminTlsConfig {
783            server_cert: resolved_certs_dir.path.join("server.crt"),
784            server_key: resolved_certs_dir.path.join("server.key"),
785            client_ca: resolved_certs_dir.path.join("ca.crt"),
786            allowed_clients: admin_certs::load_admin_allowed_clients(
787                &config_dir,
788                &request.admin_allowed_clients,
789            ),
790            port: request.admin_port,
791        };
792        let admin_config = admin_server::AdminServerConfig {
793            tls_config,
794            bundle_root: config_dir.clone(),
795            runtime_paths: runtime_paths.clone(),
796        };
797        Some(
798            admin_server::AdminServer::start(admin_config).map_err(|err| {
799                anyhow!("admin mode requested but admin server failed to start: {err}")
800            })?,
801        )
802    } else {
803        None
804    };
805
806    operator_log::info(
807        module_path!(),
808        format!(
809            "demo start running config={} tenant={} team={}",
810            config_path.display(),
811            tenant,
812            team
813        ),
814    );
815    println!("\nReady. Press Ctrl+C to stop.");
816    let shutdown_reason = wait_for_shutdown(&runtime_paths)?;
817    operator_log::info(
818        module_path!(),
819        format!(
820            "runtime shutdown requested via {}",
821            shutdown_reason.as_str()
822        ),
823    );
824    if let Some(server) = _admin_server {
825        let _ = server.stop();
826    }
827    handles.stop()?;
828    runtime::demo_down_runtime(&state_dir, &tenant, &team, false)?;
829    let _ = runtime_state::clear_stop_request(&runtime_paths);
830    Ok(())
831}
832
/// Log targets that [`build_trace_filter`] pins to `warn`, appended after the
/// directives taken from the user's `RUST_LOG`.
///
/// These are chatty runtime/transport internals (wasm engine, HTTP stack, TLS,
/// async plumbing) whose debug output swamps the tracing log and rarely helps
/// when debugging greentic itself. Edit this list to change what is clamped.
const NOISY_TRACE_TARGETS: &[&str] = &[
    // Wasm engine and code generation.
    "wasmtime",
    "wasmtime_wasi",
    "wasi_common",
    "cranelift_codegen",
    "cranelift_wasm",
    "regalloc2",
    // HTTP / TLS / websocket stack.
    "h2",
    "hyper",
    "hyper_util",
    "rustls",
    "tokio_util",
    "tokio_tungstenite",
    "tungstenite",
    // Low-level async plumbing.
    "want",
    "mio",
    "tower",
];
855
856/// Build the trace.log `EnvFilter`. Starts from `RUST_LOG` (or `info` when
857/// unset) and then forcibly clamps known-noisy crates (wasmtime, h2, hyper,
858/// rustls, etc.) to `warn`. EnvFilter resolves last-write-wins per target, so
859/// appending after the user's directives is what makes the clamp stick.
860fn build_trace_filter() -> tracing_subscriber::EnvFilter {
861    use tracing_subscriber::EnvFilter;
862    let base = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
863    NOISY_TRACE_TARGETS.iter().fold(base, |filter, target| {
864        match format!("{target}=warn").parse() {
865            Ok(directive) => filter.add_directive(directive),
866            Err(_) => filter,
867        }
868    })
869}
870
871/// Install a `tracing` subscriber writing to `<log_dir>/trace.log`, filtered by
872/// `RUST_LOG` (defaults to `info`). Returns the appender guard, which must be
873/// kept alive for the process lifetime so the non-blocking writer flushes.
874fn init_trace_log(
875    log_dir: &std::path::Path,
876) -> Option<tracing_appender::non_blocking::WorkerGuard> {
877    use std::fs::OpenOptions;
878    use tracing_subscriber::layer::SubscriberExt;
879    use tracing_subscriber::util::SubscriberInitExt;
880
881    // Unified host log: operator_log writes here too; both formats co-exist
882    // line-by-line in append mode.
883    let path = log_dir.join("system.log");
884    let file = match OpenOptions::new().create(true).append(true).open(&path) {
885        Ok(f) => f,
886        Err(err) => {
887            operator_log::warn(
888                module_path!(),
889                format!("could not open system.log at {}: {err}", path.display()),
890            );
891            return None;
892        }
893    };
894    let (nb, guard) = tracing_appender::non_blocking(file);
895    let rust_log = std::env::var("RUST_LOG").unwrap_or_else(|_| "<unset>".to_string());
896    let filter = build_trace_filter();
897    let layer = tracing_subscriber::fmt::layer()
898        .with_writer(nb)
899        .with_ansi(false)
900        .with_target(true);
901    match tracing_subscriber::registry()
902        .with(filter)
903        .with(layer)
904        .try_init()
905    {
906        Ok(()) => {
907            operator_log::info(
908                module_path!(),
909                format!(
910                    "tracing subscriber writing to {} (RUST_LOG={rust_log})",
911                    path.display()
912                ),
913            );
914            tracing::info!(
915                target: "greentic_start",
916                rust_log = %rust_log,
917                "tracing subscriber installed"
918            );
919        }
920        Err(err) => {
921            operator_log::warn(
922                module_path!(),
923                format!(
924                    "tracing subscriber try_init failed (another subscriber already installed?): {err}"
925                ),
926            );
927            return None;
928        }
929    }
930    Some(guard)
931}
932
933/// Idempotently auto-create the `local` Environment on first `gtc start`.
934///
935/// Per A4 of `plans/next-gen-deployment.md`: every `gtc start`, `gtc up`, or
936/// `gtc restart` invocation guarantees a `local` Environment exists with the
937/// five default capability-slot bindings (deployer / secrets / telemetry /
938/// sessions / state) before any runner work runs. Subsequent calls find the
939/// env on disk and stay silent.
940fn bootstrap_local_environment() -> anyhow::Result<()> {
941    use greentic_deployer::cli::bootstrap::{LocalEnvOutcome, ensure_local_environment};
942    use greentic_deployer::environment::LocalFsStore;
943
944    let root = LocalFsStore::default_root()
945        .context("Cannot determine default environment store root (no home directory).")?;
946    let store = LocalFsStore::new(root.clone());
947    let (_env, outcome) = ensure_local_environment(&store, None)
948        .with_context(|| format!("Bootstrapping `local` environment at {}", root.display()))?;
949    if outcome == LocalEnvOutcome::Created {
950        operator_log::info(
951            module_path!(),
952            format!(
953                "bootstrapped `local` environment with default capability bindings at {}",
954                root.display()
955            ),
956        );
957    }
958    Ok(())
959}
960
961fn apply_nats_overrides(config: &mut config::DemoConfig, args: &StartRequest) {
962    let nats_mode = if args.no_nats {
963        NatsModeArg::Off
964    } else {
965        args.nats
966    };
967
968    if let Some(nats_url) = args.nats_url.as_ref() {
969        config.services.nats.url = nats_url.clone();
970    }
971
972    match nats_mode {
973        NatsModeArg::Off => {
974            config.services.nats.enabled = false;
975            config.services.nats.spawn.enabled = false;
976        }
977        NatsModeArg::On => {
978            config.services.nats.enabled = true;
979            config.services.nats.spawn.enabled = true;
980        }
981        NatsModeArg::External => {
982            config.services.nats.enabled = true;
983            config.services.nats.spawn.enabled = false;
984        }
985    }
986}
987
988fn resolve_state_dir(state_dir: Option<PathBuf>, bundle: Option<&str>) -> anyhow::Result<PathBuf> {
989    if let Some(state_dir) = state_dir {
990        return Ok(state_dir);
991    }
992    if let Some(bundle_ref) = bundle {
993        let resolved = bundle_ref::resolve_bundle_ref(bundle_ref)?;
994        return Ok(resolved.bundle_dir.join("state"));
995    }
996    Ok(PathBuf::from("state"))
997}
998
999/// Tunnel configuration loaded from `.greentic/tunnel.json`.
1000/// Written by `greentic-setup` when `platform_setup.tunnel` is present in
1001/// the setup answers document.
1002#[derive(serde::Deserialize)]
1003struct TunnelConfig {
1004    mode: Option<String>,
1005}
1006
1007fn load_tunnel_config(bundle_root: &std::path::Path) -> Option<TunnelConfig> {
1008    let path = bundle_root.join(".greentic").join("tunnel.json");
1009    let raw = std::fs::read_to_string(&path).ok()?;
1010    serde_json::from_str(&raw).ok()
1011}
1012
/// Why the blocking wait in `wait_for_shutdown` returned.
enum ShutdownReason {
    /// The process received Ctrl+C.
    CtrlC,
    /// A stop request was found in the runtime state directory.
    AdminStop,
}

impl ShutdownReason {
    /// Stable identifier for this reason, used in log messages.
    fn as_str(&self) -> &'static str {
        match *self {
            ShutdownReason::CtrlC => "ctrl_c",
            ShutdownReason::AdminStop => "admin_stop",
        }
    }
}
1026
1027fn wait_for_shutdown(paths: &runtime_state::RuntimePaths) -> anyhow::Result<ShutdownReason> {
1028    let runtime =
1029        tokio::runtime::Runtime::new().context("failed to spawn runtime for Ctrl+C listener")?;
1030    let paths = paths.clone();
1031    runtime.block_on(async move {
1032        loop {
1033            tokio::select! {
1034                result = tokio::signal::ctrl_c() => {
1035                    result.map_err(|err| anyhow!("failed to wait for Ctrl+C: {err}"))?;
1036                    return Ok(ShutdownReason::CtrlC);
1037                }
1038                _ = tokio::time::sleep(std::time::Duration::from_millis(250)) => {
1039                    if runtime_state::read_stop_request(&paths)?.is_some() {
1040                        return Ok(ShutdownReason::AdminStop);
1041                    }
1042                }
1043            }
1044        }
1045    })
1046}
1047
/// Process-wide mutex that tests take before touching environment variables,
/// so that such tests do not run concurrently with each other.
#[cfg(test)]
pub(crate) fn test_env_lock() -> &'static std::sync::Mutex<()> {
    // `Mutex::new` is const, so the lock can live directly in a static.
    static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
    &ENV_LOCK
}
1053
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::{OsStr, OsString};
    use std::path::Path;
    use std::sync::MutexGuard;
    use std::thread;
    use std::time::Duration;

    // ---- shared helpers ---------------------------------------------------

    /// Take the shared env lock. A poisoned lock (an earlier test panicked
    /// while holding it) is recovered instead of cascading the failure into
    /// every later test.
    fn lock_env() -> MutexGuard<'static, ()> {
        test_env_lock().lock().unwrap_or_else(|err| err.into_inner())
    }

    /// RAII guard for one environment variable: remembers the value found at
    /// construction and puts it back (or removes the variable) on drop, also
    /// when the test body panics.
    struct EnvVarGuard {
        key: String,
        prev: Option<OsString>,
    }

    impl EnvVarGuard {
        /// Set `key` to `value` until the guard is dropped.
        fn set(key: &str, value: impl AsRef<OsStr>) -> Self {
            let prev = std::env::var_os(key);
            // SAFETY: tests holding `test_env_lock` serialize env mutations.
            unsafe {
                std::env::set_var(key, value);
            }
            Self {
                key: key.to_string(),
                prev,
            }
        }

        /// Remove `key` until the guard is dropped.
        fn unset(key: &str) -> Self {
            let prev = std::env::var_os(key);
            // SAFETY: tests holding `test_env_lock` serialize env mutations.
            unsafe {
                std::env::remove_var(key);
            }
            Self {
                key: key.to_string(),
                prev,
            }
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            // SAFETY: tests holding `test_env_lock` serialize env mutations.
            unsafe {
                match self.prev.take() {
                    Some(value) => std::env::set_var(&self.key, value),
                    None => std::env::remove_var(&self.key),
                }
            }
        }
    }

    /// True when the printed filter contains `directive` as a whole
    /// comma-separated entry (so `tungstenite=warn` is not satisfied by
    /// `tokio_tungstenite=warn`).
    fn has_directive(printed: &str, directive: &str) -> bool {
        printed.split(',').any(|entry| entry.trim() == directive)
    }

    // ---- trace filter -----------------------------------------------------

    #[test]
    fn build_trace_filter_clamps_noisy_targets_even_when_rust_log_unset() {
        let _guard = lock_env();
        let _rust_log = EnvVarGuard::unset("RUST_LOG");
        let printed = build_trace_filter().to_string();
        for target in NOISY_TRACE_TARGETS {
            assert!(
                has_directive(&printed, &format!("{target}=warn")),
                "expected `{target}=warn` in filter, got: {printed}"
            );
        }
    }

    #[test]
    fn build_trace_filter_clamps_noisy_targets_overriding_explicit_debug() {
        let _guard = lock_env();
        let _rust_log = EnvVarGuard::set("RUST_LOG", "wasmtime=debug,info");
        let printed = build_trace_filter().to_string();
        // The clamp is appended after the user's directive for the same
        // target, so it must replace it rather than sit next to it.
        assert!(
            has_directive(&printed, "wasmtime=warn"),
            "wasmtime clamp must override RUST_LOG override, got: {printed}"
        );
        assert!(
            !has_directive(&printed, "wasmtime=debug"),
            "user's wasmtime=debug must not survive the clamp, got: {printed}"
        );
    }

    // ---- StartRequest fixtures ---------------------------------------------

    /// Baseline request pointing at `bundle`: everything optional is off and
    /// the tunnel choice is marked explicit so no prompt/auto-enable kicks in.
    fn make_start_request(bundle: &Path) -> StartRequest {
        StartRequest {
            bundle: Some(bundle.display().to_string()),
            tenant: None,
            team: None,
            no_nats: false,
            nats: NatsModeArg::Off,
            nats_url: None,
            config: None,
            cloudflared: CloudflaredModeArg::Off,
            cloudflared_binary: None,
            ngrok: NgrokModeArg::Off,
            ngrok_binary: None,
            runner_binary: None,
            restart: Vec::new(),
            log_dir: None,
            verbose: false,
            quiet: false,
            no_browser: false,
            admin: false,
            admin_port: 9443,
            admin_certs_dir: None,
            admin_allowed_clients: Vec::new(),
            tunnel_explicit: true,
        }
    }

    /// Bundle-less request that differs from the baseline only in its NATS
    /// arguments.
    fn make_nats_request(no_nats: bool, nats: NatsModeArg, nats_url: Option<&str>) -> StartRequest {
        StartRequest {
            bundle: None,
            no_nats,
            nats,
            nats_url: nats_url.map(Into::into),
            ..make_start_request(Path::new(""))
        }
    }

    // ---- apply_nats_overrides ---------------------------------------------

    #[test]
    fn apply_nats_overrides_disables_nats_for_flag() {
        let mut config = config::DemoConfig::default();
        let args = make_nats_request(false, NatsModeArg::Off, None);
        apply_nats_overrides(&mut config, &args);
        assert!(!config.services.nats.enabled);
        assert!(!config.services.nats.spawn.enabled);
    }

    #[test]
    fn apply_nats_overrides_no_nats_flag_wins_over_mode() {
        let mut config = config::DemoConfig::default();
        // `no_nats` must force the mode to Off even when `nats` says On.
        let args = make_nats_request(true, NatsModeArg::On, None);
        apply_nats_overrides(&mut config, &args);
        assert!(!config.services.nats.enabled);
        assert!(!config.services.nats.spawn.enabled);
    }

    #[test]
    fn apply_nats_overrides_uses_external_url_without_spawn() {
        let mut config = config::DemoConfig::default();
        let args = make_nats_request(
            false,
            NatsModeArg::External,
            Some("nats://127.0.0.1:5555"),
        );
        apply_nats_overrides(&mut config, &args);
        assert!(config.services.nats.enabled);
        assert!(!config.services.nats.spawn.enabled);
        assert_eq!(config.services.nats.url, "nats://127.0.0.1:5555");
    }

    // ---- resolve_state_dir --------------------------------------------------

    #[test]
    fn resolve_state_dir_uses_bundle_state_when_requested() {
        let temp = tempfile::tempdir().expect("tempdir");
        let bundle = temp.path();
        let state_dir =
            resolve_state_dir(None, Some(bundle.to_string_lossy().as_ref())).expect("state dir");
        assert_eq!(state_dir, bundle.join("state"));
    }

    // ---- embedded start / restart ------------------------------------------

    /// Create a minimal bundle directory containing only the demo config.
    fn write_demo_bundle(bundle: &Path) {
        std::fs::create_dir_all(bundle).expect("bundle dir");
        std::fs::write(
            bundle.join("greentic.demo.yaml"),
            "tenant: demo\nteam: default\n",
        )
        .expect("write demo config");
    }

    /// RAII guard that points `$HOME` at the given directory for the lifetime
    /// of the returned value, restoring the previous value on drop. Used to
    /// keep `bootstrap_local_environment` (and any other HOME-rooted state)
    /// from writing into the host's real `~/.greentic` during tests.
    struct HomeOverride {
        _home: EnvVarGuard,
    }

    impl HomeOverride {
        fn set(home: &Path) -> Self {
            Self {
                _home: EnvVarGuard::set("HOME", home),
            }
        }
    }

    /// Spawn a thread that writes a stop request for the `demo`/`default`
    /// runtime under `<bundle>/state` after a short delay, so that a blocking
    /// start call returns on its own.
    fn request_runtime_stop(bundle: &Path) -> thread::JoinHandle<()> {
        let runtime_paths =
            runtime_state::RuntimePaths::new(bundle.join("state"), "demo", "default");
        thread::spawn(move || {
            thread::sleep(Duration::from_millis(350));
            runtime_state::write_stop_request(
                &runtime_paths,
                &runtime_state::StopRequest {
                    requested_by: "test".to_string(),
                    reason: Some("coverage".to_string()),
                },
            )
            .expect("write stop request");
        })
    }

    /// Post-conditions shared by the embedded start/restart tests: the service
    /// manifest was written and the stop request was cleared again.
    fn assert_stopped_cleanly(bundle: &Path) {
        let paths = runtime_state::RuntimePaths::new(bundle.join("state"), "demo", "default");
        assert!(paths.service_manifest_path().exists());
        assert!(
            runtime_state::read_stop_request(&paths)
                .expect("read stop")
                .is_none()
        );
    }

    #[test]
    fn run_start_request_embedded_mode_stops_cleanly() {
        let _env_guard = lock_env();
        crate::operator_log::reset_for_tests();
        let temp = tempfile::tempdir().expect("tempdir");
        let _home = HomeOverride::set(temp.path());
        let bundle = temp.path().join("bundle");
        write_demo_bundle(&bundle);
        let stop_thread = request_runtime_stop(&bundle);

        let request = make_start_request(&bundle);
        run_start_request(request).expect("start request");
        stop_thread.join().expect("join stop thread");

        assert_stopped_cleanly(&bundle);
    }

    #[test]
    fn run_restart_request_embedded_mode_stops_cleanly() {
        let _env_guard = lock_env();
        crate::operator_log::reset_for_tests();
        let temp = tempfile::tempdir().expect("tempdir");
        let _home = HomeOverride::set(temp.path());
        let bundle = temp.path().join("bundle");
        write_demo_bundle(&bundle);
        let stop_thread = request_runtime_stop(&bundle);

        let mut request = make_start_request(&bundle);
        request.verbose = true;
        run_restart_request(request).expect("restart request");
        stop_thread.join().expect("join stop thread");

        assert_stopped_cleanly(&bundle);
    }

    #[test]
    fn run_start_request_quiet_mode_returns_bundle_errors() {
        let _env_guard = lock_env();
        crate::operator_log::reset_for_tests();
        let temp = tempfile::tempdir().expect("tempdir");
        let _home = HomeOverride::set(temp.path());
        let missing_bundle = temp.path().join("missing-bundle");
        let mut request = make_start_request(&missing_bundle);
        request.quiet = true;

        let err = run_start_request(request).expect_err("missing bundle should error");
        let message = err.to_string();
        assert!(
            message.contains("bundle config not found")
                || message.contains("bundle path does not exist")
                || message.contains("unsupported bundle reference"),
            "unexpected error: {message}"
        );
    }

    // The no-bundle runtime-config boot path (B0 load → B2 activation → fail
    // loud until B3 serving) is covered by the fully-isolated
    // `revision_boot::tests::activate_*` unit tests, which exercise
    // `activate_runtime_config` directly against an explicit store root. A
    // `run_start`-level test is deliberately omitted here: it must override
    // `HOME`/env while activation runs, which reliably trips a pre-existing
    // isolation gap in the lock-free `messaging_app` secrets tests (they read
    // env-derived paths without `test_env_lock`).

    // ---- deployer pack discovery --------------------------------------------
    //
    // These two tests exercise only the discovery call that the cloudflared
    // auto-enable decision is based on; they do not run the start path itself.

    #[test]
    fn auto_enables_cloudflared_when_no_deployer_packs() {
        let dir = tempfile::tempdir().expect("tempdir");
        // Empty bundle dir → no deployer packs
        std::fs::create_dir_all(dir.path().join("packs")).expect("packs dir");
        let candidates =
            greentic_setup::deployment_targets::discover_deployer_pack_candidates(dir.path())
                .unwrap_or_default();
        assert!(
            candidates.is_empty(),
            "empty bundle should have no deployer"
        );
    }

    #[test]
    fn detects_deployer_pack_when_present() {
        let dir = tempfile::tempdir().expect("tempdir");
        let deployer_dir = dir.path().join("providers").join("deployer");
        std::fs::create_dir_all(&deployer_dir).expect("deployer dir");
        std::fs::write(deployer_dir.join("terraform.gtpack"), b"fake").expect("write pack");
        let candidates =
            greentic_setup::deployment_targets::discover_deployer_pack_candidates(dir.path())
                .unwrap_or_default();
        assert!(
            !candidates.is_empty(),
            "bundle with terraform.gtpack should detect deployer"
        );
    }

    // ---- local environment bootstrap ----------------------------------------

    /// Where the bootstrapped `local` environment file is expected under a
    /// given home directory.
    fn local_env_file(home: &Path) -> std::path::PathBuf {
        home.join(".greentic")
            .join("environments")
            .join("local")
            .join("environment.json")
    }

    #[test]
    fn bootstrap_creates_local_env_under_default_root() {
        let _env_guard = lock_env();
        let temp = tempfile::tempdir().expect("tempdir");
        let _home = HomeOverride::set(temp.path());
        super::bootstrap_local_environment().expect("first bootstrap");
        let env_file = local_env_file(temp.path());
        assert!(env_file.exists(), "expected env file at {env_file:?}");
    }

    #[test]
    fn bootstrap_is_idempotent_across_calls() {
        let _env_guard = lock_env();
        let temp = tempfile::tempdir().expect("tempdir");
        let _home = HomeOverride::set(temp.path());
        super::bootstrap_local_environment().expect("first bootstrap");
        super::bootstrap_local_environment().expect("second bootstrap");
        assert!(local_env_file(temp.path()).exists());
    }

    // ---- A4b compat-alias tests ------------------------------------------
    //
    // `GREENTIC_ENV` and `GREENTIC_DISABLE_DEV_ALIAS` are process-global;
    // serialize via the shared `test_env_lock`. Each test snapshots and
    // restores both vars + the warning latch so neighbors stay clean.

    /// Clears both alias-related variables and resets the warning latch; the
    /// previous values come back when the guard is dropped.
    struct EnvVarsOverride {
        _env: EnvVarGuard,
        _disable: EnvVarGuard,
    }

    impl EnvVarsOverride {
        fn clean() -> Self {
            let guards = Self {
                _env: EnvVarGuard::unset("GREENTIC_ENV"),
                _disable: EnvVarGuard::unset(DISABLE_ALIAS_ENV_VAR),
            };
            super::compat_alias::reset_warning_latch_for_tests();
            guards
        }
    }

    /// Set a variable without installing a restore guard; callers rely on an
    /// `EnvVarsOverride` already in scope to put the old value back.
    fn set_env_var(key: &str, value: &str) {
        // SAFETY: tests holding `test_env_lock` serialize env mutations.
        unsafe {
            std::env::set_var(key, value);
        }
    }

    #[test]
    fn resolve_env_returns_local_by_default() {
        let _guard = lock_env();
        let _env = EnvVarsOverride::clean();
        assert_eq!(resolve_env(None), "local");
    }

    #[test]
    fn resolve_env_passes_through_non_legacy_override() {
        let _guard = lock_env();
        let _env = EnvVarsOverride::clean();
        assert_eq!(resolve_env(Some("staging")), "staging");
        assert_eq!(resolve_env(Some("prod")), "prod");
        assert_eq!(resolve_env(Some("local")), "local");
    }

    #[test]
    fn resolve_env_remaps_dev_override_to_local() {
        let _guard = lock_env();
        let _env = EnvVarsOverride::clean();
        assert_eq!(resolve_env(Some("dev")), "local");
    }

    #[test]
    fn resolve_env_remaps_dev_env_var_to_local() {
        let _guard = lock_env();
        let _env = EnvVarsOverride::clean();
        set_env_var("GREENTIC_ENV", "dev");
        assert_eq!(resolve_env(None), "local");
    }

    #[test]
    fn alias_warning_latches_once_until_reset() {
        let _guard = lock_env();
        let _env = EnvVarsOverride::clean();
        // First two calls remap; only the first fires warn. We can't count
        // tracing events without wiring a subscriber, so we exercise the
        // latch state by re-resetting and re-calling.
        assert_eq!(compat_alias::apply_dev_alias("dev"), "local");
        assert_eq!(compat_alias::apply_dev_alias("dev"), "local");
        compat_alias::reset_warning_latch_for_tests();
        assert_eq!(compat_alias::apply_dev_alias("dev"), "local");
    }

    #[test]
    fn disable_alias_env_var_panics_on_dev() {
        let _guard = lock_env();
        let _env = EnvVarsOverride::clean();
        set_env_var(DISABLE_ALIAS_ENV_VAR, "1");
        let result = std::panic::catch_unwind(|| resolve_env(Some("dev")));
        assert!(
            result.is_err(),
            "resolve_env should panic when alias is disabled and input is `dev`"
        );
    }

    #[test]
    fn disable_alias_accepts_truthy_strings() {
        let _guard = lock_env();
        for value in ["1", "true", "TRUE", "yes", "YES", "on", " true "] {
            // Fresh snapshot per value so each iteration starts from a clean
            // environment and a reset warning latch.
            let _env = EnvVarsOverride::clean();
            set_env_var(DISABLE_ALIAS_ENV_VAR, value);
            let result = std::panic::catch_unwind(|| resolve_env(Some("dev")));
            assert!(
                result.is_err(),
                "DISABLE value `{value}` should hard-fail on dev resolution"
            );
        }
    }

    #[test]
    fn disable_alias_does_not_panic_on_non_legacy_values() {
        let _guard = lock_env();
        let _env = EnvVarsOverride::clean();
        set_env_var(DISABLE_ALIAS_ENV_VAR, "1");
        // Non-legacy values pass through unaffected even when the alias is
        // disabled — the gate only fires on `dev`.
        assert_eq!(resolve_env(Some("local")), "local");
        assert_eq!(resolve_env(Some("staging")), "staging");
        assert_eq!(resolve_env(None), "local");
    }
}