Skip to main content

forge/
runtime.rs

1//! FORGE - The Rust Full-Stack Framework
2//!
3//! Single binary runtime that provides:
4//! - HTTP Gateway with RPC endpoints
5//! - SSE server for real-time subscriptions
6//! - Background job workers
7//! - Cron scheduler
8//! - Workflow engine
9//! - Cluster coordination
10
11use std::future::Future;
12use std::net::IpAddr;
13use std::path::PathBuf;
14use std::pin::Pin;
15use std::sync::Arc;
16use std::time::Duration;
17
18use axum::Router;
19use axum::body::Body;
20use axum::http::Request;
21use axum::response::Response;
22use tokio::sync::broadcast;
23
24use forge_core::CircuitBreakerClient;
25use forge_core::cluster::{LeaderRole, NodeId, NodeInfo, NodeRole, NodeStatus};
26use forge_core::config::{ForgeConfig, NodeRole as ConfigNodeRole};
27use forge_core::error::{ForgeError, Result};
28use forge_core::function::{ForgeMutation, ForgeQuery};
29use forge_core::mcp::ForgeMcpTool;
30use forge_runtime::migrations::{Migration, MigrationRunner, load_migrations_from_dir};
31
32use forge_runtime::cluster::{
33    GracefulShutdown, HeartbeatConfig, HeartbeatLoop, LeaderConfig, LeaderElection, NodeRegistry,
34    ShutdownConfig,
35};
36use forge_runtime::cron::{CronRegistry, CronRunner, CronRunnerConfig};
37use forge_runtime::daemon::{DaemonRegistry, DaemonRunner};
38use forge_runtime::db::Database;
39use forge_runtime::function::FunctionRegistry;
40use forge_runtime::gateway::{AuthConfig, GatewayConfig as RuntimeGatewayConfig, GatewayServer};
41use forge_runtime::jobs::{JobDispatcher, JobQueue, JobRegistry, Worker, WorkerConfig};
42use forge_runtime::mcp::McpToolRegistry;
43use forge_runtime::webhook::{WebhookRegistry, WebhookState, webhook_handler};
44use forge_runtime::workflow::{
45    EventStore, WorkflowExecutor, WorkflowRegistry, WorkflowScheduler, WorkflowSchedulerConfig,
46};
47use tokio_util::sync::CancellationToken;
48
/// Type alias for frontend handler function.
///
/// A plain function pointer that receives an axum `Request<Body>` and returns
/// a boxed, pinned, `Send` future resolving to a `Response`. When one is
/// configured, `Forge::run` installs it as the router's fallback for `GET`
/// requests (used for an embedded SPA).
pub type FrontendHandler = fn(Request<Body>) -> Pin<Box<dyn Future<Output = Response> + Send>>;
51
52/// Prelude module for common imports.
53pub mod prelude {
54    // Common types
55    pub use chrono::{DateTime, Utc};
56    pub use uuid::Uuid;
57
58    // Serde re-exports for user code
59    pub use serde::{Deserialize, Serialize};
60    pub use serde_json;
61
62    /// Timestamp type alias for convenience.
63    pub type Timestamp = DateTime<Utc>;
64
65    // Core types
66    pub use forge_core::auth::TokenPair;
67    pub use forge_core::cluster::NodeRole;
68    pub use forge_core::config::ForgeConfig;
69    pub use forge_core::cron::{CronContext, ForgeCron};
70    pub use forge_core::daemon::{DaemonContext, ForgeDaemon};
71    pub use forge_core::env::EnvAccess;
72    pub use forge_core::error::{ForgeError, Result};
73    pub use forge_core::function::{
74        AuthContext, ForgeMutation, ForgeQuery, MutationContext, QueryContext,
75    };
76    pub use forge_core::job::{ForgeJob, JobContext, JobPriority};
77    pub use forge_core::mcp::{ForgeMcpTool, McpToolContext, McpToolResult};
78    pub use forge_core::realtime::Delta;
79    pub use forge_core::schema::{FieldDef, ModelMeta, SchemaRegistry, TableDef};
80    pub use forge_core::schemars::JsonSchema;
81    pub use forge_core::types::Upload;
82    pub use forge_core::webhook::{ForgeWebhook, WebhookContext, WebhookResult, WebhookSignature};
83    pub use forge_core::workflow::{ForgeWorkflow, WorkflowContext};
84
85    // Same axum version the runtime uses, avoids type mismatches in custom handlers
86    pub use axum;
87
88    pub use crate::{Forge, ForgeBuilder};
89}
90
/// The main FORGE runtime.
///
/// Holds the configuration and every registry (functions, MCP tools, jobs,
/// crons, workflows, daemons, webhooks) that `run` wires into background
/// tasks and the HTTP gateway. Construct one through [`Forge::builder`].
pub struct Forge {
    /// Runtime configuration; `run` may override parts of it from the
    /// environment (observability overrides, `PORT`).
    config: ForgeConfig,
    /// Database handle; populated by `run` once the connection is established.
    db: Option<Database>,
    /// Identifier of this node; `run` replaces it with the id of the local
    /// `NodeInfo` it creates.
    node_id: NodeId,
    /// Registered functions (queries and mutations) handed to the gateway.
    function_registry: FunctionRegistry,
    /// Registered MCP tools handed to the gateway.
    mcp_registry: McpToolRegistry,
    /// Registered jobs, shared with the worker and the job dispatcher.
    job_registry: JobRegistry,
    /// Registered cron definitions, shared with the cron runner.
    cron_registry: Arc<CronRegistry>,
    /// Registered workflow definitions.
    workflow_registry: WorkflowRegistry,
    /// Registered daemons, shared with the daemon runner.
    daemon_registry: Arc<DaemonRegistry>,
    /// Registered webhooks, mounted under `/_api/webhooks` by `run`.
    webhook_registry: Arc<WebhookRegistry>,
    /// Broadcast channel used to signal shutdown; `run` subscribes the
    /// database health monitor task and the daemon runner to it.
    shutdown_tx: broadcast::Sender<()>,
    /// Path to user migrations directory (default: ./migrations).
    migrations_dir: PathBuf,
    /// Additional migrations provided programmatically.
    extra_migrations: Vec<Migration>,
    /// Optional frontend handler for embedded SPA.
    frontend_handler: Option<FrontendHandler>,
    /// Custom axum routes merged into the top-level router.
    custom_routes: Option<Router>,
}
113
114impl Forge {
115    /// Create a new builder for configuring FORGE.
116    pub fn builder() -> ForgeBuilder {
117        ForgeBuilder::new()
118    }
119
120    /// Get the node ID.
121    pub fn node_id(&self) -> NodeId {
122        self.node_id
123    }
124
125    /// Get the configuration.
126    pub fn config(&self) -> &ForgeConfig {
127        &self.config
128    }
129
130    /// Get the function registry.
131    pub fn function_registry(&self) -> &FunctionRegistry {
132        &self.function_registry
133    }
134
135    /// Get the function registry mutably.
136    pub fn function_registry_mut(&mut self) -> &mut FunctionRegistry {
137        &mut self.function_registry
138    }
139
140    /// Get the MCP tool registry mutably.
141    pub fn mcp_registry_mut(&mut self) -> &mut McpToolRegistry {
142        &mut self.mcp_registry
143    }
144
145    /// Register an MCP tool without manually accessing the registry.
146    pub fn register_mcp_tool<T: ForgeMcpTool>(&mut self) -> &mut Self {
147        self.mcp_registry.register::<T>();
148        self
149    }
150
151    /// Get the job registry.
152    pub fn job_registry(&self) -> &JobRegistry {
153        &self.job_registry
154    }
155
156    /// Get the job registry mutably.
157    pub fn job_registry_mut(&mut self) -> &mut JobRegistry {
158        &mut self.job_registry
159    }
160
161    /// Get the cron registry.
162    pub fn cron_registry(&self) -> Arc<CronRegistry> {
163        self.cron_registry.clone()
164    }
165
166    /// Get the workflow registry.
167    pub fn workflow_registry(&self) -> &WorkflowRegistry {
168        &self.workflow_registry
169    }
170
171    /// Get the workflow registry mutably.
172    pub fn workflow_registry_mut(&mut self) -> &mut WorkflowRegistry {
173        &mut self.workflow_registry
174    }
175
176    /// Get the daemon registry.
177    pub fn daemon_registry(&self) -> Arc<DaemonRegistry> {
178        self.daemon_registry.clone()
179    }
180
181    /// Get the webhook registry.
182    pub fn webhook_registry(&self) -> Arc<WebhookRegistry> {
183        self.webhook_registry.clone()
184    }
185
186    /// Persist all registered workflow definitions to the database.
187    /// Fails startup if a definition's signature conflicts with a previously
188    /// registered one under the same name+version.
189    async fn persist_workflow_definitions(&self, pool: &sqlx::PgPool) -> Result<()> {
190        for info in self.workflow_registry.definitions() {
191            let status = if info.is_active {
192                "active"
193            } else if info.is_deprecated {
194                "deprecated"
195            } else {
196                "active"
197            };
198
199            // Try to insert. If row exists, check signature matches.
200            let existing = sqlx::query!(
201                r#"
202                SELECT workflow_signature FROM forge_workflow_definitions
203                WHERE workflow_name = $1 AND workflow_version = $2
204                "#,
205                info.name,
206                info.version,
207            )
208            .fetch_optional(pool)
209            .await
210            .map_err(|e| ForgeError::Database(e.to_string()))?;
211
212            if let Some(row) = existing {
213                if row.workflow_signature != info.signature {
214                    return Err(ForgeError::Config(format!(
215                        "Workflow '{}' version '{}' has a different signature than previously registered. \
216                         Persisted contract changed under the same version. \
217                         Expected signature: {}, got: {}. \
218                         Create a new version instead of modifying the existing one.",
219                        info.name, info.version, row.workflow_signature, info.signature
220                    )));
221                }
222                // Update status if changed
223                sqlx::query!(
224                    "UPDATE forge_workflow_definitions SET status = $3 WHERE workflow_name = $1 AND workflow_version = $2",
225                    info.name,
226                    info.version,
227                    status,
228                )
229                .execute(pool)
230                .await
231                .map_err(|e| ForgeError::Database(e.to_string()))?;
232            } else {
233                sqlx::query!(
234                    r#"
235                    INSERT INTO forge_workflow_definitions (workflow_name, workflow_version, workflow_signature, status)
236                    VALUES ($1, $2, $3, $4)
237                    "#,
238                    info.name,
239                    info.version,
240                    info.signature,
241                    status,
242                )
243                .execute(pool)
244                .await
245                .map_err(|e| ForgeError::Database(e.to_string()))?;
246            }
247
248            tracing::debug!(
249                workflow = info.name,
250                version = info.version,
251                signature = info.signature,
252                status = status,
253                "Workflow definition registered"
254            );
255        }
256
257        Ok(())
258    }
259
260    /// Run the FORGE server.
261    pub async fn run(mut self) -> Result<()> {
262        // Apply FORGE_OTEL_* environment variable overrides before initializing
263        self.config.observability.apply_env_overrides();
264
265        // Users shouldn't need tracing_subscriber boilerplate to see logs
266        let telemetry_config = forge_runtime::TelemetryConfig::from_observability_config(
267            &self.config.observability,
268            &self.config.project.name,
269            &self.config.project.version,
270        );
271        match forge_runtime::init_telemetry(
272            &telemetry_config,
273            &self.config.project.name,
274            &self.config.observability.log_level,
275        ) {
276            Ok(true) => {}
277            Ok(false) => {
278                // Subscriber already exists, user set one up manually
279            }
280            Err(e) => {
281                eprintln!("forge: failed to initialize telemetry: {e}");
282            }
283        }
284
285        tracing::debug!("Connecting to database");
286
287        // Connect to database
288        let db =
289            Database::from_config_with_service(&self.config.database, &self.config.project.name)
290                .await?;
291        let pool = db.primary().clone();
292        let jobs_pool = db.jobs_pool().clone();
293        let observability_pool = db.observability_pool().clone();
294        if let Some(handle) = db.start_health_monitor() {
295            let mut shutdown_rx = self.shutdown_tx.subscribe();
296            tokio::spawn(async move {
297                tokio::select! {
298                    _ = shutdown_rx.recv() => {}
299                    _ = handle => {}
300                }
301            });
302        }
303        self.db = Some(db);
304
305        tracing::debug!("Database connected");
306
307        // Run migrations with mesh-safe locking
308        // This acquires an advisory lock, so only one node runs migrations at a time
309        let runner = MigrationRunner::new(pool.clone());
310
311        // Load user migrations from directory + any programmatic ones
312        let mut user_migrations = load_migrations_from_dir(&self.migrations_dir)?;
313        user_migrations.extend(self.extra_migrations.clone());
314
315        runner.run(user_migrations).await?;
316        tracing::debug!("Migrations applied");
317
318        // Persist workflow definitions and validate signatures
319        if !self.workflow_registry.is_empty() {
320            self.persist_workflow_definitions(&pool).await?;
321        }
322
323        // Get local node info
324        let hostname = get_hostname();
325
326        // Support HOST env var (default 0.0.0.0), PORT env var (overrides config)
327        let ip_address: IpAddr = std::env::var("HOST")
328            .unwrap_or_else(|_| "0.0.0.0".to_string())
329            .parse()
330            .unwrap_or_else(|_| "0.0.0.0".parse().expect("valid IP literal"));
331
332        if let Ok(port_str) = std::env::var("PORT")
333            && let Ok(port) = port_str.parse::<u16>()
334        {
335            self.config.gateway.port = port;
336        }
337
338        let roles: Vec<NodeRole> = self
339            .config
340            .node
341            .roles
342            .iter()
343            .map(config_role_to_node_role)
344            .collect();
345
346        let node_info = NodeInfo::new_local(
347            hostname,
348            ip_address,
349            self.config.gateway.port,
350            self.config.gateway.grpc_port,
351            roles.clone(),
352            self.config.node.worker_capabilities.clone(),
353            env!("CARGO_PKG_VERSION").to_string(),
354        );
355
356        let node_id = node_info.id;
357        self.node_id = node_id;
358
359        // Create node registry
360        let node_registry = Arc::new(NodeRegistry::new(pool.clone(), node_info));
361
362        // Register node in cluster
363        if let Err(e) = node_registry.register().await {
364            tracing::debug!("Failed to register node (tables may not exist): {}", e);
365        }
366
367        // Set node status to active
368        if let Err(e) = node_registry.set_status(NodeStatus::Active).await {
369            tracing::debug!("Failed to set node status: {}", e);
370        }
371
372        // Create leader election for scheduler role
373        let leader_election = if roles.contains(&NodeRole::Scheduler) {
374            let election = Arc::new(LeaderElection::new(
375                pool.clone(),
376                node_id,
377                LeaderRole::Scheduler,
378                LeaderConfig::default(),
379            ));
380
381            // Try to become leader
382            if let Err(e) = election.try_become_leader().await {
383                tracing::debug!("Failed to acquire leadership: {}", e);
384            }
385
386            Some(election)
387        } else {
388            None
389        };
390
391        // Create graceful shutdown coordinator
392        let shutdown = Arc::new(GracefulShutdown::new(
393            node_registry.clone(),
394            leader_election.clone(),
395            ShutdownConfig::default(),
396        ));
397
398        // Create HTTP client with circuit breaker for actions and crons
399        let http_client = CircuitBreakerClient::with_defaults(reqwest::Client::new());
400
401        // Start background tasks based on roles
402        let mut handles = Vec::new();
403
404        // Start heartbeat loop
405        {
406            let heartbeat_pool = pool.clone();
407            let heartbeat_node_id = node_id;
408            let config = HeartbeatConfig::from_cluster_config(&self.config.cluster);
409            handles.push(tokio::spawn(async move {
410                let heartbeat = HeartbeatLoop::new(heartbeat_pool, heartbeat_node_id, config);
411                heartbeat.run().await;
412            }));
413        }
414
415        // Start leader election loop if scheduler role
416        if let Some(ref election) = leader_election {
417            let election = election.clone();
418            handles.push(tokio::spawn(async move {
419                election.run().await;
420            }));
421        }
422
423        // Start job worker if worker role
424        if roles.contains(&NodeRole::Worker) {
425            let job_queue = JobQueue::new(jobs_pool.clone());
426            let worker_config = WorkerConfig {
427                id: Some(node_id.as_uuid()),
428                capabilities: self.config.node.worker_capabilities.clone(),
429                max_concurrent: self.config.worker.max_concurrent_jobs,
430                poll_interval: Duration::from_millis(self.config.worker.poll_interval_ms),
431                ..Default::default()
432            };
433
434            let mut worker = Worker::new(
435                worker_config,
436                job_queue,
437                self.job_registry.clone(),
438                jobs_pool.clone(),
439            );
440
441            handles.push(tokio::spawn(async move {
442                if let Err(e) = worker.run().await {
443                    tracing::error!("Worker error: {}", e);
444                }
445            }));
446
447            tracing::debug!("Job worker started");
448        }
449
450        // Start cron runner if scheduler role and is leader
451        if roles.contains(&NodeRole::Scheduler) {
452            let cron_registry = self.cron_registry.clone();
453            let cron_pool = jobs_pool.clone();
454            let cron_http = http_client.clone();
455            let cron_leader_election = leader_election.clone();
456
457            let cron_config = CronRunnerConfig {
458                poll_interval: Duration::from_secs(1),
459                node_id: node_id.as_uuid(),
460                is_leader: cron_leader_election.is_none(),
461                leader_election: cron_leader_election,
462                run_stale_threshold: Duration::from_secs(15 * 60),
463            };
464
465            let cron_runner = CronRunner::new(cron_registry, cron_pool, cron_http, cron_config);
466
467            handles.push(tokio::spawn(async move {
468                if let Err(e) = cron_runner.run().await {
469                    tracing::error!("Cron runner error: {}", e);
470                }
471            }));
472
473            tracing::debug!("Cron scheduler started");
474        }
475
476        // Start workflow scheduler if scheduler role
477        let workflow_shutdown_token = CancellationToken::new();
478        if roles.contains(&NodeRole::Scheduler) {
479            let scheduler_executor = Arc::new(WorkflowExecutor::new(
480                Arc::new(self.workflow_registry.clone()),
481                jobs_pool.clone(),
482                http_client.clone(),
483            ));
484            let event_store = Arc::new(EventStore::new(jobs_pool.clone()));
485            let scheduler = WorkflowScheduler::new(
486                jobs_pool.clone(),
487                scheduler_executor,
488                event_store,
489                WorkflowSchedulerConfig::default(),
490            );
491
492            let shutdown_token = workflow_shutdown_token.clone();
493            handles.push(tokio::spawn(async move {
494                scheduler.run(shutdown_token).await;
495            }));
496
497            tracing::debug!("Workflow scheduler started");
498        }
499
500        // Create job dispatcher and workflow executor for dispatch capabilities
501        let job_queue_for_dispatch = JobQueue::new(jobs_pool.clone());
502        let job_dispatcher = Arc::new(JobDispatcher::new(
503            job_queue_for_dispatch,
504            self.job_registry.clone(),
505        ));
506        let workflow_executor = Arc::new(WorkflowExecutor::new(
507            Arc::new(self.workflow_registry.clone()),
508            jobs_pool.clone(),
509            http_client.clone(),
510        ));
511
512        // Start daemon runner if scheduler role (daemons run as singletons)
513        if roles.contains(&NodeRole::Scheduler) && !self.daemon_registry.is_empty() {
514            let daemon_registry = self.daemon_registry.clone();
515            let daemon_pool = jobs_pool.clone();
516            let daemon_http = http_client.clone();
517            let daemon_shutdown_rx = self.shutdown_tx.subscribe();
518
519            let daemon_runner = DaemonRunner::new(
520                daemon_registry,
521                daemon_pool,
522                daemon_http,
523                node_id.as_uuid(),
524                daemon_shutdown_rx,
525            )
526            .with_job_dispatch(job_dispatcher.clone())
527            .with_workflow_dispatch(workflow_executor.clone());
528
529            handles.push(tokio::spawn(async move {
530                if let Err(e) = daemon_runner.run().await {
531                    tracing::error!("Daemon runner error: {}", e);
532                }
533            }));
534
535            tracing::debug!("Daemon runner started");
536        }
537
538        // Reactor handle for shutdown
539        let mut reactor_handle = None;
540
541        // Start HTTP gateway if gateway role
542        if roles.contains(&NodeRole::Gateway) {
543            let gateway_config = RuntimeGatewayConfig {
544                port: self.config.gateway.port,
545                max_connections: self.config.gateway.max_connections,
546                sse_max_sessions: self.config.gateway.sse_max_sessions,
547                request_timeout_secs: self.config.gateway.request_timeout_secs,
548                cors_enabled: self.config.gateway.cors_enabled
549                    || !self.config.gateway.cors_origins.is_empty(),
550                cors_origins: self.config.gateway.cors_origins.clone(),
551                auth: AuthConfig::from_forge_config(&self.config.auth)
552                    .map_err(|e| ForgeError::Config(e.to_string()))?,
553                mcp: self.config.mcp.clone(),
554                quiet_routes: self.config.gateway.quiet_routes.clone(),
555                max_body_size_bytes: self.config.gateway.max_body_size_bytes()?,
556                token_ttl: forge_core::AuthTokenTtl {
557                    access_token_secs: self.config.auth.access_token_ttl_secs(),
558                    refresh_token_days: self.config.auth.refresh_token_ttl_days(),
559                },
560                project_name: self.config.project.name.clone(),
561            };
562
563            // Build gateway server (pass Database wrapper for read replica routing)
564            let db_ref = self
565                .db
566                .clone()
567                .ok_or_else(|| ForgeError::Internal("Database not initialized".into()))?;
568
569            let mut gateway = GatewayServer::new(
570                gateway_config,
571                self.function_registry.clone(),
572                db_ref.clone(),
573            )
574            .with_job_dispatcher(job_dispatcher.clone())
575            .with_workflow_dispatcher(workflow_executor.clone())
576            .with_mcp_registry(self.mcp_registry.clone());
577
578            // Wire signals (product analytics + diagnostics)
579            if self.config.signals.enabled {
580                let signals_pool = std::sync::Arc::new(db_ref.analytics_pool().clone());
581                let collector = forge_runtime::signals::SignalsCollector::spawn(
582                    signals_pool.clone(),
583                    self.config.signals.batch_size,
584                    std::time::Duration::from_millis(self.config.signals.flush_interval_ms),
585                );
586                gateway = gateway.with_signals_collector(collector);
587
588                // Spawn session reaper
589                forge_runtime::signals::session::spawn_session_reaper(
590                    signals_pool,
591                    self.config.signals.session_timeout_mins,
592                );
593
594                tracing::info!("Signals enabled (analytics + diagnostics)");
595            }
596
597            // Start the reactor for real-time updates
598            let reactor = gateway.reactor();
599            if let Err(e) = reactor.start().await {
600                tracing::error!("Failed to start reactor: {}", e);
601            } else {
602                tracing::debug!("Reactor started");
603                reactor_handle = Some(reactor);
604            }
605
606            // Build API router (all under /_api)
607            let api_router = gateway.router();
608
609            // Build final router with API
610            let mut router = Router::new().nest("/_api", api_router);
611
612            // Mount webhook routes under /_api (bypasses gateway auth middleware)
613            if !self.webhook_registry.is_empty() {
614                use axum::routing::post;
615                use tower_http::cors::{Any, CorsLayer};
616
617                let webhook_state = Arc::new(
618                    WebhookState::new(self.webhook_registry.clone(), pool.clone())
619                        .with_job_dispatcher(job_dispatcher.clone()),
620                );
621
622                // Webhook routes need their own CORS layer since they're outside the API router.
623                // Reuse gateway CORS policy rather than forcing wildcard access.
624                let webhook_cors = if self.config.gateway.cors_enabled
625                    || !self.config.gateway.cors_origins.is_empty()
626                {
627                    if self.config.gateway.cors_origins.iter().any(|o| o == "*") {
628                        CorsLayer::new()
629                            .allow_origin(Any)
630                            .allow_methods(Any)
631                            .allow_headers(Any)
632                    } else {
633                        use axum::http::Method;
634                        let origins: Vec<_> = self
635                            .config
636                            .gateway
637                            .cors_origins
638                            .iter()
639                            .filter_map(|o| o.parse().ok())
640                            .collect();
641                        CorsLayer::new()
642                            .allow_origin(origins)
643                            .allow_methods([
644                                Method::GET,
645                                Method::POST,
646                                Method::PUT,
647                                Method::DELETE,
648                                Method::PATCH,
649                                Method::OPTIONS,
650                            ])
651                            .allow_headers([
652                                axum::http::header::CONTENT_TYPE,
653                                axum::http::header::AUTHORIZATION,
654                                axum::http::header::ACCEPT,
655                                axum::http::HeaderName::from_static("x-webhook-signature"),
656                                axum::http::HeaderName::from_static("x-idempotency-key"),
657                            ])
658                            .allow_credentials(true)
659                    }
660                } else {
661                    CorsLayer::new()
662                };
663
664                let webhook_router = Router::new()
665                    .route("/{*path}", post(webhook_handler).with_state(webhook_state))
666                    .layer(axum::extract::DefaultBodyLimit::max(1024 * 1024))
667                    .layer(
668                        tower::ServiceBuilder::new()
669                            .layer(axum::error_handling::HandleErrorLayer::new(
670                                |err: tower::BoxError| async move {
671                                    if err.is::<tower::timeout::error::Elapsed>() {
672                                        return (
673                                            axum::http::StatusCode::REQUEST_TIMEOUT,
674                                            "Request timed out",
675                                        );
676                                    }
677                                    (
678                                        axum::http::StatusCode::SERVICE_UNAVAILABLE,
679                                        "Server overloaded",
680                                    )
681                                },
682                            ))
683                            .layer(tower::limit::ConcurrencyLimitLayer::new(
684                                self.config.gateway.max_connections,
685                            ))
686                            .layer(tower::timeout::TimeoutLayer::new(Duration::from_secs(
687                                self.config.gateway.request_timeout_secs,
688                            ))),
689                    )
690                    .layer(webhook_cors);
691
692                router = router.nest("/_api/webhooks", webhook_router);
693
694                tracing::debug!(
695                    webhooks = ?self.webhook_registry.paths().collect::<Vec<_>>(),
696                    "Webhook routes registered"
697                );
698            }
699
700            // MCP OAuth: mount OAuth routes or return JSON 404 for discovery
701            if self.config.mcp.enabled {
702                use axum::routing::get;
703
704                if let Some((oauth_api_router, oauth_state)) = gateway.oauth_router() {
705                    // OAuth API routes under /_api/oauth/* (bypass auth middleware)
706                    router = router.nest("/_api", oauth_api_router);
707
708                    // Well-known metadata at root level
709                    router = router
710                        .route(
711                            "/.well-known/oauth-authorization-server",
712                            get(forge_runtime::gateway::oauth::well_known_oauth_metadata)
713                                .with_state(oauth_state.clone()),
714                        )
715                        .route(
716                            "/.well-known/oauth-protected-resource",
717                            get(forge_runtime::gateway::oauth::well_known_resource_metadata)
718                                .with_state(oauth_state),
719                        );
720
721                    tracing::info!("OAuth 2.1 endpoints enabled for MCP");
722                } else {
723                    // OAuth not configured: return parseable JSON 404
724                    async fn oauth_not_supported() -> impl axum::response::IntoResponse {
725                        (
726                            axum::http::StatusCode::NOT_FOUND,
727                            axum::Json(serde_json::json!({
728                                "error": "oauth_not_supported",
729                                "error_description": "This server does not support OAuth. Connect without authentication."
730                            })),
731                        )
732                    }
733                    router = router
734                        .route(
735                            "/.well-known/oauth-authorization-server",
736                            get(oauth_not_supported),
737                        )
738                        .route(
739                            "/.well-known/oauth-protected-resource",
740                            get(oauth_not_supported),
741                        );
742                }
743            }
744
745            // Merge custom routes before frontend fallback so they take precedence
746            if let Some(custom) = self.custom_routes.take() {
747                router = router.merge(custom);
748                tracing::debug!("Custom routes merged");
749            }
750
751            // Add frontend handler as fallback if configured
752            if let Some(handler) = self.frontend_handler {
753                use axum::routing::get;
754                router = router.fallback(get(handler));
755                tracing::debug!("Frontend handler enabled");
756            }
757
758            let addr = gateway.addr();
759
760            handles.push(tokio::spawn(async move {
761                tracing::debug!(addr = %addr, "Gateway server binding");
762                let listener = tokio::net::TcpListener::bind(addr)
763                    .await
764                    .expect("Failed to bind");
765                if let Err(e) = axum::serve(listener, router).await {
766                    tracing::error!("Gateway server error: {}", e);
767                }
768            }));
769        }
770
771        tracing::info!(
772            queries = self.function_registry.queries().count(),
773            mutations = self.function_registry.mutations().count(),
774            jobs = self.job_registry.len(),
775            crons = self.cron_registry.len(),
776            workflows = self.workflow_registry.len(),
777            daemons = self.daemon_registry.len(),
778            webhooks = self.webhook_registry.len(),
779            mcp_tools = self.mcp_registry.len(),
780            "Functions registered"
781        );
782
783        {
784            let metrics_pool = observability_pool;
785            tokio::spawn(async move {
786                loop {
787                    tokio::time::sleep(Duration::from_secs(15)).await;
788                    forge_runtime::observability::record_pool_metrics(&metrics_pool);
789                }
790            });
791        }
792
793        // Startup banner: summary of config, roles, and capabilities
794        let role_names: Vec<&str> = roles.iter().map(|r| r.as_str()).collect();
795        let capabilities = &self.config.node.worker_capabilities;
796        tracing::info!(
797            node_id = %node_id,
798            project = %self.config.project.name,
799            version = env!("CARGO_PKG_VERSION"),
800            roles = ?role_names,
801            worker_capabilities = ?capabilities,
802            port = self.config.gateway.port,
803            db_pool_size = self.config.database.pool_size,
804            cluster_discovery = ?self.config.cluster.discovery,
805            observability = self.config.observability.enabled,
806            mcp = self.config.mcp.enabled,
807            "Forge started"
808        );
809
810        // Wait for shutdown signal
811        let mut shutdown_rx = self.shutdown_tx.subscribe();
812
813        tokio::select! {
814            _ = tokio::signal::ctrl_c() => {
815                tracing::debug!("Received ctrl-c");
816            }
817            _ = shutdown_rx.recv() => {
818                tracing::debug!("Received shutdown notification");
819            }
820        }
821
822        // Graceful shutdown
823        tracing::debug!("Graceful shutdown starting");
824
825        // Stop workflow scheduler
826        workflow_shutdown_token.cancel();
827
828        if let Err(e) = shutdown.shutdown().await {
829            tracing::warn!(error = %e, "Shutdown error");
830        }
831
832        // Stop leader election
833        if let Some(ref election) = leader_election {
834            election.stop();
835        }
836
837        // Stop reactor before closing database
838        if let Some(ref reactor) = reactor_handle {
839            reactor.stop();
840        }
841
842        // Close database connections
843        if let Some(ref db) = self.db {
844            db.close().await;
845        }
846
847        forge_runtime::shutdown_telemetry();
848        tracing::info!("Forge stopped");
849        Ok(())
850    }
851
852    /// Request shutdown.
853    pub fn shutdown(&self) {
854        let _ = self.shutdown_tx.send(());
855    }
856}
857
858/// Builder for configuring the FORGE runtime.
859pub struct ForgeBuilder {
860    config: Option<ForgeConfig>,
861    function_registry: FunctionRegistry,
862    mcp_registry: McpToolRegistry,
863    job_registry: JobRegistry,
864    cron_registry: CronRegistry,
865    workflow_registry: WorkflowRegistry,
866    daemon_registry: DaemonRegistry,
867    webhook_registry: WebhookRegistry,
868    migrations_dir: PathBuf,
869    extra_migrations: Vec<Migration>,
870    frontend_handler: Option<FrontendHandler>,
871    custom_routes: Option<Router>,
872}
873
874impl ForgeBuilder {
875    /// Create a new builder.
876    pub fn new() -> Self {
877        Self {
878            config: None,
879            function_registry: FunctionRegistry::new(),
880            mcp_registry: McpToolRegistry::new(),
881            job_registry: JobRegistry::new(),
882            cron_registry: CronRegistry::new(),
883            workflow_registry: WorkflowRegistry::new(),
884            daemon_registry: DaemonRegistry::new(),
885            webhook_registry: WebhookRegistry::new(),
886            migrations_dir: PathBuf::from("migrations"),
887            extra_migrations: Vec::new(),
888            frontend_handler: None,
889            custom_routes: None,
890        }
891    }
892
893    /// Set the directory to load migrations from.
894    ///
895    /// Defaults to `./migrations`. Migration files should be named like:
896    /// - `0001_create_users.sql`
897    /// - `0002_add_posts.sql`
898    pub fn migrations_dir(mut self, path: impl Into<PathBuf>) -> Self {
899        self.migrations_dir = path.into();
900        self
901    }
902
903    /// Add a migration programmatically.
904    ///
905    /// Use this for migrations that need to be generated at runtime,
906    /// or for testing. For most cases, use migration files instead.
907    pub fn migration(mut self, name: impl Into<String>, sql: impl Into<String>) -> Self {
908        self.extra_migrations.push(Migration::new(name, sql));
909        self
910    }
911
912    /// Set a frontend handler for serving embedded SPA assets.
913    ///
914    /// Use with the `embedded-frontend` feature to build a single binary
915    /// that includes both backend and frontend.
916    pub fn frontend_handler(mut self, handler: FrontendHandler) -> Self {
917        self.frontend_handler = Some(handler);
918        self
919    }
920
921    /// Add custom axum routes to the server.
922    ///
923    /// Routes are merged at the top level, outside `/_api`, giving full
924    /// control over headers, extractors, and response types. Avoid paths
925    /// starting with `/_api` as they conflict with internal routes.
926    ///
927    /// ```ignore
928    /// use axum::{Router, routing::get};
929    ///
930    /// let routes = Router::new()
931    ///     .route("/custom/health", get(|| async { "ok" }));
932    ///
933    /// builder.custom_routes(routes);
934    /// ```
935    pub fn custom_routes(mut self, router: Router) -> Self {
936        self.custom_routes = Some(router);
937        self
938    }
939
940    /// Automatically register all functions discovered via `#[forge::query]`,
941    /// `#[forge::mutation]`, `#[forge::job]`, `#[forge::cron]`, `#[forge::workflow]`,
942    /// `#[forge::daemon]`, `#[forge::webhook]`, and `#[forge::mcp_tool]` macros.
943    ///
944    /// This replaces the need to manually call `.register_query::<T>()` etc.
945    /// for every function in your application.
946    pub fn auto_register(mut self) -> Self {
947        crate::auto_register::auto_register_all(
948            &mut self.function_registry,
949            &mut self.job_registry,
950            &mut self.cron_registry,
951            &mut self.workflow_registry,
952            &mut self.daemon_registry,
953            &mut self.webhook_registry,
954            &mut self.mcp_registry,
955        );
956        self
957    }
958
959    /// Set the configuration.
960    pub fn config(mut self, config: ForgeConfig) -> Self {
961        self.config = Some(config);
962        self
963    }
964
965    /// Get mutable access to the function registry.
966    pub fn function_registry_mut(&mut self) -> &mut FunctionRegistry {
967        &mut self.function_registry
968    }
969
970    /// Get mutable access to the job registry.
971    pub fn job_registry_mut(&mut self) -> &mut JobRegistry {
972        &mut self.job_registry
973    }
974
975    /// Get mutable access to the MCP tool registry.
976    pub fn mcp_registry_mut(&mut self) -> &mut McpToolRegistry {
977        &mut self.mcp_registry
978    }
979
980    /// Register an MCP tool without manually accessing the registry.
981    pub fn register_mcp_tool<T: ForgeMcpTool>(mut self) -> Self {
982        self.mcp_registry.register::<T>();
983        self
984    }
985
986    /// Get mutable access to the cron registry.
987    pub fn cron_registry_mut(&mut self) -> &mut CronRegistry {
988        &mut self.cron_registry
989    }
990
991    /// Get mutable access to the workflow registry.
992    pub fn workflow_registry_mut(&mut self) -> &mut WorkflowRegistry {
993        &mut self.workflow_registry
994    }
995
996    /// Get mutable access to the daemon registry.
997    pub fn daemon_registry_mut(&mut self) -> &mut DaemonRegistry {
998        &mut self.daemon_registry
999    }
1000
1001    /// Get mutable access to the webhook registry.
1002    pub fn webhook_registry_mut(&mut self) -> &mut WebhookRegistry {
1003        &mut self.webhook_registry
1004    }
1005
1006    /// Register a query function.
1007    pub fn register_query<Q: ForgeQuery>(mut self) -> Self
1008    where
1009        Q::Args: serde::de::DeserializeOwned + Send + 'static,
1010        Q::Output: serde::Serialize + Send + 'static,
1011    {
1012        self.function_registry.register_query::<Q>();
1013        self
1014    }
1015
1016    /// Register a mutation function.
1017    pub fn register_mutation<M: ForgeMutation>(mut self) -> Self
1018    where
1019        M::Args: serde::de::DeserializeOwned + Send + 'static,
1020        M::Output: serde::Serialize + Send + 'static,
1021    {
1022        self.function_registry.register_mutation::<M>();
1023        self
1024    }
1025
1026    /// Register a background job.
1027    pub fn register_job<J: forge_core::ForgeJob>(mut self) -> Self
1028    where
1029        J::Args: serde::de::DeserializeOwned + Send + 'static,
1030        J::Output: serde::Serialize + Send + 'static,
1031    {
1032        self.job_registry.register::<J>();
1033        self
1034    }
1035
1036    /// Register a cron handler.
1037    pub fn register_cron<C: forge_core::ForgeCron>(mut self) -> Self {
1038        self.cron_registry.register::<C>();
1039        self
1040    }
1041
1042    /// Register a workflow.
1043    pub fn register_workflow<W: forge_core::ForgeWorkflow>(mut self) -> Self
1044    where
1045        W::Input: serde::de::DeserializeOwned,
1046        W::Output: serde::Serialize,
1047    {
1048        self.workflow_registry.register::<W>();
1049        self
1050    }
1051
1052    /// Register a daemon.
1053    pub fn register_daemon<D: forge_core::ForgeDaemon>(mut self) -> Self {
1054        self.daemon_registry.register::<D>();
1055        self
1056    }
1057
1058    /// Register a webhook.
1059    pub fn register_webhook<W: forge_core::ForgeWebhook>(mut self) -> Self {
1060        self.webhook_registry.register::<W>();
1061        self
1062    }
1063
1064    /// Build the FORGE runtime.
1065    pub fn build(self) -> Result<Forge> {
1066        let config = self
1067            .config
1068            .ok_or_else(|| ForgeError::Config("Configuration is required".to_string()))?;
1069
1070        let (shutdown_tx, _) = broadcast::channel(1);
1071
1072        Ok(Forge {
1073            config,
1074            db: None,
1075            node_id: NodeId::new(),
1076            function_registry: self.function_registry,
1077            mcp_registry: self.mcp_registry,
1078            job_registry: self.job_registry,
1079            cron_registry: Arc::new(self.cron_registry),
1080            workflow_registry: self.workflow_registry,
1081            daemon_registry: Arc::new(self.daemon_registry),
1082            webhook_registry: Arc::new(self.webhook_registry),
1083            shutdown_tx,
1084            migrations_dir: self.migrations_dir,
1085            extra_migrations: self.extra_migrations,
1086            frontend_handler: self.frontend_handler,
1087            custom_routes: self.custom_routes,
1088        })
1089    }
1090}
1091
1092impl Default for ForgeBuilder {
1093    fn default() -> Self {
1094        Self::new()
1095    }
1096}
1097
1098#[cfg(unix)]
1099fn get_hostname() -> String {
1100    nix::unistd::gethostname()
1101        .map(|h| h.to_string_lossy().to_string())
1102        .unwrap_or_else(|_| "unknown".to_string())
1103}
1104
/// Return this machine's host name on non-Unix targets.
///
/// Reads the `COMPUTERNAME` environment variable first and `HOSTNAME`
/// second, taking the first one that can be read; falls back to the literal
/// `"unknown"` when neither is available.
#[cfg(not(unix))]
fn get_hostname() -> String {
    // Checked in order; the first readable variable wins.
    const CANDIDATES: [&str; 2] = ["COMPUTERNAME", "HOSTNAME"];

    CANDIDATES
        .iter()
        .find_map(|key| std::env::var(*key).ok())
        .unwrap_or_else(|| String::from("unknown"))
}
1111
1112/// Convert config NodeRole to cluster NodeRole.
1113fn config_role_to_node_role(role: &ConfigNodeRole) -> NodeRole {
1114    match role {
1115        ConfigNodeRole::Gateway => NodeRole::Gateway,
1116        ConfigNodeRole::Function => NodeRole::Function,
1117        ConfigNodeRole::Worker => NodeRole::Worker,
1118        ConfigNodeRole::Scheduler => NodeRole::Scheduler,
1119    }
1120}
1121
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::indexing_slicing)]
mod tests {
    use super::*;
    use std::future::Future;
    use std::pin::Pin;

    use forge_core::mcp::{McpToolAnnotations, McpToolInfo};

    /// Minimal MCP tool used only to exercise builder registration.
    struct TestMcpTool;

    impl ForgeMcpTool for TestMcpTool {
        type Args = serde_json::Value;
        type Output = serde_json::Value;

        /// Static metadata: a fixed name with every optional setting left unset.
        fn info() -> McpToolInfo {
            McpToolInfo {
                name: "test.mcp.tool",
                title: None,
                description: None,
                required_role: None,
                is_public: false,
                timeout: None,
                rate_limit_requests: None,
                rate_limit_per_secs: None,
                rate_limit_key: None,
                annotations: McpToolAnnotations::default(),
                icons: &[],
            }
        }

        /// Ignores its inputs and resolves to `{"ok": true}`.
        fn execute(
            _ctx: &forge_core::McpToolContext,
            _args: Self::Args,
        ) -> Pin<Box<dyn Future<Output = forge_core::Result<Self::Output>> + Send + '_>> {
            Box::pin(async {
                let reply = serde_json::json!({ "ok": true });
                Ok(reply)
            })
        }
    }

    /// A fresh builder carries no configuration.
    #[test]
    fn test_forge_builder_new() {
        let fresh = ForgeBuilder::new();
        assert!(fresh.config.is_none());
    }

    /// Building without a configuration is rejected.
    #[test]
    fn test_forge_builder_requires_config() {
        let outcome = ForgeBuilder::new().build();
        assert!(outcome.is_err());
    }

    /// Building with a configuration succeeds.
    #[test]
    fn test_forge_builder_with_config() {
        let cfg = ForgeConfig::default_with_database_url("postgres://localhost/test");
        let outcome = ForgeBuilder::new().config(cfg).build();
        assert!(outcome.is_ok());
    }

    /// Registering one MCP tool leaves exactly one entry in the registry.
    #[test]
    fn test_forge_builder_register_mcp_tool() {
        let with_tool = ForgeBuilder::new().register_mcp_tool::<TestMcpTool>();
        let registered = with_tool.mcp_registry.len();
        assert_eq!(registered, 1);
    }

    /// Each configuration role converts to the cluster role of the same name.
    #[test]
    fn test_config_role_conversion() {
        let expectations = [
            (ConfigNodeRole::Gateway, NodeRole::Gateway),
            (ConfigNodeRole::Worker, NodeRole::Worker),
            (ConfigNodeRole::Scheduler, NodeRole::Scheduler),
            (ConfigNodeRole::Function, NodeRole::Function),
        ];

        for (configured, expected) in &expectations {
            assert_eq!(config_role_to_node_role(configured), *expected);
        }
    }
}