triton_distributed/
config.rs

1// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use super::Result;
17use derive_builder::Builder;
18use figment::{
19    providers::{Env, Format, Serialized, Toml},
20    Figment,
21};
22use serde::{Deserialize, Serialize};
23use validator::Validate;
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct WorkerConfig {
27    /// Grace shutdown period for http-service.
28    pub graceful_shutdown_timeout: u64,
29}
30
31impl WorkerConfig {
32    pub fn from_settings() -> Self {
33        // Instantiates and reads server configurations from appropriate sources.
34        // All calls should be global and thread safe.
35        Figment::new()
36            .merge(Serialized::defaults(Self::default()))
37            .merge(Env::prefixed("TRITON_WORKER_"))
38            .extract()
39            .unwrap()
40    }
41}
42
43impl Default for WorkerConfig {
44    fn default() -> Self {
45        WorkerConfig {
46            graceful_shutdown_timeout: if cfg!(debug_assertions) {
47                1 // Debug build: 1 second
48            } else {
49                30 // Release build: 30 seconds
50            },
51        }
52    }
53}
54
55/// Runtime configuration
56/// Defines the configuration for Tokio runtimes
57#[derive(Serialize, Deserialize, Validate, Debug, Builder, Clone)]
58#[builder(build_fn(private, name = "build_internal"), derive(Debug, Serialize))]
59pub struct RuntimeConfig {
60    /// Maximum number of async worker threads
61    /// If set to 1, the runtime will run in single-threaded mode
62    #[validate(range(min = 1))]
63    #[builder(default = "16")]
64    #[builder_field_attr(serde(skip_serializing_if = "Option::is_none"))]
65    pub max_worker_threads: usize,
66
67    /// Maximum number of blocking threads
68    /// Blocking threads are used for blocking operations, this value must be greater than 0.
69    #[validate(range(min = 1))]
70    #[builder(default = "16")]
71    #[builder_field_attr(serde(skip_serializing_if = "Option::is_none"))]
72    pub max_blocking_threads: usize,
73}
74
75impl RuntimeConfig {
76    pub fn builder() -> RuntimeConfigBuilder {
77        RuntimeConfigBuilder::default()
78    }
79
80    pub(crate) fn figment() -> Figment {
81        Figment::new()
82            .merge(Serialized::defaults(RuntimeConfig::default()))
83            .merge(Toml::file("/opt/triton/defaults/runtime.toml"))
84            .merge(Toml::file("/opt/triton/etc/runtime.toml"))
85            .merge(Env::prefixed("TRITON_RUNTIME_"))
86    }
87
88    /// Load the runtime configuration from the environment and configuration files
89    /// Configuration is priorities in the following order, where the last has the lowest priority:
90    /// 1. Environment variables (top priority)
91    /// 2. /opt/triton/etc/runtime.toml
92    /// 3. /opt/triton/defaults/runtime.toml (lowest priority)
93    ///
94    /// Environment variables are prefixed with `TRITON_RUNTIME_`
95    pub fn from_settings() -> Result<RuntimeConfig> {
96        let config: RuntimeConfig = Self::figment().extract()?;
97        config.validate()?;
98        Ok(config)
99    }
100
101    pub fn single_threaded() -> Self {
102        RuntimeConfig {
103            max_worker_threads: 1,
104            max_blocking_threads: 1,
105        }
106    }
107
108    /// Create a new default runtime configuration
109    pub(crate) fn create_runtime(&self) -> Result<tokio::runtime::Runtime> {
110        Ok(tokio::runtime::Builder::new_multi_thread()
111            .worker_threads(self.max_worker_threads)
112            .max_blocking_threads(self.max_blocking_threads)
113            .enable_all()
114            .build()?)
115    }
116}
117
118impl Default for RuntimeConfig {
119    fn default() -> Self {
120        Self::single_threaded()
121    }
122}
123
124impl RuntimeConfigBuilder {
125    /// Build and validate the runtime configuration
126    pub fn build(&self) -> Result<RuntimeConfig> {
127        let config = self.build_internal()?;
128        config.validate()?;
129        Ok(config)
130    }
131}