dynamo_llm/local_model/runtime_config.rs

// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use std::collections::HashMap;

use serde::{Deserialize, Serialize, de::DeserializeOwned};

use crate::protocols::tensor;
#[derive(Debug, Default, Clone, Serialize, Deserialize, Eq, PartialEq)]
pub struct ModelRuntimeConfig {
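    /// Total number of KV cache blocks available on the engine.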
    pub total_kv_blocks: Option<u64>,

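    /// Maximum number of sequences the engine handles concurrently.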
    pub max_num_seqs: Option<u64>,

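    /// Maximum number of tokens the engine batches per iteration.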
    pub max_num_batched_tokens: Option<u64>,

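    /// Name of the tool call parser to use, if any.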
    pub tool_call_parser: Option<String>,

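    /// Name of the reasoning parser to use, if any.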
    pub reasoning_parser: Option<String>,

    /// Mapping of engine-specific runtime configs
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub runtime_data: HashMap<String, serde_json::Value>,

    // Provides the tensor model config when the model type is Tensor.
    // A JSON object is currently used for convenience; the programmatic way
    // would be to define the model config struct as part of the tensor
    // protocol and import it here.
    // [gluo TODO] switch to ModelConfig if desired and work out a convenient
    // way to prepare it; the protobuf library used by tonic doesn't provide
    // JSON parsing.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tensor_model_config: Option<tensor::TensorModelConfig>,
}

impl ModelRuntimeConfig {
    pub fn new() -> Self {
        Self::default()
    }

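    /// Serializes `value` to JSON and stores it under `key` in `runtime_data`.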
    pub fn set_engine_specific<T: Serialize>(&mut self, key: &str, value: T) -> anyhow::Result<()> {
        self.runtime_data
            .insert(key.to_string(), serde_json::to_value(value)?);
        Ok(())
    }

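    /// Fetches and deserializes the value stored under `key`; returns `Ok(None)`
    /// if the key is absent.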
    pub fn get_engine_specific<T: DeserializeOwned>(&self, key: &str) -> anyhow::Result<Option<T>> {
        if let Some(value) = self.runtime_data.get(key) {
            Ok(Some(serde_json::from_value(value.clone())?))
        } else {
            Ok(None)
        }
    }
}
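
// Illustrative sketch (not part of the original file): a small test module
// showing how `set_engine_specific` / `get_engine_specific` round-trip a typed
// value through `runtime_data`. The key "gpu_memory_utilization" is a
// hypothetical example, not a key this crate defines.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn engine_specific_round_trip() -> anyhow::Result<()> {
        let mut config = ModelRuntimeConfig::new();

        // Store a typed value; it is serialized to a `serde_json::Value` internally.
        config.set_engine_specific("gpu_memory_utilization", 0.9_f64)?;

        // Read it back with the expected type.
        let value: Option<f64> = config.get_engine_specific("gpu_memory_utilization")?;
        assert_eq!(value, Some(0.9));

        // Keys that were never set come back as `Ok(None)` rather than an error.
        let missing: Option<u64> = config.get_engine_specific("never_set")?;
        assert_eq!(missing, None);

        Ok(())
    }
}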