dynamo_llm/model_card/
create.rs

1// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::model_card::model::ModelDeploymentCard;
5use anyhow::{Context, Result};
6use std::path::{Path, PathBuf};
7
8use crate::model_card::model::{ModelInfoType, PromptFormatterArtifact, TokenizerKind};
9
10use super::model::GenerationConfig;
11
12impl ModelDeploymentCard {
13    /// Allow user to override the name we register this model under.
14    /// Corresponds to vllm's `--served-model-name`.
15    pub fn set_name(&mut self, name: &str) {
16        self.display_name = name.to_string();
17        self.service_name = name.to_string();
18    }
19
20    /// Build an in-memory ModelDeploymentCard from either:
21    /// - a folder containing config.json, tokenizer.json and token_config.json
22    /// - a GGUF file
23    pub async fn load(config_path: impl AsRef<Path>) -> anyhow::Result<ModelDeploymentCard> {
24        let config_path = config_path.as_ref();
25        if config_path.is_dir() {
26            Self::from_local_path(config_path).await
27        } else {
28            Self::from_gguf(config_path).await
29        }
30    }
31
32    /// Creates a ModelDeploymentCard from a local directory path.
33    ///
34    /// Currently HuggingFace format is supported and following files are expected:
35    /// - config.json: Model configuration in HuggingFace format
36    /// - tokenizer.json: Tokenizer configuration in HuggingFace format
37    /// - tokenizer_config.json: Optional prompt formatter configuration
38    ///
39    /// # Arguments
40    /// * `local_root_dir` - Path to the local model directory
41    ///
42    /// # Errors
43    /// Returns an error if:
44    /// - The path doesn't exist or isn't a directory
45    /// - The path contains invalid Unicode characters
46    /// - Required model files are missing or invalid
47    async fn from_local_path(local_root_dir: impl AsRef<Path>) -> anyhow::Result<Self> {
48        let local_root_dir = local_root_dir.as_ref();
49        check_valid_local_repo_path(local_root_dir)?;
50        let repo_id = local_root_dir
51            .canonicalize()?
52            .to_str()
53            .ok_or_else(|| anyhow::anyhow!("Path contains invalid Unicode"))?
54            .to_string();
55        let model_name = local_root_dir
56            .file_name()
57            .and_then(|n| n.to_str())
58            .ok_or_else(|| anyhow::anyhow!("Invalid model directory name"))?;
59        Self::from_repo(&repo_id, model_name).await
60    }
61
62    async fn from_gguf(gguf_file: &Path) -> anyhow::Result<Self> {
63        let model_name = gguf_file
64            .iter()
65            .next_back()
66            .map(|n| n.to_string_lossy().to_string());
67        let Some(model_name) = model_name else {
68            // I think this would only happy on an empty path
69            anyhow::bail!(
70                "Could not extract model name from path '{}'",
71                gguf_file.display()
72            );
73        };
74
75        // TODO: we do this in HFConfig also, unify
76        let content = super::model::load_gguf(gguf_file)?;
77        let context_length = content.get_metadata()[&format!("{}.context_length", content.arch())]
78            .to_u32()
79            .unwrap_or(0) as usize;
80        tracing::debug!(context_length, "Loaded context length from GGUF");
81
82        Ok(Self {
83            display_name: model_name.to_string(),
84            service_name: model_name.to_string(),
85            model_info: Some(ModelInfoType::GGUF(gguf_file.to_path_buf())),
86            tokenizer: Some(TokenizerKind::from_gguf(gguf_file)?),
87            gen_config: None, // AFAICT there is no equivalent in a GGUF
88            prompt_formatter: Some(PromptFormatterArtifact::GGUF(gguf_file.to_path_buf())),
89            prompt_context: None, // TODO - auto-detect prompt context
90            revision: 0,
91            last_published: None,
92            context_length,
93            kv_cache_block_size: 0,
94        })
95    }
96
97    #[allow(dead_code)]
98    async fn from_ngc_repo(_: &str) -> anyhow::Result<Self> {
99        Err(anyhow::anyhow!(
100            "ModelDeploymentCard::from_ngc_repo is not implemented"
101        ))
102    }
103
104    async fn from_repo(repo_id: &str, model_name: &str) -> anyhow::Result<Self> {
105        // This is usually the right choice
106        let context_length = crate::file_json_field(
107            &PathBuf::from(repo_id).join("config.json"),
108            "max_position_embeddings",
109        )
110        // But sometimes this is
111        .or_else(|_| {
112            crate::file_json_field(
113                &PathBuf::from(repo_id).join("tokenizer_config.json"),
114                "model_max_length",
115            )
116        })
117        // If neither of those are present let the engine default it
118        .unwrap_or(0);
119
120        Ok(Self {
121            display_name: model_name.to_string(),
122            service_name: model_name.to_string(),
123            model_info: Some(ModelInfoType::from_repo(repo_id).await?),
124            tokenizer: Some(TokenizerKind::from_repo(repo_id).await?),
125            gen_config: GenerationConfig::from_repo(repo_id).await.ok(), // optional
126            prompt_formatter: PromptFormatterArtifact::from_repo(repo_id).await?,
127            prompt_context: None, // TODO - auto-detect prompt context
128            revision: 0,
129            last_published: None,
130            context_length,
131            kv_cache_block_size: 0, // set later
132        })
133    }
134}
135
136impl ModelInfoType {
137    pub async fn from_repo(repo_id: &str) -> Result<Self> {
138        Self::try_is_hf_repo(repo_id)
139            .await
140            .with_context(|| format!("unable to extract model info from repo {}", repo_id))
141    }
142
143    async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
144        Ok(Self::HfConfigJson(
145            check_for_file(repo, "config.json").await?,
146        ))
147    }
148}
149
150impl PromptFormatterArtifact {
151    pub async fn from_repo(repo_id: &str) -> Result<Option<Self>> {
152        // we should only error if we expect a prompt formatter and it's not found
153        // right now, we don't know when to expect it, so we just return Ok(Some/None)
154        Ok(Self::try_is_hf_repo(repo_id)
155            .await
156            .with_context(|| format!("unable to extract prompt format from repo {}", repo_id))
157            .ok())
158    }
159
160    async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
161        Ok(Self::HfTokenizerConfigJson(
162            check_for_file(repo, "tokenizer_config.json").await?,
163        ))
164    }
165}
166
167impl TokenizerKind {
168    pub async fn from_repo(repo_id: &str) -> Result<Self> {
169        Self::try_is_hf_repo(repo_id)
170            .await
171            .with_context(|| format!("unable to extract tokenizer kind from repo {}", repo_id))
172    }
173
174    async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
175        Ok(Self::HfTokenizerJson(
176            check_for_file(repo, "tokenizer.json").await?,
177        ))
178    }
179}
180
181impl GenerationConfig {
182    pub async fn from_repo(repo_id: &str) -> Result<Self> {
183        Self::try_is_hf_repo(repo_id)
184            .await
185            .with_context(|| format!("unable to extract generation config from repo {repo_id}"))
186    }
187
188    async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
189        Ok(Self::HfGenerationConfigJson(
190            check_for_file(repo, "generation_config.json").await?,
191        ))
192    }
193}
194
195/// Checks if the provided path contains the expected file.
196async fn check_for_file(repo_id: &str, file: &str) -> anyhow::Result<String> {
197    let p = PathBuf::from(repo_id).join(file);
198    let name = p.display().to_string();
199    if !p.exists() {
200        anyhow::bail!("File not found: {name}")
201    }
202    Ok(name)
203}
204
205/// Checks if the provided path is a valid local repository path.
206///
207/// # Arguments
208/// * `path` - Path to validate
209///
210/// # Errors
211/// Returns an error if the path doesn't exist or isn't a directory
212fn check_valid_local_repo_path(path: impl AsRef<Path>) -> Result<()> {
213    let path = path.as_ref();
214    if !path.exists() {
215        return Err(anyhow::anyhow!(
216            "Model path does not exist: {}",
217            path.display()
218        ));
219    }
220
221    if !path.is_dir() {
222        return Err(anyhow::anyhow!(
223            "Model path is not a directory: {}",
224            path.display()
225        ));
226    }
227    Ok(())
228}