dynamo_llm/model_card/
create.rs1use crate::model_card::model::ModelDeploymentCard;
5use anyhow::{Context, Result};
6use std::path::{Path, PathBuf};
7
8use crate::model_card::model::{ModelInfoType, PromptFormatterArtifact, TokenizerKind};
9
10use super::model::GenerationConfig;
11
12impl ModelDeploymentCard {
13 pub fn set_name(&mut self, name: &str) {
16 self.display_name = name.to_string();
17 self.service_name = name.to_string();
18 }
19
20 pub async fn load(config_path: impl AsRef<Path>) -> anyhow::Result<ModelDeploymentCard> {
24 let config_path = config_path.as_ref();
25 if config_path.is_dir() {
26 Self::from_local_path(config_path).await
27 } else {
28 Self::from_gguf(config_path).await
29 }
30 }
31
32 async fn from_local_path(local_root_dir: impl AsRef<Path>) -> anyhow::Result<Self> {
48 let local_root_dir = local_root_dir.as_ref();
49 check_valid_local_repo_path(local_root_dir)?;
50 let repo_id = local_root_dir
51 .canonicalize()?
52 .to_str()
53 .ok_or_else(|| anyhow::anyhow!("Path contains invalid Unicode"))?
54 .to_string();
55 let model_name = local_root_dir
56 .file_name()
57 .and_then(|n| n.to_str())
58 .ok_or_else(|| anyhow::anyhow!("Invalid model directory name"))?;
59 Self::from_repo(&repo_id, model_name).await
60 }
61
62 async fn from_gguf(gguf_file: &Path) -> anyhow::Result<Self> {
63 let model_name = gguf_file
64 .iter()
65 .next_back()
66 .map(|n| n.to_string_lossy().to_string());
67 let Some(model_name) = model_name else {
68 anyhow::bail!(
70 "Could not extract model name from path '{}'",
71 gguf_file.display()
72 );
73 };
74
75 let content = super::model::load_gguf(gguf_file)?;
77 let context_length = content.get_metadata()[&format!("{}.context_length", content.arch())]
78 .to_u32()
79 .unwrap_or(0) as usize;
80 tracing::debug!(context_length, "Loaded context length from GGUF");
81
82 Ok(Self {
83 display_name: model_name.to_string(),
84 service_name: model_name.to_string(),
85 model_info: Some(ModelInfoType::GGUF(gguf_file.to_path_buf())),
86 tokenizer: Some(TokenizerKind::from_gguf(gguf_file)?),
87 gen_config: None, prompt_formatter: Some(PromptFormatterArtifact::GGUF(gguf_file.to_path_buf())),
89 prompt_context: None, revision: 0,
91 last_published: None,
92 context_length,
93 kv_cache_block_size: 0,
94 })
95 }
96
97 #[allow(dead_code)]
98 async fn from_ngc_repo(_: &str) -> anyhow::Result<Self> {
99 Err(anyhow::anyhow!(
100 "ModelDeploymentCard::from_ngc_repo is not implemented"
101 ))
102 }
103
104 async fn from_repo(repo_id: &str, model_name: &str) -> anyhow::Result<Self> {
105 let context_length = crate::file_json_field(
107 &PathBuf::from(repo_id).join("config.json"),
108 "max_position_embeddings",
109 )
110 .or_else(|_| {
112 crate::file_json_field(
113 &PathBuf::from(repo_id).join("tokenizer_config.json"),
114 "model_max_length",
115 )
116 })
117 .unwrap_or(0);
119
120 Ok(Self {
121 display_name: model_name.to_string(),
122 service_name: model_name.to_string(),
123 model_info: Some(ModelInfoType::from_repo(repo_id).await?),
124 tokenizer: Some(TokenizerKind::from_repo(repo_id).await?),
125 gen_config: GenerationConfig::from_repo(repo_id).await.ok(), prompt_formatter: PromptFormatterArtifact::from_repo(repo_id).await?,
127 prompt_context: None, revision: 0,
129 last_published: None,
130 context_length,
131 kv_cache_block_size: 0, })
133 }
134}
135
136impl ModelInfoType {
137 pub async fn from_repo(repo_id: &str) -> Result<Self> {
138 Self::try_is_hf_repo(repo_id)
139 .await
140 .with_context(|| format!("unable to extract model info from repo {}", repo_id))
141 }
142
143 async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
144 Ok(Self::HfConfigJson(
145 check_for_file(repo, "config.json").await?,
146 ))
147 }
148}
149
150impl PromptFormatterArtifact {
151 pub async fn from_repo(repo_id: &str) -> Result<Option<Self>> {
152 Ok(Self::try_is_hf_repo(repo_id)
155 .await
156 .with_context(|| format!("unable to extract prompt format from repo {}", repo_id))
157 .ok())
158 }
159
160 async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
161 Ok(Self::HfTokenizerConfigJson(
162 check_for_file(repo, "tokenizer_config.json").await?,
163 ))
164 }
165}
166
167impl TokenizerKind {
168 pub async fn from_repo(repo_id: &str) -> Result<Self> {
169 Self::try_is_hf_repo(repo_id)
170 .await
171 .with_context(|| format!("unable to extract tokenizer kind from repo {}", repo_id))
172 }
173
174 async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
175 Ok(Self::HfTokenizerJson(
176 check_for_file(repo, "tokenizer.json").await?,
177 ))
178 }
179}
180
181impl GenerationConfig {
182 pub async fn from_repo(repo_id: &str) -> Result<Self> {
183 Self::try_is_hf_repo(repo_id)
184 .await
185 .with_context(|| format!("unable to extract generation config from repo {repo_id}"))
186 }
187
188 async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
189 Ok(Self::HfGenerationConfigJson(
190 check_for_file(repo, "generation_config.json").await?,
191 ))
192 }
193}
194
195async fn check_for_file(repo_id: &str, file: &str) -> anyhow::Result<String> {
197 let p = PathBuf::from(repo_id).join(file);
198 let name = p.display().to_string();
199 if !p.exists() {
200 anyhow::bail!("File not found: {name}")
201 }
202 Ok(name)
203}
204
205fn check_valid_local_repo_path(path: impl AsRef<Path>) -> Result<()> {
213 let path = path.as_ref();
214 if !path.exists() {
215 return Err(anyhow::anyhow!(
216 "Model path does not exist: {}",
217 path.display()
218 ));
219 }
220
221 if !path.is_dir() {
222 return Err(anyhow::anyhow!(
223 "Model path is not a directory: {}",
224 path.display()
225 ));
226 }
227 Ok(())
228}