dynamo_llm/model_card/
create.rs1use std::collections::HashMap;
17
18use crate::model_card::model::ModelDeploymentCard;
19use anyhow::{Context, Result};
20use std::fs;
21use std::path::Path;
22
23use crate::model_card::model::{ModelInfoType, PromptFormatterArtifact, TokenizerKind};
24
25impl ModelDeploymentCard {
26 pub async fn from_local_path(
42 local_root_dir: impl AsRef<Path>,
43 model_name: Option<&str>,
44 ) -> anyhow::Result<Self> {
45 let local_root_dir = local_root_dir.as_ref();
46 check_valid_local_repo_path(local_root_dir)?;
47 let repo_id = local_root_dir
48 .canonicalize()?
49 .to_str()
50 .ok_or_else(|| anyhow::anyhow!("Path contains invalid Unicode"))?
51 .to_string();
52 let model_name = model_name.unwrap_or(
53 local_root_dir
54 .file_name()
55 .and_then(|n| n.to_str())
56 .ok_or_else(|| anyhow::anyhow!("Invalid model directory name"))?,
57 );
58 Self::from_repo(&repo_id, model_name).await
59 }
60
61 pub async fn from_gguf(gguf_file: &Path, model_name: Option<&str>) -> anyhow::Result<Self> {
62 let model_name = model_name.map(|s| s.to_string()).or_else(|| {
63 gguf_file
64 .iter()
65 .next_back()
66 .map(|n| n.to_string_lossy().to_string())
67 });
68 let Some(model_name) = model_name else {
69 anyhow::bail!(
71 "Could not extract model name from path '{}'",
72 gguf_file.display()
73 );
74 };
75 Ok(Self {
76 display_name: model_name.to_string(),
77 service_name: model_name.to_string(),
78 model_info: Some(ModelInfoType::GGUF(gguf_file.to_path_buf())),
79 tokenizer: Some(TokenizerKind::from_gguf(gguf_file)?),
80 prompt_formatter: Some(PromptFormatterArtifact::GGUF(gguf_file.to_path_buf())),
81 prompt_context: None, revision: 0,
83 last_published: None,
84 requires_preprocessing: true,
85 })
86 }
87
88 pub async fn from_ngc_repo(_: &str) -> anyhow::Result<Self> {
91 Err(anyhow::anyhow!(
92 "ModelDeploymentCard::from_ngc_repo is not implemented"
93 ))
94 }
95
96 pub async fn from_repo(repo_id: &str, model_name: &str) -> anyhow::Result<Self> {
97 Ok(Self {
98 display_name: model_name.to_string(),
99 service_name: model_name.to_string(),
100 model_info: Some(ModelInfoType::from_repo(repo_id).await?),
101 tokenizer: Some(TokenizerKind::from_repo(repo_id).await?),
102 prompt_formatter: PromptFormatterArtifact::from_repo(repo_id).await?,
103 prompt_context: None, revision: 0,
105 last_published: None,
106 requires_preprocessing: true,
107 })
108 }
109}
110
111impl ModelInfoType {
112 pub async fn from_repo(repo_id: &str) -> Result<Self> {
113 Self::try_is_hf_repo(repo_id)
114 .await
115 .with_context(|| format!("unable to extract model info from repo {}", repo_id))
116 }
117
118 async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
119 Ok(Self::HfConfigJson(
120 check_for_file(repo, "config.json").await?,
121 ))
122 }
123}
124
125impl PromptFormatterArtifact {
126 pub async fn from_repo(repo_id: &str) -> Result<Option<Self>> {
127 Ok(Self::try_is_hf_repo(repo_id)
130 .await
131 .with_context(|| format!("unable to extract prompt format from repo {}", repo_id))
132 .ok())
133 }
134
135 async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
136 Ok(Self::HfTokenizerConfigJson(
137 check_for_file(repo, "tokenizer_config.json").await?,
138 ))
139 }
140}
141
142impl TokenizerKind {
143 pub async fn from_repo(repo_id: &str) -> Result<Self> {
144 Self::try_is_hf_repo(repo_id)
145 .await
146 .with_context(|| format!("unable to extract tokenizer kind from repo {}", repo_id))
147 }
148
149 async fn try_is_hf_repo(repo: &str) -> anyhow::Result<Self> {
150 Ok(Self::HfTokenizerJson(
151 check_for_file(repo, "tokenizer.json").await?,
152 ))
153 }
154}
155
156async fn check_for_file(repo_id: &str, file: &str) -> anyhow::Result<String> {
158 let mut files = check_for_files(repo_id, vec![file.to_string()]).await?;
159 let file = files
160 .remove(file)
161 .ok_or(anyhow::anyhow!("file {} not found", file))?;
162 Ok(file)
163}
164
165async fn check_for_files(repo_id: &str, files: Vec<String>) -> Result<HashMap<String, String>> {
166 let dir_entries =
167 fs::read_dir(repo_id).with_context(|| format!("Failed to read directory: {}", repo_id))?;
168 let mut found_files = HashMap::new();
169 for entry in dir_entries {
170 let entry =
171 entry.with_context(|| format!("Failed to read directory entry in {}", repo_id))?;
172 let path = entry.path();
173 let file_name = path
174 .file_name()
175 .and_then(|n| n.to_str())
176 .ok_or_else(|| anyhow::anyhow!("Invalid file name in {}", repo_id))?;
177 if files.contains(&file_name.to_string()) {
178 found_files.insert(
179 file_name.to_string(),
180 path.to_str()
181 .ok_or_else(|| anyhow::anyhow!("Invalid path"))?
182 .to_string(),
183 );
184 }
185 }
186 Ok(found_files)
187}
188
189fn check_valid_local_repo_path(path: impl AsRef<Path>) -> Result<()> {
197 let path = path.as_ref();
198 if !path.exists() {
199 return Err(anyhow::anyhow!(
200 "Model path does not exist: {}",
201 path.display()
202 ));
203 }
204
205 if !path.is_dir() {
206 return Err(anyhow::anyhow!(
207 "Model path is not a directory: {}",
208 path.display()
209 ));
210 }
211 Ok(())
212}