entrenar/research/ro_crate/
package.rs1use super::descriptor::RoCrateDescriptor;
4use super::entity::{EntityType, RoCrateEntity};
5use crate::research::artifact::ResearchArtifact;
6use serde_json::json;
7use std::collections::HashMap;
8#[cfg(not(target_arch = "wasm32"))]
9use std::io::Write;
10use std::path::{Path, PathBuf};
11
12#[derive(Debug, Clone)]
14pub struct RoCrate {
15 pub root: PathBuf,
17 pub descriptor: RoCrateDescriptor,
19 pub data_files: HashMap<String, Vec<u8>>,
21}
22
23impl RoCrate {
24 pub fn new(root: impl Into<PathBuf>) -> Self {
26 let mut descriptor = RoCrateDescriptor::new();
27
28 let root_entity = RoCrateEntity::root_dataset()
30 .with_property("datePublished", chrono::Utc::now().format("%Y-%m-%d").to_string());
31 descriptor.add_entity(root_entity);
32
33 Self { root: root.into(), descriptor, data_files: HashMap::new() }
34 }
35
36 pub fn from_artifact(artifact: &ResearchArtifact, root: impl Into<PathBuf>) -> Self {
38 let mut crate_pkg = Self::new(root);
39
40 if let Some(root_entity) = crate_pkg.descriptor.root_dataset_mut() {
42 root_entity.properties.insert("name".to_string(), json!(artifact.title));
43 if let Some(desc) = &artifact.description {
44 root_entity.properties.insert("description".to_string(), json!(desc));
45 }
46 root_entity.properties.insert("version".to_string(), json!(artifact.version));
47 root_entity
48 .properties
49 .insert("license".to_string(), json!(artifact.license.to_string()));
50
51 if let Some(doi) = &artifact.doi {
52 root_entity.properties.insert("identifier".to_string(), json!(doi));
53 }
54
55 if !artifact.keywords.is_empty() {
56 root_entity
57 .properties
58 .insert("keywords".to_string(), json!(artifact.keywords.join(", ")));
59 }
60 }
61
62 let mut author_ids = Vec::new();
64 for (i, author) in artifact.authors.iter().enumerate() {
65 let author_id = format!("#author-{}", i + 1);
66 author_ids.push(author_id.clone());
67
68 let mut person_entity = RoCrateEntity::person(&author_id, &author.name);
69
70 if let Some(orcid) = &author.orcid {
71 person_entity =
72 person_entity.with_property("identifier", format!("https://orcid.org/{orcid}"));
73 }
74
75 if let Some(affiliation) = author.affiliations.first() {
76 let org_id = format!("#org-{}", i + 1);
77 let org_entity = RoCrateEntity::new(&org_id, EntityType::Organization)
78 .with_name(&affiliation.name);
79 crate_pkg.descriptor.add_entity(org_entity);
80 person_entity = person_entity.with_reference("affiliation", &org_id);
81 }
82
83 crate_pkg.descriptor.add_entity(person_entity);
84 }
85
86 if !author_ids.is_empty() {
88 if let Some(root_entity) = crate_pkg.descriptor.root_dataset_mut() {
89 let author_refs: Vec<serde_json::Value> =
90 author_ids.iter().map(|id| json!({ "@id": id })).collect();
91 root_entity.properties.insert("author".to_string(), json!(author_refs));
92 }
93 }
94
95 crate_pkg
96 }
97
98 pub fn add_file(&mut self, path: impl Into<String>, content: Vec<u8>) {
100 let path_str = path.into();
101
102 let file_entity = RoCrateEntity::file(&path_str)
104 .with_property("contentSize", content.len().to_string())
105 .with_property("encodingFormat", guess_mime_type(&path_str));
106
107 self.descriptor.add_entity(file_entity);
108 self.data_files.insert(path_str, content);
109 }
110
111 pub fn add_text_file(&mut self, path: impl Into<String>, content: impl Into<String>) {
113 self.add_file(path, content.into().into_bytes());
114 }
115
116 pub fn to_directory(&self) -> std::io::Result<PathBuf> {
118 std::fs::create_dir_all(&self.root)?;
120
121 let metadata_path = self.root.join("ro-crate-metadata.json");
123 std::fs::write(&metadata_path, self.descriptor.to_json())?;
124
125 for (path, content) in &self.data_files {
127 let file_path = self.root.join(path);
128 if let Some(parent) = file_path.parent() {
129 std::fs::create_dir_all(parent)?;
130 }
131 std::fs::write(&file_path, content)?;
132 }
133
134 Ok(self.root.clone())
135 }
136
137 #[cfg(not(target_arch = "wasm32"))]
139 pub fn to_zip(&self) -> std::io::Result<Vec<u8>> {
140 let mut buffer = std::io::Cursor::new(Vec::new());
141
142 {
143 let mut zip = zip::ZipWriter::new(&mut buffer);
144 let options = zip::write::SimpleFileOptions::default()
145 .compression_method(zip::CompressionMethod::Deflated);
146
147 zip.start_file("ro-crate-metadata.json", options)?;
149 zip.write_all(self.descriptor.to_json().as_bytes())?;
150
151 for (path, content) in &self.data_files {
153 zip.start_file(path, options)?;
154 zip.write_all(content)?;
155 }
156
157 zip.finish()?;
158 }
159
160 Ok(buffer.into_inner())
161 }
162
163 pub fn entity_count(&self) -> usize {
165 self.descriptor.graph.len()
166 }
167
168 pub fn file_count(&self) -> usize {
170 self.data_files.len()
171 }
172}
173
/// Guesses a MIME type from the extension of `path`.
///
/// The extension is compared case-insensitively against a fixed table of known
/// formats. Anything else — including a path with no extension — falls back to
/// `application/octet-stream` and emits a warning on stderr.
pub fn guess_mime_type(path: &str) -> &'static str {
    const FALLBACK: &str = "application/octet-stream";
    const KNOWN: &[(&str, &str)] = &[
        ("json", "application/json"),
        ("yaml", "application/x-yaml"),
        ("yml", "application/x-yaml"),
        ("csv", "text/csv"),
        ("txt", "text/plain"),
        ("md", "text/markdown"),
        ("py", "text/x-python"),
        ("rs", "text/x-rust"),
        ("pdf", "application/pdf"),
        ("png", "image/png"),
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("parquet", "application/vnd.apache.parquet"),
        // Known binary format: same type as the fallback, but without the warning.
        ("safetensors", FALLBACK),
    ];

    let extension = Path::new(path)
        .extension()
        .and_then(std::ffi::OsStr::to_str)
        .map(str::to_lowercase)
        .unwrap_or_default();

    match KNOWN.iter().find(|(ext, _)| *ext == extension.as_str()) {
        Some(&(_, mime)) => mime,
        None => {
            let other = extension.as_str();
            eprintln!(
                "Warning: unknown file extension '{other}', defaulting to application/octet-stream"
            );
            FALLBACK
        }
    }
}
199
#[cfg(test)]
mod tests {
    use super::*;

    /// Every extension the lookup knows about, plus one unknown extension, maps
    /// to the expected MIME type.
    ///
    /// The assertions go through `guess_mime_type` itself; re-implementing the
    /// match inside the test would only verify the test's own copy.
    #[test]
    fn test_guess_mime_type_all_extension_variants() {
        let cases: &[(&str, &str)] = &[
            ("data.json", "application/json"),
            ("config.yaml", "application/x-yaml"),
            ("config.yml", "application/x-yaml"),
            ("data.csv", "text/csv"),
            ("readme.txt", "text/plain"),
            ("notes.md", "text/markdown"),
            ("script.py", "text/x-python"),
            ("main.rs", "text/x-rust"),
            ("paper.pdf", "application/pdf"),
            ("image.png", "image/png"),
            ("photo.jpg", "image/jpeg"),
            ("photo.jpeg", "image/jpeg"),
            ("data.parquet", "application/vnd.apache.parquet"),
            ("model.safetensors", "application/octet-stream"),
            ("archive.xyz", "application/octet-stream"),
        ];

        for &(path, expected) in cases {
            assert_eq!(guess_mime_type(path), expected, "MIME mismatch for {path}");
        }
    }

    /// Extensions are lower-cased before the lookup, so mixed-case names match.
    #[test]
    fn test_guess_mime_type_is_case_insensitive() {
        let cases: &[(&str, &str)] = &[
            ("DATA.JSON", "application/json"),
            ("Config.YML", "application/x-yaml"),
            ("README.MD", "text/markdown"),
            ("Photo.JpEg", "image/jpeg"),
        ];

        for &(path, expected) in cases {
            assert_eq!(guess_mime_type(path), expected, "MIME mismatch for {path}");
        }
    }

    /// Paths without a usable extension fall back to the generic binary type,
    /// and only the last extension of a multi-dot name is considered.
    #[test]
    fn test_guess_mime_type_without_recognized_extension() {
        for path in ["Makefile", ".gitignore", "", "archive.tar.gz"] {
            assert_eq!(
                guess_mime_type(path),
                "application/octet-stream",
                "MIME mismatch for {path:?}"
            );
        }
    }
}