Skip to main content

entrenar/research/ro_crate/
package.rs

1//! RO-Crate package for bundling research data.
2
3use super::descriptor::RoCrateDescriptor;
4use super::entity::{EntityType, RoCrateEntity};
5use crate::research::artifact::ResearchArtifact;
6use serde_json::json;
7use std::collections::HashMap;
8#[cfg(not(target_arch = "wasm32"))]
9use std::io::Write;
10use std::path::{Path, PathBuf};
11
12/// RO-Crate package
13#[derive(Debug, Clone)]
14pub struct RoCrate {
15    /// Root directory path
16    pub root: PathBuf,
17    /// Metadata descriptor
18    pub descriptor: RoCrateDescriptor,
19    /// Data files to include (relative path -> content)
20    pub data_files: HashMap<String, Vec<u8>>,
21}
22
23impl RoCrate {
24    /// Create a new RO-Crate
25    pub fn new(root: impl Into<PathBuf>) -> Self {
26        let mut descriptor = RoCrateDescriptor::new();
27
28        // Add root dataset
29        let root_entity = RoCrateEntity::root_dataset()
30            .with_property("datePublished", chrono::Utc::now().format("%Y-%m-%d").to_string());
31        descriptor.add_entity(root_entity);
32
33        Self { root: root.into(), descriptor, data_files: HashMap::new() }
34    }
35
36    /// Create from a research artifact
37    pub fn from_artifact(artifact: &ResearchArtifact, root: impl Into<PathBuf>) -> Self {
38        let mut crate_pkg = Self::new(root);
39
40        // Update root dataset with artifact metadata
41        if let Some(root_entity) = crate_pkg.descriptor.root_dataset_mut() {
42            root_entity.properties.insert("name".to_string(), json!(artifact.title));
43            if let Some(desc) = &artifact.description {
44                root_entity.properties.insert("description".to_string(), json!(desc));
45            }
46            root_entity.properties.insert("version".to_string(), json!(artifact.version));
47            root_entity
48                .properties
49                .insert("license".to_string(), json!(artifact.license.to_string()));
50
51            if let Some(doi) = &artifact.doi {
52                root_entity.properties.insert("identifier".to_string(), json!(doi));
53            }
54
55            if !artifact.keywords.is_empty() {
56                root_entity
57                    .properties
58                    .insert("keywords".to_string(), json!(artifact.keywords.join(", ")));
59            }
60        }
61
62        // Add author entities
63        let mut author_ids = Vec::new();
64        for (i, author) in artifact.authors.iter().enumerate() {
65            let author_id = format!("#author-{}", i + 1);
66            author_ids.push(author_id.clone());
67
68            let mut person_entity = RoCrateEntity::person(&author_id, &author.name);
69
70            if let Some(orcid) = &author.orcid {
71                person_entity =
72                    person_entity.with_property("identifier", format!("https://orcid.org/{orcid}"));
73            }
74
75            if let Some(affiliation) = author.affiliations.first() {
76                let org_id = format!("#org-{}", i + 1);
77                let org_entity = RoCrateEntity::new(&org_id, EntityType::Organization)
78                    .with_name(&affiliation.name);
79                crate_pkg.descriptor.add_entity(org_entity);
80                person_entity = person_entity.with_reference("affiliation", &org_id);
81            }
82
83            crate_pkg.descriptor.add_entity(person_entity);
84        }
85
86        // Link authors to root dataset
87        if !author_ids.is_empty() {
88            if let Some(root_entity) = crate_pkg.descriptor.root_dataset_mut() {
89                let author_refs: Vec<serde_json::Value> =
90                    author_ids.iter().map(|id| json!({ "@id": id })).collect();
91                root_entity.properties.insert("author".to_string(), json!(author_refs));
92            }
93        }
94
95        crate_pkg
96    }
97
98    /// Add a data file
99    pub fn add_file(&mut self, path: impl Into<String>, content: Vec<u8>) {
100        let path_str = path.into();
101
102        // Add file entity to descriptor
103        let file_entity = RoCrateEntity::file(&path_str)
104            .with_property("contentSize", content.len().to_string())
105            .with_property("encodingFormat", guess_mime_type(&path_str));
106
107        self.descriptor.add_entity(file_entity);
108        self.data_files.insert(path_str, content);
109    }
110
111    /// Add a text file
112    pub fn add_text_file(&mut self, path: impl Into<String>, content: impl Into<String>) {
113        self.add_file(path, content.into().into_bytes());
114    }
115
116    /// Write to a directory
117    pub fn to_directory(&self) -> std::io::Result<PathBuf> {
118        // Create root directory
119        std::fs::create_dir_all(&self.root)?;
120
121        // Write ro-crate-metadata.json
122        let metadata_path = self.root.join("ro-crate-metadata.json");
123        std::fs::write(&metadata_path, self.descriptor.to_json())?;
124
125        // Write data files
126        for (path, content) in &self.data_files {
127            let file_path = self.root.join(path);
128            if let Some(parent) = file_path.parent() {
129                std::fs::create_dir_all(parent)?;
130            }
131            std::fs::write(&file_path, content)?;
132        }
133
134        Ok(self.root.clone())
135    }
136
137    /// Create a ZIP archive
138    #[cfg(not(target_arch = "wasm32"))]
139    pub fn to_zip(&self) -> std::io::Result<Vec<u8>> {
140        let mut buffer = std::io::Cursor::new(Vec::new());
141
142        {
143            let mut zip = zip::ZipWriter::new(&mut buffer);
144            let options = zip::write::SimpleFileOptions::default()
145                .compression_method(zip::CompressionMethod::Deflated);
146
147            // Write ro-crate-metadata.json
148            zip.start_file("ro-crate-metadata.json", options)?;
149            zip.write_all(self.descriptor.to_json().as_bytes())?;
150
151            // Write data files
152            for (path, content) in &self.data_files {
153                zip.start_file(path, options)?;
154                zip.write_all(content)?;
155            }
156
157            zip.finish()?;
158        }
159
160        Ok(buffer.into_inner())
161    }
162
163    /// Get entity count
164    pub fn entity_count(&self) -> usize {
165        self.descriptor.graph.len()
166    }
167
168    /// Get file count
169    pub fn file_count(&self) -> usize {
170        self.data_files.len()
171    }
172}
173
/// Map a file path to a MIME type based on its extension.
///
/// The extension is compared case-insensitively. Paths without an extension, or with an
/// extension that is not in the table below, yield `application/octet-stream` and emit a
/// warning on standard error.
pub fn guess_mime_type(path: &str) -> &'static str {
    /// Recognized (lower-case extension, MIME type) pairs.
    const KNOWN_TYPES: &[(&str, &str)] = &[
        ("json", "application/json"),
        ("yaml", "application/x-yaml"),
        ("yml", "application/x-yaml"),
        ("csv", "text/csv"),
        ("txt", "text/plain"),
        ("md", "text/markdown"),
        ("py", "text/x-python"),
        ("rs", "text/x-rust"),
        ("pdf", "application/pdf"),
        ("png", "image/png"),
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("parquet", "application/vnd.apache.parquet"),
        ("safetensors", "application/octet-stream"),
    ];

    // A missing or non-UTF-8 extension is treated as the empty string.
    let normalized = Path::new(path)
        .extension()
        .and_then(|raw| raw.to_str())
        .map(str::to_lowercase)
        .unwrap_or_default();

    if let Some(&(_, mime)) = KNOWN_TYPES.iter().find(|(ext, _)| *ext == normalized.as_str()) {
        return mime;
    }

    let other = normalized.as_str();
    eprintln!("Warning: unknown file extension '{other}', defaulting to application/octet-stream");
    "application/octet-stream"
}
199
#[cfg(test)]
mod tests {
    use super::*;

    /// MIME type expected for any extension without a dedicated mapping.
    const FALLBACK: &str = "application/octet-stream";

    /// Every explicitly mapped extension, plus one unknown extension, yields the expected
    /// MIME type. The assertions go through `guess_mime_type` itself rather than a copy of
    /// its `match`, so a regression in the function is actually detected.
    #[test]
    fn test_guess_mime_type_all_extension_variants() {
        let cases: &[(&str, &str)] = &[
            ("data.json", "application/json"),
            ("config.yaml", "application/x-yaml"),
            ("config.yml", "application/x-yaml"),
            ("data.csv", "text/csv"),
            ("readme.txt", "text/plain"),
            ("notes.md", "text/markdown"),
            ("script.py", "text/x-python"),
            ("main.rs", "text/x-rust"),
            ("paper.pdf", "application/pdf"),
            ("image.png", "image/png"),
            ("photo.jpg", "image/jpeg"),
            ("photo.jpeg", "image/jpeg"),
            ("data.parquet", "application/vnd.apache.parquet"),
            ("model.safetensors", FALLBACK),
            ("archive.xyz", FALLBACK),
        ];

        for &(path, expected) in cases {
            assert_eq!(guess_mime_type(path), expected, "MIME mismatch for {path}");
        }
    }

    /// Extensions are matched case-insensitively, including inside nested paths.
    #[test]
    fn test_guess_mime_type_ignores_extension_case() {
        let cases: &[(&str, &str)] = &[
            ("DATA.JSON", "application/json"),
            ("Config.YML", "application/x-yaml"),
            ("nested/dir.v2/Photo.JpEg", "image/jpeg"),
            ("results/TABLE.Csv", "text/csv"),
        ];

        for &(path, expected) in cases {
            assert_eq!(guess_mime_type(path), expected, "MIME mismatch for {path}");
        }
    }

    /// Paths with no extension (including dotfiles and the empty path) and multi-part
    /// names whose final extension is unknown all fall back to the generic binary type.
    #[test]
    fn test_guess_mime_type_falls_back_without_known_extension() {
        for path in ["Makefile", ".gitignore", "", "archive.tar.GZ", "dir.json/README"] {
            assert_eq!(guess_mime_type(path), FALLBACK, "MIME mismatch for {path:?}");
        }
    }
}