Skip to main content

dicom_test_files/
lib.rs

1//! A collection of DICOM files for testing DICOM parsers.
2//!
3//! To avoid users having to download all the files they are downloaded as they
4//! are needed and cached in the `/target` directory.
5//!
6//! The [`path`] function will automatically download the requested file
7//! and return a file path.
8//!
9//! ```no_run
10//! use dicom_test_files::path;
11//!
12//! # fn main() -> Result<(), dicom_test_files::Error> {
13//! let liver = path("pydicom/liver.dcm")?;
14//! // then open the file as you will (e.g. using DICOM-rs)
15//! # /*
16//! let dicom_data = dicom::object::open(liver);
17//! # */
18//! # Ok(())
19//! # }
20//! ```
21//!
22//! ## Source of data
23//!
24//! By default,
25//! all data sets are hosted in
26//! the `dicom-test-files` project's [main repository][1],
27//! in the `data` folder.
28//! Inspect this folder to know what DICOM test files are available.
29//!
30//! To override this source,
31//! you can set the environment variable `DICOM_TEST_FILES_URL`
32//! to the base path of the data set's raw contents
33//! (usually ending with `data` or `data/`).
34//!
35//! ```sh
36//! export DICOM_TEST_FILES_URL=https://raw.githubusercontent.com/Me/dicom-test-files/new/more-dicom/data
37//! cargo test
38//! ```
39//!
40//! [1]: https://github.com/robyoung/dicom-test-files/tree/master/data
41
42#![deny(missing_docs)]
43
44use sha2::{Digest, Sha256};
45use std::{
46    borrow::Cow,
47    env::{self, VarError},
48    fs, io,
49    path::{Path, PathBuf},
50};
51use test_file::{Compression, TestFile};
52
53mod entries;
54
55pub(crate) mod test_file;
56
57use entries::FILE_ENTRIES;
58
/// Error type for dicom_test_files
#[derive(Debug)]
pub enum Error {
    /// Returned when the provided name does not exist in the hash list
    ///
    /// If you are sure it does exist you may need to update to a newer version of dicom_test_files.
    NotFound,
    /// Returned when the hash of the downloaded file does not match the previously generated hash
    ///
    /// This may mean you need to update to a newer version of dicom_test_files.
    InvalidHash,
    /// Returned when the file cannot be downloaded. Contains the generated URL.
    Download(String),
    /// Wrapped errors from std::io
    Io(io::Error),
    /// Failed to resolve data source URL
    ResolveUrl(VarError),
    /// Feature "zstd" is required for this file
    ZstdRequired,
}

impl std::fmt::Display for Error {
    /// Write a short human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::NotFound => f.write_str("test file not found in the list of known files"),
            Error::InvalidHash => {
                f.write_str("hash of the downloaded file does not match the expected hash")
            }
            Error::Download(details) => write!(f, "could not download test file: {details}"),
            Error::Io(err) => write!(f, "I/O error: {err}"),
            Error::ResolveUrl(err) => write!(f, "failed to resolve data source URL: {err}"),
            Error::ZstdRequired => f.write_str("feature \"zstd\" is required for this file"),
        }
    }
}

impl std::error::Error for Error {
    /// Expose the wrapped lower-level error, for the variants that carry one.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::Io(err) => Some(err),
            Error::ResolveUrl(err) => Some(err),
            Error::NotFound | Error::InvalidHash | Error::Download(_) | Error::ZstdRequired => {
                None
            }
        }
    }
}
79
80impl From<io::Error> for Error {
81    fn from(err: io::Error) -> Error {
82        Error::Io(err)
83    }
84}
85
86type Result<T, E = Error> = std::result::Result<T, E>;
87
88fn lookup(name: &str) -> Option<&'static TestFile> {
89    FILE_ENTRIES.iter().find(|entry| entry.name == name)
90}
91
92/// Fetch a DICOM file by its relative path (`name`)
93/// if it has not been downloaded yet,
94/// and return its path in the local file system.
95///
96/// This function will download and cache the file locally in
97/// `target/dicom_test_files`.
98pub fn path(name: &str) -> Result<PathBuf, Error> {
99    let entry = lookup(name).ok_or(Error::NotFound)?;
100    let cached_path = get_data_path().join(entry.name);
101    if !cached_path.exists() {
102        download(name, &cached_path)?;
103    }
104    Ok(cached_path)
105}
106
107/// Return a vector of local paths to all DICOM test files available.
108///
109/// This function will download any test file not yet in the file system
110/// and cache the files locally to `target/dicom_test_files`.
111///
112/// Note that this operation may be unnecessarily expensive.
113/// Retrieving only the files that you need via [`path`] is preferred.
114#[deprecated(note = "Too expensive. Use `path` for the files that you need.")]
115pub fn all() -> Result<Vec<PathBuf>, Error> {
116    FILE_ENTRIES
117        .iter()
118        .map(|TestFile { name, .. }| path(name))
119        .collect::<Result<Vec<PathBuf>, Error>>()
120}
121
/// Work out the directory in which test files are cached.
///
/// Starting at the directory that holds the running executable,
/// walk up the ancestors until one named `target` is found,
/// then point at its `dicom_test_files` sub-directory.
///
/// # Panics
///
/// Panics if the executable path cannot be determined, if it has no parent,
/// or if none of its ancestors is named `target`.
pub(crate) fn get_data_path() -> PathBuf {
    let exe = env::current_exe().expect("exe path");
    let exe_dir = exe.parent().expect("exe parent");
    let wanted = std::ffi::OsStr::new("target");

    exe_dir
        .ancestors()
        .find(|dir| dir.file_name() == Some(wanted))
        .expect("Cannot find target directory")
        .join("dicom_test_files")
}
137
// Where the data sets live unless something below overrides it.
const DEFAULT_GITHUB_BASE_URL: &str =
    "https://raw.githubusercontent.com/robyoung/dicom-test-files/master/data/";

// Host serving raw repository contents, used to build per-branch URLs.
const RAW_GITHUBUSERCONTENT_URL: &str = "https://raw.githubusercontent.com";

/// Work out which base URL to download test files from.
///
/// In order of precedence:
///
/// 1. a non-empty `DICOM_TEST_FILES_URL` environment variable
///    (a trailing `/` is appended if missing);
/// 2. when `CI` is `true`, `GITHUB_REPOSITORY` ends with `/dicom-test-files`
///    and `GITHUB_EVENT_NAME` is `pull_request`:
///    the `data` folder of the branch named by `GITHUB_HEAD_REF`;
/// 3. the default repository URL.
///
/// An error is returned if `GITHUB_EVENT_NAME` or `GITHUB_HEAD_REF`
/// has to be read but is not available.
fn base_url() -> Result<Cow<'static, str>, VarError> {
    // explicit override, honoured only when it is not blank
    match std::env::var("DICOM_TEST_FILES_URL") {
        Ok(mut custom) if !custom.is_empty() => {
            if !custom.ends_with("/") {
                custom.push('/');
            }
            return Ok(Cow::Owned(custom));
        }
        _ => {}
    }

    // CI: always true on GitHub Actions
    let on_ci = std::env::var("CI").map_or(false, |flag| flag == "true");
    if !on_ci {
        return Ok(Cow::Borrowed(DEFAULT_GITHUB_BASE_URL));
    }

    // the branch redirection only applies to the dicom-test-files repository
    let repository = std::env::var("GITHUB_REPOSITORY").unwrap_or_default();
    if !repository.ends_with("/dicom-test-files") {
        return Ok(Cow::Borrowed(DEFAULT_GITHUB_BASE_URL));
    }

    if std::env::var("GITHUB_EVENT_NAME")? != "pull_request" {
        return Ok(Cow::Borrowed(DEFAULT_GITHUB_BASE_URL));
    }

    // GITHUB_HEAD_REF: name of the branch when it's a pull request
    let head_ref = std::env::var("GITHUB_HEAD_REF")?;
    Ok(Cow::Owned(format!(
        "{RAW_GITHUBUSERCONTENT_URL}/{repository}/{head_ref}/data/",
    )))
}
183
184fn download(name: &str, cached_path: &Path) -> Result<(), Error> {
185    let file_entry = lookup(name).ok_or(Error::NotFound)?;
186
187    let target_parent_dir = cached_path.parent().unwrap();
188    fs::create_dir_all(target_parent_dir)?;
189
190    let url = base_url().map_err(Error::ResolveUrl)?.into_owned() + &file_entry.real_file_name();
191    let resp = ureq::get(&url)
192        .call()
193        .map_err(|e| Error::Download(format!("Failed to download {url}: {e}")))?;
194
195    // write into temporary file first
196    let tempdir = tempfile::tempdir_in(target_parent_dir)?;
197    let tempfile_path = tempdir.path().join("tmpfile");
198
199    {
200        let mut target = fs::File::create(&tempfile_path)?;
201        std::io::copy(&mut resp.into_body().as_reader(), &mut target)?;
202    }
203
204    check_hash(&tempfile_path, file_entry)?;
205    match file_entry.compression {
206        Compression::None => {
207            // move to target destination
208            fs::rename(tempfile_path, cached_path)?;
209        }
210        Compression::Zstd => {
211            // decode and write to target destination
212            write_zstd(tempfile_path.as_path(), cached_path)?;
213
214            // remove temporary file
215            fs::remove_file(tempfile_path).unwrap_or_else(|e| {
216                eprintln!("[dicom-test-files] Failed to remove temporary file: {e}");
217            });
218        }
219    }
220
221    Ok(())
222}
223
224#[cfg(feature = "zstd")]
225fn write_zstd(source_path: impl AsRef<Path>, cached_path: impl AsRef<Path>) -> Result<()> {
226    let mut decoder = zstd::Decoder::new(fs::File::open(source_path)?)?;
227    let mut target = fs::File::create(cached_path)?;
228    std::io::copy(&mut decoder, &mut target)?;
229    Ok(())
230}
231
232#[cfg(not(feature = "zstd"))]
233fn write_zstd(_source_path: impl AsRef<Path>, _cached_path: impl AsRef<Path>) -> Result<()> {
234    Err(Error::ZstdRequired)
235}
236
237fn check_hash(path: impl AsRef<Path>, file_entry: &TestFile) -> Result<()> {
238    let mut file = fs::File::open(path.as_ref())?;
239    let mut hasher = Sha256::new();
240    io::copy(&mut file, &mut hasher)?;
241    let hash = hasher.finalize();
242
243    if format!("{:x}", hash) != file_entry.hash {
244        fs::remove_file(path)?;
245        return Err(Error::InvalidHash);
246    }
247
248    Ok(())
249}
250
#[cfg(test)]
mod tests {
    use super::*;

    /// Delete the cached copy of `name`, if any,
    /// so that the next call to `path` has to download it.
    fn evict(name: &str) {
        let _ = fs::remove_file(get_data_path().join(name));
    }

    /// Fetch `name` and check that the returned path
    /// ends in `expected_file_name` and exists.
    fn fetch_and_check(name: &str, expected_file_name: &str) -> PathBuf {
        let local = path(name).unwrap();

        assert_eq!(local.file_name().unwrap(), expected_file_name);
        assert!(local.exists());

        local
    }

    /// Same as `fetch_and_check`, but force a download by evicting `name` first.
    fn fetch_fresh(name: &str, expected_file_name: &str) -> PathBuf {
        evict(name);
        fetch_and_check(name, expected_file_name)
    }

    #[test]
    fn load_a_single_path_1() {
        fetch_fresh("pydicom/liver.dcm", "liver.dcm");
    }

    #[test]
    fn load_a_single_path_wg04_1() {
        let local = fetch_fresh("WG04/JPLY/NM1_JPLY", "NM1_JPLY");

        let metadata = std::fs::metadata(local).unwrap();
        // check size
        assert_eq!(metadata.len(), 9844);
    }

    #[cfg(feature = "zstd")]
    #[test]
    fn load_path_wg04_unc_1() {
        let local = fetch_fresh("WG04/REF/NM1_UNC", "NM1_UNC");

        let metadata = std::fs::metadata(local).unwrap();
        // check size
        assert_eq!(metadata.len(), 527066);
    }

    /// Ensure that it can load a segmentation from dcmqi test dataset
    #[cfg(feature = "zstd")]
    #[test]
    fn load_path_dcmqi_segmentation() {
        let local = fetch_fresh("dcmqi/segmentations/23x38x3/multiframe/mf.dcm", "mf.dcm");

        let metadata = std::fs::metadata(local).unwrap();
        // check size
        assert_eq!(metadata.len(), 8016);
    }

    /// Several threads requesting the same missing file must all succeed.
    #[test]
    fn load_a_single_path_concurrent() {
        const FILE: &str = "pydicom/CT_small.dcm";

        // Evict once, before any thread starts:
        // if each thread deleted the file itself, one thread could remove it
        // right after another fetched it, failing that thread's existence check.
        evict(FILE);

        let handles: Vec<_> = (0..4)
            .map(|_| {
                std::thread::spawn(|| {
                    fetch_and_check(FILE, "CT_small.dcm");
                })
            })
            .collect();
        for h in handles {
            h.join().unwrap();
        }
    }
}