dicom_test_files/
lib.rs

1//! A collection of DICOM files for testing DICOM parsers.
2//!
//! To avoid users having to download all the files, they are downloaded as they
//! are needed and cached in the `target/dicom_test_files` directory.
5//!
6//! The [`path`] function will automatically download the requested file
7//! and return a file path.
8//!
9//! ```no_run
10//! use dicom_test_files::path;
11//!
12//! # fn main() -> Result<(), dicom_test_files::Error> {
13//! let liver = path("pydicom/liver.dcm")?;
14//! // then open the file as you will (e.g. using DICOM-rs)
15//! # /*
16//! let dicom_data = dicom::object::open(liver);
17//! # */
18//! # Ok(())
19//! # }
20//! ```
21//! 
22//! ## Source of data
23//! 
24//! By default,
25//! all data sets are hosted in
26//! the `dicom-test-files` project's [main repository][1],
27//! in the `data` folder.
28//! Inspect this folder to know what DICOM test files are available.
29//!
30//! To override this source,
31//! you can set the environment variable `DICOM_TEST_FILES_URL`
32//! to the base path of the data set's raw contents
33//! (usually ending with `data` or `data/`).
34//! 
35//! ```sh
//! export DICOM_TEST_FILES_URL=https://raw.githubusercontent.com/Me/dicom-test-files/new/more-dicom/data
37//! cargo test
38//! ```
39//! 
40//! [1]: https://github.com/robyoung/dicom-test-files/tree/master/data
41
42#![deny(missing_docs)]
43
44use sha2::{Digest, Sha256};
45use test_file::{TestFile, Compression};
46use std::{
47    borrow::Cow,
48    env::{self, VarError},
49    fs, io,
50    path::{Path, PathBuf},
51};
52
53mod entries;
54
55pub(crate) mod test_file;
56
57
58use entries::FILE_ENTRIES;
59
/// Error type for `dicom_test_files`
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`],
/// so that it can be boxed as a `dyn Error`
/// and combined with other error types via `?`.
#[derive(Debug)]
pub enum Error {
    /// Returned when the provided name does not exist in the hash list
    ///
    /// If you are sure it does exist you may need to update to a newer version of dicom_test_files.
    NotFound,
    /// Returned when the hash of the downloaded file does not match the previously generated hash
    ///
    /// This may mean you need to update to a newer version of dicom_test_files.
    InvalidHash,
    /// Returned when the file cannot be downloaded. Contains the generated URL.
    Download(String),
    /// Wrapped errors from std::io
    Io(io::Error),
    /// Failed to resolve data source URL
    ResolveUrl(VarError),
    /// Feature "zstd" is required for this file
    ZstdRequired,
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Wrapped errors are exposed through `source()`,
        // so their messages are not repeated here.
        match self {
            Error::NotFound => f.write_str("test file not found in the list of known files"),
            Error::InvalidHash => {
                f.write_str("hash of the downloaded file does not match the expected hash")
            }
            Error::Download(message) => f.write_str(message),
            Error::Io(_) => f.write_str("I/O error"),
            Error::ResolveUrl(_) => f.write_str("failed to resolve data source URL"),
            Error::ZstdRequired => f.write_str("feature \"zstd\" is required for this file"),
        }
    }
}

impl std::error::Error for Error {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::Io(e) => Some(e),
            Error::ResolveUrl(e) => Some(e),
            Error::NotFound | Error::InvalidHash | Error::Download(_) | Error::ZstdRequired => None,
        }
    }
}
80
81impl From<io::Error> for Error {
82    fn from(err: io::Error) -> Error {
83        Error::Io(err)
84    }
85}
86
/// Crate-local result alias whose error type defaults to [`Error`].
type Result<T, E = Error> = std::result::Result<T, E>;
88
89fn lookup(name: &str) -> Option<&'static TestFile> {
90    FILE_ENTRIES.iter().find(|entry| entry.name == name)
91}
92
93/// Fetch a DICOM file by its relative path (`name`)
94/// if it has not been downloaded yet,
95/// and return its path in the local file system.
96///
97/// This function will download and cache the file locally in
98/// `target/dicom_test_files`.
99pub fn path(name: &str) -> Result<PathBuf, Error> {
100    let entry = lookup(name).ok_or(Error::NotFound)?;
101    let cached_path = get_data_path().join(entry.name);
102    if !cached_path.exists() {
103        download(name, &cached_path)?;
104    }
105    Ok(cached_path)
106}
107
108/// Return a vector of local paths to all DICOM test files available.
109///
110/// This function will download any test file not yet in the file system
111/// and cache the files locally to `target/dicom_test_files`.
112///
113/// Note that this operation may be unnecessarily expensive.
114/// Retrieving only the files that you need via [`path`] is preferred.
115#[deprecated(note = "Too expensive. Use `path` for the files that you need.")]
116pub fn all() -> Result<Vec<PathBuf>, Error> {
117    FILE_ENTRIES
118        .iter()
119        .map(|TestFile { name, ..}| path(name))
120        .collect::<Result<Vec<PathBuf>, Error>>()
121}
122
/// Locate the directory where test files are cached.
///
/// Starting at the directory containing the current executable,
/// this walks up the ancestors until a directory named `target` is found
/// and returns its `dicom_test_files` subdirectory.
///
/// # Panics
///
/// Panics if the executable path cannot be determined
/// or if no ancestor directory is named `target`.
pub(crate) fn get_data_path() -> PathBuf {
    let exe = env::current_exe().expect("exe path");
    let start = exe.parent().expect("exe parent");

    let target_dir = start
        .ancestors()
        .find(|dir| dir.file_name() == Some(std::ffi::OsStr::new("target")))
        .expect("Cannot find target directory");

    target_dir.join("dicom_test_files")
}
138
/// Base URL of the raw test data in the main repository (with trailing slash).
const DEFAULT_GITHUB_BASE_URL: &str =
    "https://raw.githubusercontent.com/robyoung/dicom-test-files/master/data/";

/// Host prefix used to build raw content URLs for other repositories or branches.
const RAW_GITHUBUSERCONTENT_URL: &str = "https://raw.githubusercontent.com";

/// Work out the base URL to download test files from.
///
/// Resolution order:
///
/// 1. A non-empty `DICOM_TEST_FILES_URL` environment variable
///    (a trailing `/` is appended when missing).
/// 2. When `CI` is `true`, `GITHUB_REPOSITORY` ends with `/dicom-test-files`
///    and `GITHUB_EVENT_NAME` is `pull_request`:
///    the `data` folder at the branch named by `GITHUB_HEAD_REF`.
/// 3. Otherwise, the default URL of the main repository.
///
/// # Errors
///
/// In the CI case for a `dicom-test-files` repository,
/// a missing or invalid `GITHUB_EVENT_NAME` or `GITHUB_HEAD_REF`
/// is propagated as a [`VarError`].
fn base_url() -> Result<Cow<'static, str>, VarError> {
    // an explicit override always wins, unless it is empty
    match env::var("DICOM_TEST_FILES_URL") {
        Ok(mut custom) if !custom.is_empty() => {
            if !custom.ends_with('/') {
                custom.push('/');
            }
            return Ok(Cow::Owned(custom));
        }
        _ => {}
    }

    // CI: always true on GitHub Actions
    let on_ci = env::var("CI").map_or(false, |value| value == "true");
    if !on_ci {
        return Ok(Cow::Borrowed(DEFAULT_GITHUB_BASE_URL));
    }

    // only look at the pull request when the target repository is dicom-test-files
    let repository = env::var("GITHUB_REPOSITORY").unwrap_or_default();
    if !repository.ends_with("/dicom-test-files") {
        return Ok(Cow::Borrowed(DEFAULT_GITHUB_BASE_URL));
    }

    // GITHUB_EVENT_NAME: can be pull_request
    if env::var("GITHUB_EVENT_NAME")? != "pull_request" {
        return Ok(Cow::Borrowed(DEFAULT_GITHUB_BASE_URL));
    }

    // GITHUB_HEAD_REF: name of the branch when it's a pull request
    let head_ref = env::var("GITHUB_HEAD_REF")?;
    Ok(Cow::Owned(format!(
        "{}/{}/{}/data/",
        RAW_GITHUBUSERCONTENT_URL, repository, head_ref
    )))
}
185
186fn download(name: &str, cached_path: &PathBuf) -> Result<(), Error> {
187    let file_entry = lookup(name).ok_or(Error::NotFound)?;
188
189    let target_parent_dir = cached_path.as_path().parent().unwrap();
190    fs::create_dir_all(target_parent_dir)?;
191
192    let url = base_url().map_err(Error::ResolveUrl)?.to_owned() + file_entry.real_file_name();
193    let resp = ureq::get(url.as_ref())
194        .call()
195        .map_err(|e| Error::Download(format!("Failed to download {}: {}", url, e)))?;
196
197    // write into temporary file first
198    let tempdir = tempfile::tempdir_in(target_parent_dir)?;
199    let tempfile_path = tempdir.path().join("tmpfile");
200
201    {
202        let mut target = fs::File::create(&tempfile_path)?;
203        std::io::copy(&mut resp.into_body().as_reader(), &mut target)?;
204    }
205
206    check_hash(&tempfile_path, file_entry)?;
207    match file_entry.compression {
208        Compression::None => {
209            // move to target destination
210            fs::rename(tempfile_path, cached_path.as_path())?;
211        },
212        Compression::Zstd => {
213            // decode and write to target destination
214            write_zstd(tempfile_path.as_path(), cached_path.as_path())?;
215
216            // remove temporary file
217            fs::remove_file(tempfile_path).unwrap_or_else(|e| {
218                eprintln!("[dicom-test-files] Failed to remove temporary file: {}", e);
219            });
220        }
221    }
222
223    Ok(())
224}
225
/// Decode the Zstandard-compressed file at `source_path`
/// and write the decoded bytes to a newly created file at `cached_path`.
#[cfg(feature = "zstd")]
fn write_zstd(source_path: impl AsRef<Path>, cached_path: impl AsRef<Path>) -> Result<()> {
    let compressed = fs::File::open(source_path)?;
    let mut reader = zstd::Decoder::new(compressed)?;
    let mut writer = fs::File::create(cached_path)?;
    match std::io::copy(&mut reader, &mut writer) {
        Ok(_) => Ok(()),
        Err(e) => Err(Error::from(e)),
    }
}
233
234#[cfg(not(feature = "zstd"))]
235fn write_zstd(_source_path: impl AsRef<Path>, _cached_path: impl AsRef<Path>) -> Result<()> {
236    Err(Error::ZstdRequired)
237}
238
239fn check_hash(path: impl AsRef<Path>, file_entry: &TestFile) -> Result<()> {
240    let mut file = fs::File::open(path.as_ref())?;
241    let mut hasher = Sha256::new();
242    io::copy(&mut file, &mut hasher)?;
243    let hash = hasher.finalize();
244
245    if format!("{:x}", hash) != file_entry.hash {
246        fs::remove_file(path)?;
247        return Err(Error::InvalidHash);
248    }
249
250    Ok(())
251}
252
#[cfg(test)]
mod tests {
    use super::*;

    /// Fetch `name` through [`path`] and check that the returned path
    /// exists and has the file name `expected_file_name`.
    fn fetch(name: &str, expected_file_name: &str) -> PathBuf {
        let local = path(name).unwrap();

        assert_eq!(local.file_name().unwrap(), expected_file_name);
        assert!(local.exists());

        local
    }

    /// Same as [`fetch`], but removes any cached copy beforehand
    /// so that the download is exercised.
    fn fetch_fresh(name: &str, expected_file_name: &str) -> PathBuf {
        // ensure it does not exist; a missing file is fine
        let _ = fs::remove_file(get_data_path().join(name));

        fetch(name, expected_file_name)
    }

    #[test]
    fn load_a_single_path_1() {
        fetch_fresh("pydicom/liver.dcm", "liver.dcm");
    }

    #[test]
    fn load_a_single_path_wg04_1() {
        let local = fetch_fresh("WG04/JPLY/NM1_JPLY", "NM1_JPLY");

        // check size
        let metadata = std::fs::metadata(local).unwrap();
        assert_eq!(metadata.len(), 9844);
    }

    #[cfg(feature = "zstd")]
    #[test]
    fn load_path_wg04_unc_1() {
        let local = fetch_fresh("WG04/REF/NM1_UNC", "NM1_UNC");

        // check size
        let metadata = std::fs::metadata(local).unwrap();
        assert_eq!(metadata.len(), 527066);
    }

    #[test]
    fn load_a_single_path_concurrent() {
        const FILE: &str = "pydicom/CT_small.dcm";

        // Remove the cached copy once, before any thread starts.
        // Removing it inside each thread would let one thread delete the file
        // right after another thread has fetched it and is about to check it.
        let _ = fs::remove_file(get_data_path().join(FILE));

        let handles: Vec<_> = (0..4)
            .map(|_| {
                std::thread::spawn(|| {
                    fetch(FILE, "CT_small.dcm");
                })
            })
            .collect();
        for h in handles {
            h.join().unwrap();
        }
    }
}