1pub mod checksum;
2pub mod error;
3pub mod io;
4
5pub mod superblock;
7
8pub mod btree_v1;
10pub mod btree_v2;
11pub mod chunk_index;
12pub mod extensible_array;
13pub mod fixed_array;
14pub mod fractal_heap;
15pub mod global_heap;
16pub mod local_heap;
17pub mod shared_message_table;
18pub mod symbol_table;
19
20pub mod messages;
22pub mod object_header;
23
24pub mod attribute_api;
26pub mod dataset;
27pub mod datatype_api;
28pub mod group;
29pub mod reference;
30pub mod storage;
31
32pub mod filters;
34
35pub mod cache;
37
38use std::collections::HashMap;
39#[cfg(unix)]
40use std::ffi::{CString, OsStr};
41use std::fs::File;
42use std::io::ErrorKind;
43#[cfg(unix)]
44use std::os::fd::{AsRawFd, FromRawFd};
45#[cfg(unix)]
46use std::os::unix::ffi::OsStrExt;
47use std::path::{Component, Path, PathBuf};
48use std::sync::{Arc, OnceLock};
49
50use memmap2::Mmap;
51use cache::ChunkCache;
54use error::{Error, Result};
55use group::Group;
56use messages::HdfMessage;
57use object_header::ObjectHeader;
58use shared_message_table::SharedMessageTableRef;
59use storage::DynStorage;
60use superblock::Superblock;
61
62pub use attribute_api::Attribute;
64pub use cache::ChunkCacheStats;
65use dataset::DatasetTemplate;
66pub use dataset::{Dataset, DatasetChunk, DatasetChunkIterator, SliceInfo, SliceInfoElem};
67pub use datatype_api::{
68 dtype_element_size, CompoundField, EnumMember, H5Type, ReferenceType, StringEncoding,
69 StringPadding, StringSize, VarLenKind,
70};
71pub use error::ByteOrder;
72pub use filters::FilterRegistry;
73pub use messages::datatype::Datatype;
74pub use storage::{
75 BlockCacheStats, BlockCacheStorage, BytesStorage, FileStorage, MmapStorage,
76 RangeRequestStorage, Storage, StorageBuffer,
77};
78
79pub struct OpenOptions {
81 pub chunk_cache_bytes: usize,
83 pub chunk_cache_slots: usize,
85 pub filter_registry: Option<FilterRegistry>,
87 pub external_file_resolver: Option<Arc<dyn ExternalFileResolver>>,
90 pub external_link_resolver: Option<Arc<dyn ExternalLinkResolver>>,
92}
93
94impl Default for OpenOptions {
95 fn default() -> Self {
96 OpenOptions {
97 chunk_cache_bytes: 64 * 1024 * 1024,
98 chunk_cache_slots: 521,
99 filter_registry: None,
100 external_file_resolver: None,
101 external_link_resolver: None,
102 }
103 }
104}
105
106pub trait ExternalFileResolver: Send + Sync {
115 fn resolve_external_file(&self, filename: &str) -> Result<Option<DynStorage>>;
116}
117
118pub trait ExternalLinkResolver: Send + Sync {
127 fn resolve_external_link(&self, filename: &str) -> Result<Option<Hdf5File>>;
128}
129
130fn normalize_resolver_path(filename: &str, description: &str) -> Result<PathBuf> {
131 let path = Path::new(filename);
132 if path.as_os_str().is_empty() {
133 return Err(Error::InvalidData(format!("{description} path is empty")));
134 }
135
136 if path.is_absolute() {
137 return Err(Error::InvalidData(format!(
138 "{description} path must be relative: {filename}"
139 )));
140 }
141
142 let mut normalized = PathBuf::new();
143 for component in path.components() {
144 match component {
145 Component::Normal(name) => normalized.push(name),
146 Component::CurDir => {}
147 Component::Prefix(_) | Component::RootDir | Component::ParentDir => {
148 return Err(Error::InvalidData(format!(
149 "{description} path must stay within the resolver base directory: {filename}"
150 )));
151 }
152 }
153 }
154
155 if normalized.as_os_str().is_empty() {
156 return Err(Error::InvalidData(format!("{description} path is empty")));
157 }
158
159 Ok(normalized)
160}
161
/// Opens `relative_path` beneath `base_dir` on non-Unix targets.
///
/// Both the base directory and the joined candidate are canonicalized, and the
/// candidate must still start with the canonical base.
///
/// Returns `Ok(None)` when the base directory or the target does not exist.
///
/// # Errors
///
/// Returns `Error::InvalidData` when the canonical target lies outside the
/// base directory or turns out to be a directory; other I/O failures are
/// converted from `std::io::Error`.
// NOTE(review): the containment check and the open are separate steps, so a
// path swapped in between them is not detected here.
#[cfg(not(unix))]
fn open_external_file_within_base(
    base_dir: &Path,
    relative_path: &Path,
    description: &str,
    filename: &str,
) -> Result<Option<File>> {
    let base = match base_dir.canonicalize() {
        Ok(path) => path,
        Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None),
        Err(err) => return Err(err.into()),
    };
    let resolved = match base.join(relative_path).canonicalize() {
        Ok(path) => path,
        Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None),
        Err(err) => return Err(err.into()),
    };

    if !resolved.starts_with(&base) {
        return Err(Error::InvalidData(format!(
            "{description} path escapes the resolver base directory: {filename}"
        )));
    }

    let file = match File::open(&resolved) {
        Ok(file) => file,
        // The target can vanish between canonicalization and open; report it
        // as missing, exactly like the earlier lookups do.
        Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None),
        Err(err) => return Err(err.into()),
    };

    // Mirror the Unix implementation: a directory is never a valid target.
    if file.metadata()?.is_dir() {
        return Err(Error::InvalidData(format!(
            "{description} path resolves to a directory: {filename}"
        )));
    }

    Ok(Some(file))
}
189
190#[cfg(unix)]
191fn open_external_file_within_base(
192 base_dir: &Path,
193 relative_path: &Path,
194 description: &str,
195 filename: &str,
196) -> Result<Option<File>> {
197 let mut dir = match open_unix_path(
198 base_dir,
199 libc::O_RDONLY | libc::O_CLOEXEC | libc::O_NOFOLLOW,
200 ) {
201 Ok(file) => file,
202 Err(err) if path_lookup_is_missing(&err) => return Ok(None),
203 Err(err) if path_lookup_is_symlink(&err) => {
204 return Err(Error::InvalidData(format!(
205 "{description} resolver base directory must not be a symlink"
206 )));
207 }
208 Err(err) => return Err(err.into()),
209 };
210 if !dir.metadata()?.is_dir() {
211 return Ok(None);
212 }
213
214 let parts: Vec<&OsStr> = relative_path
215 .components()
216 .filter_map(|component| match component {
217 Component::Normal(name) => Some(name),
218 _ => None,
219 })
220 .collect();
221
222 let Some((leaf, parents)) = parts.split_last() else {
223 return Err(Error::InvalidData(format!("{description} path is empty")));
224 };
225
226 for parent in parents {
227 dir = match open_unix_child(
228 &dir,
229 parent,
230 libc::O_RDONLY | libc::O_CLOEXEC | libc::O_NOFOLLOW,
231 ) {
232 Ok(file) => file,
233 Err(err) if path_lookup_is_missing(&err) => return Ok(None),
234 Err(err) if path_lookup_is_symlink(&err) => {
235 return Err(symlink_resolver_error(description, filename));
236 }
237 Err(err) => return Err(err.into()),
238 };
239 if !dir.metadata()?.is_dir() {
240 return Ok(None);
241 }
242 }
243
244 let file = match open_unix_child(
245 &dir,
246 leaf,
247 libc::O_RDONLY | libc::O_CLOEXEC | libc::O_NOFOLLOW,
248 ) {
249 Ok(file) => file,
250 Err(err) if path_lookup_is_missing(&err) => return Ok(None),
251 Err(err) if path_lookup_is_symlink(&err) => {
252 return Err(symlink_resolver_error(description, filename));
253 }
254 Err(err) => return Err(err.into()),
255 };
256
257 if file.metadata()?.is_dir() {
258 return Err(Error::InvalidData(format!(
259 "{description} path resolves to a directory: {filename}"
260 )));
261 }
262
263 Ok(Some(file))
264}
265
266#[cfg(unix)]
267fn open_unix_path(path: &Path, flags: libc::c_int) -> std::io::Result<File> {
268 let path = CString::new(path.as_os_str().as_bytes()).map_err(|_| {
269 std::io::Error::new(
270 std::io::ErrorKind::InvalidInput,
271 "filesystem path contains an interior NUL byte",
272 )
273 })?;
274 let fd = unsafe { libc::open(path.as_ptr(), flags) };
275 file_from_unix_fd(fd)
276}
277
278#[cfg(unix)]
279fn open_unix_child(dir: &File, name: &OsStr, flags: libc::c_int) -> std::io::Result<File> {
280 let name = CString::new(name.as_bytes()).map_err(|_| {
281 std::io::Error::new(
282 std::io::ErrorKind::InvalidInput,
283 "filesystem path contains an interior NUL byte",
284 )
285 })?;
286 let fd = unsafe { libc::openat(dir.as_raw_fd(), name.as_ptr(), flags) };
287 file_from_unix_fd(fd)
288}
289
290#[cfg(unix)]
291fn file_from_unix_fd(fd: libc::c_int) -> std::io::Result<File> {
292 if fd < 0 {
293 Err(std::io::Error::last_os_error())
294 } else {
295 Ok(unsafe { File::from_raw_fd(fd) })
296 }
297}
298
299#[cfg(unix)]
300fn path_lookup_is_missing(err: &std::io::Error) -> bool {
301 err.kind() == ErrorKind::NotFound
302 || matches!(err.raw_os_error(), Some(code) if code == libc::ENOTDIR)
303}
304
305#[cfg(unix)]
306fn path_lookup_is_symlink(err: &std::io::Error) -> bool {
307 matches!(err.raw_os_error(), Some(code) if code == libc::ELOOP)
308}
309
310#[cfg(unix)]
311fn symlink_resolver_error(description: &str, filename: &str) -> Error {
312 Error::InvalidData(format!(
313 "{description} path escapes the resolver base directory or uses a symlink: {filename}"
314 ))
315}
316
/// [`ExternalFileResolver`] that serves external raw data files from a single
/// base directory on the local filesystem.
#[derive(Debug, Clone)]
pub struct FilesystemExternalFileResolver {
    /// Directory that every resolved file must live beneath.
    base_dir: PathBuf,
}
327
328impl FilesystemExternalFileResolver {
329 pub fn new(base_dir: impl Into<PathBuf>) -> Self {
330 Self {
331 base_dir: base_dir.into(),
332 }
333 }
334
335 fn relative_path_for(&self, filename: &str) -> Result<PathBuf> {
336 normalize_resolver_path(filename, "external raw data file")
337 }
338}
339
340impl ExternalFileResolver for FilesystemExternalFileResolver {
341 fn resolve_external_file(&self, filename: &str) -> Result<Option<DynStorage>> {
342 let relative_path = self.relative_path_for(filename)?;
343 let Some(file) = open_external_file_within_base(
344 &self.base_dir,
345 &relative_path,
346 "external raw data file",
347 filename,
348 )?
349 else {
350 return Ok(None);
351 };
352 Ok(Some(Arc::new(FileStorage::from_file(file)?)))
353 }
354}
355
356pub struct FilesystemExternalLinkResolver {
364 base_dir: PathBuf,
365 cache: parking_lot::Mutex<HashMap<PathBuf, Hdf5File>>,
366}
367
368impl FilesystemExternalLinkResolver {
369 pub fn new(base_dir: impl Into<PathBuf>) -> Self {
370 Self {
371 base_dir: base_dir.into(),
372 cache: parking_lot::Mutex::new(HashMap::new()),
373 }
374 }
375
376 fn relative_path_for(&self, filename: &str) -> Result<PathBuf> {
377 normalize_resolver_path(filename, "external link")
378 }
379}
380
381impl ExternalLinkResolver for FilesystemExternalLinkResolver {
382 fn resolve_external_link(&self, filename: &str) -> Result<Option<Hdf5File>> {
383 let relative_path = self.relative_path_for(filename)?;
384
385 if let Some(file) = self.cache.lock().get(&relative_path).cloned() {
386 return Ok(Some(file));
387 }
388
389 let Some(opened) = open_external_file_within_base(
390 &self.base_dir,
391 &relative_path,
392 "external link",
393 filename,
394 )?
395 else {
396 return Ok(None);
397 };
398
399 let file = Hdf5File::from_storage(Arc::new(FileStorage::from_file(opened)?))?;
400 self.cache.lock().insert(relative_path, file.clone());
401 Ok(Some(file))
402 }
403}
404
405pub type HeaderCache = Arc<parking_lot::Mutex<HashMap<u64, Arc<ObjectHeader>>>>;
407
408#[derive(Clone)]
414pub struct Hdf5File {
415 context: Arc<FileContext>,
416}
417
418pub(crate) struct FileContext {
419 pub(crate) storage: DynStorage,
420 pub(crate) superblock: Superblock,
421 pub(crate) chunk_cache: Arc<ChunkCache>,
422 pub(crate) header_cache: HeaderCache,
423 pub(crate) dataset_path_cache: Arc<parking_lot::Mutex<HashMap<String, Arc<DatasetTemplate>>>>,
424 pub(crate) filter_registry: Arc<FilterRegistry>,
425 pub(crate) external_file_resolver: Option<Arc<dyn ExternalFileResolver>>,
426 pub(crate) external_link_resolver: Option<Arc<dyn ExternalLinkResolver>>,
427 pub(crate) external_file_cache: parking_lot::Mutex<HashMap<String, DynStorage>>,
428 sohm_table: OnceLock<std::result::Result<Option<SharedMessageTableRef>, String>>,
429 full_file_cache: OnceLock<StorageBuffer>,
430}
431
432impl FileContext {
433 pub(crate) fn read_range(&self, offset: u64, len: usize) -> Result<StorageBuffer> {
434 self.storage.read_range(offset, len)
435 }
436
437 pub(crate) fn full_file_data(&self) -> Result<StorageBuffer> {
438 if let Some(buffer) = self.full_file_cache.get() {
439 return Ok(buffer.clone());
440 }
441
442 let len = usize::try_from(self.storage.len()).map_err(|_| {
443 Error::InvalidData("file size exceeds platform usize capacity".to_string())
444 })?;
445 let buffer = self.storage.read_range(0, len)?;
446 let _ = self.full_file_cache.set(buffer);
447 Ok(self
448 .full_file_cache
449 .get()
450 .expect("full-file buffer must exist after successful initialization")
451 .clone())
452 }
453
454 pub(crate) fn get_or_parse_header(&self, addr: u64) -> Result<Arc<ObjectHeader>> {
455 {
456 let cache = self.header_cache.lock();
457 if let Some(hdr) = cache.get(&addr) {
458 return Ok(Arc::clone(hdr));
459 }
460 }
461
462 let mut hdr = ObjectHeader::parse_at_storage(
463 self.storage.as_ref(),
464 addr,
465 self.superblock.offset_size,
466 self.superblock.length_size,
467 )?;
468 hdr.resolve_shared_messages_storage_with_sohm(
469 self.storage.as_ref(),
470 self.superblock.offset_size,
471 self.superblock.length_size,
472 |heap_id, message_type| self.resolve_sohm_message(heap_id, message_type),
473 )?;
474 let arc = Arc::new(hdr);
475 let mut cache = self.header_cache.lock();
476 cache.insert(addr, Arc::clone(&arc));
477 Ok(arc)
478 }
479
480 fn resolve_sohm_message(
481 &self,
482 heap_id: &[u8],
483 message_type: u16,
484 ) -> Result<Option<HdfMessage>> {
485 let Some(table) = self.sohm_table()? else {
486 return Ok(None);
487 };
488 table.resolve_heap_message(
489 heap_id,
490 message_type,
491 self.storage.as_ref(),
492 self.superblock.offset_size,
493 self.superblock.length_size,
494 Some(self.filter_registry.as_ref()),
495 )
496 }
497
498 fn sohm_table(&self) -> Result<Option<SharedMessageTableRef>> {
499 let cached = self
500 .sohm_table
501 .get_or_init(|| self.load_sohm_table().map_err(|err| err.to_string()));
502 match cached {
503 Ok(table) => Ok(table.clone()),
504 Err(message) => Err(Error::InvalidData(format!(
505 "failed to load SOHM table: {message}"
506 ))),
507 }
508 }
509
510 fn load_sohm_table(&self) -> Result<Option<SharedMessageTableRef>> {
511 let Some(extension_address) = self.superblock.extension_address else {
512 return Ok(None);
513 };
514 let extension = ObjectHeader::parse_at_storage(
515 self.storage.as_ref(),
516 extension_address,
517 self.superblock.offset_size,
518 self.superblock.length_size,
519 )?;
520
521 let shared_table = extension.messages.iter().find_map(|message| match message {
522 HdfMessage::SharedTable(table) => Some(table),
523 _ => None,
524 });
525 let Some(shared_table) = shared_table else {
526 return Ok(None);
527 };
528
529 let table = crate::shared_message_table::SharedMessageTable::parse_at_storage(
530 self.storage.as_ref(),
531 shared_table.table_address,
532 shared_table.num_indices,
533 self.superblock.offset_size,
534 )?;
535 Ok(Some(Arc::new(table)))
536 }
537
538 pub(crate) fn resolve_external_file(&self, filename: &str) -> Result<Option<DynStorage>> {
539 if let Some(storage) = self.external_file_cache.lock().get(filename).cloned() {
540 return Ok(Some(storage));
541 }
542
543 let Some(resolver) = self.external_file_resolver.as_ref() else {
544 return Ok(None);
545 };
546 let Some(storage) = resolver.resolve_external_file(filename)? else {
547 return Ok(None);
548 };
549 self.external_file_cache
550 .lock()
551 .insert(filename.to_string(), storage.clone());
552 Ok(Some(storage))
553 }
554}
555
556impl Hdf5File {
557 fn from_storage_impl(storage: DynStorage, options: OpenOptions) -> Result<Self> {
558 let superblock = Superblock::parse_from_storage(storage.as_ref())?;
559 let cache = Arc::new(ChunkCache::new(
560 options.chunk_cache_bytes,
561 options.chunk_cache_slots,
562 ));
563 let registry = options.filter_registry.unwrap_or_default();
564 let external_file_resolver = options.external_file_resolver;
565 let external_link_resolver = options.external_link_resolver;
566
567 Ok(Hdf5File {
568 context: Arc::new(FileContext {
569 storage,
570 superblock,
571 chunk_cache: cache,
572 header_cache: Arc::new(parking_lot::Mutex::new(HashMap::new())),
573 dataset_path_cache: Arc::new(parking_lot::Mutex::new(HashMap::new())),
574 filter_registry: Arc::new(registry),
575 external_file_resolver,
576 external_link_resolver,
577 external_file_cache: parking_lot::Mutex::new(HashMap::new()),
578 sohm_table: OnceLock::new(),
579 full_file_cache: OnceLock::new(),
580 }),
581 })
582 }
583
584 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
586 Self::open_with_options(path, OpenOptions::default())
587 }
588
589 pub fn open_with_options(path: impl AsRef<Path>, options: OpenOptions) -> Result<Self> {
591 let path = path.as_ref();
592 Self::from_storage_with_options(Arc::new(FileStorage::open(path)?), options)
593 }
594
595 pub fn from_bytes(data: &[u8]) -> Result<Self> {
599 Self::from_bytes_with_options(data, OpenOptions::default())
600 }
601
602 pub fn from_bytes_with_options(data: &[u8], options: OpenOptions) -> Result<Self> {
606 Self::from_vec_with_options(data.to_vec(), options)
607 }
608
609 pub fn from_vec(data: Vec<u8>) -> Result<Self> {
611 Self::from_vec_with_options(data, OpenOptions::default())
612 }
613
614 pub fn from_vec_with_options(data: Vec<u8>, options: OpenOptions) -> Result<Self> {
616 Self::from_storage_with_options(Arc::new(BytesStorage::new(data)), options)
617 }
618
619 pub fn from_mmap_with_options(mmap: Mmap, options: OpenOptions) -> Result<Self> {
623 Self::from_storage_with_options(Arc::new(MmapStorage::new(mmap)), options)
624 }
625
626 pub fn from_storage(storage: DynStorage) -> Result<Self> {
628 Self::from_storage_with_options(storage, OpenOptions::default())
629 }
630
631 pub fn from_storage_with_options(storage: DynStorage, options: OpenOptions) -> Result<Self> {
633 Self::from_storage_impl(storage, options)
634 }
635
636 pub fn superblock(&self) -> &Superblock {
638 &self.context.superblock
639 }
640
641 pub fn storage(&self) -> &dyn Storage {
643 self.context.storage.as_ref()
644 }
645
646 pub fn chunk_cache_stats(&self) -> ChunkCacheStats {
648 self.context.chunk_cache.stats()
649 }
650
651 pub fn get_or_parse_header(&self, addr: u64) -> Result<Arc<ObjectHeader>> {
655 self.context.get_or_parse_header(addr)
656 }
657
658 pub fn root_group(&self) -> Result<Group> {
660 let addr = self.context.superblock.root_object_header_address()?;
661
662 Ok(Group::new(
663 self.context.clone(),
664 addr,
665 "/".to_string(),
666 addr, ))
668 }
669
670 pub fn dataset(&self, path: &str) -> Result<Dataset> {
672 let parts: Vec<&str> = path
673 .trim_start_matches('/')
674 .split('/')
675 .filter(|s| !s.is_empty())
676 .collect();
677 let normalized_path = format!("/{}", parts.join("/"));
678
679 if parts.is_empty() {
680 return Err(Error::DatasetNotFound(path.to_string()).with_context(path));
681 }
682
683 if let Some(template) = self
684 .context
685 .dataset_path_cache
686 .lock()
687 .get(&normalized_path)
688 .cloned()
689 {
690 return Ok(Dataset::from_template(self.context.clone(), template));
691 }
692
693 let mut group = self.root_group()?;
694 for &part in &parts[..parts.len() - 1] {
695 group = group.group(part).map_err(|e| e.with_context(path))?;
696 }
697
698 let dataset = group
699 .dataset(parts[parts.len() - 1])
700 .map_err(|e| e.with_context(path))?;
701 if Arc::ptr_eq(&dataset.context, &self.context) {
702 self.context
703 .dataset_path_cache
704 .lock()
705 .insert(normalized_path, dataset.template());
706 }
707 Ok(dataset)
708 }
709
710 pub fn group(&self, path: &str) -> Result<Group> {
712 let parts: Vec<&str> = path
713 .trim_start_matches('/')
714 .split('/')
715 .filter(|s| !s.is_empty())
716 .collect();
717
718 let mut group = self.root_group()?;
719 for &part in &parts {
720 group = group.group(part)?;
721 }
722
723 Ok(group)
724 }
725}
726
#[cfg(test)]
mod tests {
    use super::*;

    /// Resolves `name` and returns the resulting error, panicking with
    /// `message` if the lookup unexpectedly succeeds.
    fn external_file_error(
        resolver: &FilesystemExternalFileResolver,
        name: &str,
        message: &str,
    ) -> Error {
        match resolver.resolve_external_file(name) {
            Ok(_) => panic!("{message}"),
            Err(err) => err,
        }
    }

    /// Resolves the link `name` and returns the resulting error, panicking
    /// with `message` if the lookup unexpectedly succeeds.
    fn external_link_error(
        resolver: &FilesystemExternalLinkResolver,
        name: &str,
        message: &str,
    ) -> Error {
        match resolver.resolve_external_link(name) {
            Ok(_) => panic!("{message}"),
            Err(err) => err,
        }
    }

    #[test]
    fn open_options_default() {
        let OpenOptions {
            chunk_cache_bytes,
            chunk_cache_slots,
            external_file_resolver,
            ..
        } = OpenOptions::default();

        assert_eq!(chunk_cache_bytes, 64 * 1024 * 1024);
        assert_eq!(chunk_cache_slots, 521);
        assert!(external_file_resolver.is_none());
    }

    #[test]
    fn invalid_file() {
        assert!(Hdf5File::from_bytes(b"this is not an HDF5 file").is_err());
    }

    #[test]
    fn btree_v2_chunked_fixture_uses_btree_v2_index() {
        use crate::messages::layout::{ChunkIndexing, DataLayout};

        let workspace_root = Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap();
        let fixture = workspace_root.join("testdata/hdf5/btree_v2_chunked.h5");
        if !fixture.exists() {
            // The fixture is optional; skip instead of failing when it is absent.
            eprintln!("SKIPPED: fixture btree_v2_chunked.h5 not found");
            return;
        }

        let file = Hdf5File::open(fixture).unwrap();
        let dataset = file.dataset("/data").unwrap();
        let uses_btree_v2 = matches!(
            dataset.layout,
            DataLayout::Chunked {
                chunk_indexing: Some(ChunkIndexing::BTreeV2),
                ..
            }
        );
        assert!(uses_btree_v2);
    }

    #[test]
    fn filesystem_external_file_resolver_reads_relative_file() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("raw.bin"), b"abcdef").unwrap();

        let resolver = FilesystemExternalFileResolver::new(dir.path());
        let storage = resolver
            .resolve_external_file("raw.bin")
            .unwrap()
            .expect("raw file should resolve");

        let bytes = storage.read_range(2, 3).unwrap();
        assert_eq!(bytes.as_ref(), b"cde");
    }

    #[test]
    fn filesystem_external_file_resolver_rejects_absolute_path() {
        let dir = tempfile::tempdir().unwrap();
        let absolute = dir.path().join("raw.bin");
        std::fs::write(&absolute, b"abcdef").unwrap();

        let resolver = FilesystemExternalFileResolver::new(dir.path());
        let err = external_file_error(
            &resolver,
            absolute.to_str().unwrap(),
            "absolute external file path should be rejected",
        );
        assert!(err.to_string().contains("must be relative"));
    }

    #[test]
    fn filesystem_external_file_resolver_rejects_parent_component() {
        let dir = tempfile::tempdir().unwrap();
        let resolver = FilesystemExternalFileResolver::new(dir.path());

        let err = external_file_error(
            &resolver,
            "../raw.bin",
            "parent external file path should be rejected",
        );
        assert!(err.to_string().contains("resolver base directory"));
    }

    #[cfg(unix)]
    #[test]
    fn filesystem_external_file_resolver_rejects_symlink_escape() {
        use std::os::unix::fs::symlink;

        let dir = tempfile::tempdir().unwrap();
        let outside = tempfile::tempdir().unwrap();
        let target = outside.path().join("raw.bin");
        std::fs::write(&target, b"abcdef").unwrap();
        symlink(&target, dir.path().join("raw.bin")).unwrap();

        let resolver = FilesystemExternalFileResolver::new(dir.path());
        let err = external_file_error(&resolver, "raw.bin", "symlink escape should be rejected");
        assert!(err.to_string().contains("escapes"));
    }

    #[cfg(unix)]
    #[test]
    fn filesystem_external_file_resolver_rejects_symlink_inside_base() {
        use std::os::unix::fs::symlink;

        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("raw.bin"), b"abcdef").unwrap();
        symlink("raw.bin", dir.path().join("link.bin")).unwrap();

        let resolver = FilesystemExternalFileResolver::new(dir.path());
        let err = external_file_error(
            &resolver,
            "link.bin",
            "symlinks should be rejected even when they point inside the base",
        );
        assert!(err.to_string().contains("symlink"));
    }

    #[cfg(unix)]
    #[test]
    fn filesystem_external_file_resolver_rejects_symlink_directory_component() {
        use std::os::unix::fs::symlink;

        let dir = tempfile::tempdir().unwrap();
        let real_dir = dir.path().join("real");
        std::fs::create_dir(&real_dir).unwrap();
        std::fs::write(real_dir.join("raw.bin"), b"abcdef").unwrap();
        symlink("real", dir.path().join("linkdir")).unwrap();

        let resolver = FilesystemExternalFileResolver::new(dir.path());
        let err = external_file_error(
            &resolver,
            "linkdir/raw.bin",
            "symlinked directory components should be rejected",
        );
        assert!(err.to_string().contains("symlink"));
    }

    #[test]
    fn filesystem_external_link_resolver_rejects_absolute_path() {
        let dir = tempfile::tempdir().unwrap();
        let absolute = dir.path().join("linked.h5");
        std::fs::write(&absolute, b"not really hdf5").unwrap();

        let resolver = FilesystemExternalLinkResolver::new(dir.path());
        let err = external_link_error(
            &resolver,
            absolute.to_str().unwrap(),
            "absolute external link path should be rejected",
        );
        assert!(err.to_string().contains("must be relative"));
    }

    #[test]
    fn filesystem_external_link_resolver_rejects_parent_component() {
        let dir = tempfile::tempdir().unwrap();
        let resolver = FilesystemExternalLinkResolver::new(dir.path());

        let err = external_link_error(
            &resolver,
            "../linked.h5",
            "parent external link path should be rejected",
        );
        assert!(err.to_string().contains("resolver base directory"));
    }

    #[cfg(unix)]
    #[test]
    fn filesystem_external_link_resolver_rejects_symlink_escape() {
        use std::os::unix::fs::symlink;

        let dir = tempfile::tempdir().unwrap();
        let outside = tempfile::tempdir().unwrap();
        let target = outside.path().join("linked.h5");
        std::fs::write(&target, b"not really hdf5").unwrap();
        symlink(&target, dir.path().join("linked.h5")).unwrap();

        let resolver = FilesystemExternalLinkResolver::new(dir.path());
        let err = external_link_error(&resolver, "linked.h5", "symlink escape should be rejected");
        assert!(err.to_string().contains("escapes"));
    }
}