1use std::collections::HashMap;
4
5use crate::attribute::extract_attributes_full;
6use crate::chunk_cache::ChunkCache;
7use crate::data_layout::DataLayout;
8use crate::data_read;
9use crate::dataspace::Dataspace;
10use crate::datatype::Datatype;
11use crate::error::{Error, FormatError};
12use crate::filter_pipeline::FilterPipeline;
13use crate::group_v1::GroupEntry;
14use crate::group_v2;
15use crate::message_type::MessageType;
16use crate::object_header::ObjectHeader;
17use crate::signature;
18use crate::superblock::Superblock;
19
20use crate::types::{attrs_to_map, classify_datatype, AttrValue, DType};
21
22pub struct File {
28 data: Vec<u8>,
29 superblock: Superblock,
30 chunk_cache: ChunkCache,
31 addr_offset: u64,
33}
34
35impl File {
36 pub fn open<P: AsRef<std::path::Path>>(path: P) -> Result<Self, Error> {
38 let bytes = std::fs::read(path.as_ref()).map_err(Error::Io)?;
39 Self::from_bytes(bytes)
40 }
41
42 pub fn from_bytes(data: Vec<u8>) -> Result<Self, Error> {
44 let sig_offset = signature::find_signature(&data)?;
45 let mut superblock = Superblock::parse(&data, sig_offset)?;
46 let addr_offset = superblock.base_address;
47 superblock.root_group_address += addr_offset;
49 Ok(Self {
50 data,
51 superblock,
52 chunk_cache: ChunkCache::new(),
53 addr_offset,
54 })
55 }
56
57 pub fn root(&self) -> Group<'_> {
59 Group {
60 file: self,
61 address: self.superblock.root_group_address,
63 }
64 }
65
66 pub fn dataset(&self, path: &str) -> Result<Dataset<'_>, Error> {
68 let addr = group_v2::resolve_path_any(&self.data, &self.superblock, path)?;
69 let hdr = self.parse_header(addr)?;
70 if !has_message(&hdr, MessageType::DataLayout) {
71 return Err(Error::NotADataset(path.to_string()));
72 }
73 Ok(Dataset {
74 file: self,
75 header: hdr,
76 })
77 }
78
79 pub fn group(&self, path: &str) -> Result<Group<'_>, Error> {
81 let addr = group_v2::resolve_path_any(&self.data, &self.superblock, path)?;
82 Ok(Group {
83 file: self,
84 address: addr,
85 })
86 }
87
88 pub fn as_bytes(&self) -> &[u8] {
90 &self.data
91 }
92
93 pub fn superblock(&self) -> &Superblock {
95 &self.superblock
96 }
97
98 fn parse_header(&self, address: u64) -> Result<ObjectHeader, FormatError> {
99 ObjectHeader::parse_with_base(
100 &self.data,
101 address as usize,
102 self.superblock.offset_size,
103 self.superblock.length_size,
104 self.addr_offset,
105 )
106 }
107
108 fn offset_size(&self) -> u8 {
109 self.superblock.offset_size
110 }
111
112 fn length_size(&self) -> u8 {
113 self.superblock.length_size
114 }
115}
116
117impl std::fmt::Debug for File {
118 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
119 f.debug_struct("File")
120 .field("size", &self.data.len())
121 .field("superblock_version", &self.superblock.version)
122 .finish()
123 }
124}
125
126pub struct Group<'f> {
132 file: &'f File,
133 address: u64,
134}
135
136impl<'f> Group<'f> {
137 pub fn datasets(&self) -> Result<Vec<String>, Error> {
139 let entries = self.children()?;
140 let mut names = Vec::new();
141 for entry in &entries {
142 let hdr = self.file.parse_header(entry.object_header_address)?;
143 if has_message(&hdr, MessageType::DataLayout) {
144 names.push(entry.name.clone());
145 }
146 }
147 Ok(names)
148 }
149
150 pub fn groups(&self) -> Result<Vec<String>, Error> {
152 let entries = self.children()?;
153 let mut names = Vec::new();
154 for entry in &entries {
155 let hdr = self.file.parse_header(entry.object_header_address)?;
156 if is_group(&hdr) {
157 names.push(entry.name.clone());
158 }
159 }
160 Ok(names)
161 }
162
163 pub fn attrs(&self) -> Result<HashMap<String, AttrValue>, Error> {
165 let hdr = self.file.parse_header(self.address)?;
166 let attr_msgs = extract_attributes_full(
167 &self.file.data,
168 &hdr,
169 self.file.offset_size(),
170 self.file.length_size(),
171 )?;
172 Ok(attrs_to_map(
173 &attr_msgs,
174 &self.file.data,
175 self.file.offset_size(),
176 self.file.length_size(),
177 ))
178 }
179
180 pub fn dataset(&self, name: &str) -> Result<Dataset<'f>, Error> {
182 let entries = self.children()?;
183 let entry = entries
184 .iter()
185 .find(|e| e.name == name)
186 .ok_or_else(|| Error::Format(FormatError::PathNotFound(name.to_string())))?;
187 let hdr = self.file.parse_header(entry.object_header_address)?;
188 if !has_message(&hdr, MessageType::DataLayout) {
189 return Err(Error::NotADataset(name.to_string()));
190 }
191 Ok(Dataset {
192 file: self.file,
193 header: hdr,
194 })
195 }
196
197 pub fn group(&self, name: &str) -> Result<Group<'f>, Error> {
199 let entries = self.children()?;
200 let entry = entries
201 .iter()
202 .find(|e| e.name == name)
203 .ok_or_else(|| Error::Format(FormatError::PathNotFound(name.to_string())))?;
204 Ok(Group {
205 file: self.file,
206 address: entry.object_header_address,
207 })
208 }
209
210 fn children(&self) -> Result<Vec<GroupEntry>, Error> {
211 let hdr = self.file.parse_header(self.address)?;
212 let os = self.file.offset_size();
213 let ls = self.file.length_size();
214 let base = self.file.addr_offset;
215 let mut entries = group_v2::resolve_group_entries(&self.file.data, &hdr, os, ls, base)
216 .map_err(Error::Format)?;
217 for entry in &mut entries {
219 entry.object_header_address += base;
220 }
221 Ok(entries)
222 }
223}
224
225#[derive(Debug)]
231pub struct Dataset<'f> {
232 file: &'f File,
233 header: ObjectHeader,
234}
235
236impl<'f> Dataset<'f> {
237 pub fn shape(&self) -> Result<Vec<u64>, Error> {
239 let ds = self.dataspace()?;
240 Ok(ds.dimensions.clone())
241 }
242
243 pub fn dtype(&self) -> Result<DType, Error> {
245 let dt = self.datatype()?;
246 Ok(classify_datatype(&dt))
247 }
248
249 pub fn read_f64(&self) -> Result<Vec<f64>, Error> {
251 let raw = self.read_raw()?;
252 let dt = self.datatype()?;
253 Ok(data_read::read_as_f64(&raw, &dt)?)
254 }
255
256 pub fn read_f32(&self) -> Result<Vec<f32>, Error> {
258 let raw = self.read_raw()?;
259 let dt = self.datatype()?;
260 Ok(data_read::read_as_f32(&raw, &dt)?)
261 }
262
263 pub fn read_i32(&self) -> Result<Vec<i32>, Error> {
265 let raw = self.read_raw()?;
266 let dt = self.datatype()?;
267 Ok(data_read::read_as_i32(&raw, &dt)?)
268 }
269
270 pub fn read_i64(&self) -> Result<Vec<i64>, Error> {
272 let raw = self.read_raw()?;
273 let dt = self.datatype()?;
274 Ok(data_read::read_as_i64(&raw, &dt)?)
275 }
276
277 pub fn read_u64(&self) -> Result<Vec<u64>, Error> {
279 let raw = self.read_raw()?;
280 let dt = self.datatype()?;
281 Ok(data_read::read_as_u64(&raw, &dt)?)
282 }
283
284 pub fn read_u8(&self) -> Result<Vec<u8>, Error> {
286 self.read_raw()
287 }
288
289 pub fn read_i8(&self) -> Result<Vec<i8>, Error> {
291 let raw = self.read_raw()?;
292 Ok(raw.iter().map(|&b| b as i8).collect())
293 }
294
295 pub fn read_i16(&self) -> Result<Vec<i16>, Error> {
297 let raw = self.read_raw()?;
298 let dt = self.datatype()?;
299 let vals = data_read::read_as_i32(&raw, &dt)?;
300 Ok(vals.into_iter().map(|v| v as i16).collect())
301 }
302
303 pub fn read_u16(&self) -> Result<Vec<u16>, Error> {
305 let raw = self.read_raw()?;
306 let dt = self.datatype()?;
307 let vals = data_read::read_as_u64(&raw, &dt)?;
308 Ok(vals.into_iter().map(|v| v as u16).collect())
309 }
310
311 pub fn read_u32(&self) -> Result<Vec<u32>, Error> {
313 let raw = self.read_raw()?;
314 let dt = self.datatype()?;
315 let vals = data_read::read_as_u64(&raw, &dt)?;
316 Ok(vals.into_iter().map(|v| v as u32).collect())
317 }
318
319 pub fn read_string(&self) -> Result<Vec<String>, Error> {
321 let raw = self.read_raw()?;
322 let dt = self.datatype()?;
323 Ok(data_read::read_as_strings(&raw, &dt)?)
324 }
325
326 pub fn attrs(&self) -> Result<HashMap<String, AttrValue>, Error> {
328 let attr_msgs = extract_attributes_full(
329 &self.file.data,
330 &self.header,
331 self.file.offset_size(),
332 self.file.length_size(),
333 )?;
334 Ok(attrs_to_map(
335 &attr_msgs,
336 &self.file.data,
337 self.file.offset_size(),
338 self.file.length_size(),
339 ))
340 }
341
342 fn datatype(&self) -> Result<Datatype, Error> {
343 let msg = find_message(&self.header, MessageType::Datatype)?;
344 let (dt, _) = Datatype::parse(&msg.data)?;
345 Ok(dt)
346 }
347
348 fn dataspace(&self) -> Result<Dataspace, Error> {
349 let msg = find_message(&self.header, MessageType::Dataspace)?;
350 Ok(Dataspace::parse(&msg.data, self.file.length_size())?)
351 }
352
353 fn data_layout(&self) -> Result<DataLayout, Error> {
354 let msg = find_message(&self.header, MessageType::DataLayout)?;
355 Ok(DataLayout::parse(
356 &msg.data,
357 self.file.offset_size(),
358 self.file.length_size(),
359 )?)
360 }
361
362 fn filter_pipeline(&self) -> Option<FilterPipeline> {
363 self.header
364 .messages
365 .iter()
366 .find(|m| m.msg_type == MessageType::FilterPipeline)
367 .and_then(|msg| FilterPipeline::parse(&msg.data).ok())
368 }
369
370 fn read_raw(&self) -> Result<Vec<u8>, Error> {
371 let dt = self.datatype()?;
372 let ds = self.dataspace()?;
373 let mut dl = self.data_layout()?;
374 if self.file.addr_offset != 0 {
376 if let DataLayout::Contiguous { ref mut address, .. } = dl {
377 if let Some(addr) = address {
378 *addr += self.file.addr_offset;
379 }
380 }
381 }
382 let pipeline = self.filter_pipeline();
383 Ok(data_read::read_raw_data_cached(
384 &self.file.data,
385 &dl,
386 &ds,
387 &dt,
388 pipeline.as_ref(),
389 self.file.offset_size(),
390 self.file.length_size(),
391 &self.file.chunk_cache,
392 )?)
393 }
394}
395
396fn find_message(
401 header: &ObjectHeader,
402 msg_type: MessageType,
403) -> Result<&crate::object_header::HeaderMessage, Error> {
404 header
405 .messages
406 .iter()
407 .find(|m| m.msg_type == msg_type)
408 .ok_or(Error::MissingMessage(msg_type))
409}
410
411fn has_message(header: &ObjectHeader, msg_type: MessageType) -> bool {
412 header.messages.iter().any(|m| m.msg_type == msg_type)
413}
414
415fn is_group(header: &ObjectHeader) -> bool {
416 header
417 .messages
418 .iter()
419 .any(|m| m.msg_type == MessageType::LinkInfo
420 || m.msg_type == MessageType::Link
421 || m.msg_type == MessageType::SymbolTable)
422}
423