Skip to main content

dmap/
record.rs

1//! Defines the [`Record`] trait, which contains the shared behaviour that all DMAP records must have.
2
3use crate::error::DmapError;
4use crate::io;
5use crate::types::{DmapField, DmapType, DmapVec, Fields};
6use crate::io::{create_stream, slice_stream_lax, split_into_slices};
7use crate::convenience::split_results;
8use indexmap::IndexMap;
9use itertools::izip;
10use rayon::iter::Either;
11use rayon::prelude::*;
12use std::fmt::Debug;
13use std::fs::File;
14use std::io::Read;
15use std::path::Path;
16
17/// DMAP record template.
18///
19/// This trait defines functionality for parsing bytes into records, converting records to bytes,
20/// and reading from / writing to files.
21pub trait Record<'a>:
22    Debug + Send + Sync + TryFrom<IndexMap<String, DmapField>, Error = DmapError>
23{
24    /// Creates a new object from the parsed scalars and vectors.
25    fn new(fields: &mut IndexMap<String, DmapField>) -> Result<Self, DmapError>
26    where
27        Self: Sized;
28
29    /// Gets the underlying data of `self`.
30    fn inner(self) -> IndexMap<String, DmapField>;
31
32    /// Returns the field with name `key`, if it exists in the record.
33    fn get(&self, key: &str) -> Option<&DmapField>;
34
35    /// Returns the names of all fields stored in the record.
36    fn keys(&self) -> Vec<&String>;
37
38    /// Returns whether `name` is a metadata field of the record.
39    fn is_metadata_field(name: &str) -> bool;
40
41    /// Reads the nth records from `dmap_data` and parse into instances of `Self`.
42    ///
43    /// Returns `DmapError` if `dmap_data` cannot be read or contains invalid data.
44    fn read_nth_records(dmap_data: impl Read, indices: &[i32]) -> Result<Vec<Self>, DmapError>
45    where
46        Self: Sized,
47        Self: Send,
48    {
49        let mut slices = split_into_slices(dmap_data)?;
50        let num_recs = slices.len();
51        let mut records_read = vec![];
52        for idx in indices.iter() {
53            if *idx >= num_recs as i32 || *idx <= -1 * num_recs as i32 {
54                return Err(DmapError::InvalidIndex(*idx));
55            }
56            let i = {
57                if *idx < 0 {
58                    num_recs - idx.abs() as usize
59                } else {
60                    *idx as usize
61                }
62            };
63            records_read.push(slices[i].parse_record::<Self>()
64                .map_err(|e| DmapError::InvalidRecord(format!("Record {idx}: {}", e.to_string())))?);
65        }
66        Ok(records_read)
67    }
68
69    /// Reads the nth records from `dmap_data` and parse into instances of `Self`, if possible.
70    ///
71    /// Returns a 2-tuple, where the first entry is the good records from the front of the buffer,
72    /// and the second entry is the byte where the first corrupted record starts, if applicable.
73    fn read_nth_records_lax(mut dmap_data: impl Read, indices: &[i32]) -> Result<(Vec<Self>, Option<usize>), DmapError>
74    where
75        Self: Sized,
76        Self: Send,
77    {
78        let mut buffer: Vec<u8> = vec![];
79        let mut dmap_records: Vec<Self> = vec![];
80
81        create_stream(&mut dmap_data)?.read_to_end(&mut buffer)?;
82        let (mut slices, rec_starts, mut bad_byte) = slice_stream_lax(buffer);
83
84        let num_recs = slices.len();
85        for idx in indices.iter() {
86            if *idx >= num_recs as i32 || *idx <= -1 * num_recs as i32 {
87                return Err(DmapError::InvalidIndex(*idx));
88            }
89            let i = {
90                if *idx < 0 {
91                    num_recs - idx.abs() as usize
92                } else {
93                    *idx as usize
94                }
95            };
96            let parse_result = slices[i].parse_record::<Self>();
97            if let Ok(x) = parse_result {
98                dmap_records.push(x);
99            } else {
100                bad_byte = Some(rec_starts[i]);
101                break;
102            }
103        }
104
105        Ok((dmap_records, bad_byte))
106    }
107
108    /// Reads from `dmap_data` and parses into `Vec<Self>`.
109    ///
110    /// Returns `DmapError` if `dmap_data` cannot be read or contains invalid data.
111    fn read_records(dmap_data: impl Read) -> Result<Vec<Self>, DmapError>
112    where
113        Self: Sized,
114        Self: Send,
115    {
116        let mut slices = split_into_slices(dmap_data)?;
117        let mut dmap_results: Vec<Result<Self, DmapError>> = vec![];
118        dmap_results.par_extend(
119            slices
120                .par_iter_mut()
121                .map(|parser| parser.parse_record::<Self>()),
122        );
123
124        let (dmap_records, dmap_errors, bad_recs) = split_results(dmap_results);
125        if !dmap_errors.is_empty() {
126            return Err(DmapError::BadRecords(bad_recs, dmap_errors[0].to_string()));
127        }
128        Ok(dmap_records)
129    }
130
131    /// Reads metadata of records from `dmap_data` and parses into `Vec<IndexMap<String, DmapField>>`.
132    ///
133    /// Returns `DmapError` if `dmap_data` cannot be read or contains invalid data.
134    fn read_metadata(dmap_data: impl Read) -> Result<Vec<IndexMap<String, DmapField>>, DmapError>
135    where
136        Self: Sized,
137        Self: Send,
138    {
139        let mut slices = split_into_slices(dmap_data)?;
140        let mut dmap_results: Vec<Result<IndexMap<String, DmapField>, DmapError>> = vec![];
141        dmap_results.par_extend(
142            slices
143                .par_iter_mut()
144                .map(|parser| parser.parse_metadata::<Self>()),
145        );
146
147        let (dmap_records, dmap_errors, bad_recs) = split_results(dmap_results);
148        if !dmap_errors.is_empty() {
149            return Err(DmapError::BadRecords(bad_recs, dmap_errors[0].to_string()));
150        }
151
152        Ok(dmap_records)
153    }
154
155    /// Reads metadata of the nth records from `dmap_data` and parses into `Vec<IndexMap<String, DmapField>>`.
156    ///
157    /// Returns `DmapError` if `dmap_data` cannot be read or contains invalid data.
158    fn read_metadata_by_indices(dmap_data: impl Read, indices: &[i32]) -> Result<Vec<IndexMap<String, DmapField>>, DmapError>
159    where
160        Self: Sized,
161        Self: Send,
162    {
163        let mut slices = split_into_slices(dmap_data)?;
164        let mut dmap_results: Vec<IndexMap<String, DmapField>> = vec![];
165
166        let num_recs = slices.len();
167        for idx in indices.iter() {
168            if *idx >= num_recs as i32 || *idx <= -1 * num_recs as i32 {
169                return Err(DmapError::InvalidIndex(*idx));
170            }
171            let i = {
172                if *idx < 0 {
173                    num_recs - idx.abs() as usize
174                } else {
175                    *idx as usize
176                }
177            };
178            dmap_results.push(slices[i].parse_metadata::<Self>()
179                .map_err(|e| DmapError::InvalidRecord(format!("Record {idx}: {}", e.to_string())))?);
180        }
181        Ok(dmap_results)
182    }
183
184    /// Reads from `dmap_data` and parses into `Vec<Self>`.
185    ///
186    /// Returns a 2-tuple, where the first entry is the good records from the front of the buffer,
187    /// and the second entry is the byte where the first corrupted record starts, if applicable.
188    fn read_records_lax(mut dmap_data: impl Read) -> Result<(Vec<Self>, Option<usize>), DmapError>
189    where
190        Self: Sized,
191        Self: Send,
192    {
193        let mut buffer: Vec<u8> = vec![];
194        let mut dmap_records: Vec<Self> = vec![];
195
196        create_stream(&mut dmap_data)?.read_to_end(&mut buffer)?;
197        let (mut slices, rec_starts, mut bad_byte) = slice_stream_lax(buffer);
198
199        let mut dmap_results: Vec<Result<Self, DmapError>> = vec![];
200        dmap_results.par_extend(
201            slices
202                .par_iter_mut()
203                .map(|parser| parser.parse_record::<Self>()),
204        );
205
206        for (i, rec) in dmap_results.into_iter().enumerate() {
207            if let Ok(x) = rec {
208                dmap_records.push(x);
209            } else {
210                bad_byte = Some(rec_starts[i]);
211                break;
212            }
213        }
214        Ok((dmap_records, bad_byte))
215    }
216
217    /// Read a DMAP file of type `Self`
218    fn read_file<P: AsRef<Path>>(infile: P) -> Result<Vec<Self>, DmapError>
219    where
220        Self: Sized,
221        Self: Send,
222    {
223        let file = File::open(infile)?;
224        Self::read_records(file)
225    }
226
227    /// Read a DMAP file of type `Self`.
228    ///
229    /// If the file is corrupted, it will return the leading uncorrupted records as well as the
230    /// position corresponding to the start of the first corrupted record.
231    fn read_file_lax<P: AsRef<Path>>(infile: P) -> Result<(Vec<Self>, Option<usize>), DmapError>
232    where
233        Self: Sized,
234        Self: Send,
235    {
236        let file = File::open(infile)?;
237        Self::read_records_lax(file)
238    }
239
240    /// Reads the `nth` record(s) of a DMAP file of type `Self`.
241    fn read_file_by_indices<P: AsRef<Path>>(infile: P, indices: &[i32]) -> Result<Vec<Self>, DmapError>
242    where
243        Self: Sized,
244        Self: Send,
245    {
246        let file = File::open(infile)?;
247        Self::read_nth_records(file, indices)
248    }
249
250    /// Reads the `nth` record(s) of a DMAP file of type `Self`.
251    ///
252    /// Does not fail on corrupted records; rather, returns `(recs, Option<usize>)` where
253    /// the second value is the byte where record corruption begins, if applicable.
254    fn read_file_by_indices_lax<P: AsRef<Path>>(infile: P, indices: &[i32]) -> Result<(Vec<Self>, Option<usize>), DmapError>
255    where
256        Self: Sized,
257        Self: Send,
258    {
259        let file = File::open(infile)?;
260        Self::read_nth_records_lax(file, indices)
261    }
262
263    /// Read the metadata from a DMAP file of type `Self`
264    fn read_file_metadata<P: AsRef<Path>>(
265        infile: P,
266    ) -> Result<Vec<IndexMap<String, DmapField>>, DmapError>
267    where
268        Self: Sized,
269        Self: Send,
270    {
271        let file = File::open(infile)?;
272        Self::read_metadata(file)
273    }
274
275    /// Reads the `nth` records' metadata of a DMAP file of type `Self`.
276    fn read_file_metadata_by_indices<P: AsRef<Path>>(infile: P, indices: &[i32]) -> Result<Vec<IndexMap<String, DmapField>>, DmapError>
277    where
278        Self: Sized,
279        Self: Send,
280    {
281        let file = File::open(infile)?;
282        Self::read_metadata_by_indices(file, indices)
283    }
284    
285    /// Checks the validity of an `IndexMap` as a representation of a DMAP record.
286    ///
287    /// Validity checks include ensuring that no unfamiliar entries exist, that all required
288    /// scalar and vector fields exist, that all scalar and vector fields are of the expected
289    /// type, and that vector fields which are expected to have the same dimensions do indeed
290    /// have the same dimensions.
291    fn check_fields(
292        field_dict: &mut IndexMap<String, DmapField>,
293        fields_for_type: &Fields,
294    ) -> Result<(), DmapError> {
295        let unsupported_keys: Vec<&String> = field_dict
296            .keys()
297            .filter(|&k| !fields_for_type.all_fields.contains(&&**k))
298            .collect();
299        if !unsupported_keys.is_empty() {
300            Err(DmapError::InvalidRecord(format!(
301                "Unsupported fields {:?}, fields supported are {:?}",
302                unsupported_keys, fields_for_type.all_fields
303            )))?
304        }
305
306        for (field, expected_type) in fields_for_type.scalars_required.iter() {
307            match field_dict.get(&field.to_string()) {
308                Some(DmapField::Scalar(x)) if &x.get_type() == expected_type => {}
309                Some(DmapField::Scalar(x)) => Err(DmapError::InvalidRecord(format!(
310                    "Field {} has incorrect type {}, expected {}",
311                    field,
312                    x.get_type(),
313                    expected_type
314                )))?,
315                Some(_) => Err(DmapError::InvalidRecord(format!(
316                    "Field {} is a vector, expected scalar",
317                    field
318                )))?,
319                None => Err(DmapError::InvalidRecord(format!(
320                    "Field {field:?} ({:?}) missing: fields {:?}",
321                    &field.to_string(),
322                    field_dict.keys()
323                )))?,
324            }
325        }
326        for (field, expected_type) in fields_for_type.scalars_optional.iter() {
327            match field_dict.get(&field.to_string()) {
328                Some(DmapField::Scalar(x)) if &x.get_type() == expected_type => {}
329                Some(DmapField::Scalar(x)) => Err(DmapError::InvalidRecord(format!(
330                    "Field {} has incorrect type {}, expected {}",
331                    field,
332                    x.get_type(),
333                    expected_type
334                )))?,
335                Some(_) => Err(DmapError::InvalidRecord(format!(
336                    "Field {} is a vector, expected scalar",
337                    field
338                )))?,
339                None => {}
340            }
341        }
342        for (field, expected_type) in fields_for_type.vectors_required.iter() {
343            match field_dict.get(&field.to_string()) {
344                Some(DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!(
345                    "Field {} is a scalar, expected vector",
346                    field
347                )))?,
348                Some(DmapField::Vector(x)) if &x.get_type() != expected_type => {
349                    Err(DmapError::InvalidRecord(format!(
350                        "Field {field} has incorrect type {:?}, expected {expected_type:?}",
351                        x.get_type()
352                    )))?
353                }
354                Some(&DmapField::Vector(_)) => {}
355                None => Err(DmapError::InvalidRecord(format!("Field {field} missing")))?,
356            }
357        }
358        for (field, expected_type) in fields_for_type.vectors_optional.iter() {
359            match field_dict.get(&field.to_string()) {
360                Some(&DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!(
361                    "Field {} is a scalar, expected vector",
362                    field
363                )))?,
364                Some(DmapField::Vector(x)) if &x.get_type() != expected_type => {
365                    Err(DmapError::InvalidRecord(format!(
366                        "Field {field} has incorrect type {}, expected {expected_type}",
367                        x.get_type()
368                    )))?
369                }
370                _ => {}
371            }
372        }
373        // This block checks that grouped vector fields have the same dimensionality
374        for vec_group in fields_for_type.vector_dim_groups.iter() {
375            let vecs: Vec<(&str, &DmapVec)> = vec_group
376                .iter()
377                .filter_map(|&name| match field_dict.get(&name.to_string()) {
378                    Some(DmapField::Vector(ref x)) => Some((name, x)),
379                    Some(_) => None,
380                    None => None,
381                })
382                .collect();
383            if vecs.len() > 1 {
384                let mut vec_iter = vecs.iter();
385                let first = vec_iter.next().expect("Iterator broken");
386                if !vec_iter.all(|(_, v)| v.shape() == first.1.shape()) {
387                    let error_vec: Vec<(&str, &[usize])> =
388                        vecs.iter().map(|(k, v)| (*k, v.shape())).collect();
389                    Err(DmapError::InvalidRecord(format!(
390                        "Vector fields have inconsistent dimensions: {:?}",
391                        error_vec
392                    )))?
393                }
394            }
395        }
396        Ok(())
397    }
398
399    /// Attempts to massage the entries of an `IndexMap` into the proper types for a DMAP record.
400    fn coerce(
401        fields_dict: &mut IndexMap<String, DmapField>,
402        fields_for_type: &Fields,
403    ) -> Result<Self, DmapError> {
404        let unsupported_keys: Vec<&String> = fields_dict
405            .keys()
406            .filter(|&k| !fields_for_type.all_fields.contains(&&**k))
407            .collect();
408        if !unsupported_keys.is_empty() {
409            Err(DmapError::InvalidRecord(format!(
410                "Unsupported fields {:?}, fields supported are {:?}",
411                unsupported_keys, fields_for_type.all_fields
412            )))?
413        }
414
415        for (field, expected_type) in fields_for_type.scalars_required.iter() {
416            match fields_dict.get(&field.to_string()) {
417                Some(DmapField::Scalar(x)) if &x.get_type() != expected_type => {
418                    fields_dict.insert(
419                        field.to_string(),
420                        DmapField::Scalar(x.cast_as(expected_type)?),
421                    );
422                }
423                Some(DmapField::Scalar(_)) => {}
424                Some(_) => Err(DmapError::InvalidRecord(format!(
425                    "Field {} is a vector, expected scalar",
426                    field
427                )))?,
428                None => Err(DmapError::InvalidRecord(format!(
429                    "Field {field:?} ({:?}) missing: fields {:?}",
430                    &field.to_string(),
431                    fields_dict.keys()
432                )))?,
433            }
434        }
435        for (field, expected_type) in fields_for_type.scalars_optional.iter() {
436            match fields_dict.get(&field.to_string()) {
437                Some(DmapField::Scalar(x)) if &x.get_type() == expected_type => {}
438                Some(DmapField::Scalar(x)) => {
439                    fields_dict.insert(
440                        field.to_string(),
441                        DmapField::Scalar(x.cast_as(expected_type)?),
442                    );
443                }
444                Some(_) => Err(DmapError::InvalidRecord(format!(
445                    "Field {} is a vector, expected scalar",
446                    field
447                )))?,
448                None => {}
449            }
450        }
451        for (field, expected_type) in fields_for_type.vectors_required.iter() {
452            match fields_dict.get(&field.to_string()) {
453                Some(DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!(
454                    "Field {} is a scalar, expected vector",
455                    field
456                )))?,
457                Some(DmapField::Vector(x)) if &x.get_type() != expected_type => {
458                    Err(DmapError::InvalidRecord(format!(
459                        "Field {field} has incorrect type {:?}, expected {expected_type:?}",
460                        x.get_type()
461                    )))?
462                }
463                Some(DmapField::Vector(_)) => {}
464                None => Err(DmapError::InvalidRecord(format!("Field {field} missing")))?,
465            }
466        }
467        for (field, expected_type) in fields_for_type.vectors_optional.iter() {
468            match fields_dict.get(&field.to_string()) {
469                Some(&DmapField::Scalar(_)) => Err(DmapError::InvalidRecord(format!(
470                    "Field {} is a scalar, expected vector",
471                    field
472                )))?,
473                Some(DmapField::Vector(x)) if &x.get_type() != expected_type => {
474                    Err(DmapError::InvalidRecord(format!(
475                        "Field {field} has incorrect type {}, expected {expected_type}",
476                        x.get_type()
477                    )))?
478                }
479                _ => {}
480            }
481        }
482
483        Self::new(fields_dict)
484    }
485
486    /// Attempts to copy `self` to a raw byte representation.
487    fn to_bytes(&self) -> Result<Vec<u8>, DmapError>;
488
489    /// Converts the entries of an `IndexMap` into a raw byte representation, including metadata
490    /// about the entries `(DMAP key, name\[, dimensions\])`.
491    ///
492    /// If all is good, returns a tuple containing:
493    /// * the number of scalar fields
494    /// * the number of vector fields
495    /// * the raw bytes
496    fn data_to_bytes(
497        data: &IndexMap<String, DmapField>,
498        fields_for_type: &Fields,
499    ) -> Result<(i32, i32, Vec<u8>), DmapError> {
500        let mut data_bytes: Vec<u8> = vec![];
501        let mut num_scalars: i32 = 0;
502        let mut num_vectors: i32 = 0;
503
504        // let scalar_fields = data.keys().filter(|k| )
505        for (field, _) in fields_for_type.scalars_required.iter() {
506            match data.get(&field.to_string()) {
507                Some(x @ DmapField::Scalar(_)) => {
508                    data_bytes.extend(field.as_bytes());
509                    data_bytes.extend([0]); // null-terminate string
510                    data_bytes.append(&mut x.as_bytes());
511                    num_scalars += 1;
512                }
513                Some(_) => Err(DmapError::InvalidScalar(format!(
514                    "Field {field} is a vector, expected scalar"
515                )))?,
516                None => Err(DmapError::InvalidRecord(format!(
517                    "Field {field} missing from record"
518                )))?,
519            }
520        }
521        for (field, _) in fields_for_type.scalars_optional.iter() {
522            if let Some(x) = data.get(&field.to_string()) {
523                match x {
524                    DmapField::Scalar(_) => {
525                        data_bytes.extend(field.as_bytes());
526                        data_bytes.extend([0]); // null-terminate string
527                        data_bytes.append(&mut x.as_bytes());
528                        num_scalars += 1;
529                    }
530                    DmapField::Vector(_) => Err(DmapError::InvalidScalar(format!(
531                        "Field {field} is a vector, expected scalar"
532                    )))?,
533                }
534            }
535        }
536        for (field, _) in fields_for_type.vectors_required.iter() {
537            match data.get(&field.to_string()) {
538                Some(x @ DmapField::Vector(_)) => {
539                    data_bytes.extend(field.as_bytes());
540                    data_bytes.extend([0]); // null-terminate string
541                    data_bytes.append(&mut x.as_bytes());
542                    num_vectors += 1;
543                }
544                Some(_) => Err(DmapError::InvalidVector(format!(
545                    "Field {field} is a scalar, expected vector"
546                )))?,
547                None => Err(DmapError::InvalidRecord(format!(
548                    "Field {field} missing from record"
549                )))?,
550            }
551        }
552        for (field, _) in fields_for_type.vectors_optional.iter() {
553            if let Some(x) = data.get(&field.to_string()) {
554                match x {
555                    DmapField::Vector(_) => {
556                        data_bytes.extend(field.as_bytes());
557                        data_bytes.extend([0]); // null-terminate string
558                        data_bytes.append(&mut x.as_bytes());
559                        num_vectors += 1;
560                    }
561                    DmapField::Scalar(_) => Err(DmapError::InvalidVector(format!(
562                        "Field {field} is a scalar, expected vector"
563                    )))?,
564                }
565            }
566        }
567
568        Ok((num_scalars, num_vectors, data_bytes))
569    }
570
571    /// Converts the entries of a `Record` into a raw byte representation, for debugging the conversion.
572    ///
573    /// If all is good, returns a vector containing tuples of:
574    /// * `String`: the name of the field (`"header"` denoting the record header)
575    /// * `usize`: where the serialized bytes of the field start in the record byte representation
576    /// * `Vec<u8>` the byte representation of the field.
577    fn inspect_bytes(
578        &self,
579        fields_for_type: &Fields,
580    ) -> Result<Vec<(String, usize, Vec<u8>)>, DmapError> {
581        let mut data_bytes: Vec<Vec<u8>> = vec![];
582        let mut indices: Vec<usize> = vec![16]; // start at 16 to account for header
583        let mut fields: Vec<String> = vec![];
584
585        let (mut num_scalars, mut num_vectors) = (0, 0);
586
587        for (field, _) in fields_for_type.scalars_required.iter() {
588            fields.push(field.to_string());
589            match self.get(field) {
590                Some(x @ DmapField::Scalar(_)) => {
591                    let mut bytes = vec![];
592                    bytes.extend(field.as_bytes());
593                    bytes.extend([0]); // null-terminate string
594                    bytes.append(&mut x.as_bytes());
595                    indices.push(indices[indices.len() - 1] + bytes.len());
596                    data_bytes.push(bytes);
597                    num_scalars += 1;
598                }
599                Some(_) => Err(DmapError::InvalidScalar(format!(
600                    "Field {field} is a vector, expected scalar"
601                )))?,
602                None => Err(DmapError::InvalidRecord(format!(
603                    "Field {field} missing from record"
604                )))?,
605            }
606        }
607        for (field, _) in fields_for_type.scalars_optional.iter() {
608            fields.push(field.to_string());
609            if let Some(x) = self.get(field) {
610                match x {
611                    DmapField::Scalar(_) => {
612                        let mut bytes = vec![];
613                        bytes.extend(field.as_bytes());
614                        bytes.extend([0]); // null-terminate string
615                        bytes.append(&mut x.as_bytes());
616                        indices.push(indices[indices.len() - 1] + bytes.len());
617                        data_bytes.push(bytes);
618                        num_scalars += 1;
619                    }
620                    DmapField::Vector(_) => Err(DmapError::InvalidScalar(format!(
621                        "Field {field} is a vector, expected scalar"
622                    )))?,
623                }
624            }
625        }
626        for (field, _) in fields_for_type.vectors_required.iter() {
627            fields.push(field.to_string());
628            match self.get(field) {
629                Some(x @ DmapField::Vector(_)) => {
630                    let mut bytes = vec![];
631                    bytes.extend(field.as_bytes());
632                    bytes.extend([0]); // null-terminate string
633                    bytes.append(&mut x.as_bytes());
634                    indices.push(indices[indices.len() - 1] + bytes.len());
635                    data_bytes.push(bytes);
636                    num_vectors += 1;
637                }
638                Some(_) => Err(DmapError::InvalidVector(format!(
639                    "Field {field} is a scalar, expected vector"
640                )))?,
641                None => Err(DmapError::InvalidRecord(format!(
642                    "Field {field} missing from record"
643                )))?,
644            }
645        }
646        for (field, _) in fields_for_type.vectors_optional.iter() {
647            fields.push(field.to_string());
648            if let Some(x) = self.get(field) {
649                match x {
650                    DmapField::Vector(_) => {
651                        let mut bytes = vec![];
652                        bytes.extend(field.as_bytes());
653                        bytes.extend([0]); // null-terminate string
654                        bytes.append(&mut x.as_bytes());
655                        indices.push(indices[indices.len() - 1] + data_bytes.len());
656                        data_bytes.push(bytes);
657                        num_vectors += 1;
658                    }
659                    DmapField::Scalar(_) => Err(DmapError::InvalidVector(format!(
660                        "Field {field} is a scalar, expected vector"
661                    )))?,
662                }
663            }
664        }
665
666        // Now build up the header
667        let num_bytes: usize = data_bytes.iter().map(|x| x.len()).sum();
668        let mut bytes: Vec<u8> = vec![];
669        bytes.extend((65537_i32).as_bytes()); // No idea why this is what it is, copied from backscatter
670        bytes.extend((num_bytes as i32 + 16).as_bytes()); // +16 for code, length, num_scalars, num_vectors
671        bytes.extend(num_scalars.as_bytes());
672        bytes.extend(num_vectors.as_bytes());
673
674        // Accumulate all the results into one big `Vec`
675        let mut field_info: Vec<(String, usize, Vec<u8>)> = vec![("header".to_string(), 0, bytes)];
676        for (f, (s, b)) in izip!(
677            fields.into_iter(),
678            izip!(indices[..indices.len() - 1].iter(), data_bytes.into_iter())
679        ) {
680            field_info.push((f, *s, b));
681        }
682
683        Ok(field_info)
684    }
685
686    /// Creates the byte representation of a collection of [`Record`]s.
687    ///
688    /// Ordering of the members is preserved.
689    fn par_to_bytes(recs: &[Self]) -> Result<Vec<u8>, DmapError> {
690        let mut bytes: Vec<u8> = vec![];
691        let (errors, rec_bytes): (Vec<_>, Vec<_>) =
692            recs.par_iter()
693                .enumerate()
694                .partition_map(|(i, rec)| match rec.to_bytes() {
695                    Err(e) => Either::Left((i, e)),
696                    Ok(y) => Either::Right(y),
697                });
698        if !errors.is_empty() {
699            Err(DmapError::InvalidRecord(format!(
700                "Corrupted records: {errors:?}"
701            )))?
702        }
703        bytes.par_extend(rec_bytes.into_par_iter().flatten());
704        Ok(bytes)
705    }
706
707    /// Attempts to convert `recs` to `Self` then convert to bytes.
708    fn try_into_bytes(recs: Vec<IndexMap<String, DmapField>>) -> Result<Vec<u8>, DmapError> {
709        let mut bytes: Vec<u8> = vec![];
710        let (errors, rec_bytes): (Vec<_>, Vec<_>) =
711            recs.into_par_iter()
712                .enumerate()
713                .partition_map(|(i, rec)| match Self::try_from(rec) {
714                    Err(e) => Either::Left((i, e)),
715                    Ok(x) => match x.to_bytes() {
716                        Err(e) => Either::Left((i, e)),
717                        Ok(y) => Either::Right(y),
718                    },
719                });
720        if !errors.is_empty() {
721            Err(DmapError::BadRecords(
722                errors.iter().map(|(i, _)| *i).collect(),
723                errors[0].1.to_string(),
724            ))?
725        }
726        bytes.par_extend(rec_bytes.into_par_iter().flatten());
727        Ok(bytes)
728    }
729
730    /// Writes a collection of `Record`s to `outfile`.
731    ///
732    /// Prefer using the specific functions, e.g. `write_dmap`, `write_rawacf`, etc. for their
733    /// specific field checks.
734    fn write_to_file<P: AsRef<Path>>(
735        recs: &Vec<Self>,
736        outfile: P,
737        bz2: bool,
738    ) -> Result<(), DmapError> {
739        let bytes: Vec<u8> = Self::par_to_bytes(recs)?;
740        io::bytes_to_file(bytes, outfile, bz2)?;
741        Ok(())
742    }
743}
744
/// Generates a record type, its [`Record`] implementation, conversions and tests for one format.
///
/// * `$format` - identifier of the format. It names the generated `<Format>Record` struct and
///   is used as the extension of the test file `tests/test_files/test.<format>`.
/// * `$fields` - the item describing the format's fields. It is passed by reference to
///   `check_fields`, `coerce` and `data_to_bytes`, and its `data_fields`, `scalars_optional`,
///   `vectors_optional`, `scalars_required` and `vectors_required` members are read here.
macro_rules! create_record_type {
    ($format:ident, $fields:ident) => {
        paste::paste! {
            use crate::types::{DmapType, DmapField};
            use crate::error::DmapError;
            use indexmap::IndexMap;
            use crate::record::Record;

            #[doc = "Struct containing the checked fields of a single `" $format:upper "` record." ]
            #[derive(Debug, PartialEq, Clone)]
            pub struct [< $format:camel Record >] {
                pub data: IndexMap<String, DmapField>,
            }

            impl Record<'_> for [< $format:camel Record>] {
                // Consumes the record and hands back the underlying field map.
                fn inner(self) -> IndexMap<String, DmapField> {
                    self.data
                }
                // Looks up a single field by name.
                fn get(&self, key: &str) -> Option<&DmapField> {
                    self.data.get(key)
                }
                // Lists the names of all stored fields, in map order.
                fn keys(&self) -> Vec<&String> {
                    self.data.keys().collect()
                }
                // Builds a record from `fields` after validating them against the format's
                // field description. The map is cloned, so the caller keeps its copy.
                fn new(fields: &mut IndexMap<String, DmapField>) -> Result<[< $format:camel Record>], DmapError> {
                    // Validate first so an invalid map is rejected before it is copied.
                    Self::check_fields(fields, &$fields)?;

                    Ok([< $format:camel Record >] {
                        data: fields.to_owned(),
                    })
                }
                // Serializes the record: a 16-byte header (code, total size, number of scalars,
                // number of vectors) followed by the encoded field data.
                //
                // Fails with `DmapError::InvalidRecord` when the total record size cannot be
                // represented as an `i32`, instead of writing a truncated size into the header.
                fn to_bytes(&self) -> Result<Vec<u8>, DmapError> {
                    let (num_scalars, num_vectors, mut data_bytes) =
                        Self::data_to_bytes(&self.data, &$fields)?;

                    // +16 for code, length, num_scalars, num_vectors
                    let record_size = i32::try_from(data_bytes.len())
                        .ok()
                        .and_then(|len| len.checked_add(16))
                        .ok_or_else(|| {
                            DmapError::InvalidRecord(format!(
                                "Record is too large: {} data bytes cannot be described by a 32-bit record size",
                                data_bytes.len()
                            ))
                        })?;

                    // The size check above guarantees this sum cannot overflow `usize`.
                    let mut bytes: Vec<u8> = Vec::with_capacity(data_bytes.len() + 16);
                    bytes.extend((65537_i32).as_bytes()); // 0x0001_0001. No idea why this is what it is, copied from backscatter
                    bytes.extend(record_size.as_bytes());
                    bytes.extend(num_scalars.as_bytes());
                    bytes.extend(num_vectors.as_bytes());
                    bytes.append(&mut data_bytes); // consumes data_bytes
                    Ok(bytes)
                }
                // A field counts as metadata when it is not listed among the data fields.
                fn is_metadata_field(name: &str) -> bool {
                    !$fields.data_fields.iter().any(|e| e == &name)
                }
            }

            impl TryFrom<&mut IndexMap<String, DmapField>> for [< $format:camel Record >] {
                type Error = DmapError;

                fn try_from(value: &mut IndexMap<String, DmapField>) -> Result<Self, Self::Error> {
                    Self::coerce(value, &$fields)
                }
            }

            impl TryFrom<IndexMap<String, DmapField>> for [< $format:camel Record >] {
                type Error = DmapError;

                fn try_from(mut value: IndexMap<String, DmapField>) -> Result<Self, Self::Error> {
                    Self::coerce(&mut value, &$fields)
                }
            }

            #[cfg(test)]
            mod tests {
                use super::*;
                use std::path::PathBuf;

                /// Creates a test to ensure that the record is still able to be read, even when missing
                /// some of the optional fields.
                #[test]
                fn test_missing_optional_fields() -> Result<(), DmapError> {
                    let filename: PathBuf = PathBuf::from(format!("tests/test_files/test.{}", stringify!($format)));
                    let data = [< $format:camel Record >]::read_file_by_indices(&filename, &[0]).expect("Unable to sniff file");
                    let recs = data[0].clone().inner();

                    // Drop one optional field at a time; conversion must still succeed.
                    for field in $fields.scalars_optional.iter().chain($fields.vectors_optional.iter()) {
                        let mut cloned_rec = recs.clone();
                        let _ = cloned_rec.shift_remove(field.0);
                        let _ = [< $format:camel Record >]::try_from(&mut cloned_rec)?;
                    }
                    Ok(())
                }

                /// Creates a test to ensure that the record is not able to be read when missing
                /// some of the required fields.
                #[test]
                fn test_missing_required_fields() -> Result<(), DmapError> {
                    let filename: PathBuf = PathBuf::from(format!("tests/test_files/test.{}", stringify!($format)));
                    let data = [< $format:camel Record >]::read_file_by_indices(&filename, &[0]).expect("Unable to sniff file");
                    let recs = data[0].clone().inner();

                    // Drop one required field at a time; conversion must fail every time.
                    for field in $fields.scalars_required.iter().chain($fields.vectors_required.iter()) {
                        let mut cloned_rec = recs.clone();
                        let _ = cloned_rec.shift_remove(field.0);
                        let res = [< $format:camel Record >]::try_from(&mut cloned_rec);
                        assert!(res.is_err());
                    }
                    Ok(())
                }
            }
        }
    }
}
853
854pub(crate) use create_record_type;