mwalib 2.0.3

A library to simplify reading Murchison Widefield Array (MWA) raw visibilities, voltages and metadata.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

//! Functions for organising and checking the consistency of gpubox files.

pub mod error;

use std::collections::BTreeMap;
use std::collections::HashSet;
use std::fmt;
use std::path::Path;

use fitsio::hdu::FitsHdu;
use regex::Regex;

use crate::*;
pub use error::GpuboxError;

use fitsio::FitsFile;

#[cfg(any(feature = "python", feature = "python-stubgen"))]
use pyo3::prelude::*;
#[cfg(feature = "python-stubgen")]
use pyo3_stub_gen_derive::gen_stub_pyclass;

#[cfg(test)]
mod test;

/// This struct is used to return the common or common good timesteps and coarse channels.
///
/// It describes one contiguous run of timesteps, together with the coarse
/// channel identifiers present in the first timestep of that run.
#[derive(Debug)]
pub(crate) struct ObsTimesAndChans {
    /// Start = start of first timestep (UNIX time, milliseconds).
    pub start_time_unix_ms: u64,
    /// End = start of last timestep + integration time (UNIX time, milliseconds).
    pub end_time_unix_ms: u64,
    /// Length of the run in milliseconds (`end_time_unix_ms - start_time_unix_ms`).
    pub duration_ms: u64,
    /// Vector of Correlator Coarse Chan identifiers (gpubox number or rec chan number).
    pub coarse_chan_identifiers: Vec<usize>,
}

/// This represents one group of gpubox files with the same "batch" identifier,
/// i.e. the final numeric field of a gpubox filename
/// (e.g. `obsid_datetime_chan_batch`).
#[cfg_attr(feature = "python-stubgen", gen_stub_pyclass)]
#[cfg_attr(
    any(feature = "python", feature = "python-stubgen"),
    pyclass(get_all, set_all, from_py_object)
)]
#[derive(Clone)]
pub struct GpuBoxBatch {
    /// Batch number: 00,01,02..n.
    pub batch_number: usize,

    /// Vector storing the details of each gpubox file in this batch.
    pub gpubox_files: Vec<GpuBoxFile>,
}

impl GpuBoxBatch {
    /// Create a batch with the given batch number and no gpubox files; files
    /// are expected to be pushed onto `gpubox_files` afterwards.
    pub fn new(batch_number: usize) -> Self {
        let gpubox_files = Vec::new();
        Self {
            batch_number,
            gpubox_files,
        }
    }
}

impl fmt::Debug for GpuBoxBatch {
    /// Compact single-line representation: the batch number followed by the
    /// debug representation of every gpubox file in the batch.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            batch_number,
            gpubox_files,
        } = self;
        write!(
            f,
            "batch_number={} gpubox_files={:?}",
            batch_number, gpubox_files,
        )
    }
}

/// This represents one gpubox file: its filename and the coarse channel
/// identifier parsed from that filename.
#[cfg_attr(feature = "python-stubgen", gen_stub_pyclass)]
#[cfg_attr(
    any(feature = "python", feature = "python-stubgen"),
    pyclass(get_all, set_all, from_py_object)
)]
#[derive(Clone)]
pub struct GpuBoxFile {
    /// Filename of gpubox file
    pub filename: String,

    /// Channel number (Legacy==gpubox host number 01..24; V2==receiver channel number 001..255)
    pub channel_identifier: usize,
}

impl fmt::Debug for GpuBoxFile {
    /// Compact single-line representation: the filename followed by the
    /// channel identifier.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            filename,
            channel_identifier,
        } = self;
        write!(
            f,
            "filename={} channelidentifier={}",
            filename, channel_identifier,
        )
    }
}

impl std::cmp::PartialEq for GpuBoxBatch {
    /// Two batches are equal when both their batch numbers and their lists of
    /// gpubox files are equal.
    fn eq(&self, other: &Self) -> bool {
        if self.batch_number != other.batch_number {
            return false;
        }
        self.gpubox_files == other.gpubox_files
    }
}

impl std::cmp::PartialEq for GpuBoxFile {
    /// Two gpubox files are equal when both their filenames and their channel
    /// identifiers are equal.
    fn eq(&self, other: &Self) -> bool {
        let Self {
            filename,
            channel_identifier,
        } = other;
        self.filename == *filename && self.channel_identifier == *channel_identifier
    }
}

/// A temporary representation of a gpubox file, built from its filename alone.
///
/// The filename is borrowed from the caller-supplied path rather than copied.
#[derive(Clone, Debug)]
struct TempGpuBoxFile<'a> {
    /// Filename of gpubox file
    filename: &'a str,
    /// Channel number (Legacy==gpubox host number 01..24; V2==receiver channel number 001..255)
    channel_identifier: usize,
    /// Batch number (00,01,02..n)
    batch_number: usize,
}

impl std::cmp::PartialEq for TempGpuBoxFile<'_> {
    /// Two temporary gpubox files are equal when filename, channel identifier
    /// and batch number all match.
    fn eq(&self, other: &Self) -> bool {
        let lhs = (self.filename, self.channel_identifier, self.batch_number);
        let rhs = (other.filename, other.channel_identifier, other.batch_number);
        lhs == rhs
    }
}

/// A type alias for a horrible type:
/// `BTreeMap<u64, BTreeMap<usize, (usize, usize)>>`
///
/// The outer-most keys are UNIX times in milliseconds, one for each unique
/// HDU time found in the supplied gpubox files. Each of these keys is
/// associated with an inner tree; the keys of the inner trees are the gpubox
/// coarse-channel identifiers, and the values are
/// `(gpubox batch number, HDU index)` pairs.
///                                      Unix          Chan    Batch  Hdu
pub(crate) type GpuboxTimeMap = BTreeMap<u64, BTreeMap<usize, (usize, usize)>>;

/// A little struct to help us not get confused when dealing with the returned
/// values from complex functions. It is what `examine_gpubox_files` returns.
pub(crate) struct GpuboxInfo {
    /// The gpubox files grouped into batches.
    pub batches: Vec<GpuBoxBatch>,
    /// The correlator version determined from the gpubox filenames.
    pub mwa_version: MWAVersion,
    /// UNIX times (ms) mapped to coarse channels, batch numbers and HDU indices.
    pub time_map: GpuboxTimeMap,
    /// The product of the image dimensions of HDU 1 (the same for every gpubox file).
    pub hdu_size: usize,
}

/// Convert `Vec<TempGpuBoxFile>` to `Vec<GpuBoxBatch>`.
///
/// One `GpuBoxBatch` is created for every batch number from 0 up to and
/// including the largest batch number present in the input, so the returned
/// vector is indexed by (and therefore sorted by) batch number. A batch number
/// with no files in the input yields a batch with an empty `gpubox_files`.
/// Within each batch the files are sorted by `channel_identifier`.
///
/// This function only regroups the filename-derived information; it does not
/// open any files and it cannot fail. An empty input gives an empty output.
///
///
/// # Arguments
///
/// * `temp_gpuboxes` - A vector of `TempGpuBoxFile` to be converted.
///
///
/// # Returns
///
/// * A vector of `GpuBoxBatch`, ordered by batch number.
///
///
fn convert_temp_gpuboxes(temp_gpuboxes: Vec<TempGpuBoxFile>) -> Vec<GpuBoxBatch> {
    // Batch numbers start at zero, so the number of batches is the largest
    // batch number plus one. With no input files there are no batches at all
    // (rather than a panic on an empty `max()`).
    let num_batches = temp_gpuboxes
        .iter()
        .map(|g| g.batch_number)
        .max()
        .map_or(0, |max_batch| max_batch + 1);

    // Element `b` of this vector is batch number `b`, so the vector is sorted
    // by batch number by construction.
    let mut gpubox_batches: Vec<GpuBoxBatch> = (0..num_batches).map(GpuBoxBatch::new).collect();

    for temp_g in temp_gpuboxes {
        gpubox_batches[temp_g.batch_number]
            .gpubox_files
            .push(GpuBoxFile {
                filename: temp_g.filename.to_string(),
                channel_identifier: temp_g.channel_identifier,
            });
    }

    // Ensure the output is properly sorted - each batch is sorted by
    // channel_identifier.
    for batch in &mut gpubox_batches {
        batch
            .gpubox_files
            .sort_unstable_by_key(|g| g.channel_identifier);
    }

    gpubox_batches
}

/// Unpack the metadata associated with the input gpubox files.
///
/// The filenames are classified and grouped into batches (see
/// `determine_gpubox_batches`), the HDU times of every file are gathered into
/// a `GpuboxTimeMap` (see `create_time_map`), and then every file is opened
/// once more to run some consistency checks. A "gpubox batch" refers to the
/// number XX in a gpubox filename
/// (e.g. `1065880128_20131015134930_gpubox01_XX.fits`). Some older files might
/// have a "batchless" format
/// (e.g. `1065880128_20131015134930_gpubox01.fits`). These details are
/// reflected in the returned `MWAVersion`.
///
/// Fail if
///
/// * `determine_gpubox_batches` or `create_time_map` fail;
/// * a gpubox file cannot be opened, or its HDUs cannot be read;
/// * a gpubox file contains only a single HDU (i.e. no data HDUs);
/// * the amount of data in HDU 1 is not equal across all gpubox files;
/// * the `CORR_VER` key in the primary HDU contradicts the correlator version
///   determined from the filenames;
/// * the `OBSID` key in the primary HDU is missing or does not match
///   `metafits_obs_id`.
///
///
/// # Arguments
///
/// * `gpubox_filenames` - A vector or slice of strings or references to strings
///   containing all of the gpubox filenames provided by the client.
///
/// * `metafits_obs_id` - The obs_id reported from the metafits file primary HDU
///
/// # Returns
///
/// * A Result containing a `GpuboxInfo`: the gpubox batches, the MWA
///   Correlator version, the UNIX times paired with gpubox HDU numbers, and
///   the amount of data in each HDU.
///
///
pub(crate) fn examine_gpubox_files<T: AsRef<Path>>(
    gpubox_filenames: &[T],
    metafits_obs_id: u32,
) -> Result<GpuboxInfo, GpuboxError> {
    let (temp_gpuboxes, corr_format) = determine_gpubox_batches(gpubox_filenames)?;
    let time_map: GpuboxTimeMap = create_time_map(&temp_gpuboxes, corr_format)?;
    let batches = convert_temp_gpuboxes(temp_gpuboxes);

    // The size of the image on HDU 1 of the first file examined; every other
    // file must agree with it.
    let mut hdu_size: Option<usize> = None;

    for g in batches.iter().flat_map(|b| b.gpubox_files.iter()) {
        let mut fptr = fits_open!(&g.filename)?;

        // A file holding nothing but the primary HDU has no data for us.
        if fptr.iter().count() == 1 {
            return Err(GpuboxError::NoDataHDUsInGpuboxFile {
                gpubox_filename: g.filename.clone(),
            });
        }
        let hdu = fits_open_hdu!(&mut fptr, 1)?;

        let this_size: usize = get_hdu_image_size!(&mut fptr, &hdu)?.iter().product();
        match hdu_size {
            Some(expected) if expected != this_size => {
                return Err(GpuboxError::UnequalHduSizes);
            }
            Some(_) => (),
            None => hdu_size = Some(this_size),
        }

        // The remaining checks look at the header of the primary HDU.
        let primary_hdu = fits_open_hdu!(&mut fptr, 0)?;

        // The correlator version implied by the filename must agree with the header.
        validate_gpubox_metadata_mwa_version(&mut fptr, &primary_hdu, &g.filename, corr_format)?;

        // The obsid in the metafits must match the one in the gpubox file.
        validate_gpubox_metadata_obs_id(&mut fptr, &primary_hdu, &g.filename, metafits_obs_id)?;
    }

    // `determine_gpubox_batches` fails if no gpubox files are supplied, so at
    // least one file went through the loop above and it is safe to unwrap.
    let hdu_size = hdu_size.unwrap();

    Ok(GpuboxInfo {
        batches,
        mwa_version: corr_format,
        time_map,
        hdu_size,
    })
}

/// Classify the input gpubox filenames and extract the coarse channel
/// identifier and batch number from each. A "gpubox batch" refers to the number
/// XX in a gpubox filename
/// (e.g. `1065880128_20131015134930_gpubox01_XX.fits`). Some older files might
/// have a "batchless" format (e.g. `1065880128_20131015134930_gpubox01.fits`);
/// those are all assigned batch number 0.
///
/// Only the filenames are inspected here; no files are opened.
///
///
/// Fail if
///
/// * no files were supplied (`GpuboxError::NoGpuboxes`);
/// * there is a mixture of the types of gpubox files supplied (e.g. different
///   correlator versions) (`GpuboxError::Mixture`);
/// * a gpubox filename's structure could not be identified, or the filename is
///   not valid UTF-8 (`GpuboxError::Unrecognised`).
///
///
/// # Arguments
///
/// * `gpubox_filenames` - A vector or slice of strings or references to strings containing
///   all of the gpubox filenames provided by the client.
///
///
/// # Returns
///
/// * A Result containing a vector of `TempGpuBoxFile` structs (sorted by batch
///   number, then channel identifier) as well as a `MWAVersion`.
///
///
fn determine_gpubox_batches<T: AsRef<Path>>(
    gpubox_filenames: &'_ [T],
) -> Result<(Vec<TempGpuBoxFile<'_>>, MWAVersion), GpuboxError> {
    if gpubox_filenames.is_empty() {
        return Err(GpuboxError::NoGpuboxes);
    }

    // NOTE: `[0-9]` is used instead of `\d` because `\d` is Unicode-aware in
    // the regex crate and would accept non-ASCII digits that `str::parse`
    // rejects. The `.` before `fits` is escaped so that it only matches a
    // literal dot.

    // MWAX: 1234567890_12345678901234_ch123_123.fits
    //          obsid      datetime     chan batch
    let re_mwax: Regex = Regex::new(
        r"[0-9]{10}_[0-9]{8}(.)?[0-9]{6}_ch(?P<channel>[0-9]{3})_(?P<batch>[0-9]{3})\.fits",
    )
    .unwrap();
    // Legacy MWA: 1234567890_12345678901234_gpubox12_12.fits
    //                 obsid      datetime       chan batch
    let re_legacy_batch: Regex =
        Regex::new(r"[0-9]{10}_[0-9]{14}_gpubox(?P<band>[0-9]{2})_(?P<batch>[0-9]{2})\.fits")
            .unwrap();
    // Old Legacy MWA: 1234567890_12345678901234_gpubox12.fits
    //                    obsid       datetime       chan
    let re_old_legacy_format: Regex =
        Regex::new(r"[0-9]{10}_[0-9]{14}_gpubox(?P<band>[0-9]{2})\.fits").unwrap();

    let mut format: Option<MWAVersion> = None;
    let mut temp_gpuboxes: Vec<TempGpuBoxFile> = Vec::with_capacity(gpubox_filenames.len());

    for g_path in gpubox_filenames {
        // So that we can pass along useful error messages, convert the input
        // filename type to a string slice. A filename that is not valid UTF-8
        // cannot match any of the known patterns, so report it as
        // unrecognised rather than panicking.
        let g = match g_path.as_ref().to_str() {
            Some(s) => s,
            None => {
                return Err(GpuboxError::Unrecognised(
                    g_path.as_ref().to_string_lossy().into_owned(),
                ))
            }
        };

        // Try the formats from newest to oldest. The unwraps on `parse` are
        // safe: the captures consist of two or three ASCII digits.
        let (this_format, channel_identifier, batch_number): (MWAVersion, usize, usize) =
            if let Some(caps) = re_mwax.captures(g) {
                (
                    MWAVersion::CorrMWAXv2,
                    caps["channel"].parse().unwrap(),
                    caps["batch"].parse().unwrap(),
                )
            } else if let Some(caps) = re_legacy_batch.captures(g) {
                (
                    MWAVersion::CorrLegacy,
                    caps["band"].parse().unwrap(),
                    caps["batch"].parse().unwrap(),
                )
            } else if let Some(caps) = re_old_legacy_format.captures(g) {
                // There's only one batch.
                (
                    MWAVersion::CorrOldLegacy,
                    caps["band"].parse().unwrap(),
                    0,
                )
            } else {
                return Err(GpuboxError::Unrecognised(g.to_string()));
            };

        // Check if we've already matched any files as being a different
        // format. If so, then we've got a mix, and we should exit early.
        match format {
            None => format = Some(this_format),
            Some(seen) if seen == this_format => (),
            Some(_) => return Err(GpuboxError::Mixture),
        }

        temp_gpuboxes.push(TempGpuBoxFile {
            filename: g,
            channel_identifier,
            batch_number,
        });
    }

    // Ensure the output is properly sorted - by batch number, then channel
    // identifier.
    temp_gpuboxes.sort_unstable_by_key(|g| (g.batch_number, g.channel_identifier));

    // At least one filename was supplied and every filename either set or
    // matched `format`, so it is safe to unwrap.
    Ok((temp_gpuboxes, format.unwrap()))
}

/// Work out the time of an HDU, in milliseconds, by combining its `TIME`
/// (whole seconds) and `MILLITIM` (milliseconds) header keys.
///
///
/// # Arguments
///
/// * `gpubox_fptr` - a FitsFile reference to this gpubox file.
///
/// * `gpubox_hdu_fptr` - A reference to the HDU we are finding the time of.
///
///
/// # Returns
///
/// * A Result containing the full start unix time (in milliseconds), or an
///   error if either key could not be read.
///
///
fn determine_hdu_time(
    gpubox_fptr: &mut FitsFile,
    gpubox_hdu_fptr: &FitsHdu,
) -> Result<u64, FitsError> {
    let seconds: u64 = get_required_fits_key!(gpubox_fptr, gpubox_hdu_fptr, "TIME")?;
    let millis: u64 = get_required_fits_key!(gpubox_fptr, gpubox_hdu_fptr, "MILLITIM")?;

    let whole_seconds_as_ms = seconds * 1000;
    Ok(whole_seconds_as_ms + millis)
}

/// Walk the data HDUs of the given gpubox file and record which UNIX time
/// belongs to which HDU index.
///
/// The primary HDU (index 0) is always skipped. For MWAX files only every
/// second HDU is visited, starting from index 1.
///
///
/// # Arguments
///
/// * `gpubox_fptr` - a FitsFile reference to this gpubox file.
///
/// * `mwa_version` - enum telling us which correlator version the observation was created by.
///
///
/// # Returns
///
/// * A Result containing a BTree mapping UNIX time (milliseconds) to the HDU
///   index within this gpubox file, or the first error encountered.
///
///
fn map_unix_times_to_hdus(
    gpubox_fptr: &mut FitsFile,
    mwa_version: MWAVersion,
) -> Result<BTreeMap<u64, usize>, FitsError> {
    let hdu_count = gpubox_fptr.iter().count();

    // The new correlator has a "weights" HDU in each alternating HDU. Skip
    // those.
    let stride = if mwa_version == MWAVersion::CorrMWAXv2 {
        2
    } else {
        1
    };

    // Start from 1: the first HDU in all gpubox files contains only a little
    // metadata. Collecting into a `Result` stops at the first failure.
    (1..hdu_count)
        .step_by(stride)
        .map(|hdu_index| -> Result<(u64, usize), FitsError> {
            let hdu = fits_open_hdu!(gpubox_fptr, hdu_index)?;
            let time = determine_hdu_time(gpubox_fptr, &hdu)?;
            Ok((time, hdu_index))
        })
        .collect()
}

/// Check that the correlator version worked out from the filename agrees with
/// the `CORR_VER` key in the primary HDU of the gpubox file.
///
/// * MWAX files must have `CORR_VER` present and equal to 2.
/// * Legacy and old legacy files must not have a `CORR_VER` key at all.
/// * Any other `MWAVersion` is rejected as invalid for gpubox files.
///
///
/// # Arguments
///
/// * `gpubox_fptr` - a FitsFile reference to this gpubox file.
///
/// * `gpubox_primary_hdu` - The primary HDU of the gpubox file.
///
/// * `gpubox_filename` - The filename of the gpubox file being validated.
///
/// * `mwa_version` - enum telling us which correlator version the observation was created by.
///
///
/// # Returns
///
/// * A Result containing `Ok` or a `GpuboxError` if it fails validation.
///
///
fn validate_gpubox_metadata_mwa_version(
    gpubox_fptr: &mut FitsFile,
    gpubox_primary_hdu: &FitsHdu,
    gpubox_filename: &str,
    mwa_version: MWAVersion,
) -> Result<(), GpuboxError> {
    // `None` here means the key is absent from the header.
    let gpu_mwa_version: Option<u8> =
        get_optional_fits_key!(gpubox_fptr, gpubox_primary_hdu, "CORR_VER")?;

    match (mwa_version, gpu_mwa_version) {
        // New correlator files include a version, and it must be 2.
        (MWAVersion::CorrMWAXv2, Some(2)) => Ok(()),
        (MWAVersion::CorrMWAXv2, Some(_)) => Err(GpuboxError::MwaxCorrVerMismatch(
            gpubox_filename.to_string(),
        )),
        (MWAVersion::CorrMWAXv2, None) => {
            Err(GpuboxError::MwaxCorrVerMissing(gpubox_filename.to_string()))
        }

        // For pre v2, ensure the key isn't present.
        (MWAVersion::CorrOldLegacy | MWAVersion::CorrLegacy, None) => Ok(()),
        (MWAVersion::CorrOldLegacy | MWAVersion::CorrLegacy, Some(gpu_corr_version_value)) => {
            Err(GpuboxError::CorrVerMismatch {
                gpubox_filename: gpubox_filename.to_string(),
                gpu_corr_version_value,
            })
        }

        _ => Err(GpuboxError::InvalidMwaVersion { mwa_version }),
    }
}

/// Check that the obsid from the metafits agrees with the `OBSID` key in the
/// primary HDU of the gpubox file.
///
///
/// # Arguments
///
/// * `gpubox_fptr` - a FitsFile reference to this gpubox file.
///
/// * `gpubox_primary_hdu` - The primary HDU of the gpubox file.
///
/// * `gpubox_filename` - The filename of the gpubox file being validated.
///
/// * `metafits_obs_id` - Obsid as determined by reading the metafits.
///
///
/// # Returns
///
/// * A Result containing `Ok`, or a `GpuboxError` if the `OBSID` key could not
///   be read (`MissingObsid`) or differs from the metafits (`ObsidMismatch`).
///
///
fn validate_gpubox_metadata_obs_id(
    gpubox_fptr: &mut FitsFile,
    gpubox_primary_hdu: &FitsHdu,
    gpubox_filename: &str,
    metafits_obs_id: u32,
) -> Result<(), GpuboxError> {
    // Get the OBSID- if not present, this is probably not an MWA fits file!
    // Any failure to read the key is reported as a missing obsid.
    let gpu_obs_id: u32 = get_required_fits_key!(gpubox_fptr, gpubox_primary_hdu, "OBSID")
        .map_err(|_| GpuboxError::MissingObsid(gpubox_filename.to_string()))?;

    if gpu_obs_id == metafits_obs_id {
        return Ok(());
    }

    Err(GpuboxError::ObsidMismatch {
        obsid: metafits_obs_id,
        gpubox_filename: gpubox_filename.to_string(),
        gpubox_obsid: gpu_obs_id,
    })
}

/// Returns a BTree structure consisting of:
/// BTree of timesteps. Each timestep is a BTree for a course channel.
/// Each coarse channel then contains the batch number and hdu index.
///
/// # Arguments
///
/// * `gpubox_batches` - vector of structs describing each gpubox "batch"
///
/// * `mwa_version` - enum telling us which correlator version the observation was created by.
///
///
/// # Returns
///
/// * A Result containing the GPUBox Time Map or an error.
///
///
fn create_time_map(
    gpuboxes: &[TempGpuBoxFile],
    mwa_version: MWAVersion,
) -> Result<GpuboxTimeMap, GpuboxError> {
    // Ugly hack to open up all the HDUs of the gpubox files in parallel. We
    // can't do this over the `GPUBoxBatch` or `GPUBoxFile` structs because they
    // contain the `FitsFile` struct, which does not implement the `Send`
    // trait. `ThreadsafeFitsFile` does contain this, but does not allow
    // iteration. It seems like the smaller evil is to just iterate over the
    // filenames here and get the relevant info out of the HDUs before things
    // get too complicated elsewhere.

    // In parallel, open up all the fits files and get their HDU times. rayon
    // preserves the order of the input arguments, so there is no need to keep
    // the temporary gpubox files along with their times. In any case, handling
    // that would be difficult!
    let maps = gpuboxes
        .into_iter()
        .map(|g| {
            let mut fptr = fits_open!(&g.filename)?;
            let hdu = fits_open_hdu!(&mut fptr, 0)?;

            // New correlator files include a version - check that it is present.
            if mwa_version == MWAVersion::CorrMWAXv2 {
                let v: u8 = get_required_fits_key!(&mut fptr, &hdu, "CORR_VER")?;
                if v != 2 {
                    return Err(GpuboxError::MwaxCorrVerMismatch(g.filename.to_string()));
                }
            }

            // Get the UNIX times from each of the HDUs of this `FitsFile`.
            map_unix_times_to_hdus(&mut fptr, mwa_version).map_err(GpuboxError::from)
        })
        .collect::<Vec<Result<BTreeMap<u64, usize>, GpuboxError>>>();

    // Collapse all of the gpubox time maps into a single map.
    let mut gpubox_time_map = BTreeMap::new();
    for (map_maybe_error, gpubox) in maps.into_iter().zip(gpuboxes.iter()) {
        let map = map_maybe_error?;
        for (time, hdu_index) in map {
            gpubox_time_map
                .entry(time)
                .or_insert_with(BTreeMap::new)
                .entry(gpubox.channel_identifier)
                .or_insert((gpubox.batch_number, hdu_index));
        }
    }

    Ok(gpubox_time_map)
}

/// Returns a sorted vector of timestep indices which exist in the
/// GpuboxTimeMap (i.e. the user has provided at least some data files for
/// these timesteps).
///
/// # Arguments
///
/// * `gpubox_time_map` - BTree structure containing the map of what gpubox files and timesteps we were supplied by the client.
///
/// * `corr_timesteps` - Vector of Correlator Context TimeStep structs.
///
/// # Returns
///
/// * A vector of indices into `corr_timesteps` for which at least some data files have been provided
///
/// # Panics
///
/// * If a UNIX time in `gpubox_time_map` has no matching entry in `corr_timesteps`.
///
///
pub(crate) fn populate_provided_timesteps(
    gpubox_time_map: &GpuboxTimeMap,
    corr_timesteps: &[TimeStep],
) -> Vec<usize> {
    // Every key of the outer map is a UNIX time we have at least some data
    // for; look up where that time sits in corr_timesteps.
    let mut provided: Vec<usize> = Vec::with_capacity(gpubox_time_map.len());
    for unix_time_ms in gpubox_time_map.keys() {
        let index = corr_timesteps
            .iter()
            .position(|timestep| timestep.unix_time_ms == *unix_time_ms)
            .unwrap();
        provided.push(index);
    }

    // Ensure vector is sorted
    provided.sort_unstable();
    provided
}

/// Returns a sorted vector of coarse channel indices which exist in the
/// GpuboxTimeMap (i.e. the user has provided at least some data files for
/// these coarse channels).
///
/// # Arguments
///
/// * `gpubox_time_map` - BTree structure containing the map of what gpubox files and timesteps we were supplied by the client.
///
/// * `corr_coarse_chans` - Vector of Correlator Context CoarseChannel structs.
///
/// # Returns
///
/// * A vector of indices into `corr_coarse_chans` for which at least some data files have been provided
///
/// # Panics
///
/// * If a coarse channel identifier in `gpubox_time_map` has no entry in
///   `corr_coarse_chans` with a matching `gpubox_number`.
///
///
pub(crate) fn populate_provided_coarse_channels(
    gpubox_time_map: &GpuboxTimeMap,
    corr_coarse_chans: &[CoarseChannel],
) -> Vec<usize> {
    // Gather the distinct coarse channel identifiers seen across all
    // timesteps of the time map.
    let mut provided_identifiers: HashSet<usize> = HashSet::new();
    for chan_map in gpubox_time_map.values() {
        provided_identifiers.extend(chan_map.keys().copied());
    }

    // Translate each identifier into its index within the correlator coarse
    // channels passed in.
    let mut indices: Vec<usize> = Vec::with_capacity(provided_identifiers.len());
    for identifier in &provided_identifiers {
        let index = corr_coarse_chans
            .iter()
            .position(|chan| chan.gpubox_number == *identifier)
            .unwrap();
        indices.push(index);
    }

    // Ensure vector is sorted
    indices.sort_unstable();
    indices
}

/// Determine the common start and end times of an observation. In this context,
/// "common" refers to a time that is common to all of the provided coarse channels and contiguous. e.g.
///
/// ```text
/// time:     0123456789abcdef
/// gpubox01: ###############
/// gpubox02:  #############
/// gpubox03: #####    ######
/// gpubox04:   ############
/// gpubox05: ###############
/// gpubox06:                #
/// gpubox07-24: <none>
/// ```
/// Example 1:
/// In the above example, there are at least some timesteps from coarse channels 01-06. But there are NO timesteps that contain all 6 channels so this function
/// would return a None.
///
/// Example 2:
/// If you were to remove timestep "f", then there are 5 coarse channels, and the timesteps 2-4 inclusive are the common timesteps.
///
/// Only the first contiguous run of matching timesteps is reported. The number
/// of provided coarse channels is always counted over the whole
/// `gpubox_time_map`, even when `good_time_unix_time_ms` is supplied.
///
/// # Arguments
///
/// * `gpubox_time_map` - BTree structure containing the map of what gpubox files and timesteps we were supplied by the client.
///
/// * `integration_time_ms` - Correlator dump time (so we know the gap between timesteps)
///
/// * `good_time_unix_time_ms` - Option- Some is the 'good' time (i.e. the first time which is not part of the quack time); only timesteps
///   at or after this time are considered. None means that times during the quack time are ok to be included.
///
/// # Returns
///
/// * A Result which contains an Option containing a struct containing the start and end times based on what we actually got, so all coarse channels match,
///   or None if no timestep contains all of the provided coarse channels. The current implementation never returns an Error.
///
///
pub(crate) fn determine_common_obs_times_and_chans(
    gpubox_time_map: &GpuboxTimeMap,
    integration_time_ms: u64,
    good_time_unix_time_ms: Option<u64>,
) -> Result<Option<ObsTimesAndChans>, GpuboxError> {
    // The number of distinct coarse channel identifiers provided across ALL
    // timesteps - a "common" timestep must contain exactly this many channels.
    let max_chans = gpubox_time_map
        .values()
        .flat_map(|chans| chans.keys().copied())
        .collect::<HashSet<usize>>()
        .len();

    // Borrowing iterator over the candidate timesteps, in time order:
    // * if a good time was supplied, drop the timesteps before it;
    // * keep only the timesteps which have all of the provided coarse channels.
    let mut candidates = gpubox_time_map
        .iter()
        .filter(|(unix_ms, _)| match good_time_unix_time_ms {
            Some(good_time) => **unix_ms >= good_time,
            None => true,
        })
        .filter(|(_, chans)| chans.len() == max_chans);

    // Get the first timestep where the num chans matches the provided channels. If we get None, then we did not find any timesteps which contain all the coarse channels
    let (first_unix_ms, first_chans) = match candidates.next() {
        Some(ts) => ts,
        None => return Ok(None),
    };

    // For reference, these are the coarse channels of the first common timestep.
    let first_ts_chans: Vec<usize> = first_chans.keys().copied().collect();
    let common_start_unix_ms = *first_unix_ms;

    // In case there was only 1 timestep in the candidates, set the common end time now
    let mut common_end_unix_ms = common_start_unix_ms + integration_time_ms;

    // Extend the run for as long as each candidate directly follows the
    // previous one. Every candidate already has `max_chans` channels (see the
    // filter above), so only contiguity needs checking here.
    let mut prev_ts_unix_ms = common_start_unix_ms;
    for (unix_ms, _) in candidates {
        if *unix_ms != prev_ts_unix_ms + integration_time_ms {
            break;
        }
        common_end_unix_ms = *unix_ms + integration_time_ms;
        prev_ts_unix_ms = *unix_ms;
    }

    Ok(Some(ObsTimesAndChans {
        start_time_unix_ms: common_start_unix_ms,
        end_time_unix_ms: common_end_unix_ms,
        duration_ms: common_end_unix_ms - common_start_unix_ms,
        coarse_chan_identifiers: first_ts_chans,
    }))
}

// NOTE(review): this module is empty. A separate `#[cfg(test)] mod test;`
// submodule is declared near the top of this file - presumably that is where
// the unit tests for this module live (TODO confirm).
#[cfg(test)]
mod tests {}