winsfs_core/io/
shuffle.rs

1//! Read and write pseudo-shuffled SAF format.
2//!
3//! The aim of the format is not to give a true, random shuffle of the sites in the input SAF
4//! file(s), but to move them around enough to break blocks of linkage disequilibrium. However,
5//! this must be done in constant memory, i.e. streaming through the input.
6//!
7//! In overview, the strategy is to pre-allocate a file of the correct size, which must be known up
8//! front, and then "split" this file into `B` blocks. Then consecutive writes go into consecutive
9//! blocks to be spread across the file.
10//!
11//! # Examples
12//!
13//! ## Write
14//!
15//! ```no_run
16//! # fn main() -> ::std::io::Result<()> {
17//! use winsfs_core::io::shuffle::{Header, Writer};
18//!
19//! // This must be known up front; for a single SAF, this can be gotten from the index.
20//! let sites = 100_000; // Number of sites in the input.
21//! let shape = vec![9]; // Number of values per site (per population, just one here).
22//! let blocks = 20;     // Number of blocks to use for pseudo-shuffle.
23//! let header = Header::new(sites, shape, blocks);
24//!
25//! let mut writer = Writer::create("/path/to/saf.shuf", header)?;
26//!
27//! // Get sites from somewhere and write. The number of values must match the shape.
28//! let site = vec![0.; 9];
29//! for _ in 0..sites {
30//!     writer.write_site(site.as_slice())?;
31//! }
32//!
33//! // Writer expects exactly as many sites as provided in the header:
34//! // any more will throw error in [`write_site`], any less will panic
35//! // in the drop check; use try_finish to check for this error.
36//! writer.try_finish()?;
37//!
38//! # Ok(()) }
39//! ```
40
41mod header;
42pub use header::{Header, MAGIC_NUMBER};
43
44mod reader;
45pub use reader::Reader;
46
47mod writer;
48pub use writer::Writer;
49
50/// Create checked conversion function.
51macro_rules! impl_convert_to_fn {
52    ($to:ty, $name:ident) => {
53        /// Converts a value into a usize with checking.
54        ///
55        /// Panics if the conversion cannot be made exactly.
56        pub(self) fn $name<T>(x: T) -> $to
57        where
58            $to: TryFrom<T>,
59            T: Copy + std::fmt::Display,
60        {
61            match x.try_into() {
62                Ok(v) => v,
63                Err(_) => {
64                    panic!(
65                        "cannot convert {x} ({ty}) into {to}",
66                        ty = std::any::type_name::<T>(),
67                        to = stringify!($to),
68                    )
69                }
70            }
71        }
72    };
73}
74
75impl_convert_to_fn!(u16, to_u16);
76impl_convert_to_fn!(u32, to_u32);
77impl_convert_to_fn!(u64, to_u64);
78impl_convert_to_fn!(usize, to_usize);
79
80#[cfg(test)]
81mod tests {
82    use super::*;
83
84    use std::io;
85
86    use tempfile::NamedTempFile;
87
88    use crate::{io::ReadSite, saf::Site};
89
90    #[test]
91    fn test_write_then_read() -> io::Result<()> {
92        let file = NamedTempFile::new()?;
93        let path = file.path();
94
95        let header = Header::new(9, vec![1, 4], 4);
96        let mut writer = Writer::create(path, header)?;
97
98        let sites = vec![
99            &[0., 0., 0., 0., 0.],
100            &[1., 1., 1., 1., 1.],
101            &[2., 2., 2., 2., 2.],
102            &[3., 3., 3., 3., 3.],
103            &[4., 4., 4., 4., 4.],
104            &[5., 5., 5., 5., 5.],
105            &[6., 6., 6., 6., 6.],
106            &[7., 7., 7., 7., 7.],
107            &[8., 8., 8., 8., 8.],
108        ];
109
110        for &site in sites.iter() {
111            writer.write_site(site)?;
112        }
113
114        // Drop the writer to flush
115        writer.try_finish().unwrap();
116
117        let mut reader = Reader::try_from_path(path)?;
118
119        let expected_order = &[0, 4, 8, 1, 5, 2, 6, 3, 7];
120
121        let mut site = Site::new(vec![0.; 5], [1, 4]).unwrap();
122        for &i in expected_order {
123            reader.read_site(&mut site)?;
124
125            // Should be normalised, e.g. exp'd
126            let expected_site = sites[i].iter().map(|x| x.exp()).collect::<Vec<_>>();
127            assert_eq!(site.as_slice(), expected_site.as_slice());
128        }
129
130        assert!(reader.read_site(&mut site)?.is_done());
131
132        file.close()
133    }
134}