bed_reader/
lib.rs

1#![warn(missing_docs)]
2#![warn(clippy::pedantic)]
3#![allow(
4    clippy::missing_panics_doc, // LATER: add panics docs
5    clippy::missing_errors_doc, // LATER: add errors docs
6    clippy::similar_names,
7    clippy::cast_possible_truncation,
8    clippy::cast_possible_wrap,
9    clippy::cast_sign_loss,
10    clippy::cast_lossless
11)]
12// Inspired by C++ version by Chris Widmer and Carl Kadie
13
14// See: https://towardsdatascience.com/nine-rules-for-writing-python-extensions-in-rust-d35ea3a4ec29?sk=f8d808d5f414154fdb811e4137011437
15// for an article on how this project uses Rust to create a Python extension.
16
17// For Rust API tips see https://rust-lang.github.io/api-guidelines/necessities.html
18#![doc = include_str!("../README-rust.md")]
19//! ## Main Functions
20//!
21//! | Function | Description |
22//! | -------- | ----------- |
23//! | [`Bed::new`](struct.Bed.html#method.new) or [`Bed::builder`](struct.Bed.html#method.builder) | Open a local PLINK .bed file for reading genotype data and metadata. |
24//! | [`BedCloud::new`](struct.BedCloud.html#method.new), [`BedCloud::new_with_options`](struct.BedCloud.html#method.new_with_options),<br> [`BedCloud::builder`](struct.BedCloud.html#method.builder), [`BedCloud::builder_with_options`](struct.BedCloud.html#method.builder_with_options),<br> [`BedCloud::from_cloud_file`](struct.BedCloud.html#method.from_cloud_file), [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file) | Open a cloud PLINK .bed file for reading genotype data and metadata. |
25//! | [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) | Read genotype data from a local or cloud file. Supports indexing and options. |
26//! | [`WriteOptions::builder`](struct.WriteOptions.html#method.builder) | Write values to a local file in PLINK .bed format. Supports metadata and options. |
27//!
28//! ### `Bed` Metadata Methods
29//!
30//! After using [`Bed::new`](struct.Bed.html#method.new) or [`Bed::builder`](struct.Bed.html#method.builder) to open a PLINK .bed file for reading, use
31//! these methods to see metadata.
32//!
33//! | Method | Description |
34//! | -------- | ----------- |
35//! | [`iid_count`](struct.Bed.html#method.iid_count) | Number of individuals (samples) |
36//! | [`sid_count`](struct.Bed.html#method.sid_count) | Number of SNPs (variants) |
37//! | [`dim`](struct.Bed.html#method.dim) | Number of individuals and SNPs |
//! | [`fid`](struct.Bed.html#method.fid) | Family id of each individual (sample) |
//! | [`iid`](struct.Bed.html#method.iid) | Individual id of each individual (sample) |
//! | [`father`](struct.Bed.html#method.father) | Father id of each individual (sample) |
//! | [`mother`](struct.Bed.html#method.mother) | Mother id of each individual (sample) |
42//! | [`sex`](struct.Bed.html#method.sex) | Sex of each individual (sample) |
43//! | [`pheno`](struct.Bed.html#method.pheno) | A phenotype for each individual (seldom used) |
44//! | [`chromosome`](struct.Bed.html#method.chromosome) | Chromosome of each SNP (variant) |
45//! | [`sid`](struct.Bed.html#method.sid) | SNP Id of each SNP (variant) |
46//! | [`cm_position`](struct.Bed.html#method.cm_position) | Centimorgan position of each SNP (variant) |
47//! | [`bp_position`](struct.Bed.html#method.bp_position) | Base-pair position of each SNP (variant) |
48//! | [`allele_1`](struct.Bed.html#method.allele_1) | First allele of each SNP (variant) |
49//! | [`allele_2`](struct.Bed.html#method.allele_2) | Second allele of each SNP (variant) |
50//! | [`metadata`](struct.Bed.html#method.metadata) | All the metadata returned as a [`struct.Metadata`](struct.Metadata.html) |
51//!
52//! ### `ReadOptions`
53//!
54//! When using [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) to read genotype data, use these options to
55//! specify a desired numeric type,
56//! which individuals (samples) to read, which SNPs (variants) to read, etc.
57//!
58//! | Option | Description |
59//! | -------- | ----------- |
60//! | [`i8`](struct.ReadOptionsBuilder.html#method.i8) | Read values as i8 |
61//! | [`f32`](struct.ReadOptionsBuilder.html#method.f32) | Read values as f32 |
62//! | [`f64`](struct.ReadOptionsBuilder.html#method.f64) | Read values as f64 |
63//! | [`iid_index`](struct.ReadOptionsBuilder.html#method.iid_index) | Index of individuals (samples) to read (defaults to all)|
64//! | [`sid_index`](struct.ReadOptionsBuilder.html#method.sid_index) | Index of SNPs (variants) to read (defaults to all) |
65//! | [`f`](struct.ReadOptionsBuilder.html#method.f) | Order of the output array, Fortran-style (default) |
66//! | [`c`](struct.ReadOptionsBuilder.html#method.c) | Order of the output array, C-style |
67//! | [`is_f`](struct.ReadOptionsBuilder.html#method.is_f) | Is order of the output array Fortran-style? (defaults to true)|
68//! | [`missing_value`](struct.ReadOptionsBuilder.html#method.missing_value) | Value to use for missing values (defaults to -127 or NaN) |
//! | [`count_a1`](struct.ReadOptionsBuilder.html#method.count_a1) | Count the number of allele 1 (default) |
//! | [`count_a2`](struct.ReadOptionsBuilder.html#method.count_a2) | Count the number of allele 2 |
71//! | [`is_a1_counted`](struct.ReadOptionsBuilder.html#method.is_a1_counted) | Is allele 1 counted? (defaults to true) |
72//! | [`num_threads`](struct.ReadOptionsBuilder.html#method.num_threads) | Number of threads to use (defaults to all processors) |
73//! | [`max_concurrent_requests`](struct.ReadOptionsBuilder.html#method.max_concurrent_requests) | Maximum number of concurrent async requests (defaults to 10) -- Used by [`BedCloud`](struct.BedCloud.html). |
74//! | [`max_chunk_bytes`](struct.ReadOptionsBuilder.html#method.max_chunk_bytes) | Maximum chunk size of async requests (defaults to 8_000_000 bytes) -- Used by [`BedCloud`](struct.BedCloud.html). |
75//!
76//! ### [`Index`](enum.Index.html) Expressions
77//!
78//! Select which individuals (samples) and SNPs (variants) to read by using these
79//! [`iid_index`](struct.ReadOptionsBuilder.html#method.iid_index) and/or
80//! [`sid_index`](struct.ReadOptionsBuilder.html#method.sid_index) expressions.
81//!
82//! | Example | Type | Description |
83//! | -------- | --- | ----------- |
84//! | nothing | `()` | All |
85//! | `2` | `isize` | Index position 2 |
86//! | `-1` | `isize` | Last index position |
87//! | `vec![0, 10, -2]` | `Vec<isize>` | Index positions 0, 10, and 2nd from last |
88//! | `[0, 10, -2]` | `[isize]` and `[isize;n]` | Index positions 0, 10, and 2nd from last |
89//! | `ndarray::array![0, 10, -2]` | `ndarray::Array1<isize>` | Index positions 0, 10, and 2nd from last |
90//! | `10..20` | `Range<usize>` | Index positions 10 (inclusive) to 20 (exclusive). *Note: Rust ranges don't support negatives* |
91//! | `..=19` | `RangeInclusive<usize>` | Index positions 0 (inclusive) to 19 (inclusive). *Note: Rust ranges don't support negatives* |
92//! | *any Rust ranges* | `Range*<usize>` | *Note: Rust ranges don't support negatives* |
93//! | `s![10..20;2]` | `ndarray::SliceInfo1` | Index positions 10 (inclusive) to 20 (exclusive) in steps of 2 |
94//! | `s![-20..-10;-2]` | `ndarray::SliceInfo1` | 10th from last (exclusive) to 20th from last (inclusive), in steps of -2 |
95//! | `vec![true, false, true]` | `Vec<bool>`| Index positions 0 and 2. |
96//! | `[true, false, true]` | `[bool]` and `[bool;n]`| Index positions 0 and 2.|
97//! | `ndarray::array![true, false, true]` | `ndarray::Array1<bool>`| Index positions 0 and 2.|
98//!
99//! ### Environment Variables
100//!
101//! * `BED_READER_NUM_THREADS`
102//! * `NUM_THREADS`
103//!
104//! If [`ReadOptionsBuilder::num_threads`](struct.ReadOptionsBuilder.html#method.num_threads)
105//! or [`WriteOptionsBuilder::num_threads`](struct.WriteOptionsBuilder.html#method.num_threads) is not specified,
//! the number of threads to use is determined by the environment variables listed above (in order of priority).
//! If neither of these environment variables is set, all processors are used.
108//!
109//! * `BED_READER_DATA_DIR`
110//!
111//! Any requested sample file will be downloaded to this directory. If the environment variable is not set,
112//! a cache folder, appropriate to the OS, will be used.
113
114mod python_module;
115mod tests;
116use anyinput::anyinput;
117pub use bed_cloud::{sample_bed_url, sample_url, sample_urls, BedCloud, BedCloudBuilder};
118use byteorder::{LittleEndian, ReadBytesExt};
119pub use cloud_file::{CloudFile, CloudFileError};
120use core::fmt::Debug;
121use derive_builder::Builder;
122use dpc_pariter::{scope, IteratorExt};
123use fetch_data::FetchData;
124use futures_util::StreamExt;
125use nd::ShapeBuilder;
126use ndarray as nd;
127use num_traits::{abs, Float, FromPrimitive, Signed, ToPrimitive};
128use rayon::iter::{IntoParallelRefIterator, IntoParallelRefMutIterator, ParallelIterator};
129use rayon::{iter::ParallelBridge, ThreadPoolBuildError};
130use statrs::distribution::{Beta, Continuous};
131use std::cmp::Ordering;
132use std::collections::HashSet;
133use std::fs::{self};
134use std::io::Read;
135use std::io::Seek;
136use std::io::SeekFrom;
137use std::io::Write;
138use std::num::{ParseFloatError, ParseIntError};
139use std::ops::AddAssign;
140use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeInclusive, RangeTo, RangeToInclusive};
141use std::rc::Rc;
142use std::str::Utf8Error;
143use std::{
144    env,
145    fs::File,
146    io::{BufRead, BufReader, BufWriter},
147    ops::RangeFull,
148    path::{Path, PathBuf},
149};
150use thiserror::Error;
151mod bed_cloud;
152
/// First magic byte expected at the start of a .bed file header.
const BED_FILE_MAGIC1: u8 = 0x6C; // 0b01101100 or 'l' (lowercase 'L')
/// Second magic byte expected in a .bed file header.
const BED_FILE_MAGIC2: u8 = 0x1B; // 0b00011011 or <esc>
/// Header size in bytes (two magic bytes plus one mode byte), as `u64` for file-offset math.
const CB_HEADER_U64: u64 = 3;
/// The same header size as `usize`, for sizing the in-memory header buffer.
const CB_HEADER_USIZE: usize = 3;
157
158// About ndarray
159//  https://docs.rs/ndarray/0.14.0/ndarray/parallel/index.html
160//  https://rust-lang-nursery.github.io/rust-cookbook/concurrency/parallel.html
161//  https://github.com/rust-ndarray/ndarray/blob/master/README-quick-start.md
162//  https://datacrayon.com/posts/programming/rust-notebooks/multidimensional-arrays-and-operations-with-ndarray
163//  https://docs.rs/ndarray/0.14.0/ndarray/doc/ndarray_for_numpy_users/index.html
164//  https://docs.rs/ndarray-npy
165//  https://rust-lang-nursery.github.io/rust-cookbook/science/mathematics/linear_algebra.html
166
/// All possible errors returned by this library and the libraries it depends on.
///
/// Every variant is `transparent`: its message and source come straight from the
/// wrapped error, and `#[from]` lets `?`/`.into()` convert the wrapped type.
// Based on `<https://nick.groenen.me/posts/rust-error-handling/#the-library-error-type>`
#[derive(Error, Debug)]
pub enum BedErrorPlus {
    /// An error specific to this library.
    #[allow(missing_docs)]
    #[error(transparent)]
    BedError(#[from] BedError),

    /// A standard I/O error.
    #[allow(missing_docs)]
    #[error(transparent)]
    IOError(#[from] std::io::Error),

    /// A failure to build a rayon thread pool.
    #[allow(missing_docs)]
    #[error(transparent)]
    ThreadPoolError(#[from] ThreadPoolBuildError),

    /// A failure to parse an integer.
    #[allow(missing_docs)]
    #[error(transparent)]
    ParseIntError(#[from] ParseIntError),

    /// A failure to parse a floating-point number.
    #[allow(missing_docs)]
    #[error(transparent)]
    ParseFloatError(#[from] ParseFloatError),

    /// An error from the `cloud_file` crate.
    #[allow(missing_docs)]
    #[error(transparent)]
    CloudFileError(#[from] CloudFileError),

    /// A failure to interpret bytes as UTF-8.
    #[allow(missing_docs)]
    #[error(transparent)]
    Utf8Error(#[from] Utf8Error),
}
199// https://docs.rs/thiserror/1.0.23/thiserror/
200
/// All errors specific to this library.
///
/// The positional payload of each variant is interpolated into its message as
/// `{0}`, `{1}`, ... in declaration order.
#[derive(Error, Debug, Clone)]
pub enum BedError {
    /// The header magic bytes or the file length are wrong. Payload: the file path.
    #[allow(missing_docs)]
    #[error("Ill-formed BED file. BED file header is incorrect or length is wrong. '{0}'")]
    IllFormed(String),

    /// The header's mode byte is neither 0 nor 1. Payload: the file path.
    #[allow(missing_docs)]
    #[error(
        "Ill-formed BED file. BED file header is incorrect. Expected mode to be 0 or 1. '{0}'"
    )]
    BadMode(String),

    /// A value other than 0, 1, 2, or missing was about to be written.
    #[allow(missing_docs)]
    #[error("Attempt to write illegal value to BED file. Only 0,1,2,missing allowed. '{0}'")]
    BadValue(String),

    /// A worker thread panicked during multithreaded work.
    #[allow(missing_docs)]
    #[error("Multithreading resulted in panic(s)")]
    PanickedThread(),

    /// No individual was observed for a SNP.
    #[allow(missing_docs)]
    #[error("No individual observed for the SNP.")]
    NoIndividuals,

    /// A SNP mean was illegal.
    #[allow(missing_docs)]
    #[error("Illegal SNP mean.")]
    IllegalSnpMean,

    /// An individual index is out of range. Payload: the offending (signed) index.
    #[allow(missing_docs)]
    #[error("Index to individual larger than the number of individuals. (Index value {0})")]
    IidIndexTooBig(isize),

    /// A SNP index is out of range. Payload: the offending (signed) index.
    #[allow(missing_docs)]
    #[error("Index to SNP larger than the number of SNPs. (Index value {0})")]
    SidIndexTooBig(isize),

    /// Index lengths disagree with the output array.
    /// Payload: iid_index length, sid_index length, then the two output dimensions.
    #[allow(missing_docs)]
    #[error("Length of iid_index ({0}) and sid_index ({1}) must match dimensions of output array ({2},{3}).")]
    IndexMismatch(usize, usize, usize, usize),

    /// The implied file size would overflow. Payload: individual count, SNP count.
    #[allow(missing_docs)]
    #[error("Indexes ({0},{1}) too big for files")]
    IndexesTooBigForFiles(usize, usize),

    /// Like `IndexMismatch`, but raised by a subset operation (same payload order).
    #[allow(missing_docs)]
    #[error("Subset: length of iid_index ({0}) and sid_index ({1}) must match dimensions of output array ({2},{3}).")]
    SubsetMismatch(usize, usize, usize, usize),

    /// A beta value could not be converted to or from `f64`.
    #[allow(missing_docs)]
    #[error("Cannot convert beta values to/from float 64")]
    CannotConvertBetaToFromF64,

    /// A Beta distribution could not be created. Payload: its two parameters.
    #[allow(missing_docs)]
    #[error("Cannot create Beta Dist with given parameters ({0},{1})")]
    CannotCreateBetaDist(f64, f64),

    /// Metadata that was skipped was then requested. Payload: the metadata name.
    #[allow(missing_docs)]
    #[error("Cannot use skipped metadata '{0}'")]
    CannotUseSkippedMetadata(String),

    /// An index range starts after it ends. Payload: start, end.
    #[allow(missing_docs)]
    #[error("Index starts at {0} but ends at {1}")]
    StartGreaterThanEnd(usize, usize),

    /// A slice step of zero was given.
    #[allow(missing_docs)]
    #[error("Step of zero not allowed")]
    StepZero,

    /// An index range starts beyond the item count. Payload: start, count.
    #[allow(missing_docs)]
    #[error("Index starts at {0} but count is {1}")]
    StartGreaterThanCount(usize, usize),

    /// An index range ends beyond the item count. Payload: end, count.
    #[allow(missing_docs)]
    #[error("Index ends at {0} but count is {1}")]
    EndGreaterThanCount(usize, usize),

    /// A slice expression tried to add a new axis.
    #[allow(missing_docs)]
    #[error("Adding new axis not allowed")]
    NewAxis,

    /// An ndarray `SliceInfo` was not one-dimensional.
    #[allow(missing_docs)]
    #[error("Expect 1-D NDArray SliceInfo")]
    NdSliceInfoNot1D,

    /// A metadata line had too few fields. Payload: expected count, found count, and the text named in the message.
    #[allow(missing_docs)]
    #[error("Expect {0} fields but find only {1} in '{2}'")]
    MetadataFieldCount(usize, usize, String),

    /// Two sources disagree on a count. Payload: the count's name prefix, then the two values.
    #[allow(missing_docs)]
    #[error("{0}_count values of {1} and {2} are inconsistent")]
    InconsistentCount(String, usize, usize),

    /// A boolean index has the wrong length. Payload: expected length, actual length.
    #[allow(missing_docs)]
    #[error("Expect bool arrays and vectors to be length {0}, not {1}")]
    BoolArrayVectorWrongLength(usize, usize),

    /// An ndarray has the wrong shape. Payload: expected shape (2 values), then found shape (2 values).
    #[allow(missing_docs)]
    #[error("Expect ndarray of shape ({0}, {1}), but found shape ({2}, {3})")]
    InvalidShape(usize, usize, usize, usize),

    /// Metadata cannot be written because some fields are `None`. Payload: the metadata name.
    #[allow(missing_docs)]
    #[error("Can't write '{0}' metadata if some fields are None")]
    MetadataMissingForWrite(String),

    /// A requested sample file is unknown or bad. Payload: the sample file name.
    #[allow(missing_docs)]
    #[error("Unknown or bad sample file '{0}'")]
    UnknownOrBadSampleFile(String),

    /// The registry of sample files is invalid.
    #[allow(missing_docs)]
    #[error("The registry of sample files is invalid")]
    SampleRegistryProblem(),

    /// Constructing the samples failed. Payload: the underlying error text.
    #[allow(missing_docs)]
    #[error("Samples construction failed with error: {0}")]
    SamplesConstructionFailed(String),

    /// A sample file was downloaded but then not found. Payload: the sample file.
    #[allow(missing_docs)]
    #[error("Downloaded sample file not seen: {0}")]
    DownloadedSampleFileNotSeen(String),

    /// A downloaded sample file failed its hash check. Payload: file, expected hash, actual hash.
    #[allow(missing_docs)]
    #[error("Downloaded sample file has wrong hash: {0},expected: {1}, actual: {2}")]
    DownloadedSampleFileWrongHash(String, String, String),

    /// The cache directory could not be created.
    #[allow(missing_docs)]
    #[error("Cannot create cache directory")]
    CannotCreateCacheDir(),

    /// A URL could not be parsed. Payload: the URL, then the parse error text.
    #[allow(missing_docs)]
    #[error("Cannot parse URL: '{0}': {1}")]
    CannotParseUrl(String, String),

    /// A builder was finished with a required field unset. Payload: the field name.
    #[allow(missing_docs)]
    #[error("UninitializedField: '{0}'")]
    UninitializedField(&'static str),

    /// Fetching a sample failed. Payload: the underlying error text.
    #[allow(missing_docs)]
    #[error("Sample fetch error: {0}")]
    SampleFetch(String),

    /// The encoding destination buffer is not contiguous.
    #[allow(missing_docs)]
    #[error("Encoding destination buffer must be contiguous.")]
    EncodingContiguous(),

    /// The encoding destination buffer has the wrong length. Payload: required length, actual length.
    #[allow(missing_docs)]
    #[error("Encoding destination buffer have length {0}, (in_vector.len() - 1) // 4 + 1, but it has length {1}.")]
    EncodingLength(usize, usize),
}
350
351// Trait alias
352
353/// A trait alias, used internally, for the values of a .bed file, namely i8, f32, f64.
354pub trait BedVal:
355    Copy + Default + From<i8> + Debug + Sync + Send + Sync + Missing + PartialEq
356{
357}
358impl<T> BedVal for T where
359    T: Copy + Default + From<i8> + Debug + Sync + Send + Sync + Missing + PartialEq
360{
361}
362
363fn create_pool(num_threads: usize) -> Result<rayon::ThreadPool, Box<BedErrorPlus>> {
364    match rayon::ThreadPoolBuilder::new()
365        .num_threads(num_threads)
366        .build()
367    {
368        Err(e) => Err(Box::new(e.into())),
369        Ok(pool) => Ok(pool),
370    }
371}
372
373#[allow(clippy::too_many_arguments)]
374#[anyinput]
375fn read_no_alloc<TVal: BedVal>(
376    path: AnyPath,
377    iid_count: usize,
378    sid_count: usize,
379    is_a1_counted: bool,
380    iid_index: &[isize],
381    sid_index: &[isize],
382    missing_value: TVal,
383    num_threads: usize,
384    val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
385) -> Result<(), Box<BedErrorPlus>> {
386    create_pool(num_threads)?.install(|| {
387        let (buf_reader, bytes_vector) = open_and_check(path)?;
388
389        match bytes_vector[2] {
390            0 => {
391                // We swap 'iid' and 'sid' and then reverse the axes.
392                let mut val_t = val.view_mut().reversed_axes();
393                internal_read_no_alloc(
394                    buf_reader,
395                    path,
396                    sid_count,
397                    iid_count,
398                    is_a1_counted,
399                    sid_index,
400                    iid_index,
401                    missing_value,
402                    &mut val_t,
403                )
404            }
405            1 => internal_read_no_alloc(
406                buf_reader,
407                path,
408                iid_count,
409                sid_count,
410                is_a1_counted,
411                iid_index,
412                sid_index,
413                missing_value,
414                val,
415            ),
416            _ => Err(Box::new(BedError::BadMode(path_ref_to_string(path)).into())),
417        }
418    })?;
419    Ok(())
420}
421
422#[anyinput]
423fn path_ref_to_string(path: AnyPath) -> String {
424    PathBuf::from(path).display().to_string()
425}
426
427impl From<BedError> for Box<BedErrorPlus> {
428    fn from(err: BedError) -> Self {
429        Box::new(BedErrorPlus::BedError(err))
430    }
431}
432impl From<std::io::Error> for Box<BedErrorPlus> {
433    fn from(err: std::io::Error) -> Self {
434        Box::new(BedErrorPlus::IOError(err))
435    }
436}
437impl From<ThreadPoolBuildError> for Box<BedErrorPlus> {
438    fn from(err: ThreadPoolBuildError) -> Self {
439        Box::new(BedErrorPlus::ThreadPoolError(err))
440    }
441}
442impl From<ParseIntError> for Box<BedErrorPlus> {
443    fn from(err: ParseIntError) -> Self {
444        Box::new(BedErrorPlus::ParseIntError(err))
445    }
446}
447
448impl From<ParseFloatError> for Box<BedErrorPlus> {
449    fn from(err: ParseFloatError) -> Self {
450        Box::new(BedErrorPlus::ParseFloatError(err))
451    }
452}
453
454impl From<::derive_builder::UninitializedFieldError> for BedErrorPlus {
455    fn from(err: ::derive_builder::UninitializedFieldError) -> Self {
456        BedError::UninitializedField(err.field_name()).into()
457    }
458}
459
460impl From<CloudFileError> for Box<BedErrorPlus> {
461    fn from(err: CloudFileError) -> Self {
462        Box::new(BedErrorPlus::CloudFileError(err))
463    }
464}
465
466impl From<Utf8Error> for Box<BedErrorPlus> {
467    fn from(err: Utf8Error) -> Self {
468        Box::new(BedErrorPlus::Utf8Error(err))
469    }
470}
471
472#[anyinput]
473fn open_and_check(
474    path: AnyPath,
475) -> Result<(BufReader<File>, [u8; CB_HEADER_USIZE]), Box<BedErrorPlus>> {
476    let mut buf_reader = BufReader::new(File::open(path)?);
477    let mut bytes_array: [u8; CB_HEADER_USIZE] = [0; CB_HEADER_USIZE];
478    buf_reader.read_exact(&mut bytes_array)?;
479    if (BED_FILE_MAGIC1 != bytes_array[0]) || (BED_FILE_MAGIC2 != bytes_array[1]) {
480        Err(BedError::IllFormed(path_ref_to_string(path)))?;
481    }
482    Ok((buf_reader, bytes_array))
483}
484
485// trait Max {
486//     fn max() -> Self;
487// }
488
489// impl Max for u8 {
490//     fn max() -> u8 {
491//         u8::MAX
492//     }
493// }
494
495// impl Max for u64 {
496//     fn max() -> u64 {
497//         u64::MAX
498//     }
499// }
500
/// A trait, used internally, that supplies the default missing value for i8, f32, and f64.
pub trait Missing {
    /// The default missing value for a type such as i8, f32, and f64.
    fn missing() -> Self;
}

/// Missing `f64` values are represented by NaN.
impl Missing for f64 {
    fn missing() -> Self {
        Self::NAN
    }
}

/// Missing `f32` values are represented by NaN.
impl Missing for f32 {
    fn missing() -> Self {
        Self::NAN
    }
}

/// Missing `i8` values are represented by -127.
impl Missing for i8 {
    fn missing() -> Self {
        -127
    }
}
524
525#[cfg(not(target_pointer_width = "64"))]
526compile_error!("This code requires a 64-bit target architecture.");
527#[inline]
528fn try_div_4(in_iid_count: usize, in_sid_count: usize) -> Result<u64, Box<BedErrorPlus>> {
529    if in_iid_count == 0 {
530        return Ok(0);
531    }
532    let in_iid_count_div4_u64 = in_iid_count.checked_sub(1).map_or(0, |v| v / 4 + 1) as u64;
533    let in_sid_count_u64 = in_sid_count as u64;
534
535    if in_sid_count > 0 && (u64::MAX - CB_HEADER_U64) / in_sid_count_u64 < in_iid_count_div4_u64 {
536        Err(BedError::IndexesTooBigForFiles(in_iid_count, in_sid_count))?;
537    }
538
539    Ok(in_iid_count_div4_u64)
540}
541
542#[allow(clippy::too_many_arguments)]
543#[anyinput]
544fn internal_read_no_alloc<TVal: BedVal>(
545    mut buf_reader: BufReader<File>,
546    path: AnyPath,
547    in_iid_count: usize,
548    in_sid_count: usize,
549    is_a1_counted: bool,
550    iid_index: &[isize],
551    sid_index: &[isize],
552    missing_value: TVal,
553    out_val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
554) -> Result<(), Box<BedErrorPlus>> {
555    // Check the file length
556
557    let in_iid_count_div4_u64 = try_div_4(in_iid_count, in_sid_count)?;
558    // "as" and math is safe because of early checks
559    let file_len = buf_reader.get_ref().metadata()?.len();
560    let file_len2 = in_iid_count_div4_u64 * (in_sid_count as u64) + CB_HEADER_U64;
561    if file_len != file_len2 {
562        Err(BedError::IllFormed(path_ref_to_string(path)))?;
563    }
564
565    // Check and precompute for each iid_index
566    let (i_div_4_less_start_array, i_mod_4_times_2_array, i_div_4_start, i_div_4_len) =
567        check_and_precompute_iid_index(in_iid_count, iid_index)?;
568
569    // Check and compute work for each sid_index
570    let from_two_bits_to_value = set_up_two_bits_to_value(is_a1_counted, missing_value);
571    let lower_sid_count = -(in_sid_count as isize);
572    let upper_sid_count: isize = (in_sid_count as isize) - 1;
573    // See https://morestina.net/blog/1432/parallel-stream-processing-with-rayon
574    // Possible optimization: We could read snp in their input order instead of their output order
575    sid_index
576        .iter()
577        .map(|in_sid_i_signed| {
578            // Turn signed sid_index into unsigned sid_index (or error)
579            let in_sid_i = if (0..=upper_sid_count).contains(in_sid_i_signed) {
580                *in_sid_i_signed as u64
581            } else if (lower_sid_count..=-1).contains(in_sid_i_signed) {
582                (in_sid_count - ((-in_sid_i_signed) as usize)) as u64
583            } else {
584                Err(BedError::SidIndexTooBig(*in_sid_i_signed))?
585            };
586
587            // Read the iid info for one snp from the disk
588            let mut bytes_vector: Vec<u8> = vec![0; i_div_4_len as usize];
589            let pos: u64 = in_sid_i * in_iid_count_div4_u64 + i_div_4_start + CB_HEADER_U64; // "as" and math is safe because of early checks
590            buf_reader.seek(SeekFrom::Start(pos))?;
591            buf_reader.read_exact(&mut bytes_vector)?;
592            Ok::<_, Box<BedErrorPlus>>(bytes_vector)
593        })
594        // Zip in the column of the output array
595        .zip(out_val.axis_iter_mut(nd::Axis(1)))
596        // In parallel, decompress the iid info and put it in its column
597        .par_bridge() // This seems faster that parallel zip
598        .try_for_each(|(bytes_vector_result, mut col)| match bytes_vector_result {
599            Err(e) => Err(e),
600            Ok(bytes_vector) => {
601                for out_iid_i in 0..iid_index.len() {
602                    let i_div_4_less_start = i_div_4_less_start_array[out_iid_i];
603                    let i_mod_4_times_2 = i_mod_4_times_2_array[out_iid_i];
604                    let genotype_byte: u8 =
605                        (bytes_vector[i_div_4_less_start] >> i_mod_4_times_2) & 0x03;
606                    col[out_iid_i] = from_two_bits_to_value[genotype_byte as usize];
607                }
608                Ok(())
609            }
610        })?;
611
612    Ok(())
613}
614
615type Array1Usize = nd::ArrayBase<nd::OwnedRepr<usize>, nd::Dim<[usize; 1]>>;
616type Array1U8 = nd::ArrayBase<nd::OwnedRepr<u8>, nd::Dim<[usize; 1]>>;
617
618#[allow(clippy::type_complexity)]
619#[allow(clippy::range_plus_one)]
620fn check_and_precompute_iid_index(
621    in_iid_count: usize,
622    iid_index: &[isize],
623) -> Result<(Array1Usize, Array1U8, u64, u64), Box<BedErrorPlus>> {
624    let lower_iid_count = -(in_iid_count as isize);
625    let upper_iid_count: isize = (in_iid_count as isize) - 1;
626    let mut i_div_4_less_start_array = nd::Array1::<usize>::zeros(iid_index.len());
627    let mut i_mod_4_times_2_array = nd::Array1::<u8>::zeros(iid_index.len());
628    let mut result_list: Vec<Result<(), BedError>> = vec![Ok(()); iid_index.len()];
629    nd::par_azip!((in_iid_i_signed in iid_index,
630        i_div_4_less_start in &mut i_div_4_less_start_array,
631        i_mod_4_times_2 in &mut i_mod_4_times_2_array,
632        result in &mut result_list
633    )
634    {
635        let in_iid_i = if (0..=upper_iid_count).contains(in_iid_i_signed) {
636            *result = Ok(());
637            *in_iid_i_signed as usize
638        } else if (lower_iid_count..=-1).contains(in_iid_i_signed) {
639            *result = Ok(());
640            in_iid_count - ((-in_iid_i_signed) as usize)
641        } else {
642            *result = Err(BedError::IidIndexTooBig(
643                *in_iid_i_signed,
644            ));
645            0
646        };
647
648        *i_div_4_less_start = in_iid_i / 4 ;
649        *i_mod_4_times_2 = (in_iid_i % 4 * 2) as u8;
650    });
651    result_list
652        .iter()
653        .par_bridge()
654        .try_for_each(|x| (*x).clone())?;
655
656    let (i_div_4_start, i_div_4_len) =
657        if let Some(min_value) = i_div_4_less_start_array.par_iter().min() {
658            let max_value = *i_div_4_less_start_array.par_iter().max().unwrap(); // safe because of min
659            (*min_value as u64, (max_value + 1 - *min_value) as u64)
660        } else {
661            (0, 0)
662        };
663    // skip of min_value is 0
664    if i_div_4_start > 0 {
665        i_div_4_less_start_array
666            .par_iter_mut()
667            .for_each(|x| *x -= i_div_4_start as usize);
668    }
669    Ok((
670        i_div_4_less_start_array,
671        i_mod_4_times_2_array,
672        i_div_4_start,
673        i_div_4_len,
674    ))
675}
676
/// Builds the lookup table from a 2-bit code (the array index) to an output value.
///
/// Code 1 always maps to `missing_value` and code 2 to the heterozygous value 1.
/// Codes 0 and 3 map to 2 and 0 when `count_a1` is true, and to 0 and 2 otherwise.
fn set_up_two_bits_to_value<TVal: From<i8>>(count_a1: bool, missing_value: TVal) -> [TVal; 4] {
    // Only the two homozygous entries depend on which allele is counted.
    let (value_for_code_0, value_for_code_3): (i8, i8) = if count_a1 { (2, 0) } else { (0, 2) };

    [
        TVal::from(value_for_code_0), // look-up 0
        missing_value,                // look-up 1
        TVal::from(1),                // look-up 2
        TVal::from(value_for_code_3), // look-up 3
    ]
}
698
699// Thanks to Dawid for his dpc-pariter library that makes this function scale.
700// https://dpc.pw/adding-parallelism-to-your-rust-iterators
701#[anyinput]
702fn write_val<S, TVal>(
703    path: AnyPath,
704    val: &nd::ArrayBase<S, nd::Ix2>,
705    is_a1_counted: bool,
706    missing: TVal,
707    num_threads: usize,
708) -> Result<(), Box<BedErrorPlus>>
709where
710    S: nd::Data<Elem = TVal>,
711    TVal: BedVal,
712{
713    let (iid_count, sid_count) = val.dim();
714
715    // 4 genotypes per byte so round up
716    let iid_count_div4_u64 = try_div_4(iid_count, sid_count)?;
717
718    // We create and write to a file.
719    // If there is an error, we will delete it.
720    if let Err(e) = write_internal(
721        path,
722        iid_count_div4_u64,
723        val,
724        is_a1_counted,
725        missing,
726        num_threads,
727    ) {
728        // Clean up the file
729        let _ = fs::remove_file(path);
730        Err(e)
731    } else {
732        Ok(())
733    }
734}
735
736// https://www.reddit.com/r/rust/comments/mo4s8e/difference_between_reference_and_view_in_ndarray/
737#[anyinput]
738fn write_internal<S, TVal>(
739    path: AnyPath,
740    iid_count_div4_u64: u64,
741    val: &nd::ArrayBase<S, nd::Ix2>,
742    is_a1_counted: bool,
743    missing: TVal,
744    num_threads: usize,
745) -> Result<(), Box<BedErrorPlus>>
746where
747    S: nd::Data<Elem = TVal>,
748    TVal: BedVal,
749{
750    let mut writer = BufWriter::new(File::create(path)?);
751    // LATER: If this method is later changed
752    // to support major="individual", be sure to
753    // change write_f64, etc and python function 'to_bed' which
754    // currently uses a work-around.
755    writer.write_all(&[BED_FILE_MAGIC1, BED_FILE_MAGIC2, 0x01])?;
756
757    #[allow(clippy::eq_op)]
758    let use_nan = missing != missing; // generic NAN test
759    let zero_code = if is_a1_counted { 3u8 } else { 0u8 };
760    let two_code = if is_a1_counted { 0u8 } else { 3u8 };
761
762    let homozygous_primary_allele = TVal::from(0); // Major Allele
763    let heterozygous_allele = TVal::from(1);
764    let homozygous_secondary_allele = TVal::from(2); // Minor Allele
765
766    scope(|scope| {
767        val.axis_iter(nd::Axis(1))
768            .parallel_map_scoped(scope, {
769                move |column| {
770                    // Convert each column into a bytes_vector
771                    let mut bytes_vector: Vec<u8> = vec![0; iid_count_div4_u64 as usize]; // inits to 0
772                    process_genomic_slice(
773                        &column,
774                        &mut bytes_vector,
775                        homozygous_primary_allele,
776                        heterozygous_allele,
777                        homozygous_secondary_allele,
778                        zero_code,
779                        two_code,
780                        use_nan,
781                        missing,
782                    )?;
783                    Ok::<_, Box<BedErrorPlus>>(bytes_vector)
784                }
785            })
786            .threads(num_threads)
787            .try_for_each(|bytes_vector| {
788                // Write the bytes vector, they must be in order.
789                writer.write_all(&bytes_vector?)?;
790                Ok(())
791            })
792    })
793    .map_err(|_e| BedError::PanickedThread())?
794}
795
796#[allow(dead_code)]
797fn encode1<TVal>(
798    in_vector: &ndarray::ArrayView1<TVal>,
799    out_vector: &mut [u8],
800    is_a1_counted: bool,
801    missing: TVal,
802) -> Result<(), Box<BedErrorPlus>>
803where
804    TVal: BedVal,
805{
806    #[allow(clippy::eq_op)]
807    let use_nan = missing != missing; // generic NAN test
808    let zero_code = if is_a1_counted { 3u8 } else { 0u8 };
809    let two_code = if is_a1_counted { 0u8 } else { 3u8 };
810
811    let homozygous_primary_allele: TVal = TVal::from(0); // Major Allele
812    let heterozygous_allele = TVal::from(1);
813    let homozygous_secondary_allele = TVal::from(2); // Minor Allele
814
815    let minor_div4 = in_vector.len().checked_sub(1).map_or(0, |v| v / 4 + 1);
816    if minor_div4 != out_vector.len() {
817        return Err(Box::new(
818            BedError::EncodingLength(minor_div4, out_vector.len()).into(),
819        ));
820    }
821
822    process_genomic_slice(
823        in_vector,
824        out_vector,
825        homozygous_primary_allele,
826        heterozygous_allele,
827        homozygous_secondary_allele,
828        zero_code,
829        two_code,
830        use_nan,
831        missing,
832    )
833}
834
835#[inline]
836#[allow(clippy::eq_op)]
837#[allow(clippy::too_many_arguments)]
838fn encode_genotype_chunk<TVal>(
839    chunk: nd::ArrayView1<TVal>,
840    homozygous_primary_allele: TVal,
841    heterozygous_allele: TVal,
842    homozygous_secondary_allele: TVal,
843    zero_code: u8,
844    two_code: u8,
845    use_nan: bool,
846    missing: TVal,
847) -> Result<u8, BedError>
848where
849    TVal: PartialEq + Copy,
850{
851    // LATER: Think about unrolling this loop in the usual case of 4 elements
852    let mut output_byte = 0u8;
853    for (within_chunk_index, &v0) in chunk.iter().enumerate() {
854        let genotype_code = if v0 == homozygous_primary_allele {
855            zero_code
856        } else if v0 == heterozygous_allele {
857            2
858        } else if v0 == homozygous_secondary_allele {
859            two_code
860        } else if (use_nan && v0 != v0) || (!use_nan && v0 == missing) {
861            1
862        } else {
863            return Err(BedError::BadValue(
864                "Invalid genotype value encountered during encoding.".to_string(),
865            ));
866        };
867
868        output_byte |= genotype_code << (within_chunk_index * 2);
869    }
870    Ok(output_byte)
871}
872
873#[inline]
874#[allow(clippy::eq_op)]
875#[allow(clippy::too_many_arguments)]
876fn process_genomic_slice<TVal>(
877    in_vector: &ndarray::ArrayView1<TVal>,
878    out_vector: &mut [u8],
879    homozygous_primary_allele: TVal,
880    heterozygous_allele: TVal,
881    homozygous_secondary_allele: TVal,
882    zero_code: u8,
883    two_code: u8,
884    use_nan: bool,
885    missing: TVal,
886) -> Result<(), Box<BedErrorPlus>>
887where
888    TVal: PartialEq + Copy + Sync, // Ensure TVal supports equality check and can be copied
889{
890    // Calculate the number of full chunks and the remainder
891    let full_chunks = in_vector.len() / 4;
892    let remainder = in_vector.len() % 4;
893
894    // Ensure the output vector is correctly sized
895    assert_eq!(out_vector.len(), full_chunks + usize::from(remainder > 0));
896
897    // Zip the exact input chunks with output chunks and process in parallel
898    in_vector
899        .exact_chunks(4)
900        .into_iter()
901        .zip(out_vector.iter_mut())
902        .try_for_each(|(chunk, output_byte)| {
903            *output_byte = encode_genotype_chunk(
904                chunk,
905                homozygous_primary_allele,
906                heterozygous_allele,
907                homozygous_secondary_allele,
908                zero_code,
909                two_code,
910                use_nan,
911                missing,
912            )?;
913            Ok::<(), Box<BedErrorPlus>>(())
914        })?;
915
916    // Process the remainder sequentially if there is any
917    if remainder != 0 {
918        let start = full_chunks * 4;
919        let chunk = in_vector.slice(ndarray::s![start..]);
920        let output_byte = &mut out_vector[full_chunks];
921        *output_byte = encode_genotype_chunk(
922            chunk,
923            homozygous_primary_allele,
924            heterozygous_allele,
925            homozygous_secondary_allele,
926            zero_code,
927            two_code,
928            use_nan,
929            missing,
930        )?;
931    }
932
933    Ok::<(), Box<BedErrorPlus>>(())
934}
935// #[inline]
936// #[allow(clippy::eq_op)]
937// #[allow(clippy::too_many_arguments)]
938// fn process_genomic_slice<TVal>(
939//     in_vector: &ndarray::ArrayView1<TVal>,
940//     out_vector: &mut [u8],
941//     homozygous_primary_allele: TVal,
942//     heterozygous_allele: TVal,
943//     homozygous_secondary_allele: TVal,
944//     zero_code: u8,
945//     two_code: u8,
946//     use_nan: bool,
947//     missing: TVal,
948// ) -> Result<(), Box<BedErrorPlus>>
949// where
950//     TVal: PartialEq + Copy + Sync, // Ensure TVal supports equality check and can be copied
951// {
952//     // Calculate the number of full chunks and the remainder
953//     let full_chunks = in_vector.len() / 4;
954//     let remainder = in_vector.len() % 4;
955
956//     // Ensure the output vector is correctly sized
957//     assert_eq!(out_vector.len(), full_chunks + usize::from(remainder > 0));
958
959//     // Zip the exact input chunks with output chunks and process in parallel
960//     in_vector
961//         .exact_chunks(4)
962//         .into_iter()
963//         .zip(out_vector.iter_mut())
964//         .par_bridge()
965//         .try_for_each(|(chunk, output_byte)| {
966//             *output_byte = encode_genotype_chunk(
967//                 chunk,
968//                 homozygous_primary_allele,
969//                 heterozygous_allele,
970//                 homozygous_secondary_allele,
971//                 zero_code,
972//                 two_code,
973//                 use_nan,
974//                 missing,
975//             )?;
976//             Ok::<(), Box<BedErrorPlus>>(())
977//         })?;
978
979//     // Process the remainder sequentially if there is any
980//     if remainder != 0 {
981//         let start = full_chunks * 4;
982//         let chunk = in_vector.slice(ndarray::s![start..]);
983//         let output_byte = &mut out_vector[full_chunks];
984//         *output_byte = encode_genotype_chunk(
985//             chunk,
986//             homozygous_primary_allele,
987//             heterozygous_allele,
988//             homozygous_secondary_allele,
989//             zero_code,
990//             two_code,
991//             use_nan,
992//             missing,
993//         )?;
994//     }
995
996//     Ok::<(), Box<BedErrorPlus>>(())
997// }
998
999#[anyinput]
1000fn count_lines(path: AnyPath) -> Result<usize, Box<BedErrorPlus>> {
1001    let file = File::open(path)?;
1002    let reader = BufReader::new(file);
1003    let count = reader.lines().count();
1004    Ok(count)
1005}
1006
// Selects how `find_factor` computes the factor that scales a mean-centered SNP value.
#[allow(dead_code)]
enum Dist {
    // The factor is `1 / std`.
    Unit,
    // The factor is the density of a Beta(`a`, `b`) distribution evaluated at
    // `maf`, where `maf` is half the SNP mean, reflected to `1 - maf` when it
    // exceeds 0.5.
    Beta { a: f64, b: f64 },
}
1012
1013#[allow(dead_code)]
1014fn impute_and_zero_mean_snps<
1015    T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1016>(
1017    val: &mut nd::ArrayViewMut2<'_, T>,
1018    dist: &Dist,
1019    apply_in_place: bool,
1020    use_stats: bool,
1021    stats: &mut nd::ArrayViewMut2<'_, T>,
1022) -> Result<(), Box<BedErrorPlus>> {
1023    let two = T::one() + T::one();
1024
1025    // If output is F-order (or in general if iid stride is no more than sid_stride)
1026    if val.stride_of(nd::Axis(0)) <= val.stride_of(nd::Axis(1)) {
1027        let result_list = nd::Zip::from(val.axis_iter_mut(nd::Axis(1)))
1028            .and(stats.axis_iter_mut(nd::Axis(0)))
1029            .par_map_collect(|mut col, mut stats_row| {
1030                _process_sid(
1031                    &mut col,
1032                    apply_in_place,
1033                    use_stats,
1034                    &mut stats_row,
1035                    dist,
1036                    two,
1037                )
1038            });
1039
1040        // Check the result list for errors
1041        result_list
1042            .iter()
1043            .par_bridge()
1044            .try_for_each(|x| (*x).clone())?;
1045
1046        Ok(())
1047    } else {
1048        //If C-order
1049        _process_all_iids(val, apply_in_place, use_stats, stats, dist, two)
1050    }
1051}
1052
1053// Later move the other fast-lmm functions into their own package
1054#[allow(dead_code)]
1055fn find_factor<
1056    T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1057>(
1058    dist: &Dist,
1059    mean_s: T,
1060    std: T,
1061) -> Result<T, BedError> {
1062    if let Dist::Beta { a, b } = dist {
1063        // Try to create a beta dist
1064        let Ok(beta_dist) = Beta::new(*a, *b) else {
1065            Err(BedError::CannotCreateBetaDist(*a, *b))?
1066        };
1067
1068        // Try to an f64 maf
1069        let mut maf = if let Some(mean_u64) = mean_s.to_f64() {
1070            mean_u64 / 2.0
1071        } else {
1072            Err(BedError::CannotConvertBetaToFromF64)?
1073        };
1074        if maf > 0.5 {
1075            maf = 1.0 - maf;
1076        }
1077
1078        // Try to put the maf in the beta dist
1079        if let Some(b) = T::from_f64(beta_dist.pdf(maf)) {
1080            Ok(b)
1081        } else {
1082            Err(BedError::CannotConvertBetaToFromF64)
1083        }
1084    } else {
1085        Ok(T::one() / std)
1086    }
1087}
1088
1089#[allow(dead_code)]
1090fn _process_sid<
1091    T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1092>(
1093    col: &mut nd::ArrayViewMut1<'_, T>,
1094    apply_in_place: bool,
1095    use_stats: bool,
1096    stats_row: &mut nd::ArrayViewMut1<'_, T>,
1097    dist: &Dist,
1098    two: T,
1099) -> Result<(), BedError> {
1100    if !use_stats {
1101        let mut n_observed = T::zero();
1102        let mut sum_s = T::zero(); // the sum of a SNP over all observed individuals
1103        let mut sum2_s = T::zero(); // the sum of the squares of the SNP over all observed individuals
1104
1105        for iid_i in 0..col.len() {
1106            let v = col[iid_i];
1107            if !v.is_nan() {
1108                sum_s = sum_s + v;
1109                sum2_s = sum2_s + v * v;
1110                n_observed = n_observed + T::one();
1111            }
1112        }
1113        if n_observed < T::one() {
1114            //LATER make it work (in some form) for n of 0
1115            Err(BedError::NoIndividuals)?;
1116        }
1117        let mean_s = sum_s / n_observed; //compute the mean over observed individuals for the current SNP
1118        let mean2_s: T = sum2_s / n_observed; //compute the mean of the squared SNP
1119
1120        if mean_s.is_nan()
1121            || (matches!(dist, Dist::Beta { a: _, b: _ })
1122                && ((mean_s > two) || (mean_s < T::zero())))
1123        {
1124            Err(BedError::IllegalSnpMean)?;
1125        }
1126
1127        let variance: T = mean2_s - mean_s * mean_s; //By the Cauchy Schwartz inequality this should always be positive
1128
1129        let mut std = variance.sqrt();
1130        if std.is_nan() || std <= T::zero() {
1131            // All "SNPs" have the same value (aka SNC)
1132            std = T::infinity(); //SNCs are still meaning full in QQ plots because they should be thought of as SNPs without enough data.
1133        }
1134
1135        stats_row[0] = mean_s;
1136        stats_row[1] = std;
1137    }
1138
1139    if apply_in_place {
1140        {
1141            let mean_s = stats_row[0];
1142            let std = stats_row[1];
1143            let is_snc = std.is_infinite();
1144
1145            let factor = find_factor(dist, mean_s, std)?;
1146
1147            for iid_i in 0..col.len() {
1148                //check for Missing (NAN) or SNC
1149                if col[iid_i].is_nan() || is_snc {
1150                    col[iid_i] = T::zero();
1151                } else {
1152                    col[iid_i] = (col[iid_i] - mean_s) * factor;
1153                }
1154            }
1155        }
1156    }
1157    Ok(())
1158}
1159
// Row-oriented counterpart of `_process_sid`, used by `impute_and_zero_mean_snps`
// for the C-order case.
//
// * When `use_stats` is false, the mean and standard deviation of the non-NaN
//   values of every column (SNP) of `val` are written to `stats[(sid, 0)]` and
//   `stats[(sid, 1)]`.
// * When `apply_in_place` is true, `val` is rewritten row by row: NaN values,
//   and values of a SNP whose std is infinite, become zero; all other values
//   become `(value - mean) * factor`, with `factor` from `find_factor`.
//
// `two` is the constant 2 in type `T`.
//
// Errors: `NoIndividuals` if a SNP has no non-NaN value, `IllegalSnpMean` if a
// mean is NaN (or outside [0, 2] for `Dist::Beta`), or any `find_factor` error.
#[allow(dead_code)]
fn _process_all_iids<
    T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
>(
    val: &mut nd::ArrayViewMut2<'_, T>,
    apply_in_place: bool,
    use_stats: bool,
    stats: &mut nd::ArrayViewMut2<'_, T>,
    dist: &Dist,
    two: T,
) -> Result<(), Box<BedErrorPlus>> {
    let sid_count = val.dim().1;

    if !use_stats {
        // O(iid_count * sid_count)
        // Serial that respects C-order is 3-times faster than parallel that doesn't
        // So we parallelize the inner loop instead of the outer loop
        let mut n_observed_array = nd::Array1::<T>::zeros(sid_count);
        let mut sum_s_array = nd::Array1::<T>::zeros(sid_count); //the sum of a SNP over all observed individuals
        let mut sum2_s_array = nd::Array1::<T>::zeros(sid_count); //the sum of the squares of the SNP over all observed individuals
        // Rows are visited one after another; within a row, the per-SNP
        // accumulators are updated in parallel.
        for row in val.axis_iter(nd::Axis(0)) {
            nd::par_azip!((&v in row,
                n_observed_ptr in &mut n_observed_array,
                sum_s_ptr in &mut sum_s_array,
                sum2_s_ptr in &mut sum2_s_array
            )
                if !v.is_nan() {
                    *n_observed_ptr = *n_observed_ptr + T::one();
                    *sum_s_ptr = *sum_s_ptr + v;
                    *sum2_s_ptr = *sum2_s_ptr + v * v;
                }
            );
        }

        // O(sid_count)
        // One result slot per SNP; a failing SNP records its error and skips
        // writing its statistics.
        let mut result_list: Vec<Result<(), BedError>> = vec![Ok(()); sid_count];
        nd::par_azip!((mut stats_row in stats.axis_iter_mut(nd::Axis(0)),
                &n_observed in &n_observed_array,
                &sum_s in &sum_s_array,
                &sum2_s in &sum2_s_array,
                result_ptr in &mut result_list)
        {
            if n_observed < T::one() {
                *result_ptr = Err(BedError::NoIndividuals);
                return;
            }
            let mean_s = sum_s / n_observed; //compute the mean over observed individuals for the current SNP
            let mean2_s: T = sum2_s / n_observed; //compute the mean of the squared SNP

            if mean_s.is_nan()
                || (matches!(dist, Dist::Beta { a:_, b:_ }) && ((mean_s > two) || (mean_s < T::zero())))
            {
                *result_ptr = Err(BedError::IllegalSnpMean);
                return;
            }

            let variance: T = mean2_s - mean_s * mean_s; //By the Cauchy-Schwarz inequality this should never be negative
            let mut std = variance.sqrt();
            if std.is_nan() || std <= T::zero() {
                // All "SNPs" have the same value (aka SNC)
                std = T::infinity(); //SNCs are still meaningful in QQ plots because they should be thought of as SNPs without enough data.
            }
            stats_row[0] = mean_s;
            stats_row[1] = std;
        });
        // Check the result list for errors
        result_list.par_iter().try_for_each(|x| (*x).clone())?;
    }

    if apply_in_place {
        // O(sid_count)
        // Compute one scaling factor per row of `stats` (i.e. per SNP).
        let mut factor_array = nd::Array1::<T>::zeros(stats.dim().0);

        stats
            .axis_iter_mut(nd::Axis(0))
            .zip(&mut factor_array)
            .par_bridge()
            .try_for_each(|(stats_row, factor_ptr)| {
                match find_factor(dist, stats_row[0], stats_row[1]) {
                    Err(e) => Err(e),
                    Ok(factor) => {
                        *factor_ptr = factor;
                        Ok(())
                    }
                }
            })?;

        // O(iid_count * sid_count)
        // Rows are processed in parallel; each row is walked SNP by SNP.
        nd::par_azip!((mut row in val.axis_iter_mut(nd::Axis(0)))
        {
            for sid_i in 0..row.len() {
                //check for Missing (NAN) or SNC
                if row[sid_i].is_nan() || stats[(sid_i, 1)].is_infinite() {
                    row[sid_i] = T::zero();
                } else {
                    row[sid_i] = (row[sid_i] - stats[(sid_i, 0)]) * factor_array[sid_i];
                }
            }
        });
    }
    Ok(())
}
1262
1263#[allow(dead_code)]
1264#[anyinput]
1265fn file_b_less_aatbx(
1266    a_filename: AnyPath,
1267    offset: u64,
1268    iid_count: usize,
1269    b1: &mut nd::ArrayViewMut2<'_, f64>,
1270    aatb: &mut nd::ArrayViewMut2<'_, f64>,
1271    atb: &mut nd::ArrayViewMut2<'_, f64>,
1272    log_frequency: usize,
1273) -> Result<(), Box<BedErrorPlus>> {
1274    //speed idea from C++:
1275    //Are copies really needed?
1276    //is F, vc C order the best?
1277    //would bigger snp blocks be better
1278
1279    let (a_sid_count, b_sid_count) = atb.dim();
1280    if log_frequency > 0 {
1281        println!("file_b_less_aatbx: iid_count={iid_count}, {a_sid_count}x{b_sid_count} output");
1282    };
1283
1284    // Open the file and move to the starting sid
1285    let mut buf_reader = BufReader::new(File::open(a_filename)?);
1286    buf_reader.seek(SeekFrom::Start(offset))?;
1287
1288    let mut sid_reuse = vec![f64::NAN; iid_count];
1289    for (a_sid_index, mut atb_row) in atb.axis_iter_mut(nd::Axis(0)).enumerate() {
1290        if log_frequency > 0 && a_sid_index % log_frequency == 0 {
1291            println!(
1292                "   working on train_sid_index={a_sid_index} of {a_sid_count} (iid_count={iid_count}, b_sid_count={b_sid_count})"
1293            );
1294        }
1295
1296        buf_reader.read_f64_into::<LittleEndian>(&mut sid_reuse)?;
1297
1298        nd::par_azip!(
1299            (mut atb_element in atb_row.axis_iter_mut(nd::Axis(0)),
1300            b1_col in b1.axis_iter(nd::Axis(1)),
1301            mut aatb_col in aatb.axis_iter_mut(nd::Axis(1)))
1302        {
1303            let mut atbi = 0.0;
1304            for iid_index in 0..iid_count {
1305                atbi += sid_reuse[iid_index] * b1_col[iid_index];
1306            }
1307            atb_element[()] = atbi;
1308            for iid_index in 0..iid_count {
1309                aatb_col[iid_index] -= sid_reuse[iid_index] * atbi;
1310            }
1311        });
1312    }
1313    Ok(())
1314}
1315
// Fills `dst` with little-endian `f64` values read from `src`.
// Its signature matches the `read_into` function-pointer parameter of
// `file_ata_piece` and `file_aat_piece` for `T = f64`.
#[allow(dead_code)]
fn read_into_f64(src: &mut BufReader<File>, dst: &mut [f64]) -> std::io::Result<()> {
    src.read_f64_into::<LittleEndian>(dst)
}
1320
// Fills `dst` with little-endian `f32` values read from `src`.
// Its signature matches the `read_into` function-pointer parameter of
// `file_ata_piece` and `file_aat_piece` for `T = f32`.
#[allow(dead_code)]
fn read_into_f32(src: &mut BufReader<File>, dst: &mut [f32]) -> std::io::Result<()> {
    src.read_f32_into::<LittleEndian>(dst)
}
1325
/* Here are Python algorithms that show how to do a low-memory multiply of A (or A.T) by B (or B.T).
   They are used by file_ata_piece and file_aat_piece, with some optimizations for the case where A and B are the same.
1328
1329output_list = [np.zeros((4,4)) for i in range(4)]
1330
1331# a.T.dot(b)
1332for a_col2 in range(0,4,2): # 1 pass through A, returning output chunk about the same size writing in one pass
1333    buffer_a2 = a[:,a_col2:a_col2+2]
1334    for b_col in range(4): # A1/a1 passes through B
1335        buffer_b = b[:,b_col]
1336        for i in range(4):
1337            b_val = buffer_b[i]
1338            a_slice = buffer_a2[i,:]
1339            for k in range(2): # A1/a1 * A0 passes through the output
1340                output_list[0][a_col2+k,b_col] += a_slice[k]*b_val
1341
1342# a.dot(b.T)
1343for out_col2 in range(0,4,2): # 1 pass through output, returning chunk on each pass
1344    for col in range(4): # O1/o1 passes through A and B
1345        buffer_a = a[:,col]
1346        buffer_b = b[:,col]
1347        for k in range(2):
1348            for i in range(4):
1349                output_list[1][i,out_col2+k] += buffer_a[i]*buffer_b[out_col2+k]
1350
1351# a.T.dot(b.T)
1352for a_col2 in range(0,4,2): # 1 pass through A, returning an output chunk on each pass
1353    buffer_a2 = a[:,a_col2:a_col2+2]
1354    for b_col in range(4):
1355        buffer_b = b[:,b_col]
1356        for i in range(4):
1357            b_val = buffer_b[i]
1358            for k in range(2):
1359                output_list[2][a_col2+k,i] += buffer_a2[b_col,k]*b_val
1360
1361# a.dot(b)  - but should instead do  (b.T.dot(a.T)).T
1362for b_col2 in range(0,4,2): #Transpose of preceding one
1363    buffer_b2 = b[:,b_col2:b_col2+2]
1364    for a_col in range(4):
1365        buffer_a = a[:,a_col]
1366        for i in range(4):
1367            a_val = buffer_a[i]
1368            for k in range(2):
1369                output_list[3][i,b_col2+k] += buffer_b2[a_col,k]*a_val
1370
1371
1372for output in output_list:
1373    print(output)
1374 */
1375
1376// Given A, a matrix in Fortran order in a file
1377// with row_count rows and col_count columns,
1378// and given a starting column,
1379// returns part of A.T x A, the column vs column product.
1380// The piece piece returned has dimensions
1381// (col_count-col_start) x ncols
1382// where ncols <= (col_count-col_start)
1383// Makes only one pass through the file.
1384#[allow(clippy::too_many_arguments)]
1385#[allow(dead_code)]
1386#[anyinput]
1387fn file_ata_piece<T: Float + Send + Sync + Sync + AddAssign>(
1388    path: AnyPath,
1389    offset: u64,
1390    row_count: usize,
1391    col_count: usize,
1392    col_start: usize,
1393    ata_piece: &mut nd::ArrayViewMut2<'_, T>,
1394    log_frequency: usize,
1395    read_into: fn(&mut BufReader<File>, &mut [T]) -> std::io::Result<()>,
1396) -> Result<(), Box<BedErrorPlus>> {
1397    let (nrows, ncols) = ata_piece.dim();
1398    if (col_start >= col_count)
1399        || (col_start + nrows != col_count)
1400        || (col_start + ncols > col_count)
1401    {
1402        Err(BedError::CannotConvertBetaToFromF64)?;
1403    }
1404
1405    _file_ata_piece_internal(
1406        path,
1407        offset,
1408        row_count,
1409        col_start,
1410        ata_piece,
1411        log_frequency,
1412        read_into,
1413    )
1414}
1415
1416#[allow(dead_code)]
1417#[anyinput]
1418fn _file_ata_piece_internal<T: Float + Send + Sync + Sync + AddAssign>(
1419    path: AnyPath,
1420    offset: u64,
1421    row_count: usize,
1422    col_start: usize,
1423    ata_piece: &mut nd::ArrayViewMut2<'_, T>,
1424    log_frequency: usize,
1425    read_into: fn(&mut BufReader<File>, &mut [T]) -> std::io::Result<()>,
1426) -> Result<(), Box<BedErrorPlus>> {
1427    let (nrows, ncols) = ata_piece.dim();
1428    if log_frequency > 0 {
1429        println!("file_ata_piece: col_start={col_start}, {nrows}x{ncols} output");
1430    };
1431
1432    // Open the file and move to the starting col
1433    let mut buf_reader = BufReader::new(File::open(path)?);
1434    buf_reader.seek(SeekFrom::Start(
1435        offset + col_start as u64 * row_count as u64 * std::mem::size_of::<T>() as u64,
1436    ))?;
1437
1438    let mut col_save_list: Vec<Vec<T>> = vec![];
1439    let mut col_reuse = vec![T::nan(); row_count];
1440
1441    for (col_rel_index, mut ata_row) in ata_piece.axis_iter_mut(nd::Axis(0)).enumerate() {
1442        if log_frequency > 0 && col_rel_index % log_frequency == 0 {
1443            println!("   working on {col_rel_index} of {nrows}");
1444        }
1445
1446        // Read next col and save if in range
1447        let col = if col_save_list.len() < ncols {
1448            let mut col_save = vec![T::nan(); row_count];
1449            read_into(&mut buf_reader, &mut col_save)?;
1450            col_save_list.push(col_save);
1451            col_save_list.last().unwrap() // unwrap is OK here
1452        } else {
1453            read_into(&mut buf_reader, &mut col_reuse)?;
1454            &col_reuse
1455        };
1456
1457        // Multiple saved sids with new sid
1458        let mut ata_row_trimmed = ata_row.slice_mut(nd::s![..col_save_list.len()]);
1459        nd::par_azip!((
1460            col_in_range in &col_save_list,
1461            mut ata_val in ata_row_trimmed.axis_iter_mut(nd::Axis(0))
1462        )
1463        {
1464            ata_val[()] = col_product(col_in_range, col);
1465        });
1466    }
1467
1468    // Reflect the new product values
1469    for row_index in 0usize..ncols - 1 {
1470        for col_index in row_index..ncols {
1471            ata_piece[(row_index, col_index)] = ata_piece[(col_index, row_index)];
1472        }
1473    }
1474    Ok(())
1475}
1476
1477#[allow(dead_code)]
1478fn col_product<T: Float + AddAssign>(col_i: &[T], col_j: &[T]) -> T {
1479    assert!(col_i.len() == col_j.len()); // real assert
1480    let mut product = T::zero();
1481    for row_index in 0..col_i.len() {
1482        product += col_i[row_index] * col_j[row_index];
1483    }
1484    product
1485}
1486
1487// Given A, a matrix in Fortran order in a file
1488// with row_count rows and col_count columns,
1489// and given a starting column,
1490// returns part of A x A.T, the row vs row product.
1491// The piece piece returned has dimensions
1492// (row_count-row_start) x ncols
1493// where ncols <= (row_count-row_start)
1494// Makes only one pass through the file.
1495#[allow(clippy::too_many_arguments)]
1496#[allow(dead_code)]
1497#[anyinput]
1498fn file_aat_piece<T: Float + Sync + Send + Sync + AddAssign>(
1499    path: AnyPath,
1500    offset: u64,
1501    row_count: usize,
1502    col_count: usize,
1503    row_start: usize,
1504    aat_piece: &mut nd::ArrayViewMut2<'_, T>,
1505    log_frequency: usize,
1506    read_into: fn(&mut BufReader<File>, &mut [T]) -> std::io::Result<()>,
1507) -> Result<(), Box<BedErrorPlus>> {
1508    let (nrows, ncols) = aat_piece.dim();
1509
1510    if log_frequency > 0 {
1511        println!("file_aat_piece: row_start={row_start}, {nrows}x{ncols} output");
1512    };
1513
1514    if (row_start >= row_count)
1515        || (row_start + nrows != row_count)
1516        || (row_start + ncols > row_count)
1517    {
1518        Err(BedError::CannotConvertBetaToFromF64)?;
1519    }
1520
1521    aat_piece.fill(T::zero());
1522
1523    // Open the file and move to the starting col
1524    let mut buf_reader = BufReader::new(File::open(path)?);
1525
1526    let mut col = vec![T::nan(); row_count - row_start];
1527
1528    for col_index in 0..col_count {
1529        if log_frequency > 0 && col_index % log_frequency == 0 {
1530            println!("   working on {col_index} of {col_count}");
1531        }
1532
1533        // Read next col
1534        buf_reader.seek(SeekFrom::Start(
1535            offset + (col_index * row_count + row_start) as u64 * std::mem::size_of::<T>() as u64,
1536        ))?;
1537        read_into(&mut buf_reader, &mut col)?;
1538
1539        nd::par_azip!(
1540            (index row_index1,
1541            mut aat_col in aat_piece.axis_iter_mut(nd::Axis(1))
1542        )
1543        {
1544            let val1 = col[row_index1];
1545            for row_index0 in row_index1..nrows {
1546                aat_col[row_index0] += val1 * col[row_index0];
1547            }
1548        });
1549    }
1550
1551    // Notice that ata reflects and aat doesn't. They don't need
1552    // to be the same, but they could be.
1553    Ok(())
1554}
1555
1556// References: https://www.youtube.com/watch?v=0zOg8_B71gE&t=22s
1557// https://deterministic.space/elegant-apis-in-rust.html
1558// https://rust-lang.github.io/api-guidelines/
1559// https://ricardomartins.cc/2016/08/03/convenient_and_idiomatic_conversions_in_rust
1560
/// Represents the metadata from PLINK .fam and .bim files.
///
/// Construct with [`Metadata::builder`](struct.Metadata.html#method.builder) or [`Metadata::new`](struct.Metadata.html#method.new).
///
/// # Example
///
/// Extract metadata from a file.
/// Create a random file with the same metadata.
/// ```
/// use ndarray as nd;
/// use bed_reader::{Bed, WriteOptions, sample_bed_file};
/// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
///
/// let mut bed = Bed::new(sample_bed_file("small.bed")?)?;
/// let metadata = bed.metadata()?;
/// let shape = bed.dim()?;
///
/// let mut rng = StdRng::seed_from_u64(0);
/// let val = nd::Array::random_using(shape, Uniform::from(-1..3), &mut rng);
///
/// let temp_out = temp_testdir::TempDir::default();
/// let output_file = temp_out.join("random.bed");
/// WriteOptions::builder(output_file)
///     .metadata(&metadata)
///     .missing_value(-1)
///     .write(&val)?;
/// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
// Every field is an optional, reference-counted 1-D array that defaults to
// `None` in the builder. All setters are custom (written by hand elsewhere),
// and the generated build function is private and named `build_no_file_check`.
#[derive(Clone, Debug, Builder, PartialEq)]
#[builder(build_fn(private, name = "build_no_file_check", error = "BedErrorPlus"))]
pub struct Metadata {
    // Per-individual fields (presumably the columns of the .fam file --
    // confirm against `read_fam`).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    fid: Option<Rc<nd::Array1<String>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    iid: Option<Rc<nd::Array1<String>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    father: Option<Rc<nd::Array1<String>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    mother: Option<Rc<nd::Array1<String>>>,

    // i32 based on https://www.cog-genomics.org/plink2/formats#bim
    #[builder(setter(custom))]
    #[builder(default = "None")]
    sex: Option<Rc<nd::Array1<i32>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    pheno: Option<Rc<nd::Array1<String>>>,

    // Per-SNP fields (presumably the columns of the .bim file -- confirm
    // against `read_bim`).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    chromosome: Option<Rc<nd::Array1<String>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    sid: Option<Rc<nd::Array1<String>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    cm_position: Option<Rc<nd::Array1<f32>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    bp_position: Option<Rc<nd::Array1<i32>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    allele_1: Option<Rc<nd::Array1<String>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    allele_2: Option<Rc<nd::Array1<String>>>,
}
1633
1634fn lazy_or_skip_count<T>(array: &Option<Rc<nd::Array1<T>>>) -> Option<usize> {
1635    array.as_ref().map(|array| array.len())
1636}
1637
/// Represents a PLINK .bed file that is open for reading genotype data and metadata.
///
/// Construct with [`Bed::new`](struct.Bed.html#method.new) or [`Bed::builder`](struct.Bed.html#method.builder).
///
/// > For reading cloud files, see [`BedCloud`](struct.BedCloud.html).
///
/// # Example
///
/// Open a file for reading. Then, read the individual (sample) ids
/// and all the genotype data.
/// ```
/// use ndarray as nd;
/// use bed_reader::{Bed, ReadOptions, sample_bed_file};
/// use bed_reader::assert_eq_nan;
///
/// let file_name = sample_bed_file("small.bed")?;
/// let mut bed = Bed::new(file_name)?;
/// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
/// let val = ReadOptions::builder().f64().read(&mut bed)?;
///
/// assert_eq_nan(
///     &val,
///     &nd::array![
///         [1.0, 0.0, f64::NAN, 0.0],
///         [2.0, 0.0, f64::NAN, 2.0],
///         [0.0, 1.0, 2.0, 0.0]
///     ],
/// );
/// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
// All builder setters are custom (written by hand elsewhere), and the
// generated build function is private and named `build_no_file_check`.
#[derive(Clone, Debug, Builder)]
#[builder(build_fn(private, name = "build_no_file_check", error = "BedErrorPlus"))]
pub struct Bed {
    // https://stackoverflow.com/questions/32730714/what-is-the-right-way-to-store-an-immutable-path-in-a-struct
    // don't emit a setter, but keep the field declaration on the builder
    /// The file name or path of the .bed file.
    #[builder(setter(custom))]
    path: PathBuf,

    // Optional path of the .fam file; `None` by default
    // (presumably derived from `path` when unset -- confirm).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    fam_path: Option<PathBuf>,

    // Optional path of the .bim file; `None` by default
    // (presumably derived from `path` when unset -- confirm).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    bim_path: Option<PathBuf>,

    // Defaults to true. NOTE(review): the name suggests the file is validated
    // when it is opened rather than on first read -- confirm.
    #[builder(setter(custom))]
    #[builder(default = "true")]
    is_checked_early: bool,

    // Number of individuals (samples); `None` by default
    // (presumably filled in once it is known -- confirm).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    iid_count: Option<usize>,

    // Number of SNPs (variants); `None` by default
    // (presumably filled in once it is known -- confirm).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    sid_count: Option<usize>,

    // The .fam/.bim metadata associated with this file.
    #[builder(setter(custom))]
    metadata: Metadata,

    // Metadata fields that are not to be read
    // (presumably consulted by `read_fam`/`read_bim` -- confirm).
    #[builder(setter(custom))]
    skip_set: HashSet<MetadataFields>,
}
1704
/// All Metadata fields.
///
/// Used by [`Metadata::read_fam`](struct.Metadata.html#method.read_fam) and
/// [`Metadata::read_bim`](struct.Metadata.html#method.read_bim) to skip reading
/// specified metadata fields.
///
/// The first six variants name fields of the .fam file; the last six name
/// fields of the .bim file.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Ord, PartialOrd, Hash)]
pub enum MetadataFields {
    /// Family id (fid), from the .fam file.
    Fid,
    /// Individual (sample) id (iid), from the .fam file.
    Iid,
    /// Father id, from the .fam file.
    Father,
    /// Mother id, from the .fam file.
    Mother,
    /// Sex, from the .fam file.
    Sex,
    /// Phenotype, from the .fam file (seldom used).
    Pheno,
    /// Chromosome, from the .bim file.
    Chromosome,
    /// SNP (variant) id (sid), from the .bim file.
    Sid,
    /// Centimorgan position, from the .bim file.
    CmPosition,
    /// Base-pair position, from the .bim file.
    BpPosition,
    /// Allele 1, from the .bim file.
    Allele1,
    /// Allele 2, from the .bim file.
    Allele2,
}
1737
1738impl BedBuilder {
1739    #[anyinput]
1740    fn new(path: AnyPath) -> Self {
1741        Self {
1742            path: Some(path.to_owned()),
1743            fam_path: None,
1744            bim_path: None,
1745
1746            is_checked_early: None,
1747            iid_count: None,
1748            sid_count: None,
1749
1750            metadata: Some(Metadata::new()),
1751            skip_set: Some(HashSet::new()),
1752        }
1753    }
1754
1755    /// Create a [`Bed`](struct.Bed.html) from the builder.
1756    ///
1757    /// > See [`Bed::builder`](struct.Bed.html#method.builder) for more details and examples.
1758    pub fn build(&self) -> Result<Bed, Box<BedErrorPlus>> {
1759        let mut bed = self.build_no_file_check()?;
1760
1761        if bed.is_checked_early {
1762            open_and_check(&bed.path)?;
1763        }
1764
1765        (bed.iid_count, bed.sid_count) = bed.metadata.check_counts(bed.iid_count, bed.sid_count)?;
1766
1767        Ok(bed)
1768    }
1769
1770    // https://stackoverflow.com/questions/38183551/concisely-initializing-a-vector-of-strings
1771    // https://stackoverflow.com/questions/65250496/how-to-convert-intoiteratoritem-asrefstr-to-iteratoritem-str-in-rust
1772
1773    /// Override the family id (fid) values found in the .fam file.
1774    ///
1775    /// By default, if fid values are needed and haven't already been found,
1776    /// they will be read from the .fam file.
1777    /// Providing them here avoids that file read and provides a way to give different values.
1778    #[anyinput]
1779    #[must_use]
1780    pub fn fid(mut self, fid: AnyIter<AnyString>) -> Self {
1781        // Unwrap will always work because BedBuilder starting with some metadata
1782        self.metadata.as_mut().unwrap().set_fid(fid);
1783        self
1784    }
1785
1786    /// Override the individual id (iid) values found in the .fam file.
1787    ///
1788    /// By default, if iid values are needed and haven't already been found,
1789    /// they will be read from the .fam file.
1790    /// Providing them here avoids that file read and provides a way to give different values.
1791    /// ```
1792    /// use ndarray as nd;
1793    /// use bed_reader::{Bed, assert_eq_nan, sample_bed_file};
1794    /// let file_name = sample_bed_file("small.bed")?;
1795    /// use bed_reader::ReadOptions;
1796    ///
1797    /// let mut bed = Bed::builder(file_name)
1798    ///    .iid(["sample1", "sample2", "sample3"])
1799    ///    .build()?;
1800    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["sample1", "sample2", "sample3"]
1801    /// # use bed_reader::BedErrorPlus;
1802    /// # Ok::<(), Box<BedErrorPlus>>(())
1803    /// ```
1804    #[anyinput]
1805    #[must_use]
1806    pub fn iid(mut self, iid: AnyIter<AnyString>) -> Self {
1807        // Unwrap will always work because BedBuilder starting with some metadata
1808        self.metadata.as_mut().unwrap().set_iid(iid);
1809        self
1810    }
1811
1812    /// Override the father values found in the .fam file.
1813    ///
1814    /// By default, if father values are needed and haven't already been found,
1815    /// they will be read from the .fam file.
1816    /// Providing them here avoids that file read and provides a way to gi&ve different values.
1817    #[anyinput]
1818    #[must_use]
1819    pub fn father(mut self, father: AnyIter<AnyString>) -> Self {
1820        // Unwrap will always work because BedBuilder starting with some metadata
1821        self.metadata.as_mut().unwrap().set_father(father);
1822        self
1823    }
1824
1825    /// Override the mother values found in the .fam file.
1826    ///
1827    /// By default, if mother values are needed and haven't already been found,
1828    /// they will be read from the .fam file.
1829    /// Providing them here avoids that file read and provides a way to give different values.
1830    #[anyinput]
1831    #[must_use]
1832    pub fn mother(mut self, mother: AnyIter<AnyString>) -> Self {
1833        // Unwrap will always work because BedBuilder starting with some metadata
1834        self.metadata.as_mut().unwrap().set_mother(mother);
1835        self
1836    }
1837
1838    /// Override the sex values found in the .fam file.
1839    ///
1840    /// By default, if sex values are needed and haven't already been found,
1841    /// they will be read from the .fam file.
1842    /// Providing them here avoids that file read and provides a way to give different values.
1843    #[anyinput]
1844    #[must_use]
1845    pub fn sex(mut self, sex: AnyIter<i32>) -> Self {
1846        // Unwrap will always work because BedBuilder starting with some metadata
1847        self.metadata.as_mut().unwrap().set_sex(sex);
1848        self
1849    }
1850
1851    /// Override the phenotype values found in the .fam file.
1852    ///
1853    /// Note that the phenotype values in the .fam file are seldom used.
1854    /// By default, if phenotype values are needed and haven't already been found,
1855    /// they will be read from the .fam file.
1856    /// Providing them here avoids that file read and provides a way to give different values.
1857    #[anyinput]
1858    #[must_use]
1859    pub fn pheno(mut self, pheno: AnyIter<AnyString>) -> Self {
1860        // Unwrap will always work because BedBuilder starting with some metadata
1861        self.metadata.as_mut().unwrap().set_pheno(pheno);
1862        self
1863    }
1864
1865    /// Override the chromosome values found in the .bim file.
1866    ///
1867    /// By default, if chromosome values are needed and haven't already been found,
1868    /// they will be read from the .bim file.
1869    /// Providing them here avoids that file read and provides a way to give different values.
1870    #[anyinput]
1871    #[must_use]
1872    pub fn chromosome(mut self, chromosome: AnyIter<AnyString>) -> Self {
1873        // Unwrap will always work because BedBuilder starting with some metadata
1874        self.metadata.as_mut().unwrap().set_chromosome(chromosome);
1875        self
1876    }
1877
1878    /// Override the SNP id (sid) values found in the .fam file.
1879    ///
1880    /// By default, if sid values are needed and haven't already been found,
1881    /// they will be read from the .bim file.
1882    /// Providing them here avoids that file read and provides a way to give different values.
1883    /// ```
1884    /// use ndarray as nd;
1885    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
1886    /// let file_name = sample_bed_file("small.bed")?;
1887    ///
1888    /// let mut bed = Bed::builder(file_name)
1889    ///    .sid(["SNP1", "SNP2", "SNP3", "SNP4"])
1890    ///    .build()?;
1891    /// println!("{:?}", bed.sid()?); // Outputs ndarray ["SNP1", "SNP2", "SNP3", "SNP4"]
1892    /// # use bed_reader::BedErrorPlus;
1893    /// # Ok::<(), Box<BedErrorPlus>>(())
1894    /// ```
1895    #[anyinput]
1896    #[must_use]
1897    pub fn sid(mut self, sid: AnyIter<AnyString>) -> Self {
1898        self.metadata.as_mut().unwrap().set_sid(sid);
1899        self
1900    }
1901
1902    /// Override the centimorgan position values found in the .bim file.
1903    ///
1904    /// By default, if centimorgan position values are needed and haven't already been found,
1905    /// they will be read from the .bim file.
1906    /// Providing them here avoids that file read and provides a way to give different values.
1907    #[anyinput]
1908    #[must_use]
1909    pub fn cm_position(mut self, cm_position: AnyIter<f32>) -> Self {
1910        // Unwrap will always work because BedBuilder starting with some metadata
1911        self.metadata.as_mut().unwrap().set_cm_position(cm_position);
1912        self
1913    }
1914
1915    /// Override the base-pair position values found in the .bim file.
1916    ///
1917    /// By default, if base-pair position values are needed and haven't already been found,
1918    /// they will be read from the .bim file.
1919    /// Providing them here avoids that file read and provides a way to give different values.
1920    #[anyinput]
1921    #[must_use]
1922    pub fn bp_position(mut self, bp_position: AnyIter<i32>) -> Self {
1923        // Unwrap will always work because BedBuilder starting with some metadata
1924        self.metadata.as_mut().unwrap().set_bp_position(bp_position);
1925        self
1926    }
1927
1928    /// Override the allele 1 values found in the .bim file.
1929    ///
1930    /// By default, if allele 1 values are needed and haven't already been found,
1931    /// they will be read from the .bim file.
1932    /// Providing them here avoids that file read and provides a way to give different values.
1933    #[anyinput]
1934    #[must_use]
1935    pub fn allele_1(mut self, allele_1: AnyIter<AnyString>) -> Self {
1936        // Unwrap will always work because BedBuilder starting with some metadata
1937        self.metadata.as_mut().unwrap().set_allele_1(allele_1);
1938        self
1939    }
1940
1941    /// Override the allele 2 values found in the .bim file.
1942    ///
1943    /// By default, if allele 2 values are needed and haven't already been found,
1944    /// they will be read from the .bim file.
1945    /// Providing them here avoids that file read and provides a way to give different values.
1946    #[anyinput]
1947    #[must_use]
1948    pub fn allele_2(mut self, allele_2: AnyIter<AnyString>) -> Self {
1949        // Unwrap will always work because BedBuilder starting with some metadata
1950        self.metadata.as_mut().unwrap().set_allele_2(allele_2);
1951        self
1952    }
1953
1954    /// Set the number of individuals (samples) in the data.
1955    ///
1956    /// By default, if this number is needed, it will be found
1957    /// and remembered
1958    /// by opening the .fam file and quickly counting the number
1959    /// of lines. Providing the number thus avoids a file read.
1960    #[must_use]
1961    pub fn iid_count(mut self, count: usize) -> Self {
1962        self.iid_count = Some(Some(count));
1963        self
1964    }
1965
1966    /// Set the number of SNPs in the data.
1967    ///
1968    /// By default, if this number is needed, it will be found
1969    /// and remembered
1970    /// by opening the .bim file and quickly counting the number
1971    /// of lines. Providing the number thus avoids a file read.
1972    #[must_use]
1973    pub fn sid_count(mut self, count: usize) -> Self {
1974        self.sid_count = Some(Some(count));
1975        self
1976    }
1977
1978    /// Don't check the header of the .bed file until and unless the file is actually read.
1979    ///
1980    /// By default, when a [`Bed`](struct.Bed.html) struct is created, the .bed
1981    /// file header is checked. This stops that early check.
1982    #[must_use]
1983    pub fn skip_early_check(mut self) -> Self {
1984        self.is_checked_early = Some(false);
1985        self
1986    }
1987
1988    /// Set the path to the .fam file.
1989    ///
1990    /// If not set, the .fam file will be assumed
1991    /// to have the same name as the .bed file, but with the extension .fam.
1992    ///
1993    /// # Example:
1994    /// Read .bed, .fam, and .bim files with non-standard names.
1995    /// ```
1996    /// use bed_reader::{Bed, ReadOptions, sample_files};
1997    /// let deb_maf_mib = sample_files(["small.deb", "small.maf", "small.mib"])?;
1998    /// let mut bed = Bed::builder(&deb_maf_mib[0])
1999    ///    .fam_path(&deb_maf_mib[1])
2000    ///    .bim_path(&deb_maf_mib[2])
2001    ///    .build()?;
2002    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
2003    /// println!("{:?}", bed.sid()?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
2004    /// # use bed_reader::BedErrorPlus;
2005    /// # Ok::<(), Box<BedErrorPlus>>(())
2006    /// ```
2007    #[anyinput]
2008    #[must_use]
2009    pub fn fam_path(mut self, path: AnyPath) -> Self {
2010        self.fam_path = Some(Some(path.to_owned()));
2011        self
2012    }
2013
2014    /// Set the path to the .bim file.
2015    ///
2016    /// If not set, the .bim file will be assumed
2017    /// to have the same name as the .bed file, but with the extension .bim.
2018    ///
2019    /// # Example:
2020    /// Read .bed, .fam, and .bim files with non-standard names.
2021    /// ```
2022    /// use bed_reader::{Bed, ReadOptions, sample_files};
2023    /// let deb_maf_mib = sample_files(["small.deb", "small.maf", "small.mib"])?;
2024    /// let mut bed = Bed::builder(&deb_maf_mib[0])
2025    ///    .fam_path(&deb_maf_mib[1])
2026    ///    .bim_path(&deb_maf_mib[2])
2027    ///    .build()?;
2028    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
2029    /// println!("{:?}", bed.sid()?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
2030    /// # use bed_reader::BedErrorPlus;
2031    /// # Ok::<(), Box<BedErrorPlus>>(())
2032    /// ```
2033    #[must_use]
2034    #[anyinput]
2035    pub fn bim_path(mut self, path: AnyPath) -> Self {
2036        self.bim_path = Some(Some(path.to_owned()));
2037        self
2038    }
2039
2040    /// Don't read the fid information from the .fam file.
2041    ///
2042    /// By default, when the .fam is read, the fid (the family id) is recorded.
2043    /// This stops that recording. This is useful if the fid is not needed.
2044    /// Asking for the fid after skipping it results in an error.    
2045    #[must_use]
2046    pub fn skip_fid(mut self) -> Self {
2047        // Unwrap will always work because BedBuilder starting with some skip_set
2048        self.skip_set.as_mut().unwrap().insert(MetadataFields::Fid);
2049        self
2050    }
2051
2052    /// Don't read the iid information from the .fam file.
2053    ///
2054    /// By default, when the .fam is read, the iid (the individual id) is recorded.
2055    /// This stops that recording. This is useful if the iid is not needed.
2056    /// Asking for the iid after skipping it results in an error.
2057    #[must_use]
2058    pub fn skip_iid(mut self) -> Self {
2059        // Unwrap will always work because BedBuilder starting with some skip_set
2060        self.skip_set.as_mut().unwrap().insert(MetadataFields::Iid);
2061        self
2062    }
2063
2064    /// Don't read the father information from the .fam file.
2065    ///
2066    /// By default, when the .fam is read, the father id is recorded.
2067    /// This stops that recording. This is useful if the father id is not needed.
2068    /// Asking for the father id after skipping it results in an error.    
2069    #[must_use]
2070    pub fn skip_father(mut self) -> Self {
2071        // Unwrap will always work because BedBuilder starting with some skip_set
2072        self.skip_set
2073            .as_mut()
2074            .unwrap()
2075            .insert(MetadataFields::Father);
2076        self
2077    }
2078
2079    /// Don't read the mother information from the .fam file.
2080    ///
2081    /// By default, when the .fam is read, the mother id is recorded.
2082    /// This stops that recording. This is useful if the mother id is not needed.
2083    /// Asking for the mother id after skipping it results in an error.    
2084    #[must_use]
2085    pub fn skip_mother(mut self) -> Self {
2086        // Unwrap will always work because BedBuilder starting with some skip_set
2087        self.skip_set
2088            .as_mut()
2089            .unwrap()
2090            .insert(MetadataFields::Mother);
2091        self
2092    }
2093
2094    /// Don't read the sex information from the .fam file.
2095    ///
2096    /// By default, when the .fam is read, the sex is recorded.
2097    /// This stops that recording. This is useful if sex is not needed.
2098    /// Asking for sex after skipping it results in an error.    
2099    #[must_use]
2100    pub fn skip_sex(mut self) -> Self {
2101        // Unwrap will always work because BedBuilder starting with some skip_set
2102        self.skip_set.as_mut().unwrap().insert(MetadataFields::Sex);
2103        self
2104    }
2105
2106    /// Don't read the phenotype information from the .fam file.
2107    ///
2108    /// Note that the phenotype information in the .fam file is
2109    /// seldom used.
2110    ///
2111    /// By default, when the .fam is read, the phenotype is recorded.
2112    /// This stops that recording. This is useful if this phenotype
2113    /// information is not needed.
2114    /// Asking for the phenotype after skipping it results in an error.    
2115    #[must_use]
2116    pub fn skip_pheno(mut self) -> Self {
2117        // Unwrap will always work because BedBuilder starting with some skip_set
2118        self.skip_set
2119            .as_mut()
2120            .unwrap()
2121            .insert(MetadataFields::Pheno);
2122        self
2123    }
2124
2125    /// Don't read the chromosome information from the .bim file.
2126    ///
2127    /// By default, when the .bim is read, the chromosome is recorded.
2128    /// This stops that recording. This is useful if the chromosome is not needed.
2129    /// Asking for the chromosome after skipping it results in an error.    
2130    #[must_use]
2131    pub fn skip_chromosome(mut self) -> Self {
2132        // Unwrap will always work because BedBuilder starting with some skip_set
2133        self.skip_set
2134            .as_mut()
2135            .unwrap()
2136            .insert(MetadataFields::Chromosome);
2137        self
2138    }
2139
2140    /// Don't read the SNP id information from the .bim file.
2141    ///
2142    /// By default, when the .bim is read, the sid (SNP id) is recorded.
2143    /// This stops that recording. This is useful if the sid is not needed.
2144    /// Asking for the sid after skipping it results in an error.    
2145    #[must_use]
2146    pub fn skip_sid(mut self) -> Self {
2147        // Unwrap will always work because BedBuilder starting with some skip_set
2148        self.skip_set.as_mut().unwrap().insert(MetadataFields::Sid);
2149        self
2150    }
2151
2152    /// Don't read the centimorgan position information from the .bim file.
2153    ///
2154    /// By default, when the .bim is read, the cm position is recorded.
2155    /// This stops that recording. This is useful if the cm position is not needed.
2156    /// Asking for the cm position after skipping it results in an error.    
2157    #[must_use]
2158    pub fn skip_cm_position(mut self) -> Self {
2159        // Unwrap will always work because BedBuilder starting with some skip_set
2160        self.skip_set
2161            .as_mut()
2162            .unwrap()
2163            .insert(MetadataFields::CmPosition);
2164        self
2165    }
2166
2167    /// Don't read the base-pair position information from the .bim file.
2168    ///
2169    /// By default, when the .bim is read, the bp position is recorded.
2170    /// This stops that recording. This is useful if the bp position is not needed.
2171    /// Asking for the cp position after skipping it results in an error.    
2172    #[must_use]
2173    pub fn skip_bp_position(mut self) -> Self {
2174        // Unwrap will always work because BedBuilder starting with some skip_set
2175        self.skip_set
2176            .as_mut()
2177            .unwrap()
2178            .insert(MetadataFields::BpPosition);
2179        self
2180    }
2181
2182    /// Don't read the allele 1 information from the .bim file.
2183    ///
2184    /// By default, when the .bim is read, allele 1 is recorded.
2185    /// This stops that recording. This is useful if allele 1 is not needed.
2186    /// Asking for allele 1 after skipping it results in an error.    
2187    #[must_use]
2188    pub fn skip_allele_1(mut self) -> Self {
2189        // Unwrap will always work because BedBuilder starting with some skip_set
2190        self.skip_set
2191            .as_mut()
2192            .unwrap()
2193            .insert(MetadataFields::Allele1);
2194        self
2195    }
2196
2197    /// Don't read the allele 2 information from the .bim file.
2198    ///
2199    /// By default, when the .bim is read, allele 2 is recorded.
2200    /// This stops that recording. This is useful if allele 2 is not needed.
2201    /// Asking for allele 2 after skipping it results in an error.    
2202    #[must_use]
2203    pub fn skip_allele_2(mut self) -> Self {
2204        // Unwrap will always work because BedBuilder starting with some skip_set
2205        self.skip_set
2206            .as_mut()
2207            .unwrap()
2208            .insert(MetadataFields::Allele2);
2209        self
2210    }
2211
2212    /// Override the metadata in the .fam and .bim files with info merged in from a [`Metadata`](struct.Metadata.html).
2213    ///
2214    /// # Example
2215    ///
2216    /// In the example, we create a [`Metadata`](struct.Metadata.html) with iid
2217    /// and sid arrays. Next, we use [`BedBuilder`](struct.BedBuilder.html) to override the fid array
2218    /// and an iid array. Then, we add the metadata to the [`BedBuilder`](struct.BedBuilder.html),
2219    /// overwriting iid (again) and overriding sid. Finally, we print these
2220    /// three arrays and chromosome. Chromosome was never overridden so
2221    /// it is read from the *.bim file.
2222    ///```
2223    /// use ndarray as nd;
2224    /// use bed_reader::{Bed, Metadata, sample_bed_file};
2225    ///
2226    /// let file_name = sample_bed_file("small.bed")?;
2227    /// let metadata = Metadata::builder()
2228    ///     .iid(["i1", "i2", "i3"])
2229    ///     .sid(["s1", "s2", "s3", "s4"])
2230    ///     .build()?;
2231    /// let mut bed = Bed::builder(file_name)
2232    ///     .fid(["f1", "f2", "f3"])
2233    ///     .iid(["x1", "x2", "x3"])
2234    ///     .metadata(&metadata)
2235    ///     .build()?;
2236    /// println!("{0:?}", bed.fid()?);  // Outputs ndarray ["f1", "f2", "f3"]
2237    /// println!("{0:?}", bed.iid()?);  // Outputs ndarray ["i1", "i2", "i3"]
2238    /// println!("{0:?}", bed.sid()?);  // Outputs ndarray ["s1", "s2", "s3", "s4"]
2239    /// println!("{0:?}", bed.chromosome()?);  // Outputs ndarray ["1", "1", "5", "Y"]
2240    /// # use bed_reader::BedErrorPlus;
2241    /// # Ok::<(), Box<BedErrorPlus>>(())
2242    /// ```
2243    #[must_use]
2244    pub fn metadata(mut self, metadata: &Metadata) -> Self {
2245        self.metadata = Some(
2246            Metadata::builder()
2247                .metadata(&self.metadata.unwrap()) // unwrap is ok because we know we have metadata
2248                .metadata(metadata) // consistent counts will be check later by the BedBuilder
2249                .build_no_file_check()
2250                .unwrap(), // unwrap is ok because nothing can go wrong
2251        );
2252
2253        self
2254    }
2255}
2256
2257#[anyinput]
2258fn to_metadata_path(
2259    bed_path: AnyPath,
2260    metadata_path: &Option<PathBuf>,
2261    extension: AnyString,
2262) -> PathBuf {
2263    if let Some(metadata_path) = metadata_path {
2264        metadata_path.to_owned()
2265    } else {
2266        bed_path.with_extension(extension)
2267    }
2268}
2269
2270impl Bed {
    /// Attempts to open a local PLINK .bed file for reading. Supports options.
    ///
    /// > Also see [`Bed::new`](struct.Bed.html#method.new), which does not support options.
    /// > For reading from the cloud, see [`BedCloud`](struct.BedCloud.html).
    ///
    /// The options, [listed here](struct.BedBuilder.html#implementations), can:
    ///  * set the path of the .fam and/or .bim file
    ///  * override some metadata, for example, replace the individual ids.
    ///  * set the number of individuals (samples) or SNPs (variants)
    ///  * control checking the validity of the .bed file's header
    ///  * skip reading selected metadata
    ///
    /// Note that this method is lazy about holding files, so unlike `std::fs::File::open(&path)`, it
    /// will not necessarily lock the file(s).
    ///
    /// # Errors
    /// This method itself only creates the builder. By default, the builder's
    /// [`build`](struct.BedBuilder.html#method.build) will return an error if the file is missing or its header
    /// is ill-formed. It will also return an error if the options contradict each other.
    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
    /// for all possible errors.
    ///
    /// # Examples
    /// List individual (sample) [`iid`](struct.Bed.html#method.iid) and
    /// SNP (variant) [`sid`](struct.Bed.html#method.sid),
    /// then [`read`](struct.Bed.html#method.read) the whole file.
    ///
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, assert_eq_nan, sample_bed_file};
    ///
    /// let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::builder(file_name).build()?;
    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
    /// println!("{:?}", bed.sid()?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
    /// let val = bed.read::<f64>()?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1.0, 0.0, f64::NAN, 0.0],
    ///         [2.0, 0.0, f64::NAN, 2.0],
    ///         [0.0, 1.0, 2.0, 0.0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    ///
    /// Replace [`iid`](struct.Bed.html#method.iid).
    /// ```
    /// # use ndarray as nd;
    /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
    /// # let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::builder(file_name)
    ///    .iid(["sample1", "sample2", "sample3"])
    ///    .build()?;
    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["sample1", "sample2", "sample3"]
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    /// Give the number of individuals (samples) and SNPs (variants) so that the .fam and
    /// .bim files need never be opened.
    /// ```
    /// # use ndarray as nd;
    /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
    /// # let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::builder(file_name).iid_count(3).sid_count(4).build()?;
    /// let val = bed.read::<f64>()?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1.0, 0.0, f64::NAN, 0.0],
    ///         [2.0, 0.0, f64::NAN, 2.0],
    ///         [0.0, 1.0, 2.0, 0.0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    /// Mark some properties as "don’t read or offer".
    /// ```
    /// # use ndarray as nd;
    /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
    /// # let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::builder(file_name)
    ///     .skip_father()
    ///     .skip_mother()
    ///     .skip_sex()
    ///     .skip_pheno()
    ///     .skip_allele_1()
    ///     .skip_allele_2()
    ///     .build()?;
    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
    /// bed.allele_2().expect_err("Can't be read");
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[anyinput]
    pub fn builder(path: AnyPath) -> BedBuilder {
        // Only creates the builder; any file checking happens in `BedBuilder::build`.
        BedBuilder::new(path)
    }
2374
2375    /// Attempts to open a local PLINK .bed file for reading. Does not support options.
2376    ///
2377    /// > Also see [`Bed::builder`](struct.Bed.html#method.builder), which does support options.
2378    /// > For reading from the cloud, see [`BedCloud`](struct.BedCloud.html).
2379    ///
2380    /// Note that this method is a lazy about holding files, so unlike `std::fs::File::open(&path)`, it
2381    /// will not necessarily lock the file(s).
2382    ///
2383    /// # Errors
2384    /// By default, this method will return an error if the file is missing or its header
2385    /// is ill-formed. See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2386    /// for all possible errors.
2387    ///
2388    /// # Examples
2389    /// List individual (sample) [`iid`](struct.Bed.html#method.iid) and
2390    /// SNP (variant) [`sid`](struct.Bed.html#method.sid),
2391    /// then [`read`](struct.Bed.html#method.read) the whole file.
2392    ///
2393    /// ```
2394    /// use ndarray as nd;
2395    /// use bed_reader::{Bed, assert_eq_nan, sample_bed_file};
2396    ///
2397    /// let file_name = sample_bed_file("small.bed")?;
2398    /// let mut bed = Bed::new(file_name)?;
2399    /// println!("{:?}", bed.iid()?); // Outputs ndarray: ["iid1", "iid2", "iid3"]
2400    /// println!("{:?}", bed.sid()?); // Outputs ndarray: ["sid1", "sid2", "sid3", "sid4"]
2401    /// let val = bed.read::<f64>()?;
2402    ///
2403    /// assert_eq_nan(
2404    ///     &val,
2405    ///     &nd::array![
2406    ///         [1.0, 0.0, f64::NAN, 0.0],
2407    ///         [2.0, 0.0, f64::NAN, 2.0],
2408    ///         [0.0, 1.0, 2.0, 0.0]
2409    ///     ],
2410    /// );
2411    /// # use bed_reader::BedErrorPlus;
2412    /// # Ok::<(), Box<BedErrorPlus>>(())
2413    /// ```
2414    ///
2415    /// Open the file and read data for one SNP (variant)
2416    /// at index position 2.
2417    /// ```
2418    /// # use ndarray as nd;
2419    /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2420    /// # let file_name = sample_bed_file("small.bed")?;
2421    ///
2422    /// let mut bed = Bed::new(file_name)?;
2423    /// let val = ReadOptions::builder().sid_index(2).f64().read(&mut bed)?;
2424    ///
2425    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
2426    /// # use bed_reader::BedErrorPlus;
2427    /// # Ok::<(), Box<BedErrorPlus>>(())
2428    /// ```
2429    #[anyinput]
2430    pub fn new(path: AnyPath) -> Result<Self, Box<BedErrorPlus>> {
2431        Bed::builder(path).build()
2432    }
2433
2434    /// Number of individuals (samples)
2435    ///
2436    /// If this number is needed, it will be found
2437    /// by opening the .fam file and quickly counting the number
2438    /// of lines. Once found, the number will be remembered.
2439    /// The file read can be avoided by setting the
2440    /// number with [`BedBuilder::iid_count`](struct.BedBuilder.html#method.iid_count)
2441    /// or, for example, [`BedBuilder::iid`](struct.BedBuilder.html#method.iid).
2442    ///
2443    /// # Example:
2444    /// ```
2445    /// use ndarray as nd;
2446    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2447    ///
2448    /// let file_name = sample_bed_file("small.bed")?;
2449    /// let mut bed = Bed::new(file_name)?;
2450    /// let iid_count = bed.iid_count()?;
2451    ///
2452    /// assert!(iid_count == 3);
2453    /// # use bed_reader::BedErrorPlus;
2454    /// # Ok::<(), Box<BedErrorPlus>>(())
2455    pub fn iid_count(&mut self) -> Result<usize, Box<BedErrorPlus>> {
2456        if let Some(iid_count) = self.iid_count {
2457            Ok(iid_count)
2458        } else {
2459            let fam_path = self.fam_path();
2460            let iid_count = count_lines(fam_path)?;
2461            self.iid_count = Some(iid_count);
2462            Ok(iid_count)
2463        }
2464    }
2465
2466    /// Number of SNPs (variants)
2467    ///
2468    /// If this number is needed, it will be found
2469    /// by opening the .bim file and quickly counting the number
2470    /// of lines. Once found, the number will be remembered.
2471    /// The file read can be avoided by setting the
2472    /// number with [`BedBuilder::sid_count`](struct.BedBuilder.html#method.sid_count)
2473    /// or, for example, [`BedBuilder::sid`](struct.BedBuilder.html#method.sid).
2474    ///
2475    /// # Example:
2476    /// ```
2477    /// use ndarray as nd;
2478    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2479    ///
2480    /// let file_name = sample_bed_file("small.bed")?;
2481    /// let mut bed = Bed::new(file_name)?;
2482    /// let sid_count = bed.sid_count()?;
2483    ///
2484    /// assert!(sid_count == 4);
2485    /// # use bed_reader::BedErrorPlus;
2486    /// # Ok::<(), Box<BedErrorPlus>>(())
2487    pub fn sid_count(&mut self) -> Result<usize, Box<BedErrorPlus>> {
2488        if let Some(sid_count) = self.sid_count {
2489            Ok(sid_count)
2490        } else {
2491            let bim_path = self.bim_path();
2492            let sid_count = count_lines(bim_path)?;
2493            self.sid_count = Some(sid_count);
2494            Ok(sid_count)
2495        }
2496    }
2497
2498    /// Number of individuals (samples) and SNPs (variants)
2499    ///
2500    /// If these numbers aren't known, they will be found
2501    /// by opening the .fam and .bim files and quickly counting the number
2502    /// of lines. Once found, the numbers will be remembered.
2503    /// The file read can be avoided by setting the
2504    /// number with [`BedBuilder::iid_count`](struct.BedBuilder.html#method.iid_count)
2505    /// and [`BedBuilder::sid_count`](struct.BedBuilder.html#method.sid_count).
2506    ///
2507    /// # Example:
2508    /// ```
2509    /// use ndarray as nd;
2510    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2511    /// use bed_reader::assert_eq_nan;
2512    ///
2513    /// let file_name = sample_bed_file("small.bed")?;
2514    /// let mut bed = Bed::new(file_name)?;
2515    /// let dim = bed.dim()?;
2516    ///
2517    /// assert!(dim == (3,4));
2518    /// # use bed_reader::BedErrorPlus;
2519    /// # Ok::<(), Box<BedErrorPlus>>(())
2520    pub fn dim(&mut self) -> Result<(usize, usize), Box<BedErrorPlus>> {
2521        Ok((self.iid_count()?, self.sid_count()?))
2522    }
2523
2524    /// Family id of each of individual (sample)
2525    ///
2526    /// If this ndarray is needed, it will be found
2527    /// by reading the .fam file. Once found, this ndarray
2528    /// and other information in the .fam file will be remembered.
2529    /// The file read can be avoided by setting the
2530    /// array with [`BedBuilder::fid`](struct.BedBuilder.html#method.fid).
2531    ///
2532    /// # Example:
2533    /// ```
2534    /// use ndarray as nd;
2535    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2536    /// use bed_reader::assert_eq_nan;
2537    ///
2538    /// let file_name = sample_bed_file("small.bed")?;
2539    /// let mut bed = Bed::new(file_name)?;
2540    /// let fid = bed.fid()?;
2541    /// println!("{fid:?}"); // Outputs ndarray ["fid1", "fid1", "fid2"]
2542    /// # use bed_reader::BedErrorPlus;
2543    /// # Ok::<(), Box<BedErrorPlus>>(())
2544    pub fn fid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2545        self.unlazy_fam::<String>(self.metadata.fid.is_none(), MetadataFields::Fid, "fid")?;
2546        Ok(self.metadata.fid.as_ref().unwrap()) //unwrap always works because of lazy_fam
2547    }
2548
2549    /// Individual id of each of individual (sample)
2550    ///
2551    /// If this ndarray is needed, it will be found
2552    /// by reading the .fam file. Once found, this ndarray
2553    /// and other information in the .fam file will be remembered.
2554    /// The file read can be avoided by setting the
2555    /// array with [`BedBuilder::iid`](struct.BedBuilder.html#method.iid).
2556    ///
2557    /// # Example:
2558    /// ```
2559    /// use ndarray as nd;
2560    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2561    /// use bed_reader::assert_eq_nan;
2562    ///
2563    /// let file_name = sample_bed_file("small.bed")?;
2564    /// let mut bed = Bed::new(file_name)?;
2565    /// let iid = bed.iid()?;    ///
2566    /// println!("{iid:?}"); // Outputs ndarray ["iid1", "iid2", "iid3"]
2567    /// # use bed_reader::BedErrorPlus;
2568    /// # Ok::<(), Box<BedErrorPlus>>(())
2569    pub fn iid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2570        self.unlazy_fam::<String>(self.metadata.iid.is_none(), MetadataFields::Iid, "iid")?;
2571        Ok(self.metadata.iid.as_ref().unwrap()) //unwrap always works because of lazy_fam
2572    }
2573
2574    /// Father id of each of individual (sample)
2575    ///
2576    /// If this ndarray is needed, it will be found
2577    /// by reading the .fam file. Once found, this ndarray
2578    /// and other information in the .fam file will be remembered.
2579    /// The file read can be avoided by setting the
2580    /// array with [`BedBuilder::father`](struct.BedBuilder.html#method.father).
2581    ///
2582    /// # Example:
2583    /// ```
2584    /// use ndarray as nd;
2585    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2586    /// use bed_reader::assert_eq_nan;
2587    ///
2588    /// let file_name = sample_bed_file("small.bed")?;
2589    /// let mut bed = Bed::new(file_name)?;
2590    /// let father = bed.father()?;
2591    /// println!("{father:?}"); // Outputs ndarray ["iid23", "iid23", "iid22"]
2592    /// # use bed_reader::BedErrorPlus;
2593    /// # Ok::<(), Box<BedErrorPlus>>(())    
2594    pub fn father(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2595        self.unlazy_fam::<String>(
2596            self.metadata.father.is_none(),
2597            MetadataFields::Father,
2598            "father",
2599        )?;
2600        Ok(self.metadata.father.as_ref().unwrap()) //unwrap always works because of lazy_fam
2601    }
2602
2603    /// Mother id of each of individual (sample)
2604    ///
2605    /// If this ndarray is needed, it will be found
2606    /// by reading the .fam file. Once found, this ndarray
2607    /// and other information in the .fam file will be remembered.
2608    /// The file read can be avoided by setting the
2609    /// array with [`BedBuilder::mother`](struct.BedBuilder.html#method.mother).
2610    ///
2611    /// # Example:
2612    /// ```
2613    /// use ndarray as nd;
2614    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2615    /// use bed_reader::assert_eq_nan;
2616    ///
2617    /// let file_name = sample_bed_file("small.bed")?;
2618    /// let mut bed = Bed::new(file_name)?;
2619    /// let mother = bed.mother()?;
2620    /// println!("{mother:?}"); // Outputs ndarray ["iid34", "iid34", "iid33"]
2621    /// # use bed_reader::BedErrorPlus;
2622    /// # Ok::<(), Box<BedErrorPlus>>(())
2623    pub fn mother(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2624        self.unlazy_fam::<String>(
2625            self.metadata.mother.is_none(),
2626            MetadataFields::Mother,
2627            "mother",
2628        )?;
2629        Ok(self.metadata.mother.as_ref().unwrap()) //unwrap always works because of lazy_fam
2630    }
2631
2632    /// Sex each of individual (sample)
2633    ///
2634    /// 0 is unknown, 1 is male, 2 is female
2635    ///
2636    /// If this ndarray is needed, it will be found
2637    /// by reading the .fam file. Once found, this ndarray
2638    /// and other information in the .fam file will be remembered.
2639    /// The file read can be avoided by setting the
2640    /// array with [`BedBuilder::sex`](struct.BedBuilder.html#method.sex).
2641    ///
2642    /// # Example:
2643    /// ```
2644    /// use ndarray as nd;
2645    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2646    /// use bed_reader::assert_eq_nan;
2647    ///
2648    /// let file_name = sample_bed_file("small.bed")?;
2649    /// let mut bed = Bed::new(file_name)?;
2650    /// let sex = bed.sex()?;
2651    /// println!("{sex:?}"); // Outputs ndarray [1, 2, 0]
2652    /// # use bed_reader::BedErrorPlus;
2653    /// # Ok::<(), Box<BedErrorPlus>>(())
2654    pub fn sex(&mut self) -> Result<&nd::Array1<i32>, Box<BedErrorPlus>> {
2655        self.unlazy_fam::<String>(self.metadata.sex.is_none(), MetadataFields::Sex, "sex")?;
2656        Ok(self.metadata.sex.as_ref().unwrap()) //unwrap always works because of lazy_fam
2657    }
2658
2659    /// A phenotype for each individual (seldom used)
2660    ///
2661    /// If this ndarray is needed, it will be found
2662    /// by reading the .fam file. Once found, this ndarray
2663    /// and other information in the .fam file will be remembered.
2664    /// The file read can be avoided by setting the
2665    /// array with [`BedBuilder::pheno`](struct.BedBuilder.html#method.pheno).
2666    ///
2667    /// # Example:
2668    /// ```
2669    /// use ndarray as nd;
2670    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2671    /// use bed_reader::assert_eq_nan;
2672    ///
2673    /// let file_name = sample_bed_file("small.bed")?;
2674    /// let mut bed = Bed::new(file_name)?;
2675    /// let pheno = bed.pheno()?;
2676    /// println!("{pheno:?}"); // Outputs ndarray ["red", "red", "blue"]
2677    /// # use bed_reader::BedErrorPlus;
2678    /// # Ok::<(), Box<BedErrorPlus>>(())
2679    pub fn pheno(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2680        self.unlazy_fam::<String>(
2681            self.metadata.pheno.is_none(),
2682            MetadataFields::Pheno,
2683            "pheno",
2684        )?;
2685        Ok(self.metadata.pheno.as_ref().unwrap()) //unwrap always works because of lazy_fam
2686    }
2687
2688    /// Chromosome of each SNP (variant)
2689    ///
2690    /// If this ndarray is needed, it will be found
2691    /// by reading the .bim file. Once found, this ndarray
2692    /// and other information in the .bim file will be remembered.
2693    /// The file read can be avoided by setting the
2694    /// array with [`BedBuilder::chromosome`](struct.BedBuilder.html#method.chromosome).
2695    ///
2696    /// # Example:
2697    /// ```
2698    /// use ndarray as nd;
2699    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2700    /// use bed_reader::assert_eq_nan;
2701    ///
2702    /// let file_name = sample_bed_file("small.bed")?;
2703    /// let mut bed = Bed::new(file_name)?;
2704    /// let chromosome = bed.chromosome()?;
2705    /// println!("{chromosome:?}"); // Outputs ndarray ["1", "1", "5", "Y"]
2706    /// # use bed_reader::BedErrorPlus;
2707    /// # Ok::<(), Box<BedErrorPlus>>(())
2708    pub fn chromosome(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2709        self.unlazy_bim::<String>(
2710            self.metadata.chromosome.is_none(),
2711            MetadataFields::Chromosome,
2712            "chromosome",
2713        )?;
2714        Ok(self.metadata.chromosome.as_ref().unwrap()) //unwrap always works because of lazy_bim
2715    }
2716
2717    /// SNP id of each SNP (variant)
2718    ///
2719    /// If this ndarray is needed, it will be found
2720    /// by reading the .bim file. Once found, this ndarray
2721    /// and other information in the .bim file will be remembered.
2722    /// The file read can be avoided by setting the
2723    /// array with [`BedBuilder::sid`](struct.BedBuilder.html#method.sid).
2724    ///
2725    /// # Example:
2726    /// ```
2727    /// use ndarray as nd;
2728    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2729    /// use bed_reader::assert_eq_nan;
2730    ///
2731    /// let file_name = sample_bed_file("small.bed")?;
2732    /// let mut bed = Bed::new(file_name)?;
2733    /// let sid = bed.sid()?;
2734    /// println!("{sid:?}"); // Outputs ndarray "sid1", "sid2", "sid3", "sid4"]
2735    /// # use bed_reader::BedErrorPlus;
2736    /// # Ok::<(), Box<BedErrorPlus>>(())
2737    pub fn sid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2738        self.unlazy_bim::<String>(self.metadata.sid.is_none(), MetadataFields::Sid, "sid")?;
2739        Ok(self.metadata.sid.as_ref().unwrap()) //unwrap always works because of lazy_bim
2740    }
2741
2742    /// Centimorgan position of each SNP (variant)
2743    ///
2744    /// If this ndarray is needed, it will be found
2745    /// by reading the .bim file. Once found, this ndarray
2746    /// and other information in the .bim file will be remembered.
2747    /// The file read can be avoided by setting the
2748    /// array with [`BedBuilder::cm_position`](struct.BedBuilder.html#method.cm_position).
2749    ///
2750    /// # Example:
2751    /// ```
2752    /// use ndarray as nd;
2753    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2754    /// use bed_reader::assert_eq_nan;
2755    ///
2756    /// let file_name = sample_bed_file("small.bed")?;
2757    /// let mut bed = Bed::new(file_name)?;
2758    /// let cm_position = bed.cm_position()?;
2759    /// println!("{cm_position:?}"); // Outputs ndarray [100.4, 2000.5, 4000.7, 7000.9]
2760    /// # use bed_reader::BedErrorPlus;
2761    /// # Ok::<(), Box<BedErrorPlus>>(())
2762    pub fn cm_position(&mut self) -> Result<&nd::Array1<f32>, Box<BedErrorPlus>> {
2763        self.unlazy_bim::<String>(
2764            self.metadata.cm_position.is_none(),
2765            MetadataFields::CmPosition,
2766            "cm_position",
2767        )?;
2768        Ok(self.metadata.cm_position.as_ref().unwrap()) //unwrap always works because of lazy_bim
2769    }
2770
2771    /// Base-pair position of each SNP (variant)
2772    ///
2773    /// If this ndarray is needed, it will be found
2774    /// by reading the .bim file. Once found, this ndarray
2775    /// and other information in the .bim file will be remembered.
2776    /// The file read can be avoided by setting the
2777    /// array with [`BedBuilder::bp_position`](struct.BedBuilder.html#method.bp_position).
2778    ///
2779    /// # Example:
2780    /// ```
2781    /// use ndarray as nd;
2782    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2783    /// use bed_reader::assert_eq_nan;
2784    ///
2785    /// let file_name = sample_bed_file("small.bed")?;
2786    /// let mut bed = Bed::new(file_name)?;
2787    /// let bp_position = bed.bp_position()?;
2788    /// println!("{bp_position:?}"); // Outputs ndarray [1, 100, 1000, 1004]
2789    /// # use bed_reader::BedErrorPlus;
2790    /// # Ok::<(), Box<BedErrorPlus>>(())
2791    pub fn bp_position(&mut self) -> Result<&nd::Array1<i32>, Box<BedErrorPlus>> {
2792        self.unlazy_bim::<String>(
2793            self.metadata.bp_position.is_none(),
2794            MetadataFields::BpPosition,
2795            "bp_position",
2796        )?;
2797        Ok(self.metadata.bp_position.as_ref().unwrap()) //unwrap always works because of lazy_bim
2798    }
2799
2800    /// First allele of each SNP (variant)
2801    ///
2802    /// If this ndarray is needed, it will be found
2803    /// by reading the .bim file. Once found, this ndarray
2804    /// and other information in the .bim file will be remembered.
2805    /// The file read can be avoided by setting the
2806    /// array with [`BedBuilder::allele_1`](struct.BedBuilder.html#method.allele_1).
2807    ///
2808    /// # Example:
2809    /// ```
2810    /// use ndarray as nd;
2811    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2812    /// use bed_reader::assert_eq_nan;
2813    ///
2814    /// let file_name = sample_bed_file("small.bed")?;
2815    /// let mut bed = Bed::new(file_name)?;
2816    /// let allele_1 = bed.allele_1()?;
2817    /// println!("{allele_1:?}"); // Outputs ndarray ["A", "T", "A", "T"]
2818    /// # use bed_reader::BedErrorPlus;
2819    /// # Ok::<(), Box<BedErrorPlus>>(())
2820    pub fn allele_1(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2821        self.unlazy_bim::<String>(
2822            self.metadata.allele_1.is_none(),
2823            MetadataFields::Allele1,
2824            "allele_1",
2825        )?;
2826        Ok(self.metadata.allele_1.as_ref().unwrap()) //unwrap always works because of lazy_bim
2827    }
2828
2829    /// Second allele of each SNP (variant)
2830    ///
2831    /// If this ndarray is needed, it will be found
2832    /// by reading the .bim file. Once found, this ndarray
2833    /// and other information in the .bim file will be remembered.
2834    /// The file read can be avoided by setting the
2835    /// array with [`BedBuilder::allele_2`](struct.BedBuilder.html#method.allele_2).
2836    ///
2837    /// # Example:
2838    /// ```
2839    /// use ndarray as nd;
2840    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2841    /// use bed_reader::assert_eq_nan;
2842    ///
2843    /// let file_name = sample_bed_file("small.bed")?;
2844    /// let mut bed = Bed::new(file_name)?;
2845    /// let allele_2 = bed.allele_2()?;
2846    /// println!("{allele_2:?}"); // Outputs ndarray ["A", "C", "C", "G"]
2847    /// # use bed_reader::BedErrorPlus;
2848    /// # Ok::<(), Box<BedErrorPlus>>(())
2849    pub fn allele_2(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2850        self.unlazy_bim::<String>(
2851            self.metadata.allele_2.is_none(),
2852            MetadataFields::Allele2,
2853            "allele_2",
2854        )?;
2855        Ok(self.metadata.allele_2.as_ref().unwrap()) //unwrap always works because of lazy_bim
2856    }
2857
2858    /// [`Metadata`](struct.Metadata.html) for this dataset, for example, the individual (sample) Ids.
2859    ///
2860    /// This returns a struct with 12 fields. Each field is a ndarray.
2861    /// The struct will always be new, but the 12 ndarrays will be
2862    /// shared with this [`Bed`](struct.Bed.html).
2863    ///
2864    /// If the needed, the metadata will be read from the .fam and/or .bim files.
2865    /// ```
2866    /// use ndarray as nd;
2867    /// use bed_reader::{Bed, sample_bed_file};
2868    ///
2869    /// let file_name = sample_bed_file("small.bed")?;
2870    /// let mut bed = Bed::new(file_name)?;
2871    /// let metadata = bed.metadata()?;
2872    /// println!("{0:?}", metadata.iid()); // Outputs Some(["iid1", "iid2", "iid3"] ...)
2873    /// println!("{0:?}", metadata.sid()); // Outputs Some(["sid1", "sid2", "sid3", "sid4"] ...)
2874    /// # use bed_reader::BedErrorPlus;
2875    /// # Ok::<(), Box<BedErrorPlus>>(())
2876    pub fn metadata(&mut self) -> Result<Metadata, Box<BedErrorPlus>> {
2877        self.fam()?;
2878        self.bim()?;
2879        Ok(self.metadata.clone())
2880    }
2881
2882    /// Return the path of the .bed file.
2883    #[must_use]
2884    pub fn path(&self) -> &Path {
2885        &self.path
2886    }
2887
2888    /// Return the path of the .fam file.
2889    pub fn fam_path(&mut self) -> PathBuf {
2890        // We need to clone the path because self might mutate later
2891        if let Some(path) = &self.fam_path {
2892            path.clone()
2893        } else {
2894            let path = to_metadata_path(&self.path, &self.fam_path, "fam");
2895            self.fam_path = Some(path.clone());
2896            path
2897        }
2898    }
2899
2900    /// Return the path of the .bim file.
2901    pub fn bim_path(&mut self) -> PathBuf {
2902        // We need to clone the path because self might mutate later
2903        if let Some(path) = &self.bim_path {
2904            path.clone()
2905        } else {
2906            let path = to_metadata_path(&self.path, &self.bim_path, "bim");
2907            self.bim_path = Some(path.clone());
2908            path
2909        }
2910    }
2911
2912    /// Read genotype data.
2913    ///
2914    /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) which supports selection and options.
2915    ///
2916    /// # Errors
2917    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2918    /// for all possible errors.
2919    ///
2920    /// # Examples
2921    /// Read all data in a .bed file.
2922    ///
2923    /// ```
2924    /// use ndarray as nd;
2925    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2926    /// use bed_reader::assert_eq_nan;
2927    ///
2928    /// let file_name = sample_bed_file("small.bed")?;
2929    /// let mut bed = Bed::new(file_name)?;
2930    /// let val = bed.read::<f64>()?;
2931    ///
2932    /// assert_eq_nan(
2933    ///     &val,
2934    ///     &nd::array![
2935    ///         [1.0, 0.0, f64::NAN, 0.0],
2936    ///         [2.0, 0.0, f64::NAN, 2.0],
2937    ///         [0.0, 1.0, 2.0, 0.0]
2938    ///     ],
2939    /// );
2940    ///
2941    /// // Your output array can be f32, f64, or i8
2942    /// let val = bed.read::<i8>()?;
2943    /// assert_eq_nan(
2944    ///     &val,
2945    ///     &nd::array![
2946    ///         [1, 0, -127, 0],
2947    ///         [2, 0, -127, 2],
2948    ///         [0, 1, 2, 0]
2949    ///     ],
2950    /// );
2951    /// # use bed_reader::BedErrorPlus;
2952    /// # Ok::<(), Box<BedErrorPlus>>(())
2953    /// ```    
2954    pub fn read<TVal: BedVal>(&mut self) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
2955        let read_options = ReadOptions::<TVal>::builder().build()?;
2956        self.read_with_options(&read_options)
2957    }
2958
2959    /// Read genotype data with options, into a preallocated array.
2960    ///
2961    /// > Also see [`ReadOptionsBuilder::read_and_fill`](struct.ReadOptionsBuilder.html#method.read_and_fill).
2962    ///
2963    /// Note that options [`ReadOptions::f`](struct.ReadOptions.html#method.f),
2964    /// [`ReadOptions::c`](struct.ReadOptions.html#method.c), and [`ReadOptions::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
2965    /// are ignored. Instead, the order of the preallocated array is used.
2966    ///
2967    /// # Errors
2968    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2969    /// for all possible errors.
2970    ///
2971    /// # Example
2972    ///
2973    /// ```
2974    /// use ndarray as nd;
2975    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2976    /// use bed_reader::assert_eq_nan;
2977    ///
2978    /// // Read the SNPs indexed by 2.
2979    /// let file_name = sample_bed_file("small.bed")?;
2980    /// let mut bed = Bed::new(file_name)?;
2981    /// let read_options = ReadOptions::builder().sid_index(2).build()?;
2982    /// let mut val = nd::Array2::<f64>::default((3, 1));
2983    /// bed.read_and_fill_with_options(&mut val.view_mut(), &read_options)?;
2984    ///
2985    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
2986    /// # use bed_reader::BedErrorPlus;
2987    /// # Ok::<(), Box<BedErrorPlus>>(())
2988    /// ```  
2989    pub fn read_and_fill_with_options<TVal: BedVal>(
2990        &mut self,
2991        val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.,
2992        read_options: &ReadOptions<TVal>,
2993    ) -> Result<(), Box<BedErrorPlus>> {
2994        let iid_count = self.iid_count()?;
2995        let sid_count = self.sid_count()?;
2996
2997        let num_threads = compute_num_threads(read_options.num_threads)?;
2998
2999        // If we already have a Vec<isize>, reference it. If we don't, create one and reference it.
3000        let iid_hold = Hold::new(&read_options.iid_index, iid_count)?;
3001        let iid_index = iid_hold.as_ref();
3002        let sid_hold = Hold::new(&read_options.sid_index, sid_count)?;
3003        let sid_index = sid_hold.as_ref();
3004
3005        let dim = val.dim();
3006        if dim != (iid_index.len(), sid_index.len()) {
3007            Err(BedError::InvalidShape(
3008                iid_index.len(),
3009                sid_index.len(),
3010                dim.0,
3011                dim.1,
3012            ))?;
3013        }
3014
3015        read_no_alloc(
3016            &self.path,
3017            iid_count,
3018            sid_count,
3019            read_options.is_a1_counted,
3020            iid_index,
3021            sid_index,
3022            read_options.missing_value,
3023            num_threads,
3024            &mut val.view_mut(),
3025        )?;
3026
3027        Ok(())
3028    }
3029
3030    /// Read all genotype data into a preallocated array.
3031    ///
3032    /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
3033    ///
3034    /// # Errors
3035    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
3036    /// for all possible errors.
3037    ///
3038    /// # Example
3039    ///
3040    /// ```
3041    /// use ndarray as nd;
3042    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
3043    /// use bed_reader::assert_eq_nan;
3044    ///
3045    /// let file_name = sample_bed_file("small.bed")?;
3046    /// let mut bed = Bed::new(file_name)?;
3047    /// let mut val = nd::Array2::<i8>::default(bed.dim()?);
3048    /// bed.read_and_fill(&mut val.view_mut())?;
3049    ///
3050    /// assert_eq_nan(
3051    ///     &val,
3052    ///     &nd::array![
3053    ///         [1, 0, -127, 0],
3054    ///         [2, 0, -127, 2],
3055    ///         [0, 1, 2, 0]
3056    ///     ],
3057    /// );
3058    /// # use bed_reader::BedErrorPlus;
3059    /// # Ok::<(), Box<BedErrorPlus>>(())
3060    /// ```
3061    pub fn read_and_fill<TVal: BedVal>(
3062        &mut self,
3063        val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.,
3064    ) -> Result<(), Box<BedErrorPlus>> {
3065        let read_options = ReadOptions::<TVal>::builder().build()?;
3066        self.read_and_fill_with_options(val, &read_options)
3067    }
3068
3069    /// Read genotype data with options.
3070    ///
3071    /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
3072    ///
3073    /// # Errors
3074    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
3075    /// for all possible errors.
3076    ///
3077    /// # Example
3078    ///
3079    /// ```
3080    /// use ndarray as nd;
3081    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
3082    /// use bed_reader::assert_eq_nan;
3083    ///
3084    /// // Read the SNPs indexed by 2.
3085    /// let file_name = sample_bed_file("small.bed")?;
3086    /// let mut bed = Bed::new(file_name)?;
3087    /// let read_options = ReadOptions::builder().sid_index(2).f64().build()?;
3088    /// let val = bed.read_with_options(&read_options)?;
3089    ///
3090    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
3091    /// # use bed_reader::BedErrorPlus;
3092    /// # Ok::<(), Box<BedErrorPlus>>(())
3093    /// ```  
3094    pub fn read_with_options<TVal: BedVal>(
3095        &mut self,
3096        read_options: &ReadOptions<TVal>,
3097    ) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
3098        let iid_count_in = self.iid_count()?;
3099        let sid_count_in = self.sid_count()?;
3100        let iid_count_out = read_options.iid_index.len(iid_count_in)?;
3101        let sid_count_out = read_options.sid_index.len(sid_count_in)?;
3102        let shape = ShapeBuilder::set_f((iid_count_out, sid_count_out), read_options.is_f);
3103        let mut val = nd::Array2::<TVal>::default(shape);
3104
3105        self.read_and_fill_with_options(&mut val.view_mut(), read_options)?;
3106
3107        Ok(val)
3108    }
3109    /// Write genotype data with default metadata.
3110    ///
3111    /// > Also see [`WriteOptions::builder`](struct.WriteOptions.html#method.builder), which supports metadata and options.
3112    ///
3113    /// # Errors
3114    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
3115    /// for all possible errors.
3116    ///
3117    /// # Example
3118    /// In this example, write genotype data using default metadata.
3119    /// ```
3120    /// use ndarray as nd;
3121    /// use bed_reader::{Bed, WriteOptions};
3122    ///
3123    /// let output_folder = temp_testdir::TempDir::default();
3124    /// let output_file = output_folder.join("small.bed");
3125    ///
3126    /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
3127    /// Bed::write(&val, &output_file)?;
3128    ///
3129    /// // If we then read the new file and list the chromosome property,
3130    /// // it is an array of zeros, the default chromosome value.
3131    /// let mut bed2 = Bed::new(&output_file)?;
3132    /// println!("{:?}", bed2.chromosome()?); // Outputs ndarray ["0", "0", "0", "0"]
3133    /// # use bed_reader::BedErrorPlus;
3134    /// # Ok::<(), Box<BedErrorPlus>>(())
3135    /// ```
3136    pub fn write<S: nd::Data<Elem = TVal>, TVal: BedVal>(
3137        val: &nd::ArrayBase<S, nd::Ix2>,
3138        path: &Path,
3139    ) -> Result<(), Box<BedErrorPlus>> {
3140        WriteOptions::builder(path).write(val)
3141    }
3142
    /// Given a 2D array of genotype data and a [`WriteOptions`](struct.WriteOptions.html), write to a .bed file.
    ///
    /// > Also see [`WriteOptionsBuilder::write`](struct.WriteOptionsBuilder.html#method.write), which creates
    /// > a [`WriteOptions`](struct.WriteOptions.html) and writes to file in one step.
3147    ///
3148    /// # Example
3149    /// ```
3150    /// use ndarray as nd;
3151    /// use bed_reader::{Bed, WriteOptions};
3152    ///
3153    /// let val = nd::array![
3154    ///     [1.0, 0.0, f64::NAN, 0.0],
3155    ///     [2.0, 0.0, f64::NAN, 2.0],
3156    ///     [0.0, 1.0, 2.0, 0.0]
3157    /// ];
3158    ///
3159    /// let output_folder = temp_testdir::TempDir::default();
3160    /// let output_file = output_folder.join("small.bed");
3161    /// let write_options = WriteOptions::builder(output_file)
3162    ///     .iid(["iid1", "iid2", "iid3"])
3163    ///     .sid(["sid1", "sid2", "sid3", "sid4"])
3164    ///     .build(3,4)?;
3165    ///
3166    /// Bed::write_with_options(&val, &write_options)?;
3167    /// # use bed_reader::BedErrorPlus;
3168    /// # Ok::<(), Box<BedErrorPlus>>(())
3169    /// ```
3170    pub fn write_with_options<S, TVal>(
3171        val: &nd::ArrayBase<S, nd::Ix2>,
3172        write_options: &WriteOptions<TVal>,
3173    ) -> Result<(), Box<BedErrorPlus>>
3174    where
3175        S: nd::Data<Elem = TVal>,
3176        TVal: BedVal,
3177    {
3178        let (iid_count, sid_count) = val.dim();
3179        if iid_count != write_options.iid_count() {
3180            Err(BedError::InconsistentCount(
3181                "iid".into(),
3182                write_options.iid_count(),
3183                iid_count,
3184            ))?;
3185        }
3186        if sid_count != write_options.sid_count() {
3187            Err(BedError::InconsistentCount(
3188                "sid".into(),
3189                write_options.sid_count(),
3190                sid_count,
3191            ))?;
3192        }
3193
3194        let num_threads = compute_num_threads(write_options.num_threads)?;
3195        write_val(
3196            &write_options.path,
3197            val,
3198            write_options.is_a1_counted,
3199            write_options.missing_value,
3200            num_threads,
3201        )?;
3202
3203        if !write_options.skip_fam() {
3204            if let Err(e) = write_options.metadata.write_fam(write_options.fam_path()) {
3205                // Clean up the file
3206                let _ = fs::remove_file(&write_options.fam_path);
3207                Err(e)?;
3208            }
3209        }
3210
3211        if !write_options.skip_bim() {
3212            if let Err(e) = write_options.metadata.write_bim(write_options.bim_path()) {
3213                // Clean up the file
3214                let _ = fs::remove_file(&write_options.bim_path);
3215                Err(e)?;
3216            }
3217        }
3218
3219        Ok(())
3220    }
3221
3222    fn unlazy_fam<T: FromStringArray<T>>(
3223        &mut self,
3224        is_none: bool,
3225        field_index: MetadataFields,
3226        name: &str,
3227    ) -> Result<(), Box<BedErrorPlus>> {
3228        if self.skip_set.contains(&field_index) {
3229            Err(BedError::CannotUseSkippedMetadata(name.to_string()))?;
3230        }
3231        if is_none {
3232            self.fam()?;
3233        }
3234        Ok(())
3235    }
3236
3237    fn unlazy_bim<T: FromStringArray<T>>(
3238        &mut self,
3239        is_none: bool,
3240        field_index: MetadataFields,
3241        name: &str,
3242    ) -> Result<(), Box<BedErrorPlus>> {
3243        if self.skip_set.contains(&field_index) {
3244            Err(BedError::CannotUseSkippedMetadata(name.to_string()))?;
3245        }
3246        if is_none {
3247            self.bim()?;
3248        }
3249        Ok(())
3250    }
3251
3252    fn fam(&mut self) -> Result<(), Box<BedErrorPlus>> {
3253        let fam_path = self.fam_path();
3254
3255        let (metadata, count) = self.metadata.read_fam(fam_path, &self.skip_set)?;
3256        self.metadata = metadata;
3257
3258        match self.iid_count {
3259            Some(iid_count) => {
3260                if iid_count != count {
3261                    Err(BedError::InconsistentCount(
3262                        "iid".to_string(),
3263                        iid_count,
3264                        count,
3265                    ))?;
3266                }
3267            }
3268            None => {
3269                self.iid_count = Some(count);
3270            }
3271        }
3272        Ok(())
3273    }
3274
3275    fn bim(&mut self) -> Result<(), Box<BedErrorPlus>> {
3276        let bim_path = self.bim_path();
3277
3278        let (metadata, count) = self.metadata.read_bim(bim_path, &self.skip_set)?;
3279        self.metadata = metadata;
3280
3281        match self.sid_count {
3282            Some(sid_count) => {
3283                if sid_count != count {
3284                    Err(BedError::InconsistentCount(
3285                        "sid".to_string(),
3286                        sid_count,
3287                        count,
3288                    ))?;
3289                }
3290            }
3291            None => {
3292                self.sid_count = Some(count);
3293            }
3294        }
3295        Ok(())
3296    }
3297}
3298
/// Gives access to an index as a `Vec<isize>` without copying when that can be avoided.
///
/// If we already have a `Vec<isize>`, remember a reference to it.
/// If we don't, then create one.
enum Hold<'a> {
    /// An owned vector, built with `Index::to_vec` for every variant other than `Index::Vec`.
    Copy(Vec<isize>),
    /// A borrow of the vector stored inside an `Index::Vec`.
    Ref(&'a Vec<isize>),
}
3305
3306impl Hold<'_> {
3307    fn new(index: &Index, count: usize) -> Result<Hold, Box<BedErrorPlus>> {
3308        let hold = if let Index::Vec(vec) = index {
3309            Hold::Ref(vec)
3310        } else {
3311            Hold::Copy(index.to_vec(count)?)
3312        };
3313        Ok(hold)
3314    }
3315
3316    fn as_ref(&self) -> &Vec<isize> {
3317        match self {
3318            Hold::Ref(vec) => vec,
3319            Hold::Copy(ref vec) => vec,
3320        }
3321    }
3322}
3323
3324fn compute_num_threads(option_num_threads: Option<usize>) -> Result<usize, Box<BedErrorPlus>> {
3325    let num_threads = if let Some(num_threads) = option_num_threads {
3326        num_threads
3327    } else if let Ok(num_threads) = env::var("BED_READER_NUM_THREADS") {
3328        num_threads.parse::<usize>()?
3329    } else if let Ok(num_threads) = env::var("NUM_THREADS") {
3330        num_threads.parse::<usize>()?
3331    } else {
3332        0
3333    };
3334    Ok(num_threads)
3335}
3336
3337#[allow(clippy::unnecessary_wraps)]
3338fn compute_max_concurrent_requests(
3339    option_max_concurrent_requests: Option<usize>,
3340) -> Result<usize, Box<BedErrorPlus>> {
3341    // In the future, we might want to set this with an environment variable.
3342    let max_concurrent_requests = option_max_concurrent_requests.unwrap_or(10);
3343    Ok(max_concurrent_requests)
3344}
3345
3346#[allow(clippy::unnecessary_wraps)]
3347fn compute_max_chunk_bytes(
3348    option_max_chunk_bytes: Option<usize>,
3349) -> Result<usize, Box<BedErrorPlus>> {
3350    // In the future, we might want to set this with an environment variable.
3351    let max_chunk_bytes = option_max_chunk_bytes.unwrap_or(8_000_000);
3352    Ok(max_chunk_bytes)
3353}
3354
3355impl Index {
3356    // We can't define a 'From' because we want to add count at the last moment.
3357    // Later Would be nice to not always allocate a new vec, maybe with Rc<[T]>?
3358    // Even better would be to support an iterator from Index (an enum with fields).
3359
3360    /// Turns an [`Index`](enum.Index.html) into a vector of usize indexes. Negative means count from end.
3361    pub fn to_vec(&self, count: usize) -> Result<Vec<isize>, Box<BedErrorPlus>> {
3362        let count_signed = count as isize;
3363        match self {
3364            Index::All => Ok((0..count_signed).collect()),
3365            Index::Vec(vec) => Ok(vec.clone()),
3366            Index::NDArrayBool(nd_array_bool) => {
3367                if nd_array_bool.len() != count {
3368                    Err(BedError::BoolArrayVectorWrongLength(
3369                        count,
3370                        nd_array_bool.len(),
3371                    ))?;
3372                }
3373                Ok(nd_array_bool
3374                    .iter()
3375                    .enumerate()
3376                    .filter(|(_, b)| **b)
3377                    .map(|(i, _)| i as isize)
3378                    .collect())
3379            }
3380            Index::NDSliceInfo(nd_slice_info) => {
3381                Ok(RangeNdSlice::new(nd_slice_info, count)?.to_vec())
3382            }
3383            Index::RangeAny(range_any) => {
3384                let range = range_any.to_range(count)?;
3385                Ok(range.map(|i| i as isize).collect::<Vec<isize>>())
3386            }
3387            Index::NDArray(nd_array) => Ok(nd_array.to_vec()),
3388            Index::One(one) => Ok(vec![*one]),
3389            Index::VecBool(vec_bool) => {
3390                if vec_bool.len() != count {
3391                    Err(BedError::BoolArrayVectorWrongLength(count, vec_bool.len()))?;
3392                }
3393                Ok(vec_bool
3394                    .iter()
3395                    .enumerate()
3396                    .filter(|(_, b)| **b)
3397                    .map(|(i, _)| i as isize)
3398                    .collect())
3399            }
3400        }
3401    }
3402}
3403
// `doc_markdown` is allowed here, presumably so that "NDArrays" in the doc text
// does not have to be back-ticked.
#[allow(clippy::doc_markdown)]
/// Type alias for 1-D slices of NDArrays.
///
/// This is the type held by the `NDSliceInfo` variant of [`Index`](enum.Index.html).
/// It is what ndarray's `s!` macro produces for expressions such as `s![..;2]`.
pub type SliceInfo1 =
    nd::SliceInfo<[nd::SliceInfoElem; 1], nd::Dim<[usize; 1]>, nd::Dim<[usize; 1]>>;
3408
/// A specification of which individuals (samples) or SNPs (variants) to read.
///
/// See the [Table of Index Expressions](index.html#index-expressions)
/// for a list of expressions for selecting individuals (samples)
/// and SNPs (variants).
///
/// By default, all individuals or SNPs are read.
/// The indices can be specified as:
///   * an index (negative numbers count from the end)
///   * a vector or ndarray of indices
///   * a Rust range (negatives not allowed)
///   * a vector or ndarray of booleans
///   * an ndarray slice (negative indexing and steps allowed)
///
/// # Examples
/// ```
/// use ndarray as nd;
/// use bed_reader::{Bed, ReadOptions, sample_bed_file};
/// use bed_reader::assert_eq_nan;
/// use ndarray::s;
///
/// let file_name = sample_bed_file("some_missing.bed")?;
/// let mut bed = Bed::new(file_name)?;
/// println!("{:?}", bed.dim()?); // prints (100, 100)
///
/// // Read all individuals and all SNPs
/// let val = ReadOptions::builder().f64().read(&mut bed)?;
/// assert!(val.dim() == (100, 100));
///
/// // Read the individual at index position 10 and all SNPs
/// let val = ReadOptions::builder().iid_index(10).f64().read(&mut bed)?;
/// assert!(val.dim() == (1, 100));
///
/// // Read the individuals at index positions 0,5, 1st-from-the-end and
/// // the SNP at index position 3
/// let val = ReadOptions::builder()
///     .iid_index(vec![0, 5, -1])
///     .sid_index(3)
///     .f64()
///     .read(&mut bed)?;
/// assert!(val.dim() == (3, 1));
/// // Repeat, but with an ndarray
/// let val = ReadOptions::builder()
///     .iid_index(nd::array![0, 5, -1])
///     .sid_index(3)
///     .f64()
///     .read(&mut bed)?;
/// assert!(val.dim() == (3, 1));
/// // Repeat, but with a Rust array
/// let val = ReadOptions::builder()
///     .iid_index([0, 5, -1])
///     .sid_index(3)
///     .f64()
///     .read(&mut bed)?;
/// assert!(val.dim() == (3, 1));
///
/// // Create a boolean ndarray identifying SNPs in chromosome 5,
/// // then select those SNPs.
/// let chrom_5 = bed.chromosome()?.map(|elem| elem == "5");
/// let val = ReadOptions::builder()
///     .sid_index(chrom_5)
///     .f64()
///     .read(&mut bed)?;
/// assert!(val.dim() == (100, 6));
///
/// // Use ndarray's slice macro, [`s!`](https://docs.rs/ndarray/latest/ndarray/macro.s.html),
/// // to select every 2nd individual and every 3rd SNP.
/// let val = ReadOptions::builder()
///     .iid_index(s![..;2])
///     .sid_index(s![..;3])
///     .f64()
///     .read(&mut bed)?;
/// assert!(val.dim() == (50, 34));
/// // Use ndarray's slice macro, [`s!`](https://docs.rs/ndarray/latest/ndarray/macro.s.html),
/// // to select the 10th-from-last individual to the last, in reverse order,
/// // and every 3rd SNP in reverse order.
/// let val = ReadOptions::builder()
///     .iid_index(s![-10..;-1])
///     .sid_index(s![..;-3])
///     .f64()
///     .read(&mut bed)?;
/// assert!(val.dim() == (10, 34));
/// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
#[derive(Debug, Clone)]
pub enum Index {
    // Could implement an enumerator, but it is complex and requires a 'match' on each next()
    //     https://stackoverflow.com/questions/65272613/how-to-implement-intoiterator-for-an-enum-of-iterable-variants
    /// Every position, from 0 up to (but not including) the count.
    #[allow(missing_docs)]
    All,
    /// A single index; a negative number counts from the end.
    #[allow(missing_docs)]
    One(isize),
    /// A vector of indexes; negative numbers count from the end.
    #[allow(missing_docs)]
    Vec(Vec<isize>),
    /// An ndarray of indexes; negative numbers count from the end.
    #[allow(missing_docs)]
    NDArray(nd::Array1<isize>),
    /// A vector of booleans with one entry per position; `true` selects the position.
    #[allow(missing_docs)]
    VecBool(Vec<bool>),
    /// An ndarray of booleans with one entry per position; `true` selects the position.
    #[allow(missing_docs)]
    NDArrayBool(nd::Array1<bool>),
    /// A 1-D ndarray slice, such as `s![..;2]`; negative bounds and steps are allowed.
    #[allow(missing_docs)]
    NDSliceInfo(SliceInfo1),
    /// A Rust range of `usize`, such as `0..10` or `..10`.
    #[allow(missing_docs)]
    RangeAny(RangeAny),
}
3516
#[doc(hidden)]
/// Used internally to represent Rust ranges such as `0..10`, `..10`, etc.
///
/// Both bounds are stored in half-open form; the axis length is only supplied
/// later, when the range is turned into a concrete `Range<usize>`.
#[derive(Debug, Clone)]
pub struct RangeAny {
    // Inclusive start, or `None` when the range has no lower bound.
    start: Option<usize>,
    // Exclusive end, or `None` when the range has no upper bound.
    end: Option<usize>,
}
3524
3525impl RangeAny {
3526    fn new<T: RangeBounds<usize>>(range_thing: &T) -> RangeAny {
3527        let start_bound = range_thing.start_bound();
3528        let start = match start_bound {
3529            Bound::Included(&start) => Some(start),
3530            Bound::Excluded(&start) => Some(start + 1),
3531            Bound::Unbounded => None,
3532        };
3533
3534        let end_bound = range_thing.end_bound();
3535        let end = match end_bound {
3536            Bound::Included(&end) => Some(end + 1),
3537            Bound::Excluded(&end) => Some(end),
3538            Bound::Unbounded => None,
3539        };
3540        RangeAny { start, end }
3541    }
3542
3543    // https://stackoverflow.com/questions/55925523/array-cannot-be-indexed-by-rangefull
3544    fn to_range(&self, count: usize) -> Result<Range<usize>, Box<BedErrorPlus>> {
3545        let start = self.start.unwrap_or_default();
3546        let end = if let Some(end) = self.end { end } else { count };
3547        if start > end {
3548            Err(BedError::StartGreaterThanEnd(start, end).into())
3549        } else {
3550            Ok(Range { start, end })
3551        }
3552    }
3553
3554    fn len(&self, count: usize) -> Result<usize, Box<BedErrorPlus>> {
3555        let range = self.to_range(count)?;
3556        Ok(range.end - range.start)
3557    }
3558
3559    fn is_empty(&self, count: usize) -> Result<bool, Box<BedErrorPlus>> {
3560        Ok(self.len(count)? == 0)
3561    }
3562}
3563
#[doc(hidden)]
#[derive(Debug, Clone)]
/// Used internally to represent NDArray Slices such as `s![..]`, `s![0..;2]`, `s![0..10;-1]`
///
/// The slice is stored in resolved form: bounds that counted from the end of the
/// axis have already been converted to positions counted from the front.
pub struct RangeNdSlice {
    // First position covered (inclusive).
    start: usize,
    // One past the last position covered (exclusive).
    end: usize,
    // Distance between selected positions; the sign of the original step is kept in `is_reversed`.
    step: usize,
    // True when the original step was negative, so positions are produced from the back.
    is_reversed: bool,
}
3573
/// Returns `a / b` rounded up to the next whole number.
///
/// Computed from the quotient and remainder rather than as `(a + b - 1) / b`,
/// because that sum can overflow `usize` when `a` is large.
///
/// # Panics
/// Panics if `b` is 0.
fn div_ceil(a: usize, b: usize) -> usize {
    // One extra step is needed exactly when the division leaves a remainder.
    a / b + usize::from(a % b != 0)
}
3578
3579impl RangeNdSlice {
3580    fn len(&self) -> usize {
3581        if self.start > self.end {
3582            0
3583        } else {
3584            div_ceil(self.end - self.start, self.step)
3585        }
3586    }
3587
3588    fn is_empty(&self) -> bool {
3589        self.len() == 0
3590    }
3591
3592    // https://docs.rs/ndarray/0.15.4/ndarray/struct.ArrayBase.html#slicing
3593    fn to_vec(&self) -> Vec<isize> {
3594        if self.start >= self.end {
3595            Vec::new()
3596        } else if !self.is_reversed {
3597            (self.start..self.end)
3598                .step_by(self.step)
3599                .map(|i| i as isize)
3600                .collect()
3601        } else {
3602            // https://docs.rs/ndarray/latest/ndarray/macro.s.html
3603            let size = self.len();
3604            let mut vec: Vec<isize> = Vec::<isize>::with_capacity(size);
3605            let mut i = self.end - 1;
3606            while i >= self.start {
3607                vec.push(i as isize);
3608                if i < self.step {
3609                    break;
3610                }
3611                i -= self.step;
3612            }
3613            vec
3614        }
3615    }
3616
3617    fn new(nd_slice_info: &SliceInfo1, count: usize) -> Result<Self, Box<BedErrorPlus>> {
3618        //  self.to_vec(count).len(),
3619        // https://docs.rs/ndarray/0.15.4/ndarray/struct.ArrayBase.html#method.slice_collapse
3620        // Error in the following cases
3621        // * SliceInfo is not a 1-dimensional or is a NewAxis
3622        // * Step is 0
3623        // * Start is greater than count
3624        // * End is greater than count
3625        // As with ndarray, Start can be greater than End is allowed
3626        // and means the slice is empty.
3627        if nd_slice_info.in_ndim() != 1 || nd_slice_info.out_ndim() != 1 {
3628            Err(BedError::NdSliceInfoNot1D)?;
3629        }
3630
3631        let slice_info_elem = nd_slice_info[0];
3632        match slice_info_elem {
3633            nd::SliceInfoElem::Slice { start, end, step } => {
3634                // https://docs.rs/ndarray/0.15.4/ndarray/enum.SliceInfoElem.html
3635                // s![..], 0,None,1
3636                // s![a..b;2] a,b,2
3637                // s![a..;-1], from a to end in reverse order
3638                // start index; negative are counted from the back of the axis
3639                // end index; negative are counted from the back of the axis; when not present the default is the full length of the axis.
3640                // step size in elements; the default is 1, for every element.
3641                // A range with step size. end is an exclusive index. Negative start or end indexes are counted from the back of the axis. If end is None, the slice extends to the end of the axis.
3642                let (step2, is_reverse2) = match step.cmp(&0) {
3643                    Ordering::Greater => (step as usize, false),
3644                    Ordering::Less => ((-step) as usize, true),
3645                    Ordering::Equal => Err(BedError::StepZero)?,
3646                };
3647
3648                let start2 = if start >= 0 {
3649                    let start3 = start as usize;
3650                    if start3 > count {
3651                        Err(BedError::StartGreaterThanCount(start3, count))?;
3652                    }
3653                    start3
3654                } else {
3655                    let start3 = (-start) as usize;
3656                    if start3 > count {
3657                        Err(BedError::StartGreaterThanCount(start3, count))?;
3658                    }
3659                    count - start3
3660                };
3661
3662                let end2 = if let Some(end) = end {
3663                    if end >= 0 {
3664                        let end3 = end as usize;
3665                        if end3 > count {
3666                            Err(BedError::EndGreaterThanCount(end3, count))?;
3667                        }
3668                        end3
3669                    } else {
3670                        let end3 = (-end) as usize;
3671                        if end3 > count {
3672                            Err(BedError::EndGreaterThanCount(end3, count))?;
3673                        }
3674                        count - end3
3675                    }
3676                } else {
3677                    count
3678                };
3679
3680                Ok(RangeNdSlice {
3681                    start: start2,
3682                    end: end2,
3683                    step: step2,
3684                    is_reversed: is_reverse2,
3685                })
3686            }
3687            nd::SliceInfoElem::Index(index) => Ok(RangeNdSlice {
3688                start: index as usize,
3689                end: index as usize + 1,
3690                step: 1,
3691                is_reversed: false,
3692            }),
3693            nd::SliceInfoElem::NewAxis => Err(BedError::NewAxis.into()),
3694        }
3695    }
3696}
3697
3698impl Index {
3699    /// Returns the number of elements in an [`Index`](enum.Index.html).
3700    #[allow(clippy::len_without_is_empty)]
3701    pub fn len(&self, count: usize) -> Result<usize, Box<BedErrorPlus>> {
3702        match self {
3703            Index::All => Ok(count),
3704            Index::One(_) => Ok(1),
3705            Index::Vec(vec) => Ok(vec.len()),
3706            Index::NDArray(nd_array) => Ok(nd_array.len()),
3707            Index::VecBool(vec_bool) => Ok(vec_bool.iter().filter(|&b| *b).count()),
3708            Index::NDArrayBool(nd_array_bool) => Ok(nd_array_bool.iter().filter(|&b| *b).count()),
3709            Index::NDSliceInfo(nd_slice_info) => Ok(RangeNdSlice::new(nd_slice_info, count)?.len()),
3710            Index::RangeAny(range_any) => range_any.len(count),
3711        }
3712    }
3713
3714    /// Returns true if the [`Index`](enum.Index.html) is empty.
3715    pub fn is_empty(&self, count: usize) -> Result<bool, Box<BedErrorPlus>> {
3716        match self {
3717            Index::All => Ok(count == 0),
3718            Index::One(_) => Ok(false),
3719            Index::Vec(vec) => Ok(vec.is_empty()),
3720            Index::NDArray(nd_array) => Ok(nd_array.is_empty()),
3721            Index::VecBool(vec_bool) => Ok(!vec_bool.iter().any(|&b| b)),
3722            Index::NDArrayBool(nd_array_bool) => Ok(!nd_array_bool.iter().any(|&b| b)),
3723            Index::NDSliceInfo(nd_slice_info) => {
3724                Ok(RangeNdSlice::new(nd_slice_info, count)?.is_empty())
3725            }
3726            Index::RangeAny(range_any) => range_any.is_empty(count),
3727        }
3728    }
3729}
3730
3731impl From<SliceInfo1> for Index {
3732    fn from(slice_info: SliceInfo1) -> Index {
3733        Index::NDSliceInfo(slice_info)
3734    }
3735}
3736impl From<&SliceInfo1> for Index {
3737    fn from(slice_info: &SliceInfo1) -> Index {
3738        Index::NDSliceInfo(slice_info.to_owned())
3739    }
3740}
3741
3742impl From<RangeFull> for Index {
3743    fn from(range_thing: RangeFull) -> Index {
3744        Index::RangeAny(RangeAny::new(&range_thing))
3745    }
3746}
3747
3748impl From<&RangeFull> for Index {
3749    fn from(range_thing: &RangeFull) -> Index {
3750        Index::RangeAny(RangeAny::new(range_thing))
3751    }
3752}
3753
3754impl From<Range<usize>> for Index {
3755    fn from(range_thing: Range<usize>) -> Index {
3756        Index::RangeAny(RangeAny::new(&range_thing))
3757    }
3758}
3759
3760impl From<&Range<usize>> for Index {
3761    fn from(range_thing: &Range<usize>) -> Index {
3762        Index::RangeAny(RangeAny::new(range_thing))
3763    }
3764}
3765
3766impl From<RangeFrom<usize>> for Index {
3767    fn from(range_thing: RangeFrom<usize>) -> Index {
3768        Index::RangeAny(RangeAny::new(&range_thing))
3769    }
3770}
3771
3772impl From<&RangeFrom<usize>> for Index {
3773    fn from(range_thing: &RangeFrom<usize>) -> Index {
3774        Index::RangeAny(RangeAny::new(range_thing))
3775    }
3776}
3777
3778impl From<RangeInclusive<usize>> for Index {
3779    fn from(range_thing: RangeInclusive<usize>) -> Index {
3780        Index::RangeAny(RangeAny::new(&range_thing))
3781    }
3782}
3783
3784impl From<&RangeInclusive<usize>> for Index {
3785    fn from(range_thing: &RangeInclusive<usize>) -> Index {
3786        Index::RangeAny(RangeAny::new(range_thing))
3787    }
3788}
3789
3790impl From<RangeTo<usize>> for Index {
3791    fn from(range_thing: RangeTo<usize>) -> Index {
3792        Index::RangeAny(RangeAny::new(&range_thing))
3793    }
3794}
3795
3796impl From<&RangeTo<usize>> for Index {
3797    fn from(range_thing: &RangeTo<usize>) -> Index {
3798        Index::RangeAny(RangeAny::new(range_thing))
3799    }
3800}
3801
3802impl From<RangeToInclusive<usize>> for Index {
3803    fn from(range_thing: RangeToInclusive<usize>) -> Index {
3804        Index::RangeAny(RangeAny::new(&range_thing))
3805    }
3806}
3807
3808impl From<&RangeToInclusive<usize>> for Index {
3809    fn from(range_thing: &RangeToInclusive<usize>) -> Index {
3810        Index::RangeAny(RangeAny::new(range_thing))
3811    }
3812}
3813
3814impl From<&[isize]> for Index {
3815    fn from(array: &[isize]) -> Index {
3816        Index::Vec(array.to_vec())
3817    }
3818}
3819
3820impl<const N: usize> From<[isize; N]> for Index {
3821    fn from(array: [isize; N]) -> Index {
3822        Index::Vec(array.to_vec())
3823    }
3824}
3825
3826impl<const N: usize> From<&[isize; N]> for Index {
3827    fn from(array: &[isize; N]) -> Index {
3828        Index::Vec(array.to_vec())
3829    }
3830}
3831
3832impl From<&nd::ArrayView1<'_, isize>> for Index {
3833    fn from(view: &nd::ArrayView1<isize>) -> Index {
3834        Index::NDArray(view.to_owned())
3835    }
3836}
3837
3838impl From<nd::ArrayView1<'_, isize>> for Index {
3839    fn from(view: nd::ArrayView1<isize>) -> Index {
3840        Index::NDArray(view.to_owned())
3841    }
3842}
3843
3844impl From<Vec<isize>> for Index {
3845    fn from(vec: Vec<isize>) -> Index {
3846        Index::Vec(vec)
3847    }
3848}
3849impl From<&Vec<isize>> for Index {
3850    fn from(vec_ref: &Vec<isize>) -> Index {
3851        Index::Vec(vec_ref.clone())
3852    }
3853}
3854
3855impl From<nd::ArrayView1<'_, bool>> for Index {
3856    fn from(view: nd::ArrayView1<bool>) -> Index {
3857        Index::NDArrayBool(view.to_owned())
3858    }
3859}
3860
3861impl From<&nd::ArrayView1<'_, bool>> for Index {
3862    fn from(view: &nd::ArrayView1<bool>) -> Index {
3863        Index::NDArrayBool(view.to_owned())
3864    }
3865}
3866
3867impl From<&Vec<bool>> for Index {
3868    fn from(vec_ref: &Vec<bool>) -> Index {
3869        Index::VecBool(vec_ref.clone())
3870    }
3871}
3872
3873impl From<&[bool]> for Index {
3874    fn from(array: &[bool]) -> Index {
3875        Index::VecBool(array.to_vec())
3876    }
3877}
3878
3879impl<const N: usize> From<[bool; N]> for Index {
3880    fn from(array: [bool; N]) -> Index {
3881        Index::VecBool(array.to_vec())
3882    }
3883}
3884
3885impl<const N: usize> From<&[bool; N]> for Index {
3886    fn from(array: &[bool; N]) -> Index {
3887        Index::VecBool(array.to_vec())
3888    }
3889}
3890
3891impl From<isize> for Index {
3892    fn from(one: isize) -> Index {
3893        Index::One(one)
3894    }
3895}
3896impl From<&isize> for Index {
3897    fn from(one: &isize) -> Index {
3898        Index::One(one.to_owned())
3899    }
3900}
3901
3902impl From<nd::Array1<isize>> for Index {
3903    fn from(nd_array: nd::Array1<isize>) -> Index {
3904        Index::NDArray(nd_array)
3905    }
3906}
3907
3908impl From<&nd::Array1<isize>> for Index {
3909    fn from(nd_array: &nd::Array1<isize>) -> Index {
3910        Index::NDArray(nd_array.to_owned())
3911    }
3912}
3913
3914impl From<nd::Array1<bool>> for Index {
3915    fn from(nd_array_bool: nd::Array1<bool>) -> Index {
3916        Index::NDArrayBool(nd_array_bool)
3917    }
3918}
3919
3920impl From<&nd::Array1<bool>> for Index {
3921    fn from(nd_array_bool: &nd::Array1<bool>) -> Index {
3922        Index::NDArrayBool(nd_array_bool.clone())
3923    }
3924}
3925
3926impl From<Vec<bool>> for Index {
3927    fn from(vec_bool: Vec<bool>) -> Index {
3928        Index::VecBool(vec_bool)
3929    }
3930}
3931
3932impl From<()> for Index {
3933    fn from((): ()) -> Index {
3934        Index::All
3935    }
3936}
3937
3938// See https://nullderef.com/blog/rust-parameters/
3939
3940/// Represents options for reading genotype data from a PLINK .bed file.
3941///
3942/// Construct with [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
3943///
3944/// See the [Table of `ReadOptions`](index.html#readoptions)
3945/// for a list of the supported options.
3946/// See the [Table of Index Expressions](index.html#index-expressions)
3947/// for a list of expressions for selecting individuals (sample)
3948/// and SNPs (variants).
3949#[derive(Debug, Clone, Builder)]
3950#[builder(build_fn(error = "Box<BedErrorPlus>"))]
3951pub struct ReadOptions<TVal: BedVal> {
3952    /// Value to use for missing values (defaults to -127 or NaN)
3953    ///
3954    /// -127 is the default for i8 and NaN is the default for f32 and f64.
3955    ///
3956    /// In this example, the missing value is set to -1:
3957    /// ```
3958    /// use ndarray as nd;
3959    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
3960    /// use bed_reader::assert_eq_nan;
3961    ///
3962    /// let file_name = sample_bed_file("small.bed")?;
3963    /// let mut bed = Bed::new(file_name)?;
3964    /// let val = ReadOptions::builder().missing_value(-1).i8().read(&mut bed)?;
3965    ///
3966    /// assert_eq_nan(
3967    ///     &val,
3968    ///     &nd::array![
3969    ///         [1, 0, -1, 0],
3970    ///         [2, 0, -1, 2],
3971    ///         [0, 1, 2, 0]
3972    ///     ],
3973    /// );
3974    /// # use bed_reader::BedErrorPlus;
3975    /// # Ok::<(), Box<BedErrorPlus>>(())
3976    /// ```
3977    #[builder(default = "TVal::missing()")]
3978    missing_value: TVal,
3979
3980    /// Select which individual (sample) values to read -- Defaults to all.
3981    ///
3982    /// Can select with a signed number, various lists of signed numbers,
3983    /// ranges, and various lists of booleans.
3984    ///
3985    /// See the [Table of Index Expressions](index.html#index-expressions)
3986    /// for a list of the supported index expressions.
3987    ///
3988    /// # Examples:
3989    /// ```
3990    /// use ndarray as nd;
3991    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
3992    /// use ndarray::s;
3993    ///
3994    /// let file_name = sample_bed_file("some_missing.bed")?;
3995    /// let mut bed = Bed::new(file_name)?;
3996    ///
3997    /// // Read the individual at index position 3
3998    ///
3999    /// let val = ReadOptions::builder()
4000    ///     .iid_index(3)
4001    ///     .f64()
4002    ///     .read(&mut bed)?;
4003    /// assert!(val.dim() == (1, 100));
4004    ///
4005    /// // Read the individuals at index positions 0, 5, and 1st-from-last.
4006    ///
4007    /// let val = ReadOptions::builder()
4008    ///     .iid_index([0, 5, -1])
4009    ///     .f64()
4010    ///     .read(&mut bed)?;
4011    ///
4012    /// assert!(val.dim() == (3, 100));
4013    ///
4014    /// // Read the individuals at index positions 20 (inclusive) to 30 (exclusive).
4015    ///
4016    /// let val = ReadOptions::builder()
4017    ///     .iid_index(20..30)
4018    ///     .f64()
4019    ///     .read(&mut bed)?;
4020    ///
4021    /// assert!(val.dim() == (10, 100));
4022    ///
4023    /// // Read the individuals at every 2nd index position.
4024    ///
4025    /// let val = ReadOptions::builder()
4026    ///     .iid_index(s![..;2])
4027    ///     .f64()
4028    ///     .read(&mut bed)?;
4029    ///
4030    /// assert!(val.dim() == (50, 100));
4031    ///
4032    /// // Read chromosome 5 of the female individuals.
4033    ///
4034    /// let female = bed.sex()?.map(|elem| *elem == 2);
4035    /// let chrom_5 = bed.chromosome()?.map(|elem| elem == "5");
4036    /// let val = ReadOptions::builder()
4037    ///     .iid_index(female)
4038    ///     .sid_index(chrom_5)
4039    ///     .f64()
4040    ///     .read(&mut bed)?;
4041    ///
4042    /// assert!(val.dim() == (50, 6));
4043    /// # use bed_reader::BedErrorPlus;
4044    /// # Ok::<(), Box<BedErrorPlus>>(())
4045    /// ```
4046    #[builder(default = "Index::All")]
4047    #[builder(setter(into))]
4048    iid_index: Index,
4049
4050    /// Select which SNPs (variant) values to read -- Defaults to all.
4051    ///
4052    /// Can select with a signed number, various lists of signed numbers,
4053    /// ranges, and various lists of booleans.
4054    ///
4055    /// See the [Table of Index Expressions](index.html#index-expressions)
4056    /// for a list of the supported index expressions.
4057    ///
4058    /// # Examples:
4059    /// ```
4060    /// use ndarray as nd;
4061    /// use ndarray::s;
4062    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
4063    ///
4064    /// let file_name = sample_bed_file("some_missing.bed")?;
4065    /// let mut bed = Bed::new(file_name)?;
4066    ///
4067    /// // Read the SNP at index position 3
4068    ///
4069    /// let val = ReadOptions::builder()
4070    ///     .sid_index(3)
4071    ///     .f64()
4072    ///     .read(&mut bed)?;
4073    /// assert!(val.dim() == (100, 1));
4074    ///
4075    /// // Read the SNPs at index positions 0, 5, and 1st-from-last.
4076    ///
4077    /// let val = ReadOptions::builder()
4078    ///     .sid_index([0, 5, -1])
4079    ///     .f64()
4080    ///     .read(&mut bed)?;
4081    ///
4082    /// assert!(val.dim() == (100, 3));
4083    ///
4084    /// // Read the SNPs at index positions 20 (inclusive) to 30 (exclusive).
4085    ///
4086    /// let val = ReadOptions::builder()
4087    ///     .sid_index(20..30)
4088    ///     .f64()
4089    ///     .read(&mut bed)?;
4090    ///
4091    /// assert!(val.dim() == (100, 10));
4092    ///
4093    /// // Read the SNPs at every 2nd index position.
4094    ///
4095    /// let val = ReadOptions::builder()
4096    ///     .sid_index(s![..;2])
4097    ///     .f64()
4098    ///     .read(&mut bed)?;
4099    ///
4100    /// assert!(val.dim() == (100, 50));
4101    ///
4102    /// // Read chromosome 5 of the female individuals.
4103    ///
4104    /// let female = bed.sex()?.map(|elem| *elem == 2);
4105    /// let chrom_5 = bed.chromosome()?.map(|elem| elem == "5");
4106    /// let val = ReadOptions::builder()
4107    ///     .iid_index(female)
4108    ///     .sid_index(chrom_5)
4109    ///     .f64()
4110    ///     .read(&mut bed)?;
4111    ///
4112    /// assert!(val.dim() == (50, 6));
4113    /// # use bed_reader::BedErrorPlus;
4114    /// # Ok::<(), Box<BedErrorPlus>>(())
4115    /// ```
4116    #[builder(default = "Index::All")]
4117    #[builder(setter(into))]
4118    sid_index: Index,
4119
4120    /// Sets if the order of the output array is Fortran-style -- Default is true.
4121    ///
4122    /// "Fortran order" is also called "column-major order" [Wikipedia](https://en.wikipedia.org/wiki/Row-_and_column-major_order).
4123    ///
4124    /// Also see [`f`](struct.ReadOptionsBuilder.html#method.f) and [`c`](struct.ReadOptionsBuilder.html#method.c).
4125    #[builder(default = "true")]
4126    is_f: bool,
4127
4128    /// Sets if allele 1 is counted. Default is true.
4129    ///
4130    /// Also see [`count_a1`](struct.ReadOptionsBuilder.html#method.count_a1) and [`count_a2`](struct.ReadOptionsBuilder.html#method.count_a2).
4131    #[builder(default = "true")]
4132    is_a1_counted: bool,
4133
4134    /// Number of threads to use (defaults to all processors)
4135    ///
4136    /// Can also be set with an environment variable.
4137    /// See [Environment Variables](index.html#environment-variables).
4138    ///
4139    /// In this example, we read using only one thread.
4140    /// ```
4141    /// use ndarray as nd;
4142    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4143    /// use bed_reader::assert_eq_nan;
4144    ///
4145    /// let file_name = sample_bed_file("small.bed")?;
4146    /// let mut bed = Bed::new(file_name)?;
4147    /// let val = ReadOptions::builder().num_threads(1).i8().read(&mut bed)?;
4148    ///
4149    /// assert_eq_nan(
4150    ///     &val,
4151    ///     &nd::array![
4152    ///         [1, 0, -127, 0],
4153    ///         [2, 0, -127, 2],
4154    ///         [0, 1, 2, 0]
4155    ///     ],
4156    /// );
4157    /// # use bed_reader::BedErrorPlus;
4158    /// # Ok::<(), Box<BedErrorPlus>>(())
4159    /// ```
4160    #[builder(default, setter(strip_option))]
4161    num_threads: Option<usize>,
4162
4163    // LATER: Allow this to be set with an environment variable.
4164    /// Maximum number of concurrent async requests (defaults to 10) --
4165    /// Used by [`BedCloud`](struct.BedCloud.html).
4166    ///
    /// In this example, we read using only one request at a time.
4168    /// ```
4169    /// use ndarray as nd;
4170    /// use bed_reader::{BedCloud, ReadOptions};
4171    /// use bed_reader::assert_eq_nan;
4172    ///
4173    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
4174    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
4175    /// let mut bed_cloud = BedCloud::new(&url).await?;
4176    /// let val = ReadOptions::builder().max_concurrent_requests(1).i8().read_cloud(&mut bed_cloud).await?;
4177    ///
4178    /// assert_eq_nan(
4179    ///     &val,
4180    ///     &nd::array![
4181    ///         [1, 0, -127, 0],
4182    ///         [2, 0, -127, 2],
4183    ///         [0, 1, 2, 0]
4184    ///     ],
4185    /// );
4186    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
    /// ```
4188    #[builder(default, setter(strip_option))]
4189    #[allow(dead_code)]
4190    max_concurrent_requests: Option<usize>,
4191
4192    // LATER: Allow this to be set with an environment variable.
4193    /// Maximum chunk size of async requests (defaults to `8_000_000` bytes) --
4194    /// Used by [`BedCloud`](struct.BedCloud.html).
4195    ///
4196    /// In this example, we read using only `1_000_000` bytes per request.
4197    /// ```
4198    /// use ndarray as nd;
4199    /// use bed_reader::{BedCloud, ReadOptions};
4200    /// use bed_reader::assert_eq_nan;
4201    ///
4202    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
4203    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
4204    /// let mut bed_cloud = BedCloud::new(&url).await?;
4205    /// let val = ReadOptions::builder().max_chunk_bytes(1_000_000).i8().read_cloud(&mut bed_cloud).await?;
4206    ///
4207    /// assert_eq_nan(
4208    ///     &val,
4209    ///     &nd::array![
4210    ///         [1, 0, -127, 0],
4211    ///         [2, 0, -127, 2],
4212    ///         [0, 1, 2, 0]
4213    ///     ],
4214    /// );
4215    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
4216    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
4217    /// ```
4218    #[builder(default, setter(strip_option))]
4219    #[allow(dead_code)]
4220    max_chunk_bytes: Option<usize>,
4221}
4222
4223impl<TVal: BedVal> ReadOptions<TVal> {
4224    /// Read genotype data. Supports selection and options.
4225    ///
4226    /// > Also see [`Bed::read`](struct.Bed.html#method.read) (read without options).
4227    /// > To fill a preallocated ndarray, see [`ReadOptionsBuilder::read_and_fill`](struct.ReadOptionsBuilder.html#method.read_and_fill).
4228    ///
4229    /// See the [Table of `ReadOptions`](index.html#readoptions)
4230    /// for a list of the supported options.
4231    /// See the [Table of Index Expressions](index.html#index-expressions)
4232    /// for a list of expressions for selecting individuals (sample)
4233    /// and SNPs (variants).
4234    ///
4235    /// # Errors
4236    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4237    /// for all possible errors.
4238    ///
4239    /// # Examples
4240    ///
4241    /// ```
4242    /// use ndarray as nd;
4243    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4244    /// use bed_reader::assert_eq_nan;
4245    ///
4246    /// // Read all data from a .bed file into an ndarray of f64.
4247    /// let file_name = sample_bed_file("small.bed")?;
4248    /// let mut bed = Bed::new(file_name)?;
4249    /// let val = ReadOptions::builder().f64().read(&mut bed)?;
4250    ///
4251    /// assert_eq_nan(
4252    ///     &val,
4253    ///     &nd::array![
4254    ///         [1.0, 0.0, f64::NAN, 0.0],
4255    ///         [2.0, 0.0, f64::NAN, 2.0],
4256    ///         [0.0, 1.0, 2.0, 0.0]
4257    ///     ],
4258    /// );
4259    ///
4260    /// // Read the SNPs indexed by 2.
4261    /// let val = ReadOptions::builder().sid_index(2).f64().read(&mut bed)?;
4262    ///
4263    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4264    ///
4265    /// // Read the SNPs indexed by 2, 3, and 4th from last.
4266    /// let val = ReadOptions::builder()
4267    ///     .sid_index([2, 3, -4])
4268    ///     .f64()
4269    ///     .read(&mut bed)?;
4270    ///
4271    /// assert_eq_nan(
4272    ///     &val,
4273    ///     &nd::array![[f64::NAN, 0.0, 1.0], [f64::NAN, 2.0, 2.0], [2.0, 0.0, 0.0]],
4274    /// );
4275    ///
4276    /// //  Read SNPs from 1 (inclusive) to 4 (exclusive).
4277    /// let val = ReadOptions::builder()
4278    ///     .sid_index(1..4)
4279    ///     .f64()
4280    ///     .read(&mut bed)?;
4281    ///
4282    /// assert_eq_nan(
4283    ///     &val,
4284    ///     &nd::array![[0.0, f64::NAN, 0.0], [0.0, f64::NAN, 2.0], [1.0, 2.0, 0.0]],
4285    /// );
4286    ///
4287    /// // Print unique chrom values. Then, read all SNPs in chrom 5.
4288    /// use std::collections::HashSet;
4289    ///
4290    /// println!("{:?}", bed.chromosome()?.iter().collect::<HashSet<_>>());
4291    /// // This outputs: {"1", "5", "Y"}.
4292    /// let val = ReadOptions::builder()
4293    ///     .sid_index(bed.chromosome()?.map(|elem| elem == "5"))
4294    ///     .f64()
4295    ///     .read(&mut bed)?;
4296    ///
4297    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4298    ///
4299    /// // Read 1st individual (across all SNPs).
4300    /// let val = ReadOptions::builder().iid_index(0).f64().read(&mut bed)?;
4301    /// assert_eq_nan(&val, &nd::array![[1.0, 0.0, f64::NAN, 0.0]]);
4302    ///
4303    /// // Read every 2nd individual.
4304    /// use ndarray::s;
4305    ///
4306    /// let val = ReadOptions::builder()
4307    ///     .iid_index(s![..;2])
4308    ///     .f64()
4309    ///     .read(&mut bed)?;
4310    /// assert_eq_nan(
4311    ///     &val,
4312    ///     &nd::array![[1.0, 0.0, f64::NAN, 0.0], [0.0, 1.0, 2.0, 0.0]],
4313    /// );
4314    ///
4315    /// // Read last and 2nd-to-last individuals and the last SNP
4316    /// let val = ReadOptions::builder()
4317    ///     .iid_index([-1,-2])
4318    ///     .sid_index(-1)
4319    ///     .f64()
4320    ///     .read(&mut bed)?;
4321    ///
4322    /// assert_eq_nan(&val, &nd::array![[0.0],[2.0]]);
4323    ///
4324    /// // The output array can be f32, f64, or i8
4325    /// let val = ReadOptions::builder().i8().read(&mut bed)?;
4326    ///
4327    /// assert_eq_nan(
4328    ///     &val,
4329    ///     &nd::array![
4330    ///         [1, 0, -127, 0],
4331    ///         [2, 0, -127, 2],
4332    ///         [0, 1, 2, 0]
4333    ///     ],
4334    /// );
4335    /// # use bed_reader::BedErrorPlus;
4336    /// # Ok::<(), Box<BedErrorPlus>>(())
4337    /// ```
4338    #[must_use]
4339    pub fn builder() -> ReadOptionsBuilder<TVal> {
4340        ReadOptionsBuilder::default()
4341    }
4342
4343    /// Value to be used for missing values (defaults to -127 or NaN).
4344    ///
4345    /// # Example
4346    /// ```
4347    /// use ndarray as nd;
4348    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4349    /// use bed_reader::assert_eq_nan;
4350    ///
4351    /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4352    /// assert_eq!(read_options.missing_value(), -127);
4353    ///
4354    /// let file_name = sample_bed_file("small.bed")?;
4355    /// let mut bed = Bed::new(file_name)?;
4356    /// let val = bed.read_with_options(&read_options)?;
4357
4358    /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4359    /// # use bed_reader::BedErrorPlus;
4360    /// # Ok::<(), Box<BedErrorPlus>>(())
4361    /// ```
4362    pub fn missing_value(&self) -> TVal {
4363        self.missing_value
4364    }
4365
4366    /// Index of individuals (samples) to read (defaults to all).
4367    ///
4368    /// # Example
4369    /// ```
4370    /// use ndarray as nd;
4371    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4372    /// use bed_reader::assert_eq_nan;
4373    ///
4374    /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4375    /// println!("{0:?}", read_options.iid_index()); // Outputs 'All'
4376    /// println!("{0:?}", read_options.sid_index()); // Outputs 'Vec([2, 3, 0])'
4377    ///
4378    /// let file_name = sample_bed_file("small.bed")?;
4379    /// let mut bed = Bed::new(file_name)?;
4380    /// let val = bed.read_with_options(&read_options)?;
4381
4382    /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4383    /// # use bed_reader::BedErrorPlus;
4384    /// # Ok::<(), Box<BedErrorPlus>>(())
4385    /// ```
4386    pub fn iid_index(&self) -> &Index {
4387        &self.iid_index
4388    }
4389
4390    /// Index of SNPs (variants) to read (defaults to all).
4391    ///
4392    /// # Example
4393    /// ```
4394    /// use ndarray as nd;
4395    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4396    /// use bed_reader::assert_eq_nan;
4397    ///
4398    /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4399    /// println!("{0:?}", read_options.iid_index()); // Outputs 'All'
4400    /// println!("{0:?}", read_options.sid_index()); // Outputs 'Vec([2, 3, 0])'
4401    ///
4402    /// let file_name = sample_bed_file("small.bed")?;
4403    /// let mut bed = Bed::new(file_name)?;
4404    /// let val = bed.read_with_options(&read_options)?;
4405
4406    /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4407    /// # use bed_reader::BedErrorPlus;
4408    /// # Ok::<(), Box<BedErrorPlus>>(())
4409    /// ```
4410    pub fn sid_index(&self) -> &Index {
4411        &self.sid_index
4412    }
4413
4414    /// Is the order of the output array Fortran-style (defaults to true).
4415    ///
4416    /// # Example
4417    /// ```
4418    /// use ndarray as nd;
4419    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4420    /// use bed_reader::assert_eq_nan;
4421    ///
4422    /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4423    /// assert_eq!(read_options.is_f(), true);
4424    ///
4425    /// let file_name = sample_bed_file("small.bed")?;
4426    /// let mut bed = Bed::new(file_name)?;
4427    /// let val = bed.read_with_options(&read_options)?;
4428
4429    /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4430    /// # use bed_reader::BedErrorPlus;
4431    /// # Ok::<(), Box<BedErrorPlus>>(())
4432    /// ```
4433    pub fn is_f(&self) -> bool {
4434        self.is_f
4435    }
4436
4437    /// If allele 1 will be counted (defaults to true).
4438    ///
4439    /// # Example
4440    /// ```
4441    /// use ndarray as nd;
4442    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4443    /// use bed_reader::assert_eq_nan;
4444    ///
4445    /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4446    /// assert_eq!(read_options.is_a1_counted(), true);
4447    ///
4448    /// let file_name = sample_bed_file("small.bed")?;
4449    /// let mut bed = Bed::new(file_name)?;
4450    /// let val = bed.read_with_options(&read_options)?;
4451
4452    /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4453    /// # use bed_reader::BedErrorPlus;
4454    /// # Ok::<(), Box<BedErrorPlus>>(())
4455    /// ```
4456    pub fn is_a1_counted(&self) -> bool {
4457        self.is_a1_counted
4458    }
4459
4460    /// Number of threads to be used (`None` means set with
4461    /// [Environment Variables](index.html#environment-variables) or use all processors).
4462    ///
4463    /// # Example
4464    /// ```
4465    /// use ndarray as nd;
4466    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4467    /// use bed_reader::assert_eq_nan;
4468    ///
4469    /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4470    /// assert_eq!(read_options.num_threads(), None);
4471    ///
4472    /// let file_name = sample_bed_file("small.bed")?;
4473    /// let mut bed = Bed::new(file_name)?;
4474    /// let val = bed.read_with_options(&read_options)?;
4475
4476    /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4477    /// # use bed_reader::BedErrorPlus;
4478    /// # Ok::<(), Box<BedErrorPlus>>(())
4479    /// ```
4480    pub fn num_threads(&self) -> Option<usize> {
4481        self.num_threads
4482    }
4483}
4484
4485impl<TVal: BedVal> ReadOptionsBuilder<TVal> {
4486    /// > See [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) for details and examples.
4487    pub fn read(&self, bed: &mut Bed) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
4488        let read_options = self.build()?;
4489        bed.read_with_options(&read_options)
4490    }
4491
4492    /// Read genotype data from the cloud.
4493    ///
4494    /// > Also see
4495    /// > [`BedCloud::read_with_options`](struct.BedCloud.html#method.read_with_options).
4496    ///
4497    /// # Errors
4498    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4499    /// for all possible errors.
4500    ///
4501    /// # Example
4502    ///
4503    /// ```
4504    /// use ndarray as nd;
4505    /// use bed_reader::{BedCloud, ReadOptions};
4506    /// use bed_reader::assert_eq_nan;
4507    ///
4508    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
4509    /// // Read the SNPs indexed by 2.
4510    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
4511    /// let mut bed_cloud = BedCloud::new(&url).await?;
4512    /// let mut val = ReadOptions::builder()
4513    ///     .sid_index(2)
4514    ///     .read_cloud(&mut bed_cloud).await?;
4515    ///
4516    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4517    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
4518    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
4519    /// ```
4520    pub async fn read_cloud(
4521        &self,
4522        bed_cloud: &mut BedCloud,
4523    ) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
4524        let read_options = self.build()?;
4525        bed_cloud.read_with_options(&read_options).await
4526    }
4527
4528    /// Read genotype data into a preallocated array.
4529    ///
4530    /// > Also see [`Bed::read_and_fill`](struct.Bed.html#method.read_and_fill) and
4531    /// > [`Bed::read_and_fill_with_options`](struct.Bed.html#method.read_and_fill_with_options).
4532    ///
4533    /// Note that options [`ReadOptions::f`](struct.ReadOptions.html#method.f),
4534    /// [`ReadOptions::c`](struct.ReadOptions.html#method.c), and [`ReadOptions::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
4535    /// are ignored. Instead, the order of the preallocated array is used.
4536    ///
4537    /// # Errors
4538    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4539    /// for all possible errors.
4540    ///
4541    /// # Example
4542    ///
4543    /// ```
4544    /// use ndarray as nd;
4545    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4546    /// use bed_reader::assert_eq_nan;
4547    ///
4548    /// // Read the SNPs indexed by 2.
4549    /// let file_name = sample_bed_file("small.bed")?;
4550    /// let mut bed = Bed::new(file_name)?;
4551    /// let mut val = nd::Array2::<f64>::default((3, 1));
4552    /// ReadOptions::builder()
4553    ///     .sid_index(2)
4554    ///     .read_and_fill(&mut bed, &mut val.view_mut())?;
4555    ///
4556    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4557    /// # use bed_reader::BedErrorPlus;
4558    /// # Ok::<(), Box<BedErrorPlus>>(())
4559    /// ```
4560    pub fn read_and_fill(
4561        &self,
4562        bed: &mut Bed,
4563        val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
4564    ) -> Result<(), Box<BedErrorPlus>> {
4565        let read_options = self.build()?;
4566        bed.read_and_fill_with_options(val, &read_options)
4567    }
4568
4569    /// Read genotype data from the cloud into a preallocated array.
4570    ///
4571    /// > Also see [`BedCloud::read_and_fill`](struct.BedCloud.html#method.read_and_fill) and
4572    /// > [`BedCloud::read_and_fill_with_options`](struct.BedCloud.html#method.read_and_fill_with_options).
4573    ///
4574    /// Note that options [`ReadOptions::f`](struct.ReadOptions.html#method.f),
4575    /// [`ReadOptions::c`](struct.ReadOptions.html#method.c), and [`ReadOptions::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
4576    /// are ignored. Instead, the order of the preallocated array is used.
4577    ///
4578    /// # Errors
4579    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4580    /// for all possible errors.
4581    ///
4582    /// # Example
4583    ///
4584    /// ```
4585    /// use ndarray as nd;
4586    /// use bed_reader::{BedCloud, ReadOptions};
4587    /// use bed_reader::assert_eq_nan;
4588    ///
4589    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
4590    /// // Read the SNPs indexed by 2.
4591    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
4592    /// let mut bed_cloud = BedCloud::new(&url).await?;
4593    /// let mut val = nd::Array2::<f64>::default((3, 1));
4594    /// ReadOptions::builder()
4595    ///     .sid_index(2)
4596    ///     .read_and_fill_cloud(&mut bed_cloud, &mut val.view_mut()).await?;
4597    ///
4598    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4599    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
4600    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
4601    /// ```    
4602    pub async fn read_and_fill_cloud(
4603        &self,
4604        bed_cloud: &mut BedCloud,
4605        val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
4606    ) -> Result<(), Box<BedErrorPlus>> {
4607        let read_options = self.build()?;
4608        bed_cloud
4609            .read_and_fill_with_options(val, &read_options)
4610            .await
4611    }
4612
4613    /// Order of the output array, Fortran-style (default)
4614    ///
4615    /// Also called "column-major order" [Wikipedia](https://en.wikipedia.org/wiki/Row-_and_column-major_order).
4616    ///
4617    /// Also see [`is_f`](struct.ReadOptionsBuilder.html#method.is_f) and [`c`](struct.ReadOptionsBuilder.html#method.c).
4618    pub fn f(&mut self) -> &mut Self {
4619        self.is_f(true);
4620        self
4621    }
4622
4623    /// Order of the output array, C (default)
4624    ///
4625    /// Also called "row-major order" [Wikipedia](https://en.wikipedia.org/wiki/Row-_and_column-major_order).
4626    ///
4627    /// Also see [`is_f`](struct.ReadOptionsBuilder.html#method.is_f) and [`f`](struct.ReadOptionsBuilder.html#method.f).
4628    pub fn c(&mut self) -> &mut Self {
4629        self.is_f(false);
4630        self
4631    }
4632
4633    /// Count the number allele 1 (default and PLINK standard).
4634    ///
4635    /// Also see [`is_a1_counted`](struct.ReadOptionsBuilder.html#method.is_a1_counted) and [`count_a2`](struct.ReadOptionsBuilder.html#method.count_a2).
4636    ///
4637    /// # Example:
4638    /// ```
4639    /// use ndarray as nd;
4640    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4641    /// use bed_reader::assert_eq_nan;
4642    ///
4643    /// let file_name = sample_bed_file("small.bed")?;
4644    /// let mut bed = Bed::new(file_name)?;
4645    /// let val = ReadOptions::builder().count_a1().i8().read(&mut bed)?;
4646    ///
4647    /// assert_eq_nan(
4648    ///     &val,
4649    ///     &nd::array![
4650    ///         [1, 0, -127, 0],
4651    ///         [2, 0, -127, 2],
4652    ///         [0, 1, 2, 0]
4653    ///     ],
4654    /// );
4655    /// # use bed_reader::BedErrorPlus;
4656    /// # Ok::<(), Box<BedErrorPlus>>(())
4657    /// ```
4658    pub fn count_a1(&mut self) -> &mut Self {
4659        self.is_a1_counted = Some(true);
4660        self
4661    }
4662
4663    /// Count the number allele 2.
4664    ///
4665    /// Also see [`is_a1_counted`](struct.ReadOptionsBuilder.html#method.is_a1_counted) and [`count_a1`](struct.ReadOptionsBuilder.html#method.count_a1).
4666    ///
4667    /// # Example:
4668    /// ```
4669    /// use ndarray as nd;
4670    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4671    /// use bed_reader::assert_eq_nan;
4672    ///
4673    /// let file_name = sample_bed_file("small.bed")?;
4674    /// let mut bed = Bed::new(file_name)?;
4675    /// let val = ReadOptions::builder().count_a2().i8().read(&mut bed)?;
4676    ///
4677    /// assert_eq_nan(
4678    ///     &val,
4679    ///     &nd::array![
4680    ///         [1, 2, -127, 2],
4681    ///         [0, 2, -127, 0],
4682    ///         [2, 1, 0, 2]
4683    ///     ],
4684    /// );
4685    /// # use bed_reader::BedErrorPlus;
4686    /// # Ok::<(), Box<BedErrorPlus>>(())
4687    /// ```
4688    pub fn count_a2(&mut self) -> &mut Self {
4689        self.is_a1_counted = Some(false);
4690        self
4691    }
4692}
4693
4694impl ReadOptionsBuilder<i8> {
4695    /// Output an ndarray of i8.
4696    ///
4697    /// # Example:
4698    /// ```
4699    /// use ndarray as nd;
4700    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4701    /// use bed_reader::assert_eq_nan;
4702    ///
4703    /// let file_name = sample_bed_file("small.bed")?;
4704    /// let mut bed = Bed::new(file_name)?;
4705    /// let val = ReadOptions::builder().i8().read(&mut bed)?;
4706    ///
4707    /// assert_eq_nan(
4708    ///     &val,
4709    ///     &nd::array![
4710    ///         [1, 0, -127, 0],
4711    ///         [2, 0, -127, 2],
4712    ///         [0, 1, 2, 0]
4713    ///     ],
4714    /// );
4715    /// # use bed_reader::BedErrorPlus;
4716    /// # Ok::<(), Box<BedErrorPlus>>(())
4717    /// ```
4718    pub fn i8(&mut self) -> &mut Self {
4719        self
4720    }
4721}
4722
4723impl ReadOptionsBuilder<f32> {
4724    /// Output an ndarray of f32.
4725    ///
4726    /// # Example:
4727    /// ```
4728    /// use ndarray as nd;
4729    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4730    /// use bed_reader::assert_eq_nan;
4731    ///
4732    /// let file_name = sample_bed_file("small.bed")?;
4733    /// let mut bed = Bed::new(file_name)?;
4734    /// let val = ReadOptions::builder().f32().read(&mut bed)?;
4735    ///
4736    /// assert_eq_nan(
4737    ///     &val,
4738    ///     &nd::array![
4739    ///         [1.0, 0.0, f32::NAN, 0.0],
4740    ///         [2.0, 0.0, f32::NAN, 2.0],
4741    ///         [0.0, 1.0, 2.0, 0.0]
4742    ///     ],
4743    /// );
4744    /// # use bed_reader::BedErrorPlus;
4745    /// # Ok::<(), Box<BedErrorPlus>>(())
4746    /// ```    
4747    pub fn f32(&mut self) -> &mut Self {
4748        self
4749    }
4750}
4751
4752impl ReadOptionsBuilder<f64> {
4753    /// Output an ndarray of f64.
4754    ///
4755    /// # Example:
4756    /// ```
4757    /// use ndarray as nd;
4758    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4759    /// use bed_reader::assert_eq_nan;
4760    ///
4761    /// let file_name = sample_bed_file("small.bed")?;
4762    /// let mut bed = Bed::new(file_name)?;
4763    /// let val = ReadOptions::builder().f64().read(&mut bed)?;
4764    ///
4765    /// assert_eq_nan(
4766    ///     &val,
4767    ///     &nd::array![
4768    ///         [1.0, 0.0, f64::NAN, 0.0],
4769    ///         [2.0, 0.0, f64::NAN, 2.0],
4770    ///         [0.0, 1.0, 2.0, 0.0]
4771    ///     ],
4772    /// );
4773    /// # use bed_reader::BedErrorPlus;
4774    /// # Ok::<(), Box<BedErrorPlus>>(())
4775    /// ```    
4776    pub fn f64(&mut self) -> &mut Self {
4777        self
4778    }
4779}
4780
4781/// Represents options for writing genotype data and metadata to a PLINK .bed file.
4782///
4783/// Construct with [`WriteOptions::builder`](struct.WriteOptions.html#method.builder).
4784#[derive(Clone, Debug, Builder)]
4785#[builder(build_fn(skip))]
4786pub struct WriteOptions<TVal>
4787where
4788    TVal: BedVal,
4789{
4790    #[builder(setter(custom))]
4791    path: PathBuf,
4792
4793    #[builder(setter(custom))]
4794    fam_path: PathBuf,
4795
4796    #[builder(setter(custom))]
4797    bim_path: PathBuf,
4798
4799    #[builder(setter(custom))]
4800    metadata: Metadata,
4801
4802    #[builder(setter(custom), default = "true")]
4803    is_a1_counted: bool,
4804
4805    #[builder(default, setter(custom))]
4806    num_threads: Option<usize>,
4807
4808    #[builder(default = "TVal::missing()", setter(custom))]
4809    missing_value: TVal,
4810
4811    #[builder(setter(custom), default = "false")]
4812    skip_fam: bool,
4813
4814    #[builder(setter(custom), default = "false")]
4815    skip_bim: bool,
4816}
4817
4818impl<TVal> WriteOptions<TVal>
4819where
4820    TVal: BedVal,
4821{
4822    /// Write values to a file in PLINK .bed format. Supports metadata and options.
4823    ///
4824    /// > Also see [`Bed::write`](struct.Bed.html#method.write), which does not support metadata or options.
4825    ///
4826    /// The options, [listed here](struct.WriteOptionsBuilder.html#implementations), can specify the:
4827    ///  * items of metadata, for example the individual ids or the SNP ids
4828    ///  * a non-default path for the .fam and/or .bim files
4829    ///  * a non-default value that represents missing data
4830    ///  * whether the first allele is counted (default) or the second
4831    ///  * number of threads to use for writing
4832    ///  * a [`Metadata`](struct.Metadata.html)
4833    ///
4834    /// # Examples
4835    /// In this example, all metadata is given one item at a time.
4836    /// ```
4837    /// use ndarray as nd;
4838    /// use bed_reader::{Bed, WriteOptions};
4839    ///
4840    /// let output_folder = temp_testdir::TempDir::default();
4841    /// let output_file = output_folder.join("small.bed");
4842    /// let val = nd::array![
4843    ///     [1.0, 0.0, f64::NAN, 0.0],
4844    ///     [2.0, 0.0, f64::NAN, 2.0],
4845    ///     [0.0, 1.0, 2.0, 0.0]
4846    /// ];
4847    /// WriteOptions::builder(output_file)
4848    ///     .fid(["fid1", "fid1", "fid2"])
4849    ///     .iid(["iid1", "iid2", "iid3"])
4850    ///     .father(["iid23", "iid23", "iid22"])
4851    ///     .mother(["iid34", "iid34", "iid33"])
4852    ///     .sex([1, 2, 0])
4853    ///     .pheno(["red", "red", "blue"])
4854    ///     .chromosome(["1", "1", "5", "Y"])
4855    ///     .sid(["sid1", "sid2", "sid3", "sid4"])
4856    ///     .cm_position([100.4, 2000.5, 4000.7, 7000.9])
4857    ///     .bp_position([1, 100, 1000, 1004])
4858    ///     .allele_1(["A", "T", "A", "T"])
4859    ///     .allele_2(["A", "C", "C", "G"])
4860    ///     .write(&val)?;
4861    /// # use bed_reader::BedErrorPlus;
4862    /// # Ok::<(), Box<BedErrorPlus>>(())
4863    /// ```
4864    /// Here, no metadata is given, so default values are assigned.
4865    /// If we then read the new file and list the chromosome property,
4866    /// it is an array of zeros, the default chromosome value.
4867    /// ```
4868    /// # use ndarray as nd;
4869    /// # use bed_reader::{Bed, WriteOptions};
4870    /// # let output_folder = temp_testdir::TempDir::default();
4871    /// let output_file2 = output_folder.join("small2.bed");
4872    /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
4873    ///
4874    /// WriteOptions::builder(&output_file2).write(&val)?;
4875    ///
4876    /// let mut bed2 = Bed::new(&output_file2)?;
4877    /// println!("{:?}", bed2.chromosome()?); // Outputs ndarray ["0", "0", "0", "0"]
4878    /// # use bed_reader::BedErrorPlus;
4879    /// # Ok::<(), Box<BedErrorPlus>>(())
4880    /// ```
4881    #[anyinput]
4882    pub fn builder(path: AnyPath) -> WriteOptionsBuilder<TVal> {
4883        WriteOptionsBuilder::new(path)
4884    }
4885
    /// Family id of each individual (sample). Defaults to "0"'s
4887    ///
4888    /// # Example
4889    /// ```
4890    /// use ndarray as nd;
4891    /// use bed_reader::{WriteOptions};
4892    /// let output_folder = temp_testdir::TempDir::default();
4893    /// let output_file = output_folder.join("small.bed");
4894    /// let write_options = WriteOptions::builder(output_file)
4895    ///     .f64()
4896    ///     .iid(["i1", "i2", "i3"])
4897    ///     .sid(["s1", "s2", "s3", "s4"])
4898    ///     .build(3, 4)?;
4899    ///
4900    /// println!("{0:?}", write_options.fid()); // Outputs ndarray ["0", "0", "0"]
4901    /// # use bed_reader::BedErrorPlus;
4902    /// # Ok::<(), Box<BedErrorPlus>>(())
4903    /// ```
4904    pub fn fid(&self) -> &nd::Array1<String> {
4905        // unwrap always works because the WriteOptions constructor fills all metadata.
4906        self.metadata.fid.as_ref().unwrap()
4907    }
4908
    /// Individual id of each individual (sample). Defaults to "iid1", "iid2" ...
4910    ///
4911    /// # Example
4912    /// ```
4913    /// use ndarray as nd;
4914    /// use bed_reader::{Bed, WriteOptions};
4915    /// let output_folder = temp_testdir::TempDir::default();
4916    /// let output_file = output_folder.join("small.bed");
4917    /// let write_options = WriteOptions::builder(output_file)
4918    ///     .f64()
4919    ///     .iid(["i1", "i2", "i3"])
4920    ///     .sid(["s1", "s2", "s3", "s4"])
4921    ///     .build(3, 4)?;
4922    ///
4923    /// println!("{0:?}", write_options.iid()); // Outputs ndarray ["i1", "i2", "i3"]
4924    ///
4925    /// let val = nd::array![
4926    ///     [1.0, 0.0, f64::NAN, 0.0],
4927    ///     [2.0, 0.0, f64::NAN, 2.0],
4928    ///     [0.0, 1.0, 2.0, 0.0]
4929    /// ];
4930    /// Bed::write_with_options(&val, &write_options)?;
4931    /// # use bed_reader::BedErrorPlus;
4932    /// # Ok::<(), Box<BedErrorPlus>>(())
4933    /// ```
4934    pub fn iid(&self) -> &nd::Array1<String> {
4935        // unwrap always works because the WriteOptions constructor fills all metadata.
4936        self.metadata.iid.as_ref().unwrap()
4937    }
4938
    /// Father id of each individual (sample). Defaults to "0"'s
4940    ///
4941    /// # Example
4942    /// ```
4943    /// use ndarray as nd;
4944    /// use bed_reader::WriteOptions;
4945    /// let output_folder = temp_testdir::TempDir::default();
4946    /// let output_file = output_folder.join("small.bed");
4947    /// let write_options = WriteOptions::builder(output_file)
4948    ///     .f64()
4949    ///     .iid(["i1", "i2", "i3"])
4950    ///     .sid(["s1", "s2", "s3", "s4"])
4951    ///     .build(3, 4)?;
4952    ///
4953    /// println!("{0:?}", write_options.father()); // Outputs ndarray ["0", "0", "0"]
4954    /// # use bed_reader::BedErrorPlus;
4955    /// # Ok::<(), Box<BedErrorPlus>>(())
4956    /// ```
4957    pub fn father(&self) -> &nd::Array1<String> {
4958        // unwrap always works because the WriteOptions constructor fills all metadata.
4959        self.metadata.father.as_ref().unwrap()
4960    }
4961
    /// Mother id of each individual (sample). Defaults to "0"'s
4963    ///
4964    /// # Example
4965    /// ```
4966    /// use ndarray as nd;
4967    /// use bed_reader::WriteOptions;
4968    /// let output_folder = temp_testdir::TempDir::default();
4969    /// let output_file = output_folder.join("small.bed");
4970    /// let write_options = WriteOptions::builder(output_file)
4971    ///     .f64()
4972    ///     .iid(["i1", "i2", "i3"])
4973    ///     .sid(["s1", "s2", "s3", "s4"])
4974    ///     .build(3, 4)?;
4975    ///
4976    /// println!("{0:?}", write_options.mother()); // Outputs ndarray ["0", "0", "0"]
4977    /// # use bed_reader::BedErrorPlus;
4978    /// # Ok::<(), Box<BedErrorPlus>>(())
4979    /// ```
4980    pub fn mother(&self) -> &nd::Array1<String> {
4981        // unwrap always works because the WriteOptions constructor fills all metadata.
4982        self.metadata.mother.as_ref().unwrap()
4983    }
4984
    /// Sex of each individual (sample). Defaults to 0's
4986    ///
4987    /// 0 is unknown, 1 is male, 2 is female
4988    ///
4989    /// # Example
4990    /// ```
4991    /// use ndarray as nd;
4992    /// use bed_reader::WriteOptions;
4993    /// let output_folder = temp_testdir::TempDir::default();
4994    /// let output_file = output_folder.join("small.bed");
4995    /// let write_options = WriteOptions::builder(output_file)
4996    ///     .f64()
4997    ///     .iid(["i1", "i2", "i3"])
4998    ///     .sid(["s1", "s2", "s3", "s4"])
4999    ///     .build(3, 4)?;
5000    ///
5001    /// println!("{0:?}", write_options.sex()); // Outputs ndarray [0, 0, 0]
5002    /// # use bed_reader::BedErrorPlus;
5003    /// # Ok::<(), Box<BedErrorPlus>>(())
5004    /// ```
5005    pub fn sex(&self) -> &nd::Array1<i32> {
5006        // unwrap always works because the WriteOptions constructor fills all metadata.
5007        self.metadata.sex.as_ref().unwrap()
5008    }
5009
    /// Phenotype of each individual (sample). Seldom used. Defaults to "0"'s
5011    ///
5012    /// # Example
5013    /// ```
5014    /// use ndarray as nd;
5015    /// use bed_reader::WriteOptions;
5016    /// let output_folder = temp_testdir::TempDir::default();
5017    /// let output_file = output_folder.join("small.bed");
5018    /// let write_options = WriteOptions::builder(output_file)
5019    ///     .f64()
5020    ///     .iid(["i1", "i2", "i3"])
5021    ///     .sid(["s1", "s2", "s3", "s4"])
5022    ///     .build(3, 4)?;
5023    ///
5024    /// println!("{0:?}", write_options.pheno()); // Outputs ndarray ["0", "0", "0"]
5025    /// # use bed_reader::BedErrorPlus;
5026    /// # Ok::<(), Box<BedErrorPlus>>(())
5027    /// ```
5028    pub fn pheno(&self) -> &nd::Array1<String> {
5029        // unwrap always works because the WriteOptions constructor fills all metadata.
5030        self.metadata.pheno.as_ref().unwrap()
5031    }
5032
    /// Chromosome of each SNP (variant). Defaults to "0"'s
5034    ///
5035    /// # Example
5036    /// ```
5037    /// use ndarray as nd;
5038    /// use bed_reader::WriteOptions;
5039    /// let output_folder = temp_testdir::TempDir::default();
5040    /// let output_file = output_folder.join("small.bed");
5041    /// let write_options = WriteOptions::builder(output_file)
5042    ///     .f64()
5043    ///     .iid(["i1", "i2", "i3"])
5044    ///     .sid(["s1", "s2", "s3", "s4"])
5045    ///     .build(3, 4)?;
5046    ///
5047    /// println!("{0:?}", write_options.chromosome()); // Outputs ndarray ["0", "0", "0", "0"]
5048    /// # use bed_reader::BedErrorPlus;
5049    /// # Ok::<(), Box<BedErrorPlus>>(())
5050    /// ```
5051    pub fn chromosome(&self) -> &nd::Array1<String> {
5052        // unwrap always works because the WriteOptions constructor fills all metadata.
5053        self.metadata.chromosome.as_ref().unwrap()
5054    }
5055
    /// SNP id of each SNP (variant). Defaults to "sid1", "sid2", ...
5057    ///
5058    /// # Example
5059    /// ```
5060    /// use ndarray as nd;
5061    /// use bed_reader::{Bed, WriteOptions};
5062    /// let output_folder = temp_testdir::TempDir::default();
5063    /// let output_file = output_folder.join("small.bed");
5064    /// let write_options = WriteOptions::builder(output_file)
5065    ///     .f64()
5066    ///     .iid(["i1", "i2", "i3"])
5067    ///     .sid(["s1", "s2", "s3", "s4"])
5068    ///     .build(3, 4)?;
5069    ///
5070    /// println!("{0:?}", write_options.sid()); // Outputs ndarray ["s1", "s2", "s3", "s4"]
5071    ///
5072    /// let val = nd::array![
5073    ///     [1.0, 0.0, f64::NAN, 0.0],
5074    ///     [2.0, 0.0, f64::NAN, 2.0],
5075    ///     [0.0, 1.0, 2.0, 0.0]
5076    /// ];
5077    /// Bed::write_with_options(&val, &write_options)?;
5078    /// # use bed_reader::BedErrorPlus;
5079    /// # Ok::<(), Box<BedErrorPlus>>(())
5080    /// ```
5081    pub fn sid(&self) -> &nd::Array1<String> {
5082        // unwrap always works because the WriteOptions constructor fills all metadata.
5083        self.metadata.sid.as_ref().unwrap()
5084    }
5085
5086    /// Centimorgan position of each SNP (variant). Defaults to 0.0's.
5087    ///
5088    /// # Example
5089    /// ```
5090    /// use ndarray as nd;
5091    /// use bed_reader::WriteOptions;
5092    /// let output_folder = temp_testdir::TempDir::default();
5093    /// let output_file = output_folder.join("small.bed");
5094    /// let write_options = WriteOptions::builder(output_file)
5095    ///     .f64()
5096    ///     .iid(["i1", "i2", "i3"])
5097    ///     .sid(["s1", "s2", "s3", "s4"])
5098    ///     .build(3, 4)?;
5099    ///
5100    /// println!("{0:?}", write_options.cm_position()); // Outputs ndarray [0.0, 0.0, 0.0, 0.0]
5101    /// # use bed_reader::BedErrorPlus;
5102    /// # Ok::<(), Box<BedErrorPlus>>(())
5103    /// ```
5104    pub fn cm_position(&self) -> &nd::Array1<f32> {
5105        // unwrap always works because the WriteOptions constructor fills all metadata.
5106        self.metadata.cm_position.as_ref().unwrap()
5107    }
5108
5109    /// Base-pair position of each SNP (variant). Defaults to 0's.
5110    ///
5111    /// # Example
5112    /// ```
5113    /// use ndarray as nd;
5114    /// use bed_reader::{Bed, WriteOptions};
5115    /// let output_folder = temp_testdir::TempDir::default();
5116    /// let output_file = output_folder.join("small.bed");
5117    /// let write_options = WriteOptions::builder(output_file)
5118    ///     .f64()
5119    ///     .iid(["i1", "i2", "i3"])
5120    ///     .sid(["s1", "s2", "s3", "s4"])
5121    ///     .build(3, 4)?;
5122    ///
5123    /// println!("{0:?}", write_options.bp_position()); // Outputs ndarray [0, 0, 0, 0]
5124    /// # use bed_reader::BedErrorPlus;
5125    /// # Ok::<(), Box<BedErrorPlus>>(())
5126    /// ```
5127    pub fn bp_position(&self) -> &nd::Array1<i32> {
5128        // unwrap always works because the WriteOptions constructor fills all metadata.
5129        self.metadata.bp_position.as_ref().unwrap()
5130    }
5131
5132    /// First allele of each SNP (variant). Defaults to "A1"
5133    ///
5134    /// # Example
5135    /// ```
5136    /// use ndarray as nd;
5137    /// use bed_reader::{Bed, WriteOptions};
5138    /// let output_folder = temp_testdir::TempDir::default();
5139    /// let output_file = output_folder.join("small.bed");
5140    /// let write_options = WriteOptions::builder(output_file)
5141    ///     .f64()
5142    ///     .iid(["i1", "i2", "i3"])
5143    ///     .sid(["s1", "s2", "s3", "s4"])
5144    ///     .build(3, 4)?;
5145    ///
5146    /// println!("{0:?}", write_options.allele_1()); // Outputs ndarray ["A1", "A1", "A1", "A1"]
5147    /// println!("{0:?}", write_options.allele_2()); // Outputs ndarray ["A2", "A2", "A2", "A2"]
5148    /// # use bed_reader::BedErrorPlus;
5149    /// # Ok::<(), Box<BedErrorPlus>>(())
5150    /// ```
5151    pub fn allele_1(&self) -> &nd::Array1<String> {
5152        // unwrap always works because the WriteOptions constructor fills all metadata.
5153        self.metadata.allele_1.as_ref().unwrap()
5154    }
5155
5156    /// Second allele of each SNP (variant). Defaults to "A2"
5157    ///
5158    /// # Example
5159    /// ```
5160    /// use ndarray as nd;
5161    /// use bed_reader::{Bed, WriteOptions};
5162    /// let output_folder = temp_testdir::TempDir::default();
5163    /// let output_file = output_folder.join("small.bed");
5164    /// let write_options = WriteOptions::builder(output_file)
5165    ///     .f64()
5166    ///     .iid(["i1", "i2", "i3"])
5167    ///     .sid(["s1", "s2", "s3", "s4"])
5168    ///     .build(3, 4)?;
5169    ///
5170    /// println!("{0:?}", write_options.allele_1()); // Outputs ndarray ["A1", "A1", "A1", "A1"]
5171    /// println!("{0:?}", write_options.allele_2()); // Outputs ndarray ["A2", "A2", "A2", "A2"]
5172    /// # use bed_reader::BedErrorPlus;
5173    /// # Ok::<(), Box<BedErrorPlus>>(())
5174    /// ```
5175    pub fn allele_2(&self) -> &nd::Array1<String> {
5176        // unwrap always works because the WriteOptions constructor fills all metadata.
5177        self.metadata.allele_2.as_ref().unwrap()
5178    }
5179
5180    /// [`Metadata`](struct.Metadata.html) for this [`WriteOptions`](struct.WriteOptions.html), for example, the individual (sample) Ids.
5181    ///
5182    /// This returns a struct with 12 fields. Each field is a ndarray.
5183    /// The struct will always be new, but the 12 ndarrays will be
5184    /// shared with this [`WriteOptions`](struct.WriteOptions.html).
5185    ///
    /// If needed, default values will be used.
5187    ///
5188    /// # Example
5189    /// ```
5190    /// use ndarray as nd;
5191    /// use bed_reader::{Bed, WriteOptions};
5192    /// let output_folder = temp_testdir::TempDir::default();
5193    /// let output_file = output_folder.join("small.bed");
5194    /// let write_options = WriteOptions::builder(output_file)
5195    ///     .f64()
5196    ///     .iid(["i1", "i2", "i3"])
5197    ///     .sid(["s1", "s2", "s3", "s4"])
5198    ///     .build(3, 4)?;
5199    ///
5200    /// let metadata = write_options.metadata();
5201    /// println!("{0:?}", metadata.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"])
5202    /// # use bed_reader::BedErrorPlus;
5203    /// # Ok::<(), Box<BedErrorPlus>>(())
5204    /// ```
5205    pub fn metadata(&self) -> Metadata {
5206        self.metadata.clone()
5207    }
5208
5209    /// The number of individuals (samples)
5210    ///
5211    /// # Example
5212    /// ```
5213    /// use ndarray as nd;
5214    /// use bed_reader::{Bed, WriteOptions};
5215    /// let output_folder = temp_testdir::TempDir::default();
5216    /// let output_file = output_folder.join("small.bed");
5217    /// let write_options = WriteOptions::builder(output_file)
5218    ///     .f64()
5219    ///     .iid(["i1", "i2", "i3"])
5220    ///     .sid(["s1", "s2", "s3", "s4"])
5221    ///     .build(3, 4)?;
5222    ///
5223    /// assert_eq!(write_options.iid_count(), 3);
5224    /// assert_eq!(write_options.sid_count(), 4);
5225    /// # use bed_reader::BedErrorPlus;
5226    /// # Ok::<(), Box<BedErrorPlus>>(())
5227    /// ```
5228    pub fn iid_count(&self) -> usize {
5229        self.iid().len()
5230    }
5231
5232    /// The number of SNPs (variants)
5233    ///
5234    /// # Example
5235    /// ```
5236    /// use ndarray as nd;
5237    /// use bed_reader::{Bed, WriteOptions};
5238    /// let output_folder = temp_testdir::TempDir::default();
5239    /// let output_file = output_folder.join("small.bed");
5240    /// let write_options = WriteOptions::builder(output_file)
5241    ///     .f64()
5242    ///     .iid(["i1", "i2", "i3"])
5243    ///     .sid(["s1", "s2", "s3", "s4"])
5244    ///     .build(3, 4)?;
5245    ///
5246    /// assert_eq!(write_options.iid_count(), 3);
5247    /// assert_eq!(write_options.sid_count(), 4);
5248    /// # use bed_reader::BedErrorPlus;
5249    /// # Ok::<(), Box<BedErrorPlus>>(())
5250    /// ```
5251    pub fn sid_count(&self) -> usize {
5252        self.sid().len()
5253    }
5254
5255    /// Number of individuals (samples) and SNPs (variants)
5256    ///
5257    /// # Example
5258    /// ```
5259    /// use ndarray as nd;
5260    /// use bed_reader::{Bed, WriteOptions};
5261    /// let output_folder = temp_testdir::TempDir::default();
5262    /// let output_file = output_folder.join("small.bed");
5263    /// let write_options = WriteOptions::builder(output_file)
5264    ///     .f64()
5265    ///     .iid(["i1", "i2", "i3"])
5266    ///     .sid(["s1", "s2", "s3", "s4"])
5267    ///     .build(3, 4)?;
5268    ///
5269    /// assert_eq!(write_options.dim(), (3, 4));
5270    /// # use bed_reader::BedErrorPlus;
5271    /// # Ok::<(), Box<BedErrorPlus>>(())
5272    /// ```
5273    pub fn dim(&self) -> (usize, usize) {
5274        (self.iid_count(), self.sid_count())
5275    }
5276
5277    /// Path to .bed file.
5278    ///
5279    /// # Example
5280    /// ```
5281    /// use ndarray as nd;
5282    /// use bed_reader::{Bed, WriteOptions};
5283    /// let output_folder = temp_testdir::TempDir::default();
5284    /// let output_file = output_folder.join("small.bed");
5285    /// let write_options = WriteOptions::builder(output_file)
5286    ///     .f64()
5287    ///     .iid(["i1", "i2", "i3"])
5288    ///     .sid(["s1", "s2", "s3", "s4"])
5289    ///     .build(3, 4)?;
5290    ///
5291    /// println!("{0:?}", write_options.path()); // Outputs "...small.bed"
5292    /// println!("{0:?}", write_options.fam_path()); // Outputs "...small.fam"
5293    /// println!("{0:?}", write_options.bim_path()); // Outputs "...small.bim"
5294    /// # use bed_reader::BedErrorPlus;
5295    /// # Ok::<(), Box<BedErrorPlus>>(())
5296    /// ```
5297    pub fn path(&self) -> &PathBuf {
5298        &self.path
5299    }
5300
5301    /// Path to .fam file.
5302    ///
5303    /// # Example
5304    /// ```
5305    /// use ndarray as nd;
5306    /// use bed_reader::{Bed, WriteOptions};
5307    /// let output_folder = temp_testdir::TempDir::default();
5308    /// let output_file = output_folder.join("small.bed");
5309    /// let write_options = WriteOptions::builder(output_file)
5310    ///     .f64()
5311    ///     .iid(["i1", "i2", "i3"])
5312    ///     .sid(["s1", "s2", "s3", "s4"])
5313    ///     .build(3, 4)?;
5314    ///
5315    /// println!("{0:?}", write_options.path()); // Outputs "...small.bed"
5316    /// println!("{0:?}", write_options.fam_path()); // Outputs "...small.fam"
5317    /// println!("{0:?}", write_options.bim_path()); // Outputs "...small.bim"
5318    /// # use bed_reader::BedErrorPlus;
5319    /// # Ok::<(), Box<BedErrorPlus>>(())
5320    /// ```
5321    pub fn fam_path(&self) -> &PathBuf {
5322        &self.fam_path
5323    }
5324
5325    /// Path to .bim file.
5326    ///
5327    /// # Example
5328    /// ```
5329    /// use ndarray as nd;
5330    /// use bed_reader::{Bed, WriteOptions};
5331    /// let output_folder = temp_testdir::TempDir::default();
5332    /// let output_file = output_folder.join("small.bed");
5333    /// let write_options = WriteOptions::builder(output_file)
5334    ///     .f64()
5335    ///     .iid(["i1", "i2", "i3"])
5336    ///     .sid(["s1", "s2", "s3", "s4"])
5337    ///     .build(3, 4)?;
5338    ///
5339    /// println!("{0:?}", write_options.path()); // Outputs "...small.bed"
5340    /// println!("{0:?}", write_options.fam_path()); // Outputs "...small.fam"
5341    /// println!("{0:?}", write_options.bim_path()); // Outputs "...small.bim"
5342    /// # use bed_reader::BedErrorPlus;
5343    /// # Ok::<(), Box<BedErrorPlus>>(())
5344    /// ```
5345    pub fn bim_path(&self) -> &PathBuf {
5346        &self.bim_path
5347    }
5348
5349    /// If allele 1 will be counted (defaults to true).
5350    ///
5351    /// # Example
5352    /// ```
5353    /// use ndarray as nd;
5354    /// use bed_reader::{Bed, WriteOptions};
5355    /// let output_folder = temp_testdir::TempDir::default();
5356    /// let output_file = output_folder.join("small.bed");
5357    /// let write_options = WriteOptions::builder(output_file)
5358    ///     .i8()
5359    ///     .iid(["i1", "i2", "i3"])
5360    ///     .sid(["s1", "s2", "s3", "s4"])
5361    ///     .build(3, 4)?;
5362    ///
5363    /// assert!(write_options.is_a1_counted());
5364    /// # use bed_reader::BedErrorPlus;
5365    /// # Ok::<(), Box<BedErrorPlus>>(())
5366    /// ```
5367    pub fn is_a1_counted(&self) -> bool {
5368        self.is_a1_counted
5369    }
5370
5371    /// Number of threads to be used (`None` means set with
5372    /// [Environment Variables](index.html#environment-variables) or use all processors).
5373    ///
5374    /// # Example
5375    /// ```
5376    /// use ndarray as nd;
5377    /// use bed_reader::{Bed, WriteOptions};
5378    /// let output_folder = temp_testdir::TempDir::default();
5379    /// let output_file = output_folder.join("small.bed");
5380    /// let write_options = WriteOptions::builder(output_file)
5381    ///     .i8()
5382    ///     .iid(["i1", "i2", "i3"])
5383    ///     .sid(["s1", "s2", "s3", "s4"])
5384    ///     .build(3, 4)?;
5385    ///
5386    /// assert!(write_options.num_threads().is_none());
5387    /// # use bed_reader::BedErrorPlus;
5388    /// # Ok::<(), Box<BedErrorPlus>>(())
5389    /// ```
5390    pub fn num_threads(&self) -> Option<usize> {
5391        self.num_threads
5392    }
5393
5394    /// Value to be used for missing values (defaults to -127 or NaN).
5395    ///
5396    /// # Example
5397    /// ```
5398    /// use ndarray as nd;
5399    /// use bed_reader::{Bed, WriteOptions};
5400    /// let output_folder = temp_testdir::TempDir::default();
5401    /// let output_file = output_folder.join("small.bed");
5402    /// let write_options = WriteOptions::builder(output_file)
5403    ///     .i8()
5404    ///     .iid(["i1", "i2", "i3"])
5405    ///     .sid(["s1", "s2", "s3", "s4"])
5406    ///     .build(3, 4)?;
5407    ///
5408    /// assert!(write_options.missing_value() == -127);
5409    /// # use bed_reader::BedErrorPlus;
5410    /// # Ok::<(), Box<BedErrorPlus>>(())
5411    /// ```
5412    pub fn missing_value(&self) -> TVal {
5413        self.missing_value
5414    }
5415
5416    /// If skipping writing .fam file.
5417    ///
5418    /// # Example
5419    /// ```
5420    /// use ndarray as nd;
5421    /// use bed_reader::{Bed, WriteOptions};
5422    /// let output_folder = temp_testdir::TempDir::default();
5423    /// let output_file = output_folder.join("small.bed");
5424    /// let write_options = WriteOptions::builder(output_file)
5425    ///     .i8()
5426    ///     .skip_fam()
5427    ///     .skip_bim()
5428    ///     .build(3, 4)?;
5429    /// assert!(write_options.skip_fam());
5430    /// assert!(write_options.skip_bim());
5431    /// # use bed_reader::BedErrorPlus;
5432    /// # Ok::<(), Box<BedErrorPlus>>(())
5433    /// ```
5434    pub fn skip_fam(&self) -> bool {
5435        self.skip_fam
5436    }
5437
5438    /// If skipping writing .bim file.
5439    ///
5440    /// # Example
5441    /// ```
5442    /// use ndarray as nd;
5443    /// use bed_reader::{Bed, WriteOptions};
5444    /// let output_folder = temp_testdir::TempDir::default();
5445    /// let output_file = output_folder.join("small.bed");
5446    /// let write_options = WriteOptions::builder(output_file)
5447    ///     .i8()
5448    ///     .skip_fam()
5449    ///     .skip_bim()
5450    ///     .build(3, 4)?;
5451    /// assert!(write_options.skip_fam());
5452    /// assert!(write_options.skip_bim());
5453    /// # use bed_reader::BedErrorPlus;
5454    /// # Ok::<(), Box<BedErrorPlus>>(())
5455    /// ```
5456    pub fn skip_bim(&self) -> bool {
5457        self.skip_bim
5458    }
5459}
5460
5461impl<TVal> WriteOptionsBuilder<TVal>
5462where
5463    TVal: BedVal,
5464{
5465    /// Creates a new [`WriteOptions`](struct.WriteOptions.html) with the options given and then writes a .bed (and .fam and .bim) file.
5466    ///
5467    /// See [`WriteOptions`](struct.WriteOptions.html) for details and examples.
5468    pub fn write<S: nd::Data<Elem = TVal>>(
5469        &mut self,
5470        val: &nd::ArrayBase<S, nd::Ix2>,
5471    ) -> Result<(), Box<BedErrorPlus>> {
5472        let (iid_count, sid_count) = val.dim();
5473        let write_options = self.build(iid_count, sid_count)?;
5474        Bed::write_with_options(val, &write_options)?;
5475
5476        Ok(())
5477    }
5478
5479    /// Set the family id (fid) values for each individual (sample).
5480    ///
5481    /// Defaults to zeros.
5482    ///
5483    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5484    ///
5485    #[anyinput]
5486    #[must_use]
5487    pub fn fid(mut self, fid: AnyIter<AnyString>) -> Self {
5488        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5489        self.metadata.as_mut().unwrap().set_fid(fid);
5490        self
5491    }
5492
5493    /// Set the individual id (iid) values for each individual (sample).
5494    ///
5495    /// Defaults to "iid1", "iid2", ...
5496    ///
5497    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5498    ///
5499    #[anyinput]
5500    #[must_use]
5501    pub fn iid(mut self, iid: AnyIter<AnyString>) -> Self {
5502        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5503        self.metadata.as_mut().unwrap().set_iid(iid);
5504        self
5505    }
5506
5507    /// Set the father id values for each individual (sample).
5508    ///
5509    /// Defaults to zeros.
5510    ///
5511    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5512    ///
5513    #[anyinput]
5514    #[must_use]
5515    pub fn father(mut self, father: AnyIter<AnyString>) -> Self {
5516        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5517        self.metadata.as_mut().unwrap().set_father(father);
5518        self
5519    }
5520
5521    /// Set the mother id values for each individual (sample).
5522    ///
5523    /// Defaults to zeros.
5524    ///
5525    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5526    ///
5527    #[anyinput]
5528    #[must_use]
5529    pub fn mother(mut self, mother: AnyIter<AnyString>) -> Self {
5530        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5531        self.metadata.as_mut().unwrap().set_mother(mother);
5532        self
5533    }
5534
5535    /// Set the sex for each individual (sample).
5536    ///
5537    /// 0 is unknown (default), 1 is male, 2 is female
5538    #[anyinput]
5539    #[must_use]
5540    pub fn sex(mut self, sex: AnyIter<i32>) -> Self {
5541        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5542        self.metadata.as_mut().unwrap().set_sex(sex);
5543        self
5544    }
5545
5546    /// Set a phenotype for each individual (sample). Seldom used.
5547    ///
5548    /// Defaults to zeros.
5549    ///
5550    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5551    ///
5552    #[anyinput]
5553    #[must_use]
5554    pub fn pheno(mut self, pheno: AnyIter<AnyString>) -> Self {
5555        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5556        self.metadata.as_mut().unwrap().set_pheno(pheno);
5557        self
5558    }
5559
5560    /// Set the chromosome for each SNP (variant).
5561    ///
5562    /// Defaults to zeros.
5563    #[anyinput]
5564    #[must_use]
5565    pub fn chromosome(mut self, chromosome: AnyIter<AnyString>) -> Self {
5566        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5567        self.metadata.as_mut().unwrap().set_chromosome(chromosome);
5568        self
5569    }
5570
5571    /// Set the SNP id (sid) for each SNP (variant).
5572    ///
5573    /// Defaults to "sid1", "sid2", ...
5574    ///
5575    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5576    ///
5577    #[anyinput]
5578    #[must_use]
5579    pub fn sid(mut self, sid: AnyIter<AnyString>) -> Self {
5580        self.metadata.as_mut().unwrap().set_sid(sid);
5581        self
5582    }
5583
5584    /// Set the centimorgan position for each SNP (variant).
5585    ///
5586    /// Defaults to zeros.
5587    #[anyinput]
5588    #[must_use]
5589    pub fn cm_position(mut self, cm_position: AnyIter<f32>) -> Self {
5590        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5591        self.metadata.as_mut().unwrap().set_cm_position(cm_position);
5592        self
5593    }
5594
5595    /// Set the base-pair position for each SNP (variant).
5596    ///
5597    /// Defaults to zeros.
5598    ///
5599    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5600    ///
5601    #[anyinput]
5602    #[must_use]
5603    pub fn bp_position(mut self, bp_position: AnyIter<i32>) -> Self {
5604        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5605        self.metadata.as_mut().unwrap().set_bp_position(bp_position);
5606        self
5607    }
5608
5609    /// Set the first allele for each SNP (variant).
5610    ///
    /// Defaults to "A1", "A1", ...
5612    ///
5613    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5614    ///
5615    #[anyinput]
5616    #[must_use]
5617    pub fn allele_1(mut self, allele_1: AnyIter<AnyString>) -> Self {
5618        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5619        self.metadata.as_mut().unwrap().set_allele_1(allele_1);
5620        self
5621    }
5622
5623    /// Set the second allele for each SNP (variant).
5624    ///
    /// Defaults to "A2", "A2", ...
5626    ///
5627    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5628    ///
5629    #[anyinput]
5630    #[must_use]
5631    pub fn allele_2(mut self, allele_2: AnyIter<AnyString>) -> Self {
5632        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5633        self.metadata.as_mut().unwrap().set_allele_2(allele_2);
5634        self
5635    }
5636
5637    /// Merge metadata from a [`Metadata`](struct.Metadata.html).
5638    ///
5639    /// If a field is set in both [`Metadata`](struct.Metadata.html)'s,
5640    /// it will be overridden.
5641    ///
5642    /// # Example
5643    ///
5644    /// Extract metadata from a file.
5645    /// Create a random file with the same metadata.
5646    /// ```
5647    /// use ndarray as nd;
5648    /// use bed_reader::{Bed, WriteOptions, sample_bed_file};
5649    /// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
5650    ///
5651    /// let mut bed = Bed::new(sample_bed_file("small.bed")?)?;
5652    /// let metadata = bed.metadata()?;
5653    /// let shape = bed.dim()?;
5654    ///
5655    /// let mut rng = StdRng::seed_from_u64(0);
5656    /// let val = nd::Array::random_using(shape, Uniform::from(-1..3), &mut rng);
5657    ///
5658    /// let temp_out = temp_testdir::TempDir::default();
5659    /// let output_file = temp_out.join("random.bed");
5660    /// WriteOptions::builder(output_file)
5661    ///     .metadata(&metadata)
5662    ///     .missing_value(-1)
5663    ///     .write(&val)?;
5664    /// # use bed_reader::BedErrorPlus;
5665    /// # Ok::<(), Box<BedErrorPlus>>(())
5666    /// ```
5667    #[must_use]
5668    pub fn metadata(mut self, metadata: &Metadata) -> Self {
5669        self.metadata = Some(
5670            Metadata::builder()
5671                .metadata(&self.metadata.unwrap()) // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5672                .metadata(metadata)
5673                .build_no_file_check() // Don't need to check consistent counts here. Builder will do it.
5674                .unwrap(), // Unwrap will always work nothing can go wrong
5675        );
5676        self
5677    }
5678
5679    /// Set the path to the .fam file.
5680    ///
5681    /// If not set, the .fam file will be assumed
5682    /// to have the same name as the .bed file, but with the extension .fam.
5683    ///
5684    /// # Example:
5685    /// Write .bed, .fam, and .bim files with non-standard names.
5686    /// ```
5687    /// use ndarray as nd;
5688    /// use bed_reader::WriteOptions;
5689    /// let output_folder = temp_testdir::TempDir::default();
5690    /// let output_file = output_folder.join("small.deb");
5691    /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
5692
5693    /// WriteOptions::builder(output_file)
5694    ///     .fam_path(output_folder.join("small.maf"))
5695    ///     .bim_path(output_folder.join("small.mib"))
5696    ///     .write(&val)?;
5697    /// # use bed_reader::BedErrorPlus;
5698    /// # Ok::<(), Box<BedErrorPlus>>(())
5699    /// ```
5700    #[anyinput]
5701    #[must_use]
5702    pub fn fam_path(mut self, path: AnyPath) -> Self {
5703        self.fam_path = Some(path.to_owned());
5704        self
5705    }
5706
5707    /// Set the path to the .bim file.
5708    ///
5709    /// If not set, the .bim file will be assumed
5710    /// to have the same name as the .bed file, but with the extension .bim.
5711    ///
5712    /// # Example:
5713    /// Write .bed, .fam, and .bim files with non-standard names.
5714    /// ```
5715    /// use ndarray as nd;
5716    /// use bed_reader::{WriteOptions};
5717    /// let output_folder = temp_testdir::TempDir::default();
5718    /// let output_file = output_folder.join("small.deb");
5719    /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
5720
5721    /// WriteOptions::builder(output_file)
5722    ///     .fam_path(output_folder.join("small.maf"))
5723    ///     .bim_path(output_folder.join("small.mib"))
5724    ///     .write(&val)?;
5725    /// # use bed_reader::BedErrorPlus;
5726    /// # Ok::<(), Box<BedErrorPlus>>(())
5727    /// ```
5728    #[anyinput]
5729    #[must_use]
5730    pub fn bim_path(mut self, path: AnyPath) -> Self {
5731        self.bim_path = Some(path.to_owned());
5732        self
5733    }
5734
5735    /// Value used for missing values (defaults to -127 or NaN)
5736    ///
5737    /// -127 is the default for i8 and NaN is the default for f32 and f64.
5738    ///
5739    /// # Example
5740    ///
5741    /// Extract metadata from a file.
5742    /// Create a random file with the same metadata.
5743    /// ```
5744    /// use ndarray as nd;
5745    /// use bed_reader::{Bed, WriteOptions, sample_bed_file};
5746    /// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
5747    ///
5748    /// let mut bed = Bed::new(sample_bed_file("small.bed")?)?;
5749    /// let metadata = bed.metadata()?;
5750    /// let shape = bed.dim()?;
5751    ///
5752    /// let mut rng = StdRng::seed_from_u64(0);
5753    /// let val = nd::Array::random_using(shape, Uniform::from(-1..3), &mut rng);
5754    ///
5755    /// let temp_out = temp_testdir::TempDir::default();
5756    /// let output_file = temp_out.join("random.bed");
5757    /// WriteOptions::builder(output_file)
5758    ///     .metadata(&metadata)
5759    ///     .missing_value(-1)
5760    ///     .write(&val)?;
5761    /// # use bed_reader::BedErrorPlus;
5762    /// # Ok::<(), Box<BedErrorPlus>>(())
5763    /// ```
5764    pub fn missing_value(&mut self, missing_value: TVal) -> &mut Self {
5765        self.missing_value = Some(missing_value);
5766        self
5767    }
5768
5769    /// Count the number allele 1 (default and PLINK standard).
5770    ///
5771    /// Also see [`is_a1_counted`](struct.WriteOptionsBuilder.html#method.is_a1_counted) and [`count_a2`](struct.WriteOptionsBuilder.html#method.count_a2).
5772    pub fn count_a1(&mut self) -> &mut Self {
5773        self.is_a1_counted = Some(true);
5774        self
5775    }
5776
5777    /// Count the number allele 2.
5778    ///
5779    /// Also see [`is_a1_counted`](struct.WriteOptionsBuilder.html#method.is_a1_counted) and [`count_a1`](struct.WriteOptionsBuilder.html#method.count_a1).
5780    pub fn count_a2(&mut self) -> &mut Self {
5781        self.is_a1_counted = Some(false);
5782        self
5783    }
5784
5785    /// Sets if allele 1 is counted. Default is true.
5786    ///
5787    /// Also see [`count_a1`](struct.WriteOptionsBuilder.html#method.count_a1) and [`count_a2`](struct.WriteOptionsBuilder.html#method.count_a2).    
5788    pub fn is_a1_counted(&mut self, is_a1_counted: bool) -> &mut Self {
5789        self.is_a1_counted = Some(is_a1_counted);
5790        self
5791    }
5792
5793    /// Number of threads to use (defaults to all processors)
5794    ///
5795    /// Can also be set with an environment variable.
5796    /// See [Environment Variables](index.html#environment-variables).
5797    ///
5798    ///
5799    /// # Example:
5800    ///
5801    /// Write using only one thread.
5802    /// ```
5803    /// use ndarray as nd;
5804    /// use bed_reader::WriteOptions;
5805    /// let output_folder = temp_testdir::TempDir::default();
5806    /// let output_file = output_folder.join("small.bed");
5807    /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
5808
5809    /// WriteOptions::builder(output_file)
5810    ///     .num_threads(1)
5811    ///     .write(&val)?;
5812    /// # use bed_reader::BedErrorPlus;
5813    /// # Ok::<(), Box<BedErrorPlus>>(())
5814    /// ```
5815    pub fn num_threads(&mut self, num_threads: usize) -> &mut Self {
5816        self.num_threads = Some(Some(num_threads));
5817        self
5818    }
5819
5820    /// Skip writing .fam file.
5821    ///
5822    /// # Example
5823    /// ```
5824    /// use ndarray as nd;
5825    /// use bed_reader::{Bed, WriteOptions};
5826    /// let output_folder = temp_testdir::TempDir::default();
5827    /// let output_file = output_folder.join("small.bed");
5828    /// let write_options = WriteOptions::builder(output_file)
5829    ///     .i8()
5830    ///     .skip_fam()
5831    ///     .skip_bim()
5832    ///     .build(3, 4)?;
5833    /// assert!(write_options.skip_fam());
5834    /// assert!(write_options.skip_bim());
5835    /// # use bed_reader::BedErrorPlus;
5836    /// # Ok::<(), Box<BedErrorPlus>>(())
5837    /// ```
5838    pub fn skip_fam(&mut self) -> &mut Self {
5839        self.skip_fam = Some(true);
5840        self
5841    }
5842
5843    /// Skip writing .bim file.
5844    ///
5845    /// # Example
5846    /// ```
5847    /// use ndarray as nd;
5848    /// use bed_reader::{Bed, WriteOptions};
5849    /// let output_folder = temp_testdir::TempDir::default();
5850    /// let output_file = output_folder.join("small.bed");
5851    /// let write_options = WriteOptions::builder(output_file)
5852    ///     .i8()
5853    ///     .skip_fam()
5854    ///     .skip_bim()
5855    ///     .build(3, 4)?;
5856    /// assert!(write_options.skip_fam());
5857    /// assert!(write_options.skip_bim());
5858    /// # use bed_reader::BedErrorPlus;
5859    /// # Ok::<(), Box<BedErrorPlus>>(())
5860    /// ```
5861    pub fn skip_bim(&mut self) -> &mut Self {
5862        self.skip_bim = Some(true);
5863        self
5864    }
5865
5866    /// Creates a new [`WriteOptions`](struct.WriteOptions.html) with the options given.
5867    ///
5868    /// > Also see [`WriteOptionsBuilder::write`](struct.WriteOptionsBuilder.html#method.write), which creates
5869    /// > a [`WriteOptions`](struct.WriteOptions.html) and writes to file in one step.
5870    ///
5871    /// # Example
5872    /// Create a new [`WriteOptions`](struct.WriteOptions.html) with some given values and some
5873    /// default values. Then use it to write a .bed file.
5874    /// ```
5875    /// use ndarray as nd;
5876    /// use bed_reader::{WriteOptions, Bed};
5877    ///
5878    /// let output_folder = temp_testdir::TempDir::default();
5879    /// let output_file = output_folder.join("small.bed");
5880    /// let write_options = WriteOptions::builder(output_file)
5881    ///     .f64()
5882    ///     .iid(["i1", "i2", "i3"])
5883    ///     .sid(["s1", "s2", "s3", "s4"])
5884    ///     .build(3, 4)?;
5885    /// println!("{0:?}", write_options.fid()); // Outputs ndarray ["0", "0", "0"]
5886    /// println!("{0:?}", write_options.iid()); // Outputs ndarray ["i1", "i2", "i3"]
5887    ///
5888    /// let val = nd::array![
5889    ///     [1.0, 0.0, f64::NAN, 0.0],
5890    ///     [2.0, 0.0, f64::NAN, 2.0],
5891    ///     [0.0, 1.0, 2.0, 0.0]
5892    /// ];
5893    /// Bed::write_with_options(&val, &write_options)?;
5894    /// # use bed_reader::BedErrorPlus;
5895    /// # Ok::<(), Box<BedErrorPlus>>(())
5896    /// ```
5897    pub fn build(
5898        &self,
5899        iid_count: usize,
5900        sid_count: usize,
5901    ) -> Result<WriteOptions<TVal>, Box<BedErrorPlus>> {
5902        let Some(path) = self.path.as_ref() else {
5903            Err(BedError::UninitializedField("path"))?
5904        };
5905
5906        // unwrap always works because the metadata builder always initializes metadata
5907        let metadata = self.metadata.as_ref().unwrap();
5908        let metadata = metadata.fill(iid_count, sid_count)?;
5909
5910        let write_options = WriteOptions {
5911            path: path.to_owned(),
5912            fam_path: to_metadata_path(path, &self.fam_path, "fam"),
5913            bim_path: to_metadata_path(path, &self.bim_path, "bim"),
5914            is_a1_counted: self.is_a1_counted.unwrap_or(true),
5915            num_threads: self.num_threads.unwrap_or(None),
5916            missing_value: self.missing_value.unwrap_or_else(|| TVal::missing()),
5917            skip_fam: self.skip_fam.unwrap_or(false),
5918            skip_bim: self.skip_bim.unwrap_or(false),
5919
5920            metadata,
5921        };
5922        Ok(write_options)
5923    }
5924
5925    #[anyinput]
5926    fn new(path: AnyPath) -> Self {
5927        Self {
5928            path: Some(path.to_owned()),
5929            fam_path: None,
5930            bim_path: None,
5931
5932            metadata: Some(Metadata::new()),
5933
5934            is_a1_counted: None,
5935            num_threads: None,
5936            missing_value: None,
5937            skip_fam: None,
5938            skip_bim: None,
5939        }
5940    }
5941}
5942
/// Conversion from a 1-D array of strings into a 1-D array of the implementing type.
///
/// NOTE(review): the type parameter `T` is not used by the method signature;
/// each impl visible here sets it to the implementing type itself.
trait FromStringArray<T> {
    /// Consumes `string_array` and returns an array of `Self`, or an error
    /// if the conversion fails.
    #[allow(dead_code)]
    fn from_string_array(
        string_array: nd::Array1<String>,
    ) -> Result<nd::Array1<Self>, Box<BedErrorPlus>>
    where
        Self: Sized;
}
5951
/// Strings need no conversion: the input array is returned unchanged.
impl FromStringArray<String> for String {
    fn from_string_array(
        string_array: nd::Array1<String>,
    ) -> Result<nd::Array1<String>, Box<BedErrorPlus>> {
        // Identity conversion; this never fails.
        Ok(string_array)
    }
}
5959
5960impl FromStringArray<f32> for f32 {
5961    fn from_string_array(
5962        string_array: nd::Array1<String>,
5963    ) -> Result<nd::Array1<f32>, Box<BedErrorPlus>> {
5964        let result = string_array
5965            .iter()
5966            .map(|s| s.parse::<f32>())
5967            .collect::<Result<nd::Array1<f32>, _>>();
5968        match result {
5969            Ok(array) => Ok(array),
5970            Err(e) => Err(Box::new(BedErrorPlus::ParseFloatError(e))),
5971        }
5972    }
5973}
5974impl FromStringArray<i32> for i32 {
5975    fn from_string_array(
5976        string_array: nd::Array1<String>,
5977    ) -> Result<nd::Array1<i32>, Box<BedErrorPlus>> {
5978        let result = string_array
5979            .iter()
5980            .map(|s| s.parse::<i32>())
5981            .collect::<Result<nd::Array1<i32>, _>>();
5982        match result {
5983            Ok(array) => Ok(array),
5984            Err(e) => Err(Box::new(BedErrorPlus::ParseIntError(e))),
5985        }
5986    }
5987}
5988
/// Asserts two 2-D arrays are equal, treating NaNs as values.
///
/// The comparison is delegated to [`allclose`] with a tolerance of zero
/// and `equal_nan` set to `true`.
///
/// # Panics
///
/// Panics if [`allclose`] reports that the arrays differ.
///
/// # Example
/// ```
/// use std::f64::NAN;
/// use ndarray as nd;
/// use bed_reader::assert_eq_nan;
/// let val1 = nd::arr2(&[[1.0, 2.0], [3.0, NAN]]);
/// let val2 = nd::arr2(&[[1.0, 2.0], [3.0, NAN]]);
/// assert_eq_nan(&val1, &val2);
/// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
pub fn assert_eq_nan<T: 'static + Copy + PartialEq + PartialOrd + Signed + From<i8>>(
    val: &nd::ArrayBase<nd::OwnedRepr<T>, nd::Dim<[usize; 2]>>,
    answer: &nd::ArrayBase<nd::OwnedRepr<T>, nd::Dim<[usize; 2]>>,
) {
    // `0.into()` builds the zero tolerance through the `From<i8>` bound on `T`.
    assert!(allclose::<T, T>(
        &val.view(),
        &answer.view(),
        0.into(),
        true
    ));
}
6013
/// Asserts that a result is an error and that the error is of a given variant.
///
/// `$result` must evaluate to a `Result` whose error is a boxed value;
/// `$pattern` is matched against the value inside the box.
/// Panics with `"test failure"` if the result is `Ok` or the pattern does not match.
#[macro_export]
macro_rules! assert_error_variant {
    ($result:expr, $pattern:pat) => {
        match $result {
            // Look through the reference and the box to reach the error value itself.
            Err(ref boxed_error) if matches!(**boxed_error, $pattern) => (),
            _ => panic!("test failure"),
        }
    };
}
6027
6028/// True if and only if two 2-D arrays are equal, within a given tolerance and possibly treating NaNs as values.
6029///
6030/// # Example
6031/// ```
6032/// use std::f64::NAN;
6033/// use ndarray as nd;
6034/// use bed_reader::allclose;
6035/// let val1 = nd::arr2(&[[1.0, 2.000000000001], [3.0, NAN]]);
6036/// let val2 = nd::arr2(&[[1.0, 2.0], [3.0, NAN]]);
6037/// assert!(allclose(&val1.view(), &val2.view(), 1e-08, true));
6038/// # use bed_reader::BedErrorPlus;
6039/// # Ok::<(), Box<BedErrorPlus>>(())
6040/// ```
6041pub fn allclose<
6042    T1: 'static + Copy + PartialEq + PartialOrd + Signed,
6043    T2: 'static + Copy + PartialEq + PartialOrd + Signed + Into<T1>,
6044>(
6045    val1: &nd::ArrayView2<'_, T1>,
6046    val2: &nd::ArrayView2<'_, T2>,
6047    atol: T1,
6048    equal_nan: bool,
6049) -> bool {
6050    assert!(val1.dim() == val2.dim());
6051    // Could be run in parallel
6052
6053    nd::Zip::from(val1)
6054        .and(val2)
6055        .fold(true, |acc, ptr_a, ptr_b| -> bool {
6056            if !acc {
6057                return false;
6058            }
6059            // x != x is a generic nan check
6060            #[allow(clippy::eq_op)]
6061            let a_nan = *ptr_a != *ptr_a;
6062            #[allow(clippy::eq_op)]
6063            let b_nan = *ptr_b != *ptr_b;
6064
6065            if a_nan || b_nan {
6066                if equal_nan {
6067                    a_nan == b_nan
6068                } else {
6069                    false
6070                }
6071            } else {
6072                let c: T1 = abs(*ptr_a - T2::into(*ptr_b));
6073                c <= atol
6074            }
6075        })
6076}
6077
impl WriteOptionsBuilder<i8> {
    /// The input ndarray will be i8.
    ///
    /// Returns the builder unchanged. The method exists only on
    /// `WriteOptionsBuilder<i8>`, so calling it fixes the value type to `i8`.
    #[must_use]
    pub fn i8(self) -> Self {
        self
    }
}
6085
impl WriteOptionsBuilder<f32> {
    /// The input ndarray will be f32.
    ///
    /// Returns the builder unchanged. The method exists only on
    /// `WriteOptionsBuilder<f32>`, so calling it fixes the value type to `f32`.
    #[must_use]
    pub fn f32(self) -> Self {
        self
    }
}
6093
impl WriteOptionsBuilder<f64> {
    /// The input ndarray will be f64.
    ///
    /// Returns the builder unchanged. The method exists only on
    /// `WriteOptionsBuilder<f64>`, so calling it fixes the value type to `f64`.
    #[must_use]
    pub fn f64(self) -> Self {
        self
    }
}
6101
6102fn check_counts(
6103    count_vec: Vec<Option<usize>>,
6104    option_xid_count: &mut Option<usize>,
6105    prefix: &str,
6106) -> Result<(), Box<BedErrorPlus>> {
6107    for count in count_vec.into_iter().flatten() {
6108        if let Some(xid_count) = option_xid_count {
6109            if *xid_count != count {
6110                Err(BedError::InconsistentCount(
6111                    prefix.to_string(),
6112                    *xid_count,
6113                    count,
6114                ))?;
6115            }
6116        } else {
6117            *option_xid_count = Some(count);
6118        }
6119    }
6120
6121    Ok(())
6122}
6123
6124// According to https://docs.rs/derive_builder/latest/derive_builder/
6125// "clone" is OK because "Luckily Rust is clever enough to optimize these
6126// clone-calls away in release builds for your every-day use cases.
6127// Thats quite a safe bet - we checked this for you. ;-)"
6128fn compute_field<T: Clone, F: Fn(usize) -> T>(
6129    field_name: &str,
6130    field: &mut Option<Rc<nd::Array1<T>>>,
6131    count: usize,
6132    lambda: F,
6133) -> Result<(), Box<BedErrorPlus>> {
6134    // let lambda = |_| "0".to_string();
6135    // let count = iid_count;
6136    // let field = &mut metadata.fid;
6137
6138    if let Some(array) = field {
6139        if array.len() != count {
6140            Err(BedError::InconsistentCount(
6141                field_name.to_string(),
6142                array.len(),
6143                count,
6144            ))?;
6145        }
6146    } else {
6147        let array = Rc::new((0..count).map(lambda).collect::<nd::Array1<T>>());
6148        *field = Some(array);
6149    }
6150    Ok(())
6151}
6152
6153impl MetadataBuilder {
6154    /// Create a [`Metadata`](struct.Metadata.html) from the builder.
6155    ///
6156    /// > See [`Metadata::builder()`](struct.Metadata.html#method.builder)
6157    pub fn build(&self) -> Result<Metadata, Box<BedErrorPlus>> {
6158        let metadata = self.build_no_file_check()?;
6159
6160        metadata.check_counts(None, None)?;
6161
6162        Ok(metadata)
6163    }
6164
6165    /// Set the family id (fid) values.
6166    #[anyinput]
6167    pub fn fid(&mut self, fid: AnyIter<AnyString>) -> &mut Self {
6168        self.fid = Some(Some(Rc::new(fid.map(|s| s.as_ref().to_string()).collect())));
6169        self
6170    }
6171
6172    /// Set the individual id (iid) values.
6173    /// ```
6174    /// use ndarray as nd;
6175    /// use bed_reader::{Metadata, assert_eq_nan};
6176    ///
6177    /// let metadata = Metadata::builder()
6178    ///    .iid(["sample1", "sample2", "sample3"])
6179    ///    .build()?;
6180    /// println!("{:?}", metadata.iid()); // Outputs ndarray Some(["sample1", "sample2", "sample3"])
6181    /// # use bed_reader::BedErrorPlus;
6182    /// # Ok::<(), Box<BedErrorPlus>>(())
6183    /// ```
6184    #[anyinput]
6185    pub fn iid(&mut self, iid: AnyIter<AnyString>) -> &mut Self {
6186        self.iid = Some(Some(Rc::new(iid.map(|s| s.as_ref().to_owned()).collect())));
6187        self
6188    }
6189
6190    /// Set the father values.
6191    #[anyinput]
6192    pub fn father(&mut self, father: AnyIter<AnyString>) -> &mut Self {
6193        self.father = Some(Some(Rc::new(
6194            father.map(|s| s.as_ref().to_owned()).collect(),
6195        )));
6196        self
6197    }
6198
6199    /// Override the mother values.
6200    #[anyinput]
6201    pub fn mother(&mut self, mother: AnyIter<AnyString>) -> &mut Self {
6202        self.mother = Some(Some(Rc::new(
6203            mother.map(|s| s.as_ref().to_owned()).collect(),
6204        )));
6205        self
6206    }
6207
6208    /// Override the sex values.
6209    #[anyinput]
6210    pub fn sex(&mut self, sex: AnyIter<i32>) -> &mut Self {
6211        self.sex = Some(Some(Rc::new(sex.collect())));
6212        self
6213    }
6214
6215    /// Override the phenotype values.
6216    #[anyinput]
6217    pub fn pheno(&mut self, pheno: AnyIter<AnyString>) -> &mut Self {
6218        self.pheno = Some(Some(Rc::new(
6219            pheno.map(|s| s.as_ref().to_owned()).collect(),
6220        )));
6221        self
6222    }
6223
6224    /// Override the chromosome values.
6225    #[anyinput]
6226    pub fn chromosome(&mut self, chromosome: AnyIter<AnyString>) -> &mut Self {
6227        self.chromosome = Some(Some(Rc::new(
6228            chromosome.map(|s| s.as_ref().to_owned()).collect(),
6229        )));
6230        self
6231    }
6232
6233    /// Override the SNP id (sid) values.
6234    /// ```
6235    /// use ndarray as nd;
6236    /// use bed_reader::{Metadata, assert_eq_nan};
6237    ///
6238    /// let metadata = Metadata::builder()
6239    ///    .sid(["SNP1", "SNP2", "SNP3", "SNP4"])
6240    ///    .build()?;
6241    /// println!("{:?}", metadata.sid()); // Outputs ndarray Some(["SNP1", "SNP2", "SNP3", "SNP4"])
6242    /// # use bed_reader::BedErrorPlus;
6243    /// # Ok::<(), Box<BedErrorPlus>>(())
6244    /// ```
6245    #[anyinput]
6246    pub fn sid(&mut self, sid: AnyIter<AnyString>) -> &mut Self {
6247        self.sid = Some(Some(Rc::new(
6248            sid.into_iter().map(|s| s.as_ref().to_owned()).collect(),
6249        )));
6250        self
6251    }
6252
6253    /// Override the centimorgan position values.
6254    #[anyinput]
6255    pub fn cm_position(&mut self, cm_position: AnyIter<f32>) -> &mut Self {
6256        self.cm_position = Some(Some(Rc::new(cm_position.into_iter().collect())));
6257        self
6258    }
6259
6260    /// Override the base-pair position values.
6261    #[anyinput]
6262    pub fn bp_position(&mut self, bp_position: AnyIter<i32>) -> &mut Self {
6263        self.bp_position = Some(Some(Rc::new(bp_position.into_iter().collect())));
6264        self
6265    }
6266
6267    /// Override the allele 1 values.
6268    #[anyinput]
6269    pub fn allele_1(&mut self, allele_1: AnyIter<AnyString>) -> &mut Self {
6270        self.allele_1 = Some(Some(Rc::new(
6271            allele_1
6272                .into_iter()
6273                .map(|s| s.as_ref().to_owned())
6274                .collect(),
6275        )));
6276        self
6277    }
6278
6279    /// Override the allele 2 values.
6280    #[anyinput]
6281    pub fn allele_2(&mut self, allele_2: AnyIter<AnyString>) -> &mut Self {
6282        self.allele_2 = Some(Some(Rc::new(
6283            allele_2
6284                .into_iter()
6285                .map(|s| s.as_ref().to_owned())
6286                .collect(),
6287        )));
6288        self
6289    }
6290
6291    /// Merge metadata from a [`Metadata`](struct.Metadata.html).
6292    ///
6293    /// # Example
6294    ///
6295    /// In the example, we create a [`Metadata`](struct.Metadata.html) with iid
6296    /// and sid arrays. Next, we use another [`MetadataBuilder`](struct.MetadataBuilder.html) to set an fid array
6297    /// and an iid array. Then, we add the first [`Metadata`](struct.Metadata.html)
6298    /// to the [`MetadataBuilder`](struct.MetadataBuilder.html),
6299    /// overwriting iid and setting sid. Finally, we print these
6300    /// three arrays and chromosome. Chromosome is `None`.
6301    ///```
6302    /// use ndarray as nd;
6303    /// use bed_reader::Metadata;
6304    ///
6305    /// let metadata1 = Metadata::builder()
6306    ///     .iid(["i1", "i2", "i3"])
6307    ///     .sid(["s1", "s2", "s3", "s4"])
6308    ///     .build()?;
6309    /// let metadata2 = Metadata::builder()
6310    ///     .fid(["f1", "f2", "f3"])
6311    ///     .iid(["x1", "x2", "x3"])
6312    ///     .metadata(&metadata1)
6313    ///     .build()?;
6314    ///
6315    /// println!("{0:?}", metadata2.fid()); // Outputs optional ndarray Some(["f1", "f2", "f3"]...)
6316    /// println!("{0:?}", metadata2.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
6317    /// println!("{0:?}", metadata2.sid()); // Outputs optional ndarray Some(["s1", "s2", "s3", "s4"]...)
6318    /// println!("{0:?}", metadata2.chromosome()); // Outputs None
6319    /// # use bed_reader::BedErrorPlus;
6320    /// # Ok::<(), Box<BedErrorPlus>>(())
6321    /// ```
6322    pub fn metadata(&mut self, metadata: &Metadata) -> &mut Self {
6323        set_field(&metadata.fid, &mut self.fid);
6324        set_field(&metadata.iid, &mut self.iid);
6325        set_field(&metadata.father, &mut self.father);
6326        set_field(&metadata.mother, &mut self.mother);
6327        set_field(&metadata.sex, &mut self.sex);
6328        set_field(&metadata.pheno, &mut self.pheno);
6329
6330        set_field(&metadata.chromosome, &mut self.chromosome);
6331        set_field(&metadata.sid, &mut self.sid);
6332        set_field(&metadata.cm_position, &mut self.cm_position);
6333        set_field(&metadata.bp_position, &mut self.bp_position);
6334        set_field(&metadata.allele_1, &mut self.allele_1);
6335        set_field(&metadata.allele_2, &mut self.allele_2);
6336        self
6337    }
6338}
6339
/// The default [`Metadata`](struct.Metadata.html) is the value returned by `Metadata::new()`.
impl Default for Metadata {
    fn default() -> Self {
        Self::new()
    }
}
6345
6346impl Metadata {
6347    fn check_counts(
6348        &self,
6349        mut iid_count: Option<usize>,
6350        mut sid_count: Option<usize>,
6351    ) -> Result<(Option<usize>, Option<usize>), Box<BedErrorPlus>> {
6352        check_counts(
6353            vec![
6354                lazy_or_skip_count(&self.fid),
6355                lazy_or_skip_count(&self.iid),
6356                lazy_or_skip_count(&self.father),
6357                lazy_or_skip_count(&self.mother),
6358                lazy_or_skip_count(&self.sex),
6359                lazy_or_skip_count(&self.pheno),
6360            ],
6361            &mut iid_count,
6362            "iid",
6363        )?;
6364        check_counts(
6365            vec![
6366                lazy_or_skip_count(&self.chromosome),
6367                lazy_or_skip_count(&self.sid),
6368                lazy_or_skip_count(&self.cm_position),
6369                lazy_or_skip_count(&self.bp_position),
6370                lazy_or_skip_count(&self.allele_1),
6371                lazy_or_skip_count(&self.allele_2),
6372            ],
6373            &mut sid_count,
6374            "sid",
6375        )?;
6376        Ok((iid_count, sid_count))
6377    }
6378
6379    /// Create a [`Metadata`](struct.Metadata.html) using a builder.
6380    ///
6381    /// # Example
6382    /// Create metadata.
6383    /// Create a random file with the metadata.
6384    /// ```
6385    /// use ndarray as nd;
6386    /// use bed_reader::{Metadata, WriteOptions};
6387    /// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
6388    ///
6389    /// let metadata = Metadata::builder()
6390    ///     .iid(["i1", "i2", "i3"])
6391    ///     .sid(["s1", "s2", "s3", "s4"])
6392    ///     .build()?;
6393    /// let mut rng = StdRng::seed_from_u64(0);
6394    /// let val = nd::Array::random_using((3, 4), Uniform::from(-1..3), &mut rng);
6395
6396    /// let temp_out = temp_testdir::TempDir::default();
6397    /// let output_file = temp_out.join("random.bed");
6398    /// WriteOptions::builder(output_file)
6399    ///     .metadata(&metadata)
6400    ///     .missing_value(-1)
6401    ///     .write(&val)?;
6402    /// # use bed_reader::BedErrorPlus;
6403    /// # Ok::<(), Box<BedErrorPlus>>(())
6404    /// ```
6405    #[must_use]
6406    pub fn builder() -> MetadataBuilder {
6407        MetadataBuilder::default()
6408    }
6409
6410    /// Create an empty [`Metadata`](struct.Metadata.html).
6411    ///
6412    /// > See [`Metadata::builder()`](struct.Metadata.html#method.builder)
6413    #[must_use]
6414    pub fn new() -> Metadata {
6415        // Unwrap always works because an empty metadata builder always works.
6416        Metadata::builder().build().unwrap()
6417    }
6418
    /// Optional family id of each individual (sample)
    #[must_use]
    pub fn fid(&self) -> Option<&nd::Array1<String>> {
        // Borrows the stored array via `option_rc_as_ref` (defined elsewhere in this file).
        option_rc_as_ref(&self.fid)
    }
6424
    /// Optional individual id of each individual (sample)
    ///
    /// # Example:
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::Metadata;
    /// let metadata = Metadata::builder().iid(["i1", "i2", "i3"]).build()?;
    /// println!("{0:?}", metadata.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
    /// println!("{0:?}", metadata.sid()); // Outputs None
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[must_use]
    pub fn iid(&self) -> Option<&nd::Array1<String>> {
        option_rc_as_ref(&self.iid)
    }
6440
    /// Optional father id of each individual (sample)
    #[must_use]
    pub fn father(&self) -> Option<&nd::Array1<String>> {
        // Borrows the stored array via `option_rc_as_ref` (defined elsewhere in this file).
        option_rc_as_ref(&self.father)
    }
6446
    /// Optional mother id of each individual (sample)
    #[must_use]
    pub fn mother(&self) -> Option<&nd::Array1<String>> {
        // Borrows the stored array via `option_rc_as_ref` (defined elsewhere in this file).
        option_rc_as_ref(&self.mother)
    }
6452
    /// Optional sex of each individual (sample)
    #[must_use]
    pub fn sex(&self) -> Option<&nd::Array1<i32>> {
        // Borrows the stored array via `option_rc_as_ref` (defined elsewhere in this file).
        option_rc_as_ref(&self.sex)
    }
6458
    /// Optional phenotype for each individual (seldom used)
    #[must_use]
    pub fn pheno(&self) -> Option<&nd::Array1<String>> {
        // Borrows the stored array via `option_rc_as_ref` (defined elsewhere in this file).
        option_rc_as_ref(&self.pheno)
    }
6464
    /// Optional chromosome of each SNP (variant)
    #[must_use]
    pub fn chromosome(&self) -> Option<&nd::Array1<String>> {
        // Borrows the stored array via `option_rc_as_ref` (defined elsewhere in this file).
        option_rc_as_ref(&self.chromosome)
    }
6470
    /// Optional SNP id of each SNP (variant)
    ///
    /// # Example:
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::Metadata;
    /// let metadata = Metadata::builder().iid(["i1", "i2", "i3"]).build()?;
    /// println!("{0:?}", metadata.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
    /// println!("{0:?}", metadata.sid()); // Outputs None
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[must_use]
    pub fn sid(&self) -> Option<&nd::Array1<String>> {
        option_rc_as_ref(&self.sid)
    }
6486
    /// Optional centimorgan position of each SNP (variant)
    #[must_use]
    pub fn cm_position(&self) -> Option<&nd::Array1<f32>> {
        // Borrows the stored array via `option_rc_as_ref` (defined elsewhere in this file).
        option_rc_as_ref(&self.cm_position)
    }
6492
    /// Optional base-pair position of each SNP (variant)
    #[must_use]
    pub fn bp_position(&self) -> Option<&nd::Array1<i32>> {
        // Borrows the stored array via `option_rc_as_ref` (defined elsewhere in this file).
        option_rc_as_ref(&self.bp_position)
    }
6498
    /// Optional first allele of each SNP (variant)
    #[must_use]
    pub fn allele_1(&self) -> Option<&nd::Array1<String>> {
        // Borrows the stored array via `option_rc_as_ref` (defined elsewhere in this file).
        option_rc_as_ref(&self.allele_1)
    }
6504
    /// Optional second allele of each SNP (variant)
    #[must_use]
    pub fn allele_2(&self) -> Option<&nd::Array1<String>> {
        // Borrows the stored array via `option_rc_as_ref` (defined elsewhere in this file).
        option_rc_as_ref(&self.allele_2)
    }
6510
6511    /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty fields with a .fam file.
6512    ///
6513    /// # Example
6514    ///
6515    /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6516    /// Do not skip any fields.
6517    /// ```
6518    /// use ndarray as nd;
6519    /// use std::collections::HashSet;
6520    /// use bed_reader::{Metadata, MetadataFields, sample_file};
6521    ///
6522    /// let skip_set = HashSet::<MetadataFields>::new();
6523    /// let metadata_empty = Metadata::new();
6524    /// let (metadata_fam, iid_count) =
6525    ///     metadata_empty.read_fam(sample_file("small.fam")?, &skip_set)?;
6526    /// let (metadata_bim, sid_count) =
6527    ///     metadata_fam.read_bim(sample_file("small.bim")?, &skip_set)?;
6528    /// assert_eq!(iid_count, 3);
6529    /// assert_eq!(sid_count, 4);
6530    /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6531    /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6532    /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6533    /// # use bed_reader::BedErrorPlus;
6534    /// # Ok::<(), Box<BedErrorPlus>>(())
6535    /// ```
6536    #[anyinput]
6537    pub fn read_fam(
6538        &self,
6539        path: AnyPath,
6540        skip_set: &HashSet<MetadataFields>,
6541    ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6542        let mut field_vec: Vec<usize> = Vec::new();
6543
6544        if self.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6545            field_vec.push(0);
6546        }
6547        if self.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6548            field_vec.push(1);
6549        }
6550        if self.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6551            field_vec.push(2);
6552        }
6553        if self.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6554            field_vec.push(3);
6555        }
6556        if self.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6557            field_vec.push(4);
6558        }
6559        if self.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6560            field_vec.push(5);
6561        }
6562
6563        let (mut vec_of_vec, count) = Metadata::read_fam_or_bim(&field_vec, true, path)?;
6564
6565        let mut clone = self.clone();
6566
6567        // unwraps are safe because we pop once for every push
6568        if clone.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6569            clone.pheno = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6570        }
6571        if clone.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6572            let vec = vec_of_vec.pop().unwrap();
6573            let array = vec
6574                .iter()
6575                .map(|s| s.parse::<i32>())
6576                .collect::<Result<nd::Array1<i32>, _>>()?;
6577            clone.sex = Some(Rc::new(array));
6578        }
6579        if clone.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6580            clone.mother = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6581        }
6582        if clone.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6583            clone.father = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6584        }
6585        if clone.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6586            clone.iid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6587        }
6588        if clone.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6589            clone.fid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6590        }
6591
6592        clone.check_counts(Some(count), None)?;
6593
6594        Ok((clone, count))
6595    }
6596
6597    /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty
6598    /// fields with a .fam file in the cloud.
6599    ///
6600    /// # Example
6601    ///
6602    /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6603    /// Do not skip any fields.
6604    /// ```
6605    /// use ndarray as nd;
6606    /// use std::collections::HashSet;
6607    /// use bed_reader::{Metadata, MetadataFields, sample_url, CloudFile};
6608    ///
6609    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
6610    /// let skip_set = HashSet::<MetadataFields>::new();
6611    /// let fam_cloud_file = CloudFile::new(sample_url("small.fam")?)?;
6612    /// let bim_cloud_file = CloudFile::new(sample_url("small.bim")?)?;
6613    /// let metadata_empty = Metadata::new();
6614    /// let (metadata_fam, iid_count) =
6615    ///     metadata_empty.read_fam_cloud(&fam_cloud_file, &skip_set).await?;
6616    /// let (metadata_bim, sid_count) =
6617    ///     metadata_fam.read_bim_cloud(&bim_cloud_file, &skip_set).await?;
6618    /// assert_eq!(iid_count, 3);
6619    /// assert_eq!(sid_count, 4);
6620    /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6621    /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6622    /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6623    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
6624    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
6625    /// ```
6626    pub async fn read_fam_cloud(
6627        &self,
6628        cloud_file: &CloudFile,
6629        skip_set: &HashSet<MetadataFields>,
6630    ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6631        let mut field_vec: Vec<usize> = Vec::new();
6632
6633        if self.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6634            field_vec.push(0);
6635        }
6636        if self.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6637            field_vec.push(1);
6638        }
6639        if self.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6640            field_vec.push(2);
6641        }
6642        if self.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6643            field_vec.push(3);
6644        }
6645        if self.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6646            field_vec.push(4);
6647        }
6648        if self.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6649            field_vec.push(5);
6650        }
6651
6652        let (mut vec_of_vec, count) = self
6653            .read_fam_or_bim_cloud(&field_vec, true, cloud_file)
6654            .await?;
6655
6656        let mut clone = self.clone();
6657
6658        // unwraps are safe because we pop once for every push
6659        if clone.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6660            clone.pheno = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6661        }
6662        if clone.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6663            let vec = vec_of_vec.pop().unwrap();
6664            let array = vec
6665                .iter()
6666                .map(|s| s.parse::<i32>())
6667                .collect::<Result<nd::Array1<i32>, _>>()?;
6668            clone.sex = Some(Rc::new(array));
6669        }
6670        if clone.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6671            clone.mother = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6672        }
6673        if clone.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6674            clone.father = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6675        }
6676        if clone.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6677            clone.iid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6678        }
6679        if clone.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6680            clone.fid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6681        }
6682
6683        clone.check_counts(Some(count), None)?;
6684
6685        Ok((clone, count))
6686    }
6687
6688    /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty fields with a .bim file.
6689    ///
6690    /// # Example
6691    ///
6692    /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6693    /// Do not skip any fields.
6694    /// ```
6695    /// use ndarray as nd;
6696    /// use std::collections::HashSet;
6697    /// use bed_reader::{Metadata, MetadataFields, sample_file};
6698    ///
6699    /// let skip_set = HashSet::<MetadataFields>::new();
6700    /// let metadata_empty = Metadata::new();
6701    /// let (metadata_fam, iid_count) =
6702    ///     metadata_empty.read_fam(sample_file("small.fam")?, &skip_set)?;
6703    /// let (metadata_bim, sid_count) =
6704    ///     metadata_fam.read_bim(sample_file("small.bim")?, &skip_set)?;
6705    /// assert_eq!(iid_count, 3);
6706    /// assert_eq!(sid_count, 4);
6707    /// println!("{0:?}", metadata_bim.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6708    /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6709    /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6710    /// # use bed_reader::BedErrorPlus;
6711    /// # Ok::<(), Box<BedErrorPlus>>(())
6712    /// ```
6713    #[anyinput]
6714    pub fn read_bim(
6715        &self,
6716        path: AnyPath,
6717        skip_set: &HashSet<MetadataFields>,
6718    ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6719        let mut field_vec: Vec<usize> = Vec::new();
6720        if self.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6721            field_vec.push(0);
6722        }
6723        if self.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6724            field_vec.push(1);
6725        }
6726
6727        if self.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6728            field_vec.push(2);
6729        }
6730        if self.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6731            field_vec.push(3);
6732        }
6733        if self.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6734            field_vec.push(4);
6735        }
6736        if self.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6737            field_vec.push(5);
6738        }
6739
6740        let mut clone = self.clone();
6741        let (mut vec_of_vec, count) = Metadata::read_fam_or_bim(&field_vec, false, path)?;
6742
6743        // unwraps are safe because we pop once for every push
6744        if clone.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6745            clone.allele_2 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6746        }
6747        if clone.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6748            clone.allele_1 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6749        }
6750        if clone.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6751            let vec = vec_of_vec.pop().unwrap();
6752            let array = vec
6753                .iter()
6754                .map(|s| s.parse::<i32>())
6755                .collect::<Result<nd::Array1<i32>, _>>()?;
6756            clone.bp_position = Some(Rc::new(array));
6757        }
6758        if clone.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6759            let vec = vec_of_vec.pop().unwrap();
6760            let array = vec
6761                .iter()
6762                .map(|s| s.parse::<f32>())
6763                .collect::<Result<nd::Array1<f32>, _>>()?;
6764            clone.cm_position = Some(Rc::new(array));
6765        }
6766
6767        if clone.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6768            clone.sid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6769        }
6770        if clone.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6771            clone.chromosome = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6772        }
6773
6774        clone.check_counts(None, Some(count))?;
6775
6776        Ok((clone, count))
6777    }
6778
6779    /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty
6780    /// fields with a .bim file in the cloud.
6781    ///
6782    /// # Example
6783    ///
6784    /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6785    /// Do not skip any fields.
6786    /// ```
6787    /// use ndarray as nd;
6788    /// use std::collections::HashSet;
6789    /// use bed_reader::{Metadata, MetadataFields, sample_url, CloudFile};
6790    ///
6791    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
6792    /// let skip_set = HashSet::<MetadataFields>::new();
6793    /// let fam_cloud_file = CloudFile::new(sample_url("small.fam")?)?;
6794    /// let bim_cloud_file = CloudFile::new(sample_url("small.bim")?)?;
6795    /// let metadata_empty = Metadata::new();
6796    /// let (metadata_fam, iid_count) =
6797    ///     metadata_empty.read_fam_cloud(&fam_cloud_file, &skip_set).await?;
6798    /// let (metadata_bim, sid_count) =
6799    ///     metadata_fam.read_bim_cloud(&bim_cloud_file, &skip_set).await?;
6800    /// assert_eq!(iid_count, 3);
6801    /// assert_eq!(sid_count, 4);
6802    /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6803    /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6804    /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6805    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
6806    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
6807    /// ```
6808    pub async fn read_bim_cloud(
6809        &self,
6810        cloud_file: &CloudFile,
6811        skip_set: &HashSet<MetadataFields>,
6812    ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6813        let mut field_vec: Vec<usize> = Vec::new();
6814        if self.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6815            field_vec.push(0);
6816        }
6817        if self.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6818            field_vec.push(1);
6819        }
6820
6821        if self.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6822            field_vec.push(2);
6823        }
6824        if self.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6825            field_vec.push(3);
6826        }
6827        if self.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6828            field_vec.push(4);
6829        }
6830        if self.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6831            field_vec.push(5);
6832        }
6833
6834        let mut clone = self.clone();
6835        let (mut vec_of_vec, count) = self
6836            .read_fam_or_bim_cloud(&field_vec, false, cloud_file)
6837            .await?;
6838
6839        // unwraps are safe because we pop once for every push
6840        if clone.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6841            clone.allele_2 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6842        }
6843        if clone.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6844            clone.allele_1 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6845        }
6846        if clone.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6847            let vec = vec_of_vec.pop().unwrap();
6848            let array = vec
6849                .iter()
6850                .map(|s| s.parse::<i32>())
6851                .collect::<Result<nd::Array1<i32>, _>>()?;
6852            clone.bp_position = Some(Rc::new(array));
6853        }
6854        if clone.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6855            let vec = vec_of_vec.pop().unwrap();
6856            let array = vec
6857                .iter()
6858                .map(|s| s.parse::<f32>())
6859                .collect::<Result<nd::Array1<f32>, _>>()?;
6860            clone.cm_position = Some(Rc::new(array));
6861        }
6862
6863        if clone.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6864            clone.sid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6865        }
6866        if clone.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6867            clone.chromosome = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6868        }
6869
6870        clone.check_counts(None, Some(count))?;
6871
6872        Ok((clone, count))
6873    }
6874
6875    #[anyinput]
6876    fn read_fam_or_bim(
6877        field_vec: &[usize],
6878        is_split_whitespace: bool,
6879        path: AnyPath,
6880    ) -> Result<(Vec<Vec<String>>, usize), Box<BedErrorPlus>> {
6881        let mut vec_of_vec = vec![vec![]; field_vec.len()];
6882
6883        let file = File::open(path)?;
6884
6885        let reader = BufReader::new(file);
6886        let mut count = 0;
6887        for line in reader.lines() {
6888            let line = line?;
6889            count += 1;
6890
6891            let fields: Vec<&str> = if is_split_whitespace {
6892                line.split_whitespace().collect()
6893            } else {
6894                line.split('\t').collect()
6895            };
6896
6897            if fields.len() != 6 {
6898                Err(BedError::MetadataFieldCount(
6899                    6,
6900                    fields.len(),
6901                    path_ref_to_string(path),
6902                ))?;
6903            }
6904
6905            let mut of_interest_count = 0;
6906            for (field_index, field) in fields.iter().enumerate() {
6907                if field_vec.contains(&field_index) {
6908                    vec_of_vec[of_interest_count].push((*field).to_string());
6909                    of_interest_count += 1;
6910                }
6911            }
6912        }
6913
6914        Ok((vec_of_vec, count))
6915    }
6916
6917    async fn read_fam_or_bim_cloud(
6918        &self,
6919        field_vec: &[usize],
6920        is_split_whitespace: bool,
6921        cloud_file: &CloudFile,
6922    ) -> Result<(Vec<Vec<String>>, usize), Box<BedErrorPlus>> {
6923        let mut vec_of_vec = vec![vec![]; field_vec.len()];
6924        let mut count = 0;
6925
6926        let mut line_chunks = cloud_file.stream_line_chunks().await?;
6927        while let Some(line_chunk) = line_chunks.next().await {
6928            let line_chunk = line_chunk.map_err(CloudFileError::ObjectStoreError)?;
6929            let lines = std::str::from_utf8(&line_chunk)?.lines();
6930            for line in lines {
6931                count += 1;
6932
6933                let fields: Vec<&str> = if is_split_whitespace {
6934                    line.split_whitespace().collect()
6935                } else {
6936                    line.split('\t').collect()
6937                };
6938
6939                if fields.len() != 6 {
6940                    Err(BedError::MetadataFieldCount(
6941                        6,
6942                        fields.len(),
6943                        cloud_file.to_string(),
6944                    ))?;
6945                }
6946
6947                let mut of_interest_count = 0;
6948                for (field_index, field) in fields.iter().enumerate() {
6949                    if field_vec.contains(&field_index) {
6950                        vec_of_vec[of_interest_count].push((*field).to_string());
6951                        of_interest_count += 1;
6952                    }
6953                }
6954            }
6955        }
6956
6957        Ok((vec_of_vec, count))
6958    }
6959
6960    fn is_some_fam(&self) -> bool {
6961        self.fid.is_some()
6962            && self.iid.is_some()
6963            && self.father.is_some()
6964            && self.mother.is_some()
6965            && self.sex.is_some()
6966            && self.pheno.is_some()
6967    }
6968    fn is_some_bim(&self) -> bool {
6969        self.chromosome.is_some()
6970            && self.sid.is_some()
6971            && self.cm_position.is_some()
6972            && self.bp_position.is_some()
6973            && self.allele_1.is_some()
6974            && self.allele_2.is_some()
6975    }
6976
6977    /// Write the metadata related to individuals/samples to a .fam file.
6978    ///
6979    /// If any of the .fam metadata is not present, the function will return an error.
6980    ///
6981    /// # Example
6982    ///
6983    /// Create metadata with iid and sid arrays, then fill in the other
6984    /// fields with default arrays, finally write the .fam information
6985    /// to a file.
6986    ///```
6987    /// use ndarray as nd;
6988    /// use std::collections::HashSet;
6989    /// use bed_reader::Metadata;
6990    ///
6991    /// let metadata0 = Metadata::builder()
6992    ///     .iid(["i1", "i2", "i3"])
6993    ///     .sid(["s1", "s2", "s3", "s4"])
6994    ///     .build()?;
6995    /// let metadata_filled = metadata0.fill(3, 4)?;
6996
6997    /// let temp_out = temp_testdir::TempDir::default();
6998    /// let output_file = temp_out.join("no_bed.fam");
6999    /// metadata_filled.write_fam(output_file)?;
7000    /// # use bed_reader::BedErrorPlus;
7001    /// # Ok::<(), Box<BedErrorPlus>>(())
7002    /// ```
7003    #[anyinput]
7004    pub fn write_fam(&self, path: AnyPath) -> Result<(), Box<BedErrorPlus>> {
7005        let file = File::create(path)?;
7006        let mut writer = BufWriter::new(file);
7007        let mut result: Result<(), Box<BedErrorPlus>> = Ok(());
7008
7009        if !self.is_some_fam() {
7010            Err(BedError::MetadataMissingForWrite("fam".to_string()))?;
7011        }
7012
7013        // 1st as_ref turns Option<Rc<Array>> into Option<&Rc<Array>>
7014        // unwrap always works because we checked that all the fields are present
7015        // 2nd as as_ref turns &Rc<Array> into &Array
7016        nd::azip!((fid in self.fid.as_ref().unwrap().as_ref(),
7017                   iid in self.iid.as_ref().unwrap().as_ref(),
7018                   father in self.father.as_ref().unwrap().as_ref(),
7019                   mother in self.mother.as_ref().unwrap().as_ref(),
7020                   sex in self.sex.as_ref().unwrap().as_ref(),
7021                   pheno in self.pheno.as_ref().unwrap().as_ref(),
7022                )
7023        {
7024            if result.is_ok() {
7025                if let Err(e) = writeln!(
7026                writer,
7027                "{} {} {} {} {} {}",
7028                *fid, *iid, *father, *mother, *sex, *pheno
7029            )
7030            {
7031            result = Err(Box::new(BedErrorPlus::IOError(e)));
7032            }
7033        }});
7034        result?;
7035
7036        Ok(())
7037    }
7038
7039    /// Write the metadata related to SNPs/variants to a .bim file.
7040    ///
7041    /// If any of the .bim metadata is not present, the function will return an error.
7042    ///
7043    /// # Example
7044    ///
7045    /// Create metadata with iid and sid arrays, then fill in the other
7046    /// fields with default arrays, finally write the .bim information
7047    /// to a file.
7048    ///```
7049    /// use ndarray as nd;
7050    /// use std::collections::HashSet;
7051    /// use bed_reader::Metadata;
7052    ///
7053    /// let metadata0 = Metadata::builder()
7054    ///     .iid(["i1", "i2", "i3"])
7055    ///     .sid(["s1", "s2", "s3", "s4"])
7056    ///     .build()?;
7057    /// let metadata_filled = metadata0.fill(3, 4)?;
7058
7059    /// let temp_out = temp_testdir::TempDir::default();
7060    /// let output_file = temp_out.join("no_bed.bim");
7061    /// metadata_filled.write_bim(output_file)?;
7062    /// # use bed_reader::BedErrorPlus;
7063    /// # Ok::<(), Box<BedErrorPlus>>(())
7064    /// ```
7065    #[anyinput]
7066    pub fn write_bim(&self, path: AnyPath) -> Result<(), Box<BedErrorPlus>> {
7067        let file = File::create(path)?;
7068        let mut writer = BufWriter::new(file);
7069        let mut result: Result<(), Box<BedErrorPlus>> = Ok(());
7070
7071        if !self.is_some_bim() {
7072            Err(BedError::MetadataMissingForWrite("bim".to_string()))?;
7073        }
7074
7075        // 1st as_ref turns Option<Rc<Array>> into Option<&Rc<Array>>
7076        // unwrap always works because we checked that all the fields are present
7077        // 2nd as as_ref turns &Rc<Array> into &Array
7078        nd::azip!((
7079            chromosome in self.chromosome.as_ref().unwrap().as_ref(),
7080            sid in self.sid.as_ref().unwrap().as_ref(),
7081            cm_position in self.cm_position.as_ref().unwrap().as_ref(),
7082            bp_position in self.bp_position.as_ref().unwrap().as_ref(),
7083            allele_1 in self.allele_1.as_ref().unwrap().as_ref(),
7084            allele_2 in self.allele_2.as_ref().unwrap().as_ref(),
7085                )
7086        {
7087            if result.is_ok() {
7088                if let Err(e) = writeln!(
7089                writer,
7090                "{}\t{}\t{}\t{}\t{}\t{}",
7091                *chromosome, *sid, *cm_position, *bp_position, *allele_1, *allele_2
7092                )
7093                {
7094                result = Err(Box::new(BedErrorPlus::IOError(e)));
7095                }
7096            }
7097        });
7098        result?;
7099
7100        Ok(())
7101    }
7102
7103    /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty fields with default values.
7104    ///
7105    /// # Example
7106    /// ```
7107    /// use ndarray as nd;
7108    /// use std::collections::HashSet;
7109    /// use bed_reader::{Metadata, MetadataFields};
7110    ///
7111    /// let metadata0 = Metadata::builder()
7112    ///     .iid(["i1", "i2", "i3"])
7113    ///     .sid(["s1", "s2", "s3", "s4"])
7114    ///     .build()?;
7115    /// let metadata_filled = metadata0.fill(3, 4)?;
7116    ///
7117    /// println!("{0:?}", metadata_filled.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
7118    /// println!("{0:?}", metadata_filled.sid()); // Outputs optional ndarray Some(["s1", "s2", "s3", "s4"]...)
7119    /// println!("{0:?}", metadata_filled.chromosome()); // Outputs optional ndarray Some(["0", "0", "0", "0"]...)
7120    /// # use bed_reader::BedErrorPlus;
7121    /// # Ok::<(), Box<BedErrorPlus>>(())
7122    /// ```
7123    pub fn fill(&self, iid_count: usize, sid_count: usize) -> Result<Metadata, Box<BedErrorPlus>> {
7124        let mut metadata = self.clone();
7125
7126        compute_field("fid", &mut metadata.fid, iid_count, |_| "0".to_string())?;
7127        compute_field("iid", &mut metadata.iid, iid_count, |i| {
7128            format!("iid{}", i + 1)
7129        })?;
7130        compute_field("father", &mut metadata.father, iid_count, |_| {
7131            "0".to_string()
7132        })?;
7133        compute_field("mother", &mut metadata.mother, iid_count, |_| {
7134            "0".to_string()
7135        })?;
7136        compute_field("sex", &mut metadata.sex, iid_count, |_| 0)?;
7137        compute_field("pheno", &mut metadata.pheno, iid_count, |_| "0".to_string())?;
7138        compute_field("chromosome", &mut metadata.chromosome, sid_count, |_| {
7139            "0".to_string()
7140        })?;
7141        compute_field("sid", &mut metadata.sid, sid_count, |i| {
7142            format!("sid{}", i + 1)
7143        })?;
7144        compute_field("cm_position", &mut metadata.cm_position, sid_count, |_| 0.0)?;
7145        compute_field("bp_position", &mut metadata.bp_position, sid_count, |_| 0)?;
7146        compute_field("allele_1", &mut metadata.allele_1, sid_count, |_| {
7147            "A1".to_string()
7148        })?;
7149        compute_field("allele_2", &mut metadata.allele_2, sid_count, |_| {
7150            "A2".to_string()
7151        })?;
7152
7153        Ok(metadata)
7154    }
7155
7156    #[anyinput]
7157    fn set_fid(&mut self, fid: AnyIter<AnyString>) -> &Self {
7158        self.fid = Some(Rc::new(
7159            fid.into_iter().map(|s| s.as_ref().to_owned()).collect(),
7160        ));
7161        self
7162    }
7163
7164    #[anyinput]
7165    fn set_iid(&mut self, iid: AnyIter<AnyString>) -> &Self {
7166        self.iid = Some(Rc::new(
7167            iid.into_iter().map(|s| s.as_ref().to_owned()).collect(),
7168        ));
7169        self
7170    }
7171
7172    #[anyinput]
7173    fn set_father(&mut self, father: AnyIter<AnyString>) -> &Self {
7174        self.father = Some(Rc::new(father.map(|s| s.as_ref().to_owned()).collect()));
7175        self
7176    }
7177
7178    #[anyinput]
7179    fn set_mother(&mut self, mother: AnyIter<AnyString>) -> &Self {
7180        self.mother = Some(Rc::new(mother.map(|s| s.as_ref().to_owned()).collect()));
7181        self
7182    }
7183
7184    #[anyinput]
7185    fn set_sex(&mut self, sex: AnyIter<i32>) -> &Self {
7186        self.sex = Some(Rc::new(sex.collect()));
7187        self
7188    }
7189
7190    #[anyinput]
7191    fn set_pheno(&mut self, pheno: AnyIter<AnyString>) -> &Self {
7192        self.pheno = Some(Rc::new(pheno.map(|s| s.as_ref().to_owned()).collect()));
7193        self
7194    }
7195
7196    #[anyinput]
7197    fn set_chromosome(&mut self, chromosome: AnyIter<AnyString>) -> &Self {
7198        self.chromosome = Some(Rc::new(chromosome.map(|s| s.as_ref().to_owned()).collect()));
7199        self
7200    }
7201
7202    #[anyinput]
7203    fn set_sid(&mut self, sid: AnyIter<AnyString>) -> &Self {
7204        self.sid = Some(Rc::new(sid.map(|s| s.as_ref().to_owned()).collect()));
7205        self
7206    }
7207
7208    #[anyinput]
7209    fn set_cm_position(&mut self, cm_position: AnyIter<f32>) -> &Self {
7210        self.cm_position = Some(Rc::new(cm_position.into_iter().collect()));
7211        self
7212    }
7213
7214    #[anyinput]
7215    fn set_bp_position(&mut self, bp_position: AnyIter<i32>) -> &Self {
7216        self.bp_position = Some(Rc::new(bp_position.into_iter().collect()));
7217        self
7218    }
7219
7220    #[anyinput]
7221    fn set_allele_1(&mut self, allele_1: AnyIter<AnyString>) -> &Self {
7222        self.allele_1 = Some(Rc::new(allele_1.map(|s| s.as_ref().to_owned()).collect()));
7223        self
7224    }
7225
7226    #[anyinput]
7227    fn set_allele_2(&mut self, allele_2: AnyIter<AnyString>) -> &Self {
7228        self.allele_2 = Some(Rc::new(allele_2.map(|s| s.as_ref().to_owned()).collect()));
7229        self
7230    }
7231}
7232
7233#[allow(clippy::option_option)]
7234fn set_field<T>(
7235    field1: &Option<Rc<nd::Array1<T>>>,
7236    field2: &mut Option<Option<Rc<nd::Array1<T>>>>,
7237) {
7238    if let Some(array) = field1 {
7239        *field2 = Some(Some(array.clone()));
7240    }
7241}
7242
7243fn option_rc_as_ref<T>(field: &Option<Rc<nd::Array1<T>>>) -> Option<&nd::Array1<T>> {
7244    match field {
7245        Some(array) => Some(array.as_ref()),
7246        None => None,
7247    }
7248}
7249
7250#[allow(dead_code)]
7251fn matrix_subset_no_alloc<
7252    TIn: Copy + Default + Debug + Sync + Send + Sync + Sized,
7253    TOut: Copy + Default + Debug + Sync + Send + Sync + From<TIn>,
7254>(
7255    in_val: &nd::ArrayView3<'_, TIn>,
7256    iid_index: &[usize],
7257    sid_index: &[usize],
7258    out_val: &mut nd::ArrayViewMut3<'_, TOut>,
7259) -> Result<(), Box<BedErrorPlus>> {
7260    let out_iid_count = iid_index.len();
7261    let out_sid_count = sid_index.len();
7262    let did_count = in_val.dim().2;
7263
7264    if (out_iid_count, out_sid_count, did_count) != out_val.dim() {
7265        Err(BedError::SubsetMismatch(
7266            out_iid_count,
7267            out_sid_count,
7268            out_val.dim().0,
7269            out_val.dim().1,
7270        ))?;
7271    }
7272
7273    // If output is F-order (or in general if iid stride is no more than sid_stride)
7274    if out_val.stride_of(nd::Axis(0)) <= out_val.stride_of(nd::Axis(1)) {
7275        // (No error are possible in the par_azip, so don't have to collect and check them)
7276        nd::par_azip!((mut out_col in out_val.axis_iter_mut(nd::Axis(1)),
7277                    in_sid_i_pr in sid_index) {
7278            let in_col = in_val.index_axis(nd::Axis(1), *in_sid_i_pr);
7279            for did_i in 0..did_count
7280            {
7281                for (out_iid_i, in_iid_i_ptr) in iid_index.iter().enumerate() {
7282                    out_col[(out_iid_i,did_i)] = in_col[(*in_iid_i_ptr,did_i)].into();
7283                }
7284            }
7285        });
7286        Ok(())
7287    } else {
7288        //If output is C-order, transpose input and output and recurse
7289        let in_val_t = in_val.view().permuted_axes([1, 0, 2]);
7290        let mut out_val_t = out_val.view_mut().permuted_axes([1, 0, 2]);
7291        matrix_subset_no_alloc(&in_val_t, sid_index, iid_index, &mut out_val_t)
7292    }
7293}
7294
// Shared `FetchData` instance used by `sample_file` and `sample_files` (and, through
// `sample_files`, by `sample_bed_file`) to obtain sample files.
//
// Arguments, in order:
//   * the registry text, embedded at compile time with `include_str!`;
//   * the URL prefix of the sample-file repository;
//   * the name of the environment variable that selects the local data directory.
// NOTE(review): the last three strings ("github.io", "fastlmm", "bed-reader") presumably are the
// qualifier / organization / application names used to choose the OS cache folder when the
// environment variable is unset — confirm against the `fetch_data` crate documentation.
7295#[fetch_data::ctor]
7296static STATIC_FETCH_DATA: FetchData = FetchData::new(
7297    include_str!("../bed_reader/tests/registry.txt"),
7298    "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/",
7299    "BED_READER_DATA_DIR",
7300    "github.io",
7301    "fastlmm",
7302    "bed-reader",
7303);
7304
7305/// Returns the local path to a sample .bed file. If necessary, the file will be downloaded.
7306///
7307/// The .fam and .bim files will also be downloaded, if they are not already present.
7308/// SHA256 hashes are used to verify that the files are correct.
7309/// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
7310/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
7311#[anyinput]
7312pub fn sample_bed_file(bed_path: AnyPath) -> Result<PathBuf, Box<BedErrorPlus>> {
7313    let mut path_list: Vec<PathBuf> = Vec::new();
7314    for ext in &["bed", "bim", "fam"] {
7315        let file_path = bed_path.with_extension(ext);
7316        path_list.push(file_path);
7317    }
7318
7319    let vec = sample_files(path_list)?;
7320    assert!(vec.len() == 3);
7321    Ok(vec[0].clone())
7322}
7323
7324/// Returns the local path to a sample file. If necessary, the file will be downloaded.
7325///
7326/// A SHA256 hash is used to verify that the file is correct.
7327/// The file will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
7328/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
7329#[anyinput]
7330pub fn sample_file(path: AnyPath) -> Result<PathBuf, Box<BedErrorPlus>> {
7331    Ok(STATIC_FETCH_DATA
7332        .fetch_file(path)
7333        .map_err(|e| BedError::SampleFetch(e.to_string()))?)
7334}
7335
7336/// Returns the local paths to a list of files. If necessary, the files will be downloaded.
7337///
7338/// SHA256 hashes are used to verify that the files are correct.
7339/// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
7340/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
7341#[anyinput]
7342pub fn sample_files(path_list: AnyIter<AnyPath>) -> Result<Vec<PathBuf>, Box<BedErrorPlus>>
7343where
7344{
7345    Ok(STATIC_FETCH_DATA
7346        .fetch_files(path_list)
7347        .map_err(|e| BedError::SampleFetch(e.to_string()))?)
7348}
7349
7350/// An empty set of cloud options
7351///
/// It is a zero-length array of `(&str, String)` pairs. Pass it where cloud options are
/// expected but none are needed, as in the example below.
///
7352/// # Example
7353/// ```
7354/// use cloud_file::{EMPTY_OPTIONS, CloudFile};
7355///
7356/// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
7357/// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/plink_sim_10s_100v_10pmiss.bed";
7358/// let cloud_file = CloudFile::new_with_options(url, EMPTY_OPTIONS)?;
7359/// assert_eq!(cloud_file.read_file_size().await?, 303);
7360/// # Ok::<(), BedErrorPlus>(())}).unwrap();
7361/// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
7362/// ```
// NOTE(review): the example imports `EMPTY_OPTIONS` from `cloud_file` rather than from this
// crate — confirm that this is intended.
7363pub const EMPTY_OPTIONS: [(&str, String); 0] = [];
7364
// Documentation-only module: it declares no items, and its rustdoc text is the included
// markdown file `supplemental_documents/options_etc.md`. Compiled only when the `tokio`
// feature is enabled.
7365#[cfg(feature = "tokio")]
7366pub mod supplemental_document_options {
7367    #![doc = include_str!("supplemental_documents/options_etc.md")]
7368}
7369
// Documentation-only module: it declares no items, and its rustdoc text is the included
// markdown file `supplemental_documents/cloud_urls_etc.md`. Compiled only when the `tokio`
// feature is enabled.
7370#[cfg(feature = "tokio")]
7371pub mod supplemental_document_cloud_urls {
7372    #![doc = include_str!("supplemental_documents/cloud_urls_etc.md")]
7373}