Skip to main content

bed_reader/
lib.rs

1#![warn(missing_docs)]
2#![warn(clippy::pedantic)]
3#![allow(
4    clippy::missing_panics_doc, // LATER: add panics docs
5    clippy::missing_errors_doc, // LATER: add errors docs
6    clippy::similar_names,
7    clippy::cast_possible_truncation,
8    clippy::cast_possible_wrap,
9    clippy::cast_sign_loss,
10    clippy::cast_lossless
11)]
12// Inspired by C++ version by Chris Widmer and Carl Kadie
13
14// See: https://towardsdatascience.com/nine-rules-for-writing-python-extensions-in-rust-d35ea3a4ec29?sk=f8d808d5f414154fdb811e4137011437
15// for an article on how this project uses Rust to create a Python extension.
16
17// For Rust API tips see https://rust-lang.github.io/api-guidelines/necessities.html
18#![doc = include_str!("../README-rust.md")]
19//! ## Main Functions
20//!
21//! | Function | Description |
22//! | -------- | ----------- |
23//! | [`Bed::new`](struct.Bed.html#method.new) or [`Bed::builder`](struct.Bed.html#method.builder) | Open a local PLINK .bed file for reading genotype data and metadata. |
24//! | [`BedCloud::new`](struct.BedCloud.html#method.new), [`BedCloud::new_with_options`](struct.BedCloud.html#method.new_with_options),<br> [`BedCloud::builder`](struct.BedCloud.html#method.builder), [`BedCloud::builder_with_options`](struct.BedCloud.html#method.builder_with_options),<br> [`BedCloud::from_cloud_file`](struct.BedCloud.html#method.from_cloud_file), [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file) | Open a cloud PLINK .bed file for reading genotype data and metadata. |
25//! | [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) | Read genotype data from a local or cloud file. Supports indexing and options. |
26//! | [`WriteOptions::builder`](struct.WriteOptions.html#method.builder) | Write values to a local file in PLINK .bed format. Supports metadata and options. |
27//!
28//! ### `Bed` Metadata Methods
29//!
30//! After using [`Bed::new`](struct.Bed.html#method.new) or [`Bed::builder`](struct.Bed.html#method.builder) to open a PLINK .bed file for reading, use
31//! these methods to see metadata.
32//!
33//! | Method | Description |
34//! | -------- | ----------- |
35//! | [`iid_count`](struct.Bed.html#method.iid_count) | Number of individuals (samples) |
36//! | [`sid_count`](struct.Bed.html#method.sid_count) | Number of SNPs (variants) |
37//! | [`dim`](struct.Bed.html#method.dim) | Number of individuals and SNPs |
38//! | [`fid`](struct.Bed.html#method.fid) | Family id of each individual (sample) |
39//! | [`iid`](struct.Bed.html#method.iid) | Individual id of each individual (sample) |
40//! | [`father`](struct.Bed.html#method.father) | Father id of each individual (sample) |
41//! | [`mother`](struct.Bed.html#method.mother) | Mother id of each individual (sample) |
42//! | [`sex`](struct.Bed.html#method.sex) | Sex of each individual (sample) |
43//! | [`pheno`](struct.Bed.html#method.pheno) | A phenotype for each individual (seldom used) |
44//! | [`chromosome`](struct.Bed.html#method.chromosome) | Chromosome of each SNP (variant) |
45//! | [`sid`](struct.Bed.html#method.sid) | SNP Id of each SNP (variant) |
46//! | [`cm_position`](struct.Bed.html#method.cm_position) | Centimorgan position of each SNP (variant) |
47//! | [`bp_position`](struct.Bed.html#method.bp_position) | Base-pair position of each SNP (variant) |
48//! | [`allele_1`](struct.Bed.html#method.allele_1) | First allele of each SNP (variant) |
49//! | [`allele_2`](struct.Bed.html#method.allele_2) | Second allele of each SNP (variant) |
50//! | [`metadata`](struct.Bed.html#method.metadata) | All the metadata returned as a [`struct.Metadata`](struct.Metadata.html) |
51//!
52//! ### `ReadOptions`
53//!
54//! When using [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) to read genotype data, use these options to
55//! specify a desired numeric type,
56//! which individuals (samples) to read, which SNPs (variants) to read, etc.
57//!
58//! | Option | Description |
59//! | -------- | ----------- |
60//! | [`i8`](struct.ReadOptionsBuilder.html#method.i8) | Read values as i8 |
61//! | [`f32`](struct.ReadOptionsBuilder.html#method.f32) | Read values as f32 |
62//! | [`f64`](struct.ReadOptionsBuilder.html#method.f64) | Read values as f64 |
63//! | [`iid_index`](struct.ReadOptionsBuilder.html#method.iid_index) | Index of individuals (samples) to read (defaults to all)|
64//! | [`sid_index`](struct.ReadOptionsBuilder.html#method.sid_index) | Index of SNPs (variants) to read (defaults to all) |
65//! | [`f`](struct.ReadOptionsBuilder.html#method.f) | Order of the output array, Fortran-style (default) |
66//! | [`c`](struct.ReadOptionsBuilder.html#method.c) | Order of the output array, C-style |
67//! | [`is_f`](struct.ReadOptionsBuilder.html#method.is_f) | Is order of the output array Fortran-style? (defaults to true)|
68//! | [`missing_value`](struct.ReadOptionsBuilder.html#method.missing_value) | Value to use for missing values (defaults to -127 or NaN) |
69//! | [`count_a1`](struct.ReadOptionsBuilder.html#method.count_a1) | Count the number of allele 1 (default) |
70//! | [`count_a2`](struct.ReadOptionsBuilder.html#method.count_a2) | Count the number of allele 2 |
71//! | [`is_a1_counted`](struct.ReadOptionsBuilder.html#method.is_a1_counted) | Is allele 1 counted? (defaults to true) |
72//! | [`num_threads`](struct.ReadOptionsBuilder.html#method.num_threads) | Number of threads to use (defaults to all processors) |
73//! | [`max_concurrent_requests`](struct.ReadOptionsBuilder.html#method.max_concurrent_requests) | Maximum number of concurrent async requests (defaults to 10) -- Used by [`BedCloud`](struct.BedCloud.html). |
74//! | [`max_chunk_bytes`](struct.ReadOptionsBuilder.html#method.max_chunk_bytes) | Maximum chunk size of async requests (defaults to `8_000_000` bytes) -- Used by [`BedCloud`](struct.BedCloud.html). |
75//!
76//! ### [`Index`](enum.Index.html) Expressions
77//!
78//! Select which individuals (samples) and SNPs (variants) to read by using these
79//! [`iid_index`](struct.ReadOptionsBuilder.html#method.iid_index) and/or
80//! [`sid_index`](struct.ReadOptionsBuilder.html#method.sid_index) expressions.
81//!
82//! | Example | Type | Description |
83//! | -------- | --- | ----------- |
84//! | nothing | `()` | All |
85//! | `2` | `isize` | Index position 2 |
86//! | `-1` | `isize` | Last index position |
87//! | `vec![0, 10, -2]` | `Vec<isize>` | Index positions 0, 10, and 2nd from last |
88//! | `[0, 10, -2]` | `[isize]` and `[isize;n]` | Index positions 0, 10, and 2nd from last |
89//! | `ndarray::array![0, 10, -2]` | `ndarray::Array1<isize>` | Index positions 0, 10, and 2nd from last |
90//! | `10..20` | `Range<usize>` | Index positions 10 (inclusive) to 20 (exclusive). *Note: Rust ranges don't support negatives* |
91//! | `..=19` | `RangeInclusive<usize>` | Index positions 0 (inclusive) to 19 (inclusive). *Note: Rust ranges don't support negatives* |
92//! | *any Rust ranges* | `Range*<usize>` | *Note: Rust ranges don't support negatives* |
93//! | `s![10..20;2]` | `ndarray::SliceInfo1` | Index positions 10 (inclusive) to 20 (exclusive) in steps of 2 |
94//! | `s![-20..-10;-2]` | `ndarray::SliceInfo1` | 10th from last (exclusive) to 20th from last (inclusive), in steps of -2 |
95//! | `vec![true, false, true]` | `Vec<bool>`| Index positions 0 and 2. |
96//! | `[true, false, true]` | `[bool]` and `[bool;n]`| Index positions 0 and 2.|
97//! | `ndarray::array![true, false, true]` | `ndarray::Array1<bool>`| Index positions 0 and 2.|
98//!
99//! ### Environment Variables
100//!
101//! * `BED_READER_NUM_THREADS`
102//! * `NUM_THREADS`
103//!
104//! If [`ReadOptionsBuilder::num_threads`](struct.ReadOptionsBuilder.html#method.num_threads)
105//! or [`WriteOptionsBuilder::num_threads`](struct.WriteOptionsBuilder.html#method.num_threads) is not specified,
106//! the number of threads to use is determined by the environment variables listed above (in order of priority).
107//! If neither of these environment variables is set, all processors are used.
108//!
109//! * `BED_READER_DATA_DIR`
110//!
111//! Any requested sample file will be downloaded to this directory. If the environment variable is not set,
112//! a cache folder, appropriate to the OS, will be used.
113
114mod python_module;
115mod tests;
116use anyinput::anyinput;
117pub use bed_cloud::{sample_bed_url, sample_url, sample_urls, BedCloud, BedCloudBuilder};
118use byteorder::{LittleEndian, ReadBytesExt};
119pub use cloud_file::{CloudFile, CloudFileError};
120use core::fmt::Debug;
121use derive_builder::Builder;
122use dpc_pariter::{scope, IteratorExt};
123use fetch_data::FetchData;
124use futures_util::StreamExt;
125use nd::ShapeBuilder;
126use ndarray as nd;
127use num_traits::{abs, Float, FromPrimitive, Signed, ToPrimitive};
128use rayon::iter::{IntoParallelRefIterator, IntoParallelRefMutIterator, ParallelIterator};
129use rayon::{iter::ParallelBridge, ThreadPoolBuildError};
130use statrs::distribution::{Beta, Continuous};
131use std::cmp::Ordering;
132use std::collections::HashSet;
133use std::fs::{self};
134use std::io::Read;
135use std::io::Seek;
136use std::io::SeekFrom;
137use std::io::Write;
138use std::num::{ParseFloatError, ParseIntError};
139use std::ops::AddAssign;
140use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeInclusive, RangeTo, RangeToInclusive};
141use std::rc::Rc;
142use std::str::Utf8Error;
143use std::{
144    env,
145    fs::File,
146    io::{BufRead, BufReader, BufWriter},
147    ops::RangeFull,
148    path::{Path, PathBuf},
149};
150use thiserror::Error;
151mod bed_cloud;
152
/// First magic byte of a .bed file: ASCII 'l' (lowercase 'L'), 0x6C.
const BED_FILE_MAGIC1: u8 = 0b0110_1100;
/// Second magic byte of a .bed file: ASCII <esc>, 0x1B.
const BED_FILE_MAGIC2: u8 = 0b0001_1011;
/// Number of header bytes at the start of a .bed file, as a `usize`.
const CB_HEADER_USIZE: usize = 3;
/// Number of header bytes at the start of a .bed file, as a `u64`.
const CB_HEADER_U64: u64 = CB_HEADER_USIZE as u64;
157
158// About ndarray
159//  https://docs.rs/ndarray/0.14.0/ndarray/parallel/index.html
160//  https://rust-lang-nursery.github.io/rust-cookbook/concurrency/parallel.html
161//  https://github.com/rust-ndarray/ndarray/blob/master/README-quick-start.md
162//  https://datacrayon.com/posts/programming/rust-notebooks/multidimensional-arrays-and-operations-with-ndarray
163//  https://docs.rs/ndarray/0.14.0/ndarray/doc/ndarray_for_numpy_users/index.html
164//  https://docs.rs/ndarray-npy
165//  https://rust-lang-nursery.github.io/rust-cookbook/science/mathematics/linear_algebra.html
166
167/// All possible errors returned by this library and the libraries it depends on.
168// Based on `<https://nick.groenen.me/posts/rust-error-handling/#the-library-error-type>`
169#[derive(Error, Debug)]
170pub enum BedErrorPlus {
171    #[allow(missing_docs)]
172    #[error(transparent)]
173    BedError(#[from] BedError),
174
175    #[allow(missing_docs)]
176    #[error(transparent)]
177    IOError(#[from] std::io::Error),
178
179    #[allow(missing_docs)]
180    #[error(transparent)]
181    ThreadPoolError(#[from] ThreadPoolBuildError),
182
183    #[allow(missing_docs)]
184    #[error(transparent)]
185    ParseIntError(#[from] ParseIntError),
186
187    #[allow(missing_docs)]
188    #[error(transparent)]
189    ParseFloatError(#[from] ParseFloatError),
190
191    #[allow(missing_docs)]
192    #[error(transparent)]
193    CloudFileError(#[from] CloudFileError),
194
195    #[allow(missing_docs)]
196    #[error(transparent)]
197    Utf8Error(#[from] Utf8Error),
198}
199// https://docs.rs/thiserror/1.0.23/thiserror/
200
201/// All errors specific to this library.
202#[derive(Error, Debug, Clone)]
203pub enum BedError {
204    #[allow(missing_docs)]
205    #[error("Ill-formed BED file. BED file header is incorrect or length is wrong. '{0}'")]
206    IllFormed(String),
207
208    #[allow(missing_docs)]
209    #[error(
210        "Ill-formed BED file. BED file header is incorrect. Expected mode to be 0 or 1. '{0}'"
211    )]
212    BadMode(String),
213
214    #[allow(missing_docs)]
215    #[error("Attempt to write illegal value to BED file. Only 0,1,2,missing allowed. '{0}'")]
216    BadValue(String),
217
218    #[allow(missing_docs)]
219    #[error("Multithreading resulted in panic(s)")]
220    PanickedThread(),
221
222    #[allow(missing_docs)]
223    #[error("No individual observed for the SNP.")]
224    NoIndividuals,
225
226    #[allow(missing_docs)]
227    #[error("Illegal SNP mean.")]
228    IllegalSnpMean,
229
230    #[allow(missing_docs)]
231    #[error("Index to individual larger than the number of individuals. (Index value {0})")]
232    IidIndexTooBig(isize),
233
234    #[allow(missing_docs)]
235    #[error("Index to SNP larger than the number of SNPs. (Index value {0})")]
236    SidIndexTooBig(isize),
237
238    #[allow(missing_docs)]
239    #[error("Length of iid_index ({0}) and sid_index ({1}) must match dimensions of output array ({2},{3}).")]
240    IndexMismatch(usize, usize, usize, usize),
241
242    #[allow(missing_docs)]
243    #[error("Indexes ({0},{1}) too big for files")]
244    IndexesTooBigForFiles(usize, usize),
245
246    #[allow(missing_docs)]
247    #[error("Subset: length of iid_index ({0}) and sid_index ({1}) must match dimensions of output array ({2},{3}).")]
248    SubsetMismatch(usize, usize, usize, usize),
249
250    #[allow(missing_docs)]
251    #[error("Cannot convert beta values to/from float 64")]
252    CannotConvertBetaToFromF64,
253
254    #[allow(missing_docs)]
255    #[error("Cannot create Beta Dist with given parameters ({0},{1})")]
256    CannotCreateBetaDist(f64, f64),
257
258    #[allow(missing_docs)]
259    #[error("Cannot use skipped metadata '{0}'")]
260    CannotUseSkippedMetadata(String),
261
262    #[allow(missing_docs)]
263    #[error("Index starts at {0} but ends at {1}")]
264    StartGreaterThanEnd(usize, usize),
265
266    #[allow(missing_docs)]
267    #[error("Step of zero not allowed")]
268    StepZero,
269
270    #[allow(missing_docs)]
271    #[error("Index starts at {0} but count is {1}")]
272    StartGreaterThanCount(usize, usize),
273
274    #[allow(missing_docs)]
275    #[error("Index ends at {0} but count is {1}")]
276    EndGreaterThanCount(usize, usize),
277
278    #[allow(missing_docs)]
279    #[error("Adding new axis not allowed")]
280    NewAxis,
281
282    #[allow(missing_docs)]
283    #[error("Expect 1-D NDArray SliceInfo")]
284    NdSliceInfoNot1D,
285
286    #[allow(missing_docs)]
287    #[error("Expect {0} fields but find only {1} in '{2}'")]
288    MetadataFieldCount(usize, usize, String),
289
290    #[allow(missing_docs)]
291    #[error("{0}_count values of {1} and {2} are inconsistent")]
292    InconsistentCount(String, usize, usize),
293
294    #[allow(missing_docs)]
295    #[error("Expect bool arrays and vectors to be length {0}, not {1}")]
296    BoolArrayVectorWrongLength(usize, usize),
297
298    #[allow(missing_docs)]
299    #[error("Expect ndarray of shape ({0}, {1}), but found shape ({2}, {3})")]
300    InvalidShape(usize, usize, usize, usize),
301
302    #[allow(missing_docs)]
303    #[error("Can't write '{0}' metadata if some fields are None")]
304    MetadataMissingForWrite(String),
305
306    #[allow(missing_docs)]
307    #[error("Unknown or bad sample file '{0}'")]
308    UnknownOrBadSampleFile(String),
309
310    #[allow(missing_docs)]
311    #[error("The registry of sample files is invalid")]
312    SampleRegistryProblem(),
313
314    #[allow(missing_docs)]
315    #[error("Samples construction failed with error: {0}")]
316    SamplesConstructionFailed(String),
317
318    #[allow(missing_docs)]
319    #[error("Downloaded sample file not seen: {0}")]
320    DownloadedSampleFileNotSeen(String),
321
322    #[allow(missing_docs)]
323    #[error("Downloaded sample file has wrong hash: {0},expected: {1}, actual: {2}")]
324    DownloadedSampleFileWrongHash(String, String, String),
325
326    #[allow(missing_docs)]
327    #[error("Cannot create cache directory")]
328    CannotCreateCacheDir(),
329
330    #[allow(missing_docs)]
331    #[error("Cannot parse URL: '{0}': {1}")]
332    CannotParseUrl(String, String),
333
334    #[allow(missing_docs)]
335    #[error("UninitializedField: '{0}'")]
336    UninitializedField(&'static str),
337
338    #[allow(missing_docs)]
339    #[error("Sample fetch error: {0}")]
340    SampleFetch(String),
341
342    #[allow(missing_docs)]
343    #[error("Encoding destination buffer must be contiguous.")]
344    EncodingContiguous(),
345
346    #[allow(missing_docs)]
347    #[error("Encoding destination buffer have length {0}, (in_vector.len() - 1) // 4 + 1, but it has length {1}.")]
348    EncodingLength(usize, usize),
349}
350
351// Trait alias
352
353/// A trait alias, used internally, for the values of a .bed file, namely i8, f32, f64.
354pub trait BedVal:
355    Copy + Default + From<i8> + Debug + Sync + Send + Sync + Missing + PartialEq
356{
357}
358impl<T> BedVal for T where
359    T: Copy + Default + From<i8> + Debug + Sync + Send + Sync + Missing + PartialEq
360{
361}
362
363fn create_pool(num_threads: usize) -> Result<rayon::ThreadPool, Box<BedErrorPlus>> {
364    match rayon::ThreadPoolBuilder::new()
365        .num_threads(num_threads)
366        .build()
367    {
368        Err(e) => Err(Box::new(e.into())),
369        Ok(pool) => Ok(pool),
370    }
371}
372
373#[allow(clippy::too_many_arguments)]
374#[anyinput]
375fn read_no_alloc<TVal: BedVal>(
376    path: AnyPath,
377    iid_count: usize,
378    sid_count: usize,
379    is_a1_counted: bool,
380    iid_index: &[isize],
381    sid_index: &[isize],
382    missing_value: TVal,
383    num_threads: usize,
384    val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
385) -> Result<(), Box<BedErrorPlus>> {
386    create_pool(num_threads)?.install(|| {
387        let (buf_reader, bytes_vector) = open_and_check(path)?;
388
389        match bytes_vector[2] {
390            0 => {
391                // We swap 'iid' and 'sid' and then reverse the axes.
392                let mut val_t = val.view_mut().reversed_axes();
393                internal_read_no_alloc(
394                    buf_reader,
395                    path,
396                    sid_count,
397                    iid_count,
398                    is_a1_counted,
399                    sid_index,
400                    iid_index,
401                    missing_value,
402                    &mut val_t,
403                )
404            }
405            1 => internal_read_no_alloc(
406                buf_reader,
407                path,
408                iid_count,
409                sid_count,
410                is_a1_counted,
411                iid_index,
412                sid_index,
413                missing_value,
414                val,
415            ),
416            _ => Err(Box::new(BedError::BadMode(path_ref_to_string(path)).into())),
417        }
418    })?;
419    Ok(())
420}
421
422#[anyinput]
423fn path_ref_to_string(path: AnyPath) -> String {
424    PathBuf::from(path).display().to_string()
425}
426
427impl From<BedError> for Box<BedErrorPlus> {
428    fn from(err: BedError) -> Self {
429        Box::new(BedErrorPlus::BedError(err))
430    }
431}
432impl From<std::io::Error> for Box<BedErrorPlus> {
433    fn from(err: std::io::Error) -> Self {
434        Box::new(BedErrorPlus::IOError(err))
435    }
436}
437impl From<ThreadPoolBuildError> for Box<BedErrorPlus> {
438    fn from(err: ThreadPoolBuildError) -> Self {
439        Box::new(BedErrorPlus::ThreadPoolError(err))
440    }
441}
442impl From<ParseIntError> for Box<BedErrorPlus> {
443    fn from(err: ParseIntError) -> Self {
444        Box::new(BedErrorPlus::ParseIntError(err))
445    }
446}
447
448impl From<ParseFloatError> for Box<BedErrorPlus> {
449    fn from(err: ParseFloatError) -> Self {
450        Box::new(BedErrorPlus::ParseFloatError(err))
451    }
452}
453
454impl From<::derive_builder::UninitializedFieldError> for BedErrorPlus {
455    fn from(err: ::derive_builder::UninitializedFieldError) -> Self {
456        BedError::UninitializedField(err.field_name()).into()
457    }
458}
459
460impl From<CloudFileError> for Box<BedErrorPlus> {
461    fn from(err: CloudFileError) -> Self {
462        Box::new(BedErrorPlus::CloudFileError(err))
463    }
464}
465
466impl From<Utf8Error> for Box<BedErrorPlus> {
467    fn from(err: Utf8Error) -> Self {
468        Box::new(BedErrorPlus::Utf8Error(err))
469    }
470}
471
472#[anyinput]
473fn open_and_check(
474    path: AnyPath,
475) -> Result<(BufReader<File>, [u8; CB_HEADER_USIZE]), Box<BedErrorPlus>> {
476    let mut buf_reader = BufReader::new(File::open(path)?);
477    let mut bytes_array: [u8; CB_HEADER_USIZE] = [0; CB_HEADER_USIZE];
478    buf_reader.read_exact(&mut bytes_array)?;
479    if (BED_FILE_MAGIC1 != bytes_array[0]) || (BED_FILE_MAGIC2 != bytes_array[1]) {
480        Err(BedError::IllFormed(path_ref_to_string(path)))?;
481    }
482    Ok((buf_reader, bytes_array))
483}
484
485// trait Max {
486//     fn max() -> Self;
487// }
488
489// impl Max for u8 {
490//     fn max() -> u8 {
491//         u8::MAX
492//     }
493// }
494
495// impl Max for u64 {
496//     fn max() -> u64 {
497//         u64::MAX
498//     }
499// }
500
/// A trait, used internally, that supplies the default missing value for i8, f32, and f64.
pub trait Missing {
    /// The default missing value for a type such as i8, f32, and f64.
    fn missing() -> Self;
}

// Floating-point types use NaN for missing.
impl Missing for f64 {
    fn missing() -> Self {
        Self::NAN
    }
}

impl Missing for f32 {
    fn missing() -> Self {
        Self::NAN
    }
}

// i8 has no NaN, so -127 stands in for missing.
impl Missing for i8 {
    fn missing() -> Self {
        -127
    }
}
524
525#[cfg(not(target_pointer_width = "64"))]
526compile_error!("This code requires a 64-bit target architecture.");
527#[inline]
528fn try_div_4(in_iid_count: usize, in_sid_count: usize) -> Result<u64, Box<BedErrorPlus>> {
529    if in_iid_count == 0 {
530        return Ok(0);
531    }
532    let in_iid_count_div4_u64 = in_iid_count.checked_sub(1).map_or(0, |v| v / 4 + 1) as u64;
533    let in_sid_count_u64 = in_sid_count as u64;
534
535    if in_sid_count > 0 && (u64::MAX - CB_HEADER_U64) / in_sid_count_u64 < in_iid_count_div4_u64 {
536        Err(BedError::IndexesTooBigForFiles(in_iid_count, in_sid_count))?;
537    }
538
539    Ok(in_iid_count_div4_u64)
540}
541
542#[allow(clippy::too_many_arguments)]
543#[anyinput]
544fn internal_read_no_alloc<TVal: BedVal>(
545    mut buf_reader: BufReader<File>,
546    path: AnyPath,
547    in_iid_count: usize,
548    in_sid_count: usize,
549    is_a1_counted: bool,
550    iid_index: &[isize],
551    sid_index: &[isize],
552    missing_value: TVal,
553    out_val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
554) -> Result<(), Box<BedErrorPlus>> {
555    // Check the file length
556
557    let in_iid_count_div4_u64 = try_div_4(in_iid_count, in_sid_count)?;
558    // "as" and math is safe because of early checks
559    let file_len = buf_reader.get_ref().metadata()?.len();
560    let file_len2 = in_iid_count_div4_u64 * (in_sid_count as u64) + CB_HEADER_U64;
561    if file_len != file_len2 {
562        Err(BedError::IllFormed(path_ref_to_string(path)))?;
563    }
564
565    // Check and precompute for each iid_index
566    let (i_div_4_less_start_array, i_mod_4_times_2_array, i_div_4_start, i_div_4_len) =
567        check_and_precompute_iid_index(in_iid_count, iid_index)?;
568
569    // Check and compute work for each sid_index
570    let from_two_bits_to_value = set_up_two_bits_to_value(is_a1_counted, missing_value);
571    let lower_sid_count = -(in_sid_count as isize);
572    let upper_sid_count: isize = (in_sid_count as isize) - 1;
573    // See https://morestina.net/blog/1432/parallel-stream-processing-with-rayon
574    // Possible optimization: We could read snp in their input order instead of their output order
575    sid_index
576        .iter()
577        .map(|in_sid_i_signed| {
578            // Turn signed sid_index into unsigned sid_index (or error)
579            let in_sid_i = if (0..=upper_sid_count).contains(in_sid_i_signed) {
580                *in_sid_i_signed as u64
581            } else if (lower_sid_count..=-1).contains(in_sid_i_signed) {
582                (in_sid_count - ((-in_sid_i_signed) as usize)) as u64
583            } else {
584                Err(BedError::SidIndexTooBig(*in_sid_i_signed))?
585            };
586
587            // Read the iid info for one snp from the disk
588            let mut bytes_vector: Vec<u8> = vec![0; i_div_4_len as usize];
589            let pos: u64 = in_sid_i * in_iid_count_div4_u64 + i_div_4_start + CB_HEADER_U64; // "as" and math is safe because of early checks
590            buf_reader.seek(SeekFrom::Start(pos))?;
591            buf_reader.read_exact(&mut bytes_vector)?;
592            Ok::<_, Box<BedErrorPlus>>(bytes_vector)
593        })
594        // Zip in the column of the output array
595        .zip(out_val.axis_iter_mut(nd::Axis(1)))
596        // In parallel, decompress the iid info and put it in its column
597        .par_bridge() // This seems faster that parallel zip
598        .try_for_each(|(bytes_vector_result, mut col)| match bytes_vector_result {
599            Err(e) => Err(e),
600            Ok(bytes_vector) => {
601                for out_iid_i in 0..iid_index.len() {
602                    let i_div_4_less_start = i_div_4_less_start_array[out_iid_i];
603                    let i_mod_4_times_2 = i_mod_4_times_2_array[out_iid_i];
604                    let genotype_byte: u8 =
605                        (bytes_vector[i_div_4_less_start] >> i_mod_4_times_2) & 0x03;
606                    col[out_iid_i] = from_two_bits_to_value[genotype_byte as usize];
607                }
608                Ok(())
609            }
610        })?;
611
612    Ok(())
613}
614
615type Array1Usize = nd::ArrayBase<nd::OwnedRepr<usize>, nd::Dim<[usize; 1]>>;
616type Array1U8 = nd::ArrayBase<nd::OwnedRepr<u8>, nd::Dim<[usize; 1]>>;
617
618#[allow(clippy::type_complexity)]
619#[allow(clippy::range_plus_one)]
620fn check_and_precompute_iid_index(
621    in_iid_count: usize,
622    iid_index: &[isize],
623) -> Result<(Array1Usize, Array1U8, u64, u64), Box<BedErrorPlus>> {
624    let lower_iid_count = -(in_iid_count as isize);
625    let upper_iid_count: isize = (in_iid_count as isize) - 1;
626    let mut i_div_4_less_start_array = nd::Array1::<usize>::zeros(iid_index.len());
627    let mut i_mod_4_times_2_array = nd::Array1::<u8>::zeros(iid_index.len());
628    let mut result_list: Vec<Result<(), BedError>> = vec![Ok(()); iid_index.len()];
629    nd::par_azip!((in_iid_i_signed in iid_index,
630        i_div_4_less_start in &mut i_div_4_less_start_array,
631        i_mod_4_times_2 in &mut i_mod_4_times_2_array,
632        result in &mut result_list
633    )
634    {
635        let in_iid_i = if (0..=upper_iid_count).contains(in_iid_i_signed) {
636            *result = Ok(());
637            *in_iid_i_signed as usize
638        } else if (lower_iid_count..=-1).contains(in_iid_i_signed) {
639            *result = Ok(());
640            in_iid_count - ((-in_iid_i_signed) as usize)
641        } else {
642            *result = Err(BedError::IidIndexTooBig(
643                *in_iid_i_signed,
644            ));
645            0
646        };
647
648        *i_div_4_less_start = in_iid_i / 4 ;
649        *i_mod_4_times_2 = (in_iid_i % 4 * 2) as u8;
650    });
651    result_list
652        .iter()
653        .par_bridge()
654        .try_for_each(|x| (*x).clone())?;
655
656    let (i_div_4_start, i_div_4_len) =
657        if let Some(min_value) = i_div_4_less_start_array.par_iter().min() {
658            let max_value = *i_div_4_less_start_array.par_iter().max().unwrap(); // safe because of min
659            (*min_value as u64, (max_value + 1 - *min_value) as u64)
660        } else {
661            (0, 0)
662        };
663    // skip of min_value is 0
664    if i_div_4_start > 0 {
665        i_div_4_less_start_array
666            .par_iter_mut()
667            .for_each(|x| *x -= i_div_4_start as usize);
668    }
669    Ok((
670        i_div_4_less_start_array,
671        i_mod_4_times_2_array,
672        i_div_4_start,
673        i_div_4_len,
674    ))
675}
676
/// Builds the table that maps a 2-bit code (0..=3) to an output value.
///
/// Code 1 always maps to `missing_value` and code 2 to 1. Codes 0 and 3 map to
/// 2 and 0 when `count_a1` is true, and to 0 and 2 otherwise.
fn set_up_two_bits_to_value<TVal: From<i8>>(count_a1: bool, missing_value: TVal) -> [TVal; 4] {
    let [zero, one, two] = [0i8, 1, 2].map(TVal::from);

    // Only the two homozygous codes trade places depending on which allele is counted.
    let (code_0, code_3) = if count_a1 { (two, zero) } else { (zero, two) };

    [code_0, missing_value, one, code_3]
}
698
699// Thanks to Dawid for his dpc-pariter library that makes this function scale.
700// https://dpc.pw/adding-parallelism-to-your-rust-iterators
701#[anyinput]
702fn write_val<S, TVal>(
703    path: AnyPath,
704    val: &nd::ArrayBase<S, nd::Ix2>,
705    is_a1_counted: bool,
706    missing: TVal,
707    num_threads: usize,
708) -> Result<(), Box<BedErrorPlus>>
709where
710    S: nd::Data<Elem = TVal>,
711    TVal: BedVal,
712{
713    let (iid_count, sid_count) = val.dim();
714
715    // 4 genotypes per byte so round up
716    let iid_count_div4_u64 = try_div_4(iid_count, sid_count)?;
717
718    // We create and write to a file.
719    // If there is an error, we will delete it.
720    if let Err(e) = write_internal(
721        path,
722        iid_count_div4_u64,
723        val,
724        is_a1_counted,
725        missing,
726        num_threads,
727    ) {
728        // Clean up the file
729        let _ = fs::remove_file(path);
730        Err(e)
731    } else {
732        Ok(())
733    }
734}
735
736// https://www.reddit.com/r/rust/comments/mo4s8e/difference_between_reference_and_view_in_ndarray/
737#[anyinput]
738fn write_internal<S, TVal>(
739    path: AnyPath,
740    iid_count_div4_u64: u64,
741    val: &nd::ArrayBase<S, nd::Ix2>,
742    is_a1_counted: bool,
743    missing: TVal,
744    num_threads: usize,
745) -> Result<(), Box<BedErrorPlus>>
746where
747    S: nd::Data<Elem = TVal>,
748    TVal: BedVal,
749{
750    let mut writer = BufWriter::new(File::create(path)?);
751    // LATER: If this method is later changed
752    // to support major="individual", be sure to
753    // change write_f64, etc and python function 'to_bed' which
754    // currently uses a work-around.
755    writer.write_all(&[BED_FILE_MAGIC1, BED_FILE_MAGIC2, 0x01])?;
756
757    #[allow(clippy::eq_op)]
758    let use_nan = missing != missing; // generic NAN test
759    let zero_code = if is_a1_counted { 3u8 } else { 0u8 };
760    let two_code = if is_a1_counted { 0u8 } else { 3u8 };
761
762    let homozygous_primary_allele = TVal::from(0); // Major Allele
763    let heterozygous_allele = TVal::from(1);
764    let homozygous_secondary_allele = TVal::from(2); // Minor Allele
765
766    scope(|scope| {
767        val.axis_iter(nd::Axis(1))
768            .parallel_map_scoped(scope, {
769                move |column| {
770                    // Convert each column into a bytes_vector
771                    let mut bytes_vector: Vec<u8> = vec![0; iid_count_div4_u64 as usize]; // inits to 0
772                    process_genomic_slice(
773                        &column,
774                        &mut bytes_vector,
775                        homozygous_primary_allele,
776                        heterozygous_allele,
777                        homozygous_secondary_allele,
778                        zero_code,
779                        two_code,
780                        use_nan,
781                        missing,
782                    )?;
783                    Ok::<_, Box<BedErrorPlus>>(bytes_vector)
784                }
785            })
786            .threads(num_threads)
787            .try_for_each(|bytes_vector| {
788                // Write the bytes vector, they must be in order.
789                writer.write_all(&bytes_vector?)?;
790                Ok(())
791            })
792    })
793    .map_err(|_e| BedError::PanickedThread())?
794}
795
796#[allow(dead_code)]
797fn encode1<TVal>(
798    in_vector: &ndarray::ArrayView1<TVal>,
799    out_vector: &mut [u8],
800    is_a1_counted: bool,
801    missing: TVal,
802) -> Result<(), Box<BedErrorPlus>>
803where
804    TVal: BedVal,
805{
806    #[allow(clippy::eq_op)]
807    let use_nan = missing != missing; // generic NAN test
808    let zero_code = if is_a1_counted { 3u8 } else { 0u8 };
809    let two_code = if is_a1_counted { 0u8 } else { 3u8 };
810
811    let homozygous_primary_allele: TVal = TVal::from(0); // Major Allele
812    let heterozygous_allele = TVal::from(1);
813    let homozygous_secondary_allele = TVal::from(2); // Minor Allele
814
815    let minor_div4 = in_vector.len().checked_sub(1).map_or(0, |v| v / 4 + 1);
816    if minor_div4 != out_vector.len() {
817        return Err(Box::new(
818            BedError::EncodingLength(minor_div4, out_vector.len()).into(),
819        ));
820    }
821
822    process_genomic_slice(
823        in_vector,
824        out_vector,
825        homozygous_primary_allele,
826        heterozygous_allele,
827        homozygous_secondary_allele,
828        zero_code,
829        two_code,
830        use_nan,
831        missing,
832    )
833}
834
835#[inline]
836#[allow(clippy::eq_op)]
837#[allow(clippy::too_many_arguments)]
838fn encode_genotype_chunk<TVal>(
839    chunk: nd::ArrayView1<TVal>,
840    homozygous_primary_allele: TVal,
841    heterozygous_allele: TVal,
842    homozygous_secondary_allele: TVal,
843    zero_code: u8,
844    two_code: u8,
845    use_nan: bool,
846    missing: TVal,
847) -> Result<u8, BedError>
848where
849    TVal: PartialEq + Copy,
850{
851    // LATER: Think about unrolling this loop in the usual case of 4 elements
852    let mut output_byte = 0u8;
853    for (within_chunk_index, &v0) in chunk.iter().enumerate() {
854        let genotype_code = if v0 == homozygous_primary_allele {
855            zero_code
856        } else if v0 == heterozygous_allele {
857            2
858        } else if v0 == homozygous_secondary_allele {
859            two_code
860        } else if (use_nan && v0 != v0) || (!use_nan && v0 == missing) {
861            1
862        } else {
863            return Err(BedError::BadValue(
864                "Invalid genotype value encountered during encoding.".to_string(),
865            ));
866        };
867
868        output_byte |= genotype_code << (within_chunk_index * 2);
869    }
870    Ok(output_byte)
871}
872
873#[inline]
874#[allow(clippy::eq_op)]
875#[allow(clippy::too_many_arguments)]
876fn process_genomic_slice<TVal>(
877    in_vector: &ndarray::ArrayView1<TVal>,
878    out_vector: &mut [u8],
879    homozygous_primary_allele: TVal,
880    heterozygous_allele: TVal,
881    homozygous_secondary_allele: TVal,
882    zero_code: u8,
883    two_code: u8,
884    use_nan: bool,
885    missing: TVal,
886) -> Result<(), Box<BedErrorPlus>>
887where
888    TVal: PartialEq + Copy + Sync, // Ensure TVal supports equality check and can be copied
889{
890    // Calculate the number of full chunks and the remainder
891    let full_chunks = in_vector.len() / 4;
892    let remainder = in_vector.len() % 4;
893
894    // Ensure the output vector is correctly sized
895    assert_eq!(out_vector.len(), full_chunks + usize::from(remainder > 0));
896
897    // Zip the exact input chunks with output chunks and process in parallel
898    in_vector
899        .exact_chunks(4)
900        .into_iter()
901        .zip(out_vector.iter_mut())
902        .try_for_each(|(chunk, output_byte)| {
903            *output_byte = encode_genotype_chunk(
904                chunk,
905                homozygous_primary_allele,
906                heterozygous_allele,
907                homozygous_secondary_allele,
908                zero_code,
909                two_code,
910                use_nan,
911                missing,
912            )?;
913            Ok::<(), Box<BedErrorPlus>>(())
914        })?;
915
916    // Process the remainder sequentially if there is any
917    if remainder != 0 {
918        let start = full_chunks * 4;
919        let chunk = in_vector.slice(ndarray::s![start..]);
920        let output_byte = &mut out_vector[full_chunks];
921        *output_byte = encode_genotype_chunk(
922            chunk,
923            homozygous_primary_allele,
924            heterozygous_allele,
925            homozygous_secondary_allele,
926            zero_code,
927            two_code,
928            use_nan,
929            missing,
930        )?;
931    }
932
933    Ok::<(), Box<BedErrorPlus>>(())
934}
935// #[inline]
936// #[allow(clippy::eq_op)]
937// #[allow(clippy::too_many_arguments)]
938// fn process_genomic_slice<TVal>(
939//     in_vector: &ndarray::ArrayView1<TVal>,
940//     out_vector: &mut [u8],
941//     homozygous_primary_allele: TVal,
942//     heterozygous_allele: TVal,
943//     homozygous_secondary_allele: TVal,
944//     zero_code: u8,
945//     two_code: u8,
946//     use_nan: bool,
947//     missing: TVal,
948// ) -> Result<(), Box<BedErrorPlus>>
949// where
950//     TVal: PartialEq + Copy + Sync, // Ensure TVal supports equality check and can be copied
951// {
952//     // Calculate the number of full chunks and the remainder
953//     let full_chunks = in_vector.len() / 4;
954//     let remainder = in_vector.len() % 4;
955
956//     // Ensure the output vector is correctly sized
957//     assert_eq!(out_vector.len(), full_chunks + usize::from(remainder > 0));
958
959//     // Zip the exact input chunks with output chunks and process in parallel
960//     in_vector
961//         .exact_chunks(4)
962//         .into_iter()
963//         .zip(out_vector.iter_mut())
964//         .par_bridge()
965//         .try_for_each(|(chunk, output_byte)| {
966//             *output_byte = encode_genotype_chunk(
967//                 chunk,
968//                 homozygous_primary_allele,
969//                 heterozygous_allele,
970//                 homozygous_secondary_allele,
971//                 zero_code,
972//                 two_code,
973//                 use_nan,
974//                 missing,
975//             )?;
976//             Ok::<(), Box<BedErrorPlus>>(())
977//         })?;
978
979//     // Process the remainder sequentially if there is any
980//     if remainder != 0 {
981//         let start = full_chunks * 4;
982//         let chunk = in_vector.slice(ndarray::s![start..]);
983//         let output_byte = &mut out_vector[full_chunks];
984//         *output_byte = encode_genotype_chunk(
985//             chunk,
986//             homozygous_primary_allele,
987//             heterozygous_allele,
988//             homozygous_secondary_allele,
989//             zero_code,
990//             two_code,
991//             use_nan,
992//             missing,
993//         )?;
994//     }
995
996//     Ok::<(), Box<BedErrorPlus>>(())
997// }
998
999#[anyinput]
1000fn count_lines(path: AnyPath) -> Result<usize, Box<BedErrorPlus>> {
1001    let file = File::open(path)?;
1002    let reader = BufReader::new(file);
1003    let count = reader.lines().count();
1004    Ok(count)
1005}
1006
/// Selects how the per-SNP scaling factor is computed (see `find_factor`).
#[allow(dead_code)]
enum Dist {
    /// Scale by one over the SNP's standard deviation.
    Unit,
    /// Scale by the density of a Beta distribution with parameters `a` and `b`.
    Beta { a: f64, b: f64 },
}
1012
1013#[allow(dead_code)]
1014fn impute_and_zero_mean_snps<
1015    T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1016>(
1017    val: &mut nd::ArrayViewMut2<'_, T>,
1018    dist: &Dist,
1019    apply_in_place: bool,
1020    use_stats: bool,
1021    stats: &mut nd::ArrayViewMut2<'_, T>,
1022) -> Result<(), Box<BedErrorPlus>> {
1023    let two = T::one() + T::one();
1024
1025    // If output is F-order (or in general if iid stride is no more than sid_stride)
1026    if val.stride_of(nd::Axis(0)) <= val.stride_of(nd::Axis(1)) {
1027        let result_list = nd::Zip::from(val.axis_iter_mut(nd::Axis(1)))
1028            .and(stats.axis_iter_mut(nd::Axis(0)))
1029            .par_map_collect(|mut col, mut stats_row| {
1030                process_sid(
1031                    &mut col,
1032                    apply_in_place,
1033                    use_stats,
1034                    &mut stats_row,
1035                    dist,
1036                    two,
1037                )
1038            });
1039
1040        // Check the result list for errors
1041        result_list
1042            .iter()
1043            .par_bridge()
1044            .try_for_each(|x| (*x).clone())?;
1045
1046        Ok(())
1047    } else {
1048        //If C-order
1049        process_all_iids(val, apply_in_place, use_stats, stats, dist, two)
1050    }
1051}
1052
1053// Later move the other fast-lmm functions into their own package
1054#[allow(dead_code)]
1055fn find_factor<
1056    T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1057>(
1058    dist: &Dist,
1059    mean_s: T,
1060    std: T,
1061) -> Result<T, BedError> {
1062    if let Dist::Beta { a, b } = dist {
1063        // Try to create a beta dist
1064        let Ok(beta_dist) = Beta::new(*a, *b) else {
1065            Err(BedError::CannotCreateBetaDist(*a, *b))?
1066        };
1067
1068        // Try to an f64 maf
1069        let mut maf = if let Some(mean_u64) = mean_s.to_f64() {
1070            mean_u64 / 2.0
1071        } else {
1072            Err(BedError::CannotConvertBetaToFromF64)?
1073        };
1074        if maf > 0.5 {
1075            maf = 1.0 - maf;
1076        }
1077
1078        // Try to put the maf in the beta dist
1079        if let Some(b) = T::from_f64(beta_dist.pdf(maf)) {
1080            Ok(b)
1081        } else {
1082            Err(BedError::CannotConvertBetaToFromF64)
1083        }
1084    } else {
1085        Ok(T::one() / std)
1086    }
1087}
1088
1089#[allow(dead_code)]
1090fn process_sid<
1091    T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1092>(
1093    col: &mut nd::ArrayViewMut1<'_, T>,
1094    apply_in_place: bool,
1095    use_stats: bool,
1096    stats_row: &mut nd::ArrayViewMut1<'_, T>,
1097    dist: &Dist,
1098    two: T,
1099) -> Result<(), BedError> {
1100    if !use_stats {
1101        let mut n_observed = T::zero();
1102        let mut sum_s = T::zero(); // the sum of a SNP over all observed individuals
1103        let mut sum2_s = T::zero(); // the sum of the squares of the SNP over all observed individuals
1104
1105        for iid_i in 0..col.len() {
1106            let v = col[iid_i];
1107            if !v.is_nan() {
1108                sum_s = sum_s + v;
1109                sum2_s = sum2_s + v * v;
1110                n_observed = n_observed + T::one();
1111            }
1112        }
1113        if n_observed < T::one() {
1114            //LATER make it work (in some form) for n of 0
1115            Err(BedError::NoIndividuals)?;
1116        }
1117        let mean_s = sum_s / n_observed; //compute the mean over observed individuals for the current SNP
1118        let mean2_s: T = sum2_s / n_observed; //compute the mean of the squared SNP
1119
1120        if mean_s.is_nan()
1121            || (matches!(dist, Dist::Beta { a: _, b: _ })
1122                && ((mean_s > two) || (mean_s < T::zero())))
1123        {
1124            Err(BedError::IllegalSnpMean)?;
1125        }
1126
1127        let variance: T = mean2_s - mean_s * mean_s; //By the Cauchy Schwartz inequality this should always be positive
1128
1129        let mut std = variance.sqrt();
1130        if std.is_nan() || std <= T::zero() {
1131            // All "SNPs" have the same value (aka SNC)
1132            std = T::infinity(); //SNCs are still meaning full in QQ plots because they should be thought of as SNPs without enough data.
1133        }
1134
1135        stats_row[0] = mean_s;
1136        stats_row[1] = std;
1137    }
1138
1139    if apply_in_place {
1140        {
1141            let mean_s = stats_row[0];
1142            let std = stats_row[1];
1143            let is_snc = std.is_infinite();
1144
1145            let factor = find_factor(dist, mean_s, std)?;
1146
1147            for iid_i in 0..col.len() {
1148                //check for Missing (NAN) or SNC
1149                if col[iid_i].is_nan() || is_snc {
1150                    col[iid_i] = T::zero();
1151                } else {
1152                    col[iid_i] = (col[iid_i] - mean_s) * factor;
1153                }
1154            }
1155        }
1156    }
1157    Ok(())
1158}
1159
/// Computes per-SNP statistics and/or standardizes `val` in place, iterating
/// over `val` one row (axis 0) at a time.
///
/// When `use_stats` is false, the mean and standard deviation of the non-NaN
/// values of each column are computed and stored in `stats[(sid, 0)]` and
/// `stats[(sid, 1)]`. A standard deviation that is NaN or not positive is
/// stored as infinity.
///
/// When `apply_in_place` is true, each value is replaced: NaN values and
/// values in a column whose stored standard deviation is infinite become
/// zero; every other value becomes `(value - mean) * factor`, with one factor
/// per column from `find_factor`.
///
/// # Errors
///
/// Returns `BedError::NoIndividuals` or `BedError::IllegalSnpMean` if reported
/// for any column, or any error from `find_factor`.
#[allow(dead_code)]
fn process_all_iids<
    T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
>(
    val: &mut nd::ArrayViewMut2<'_, T>,
    apply_in_place: bool,
    use_stats: bool,
    stats: &mut nd::ArrayViewMut2<'_, T>,
    dist: &Dist,
    two: T,
) -> Result<(), Box<BedErrorPlus>> {
    let sid_count = val.dim().1;

    if !use_stats {
        // O(iid_count * sid_count)
        // Serial that respects C-order is 3-times faster than parallel that doesn't
        // So we parallelize the inner loop instead of the outer loop
        let mut n_observed_array = nd::Array1::<T>::zeros(sid_count);
        let mut sum_s_array = nd::Array1::<T>::zeros(sid_count); //the sum of a SNP over all observed individuals
        let mut sum2_s_array = nd::Array1::<T>::zeros(sid_count); //the sum of the squares of the SNP over all observed individuals
        // Rows are visited one after another; within a row, the per-column
        // accumulators are updated in parallel.
        for row in val.axis_iter(nd::Axis(0)) {
            nd::par_azip!((&v in row,
                n_observed_ptr in &mut n_observed_array,
                sum_s_ptr in &mut sum_s_array,
                sum2_s_ptr in &mut sum2_s_array
            )
                if !v.is_nan() {
                    *n_observed_ptr = *n_observed_ptr + T::one();
                    *sum_s_ptr = *sum_s_ptr + v;
                    *sum2_s_ptr = *sum2_s_ptr + v * v;
                }
            );
        }

        // O(sid_count)
        // One result slot per column, so errors can be recorded from inside
        // the parallel loop and checked afterwards.
        let mut result_list: Vec<Result<(), BedError>> = vec![Ok(()); sid_count];
        nd::par_azip!((mut stats_row in stats.axis_iter_mut(nd::Axis(0)),
                &n_observed in &n_observed_array,
                &sum_s in &sum_s_array,
                &sum2_s in &sum2_s_array,
                result_ptr in &mut result_list)
        {
            if n_observed < T::one() {
                *result_ptr = Err(BedError::NoIndividuals);
                return;
            }
            let mean_s = sum_s / n_observed; //compute the mean over observed individuals for the current SNP
            let mean2_s: T = sum2_s / n_observed; //compute the mean of the squared SNP

            if mean_s.is_nan()
                || (matches!(dist, Dist::Beta { a:_, b:_ }) && ((mean_s > two) || (mean_s < T::zero())))
            {
                *result_ptr = Err(BedError::IllegalSnpMean);
                return;
            }

            let variance: T = mean2_s - mean_s * mean_s; //By the Cauchy-Schwarz inequality this should never be negative
            let mut std = variance.sqrt();
            if std.is_nan() || std <= T::zero() {
                // All "SNPs" have the same value (aka SNC)
                std = T::infinity(); //SNCs are still meaningful in QQ plots because they should be thought of as SNPs without enough data.
            }
            stats_row[0] = mean_s;
            stats_row[1] = std;
        });
        // Check the result list for errors
        result_list.par_iter().try_for_each(|x| (*x).clone())?;
    }

    if apply_in_place {
        // O(sid_count)
        // Compute one scaling factor per row of `stats`.
        let mut factor_array = nd::Array1::<T>::zeros(stats.dim().0);

        stats
            .axis_iter_mut(nd::Axis(0))
            .zip(&mut factor_array)
            .par_bridge()
            .try_for_each(|(stats_row, factor_ptr)| {
                match find_factor(dist, stats_row[0], stats_row[1]) {
                    Err(e) => Err(e),
                    Ok(factor) => {
                        *factor_ptr = factor;
                        Ok(())
                    }
                }
            })?;

        // O(iid_count * sid_count)
        // Rows are processed in parallel; each row is updated value by value.
        nd::par_azip!((mut row in val.axis_iter_mut(nd::Axis(0)))
        {
            for sid_i in 0..row.len() {
                //check for Missing (NAN) or SNC
                if row[sid_i].is_nan() || stats[(sid_i, 1)].is_infinite() {
                    row[sid_i] = T::zero();
                } else {
                    row[sid_i] = (row[sid_i] - stats[(sid_i, 0)]) * factor_array[sid_i];
                }
            }
        });
    }
    Ok(())
}
1262
1263#[allow(dead_code)]
1264#[anyinput]
1265fn file_b_less_aatbx(
1266    a_filename: AnyPath,
1267    offset: u64,
1268    iid_count: usize,
1269    b1: &mut nd::ArrayViewMut2<'_, f64>,
1270    aatb: &mut nd::ArrayViewMut2<'_, f64>,
1271    atb: &mut nd::ArrayViewMut2<'_, f64>,
1272    log_frequency: usize,
1273) -> Result<(), Box<BedErrorPlus>> {
1274    //speed idea from C++:
1275    //Are copies really needed?
1276    //is F, vc C order the best?
1277    //would bigger snp blocks be better
1278
1279    let (a_sid_count, b_sid_count) = atb.dim();
1280    if log_frequency > 0 {
1281        println!("file_b_less_aatbx: iid_count={iid_count}, {a_sid_count}x{b_sid_count} output");
1282    };
1283
1284    // Open the file and move to the starting sid
1285    let mut buf_reader = BufReader::new(File::open(a_filename)?);
1286    buf_reader.seek(SeekFrom::Start(offset))?;
1287
1288    let mut sid_reuse = vec![f64::NAN; iid_count];
1289    for (a_sid_index, mut atb_row) in atb.axis_iter_mut(nd::Axis(0)).enumerate() {
1290        if log_frequency > 0 && a_sid_index % log_frequency == 0 {
1291            println!(
1292                "   working on train_sid_index={a_sid_index} of {a_sid_count} (iid_count={iid_count}, b_sid_count={b_sid_count})"
1293            );
1294        }
1295
1296        buf_reader.read_f64_into::<LittleEndian>(&mut sid_reuse)?;
1297
1298        nd::par_azip!(
1299            (mut atb_element in atb_row.axis_iter_mut(nd::Axis(0)),
1300            b1_col in b1.axis_iter(nd::Axis(1)),
1301            mut aatb_col in aatb.axis_iter_mut(nd::Axis(1)))
1302        {
1303            let mut atbi = 0.0;
1304            for iid_index in 0..iid_count {
1305                atbi += sid_reuse[iid_index] * b1_col[iid_index];
1306            }
1307            atb_element[()] = atbi;
1308            for iid_index in 0..iid_count {
1309                aatb_col[iid_index] -= sid_reuse[iid_index] * atbi;
1310            }
1311        });
1312    }
1313    Ok(())
1314}
1315
/// Fills `dst` with `f64` values read from `src`, decoding them as little-endian.
///
/// Its signature matches the `read_into` parameter of `file_ata_piece` and
/// `file_aat_piece` for `T = f64`.
#[allow(dead_code)]
fn read_into_f64(src: &mut BufReader<File>, dst: &mut [f64]) -> std::io::Result<()> {
    src.read_f64_into::<LittleEndian>(dst)
}
1320
/// Fills `dst` with `f32` values read from `src`, decoding them as little-endian.
///
/// Its signature matches the `read_into` parameter of `file_ata_piece` and
/// `file_aat_piece` for `T = f32`.
#[allow(dead_code)]
fn read_into_f32(src: &mut BufReader<File>, dst: &mut [f32]) -> std::io::Result<()> {
    src.read_f32_into::<LittleEndian>(dst)
}
1325
/* Here are Python algorithms that show how to do a low-memory multiply of A (or A.T) x B (or B.T).
   They are used by file_ata_piece and file_aat_piece, with some optimizations for A and B being the same.
1328
1329output_list = [np.zeros((4,4)) for i in range(4)]
1330
1331# a.T.dot(b)
1332for a_col2 in range(0,4,2): # 1 pass through A, returning output chunk about the same size writing in one pass
1333    buffer_a2 = a[:,a_col2:a_col2+2]
1334    for b_col in range(4): # A1/a1 passes through B
1335        buffer_b = b[:,b_col]
1336        for i in range(4):
1337            b_val = buffer_b[i]
1338            a_slice = buffer_a2[i,:]
1339            for k in range(2): # A1/a1 * A0 passes through the output
1340                output_list[0][a_col2+k,b_col] += a_slice[k]*b_val
1341
1342# a.dot(b.T)
1343for out_col2 in range(0,4,2): # 1 pass through output, returning chunk on each pass
1344    for col in range(4): # O1/o1 passes through A and B
1345        buffer_a = a[:,col]
1346        buffer_b = b[:,col]
1347        for k in range(2):
1348            for i in range(4):
1349                output_list[1][i,out_col2+k] += buffer_a[i]*buffer_b[out_col2+k]
1350
1351# a.T.dot(b.T)
1352for a_col2 in range(0,4,2): # 1 pass through A, returning an output chunk on each pass
1353    buffer_a2 = a[:,a_col2:a_col2+2]
1354    for b_col in range(4):
1355        buffer_b = b[:,b_col]
1356        for i in range(4):
1357            b_val = buffer_b[i]
1358            for k in range(2):
1359                output_list[2][a_col2+k,i] += buffer_a2[b_col,k]*b_val
1360
1361# a.dot(b)  - but should instead do  (b.T.dot(a.T)).T
1362for b_col2 in range(0,4,2): #Transpose of preceding one
1363    buffer_b2 = b[:,b_col2:b_col2+2]
1364    for a_col in range(4):
1365        buffer_a = a[:,a_col]
1366        for i in range(4):
1367            a_val = buffer_a[i]
1368            for k in range(2):
1369                output_list[3][i,b_col2+k] += buffer_b2[a_col,k]*a_val
1370
1371
1372for output in output_list:
1373    print(output)
1374 */
1375
1376// Given A, a matrix in Fortran order in a file
1377// with row_count rows and col_count columns,
1378// and given a starting column,
1379// returns part of A.T x A, the column vs column product.
1380// The piece piece returned has dimensions
1381// (col_count-col_start) x ncols
1382// where ncols <= (col_count-col_start)
1383// Makes only one pass through the file.
1384#[allow(clippy::too_many_arguments)]
1385#[allow(dead_code)]
1386#[anyinput]
1387fn file_ata_piece<T: Float + Send + Sync + Sync + AddAssign>(
1388    path: AnyPath,
1389    offset: u64,
1390    row_count: usize,
1391    col_count: usize,
1392    col_start: usize,
1393    ata_piece: &mut nd::ArrayViewMut2<'_, T>,
1394    log_frequency: usize,
1395    read_into: fn(&mut BufReader<File>, &mut [T]) -> std::io::Result<()>,
1396) -> Result<(), Box<BedErrorPlus>> {
1397    let (nrows, ncols) = ata_piece.dim();
1398    if (col_start >= col_count)
1399        || (col_start + nrows != col_count)
1400        || (col_start + ncols > col_count)
1401    {
1402        Err(BedError::CannotConvertBetaToFromF64)?;
1403    }
1404
1405    file_ata_piece_internal(
1406        path,
1407        offset,
1408        row_count,
1409        col_start,
1410        ata_piece,
1411        log_frequency,
1412        read_into,
1413    )
1414}
1415
1416#[allow(dead_code)]
1417#[anyinput]
1418fn file_ata_piece_internal<T: Float + Send + Sync + Sync + AddAssign>(
1419    path: AnyPath,
1420    offset: u64,
1421    row_count: usize,
1422    col_start: usize,
1423    ata_piece: &mut nd::ArrayViewMut2<'_, T>,
1424    log_frequency: usize,
1425    read_into: fn(&mut BufReader<File>, &mut [T]) -> std::io::Result<()>,
1426) -> Result<(), Box<BedErrorPlus>> {
1427    let (nrows, ncols) = ata_piece.dim();
1428    if log_frequency > 0 {
1429        println!("file_ata_piece: col_start={col_start}, {nrows}x{ncols} output");
1430    };
1431
1432    // Open the file and move to the starting col
1433    let mut buf_reader = BufReader::new(File::open(path)?);
1434    buf_reader.seek(SeekFrom::Start(
1435        offset + col_start as u64 * row_count as u64 * std::mem::size_of::<T>() as u64,
1436    ))?;
1437
1438    let mut col_save_list: Vec<Vec<T>> = vec![];
1439    let mut col_reuse = vec![T::nan(); row_count];
1440
1441    for (col_rel_index, mut ata_row) in ata_piece.axis_iter_mut(nd::Axis(0)).enumerate() {
1442        if log_frequency > 0 && col_rel_index % log_frequency == 0 {
1443            println!("   working on {col_rel_index} of {nrows}");
1444        }
1445
1446        // Read next col and save if in range
1447        let col = if col_save_list.len() < ncols {
1448            let mut col_save = vec![T::nan(); row_count];
1449            read_into(&mut buf_reader, &mut col_save)?;
1450            col_save_list.push(col_save);
1451            col_save_list.last().unwrap() // unwrap is OK here
1452        } else {
1453            read_into(&mut buf_reader, &mut col_reuse)?;
1454            &col_reuse
1455        };
1456
1457        // Multiple saved sids with new sid
1458        let mut ata_row_trimmed = ata_row.slice_mut(nd::s![..col_save_list.len()]);
1459        nd::par_azip!((
1460            col_in_range in &col_save_list,
1461            mut ata_val in ata_row_trimmed.axis_iter_mut(nd::Axis(0))
1462        )
1463        {
1464            ata_val[()] = col_product(col_in_range, col);
1465        });
1466    }
1467
1468    // Reflect the new product values
1469    for row_index in 0usize..ncols - 1 {
1470        for col_index in row_index..ncols {
1471            ata_piece[(row_index, col_index)] = ata_piece[(col_index, row_index)];
1472        }
1473    }
1474    Ok(())
1475}
1476
1477#[allow(dead_code)]
1478fn col_product<T: Float + AddAssign>(col_i: &[T], col_j: &[T]) -> T {
1479    assert!(col_i.len() == col_j.len()); // real assert
1480    let mut product = T::zero();
1481    for row_index in 0..col_i.len() {
1482        product += col_i[row_index] * col_j[row_index];
1483    }
1484    product
1485}
1486
1487// Given A, a matrix in Fortran order in a file
1488// with row_count rows and col_count columns,
1489// and given a starting column,
1490// returns part of A x A.T, the row vs row product.
1491// The piece piece returned has dimensions
1492// (row_count-row_start) x ncols
1493// where ncols <= (row_count-row_start)
1494// Makes only one pass through the file.
1495#[allow(clippy::too_many_arguments)]
1496#[allow(dead_code)]
1497#[anyinput]
1498fn file_aat_piece<T: Float + Sync + Send + Sync + AddAssign>(
1499    path: AnyPath,
1500    offset: u64,
1501    row_count: usize,
1502    col_count: usize,
1503    row_start: usize,
1504    aat_piece: &mut nd::ArrayViewMut2<'_, T>,
1505    log_frequency: usize,
1506    read_into: fn(&mut BufReader<File>, &mut [T]) -> std::io::Result<()>,
1507) -> Result<(), Box<BedErrorPlus>> {
1508    let (nrows, ncols) = aat_piece.dim();
1509
1510    if log_frequency > 0 {
1511        println!("file_aat_piece: row_start={row_start}, {nrows}x{ncols} output");
1512    };
1513
1514    if (row_start >= row_count)
1515        || (row_start + nrows != row_count)
1516        || (row_start + ncols > row_count)
1517    {
1518        Err(BedError::CannotConvertBetaToFromF64)?;
1519    }
1520
1521    aat_piece.fill(T::zero());
1522
1523    // Open the file and move to the starting col
1524    let mut buf_reader = BufReader::new(File::open(path)?);
1525
1526    let mut col = vec![T::nan(); row_count - row_start];
1527
1528    for col_index in 0..col_count {
1529        if log_frequency > 0 && col_index % log_frequency == 0 {
1530            println!("   working on {col_index} of {col_count}");
1531        }
1532
1533        // Read next col
1534        buf_reader.seek(SeekFrom::Start(
1535            offset + (col_index * row_count + row_start) as u64 * std::mem::size_of::<T>() as u64,
1536        ))?;
1537        read_into(&mut buf_reader, &mut col)?;
1538
1539        nd::par_azip!(
1540            (index row_index1,
1541            mut aat_col in aat_piece.axis_iter_mut(nd::Axis(1))
1542        )
1543        {
1544            let val1 = col[row_index1];
1545            for row_index0 in row_index1..nrows {
1546                aat_col[row_index0] += val1 * col[row_index0];
1547            }
1548        });
1549    }
1550
1551    // Notice that ata reflects and aat doesn't. They don't need
1552    // to be the same, but they could be.
1553    Ok(())
1554}
1555
1556// References: https://www.youtube.com/watch?v=0zOg8_B71gE&t=22s
1557// https://deterministic.space/elegant-apis-in-rust.html
1558// https://rust-lang.github.io/api-guidelines/
1559// https://ricardomartins.cc/2016/08/03/convenient_and_idiomatic_conversions_in_rust
1560
/// Represents the metadata from PLINK .fam and .bim files.
///
/// Construct with [`Metadata::builder`](struct.Metadata.html#method.builder) or [`Metadata::new`](struct.Metadata.html#method.new).
///
/// # Example
///
/// Extract metadata from a file.
/// Create a random file with the same metadata.
/// ```
/// use ndarray as nd;
/// use bed_reader::{Bed, WriteOptions, sample_bed_file};
/// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
///
/// let mut bed = Bed::new(sample_bed_file("small.bed")?)?;
/// let metadata = bed.metadata()?;
/// let shape = bed.dim()?;
///
/// let mut rng = StdRng::seed_from_u64(0);
/// let val = nd::Array::random_using(shape, Uniform::from(-1..3), &mut rng);
///
/// let temp_out = temp_testdir::TempDir::default();
/// let output_file = temp_out.join("random.bed");
/// WriteOptions::builder(output_file)
///     .metadata(&metadata)
///     .missing_value(-1)
///     .write(&val)?;
/// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
#[derive(Clone, Debug, Builder, PartialEq)]
#[builder(build_fn(private, name = "build_no_file_check", error = "BedErrorPlus"))]
pub struct Metadata {
    // Every field is an optional, reference-counted 1-D array. The generated
    // builder leaves each one as `None` unless a (custom) setter supplies it.

    // Individual-level fields. These names match columns of a PLINK .fam
    // file (presumably read from there; confirm against the readers).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    fid: Option<Rc<nd::Array1<String>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    iid: Option<Rc<nd::Array1<String>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    father: Option<Rc<nd::Array1<String>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    mother: Option<Rc<nd::Array1<String>>>,

    // i32 based on https://www.cog-genomics.org/plink2/formats#bim
    #[builder(setter(custom))]
    #[builder(default = "None")]
    sex: Option<Rc<nd::Array1<i32>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    pheno: Option<Rc<nd::Array1<String>>>,

    // SNP-level fields. These names match columns of a PLINK .bim file
    // (presumably read from there; confirm against the readers).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    chromosome: Option<Rc<nd::Array1<String>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    sid: Option<Rc<nd::Array1<String>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    cm_position: Option<Rc<nd::Array1<f32>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    bp_position: Option<Rc<nd::Array1<i32>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    allele_1: Option<Rc<nd::Array1<String>>>,
    #[builder(setter(custom))]
    #[builder(default = "None")]
    allele_2: Option<Rc<nd::Array1<String>>>,
}
1633
1634fn lazy_or_skip_count<T>(array: Option<&Rc<nd::Array1<T>>>) -> Option<usize> {
1635    array.map(|array| array.len())
1636}
1637
/// Represents a PLINK .bed file that is open for reading genotype data and metadata.
///
/// Construct with [`Bed::new`](struct.Bed.html#method.new) or [`Bed::builder`](struct.Bed.html#method.builder).
///
/// > For reading cloud files, see [`BedCloud`](struct.BedCloud.html).
///
/// # Example
///
/// Open a file for reading. Then, read the individual (sample) ids
/// and all the genotype data.
/// ```
/// use ndarray as nd;
/// use bed_reader::{Bed, ReadOptions, sample_bed_file};
/// use bed_reader::assert_eq_nan;
///
/// let file_name = sample_bed_file("small.bed")?;
/// let mut bed = Bed::new(file_name)?;
/// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
/// let val = ReadOptions::builder().f64().read(&mut bed)?;
///
/// assert_eq_nan(
///     &val,
///     &nd::array![
///         [1.0, 0.0, f64::NAN, 0.0],
///         [2.0, 0.0, f64::NAN, 2.0],
///         [0.0, 1.0, 2.0, 0.0]
///     ],
/// );
/// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
#[derive(Clone, Debug, Builder)]
#[builder(build_fn(private, name = "build_no_file_check", error = "BedErrorPlus"))]
pub struct Bed {
    // https://stackoverflow.com/questions/32730714/what-is-the-right-way-to-store-an-immutable-path-in-a-struct
    // don't emit a setter, but keep the field declaration on the builder
    /// The file name or path of the .bed file.
    #[builder(setter(custom))]
    path: PathBuf,

    // Optional path for the .fam file; the builder default is `None`.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    fam_path: Option<PathBuf>,

    // Optional path for the .bim file; the builder default is `None`.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    bim_path: Option<PathBuf>,

    // The builder default is `true`.
    // NOTE(review): presumably controls whether the file is validated when
    // the `Bed` is built rather than on first read; confirm.
    #[builder(setter(custom))]
    #[builder(default = "true")]
    is_checked_early: bool,

    // Number of individuals, when known; the builder default is `None`.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    iid_count: Option<usize>,

    // Number of SNPs, when known; the builder default is `None`.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    sid_count: Option<usize>,

    // The metadata associated with this file.
    #[builder(setter(custom))]
    metadata: Metadata,

    // NOTE(review): presumably the metadata fields that should not be read; confirm.
    #[builder(setter(custom))]
    skip_set: HashSet<MetadataFields>,
}
1704
/// All Metadata fields.
///
/// Used by [`Metadata::read_fam`](struct.Metadata.html#method.read_fam) and
/// [`Metadata::read_bim`](struct.Metadata.html#method.read_bim) to skip reading
/// specified metadata fields.
///
/// The first six variants name .fam file fields; the last six name .bim file fields.
/// The declaration order is significant because `Ord` and `PartialOrd` are derived.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Ord, PartialOrd, Hash)]
pub enum MetadataFields {
    /// Family id (fid), from the .fam file.
    Fid,
    /// Individual (sample) id (iid), from the .fam file.
    Iid,
    /// Father id, from the .fam file.
    Father,
    /// Mother id, from the .fam file.
    Mother,
    /// Sex, from the .fam file.
    Sex,
    /// Phenotype, from the .fam file (seldom used).
    Pheno,
    /// Chromosome, from the .bim file.
    Chromosome,
    /// SNP (variant) id (sid), from the .bim file.
    Sid,
    /// Centimorgan position, from the .bim file.
    CmPosition,
    /// Base-pair position, from the .bim file.
    BpPosition,
    /// Allele 1, from the .bim file.
    Allele1,
    /// Allele 2, from the .bim file.
    Allele2,
}
1737
1738impl BedBuilder {
1739    #[anyinput]
1740    fn new(path: AnyPath) -> Self {
1741        Self {
1742            path: Some(path.to_owned()),
1743            fam_path: None,
1744            bim_path: None,
1745
1746            is_checked_early: None,
1747            iid_count: None,
1748            sid_count: None,
1749
1750            metadata: Some(Metadata::new()),
1751            skip_set: Some(HashSet::new()),
1752        }
1753    }
1754
1755    /// Create a [`Bed`](struct.Bed.html) from the builder.
1756    ///
1757    /// > See [`Bed::builder`](struct.Bed.html#method.builder) for more details and examples.
1758    pub fn build(&self) -> Result<Bed, Box<BedErrorPlus>> {
1759        let mut bed = self.build_no_file_check()?;
1760
1761        if bed.is_checked_early {
1762            open_and_check(&bed.path)?;
1763        }
1764
1765        (bed.iid_count, bed.sid_count) = bed.metadata.check_counts(bed.iid_count, bed.sid_count)?;
1766
1767        Ok(bed)
1768    }
1769
1770    // https://stackoverflow.com/questions/38183551/concisely-initializing-a-vector-of-strings
1771    // https://stackoverflow.com/questions/65250496/how-to-convert-intoiteratoritem-asrefstr-to-iteratoritem-str-in-rust
1772
1773    /// Override the family id (fid) values found in the .fam file.
1774    ///
1775    /// By default, if fid values are needed and haven't already been found,
1776    /// they will be read from the .fam file.
1777    /// Providing them here avoids that file read and provides a way to give different values.
1778    #[anyinput]
1779    #[must_use]
1780    pub fn fid(mut self, fid: AnyIter<AnyString>) -> Self {
1781        // Unwrap will always work because BedBuilder starting with some metadata
1782        self.metadata.as_mut().unwrap().set_fid(fid);
1783        self
1784    }
1785
1786    /// Override the individual id (iid) values found in the .fam file.
1787    ///
1788    /// By default, if iid values are needed and haven't already been found,
1789    /// they will be read from the .fam file.
1790    /// Providing them here avoids that file read and provides a way to give different values.
1791    /// ```
1792    /// use ndarray as nd;
1793    /// use bed_reader::{Bed, assert_eq_nan, sample_bed_file};
1794    /// let file_name = sample_bed_file("small.bed")?;
1795    /// use bed_reader::ReadOptions;
1796    ///
1797    /// let mut bed = Bed::builder(file_name)
1798    ///    .iid(["sample1", "sample2", "sample3"])
1799    ///    .build()?;
1800    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["sample1", "sample2", "sample3"]
1801    /// # use bed_reader::BedErrorPlus;
1802    /// # Ok::<(), Box<BedErrorPlus>>(())
1803    /// ```
1804    #[anyinput]
1805    #[must_use]
1806    pub fn iid(mut self, iid: AnyIter<AnyString>) -> Self {
1807        // Unwrap will always work because BedBuilder starting with some metadata
1808        self.metadata.as_mut().unwrap().set_iid(iid);
1809        self
1810    }
1811
1812    /// Override the father values found in the .fam file.
1813    ///
1814    /// By default, if father values are needed and haven't already been found,
1815    /// they will be read from the .fam file.
1816    /// Providing them here avoids that file read and provides a way to gi&ve different values.
1817    #[anyinput]
1818    #[must_use]
1819    pub fn father(mut self, father: AnyIter<AnyString>) -> Self {
1820        // Unwrap will always work because BedBuilder starting with some metadata
1821        self.metadata.as_mut().unwrap().set_father(father);
1822        self
1823    }
1824
1825    /// Override the mother values found in the .fam file.
1826    ///
1827    /// By default, if mother values are needed and haven't already been found,
1828    /// they will be read from the .fam file.
1829    /// Providing them here avoids that file read and provides a way to give different values.
1830    #[anyinput]
1831    #[must_use]
1832    pub fn mother(mut self, mother: AnyIter<AnyString>) -> Self {
1833        // Unwrap will always work because BedBuilder starting with some metadata
1834        self.metadata.as_mut().unwrap().set_mother(mother);
1835        self
1836    }
1837
1838    /// Override the sex values found in the .fam file.
1839    ///
1840    /// By default, if sex values are needed and haven't already been found,
1841    /// they will be read from the .fam file.
1842    /// Providing them here avoids that file read and provides a way to give different values.
1843    #[anyinput]
1844    #[must_use]
1845    pub fn sex(mut self, sex: AnyIter<i32>) -> Self {
1846        // Unwrap will always work because BedBuilder starting with some metadata
1847        self.metadata.as_mut().unwrap().set_sex(sex);
1848        self
1849    }
1850
1851    /// Override the phenotype values found in the .fam file.
1852    ///
1853    /// Note that the phenotype values in the .fam file are seldom used.
1854    /// By default, if phenotype values are needed and haven't already been found,
1855    /// they will be read from the .fam file.
1856    /// Providing them here avoids that file read and provides a way to give different values.
1857    #[anyinput]
1858    #[must_use]
1859    pub fn pheno(mut self, pheno: AnyIter<AnyString>) -> Self {
1860        // Unwrap will always work because BedBuilder starting with some metadata
1861        self.metadata.as_mut().unwrap().set_pheno(pheno);
1862        self
1863    }
1864
1865    /// Override the chromosome values found in the .bim file.
1866    ///
1867    /// By default, if chromosome values are needed and haven't already been found,
1868    /// they will be read from the .bim file.
1869    /// Providing them here avoids that file read and provides a way to give different values.
1870    #[anyinput]
1871    #[must_use]
1872    pub fn chromosome(mut self, chromosome: AnyIter<AnyString>) -> Self {
1873        // Unwrap will always work because BedBuilder starting with some metadata
1874        self.metadata.as_mut().unwrap().set_chromosome(chromosome);
1875        self
1876    }
1877
1878    /// Override the SNP id (sid) values found in the .fam file.
1879    ///
1880    /// By default, if sid values are needed and haven't already been found,
1881    /// they will be read from the .bim file.
1882    /// Providing them here avoids that file read and provides a way to give different values.
1883    /// ```
1884    /// use ndarray as nd;
1885    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
1886    /// let file_name = sample_bed_file("small.bed")?;
1887    ///
1888    /// let mut bed = Bed::builder(file_name)
1889    ///    .sid(["SNP1", "SNP2", "SNP3", "SNP4"])
1890    ///    .build()?;
1891    /// println!("{:?}", bed.sid()?); // Outputs ndarray ["SNP1", "SNP2", "SNP3", "SNP4"]
1892    /// # use bed_reader::BedErrorPlus;
1893    /// # Ok::<(), Box<BedErrorPlus>>(())
1894    /// ```
1895    #[anyinput]
1896    #[must_use]
1897    pub fn sid(mut self, sid: AnyIter<AnyString>) -> Self {
1898        self.metadata.as_mut().unwrap().set_sid(sid);
1899        self
1900    }
1901
1902    /// Override the centimorgan position values found in the .bim file.
1903    ///
1904    /// By default, if centimorgan position values are needed and haven't already been found,
1905    /// they will be read from the .bim file.
1906    /// Providing them here avoids that file read and provides a way to give different values.
1907    #[anyinput]
1908    #[must_use]
1909    pub fn cm_position(mut self, cm_position: AnyIter<f32>) -> Self {
1910        // Unwrap will always work because BedBuilder starting with some metadata
1911        self.metadata.as_mut().unwrap().set_cm_position(cm_position);
1912        self
1913    }
1914
1915    /// Override the base-pair position values found in the .bim file.
1916    ///
1917    /// By default, if base-pair position values are needed and haven't already been found,
1918    /// they will be read from the .bim file.
1919    /// Providing them here avoids that file read and provides a way to give different values.
1920    #[anyinput]
1921    #[must_use]
1922    pub fn bp_position(mut self, bp_position: AnyIter<i32>) -> Self {
1923        // Unwrap will always work because BedBuilder starting with some metadata
1924        self.metadata.as_mut().unwrap().set_bp_position(bp_position);
1925        self
1926    }
1927
1928    /// Override the allele 1 values found in the .bim file.
1929    ///
1930    /// By default, if allele 1 values are needed and haven't already been found,
1931    /// they will be read from the .bim file.
1932    /// Providing them here avoids that file read and provides a way to give different values.
1933    #[anyinput]
1934    #[must_use]
1935    pub fn allele_1(mut self, allele_1: AnyIter<AnyString>) -> Self {
1936        // Unwrap will always work because BedBuilder starting with some metadata
1937        self.metadata.as_mut().unwrap().set_allele_1(allele_1);
1938        self
1939    }
1940
1941    /// Override the allele 2 values found in the .bim file.
1942    ///
1943    /// By default, if allele 2 values are needed and haven't already been found,
1944    /// they will be read from the .bim file.
1945    /// Providing them here avoids that file read and provides a way to give different values.
1946    #[anyinput]
1947    #[must_use]
1948    pub fn allele_2(mut self, allele_2: AnyIter<AnyString>) -> Self {
1949        // Unwrap will always work because BedBuilder starting with some metadata
1950        self.metadata.as_mut().unwrap().set_allele_2(allele_2);
1951        self
1952    }
1953
1954    /// Set the number of individuals (samples) in the data.
1955    ///
1956    /// By default, if this number is needed, it will be found
1957    /// and remembered
1958    /// by opening the .fam file and quickly counting the number
1959    /// of lines. Providing the number thus avoids a file read.
1960    #[must_use]
1961    pub fn iid_count(mut self, count: usize) -> Self {
1962        self.iid_count = Some(Some(count));
1963        self
1964    }
1965
1966    /// Set the number of SNPs in the data.
1967    ///
1968    /// By default, if this number is needed, it will be found
1969    /// and remembered
1970    /// by opening the .bim file and quickly counting the number
1971    /// of lines. Providing the number thus avoids a file read.
1972    #[must_use]
1973    pub fn sid_count(mut self, count: usize) -> Self {
1974        self.sid_count = Some(Some(count));
1975        self
1976    }
1977
1978    /// Don't check the header of the .bed file until and unless the file is actually read.
1979    ///
1980    /// By default, when a [`Bed`](struct.Bed.html) struct is created, the .bed
1981    /// file header is checked. This stops that early check.
1982    #[must_use]
1983    pub fn skip_early_check(mut self) -> Self {
1984        self.is_checked_early = Some(false);
1985        self
1986    }
1987
1988    /// Set the path to the .fam file.
1989    ///
1990    /// If not set, the .fam file will be assumed
1991    /// to have the same name as the .bed file, but with the extension .fam.
1992    ///
1993    /// # Example:
1994    /// Read .bed, .fam, and .bim files with non-standard names.
1995    /// ```
1996    /// use bed_reader::{Bed, ReadOptions, sample_files};
1997    /// let deb_maf_mib = sample_files(["small.deb", "small.maf", "small.mib"])?;
1998    /// let mut bed = Bed::builder(&deb_maf_mib[0])
1999    ///    .fam_path(&deb_maf_mib[1])
2000    ///    .bim_path(&deb_maf_mib[2])
2001    ///    .build()?;
2002    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
2003    /// println!("{:?}", bed.sid()?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
2004    /// # use bed_reader::BedErrorPlus;
2005    /// # Ok::<(), Box<BedErrorPlus>>(())
2006    /// ```
2007    #[anyinput]
2008    #[must_use]
2009    pub fn fam_path(mut self, path: AnyPath) -> Self {
2010        self.fam_path = Some(Some(path.to_owned()));
2011        self
2012    }
2013
2014    /// Set the path to the .bim file.
2015    ///
2016    /// If not set, the .bim file will be assumed
2017    /// to have the same name as the .bed file, but with the extension .bim.
2018    ///
2019    /// # Example:
2020    /// Read .bed, .fam, and .bim files with non-standard names.
2021    /// ```
2022    /// use bed_reader::{Bed, ReadOptions, sample_files};
2023    /// let deb_maf_mib = sample_files(["small.deb", "small.maf", "small.mib"])?;
2024    /// let mut bed = Bed::builder(&deb_maf_mib[0])
2025    ///    .fam_path(&deb_maf_mib[1])
2026    ///    .bim_path(&deb_maf_mib[2])
2027    ///    .build()?;
2028    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
2029    /// println!("{:?}", bed.sid()?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
2030    /// # use bed_reader::BedErrorPlus;
2031    /// # Ok::<(), Box<BedErrorPlus>>(())
2032    /// ```
2033    #[must_use]
2034    #[anyinput]
2035    pub fn bim_path(mut self, path: AnyPath) -> Self {
2036        self.bim_path = Some(Some(path.to_owned()));
2037        self
2038    }
2039
2040    /// Don't read the fid information from the .fam file.
2041    ///
2042    /// By default, when the .fam is read, the fid (the family id) is recorded.
2043    /// This stops that recording. This is useful if the fid is not needed.
2044    /// Asking for the fid after skipping it results in an error.    
2045    #[must_use]
2046    pub fn skip_fid(mut self) -> Self {
2047        // Unwrap will always work because BedBuilder starting with some skip_set
2048        self.skip_set.as_mut().unwrap().insert(MetadataFields::Fid);
2049        self
2050    }
2051
2052    /// Don't read the iid information from the .fam file.
2053    ///
2054    /// By default, when the .fam is read, the iid (the individual id) is recorded.
2055    /// This stops that recording. This is useful if the iid is not needed.
2056    /// Asking for the iid after skipping it results in an error.
2057    #[must_use]
2058    pub fn skip_iid(mut self) -> Self {
2059        // Unwrap will always work because BedBuilder starting with some skip_set
2060        self.skip_set.as_mut().unwrap().insert(MetadataFields::Iid);
2061        self
2062    }
2063
2064    /// Don't read the father information from the .fam file.
2065    ///
2066    /// By default, when the .fam is read, the father id is recorded.
2067    /// This stops that recording. This is useful if the father id is not needed.
2068    /// Asking for the father id after skipping it results in an error.    
2069    #[must_use]
2070    pub fn skip_father(mut self) -> Self {
2071        // Unwrap will always work because BedBuilder starting with some skip_set
2072        self.skip_set
2073            .as_mut()
2074            .unwrap()
2075            .insert(MetadataFields::Father);
2076        self
2077    }
2078
2079    /// Don't read the mother information from the .fam file.
2080    ///
2081    /// By default, when the .fam is read, the mother id is recorded.
2082    /// This stops that recording. This is useful if the mother id is not needed.
2083    /// Asking for the mother id after skipping it results in an error.    
2084    #[must_use]
2085    pub fn skip_mother(mut self) -> Self {
2086        // Unwrap will always work because BedBuilder starting with some skip_set
2087        self.skip_set
2088            .as_mut()
2089            .unwrap()
2090            .insert(MetadataFields::Mother);
2091        self
2092    }
2093
2094    /// Don't read the sex information from the .fam file.
2095    ///
2096    /// By default, when the .fam is read, the sex is recorded.
2097    /// This stops that recording. This is useful if sex is not needed.
2098    /// Asking for sex after skipping it results in an error.    
2099    #[must_use]
2100    pub fn skip_sex(mut self) -> Self {
2101        // Unwrap will always work because BedBuilder starting with some skip_set
2102        self.skip_set.as_mut().unwrap().insert(MetadataFields::Sex);
2103        self
2104    }
2105
2106    /// Don't read the phenotype information from the .fam file.
2107    ///
2108    /// Note that the phenotype information in the .fam file is
2109    /// seldom used.
2110    ///
2111    /// By default, when the .fam is read, the phenotype is recorded.
2112    /// This stops that recording. This is useful if this phenotype
2113    /// information is not needed.
2114    /// Asking for the phenotype after skipping it results in an error.    
2115    #[must_use]
2116    pub fn skip_pheno(mut self) -> Self {
2117        // Unwrap will always work because BedBuilder starting with some skip_set
2118        self.skip_set
2119            .as_mut()
2120            .unwrap()
2121            .insert(MetadataFields::Pheno);
2122        self
2123    }
2124
2125    /// Don't read the chromosome information from the .bim file.
2126    ///
2127    /// By default, when the .bim is read, the chromosome is recorded.
2128    /// This stops that recording. This is useful if the chromosome is not needed.
2129    /// Asking for the chromosome after skipping it results in an error.    
2130    #[must_use]
2131    pub fn skip_chromosome(mut self) -> Self {
2132        // Unwrap will always work because BedBuilder starting with some skip_set
2133        self.skip_set
2134            .as_mut()
2135            .unwrap()
2136            .insert(MetadataFields::Chromosome);
2137        self
2138    }
2139
2140    /// Don't read the SNP id information from the .bim file.
2141    ///
2142    /// By default, when the .bim is read, the sid (SNP id) is recorded.
2143    /// This stops that recording. This is useful if the sid is not needed.
2144    /// Asking for the sid after skipping it results in an error.    
2145    #[must_use]
2146    pub fn skip_sid(mut self) -> Self {
2147        // Unwrap will always work because BedBuilder starting with some skip_set
2148        self.skip_set.as_mut().unwrap().insert(MetadataFields::Sid);
2149        self
2150    }
2151
2152    /// Don't read the centimorgan position information from the .bim file.
2153    ///
2154    /// By default, when the .bim is read, the cm position is recorded.
2155    /// This stops that recording. This is useful if the cm position is not needed.
2156    /// Asking for the cm position after skipping it results in an error.    
2157    #[must_use]
2158    pub fn skip_cm_position(mut self) -> Self {
2159        // Unwrap will always work because BedBuilder starting with some skip_set
2160        self.skip_set
2161            .as_mut()
2162            .unwrap()
2163            .insert(MetadataFields::CmPosition);
2164        self
2165    }
2166
2167    /// Don't read the base-pair position information from the .bim file.
2168    ///
2169    /// By default, when the .bim is read, the bp position is recorded.
2170    /// This stops that recording. This is useful if the bp position is not needed.
2171    /// Asking for the cp position after skipping it results in an error.    
2172    #[must_use]
2173    pub fn skip_bp_position(mut self) -> Self {
2174        // Unwrap will always work because BedBuilder starting with some skip_set
2175        self.skip_set
2176            .as_mut()
2177            .unwrap()
2178            .insert(MetadataFields::BpPosition);
2179        self
2180    }
2181
2182    /// Don't read the allele 1 information from the .bim file.
2183    ///
2184    /// By default, when the .bim is read, allele 1 is recorded.
2185    /// This stops that recording. This is useful if allele 1 is not needed.
2186    /// Asking for allele 1 after skipping it results in an error.    
2187    #[must_use]
2188    pub fn skip_allele_1(mut self) -> Self {
2189        // Unwrap will always work because BedBuilder starting with some skip_set
2190        self.skip_set
2191            .as_mut()
2192            .unwrap()
2193            .insert(MetadataFields::Allele1);
2194        self
2195    }
2196
2197    /// Don't read the allele 2 information from the .bim file.
2198    ///
2199    /// By default, when the .bim is read, allele 2 is recorded.
2200    /// This stops that recording. This is useful if allele 2 is not needed.
2201    /// Asking for allele 2 after skipping it results in an error.    
2202    #[must_use]
2203    pub fn skip_allele_2(mut self) -> Self {
2204        // Unwrap will always work because BedBuilder starting with some skip_set
2205        self.skip_set
2206            .as_mut()
2207            .unwrap()
2208            .insert(MetadataFields::Allele2);
2209        self
2210    }
2211
2212    /// Override the metadata in the .fam and .bim files with info merged in from a [`Metadata`](struct.Metadata.html).
2213    ///
2214    /// # Example
2215    ///
2216    /// In the example, we create a [`Metadata`](struct.Metadata.html) with iid
2217    /// and sid arrays. Next, we use [`BedBuilder`](struct.BedBuilder.html) to override the fid array
2218    /// and an iid array. Then, we add the metadata to the [`BedBuilder`](struct.BedBuilder.html),
2219    /// overwriting iid (again) and overriding sid. Finally, we print these
2220    /// three arrays and chromosome. Chromosome was never overridden so
2221    /// it is read from the *.bim file.
2222    ///```
2223    /// use ndarray as nd;
2224    /// use bed_reader::{Bed, Metadata, sample_bed_file};
2225    ///
2226    /// let file_name = sample_bed_file("small.bed")?;
2227    /// let metadata = Metadata::builder()
2228    ///     .iid(["i1", "i2", "i3"])
2229    ///     .sid(["s1", "s2", "s3", "s4"])
2230    ///     .build()?;
2231    /// let mut bed = Bed::builder(file_name)
2232    ///     .fid(["f1", "f2", "f3"])
2233    ///     .iid(["x1", "x2", "x3"])
2234    ///     .metadata(&metadata)
2235    ///     .build()?;
2236    /// println!("{0:?}", bed.fid()?);  // Outputs ndarray ["f1", "f2", "f3"]
2237    /// println!("{0:?}", bed.iid()?);  // Outputs ndarray ["i1", "i2", "i3"]
2238    /// println!("{0:?}", bed.sid()?);  // Outputs ndarray ["s1", "s2", "s3", "s4"]
2239    /// println!("{0:?}", bed.chromosome()?);  // Outputs ndarray ["1", "1", "5", "Y"]
2240    /// # use bed_reader::BedErrorPlus;
2241    /// # Ok::<(), Box<BedErrorPlus>>(())
2242    /// ```
2243    #[must_use]
2244    pub fn metadata(mut self, metadata: &Metadata) -> Self {
2245        self.metadata = Some(
2246            Metadata::builder()
2247                .metadata(&self.metadata.unwrap()) // unwrap is ok because we know we have metadata
2248                .metadata(metadata) // consistent counts will be check later by the BedBuilder
2249                .build_no_file_check()
2250                .unwrap(), // unwrap is ok because nothing can go wrong
2251        );
2252
2253        self
2254    }
2255}
2256
2257#[anyinput]
2258fn to_metadata_path(
2259    bed_path: AnyPath,
2260    metadata_path: Option<&PathBuf>,
2261    extension: AnyString,
2262) -> PathBuf {
2263    if let Some(metadata_path) = metadata_path {
2264        metadata_path.to_owned()
2265    } else {
2266        bed_path.with_extension(extension)
2267    }
2268}
2269
2270impl Bed {
2271    /// Attempts to open a local PLINK .bed file for reading. Supports options.
2272    ///
2273    /// > Also see [`Bed::new`](struct.Bed.html#method.new), which does not support options.
2274    /// > For reading from the cloud, see [`BedCloud`](struct.BedCloud.html).
2275    ///
2276    /// The options, [listed here](struct.BedBuilder.html#implementations), can:
2277    ///  * set the path of the .fam and/or .bim file
2278    ///  * override some metadata, for example, replace the individual ids.
2279    ///  * set the number of individuals (samples) or SNPs (variants)
2280    ///  * control checking the validity of the .bed file's header
2281    ///  * skip reading selected metadata
2282    ///
2283    /// Note that this method is a lazy about holding files, so unlike `std::fs::File::open(&path)`, it
2284    /// will not necessarily lock the file(s).
2285    ///
2286    /// # Errors
2287    /// By default, this method will return an error if the file is missing or its header
2288    /// is ill-formed. It will also return an error if the options contradict each other.
2289    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2290    /// for all possible errors.
2291    ///
2292    /// # Examples
2293    /// List individual (sample) [`iid`](struct.Bed.html#method.iid) and
2294    /// SNP (variant) [`sid`](struct.Bed.html#method.sid),
2295    /// then [`read`](struct.Bed.html#method.read) the whole file.
2296    ///
2297    /// ```
2298    /// use ndarray as nd;
2299    /// use bed_reader::{Bed, assert_eq_nan, sample_bed_file};
2300    ///
2301    /// let file_name = sample_bed_file("small.bed")?;
2302    /// let mut bed = Bed::builder(file_name).build()?;
2303    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
2304    /// println!("{:?}", bed.sid()?); // Outputs ndarray ["snp1", "snp2", "snp3", "snp4"]
2305    /// let val = bed.read::<f64>()?;
2306    ///
2307    /// assert_eq_nan(
2308    ///     &val,
2309    ///     &nd::array![
2310    ///         [1.0, 0.0, f64::NAN, 0.0],
2311    ///         [2.0, 0.0, f64::NAN, 2.0],
2312    ///         [0.0, 1.0, 2.0, 0.0]
2313    ///     ],
2314    /// );
2315    /// # use bed_reader::BedErrorPlus;
2316    /// # Ok::<(), Box<BedErrorPlus>>(())
2317    /// ```
2318    ///
2319    /// Replace [`iid`](struct.Bed.html#method.iid).
2320    /// ```
2321    /// # use ndarray as nd;
2322    /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2323    /// # let file_name = sample_bed_file("small.bed")?;
2324    /// let mut bed = Bed::builder(file_name)
2325    ///    .iid(["sample1", "sample2", "sample3"])
2326    ///    .build()?;
2327    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["sample1", "sample2", "sample3"]
2328    /// # use bed_reader::BedErrorPlus;
2329    /// # Ok::<(), Box<BedErrorPlus>>(())
2330    /// ```
2331    /// Give the number of individuals (samples) and SNPs (variants) so that the .fam and
2332    /// .bim files need never be opened.
2333    /// ```
2334    /// # use ndarray as nd;
2335    /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2336    /// # let file_name = sample_bed_file("small.bed")?;
2337    /// let mut bed = Bed::builder(file_name).iid_count(3).sid_count(4).build()?;
2338    /// let val = bed.read::<f64>()?;
2339    ///
2340    /// assert_eq_nan(
2341    ///     &val,
2342    ///     &nd::array![
2343    ///         [1.0, 0.0, f64::NAN, 0.0],
2344    ///         [2.0, 0.0, f64::NAN, 2.0],
2345    ///         [0.0, 1.0, 2.0, 0.0]
2346    ///     ],
2347    /// );
2348    /// # use bed_reader::BedErrorPlus;
2349    /// # Ok::<(), Box<BedErrorPlus>>(())
2350    /// ```
2351    /// Mark some properties as "don’t read or offer".
2352    /// ```
2353    /// # use ndarray as nd;
2354    /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2355    /// # let file_name = sample_bed_file("small.bed")?;
2356    /// let mut bed = Bed::builder(file_name)
2357    ///     .skip_father()
2358    ///     .skip_mother()
2359    ///     .skip_sex()
2360    ///     .skip_pheno()
2361    ///     .skip_allele_1()
2362    ///     .skip_allele_2()
2363    ///     .build()?;
2364    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
2365    /// bed.allele_2().expect_err("Can't be read");
2366    /// # use bed_reader::BedErrorPlus;
2367    /// # Ok::<(), Box<BedErrorPlus>>(())
2368    /// ```
2369    ///
2370    #[anyinput]
2371    pub fn builder(path: AnyPath) -> BedBuilder {
2372        BedBuilder::new(path)
2373    }
2374
2375    /// Attempts to open a local PLINK .bed file for reading. Does not support options.
2376    ///
2377    /// > Also see [`Bed::builder`](struct.Bed.html#method.builder), which does support options.
2378    /// > For reading from the cloud, see [`BedCloud`](struct.BedCloud.html).
2379    ///
2380    /// Note that this method is a lazy about holding files, so unlike `std::fs::File::open(&path)`, it
2381    /// will not necessarily lock the file(s).
2382    ///
2383    /// # Errors
2384    /// By default, this method will return an error if the file is missing or its header
2385    /// is ill-formed. See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2386    /// for all possible errors.
2387    ///
2388    /// # Examples
2389    /// List individual (sample) [`iid`](struct.Bed.html#method.iid) and
2390    /// SNP (variant) [`sid`](struct.Bed.html#method.sid),
2391    /// then [`read`](struct.Bed.html#method.read) the whole file.
2392    ///
2393    /// ```
2394    /// use ndarray as nd;
2395    /// use bed_reader::{Bed, assert_eq_nan, sample_bed_file};
2396    ///
2397    /// let file_name = sample_bed_file("small.bed")?;
2398    /// let mut bed = Bed::new(file_name)?;
2399    /// println!("{:?}", bed.iid()?); // Outputs ndarray: ["iid1", "iid2", "iid3"]
2400    /// println!("{:?}", bed.sid()?); // Outputs ndarray: ["sid1", "sid2", "sid3", "sid4"]
2401    /// let val = bed.read::<f64>()?;
2402    ///
2403    /// assert_eq_nan(
2404    ///     &val,
2405    ///     &nd::array![
2406    ///         [1.0, 0.0, f64::NAN, 0.0],
2407    ///         [2.0, 0.0, f64::NAN, 2.0],
2408    ///         [0.0, 1.0, 2.0, 0.0]
2409    ///     ],
2410    /// );
2411    /// # use bed_reader::BedErrorPlus;
2412    /// # Ok::<(), Box<BedErrorPlus>>(())
2413    /// ```
2414    ///
2415    /// Open the file and read data for one SNP (variant)
2416    /// at index position 2.
2417    /// ```
2418    /// # use ndarray as nd;
2419    /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2420    /// # let file_name = sample_bed_file("small.bed")?;
2421    ///
2422    /// let mut bed = Bed::new(file_name)?;
2423    /// let val = ReadOptions::builder().sid_index(2).f64().read(&mut bed)?;
2424    ///
2425    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
2426    /// # use bed_reader::BedErrorPlus;
2427    /// # Ok::<(), Box<BedErrorPlus>>(())
2428    /// ```
2429    #[anyinput]
2430    pub fn new(path: AnyPath) -> Result<Self, Box<BedErrorPlus>> {
2431        Bed::builder(path).build()
2432    }
2433
2434    /// Number of individuals (samples)
2435    ///
2436    /// If this number is needed, it will be found
2437    /// by opening the .fam file and quickly counting the number
2438    /// of lines. Once found, the number will be remembered.
2439    /// The file read can be avoided by setting the
2440    /// number with [`BedBuilder::iid_count`](struct.BedBuilder.html#method.iid_count)
2441    /// or, for example, [`BedBuilder::iid`](struct.BedBuilder.html#method.iid).
2442    ///
2443    /// # Example:
2444    /// ```
2445    /// use ndarray as nd;
2446    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2447    ///
2448    /// let file_name = sample_bed_file("small.bed")?;
2449    /// let mut bed = Bed::new(file_name)?;
2450    /// let iid_count = bed.iid_count()?;
2451    ///
2452    /// assert!(iid_count == 3);
2453    /// # use bed_reader::BedErrorPlus;
2454    /// # Ok::<(), Box<BedErrorPlus>>(())
2455    pub fn iid_count(&mut self) -> Result<usize, Box<BedErrorPlus>> {
2456        if let Some(iid_count) = self.iid_count {
2457            Ok(iid_count)
2458        } else {
2459            let fam_path = self.fam_path();
2460            let iid_count = count_lines(fam_path)?;
2461            self.iid_count = Some(iid_count);
2462            Ok(iid_count)
2463        }
2464    }
2465
2466    /// Number of SNPs (variants)
2467    ///
2468    /// If this number is needed, it will be found
2469    /// by opening the .bim file and quickly counting the number
2470    /// of lines. Once found, the number will be remembered.
2471    /// The file read can be avoided by setting the
2472    /// number with [`BedBuilder::sid_count`](struct.BedBuilder.html#method.sid_count)
2473    /// or, for example, [`BedBuilder::sid`](struct.BedBuilder.html#method.sid).
2474    ///
2475    /// # Example:
2476    /// ```
2477    /// use ndarray as nd;
2478    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2479    ///
2480    /// let file_name = sample_bed_file("small.bed")?;
2481    /// let mut bed = Bed::new(file_name)?;
2482    /// let sid_count = bed.sid_count()?;
2483    ///
2484    /// assert!(sid_count == 4);
2485    /// # use bed_reader::BedErrorPlus;
2486    /// # Ok::<(), Box<BedErrorPlus>>(())
2487    pub fn sid_count(&mut self) -> Result<usize, Box<BedErrorPlus>> {
2488        if let Some(sid_count) = self.sid_count {
2489            Ok(sid_count)
2490        } else {
2491            let bim_path = self.bim_path();
2492            let sid_count = count_lines(bim_path)?;
2493            self.sid_count = Some(sid_count);
2494            Ok(sid_count)
2495        }
2496    }
2497
2498    /// Number of individuals (samples) and SNPs (variants)
2499    ///
2500    /// If these numbers aren't known, they will be found
2501    /// by opening the .fam and .bim files and quickly counting the number
2502    /// of lines. Once found, the numbers will be remembered.
2503    /// The file read can be avoided by setting the
2504    /// number with [`BedBuilder::iid_count`](struct.BedBuilder.html#method.iid_count)
2505    /// and [`BedBuilder::sid_count`](struct.BedBuilder.html#method.sid_count).
2506    ///
2507    /// # Example:
2508    /// ```
2509    /// use ndarray as nd;
2510    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2511    /// use bed_reader::assert_eq_nan;
2512    ///
2513    /// let file_name = sample_bed_file("small.bed")?;
2514    /// let mut bed = Bed::new(file_name)?;
2515    /// let dim = bed.dim()?;
2516    ///
2517    /// assert!(dim == (3,4));
2518    /// # use bed_reader::BedErrorPlus;
2519    /// # Ok::<(), Box<BedErrorPlus>>(())
2520    pub fn dim(&mut self) -> Result<(usize, usize), Box<BedErrorPlus>> {
2521        Ok((self.iid_count()?, self.sid_count()?))
2522    }
2523
2524    /// Family id of each of individual (sample)
2525    ///
2526    /// If this ndarray is needed, it will be found
2527    /// by reading the .fam file. Once found, this ndarray
2528    /// and other information in the .fam file will be remembered.
2529    /// The file read can be avoided by setting the
2530    /// array with [`BedBuilder::fid`](struct.BedBuilder.html#method.fid).
2531    ///
2532    /// # Example:
2533    /// ```
2534    /// use ndarray as nd;
2535    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2536    /// use bed_reader::assert_eq_nan;
2537    ///
2538    /// let file_name = sample_bed_file("small.bed")?;
2539    /// let mut bed = Bed::new(file_name)?;
2540    /// let fid = bed.fid()?;
2541    /// println!("{fid:?}"); // Outputs ndarray ["fid1", "fid1", "fid2"]
2542    /// # use bed_reader::BedErrorPlus;
2543    /// # Ok::<(), Box<BedErrorPlus>>(())
2544    pub fn fid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2545        self.unlazy_fam::<String>(self.metadata.fid.is_none(), MetadataFields::Fid, "fid")?;
2546        Ok(self.metadata.fid.as_ref().unwrap()) //unwrap always works because of lazy_fam
2547    }
2548
2549    /// Individual id of each of individual (sample)
2550    ///
2551    /// If this ndarray is needed, it will be found
2552    /// by reading the .fam file. Once found, this ndarray
2553    /// and other information in the .fam file will be remembered.
2554    /// The file read can be avoided by setting the
2555    /// array with [`BedBuilder::iid`](struct.BedBuilder.html#method.iid).
2556    ///
2557    /// # Example:
2558    /// ```
2559    /// use ndarray as nd;
2560    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2561    /// use bed_reader::assert_eq_nan;
2562    ///
2563    /// let file_name = sample_bed_file("small.bed")?;
2564    /// let mut bed = Bed::new(file_name)?;
2565    /// let iid = bed.iid()?;    ///
2566    /// println!("{iid:?}"); // Outputs ndarray ["iid1", "iid2", "iid3"]
2567    /// # use bed_reader::BedErrorPlus;
2568    /// # Ok::<(), Box<BedErrorPlus>>(())
2569    pub fn iid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2570        self.unlazy_fam::<String>(self.metadata.iid.is_none(), MetadataFields::Iid, "iid")?;
2571        Ok(self.metadata.iid.as_ref().unwrap()) //unwrap always works because of lazy_fam
2572    }
2573
2574    /// Father id of each of individual (sample)
2575    ///
2576    /// If this ndarray is needed, it will be found
2577    /// by reading the .fam file. Once found, this ndarray
2578    /// and other information in the .fam file will be remembered.
2579    /// The file read can be avoided by setting the
2580    /// array with [`BedBuilder::father`](struct.BedBuilder.html#method.father).
2581    ///
2582    /// # Example:
2583    /// ```
2584    /// use ndarray as nd;
2585    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2586    /// use bed_reader::assert_eq_nan;
2587    ///
2588    /// let file_name = sample_bed_file("small.bed")?;
2589    /// let mut bed = Bed::new(file_name)?;
2590    /// let father = bed.father()?;
2591    /// println!("{father:?}"); // Outputs ndarray ["iid23", "iid23", "iid22"]
2592    /// # use bed_reader::BedErrorPlus;
2593    /// # Ok::<(), Box<BedErrorPlus>>(())    
2594    pub fn father(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2595        self.unlazy_fam::<String>(
2596            self.metadata.father.is_none(),
2597            MetadataFields::Father,
2598            "father",
2599        )?;
2600        Ok(self.metadata.father.as_ref().unwrap()) //unwrap always works because of lazy_fam
2601    }
2602
2603    /// Mother id of each of individual (sample)
2604    ///
2605    /// If this ndarray is needed, it will be found
2606    /// by reading the .fam file. Once found, this ndarray
2607    /// and other information in the .fam file will be remembered.
2608    /// The file read can be avoided by setting the
2609    /// array with [`BedBuilder::mother`](struct.BedBuilder.html#method.mother).
2610    ///
2611    /// # Example:
2612    /// ```
2613    /// use ndarray as nd;
2614    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2615    /// use bed_reader::assert_eq_nan;
2616    ///
2617    /// let file_name = sample_bed_file("small.bed")?;
2618    /// let mut bed = Bed::new(file_name)?;
2619    /// let mother = bed.mother()?;
2620    /// println!("{mother:?}"); // Outputs ndarray ["iid34", "iid34", "iid33"]
2621    /// # use bed_reader::BedErrorPlus;
2622    /// # Ok::<(), Box<BedErrorPlus>>(())
2623    pub fn mother(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2624        self.unlazy_fam::<String>(
2625            self.metadata.mother.is_none(),
2626            MetadataFields::Mother,
2627            "mother",
2628        )?;
2629        Ok(self.metadata.mother.as_ref().unwrap()) //unwrap always works because of lazy_fam
2630    }
2631
2632    /// Sex each of individual (sample)
2633    ///
2634    /// 0 is unknown, 1 is male, 2 is female
2635    ///
2636    /// If this ndarray is needed, it will be found
2637    /// by reading the .fam file. Once found, this ndarray
2638    /// and other information in the .fam file will be remembered.
2639    /// The file read can be avoided by setting the
2640    /// array with [`BedBuilder::sex`](struct.BedBuilder.html#method.sex).
2641    ///
2642    /// # Example:
2643    /// ```
2644    /// use ndarray as nd;
2645    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2646    /// use bed_reader::assert_eq_nan;
2647    ///
2648    /// let file_name = sample_bed_file("small.bed")?;
2649    /// let mut bed = Bed::new(file_name)?;
2650    /// let sex = bed.sex()?;
2651    /// println!("{sex:?}"); // Outputs ndarray [1, 2, 0]
2652    /// # use bed_reader::BedErrorPlus;
2653    /// # Ok::<(), Box<BedErrorPlus>>(())
2654    pub fn sex(&mut self) -> Result<&nd::Array1<i32>, Box<BedErrorPlus>> {
2655        self.unlazy_fam::<String>(self.metadata.sex.is_none(), MetadataFields::Sex, "sex")?;
2656        Ok(self.metadata.sex.as_ref().unwrap()) //unwrap always works because of lazy_fam
2657    }
2658
2659    /// A phenotype for each individual (seldom used)
2660    ///
2661    /// If this ndarray is needed, it will be found
2662    /// by reading the .fam file. Once found, this ndarray
2663    /// and other information in the .fam file will be remembered.
2664    /// The file read can be avoided by setting the
2665    /// array with [`BedBuilder::pheno`](struct.BedBuilder.html#method.pheno).
2666    ///
2667    /// # Example:
2668    /// ```
2669    /// use ndarray as nd;
2670    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2671    /// use bed_reader::assert_eq_nan;
2672    ///
2673    /// let file_name = sample_bed_file("small.bed")?;
2674    /// let mut bed = Bed::new(file_name)?;
2675    /// let pheno = bed.pheno()?;
2676    /// println!("{pheno:?}"); // Outputs ndarray ["red", "red", "blue"]
2677    /// # use bed_reader::BedErrorPlus;
2678    /// # Ok::<(), Box<BedErrorPlus>>(())
2679    pub fn pheno(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2680        self.unlazy_fam::<String>(
2681            self.metadata.pheno.is_none(),
2682            MetadataFields::Pheno,
2683            "pheno",
2684        )?;
2685        Ok(self.metadata.pheno.as_ref().unwrap()) //unwrap always works because of lazy_fam
2686    }
2687
2688    /// Chromosome of each SNP (variant)
2689    ///
2690    /// If this ndarray is needed, it will be found
2691    /// by reading the .bim file. Once found, this ndarray
2692    /// and other information in the .bim file will be remembered.
2693    /// The file read can be avoided by setting the
2694    /// array with [`BedBuilder::chromosome`](struct.BedBuilder.html#method.chromosome).
2695    ///
2696    /// # Example:
2697    /// ```
2698    /// use ndarray as nd;
2699    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2700    /// use bed_reader::assert_eq_nan;
2701    ///
2702    /// let file_name = sample_bed_file("small.bed")?;
2703    /// let mut bed = Bed::new(file_name)?;
2704    /// let chromosome = bed.chromosome()?;
2705    /// println!("{chromosome:?}"); // Outputs ndarray ["1", "1", "5", "Y"]
2706    /// # use bed_reader::BedErrorPlus;
2707    /// # Ok::<(), Box<BedErrorPlus>>(())
2708    pub fn chromosome(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2709        self.unlazy_bim::<String>(
2710            self.metadata.chromosome.is_none(),
2711            MetadataFields::Chromosome,
2712            "chromosome",
2713        )?;
2714        Ok(self.metadata.chromosome.as_ref().unwrap()) //unwrap always works because of lazy_bim
2715    }
2716
2717    /// SNP id of each SNP (variant)
2718    ///
2719    /// If this ndarray is needed, it will be found
2720    /// by reading the .bim file. Once found, this ndarray
2721    /// and other information in the .bim file will be remembered.
2722    /// The file read can be avoided by setting the
2723    /// array with [`BedBuilder::sid`](struct.BedBuilder.html#method.sid).
2724    ///
2725    /// # Example:
2726    /// ```
2727    /// use ndarray as nd;
2728    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2729    /// use bed_reader::assert_eq_nan;
2730    ///
2731    /// let file_name = sample_bed_file("small.bed")?;
2732    /// let mut bed = Bed::new(file_name)?;
2733    /// let sid = bed.sid()?;
2734    /// println!("{sid:?}"); // Outputs ndarray "sid1", "sid2", "sid3", "sid4"]
2735    /// # use bed_reader::BedErrorPlus;
2736    /// # Ok::<(), Box<BedErrorPlus>>(())
2737    pub fn sid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2738        self.unlazy_bim::<String>(self.metadata.sid.is_none(), MetadataFields::Sid, "sid")?;
2739        Ok(self.metadata.sid.as_ref().unwrap()) //unwrap always works because of lazy_bim
2740    }
2741
2742    /// Centimorgan position of each SNP (variant)
2743    ///
2744    /// If this ndarray is needed, it will be found
2745    /// by reading the .bim file. Once found, this ndarray
2746    /// and other information in the .bim file will be remembered.
2747    /// The file read can be avoided by setting the
2748    /// array with [`BedBuilder::cm_position`](struct.BedBuilder.html#method.cm_position).
2749    ///
2750    /// # Example:
2751    /// ```
2752    /// use ndarray as nd;
2753    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2754    /// use bed_reader::assert_eq_nan;
2755    ///
2756    /// let file_name = sample_bed_file("small.bed")?;
2757    /// let mut bed = Bed::new(file_name)?;
2758    /// let cm_position = bed.cm_position()?;
2759    /// println!("{cm_position:?}"); // Outputs ndarray [100.4, 2000.5, 4000.7, 7000.9]
2760    /// # use bed_reader::BedErrorPlus;
2761    /// # Ok::<(), Box<BedErrorPlus>>(())
2762    pub fn cm_position(&mut self) -> Result<&nd::Array1<f32>, Box<BedErrorPlus>> {
2763        self.unlazy_bim::<String>(
2764            self.metadata.cm_position.is_none(),
2765            MetadataFields::CmPosition,
2766            "cm_position",
2767        )?;
2768        Ok(self.metadata.cm_position.as_ref().unwrap()) //unwrap always works because of lazy_bim
2769    }
2770
2771    /// Base-pair position of each SNP (variant)
2772    ///
2773    /// If this ndarray is needed, it will be found
2774    /// by reading the .bim file. Once found, this ndarray
2775    /// and other information in the .bim file will be remembered.
2776    /// The file read can be avoided by setting the
2777    /// array with [`BedBuilder::bp_position`](struct.BedBuilder.html#method.bp_position).
2778    ///
2779    /// # Example:
2780    /// ```
2781    /// use ndarray as nd;
2782    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2783    /// use bed_reader::assert_eq_nan;
2784    ///
2785    /// let file_name = sample_bed_file("small.bed")?;
2786    /// let mut bed = Bed::new(file_name)?;
2787    /// let bp_position = bed.bp_position()?;
2788    /// println!("{bp_position:?}"); // Outputs ndarray [1, 100, 1000, 1004]
2789    /// # use bed_reader::BedErrorPlus;
2790    /// # Ok::<(), Box<BedErrorPlus>>(())
2791    pub fn bp_position(&mut self) -> Result<&nd::Array1<i32>, Box<BedErrorPlus>> {
2792        self.unlazy_bim::<String>(
2793            self.metadata.bp_position.is_none(),
2794            MetadataFields::BpPosition,
2795            "bp_position",
2796        )?;
2797        Ok(self.metadata.bp_position.as_ref().unwrap()) //unwrap always works because of lazy_bim
2798    }
2799
2800    /// First allele of each SNP (variant)
2801    ///
2802    /// If this ndarray is needed, it will be found
2803    /// by reading the .bim file. Once found, this ndarray
2804    /// and other information in the .bim file will be remembered.
2805    /// The file read can be avoided by setting the
2806    /// array with [`BedBuilder::allele_1`](struct.BedBuilder.html#method.allele_1).
2807    ///
2808    /// # Example:
2809    /// ```
2810    /// use ndarray as nd;
2811    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2812    /// use bed_reader::assert_eq_nan;
2813    ///
2814    /// let file_name = sample_bed_file("small.bed")?;
2815    /// let mut bed = Bed::new(file_name)?;
2816    /// let allele_1 = bed.allele_1()?;
2817    /// println!("{allele_1:?}"); // Outputs ndarray ["A", "T", "A", "T"]
2818    /// # use bed_reader::BedErrorPlus;
2819    /// # Ok::<(), Box<BedErrorPlus>>(())
2820    pub fn allele_1(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2821        self.unlazy_bim::<String>(
2822            self.metadata.allele_1.is_none(),
2823            MetadataFields::Allele1,
2824            "allele_1",
2825        )?;
2826        Ok(self.metadata.allele_1.as_ref().unwrap()) //unwrap always works because of lazy_bim
2827    }
2828
2829    /// Second allele of each SNP (variant)
2830    ///
2831    /// If this ndarray is needed, it will be found
2832    /// by reading the .bim file. Once found, this ndarray
2833    /// and other information in the .bim file will be remembered.
2834    /// The file read can be avoided by setting the
2835    /// array with [`BedBuilder::allele_2`](struct.BedBuilder.html#method.allele_2).
2836    ///
2837    /// # Example:
2838    /// ```
2839    /// use ndarray as nd;
2840    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2841    /// use bed_reader::assert_eq_nan;
2842    ///
2843    /// let file_name = sample_bed_file("small.bed")?;
2844    /// let mut bed = Bed::new(file_name)?;
2845    /// let allele_2 = bed.allele_2()?;
2846    /// println!("{allele_2:?}"); // Outputs ndarray ["A", "C", "C", "G"]
2847    /// # use bed_reader::BedErrorPlus;
2848    /// # Ok::<(), Box<BedErrorPlus>>(())
2849    pub fn allele_2(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2850        self.unlazy_bim::<String>(
2851            self.metadata.allele_2.is_none(),
2852            MetadataFields::Allele2,
2853            "allele_2",
2854        )?;
2855        Ok(self.metadata.allele_2.as_ref().unwrap()) //unwrap always works because of lazy_bim
2856    }
2857
2858    /// [`Metadata`](struct.Metadata.html) for this dataset, for example, the individual (sample) Ids.
2859    ///
2860    /// This returns a struct with 12 fields. Each field is a ndarray.
2861    /// The struct will always be new, but the 12 ndarrays will be
2862    /// shared with this [`Bed`](struct.Bed.html).
2863    ///
2864    /// If the needed, the metadata will be read from the .fam and/or .bim files.
2865    /// ```
2866    /// use ndarray as nd;
2867    /// use bed_reader::{Bed, sample_bed_file};
2868    ///
2869    /// let file_name = sample_bed_file("small.bed")?;
2870    /// let mut bed = Bed::new(file_name)?;
2871    /// let metadata = bed.metadata()?;
2872    /// println!("{0:?}", metadata.iid()); // Outputs Some(["iid1", "iid2", "iid3"] ...)
2873    /// println!("{0:?}", metadata.sid()); // Outputs Some(["sid1", "sid2", "sid3", "sid4"] ...)
2874    /// # use bed_reader::BedErrorPlus;
2875    /// # Ok::<(), Box<BedErrorPlus>>(())
2876    pub fn metadata(&mut self) -> Result<Metadata, Box<BedErrorPlus>> {
2877        self.fam()?;
2878        self.bim()?;
2879        Ok(self.metadata.clone())
2880    }
2881
2882    /// Return the path of the .bed file.
2883    #[must_use]
2884    pub fn path(&self) -> &Path {
2885        &self.path
2886    }
2887
2888    /// Return the path of the .fam file.
2889    pub fn fam_path(&mut self) -> PathBuf {
2890        // We need to clone the path because self might mutate later
2891        if let Some(path) = &self.fam_path {
2892            path.clone()
2893        } else {
2894            let path = to_metadata_path(&self.path, self.fam_path.as_ref(), "fam");
2895            self.fam_path = Some(path.clone());
2896            path
2897        }
2898    }
2899
2900    /// Return the path of the .bim file.
2901    pub fn bim_path(&mut self) -> PathBuf {
2902        // We need to clone the path because self might mutate later
2903        if let Some(path) = &self.bim_path {
2904            path.clone()
2905        } else {
2906            let path = to_metadata_path(&self.path, self.bim_path.as_ref(), "bim");
2907            self.bim_path = Some(path.clone());
2908            path
2909        }
2910    }
2911
2912    /// Read genotype data.
2913    ///
2914    /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) which supports selection and options.
2915    ///
2916    /// # Errors
2917    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2918    /// for all possible errors.
2919    ///
2920    /// # Examples
2921    /// Read all data in a .bed file.
2922    ///
2923    /// ```
2924    /// use ndarray as nd;
2925    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2926    /// use bed_reader::assert_eq_nan;
2927    ///
2928    /// let file_name = sample_bed_file("small.bed")?;
2929    /// let mut bed = Bed::new(file_name)?;
2930    /// let val = bed.read::<f64>()?;
2931    ///
2932    /// assert_eq_nan(
2933    ///     &val,
2934    ///     &nd::array![
2935    ///         [1.0, 0.0, f64::NAN, 0.0],
2936    ///         [2.0, 0.0, f64::NAN, 2.0],
2937    ///         [0.0, 1.0, 2.0, 0.0]
2938    ///     ],
2939    /// );
2940    ///
2941    /// // Your output array can be f32, f64, or i8
2942    /// let val = bed.read::<i8>()?;
2943    /// assert_eq_nan(
2944    ///     &val,
2945    ///     &nd::array![
2946    ///         [1, 0, -127, 0],
2947    ///         [2, 0, -127, 2],
2948    ///         [0, 1, 2, 0]
2949    ///     ],
2950    /// );
2951    /// # use bed_reader::BedErrorPlus;
2952    /// # Ok::<(), Box<BedErrorPlus>>(())
2953    /// ```    
2954    pub fn read<TVal: BedVal>(&mut self) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
2955        let read_options = ReadOptions::<TVal>::builder().build()?;
2956        self.read_with_options(&read_options)
2957    }
2958
2959    /// Read genotype data with options, into a preallocated array.
2960    ///
2961    /// > Also see [`ReadOptionsBuilder::read_and_fill`](struct.ReadOptionsBuilder.html#method.read_and_fill).
2962    ///
2963    /// Note that options [`ReadOptions::f`](struct.ReadOptions.html#method.f),
2964    /// [`ReadOptions::c`](struct.ReadOptions.html#method.c), and [`ReadOptions::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
2965    /// are ignored. Instead, the order of the preallocated array is used.
2966    ///
2967    /// # Errors
2968    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2969    /// for all possible errors.
2970    ///
2971    /// # Example
2972    ///
2973    /// ```
2974    /// use ndarray as nd;
2975    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2976    /// use bed_reader::assert_eq_nan;
2977    ///
2978    /// // Read the SNPs indexed by 2.
2979    /// let file_name = sample_bed_file("small.bed")?;
2980    /// let mut bed = Bed::new(file_name)?;
2981    /// let read_options = ReadOptions::builder().sid_index(2).build()?;
2982    /// let mut val = nd::Array2::<f64>::default((3, 1));
2983    /// bed.read_and_fill_with_options(&mut val.view_mut(), &read_options)?;
2984    ///
2985    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
2986    /// # use bed_reader::BedErrorPlus;
2987    /// # Ok::<(), Box<BedErrorPlus>>(())
2988    /// ```  
2989    pub fn read_and_fill_with_options<TVal: BedVal>(
2990        &mut self,
2991        val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.,
2992        read_options: &ReadOptions<TVal>,
2993    ) -> Result<(), Box<BedErrorPlus>> {
2994        let iid_count = self.iid_count()?;
2995        let sid_count = self.sid_count()?;
2996
2997        let num_threads = compute_num_threads(read_options.num_threads)?;
2998
2999        // If we already have a Vec<isize>, reference it. If we don't, create one and reference it.
3000        let iid_hold = Hold::new(&read_options.iid_index, iid_count)?;
3001        let iid_index = iid_hold.as_ref();
3002        let sid_hold = Hold::new(&read_options.sid_index, sid_count)?;
3003        let sid_index = sid_hold.as_ref();
3004
3005        let dim = val.dim();
3006        if dim != (iid_index.len(), sid_index.len()) {
3007            Err(BedError::InvalidShape(
3008                iid_index.len(),
3009                sid_index.len(),
3010                dim.0,
3011                dim.1,
3012            ))?;
3013        }
3014
3015        read_no_alloc(
3016            &self.path,
3017            iid_count,
3018            sid_count,
3019            read_options.is_a1_counted,
3020            iid_index,
3021            sid_index,
3022            read_options.missing_value,
3023            num_threads,
3024            &mut val.view_mut(),
3025        )?;
3026
3027        Ok(())
3028    }
3029
3030    /// Read all genotype data into a preallocated array.
3031    ///
3032    /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
3033    ///
3034    /// # Errors
3035    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
3036    /// for all possible errors.
3037    ///
3038    /// # Example
3039    ///
3040    /// ```
3041    /// use ndarray as nd;
3042    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
3043    /// use bed_reader::assert_eq_nan;
3044    ///
3045    /// let file_name = sample_bed_file("small.bed")?;
3046    /// let mut bed = Bed::new(file_name)?;
3047    /// let mut val = nd::Array2::<i8>::default(bed.dim()?);
3048    /// bed.read_and_fill(&mut val.view_mut())?;
3049    ///
3050    /// assert_eq_nan(
3051    ///     &val,
3052    ///     &nd::array![
3053    ///         [1, 0, -127, 0],
3054    ///         [2, 0, -127, 2],
3055    ///         [0, 1, 2, 0]
3056    ///     ],
3057    /// );
3058    /// # use bed_reader::BedErrorPlus;
3059    /// # Ok::<(), Box<BedErrorPlus>>(())
3060    /// ```
3061    pub fn read_and_fill<TVal: BedVal>(
3062        &mut self,
3063        val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.,
3064    ) -> Result<(), Box<BedErrorPlus>> {
3065        let read_options = ReadOptions::<TVal>::builder().build()?;
3066        self.read_and_fill_with_options(val, &read_options)
3067    }
3068
3069    /// Read genotype data with options.
3070    ///
3071    /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
3072    ///
3073    /// # Errors
3074    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
3075    /// for all possible errors.
3076    ///
3077    /// # Example
3078    ///
3079    /// ```
3080    /// use ndarray as nd;
3081    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
3082    /// use bed_reader::assert_eq_nan;
3083    ///
3084    /// // Read the SNPs indexed by 2.
3085    /// let file_name = sample_bed_file("small.bed")?;
3086    /// let mut bed = Bed::new(file_name)?;
3087    /// let read_options = ReadOptions::builder().sid_index(2).f64().build()?;
3088    /// let val = bed.read_with_options(&read_options)?;
3089    ///
3090    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
3091    /// # use bed_reader::BedErrorPlus;
3092    /// # Ok::<(), Box<BedErrorPlus>>(())
3093    /// ```  
3094    pub fn read_with_options<TVal: BedVal>(
3095        &mut self,
3096        read_options: &ReadOptions<TVal>,
3097    ) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
3098        let iid_count_in = self.iid_count()?;
3099        let sid_count_in = self.sid_count()?;
3100        let iid_count_out = read_options.iid_index.len(iid_count_in)?;
3101        let sid_count_out = read_options.sid_index.len(sid_count_in)?;
3102        let shape = ShapeBuilder::set_f((iid_count_out, sid_count_out), read_options.is_f);
3103        let mut val = nd::Array2::<TVal>::default(shape);
3104
3105        self.read_and_fill_with_options(&mut val.view_mut(), read_options)?;
3106
3107        Ok(val)
3108    }
    /// Write genotype data with default metadata.
    ///
    /// This is a shortcut for `WriteOptions::builder(path).write(val)`: no
    /// metadata or options are set on the builder before writing.
    ///
    /// > Also see [`WriteOptions::builder`](struct.WriteOptions.html#method.builder), which supports metadata and options.
    ///
    /// # Errors
    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
    /// for all possible errors.
    ///
    /// # Example
    /// In this example, write genotype data using default metadata.
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, WriteOptions};
    ///
    /// let output_folder = temp_testdir::TempDir::default();
    /// let output_file = output_folder.join("small.bed");
    ///
    /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
    /// Bed::write(&val, &output_file)?;
    ///
    /// // If we then read the new file and list the chromosome property,
    /// // it is an array of zeros, the default chromosome value.
    /// let mut bed2 = Bed::new(&output_file)?;
    /// println!("{:?}", bed2.chromosome()?); // Outputs ndarray ["0", "0", "0", "0"]
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    pub fn write<S: nd::Data<Elem = TVal>, TVal: BedVal>(
        val: &nd::ArrayBase<S, nd::Ix2>,
        path: &Path,
    ) -> Result<(), Box<BedErrorPlus>> {
        // Builder with no metadata set, written immediately.
        WriteOptions::builder(path).write(val)
    }
3142
3143    /// Given an 2D array of genotype data and a [`WriteOptions`](struct.WriteOptionsBuilder.html), write to a .bed file.
3144    ///
3145    /// > Also see [`WriteOptionsBuilder::write`](struct.WriteOptionsBuilder.html#method.write), which creates
3146    /// > a [`WriteOptions`](struct.WriteOptionsBuilder.html) and writes to file in one step.
3147    ///
3148    /// # Example
3149    /// ```
3150    /// use ndarray as nd;
3151    /// use bed_reader::{Bed, WriteOptions};
3152    ///
3153    /// let val = nd::array![
3154    ///     [1.0, 0.0, f64::NAN, 0.0],
3155    ///     [2.0, 0.0, f64::NAN, 2.0],
3156    ///     [0.0, 1.0, 2.0, 0.0]
3157    /// ];
3158    ///
3159    /// let output_folder = temp_testdir::TempDir::default();
3160    /// let output_file = output_folder.join("small.bed");
3161    /// let write_options = WriteOptions::builder(output_file)
3162    ///     .iid(["iid1", "iid2", "iid3"])
3163    ///     .sid(["sid1", "sid2", "sid3", "sid4"])
3164    ///     .build(3,4)?;
3165    ///
3166    /// Bed::write_with_options(&val, &write_options)?;
3167    /// # use bed_reader::BedErrorPlus;
3168    /// # Ok::<(), Box<BedErrorPlus>>(())
3169    /// ```
3170    pub fn write_with_options<S, TVal>(
3171        val: &nd::ArrayBase<S, nd::Ix2>,
3172        write_options: &WriteOptions<TVal>,
3173    ) -> Result<(), Box<BedErrorPlus>>
3174    where
3175        S: nd::Data<Elem = TVal>,
3176        TVal: BedVal,
3177    {
3178        let (iid_count, sid_count) = val.dim();
3179        if iid_count != write_options.iid_count() {
3180            Err(BedError::InconsistentCount(
3181                "iid".into(),
3182                write_options.iid_count(),
3183                iid_count,
3184            ))?;
3185        }
3186        if sid_count != write_options.sid_count() {
3187            Err(BedError::InconsistentCount(
3188                "sid".into(),
3189                write_options.sid_count(),
3190                sid_count,
3191            ))?;
3192        }
3193
3194        let num_threads = compute_num_threads(write_options.num_threads)?;
3195        write_val(
3196            &write_options.path,
3197            val,
3198            write_options.is_a1_counted,
3199            write_options.missing_value,
3200            num_threads,
3201        )?;
3202
3203        if !write_options.skip_fam() {
3204            if let Err(e) = write_options.metadata.write_fam(write_options.fam_path()) {
3205                // Clean up the file
3206                let _ = fs::remove_file(&write_options.fam_path);
3207                Err(e)?;
3208            }
3209        }
3210
3211        if !write_options.skip_bim() {
3212            if let Err(e) = write_options.metadata.write_bim(write_options.bim_path()) {
3213                // Clean up the file
3214                let _ = fs::remove_file(&write_options.bim_path);
3215                Err(e)?;
3216            }
3217        }
3218
3219        Ok(())
3220    }
3221
3222    fn unlazy_fam<T: FromStringArray<T>>(
3223        &mut self,
3224        is_none: bool,
3225        field_index: MetadataFields,
3226        name: &str,
3227    ) -> Result<(), Box<BedErrorPlus>> {
3228        if self.skip_set.contains(&field_index) {
3229            Err(BedError::CannotUseSkippedMetadata(name.to_string()))?;
3230        }
3231        if is_none {
3232            self.fam()?;
3233        }
3234        Ok(())
3235    }
3236
3237    fn unlazy_bim<T: FromStringArray<T>>(
3238        &mut self,
3239        is_none: bool,
3240        field_index: MetadataFields,
3241        name: &str,
3242    ) -> Result<(), Box<BedErrorPlus>> {
3243        if self.skip_set.contains(&field_index) {
3244            Err(BedError::CannotUseSkippedMetadata(name.to_string()))?;
3245        }
3246        if is_none {
3247            self.bim()?;
3248        }
3249        Ok(())
3250    }
3251
3252    fn fam(&mut self) -> Result<(), Box<BedErrorPlus>> {
3253        let fam_path = self.fam_path();
3254
3255        let (metadata, count) = self.metadata.read_fam(fam_path, &self.skip_set)?;
3256        self.metadata = metadata;
3257
3258        match self.iid_count {
3259            Some(iid_count) => {
3260                if iid_count != count {
3261                    Err(BedError::InconsistentCount(
3262                        "iid".to_string(),
3263                        iid_count,
3264                        count,
3265                    ))?;
3266                }
3267            }
3268            None => {
3269                self.iid_count = Some(count);
3270            }
3271        }
3272        Ok(())
3273    }
3274
3275    fn bim(&mut self) -> Result<(), Box<BedErrorPlus>> {
3276        let bim_path = self.bim_path();
3277
3278        let (metadata, count) = self.metadata.read_bim(bim_path, &self.skip_set)?;
3279        self.metadata = metadata;
3280
3281        match self.sid_count {
3282            Some(sid_count) => {
3283                if sid_count != count {
3284                    Err(BedError::InconsistentCount(
3285                        "sid".to_string(),
3286                        sid_count,
3287                        count,
3288                    ))?;
3289                }
3290            }
3291            None => {
3292                self.sid_count = Some(count);
3293            }
3294        }
3295        Ok(())
3296    }
3297}
3298
3299/// If we already have a Vec<isize> remember a reference to it.
3300/// If we don't, then create one.
3301enum Hold<'a> {
3302    Copy(Vec<isize>),
3303    Ref(&'a Vec<isize>),
3304}
3305
3306impl Hold<'_> {
3307    fn new(index: &Index, count: usize) -> Result<Hold<'_>, Box<BedErrorPlus>> {
3308        let hold = if let Index::Vec(vec) = index {
3309            Hold::Ref(vec)
3310        } else {
3311            Hold::Copy(index.to_vec(count)?)
3312        };
3313        Ok(hold)
3314    }
3315
3316    fn as_ref(&self) -> &Vec<isize> {
3317        match self {
3318            Hold::Ref(vec) => vec,
3319            Hold::Copy(ref vec) => vec,
3320        }
3321    }
3322}
3323
3324fn compute_num_threads(option_num_threads: Option<usize>) -> Result<usize, Box<BedErrorPlus>> {
3325    let num_threads = if let Some(num_threads) = option_num_threads {
3326        num_threads
3327    } else if let Ok(num_threads) = env::var("BED_READER_NUM_THREADS") {
3328        num_threads.parse::<usize>()?
3329    } else if let Ok(num_threads) = env::var("NUM_THREADS") {
3330        num_threads.parse::<usize>()?
3331    } else {
3332        0
3333    };
3334    Ok(num_threads)
3335}
3336
3337#[allow(clippy::unnecessary_wraps)]
3338fn compute_max_concurrent_requests(
3339    option_max_concurrent_requests: Option<usize>,
3340) -> Result<usize, Box<BedErrorPlus>> {
3341    // In the future, we might want to set this with an environment variable.
3342    let max_concurrent_requests = option_max_concurrent_requests.unwrap_or(10);
3343    Ok(max_concurrent_requests)
3344}
3345
3346#[allow(clippy::unnecessary_wraps)]
3347fn compute_max_chunk_bytes(
3348    option_max_chunk_bytes: Option<usize>,
3349) -> Result<usize, Box<BedErrorPlus>> {
3350    // In the future, we might want to set this with an environment variable.
3351    let max_chunk_bytes = option_max_chunk_bytes.unwrap_or(8_000_000);
3352    Ok(max_chunk_bytes)
3353}
3354
3355impl Index {
3356    // We can't define a 'From' because we want to add count at the last moment.
3357    // Later Would be nice to not always allocate a new vec, maybe with Rc<[T]>?
3358    // Even better would be to support an iterator from Index (an enum with fields).
3359
3360    /// Turns an [`Index`](enum.Index.html) into a vector of usize indexes. Negative means count from end.
3361    pub fn to_vec(&self, count: usize) -> Result<Vec<isize>, Box<BedErrorPlus>> {
3362        let count_signed = count as isize;
3363        match self {
3364            Index::All => Ok((0..count_signed).collect()),
3365            Index::Vec(vec) => Ok(vec.clone()),
3366            Index::NDArrayBool(nd_array_bool) => {
3367                if nd_array_bool.len() != count {
3368                    Err(BedError::BoolArrayVectorWrongLength(
3369                        count,
3370                        nd_array_bool.len(),
3371                    ))?;
3372                }
3373                Ok(nd_array_bool
3374                    .iter()
3375                    .enumerate()
3376                    .filter(|(_, b)| **b)
3377                    .map(|(i, _)| i as isize)
3378                    .collect())
3379            }
3380            Index::NDSliceInfo(nd_slice_info) => {
3381                Ok(RangeNdSlice::new(nd_slice_info, count)?.to_vec())
3382            }
3383            Index::RangeAny(range_any) => {
3384                let range = range_any.to_range(count)?;
3385                Ok(range.map(|i| i as isize).collect::<Vec<isize>>())
3386            }
3387            Index::NDArray(nd_array) => Ok(nd_array.to_vec()),
3388            Index::One(one) => Ok(vec![*one]),
3389            Index::VecBool(vec_bool) => {
3390                if vec_bool.len() != count {
3391                    Err(BedError::BoolArrayVectorWrongLength(count, vec_bool.len()))?;
3392                }
3393                Ok(vec_bool
3394                    .iter()
3395                    .enumerate()
3396                    .filter(|(_, b)| **b)
3397                    .map(|(i, _)| i as isize)
3398                    .collect())
3399            }
3400        }
3401    }
3402}
3403
3404#[allow(clippy::doc_markdown)]
3405/// Type alias for 1-D slices of NDArrays.
3406pub type SliceInfo1 =
3407    nd::SliceInfo<[nd::SliceInfoElem; 1], nd::Dim<[usize; 1]>, nd::Dim<[usize; 1]>>;
3408
3409/// A specification of which individuals (samples) or SNPs (variants) to read.
3410///
3411/// See the [Table of Index Expressions](index.html#index-expressions)
3412/// for a list of expressions for selecting individuals (sample)
3413/// and SNPs (variants).
3414///
3415/// By default, all individuals or SNPs are read.
3416/// The indices can be specified as:
3417///   * an index (negative numbers count from the end)
3418///   * a vector or ndarray of indices
3419///   * a Rust range (negatives not allowed)
3420///   * a vector or ndarray of booleans
3421///   * an ndarray slice (negative indexing and steps allowed)
3422///
3423/// # Examples
3424/// ```
3425/// use ndarray as nd;
3426/// use bed_reader::{Bed, ReadOptions, sample_bed_file};
3427/// use bed_reader::assert_eq_nan;
3428/// use ndarray::s;
3429///
3430/// let file_name = sample_bed_file("some_missing.bed")?;
3431/// let mut bed = Bed::new(file_name)?;
3432/// println!("{:?}", bed.dim()?); // prints (100, 100)
3433///
3434/// // Read all individuals and all SNPs
3435/// let val = ReadOptions::builder().f64().read(&mut bed)?;
3436/// assert!(val.dim() == (100, 100));
3437///
3438/// // Read the individual at index position 10 and all SNPs
3439/// let val = ReadOptions::builder().iid_index(10).f64().read(&mut bed)?;
3440/// assert!(val.dim() == (1, 100));
3441///
3442/// // Read the individuals at index positions 0,5, 1st-from-the-end and
3443/// // the SNP at index position 3
3444/// let val = ReadOptions::builder()
3445///     .iid_index(vec![0, 5, -1])
3446///     .sid_index(3)
3447///     .f64()
3448///     .read(&mut bed)?;
3449/// assert!(val.dim() == (3, 1));
3450/// // Repeat, but with an ndarray
3451/// let val = ReadOptions::builder()
3452///     .iid_index(nd::array![0, 5, -1])
3453///     .sid_index(3)
3454///     .f64()
3455///     .read(&mut bed)?;
3456/// assert!(val.dim() == (3, 1));
3457/// // Repeat, but with an Rust array
3458/// let val = ReadOptions::builder()
3459///     .iid_index([0, 5, -1])
3460///     .sid_index(3)
3461///     .f64()
3462///     .read(&mut bed)?;
3463/// assert!(val.dim() == (3, 1));
3464/// // Create a boolean ndarray identifying SNPs in chromosome 5,
3465/// // then select those SNPs.
3466/// let chrom_5 = bed.chromosome()?.map(|elem| elem == "5");
3467/// let val = ReadOptions::builder()
3468///     .sid_index(chrom_5)
3469///     .f64()
3470///     .read(&mut bed)?;
3471/// assert!(val.dim() == (100, 6));
3472/// // Use ndarray's slice macro, [`s!`](https://docs.rs/ndarray/latest/ndarray/macro.s.html),
3473/// // to select every 2nd individual and every 3rd SNP.
3474/// let val = ReadOptions::builder()
3475///     .iid_index(s![..;2])
3476///     .sid_index(s![..;3])
3477///     .f64()
3478///     .read(&mut bed)?;
3479/// assert!(val.dim() == (50, 34));
3480/// // Use ndarray's slice macro, [`s!`](https://docs.rs/ndarray/latest/ndarray/macro.s.html),
3481/// // to select the 10th-from-last individual to the last, in reverse order,
3482/// // and every 3rd SNP in reverse order.)
3483/// let val = ReadOptions::builder()
3484///     .iid_index(s![-10..;-1])
3485///     .sid_index(s![..;-3])
3486///     .f64()
3487///     .read(&mut bed)?;
3488/// assert!(val.dim() == (10, 34));
3489/// # use bed_reader::BedErrorPlus;
3490/// # Ok::<(), Box<BedErrorPlus>>(())
3491/// ```
3492#[derive(Debug, Clone)]
3493pub enum Index {
3494    // Could implement an enumerator, but it is complex and requires a 'match' on each next()
3495    //     https://stackoverflow.com/questions/65272613/how-to-implement-intoiterator-for-an-enum-of-iterable-variants
3496    #[allow(missing_docs)]
3497    All,
3498    #[allow(missing_docs)]
3499    One(isize),
3500    #[allow(missing_docs)]
3501    Vec(Vec<isize>),
3502    #[allow(missing_docs)]
3503    NDArray(nd::Array1<isize>),
3504    #[allow(missing_docs)]
3505    VecBool(Vec<bool>),
3506    #[allow(missing_docs)]
3507    NDArrayBool(nd::Array1<bool>),
3508    #[allow(missing_docs)]
3509    NDSliceInfo(SliceInfo1),
3510    #[allow(missing_docs)]
3511    RangeAny(RangeAny),
3512}
3513
3514#[doc(hidden)]
3515/// Used internally to represent Rust ranges such as `0..10`, `..10`, etc.
3516#[derive(Debug, Clone)]
3517pub struct RangeAny {
3518    start: Option<usize>,
3519    end: Option<usize>,
3520}
3521
3522impl RangeAny {
3523    fn new<T: RangeBounds<usize>>(range_thing: &T) -> RangeAny {
3524        let start_bound = range_thing.start_bound();
3525        let start = match start_bound {
3526            Bound::Included(&start) => Some(start),
3527            Bound::Excluded(&start) => Some(start + 1),
3528            Bound::Unbounded => None,
3529        };
3530
3531        let end_bound = range_thing.end_bound();
3532        let end = match end_bound {
3533            Bound::Included(&end) => Some(end + 1),
3534            Bound::Excluded(&end) => Some(end),
3535            Bound::Unbounded => None,
3536        };
3537        RangeAny { start, end }
3538    }
3539
3540    // https://stackoverflow.com/questions/55925523/array-cannot-be-indexed-by-rangefull
3541    fn to_range(&self, count: usize) -> Result<Range<usize>, Box<BedErrorPlus>> {
3542        let start = self.start.unwrap_or_default();
3543        let end = if let Some(end) = self.end { end } else { count };
3544        if start > end {
3545            Err(BedError::StartGreaterThanEnd(start, end).into())
3546        } else {
3547            Ok(Range { start, end })
3548        }
3549    }
3550
3551    fn len(&self, count: usize) -> Result<usize, Box<BedErrorPlus>> {
3552        let range = self.to_range(count)?;
3553        Ok(range.end - range.start)
3554    }
3555
3556    fn is_empty(&self, count: usize) -> Result<bool, Box<BedErrorPlus>> {
3557        Ok(self.len(count)? == 0)
3558    }
3559}
3560
3561#[doc(hidden)]
3562#[derive(Debug, Clone)]
3563/// Used internally to represent NDArray Slices such as s![..], s![0..;2], s![0..10;-1]
3564pub struct RangeNdSlice {
3565    start: usize,
3566    end: usize,
3567    step: usize,
3568    is_reversed: bool,
3569}
3570
3571impl RangeNdSlice {
3572    fn len(&self) -> usize {
3573        if self.start > self.end {
3574            0
3575        } else {
3576            (self.end - self.start).div_ceil(self.step)
3577        }
3578    }
3579
3580    fn is_empty(&self) -> bool {
3581        self.len() == 0
3582    }
3583
3584    // https://docs.rs/ndarray/0.15.4/ndarray/struct.ArrayBase.html#slicing
3585    fn to_vec(&self) -> Vec<isize> {
3586        if self.start >= self.end {
3587            Vec::new()
3588        } else if !self.is_reversed {
3589            (self.start..self.end)
3590                .step_by(self.step)
3591                .map(|i| i as isize)
3592                .collect()
3593        } else {
3594            // https://docs.rs/ndarray/latest/ndarray/macro.s.html
3595            let size = self.len();
3596            let mut vec: Vec<isize> = Vec::<isize>::with_capacity(size);
3597            let mut i = self.end - 1;
3598            while i >= self.start {
3599                vec.push(i as isize);
3600                if i < self.step {
3601                    break;
3602                }
3603                i -= self.step;
3604            }
3605            vec
3606        }
3607    }
3608
3609    fn new(nd_slice_info: &SliceInfo1, count: usize) -> Result<Self, Box<BedErrorPlus>> {
3610        //  self.to_vec(count).len(),
3611        // https://docs.rs/ndarray/0.15.4/ndarray/struct.ArrayBase.html#method.slice_collapse
3612        // Error in the following cases
3613        // * SliceInfo is not a 1-dimensional or is a NewAxis
3614        // * Step is 0
3615        // * Start is greater than count
3616        // * End is greater than count
3617        // As with ndarray, Start can be greater than End is allowed
3618        // and means the slice is empty.
3619        if nd_slice_info.in_ndim() != 1 || nd_slice_info.out_ndim() != 1 {
3620            Err(BedError::NdSliceInfoNot1D)?;
3621        }
3622
3623        let slice_info_elem = nd_slice_info[0];
3624        match slice_info_elem {
3625            nd::SliceInfoElem::Slice { start, end, step } => {
3626                // https://docs.rs/ndarray/0.15.4/ndarray/enum.SliceInfoElem.html
3627                // s![..], 0,None,1
3628                // s![a..b;2] a,b,2
3629                // s![a..;-1], from a to end in reverse order
3630                // start index; negative are counted from the back of the axis
3631                // end index; negative are counted from the back of the axis; when not present the default is the full length of the axis.
3632                // step size in elements; the default is 1, for every element.
3633                // A range with step size. end is an exclusive index. Negative start or end indexes are counted from the back of the axis. If end is None, the slice extends to the end of the axis.
3634                let (step2, is_reverse2) = match step.cmp(&0) {
3635                    Ordering::Greater => (step as usize, false),
3636                    Ordering::Less => ((-step) as usize, true),
3637                    Ordering::Equal => Err(BedError::StepZero)?,
3638                };
3639
3640                let start2 = if start >= 0 {
3641                    let start3 = start as usize;
3642                    if start3 > count {
3643                        Err(BedError::StartGreaterThanCount(start3, count))?;
3644                    }
3645                    start3
3646                } else {
3647                    let start3 = (-start) as usize;
3648                    if start3 > count {
3649                        Err(BedError::StartGreaterThanCount(start3, count))?;
3650                    }
3651                    count - start3
3652                };
3653
3654                let end2 = if let Some(end) = end {
3655                    if end >= 0 {
3656                        let end3 = end as usize;
3657                        if end3 > count {
3658                            Err(BedError::EndGreaterThanCount(end3, count))?;
3659                        }
3660                        end3
3661                    } else {
3662                        let end3 = (-end) as usize;
3663                        if end3 > count {
3664                            Err(BedError::EndGreaterThanCount(end3, count))?;
3665                        }
3666                        count - end3
3667                    }
3668                } else {
3669                    count
3670                };
3671
3672                Ok(RangeNdSlice {
3673                    start: start2,
3674                    end: end2,
3675                    step: step2,
3676                    is_reversed: is_reverse2,
3677                })
3678            }
3679            nd::SliceInfoElem::Index(index) => Ok(RangeNdSlice {
3680                start: index as usize,
3681                end: index as usize + 1,
3682                step: 1,
3683                is_reversed: false,
3684            }),
3685            nd::SliceInfoElem::NewAxis => Err(BedError::NewAxis.into()),
3686        }
3687    }
3688}
3689
3690impl Index {
3691    /// Returns the number of elements in an [`Index`](enum.Index.html).
3692    #[allow(clippy::len_without_is_empty)]
3693    pub fn len(&self, count: usize) -> Result<usize, Box<BedErrorPlus>> {
3694        match self {
3695            Index::All => Ok(count),
3696            Index::One(_) => Ok(1),
3697            Index::Vec(vec) => Ok(vec.len()),
3698            Index::NDArray(nd_array) => Ok(nd_array.len()),
3699            Index::VecBool(vec_bool) => Ok(vec_bool.iter().filter(|&b| *b).count()),
3700            Index::NDArrayBool(nd_array_bool) => Ok(nd_array_bool.iter().filter(|&b| *b).count()),
3701            Index::NDSliceInfo(nd_slice_info) => Ok(RangeNdSlice::new(nd_slice_info, count)?.len()),
3702            Index::RangeAny(range_any) => range_any.len(count),
3703        }
3704    }
3705
3706    /// Returns true if the [`Index`](enum.Index.html) is empty.
3707    pub fn is_empty(&self, count: usize) -> Result<bool, Box<BedErrorPlus>> {
3708        match self {
3709            Index::All => Ok(count == 0),
3710            Index::One(_) => Ok(false),
3711            Index::Vec(vec) => Ok(vec.is_empty()),
3712            Index::NDArray(nd_array) => Ok(nd_array.is_empty()),
3713            Index::VecBool(vec_bool) => Ok(!vec_bool.iter().any(|&b| b)),
3714            Index::NDArrayBool(nd_array_bool) => Ok(!nd_array_bool.iter().any(|&b| b)),
3715            Index::NDSliceInfo(nd_slice_info) => {
3716                Ok(RangeNdSlice::new(nd_slice_info, count)?.is_empty())
3717            }
3718            Index::RangeAny(range_any) => range_any.is_empty(count),
3719        }
3720    }
3721}
3722
3723impl From<SliceInfo1> for Index {
3724    fn from(slice_info: SliceInfo1) -> Index {
3725        Index::NDSliceInfo(slice_info)
3726    }
3727}
3728impl From<&SliceInfo1> for Index {
3729    fn from(slice_info: &SliceInfo1) -> Index {
3730        Index::NDSliceInfo(slice_info.to_owned())
3731    }
3732}
3733
3734impl From<RangeFull> for Index {
3735    fn from(range_thing: RangeFull) -> Index {
3736        Index::RangeAny(RangeAny::new(&range_thing))
3737    }
3738}
3739
3740impl From<&RangeFull> for Index {
3741    fn from(range_thing: &RangeFull) -> Index {
3742        Index::RangeAny(RangeAny::new(range_thing))
3743    }
3744}
3745
3746impl From<Range<usize>> for Index {
3747    fn from(range_thing: Range<usize>) -> Index {
3748        Index::RangeAny(RangeAny::new(&range_thing))
3749    }
3750}
3751
3752impl From<&Range<usize>> for Index {
3753    fn from(range_thing: &Range<usize>) -> Index {
3754        Index::RangeAny(RangeAny::new(range_thing))
3755    }
3756}
3757
3758impl From<RangeFrom<usize>> for Index {
3759    fn from(range_thing: RangeFrom<usize>) -> Index {
3760        Index::RangeAny(RangeAny::new(&range_thing))
3761    }
3762}
3763
3764impl From<&RangeFrom<usize>> for Index {
3765    fn from(range_thing: &RangeFrom<usize>) -> Index {
3766        Index::RangeAny(RangeAny::new(range_thing))
3767    }
3768}
3769
3770impl From<RangeInclusive<usize>> for Index {
3771    fn from(range_thing: RangeInclusive<usize>) -> Index {
3772        Index::RangeAny(RangeAny::new(&range_thing))
3773    }
3774}
3775
3776impl From<&RangeInclusive<usize>> for Index {
3777    fn from(range_thing: &RangeInclusive<usize>) -> Index {
3778        Index::RangeAny(RangeAny::new(range_thing))
3779    }
3780}
3781
3782impl From<RangeTo<usize>> for Index {
3783    fn from(range_thing: RangeTo<usize>) -> Index {
3784        Index::RangeAny(RangeAny::new(&range_thing))
3785    }
3786}
3787
3788impl From<&RangeTo<usize>> for Index {
3789    fn from(range_thing: &RangeTo<usize>) -> Index {
3790        Index::RangeAny(RangeAny::new(range_thing))
3791    }
3792}
3793
3794impl From<RangeToInclusive<usize>> for Index {
3795    fn from(range_thing: RangeToInclusive<usize>) -> Index {
3796        Index::RangeAny(RangeAny::new(&range_thing))
3797    }
3798}
3799
3800impl From<&RangeToInclusive<usize>> for Index {
3801    fn from(range_thing: &RangeToInclusive<usize>) -> Index {
3802        Index::RangeAny(RangeAny::new(range_thing))
3803    }
3804}
3805
3806impl From<&[isize]> for Index {
3807    fn from(array: &[isize]) -> Index {
3808        Index::Vec(array.to_vec())
3809    }
3810}
3811
3812impl<const N: usize> From<[isize; N]> for Index {
3813    fn from(array: [isize; N]) -> Index {
3814        Index::Vec(array.to_vec())
3815    }
3816}
3817
3818impl<const N: usize> From<&[isize; N]> for Index {
3819    fn from(array: &[isize; N]) -> Index {
3820        Index::Vec(array.to_vec())
3821    }
3822}
3823
3824impl From<&nd::ArrayView1<'_, isize>> for Index {
3825    fn from(view: &nd::ArrayView1<isize>) -> Index {
3826        Index::NDArray(view.to_owned())
3827    }
3828}
3829
3830impl From<nd::ArrayView1<'_, isize>> for Index {
3831    fn from(view: nd::ArrayView1<isize>) -> Index {
3832        Index::NDArray(view.to_owned())
3833    }
3834}
3835
3836impl From<Vec<isize>> for Index {
3837    fn from(vec: Vec<isize>) -> Index {
3838        Index::Vec(vec)
3839    }
3840}
3841impl From<&Vec<isize>> for Index {
3842    fn from(vec_ref: &Vec<isize>) -> Index {
3843        Index::Vec(vec_ref.clone())
3844    }
3845}
3846
3847impl From<nd::ArrayView1<'_, bool>> for Index {
3848    fn from(view: nd::ArrayView1<bool>) -> Index {
3849        Index::NDArrayBool(view.to_owned())
3850    }
3851}
3852
3853impl From<&nd::ArrayView1<'_, bool>> for Index {
3854    fn from(view: &nd::ArrayView1<bool>) -> Index {
3855        Index::NDArrayBool(view.to_owned())
3856    }
3857}
3858
3859impl From<&Vec<bool>> for Index {
3860    fn from(vec_ref: &Vec<bool>) -> Index {
3861        Index::VecBool(vec_ref.clone())
3862    }
3863}
3864
3865impl From<&[bool]> for Index {
3866    fn from(array: &[bool]) -> Index {
3867        Index::VecBool(array.to_vec())
3868    }
3869}
3870
3871impl<const N: usize> From<[bool; N]> for Index {
3872    fn from(array: [bool; N]) -> Index {
3873        Index::VecBool(array.to_vec())
3874    }
3875}
3876
3877impl<const N: usize> From<&[bool; N]> for Index {
3878    fn from(array: &[bool; N]) -> Index {
3879        Index::VecBool(array.to_vec())
3880    }
3881}
3882
3883impl From<isize> for Index {
3884    fn from(one: isize) -> Index {
3885        Index::One(one)
3886    }
3887}
3888impl From<&isize> for Index {
3889    fn from(one: &isize) -> Index {
3890        Index::One(one.to_owned())
3891    }
3892}
3893
3894impl From<nd::Array1<isize>> for Index {
3895    fn from(nd_array: nd::Array1<isize>) -> Index {
3896        Index::NDArray(nd_array)
3897    }
3898}
3899
3900impl From<&nd::Array1<isize>> for Index {
3901    fn from(nd_array: &nd::Array1<isize>) -> Index {
3902        Index::NDArray(nd_array.to_owned())
3903    }
3904}
3905
3906impl From<nd::Array1<bool>> for Index {
3907    fn from(nd_array_bool: nd::Array1<bool>) -> Index {
3908        Index::NDArrayBool(nd_array_bool)
3909    }
3910}
3911
3912impl From<&nd::Array1<bool>> for Index {
3913    fn from(nd_array_bool: &nd::Array1<bool>) -> Index {
3914        Index::NDArrayBool(nd_array_bool.clone())
3915    }
3916}
3917
3918impl From<Vec<bool>> for Index {
3919    fn from(vec_bool: Vec<bool>) -> Index {
3920        Index::VecBool(vec_bool)
3921    }
3922}
3923
3924impl From<()> for Index {
3925    fn from((): ()) -> Index {
3926        Index::All
3927    }
3928}
3929
3930// See https://nullderef.com/blog/rust-parameters/
3931
3932/// Represents options for reading genotype data from a PLINK .bed file.
3933///
3934/// Construct with [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
3935///
3936/// See the [Table of `ReadOptions`](index.html#readoptions)
3937/// for a list of the supported options.
3938/// See the [Table of Index Expressions](index.html#index-expressions)
3939/// for a list of expressions for selecting individuals (sample)
3940/// and SNPs (variants).
3941#[derive(Debug, Clone, Builder)]
3942#[builder(build_fn(error = "Box<BedErrorPlus>"))]
3943pub struct ReadOptions<TVal: BedVal> {
3944    /// Value to use for missing values (defaults to -127 or NaN)
3945    ///
3946    /// -127 is the default for i8 and NaN is the default for f32 and f64.
3947    ///
3948    /// In this example, the missing value is set to -1:
3949    /// ```
3950    /// use ndarray as nd;
3951    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
3952    /// use bed_reader::assert_eq_nan;
3953    ///
3954    /// let file_name = sample_bed_file("small.bed")?;
3955    /// let mut bed = Bed::new(file_name)?;
3956    /// let val = ReadOptions::builder().missing_value(-1).i8().read(&mut bed)?;
3957    ///
3958    /// assert_eq_nan(
3959    ///     &val,
3960    ///     &nd::array![
3961    ///         [1, 0, -1, 0],
3962    ///         [2, 0, -1, 2],
3963    ///         [0, 1, 2, 0]
3964    ///     ],
3965    /// );
3966    /// # use bed_reader::BedErrorPlus;
3967    /// # Ok::<(), Box<BedErrorPlus>>(())
3968    /// ```
3969    #[builder(default = "TVal::missing()")]
3970    missing_value: TVal,
3971
3972    /// Select which individual (sample) values to read -- Defaults to all.
3973    ///
3974    /// Can select with a signed number, various lists of signed numbers,
3975    /// ranges, and various lists of booleans.
3976    ///
3977    /// See the [Table of Index Expressions](index.html#index-expressions)
3978    /// for a list of the supported index expressions.
3979    ///
3980    /// # Examples:
3981    /// ```
3982    /// use ndarray as nd;
3983    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
3984    /// use ndarray::s;
3985    ///
3986    /// let file_name = sample_bed_file("some_missing.bed")?;
3987    /// let mut bed = Bed::new(file_name)?;
3988    ///
3989    /// // Read the individual at index position 3
3990    ///
3991    /// let val = ReadOptions::builder()
3992    ///     .iid_index(3)
3993    ///     .f64()
3994    ///     .read(&mut bed)?;
3995    /// assert!(val.dim() == (1, 100));
3996    ///
3997    /// // Read the individuals at index positions 0, 5, and 1st-from-last.
3998    ///
3999    /// let val = ReadOptions::builder()
4000    ///     .iid_index([0, 5, -1])
4001    ///     .f64()
4002    ///     .read(&mut bed)?;
4003    ///
4004    /// assert!(val.dim() == (3, 100));
4005    ///
4006    /// // Read the individuals at index positions 20 (inclusive) to 30 (exclusive).
4007    ///
4008    /// let val = ReadOptions::builder()
4009    ///     .iid_index(20..30)
4010    ///     .f64()
4011    ///     .read(&mut bed)?;
4012    ///
4013    /// assert!(val.dim() == (10, 100));
4014    ///
4015    /// // Read the individuals at every 2nd index position.
4016    ///
4017    /// let val = ReadOptions::builder()
4018    ///     .iid_index(s![..;2])
4019    ///     .f64()
4020    ///     .read(&mut bed)?;
4021    ///
4022    /// assert!(val.dim() == (50, 100));
4023    ///
4024    /// // Read chromosome 5 of the female individuals.
4025    ///
4026    /// let female = bed.sex()?.map(|elem| *elem == 2);
4027    /// let chrom_5 = bed.chromosome()?.map(|elem| elem == "5");
4028    /// let val = ReadOptions::builder()
4029    ///     .iid_index(female)
4030    ///     .sid_index(chrom_5)
4031    ///     .f64()
4032    ///     .read(&mut bed)?;
4033    ///
4034    /// assert!(val.dim() == (50, 6));
4035    /// # use bed_reader::BedErrorPlus;
4036    /// # Ok::<(), Box<BedErrorPlus>>(())
4037    /// ```
4038    #[builder(default = "Index::All")]
4039    #[builder(setter(into))]
4040    iid_index: Index,
4041
4042    /// Select which SNPs (variant) values to read -- Defaults to all.
4043    ///
4044    /// Can select with a signed number, various lists of signed numbers,
4045    /// ranges, and various lists of booleans.
4046    ///
4047    /// See the [Table of Index Expressions](index.html#index-expressions)
4048    /// for a list of the supported index expressions.
4049    ///
4050    /// # Examples:
4051    /// ```
4052    /// use ndarray as nd;
4053    /// use ndarray::s;
4054    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
4055    ///
4056    /// let file_name = sample_bed_file("some_missing.bed")?;
4057    /// let mut bed = Bed::new(file_name)?;
4058    ///
4059    /// // Read the SNP at index position 3
4060    ///
4061    /// let val = ReadOptions::builder()
4062    ///     .sid_index(3)
4063    ///     .f64()
4064    ///     .read(&mut bed)?;
4065    /// assert!(val.dim() == (100, 1));
4066    ///
4067    /// // Read the SNPs at index positions 0, 5, and 1st-from-last.
4068    ///
4069    /// let val = ReadOptions::builder()
4070    ///     .sid_index([0, 5, -1])
4071    ///     .f64()
4072    ///     .read(&mut bed)?;
4073    ///
4074    /// assert!(val.dim() == (100, 3));
4075    ///
4076    /// // Read the SNPs at index positions 20 (inclusive) to 30 (exclusive).
4077    ///
4078    /// let val = ReadOptions::builder()
4079    ///     .sid_index(20..30)
4080    ///     .f64()
4081    ///     .read(&mut bed)?;
4082    ///
4083    /// assert!(val.dim() == (100, 10));
4084    ///
4085    /// // Read the SNPs at every 2nd index position.
4086    ///
4087    /// let val = ReadOptions::builder()
4088    ///     .sid_index(s![..;2])
4089    ///     .f64()
4090    ///     .read(&mut bed)?;
4091    ///
4092    /// assert!(val.dim() == (100, 50));
4093    ///
4094    /// // Read chromosome 5 of the female individuals.
4095    ///
4096    /// let female = bed.sex()?.map(|elem| *elem == 2);
4097    /// let chrom_5 = bed.chromosome()?.map(|elem| elem == "5");
4098    /// let val = ReadOptions::builder()
4099    ///     .iid_index(female)
4100    ///     .sid_index(chrom_5)
4101    ///     .f64()
4102    ///     .read(&mut bed)?;
4103    ///
4104    /// assert!(val.dim() == (50, 6));
4105    /// # use bed_reader::BedErrorPlus;
4106    /// # Ok::<(), Box<BedErrorPlus>>(())
4107    /// ```
4108    #[builder(default = "Index::All")]
4109    #[builder(setter(into))]
4110    sid_index: Index,
4111
4112    /// Sets if the order of the output array is Fortran-style -- Default is true.
4113    ///
4114    /// "Fortran order" is also called "column-major order" [Wikipedia](https://en.wikipedia.org/wiki/Row-_and_column-major_order).
4115    ///
4116    /// Also see [`f`](struct.ReadOptionsBuilder.html#method.f) and [`c`](struct.ReadOptionsBuilder.html#method.c).
4117    #[builder(default = "true")]
4118    is_f: bool,
4119
4120    /// Sets if allele 1 is counted. Default is true.
4121    ///
4122    /// Also see [`count_a1`](struct.ReadOptionsBuilder.html#method.count_a1) and [`count_a2`](struct.ReadOptionsBuilder.html#method.count_a2).
4123    #[builder(default = "true")]
4124    is_a1_counted: bool,
4125
4126    /// Number of threads to use (defaults to all processors)
4127    ///
4128    /// Can also be set with an environment variable.
4129    /// See [Environment Variables](index.html#environment-variables).
4130    ///
4131    /// In this example, we read using only one thread.
4132    /// ```
4133    /// use ndarray as nd;
4134    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4135    /// use bed_reader::assert_eq_nan;
4136    ///
4137    /// let file_name = sample_bed_file("small.bed")?;
4138    /// let mut bed = Bed::new(file_name)?;
4139    /// let val = ReadOptions::builder().num_threads(1).i8().read(&mut bed)?;
4140    ///
4141    /// assert_eq_nan(
4142    ///     &val,
4143    ///     &nd::array![
4144    ///         [1, 0, -127, 0],
4145    ///         [2, 0, -127, 2],
4146    ///         [0, 1, 2, 0]
4147    ///     ],
4148    /// );
4149    /// # use bed_reader::BedErrorPlus;
4150    /// # Ok::<(), Box<BedErrorPlus>>(())
4151    /// ```
4152    #[builder(default, setter(strip_option))]
4153    num_threads: Option<usize>,
4154
4155    // LATER: Allow this to be set with an environment variable.
4156    /// Maximum number of concurrent async requests (defaults to 10) --
4157    /// Used by [`BedCloud`](struct.BedCloud.html).
4158    ///
    /// In this example, we read using only one request at a time.
4160    /// ```
4161    /// use ndarray as nd;
4162    /// use bed_reader::{BedCloud, ReadOptions};
4163    /// use bed_reader::assert_eq_nan;
4164    ///
4165    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
4166    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
4167    /// let mut bed_cloud = BedCloud::new(&url).await?;
4168    /// let val = ReadOptions::builder().max_concurrent_requests(1).i8().read_cloud(&mut bed_cloud).await?;
4169    ///
4170    /// assert_eq_nan(
4171    ///     &val,
4172    ///     &nd::array![
4173    ///         [1, 0, -127, 0],
4174    ///         [2, 0, -127, 2],
4175    ///         [0, 1, 2, 0]
4176    ///     ],
4177    /// );
4178    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
    /// ```
4180    #[builder(default, setter(strip_option))]
4181    #[allow(dead_code)]
4182    max_concurrent_requests: Option<usize>,
4183
4184    // LATER: Allow this to be set with an environment variable.
4185    /// Maximum chunk size of async requests (defaults to `8_000_000` bytes) --
4186    /// Used by [`BedCloud`](struct.BedCloud.html).
4187    ///
4188    /// In this example, we read using only `1_000_000` bytes per request.
4189    /// ```
4190    /// use ndarray as nd;
4191    /// use bed_reader::{BedCloud, ReadOptions};
4192    /// use bed_reader::assert_eq_nan;
4193    ///
4194    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
4195    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
4196    /// let mut bed_cloud = BedCloud::new(&url).await?;
4197    /// let val = ReadOptions::builder().max_chunk_bytes(1_000_000).i8().read_cloud(&mut bed_cloud).await?;
4198    ///
4199    /// assert_eq_nan(
4200    ///     &val,
4201    ///     &nd::array![
4202    ///         [1, 0, -127, 0],
4203    ///         [2, 0, -127, 2],
4204    ///         [0, 1, 2, 0]
4205    ///     ],
4206    /// );
4207    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
4208    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
4209    /// ```
4210    #[builder(default, setter(strip_option))]
4211    #[allow(dead_code)]
4212    max_chunk_bytes: Option<usize>,
4213}
4214
4215impl<TVal: BedVal> ReadOptions<TVal> {
4216    /// Read genotype data. Supports selection and options.
4217    ///
4218    /// > Also see [`Bed::read`](struct.Bed.html#method.read) (read without options).
4219    /// > To fill a preallocated ndarray, see [`ReadOptionsBuilder::read_and_fill`](struct.ReadOptionsBuilder.html#method.read_and_fill).
4220    ///
4221    /// See the [Table of `ReadOptions`](index.html#readoptions)
4222    /// for a list of the supported options.
4223    /// See the [Table of Index Expressions](index.html#index-expressions)
4224    /// for a list of expressions for selecting individuals (sample)
4225    /// and SNPs (variants).
4226    ///
4227    /// # Errors
4228    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4229    /// for all possible errors.
4230    ///
4231    /// # Examples
4232    ///
4233    /// ```
4234    /// use ndarray as nd;
4235    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4236    /// use bed_reader::assert_eq_nan;
4237    ///
4238    /// // Read all data from a .bed file into an ndarray of f64.
4239    /// let file_name = sample_bed_file("small.bed")?;
4240    /// let mut bed = Bed::new(file_name)?;
4241    /// let val = ReadOptions::builder().f64().read(&mut bed)?;
4242    ///
4243    /// assert_eq_nan(
4244    ///     &val,
4245    ///     &nd::array![
4246    ///         [1.0, 0.0, f64::NAN, 0.0],
4247    ///         [2.0, 0.0, f64::NAN, 2.0],
4248    ///         [0.0, 1.0, 2.0, 0.0]
4249    ///     ],
4250    /// );
4251    ///
4252    /// // Read the SNPs indexed by 2.
4253    /// let val = ReadOptions::builder().sid_index(2).f64().read(&mut bed)?;
4254    ///
4255    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4256    ///
4257    /// // Read the SNPs indexed by 2, 3, and 4th from last.
4258    /// let val = ReadOptions::builder()
4259    ///     .sid_index([2, 3, -4])
4260    ///     .f64()
4261    ///     .read(&mut bed)?;
4262    ///
4263    /// assert_eq_nan(
4264    ///     &val,
4265    ///     &nd::array![[f64::NAN, 0.0, 1.0], [f64::NAN, 2.0, 2.0], [2.0, 0.0, 0.0]],
4266    /// );
4267    ///
4268    /// //  Read SNPs from 1 (inclusive) to 4 (exclusive).
4269    /// let val = ReadOptions::builder()
4270    ///     .sid_index(1..4)
4271    ///     .f64()
4272    ///     .read(&mut bed)?;
4273    ///
4274    /// assert_eq_nan(
4275    ///     &val,
4276    ///     &nd::array![[0.0, f64::NAN, 0.0], [0.0, f64::NAN, 2.0], [1.0, 2.0, 0.0]],
4277    /// );
4278    ///
4279    /// // Print unique chrom values. Then, read all SNPs in chrom 5.
4280    /// use std::collections::HashSet;
4281    ///
4282    /// println!("{:?}", bed.chromosome()?.iter().collect::<HashSet<_>>());
4283    /// // This outputs: {"1", "5", "Y"}.
4284    /// let val = ReadOptions::builder()
4285    ///     .sid_index(bed.chromosome()?.map(|elem| elem == "5"))
4286    ///     .f64()
4287    ///     .read(&mut bed)?;
4288    ///
4289    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4290    ///
4291    /// // Read 1st individual (across all SNPs).
4292    /// let val = ReadOptions::builder().iid_index(0).f64().read(&mut bed)?;
4293    /// assert_eq_nan(&val, &nd::array![[1.0, 0.0, f64::NAN, 0.0]]);
4294    ///
4295    /// // Read every 2nd individual.
4296    /// use ndarray::s;
4297    ///
4298    /// let val = ReadOptions::builder()
4299    ///     .iid_index(s![..;2])
4300    ///     .f64()
4301    ///     .read(&mut bed)?;
4302    /// assert_eq_nan(
4303    ///     &val,
4304    ///     &nd::array![[1.0, 0.0, f64::NAN, 0.0], [0.0, 1.0, 2.0, 0.0]],
4305    /// );
4306    ///
4307    /// // Read last and 2nd-to-last individuals and the last SNP
4308    /// let val = ReadOptions::builder()
4309    ///     .iid_index([-1,-2])
4310    ///     .sid_index(-1)
4311    ///     .f64()
4312    ///     .read(&mut bed)?;
4313    ///
4314    /// assert_eq_nan(&val, &nd::array![[0.0],[2.0]]);
4315    ///
4316    /// // The output array can be f32, f64, or i8
4317    /// let val = ReadOptions::builder().i8().read(&mut bed)?;
4318    ///
4319    /// assert_eq_nan(
4320    ///     &val,
4321    ///     &nd::array![
4322    ///         [1, 0, -127, 0],
4323    ///         [2, 0, -127, 2],
4324    ///         [0, 1, 2, 0]
4325    ///     ],
4326    /// );
4327    /// # use bed_reader::BedErrorPlus;
4328    /// # Ok::<(), Box<BedErrorPlus>>(())
4329    /// ```
4330    #[must_use]
4331    pub fn builder() -> ReadOptionsBuilder<TVal> {
4332        ReadOptionsBuilder::default()
4333    }
4334
4335    /// Value to be used for missing values (defaults to -127 or NaN).
4336    ///
4337    /// # Example
4338    /// ```
4339    /// use ndarray as nd;
4340    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4341    /// use bed_reader::assert_eq_nan;
4342    ///
4343    /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4344    /// assert_eq!(read_options.missing_value(), -127);
4345    ///
4346    /// let file_name = sample_bed_file("small.bed")?;
4347    /// let mut bed = Bed::new(file_name)?;
4348    /// let val = bed.read_with_options(&read_options)?;
4349    /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4350    /// # use bed_reader::BedErrorPlus;
4351    /// # Ok::<(), Box<BedErrorPlus>>(())
4352    /// ```
4353    pub fn missing_value(&self) -> TVal {
4354        self.missing_value
4355    }
4356
4357    /// Index of individuals (samples) to read (defaults to all).
4358    ///
4359    /// # Example
4360    /// ```
4361    /// use ndarray as nd;
4362    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4363    /// use bed_reader::assert_eq_nan;
4364    ///
4365    /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4366    /// println!("{0:?}", read_options.iid_index()); // Outputs 'All'
4367    /// println!("{0:?}", read_options.sid_index()); // Outputs 'Vec([2, 3, 0])'
4368    ///
4369    /// let file_name = sample_bed_file("small.bed")?;
4370    /// let mut bed = Bed::new(file_name)?;
4371    /// let val = bed.read_with_options(&read_options)?;
4372    /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4373    /// # use bed_reader::BedErrorPlus;
4374    /// # Ok::<(), Box<BedErrorPlus>>(())
4375    /// ```
4376    pub fn iid_index(&self) -> &Index {
4377        &self.iid_index
4378    }
4379
4380    /// Index of SNPs (variants) to read (defaults to all).
4381    ///
4382    /// # Example
4383    /// ```
4384    /// use ndarray as nd;
4385    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4386    /// use bed_reader::assert_eq_nan;
4387    ///
4388    /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4389    /// println!("{0:?}", read_options.iid_index()); // Outputs 'All'
4390    /// println!("{0:?}", read_options.sid_index()); // Outputs 'Vec([2, 3, 0])'
4391    ///
4392    /// let file_name = sample_bed_file("small.bed")?;
4393    /// let mut bed = Bed::new(file_name)?;
4394    /// let val = bed.read_with_options(&read_options)?;
4395    /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4396    /// # use bed_reader::BedErrorPlus;
4397    /// # Ok::<(), Box<BedErrorPlus>>(())
4398    /// ```
4399    pub fn sid_index(&self) -> &Index {
4400        &self.sid_index
4401    }
4402
4403    /// Is the order of the output array Fortran-style (defaults to true).
4404    ///
4405    /// # Example
4406    /// ```
4407    /// use ndarray as nd;
4408    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4409    /// use bed_reader::assert_eq_nan;
4410    ///
4411    /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4412    /// assert_eq!(read_options.is_f(), true);
4413    ///
4414    /// let file_name = sample_bed_file("small.bed")?;
4415    /// let mut bed = Bed::new(file_name)?;
4416    /// let val = bed.read_with_options(&read_options)?;
4417    /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4418    /// # use bed_reader::BedErrorPlus;
4419    /// # Ok::<(), Box<BedErrorPlus>>(())
4420    /// ```
4421    pub fn is_f(&self) -> bool {
4422        self.is_f
4423    }
4424
4425    /// If allele 1 will be counted (defaults to true).
4426    ///
4427    /// # Example
4428    /// ```
4429    /// use ndarray as nd;
4430    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4431    /// use bed_reader::assert_eq_nan;
4432    ///
4433    /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4434    /// assert_eq!(read_options.is_a1_counted(), true);
4435    ///
4436    /// let file_name = sample_bed_file("small.bed")?;
4437    /// let mut bed = Bed::new(file_name)?;
4438    /// let val = bed.read_with_options(&read_options)?;
4439    /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4440    /// # use bed_reader::BedErrorPlus;
4441    /// # Ok::<(), Box<BedErrorPlus>>(())
4442    /// ```
4443    pub fn is_a1_counted(&self) -> bool {
4444        self.is_a1_counted
4445    }
4446
4447    /// Number of threads to be used (`None` means set with
4448    /// [Environment Variables](index.html#environment-variables) or use all processors).
4449    ///
4450    /// # Example
4451    /// ```
4452    /// use ndarray as nd;
4453    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4454    /// use bed_reader::assert_eq_nan;
4455    ///
4456    /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4457    /// assert_eq!(read_options.num_threads(), None);
4458    ///
4459    /// let file_name = sample_bed_file("small.bed")?;
4460    /// let mut bed = Bed::new(file_name)?;
4461    /// let val = bed.read_with_options(&read_options)?;
4462    /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4463    /// # use bed_reader::BedErrorPlus;
4464    /// # Ok::<(), Box<BedErrorPlus>>(())
4465    /// ```
4466    pub fn num_threads(&self) -> Option<usize> {
4467        self.num_threads
4468    }
4469}
4470
4471impl<TVal: BedVal> ReadOptionsBuilder<TVal> {
4472    /// > See [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) for details and examples.
4473    pub fn read(&self, bed: &mut Bed) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
4474        let read_options = self.build()?;
4475        bed.read_with_options(&read_options)
4476    }
4477
4478    /// Read genotype data from the cloud.
4479    ///
4480    /// > Also see
4481    /// > [`BedCloud::read_with_options`](struct.BedCloud.html#method.read_with_options).
4482    ///
4483    /// # Errors
4484    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4485    /// for all possible errors.
4486    ///
4487    /// # Example
4488    ///
4489    /// ```
4490    /// use ndarray as nd;
4491    /// use bed_reader::{BedCloud, ReadOptions};
4492    /// use bed_reader::assert_eq_nan;
4493    ///
4494    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
4495    /// // Read the SNPs indexed by 2.
4496    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
4497    /// let mut bed_cloud = BedCloud::new(&url).await?;
4498    /// let mut val = ReadOptions::builder()
4499    ///     .sid_index(2)
4500    ///     .read_cloud(&mut bed_cloud).await?;
4501    ///
4502    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4503    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
4504    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
4505    /// ```
4506    pub async fn read_cloud(
4507        &self,
4508        bed_cloud: &mut BedCloud,
4509    ) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
4510        let read_options = self.build()?;
4511        bed_cloud.read_with_options(&read_options).await
4512    }
4513
4514    /// Read genotype data into a preallocated array.
4515    ///
4516    /// > Also see [`Bed::read_and_fill`](struct.Bed.html#method.read_and_fill) and
4517    /// > [`Bed::read_and_fill_with_options`](struct.Bed.html#method.read_and_fill_with_options).
4518    ///
    /// Note that options [`ReadOptionsBuilder::f`](struct.ReadOptionsBuilder.html#method.f),
    /// [`ReadOptionsBuilder::c`](struct.ReadOptionsBuilder.html#method.c), and [`ReadOptionsBuilder::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
    /// are ignored. Instead, the order of the preallocated array is used.
4522    ///
4523    /// # Errors
4524    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4525    /// for all possible errors.
4526    ///
4527    /// # Example
4528    ///
4529    /// ```
4530    /// use ndarray as nd;
4531    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4532    /// use bed_reader::assert_eq_nan;
4533    ///
4534    /// // Read the SNPs indexed by 2.
4535    /// let file_name = sample_bed_file("small.bed")?;
4536    /// let mut bed = Bed::new(file_name)?;
4537    /// let mut val = nd::Array2::<f64>::default((3, 1));
4538    /// ReadOptions::builder()
4539    ///     .sid_index(2)
4540    ///     .read_and_fill(&mut bed, &mut val.view_mut())?;
4541    ///
4542    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4543    /// # use bed_reader::BedErrorPlus;
4544    /// # Ok::<(), Box<BedErrorPlus>>(())
4545    /// ```
4546    pub fn read_and_fill(
4547        &self,
4548        bed: &mut Bed,
4549        val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
4550    ) -> Result<(), Box<BedErrorPlus>> {
4551        let read_options = self.build()?;
4552        bed.read_and_fill_with_options(val, &read_options)
4553    }
4554
4555    /// Read genotype data from the cloud into a preallocated array.
4556    ///
4557    /// > Also see [`BedCloud::read_and_fill`](struct.BedCloud.html#method.read_and_fill) and
4558    /// > [`BedCloud::read_and_fill_with_options`](struct.BedCloud.html#method.read_and_fill_with_options).
4559    ///
    /// Note that options [`ReadOptionsBuilder::f`](struct.ReadOptionsBuilder.html#method.f),
    /// [`ReadOptionsBuilder::c`](struct.ReadOptionsBuilder.html#method.c), and [`ReadOptionsBuilder::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
    /// are ignored. Instead, the order of the preallocated array is used.
4563    ///
4564    /// # Errors
4565    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4566    /// for all possible errors.
4567    ///
4568    /// # Example
4569    ///
4570    /// ```
4571    /// use ndarray as nd;
4572    /// use bed_reader::{BedCloud, ReadOptions};
4573    /// use bed_reader::assert_eq_nan;
4574    ///
4575    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
4576    /// // Read the SNPs indexed by 2.
4577    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
4578    /// let mut bed_cloud = BedCloud::new(&url).await?;
4579    /// let mut val = nd::Array2::<f64>::default((3, 1));
4580    /// ReadOptions::builder()
4581    ///     .sid_index(2)
4582    ///     .read_and_fill_cloud(&mut bed_cloud, &mut val.view_mut()).await?;
4583    ///
4584    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4585    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
4586    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
4587    /// ```    
4588    pub async fn read_and_fill_cloud(
4589        &self,
4590        bed_cloud: &mut BedCloud,
4591        val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
4592    ) -> Result<(), Box<BedErrorPlus>> {
4593        let read_options = self.build()?;
4594        bed_cloud
4595            .read_and_fill_with_options(val, &read_options)
4596            .await
4597    }
4598
4599    /// Order of the output array, Fortran-style (default)
4600    ///
4601    /// Also called "column-major order" [Wikipedia](https://en.wikipedia.org/wiki/Row-_and_column-major_order).
4602    ///
4603    /// Also see [`is_f`](struct.ReadOptionsBuilder.html#method.is_f) and [`c`](struct.ReadOptionsBuilder.html#method.c).
4604    pub fn f(&mut self) -> &mut Self {
4605        self.is_f(true);
4606        self
4607    }
4608
    /// Order of the output array, C-style
4610    ///
4611    /// Also called "row-major order" [Wikipedia](https://en.wikipedia.org/wiki/Row-_and_column-major_order).
4612    ///
4613    /// Also see [`is_f`](struct.ReadOptionsBuilder.html#method.is_f) and [`f`](struct.ReadOptionsBuilder.html#method.f).
4614    pub fn c(&mut self) -> &mut Self {
4615        self.is_f(false);
4616        self
4617    }
4618
    /// Count the number of allele 1 (default and PLINK standard).
4620    ///
4621    /// Also see [`is_a1_counted`](struct.ReadOptionsBuilder.html#method.is_a1_counted) and [`count_a2`](struct.ReadOptionsBuilder.html#method.count_a2).
4622    ///
4623    /// # Example:
4624    /// ```
4625    /// use ndarray as nd;
4626    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4627    /// use bed_reader::assert_eq_nan;
4628    ///
4629    /// let file_name = sample_bed_file("small.bed")?;
4630    /// let mut bed = Bed::new(file_name)?;
4631    /// let val = ReadOptions::builder().count_a1().i8().read(&mut bed)?;
4632    ///
4633    /// assert_eq_nan(
4634    ///     &val,
4635    ///     &nd::array![
4636    ///         [1, 0, -127, 0],
4637    ///         [2, 0, -127, 2],
4638    ///         [0, 1, 2, 0]
4639    ///     ],
4640    /// );
4641    /// # use bed_reader::BedErrorPlus;
4642    /// # Ok::<(), Box<BedErrorPlus>>(())
4643    /// ```
4644    pub fn count_a1(&mut self) -> &mut Self {
4645        self.is_a1_counted = Some(true);
4646        self
4647    }
4648
    /// Count the number of allele 2.
4650    ///
4651    /// Also see [`is_a1_counted`](struct.ReadOptionsBuilder.html#method.is_a1_counted) and [`count_a1`](struct.ReadOptionsBuilder.html#method.count_a1).
4652    ///
4653    /// # Example:
4654    /// ```
4655    /// use ndarray as nd;
4656    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4657    /// use bed_reader::assert_eq_nan;
4658    ///
4659    /// let file_name = sample_bed_file("small.bed")?;
4660    /// let mut bed = Bed::new(file_name)?;
4661    /// let val = ReadOptions::builder().count_a2().i8().read(&mut bed)?;
4662    ///
4663    /// assert_eq_nan(
4664    ///     &val,
4665    ///     &nd::array![
4666    ///         [1, 2, -127, 2],
4667    ///         [0, 2, -127, 0],
4668    ///         [2, 1, 0, 2]
4669    ///     ],
4670    /// );
4671    /// # use bed_reader::BedErrorPlus;
4672    /// # Ok::<(), Box<BedErrorPlus>>(())
4673    /// ```
4674    pub fn count_a2(&mut self) -> &mut Self {
4675        self.is_a1_counted = Some(false);
4676        self
4677    }
4678}
4679
impl ReadOptionsBuilder<i8> {
    /// Output an ndarray of i8.
    ///
    /// This method changes no setting. It is defined only on
    /// `ReadOptionsBuilder<i8>`, so calling it only type-checks when the
    /// builder's value type is `i8`.
    ///
    /// # Example
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
    /// use bed_reader::assert_eq_nan;
    ///
    /// let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    /// let val = ReadOptions::builder().i8().read(&mut bed)?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1, 0, -127, 0],
    ///         [2, 0, -127, 2],
    ///         [0, 1, 2, 0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    pub fn i8(&mut self) -> &mut Self {
        // Nothing to set: the receiver type already fixes the value type to `i8`.
        self
    }
}
4708
impl ReadOptionsBuilder<f32> {
    /// Output an ndarray of f32.
    ///
    /// This method changes no setting. It is defined only on
    /// `ReadOptionsBuilder<f32>`, so calling it only type-checks when the
    /// builder's value type is `f32`.
    ///
    /// # Example
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
    /// use bed_reader::assert_eq_nan;
    ///
    /// let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    /// let val = ReadOptions::builder().f32().read(&mut bed)?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1.0, 0.0, f32::NAN, 0.0],
    ///         [2.0, 0.0, f32::NAN, 2.0],
    ///         [0.0, 1.0, 2.0, 0.0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    pub fn f32(&mut self) -> &mut Self {
        // Nothing to set: the receiver type already fixes the value type to `f32`.
        self
    }
}
4737
impl ReadOptionsBuilder<f64> {
    /// Output an ndarray of f64.
    ///
    /// This method changes no setting. It is defined only on
    /// `ReadOptionsBuilder<f64>`, so calling it only type-checks when the
    /// builder's value type is `f64`.
    ///
    /// # Example
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
    /// use bed_reader::assert_eq_nan;
    ///
    /// let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    /// let val = ReadOptions::builder().f64().read(&mut bed)?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1.0, 0.0, f64::NAN, 0.0],
    ///         [2.0, 0.0, f64::NAN, 2.0],
    ///         [0.0, 1.0, 2.0, 0.0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    pub fn f64(&mut self) -> &mut Self {
        // Nothing to set: the receiver type already fixes the value type to `f64`.
        self
    }
}
4766
/// Represents options for writing genotype data and metadata to a PLINK .bed file.
///
/// Construct with [`WriteOptions::builder`](struct.WriteOptions.html#method.builder).
// Every field uses `setter(custom)` and the struct uses `build_fn(skip)`, so the
// derive is asked not to generate setters or a `build` method for this type;
// those are expected to be written by hand elsewhere in this file.
#[derive(Clone, Debug, Builder)]
#[builder(build_fn(skip))]
pub struct WriteOptions<TVal>
where
    TVal: BedVal,
{
    // NOTE(review): presumably the .bed path passed to `WriteOptions::builder` -- confirm.
    #[builder(setter(custom))]
    path: PathBuf,

    // NOTE(review): presumably the location of the .fam file; the builder docs
    // mention a non-default .fam path option -- confirm.
    #[builder(setter(custom))]
    fam_path: PathBuf,

    // NOTE(review): presumably the location of the .bim file; the builder docs
    // mention a non-default .bim path option -- confirm.
    #[builder(setter(custom))]
    bim_path: PathBuf,

    // Metadata for the write. The `fid` accessor unwraps `metadata.fid`, relying
    // on the constructor having filled in all metadata fields.
    #[builder(setter(custom))]
    metadata: Metadata,

    // Whether allele 1 (rather than allele 2) is counted. Defaults to `true`.
    #[builder(setter(custom), default = "true")]
    is_a1_counted: bool,

    // Number of threads to use for writing. Defaults to `None`.
    #[builder(default, setter(custom))]
    num_threads: Option<usize>,

    // Value that represents missing data. Defaults to `TVal::missing()`.
    #[builder(default = "TVal::missing()", setter(custom))]
    missing_value: TVal,

    // Defaults to `false`.
    // NOTE(review): presumably suppresses writing the .fam file -- confirm.
    #[builder(setter(custom), default = "false")]
    skip_fam: bool,

    // Defaults to `false`.
    // NOTE(review): presumably suppresses writing the .bim file -- confirm.
    #[builder(setter(custom), default = "false")]
    skip_bim: bool,
}
4803
4804impl<TVal> WriteOptions<TVal>
4805where
4806    TVal: BedVal,
4807{
4808    /// Write values to a file in PLINK .bed format. Supports metadata and options.
4809    ///
4810    /// > Also see [`Bed::write`](struct.Bed.html#method.write), which does not support metadata or options.
4811    ///
4812    /// The options, [listed here](struct.WriteOptionsBuilder.html#implementations), can specify the:
4813    ///  * items of metadata, for example the individual ids or the SNP ids
4814    ///  * a non-default path for the .fam and/or .bim files
4815    ///  * a non-default value that represents missing data
4816    ///  * whether the first allele is counted (default) or the second
4817    ///  * number of threads to use for writing
4818    ///  * a [`Metadata`](struct.Metadata.html)
4819    ///
4820    /// # Examples
4821    /// In this example, all metadata is given one item at a time.
4822    /// ```
4823    /// use ndarray as nd;
4824    /// use bed_reader::{Bed, WriteOptions};
4825    ///
4826    /// let output_folder = temp_testdir::TempDir::default();
4827    /// let output_file = output_folder.join("small.bed");
4828    /// let val = nd::array![
4829    ///     [1.0, 0.0, f64::NAN, 0.0],
4830    ///     [2.0, 0.0, f64::NAN, 2.0],
4831    ///     [0.0, 1.0, 2.0, 0.0]
4832    /// ];
4833    /// WriteOptions::builder(output_file)
4834    ///     .fid(["fid1", "fid1", "fid2"])
4835    ///     .iid(["iid1", "iid2", "iid3"])
4836    ///     .father(["iid23", "iid23", "iid22"])
4837    ///     .mother(["iid34", "iid34", "iid33"])
4838    ///     .sex([1, 2, 0])
4839    ///     .pheno(["red", "red", "blue"])
4840    ///     .chromosome(["1", "1", "5", "Y"])
4841    ///     .sid(["sid1", "sid2", "sid3", "sid4"])
4842    ///     .cm_position([100.4, 2000.5, 4000.7, 7000.9])
4843    ///     .bp_position([1, 100, 1000, 1004])
4844    ///     .allele_1(["A", "T", "A", "T"])
4845    ///     .allele_2(["A", "C", "C", "G"])
4846    ///     .write(&val)?;
4847    /// # use bed_reader::BedErrorPlus;
4848    /// # Ok::<(), Box<BedErrorPlus>>(())
4849    /// ```
4850    /// Here, no metadata is given, so default values are assigned.
4851    /// If we then read the new file and list the chromosome property,
4852    /// it is an array of zeros, the default chromosome value.
4853    /// ```
4854    /// # use ndarray as nd;
4855    /// # use bed_reader::{Bed, WriteOptions};
4856    /// # let output_folder = temp_testdir::TempDir::default();
4857    /// let output_file2 = output_folder.join("small2.bed");
4858    /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
4859    ///
4860    /// WriteOptions::builder(&output_file2).write(&val)?;
4861    ///
4862    /// let mut bed2 = Bed::new(&output_file2)?;
4863    /// println!("{:?}", bed2.chromosome()?); // Outputs ndarray ["0", "0", "0", "0"]
4864    /// # use bed_reader::BedErrorPlus;
4865    /// # Ok::<(), Box<BedErrorPlus>>(())
4866    /// ```
4867    #[anyinput]
4868    pub fn builder(path: AnyPath) -> WriteOptionsBuilder<TVal> {
4869        WriteOptionsBuilder::new(path)
4870    }
4871
4872    /// Family id of each of individual (sample). Defaults to "0"'s
4873    ///
4874    /// # Example
4875    /// ```
4876    /// use ndarray as nd;
4877    /// use bed_reader::{WriteOptions};
4878    /// let output_folder = temp_testdir::TempDir::default();
4879    /// let output_file = output_folder.join("small.bed");
4880    /// let write_options = WriteOptions::builder(output_file)
4881    ///     .f64()
4882    ///     .iid(["i1", "i2", "i3"])
4883    ///     .sid(["s1", "s2", "s3", "s4"])
4884    ///     .build(3, 4)?;
4885    ///
4886    /// println!("{0:?}", write_options.fid()); // Outputs ndarray ["0", "0", "0"]
4887    /// # use bed_reader::BedErrorPlus;
4888    /// # Ok::<(), Box<BedErrorPlus>>(())
4889    /// ```
4890    pub fn fid(&self) -> &nd::Array1<String> {
4891        // unwrap always works because the WriteOptions constructor fills all metadata.
4892        self.metadata.fid.as_ref().unwrap()
4893    }
4894
4895    /// Individual id of each of individual (sample). Defaults to "iid1", "iid2" ...
4896    ///
4897    /// # Example
4898    /// ```
4899    /// use ndarray as nd;
4900    /// use bed_reader::{Bed, WriteOptions};
4901    /// let output_folder = temp_testdir::TempDir::default();
4902    /// let output_file = output_folder.join("small.bed");
4903    /// let write_options = WriteOptions::builder(output_file)
4904    ///     .f64()
4905    ///     .iid(["i1", "i2", "i3"])
4906    ///     .sid(["s1", "s2", "s3", "s4"])
4907    ///     .build(3, 4)?;
4908    ///
4909    /// println!("{0:?}", write_options.iid()); // Outputs ndarray ["i1", "i2", "i3"]
4910    ///
4911    /// let val = nd::array![
4912    ///     [1.0, 0.0, f64::NAN, 0.0],
4913    ///     [2.0, 0.0, f64::NAN, 2.0],
4914    ///     [0.0, 1.0, 2.0, 0.0]
4915    /// ];
4916    /// Bed::write_with_options(&val, &write_options)?;
4917    /// # use bed_reader::BedErrorPlus;
4918    /// # Ok::<(), Box<BedErrorPlus>>(())
4919    /// ```
4920    pub fn iid(&self) -> &nd::Array1<String> {
4921        // unwrap always works because the WriteOptions constructor fills all metadata.
4922        self.metadata.iid.as_ref().unwrap()
4923    }
4924
4925    ///  Father id of each of individual (sample). Defaults to "0"'s
4926    ///
4927    /// # Example
4928    /// ```
4929    /// use ndarray as nd;
4930    /// use bed_reader::WriteOptions;
4931    /// let output_folder = temp_testdir::TempDir::default();
4932    /// let output_file = output_folder.join("small.bed");
4933    /// let write_options = WriteOptions::builder(output_file)
4934    ///     .f64()
4935    ///     .iid(["i1", "i2", "i3"])
4936    ///     .sid(["s1", "s2", "s3", "s4"])
4937    ///     .build(3, 4)?;
4938    ///
4939    /// println!("{0:?}", write_options.father()); // Outputs ndarray ["0", "0", "0"]
4940    /// # use bed_reader::BedErrorPlus;
4941    /// # Ok::<(), Box<BedErrorPlus>>(())
4942    /// ```
4943    pub fn father(&self) -> &nd::Array1<String> {
4944        // unwrap always works because the WriteOptions constructor fills all metadata.
4945        self.metadata.father.as_ref().unwrap()
4946    }
4947
4948    ///  Mother id of each of individual (sample). Defaults to "0"'s
4949    ///
4950    /// # Example
4951    /// ```
4952    /// use ndarray as nd;
4953    /// use bed_reader::WriteOptions;
4954    /// let output_folder = temp_testdir::TempDir::default();
4955    /// let output_file = output_folder.join("small.bed");
4956    /// let write_options = WriteOptions::builder(output_file)
4957    ///     .f64()
4958    ///     .iid(["i1", "i2", "i3"])
4959    ///     .sid(["s1", "s2", "s3", "s4"])
4960    ///     .build(3, 4)?;
4961    ///
4962    /// println!("{0:?}", write_options.mother()); // Outputs ndarray ["0", "0", "0"]
4963    /// # use bed_reader::BedErrorPlus;
4964    /// # Ok::<(), Box<BedErrorPlus>>(())
4965    /// ```
4966    pub fn mother(&self) -> &nd::Array1<String> {
4967        // unwrap always works because the WriteOptions constructor fills all metadata.
4968        self.metadata.mother.as_ref().unwrap()
4969    }
4970
4971    ///  Sex of each of individual (sample). Defaults to 0's
4972    ///
4973    /// 0 is unknown, 1 is male, 2 is female
4974    ///
4975    /// # Example
4976    /// ```
4977    /// use ndarray as nd;
4978    /// use bed_reader::WriteOptions;
4979    /// let output_folder = temp_testdir::TempDir::default();
4980    /// let output_file = output_folder.join("small.bed");
4981    /// let write_options = WriteOptions::builder(output_file)
4982    ///     .f64()
4983    ///     .iid(["i1", "i2", "i3"])
4984    ///     .sid(["s1", "s2", "s3", "s4"])
4985    ///     .build(3, 4)?;
4986    ///
4987    /// println!("{0:?}", write_options.sex()); // Outputs ndarray [0, 0, 0]
4988    /// # use bed_reader::BedErrorPlus;
4989    /// # Ok::<(), Box<BedErrorPlus>>(())
4990    /// ```
4991    pub fn sex(&self) -> &nd::Array1<i32> {
4992        // unwrap always works because the WriteOptions constructor fills all metadata.
4993        self.metadata.sex.as_ref().unwrap()
4994    }
4995
4996    ///  Phenotype of each of individual (sample). Seldom used. Defaults to 0's
4997    ///
4998    /// # Example
4999    /// ```
5000    /// use ndarray as nd;
5001    /// use bed_reader::WriteOptions;
5002    /// let output_folder = temp_testdir::TempDir::default();
5003    /// let output_file = output_folder.join("small.bed");
5004    /// let write_options = WriteOptions::builder(output_file)
5005    ///     .f64()
5006    ///     .iid(["i1", "i2", "i3"])
5007    ///     .sid(["s1", "s2", "s3", "s4"])
5008    ///     .build(3, 4)?;
5009    ///
5010    /// println!("{0:?}", write_options.pheno()); // Outputs ndarray ["0", "0", "0"]
5011    /// # use bed_reader::BedErrorPlus;
5012    /// # Ok::<(), Box<BedErrorPlus>>(())
5013    /// ```
5014    pub fn pheno(&self) -> &nd::Array1<String> {
5015        // unwrap always works because the WriteOptions constructor fills all metadata.
5016        self.metadata.pheno.as_ref().unwrap()
5017    }
5018
5019    ///  Chromosome of each of SNP (variant). Defaults to "0"'s
5020    ///
5021    /// # Example
5022    /// ```
5023    /// use ndarray as nd;
5024    /// use bed_reader::WriteOptions;
5025    /// let output_folder = temp_testdir::TempDir::default();
5026    /// let output_file = output_folder.join("small.bed");
5027    /// let write_options = WriteOptions::builder(output_file)
5028    ///     .f64()
5029    ///     .iid(["i1", "i2", "i3"])
5030    ///     .sid(["s1", "s2", "s3", "s4"])
5031    ///     .build(3, 4)?;
5032    ///
5033    /// println!("{0:?}", write_options.chromosome()); // Outputs ndarray ["0", "0", "0", "0"]
5034    /// # use bed_reader::BedErrorPlus;
5035    /// # Ok::<(), Box<BedErrorPlus>>(())
5036    /// ```
5037    pub fn chromosome(&self) -> &nd::Array1<String> {
5038        // unwrap always works because the WriteOptions constructor fills all metadata.
5039        self.metadata.chromosome.as_ref().unwrap()
5040    }
5041
5042    ///  SNP id of each of SNP (variant). Defaults to "sid1", "sid2", ...
5043    ///
5044    /// # Example
5045    /// ```
5046    /// use ndarray as nd;
5047    /// use bed_reader::{Bed, WriteOptions};
5048    /// let output_folder = temp_testdir::TempDir::default();
5049    /// let output_file = output_folder.join("small.bed");
5050    /// let write_options = WriteOptions::builder(output_file)
5051    ///     .f64()
5052    ///     .iid(["i1", "i2", "i3"])
5053    ///     .sid(["s1", "s2", "s3", "s4"])
5054    ///     .build(3, 4)?;
5055    ///
5056    /// println!("{0:?}", write_options.sid()); // Outputs ndarray ["s1", "s2", "s3", "s4"]
5057    ///
5058    /// let val = nd::array![
5059    ///     [1.0, 0.0, f64::NAN, 0.0],
5060    ///     [2.0, 0.0, f64::NAN, 2.0],
5061    ///     [0.0, 1.0, 2.0, 0.0]
5062    /// ];
5063    /// Bed::write_with_options(&val, &write_options)?;
5064    /// # use bed_reader::BedErrorPlus;
5065    /// # Ok::<(), Box<BedErrorPlus>>(())
5066    /// ```
5067    pub fn sid(&self) -> &nd::Array1<String> {
5068        // unwrap always works because the WriteOptions constructor fills all metadata.
5069        self.metadata.sid.as_ref().unwrap()
5070    }
5071
5072    /// Centimorgan position of each SNP (variant). Defaults to 0.0's.
5073    ///
5074    /// # Example
5075    /// ```
5076    /// use ndarray as nd;
5077    /// use bed_reader::WriteOptions;
5078    /// let output_folder = temp_testdir::TempDir::default();
5079    /// let output_file = output_folder.join("small.bed");
5080    /// let write_options = WriteOptions::builder(output_file)
5081    ///     .f64()
5082    ///     .iid(["i1", "i2", "i3"])
5083    ///     .sid(["s1", "s2", "s3", "s4"])
5084    ///     .build(3, 4)?;
5085    ///
5086    /// println!("{0:?}", write_options.cm_position()); // Outputs ndarray [0.0, 0.0, 0.0, 0.0]
5087    /// # use bed_reader::BedErrorPlus;
5088    /// # Ok::<(), Box<BedErrorPlus>>(())
5089    /// ```
5090    pub fn cm_position(&self) -> &nd::Array1<f32> {
5091        // unwrap always works because the WriteOptions constructor fills all metadata.
5092        self.metadata.cm_position.as_ref().unwrap()
5093    }
5094
5095    /// Base-pair position of each SNP (variant). Defaults to 0's.
5096    ///
5097    /// # Example
5098    /// ```
5099    /// use ndarray as nd;
5100    /// use bed_reader::{Bed, WriteOptions};
5101    /// let output_folder = temp_testdir::TempDir::default();
5102    /// let output_file = output_folder.join("small.bed");
5103    /// let write_options = WriteOptions::builder(output_file)
5104    ///     .f64()
5105    ///     .iid(["i1", "i2", "i3"])
5106    ///     .sid(["s1", "s2", "s3", "s4"])
5107    ///     .build(3, 4)?;
5108    ///
5109    /// println!("{0:?}", write_options.bp_position()); // Outputs ndarray [0, 0, 0, 0]
5110    /// # use bed_reader::BedErrorPlus;
5111    /// # Ok::<(), Box<BedErrorPlus>>(())
5112    /// ```
5113    pub fn bp_position(&self) -> &nd::Array1<i32> {
5114        // unwrap always works because the WriteOptions constructor fills all metadata.
5115        self.metadata.bp_position.as_ref().unwrap()
5116    }
5117
5118    /// First allele of each SNP (variant). Defaults to "A1"
5119    ///
5120    /// # Example
5121    /// ```
5122    /// use ndarray as nd;
5123    /// use bed_reader::{Bed, WriteOptions};
5124    /// let output_folder = temp_testdir::TempDir::default();
5125    /// let output_file = output_folder.join("small.bed");
5126    /// let write_options = WriteOptions::builder(output_file)
5127    ///     .f64()
5128    ///     .iid(["i1", "i2", "i3"])
5129    ///     .sid(["s1", "s2", "s3", "s4"])
5130    ///     .build(3, 4)?;
5131    ///
5132    /// println!("{0:?}", write_options.allele_1()); // Outputs ndarray ["A1", "A1", "A1", "A1"]
5133    /// println!("{0:?}", write_options.allele_2()); // Outputs ndarray ["A2", "A2", "A2", "A2"]
5134    /// # use bed_reader::BedErrorPlus;
5135    /// # Ok::<(), Box<BedErrorPlus>>(())
5136    /// ```
5137    pub fn allele_1(&self) -> &nd::Array1<String> {
5138        // unwrap always works because the WriteOptions constructor fills all metadata.
5139        self.metadata.allele_1.as_ref().unwrap()
5140    }
5141
5142    /// Second allele of each SNP (variant). Defaults to "A2"
5143    ///
5144    /// # Example
5145    /// ```
5146    /// use ndarray as nd;
5147    /// use bed_reader::{Bed, WriteOptions};
5148    /// let output_folder = temp_testdir::TempDir::default();
5149    /// let output_file = output_folder.join("small.bed");
5150    /// let write_options = WriteOptions::builder(output_file)
5151    ///     .f64()
5152    ///     .iid(["i1", "i2", "i3"])
5153    ///     .sid(["s1", "s2", "s3", "s4"])
5154    ///     .build(3, 4)?;
5155    ///
5156    /// println!("{0:?}", write_options.allele_1()); // Outputs ndarray ["A1", "A1", "A1", "A1"]
5157    /// println!("{0:?}", write_options.allele_2()); // Outputs ndarray ["A2", "A2", "A2", "A2"]
5158    /// # use bed_reader::BedErrorPlus;
5159    /// # Ok::<(), Box<BedErrorPlus>>(())
5160    /// ```
5161    pub fn allele_2(&self) -> &nd::Array1<String> {
5162        // unwrap always works because the WriteOptions constructor fills all metadata.
5163        self.metadata.allele_2.as_ref().unwrap()
5164    }
5165
5166    /// [`Metadata`](struct.Metadata.html) for this [`WriteOptions`](struct.WriteOptions.html), for example, the individual (sample) Ids.
5167    ///
5168    /// This returns a struct with 12 fields. Each field is a ndarray.
5169    /// The struct will always be new, but the 12 ndarrays will be
5170    /// shared with this [`WriteOptions`](struct.WriteOptions.html).
5171    ///
5172    /// If the needed, default values will be used.
5173    ///
5174    /// # Example
5175    /// ```
5176    /// use ndarray as nd;
5177    /// use bed_reader::{Bed, WriteOptions};
5178    /// let output_folder = temp_testdir::TempDir::default();
5179    /// let output_file = output_folder.join("small.bed");
5180    /// let write_options = WriteOptions::builder(output_file)
5181    ///     .f64()
5182    ///     .iid(["i1", "i2", "i3"])
5183    ///     .sid(["s1", "s2", "s3", "s4"])
5184    ///     .build(3, 4)?;
5185    ///
5186    /// let metadata = write_options.metadata();
5187    /// println!("{0:?}", metadata.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"])
5188    /// # use bed_reader::BedErrorPlus;
5189    /// # Ok::<(), Box<BedErrorPlus>>(())
5190    /// ```
5191    pub fn metadata(&self) -> Metadata {
5192        self.metadata.clone()
5193    }
5194
5195    /// The number of individuals (samples)
5196    ///
5197    /// # Example
5198    /// ```
5199    /// use ndarray as nd;
5200    /// use bed_reader::{Bed, WriteOptions};
5201    /// let output_folder = temp_testdir::TempDir::default();
5202    /// let output_file = output_folder.join("small.bed");
5203    /// let write_options = WriteOptions::builder(output_file)
5204    ///     .f64()
5205    ///     .iid(["i1", "i2", "i3"])
5206    ///     .sid(["s1", "s2", "s3", "s4"])
5207    ///     .build(3, 4)?;
5208    ///
5209    /// assert_eq!(write_options.iid_count(), 3);
5210    /// assert_eq!(write_options.sid_count(), 4);
5211    /// # use bed_reader::BedErrorPlus;
5212    /// # Ok::<(), Box<BedErrorPlus>>(())
5213    /// ```
5214    pub fn iid_count(&self) -> usize {
5215        self.iid().len()
5216    }
5217
5218    /// The number of SNPs (variants)
5219    ///
5220    /// # Example
5221    /// ```
5222    /// use ndarray as nd;
5223    /// use bed_reader::{Bed, WriteOptions};
5224    /// let output_folder = temp_testdir::TempDir::default();
5225    /// let output_file = output_folder.join("small.bed");
5226    /// let write_options = WriteOptions::builder(output_file)
5227    ///     .f64()
5228    ///     .iid(["i1", "i2", "i3"])
5229    ///     .sid(["s1", "s2", "s3", "s4"])
5230    ///     .build(3, 4)?;
5231    ///
5232    /// assert_eq!(write_options.iid_count(), 3);
5233    /// assert_eq!(write_options.sid_count(), 4);
5234    /// # use bed_reader::BedErrorPlus;
5235    /// # Ok::<(), Box<BedErrorPlus>>(())
5236    /// ```
5237    pub fn sid_count(&self) -> usize {
5238        self.sid().len()
5239    }
5240
5241    /// Number of individuals (samples) and SNPs (variants)
5242    ///
5243    /// # Example
5244    /// ```
5245    /// use ndarray as nd;
5246    /// use bed_reader::{Bed, WriteOptions};
5247    /// let output_folder = temp_testdir::TempDir::default();
5248    /// let output_file = output_folder.join("small.bed");
5249    /// let write_options = WriteOptions::builder(output_file)
5250    ///     .f64()
5251    ///     .iid(["i1", "i2", "i3"])
5252    ///     .sid(["s1", "s2", "s3", "s4"])
5253    ///     .build(3, 4)?;
5254    ///
5255    /// assert_eq!(write_options.dim(), (3, 4));
5256    /// # use bed_reader::BedErrorPlus;
5257    /// # Ok::<(), Box<BedErrorPlus>>(())
5258    /// ```
5259    pub fn dim(&self) -> (usize, usize) {
5260        (self.iid_count(), self.sid_count())
5261    }
5262
5263    /// Path to .bed file.
5264    ///
5265    /// # Example
5266    /// ```
5267    /// use ndarray as nd;
5268    /// use bed_reader::{Bed, WriteOptions};
5269    /// let output_folder = temp_testdir::TempDir::default();
5270    /// let output_file = output_folder.join("small.bed");
5271    /// let write_options = WriteOptions::builder(output_file)
5272    ///     .f64()
5273    ///     .iid(["i1", "i2", "i3"])
5274    ///     .sid(["s1", "s2", "s3", "s4"])
5275    ///     .build(3, 4)?;
5276    ///
5277    /// println!("{0:?}", write_options.path()); // Outputs "...small.bed"
5278    /// println!("{0:?}", write_options.fam_path()); // Outputs "...small.fam"
5279    /// println!("{0:?}", write_options.bim_path()); // Outputs "...small.bim"
5280    /// # use bed_reader::BedErrorPlus;
5281    /// # Ok::<(), Box<BedErrorPlus>>(())
5282    /// ```
5283    pub fn path(&self) -> &PathBuf {
5284        &self.path
5285    }
5286
5287    /// Path to .fam file.
5288    ///
5289    /// # Example
5290    /// ```
5291    /// use ndarray as nd;
5292    /// use bed_reader::{Bed, WriteOptions};
5293    /// let output_folder = temp_testdir::TempDir::default();
5294    /// let output_file = output_folder.join("small.bed");
5295    /// let write_options = WriteOptions::builder(output_file)
5296    ///     .f64()
5297    ///     .iid(["i1", "i2", "i3"])
5298    ///     .sid(["s1", "s2", "s3", "s4"])
5299    ///     .build(3, 4)?;
5300    ///
5301    /// println!("{0:?}", write_options.path()); // Outputs "...small.bed"
5302    /// println!("{0:?}", write_options.fam_path()); // Outputs "...small.fam"
5303    /// println!("{0:?}", write_options.bim_path()); // Outputs "...small.bim"
5304    /// # use bed_reader::BedErrorPlus;
5305    /// # Ok::<(), Box<BedErrorPlus>>(())
5306    /// ```
5307    pub fn fam_path(&self) -> &PathBuf {
5308        &self.fam_path
5309    }
5310
5311    /// Path to .bim file.
5312    ///
5313    /// # Example
5314    /// ```
5315    /// use ndarray as nd;
5316    /// use bed_reader::{Bed, WriteOptions};
5317    /// let output_folder = temp_testdir::TempDir::default();
5318    /// let output_file = output_folder.join("small.bed");
5319    /// let write_options = WriteOptions::builder(output_file)
5320    ///     .f64()
5321    ///     .iid(["i1", "i2", "i3"])
5322    ///     .sid(["s1", "s2", "s3", "s4"])
5323    ///     .build(3, 4)?;
5324    ///
5325    /// println!("{0:?}", write_options.path()); // Outputs "...small.bed"
5326    /// println!("{0:?}", write_options.fam_path()); // Outputs "...small.fam"
5327    /// println!("{0:?}", write_options.bim_path()); // Outputs "...small.bim"
5328    /// # use bed_reader::BedErrorPlus;
5329    /// # Ok::<(), Box<BedErrorPlus>>(())
5330    /// ```
5331    pub fn bim_path(&self) -> &PathBuf {
5332        &self.bim_path
5333    }
5334
5335    /// If allele 1 will be counted (defaults to true).
5336    ///
5337    /// # Example
5338    /// ```
5339    /// use ndarray as nd;
5340    /// use bed_reader::{Bed, WriteOptions};
5341    /// let output_folder = temp_testdir::TempDir::default();
5342    /// let output_file = output_folder.join("small.bed");
5343    /// let write_options = WriteOptions::builder(output_file)
5344    ///     .i8()
5345    ///     .iid(["i1", "i2", "i3"])
5346    ///     .sid(["s1", "s2", "s3", "s4"])
5347    ///     .build(3, 4)?;
5348    ///
5349    /// assert!(write_options.is_a1_counted());
5350    /// # use bed_reader::BedErrorPlus;
5351    /// # Ok::<(), Box<BedErrorPlus>>(())
5352    /// ```
5353    pub fn is_a1_counted(&self) -> bool {
5354        self.is_a1_counted
5355    }
5356
5357    /// Number of threads to be used (`None` means set with
5358    /// [Environment Variables](index.html#environment-variables) or use all processors).
5359    ///
5360    /// # Example
5361    /// ```
5362    /// use ndarray as nd;
5363    /// use bed_reader::{Bed, WriteOptions};
5364    /// let output_folder = temp_testdir::TempDir::default();
5365    /// let output_file = output_folder.join("small.bed");
5366    /// let write_options = WriteOptions::builder(output_file)
5367    ///     .i8()
5368    ///     .iid(["i1", "i2", "i3"])
5369    ///     .sid(["s1", "s2", "s3", "s4"])
5370    ///     .build(3, 4)?;
5371    ///
5372    /// assert!(write_options.num_threads().is_none());
5373    /// # use bed_reader::BedErrorPlus;
5374    /// # Ok::<(), Box<BedErrorPlus>>(())
5375    /// ```
5376    pub fn num_threads(&self) -> Option<usize> {
5377        self.num_threads
5378    }
5379
5380    /// Value to be used for missing values (defaults to -127 or NaN).
5381    ///
5382    /// # Example
5383    /// ```
5384    /// use ndarray as nd;
5385    /// use bed_reader::{Bed, WriteOptions};
5386    /// let output_folder = temp_testdir::TempDir::default();
5387    /// let output_file = output_folder.join("small.bed");
5388    /// let write_options = WriteOptions::builder(output_file)
5389    ///     .i8()
5390    ///     .iid(["i1", "i2", "i3"])
5391    ///     .sid(["s1", "s2", "s3", "s4"])
5392    ///     .build(3, 4)?;
5393    ///
5394    /// assert!(write_options.missing_value() == -127);
5395    /// # use bed_reader::BedErrorPlus;
5396    /// # Ok::<(), Box<BedErrorPlus>>(())
5397    /// ```
5398    pub fn missing_value(&self) -> TVal {
5399        self.missing_value
5400    }
5401
5402    /// If skipping writing .fam file.
5403    ///
5404    /// # Example
5405    /// ```
5406    /// use ndarray as nd;
5407    /// use bed_reader::{Bed, WriteOptions};
5408    /// let output_folder = temp_testdir::TempDir::default();
5409    /// let output_file = output_folder.join("small.bed");
5410    /// let write_options = WriteOptions::builder(output_file)
5411    ///     .i8()
5412    ///     .skip_fam()
5413    ///     .skip_bim()
5414    ///     .build(3, 4)?;
5415    /// assert!(write_options.skip_fam());
5416    /// assert!(write_options.skip_bim());
5417    /// # use bed_reader::BedErrorPlus;
5418    /// # Ok::<(), Box<BedErrorPlus>>(())
5419    /// ```
5420    pub fn skip_fam(&self) -> bool {
5421        self.skip_fam
5422    }
5423
5424    /// If skipping writing .bim file.
5425    ///
5426    /// # Example
5427    /// ```
5428    /// use ndarray as nd;
5429    /// use bed_reader::{Bed, WriteOptions};
5430    /// let output_folder = temp_testdir::TempDir::default();
5431    /// let output_file = output_folder.join("small.bed");
5432    /// let write_options = WriteOptions::builder(output_file)
5433    ///     .i8()
5434    ///     .skip_fam()
5435    ///     .skip_bim()
5436    ///     .build(3, 4)?;
5437    /// assert!(write_options.skip_fam());
5438    /// assert!(write_options.skip_bim());
5439    /// # use bed_reader::BedErrorPlus;
5440    /// # Ok::<(), Box<BedErrorPlus>>(())
5441    /// ```
5442    pub fn skip_bim(&self) -> bool {
5443        self.skip_bim
5444    }
5445}
5446
5447impl<TVal> WriteOptionsBuilder<TVal>
5448where
5449    TVal: BedVal,
5450{
5451    /// Creates a new [`WriteOptions`](struct.WriteOptions.html) with the options given and then writes a .bed (and .fam and .bim) file.
5452    ///
5453    /// See [`WriteOptions`](struct.WriteOptions.html) for details and examples.
5454    pub fn write<S: nd::Data<Elem = TVal>>(
5455        &mut self,
5456        val: &nd::ArrayBase<S, nd::Ix2>,
5457    ) -> Result<(), Box<BedErrorPlus>> {
5458        let (iid_count, sid_count) = val.dim();
5459        let write_options = self.build(iid_count, sid_count)?;
5460        Bed::write_with_options(val, &write_options)?;
5461
5462        Ok(())
5463    }
5464
5465    /// Set the family id (fid) values for each individual (sample).
5466    ///
5467    /// Defaults to zeros.
5468    ///
5469    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5470    ///
5471    #[anyinput]
5472    #[must_use]
5473    pub fn fid(mut self, fid: AnyIter<AnyString>) -> Self {
5474        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5475        self.metadata.as_mut().unwrap().set_fid(fid);
5476        self
5477    }
5478
5479    /// Set the individual id (iid) values for each individual (sample).
5480    ///
5481    /// Defaults to "iid1", "iid2", ...
5482    ///
5483    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5484    ///
5485    #[anyinput]
5486    #[must_use]
5487    pub fn iid(mut self, iid: AnyIter<AnyString>) -> Self {
5488        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5489        self.metadata.as_mut().unwrap().set_iid(iid);
5490        self
5491    }
5492
5493    /// Set the father id values for each individual (sample).
5494    ///
5495    /// Defaults to zeros.
5496    ///
5497    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5498    ///
5499    #[anyinput]
5500    #[must_use]
5501    pub fn father(mut self, father: AnyIter<AnyString>) -> Self {
5502        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5503        self.metadata.as_mut().unwrap().set_father(father);
5504        self
5505    }
5506
5507    /// Set the mother id values for each individual (sample).
5508    ///
5509    /// Defaults to zeros.
5510    ///
5511    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5512    ///
5513    #[anyinput]
5514    #[must_use]
5515    pub fn mother(mut self, mother: AnyIter<AnyString>) -> Self {
5516        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5517        self.metadata.as_mut().unwrap().set_mother(mother);
5518        self
5519    }
5520
5521    /// Set the sex for each individual (sample).
5522    ///
5523    /// 0 is unknown (default), 1 is male, 2 is female
5524    #[anyinput]
5525    #[must_use]
5526    pub fn sex(mut self, sex: AnyIter<i32>) -> Self {
5527        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5528        self.metadata.as_mut().unwrap().set_sex(sex);
5529        self
5530    }
5531
5532    /// Set a phenotype for each individual (sample). Seldom used.
5533    ///
5534    /// Defaults to zeros.
5535    ///
5536    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5537    ///
5538    #[anyinput]
5539    #[must_use]
5540    pub fn pheno(mut self, pheno: AnyIter<AnyString>) -> Self {
5541        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5542        self.metadata.as_mut().unwrap().set_pheno(pheno);
5543        self
5544    }
5545
5546    /// Set the chromosome for each SNP (variant).
5547    ///
5548    /// Defaults to zeros.
5549    #[anyinput]
5550    #[must_use]
5551    pub fn chromosome(mut self, chromosome: AnyIter<AnyString>) -> Self {
5552        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5553        self.metadata.as_mut().unwrap().set_chromosome(chromosome);
5554        self
5555    }
5556
5557    /// Set the SNP id (sid) for each SNP (variant).
5558    ///
5559    /// Defaults to "sid1", "sid2", ...
5560    ///
5561    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5562    ///
5563    #[anyinput]
5564    #[must_use]
5565    pub fn sid(mut self, sid: AnyIter<AnyString>) -> Self {
5566        self.metadata.as_mut().unwrap().set_sid(sid);
5567        self
5568    }
5569
5570    /// Set the centimorgan position for each SNP (variant).
5571    ///
5572    /// Defaults to zeros.
5573    #[anyinput]
5574    #[must_use]
5575    pub fn cm_position(mut self, cm_position: AnyIter<f32>) -> Self {
5576        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5577        self.metadata.as_mut().unwrap().set_cm_position(cm_position);
5578        self
5579    }
5580
5581    /// Set the base-pair position for each SNP (variant).
5582    ///
5583    /// Defaults to zeros.
5584    ///
5585    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5586    ///
5587    #[anyinput]
5588    #[must_use]
5589    pub fn bp_position(mut self, bp_position: AnyIter<i32>) -> Self {
5590        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5591        self.metadata.as_mut().unwrap().set_bp_position(bp_position);
5592        self
5593    }
5594
5595    /// Set the first allele for each SNP (variant).
5596    ///
5597    /// Defaults to "A1", A1" ...
5598    ///
5599    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5600    ///
5601    #[anyinput]
5602    #[must_use]
5603    pub fn allele_1(mut self, allele_1: AnyIter<AnyString>) -> Self {
5604        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5605        self.metadata.as_mut().unwrap().set_allele_1(allele_1);
5606        self
5607    }
5608
5609    /// Set the second allele for each SNP (variant).
5610    ///
5611    /// Defaults to "A2", A2" ...
5612    ///
5613    /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5614    ///
5615    #[anyinput]
5616    #[must_use]
5617    pub fn allele_2(mut self, allele_2: AnyIter<AnyString>) -> Self {
5618        // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5619        self.metadata.as_mut().unwrap().set_allele_2(allele_2);
5620        self
5621    }
5622
5623    /// Merge metadata from a [`Metadata`](struct.Metadata.html).
5624    ///
5625    /// If a field is set in both [`Metadata`](struct.Metadata.html)'s,
5626    /// it will be overridden.
5627    ///
5628    /// # Example
5629    ///
5630    /// Extract metadata from a file.
5631    /// Create a random file with the same metadata.
5632    /// ```
5633    /// use ndarray as nd;
5634    /// use bed_reader::{Bed, WriteOptions, sample_bed_file};
5635    /// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
5636    ///
5637    /// let mut bed = Bed::new(sample_bed_file("small.bed")?)?;
5638    /// let metadata = bed.metadata()?;
5639    /// let shape = bed.dim()?;
5640    ///
5641    /// let mut rng = StdRng::seed_from_u64(0);
5642    /// let val = nd::Array::random_using(shape, Uniform::from(-1..3), &mut rng);
5643    ///
5644    /// let temp_out = temp_testdir::TempDir::default();
5645    /// let output_file = temp_out.join("random.bed");
5646    /// WriteOptions::builder(output_file)
5647    ///     .metadata(&metadata)
5648    ///     .missing_value(-1)
5649    ///     .write(&val)?;
5650    /// # use bed_reader::BedErrorPlus;
5651    /// # Ok::<(), Box<BedErrorPlus>>(())
5652    /// ```
5653    #[must_use]
5654    pub fn metadata(mut self, metadata: &Metadata) -> Self {
5655        self.metadata = Some(
5656            Metadata::builder()
5657                .metadata(&self.metadata.unwrap()) // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5658                .metadata(metadata)
5659                .build_no_file_check() // Don't need to check consistent counts here. Builder will do it.
5660                .unwrap(), // Unwrap will always work nothing can go wrong
5661        );
5662        self
5663    }
5664
5665    /// Set the path to the .fam file.
5666    ///
5667    /// If not set, the .fam file will be assumed
5668    /// to have the same name as the .bed file, but with the extension .fam.
5669    ///
5670    /// # Example:
5671    /// Write .bed, .fam, and .bim files with non-standard names.
5672    /// ```
5673    /// use ndarray as nd;
5674    /// use bed_reader::WriteOptions;
5675    /// let output_folder = temp_testdir::TempDir::default();
5676    /// let output_file = output_folder.join("small.deb");
5677    /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
5678    /// WriteOptions::builder(output_file)
5679    ///     .fam_path(output_folder.join("small.maf"))
5680    ///     .bim_path(output_folder.join("small.mib"))
5681    ///     .write(&val)?;
5682    /// # use bed_reader::BedErrorPlus;
5683    /// # Ok::<(), Box<BedErrorPlus>>(())
5684    /// ```
5685    #[anyinput]
5686    #[must_use]
5687    pub fn fam_path(mut self, path: AnyPath) -> Self {
5688        self.fam_path = Some(path.to_owned());
5689        self
5690    }
5691
5692    /// Set the path to the .bim file.
5693    ///
5694    /// If not set, the .bim file will be assumed
5695    /// to have the same name as the .bed file, but with the extension .bim.
5696    ///
5697    /// # Example:
5698    /// Write .bed, .fam, and .bim files with non-standard names.
5699    /// ```
5700    /// use ndarray as nd;
5701    /// use bed_reader::{WriteOptions};
5702    /// let output_folder = temp_testdir::TempDir::default();
5703    /// let output_file = output_folder.join("small.deb");
5704    /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
5705    /// WriteOptions::builder(output_file)
5706    ///     .fam_path(output_folder.join("small.maf"))
5707    ///     .bim_path(output_folder.join("small.mib"))
5708    ///     .write(&val)?;
5709    /// # use bed_reader::BedErrorPlus;
5710    /// # Ok::<(), Box<BedErrorPlus>>(())
5711    /// ```
5712    #[anyinput]
5713    #[must_use]
5714    pub fn bim_path(mut self, path: AnyPath) -> Self {
5715        self.bim_path = Some(path.to_owned());
5716        self
5717    }
5718
5719    /// Value used for missing values (defaults to -127 or NaN)
5720    ///
5721    /// -127 is the default for i8 and NaN is the default for f32 and f64.
5722    ///
5723    /// # Example
5724    ///
5725    /// Extract metadata from a file.
5726    /// Create a random file with the same metadata.
5727    /// ```
5728    /// use ndarray as nd;
5729    /// use bed_reader::{Bed, WriteOptions, sample_bed_file};
5730    /// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
5731    ///
5732    /// let mut bed = Bed::new(sample_bed_file("small.bed")?)?;
5733    /// let metadata = bed.metadata()?;
5734    /// let shape = bed.dim()?;
5735    ///
5736    /// let mut rng = StdRng::seed_from_u64(0);
5737    /// let val = nd::Array::random_using(shape, Uniform::from(-1..3), &mut rng);
5738    ///
5739    /// let temp_out = temp_testdir::TempDir::default();
5740    /// let output_file = temp_out.join("random.bed");
5741    /// WriteOptions::builder(output_file)
5742    ///     .metadata(&metadata)
5743    ///     .missing_value(-1)
5744    ///     .write(&val)?;
5745    /// # use bed_reader::BedErrorPlus;
5746    /// # Ok::<(), Box<BedErrorPlus>>(())
5747    /// ```
5748    pub fn missing_value(&mut self, missing_value: TVal) -> &mut Self {
5749        self.missing_value = Some(missing_value);
5750        self
5751    }
5752
    /// Count the number of allele 1 (default and PLINK standard).
5754    ///
5755    /// Also see [`is_a1_counted`](struct.WriteOptionsBuilder.html#method.is_a1_counted) and [`count_a2`](struct.WriteOptionsBuilder.html#method.count_a2).
5756    pub fn count_a1(&mut self) -> &mut Self {
5757        self.is_a1_counted = Some(true);
5758        self
5759    }
5760
    /// Count the number of allele 2.
5762    ///
5763    /// Also see [`is_a1_counted`](struct.WriteOptionsBuilder.html#method.is_a1_counted) and [`count_a1`](struct.WriteOptionsBuilder.html#method.count_a1).
5764    pub fn count_a2(&mut self) -> &mut Self {
5765        self.is_a1_counted = Some(false);
5766        self
5767    }
5768
5769    /// Sets if allele 1 is counted. Default is true.
5770    ///
5771    /// Also see [`count_a1`](struct.WriteOptionsBuilder.html#method.count_a1) and [`count_a2`](struct.WriteOptionsBuilder.html#method.count_a2).    
5772    pub fn is_a1_counted(&mut self, is_a1_counted: bool) -> &mut Self {
5773        self.is_a1_counted = Some(is_a1_counted);
5774        self
5775    }
5776
5777    /// Number of threads to use (defaults to all processors)
5778    ///
5779    /// Can also be set with an environment variable.
5780    /// See [Environment Variables](index.html#environment-variables).
5781    ///
5782    ///
5783    /// # Example:
5784    ///
5785    /// Write using only one thread.
5786    /// ```
5787    /// use ndarray as nd;
5788    /// use bed_reader::WriteOptions;
5789    /// let output_folder = temp_testdir::TempDir::default();
5790    /// let output_file = output_folder.join("small.bed");
5791    /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
5792    /// WriteOptions::builder(output_file)
5793    ///     .num_threads(1)
5794    ///     .write(&val)?;
5795    /// # use bed_reader::BedErrorPlus;
5796    /// # Ok::<(), Box<BedErrorPlus>>(())
5797    /// ```
5798    pub fn num_threads(&mut self, num_threads: usize) -> &mut Self {
5799        self.num_threads = Some(Some(num_threads));
5800        self
5801    }
5802
5803    /// Skip writing .fam file.
5804    ///
5805    /// # Example
5806    /// ```
5807    /// use ndarray as nd;
5808    /// use bed_reader::{Bed, WriteOptions};
5809    /// let output_folder = temp_testdir::TempDir::default();
5810    /// let output_file = output_folder.join("small.bed");
5811    /// let write_options = WriteOptions::builder(output_file)
5812    ///     .i8()
5813    ///     .skip_fam()
5814    ///     .skip_bim()
5815    ///     .build(3, 4)?;
5816    /// assert!(write_options.skip_fam());
5817    /// assert!(write_options.skip_bim());
5818    /// # use bed_reader::BedErrorPlus;
5819    /// # Ok::<(), Box<BedErrorPlus>>(())
5820    /// ```
5821    pub fn skip_fam(&mut self) -> &mut Self {
5822        self.skip_fam = Some(true);
5823        self
5824    }
5825
5826    /// Skip writing .bim file.
5827    ///
5828    /// # Example
5829    /// ```
5830    /// use ndarray as nd;
5831    /// use bed_reader::{Bed, WriteOptions};
5832    /// let output_folder = temp_testdir::TempDir::default();
5833    /// let output_file = output_folder.join("small.bed");
5834    /// let write_options = WriteOptions::builder(output_file)
5835    ///     .i8()
5836    ///     .skip_fam()
5837    ///     .skip_bim()
5838    ///     .build(3, 4)?;
5839    /// assert!(write_options.skip_fam());
5840    /// assert!(write_options.skip_bim());
5841    /// # use bed_reader::BedErrorPlus;
5842    /// # Ok::<(), Box<BedErrorPlus>>(())
5843    /// ```
5844    pub fn skip_bim(&mut self) -> &mut Self {
5845        self.skip_bim = Some(true);
5846        self
5847    }
5848
5849    /// Creates a new [`WriteOptions`](struct.WriteOptions.html) with the options given.
5850    ///
5851    /// > Also see [`WriteOptionsBuilder::write`](struct.WriteOptionsBuilder.html#method.write), which creates
5852    /// > a [`WriteOptions`](struct.WriteOptions.html) and writes to file in one step.
5853    ///
5854    /// # Example
5855    /// Create a new [`WriteOptions`](struct.WriteOptions.html) with some given values and some
5856    /// default values. Then use it to write a .bed file.
5857    /// ```
5858    /// use ndarray as nd;
5859    /// use bed_reader::{WriteOptions, Bed};
5860    ///
5861    /// let output_folder = temp_testdir::TempDir::default();
5862    /// let output_file = output_folder.join("small.bed");
5863    /// let write_options = WriteOptions::builder(output_file)
5864    ///     .f64()
5865    ///     .iid(["i1", "i2", "i3"])
5866    ///     .sid(["s1", "s2", "s3", "s4"])
5867    ///     .build(3, 4)?;
5868    /// println!("{0:?}", write_options.fid()); // Outputs ndarray ["0", "0", "0"]
5869    /// println!("{0:?}", write_options.iid()); // Outputs ndarray ["i1", "i2", "i3"]
5870    ///
5871    /// let val = nd::array![
5872    ///     [1.0, 0.0, f64::NAN, 0.0],
5873    ///     [2.0, 0.0, f64::NAN, 2.0],
5874    ///     [0.0, 1.0, 2.0, 0.0]
5875    /// ];
5876    /// Bed::write_with_options(&val, &write_options)?;
5877    /// # use bed_reader::BedErrorPlus;
5878    /// # Ok::<(), Box<BedErrorPlus>>(())
5879    /// ```
5880    pub fn build(
5881        &self,
5882        iid_count: usize,
5883        sid_count: usize,
5884    ) -> Result<WriteOptions<TVal>, Box<BedErrorPlus>> {
5885        let Some(path) = self.path.as_ref() else {
5886            Err(BedError::UninitializedField("path"))?
5887        };
5888
5889        // unwrap always works because the metadata builder always initializes metadata
5890        let metadata = self.metadata.as_ref().unwrap();
5891        let metadata = metadata.fill(iid_count, sid_count)?;
5892
5893        let write_options = WriteOptions {
5894            path: path.to_owned(),
5895            fam_path: to_metadata_path(path, self.fam_path.as_ref(), "fam"),
5896            bim_path: to_metadata_path(path, self.bim_path.as_ref(), "bim"),
5897            is_a1_counted: self.is_a1_counted.unwrap_or(true),
5898            num_threads: self.num_threads.unwrap_or(None),
5899            missing_value: self.missing_value.unwrap_or_else(|| TVal::missing()),
5900            skip_fam: self.skip_fam.unwrap_or(false),
5901            skip_bim: self.skip_bim.unwrap_or(false),
5902
5903            metadata,
5904        };
5905        Ok(write_options)
5906    }
5907
5908    #[anyinput]
5909    fn new(path: AnyPath) -> Self {
5910        Self {
5911            path: Some(path.to_owned()),
5912            fam_path: None,
5913            bim_path: None,
5914
5915            metadata: Some(Metadata::new()),
5916
5917            is_a1_counted: None,
5918            num_threads: None,
5919            missing_value: None,
5920            skip_fam: None,
5921            skip_bim: None,
5922        }
5923    }
5924}
5925
/// Conversion from a 1-D array of strings to a 1-D array of `Self`.
///
/// Implemented in this file for `String`, `f32`, and `i32`.
trait FromStringArray<T> {
    /// Converts `string_array` into an array of `Self`, or returns an error
    /// if the conversion fails.
    // NOTE(review): `dead_code` is allowed here, presumably because not every
    // configuration calls this method — confirm.
    #[allow(dead_code)]
    fn from_string_array(
        string_array: nd::Array1<String>,
    ) -> Result<nd::Array1<Self>, Box<BedErrorPlus>>
    where
        Self: Sized;
}
5934
/// Strings need no conversion: the input array is handed back unchanged.
impl FromStringArray<String> for String {
    /// Returns `string_array` as is; this implementation never fails.
    fn from_string_array(
        string_array: nd::Array1<String>,
    ) -> Result<nd::Array1<String>, Box<BedErrorPlus>> {
        Ok(string_array)
    }
}
5942
5943impl FromStringArray<f32> for f32 {
5944    fn from_string_array(
5945        string_array: nd::Array1<String>,
5946    ) -> Result<nd::Array1<f32>, Box<BedErrorPlus>> {
5947        let result = string_array
5948            .iter()
5949            .map(|s| s.parse::<f32>())
5950            .collect::<Result<nd::Array1<f32>, _>>();
5951        match result {
5952            Ok(array) => Ok(array),
5953            Err(e) => Err(Box::new(BedErrorPlus::ParseFloatError(e))),
5954        }
5955    }
5956}
5957impl FromStringArray<i32> for i32 {
5958    fn from_string_array(
5959        string_array: nd::Array1<String>,
5960    ) -> Result<nd::Array1<i32>, Box<BedErrorPlus>> {
5961        let result = string_array
5962            .iter()
5963            .map(|s| s.parse::<i32>())
5964            .collect::<Result<nd::Array1<i32>, _>>();
5965        match result {
5966            Ok(array) => Ok(array),
5967            Err(e) => Err(Box::new(BedErrorPlus::ParseIntError(e))),
5968        }
5969    }
5970}
5971
/// Asserts two 2-D arrays are equal, treating NaNs as values.
///
/// This is [`allclose`] with an absolute tolerance of zero and `equal_nan` set to
/// `true`, wrapped in an `assert!`; it panics if the arrays are not equal.
///
/// # Example
/// ```
/// use std::f64::NAN;
/// use ndarray as nd;
/// use bed_reader::assert_eq_nan;
/// let val1 = nd::arr2(&[[1.0, 2.0], [3.0, NAN]]);
/// let val2 = nd::arr2(&[[1.0, 2.0], [3.0, NAN]]);
/// assert_eq_nan(&val1, &val2);
/// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
pub fn assert_eq_nan<T: 'static + Copy + PartialEq + PartialOrd + Signed + From<i8>>(
    val: &nd::ArrayBase<nd::OwnedRepr<T>, nd::Dim<[usize; 2]>>,
    answer: &nd::ArrayBase<nd::OwnedRepr<T>, nd::Dim<[usize; 2]>>,
) {
    // `0.into()` builds the zero tolerance through the `From<i8>` bound on `T`.
    assert!(allclose::<T, T>(
        &val.view(),
        &answer.view(),
        0.into(),
        true
    ));
}
5996
/// Asserts that a result is an error and that the error is of a given variant.
///
/// `$result` must be a `Result` whose error can be dereferenced twice from a
/// reference (for example a boxed error); `$pattern` is matched against the
/// inner error. Panics with `"test failure"` if the result is `Ok` or the
/// error does not match the pattern.
#[macro_export]
macro_rules! assert_error_variant {
    ($result:expr, $pattern:pat) => {
        match $result {
            // The only acceptable outcome: an error whose inner value matches.
            Err(ref boxed_error) if matches!(**boxed_error, $pattern) => (),
            // `Ok(..)` and non-matching errors are both failures.
            _ => panic!("test failure"),
        }
    };
}
6010
6011/// True if and only if two 2-D arrays are equal, within a given tolerance and possibly treating NaNs as values.
6012///
6013/// # Example
6014/// ```
6015/// use std::f64::NAN;
6016/// use ndarray as nd;
6017/// use bed_reader::allclose;
6018/// let val1 = nd::arr2(&[[1.0, 2.000000000001], [3.0, NAN]]);
6019/// let val2 = nd::arr2(&[[1.0, 2.0], [3.0, NAN]]);
6020/// assert!(allclose(&val1.view(), &val2.view(), 1e-08, true));
6021/// # use bed_reader::BedErrorPlus;
6022/// # Ok::<(), Box<BedErrorPlus>>(())
6023/// ```
6024pub fn allclose<
6025    T1: 'static + Copy + PartialEq + PartialOrd + Signed,
6026    T2: 'static + Copy + PartialEq + PartialOrd + Signed + Into<T1>,
6027>(
6028    val1: &nd::ArrayView2<'_, T1>,
6029    val2: &nd::ArrayView2<'_, T2>,
6030    atol: T1,
6031    equal_nan: bool,
6032) -> bool {
6033    assert!(val1.dim() == val2.dim());
6034    // Could be run in parallel
6035
6036    nd::Zip::from(val1)
6037        .and(val2)
6038        .fold(true, |acc, ptr_a, ptr_b| -> bool {
6039            if !acc {
6040                return false;
6041            }
6042            // x != x is a generic nan check
6043            #[allow(clippy::eq_op)]
6044            let a_nan = *ptr_a != *ptr_a;
6045            #[allow(clippy::eq_op)]
6046            let b_nan = *ptr_b != *ptr_b;
6047
6048            if a_nan || b_nan {
6049                if equal_nan {
6050                    a_nan == b_nan
6051                } else {
6052                    false
6053                }
6054            } else {
6055                let c: T1 = abs(*ptr_a - T2::into(*ptr_b));
6056                c <= atol
6057            }
6058        })
6059}
6060
impl WriteOptionsBuilder<i8> {
    /// The input ndarray will be i8.
    ///
    /// Returns the builder unchanged. The method exists only on
    /// `WriteOptionsBuilder<i8>`, so calling it fixes the value type to `i8`.
    #[must_use]
    pub fn i8(self) -> Self {
        self
    }
}
6068
impl WriteOptionsBuilder<f32> {
    /// The input ndarray will be f32.
    ///
    /// Returns the builder unchanged. The method exists only on
    /// `WriteOptionsBuilder<f32>`, so calling it fixes the value type to `f32`.
    #[must_use]
    pub fn f32(self) -> Self {
        self
    }
}
6076
impl WriteOptionsBuilder<f64> {
    /// The input ndarray will be f64.
    ///
    /// Returns the builder unchanged. The method exists only on
    /// `WriteOptionsBuilder<f64>`, so calling it fixes the value type to `f64`.
    #[must_use]
    pub fn f64(self) -> Self {
        self
    }
}
6084
6085fn check_counts(
6086    count_vec: Vec<Option<usize>>,
6087    option_xid_count: &mut Option<usize>,
6088    prefix: &str,
6089) -> Result<(), Box<BedErrorPlus>> {
6090    for count in count_vec.into_iter().flatten() {
6091        if let Some(xid_count) = option_xid_count {
6092            if *xid_count != count {
6093                Err(BedError::InconsistentCount(
6094                    prefix.to_string(),
6095                    *xid_count,
6096                    count,
6097                ))?;
6098            }
6099        } else {
6100            *option_xid_count = Some(count);
6101        }
6102    }
6103
6104    Ok(())
6105}
6106
6107// According to https://docs.rs/derive_builder/latest/derive_builder/
6108// "clone" is OK because "Luckily Rust is clever enough to optimize these
6109// clone-calls away in release builds for your every-day use cases.
6110// Thats quite a safe bet - we checked this for you. ;-)"
6111fn compute_field<T: Clone, F: Fn(usize) -> T>(
6112    field_name: &str,
6113    field: &mut Option<Rc<nd::Array1<T>>>,
6114    count: usize,
6115    lambda: F,
6116) -> Result<(), Box<BedErrorPlus>> {
6117    // let lambda = |_| "0".to_string();
6118    // let count = iid_count;
6119    // let field = &mut metadata.fid;
6120
6121    if let Some(array) = field {
6122        if array.len() != count {
6123            Err(BedError::InconsistentCount(
6124                field_name.to_string(),
6125                array.len(),
6126                count,
6127            ))?;
6128        }
6129    } else {
6130        let array = Rc::new((0..count).map(lambda).collect::<nd::Array1<T>>());
6131        *field = Some(array);
6132    }
6133    Ok(())
6134}
6135
6136impl MetadataBuilder {
6137    /// Create a [`Metadata`](struct.Metadata.html) from the builder.
6138    ///
6139    /// > See [`Metadata::builder()`](struct.Metadata.html#method.builder)
6140    pub fn build(&self) -> Result<Metadata, Box<BedErrorPlus>> {
6141        let metadata = self.build_no_file_check()?;
6142
6143        metadata.check_counts(None, None)?;
6144
6145        Ok(metadata)
6146    }
6147
6148    /// Set the family id (fid) values.
6149    #[anyinput]
6150    pub fn fid(&mut self, fid: AnyIter<AnyString>) -> &mut Self {
6151        self.fid = Some(Some(Rc::new(fid.map(|s| s.as_ref().to_string()).collect())));
6152        self
6153    }
6154
6155    /// Set the individual id (iid) values.
6156    /// ```
6157    /// use ndarray as nd;
6158    /// use bed_reader::{Metadata, assert_eq_nan};
6159    ///
6160    /// let metadata = Metadata::builder()
6161    ///    .iid(["sample1", "sample2", "sample3"])
6162    ///    .build()?;
6163    /// println!("{:?}", metadata.iid()); // Outputs ndarray Some(["sample1", "sample2", "sample3"])
6164    /// # use bed_reader::BedErrorPlus;
6165    /// # Ok::<(), Box<BedErrorPlus>>(())
6166    /// ```
6167    #[anyinput]
6168    pub fn iid(&mut self, iid: AnyIter<AnyString>) -> &mut Self {
6169        self.iid = Some(Some(Rc::new(iid.map(|s| s.as_ref().to_owned()).collect())));
6170        self
6171    }
6172
6173    /// Set the father values.
6174    #[anyinput]
6175    pub fn father(&mut self, father: AnyIter<AnyString>) -> &mut Self {
6176        self.father = Some(Some(Rc::new(
6177            father.map(|s| s.as_ref().to_owned()).collect(),
6178        )));
6179        self
6180    }
6181
6182    /// Override the mother values.
6183    #[anyinput]
6184    pub fn mother(&mut self, mother: AnyIter<AnyString>) -> &mut Self {
6185        self.mother = Some(Some(Rc::new(
6186            mother.map(|s| s.as_ref().to_owned()).collect(),
6187        )));
6188        self
6189    }
6190
6191    /// Override the sex values.
6192    #[anyinput]
6193    pub fn sex(&mut self, sex: AnyIter<i32>) -> &mut Self {
6194        self.sex = Some(Some(Rc::new(sex.collect())));
6195        self
6196    }
6197
6198    /// Override the phenotype values.
6199    #[anyinput]
6200    pub fn pheno(&mut self, pheno: AnyIter<AnyString>) -> &mut Self {
6201        self.pheno = Some(Some(Rc::new(
6202            pheno.map(|s| s.as_ref().to_owned()).collect(),
6203        )));
6204        self
6205    }
6206
6207    /// Override the chromosome values.
6208    #[anyinput]
6209    pub fn chromosome(&mut self, chromosome: AnyIter<AnyString>) -> &mut Self {
6210        self.chromosome = Some(Some(Rc::new(
6211            chromosome.map(|s| s.as_ref().to_owned()).collect(),
6212        )));
6213        self
6214    }
6215
6216    /// Override the SNP id (sid) values.
6217    /// ```
6218    /// use ndarray as nd;
6219    /// use bed_reader::{Metadata, assert_eq_nan};
6220    ///
6221    /// let metadata = Metadata::builder()
6222    ///    .sid(["SNP1", "SNP2", "SNP3", "SNP4"])
6223    ///    .build()?;
6224    /// println!("{:?}", metadata.sid()); // Outputs ndarray Some(["SNP1", "SNP2", "SNP3", "SNP4"])
6225    /// # use bed_reader::BedErrorPlus;
6226    /// # Ok::<(), Box<BedErrorPlus>>(())
6227    /// ```
6228    #[anyinput]
6229    pub fn sid(&mut self, sid: AnyIter<AnyString>) -> &mut Self {
6230        self.sid = Some(Some(Rc::new(
6231            sid.into_iter().map(|s| s.as_ref().to_owned()).collect(),
6232        )));
6233        self
6234    }
6235
6236    /// Override the centimorgan position values.
6237    #[anyinput]
6238    pub fn cm_position(&mut self, cm_position: AnyIter<f32>) -> &mut Self {
6239        self.cm_position = Some(Some(Rc::new(cm_position.into_iter().collect())));
6240        self
6241    }
6242
6243    /// Override the base-pair position values.
6244    #[anyinput]
6245    pub fn bp_position(&mut self, bp_position: AnyIter<i32>) -> &mut Self {
6246        self.bp_position = Some(Some(Rc::new(bp_position.into_iter().collect())));
6247        self
6248    }
6249
6250    /// Override the allele 1 values.
6251    #[anyinput]
6252    pub fn allele_1(&mut self, allele_1: AnyIter<AnyString>) -> &mut Self {
6253        self.allele_1 = Some(Some(Rc::new(
6254            allele_1
6255                .into_iter()
6256                .map(|s| s.as_ref().to_owned())
6257                .collect(),
6258        )));
6259        self
6260    }
6261
6262    /// Override the allele 2 values.
6263    #[anyinput]
6264    pub fn allele_2(&mut self, allele_2: AnyIter<AnyString>) -> &mut Self {
6265        self.allele_2 = Some(Some(Rc::new(
6266            allele_2
6267                .into_iter()
6268                .map(|s| s.as_ref().to_owned())
6269                .collect(),
6270        )));
6271        self
6272    }
6273
6274    /// Merge metadata from a [`Metadata`](struct.Metadata.html).
6275    ///
6276    /// # Example
6277    ///
6278    /// In the example, we create a [`Metadata`](struct.Metadata.html) with iid
6279    /// and sid arrays. Next, we use another [`MetadataBuilder`](struct.MetadataBuilder.html) to set an fid array
6280    /// and an iid array. Then, we add the first [`Metadata`](struct.Metadata.html)
6281    /// to the [`MetadataBuilder`](struct.MetadataBuilder.html),
6282    /// overwriting iid and setting sid. Finally, we print these
6283    /// three arrays and chromosome. Chromosome is `None`.
6284    ///```
6285    /// use ndarray as nd;
6286    /// use bed_reader::Metadata;
6287    ///
6288    /// let metadata1 = Metadata::builder()
6289    ///     .iid(["i1", "i2", "i3"])
6290    ///     .sid(["s1", "s2", "s3", "s4"])
6291    ///     .build()?;
6292    /// let metadata2 = Metadata::builder()
6293    ///     .fid(["f1", "f2", "f3"])
6294    ///     .iid(["x1", "x2", "x3"])
6295    ///     .metadata(&metadata1)
6296    ///     .build()?;
6297    ///
6298    /// println!("{0:?}", metadata2.fid()); // Outputs optional ndarray Some(["f1", "f2", "f3"]...)
6299    /// println!("{0:?}", metadata2.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
6300    /// println!("{0:?}", metadata2.sid()); // Outputs optional ndarray Some(["s1", "s2", "s3", "s4"]...)
6301    /// println!("{0:?}", metadata2.chromosome()); // Outputs None
6302    /// # use bed_reader::BedErrorPlus;
6303    /// # Ok::<(), Box<BedErrorPlus>>(())
6304    /// ```
6305    pub fn metadata(&mut self, metadata: &Metadata) -> &mut Self {
6306        set_field(metadata.fid.as_ref(), &mut self.fid);
6307        set_field(metadata.iid.as_ref(), &mut self.iid);
6308        set_field(metadata.father.as_ref(), &mut self.father);
6309        set_field(metadata.mother.as_ref(), &mut self.mother);
6310        set_field(metadata.sex.as_ref(), &mut self.sex);
6311        set_field(metadata.pheno.as_ref(), &mut self.pheno);
6312
6313        set_field(metadata.chromosome.as_ref(), &mut self.chromosome);
6314        set_field(metadata.sid.as_ref(), &mut self.sid);
6315        set_field(metadata.cm_position.as_ref(), &mut self.cm_position);
6316        set_field(metadata.bp_position.as_ref(), &mut self.bp_position);
6317        set_field(metadata.allele_1.as_ref(), &mut self.allele_1);
6318        set_field(metadata.allele_2.as_ref(), &mut self.allele_2);
6319        self
6320    }
6321}
6322
6323impl Default for Metadata {
6324    fn default() -> Self {
6325        Self::new()
6326    }
6327}
6328
6329impl Metadata {
6330    fn check_counts(
6331        &self,
6332        mut iid_count: Option<usize>,
6333        mut sid_count: Option<usize>,
6334    ) -> Result<(Option<usize>, Option<usize>), Box<BedErrorPlus>> {
6335        check_counts(
6336            vec![
6337                lazy_or_skip_count(self.fid.as_ref()),
6338                lazy_or_skip_count(self.iid.as_ref()),
6339                lazy_or_skip_count(self.father.as_ref()),
6340                lazy_or_skip_count(self.mother.as_ref()),
6341                lazy_or_skip_count(self.sex.as_ref()),
6342                lazy_or_skip_count(self.pheno.as_ref()),
6343            ],
6344            &mut iid_count,
6345            "iid",
6346        )?;
6347        check_counts(
6348            vec![
6349                lazy_or_skip_count(self.chromosome.as_ref()),
6350                lazy_or_skip_count(self.sid.as_ref()),
6351                lazy_or_skip_count(self.cm_position.as_ref()),
6352                lazy_or_skip_count(self.bp_position.as_ref()),
6353                lazy_or_skip_count(self.allele_1.as_ref()),
6354                lazy_or_skip_count(self.allele_2.as_ref()),
6355            ],
6356            &mut sid_count,
6357            "sid",
6358        )?;
6359        Ok((iid_count, sid_count))
6360    }
6361
6362    /// Create a [`Metadata`](struct.Metadata.html) using a builder.
6363    ///
6364    /// # Example
6365    /// Create metadata.
6366    /// Create a random file with the metadata.
6367    /// ```
6368    /// use ndarray as nd;
6369    /// use bed_reader::{Metadata, WriteOptions};
6370    /// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
6371    ///
6372    /// let metadata = Metadata::builder()
6373    ///     .iid(["i1", "i2", "i3"])
6374    ///     .sid(["s1", "s2", "s3", "s4"])
6375    ///     .build()?;
6376    /// let mut rng = StdRng::seed_from_u64(0);
6377    /// let val = nd::Array::random_using((3, 4), Uniform::from(-1..3), &mut rng);
6378    /// let temp_out = temp_testdir::TempDir::default();
6379    /// let output_file = temp_out.join("random.bed");
6380    /// WriteOptions::builder(output_file)
6381    ///     .metadata(&metadata)
6382    ///     .missing_value(-1)
6383    ///     .write(&val)?;
6384    /// # use bed_reader::BedErrorPlus;
6385    /// # Ok::<(), Box<BedErrorPlus>>(())
6386    /// ```
6387    #[must_use]
6388    pub fn builder() -> MetadataBuilder {
6389        MetadataBuilder::default()
6390    }
6391
6392    /// Create an empty [`Metadata`](struct.Metadata.html).
6393    ///
6394    /// > See [`Metadata::builder()`](struct.Metadata.html#method.builder)
6395    #[must_use]
6396    pub fn new() -> Metadata {
6397        // Unwrap always works because an empty metadata builder always works.
6398        Metadata::builder().build().unwrap()
6399    }
6400
    /// Optional family id of each individual (sample)
6402    #[must_use]
6403    pub fn fid(&self) -> Option<&nd::Array1<String>> {
6404        option_rc_as_ref(self.fid.as_ref())
6405    }
6406
    /// Optional individual id of each individual (sample)
6408    ///
6409    /// # Example:
6410    /// ```
6411    /// use ndarray as nd;
6412    /// use bed_reader::Metadata;
6413    /// let metadata = Metadata::builder().iid(["i1", "i2", "i3"]).build()?;
6414    /// println!("{0:?}", metadata.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
6415    /// println!("{0:?}", metadata.sid()); // Outputs None
6416    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
6418    #[must_use]
6419    pub fn iid(&self) -> Option<&nd::Array1<String>> {
6420        option_rc_as_ref(self.iid.as_ref())
6421    }
6422
    /// Optional father id of each individual (sample)
6424    #[must_use]
6425    pub fn father(&self) -> Option<&nd::Array1<String>> {
6426        option_rc_as_ref(self.father.as_ref())
6427    }
6428
    /// Optional mother id of each individual (sample)
6430    #[must_use]
6431    pub fn mother(&self) -> Option<&nd::Array1<String>> {
6432        option_rc_as_ref(self.mother.as_ref())
6433    }
6434
    /// Optional sex of each individual (sample)
6436    #[must_use]
6437    pub fn sex(&self) -> Option<&nd::Array1<i32>> {
6438        option_rc_as_ref(self.sex.as_ref())
6439    }
6440
6441    /// Optional phenotype for each individual (seldom used)
6442    #[must_use]
6443    pub fn pheno(&self) -> Option<&nd::Array1<String>> {
6444        option_rc_as_ref(self.pheno.as_ref())
6445    }
6446
6447    /// Optional chromosome of each SNP (variant)
6448    #[must_use]
6449    pub fn chromosome(&self) -> Option<&nd::Array1<String>> {
6450        option_rc_as_ref(self.chromosome.as_ref())
6451    }
6452
6453    /// Optional SNP id of each SNP (variant)
6454    ///
6455    /// # Example:
6456    /// ```
6457    /// use ndarray as nd;
6458    /// use bed_reader::Metadata;
6459    /// let metadata = Metadata::builder().iid(["i1", "i2", "i3"]).build()?;
6460    /// println!("{0:?}", metadata.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
6461    /// println!("{0:?}", metadata.sid()); // Outputs None
6462    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
6464    #[must_use]
6465    pub fn sid(&self) -> Option<&nd::Array1<String>> {
6466        option_rc_as_ref(self.sid.as_ref())
6467    }
6468
6469    /// Optional centimorgan position of each SNP (variant)
6470    #[must_use]
6471    pub fn cm_position(&self) -> Option<&nd::Array1<f32>> {
6472        option_rc_as_ref(self.cm_position.as_ref())
6473    }
6474
6475    /// Optional base-pair position of each SNP (variant)
6476    #[must_use]
6477    pub fn bp_position(&self) -> Option<&nd::Array1<i32>> {
6478        option_rc_as_ref(self.bp_position.as_ref())
6479    }
6480
6481    /// Optional first allele of each SNP (variant)
6482    #[must_use]
6483    pub fn allele_1(&self) -> Option<&nd::Array1<String>> {
6484        option_rc_as_ref(self.allele_1.as_ref())
6485    }
6486
6487    /// Optional second allele of each SNP (variant)
6488    #[must_use]
6489    pub fn allele_2(&self) -> Option<&nd::Array1<String>> {
6490        option_rc_as_ref(self.allele_2.as_ref())
6491    }
6492
6493    /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty fields with a .fam file.
6494    ///
6495    /// # Example
6496    ///
6497    /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6498    /// Do not skip any fields.
6499    /// ```
6500    /// use ndarray as nd;
6501    /// use std::collections::HashSet;
6502    /// use bed_reader::{Metadata, MetadataFields, sample_file};
6503    ///
6504    /// let skip_set = HashSet::<MetadataFields>::new();
6505    /// let metadata_empty = Metadata::new();
6506    /// let (metadata_fam, iid_count) =
6507    ///     metadata_empty.read_fam(sample_file("small.fam")?, &skip_set)?;
6508    /// let (metadata_bim, sid_count) =
6509    ///     metadata_fam.read_bim(sample_file("small.bim")?, &skip_set)?;
6510    /// assert_eq!(iid_count, 3);
6511    /// assert_eq!(sid_count, 4);
6512    /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6513    /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6514    /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6515    /// # use bed_reader::BedErrorPlus;
6516    /// # Ok::<(), Box<BedErrorPlus>>(())
6517    /// ```
6518    #[anyinput]
6519    pub fn read_fam(
6520        &self,
6521        path: AnyPath,
6522        skip_set: &HashSet<MetadataFields>,
6523    ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6524        let mut field_vec: Vec<usize> = Vec::new();
6525
6526        if self.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6527            field_vec.push(0);
6528        }
6529        if self.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6530            field_vec.push(1);
6531        }
6532        if self.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6533            field_vec.push(2);
6534        }
6535        if self.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6536            field_vec.push(3);
6537        }
6538        if self.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6539            field_vec.push(4);
6540        }
6541        if self.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6542            field_vec.push(5);
6543        }
6544
6545        let (mut vec_of_vec, count) = Metadata::read_fam_or_bim(&field_vec, true, path)?;
6546
6547        let mut clone = self.clone();
6548
6549        // unwraps are safe because we pop once for every push
6550        if clone.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6551            clone.pheno = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6552        }
6553        if clone.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6554            let vec = vec_of_vec.pop().unwrap();
6555            let array = vec
6556                .iter()
6557                .map(|s| s.parse::<i32>())
6558                .collect::<Result<nd::Array1<i32>, _>>()?;
6559            clone.sex = Some(Rc::new(array));
6560        }
6561        if clone.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6562            clone.mother = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6563        }
6564        if clone.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6565            clone.father = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6566        }
6567        if clone.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6568            clone.iid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6569        }
6570        if clone.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6571            clone.fid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6572        }
6573
6574        clone.check_counts(Some(count), None)?;
6575
6576        Ok((clone, count))
6577    }
6578
6579    /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty
6580    /// fields with a .fam file in the cloud.
6581    ///
6582    /// # Example
6583    ///
6584    /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6585    /// Do not skip any fields.
6586    /// ```
6587    /// use ndarray as nd;
6588    /// use std::collections::HashSet;
6589    /// use bed_reader::{Metadata, MetadataFields, sample_url, CloudFile};
6590    ///
6591    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
6592    /// let skip_set = HashSet::<MetadataFields>::new();
6593    /// let fam_cloud_file = CloudFile::new(sample_url("small.fam")?)?;
6594    /// let bim_cloud_file = CloudFile::new(sample_url("small.bim")?)?;
6595    /// let metadata_empty = Metadata::new();
6596    /// let (metadata_fam, iid_count) =
6597    ///     metadata_empty.read_fam_cloud(&fam_cloud_file, &skip_set).await?;
6598    /// let (metadata_bim, sid_count) =
6599    ///     metadata_fam.read_bim_cloud(&bim_cloud_file, &skip_set).await?;
6600    /// assert_eq!(iid_count, 3);
6601    /// assert_eq!(sid_count, 4);
6602    /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6603    /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6604    /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6605    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
6606    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
6607    /// ```
6608    pub async fn read_fam_cloud(
6609        &self,
6610        cloud_file: &CloudFile,
6611        skip_set: &HashSet<MetadataFields>,
6612    ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6613        let mut field_vec: Vec<usize> = Vec::new();
6614
6615        if self.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6616            field_vec.push(0);
6617        }
6618        if self.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6619            field_vec.push(1);
6620        }
6621        if self.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6622            field_vec.push(2);
6623        }
6624        if self.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6625            field_vec.push(3);
6626        }
6627        if self.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6628            field_vec.push(4);
6629        }
6630        if self.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6631            field_vec.push(5);
6632        }
6633
6634        let (mut vec_of_vec, count) = self
6635            .read_fam_or_bim_cloud(&field_vec, true, cloud_file)
6636            .await?;
6637
6638        let mut clone = self.clone();
6639
6640        // unwraps are safe because we pop once for every push
6641        if clone.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6642            clone.pheno = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6643        }
6644        if clone.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6645            let vec = vec_of_vec.pop().unwrap();
6646            let array = vec
6647                .iter()
6648                .map(|s| s.parse::<i32>())
6649                .collect::<Result<nd::Array1<i32>, _>>()?;
6650            clone.sex = Some(Rc::new(array));
6651        }
6652        if clone.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6653            clone.mother = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6654        }
6655        if clone.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6656            clone.father = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6657        }
6658        if clone.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6659            clone.iid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6660        }
6661        if clone.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6662            clone.fid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6663        }
6664
6665        clone.check_counts(Some(count), None)?;
6666
6667        Ok((clone, count))
6668    }
6669
6670    /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty fields with a .bim file.
6671    ///
6672    /// # Example
6673    ///
6674    /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6675    /// Do not skip any fields.
6676    /// ```
6677    /// use ndarray as nd;
6678    /// use std::collections::HashSet;
6679    /// use bed_reader::{Metadata, MetadataFields, sample_file};
6680    ///
6681    /// let skip_set = HashSet::<MetadataFields>::new();
6682    /// let metadata_empty = Metadata::new();
6683    /// let (metadata_fam, iid_count) =
6684    ///     metadata_empty.read_fam(sample_file("small.fam")?, &skip_set)?;
6685    /// let (metadata_bim, sid_count) =
6686    ///     metadata_fam.read_bim(sample_file("small.bim")?, &skip_set)?;
6687    /// assert_eq!(iid_count, 3);
6688    /// assert_eq!(sid_count, 4);
6689    /// println!("{0:?}", metadata_bim.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6690    /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6691    /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6692    /// # use bed_reader::BedErrorPlus;
6693    /// # Ok::<(), Box<BedErrorPlus>>(())
6694    /// ```
6695    #[anyinput]
6696    pub fn read_bim(
6697        &self,
6698        path: AnyPath,
6699        skip_set: &HashSet<MetadataFields>,
6700    ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6701        let mut field_vec: Vec<usize> = Vec::new();
6702        if self.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6703            field_vec.push(0);
6704        }
6705        if self.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6706            field_vec.push(1);
6707        }
6708
6709        if self.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6710            field_vec.push(2);
6711        }
6712        if self.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6713            field_vec.push(3);
6714        }
6715        if self.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6716            field_vec.push(4);
6717        }
6718        if self.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6719            field_vec.push(5);
6720        }
6721
6722        let mut clone = self.clone();
6723        let (mut vec_of_vec, count) = Metadata::read_fam_or_bim(&field_vec, false, path)?;
6724
6725        // unwraps are safe because we pop once for every push
6726        if clone.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6727            clone.allele_2 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6728        }
6729        if clone.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6730            clone.allele_1 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6731        }
6732        if clone.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6733            let vec = vec_of_vec.pop().unwrap();
6734            let array = vec
6735                .iter()
6736                .map(|s| s.parse::<i32>())
6737                .collect::<Result<nd::Array1<i32>, _>>()?;
6738            clone.bp_position = Some(Rc::new(array));
6739        }
6740        if clone.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6741            let vec = vec_of_vec.pop().unwrap();
6742            let array = vec
6743                .iter()
6744                .map(|s| s.parse::<f32>())
6745                .collect::<Result<nd::Array1<f32>, _>>()?;
6746            clone.cm_position = Some(Rc::new(array));
6747        }
6748
6749        if clone.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6750            clone.sid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6751        }
6752        if clone.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6753            clone.chromosome = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6754        }
6755
6756        clone.check_counts(None, Some(count))?;
6757
6758        Ok((clone, count))
6759    }
6760
6761    /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty
6762    /// fields with a .bim file in the cloud.
6763    ///
6764    /// # Example
6765    ///
6766    /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6767    /// Do not skip any fields.
6768    /// ```
6769    /// use ndarray as nd;
6770    /// use std::collections::HashSet;
6771    /// use bed_reader::{Metadata, MetadataFields, sample_url, CloudFile};
6772    ///
6773    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
6774    /// let skip_set = HashSet::<MetadataFields>::new();
6775    /// let fam_cloud_file = CloudFile::new(sample_url("small.fam")?)?;
6776    /// let bim_cloud_file = CloudFile::new(sample_url("small.bim")?)?;
6777    /// let metadata_empty = Metadata::new();
6778    /// let (metadata_fam, iid_count) =
6779    ///     metadata_empty.read_fam_cloud(&fam_cloud_file, &skip_set).await?;
6780    /// let (metadata_bim, sid_count) =
6781    ///     metadata_fam.read_bim_cloud(&bim_cloud_file, &skip_set).await?;
6782    /// assert_eq!(iid_count, 3);
6783    /// assert_eq!(sid_count, 4);
6784    /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6785    /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6786    /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6787    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
6788    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
6789    /// ```
6790    pub async fn read_bim_cloud(
6791        &self,
6792        cloud_file: &CloudFile,
6793        skip_set: &HashSet<MetadataFields>,
6794    ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6795        let mut field_vec: Vec<usize> = Vec::new();
6796        if self.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6797            field_vec.push(0);
6798        }
6799        if self.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6800            field_vec.push(1);
6801        }
6802
6803        if self.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6804            field_vec.push(2);
6805        }
6806        if self.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6807            field_vec.push(3);
6808        }
6809        if self.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6810            field_vec.push(4);
6811        }
6812        if self.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6813            field_vec.push(5);
6814        }
6815
6816        let mut clone = self.clone();
6817        let (mut vec_of_vec, count) = self
6818            .read_fam_or_bim_cloud(&field_vec, false, cloud_file)
6819            .await?;
6820
6821        // unwraps are safe because we pop once for every push
6822        if clone.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6823            clone.allele_2 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6824        }
6825        if clone.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6826            clone.allele_1 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6827        }
6828        if clone.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6829            let vec = vec_of_vec.pop().unwrap();
6830            let array = vec
6831                .iter()
6832                .map(|s| s.parse::<i32>())
6833                .collect::<Result<nd::Array1<i32>, _>>()?;
6834            clone.bp_position = Some(Rc::new(array));
6835        }
6836        if clone.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6837            let vec = vec_of_vec.pop().unwrap();
6838            let array = vec
6839                .iter()
6840                .map(|s| s.parse::<f32>())
6841                .collect::<Result<nd::Array1<f32>, _>>()?;
6842            clone.cm_position = Some(Rc::new(array));
6843        }
6844
6845        if clone.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6846            clone.sid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6847        }
6848        if clone.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6849            clone.chromosome = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6850        }
6851
6852        clone.check_counts(None, Some(count))?;
6853
6854        Ok((clone, count))
6855    }
6856
6857    #[anyinput]
6858    fn read_fam_or_bim(
6859        field_vec: &[usize],
6860        is_split_whitespace: bool,
6861        path: AnyPath,
6862    ) -> Result<(Vec<Vec<String>>, usize), Box<BedErrorPlus>> {
6863        let mut vec_of_vec = vec![vec![]; field_vec.len()];
6864
6865        let file = File::open(path)?;
6866
6867        let reader = BufReader::new(file);
6868        let mut count = 0;
6869        for line in reader.lines() {
6870            let line = line?;
6871            count += 1;
6872
6873            let fields: Vec<&str> = if is_split_whitespace {
6874                line.split_whitespace().collect()
6875            } else {
6876                line.split('\t').collect()
6877            };
6878
6879            if fields.len() != 6 {
6880                Err(BedError::MetadataFieldCount(
6881                    6,
6882                    fields.len(),
6883                    path_ref_to_string(path),
6884                ))?;
6885            }
6886
6887            let mut of_interest_count = 0;
6888            for (field_index, field) in fields.iter().enumerate() {
6889                if field_vec.contains(&field_index) {
6890                    vec_of_vec[of_interest_count].push((*field).to_string());
6891                    of_interest_count += 1;
6892                }
6893            }
6894        }
6895
6896        Ok((vec_of_vec, count))
6897    }
6898
6899    async fn read_fam_or_bim_cloud(
6900        &self,
6901        field_vec: &[usize],
6902        is_split_whitespace: bool,
6903        cloud_file: &CloudFile,
6904    ) -> Result<(Vec<Vec<String>>, usize), Box<BedErrorPlus>> {
6905        let mut vec_of_vec = vec![vec![]; field_vec.len()];
6906        let mut count = 0;
6907
6908        let mut line_chunks = cloud_file.stream_line_chunks().await?;
6909        while let Some(line_chunk) = line_chunks.next().await {
6910            let line_chunk = line_chunk.map_err(CloudFileError::ObjectStoreError)?;
6911            let lines = std::str::from_utf8(&line_chunk)?.lines();
6912            for line in lines {
6913                count += 1;
6914
6915                let fields: Vec<&str> = if is_split_whitespace {
6916                    line.split_whitespace().collect()
6917                } else {
6918                    line.split('\t').collect()
6919                };
6920
6921                if fields.len() != 6 {
6922                    Err(BedError::MetadataFieldCount(
6923                        6,
6924                        fields.len(),
6925                        cloud_file.to_string(),
6926                    ))?;
6927                }
6928
6929                let mut of_interest_count = 0;
6930                for (field_index, field) in fields.iter().enumerate() {
6931                    if field_vec.contains(&field_index) {
6932                        vec_of_vec[of_interest_count].push((*field).to_string());
6933                        of_interest_count += 1;
6934                    }
6935                }
6936            }
6937        }
6938
6939        Ok((vec_of_vec, count))
6940    }
6941
6942    fn is_some_fam(&self) -> bool {
6943        self.fid.is_some()
6944            && self.iid.is_some()
6945            && self.father.is_some()
6946            && self.mother.is_some()
6947            && self.sex.is_some()
6948            && self.pheno.is_some()
6949    }
6950    fn is_some_bim(&self) -> bool {
6951        self.chromosome.is_some()
6952            && self.sid.is_some()
6953            && self.cm_position.is_some()
6954            && self.bp_position.is_some()
6955            && self.allele_1.is_some()
6956            && self.allele_2.is_some()
6957    }
6958
6959    /// Write the metadata related to individuals/samples to a .fam file.
6960    ///
6961    /// If any of the .fam metadata is not present, the function will return an error.
6962    ///
6963    /// # Example
6964    ///
6965    /// Create metadata with iid and sid arrays, then fill in the other
6966    /// fields with default arrays, finally write the .fam information
6967    /// to a file.
6968    ///```
6969    /// use ndarray as nd;
6970    /// use std::collections::HashSet;
6971    /// use bed_reader::Metadata;
6972    ///
6973    /// let metadata0 = Metadata::builder()
6974    ///     .iid(["i1", "i2", "i3"])
6975    ///     .sid(["s1", "s2", "s3", "s4"])
6976    ///     .build()?;
6977    /// let metadata_filled = metadata0.fill(3, 4)?;
6978    /// let temp_out = temp_testdir::TempDir::default();
6979    /// let output_file = temp_out.join("no_bed.fam");
6980    /// metadata_filled.write_fam(output_file)?;
6981    /// # use bed_reader::BedErrorPlus;
6982    /// # Ok::<(), Box<BedErrorPlus>>(())
6983    /// ```
6984    #[anyinput]
6985    pub fn write_fam(&self, path: AnyPath) -> Result<(), Box<BedErrorPlus>> {
6986        let file = File::create(path)?;
6987        let mut writer = BufWriter::new(file);
6988        let mut result: Result<(), Box<BedErrorPlus>> = Ok(());
6989
6990        if !self.is_some_fam() {
6991            Err(BedError::MetadataMissingForWrite("fam".to_string()))?;
6992        }
6993
6994        // 1st as_ref turns Option<Rc<Array>> into Option<&Rc<Array>>
6995        // unwrap always works because we checked that all the fields are present
6996        // 2nd as as_ref turns &Rc<Array> into &Array
6997        nd::azip!((fid in self.fid.as_ref().unwrap().as_ref(),
6998                   iid in self.iid.as_ref().unwrap().as_ref(),
6999                   father in self.father.as_ref().unwrap().as_ref(),
7000                   mother in self.mother.as_ref().unwrap().as_ref(),
7001                   sex in self.sex.as_ref().unwrap().as_ref(),
7002                   pheno in self.pheno.as_ref().unwrap().as_ref(),
7003                )
7004        {
7005            if result.is_ok() {
7006                if let Err(e) = writeln!(
7007                writer,
7008                "{} {} {} {} {} {}",
7009                *fid, *iid, *father, *mother, *sex, *pheno
7010            )
7011            {
7012            result = Err(Box::new(BedErrorPlus::IOError(e)));
7013            }
7014        }});
7015        result?;
7016
7017        Ok(())
7018    }
7019
7020    /// Write the metadata related to SNPs/variants to a .bim file.
7021    ///
7022    /// If any of the .bim metadata is not present, the function will return an error.
7023    ///
7024    /// # Example
7025    ///
7026    /// Create metadata with iid and sid arrays, then fill in the other
7027    /// fields with default arrays, finally write the .bim information
7028    /// to a file.
7029    ///```
7030    /// use ndarray as nd;
7031    /// use std::collections::HashSet;
7032    /// use bed_reader::Metadata;
7033    ///
7034    /// let metadata0 = Metadata::builder()
7035    ///     .iid(["i1", "i2", "i3"])
7036    ///     .sid(["s1", "s2", "s3", "s4"])
7037    ///     .build()?;
7038    /// let metadata_filled = metadata0.fill(3, 4)?;
7039    /// let temp_out = temp_testdir::TempDir::default();
7040    /// let output_file = temp_out.join("no_bed.bim");
7041    /// metadata_filled.write_bim(output_file)?;
7042    /// # use bed_reader::BedErrorPlus;
7043    /// # Ok::<(), Box<BedErrorPlus>>(())
7044    /// ```
7045    #[anyinput]
7046    pub fn write_bim(&self, path: AnyPath) -> Result<(), Box<BedErrorPlus>> {
7047        let file = File::create(path)?;
7048        let mut writer = BufWriter::new(file);
7049        let mut result: Result<(), Box<BedErrorPlus>> = Ok(());
7050
7051        if !self.is_some_bim() {
7052            Err(BedError::MetadataMissingForWrite("bim".to_string()))?;
7053        }
7054
7055        // 1st as_ref turns Option<Rc<Array>> into Option<&Rc<Array>>
7056        // unwrap always works because we checked that all the fields are present
7057        // 2nd as as_ref turns &Rc<Array> into &Array
7058        nd::azip!((
7059            chromosome in self.chromosome.as_ref().unwrap().as_ref(),
7060            sid in self.sid.as_ref().unwrap().as_ref(),
7061            cm_position in self.cm_position.as_ref().unwrap().as_ref(),
7062            bp_position in self.bp_position.as_ref().unwrap().as_ref(),
7063            allele_1 in self.allele_1.as_ref().unwrap().as_ref(),
7064            allele_2 in self.allele_2.as_ref().unwrap().as_ref(),
7065                )
7066        {
7067            if result.is_ok() {
7068                if let Err(e) = writeln!(
7069                writer,
7070                "{}\t{}\t{}\t{}\t{}\t{}",
7071                *chromosome, *sid, *cm_position, *bp_position, *allele_1, *allele_2
7072                )
7073                {
7074                result = Err(Box::new(BedErrorPlus::IOError(e)));
7075                }
7076            }
7077        });
7078        result?;
7079
7080        Ok(())
7081    }
7082
7083    /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty fields with default values.
7084    ///
7085    /// # Example
7086    /// ```
7087    /// use ndarray as nd;
7088    /// use std::collections::HashSet;
7089    /// use bed_reader::{Metadata, MetadataFields};
7090    ///
7091    /// let metadata0 = Metadata::builder()
7092    ///     .iid(["i1", "i2", "i3"])
7093    ///     .sid(["s1", "s2", "s3", "s4"])
7094    ///     .build()?;
7095    /// let metadata_filled = metadata0.fill(3, 4)?;
7096    ///
7097    /// println!("{0:?}", metadata_filled.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
7098    /// println!("{0:?}", metadata_filled.sid()); // Outputs optional ndarray Some(["s1", "s2", "s3", "s4"]...)
7099    /// println!("{0:?}", metadata_filled.chromosome()); // Outputs optional ndarray Some(["0", "0", "0", "0"]...)
7100    /// # use bed_reader::BedErrorPlus;
7101    /// # Ok::<(), Box<BedErrorPlus>>(())
7102    /// ```
7103    pub fn fill(&self, iid_count: usize, sid_count: usize) -> Result<Metadata, Box<BedErrorPlus>> {
7104        let mut metadata = self.clone();
7105
7106        compute_field("fid", &mut metadata.fid, iid_count, |_| "0".to_string())?;
7107        compute_field("iid", &mut metadata.iid, iid_count, |i| {
7108            format!("iid{}", i + 1)
7109        })?;
7110        compute_field("father", &mut metadata.father, iid_count, |_| {
7111            "0".to_string()
7112        })?;
7113        compute_field("mother", &mut metadata.mother, iid_count, |_| {
7114            "0".to_string()
7115        })?;
7116        compute_field("sex", &mut metadata.sex, iid_count, |_| 0)?;
7117        compute_field("pheno", &mut metadata.pheno, iid_count, |_| "0".to_string())?;
7118        compute_field("chromosome", &mut metadata.chromosome, sid_count, |_| {
7119            "0".to_string()
7120        })?;
7121        compute_field("sid", &mut metadata.sid, sid_count, |i| {
7122            format!("sid{}", i + 1)
7123        })?;
7124        compute_field("cm_position", &mut metadata.cm_position, sid_count, |_| 0.0)?;
7125        compute_field("bp_position", &mut metadata.bp_position, sid_count, |_| 0)?;
7126        compute_field("allele_1", &mut metadata.allele_1, sid_count, |_| {
7127            "A1".to_string()
7128        })?;
7129        compute_field("allele_2", &mut metadata.allele_2, sid_count, |_| {
7130            "A2".to_string()
7131        })?;
7132
7133        Ok(metadata)
7134    }
7135
7136    #[anyinput]
7137    fn set_fid(&mut self, fid: AnyIter<AnyString>) -> &Self {
7138        self.fid = Some(Rc::new(
7139            fid.into_iter().map(|s| s.as_ref().to_owned()).collect(),
7140        ));
7141        self
7142    }
7143
7144    #[anyinput]
7145    fn set_iid(&mut self, iid: AnyIter<AnyString>) -> &Self {
7146        self.iid = Some(Rc::new(
7147            iid.into_iter().map(|s| s.as_ref().to_owned()).collect(),
7148        ));
7149        self
7150    }
7151
7152    #[anyinput]
7153    fn set_father(&mut self, father: AnyIter<AnyString>) -> &Self {
7154        self.father = Some(Rc::new(father.map(|s| s.as_ref().to_owned()).collect()));
7155        self
7156    }
7157
7158    #[anyinput]
7159    fn set_mother(&mut self, mother: AnyIter<AnyString>) -> &Self {
7160        self.mother = Some(Rc::new(mother.map(|s| s.as_ref().to_owned()).collect()));
7161        self
7162    }
7163
7164    #[anyinput]
7165    fn set_sex(&mut self, sex: AnyIter<i32>) -> &Self {
7166        self.sex = Some(Rc::new(sex.collect()));
7167        self
7168    }
7169
7170    #[anyinput]
7171    fn set_pheno(&mut self, pheno: AnyIter<AnyString>) -> &Self {
7172        self.pheno = Some(Rc::new(pheno.map(|s| s.as_ref().to_owned()).collect()));
7173        self
7174    }
7175
7176    #[anyinput]
7177    fn set_chromosome(&mut self, chromosome: AnyIter<AnyString>) -> &Self {
7178        self.chromosome = Some(Rc::new(chromosome.map(|s| s.as_ref().to_owned()).collect()));
7179        self
7180    }
7181
7182    #[anyinput]
7183    fn set_sid(&mut self, sid: AnyIter<AnyString>) -> &Self {
7184        self.sid = Some(Rc::new(sid.map(|s| s.as_ref().to_owned()).collect()));
7185        self
7186    }
7187
7188    #[anyinput]
7189    fn set_cm_position(&mut self, cm_position: AnyIter<f32>) -> &Self {
7190        self.cm_position = Some(Rc::new(cm_position.into_iter().collect()));
7191        self
7192    }
7193
7194    #[anyinput]
7195    fn set_bp_position(&mut self, bp_position: AnyIter<i32>) -> &Self {
7196        self.bp_position = Some(Rc::new(bp_position.into_iter().collect()));
7197        self
7198    }
7199
7200    #[anyinput]
7201    fn set_allele_1(&mut self, allele_1: AnyIter<AnyString>) -> &Self {
7202        self.allele_1 = Some(Rc::new(allele_1.map(|s| s.as_ref().to_owned()).collect()));
7203        self
7204    }
7205
7206    #[anyinput]
7207    fn set_allele_2(&mut self, allele_2: AnyIter<AnyString>) -> &Self {
7208        self.allele_2 = Some(Rc::new(allele_2.map(|s| s.as_ref().to_owned()).collect()));
7209        self
7210    }
7211}
7212
7213#[allow(clippy::option_option)]
7214fn set_field<T>(
7215    field1: Option<&Rc<nd::Array1<T>>>,
7216    field2: &mut Option<Option<Rc<nd::Array1<T>>>>,
7217) {
7218    if let Some(array) = field1 {
7219        *field2 = Some(Some(array.clone()));
7220    }
7221}
7222
7223fn option_rc_as_ref<T>(field: Option<&Rc<nd::Array1<T>>>) -> Option<&nd::Array1<T>> {
7224    match field {
7225        Some(array) => Some(array.as_ref()),
7226        None => None,
7227    }
7228}
7229
7230#[allow(dead_code)]
7231fn matrix_subset_no_alloc<
7232    TIn: Copy + Default + Debug + Sync + Send + Sync + Sized,
7233    TOut: Copy + Default + Debug + Sync + Send + Sync + From<TIn>,
7234>(
7235    in_val: &nd::ArrayView3<'_, TIn>,
7236    iid_index: &[usize],
7237    sid_index: &[usize],
7238    out_val: &mut nd::ArrayViewMut3<'_, TOut>,
7239) -> Result<(), Box<BedErrorPlus>> {
7240    let out_iid_count = iid_index.len();
7241    let out_sid_count = sid_index.len();
7242    let did_count = in_val.dim().2;
7243
7244    if (out_iid_count, out_sid_count, did_count) != out_val.dim() {
7245        Err(BedError::SubsetMismatch(
7246            out_iid_count,
7247            out_sid_count,
7248            out_val.dim().0,
7249            out_val.dim().1,
7250        ))?;
7251    }
7252
7253    // If output is F-order (or in general if iid stride is no more than sid_stride)
7254    if out_val.stride_of(nd::Axis(0)) <= out_val.stride_of(nd::Axis(1)) {
7255        // (No error are possible in the par_azip, so don't have to collect and check them)
7256        nd::par_azip!((mut out_col in out_val.axis_iter_mut(nd::Axis(1)),
7257                    in_sid_i_pr in sid_index) {
7258            let in_col = in_val.index_axis(nd::Axis(1), *in_sid_i_pr);
7259            for did_i in 0..did_count
7260            {
7261                for (out_iid_i, in_iid_i_ptr) in iid_index.iter().enumerate() {
7262                    out_col[(out_iid_i,did_i)] = in_col[(*in_iid_i_ptr,did_i)].into();
7263                }
7264            }
7265        });
7266        Ok(())
7267    } else {
7268        //If output is C-order, transpose input and output and recurse
7269        let in_val_t = in_val.view().permuted_axes([1, 0, 2]);
7270        let mut out_val_t = out_val.view_mut().permuted_axes([1, 0, 2]);
7271        matrix_subset_no_alloc(&in_val_t, sid_index, iid_index, &mut out_val_t)
7272    }
7273}
7274
// Shared `FetchData` instance used by `sample_file` and `sample_files`
// (and, through `sample_files`, by `sample_bed_file`) to obtain sample files.
//
// NOTE(review): the argument meanings below are inferred from the values and
// from the docs of the `sample_*` functions — confirm against `FetchData::new`:
//   1. registry text embedded at compile time from `tests/registry.txt`
//      (presumably the list of known files and their SHA256 hashes),
//   2. base URL the sample files are downloaded from,
//   3. name of the environment variable that selects the local data directory,
//   4-6. presumably qualifier / organization / application, used to choose the
//        OS-appropriate cache folder when that variable is not set.
#[fetch_data::ctor]
static STATIC_FETCH_DATA: FetchData = FetchData::new(
    include_str!("../bed_reader/tests/registry.txt"),
    "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/",
    "BED_READER_DATA_DIR",
    "github.io",
    "fastlmm",
    "bed-reader",
);
7284
7285/// Returns the local path to a sample .bed file. If necessary, the file will be downloaded.
7286///
7287/// The .fam and .bim files will also be downloaded, if they are not already present.
7288/// SHA256 hashes are used to verify that the files are correct.
7289/// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
7290/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
7291#[anyinput]
7292pub fn sample_bed_file(bed_path: AnyPath) -> Result<PathBuf, Box<BedErrorPlus>> {
7293    let mut path_list: Vec<PathBuf> = Vec::new();
7294    for ext in &["bed", "bim", "fam"] {
7295        let file_path = bed_path.with_extension(ext);
7296        path_list.push(file_path);
7297    }
7298
7299    let vec = sample_files(path_list)?;
7300    assert!(vec.len() == 3);
7301    Ok(vec[0].clone())
7302}
7303
7304/// Returns the local path to a sample file. If necessary, the file will be downloaded.
7305///
7306/// A SHA256 hash is used to verify that the file is correct.
7307/// The file will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
7308/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
7309#[anyinput]
7310pub fn sample_file(path: AnyPath) -> Result<PathBuf, Box<BedErrorPlus>> {
7311    Ok(STATIC_FETCH_DATA
7312        .fetch_file(path)
7313        .map_err(|e| BedError::SampleFetch(e.to_string()))?)
7314}
7315
7316/// Returns the local paths to a list of files. If necessary, the files will be downloaded.
7317///
7318/// SHA256 hashes are used to verify that the files are correct.
7319/// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
7320/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
7321#[anyinput]
7322pub fn sample_files(path_list: AnyIter<AnyPath>) -> Result<Vec<PathBuf>, Box<BedErrorPlus>>
7323where
7324{
7325    Ok(STATIC_FETCH_DATA
7326        .fetch_files(path_list)
7327        .map_err(|e| BedError::SampleFetch(e.to_string()))?)
7328}
7329
/// An empty set of cloud options
///
/// This is a zero-length array of `(&str, String)` pairs, for calls that take
/// cloud options when there are none to set.
///
/// # Example
/// ```
/// use cloud_file::{EMPTY_OPTIONS, CloudFile};
///
/// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
/// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/plink_sim_10s_100v_10pmiss.bed";
/// let cloud_file = CloudFile::new_with_options(url, EMPTY_OPTIONS)?;
/// assert_eq!(cloud_file.read_file_size().await?, 303);
/// # Ok::<(), BedErrorPlus>(())}).unwrap();
/// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
/// ```
pub const EMPTY_OPTIONS: [(&str, String); 0] = [];
7344
// Documentation-only module: it contains no items, only the Markdown file
// `supplemental_documents/options_etc.md` included as its module-level docs.
// It is compiled only when the `tokio` feature is enabled.
#[cfg(feature = "tokio")]
pub mod supplemental_document_options {
    #![doc = include_str!("supplemental_documents/options_etc.md")]
}
7349
// Documentation-only module: it contains no items, only the Markdown file
// `supplemental_documents/cloud_urls_etc.md` included as its module-level docs.
// It is compiled only when the `tokio` feature is enabled.
#[cfg(feature = "tokio")]
pub mod supplemental_document_cloud_urls {
    #![doc = include_str!("supplemental_documents/cloud_urls_etc.md")]
}