bed_reader/lib.rs
1#![warn(missing_docs)]
2#![warn(clippy::pedantic)]
3#![allow(
4 clippy::missing_panics_doc, // LATER: add panics docs
5 clippy::missing_errors_doc, // LATER: add errors docs
6 clippy::similar_names,
7 clippy::cast_possible_truncation,
8 clippy::cast_possible_wrap,
9 clippy::cast_sign_loss,
10 clippy::cast_lossless
11)]
12// Inspired by C++ version by Chris Widmer and Carl Kadie
13
14// See: https://towardsdatascience.com/nine-rules-for-writing-python-extensions-in-rust-d35ea3a4ec29?sk=f8d808d5f414154fdb811e4137011437
15// for an article on how this project uses Rust to create a Python extension.
16
17// For Rust API tips see https://rust-lang.github.io/api-guidelines/necessities.html
18#![doc = include_str!("../README-rust.md")]
19//! ## Main Functions
20//!
21//! | Function | Description |
22//! | -------- | ----------- |
23//! | [`Bed::new`](struct.Bed.html#method.new) or [`Bed::builder`](struct.Bed.html#method.builder) | Open a local PLINK .bed file for reading genotype data and metadata. |
24//! | [`BedCloud::new`](struct.BedCloud.html#method.new), [`BedCloud::new_with_options`](struct.BedCloud.html#method.new_with_options),<br> [`BedCloud::builder`](struct.BedCloud.html#method.builder), [`BedCloud::builder_with_options`](struct.BedCloud.html#method.builder_with_options),<br> [`BedCloud::from_cloud_file`](struct.BedCloud.html#method.from_cloud_file), [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file) | Open a cloud PLINK .bed file for reading genotype data and metadata. |
25//! | [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) | Read genotype data from a local or cloud file. Supports indexing and options. |
26//! | [`WriteOptions::builder`](struct.WriteOptions.html#method.builder) | Write values to a local file in PLINK .bed format. Supports metadata and options. |
27//!
28//! ### `Bed` Metadata Methods
29//!
30//! After using [`Bed::new`](struct.Bed.html#method.new) or [`Bed::builder`](struct.Bed.html#method.builder) to open a PLINK .bed file for reading, use
31//! these methods to see metadata.
32//!
33//! | Method | Description |
34//! | -------- | ----------- |
35//! | [`iid_count`](struct.Bed.html#method.iid_count) | Number of individuals (samples) |
36//! | [`sid_count`](struct.Bed.html#method.sid_count) | Number of SNPs (variants) |
37//! | [`dim`](struct.Bed.html#method.dim) | Number of individuals and SNPs |
//! | [`fid`](struct.Bed.html#method.fid) | Family id of each individual (sample) |
//! | [`iid`](struct.Bed.html#method.iid) | Individual id of each individual (sample) |
//! | [`father`](struct.Bed.html#method.father) | Father id of each individual (sample) |
//! | [`mother`](struct.Bed.html#method.mother) | Mother id of each individual (sample) |
42//! | [`sex`](struct.Bed.html#method.sex) | Sex of each individual (sample) |
43//! | [`pheno`](struct.Bed.html#method.pheno) | A phenotype for each individual (seldom used) |
44//! | [`chromosome`](struct.Bed.html#method.chromosome) | Chromosome of each SNP (variant) |
45//! | [`sid`](struct.Bed.html#method.sid) | SNP Id of each SNP (variant) |
46//! | [`cm_position`](struct.Bed.html#method.cm_position) | Centimorgan position of each SNP (variant) |
47//! | [`bp_position`](struct.Bed.html#method.bp_position) | Base-pair position of each SNP (variant) |
48//! | [`allele_1`](struct.Bed.html#method.allele_1) | First allele of each SNP (variant) |
49//! | [`allele_2`](struct.Bed.html#method.allele_2) | Second allele of each SNP (variant) |
50//! | [`metadata`](struct.Bed.html#method.metadata) | All the metadata returned as a [`struct.Metadata`](struct.Metadata.html) |
51//!
52//! ### `ReadOptions`
53//!
54//! When using [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) to read genotype data, use these options to
55//! specify a desired numeric type,
56//! which individuals (samples) to read, which SNPs (variants) to read, etc.
57//!
58//! | Option | Description |
59//! | -------- | ----------- |
60//! | [`i8`](struct.ReadOptionsBuilder.html#method.i8) | Read values as i8 |
61//! | [`f32`](struct.ReadOptionsBuilder.html#method.f32) | Read values as f32 |
62//! | [`f64`](struct.ReadOptionsBuilder.html#method.f64) | Read values as f64 |
63//! | [`iid_index`](struct.ReadOptionsBuilder.html#method.iid_index) | Index of individuals (samples) to read (defaults to all)|
64//! | [`sid_index`](struct.ReadOptionsBuilder.html#method.sid_index) | Index of SNPs (variants) to read (defaults to all) |
65//! | [`f`](struct.ReadOptionsBuilder.html#method.f) | Order of the output array, Fortran-style (default) |
66//! | [`c`](struct.ReadOptionsBuilder.html#method.c) | Order of the output array, C-style |
67//! | [`is_f`](struct.ReadOptionsBuilder.html#method.is_f) | Is order of the output array Fortran-style? (defaults to true)|
68//! | [`missing_value`](struct.ReadOptionsBuilder.html#method.missing_value) | Value to use for missing values (defaults to -127 or NaN) |
//! | [`count_a1`](struct.ReadOptionsBuilder.html#method.count_a1) | Count the number of allele 1 (default) |
//! | [`count_a2`](struct.ReadOptionsBuilder.html#method.count_a2) | Count the number of allele 2 |
71//! | [`is_a1_counted`](struct.ReadOptionsBuilder.html#method.is_a1_counted) | Is allele 1 counted? (defaults to true) |
72//! | [`num_threads`](struct.ReadOptionsBuilder.html#method.num_threads) | Number of threads to use (defaults to all processors) |
73//! | [`max_concurrent_requests`](struct.ReadOptionsBuilder.html#method.max_concurrent_requests) | Maximum number of concurrent async requests (defaults to 10) -- Used by [`BedCloud`](struct.BedCloud.html). |
74//! | [`max_chunk_bytes`](struct.ReadOptionsBuilder.html#method.max_chunk_bytes) | Maximum chunk size of async requests (defaults to 8_000_000 bytes) -- Used by [`BedCloud`](struct.BedCloud.html). |
75//!
76//! ### [`Index`](enum.Index.html) Expressions
77//!
78//! Select which individuals (samples) and SNPs (variants) to read by using these
79//! [`iid_index`](struct.ReadOptionsBuilder.html#method.iid_index) and/or
80//! [`sid_index`](struct.ReadOptionsBuilder.html#method.sid_index) expressions.
81//!
82//! | Example | Type | Description |
83//! | -------- | --- | ----------- |
84//! | nothing | `()` | All |
85//! | `2` | `isize` | Index position 2 |
86//! | `-1` | `isize` | Last index position |
87//! | `vec![0, 10, -2]` | `Vec<isize>` | Index positions 0, 10, and 2nd from last |
88//! | `[0, 10, -2]` | `[isize]` and `[isize;n]` | Index positions 0, 10, and 2nd from last |
89//! | `ndarray::array![0, 10, -2]` | `ndarray::Array1<isize>` | Index positions 0, 10, and 2nd from last |
90//! | `10..20` | `Range<usize>` | Index positions 10 (inclusive) to 20 (exclusive). *Note: Rust ranges don't support negatives* |
91//! | `..=19` | `RangeInclusive<usize>` | Index positions 0 (inclusive) to 19 (inclusive). *Note: Rust ranges don't support negatives* |
92//! | *any Rust ranges* | `Range*<usize>` | *Note: Rust ranges don't support negatives* |
93//! | `s![10..20;2]` | `ndarray::SliceInfo1` | Index positions 10 (inclusive) to 20 (exclusive) in steps of 2 |
94//! | `s![-20..-10;-2]` | `ndarray::SliceInfo1` | 10th from last (exclusive) to 20th from last (inclusive), in steps of -2 |
95//! | `vec![true, false, true]` | `Vec<bool>`| Index positions 0 and 2. |
96//! | `[true, false, true]` | `[bool]` and `[bool;n]`| Index positions 0 and 2.|
97//! | `ndarray::array![true, false, true]` | `ndarray::Array1<bool>`| Index positions 0 and 2.|
98//!
99//! ### Environment Variables
100//!
101//! * `BED_READER_NUM_THREADS`
102//! * `NUM_THREADS`
103//!
//! If [`ReadOptionsBuilder::num_threads`](struct.ReadOptionsBuilder.html#method.num_threads)
//! or [`WriteOptionsBuilder::num_threads`](struct.WriteOptionsBuilder.html#method.num_threads) is not specified,
//! the number of threads to use is determined by the environment variables listed above (in order of priority).
//! If neither of these environment variables is set, all processors are used.
108//!
109//! * `BED_READER_DATA_DIR`
110//!
111//! Any requested sample file will be downloaded to this directory. If the environment variable is not set,
112//! a cache folder, appropriate to the OS, will be used.
113
114mod python_module;
115mod tests;
116use anyinput::anyinput;
117pub use bed_cloud::{sample_bed_url, sample_url, sample_urls, BedCloud, BedCloudBuilder};
118use byteorder::{LittleEndian, ReadBytesExt};
119pub use cloud_file::{CloudFile, CloudFileError};
120use core::fmt::Debug;
121use derive_builder::Builder;
122use dpc_pariter::{scope, IteratorExt};
123use fetch_data::FetchData;
124use futures_util::StreamExt;
125use nd::ShapeBuilder;
126use ndarray as nd;
127use num_traits::{abs, Float, FromPrimitive, Signed, ToPrimitive};
128use rayon::iter::{IntoParallelRefIterator, IntoParallelRefMutIterator, ParallelIterator};
129use rayon::{iter::ParallelBridge, ThreadPoolBuildError};
130use statrs::distribution::{Beta, Continuous};
131use std::cmp::Ordering;
132use std::collections::HashSet;
133use std::fs::{self};
134use std::io::Read;
135use std::io::Seek;
136use std::io::SeekFrom;
137use std::io::Write;
138use std::num::{ParseFloatError, ParseIntError};
139use std::ops::AddAssign;
140use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeInclusive, RangeTo, RangeToInclusive};
141use std::rc::Rc;
142use std::str::Utf8Error;
143use std::{
144 env,
145 fs::File,
146 io::{BufRead, BufReader, BufWriter},
147 ops::RangeFull,
148 path::{Path, PathBuf},
149};
150use thiserror::Error;
151mod bed_cloud;
152
/// First magic byte of a .bed file: 0x6C, ASCII 'l' (lowercase 'L').
const BED_FILE_MAGIC1: u8 = 0b0110_1100;
/// Second magic byte of a .bed file: 0x1B, ASCII <esc>.
const BED_FILE_MAGIC2: u8 = 0b0001_1011;
/// Length of the file header in bytes (two magic bytes followed by one mode byte), as `usize`.
const CB_HEADER_USIZE: usize = 3;
/// The same header length as `u64`, for file-offset arithmetic.
const CB_HEADER_U64: u64 = CB_HEADER_USIZE as u64;
157
158// About ndarray
159// https://docs.rs/ndarray/0.14.0/ndarray/parallel/index.html
160// https://rust-lang-nursery.github.io/rust-cookbook/concurrency/parallel.html
161// https://github.com/rust-ndarray/ndarray/blob/master/README-quick-start.md
162// https://datacrayon.com/posts/programming/rust-notebooks/multidimensional-arrays-and-operations-with-ndarray
163// https://docs.rs/ndarray/0.14.0/ndarray/doc/ndarray_for_numpy_users/index.html
164// https://docs.rs/ndarray-npy
165// https://rust-lang-nursery.github.io/rust-cookbook/science/mathematics/linear_algebra.html
166
167/// All possible errors returned by this library and the libraries it depends on.
168// Based on `<https://nick.groenen.me/posts/rust-error-handling/#the-library-error-type>`
169#[derive(Error, Debug)]
170pub enum BedErrorPlus {
171 #[allow(missing_docs)]
172 #[error(transparent)]
173 BedError(#[from] BedError),
174
175 #[allow(missing_docs)]
176 #[error(transparent)]
177 IOError(#[from] std::io::Error),
178
179 #[allow(missing_docs)]
180 #[error(transparent)]
181 ThreadPoolError(#[from] ThreadPoolBuildError),
182
183 #[allow(missing_docs)]
184 #[error(transparent)]
185 ParseIntError(#[from] ParseIntError),
186
187 #[allow(missing_docs)]
188 #[error(transparent)]
189 ParseFloatError(#[from] ParseFloatError),
190
191 #[allow(missing_docs)]
192 #[error(transparent)]
193 CloudFileError(#[from] CloudFileError),
194
195 #[allow(missing_docs)]
196 #[error(transparent)]
197 Utf8Error(#[from] Utf8Error),
198}
199// https://docs.rs/thiserror/1.0.23/thiserror/
200
201/// All errors specific to this library.
202#[derive(Error, Debug, Clone)]
203pub enum BedError {
204 #[allow(missing_docs)]
205 #[error("Ill-formed BED file. BED file header is incorrect or length is wrong. '{0}'")]
206 IllFormed(String),
207
208 #[allow(missing_docs)]
209 #[error(
210 "Ill-formed BED file. BED file header is incorrect. Expected mode to be 0 or 1. '{0}'"
211 )]
212 BadMode(String),
213
214 #[allow(missing_docs)]
215 #[error("Attempt to write illegal value to BED file. Only 0,1,2,missing allowed. '{0}'")]
216 BadValue(String),
217
218 #[allow(missing_docs)]
219 #[error("Multithreading resulted in panic(s)")]
220 PanickedThread(),
221
222 #[allow(missing_docs)]
223 #[error("No individual observed for the SNP.")]
224 NoIndividuals,
225
226 #[allow(missing_docs)]
227 #[error("Illegal SNP mean.")]
228 IllegalSnpMean,
229
230 #[allow(missing_docs)]
231 #[error("Index to individual larger than the number of individuals. (Index value {0})")]
232 IidIndexTooBig(isize),
233
234 #[allow(missing_docs)]
235 #[error("Index to SNP larger than the number of SNPs. (Index value {0})")]
236 SidIndexTooBig(isize),
237
238 #[allow(missing_docs)]
239 #[error("Length of iid_index ({0}) and sid_index ({1}) must match dimensions of output array ({2},{3}).")]
240 IndexMismatch(usize, usize, usize, usize),
241
242 #[allow(missing_docs)]
243 #[error("Indexes ({0},{1}) too big for files")]
244 IndexesTooBigForFiles(usize, usize),
245
246 #[allow(missing_docs)]
247 #[error("Subset: length of iid_index ({0}) and sid_index ({1}) must match dimensions of output array ({2},{3}).")]
248 SubsetMismatch(usize, usize, usize, usize),
249
250 #[allow(missing_docs)]
251 #[error("Cannot convert beta values to/from float 64")]
252 CannotConvertBetaToFromF64,
253
254 #[allow(missing_docs)]
255 #[error("Cannot create Beta Dist with given parameters ({0},{1})")]
256 CannotCreateBetaDist(f64, f64),
257
258 #[allow(missing_docs)]
259 #[error("Cannot use skipped metadata '{0}'")]
260 CannotUseSkippedMetadata(String),
261
262 #[allow(missing_docs)]
263 #[error("Index starts at {0} but ends at {1}")]
264 StartGreaterThanEnd(usize, usize),
265
266 #[allow(missing_docs)]
267 #[error("Step of zero not allowed")]
268 StepZero,
269
270 #[allow(missing_docs)]
271 #[error("Index starts at {0} but count is {1}")]
272 StartGreaterThanCount(usize, usize),
273
274 #[allow(missing_docs)]
275 #[error("Index ends at {0} but count is {1}")]
276 EndGreaterThanCount(usize, usize),
277
278 #[allow(missing_docs)]
279 #[error("Adding new axis not allowed")]
280 NewAxis,
281
282 #[allow(missing_docs)]
283 #[error("Expect 1-D NDArray SliceInfo")]
284 NdSliceInfoNot1D,
285
286 #[allow(missing_docs)]
287 #[error("Expect {0} fields but find only {1} in '{2}'")]
288 MetadataFieldCount(usize, usize, String),
289
290 #[allow(missing_docs)]
291 #[error("{0}_count values of {1} and {2} are inconsistent")]
292 InconsistentCount(String, usize, usize),
293
294 #[allow(missing_docs)]
295 #[error("Expect bool arrays and vectors to be length {0}, not {1}")]
296 BoolArrayVectorWrongLength(usize, usize),
297
298 #[allow(missing_docs)]
299 #[error("Expect ndarray of shape ({0}, {1}), but found shape ({2}, {3})")]
300 InvalidShape(usize, usize, usize, usize),
301
302 #[allow(missing_docs)]
303 #[error("Can't write '{0}' metadata if some fields are None")]
304 MetadataMissingForWrite(String),
305
306 #[allow(missing_docs)]
307 #[error("Unknown or bad sample file '{0}'")]
308 UnknownOrBadSampleFile(String),
309
310 #[allow(missing_docs)]
311 #[error("The registry of sample files is invalid")]
312 SampleRegistryProblem(),
313
314 #[allow(missing_docs)]
315 #[error("Samples construction failed with error: {0}")]
316 SamplesConstructionFailed(String),
317
318 #[allow(missing_docs)]
319 #[error("Downloaded sample file not seen: {0}")]
320 DownloadedSampleFileNotSeen(String),
321
322 #[allow(missing_docs)]
323 #[error("Downloaded sample file has wrong hash: {0},expected: {1}, actual: {2}")]
324 DownloadedSampleFileWrongHash(String, String, String),
325
326 #[allow(missing_docs)]
327 #[error("Cannot create cache directory")]
328 CannotCreateCacheDir(),
329
330 #[allow(missing_docs)]
331 #[error("Cannot parse URL: '{0}': {1}")]
332 CannotParseUrl(String, String),
333
334 #[allow(missing_docs)]
335 #[error("UninitializedField: '{0}'")]
336 UninitializedField(&'static str),
337
338 #[allow(missing_docs)]
339 #[error("Sample fetch error: {0}")]
340 SampleFetch(String),
341
342 #[allow(missing_docs)]
343 #[error("Encoding destination buffer must be contiguous.")]
344 EncodingContiguous(),
345
346 #[allow(missing_docs)]
347 #[error("Encoding destination buffer have length {0}, (in_vector.len() - 1) // 4 + 1, but it has length {1}.")]
348 EncodingLength(usize, usize),
349}
350
351// Trait alias
352
353/// A trait alias, used internally, for the values of a .bed file, namely i8, f32, f64.
354pub trait BedVal:
355 Copy + Default + From<i8> + Debug + Sync + Send + Sync + Missing + PartialEq
356{
357}
358impl<T> BedVal for T where
359 T: Copy + Default + From<i8> + Debug + Sync + Send + Sync + Missing + PartialEq
360{
361}
362
363fn create_pool(num_threads: usize) -> Result<rayon::ThreadPool, Box<BedErrorPlus>> {
364 match rayon::ThreadPoolBuilder::new()
365 .num_threads(num_threads)
366 .build()
367 {
368 Err(e) => Err(Box::new(e.into())),
369 Ok(pool) => Ok(pool),
370 }
371}
372
373#[allow(clippy::too_many_arguments)]
374#[anyinput]
375fn read_no_alloc<TVal: BedVal>(
376 path: AnyPath,
377 iid_count: usize,
378 sid_count: usize,
379 is_a1_counted: bool,
380 iid_index: &[isize],
381 sid_index: &[isize],
382 missing_value: TVal,
383 num_threads: usize,
384 val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
385) -> Result<(), Box<BedErrorPlus>> {
386 create_pool(num_threads)?.install(|| {
387 let (buf_reader, bytes_vector) = open_and_check(path)?;
388
389 match bytes_vector[2] {
390 0 => {
391 // We swap 'iid' and 'sid' and then reverse the axes.
392 let mut val_t = val.view_mut().reversed_axes();
393 internal_read_no_alloc(
394 buf_reader,
395 path,
396 sid_count,
397 iid_count,
398 is_a1_counted,
399 sid_index,
400 iid_index,
401 missing_value,
402 &mut val_t,
403 )
404 }
405 1 => internal_read_no_alloc(
406 buf_reader,
407 path,
408 iid_count,
409 sid_count,
410 is_a1_counted,
411 iid_index,
412 sid_index,
413 missing_value,
414 val,
415 ),
416 _ => Err(Box::new(BedError::BadMode(path_ref_to_string(path)).into())),
417 }
418 })?;
419 Ok(())
420}
421
422#[anyinput]
423fn path_ref_to_string(path: AnyPath) -> String {
424 PathBuf::from(path).display().to_string()
425}
426
427impl From<BedError> for Box<BedErrorPlus> {
428 fn from(err: BedError) -> Self {
429 Box::new(BedErrorPlus::BedError(err))
430 }
431}
432impl From<std::io::Error> for Box<BedErrorPlus> {
433 fn from(err: std::io::Error) -> Self {
434 Box::new(BedErrorPlus::IOError(err))
435 }
436}
437impl From<ThreadPoolBuildError> for Box<BedErrorPlus> {
438 fn from(err: ThreadPoolBuildError) -> Self {
439 Box::new(BedErrorPlus::ThreadPoolError(err))
440 }
441}
442impl From<ParseIntError> for Box<BedErrorPlus> {
443 fn from(err: ParseIntError) -> Self {
444 Box::new(BedErrorPlus::ParseIntError(err))
445 }
446}
447
448impl From<ParseFloatError> for Box<BedErrorPlus> {
449 fn from(err: ParseFloatError) -> Self {
450 Box::new(BedErrorPlus::ParseFloatError(err))
451 }
452}
453
454impl From<::derive_builder::UninitializedFieldError> for BedErrorPlus {
455 fn from(err: ::derive_builder::UninitializedFieldError) -> Self {
456 BedError::UninitializedField(err.field_name()).into()
457 }
458}
459
460impl From<CloudFileError> for Box<BedErrorPlus> {
461 fn from(err: CloudFileError) -> Self {
462 Box::new(BedErrorPlus::CloudFileError(err))
463 }
464}
465
466impl From<Utf8Error> for Box<BedErrorPlus> {
467 fn from(err: Utf8Error) -> Self {
468 Box::new(BedErrorPlus::Utf8Error(err))
469 }
470}
471
472#[anyinput]
473fn open_and_check(
474 path: AnyPath,
475) -> Result<(BufReader<File>, [u8; CB_HEADER_USIZE]), Box<BedErrorPlus>> {
476 let mut buf_reader = BufReader::new(File::open(path)?);
477 let mut bytes_array: [u8; CB_HEADER_USIZE] = [0; CB_HEADER_USIZE];
478 buf_reader.read_exact(&mut bytes_array)?;
479 if (BED_FILE_MAGIC1 != bytes_array[0]) || (BED_FILE_MAGIC2 != bytes_array[1]) {
480 Err(BedError::IllFormed(path_ref_to_string(path)))?;
481 }
482 Ok((buf_reader, bytes_array))
483}
484
485// trait Max {
486// fn max() -> Self;
487// }
488
489// impl Max for u8 {
490// fn max() -> u8 {
491// u8::MAX
492// }
493// }
494
495// impl Max for u64 {
496// fn max() -> u64 {
497// u64::MAX
498// }
499// }
500
/// A trait, used internally, to provide default missing values for i8, f32, f64.
pub trait Missing {
    /// The default missing value for a type such as i8, f32, and f64.
    fn missing() -> Self;
}

/// The default missing value for `f64` is NaN.
impl Missing for f64 {
    fn missing() -> Self {
        Self::NAN
    }
}

/// The default missing value for `f32` is NaN.
impl Missing for f32 {
    fn missing() -> Self {
        Self::NAN
    }
}

/// The default missing value for `i8` is -127.
impl Missing for i8 {
    fn missing() -> Self {
        -127
    }
}
524
525#[cfg(not(target_pointer_width = "64"))]
526compile_error!("This code requires a 64-bit target architecture.");
527#[inline]
528fn try_div_4(in_iid_count: usize, in_sid_count: usize) -> Result<u64, Box<BedErrorPlus>> {
529 if in_iid_count == 0 {
530 return Ok(0);
531 }
532 let in_iid_count_div4_u64 = in_iid_count.checked_sub(1).map_or(0, |v| v / 4 + 1) as u64;
533 let in_sid_count_u64 = in_sid_count as u64;
534
535 if in_sid_count > 0 && (u64::MAX - CB_HEADER_U64) / in_sid_count_u64 < in_iid_count_div4_u64 {
536 Err(BedError::IndexesTooBigForFiles(in_iid_count, in_sid_count))?;
537 }
538
539 Ok(in_iid_count_div4_u64)
540}
541
542#[allow(clippy::too_many_arguments)]
543#[anyinput]
544fn internal_read_no_alloc<TVal: BedVal>(
545 mut buf_reader: BufReader<File>,
546 path: AnyPath,
547 in_iid_count: usize,
548 in_sid_count: usize,
549 is_a1_counted: bool,
550 iid_index: &[isize],
551 sid_index: &[isize],
552 missing_value: TVal,
553 out_val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
554) -> Result<(), Box<BedErrorPlus>> {
555 // Check the file length
556
557 let in_iid_count_div4_u64 = try_div_4(in_iid_count, in_sid_count)?;
558 // "as" and math is safe because of early checks
559 let file_len = buf_reader.get_ref().metadata()?.len();
560 let file_len2 = in_iid_count_div4_u64 * (in_sid_count as u64) + CB_HEADER_U64;
561 if file_len != file_len2 {
562 Err(BedError::IllFormed(path_ref_to_string(path)))?;
563 }
564
565 // Check and precompute for each iid_index
566 let (i_div_4_less_start_array, i_mod_4_times_2_array, i_div_4_start, i_div_4_len) =
567 check_and_precompute_iid_index(in_iid_count, iid_index)?;
568
569 // Check and compute work for each sid_index
570 let from_two_bits_to_value = set_up_two_bits_to_value(is_a1_counted, missing_value);
571 let lower_sid_count = -(in_sid_count as isize);
572 let upper_sid_count: isize = (in_sid_count as isize) - 1;
573 // See https://morestina.net/blog/1432/parallel-stream-processing-with-rayon
574 // Possible optimization: We could read snp in their input order instead of their output order
575 sid_index
576 .iter()
577 .map(|in_sid_i_signed| {
578 // Turn signed sid_index into unsigned sid_index (or error)
579 let in_sid_i = if (0..=upper_sid_count).contains(in_sid_i_signed) {
580 *in_sid_i_signed as u64
581 } else if (lower_sid_count..=-1).contains(in_sid_i_signed) {
582 (in_sid_count - ((-in_sid_i_signed) as usize)) as u64
583 } else {
584 Err(BedError::SidIndexTooBig(*in_sid_i_signed))?
585 };
586
587 // Read the iid info for one snp from the disk
588 let mut bytes_vector: Vec<u8> = vec![0; i_div_4_len as usize];
589 let pos: u64 = in_sid_i * in_iid_count_div4_u64 + i_div_4_start + CB_HEADER_U64; // "as" and math is safe because of early checks
590 buf_reader.seek(SeekFrom::Start(pos))?;
591 buf_reader.read_exact(&mut bytes_vector)?;
592 Ok::<_, Box<BedErrorPlus>>(bytes_vector)
593 })
594 // Zip in the column of the output array
595 .zip(out_val.axis_iter_mut(nd::Axis(1)))
596 // In parallel, decompress the iid info and put it in its column
597 .par_bridge() // This seems faster that parallel zip
598 .try_for_each(|(bytes_vector_result, mut col)| match bytes_vector_result {
599 Err(e) => Err(e),
600 Ok(bytes_vector) => {
601 for out_iid_i in 0..iid_index.len() {
602 let i_div_4_less_start = i_div_4_less_start_array[out_iid_i];
603 let i_mod_4_times_2 = i_mod_4_times_2_array[out_iid_i];
604 let genotype_byte: u8 =
605 (bytes_vector[i_div_4_less_start] >> i_mod_4_times_2) & 0x03;
606 col[out_iid_i] = from_two_bits_to_value[genotype_byte as usize];
607 }
608 Ok(())
609 }
610 })?;
611
612 Ok(())
613}
614
615type Array1Usize = nd::ArrayBase<nd::OwnedRepr<usize>, nd::Dim<[usize; 1]>>;
616type Array1U8 = nd::ArrayBase<nd::OwnedRepr<u8>, nd::Dim<[usize; 1]>>;
617
618#[allow(clippy::type_complexity)]
619#[allow(clippy::range_plus_one)]
620fn check_and_precompute_iid_index(
621 in_iid_count: usize,
622 iid_index: &[isize],
623) -> Result<(Array1Usize, Array1U8, u64, u64), Box<BedErrorPlus>> {
624 let lower_iid_count = -(in_iid_count as isize);
625 let upper_iid_count: isize = (in_iid_count as isize) - 1;
626 let mut i_div_4_less_start_array = nd::Array1::<usize>::zeros(iid_index.len());
627 let mut i_mod_4_times_2_array = nd::Array1::<u8>::zeros(iid_index.len());
628 let mut result_list: Vec<Result<(), BedError>> = vec![Ok(()); iid_index.len()];
629 nd::par_azip!((in_iid_i_signed in iid_index,
630 i_div_4_less_start in &mut i_div_4_less_start_array,
631 i_mod_4_times_2 in &mut i_mod_4_times_2_array,
632 result in &mut result_list
633 )
634 {
635 let in_iid_i = if (0..=upper_iid_count).contains(in_iid_i_signed) {
636 *result = Ok(());
637 *in_iid_i_signed as usize
638 } else if (lower_iid_count..=-1).contains(in_iid_i_signed) {
639 *result = Ok(());
640 in_iid_count - ((-in_iid_i_signed) as usize)
641 } else {
642 *result = Err(BedError::IidIndexTooBig(
643 *in_iid_i_signed,
644 ));
645 0
646 };
647
648 *i_div_4_less_start = in_iid_i / 4 ;
649 *i_mod_4_times_2 = (in_iid_i % 4 * 2) as u8;
650 });
651 result_list
652 .iter()
653 .par_bridge()
654 .try_for_each(|x| (*x).clone())?;
655
656 let (i_div_4_start, i_div_4_len) =
657 if let Some(min_value) = i_div_4_less_start_array.par_iter().min() {
658 let max_value = *i_div_4_less_start_array.par_iter().max().unwrap(); // safe because of min
659 (*min_value as u64, (max_value + 1 - *min_value) as u64)
660 } else {
661 (0, 0)
662 };
663 // skip of min_value is 0
664 if i_div_4_start > 0 {
665 i_div_4_less_start_array
666 .par_iter_mut()
667 .for_each(|x| *x -= i_div_4_start as usize);
668 }
669 Ok((
670 i_div_4_less_start_array,
671 i_mod_4_times_2_array,
672 i_div_4_start,
673 i_div_4_len,
674 ))
675}
676
/// Builds the look-up table from a two-bit genotype code (0 to 3) to an output value.
///
/// Code 1 always maps to `missing_value` and code 2 to the value 1. With `count_a1`,
/// code 0 maps to 2 and code 3 to 0; without it, code 0 maps to 0 and code 3 to 2.
fn set_up_two_bits_to_value<TVal: From<i8>>(count_a1: bool, missing_value: TVal) -> [TVal; 4] {
    let value_0 = TVal::from(0); // homozygous primary (major) allele
    let value_1 = TVal::from(1); // heterozygous
    let value_2 = TVal::from(2); // homozygous secondary (minor) allele

    // Only look-ups 0 and 3 depend on which allele is counted.
    let (for_code_0, for_code_3) = if count_a1 {
        (value_2, value_0)
    } else {
        (value_0, value_2)
    };

    [for_code_0, missing_value, value_1, for_code_3]
}
698
699// Thanks to Dawid for his dpc-pariter library that makes this function scale.
700// https://dpc.pw/adding-parallelism-to-your-rust-iterators
701#[anyinput]
702fn write_val<S, TVal>(
703 path: AnyPath,
704 val: &nd::ArrayBase<S, nd::Ix2>,
705 is_a1_counted: bool,
706 missing: TVal,
707 num_threads: usize,
708) -> Result<(), Box<BedErrorPlus>>
709where
710 S: nd::Data<Elem = TVal>,
711 TVal: BedVal,
712{
713 let (iid_count, sid_count) = val.dim();
714
715 // 4 genotypes per byte so round up
716 let iid_count_div4_u64 = try_div_4(iid_count, sid_count)?;
717
718 // We create and write to a file.
719 // If there is an error, we will delete it.
720 if let Err(e) = write_internal(
721 path,
722 iid_count_div4_u64,
723 val,
724 is_a1_counted,
725 missing,
726 num_threads,
727 ) {
728 // Clean up the file
729 let _ = fs::remove_file(path);
730 Err(e)
731 } else {
732 Ok(())
733 }
734}
735
736// https://www.reddit.com/r/rust/comments/mo4s8e/difference_between_reference_and_view_in_ndarray/
737#[anyinput]
738fn write_internal<S, TVal>(
739 path: AnyPath,
740 iid_count_div4_u64: u64,
741 val: &nd::ArrayBase<S, nd::Ix2>,
742 is_a1_counted: bool,
743 missing: TVal,
744 num_threads: usize,
745) -> Result<(), Box<BedErrorPlus>>
746where
747 S: nd::Data<Elem = TVal>,
748 TVal: BedVal,
749{
750 let mut writer = BufWriter::new(File::create(path)?);
751 // LATER: If this method is later changed
752 // to support major="individual", be sure to
753 // change write_f64, etc and python function 'to_bed' which
754 // currently uses a work-around.
755 writer.write_all(&[BED_FILE_MAGIC1, BED_FILE_MAGIC2, 0x01])?;
756
757 #[allow(clippy::eq_op)]
758 let use_nan = missing != missing; // generic NAN test
759 let zero_code = if is_a1_counted { 3u8 } else { 0u8 };
760 let two_code = if is_a1_counted { 0u8 } else { 3u8 };
761
762 let homozygous_primary_allele = TVal::from(0); // Major Allele
763 let heterozygous_allele = TVal::from(1);
764 let homozygous_secondary_allele = TVal::from(2); // Minor Allele
765
766 scope(|scope| {
767 val.axis_iter(nd::Axis(1))
768 .parallel_map_scoped(scope, {
769 move |column| {
770 // Convert each column into a bytes_vector
771 let mut bytes_vector: Vec<u8> = vec![0; iid_count_div4_u64 as usize]; // inits to 0
772 process_genomic_slice(
773 &column,
774 &mut bytes_vector,
775 homozygous_primary_allele,
776 heterozygous_allele,
777 homozygous_secondary_allele,
778 zero_code,
779 two_code,
780 use_nan,
781 missing,
782 )?;
783 Ok::<_, Box<BedErrorPlus>>(bytes_vector)
784 }
785 })
786 .threads(num_threads)
787 .try_for_each(|bytes_vector| {
788 // Write the bytes vector, they must be in order.
789 writer.write_all(&bytes_vector?)?;
790 Ok(())
791 })
792 })
793 .map_err(|_e| BedError::PanickedThread())?
794}
795
796#[allow(dead_code)]
797fn encode1<TVal>(
798 in_vector: &ndarray::ArrayView1<TVal>,
799 out_vector: &mut [u8],
800 is_a1_counted: bool,
801 missing: TVal,
802) -> Result<(), Box<BedErrorPlus>>
803where
804 TVal: BedVal,
805{
806 #[allow(clippy::eq_op)]
807 let use_nan = missing != missing; // generic NAN test
808 let zero_code = if is_a1_counted { 3u8 } else { 0u8 };
809 let two_code = if is_a1_counted { 0u8 } else { 3u8 };
810
811 let homozygous_primary_allele: TVal = TVal::from(0); // Major Allele
812 let heterozygous_allele = TVal::from(1);
813 let homozygous_secondary_allele = TVal::from(2); // Minor Allele
814
815 let minor_div4 = in_vector.len().checked_sub(1).map_or(0, |v| v / 4 + 1);
816 if minor_div4 != out_vector.len() {
817 return Err(Box::new(
818 BedError::EncodingLength(minor_div4, out_vector.len()).into(),
819 ));
820 }
821
822 process_genomic_slice(
823 in_vector,
824 out_vector,
825 homozygous_primary_allele,
826 heterozygous_allele,
827 homozygous_secondary_allele,
828 zero_code,
829 two_code,
830 use_nan,
831 missing,
832 )
833}
834
835#[inline]
836#[allow(clippy::eq_op)]
837#[allow(clippy::too_many_arguments)]
838fn encode_genotype_chunk<TVal>(
839 chunk: nd::ArrayView1<TVal>,
840 homozygous_primary_allele: TVal,
841 heterozygous_allele: TVal,
842 homozygous_secondary_allele: TVal,
843 zero_code: u8,
844 two_code: u8,
845 use_nan: bool,
846 missing: TVal,
847) -> Result<u8, BedError>
848where
849 TVal: PartialEq + Copy,
850{
851 // LATER: Think about unrolling this loop in the usual case of 4 elements
852 let mut output_byte = 0u8;
853 for (within_chunk_index, &v0) in chunk.iter().enumerate() {
854 let genotype_code = if v0 == homozygous_primary_allele {
855 zero_code
856 } else if v0 == heterozygous_allele {
857 2
858 } else if v0 == homozygous_secondary_allele {
859 two_code
860 } else if (use_nan && v0 != v0) || (!use_nan && v0 == missing) {
861 1
862 } else {
863 return Err(BedError::BadValue(
864 "Invalid genotype value encountered during encoding.".to_string(),
865 ));
866 };
867
868 output_byte |= genotype_code << (within_chunk_index * 2);
869 }
870 Ok(output_byte)
871}
872
873#[inline]
874#[allow(clippy::eq_op)]
875#[allow(clippy::too_many_arguments)]
876fn process_genomic_slice<TVal>(
877 in_vector: &ndarray::ArrayView1<TVal>,
878 out_vector: &mut [u8],
879 homozygous_primary_allele: TVal,
880 heterozygous_allele: TVal,
881 homozygous_secondary_allele: TVal,
882 zero_code: u8,
883 two_code: u8,
884 use_nan: bool,
885 missing: TVal,
886) -> Result<(), Box<BedErrorPlus>>
887where
888 TVal: PartialEq + Copy + Sync, // Ensure TVal supports equality check and can be copied
889{
890 // Calculate the number of full chunks and the remainder
891 let full_chunks = in_vector.len() / 4;
892 let remainder = in_vector.len() % 4;
893
894 // Ensure the output vector is correctly sized
895 assert_eq!(out_vector.len(), full_chunks + usize::from(remainder > 0));
896
897 // Zip the exact input chunks with output chunks and process in parallel
898 in_vector
899 .exact_chunks(4)
900 .into_iter()
901 .zip(out_vector.iter_mut())
902 .try_for_each(|(chunk, output_byte)| {
903 *output_byte = encode_genotype_chunk(
904 chunk,
905 homozygous_primary_allele,
906 heterozygous_allele,
907 homozygous_secondary_allele,
908 zero_code,
909 two_code,
910 use_nan,
911 missing,
912 )?;
913 Ok::<(), Box<BedErrorPlus>>(())
914 })?;
915
916 // Process the remainder sequentially if there is any
917 if remainder != 0 {
918 let start = full_chunks * 4;
919 let chunk = in_vector.slice(ndarray::s![start..]);
920 let output_byte = &mut out_vector[full_chunks];
921 *output_byte = encode_genotype_chunk(
922 chunk,
923 homozygous_primary_allele,
924 heterozygous_allele,
925 homozygous_secondary_allele,
926 zero_code,
927 two_code,
928 use_nan,
929 missing,
930 )?;
931 }
932
933 Ok::<(), Box<BedErrorPlus>>(())
934}
935// #[inline]
936// #[allow(clippy::eq_op)]
937// #[allow(clippy::too_many_arguments)]
938// fn process_genomic_slice<TVal>(
939// in_vector: &ndarray::ArrayView1<TVal>,
940// out_vector: &mut [u8],
941// homozygous_primary_allele: TVal,
942// heterozygous_allele: TVal,
943// homozygous_secondary_allele: TVal,
944// zero_code: u8,
945// two_code: u8,
946// use_nan: bool,
947// missing: TVal,
948// ) -> Result<(), Box<BedErrorPlus>>
949// where
950// TVal: PartialEq + Copy + Sync, // Ensure TVal supports equality check and can be copied
951// {
952// // Calculate the number of full chunks and the remainder
953// let full_chunks = in_vector.len() / 4;
954// let remainder = in_vector.len() % 4;
955
956// // Ensure the output vector is correctly sized
957// assert_eq!(out_vector.len(), full_chunks + usize::from(remainder > 0));
958
959// // Zip the exact input chunks with output chunks and process in parallel
960// in_vector
961// .exact_chunks(4)
962// .into_iter()
963// .zip(out_vector.iter_mut())
964// .par_bridge()
965// .try_for_each(|(chunk, output_byte)| {
966// *output_byte = encode_genotype_chunk(
967// chunk,
968// homozygous_primary_allele,
969// heterozygous_allele,
970// homozygous_secondary_allele,
971// zero_code,
972// two_code,
973// use_nan,
974// missing,
975// )?;
976// Ok::<(), Box<BedErrorPlus>>(())
977// })?;
978
979// // Process the remainder sequentially if there is any
980// if remainder != 0 {
981// let start = full_chunks * 4;
982// let chunk = in_vector.slice(ndarray::s![start..]);
983// let output_byte = &mut out_vector[full_chunks];
984// *output_byte = encode_genotype_chunk(
985// chunk,
986// homozygous_primary_allele,
987// heterozygous_allele,
988// homozygous_secondary_allele,
989// zero_code,
990// two_code,
991// use_nan,
992// missing,
993// )?;
994// }
995
996// Ok::<(), Box<BedErrorPlus>>(())
997// }
998
999#[anyinput]
1000fn count_lines(path: AnyPath) -> Result<usize, Box<BedErrorPlus>> {
1001 let file = File::open(path)?;
1002 let reader = BufReader::new(file);
1003 let count = reader.lines().count();
1004 Ok(count)
1005}
1006
/// Selects how `find_factor` computes the multiplier applied to mean-centered SNP values.
#[allow(dead_code)]
enum Dist {
    /// Use `1 / std` as the multiplier.
    Unit,
    /// Use the probability density of a Beta distribution with shape parameters `a` and `b`,
    /// evaluated at the SNP's `maf` (half the SNP mean, replaced by `1 - maf` when above 0.5).
    Beta { a: f64, b: f64 },
}
1012
1013#[allow(dead_code)]
1014fn impute_and_zero_mean_snps<
1015 T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1016>(
1017 val: &mut nd::ArrayViewMut2<'_, T>,
1018 dist: &Dist,
1019 apply_in_place: bool,
1020 use_stats: bool,
1021 stats: &mut nd::ArrayViewMut2<'_, T>,
1022) -> Result<(), Box<BedErrorPlus>> {
1023 let two = T::one() + T::one();
1024
1025 // If output is F-order (or in general if iid stride is no more than sid_stride)
1026 if val.stride_of(nd::Axis(0)) <= val.stride_of(nd::Axis(1)) {
1027 let result_list = nd::Zip::from(val.axis_iter_mut(nd::Axis(1)))
1028 .and(stats.axis_iter_mut(nd::Axis(0)))
1029 .par_map_collect(|mut col, mut stats_row| {
1030 _process_sid(
1031 &mut col,
1032 apply_in_place,
1033 use_stats,
1034 &mut stats_row,
1035 dist,
1036 two,
1037 )
1038 });
1039
1040 // Check the result list for errors
1041 result_list
1042 .iter()
1043 .par_bridge()
1044 .try_for_each(|x| (*x).clone())?;
1045
1046 Ok(())
1047 } else {
1048 //If C-order
1049 _process_all_iids(val, apply_in_place, use_stats, stats, dist, two)
1050 }
1051}
1052
1053// Later move the other fast-lmm functions into their own package
1054#[allow(dead_code)]
1055fn find_factor<
1056 T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1057>(
1058 dist: &Dist,
1059 mean_s: T,
1060 std: T,
1061) -> Result<T, BedError> {
1062 if let Dist::Beta { a, b } = dist {
1063 // Try to create a beta dist
1064 let Ok(beta_dist) = Beta::new(*a, *b) else {
1065 Err(BedError::CannotCreateBetaDist(*a, *b))?
1066 };
1067
1068 // Try to an f64 maf
1069 let mut maf = if let Some(mean_u64) = mean_s.to_f64() {
1070 mean_u64 / 2.0
1071 } else {
1072 Err(BedError::CannotConvertBetaToFromF64)?
1073 };
1074 if maf > 0.5 {
1075 maf = 1.0 - maf;
1076 }
1077
1078 // Try to put the maf in the beta dist
1079 if let Some(b) = T::from_f64(beta_dist.pdf(maf)) {
1080 Ok(b)
1081 } else {
1082 Err(BedError::CannotConvertBetaToFromF64)
1083 }
1084 } else {
1085 Ok(T::one() / std)
1086 }
1087}
1088
1089#[allow(dead_code)]
1090fn _process_sid<
1091 T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1092>(
1093 col: &mut nd::ArrayViewMut1<'_, T>,
1094 apply_in_place: bool,
1095 use_stats: bool,
1096 stats_row: &mut nd::ArrayViewMut1<'_, T>,
1097 dist: &Dist,
1098 two: T,
1099) -> Result<(), BedError> {
1100 if !use_stats {
1101 let mut n_observed = T::zero();
1102 let mut sum_s = T::zero(); // the sum of a SNP over all observed individuals
1103 let mut sum2_s = T::zero(); // the sum of the squares of the SNP over all observed individuals
1104
1105 for iid_i in 0..col.len() {
1106 let v = col[iid_i];
1107 if !v.is_nan() {
1108 sum_s = sum_s + v;
1109 sum2_s = sum2_s + v * v;
1110 n_observed = n_observed + T::one();
1111 }
1112 }
1113 if n_observed < T::one() {
1114 //LATER make it work (in some form) for n of 0
1115 Err(BedError::NoIndividuals)?;
1116 }
1117 let mean_s = sum_s / n_observed; //compute the mean over observed individuals for the current SNP
1118 let mean2_s: T = sum2_s / n_observed; //compute the mean of the squared SNP
1119
1120 if mean_s.is_nan()
1121 || (matches!(dist, Dist::Beta { a: _, b: _ })
1122 && ((mean_s > two) || (mean_s < T::zero())))
1123 {
1124 Err(BedError::IllegalSnpMean)?;
1125 }
1126
1127 let variance: T = mean2_s - mean_s * mean_s; //By the Cauchy Schwartz inequality this should always be positive
1128
1129 let mut std = variance.sqrt();
1130 if std.is_nan() || std <= T::zero() {
1131 // All "SNPs" have the same value (aka SNC)
1132 std = T::infinity(); //SNCs are still meaning full in QQ plots because they should be thought of as SNPs without enough data.
1133 }
1134
1135 stats_row[0] = mean_s;
1136 stats_row[1] = std;
1137 }
1138
1139 if apply_in_place {
1140 {
1141 let mean_s = stats_row[0];
1142 let std = stats_row[1];
1143 let is_snc = std.is_infinite();
1144
1145 let factor = find_factor(dist, mean_s, std)?;
1146
1147 for iid_i in 0..col.len() {
1148 //check for Missing (NAN) or SNC
1149 if col[iid_i].is_nan() || is_snc {
1150 col[iid_i] = T::zero();
1151 } else {
1152 col[iid_i] = (col[iid_i] - mean_s) * factor;
1153 }
1154 }
1155 }
1156 }
1157 Ok(())
1158}
1159
1160#[allow(dead_code)]
1161fn _process_all_iids<
1162 T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1163>(
1164 val: &mut nd::ArrayViewMut2<'_, T>,
1165 apply_in_place: bool,
1166 use_stats: bool,
1167 stats: &mut nd::ArrayViewMut2<'_, T>,
1168 dist: &Dist,
1169 two: T,
1170) -> Result<(), Box<BedErrorPlus>> {
1171 let sid_count = val.dim().1;
1172
1173 if !use_stats {
1174 // O(iid_count * sid_count)
1175 // Serial that respects C-order is 3-times faster than parallel that doesn't
1176 // So we parallelize the inner loop instead of the outer loop
1177 let mut n_observed_array = nd::Array1::<T>::zeros(sid_count);
1178 let mut sum_s_array = nd::Array1::<T>::zeros(sid_count); //the sum of a SNP over all observed individuals
1179 let mut sum2_s_array = nd::Array1::<T>::zeros(sid_count); //the sum of the squares of the SNP over all observed individuals
1180 for row in val.axis_iter(nd::Axis(0)) {
1181 nd::par_azip!((&v in row,
1182 n_observed_ptr in &mut n_observed_array,
1183 sum_s_ptr in &mut sum_s_array,
1184 sum2_s_ptr in &mut sum2_s_array
1185 )
1186 if !v.is_nan() {
1187 *n_observed_ptr = *n_observed_ptr + T::one();
1188 *sum_s_ptr = *sum_s_ptr + v;
1189 *sum2_s_ptr = *sum2_s_ptr + v * v;
1190 }
1191 );
1192 }
1193
1194 // O(sid_count)
1195 let mut result_list: Vec<Result<(), BedError>> = vec![Ok(()); sid_count];
1196 nd::par_azip!((mut stats_row in stats.axis_iter_mut(nd::Axis(0)),
1197 &n_observed in &n_observed_array,
1198 &sum_s in &sum_s_array,
1199 &sum2_s in &sum2_s_array,
1200 result_ptr in &mut result_list)
1201 {
1202 if n_observed < T::one() {
1203 *result_ptr = Err(BedError::NoIndividuals);
1204 return;
1205 }
1206 let mean_s = sum_s / n_observed; //compute the mean over observed individuals for the current SNP
1207 let mean2_s: T = sum2_s / n_observed; //compute the mean of the squared SNP
1208
1209 if mean_s.is_nan()
1210 || (matches!(dist, Dist::Beta { a:_, b:_ }) && ((mean_s > two) || (mean_s < T::zero())))
1211 {
1212 *result_ptr = Err(BedError::IllegalSnpMean);
1213 return;
1214 }
1215
1216 let variance: T = mean2_s - mean_s * mean_s; //By the Cauchy Schwartz inequality this should always be positive
1217 let mut std = variance.sqrt();
1218 if std.is_nan() || std <= T::zero() {
1219 // All "SNPs" have the same value (aka SNC)
1220 std = T::infinity(); //SNCs are still meaning full in QQ plots because they should be thought of as SNPs without enough data.
1221 }
1222 stats_row[0] = mean_s;
1223 stats_row[1] = std;
1224 });
1225 // Check the result list for errors
1226 result_list.par_iter().try_for_each(|x| (*x).clone())?;
1227 }
1228
1229 if apply_in_place {
1230 // O(sid_count)
1231 let mut factor_array = nd::Array1::<T>::zeros(stats.dim().0);
1232
1233 stats
1234 .axis_iter_mut(nd::Axis(0))
1235 .zip(&mut factor_array)
1236 .par_bridge()
1237 .try_for_each(|(stats_row, factor_ptr)| {
1238 match find_factor(dist, stats_row[0], stats_row[1]) {
1239 Err(e) => Err(e),
1240 Ok(factor) => {
1241 *factor_ptr = factor;
1242 Ok(())
1243 }
1244 }
1245 })?;
1246
1247 // O(iid_count * sid_count)
1248 nd::par_azip!((mut row in val.axis_iter_mut(nd::Axis(0)))
1249 {
1250 for sid_i in 0..row.len() {
1251 //check for Missing (NAN) or SNC
1252 if row[sid_i].is_nan() || stats[(sid_i, 1)].is_infinite() {
1253 row[sid_i] = T::zero();
1254 } else {
1255 row[sid_i] = (row[sid_i] - stats[(sid_i, 0)]) * factor_array[sid_i];
1256 }
1257 }
1258 });
1259 }
1260 Ok(())
1261}
1262
1263#[allow(dead_code)]
1264#[anyinput]
1265fn file_b_less_aatbx(
1266 a_filename: AnyPath,
1267 offset: u64,
1268 iid_count: usize,
1269 b1: &mut nd::ArrayViewMut2<'_, f64>,
1270 aatb: &mut nd::ArrayViewMut2<'_, f64>,
1271 atb: &mut nd::ArrayViewMut2<'_, f64>,
1272 log_frequency: usize,
1273) -> Result<(), Box<BedErrorPlus>> {
1274 //speed idea from C++:
1275 //Are copies really needed?
1276 //is F, vc C order the best?
1277 //would bigger snp blocks be better
1278
1279 let (a_sid_count, b_sid_count) = atb.dim();
1280 if log_frequency > 0 {
1281 println!("file_b_less_aatbx: iid_count={iid_count}, {a_sid_count}x{b_sid_count} output");
1282 };
1283
1284 // Open the file and move to the starting sid
1285 let mut buf_reader = BufReader::new(File::open(a_filename)?);
1286 buf_reader.seek(SeekFrom::Start(offset))?;
1287
1288 let mut sid_reuse = vec![f64::NAN; iid_count];
1289 for (a_sid_index, mut atb_row) in atb.axis_iter_mut(nd::Axis(0)).enumerate() {
1290 if log_frequency > 0 && a_sid_index % log_frequency == 0 {
1291 println!(
1292 " working on train_sid_index={a_sid_index} of {a_sid_count} (iid_count={iid_count}, b_sid_count={b_sid_count})"
1293 );
1294 }
1295
1296 buf_reader.read_f64_into::<LittleEndian>(&mut sid_reuse)?;
1297
1298 nd::par_azip!(
1299 (mut atb_element in atb_row.axis_iter_mut(nd::Axis(0)),
1300 b1_col in b1.axis_iter(nd::Axis(1)),
1301 mut aatb_col in aatb.axis_iter_mut(nd::Axis(1)))
1302 {
1303 let mut atbi = 0.0;
1304 for iid_index in 0..iid_count {
1305 atbi += sid_reuse[iid_index] * b1_col[iid_index];
1306 }
1307 atb_element[()] = atbi;
1308 for iid_index in 0..iid_count {
1309 aatb_col[iid_index] -= sid_reuse[iid_index] * atbi;
1310 }
1311 });
1312 }
1313 Ok(())
1314}
1315
/// Reads little-endian `f64` values from `src` into `dst`.
///
/// Its signature matches the `read_into` function-pointer parameter of `file_ata_piece`
/// and `file_aat_piece` for `T = f64`.
#[allow(dead_code)]
fn read_into_f64(src: &mut BufReader<File>, dst: &mut [f64]) -> std::io::Result<()> {
    src.read_f64_into::<LittleEndian>(dst)
}
1320
/// Reads little-endian `f32` values from `src` into `dst`.
///
/// Its signature matches the `read_into` function-pointer parameter of `file_ata_piece`
/// and `file_aat_piece` for `T = f32`.
#[allow(dead_code)]
fn read_into_f32(src: &mut BufReader<File>, dst: &mut [f32]) -> std::io::Result<()> {
    src.read_f32_into::<LittleEndian>(dst)
}
1325
/* Here are Python algorithms that show how to do a low-memory multiply A (or A.T) x B (or B.T).
 They are used by file_ata_piece and file_aat_piece, with some optimizations for A and B being the same.
1328
1329output_list = [np.zeros((4,4)) for i in range(4)]
1330
1331# a.T.dot(b)
1332for a_col2 in range(0,4,2): # 1 pass through A, returning output chunk about the same size writing in one pass
1333 buffer_a2 = a[:,a_col2:a_col2+2]
1334 for b_col in range(4): # A1/a1 passes through B
1335 buffer_b = b[:,b_col]
1336 for i in range(4):
1337 b_val = buffer_b[i]
1338 a_slice = buffer_a2[i,:]
1339 for k in range(2): # A1/a1 * A0 passes through the output
1340 output_list[0][a_col2+k,b_col] += a_slice[k]*b_val
1341
1342# a.dot(b.T)
1343for out_col2 in range(0,4,2): # 1 pass through output, returning chunk on each pass
1344 for col in range(4): # O1/o1 passes through A and B
1345 buffer_a = a[:,col]
1346 buffer_b = b[:,col]
1347 for k in range(2):
1348 for i in range(4):
1349 output_list[1][i,out_col2+k] += buffer_a[i]*buffer_b[out_col2+k]
1350
1351# a.T.dot(b.T)
1352for a_col2 in range(0,4,2): # 1 pass through A, returning an output chunk on each pass
1353 buffer_a2 = a[:,a_col2:a_col2+2]
1354 for b_col in range(4):
1355 buffer_b = b[:,b_col]
1356 for i in range(4):
1357 b_val = buffer_b[i]
1358 for k in range(2):
1359 output_list[2][a_col2+k,i] += buffer_a2[b_col,k]*b_val
1360
1361# a.dot(b) - but should instead do (b.T.dot(a.T)).T
1362for b_col2 in range(0,4,2): #Transpose of preceding one
1363 buffer_b2 = b[:,b_col2:b_col2+2]
1364 for a_col in range(4):
1365 buffer_a = a[:,a_col]
1366 for i in range(4):
1367 a_val = buffer_a[i]
1368 for k in range(2):
1369 output_list[3][i,b_col2+k] += buffer_b2[a_col,k]*a_val
1370
1371
1372for output in output_list:
1373 print(output)
1374 */
1375
1376// Given A, a matrix in Fortran order in a file
1377// with row_count rows and col_count columns,
1378// and given a starting column,
1379// returns part of A.T x A, the column vs column product.
1380// The piece piece returned has dimensions
1381// (col_count-col_start) x ncols
1382// where ncols <= (col_count-col_start)
1383// Makes only one pass through the file.
1384#[allow(clippy::too_many_arguments)]
1385#[allow(dead_code)]
1386#[anyinput]
1387fn file_ata_piece<T: Float + Send + Sync + Sync + AddAssign>(
1388 path: AnyPath,
1389 offset: u64,
1390 row_count: usize,
1391 col_count: usize,
1392 col_start: usize,
1393 ata_piece: &mut nd::ArrayViewMut2<'_, T>,
1394 log_frequency: usize,
1395 read_into: fn(&mut BufReader<File>, &mut [T]) -> std::io::Result<()>,
1396) -> Result<(), Box<BedErrorPlus>> {
1397 let (nrows, ncols) = ata_piece.dim();
1398 if (col_start >= col_count)
1399 || (col_start + nrows != col_count)
1400 || (col_start + ncols > col_count)
1401 {
1402 Err(BedError::CannotConvertBetaToFromF64)?;
1403 }
1404
1405 _file_ata_piece_internal(
1406 path,
1407 offset,
1408 row_count,
1409 col_start,
1410 ata_piece,
1411 log_frequency,
1412 read_into,
1413 )
1414}
1415
1416#[allow(dead_code)]
1417#[anyinput]
1418fn _file_ata_piece_internal<T: Float + Send + Sync + Sync + AddAssign>(
1419 path: AnyPath,
1420 offset: u64,
1421 row_count: usize,
1422 col_start: usize,
1423 ata_piece: &mut nd::ArrayViewMut2<'_, T>,
1424 log_frequency: usize,
1425 read_into: fn(&mut BufReader<File>, &mut [T]) -> std::io::Result<()>,
1426) -> Result<(), Box<BedErrorPlus>> {
1427 let (nrows, ncols) = ata_piece.dim();
1428 if log_frequency > 0 {
1429 println!("file_ata_piece: col_start={col_start}, {nrows}x{ncols} output");
1430 };
1431
1432 // Open the file and move to the starting col
1433 let mut buf_reader = BufReader::new(File::open(path)?);
1434 buf_reader.seek(SeekFrom::Start(
1435 offset + col_start as u64 * row_count as u64 * std::mem::size_of::<T>() as u64,
1436 ))?;
1437
1438 let mut col_save_list: Vec<Vec<T>> = vec![];
1439 let mut col_reuse = vec![T::nan(); row_count];
1440
1441 for (col_rel_index, mut ata_row) in ata_piece.axis_iter_mut(nd::Axis(0)).enumerate() {
1442 if log_frequency > 0 && col_rel_index % log_frequency == 0 {
1443 println!(" working on {col_rel_index} of {nrows}");
1444 }
1445
1446 // Read next col and save if in range
1447 let col = if col_save_list.len() < ncols {
1448 let mut col_save = vec![T::nan(); row_count];
1449 read_into(&mut buf_reader, &mut col_save)?;
1450 col_save_list.push(col_save);
1451 col_save_list.last().unwrap() // unwrap is OK here
1452 } else {
1453 read_into(&mut buf_reader, &mut col_reuse)?;
1454 &col_reuse
1455 };
1456
1457 // Multiple saved sids with new sid
1458 let mut ata_row_trimmed = ata_row.slice_mut(nd::s![..col_save_list.len()]);
1459 nd::par_azip!((
1460 col_in_range in &col_save_list,
1461 mut ata_val in ata_row_trimmed.axis_iter_mut(nd::Axis(0))
1462 )
1463 {
1464 ata_val[()] = col_product(col_in_range, col);
1465 });
1466 }
1467
1468 // Reflect the new product values
1469 for row_index in 0usize..ncols - 1 {
1470 for col_index in row_index..ncols {
1471 ata_piece[(row_index, col_index)] = ata_piece[(col_index, row_index)];
1472 }
1473 }
1474 Ok(())
1475}
1476
1477#[allow(dead_code)]
1478fn col_product<T: Float + AddAssign>(col_i: &[T], col_j: &[T]) -> T {
1479 assert!(col_i.len() == col_j.len()); // real assert
1480 let mut product = T::zero();
1481 for row_index in 0..col_i.len() {
1482 product += col_i[row_index] * col_j[row_index];
1483 }
1484 product
1485}
1486
1487// Given A, a matrix in Fortran order in a file
1488// with row_count rows and col_count columns,
1489// and given a starting column,
1490// returns part of A x A.T, the row vs row product.
1491// The piece piece returned has dimensions
1492// (row_count-row_start) x ncols
1493// where ncols <= (row_count-row_start)
1494// Makes only one pass through the file.
1495#[allow(clippy::too_many_arguments)]
1496#[allow(dead_code)]
1497#[anyinput]
1498fn file_aat_piece<T: Float + Sync + Send + Sync + AddAssign>(
1499 path: AnyPath,
1500 offset: u64,
1501 row_count: usize,
1502 col_count: usize,
1503 row_start: usize,
1504 aat_piece: &mut nd::ArrayViewMut2<'_, T>,
1505 log_frequency: usize,
1506 read_into: fn(&mut BufReader<File>, &mut [T]) -> std::io::Result<()>,
1507) -> Result<(), Box<BedErrorPlus>> {
1508 let (nrows, ncols) = aat_piece.dim();
1509
1510 if log_frequency > 0 {
1511 println!("file_aat_piece: row_start={row_start}, {nrows}x{ncols} output");
1512 };
1513
1514 if (row_start >= row_count)
1515 || (row_start + nrows != row_count)
1516 || (row_start + ncols > row_count)
1517 {
1518 Err(BedError::CannotConvertBetaToFromF64)?;
1519 }
1520
1521 aat_piece.fill(T::zero());
1522
1523 // Open the file and move to the starting col
1524 let mut buf_reader = BufReader::new(File::open(path)?);
1525
1526 let mut col = vec![T::nan(); row_count - row_start];
1527
1528 for col_index in 0..col_count {
1529 if log_frequency > 0 && col_index % log_frequency == 0 {
1530 println!(" working on {col_index} of {col_count}");
1531 }
1532
1533 // Read next col
1534 buf_reader.seek(SeekFrom::Start(
1535 offset + (col_index * row_count + row_start) as u64 * std::mem::size_of::<T>() as u64,
1536 ))?;
1537 read_into(&mut buf_reader, &mut col)?;
1538
1539 nd::par_azip!(
1540 (index row_index1,
1541 mut aat_col in aat_piece.axis_iter_mut(nd::Axis(1))
1542 )
1543 {
1544 let val1 = col[row_index1];
1545 for row_index0 in row_index1..nrows {
1546 aat_col[row_index0] += val1 * col[row_index0];
1547 }
1548 });
1549 }
1550
1551 // Notice that ata reflects and aat doesn't. They don't need
1552 // to be the same, but they could be.
1553 Ok(())
1554}
1555
1556// References: https://www.youtube.com/watch?v=0zOg8_B71gE&t=22s
1557// https://deterministic.space/elegant-apis-in-rust.html
1558// https://rust-lang.github.io/api-guidelines/
1559// https://ricardomartins.cc/2016/08/03/convenient_and_idiomatic_conversions_in_rust
1560
/// Represents the metadata from PLINK .fam and .bim files.
///
/// Construct with [`Metadata::builder`](struct.Metadata.html#method.builder) or [`Metadata::new`](struct.Metadata.html#method.new).
///
/// # Example
///
/// Extract metadata from a file.
/// Create a random file with the same metadata.
/// ```
/// use ndarray as nd;
/// use bed_reader::{Bed, WriteOptions, sample_bed_file};
/// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
///
/// let mut bed = Bed::new(sample_bed_file("small.bed")?)?;
/// let metadata = bed.metadata()?;
/// let shape = bed.dim()?;
///
/// let mut rng = StdRng::seed_from_u64(0);
/// let val = nd::Array::random_using(shape, Uniform::from(-1..3), &mut rng);
///
/// let temp_out = temp_testdir::TempDir::default();
/// let output_file = temp_out.join("random.bed");
/// WriteOptions::builder(output_file)
///     .metadata(&metadata)
///     .missing_value(-1)
///     .write(&val)?;
/// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
#[derive(Clone, Debug, Builder, PartialEq)]
#[builder(build_fn(private, name = "build_no_file_check", error = "BedErrorPlus"))]
pub struct Metadata {
    // Every field is optional and defaults to `None` in the builder.
    // Each array sits behind an `Rc`.

    // Family id of each individual (.fam file).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    fid: Option<Rc<nd::Array1<String>>>,
    // Individual id of each individual (.fam file).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    iid: Option<Rc<nd::Array1<String>>>,
    // Father of each individual (.fam file).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    father: Option<Rc<nd::Array1<String>>>,
    // Mother of each individual (.fam file).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    mother: Option<Rc<nd::Array1<String>>>,

    // Sex of each individual (.fam file).
    // i32 based on https://www.cog-genomics.org/plink2/formats#bim
    #[builder(setter(custom))]
    #[builder(default = "None")]
    sex: Option<Rc<nd::Array1<i32>>>,
    // Phenotype of each individual (.fam file); kept as a string.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    pheno: Option<Rc<nd::Array1<String>>>,

    // Chromosome of each SNP (.bim file).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    chromosome: Option<Rc<nd::Array1<String>>>,
    // SNP id of each SNP (.bim file).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    sid: Option<Rc<nd::Array1<String>>>,
    // Centimorgan position of each SNP (.bim file).
    #[builder(setter(custom))]
    #[builder(default = "None")]
    cm_position: Option<Rc<nd::Array1<f32>>>,
    // Presumably the base-pair position of each SNP (.bim file) -- TODO confirm.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    bp_position: Option<Rc<nd::Array1<i32>>>,
    // Presumably the first allele of each SNP (.bim file) -- TODO confirm.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    allele_1: Option<Rc<nd::Array1<String>>>,
    // Presumably the second allele of each SNP (.bim file) -- TODO confirm.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    allele_2: Option<Rc<nd::Array1<String>>>,
}
1633
1634fn lazy_or_skip_count<T>(array: &Option<Rc<nd::Array1<T>>>) -> Option<usize> {
1635 array.as_ref().map(|array| array.len())
1636}
1637
/// Represents a PLINK .bed file that is open for reading genotype data and metadata.
///
/// Construct with [`Bed::new`](struct.Bed.html#method.new) or [`Bed::builder`](struct.Bed.html#method.builder).
///
/// > For reading cloud files, see [`BedCloud`](struct.BedCloud.html).
///
/// # Example
///
/// Open a file for reading. Then, read the individual (sample) ids
/// and all the genotype data.
/// ```
/// use ndarray as nd;
/// use bed_reader::{Bed, ReadOptions, sample_bed_file};
/// use bed_reader::assert_eq_nan;
///
/// let file_name = sample_bed_file("small.bed")?;
/// let mut bed = Bed::new(file_name)?;
/// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
/// let val = ReadOptions::builder().f64().read(&mut bed)?;
///
/// assert_eq_nan(
///     &val,
///     &nd::array![
///         [1.0, 0.0, f64::NAN, 0.0],
///         [2.0, 0.0, f64::NAN, 2.0],
///         [0.0, 1.0, 2.0, 0.0]
///     ],
/// );
/// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
#[derive(Clone, Debug, Builder)]
#[builder(build_fn(private, name = "build_no_file_check", error = "BedErrorPlus"))]
pub struct Bed {
    // https://stackoverflow.com/questions/32730714/what-is-the-right-way-to-store-an-immutable-path-in-a-struct
    // don't emit a setter, but keep the field declaration on the builder
    /// The file name or path of the .bed file.
    #[builder(setter(custom))]
    path: PathBuf,

    // Optional path of the .fam file -- presumably `None` means a default
    // location is used; verify where the field is read.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    fam_path: Option<PathBuf>,

    // Optional path of the .bim file -- presumably `None` means a default
    // location is used; verify where the field is read.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    bim_path: Option<PathBuf>,

    // When true (the default), `BedBuilder::build` opens and checks the .bed file.
    #[builder(setter(custom))]
    #[builder(default = "true")]
    is_checked_early: bool,

    // Count of individuals, if known. `BedBuilder::build` reconciles it with the
    // metadata via `Metadata::check_counts`.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    iid_count: Option<usize>,

    // Count of SNPs, if known. `BedBuilder::build` reconciles it with the
    // metadata via `Metadata::check_counts`.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    sid_count: Option<usize>,

    // Metadata values; the builder's setters override individual fields.
    #[builder(setter(custom))]
    metadata: Metadata,

    // Metadata fields that should not be read from the .fam and .bim files.
    #[builder(setter(custom))]
    skip_set: HashSet<MetadataFields>,
}
1704
/// All Metadata fields.
///
/// Used by [`Metadata::read_fam`](struct.Metadata.html#method.read_fam) and
/// [`Metadata::read_bim`](struct.Metadata.html#method.read_bim) to skip reading
/// specified metadata fields.
///
/// The variants are declared in the same order as the fields of
/// [`Metadata`](struct.Metadata.html).
#[derive(Debug, PartialEq, Eq, Copy, Clone, Ord, PartialOrd, Hash)]
pub enum MetadataFields {
    /// The family id (`fid`) field.
    Fid,
    /// The individual id (`iid`) field.
    Iid,
    /// The `father` field.
    Father,
    /// The `mother` field.
    Mother,
    /// The `sex` field.
    Sex,
    /// The phenotype (`pheno`) field.
    Pheno,
    /// The `chromosome` field.
    Chromosome,
    /// The SNP id (`sid`) field.
    Sid,
    /// The centimorgan position (`cm_position`) field.
    CmPosition,
    /// The `bp_position` field.
    BpPosition,
    /// The `allele_1` field.
    Allele1,
    /// The `allele_2` field.
    Allele2,
}
1737
1738impl BedBuilder {
1739 #[anyinput]
1740 fn new(path: AnyPath) -> Self {
1741 Self {
1742 path: Some(path.to_owned()),
1743 fam_path: None,
1744 bim_path: None,
1745
1746 is_checked_early: None,
1747 iid_count: None,
1748 sid_count: None,
1749
1750 metadata: Some(Metadata::new()),
1751 skip_set: Some(HashSet::new()),
1752 }
1753 }
1754
1755 /// Create a [`Bed`](struct.Bed.html) from the builder.
1756 ///
1757 /// > See [`Bed::builder`](struct.Bed.html#method.builder) for more details and examples.
1758 pub fn build(&self) -> Result<Bed, Box<BedErrorPlus>> {
1759 let mut bed = self.build_no_file_check()?;
1760
1761 if bed.is_checked_early {
1762 open_and_check(&bed.path)?;
1763 }
1764
1765 (bed.iid_count, bed.sid_count) = bed.metadata.check_counts(bed.iid_count, bed.sid_count)?;
1766
1767 Ok(bed)
1768 }
1769
1770 // https://stackoverflow.com/questions/38183551/concisely-initializing-a-vector-of-strings
1771 // https://stackoverflow.com/questions/65250496/how-to-convert-intoiteratoritem-asrefstr-to-iteratoritem-str-in-rust
1772
1773 /// Override the family id (fid) values found in the .fam file.
1774 ///
1775 /// By default, if fid values are needed and haven't already been found,
1776 /// they will be read from the .fam file.
1777 /// Providing them here avoids that file read and provides a way to give different values.
1778 #[anyinput]
1779 #[must_use]
1780 pub fn fid(mut self, fid: AnyIter<AnyString>) -> Self {
1781 // Unwrap will always work because BedBuilder starting with some metadata
1782 self.metadata.as_mut().unwrap().set_fid(fid);
1783 self
1784 }
1785
1786 /// Override the individual id (iid) values found in the .fam file.
1787 ///
1788 /// By default, if iid values are needed and haven't already been found,
1789 /// they will be read from the .fam file.
1790 /// Providing them here avoids that file read and provides a way to give different values.
1791 /// ```
1792 /// use ndarray as nd;
1793 /// use bed_reader::{Bed, assert_eq_nan, sample_bed_file};
1794 /// let file_name = sample_bed_file("small.bed")?;
1795 /// use bed_reader::ReadOptions;
1796 ///
1797 /// let mut bed = Bed::builder(file_name)
1798 /// .iid(["sample1", "sample2", "sample3"])
1799 /// .build()?;
1800 /// println!("{:?}", bed.iid()?); // Outputs ndarray ["sample1", "sample2", "sample3"]
1801 /// # use bed_reader::BedErrorPlus;
1802 /// # Ok::<(), Box<BedErrorPlus>>(())
1803 /// ```
1804 #[anyinput]
1805 #[must_use]
1806 pub fn iid(mut self, iid: AnyIter<AnyString>) -> Self {
1807 // Unwrap will always work because BedBuilder starting with some metadata
1808 self.metadata.as_mut().unwrap().set_iid(iid);
1809 self
1810 }
1811
1812 /// Override the father values found in the .fam file.
1813 ///
1814 /// By default, if father values are needed and haven't already been found,
1815 /// they will be read from the .fam file.
1816 /// Providing them here avoids that file read and provides a way to gi&ve different values.
1817 #[anyinput]
1818 #[must_use]
1819 pub fn father(mut self, father: AnyIter<AnyString>) -> Self {
1820 // Unwrap will always work because BedBuilder starting with some metadata
1821 self.metadata.as_mut().unwrap().set_father(father);
1822 self
1823 }
1824
1825 /// Override the mother values found in the .fam file.
1826 ///
1827 /// By default, if mother values are needed and haven't already been found,
1828 /// they will be read from the .fam file.
1829 /// Providing them here avoids that file read and provides a way to give different values.
1830 #[anyinput]
1831 #[must_use]
1832 pub fn mother(mut self, mother: AnyIter<AnyString>) -> Self {
1833 // Unwrap will always work because BedBuilder starting with some metadata
1834 self.metadata.as_mut().unwrap().set_mother(mother);
1835 self
1836 }
1837
1838 /// Override the sex values found in the .fam file.
1839 ///
1840 /// By default, if sex values are needed and haven't already been found,
1841 /// they will be read from the .fam file.
1842 /// Providing them here avoids that file read and provides a way to give different values.
1843 #[anyinput]
1844 #[must_use]
1845 pub fn sex(mut self, sex: AnyIter<i32>) -> Self {
1846 // Unwrap will always work because BedBuilder starting with some metadata
1847 self.metadata.as_mut().unwrap().set_sex(sex);
1848 self
1849 }
1850
1851 /// Override the phenotype values found in the .fam file.
1852 ///
1853 /// Note that the phenotype values in the .fam file are seldom used.
1854 /// By default, if phenotype values are needed and haven't already been found,
1855 /// they will be read from the .fam file.
1856 /// Providing them here avoids that file read and provides a way to give different values.
1857 #[anyinput]
1858 #[must_use]
1859 pub fn pheno(mut self, pheno: AnyIter<AnyString>) -> Self {
1860 // Unwrap will always work because BedBuilder starting with some metadata
1861 self.metadata.as_mut().unwrap().set_pheno(pheno);
1862 self
1863 }
1864
1865 /// Override the chromosome values found in the .bim file.
1866 ///
1867 /// By default, if chromosome values are needed and haven't already been found,
1868 /// they will be read from the .bim file.
1869 /// Providing them here avoids that file read and provides a way to give different values.
1870 #[anyinput]
1871 #[must_use]
1872 pub fn chromosome(mut self, chromosome: AnyIter<AnyString>) -> Self {
1873 // Unwrap will always work because BedBuilder starting with some metadata
1874 self.metadata.as_mut().unwrap().set_chromosome(chromosome);
1875 self
1876 }
1877
1878 /// Override the SNP id (sid) values found in the .fam file.
1879 ///
1880 /// By default, if sid values are needed and haven't already been found,
1881 /// they will be read from the .bim file.
1882 /// Providing them here avoids that file read and provides a way to give different values.
1883 /// ```
1884 /// use ndarray as nd;
1885 /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
1886 /// let file_name = sample_bed_file("small.bed")?;
1887 ///
1888 /// let mut bed = Bed::builder(file_name)
1889 /// .sid(["SNP1", "SNP2", "SNP3", "SNP4"])
1890 /// .build()?;
1891 /// println!("{:?}", bed.sid()?); // Outputs ndarray ["SNP1", "SNP2", "SNP3", "SNP4"]
1892 /// # use bed_reader::BedErrorPlus;
1893 /// # Ok::<(), Box<BedErrorPlus>>(())
1894 /// ```
1895 #[anyinput]
1896 #[must_use]
1897 pub fn sid(mut self, sid: AnyIter<AnyString>) -> Self {
1898 self.metadata.as_mut().unwrap().set_sid(sid);
1899 self
1900 }
1901
1902 /// Override the centimorgan position values found in the .bim file.
1903 ///
1904 /// By default, if centimorgan position values are needed and haven't already been found,
1905 /// they will be read from the .bim file.
1906 /// Providing them here avoids that file read and provides a way to give different values.
1907 #[anyinput]
1908 #[must_use]
1909 pub fn cm_position(mut self, cm_position: AnyIter<f32>) -> Self {
1910 // Unwrap will always work because BedBuilder starting with some metadata
1911 self.metadata.as_mut().unwrap().set_cm_position(cm_position);
1912 self
1913 }
1914
1915 /// Override the base-pair position values found in the .bim file.
1916 ///
1917 /// By default, if base-pair position values are needed and haven't already been found,
1918 /// they will be read from the .bim file.
1919 /// Providing them here avoids that file read and provides a way to give different values.
1920 #[anyinput]
1921 #[must_use]
1922 pub fn bp_position(mut self, bp_position: AnyIter<i32>) -> Self {
1923 // Unwrap will always work because BedBuilder starting with some metadata
1924 self.metadata.as_mut().unwrap().set_bp_position(bp_position);
1925 self
1926 }
1927
1928 /// Override the allele 1 values found in the .bim file.
1929 ///
1930 /// By default, if allele 1 values are needed and haven't already been found,
1931 /// they will be read from the .bim file.
1932 /// Providing them here avoids that file read and provides a way to give different values.
1933 #[anyinput]
1934 #[must_use]
1935 pub fn allele_1(mut self, allele_1: AnyIter<AnyString>) -> Self {
1936 // Unwrap will always work because BedBuilder starting with some metadata
1937 self.metadata.as_mut().unwrap().set_allele_1(allele_1);
1938 self
1939 }
1940
1941 /// Override the allele 2 values found in the .bim file.
1942 ///
1943 /// By default, if allele 2 values are needed and haven't already been found,
1944 /// they will be read from the .bim file.
1945 /// Providing them here avoids that file read and provides a way to give different values.
1946 #[anyinput]
1947 #[must_use]
1948 pub fn allele_2(mut self, allele_2: AnyIter<AnyString>) -> Self {
1949 // Unwrap will always work because BedBuilder starting with some metadata
1950 self.metadata.as_mut().unwrap().set_allele_2(allele_2);
1951 self
1952 }
1953
1954 /// Set the number of individuals (samples) in the data.
1955 ///
1956 /// By default, if this number is needed, it will be found
1957 /// and remembered
1958 /// by opening the .fam file and quickly counting the number
1959 /// of lines. Providing the number thus avoids a file read.
1960 #[must_use]
1961 pub fn iid_count(mut self, count: usize) -> Self {
1962 self.iid_count = Some(Some(count));
1963 self
1964 }
1965
1966 /// Set the number of SNPs in the data.
1967 ///
1968 /// By default, if this number is needed, it will be found
1969 /// and remembered
1970 /// by opening the .bim file and quickly counting the number
1971 /// of lines. Providing the number thus avoids a file read.
1972 #[must_use]
1973 pub fn sid_count(mut self, count: usize) -> Self {
1974 self.sid_count = Some(Some(count));
1975 self
1976 }
1977
1978 /// Don't check the header of the .bed file until and unless the file is actually read.
1979 ///
1980 /// By default, when a [`Bed`](struct.Bed.html) struct is created, the .bed
1981 /// file header is checked. This stops that early check.
1982 #[must_use]
1983 pub fn skip_early_check(mut self) -> Self {
1984 self.is_checked_early = Some(false);
1985 self
1986 }
1987
1988 /// Set the path to the .fam file.
1989 ///
1990 /// If not set, the .fam file will be assumed
1991 /// to have the same name as the .bed file, but with the extension .fam.
1992 ///
1993 /// # Example:
1994 /// Read .bed, .fam, and .bim files with non-standard names.
1995 /// ```
1996 /// use bed_reader::{Bed, ReadOptions, sample_files};
1997 /// let deb_maf_mib = sample_files(["small.deb", "small.maf", "small.mib"])?;
1998 /// let mut bed = Bed::builder(&deb_maf_mib[0])
1999 /// .fam_path(&deb_maf_mib[1])
2000 /// .bim_path(&deb_maf_mib[2])
2001 /// .build()?;
2002 /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
2003 /// println!("{:?}", bed.sid()?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
2004 /// # use bed_reader::BedErrorPlus;
2005 /// # Ok::<(), Box<BedErrorPlus>>(())
2006 /// ```
2007 #[anyinput]
2008 #[must_use]
2009 pub fn fam_path(mut self, path: AnyPath) -> Self {
2010 self.fam_path = Some(Some(path.to_owned()));
2011 self
2012 }
2013
2014 /// Set the path to the .bim file.
2015 ///
2016 /// If not set, the .bim file will be assumed
2017 /// to have the same name as the .bed file, but with the extension .bim.
2018 ///
2019 /// # Example:
2020 /// Read .bed, .fam, and .bim files with non-standard names.
2021 /// ```
2022 /// use bed_reader::{Bed, ReadOptions, sample_files};
2023 /// let deb_maf_mib = sample_files(["small.deb", "small.maf", "small.mib"])?;
2024 /// let mut bed = Bed::builder(&deb_maf_mib[0])
2025 /// .fam_path(&deb_maf_mib[1])
2026 /// .bim_path(&deb_maf_mib[2])
2027 /// .build()?;
2028 /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
2029 /// println!("{:?}", bed.sid()?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
2030 /// # use bed_reader::BedErrorPlus;
2031 /// # Ok::<(), Box<BedErrorPlus>>(())
2032 /// ```
2033 #[must_use]
2034 #[anyinput]
2035 pub fn bim_path(mut self, path: AnyPath) -> Self {
2036 self.bim_path = Some(Some(path.to_owned()));
2037 self
2038 }
2039
2040 /// Don't read the fid information from the .fam file.
2041 ///
2042 /// By default, when the .fam is read, the fid (the family id) is recorded.
2043 /// This stops that recording. This is useful if the fid is not needed.
2044 /// Asking for the fid after skipping it results in an error.
2045 #[must_use]
2046 pub fn skip_fid(mut self) -> Self {
2047 // Unwrap will always work because BedBuilder starting with some skip_set
2048 self.skip_set.as_mut().unwrap().insert(MetadataFields::Fid);
2049 self
2050 }
2051
2052 /// Don't read the iid information from the .fam file.
2053 ///
2054 /// By default, when the .fam is read, the iid (the individual id) is recorded.
2055 /// This stops that recording. This is useful if the iid is not needed.
2056 /// Asking for the iid after skipping it results in an error.
2057 #[must_use]
2058 pub fn skip_iid(mut self) -> Self {
2059 // Unwrap will always work because BedBuilder starting with some skip_set
2060 self.skip_set.as_mut().unwrap().insert(MetadataFields::Iid);
2061 self
2062 }
2063
2064 /// Don't read the father information from the .fam file.
2065 ///
2066 /// By default, when the .fam is read, the father id is recorded.
2067 /// This stops that recording. This is useful if the father id is not needed.
2068 /// Asking for the father id after skipping it results in an error.
2069 #[must_use]
2070 pub fn skip_father(mut self) -> Self {
2071 // Unwrap will always work because BedBuilder starting with some skip_set
2072 self.skip_set
2073 .as_mut()
2074 .unwrap()
2075 .insert(MetadataFields::Father);
2076 self
2077 }
2078
2079 /// Don't read the mother information from the .fam file.
2080 ///
2081 /// By default, when the .fam is read, the mother id is recorded.
2082 /// This stops that recording. This is useful if the mother id is not needed.
2083 /// Asking for the mother id after skipping it results in an error.
2084 #[must_use]
2085 pub fn skip_mother(mut self) -> Self {
2086 // Unwrap will always work because BedBuilder starting with some skip_set
2087 self.skip_set
2088 .as_mut()
2089 .unwrap()
2090 .insert(MetadataFields::Mother);
2091 self
2092 }
2093
2094 /// Don't read the sex information from the .fam file.
2095 ///
2096 /// By default, when the .fam is read, the sex is recorded.
2097 /// This stops that recording. This is useful if sex is not needed.
2098 /// Asking for sex after skipping it results in an error.
2099 #[must_use]
2100 pub fn skip_sex(mut self) -> Self {
2101 // Unwrap will always work because BedBuilder starting with some skip_set
2102 self.skip_set.as_mut().unwrap().insert(MetadataFields::Sex);
2103 self
2104 }
2105
2106 /// Don't read the phenotype information from the .fam file.
2107 ///
2108 /// Note that the phenotype information in the .fam file is
2109 /// seldom used.
2110 ///
2111 /// By default, when the .fam is read, the phenotype is recorded.
2112 /// This stops that recording. This is useful if this phenotype
2113 /// information is not needed.
2114 /// Asking for the phenotype after skipping it results in an error.
2115 #[must_use]
2116 pub fn skip_pheno(mut self) -> Self {
2117 // Unwrap will always work because BedBuilder starting with some skip_set
2118 self.skip_set
2119 .as_mut()
2120 .unwrap()
2121 .insert(MetadataFields::Pheno);
2122 self
2123 }
2124
2125 /// Don't read the chromosome information from the .bim file.
2126 ///
2127 /// By default, when the .bim is read, the chromosome is recorded.
2128 /// This stops that recording. This is useful if the chromosome is not needed.
2129 /// Asking for the chromosome after skipping it results in an error.
2130 #[must_use]
2131 pub fn skip_chromosome(mut self) -> Self {
2132 // Unwrap will always work because BedBuilder starting with some skip_set
2133 self.skip_set
2134 .as_mut()
2135 .unwrap()
2136 .insert(MetadataFields::Chromosome);
2137 self
2138 }
2139
2140 /// Don't read the SNP id information from the .bim file.
2141 ///
2142 /// By default, when the .bim is read, the sid (SNP id) is recorded.
2143 /// This stops that recording. This is useful if the sid is not needed.
2144 /// Asking for the sid after skipping it results in an error.
2145 #[must_use]
2146 pub fn skip_sid(mut self) -> Self {
2147 // Unwrap will always work because BedBuilder starting with some skip_set
2148 self.skip_set.as_mut().unwrap().insert(MetadataFields::Sid);
2149 self
2150 }
2151
2152 /// Don't read the centimorgan position information from the .bim file.
2153 ///
2154 /// By default, when the .bim is read, the cm position is recorded.
2155 /// This stops that recording. This is useful if the cm position is not needed.
2156 /// Asking for the cm position after skipping it results in an error.
2157 #[must_use]
2158 pub fn skip_cm_position(mut self) -> Self {
2159 // Unwrap will always work because BedBuilder starting with some skip_set
2160 self.skip_set
2161 .as_mut()
2162 .unwrap()
2163 .insert(MetadataFields::CmPosition);
2164 self
2165 }
2166
2167 /// Don't read the base-pair position information from the .bim file.
2168 ///
2169 /// By default, when the .bim is read, the bp position is recorded.
2170 /// This stops that recording. This is useful if the bp position is not needed.
2171 /// Asking for the cp position after skipping it results in an error.
2172 #[must_use]
2173 pub fn skip_bp_position(mut self) -> Self {
2174 // Unwrap will always work because BedBuilder starting with some skip_set
2175 self.skip_set
2176 .as_mut()
2177 .unwrap()
2178 .insert(MetadataFields::BpPosition);
2179 self
2180 }
2181
2182 /// Don't read the allele 1 information from the .bim file.
2183 ///
2184 /// By default, when the .bim is read, allele 1 is recorded.
2185 /// This stops that recording. This is useful if allele 1 is not needed.
2186 /// Asking for allele 1 after skipping it results in an error.
2187 #[must_use]
2188 pub fn skip_allele_1(mut self) -> Self {
2189 // Unwrap will always work because BedBuilder starting with some skip_set
2190 self.skip_set
2191 .as_mut()
2192 .unwrap()
2193 .insert(MetadataFields::Allele1);
2194 self
2195 }
2196
2197 /// Don't read the allele 2 information from the .bim file.
2198 ///
2199 /// By default, when the .bim is read, allele 2 is recorded.
2200 /// This stops that recording. This is useful if allele 2 is not needed.
2201 /// Asking for allele 2 after skipping it results in an error.
2202 #[must_use]
2203 pub fn skip_allele_2(mut self) -> Self {
2204 // Unwrap will always work because BedBuilder starting with some skip_set
2205 self.skip_set
2206 .as_mut()
2207 .unwrap()
2208 .insert(MetadataFields::Allele2);
2209 self
2210 }
2211
2212 /// Override the metadata in the .fam and .bim files with info merged in from a [`Metadata`](struct.Metadata.html).
2213 ///
2214 /// # Example
2215 ///
2216 /// In the example, we create a [`Metadata`](struct.Metadata.html) with iid
2217 /// and sid arrays. Next, we use [`BedBuilder`](struct.BedBuilder.html) to override the fid array
2218 /// and an iid array. Then, we add the metadata to the [`BedBuilder`](struct.BedBuilder.html),
2219 /// overwriting iid (again) and overriding sid. Finally, we print these
2220 /// three arrays and chromosome. Chromosome was never overridden so
2221 /// it is read from the *.bim file.
2222 ///```
2223 /// use ndarray as nd;
2224 /// use bed_reader::{Bed, Metadata, sample_bed_file};
2225 ///
2226 /// let file_name = sample_bed_file("small.bed")?;
2227 /// let metadata = Metadata::builder()
2228 /// .iid(["i1", "i2", "i3"])
2229 /// .sid(["s1", "s2", "s3", "s4"])
2230 /// .build()?;
2231 /// let mut bed = Bed::builder(file_name)
2232 /// .fid(["f1", "f2", "f3"])
2233 /// .iid(["x1", "x2", "x3"])
2234 /// .metadata(&metadata)
2235 /// .build()?;
2236 /// println!("{0:?}", bed.fid()?); // Outputs ndarray ["f1", "f2", "f3"]
2237 /// println!("{0:?}", bed.iid()?); // Outputs ndarray ["i1", "i2", "i3"]
2238 /// println!("{0:?}", bed.sid()?); // Outputs ndarray ["s1", "s2", "s3", "s4"]
2239 /// println!("{0:?}", bed.chromosome()?); // Outputs ndarray ["1", "1", "5", "Y"]
2240 /// # use bed_reader::BedErrorPlus;
2241 /// # Ok::<(), Box<BedErrorPlus>>(())
2242 /// ```
2243 #[must_use]
2244 pub fn metadata(mut self, metadata: &Metadata) -> Self {
2245 self.metadata = Some(
2246 Metadata::builder()
2247 .metadata(&self.metadata.unwrap()) // unwrap is ok because we know we have metadata
2248 .metadata(metadata) // consistent counts will be check later by the BedBuilder
2249 .build_no_file_check()
2250 .unwrap(), // unwrap is ok because nothing can go wrong
2251 );
2252
2253 self
2254 }
2255}
2256
2257#[anyinput]
2258fn to_metadata_path(
2259 bed_path: AnyPath,
2260 metadata_path: &Option<PathBuf>,
2261 extension: AnyString,
2262) -> PathBuf {
2263 if let Some(metadata_path) = metadata_path {
2264 metadata_path.to_owned()
2265 } else {
2266 bed_path.with_extension(extension)
2267 }
2268}
2269
2270impl Bed {
    /// Attempts to open a local PLINK .bed file for reading. Supports options.
    ///
    /// > Also see [`Bed::new`](struct.Bed.html#method.new), which does not support options.
    /// > For reading from the cloud, see [`BedCloud`](struct.BedCloud.html).
    ///
    /// The options, [listed here](struct.BedBuilder.html#implementations), can:
    ///  * set the path of the .fam and/or .bim file
    ///  * override some metadata, for example, replace the individual ids.
    ///  * set the number of individuals (samples) or SNPs (variants)
    ///  * control checking the validity of the .bed file's header
    ///  * skip reading selected metadata
    ///
    /// Note that this method is lazy about holding files, so unlike `std::fs::File::open(&path)`, it
    /// will not necessarily lock the file(s).
    ///
    /// # Errors
    /// By default, this method will return an error if the file is missing or its header
    /// is ill-formed. It will also return an error if the options contradict each other.
    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
    /// for all possible errors.
    ///
    /// # Examples
    /// List individual (sample) [`iid`](struct.Bed.html#method.iid) and
    /// SNP (variant) [`sid`](struct.Bed.html#method.sid),
    /// then [`read`](struct.Bed.html#method.read) the whole file.
    ///
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, assert_eq_nan, sample_bed_file};
    ///
    /// let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::builder(file_name).build()?;
    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
    /// println!("{:?}", bed.sid()?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
    /// let val = bed.read::<f64>()?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1.0, 0.0, f64::NAN, 0.0],
    ///         [2.0, 0.0, f64::NAN, 2.0],
    ///         [0.0, 1.0, 2.0, 0.0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    ///
    /// Replace [`iid`](struct.Bed.html#method.iid).
    /// ```
    /// # use ndarray as nd;
    /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
    /// # let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::builder(file_name)
    ///    .iid(["sample1", "sample2", "sample3"])
    ///    .build()?;
    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["sample1", "sample2", "sample3"]
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    /// Give the number of individuals (samples) and SNPs (variants) so that the .fam and
    /// .bim files need never be opened.
    /// ```
    /// # use ndarray as nd;
    /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
    /// # let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::builder(file_name).iid_count(3).sid_count(4).build()?;
    /// let val = bed.read::<f64>()?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1.0, 0.0, f64::NAN, 0.0],
    ///         [2.0, 0.0, f64::NAN, 2.0],
    ///         [0.0, 1.0, 2.0, 0.0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    /// Mark some properties as "don't read or offer".
    /// ```
    /// # use ndarray as nd;
    /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
    /// # let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::builder(file_name)
    ///     .skip_father()
    ///     .skip_mother()
    ///     .skip_sex()
    ///     .skip_pheno()
    ///     .skip_allele_1()
    ///     .skip_allele_2()
    ///     .build()?;
    /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
    /// bed.allele_2().expect_err("Can't be read");
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[anyinput]
    pub fn builder(path: AnyPath) -> BedBuilder {
        // All option handling lives on BedBuilder; this only creates one for `path`.
        BedBuilder::new(path)
    }
2374
2375 /// Attempts to open a local PLINK .bed file for reading. Does not support options.
2376 ///
2377 /// > Also see [`Bed::builder`](struct.Bed.html#method.builder), which does support options.
2378 /// > For reading from the cloud, see [`BedCloud`](struct.BedCloud.html).
2379 ///
2380 /// Note that this method is a lazy about holding files, so unlike `std::fs::File::open(&path)`, it
2381 /// will not necessarily lock the file(s).
2382 ///
2383 /// # Errors
2384 /// By default, this method will return an error if the file is missing or its header
2385 /// is ill-formed. See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2386 /// for all possible errors.
2387 ///
2388 /// # Examples
2389 /// List individual (sample) [`iid`](struct.Bed.html#method.iid) and
2390 /// SNP (variant) [`sid`](struct.Bed.html#method.sid),
2391 /// then [`read`](struct.Bed.html#method.read) the whole file.
2392 ///
2393 /// ```
2394 /// use ndarray as nd;
2395 /// use bed_reader::{Bed, assert_eq_nan, sample_bed_file};
2396 ///
2397 /// let file_name = sample_bed_file("small.bed")?;
2398 /// let mut bed = Bed::new(file_name)?;
2399 /// println!("{:?}", bed.iid()?); // Outputs ndarray: ["iid1", "iid2", "iid3"]
2400 /// println!("{:?}", bed.sid()?); // Outputs ndarray: ["sid1", "sid2", "sid3", "sid4"]
2401 /// let val = bed.read::<f64>()?;
2402 ///
2403 /// assert_eq_nan(
2404 /// &val,
2405 /// &nd::array![
2406 /// [1.0, 0.0, f64::NAN, 0.0],
2407 /// [2.0, 0.0, f64::NAN, 2.0],
2408 /// [0.0, 1.0, 2.0, 0.0]
2409 /// ],
2410 /// );
2411 /// # use bed_reader::BedErrorPlus;
2412 /// # Ok::<(), Box<BedErrorPlus>>(())
2413 /// ```
2414 ///
2415 /// Open the file and read data for one SNP (variant)
2416 /// at index position 2.
2417 /// ```
2418 /// # use ndarray as nd;
2419 /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2420 /// # let file_name = sample_bed_file("small.bed")?;
2421 ///
2422 /// let mut bed = Bed::new(file_name)?;
2423 /// let val = ReadOptions::builder().sid_index(2).f64().read(&mut bed)?;
2424 ///
2425 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
2426 /// # use bed_reader::BedErrorPlus;
2427 /// # Ok::<(), Box<BedErrorPlus>>(())
2428 /// ```
2429 #[anyinput]
2430 pub fn new(path: AnyPath) -> Result<Self, Box<BedErrorPlus>> {
2431 Bed::builder(path).build()
2432 }
2433
2434 /// Number of individuals (samples)
2435 ///
2436 /// If this number is needed, it will be found
2437 /// by opening the .fam file and quickly counting the number
2438 /// of lines. Once found, the number will be remembered.
2439 /// The file read can be avoided by setting the
2440 /// number with [`BedBuilder::iid_count`](struct.BedBuilder.html#method.iid_count)
2441 /// or, for example, [`BedBuilder::iid`](struct.BedBuilder.html#method.iid).
2442 ///
2443 /// # Example:
2444 /// ```
2445 /// use ndarray as nd;
2446 /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2447 ///
2448 /// let file_name = sample_bed_file("small.bed")?;
2449 /// let mut bed = Bed::new(file_name)?;
2450 /// let iid_count = bed.iid_count()?;
2451 ///
2452 /// assert!(iid_count == 3);
2453 /// # use bed_reader::BedErrorPlus;
2454 /// # Ok::<(), Box<BedErrorPlus>>(())
2455 pub fn iid_count(&mut self) -> Result<usize, Box<BedErrorPlus>> {
2456 if let Some(iid_count) = self.iid_count {
2457 Ok(iid_count)
2458 } else {
2459 let fam_path = self.fam_path();
2460 let iid_count = count_lines(fam_path)?;
2461 self.iid_count = Some(iid_count);
2462 Ok(iid_count)
2463 }
2464 }
2465
2466 /// Number of SNPs (variants)
2467 ///
2468 /// If this number is needed, it will be found
2469 /// by opening the .bim file and quickly counting the number
2470 /// of lines. Once found, the number will be remembered.
2471 /// The file read can be avoided by setting the
2472 /// number with [`BedBuilder::sid_count`](struct.BedBuilder.html#method.sid_count)
2473 /// or, for example, [`BedBuilder::sid`](struct.BedBuilder.html#method.sid).
2474 ///
2475 /// # Example:
2476 /// ```
2477 /// use ndarray as nd;
2478 /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2479 ///
2480 /// let file_name = sample_bed_file("small.bed")?;
2481 /// let mut bed = Bed::new(file_name)?;
2482 /// let sid_count = bed.sid_count()?;
2483 ///
2484 /// assert!(sid_count == 4);
2485 /// # use bed_reader::BedErrorPlus;
2486 /// # Ok::<(), Box<BedErrorPlus>>(())
2487 pub fn sid_count(&mut self) -> Result<usize, Box<BedErrorPlus>> {
2488 if let Some(sid_count) = self.sid_count {
2489 Ok(sid_count)
2490 } else {
2491 let bim_path = self.bim_path();
2492 let sid_count = count_lines(bim_path)?;
2493 self.sid_count = Some(sid_count);
2494 Ok(sid_count)
2495 }
2496 }
2497
2498 /// Number of individuals (samples) and SNPs (variants)
2499 ///
2500 /// If these numbers aren't known, they will be found
2501 /// by opening the .fam and .bim files and quickly counting the number
2502 /// of lines. Once found, the numbers will be remembered.
2503 /// The file read can be avoided by setting the
2504 /// number with [`BedBuilder::iid_count`](struct.BedBuilder.html#method.iid_count)
2505 /// and [`BedBuilder::sid_count`](struct.BedBuilder.html#method.sid_count).
2506 ///
2507 /// # Example:
2508 /// ```
2509 /// use ndarray as nd;
2510 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2511 /// use bed_reader::assert_eq_nan;
2512 ///
2513 /// let file_name = sample_bed_file("small.bed")?;
2514 /// let mut bed = Bed::new(file_name)?;
2515 /// let dim = bed.dim()?;
2516 ///
2517 /// assert!(dim == (3,4));
2518 /// # use bed_reader::BedErrorPlus;
2519 /// # Ok::<(), Box<BedErrorPlus>>(())
2520 pub fn dim(&mut self) -> Result<(usize, usize), Box<BedErrorPlus>> {
2521 Ok((self.iid_count()?, self.sid_count()?))
2522 }
2523
2524 /// Family id of each of individual (sample)
2525 ///
2526 /// If this ndarray is needed, it will be found
2527 /// by reading the .fam file. Once found, this ndarray
2528 /// and other information in the .fam file will be remembered.
2529 /// The file read can be avoided by setting the
2530 /// array with [`BedBuilder::fid`](struct.BedBuilder.html#method.fid).
2531 ///
2532 /// # Example:
2533 /// ```
2534 /// use ndarray as nd;
2535 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2536 /// use bed_reader::assert_eq_nan;
2537 ///
2538 /// let file_name = sample_bed_file("small.bed")?;
2539 /// let mut bed = Bed::new(file_name)?;
2540 /// let fid = bed.fid()?;
2541 /// println!("{fid:?}"); // Outputs ndarray ["fid1", "fid1", "fid2"]
2542 /// # use bed_reader::BedErrorPlus;
2543 /// # Ok::<(), Box<BedErrorPlus>>(())
2544 pub fn fid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2545 self.unlazy_fam::<String>(self.metadata.fid.is_none(), MetadataFields::Fid, "fid")?;
2546 Ok(self.metadata.fid.as_ref().unwrap()) //unwrap always works because of lazy_fam
2547 }
2548
2549 /// Individual id of each of individual (sample)
2550 ///
2551 /// If this ndarray is needed, it will be found
2552 /// by reading the .fam file. Once found, this ndarray
2553 /// and other information in the .fam file will be remembered.
2554 /// The file read can be avoided by setting the
2555 /// array with [`BedBuilder::iid`](struct.BedBuilder.html#method.iid).
2556 ///
2557 /// # Example:
2558 /// ```
2559 /// use ndarray as nd;
2560 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2561 /// use bed_reader::assert_eq_nan;
2562 ///
2563 /// let file_name = sample_bed_file("small.bed")?;
2564 /// let mut bed = Bed::new(file_name)?;
2565 /// let iid = bed.iid()?; ///
2566 /// println!("{iid:?}"); // Outputs ndarray ["iid1", "iid2", "iid3"]
2567 /// # use bed_reader::BedErrorPlus;
2568 /// # Ok::<(), Box<BedErrorPlus>>(())
2569 pub fn iid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2570 self.unlazy_fam::<String>(self.metadata.iid.is_none(), MetadataFields::Iid, "iid")?;
2571 Ok(self.metadata.iid.as_ref().unwrap()) //unwrap always works because of lazy_fam
2572 }
2573
2574 /// Father id of each of individual (sample)
2575 ///
2576 /// If this ndarray is needed, it will be found
2577 /// by reading the .fam file. Once found, this ndarray
2578 /// and other information in the .fam file will be remembered.
2579 /// The file read can be avoided by setting the
2580 /// array with [`BedBuilder::father`](struct.BedBuilder.html#method.father).
2581 ///
2582 /// # Example:
2583 /// ```
2584 /// use ndarray as nd;
2585 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2586 /// use bed_reader::assert_eq_nan;
2587 ///
2588 /// let file_name = sample_bed_file("small.bed")?;
2589 /// let mut bed = Bed::new(file_name)?;
2590 /// let father = bed.father()?;
2591 /// println!("{father:?}"); // Outputs ndarray ["iid23", "iid23", "iid22"]
2592 /// # use bed_reader::BedErrorPlus;
2593 /// # Ok::<(), Box<BedErrorPlus>>(())
2594 pub fn father(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2595 self.unlazy_fam::<String>(
2596 self.metadata.father.is_none(),
2597 MetadataFields::Father,
2598 "father",
2599 )?;
2600 Ok(self.metadata.father.as_ref().unwrap()) //unwrap always works because of lazy_fam
2601 }
2602
2603 /// Mother id of each of individual (sample)
2604 ///
2605 /// If this ndarray is needed, it will be found
2606 /// by reading the .fam file. Once found, this ndarray
2607 /// and other information in the .fam file will be remembered.
2608 /// The file read can be avoided by setting the
2609 /// array with [`BedBuilder::mother`](struct.BedBuilder.html#method.mother).
2610 ///
2611 /// # Example:
2612 /// ```
2613 /// use ndarray as nd;
2614 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2615 /// use bed_reader::assert_eq_nan;
2616 ///
2617 /// let file_name = sample_bed_file("small.bed")?;
2618 /// let mut bed = Bed::new(file_name)?;
2619 /// let mother = bed.mother()?;
2620 /// println!("{mother:?}"); // Outputs ndarray ["iid34", "iid34", "iid33"]
2621 /// # use bed_reader::BedErrorPlus;
2622 /// # Ok::<(), Box<BedErrorPlus>>(())
2623 pub fn mother(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2624 self.unlazy_fam::<String>(
2625 self.metadata.mother.is_none(),
2626 MetadataFields::Mother,
2627 "mother",
2628 )?;
2629 Ok(self.metadata.mother.as_ref().unwrap()) //unwrap always works because of lazy_fam
2630 }
2631
2632 /// Sex each of individual (sample)
2633 ///
2634 /// 0 is unknown, 1 is male, 2 is female
2635 ///
2636 /// If this ndarray is needed, it will be found
2637 /// by reading the .fam file. Once found, this ndarray
2638 /// and other information in the .fam file will be remembered.
2639 /// The file read can be avoided by setting the
2640 /// array with [`BedBuilder::sex`](struct.BedBuilder.html#method.sex).
2641 ///
2642 /// # Example:
2643 /// ```
2644 /// use ndarray as nd;
2645 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2646 /// use bed_reader::assert_eq_nan;
2647 ///
2648 /// let file_name = sample_bed_file("small.bed")?;
2649 /// let mut bed = Bed::new(file_name)?;
2650 /// let sex = bed.sex()?;
2651 /// println!("{sex:?}"); // Outputs ndarray [1, 2, 0]
2652 /// # use bed_reader::BedErrorPlus;
2653 /// # Ok::<(), Box<BedErrorPlus>>(())
2654 pub fn sex(&mut self) -> Result<&nd::Array1<i32>, Box<BedErrorPlus>> {
2655 self.unlazy_fam::<String>(self.metadata.sex.is_none(), MetadataFields::Sex, "sex")?;
2656 Ok(self.metadata.sex.as_ref().unwrap()) //unwrap always works because of lazy_fam
2657 }
2658
2659 /// A phenotype for each individual (seldom used)
2660 ///
2661 /// If this ndarray is needed, it will be found
2662 /// by reading the .fam file. Once found, this ndarray
2663 /// and other information in the .fam file will be remembered.
2664 /// The file read can be avoided by setting the
2665 /// array with [`BedBuilder::pheno`](struct.BedBuilder.html#method.pheno).
2666 ///
2667 /// # Example:
2668 /// ```
2669 /// use ndarray as nd;
2670 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2671 /// use bed_reader::assert_eq_nan;
2672 ///
2673 /// let file_name = sample_bed_file("small.bed")?;
2674 /// let mut bed = Bed::new(file_name)?;
2675 /// let pheno = bed.pheno()?;
2676 /// println!("{pheno:?}"); // Outputs ndarray ["red", "red", "blue"]
2677 /// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
2679 pub fn pheno(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2680 self.unlazy_fam::<String>(
2681 self.metadata.pheno.is_none(),
2682 MetadataFields::Pheno,
2683 "pheno",
2684 )?;
2685 Ok(self.metadata.pheno.as_ref().unwrap()) //unwrap always works because of lazy_fam
2686 }
2687
2688 /// Chromosome of each SNP (variant)
2689 ///
2690 /// If this ndarray is needed, it will be found
2691 /// by reading the .bim file. Once found, this ndarray
2692 /// and other information in the .bim file will be remembered.
2693 /// The file read can be avoided by setting the
2694 /// array with [`BedBuilder::chromosome`](struct.BedBuilder.html#method.chromosome).
2695 ///
2696 /// # Example:
2697 /// ```
2698 /// use ndarray as nd;
2699 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2700 /// use bed_reader::assert_eq_nan;
2701 ///
2702 /// let file_name = sample_bed_file("small.bed")?;
2703 /// let mut bed = Bed::new(file_name)?;
2704 /// let chromosome = bed.chromosome()?;
2705 /// println!("{chromosome:?}"); // Outputs ndarray ["1", "1", "5", "Y"]
2706 /// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
2708 pub fn chromosome(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2709 self.unlazy_bim::<String>(
2710 self.metadata.chromosome.is_none(),
2711 MetadataFields::Chromosome,
2712 "chromosome",
2713 )?;
2714 Ok(self.metadata.chromosome.as_ref().unwrap()) //unwrap always works because of lazy_bim
2715 }
2716
2717 /// SNP id of each SNP (variant)
2718 ///
2719 /// If this ndarray is needed, it will be found
2720 /// by reading the .bim file. Once found, this ndarray
2721 /// and other information in the .bim file will be remembered.
2722 /// The file read can be avoided by setting the
2723 /// array with [`BedBuilder::sid`](struct.BedBuilder.html#method.sid).
2724 ///
2725 /// # Example:
2726 /// ```
2727 /// use ndarray as nd;
2728 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2729 /// use bed_reader::assert_eq_nan;
2730 ///
2731 /// let file_name = sample_bed_file("small.bed")?;
2732 /// let mut bed = Bed::new(file_name)?;
2733 /// let sid = bed.sid()?;
/// println!("{sid:?}"); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
/// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
2737 pub fn sid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2738 self.unlazy_bim::<String>(self.metadata.sid.is_none(), MetadataFields::Sid, "sid")?;
2739 Ok(self.metadata.sid.as_ref().unwrap()) //unwrap always works because of lazy_bim
2740 }
2741
2742 /// Centimorgan position of each SNP (variant)
2743 ///
2744 /// If this ndarray is needed, it will be found
2745 /// by reading the .bim file. Once found, this ndarray
2746 /// and other information in the .bim file will be remembered.
2747 /// The file read can be avoided by setting the
2748 /// array with [`BedBuilder::cm_position`](struct.BedBuilder.html#method.cm_position).
2749 ///
2750 /// # Example:
2751 /// ```
2752 /// use ndarray as nd;
2753 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2754 /// use bed_reader::assert_eq_nan;
2755 ///
2756 /// let file_name = sample_bed_file("small.bed")?;
2757 /// let mut bed = Bed::new(file_name)?;
2758 /// let cm_position = bed.cm_position()?;
2759 /// println!("{cm_position:?}"); // Outputs ndarray [100.4, 2000.5, 4000.7, 7000.9]
2760 /// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
2762 pub fn cm_position(&mut self) -> Result<&nd::Array1<f32>, Box<BedErrorPlus>> {
2763 self.unlazy_bim::<String>(
2764 self.metadata.cm_position.is_none(),
2765 MetadataFields::CmPosition,
2766 "cm_position",
2767 )?;
2768 Ok(self.metadata.cm_position.as_ref().unwrap()) //unwrap always works because of lazy_bim
2769 }
2770
2771 /// Base-pair position of each SNP (variant)
2772 ///
2773 /// If this ndarray is needed, it will be found
2774 /// by reading the .bim file. Once found, this ndarray
2775 /// and other information in the .bim file will be remembered.
2776 /// The file read can be avoided by setting the
2777 /// array with [`BedBuilder::bp_position`](struct.BedBuilder.html#method.bp_position).
2778 ///
2779 /// # Example:
2780 /// ```
2781 /// use ndarray as nd;
2782 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2783 /// use bed_reader::assert_eq_nan;
2784 ///
2785 /// let file_name = sample_bed_file("small.bed")?;
2786 /// let mut bed = Bed::new(file_name)?;
2787 /// let bp_position = bed.bp_position()?;
2788 /// println!("{bp_position:?}"); // Outputs ndarray [1, 100, 1000, 1004]
2789 /// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
2791 pub fn bp_position(&mut self) -> Result<&nd::Array1<i32>, Box<BedErrorPlus>> {
2792 self.unlazy_bim::<String>(
2793 self.metadata.bp_position.is_none(),
2794 MetadataFields::BpPosition,
2795 "bp_position",
2796 )?;
2797 Ok(self.metadata.bp_position.as_ref().unwrap()) //unwrap always works because of lazy_bim
2798 }
2799
2800 /// First allele of each SNP (variant)
2801 ///
2802 /// If this ndarray is needed, it will be found
2803 /// by reading the .bim file. Once found, this ndarray
2804 /// and other information in the .bim file will be remembered.
2805 /// The file read can be avoided by setting the
2806 /// array with [`BedBuilder::allele_1`](struct.BedBuilder.html#method.allele_1).
2807 ///
2808 /// # Example:
2809 /// ```
2810 /// use ndarray as nd;
2811 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2812 /// use bed_reader::assert_eq_nan;
2813 ///
2814 /// let file_name = sample_bed_file("small.bed")?;
2815 /// let mut bed = Bed::new(file_name)?;
2816 /// let allele_1 = bed.allele_1()?;
2817 /// println!("{allele_1:?}"); // Outputs ndarray ["A", "T", "A", "T"]
2818 /// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
2820 pub fn allele_1(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2821 self.unlazy_bim::<String>(
2822 self.metadata.allele_1.is_none(),
2823 MetadataFields::Allele1,
2824 "allele_1",
2825 )?;
2826 Ok(self.metadata.allele_1.as_ref().unwrap()) //unwrap always works because of lazy_bim
2827 }
2828
2829 /// Second allele of each SNP (variant)
2830 ///
2831 /// If this ndarray is needed, it will be found
2832 /// by reading the .bim file. Once found, this ndarray
2833 /// and other information in the .bim file will be remembered.
2834 /// The file read can be avoided by setting the
2835 /// array with [`BedBuilder::allele_2`](struct.BedBuilder.html#method.allele_2).
2836 ///
2837 /// # Example:
2838 /// ```
2839 /// use ndarray as nd;
2840 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2841 /// use bed_reader::assert_eq_nan;
2842 ///
2843 /// let file_name = sample_bed_file("small.bed")?;
2844 /// let mut bed = Bed::new(file_name)?;
2845 /// let allele_2 = bed.allele_2()?;
2846 /// println!("{allele_2:?}"); // Outputs ndarray ["A", "C", "C", "G"]
2847 /// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
2849 pub fn allele_2(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2850 self.unlazy_bim::<String>(
2851 self.metadata.allele_2.is_none(),
2852 MetadataFields::Allele2,
2853 "allele_2",
2854 )?;
2855 Ok(self.metadata.allele_2.as_ref().unwrap()) //unwrap always works because of lazy_bim
2856 }
2857
2858 /// [`Metadata`](struct.Metadata.html) for this dataset, for example, the individual (sample) Ids.
2859 ///
2860 /// This returns a struct with 12 fields. Each field is a ndarray.
2861 /// The struct will always be new, but the 12 ndarrays will be
2862 /// shared with this [`Bed`](struct.Bed.html).
2863 ///
/// If needed, the metadata will be read from the .fam and/or .bim files.
2865 /// ```
2866 /// use ndarray as nd;
2867 /// use bed_reader::{Bed, sample_bed_file};
2868 ///
2869 /// let file_name = sample_bed_file("small.bed")?;
2870 /// let mut bed = Bed::new(file_name)?;
2871 /// let metadata = bed.metadata()?;
2872 /// println!("{0:?}", metadata.iid()); // Outputs Some(["iid1", "iid2", "iid3"] ...)
2873 /// println!("{0:?}", metadata.sid()); // Outputs Some(["sid1", "sid2", "sid3", "sid4"] ...)
2874 /// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
2876 pub fn metadata(&mut self) -> Result<Metadata, Box<BedErrorPlus>> {
2877 self.fam()?;
2878 self.bim()?;
2879 Ok(self.metadata.clone())
2880 }
2881
2882 /// Return the path of the .bed file.
#[must_use]
pub fn path(&self) -> &Path {
    // Borrowed view of the stored .bed location; nothing is copied or modified.
    &self.path
}
2887
2888 /// Return the path of the .fam file.
2889 pub fn fam_path(&mut self) -> PathBuf {
2890 // We need to clone the path because self might mutate later
2891 if let Some(path) = &self.fam_path {
2892 path.clone()
2893 } else {
2894 let path = to_metadata_path(&self.path, &self.fam_path, "fam");
2895 self.fam_path = Some(path.clone());
2896 path
2897 }
2898 }
2899
2900 /// Return the path of the .bim file.
2901 pub fn bim_path(&mut self) -> PathBuf {
2902 // We need to clone the path because self might mutate later
2903 if let Some(path) = &self.bim_path {
2904 path.clone()
2905 } else {
2906 let path = to_metadata_path(&self.path, &self.bim_path, "bim");
2907 self.bim_path = Some(path.clone());
2908 path
2909 }
2910 }
2911
2912 /// Read genotype data.
2913 ///
2914 /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) which supports selection and options.
2915 ///
2916 /// # Errors
2917 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2918 /// for all possible errors.
2919 ///
2920 /// # Examples
2921 /// Read all data in a .bed file.
2922 ///
2923 /// ```
2924 /// use ndarray as nd;
2925 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2926 /// use bed_reader::assert_eq_nan;
2927 ///
2928 /// let file_name = sample_bed_file("small.bed")?;
2929 /// let mut bed = Bed::new(file_name)?;
2930 /// let val = bed.read::<f64>()?;
2931 ///
2932 /// assert_eq_nan(
2933 /// &val,
2934 /// &nd::array![
2935 /// [1.0, 0.0, f64::NAN, 0.0],
2936 /// [2.0, 0.0, f64::NAN, 2.0],
2937 /// [0.0, 1.0, 2.0, 0.0]
2938 /// ],
2939 /// );
2940 ///
2941 /// // Your output array can be f32, f64, or i8
2942 /// let val = bed.read::<i8>()?;
2943 /// assert_eq_nan(
2944 /// &val,
2945 /// &nd::array![
2946 /// [1, 0, -127, 0],
2947 /// [2, 0, -127, 2],
2948 /// [0, 1, 2, 0]
2949 /// ],
2950 /// );
2951 /// # use bed_reader::BedErrorPlus;
2952 /// # Ok::<(), Box<BedErrorPlus>>(())
2953 /// ```
2954 pub fn read<TVal: BedVal>(&mut self) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
2955 let read_options = ReadOptions::<TVal>::builder().build()?;
2956 self.read_with_options(&read_options)
2957 }
2958
2959 /// Read genotype data with options, into a preallocated array.
2960 ///
2961 /// > Also see [`ReadOptionsBuilder::read_and_fill`](struct.ReadOptionsBuilder.html#method.read_and_fill).
2962 ///
2963 /// Note that options [`ReadOptions::f`](struct.ReadOptions.html#method.f),
2964 /// [`ReadOptions::c`](struct.ReadOptions.html#method.c), and [`ReadOptions::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
2965 /// are ignored. Instead, the order of the preallocated array is used.
2966 ///
2967 /// # Errors
2968 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2969 /// for all possible errors.
2970 ///
2971 /// # Example
2972 ///
2973 /// ```
2974 /// use ndarray as nd;
2975 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2976 /// use bed_reader::assert_eq_nan;
2977 ///
2978 /// // Read the SNPs indexed by 2.
2979 /// let file_name = sample_bed_file("small.bed")?;
2980 /// let mut bed = Bed::new(file_name)?;
2981 /// let read_options = ReadOptions::builder().sid_index(2).build()?;
2982 /// let mut val = nd::Array2::<f64>::default((3, 1));
2983 /// bed.read_and_fill_with_options(&mut val.view_mut(), &read_options)?;
2984 ///
2985 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
2986 /// # use bed_reader::BedErrorPlus;
2987 /// # Ok::<(), Box<BedErrorPlus>>(())
2988 /// ```
2989 pub fn read_and_fill_with_options<TVal: BedVal>(
2990 &mut self,
2991 val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.,
2992 read_options: &ReadOptions<TVal>,
2993 ) -> Result<(), Box<BedErrorPlus>> {
2994 let iid_count = self.iid_count()?;
2995 let sid_count = self.sid_count()?;
2996
2997 let num_threads = compute_num_threads(read_options.num_threads)?;
2998
2999 // If we already have a Vec<isize>, reference it. If we don't, create one and reference it.
3000 let iid_hold = Hold::new(&read_options.iid_index, iid_count)?;
3001 let iid_index = iid_hold.as_ref();
3002 let sid_hold = Hold::new(&read_options.sid_index, sid_count)?;
3003 let sid_index = sid_hold.as_ref();
3004
3005 let dim = val.dim();
3006 if dim != (iid_index.len(), sid_index.len()) {
3007 Err(BedError::InvalidShape(
3008 iid_index.len(),
3009 sid_index.len(),
3010 dim.0,
3011 dim.1,
3012 ))?;
3013 }
3014
3015 read_no_alloc(
3016 &self.path,
3017 iid_count,
3018 sid_count,
3019 read_options.is_a1_counted,
3020 iid_index,
3021 sid_index,
3022 read_options.missing_value,
3023 num_threads,
3024 &mut val.view_mut(),
3025 )?;
3026
3027 Ok(())
3028 }
3029
3030 /// Read all genotype data into a preallocated array.
3031 ///
3032 /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
3033 ///
3034 /// # Errors
3035 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
3036 /// for all possible errors.
3037 ///
3038 /// # Example
3039 ///
3040 /// ```
3041 /// use ndarray as nd;
3042 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
3043 /// use bed_reader::assert_eq_nan;
3044 ///
3045 /// let file_name = sample_bed_file("small.bed")?;
3046 /// let mut bed = Bed::new(file_name)?;
3047 /// let mut val = nd::Array2::<i8>::default(bed.dim()?);
3048 /// bed.read_and_fill(&mut val.view_mut())?;
3049 ///
3050 /// assert_eq_nan(
3051 /// &val,
3052 /// &nd::array![
3053 /// [1, 0, -127, 0],
3054 /// [2, 0, -127, 2],
3055 /// [0, 1, 2, 0]
3056 /// ],
3057 /// );
3058 /// # use bed_reader::BedErrorPlus;
3059 /// # Ok::<(), Box<BedErrorPlus>>(())
3060 /// ```
3061 pub fn read_and_fill<TVal: BedVal>(
3062 &mut self,
3063 val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.,
3064 ) -> Result<(), Box<BedErrorPlus>> {
3065 let read_options = ReadOptions::<TVal>::builder().build()?;
3066 self.read_and_fill_with_options(val, &read_options)
3067 }
3068
3069 /// Read genotype data with options.
3070 ///
3071 /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
3072 ///
3073 /// # Errors
3074 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
3075 /// for all possible errors.
3076 ///
3077 /// # Example
3078 ///
3079 /// ```
3080 /// use ndarray as nd;
3081 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
3082 /// use bed_reader::assert_eq_nan;
3083 ///
3084 /// // Read the SNPs indexed by 2.
3085 /// let file_name = sample_bed_file("small.bed")?;
3086 /// let mut bed = Bed::new(file_name)?;
3087 /// let read_options = ReadOptions::builder().sid_index(2).f64().build()?;
3088 /// let val = bed.read_with_options(&read_options)?;
3089 ///
3090 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
3091 /// # use bed_reader::BedErrorPlus;
3092 /// # Ok::<(), Box<BedErrorPlus>>(())
3093 /// ```
3094 pub fn read_with_options<TVal: BedVal>(
3095 &mut self,
3096 read_options: &ReadOptions<TVal>,
3097 ) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
3098 let iid_count_in = self.iid_count()?;
3099 let sid_count_in = self.sid_count()?;
3100 let iid_count_out = read_options.iid_index.len(iid_count_in)?;
3101 let sid_count_out = read_options.sid_index.len(sid_count_in)?;
3102 let shape = ShapeBuilder::set_f((iid_count_out, sid_count_out), read_options.is_f);
3103 let mut val = nd::Array2::<TVal>::default(shape);
3104
3105 self.read_and_fill_with_options(&mut val.view_mut(), read_options)?;
3106
3107 Ok(val)
3108 }
3109 /// Write genotype data with default metadata.
3110 ///
3111 /// > Also see [`WriteOptions::builder`](struct.WriteOptions.html#method.builder), which supports metadata and options.
3112 ///
3113 /// # Errors
3114 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
3115 /// for all possible errors.
3116 ///
3117 /// # Example
3118 /// In this example, write genotype data using default metadata.
3119 /// ```
3120 /// use ndarray as nd;
3121 /// use bed_reader::{Bed, WriteOptions};
3122 ///
3123 /// let output_folder = temp_testdir::TempDir::default();
3124 /// let output_file = output_folder.join("small.bed");
3125 ///
3126 /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
3127 /// Bed::write(&val, &output_file)?;
3128 ///
3129 /// // If we then read the new file and list the chromosome property,
3130 /// // it is an array of zeros, the default chromosome value.
3131 /// let mut bed2 = Bed::new(&output_file)?;
3132 /// println!("{:?}", bed2.chromosome()?); // Outputs ndarray ["0", "0", "0", "0"]
3133 /// # use bed_reader::BedErrorPlus;
3134 /// # Ok::<(), Box<BedErrorPlus>>(())
3135 /// ```
3136 pub fn write<S: nd::Data<Elem = TVal>, TVal: BedVal>(
3137 val: &nd::ArrayBase<S, nd::Ix2>,
3138 path: &Path,
3139 ) -> Result<(), Box<BedErrorPlus>> {
3140 WriteOptions::builder(path).write(val)
3141 }
3142
/// Given a 2D array of genotype data and a [`WriteOptions`](struct.WriteOptionsBuilder.html), write to a .bed file.
3144 ///
3145 /// > Also see [`WriteOptionsBuilder::write`](struct.WriteOptionsBuilder.html#method.write), which creates
3146 /// > a [`WriteOptions`](struct.WriteOptionsBuilder.html) and writes to file in one step.
3147 ///
3148 /// # Example
3149 /// ```
3150 /// use ndarray as nd;
3151 /// use bed_reader::{Bed, WriteOptions};
3152 ///
3153 /// let val = nd::array![
3154 /// [1.0, 0.0, f64::NAN, 0.0],
3155 /// [2.0, 0.0, f64::NAN, 2.0],
3156 /// [0.0, 1.0, 2.0, 0.0]
3157 /// ];
3158 ///
3159 /// let output_folder = temp_testdir::TempDir::default();
3160 /// let output_file = output_folder.join("small.bed");
3161 /// let write_options = WriteOptions::builder(output_file)
3162 /// .iid(["iid1", "iid2", "iid3"])
3163 /// .sid(["sid1", "sid2", "sid3", "sid4"])
3164 /// .build(3,4)?;
3165 ///
3166 /// Bed::write_with_options(&val, &write_options)?;
3167 /// # use bed_reader::BedErrorPlus;
3168 /// # Ok::<(), Box<BedErrorPlus>>(())
3169 /// ```
3170 pub fn write_with_options<S, TVal>(
3171 val: &nd::ArrayBase<S, nd::Ix2>,
3172 write_options: &WriteOptions<TVal>,
3173 ) -> Result<(), Box<BedErrorPlus>>
3174 where
3175 S: nd::Data<Elem = TVal>,
3176 TVal: BedVal,
3177 {
3178 let (iid_count, sid_count) = val.dim();
3179 if iid_count != write_options.iid_count() {
3180 Err(BedError::InconsistentCount(
3181 "iid".into(),
3182 write_options.iid_count(),
3183 iid_count,
3184 ))?;
3185 }
3186 if sid_count != write_options.sid_count() {
3187 Err(BedError::InconsistentCount(
3188 "sid".into(),
3189 write_options.sid_count(),
3190 sid_count,
3191 ))?;
3192 }
3193
3194 let num_threads = compute_num_threads(write_options.num_threads)?;
3195 write_val(
3196 &write_options.path,
3197 val,
3198 write_options.is_a1_counted,
3199 write_options.missing_value,
3200 num_threads,
3201 )?;
3202
3203 if !write_options.skip_fam() {
3204 if let Err(e) = write_options.metadata.write_fam(write_options.fam_path()) {
3205 // Clean up the file
3206 let _ = fs::remove_file(&write_options.fam_path);
3207 Err(e)?;
3208 }
3209 }
3210
3211 if !write_options.skip_bim() {
3212 if let Err(e) = write_options.metadata.write_bim(write_options.bim_path()) {
3213 // Clean up the file
3214 let _ = fs::remove_file(&write_options.bim_path);
3215 Err(e)?;
3216 }
3217 }
3218
3219 Ok(())
3220 }
3221
3222 fn unlazy_fam<T: FromStringArray<T>>(
3223 &mut self,
3224 is_none: bool,
3225 field_index: MetadataFields,
3226 name: &str,
3227 ) -> Result<(), Box<BedErrorPlus>> {
3228 if self.skip_set.contains(&field_index) {
3229 Err(BedError::CannotUseSkippedMetadata(name.to_string()))?;
3230 }
3231 if is_none {
3232 self.fam()?;
3233 }
3234 Ok(())
3235 }
3236
3237 fn unlazy_bim<T: FromStringArray<T>>(
3238 &mut self,
3239 is_none: bool,
3240 field_index: MetadataFields,
3241 name: &str,
3242 ) -> Result<(), Box<BedErrorPlus>> {
3243 if self.skip_set.contains(&field_index) {
3244 Err(BedError::CannotUseSkippedMetadata(name.to_string()))?;
3245 }
3246 if is_none {
3247 self.bim()?;
3248 }
3249 Ok(())
3250 }
3251
3252 fn fam(&mut self) -> Result<(), Box<BedErrorPlus>> {
3253 let fam_path = self.fam_path();
3254
3255 let (metadata, count) = self.metadata.read_fam(fam_path, &self.skip_set)?;
3256 self.metadata = metadata;
3257
3258 match self.iid_count {
3259 Some(iid_count) => {
3260 if iid_count != count {
3261 Err(BedError::InconsistentCount(
3262 "iid".to_string(),
3263 iid_count,
3264 count,
3265 ))?;
3266 }
3267 }
3268 None => {
3269 self.iid_count = Some(count);
3270 }
3271 }
3272 Ok(())
3273 }
3274
3275 fn bim(&mut self) -> Result<(), Box<BedErrorPlus>> {
3276 let bim_path = self.bim_path();
3277
3278 let (metadata, count) = self.metadata.read_bim(bim_path, &self.skip_set)?;
3279 self.metadata = metadata;
3280
3281 match self.sid_count {
3282 Some(sid_count) => {
3283 if sid_count != count {
3284 Err(BedError::InconsistentCount(
3285 "sid".to_string(),
3286 sid_count,
3287 count,
3288 ))?;
3289 }
3290 }
3291 None => {
3292 self.sid_count = Some(count);
3293 }
3294 }
3295 Ok(())
3296 }
3297}
3298
/// A vector of `isize` indices that is either borrowed or owned.
///
/// If we already have a `Vec<isize>`, remember a reference to it.
/// If we don't, then create one and keep it here.
enum Hold<'a> {
    /// An owned vector, built by `Hold::new` from any `Index` that is not `Index::Vec`.
    Copy(Vec<isize>),
    /// A borrowed vector, taken by `Hold::new` directly from an `Index::Vec`.
    Ref(&'a Vec<isize>),
}
3305
3306impl Hold<'_> {
3307 fn new(index: &Index, count: usize) -> Result<Hold, Box<BedErrorPlus>> {
3308 let hold = if let Index::Vec(vec) = index {
3309 Hold::Ref(vec)
3310 } else {
3311 Hold::Copy(index.to_vec(count)?)
3312 };
3313 Ok(hold)
3314 }
3315
3316 fn as_ref(&self) -> &Vec<isize> {
3317 match self {
3318 Hold::Ref(vec) => vec,
3319 Hold::Copy(ref vec) => vec,
3320 }
3321 }
3322}
3323
3324fn compute_num_threads(option_num_threads: Option<usize>) -> Result<usize, Box<BedErrorPlus>> {
3325 let num_threads = if let Some(num_threads) = option_num_threads {
3326 num_threads
3327 } else if let Ok(num_threads) = env::var("BED_READER_NUM_THREADS") {
3328 num_threads.parse::<usize>()?
3329 } else if let Ok(num_threads) = env::var("NUM_THREADS") {
3330 num_threads.parse::<usize>()?
3331 } else {
3332 0
3333 };
3334 Ok(num_threads)
3335}
3336
3337#[allow(clippy::unnecessary_wraps)]
3338fn compute_max_concurrent_requests(
3339 option_max_concurrent_requests: Option<usize>,
3340) -> Result<usize, Box<BedErrorPlus>> {
3341 // In the future, we might want to set this with an environment variable.
3342 let max_concurrent_requests = option_max_concurrent_requests.unwrap_or(10);
3343 Ok(max_concurrent_requests)
3344}
3345
3346#[allow(clippy::unnecessary_wraps)]
3347fn compute_max_chunk_bytes(
3348 option_max_chunk_bytes: Option<usize>,
3349) -> Result<usize, Box<BedErrorPlus>> {
3350 // In the future, we might want to set this with an environment variable.
3351 let max_chunk_bytes = option_max_chunk_bytes.unwrap_or(8_000_000);
3352 Ok(max_chunk_bytes)
3353}
3354
3355impl Index {
3356 // We can't define a 'From' because we want to add count at the last moment.
3357 // Later Would be nice to not always allocate a new vec, maybe with Rc<[T]>?
3358 // Even better would be to support an iterator from Index (an enum with fields).
3359
3360 /// Turns an [`Index`](enum.Index.html) into a vector of usize indexes. Negative means count from end.
3361 pub fn to_vec(&self, count: usize) -> Result<Vec<isize>, Box<BedErrorPlus>> {
3362 let count_signed = count as isize;
3363 match self {
3364 Index::All => Ok((0..count_signed).collect()),
3365 Index::Vec(vec) => Ok(vec.clone()),
3366 Index::NDArrayBool(nd_array_bool) => {
3367 if nd_array_bool.len() != count {
3368 Err(BedError::BoolArrayVectorWrongLength(
3369 count,
3370 nd_array_bool.len(),
3371 ))?;
3372 }
3373 Ok(nd_array_bool
3374 .iter()
3375 .enumerate()
3376 .filter(|(_, b)| **b)
3377 .map(|(i, _)| i as isize)
3378 .collect())
3379 }
3380 Index::NDSliceInfo(nd_slice_info) => {
3381 Ok(RangeNdSlice::new(nd_slice_info, count)?.to_vec())
3382 }
3383 Index::RangeAny(range_any) => {
3384 let range = range_any.to_range(count)?;
3385 Ok(range.map(|i| i as isize).collect::<Vec<isize>>())
3386 }
3387 Index::NDArray(nd_array) => Ok(nd_array.to_vec()),
3388 Index::One(one) => Ok(vec![*one]),
3389 Index::VecBool(vec_bool) => {
3390 if vec_bool.len() != count {
3391 Err(BedError::BoolArrayVectorWrongLength(count, vec_bool.len()))?;
3392 }
3393 Ok(vec_bool
3394 .iter()
3395 .enumerate()
3396 .filter(|(_, b)| **b)
3397 .map(|(i, _)| i as isize)
3398 .collect())
3399 }
3400 }
3401 }
3402}
3403
#[allow(clippy::doc_markdown)]
/// Type alias for 1-D slices of NDArrays.
///
/// It is an `ndarray` `SliceInfo` holding exactly one `SliceInfoElem`, with a
/// 1-dimensional input and a 1-dimensional output. This is the payload type of
/// `Index::NDSliceInfo`.
pub type SliceInfo1 =
    nd::SliceInfo<[nd::SliceInfoElem; 1], nd::Dim<[usize; 1]>, nd::Dim<[usize; 1]>>;
3408
/// A specification of which individuals (samples) or SNPs (variants) to read.
///
/// See the [Table of Index Expressions](index.html#index-expressions)
/// for a list of expressions for selecting individuals (samples)
/// and SNPs (variants).
///
/// By default, all individuals or SNPs are read.
/// The indices can be specified as:
///   * an index (negative numbers count from the end)
///   * a vector or ndarray of indices
///   * a Rust range (negatives not allowed)
///   * a vector or ndarray of booleans
///   * an ndarray slice (negative indexing and steps allowed)
///
/// # Examples
/// ```
/// use ndarray as nd;
/// use bed_reader::{Bed, ReadOptions, sample_bed_file};
/// use bed_reader::assert_eq_nan;
/// use ndarray::s;
///
/// let file_name = sample_bed_file("some_missing.bed")?;
/// let mut bed = Bed::new(file_name)?;
/// println!("{:?}", bed.dim()?); // prints (100, 100)
///
/// // Read all individuals and all SNPs
/// let val = ReadOptions::builder().f64().read(&mut bed)?;
/// assert!(val.dim() == (100, 100));
///
/// // Read the individual at index position 10 and all SNPs
/// let val = ReadOptions::builder().iid_index(10).f64().read(&mut bed)?;
/// assert!(val.dim() == (1, 100));
///
/// // Read the individuals at index positions 0,5, 1st-from-the-end and
/// // the SNP at index position 3
/// let val = ReadOptions::builder()
///     .iid_index(vec![0, 5, -1])
///     .sid_index(3)
///     .f64()
///     .read(&mut bed)?;
/// assert!(val.dim() == (3, 1));
/// // Repeat, but with an ndarray
/// let val = ReadOptions::builder()
///     .iid_index(nd::array![0, 5, -1])
///     .sid_index(3)
///     .f64()
///     .read(&mut bed)?;
/// assert!(val.dim() == (3, 1));
/// // Repeat, but with a Rust array
/// let val = ReadOptions::builder()
///     .iid_index([0, 5, -1])
///     .sid_index(3)
///     .f64()
///     .read(&mut bed)?;
/// assert!(val.dim() == (3, 1));
///
/// // Create a boolean ndarray identifying SNPs in chromosome 5,
/// // then select those SNPs.
/// let chrom_5 = bed.chromosome()?.map(|elem| elem == "5");
/// let val = ReadOptions::builder()
///     .sid_index(chrom_5)
///     .f64()
///     .read(&mut bed)?;
/// assert!(val.dim() == (100, 6));
///
/// // Use ndarray's slice macro, [`s!`](https://docs.rs/ndarray/latest/ndarray/macro.s.html),
/// // to select every 2nd individual and every 3rd SNP.
/// let val = ReadOptions::builder()
///     .iid_index(s![..;2])
///     .sid_index(s![..;3])
///     .f64()
///     .read(&mut bed)?;
/// assert!(val.dim() == (50, 34));
/// // Use ndarray's slice macro, [`s!`](https://docs.rs/ndarray/latest/ndarray/macro.s.html),
/// // to select the 10th-from-last individual to the last, in reverse order,
/// // and every 3rd SNP in reverse order.
/// let val = ReadOptions::builder()
///     .iid_index(s![-10..;-1])
///     .sid_index(s![..;-3])
///     .f64()
///     .read(&mut bed)?;
/// assert!(val.dim() == (10, 34));
/// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
#[derive(Debug, Clone)]
pub enum Index {
    // Could implement an enumerator, but it is complex and requires a 'match' on each next()
    // https://stackoverflow.com/questions/65272613/how-to-implement-intoiterator-for-an-enum-of-iterable-variants
    /// Every position, in order (`0..count`).
    #[allow(missing_docs)]
    All,
    /// A single index; negative numbers count from the end.
    #[allow(missing_docs)]
    One(isize),
    /// A vector of indices; negative numbers count from the end.
    #[allow(missing_docs)]
    Vec(Vec<isize>),
    /// A 1-D ndarray of indices; negative numbers count from the end.
    #[allow(missing_docs)]
    NDArray(nd::Array1<isize>),
    /// A vector of booleans, one per position; `true` selects that position.
    #[allow(missing_docs)]
    VecBool(Vec<bool>),
    /// A 1-D ndarray of booleans, one per position; `true` selects that position.
    #[allow(missing_docs)]
    NDArrayBool(nd::Array1<bool>),
    /// A 1-D ndarray slice such as `s![..;2]`.
    #[allow(missing_docs)]
    NDSliceInfo(SliceInfo1),
    /// A Rust range such as `0..10` or `..10`.
    #[allow(missing_docs)]
    RangeAny(RangeAny),
}
3516
#[doc(hidden)]
/// Used internally to represent Rust ranges such as `0..10`, `..10`, etc.
///
/// Both bounds are stored in half-open form (inclusive start, exclusive end);
/// `None` means that side of the range was left unbounded.
#[derive(Debug, Clone)]
pub struct RangeAny {
    // Inclusive start; `None` is treated as 0 by `to_range`.
    start: Option<usize>,
    // Exclusive end; `None` is treated as `count` by `to_range`.
    end: Option<usize>,
}
3524
3525impl RangeAny {
3526 fn new<T: RangeBounds<usize>>(range_thing: &T) -> RangeAny {
3527 let start_bound = range_thing.start_bound();
3528 let start = match start_bound {
3529 Bound::Included(&start) => Some(start),
3530 Bound::Excluded(&start) => Some(start + 1),
3531 Bound::Unbounded => None,
3532 };
3533
3534 let end_bound = range_thing.end_bound();
3535 let end = match end_bound {
3536 Bound::Included(&end) => Some(end + 1),
3537 Bound::Excluded(&end) => Some(end),
3538 Bound::Unbounded => None,
3539 };
3540 RangeAny { start, end }
3541 }
3542
3543 // https://stackoverflow.com/questions/55925523/array-cannot-be-indexed-by-rangefull
3544 fn to_range(&self, count: usize) -> Result<Range<usize>, Box<BedErrorPlus>> {
3545 let start = self.start.unwrap_or_default();
3546 let end = if let Some(end) = self.end { end } else { count };
3547 if start > end {
3548 Err(BedError::StartGreaterThanEnd(start, end).into())
3549 } else {
3550 Ok(Range { start, end })
3551 }
3552 }
3553
3554 fn len(&self, count: usize) -> Result<usize, Box<BedErrorPlus>> {
3555 let range = self.to_range(count)?;
3556 Ok(range.end - range.start)
3557 }
3558
3559 fn is_empty(&self, count: usize) -> Result<bool, Box<BedErrorPlus>> {
3560 Ok(self.len(count)? == 0)
3561 }
3562}
3563
#[doc(hidden)]
#[derive(Debug, Clone)]
/// Used internally to represent NDArray Slices such as `s![..]`, `s![0..;2]`, `s![0..10;-1]`
pub struct RangeNdSlice {
    // Lower bound of the covered span (inclusive).
    start: usize,
    // Upper bound of the covered span (exclusive).
    end: usize,
    // Distance between consecutive selected positions.
    step: usize,
    // When true, `to_vec` emits positions from `end - 1` downward instead of from `start` upward.
    is_reversed: bool,
}
3573
// Ceiling of `a / b` for unsigned integers.
//
// Computed from the quotient and remainder rather than `(a + b - 1) / b`,
// because that sum overflows when `a` is close to `usize::MAX`.
// Panics (division by zero) when `b` is 0.
fn div_ceil(a: usize, b: usize) -> usize {
    let quotient = a / b;
    if a % b == 0 {
        quotient
    } else {
        // Cannot overflow: a non-zero remainder implies b >= 2, so quotient <= usize::MAX / 2.
        quotient + 1
    }
}
3578
3579impl RangeNdSlice {
3580 fn len(&self) -> usize {
3581 if self.start > self.end {
3582 0
3583 } else {
3584 div_ceil(self.end - self.start, self.step)
3585 }
3586 }
3587
3588 fn is_empty(&self) -> bool {
3589 self.len() == 0
3590 }
3591
3592 // https://docs.rs/ndarray/0.15.4/ndarray/struct.ArrayBase.html#slicing
3593 fn to_vec(&self) -> Vec<isize> {
3594 if self.start >= self.end {
3595 Vec::new()
3596 } else if !self.is_reversed {
3597 (self.start..self.end)
3598 .step_by(self.step)
3599 .map(|i| i as isize)
3600 .collect()
3601 } else {
3602 // https://docs.rs/ndarray/latest/ndarray/macro.s.html
3603 let size = self.len();
3604 let mut vec: Vec<isize> = Vec::<isize>::with_capacity(size);
3605 let mut i = self.end - 1;
3606 while i >= self.start {
3607 vec.push(i as isize);
3608 if i < self.step {
3609 break;
3610 }
3611 i -= self.step;
3612 }
3613 vec
3614 }
3615 }
3616
3617 fn new(nd_slice_info: &SliceInfo1, count: usize) -> Result<Self, Box<BedErrorPlus>> {
3618 // self.to_vec(count).len(),
3619 // https://docs.rs/ndarray/0.15.4/ndarray/struct.ArrayBase.html#method.slice_collapse
3620 // Error in the following cases
3621 // * SliceInfo is not a 1-dimensional or is a NewAxis
3622 // * Step is 0
3623 // * Start is greater than count
3624 // * End is greater than count
3625 // As with ndarray, Start can be greater than End is allowed
3626 // and means the slice is empty.
3627 if nd_slice_info.in_ndim() != 1 || nd_slice_info.out_ndim() != 1 {
3628 Err(BedError::NdSliceInfoNot1D)?;
3629 }
3630
3631 let slice_info_elem = nd_slice_info[0];
3632 match slice_info_elem {
3633 nd::SliceInfoElem::Slice { start, end, step } => {
3634 // https://docs.rs/ndarray/0.15.4/ndarray/enum.SliceInfoElem.html
3635 // s![..], 0,None,1
3636 // s![a..b;2] a,b,2
3637 // s![a..;-1], from a to end in reverse order
3638 // start index; negative are counted from the back of the axis
3639 // end index; negative are counted from the back of the axis; when not present the default is the full length of the axis.
3640 // step size in elements; the default is 1, for every element.
3641 // A range with step size. end is an exclusive index. Negative start or end indexes are counted from the back of the axis. If end is None, the slice extends to the end of the axis.
3642 let (step2, is_reverse2) = match step.cmp(&0) {
3643 Ordering::Greater => (step as usize, false),
3644 Ordering::Less => ((-step) as usize, true),
3645 Ordering::Equal => Err(BedError::StepZero)?,
3646 };
3647
3648 let start2 = if start >= 0 {
3649 let start3 = start as usize;
3650 if start3 > count {
3651 Err(BedError::StartGreaterThanCount(start3, count))?;
3652 }
3653 start3
3654 } else {
3655 let start3 = (-start) as usize;
3656 if start3 > count {
3657 Err(BedError::StartGreaterThanCount(start3, count))?;
3658 }
3659 count - start3
3660 };
3661
3662 let end2 = if let Some(end) = end {
3663 if end >= 0 {
3664 let end3 = end as usize;
3665 if end3 > count {
3666 Err(BedError::EndGreaterThanCount(end3, count))?;
3667 }
3668 end3
3669 } else {
3670 let end3 = (-end) as usize;
3671 if end3 > count {
3672 Err(BedError::EndGreaterThanCount(end3, count))?;
3673 }
3674 count - end3
3675 }
3676 } else {
3677 count
3678 };
3679
3680 Ok(RangeNdSlice {
3681 start: start2,
3682 end: end2,
3683 step: step2,
3684 is_reversed: is_reverse2,
3685 })
3686 }
3687 nd::SliceInfoElem::Index(index) => Ok(RangeNdSlice {
3688 start: index as usize,
3689 end: index as usize + 1,
3690 step: 1,
3691 is_reversed: false,
3692 }),
3693 nd::SliceInfoElem::NewAxis => Err(BedError::NewAxis.into()),
3694 }
3695 }
3696}
3697
3698impl Index {
3699 /// Returns the number of elements in an [`Index`](enum.Index.html).
3700 #[allow(clippy::len_without_is_empty)]
3701 pub fn len(&self, count: usize) -> Result<usize, Box<BedErrorPlus>> {
3702 match self {
3703 Index::All => Ok(count),
3704 Index::One(_) => Ok(1),
3705 Index::Vec(vec) => Ok(vec.len()),
3706 Index::NDArray(nd_array) => Ok(nd_array.len()),
3707 Index::VecBool(vec_bool) => Ok(vec_bool.iter().filter(|&b| *b).count()),
3708 Index::NDArrayBool(nd_array_bool) => Ok(nd_array_bool.iter().filter(|&b| *b).count()),
3709 Index::NDSliceInfo(nd_slice_info) => Ok(RangeNdSlice::new(nd_slice_info, count)?.len()),
3710 Index::RangeAny(range_any) => range_any.len(count),
3711 }
3712 }
3713
3714 /// Returns true if the [`Index`](enum.Index.html) is empty.
3715 pub fn is_empty(&self, count: usize) -> Result<bool, Box<BedErrorPlus>> {
3716 match self {
3717 Index::All => Ok(count == 0),
3718 Index::One(_) => Ok(false),
3719 Index::Vec(vec) => Ok(vec.is_empty()),
3720 Index::NDArray(nd_array) => Ok(nd_array.is_empty()),
3721 Index::VecBool(vec_bool) => Ok(!vec_bool.iter().any(|&b| b)),
3722 Index::NDArrayBool(nd_array_bool) => Ok(!nd_array_bool.iter().any(|&b| b)),
3723 Index::NDSliceInfo(nd_slice_info) => {
3724 Ok(RangeNdSlice::new(nd_slice_info, count)?.is_empty())
3725 }
3726 Index::RangeAny(range_any) => range_any.is_empty(count),
3727 }
3728 }
3729}
3730
3731impl From<SliceInfo1> for Index {
3732 fn from(slice_info: SliceInfo1) -> Index {
3733 Index::NDSliceInfo(slice_info)
3734 }
3735}
3736impl From<&SliceInfo1> for Index {
3737 fn from(slice_info: &SliceInfo1) -> Index {
3738 Index::NDSliceInfo(slice_info.to_owned())
3739 }
3740}
3741
3742impl From<RangeFull> for Index {
3743 fn from(range_thing: RangeFull) -> Index {
3744 Index::RangeAny(RangeAny::new(&range_thing))
3745 }
3746}
3747
3748impl From<&RangeFull> for Index {
3749 fn from(range_thing: &RangeFull) -> Index {
3750 Index::RangeAny(RangeAny::new(range_thing))
3751 }
3752}
3753
3754impl From<Range<usize>> for Index {
3755 fn from(range_thing: Range<usize>) -> Index {
3756 Index::RangeAny(RangeAny::new(&range_thing))
3757 }
3758}
3759
3760impl From<&Range<usize>> for Index {
3761 fn from(range_thing: &Range<usize>) -> Index {
3762 Index::RangeAny(RangeAny::new(range_thing))
3763 }
3764}
3765
3766impl From<RangeFrom<usize>> for Index {
3767 fn from(range_thing: RangeFrom<usize>) -> Index {
3768 Index::RangeAny(RangeAny::new(&range_thing))
3769 }
3770}
3771
3772impl From<&RangeFrom<usize>> for Index {
3773 fn from(range_thing: &RangeFrom<usize>) -> Index {
3774 Index::RangeAny(RangeAny::new(range_thing))
3775 }
3776}
3777
3778impl From<RangeInclusive<usize>> for Index {
3779 fn from(range_thing: RangeInclusive<usize>) -> Index {
3780 Index::RangeAny(RangeAny::new(&range_thing))
3781 }
3782}
3783
3784impl From<&RangeInclusive<usize>> for Index {
3785 fn from(range_thing: &RangeInclusive<usize>) -> Index {
3786 Index::RangeAny(RangeAny::new(range_thing))
3787 }
3788}
3789
3790impl From<RangeTo<usize>> for Index {
3791 fn from(range_thing: RangeTo<usize>) -> Index {
3792 Index::RangeAny(RangeAny::new(&range_thing))
3793 }
3794}
3795
3796impl From<&RangeTo<usize>> for Index {
3797 fn from(range_thing: &RangeTo<usize>) -> Index {
3798 Index::RangeAny(RangeAny::new(range_thing))
3799 }
3800}
3801
3802impl From<RangeToInclusive<usize>> for Index {
3803 fn from(range_thing: RangeToInclusive<usize>) -> Index {
3804 Index::RangeAny(RangeAny::new(&range_thing))
3805 }
3806}
3807
3808impl From<&RangeToInclusive<usize>> for Index {
3809 fn from(range_thing: &RangeToInclusive<usize>) -> Index {
3810 Index::RangeAny(RangeAny::new(range_thing))
3811 }
3812}
3813
3814impl From<&[isize]> for Index {
3815 fn from(array: &[isize]) -> Index {
3816 Index::Vec(array.to_vec())
3817 }
3818}
3819
3820impl<const N: usize> From<[isize; N]> for Index {
3821 fn from(array: [isize; N]) -> Index {
3822 Index::Vec(array.to_vec())
3823 }
3824}
3825
3826impl<const N: usize> From<&[isize; N]> for Index {
3827 fn from(array: &[isize; N]) -> Index {
3828 Index::Vec(array.to_vec())
3829 }
3830}
3831
3832impl From<&nd::ArrayView1<'_, isize>> for Index {
3833 fn from(view: &nd::ArrayView1<isize>) -> Index {
3834 Index::NDArray(view.to_owned())
3835 }
3836}
3837
3838impl From<nd::ArrayView1<'_, isize>> for Index {
3839 fn from(view: nd::ArrayView1<isize>) -> Index {
3840 Index::NDArray(view.to_owned())
3841 }
3842}
3843
3844impl From<Vec<isize>> for Index {
3845 fn from(vec: Vec<isize>) -> Index {
3846 Index::Vec(vec)
3847 }
3848}
3849impl From<&Vec<isize>> for Index {
3850 fn from(vec_ref: &Vec<isize>) -> Index {
3851 Index::Vec(vec_ref.clone())
3852 }
3853}
3854
3855impl From<nd::ArrayView1<'_, bool>> for Index {
3856 fn from(view: nd::ArrayView1<bool>) -> Index {
3857 Index::NDArrayBool(view.to_owned())
3858 }
3859}
3860
3861impl From<&nd::ArrayView1<'_, bool>> for Index {
3862 fn from(view: &nd::ArrayView1<bool>) -> Index {
3863 Index::NDArrayBool(view.to_owned())
3864 }
3865}
3866
3867impl From<&Vec<bool>> for Index {
3868 fn from(vec_ref: &Vec<bool>) -> Index {
3869 Index::VecBool(vec_ref.clone())
3870 }
3871}
3872
3873impl From<&[bool]> for Index {
3874 fn from(array: &[bool]) -> Index {
3875 Index::VecBool(array.to_vec())
3876 }
3877}
3878
3879impl<const N: usize> From<[bool; N]> for Index {
3880 fn from(array: [bool; N]) -> Index {
3881 Index::VecBool(array.to_vec())
3882 }
3883}
3884
3885impl<const N: usize> From<&[bool; N]> for Index {
3886 fn from(array: &[bool; N]) -> Index {
3887 Index::VecBool(array.to_vec())
3888 }
3889}
3890
3891impl From<isize> for Index {
3892 fn from(one: isize) -> Index {
3893 Index::One(one)
3894 }
3895}
3896impl From<&isize> for Index {
3897 fn from(one: &isize) -> Index {
3898 Index::One(one.to_owned())
3899 }
3900}
3901
3902impl From<nd::Array1<isize>> for Index {
3903 fn from(nd_array: nd::Array1<isize>) -> Index {
3904 Index::NDArray(nd_array)
3905 }
3906}
3907
3908impl From<&nd::Array1<isize>> for Index {
3909 fn from(nd_array: &nd::Array1<isize>) -> Index {
3910 Index::NDArray(nd_array.to_owned())
3911 }
3912}
3913
3914impl From<nd::Array1<bool>> for Index {
3915 fn from(nd_array_bool: nd::Array1<bool>) -> Index {
3916 Index::NDArrayBool(nd_array_bool)
3917 }
3918}
3919
3920impl From<&nd::Array1<bool>> for Index {
3921 fn from(nd_array_bool: &nd::Array1<bool>) -> Index {
3922 Index::NDArrayBool(nd_array_bool.clone())
3923 }
3924}
3925
3926impl From<Vec<bool>> for Index {
3927 fn from(vec_bool: Vec<bool>) -> Index {
3928 Index::VecBool(vec_bool)
3929 }
3930}
3931
3932impl From<()> for Index {
3933 fn from((): ()) -> Index {
3934 Index::All
3935 }
3936}
3937
3938// See https://nullderef.com/blog/rust-parameters/
3939
/// Represents options for reading genotype data from a PLINK .bed file.
///
/// Construct with [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
///
/// See the [Table of `ReadOptions`](index.html#readoptions)
/// for a list of the supported options.
/// See the [Table of Index Expressions](index.html#index-expressions)
/// for a list of expressions for selecting individuals (sample)
/// and SNPs (variants).
#[derive(Debug, Clone, Builder)]
#[builder(build_fn(error = "Box<BedErrorPlus>"))]
pub struct ReadOptions<TVal: BedVal> {
    /// Value to use for missing values (defaults to -127 or NaN)
    ///
    /// -127 is the default for i8 and NaN is the default for f32 and f64.
    ///
    /// In this example, the missing value is set to -1:
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
    /// use bed_reader::assert_eq_nan;
    ///
    /// let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    /// let val = ReadOptions::builder().missing_value(-1).i8().read(&mut bed)?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1, 0, -1, 0],
    ///         [2, 0, -1, 2],
    ///         [0, 1, 2, 0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[builder(default = "TVal::missing()")]
    missing_value: TVal,

    /// Select which individual (sample) values to read -- Defaults to all.
    ///
    /// Can select with a signed number, various lists of signed numbers,
    /// ranges, and various lists of booleans.
    ///
    /// See the [Table of Index Expressions](index.html#index-expressions)
    /// for a list of the supported index expressions.
    ///
    /// # Examples:
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
    /// use ndarray::s;
    ///
    /// let file_name = sample_bed_file("some_missing.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    ///
    /// // Read the individual at index position 3
    ///
    /// let val = ReadOptions::builder()
    ///     .iid_index(3)
    ///     .f64()
    ///     .read(&mut bed)?;
    /// assert!(val.dim() == (1, 100));
    ///
    /// // Read the individuals at index positions 0, 5, and 1st-from-last.
    ///
    /// let val = ReadOptions::builder()
    ///     .iid_index([0, 5, -1])
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (3, 100));
    ///
    /// // Read the individuals at index positions 20 (inclusive) to 30 (exclusive).
    ///
    /// let val = ReadOptions::builder()
    ///     .iid_index(20..30)
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (10, 100));
    ///
    /// // Read the individuals at every 2nd index position.
    ///
    /// let val = ReadOptions::builder()
    ///     .iid_index(s![..;2])
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (50, 100));
    ///
    /// // Read chromosome 5 of the female individuals.
    ///
    /// let female = bed.sex()?.map(|elem| *elem == 2);
    /// let chrom_5 = bed.chromosome()?.map(|elem| elem == "5");
    /// let val = ReadOptions::builder()
    ///     .iid_index(female)
    ///     .sid_index(chrom_5)
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (50, 6));
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[builder(default = "Index::All")]
    #[builder(setter(into))]
    iid_index: Index,

    /// Select which SNPs (variant) values to read -- Defaults to all.
    ///
    /// Can select with a signed number, various lists of signed numbers,
    /// ranges, and various lists of booleans.
    ///
    /// See the [Table of Index Expressions](index.html#index-expressions)
    /// for a list of the supported index expressions.
    ///
    /// # Examples:
    /// ```
    /// use ndarray as nd;
    /// use ndarray::s;
    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
    ///
    /// let file_name = sample_bed_file("some_missing.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    ///
    /// // Read the SNP at index position 3
    ///
    /// let val = ReadOptions::builder()
    ///     .sid_index(3)
    ///     .f64()
    ///     .read(&mut bed)?;
    /// assert!(val.dim() == (100, 1));
    ///
    /// // Read the SNPs at index positions 0, 5, and 1st-from-last.
    ///
    /// let val = ReadOptions::builder()
    ///     .sid_index([0, 5, -1])
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (100, 3));
    ///
    /// // Read the SNPs at index positions 20 (inclusive) to 30 (exclusive).
    ///
    /// let val = ReadOptions::builder()
    ///     .sid_index(20..30)
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (100, 10));
    ///
    /// // Read the SNPs at every 2nd index position.
    ///
    /// let val = ReadOptions::builder()
    ///     .sid_index(s![..;2])
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (100, 50));
    ///
    /// // Read chromosome 5 of the female individuals.
    ///
    /// let female = bed.sex()?.map(|elem| *elem == 2);
    /// let chrom_5 = bed.chromosome()?.map(|elem| elem == "5");
    /// let val = ReadOptions::builder()
    ///     .iid_index(female)
    ///     .sid_index(chrom_5)
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (50, 6));
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[builder(default = "Index::All")]
    #[builder(setter(into))]
    sid_index: Index,

    /// Sets if the order of the output array is Fortran-style -- Default is true.
    ///
    /// "Fortran order" is also called "column-major order" [Wikipedia](https://en.wikipedia.org/wiki/Row-_and_column-major_order).
    ///
    /// Also see [`f`](struct.ReadOptionsBuilder.html#method.f) and [`c`](struct.ReadOptionsBuilder.html#method.c).
    #[builder(default = "true")]
    is_f: bool,

    /// Sets if allele 1 is counted. Default is true.
    ///
    /// Also see [`count_a1`](struct.ReadOptionsBuilder.html#method.count_a1) and [`count_a2`](struct.ReadOptionsBuilder.html#method.count_a2).
    #[builder(default = "true")]
    is_a1_counted: bool,

    /// Number of threads to use (defaults to all processors)
    ///
    /// Can also be set with an environment variable.
    /// See [Environment Variables](index.html#environment-variables).
    ///
    /// In this example, we read using only one thread.
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
    /// use bed_reader::assert_eq_nan;
    ///
    /// let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    /// let val = ReadOptions::builder().num_threads(1).i8().read(&mut bed)?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1, 0, -127, 0],
    ///         [2, 0, -127, 2],
    ///         [0, 1, 2, 0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[builder(default, setter(strip_option))]
    num_threads: Option<usize>,

    // LATER: Allow this to be set with an environment variable.
    /// Maximum number of concurrent async requests (defaults to 10) --
    /// Used by [`BedCloud`](struct.BedCloud.html).
    ///
    /// In this example, we read using only one request at a time.
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{BedCloud, ReadOptions};
    /// use bed_reader::assert_eq_nan;
    ///
    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
    /// let mut bed_cloud = BedCloud::new(&url).await?;
    /// let val = ReadOptions::builder().max_concurrent_requests(1).i8().read_cloud(&mut bed_cloud).await?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1, 0, -127, 0],
    ///         [2, 0, -127, 2],
    ///         [0, 1, 2, 0]
    ///     ],
    /// );
    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
    /// ```
    #[builder(default, setter(strip_option))]
    #[allow(dead_code)]
    max_concurrent_requests: Option<usize>,

    // LATER: Allow this to be set with an environment variable.
    /// Maximum chunk size of async requests (defaults to `8_000_000` bytes) --
    /// Used by [`BedCloud`](struct.BedCloud.html).
    ///
    /// In this example, we read using only `1_000_000` bytes per request.
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{BedCloud, ReadOptions};
    /// use bed_reader::assert_eq_nan;
    ///
    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
    /// let mut bed_cloud = BedCloud::new(&url).await?;
    /// let val = ReadOptions::builder().max_chunk_bytes(1_000_000).i8().read_cloud(&mut bed_cloud).await?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1, 0, -127, 0],
    ///         [2, 0, -127, 2],
    ///         [0, 1, 2, 0]
    ///     ],
    /// );
    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
    /// ```
    #[builder(default, setter(strip_option))]
    #[allow(dead_code)]
    max_chunk_bytes: Option<usize>,
}
4222
4223impl<TVal: BedVal> ReadOptions<TVal> {
4224 /// Read genotype data. Supports selection and options.
4225 ///
4226 /// > Also see [`Bed::read`](struct.Bed.html#method.read) (read without options).
4227 /// > To fill a preallocated ndarray, see [`ReadOptionsBuilder::read_and_fill`](struct.ReadOptionsBuilder.html#method.read_and_fill).
4228 ///
4229 /// See the [Table of `ReadOptions`](index.html#readoptions)
4230 /// for a list of the supported options.
4231 /// See the [Table of Index Expressions](index.html#index-expressions)
4232 /// for a list of expressions for selecting individuals (sample)
4233 /// and SNPs (variants).
4234 ///
4235 /// # Errors
4236 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4237 /// for all possible errors.
4238 ///
4239 /// # Examples
4240 ///
4241 /// ```
4242 /// use ndarray as nd;
4243 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4244 /// use bed_reader::assert_eq_nan;
4245 ///
4246 /// // Read all data from a .bed file into an ndarray of f64.
4247 /// let file_name = sample_bed_file("small.bed")?;
4248 /// let mut bed = Bed::new(file_name)?;
4249 /// let val = ReadOptions::builder().f64().read(&mut bed)?;
4250 ///
4251 /// assert_eq_nan(
4252 /// &val,
4253 /// &nd::array![
4254 /// [1.0, 0.0, f64::NAN, 0.0],
4255 /// [2.0, 0.0, f64::NAN, 2.0],
4256 /// [0.0, 1.0, 2.0, 0.0]
4257 /// ],
4258 /// );
4259 ///
4260 /// // Read the SNPs indexed by 2.
4261 /// let val = ReadOptions::builder().sid_index(2).f64().read(&mut bed)?;
4262 ///
4263 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4264 ///
4265 /// // Read the SNPs indexed by 2, 3, and 4th from last.
4266 /// let val = ReadOptions::builder()
4267 /// .sid_index([2, 3, -4])
4268 /// .f64()
4269 /// .read(&mut bed)?;
4270 ///
4271 /// assert_eq_nan(
4272 /// &val,
4273 /// &nd::array![[f64::NAN, 0.0, 1.0], [f64::NAN, 2.0, 2.0], [2.0, 0.0, 0.0]],
4274 /// );
4275 ///
4276 /// // Read SNPs from 1 (inclusive) to 4 (exclusive).
4277 /// let val = ReadOptions::builder()
4278 /// .sid_index(1..4)
4279 /// .f64()
4280 /// .read(&mut bed)?;
4281 ///
4282 /// assert_eq_nan(
4283 /// &val,
4284 /// &nd::array![[0.0, f64::NAN, 0.0], [0.0, f64::NAN, 2.0], [1.0, 2.0, 0.0]],
4285 /// );
4286 ///
4287 /// // Print unique chrom values. Then, read all SNPs in chrom 5.
4288 /// use std::collections::HashSet;
4289 ///
4290 /// println!("{:?}", bed.chromosome()?.iter().collect::<HashSet<_>>());
4291 /// // This outputs: {"1", "5", "Y"}.
4292 /// let val = ReadOptions::builder()
4293 /// .sid_index(bed.chromosome()?.map(|elem| elem == "5"))
4294 /// .f64()
4295 /// .read(&mut bed)?;
4296 ///
4297 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4298 ///
4299 /// // Read 1st individual (across all SNPs).
4300 /// let val = ReadOptions::builder().iid_index(0).f64().read(&mut bed)?;
4301 /// assert_eq_nan(&val, &nd::array![[1.0, 0.0, f64::NAN, 0.0]]);
4302 ///
4303 /// // Read every 2nd individual.
4304 /// use ndarray::s;
4305 ///
4306 /// let val = ReadOptions::builder()
4307 /// .iid_index(s![..;2])
4308 /// .f64()
4309 /// .read(&mut bed)?;
4310 /// assert_eq_nan(
4311 /// &val,
4312 /// &nd::array![[1.0, 0.0, f64::NAN, 0.0], [0.0, 1.0, 2.0, 0.0]],
4313 /// );
4314 ///
4315 /// // Read last and 2nd-to-last individuals and the last SNP
4316 /// let val = ReadOptions::builder()
4317 /// .iid_index([-1,-2])
4318 /// .sid_index(-1)
4319 /// .f64()
4320 /// .read(&mut bed)?;
4321 ///
4322 /// assert_eq_nan(&val, &nd::array![[0.0],[2.0]]);
4323 ///
4324 /// // The output array can be f32, f64, or i8
4325 /// let val = ReadOptions::builder().i8().read(&mut bed)?;
4326 ///
4327 /// assert_eq_nan(
4328 /// &val,
4329 /// &nd::array![
4330 /// [1, 0, -127, 0],
4331 /// [2, 0, -127, 2],
4332 /// [0, 1, 2, 0]
4333 /// ],
4334 /// );
4335 /// # use bed_reader::BedErrorPlus;
4336 /// # Ok::<(), Box<BedErrorPlus>>(())
4337 /// ```
4338 #[must_use]
4339 pub fn builder() -> ReadOptionsBuilder<TVal> {
4340 ReadOptionsBuilder::default()
4341 }
4342
4343 /// Value to be used for missing values (defaults to -127 or NaN).
4344 ///
4345 /// # Example
4346 /// ```
4347 /// use ndarray as nd;
4348 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4349 /// use bed_reader::assert_eq_nan;
4350 ///
4351 /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4352 /// assert_eq!(read_options.missing_value(), -127);
4353 ///
4354 /// let file_name = sample_bed_file("small.bed")?;
4355 /// let mut bed = Bed::new(file_name)?;
4356 /// let val = bed.read_with_options(&read_options)?;
4357
4358 /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4359 /// # use bed_reader::BedErrorPlus;
4360 /// # Ok::<(), Box<BedErrorPlus>>(())
4361 /// ```
4362 pub fn missing_value(&self) -> TVal {
4363 self.missing_value
4364 }
4365
4366 /// Index of individuals (samples) to read (defaults to all).
4367 ///
4368 /// # Example
4369 /// ```
4370 /// use ndarray as nd;
4371 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4372 /// use bed_reader::assert_eq_nan;
4373 ///
4374 /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4375 /// println!("{0:?}", read_options.iid_index()); // Outputs 'All'
4376 /// println!("{0:?}", read_options.sid_index()); // Outputs 'Vec([2, 3, 0])'
4377 ///
4378 /// let file_name = sample_bed_file("small.bed")?;
4379 /// let mut bed = Bed::new(file_name)?;
4380 /// let val = bed.read_with_options(&read_options)?;
4381
4382 /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4383 /// # use bed_reader::BedErrorPlus;
4384 /// # Ok::<(), Box<BedErrorPlus>>(())
4385 /// ```
4386 pub fn iid_index(&self) -> &Index {
4387 &self.iid_index
4388 }
4389
4390 /// Index of SNPs (variants) to read (defaults to all).
4391 ///
4392 /// # Example
4393 /// ```
4394 /// use ndarray as nd;
4395 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4396 /// use bed_reader::assert_eq_nan;
4397 ///
4398 /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4399 /// println!("{0:?}", read_options.iid_index()); // Outputs 'All'
4400 /// println!("{0:?}", read_options.sid_index()); // Outputs 'Vec([2, 3, 0])'
4401 ///
4402 /// let file_name = sample_bed_file("small.bed")?;
4403 /// let mut bed = Bed::new(file_name)?;
4404 /// let val = bed.read_with_options(&read_options)?;
4405
4406 /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4407 /// # use bed_reader::BedErrorPlus;
4408 /// # Ok::<(), Box<BedErrorPlus>>(())
4409 /// ```
4410 pub fn sid_index(&self) -> &Index {
4411 &self.sid_index
4412 }
4413
4414 /// Is the order of the output array Fortran-style (defaults to true).
4415 ///
4416 /// # Example
4417 /// ```
4418 /// use ndarray as nd;
4419 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4420 /// use bed_reader::assert_eq_nan;
4421 ///
4422 /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4423 /// assert_eq!(read_options.is_f(), true);
4424 ///
4425 /// let file_name = sample_bed_file("small.bed")?;
4426 /// let mut bed = Bed::new(file_name)?;
4427 /// let val = bed.read_with_options(&read_options)?;
4428
4429 /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4430 /// # use bed_reader::BedErrorPlus;
4431 /// # Ok::<(), Box<BedErrorPlus>>(())
4432 /// ```
4433 pub fn is_f(&self) -> bool {
4434 self.is_f
4435 }
4436
4437 /// If allele 1 will be counted (defaults to true).
4438 ///
4439 /// # Example
4440 /// ```
4441 /// use ndarray as nd;
4442 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4443 /// use bed_reader::assert_eq_nan;
4444 ///
4445 /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4446 /// assert_eq!(read_options.is_a1_counted(), true);
4447 ///
4448 /// let file_name = sample_bed_file("small.bed")?;
4449 /// let mut bed = Bed::new(file_name)?;
4450 /// let val = bed.read_with_options(&read_options)?;
4451
4452 /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4453 /// # use bed_reader::BedErrorPlus;
4454 /// # Ok::<(), Box<BedErrorPlus>>(())
4455 /// ```
4456 pub fn is_a1_counted(&self) -> bool {
4457 self.is_a1_counted
4458 }
4459
4460 /// Number of threads to be used (`None` means set with
4461 /// [Environment Variables](index.html#environment-variables) or use all processors).
4462 ///
4463 /// # Example
4464 /// ```
4465 /// use ndarray as nd;
4466 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4467 /// use bed_reader::assert_eq_nan;
4468 ///
4469 /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4470 /// assert_eq!(read_options.num_threads(), None);
4471 ///
4472 /// let file_name = sample_bed_file("small.bed")?;
4473 /// let mut bed = Bed::new(file_name)?;
4474 /// let val = bed.read_with_options(&read_options)?;
4475
4476 /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4477 /// # use bed_reader::BedErrorPlus;
4478 /// # Ok::<(), Box<BedErrorPlus>>(())
4479 /// ```
4480 pub fn num_threads(&self) -> Option<usize> {
4481 self.num_threads
4482 }
4483}
4484
4485impl<TVal: BedVal> ReadOptionsBuilder<TVal> {
4486 /// > See [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) for details and examples.
4487 pub fn read(&self, bed: &mut Bed) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
4488 let read_options = self.build()?;
4489 bed.read_with_options(&read_options)
4490 }
4491
4492 /// Read genotype data from the cloud.
4493 ///
4494 /// > Also see
4495 /// > [`BedCloud::read_with_options`](struct.BedCloud.html#method.read_with_options).
4496 ///
4497 /// # Errors
4498 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4499 /// for all possible errors.
4500 ///
4501 /// # Example
4502 ///
4503 /// ```
4504 /// use ndarray as nd;
4505 /// use bed_reader::{BedCloud, ReadOptions};
4506 /// use bed_reader::assert_eq_nan;
4507 ///
4508 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
4509 /// // Read the SNPs indexed by 2.
4510 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
4511 /// let mut bed_cloud = BedCloud::new(&url).await?;
4512 /// let mut val = ReadOptions::builder()
4513 /// .sid_index(2)
4514 /// .read_cloud(&mut bed_cloud).await?;
4515 ///
4516 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4517 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
4518 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
4519 /// ```
4520 pub async fn read_cloud(
4521 &self,
4522 bed_cloud: &mut BedCloud,
4523 ) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
4524 let read_options = self.build()?;
4525 bed_cloud.read_with_options(&read_options).await
4526 }
4527
4528 /// Read genotype data into a preallocated array.
4529 ///
4530 /// > Also see [`Bed::read_and_fill`](struct.Bed.html#method.read_and_fill) and
4531 /// > [`Bed::read_and_fill_with_options`](struct.Bed.html#method.read_and_fill_with_options).
4532 ///
4533 /// Note that options [`ReadOptions::f`](struct.ReadOptions.html#method.f),
4534 /// [`ReadOptions::c`](struct.ReadOptions.html#method.c), and [`ReadOptions::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
4535 /// are ignored. Instead, the order of the preallocated array is used.
4536 ///
4537 /// # Errors
4538 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4539 /// for all possible errors.
4540 ///
4541 /// # Example
4542 ///
4543 /// ```
4544 /// use ndarray as nd;
4545 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4546 /// use bed_reader::assert_eq_nan;
4547 ///
4548 /// // Read the SNPs indexed by 2.
4549 /// let file_name = sample_bed_file("small.bed")?;
4550 /// let mut bed = Bed::new(file_name)?;
4551 /// let mut val = nd::Array2::<f64>::default((3, 1));
4552 /// ReadOptions::builder()
4553 /// .sid_index(2)
4554 /// .read_and_fill(&mut bed, &mut val.view_mut())?;
4555 ///
4556 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4557 /// # use bed_reader::BedErrorPlus;
4558 /// # Ok::<(), Box<BedErrorPlus>>(())
4559 /// ```
4560 pub fn read_and_fill(
4561 &self,
4562 bed: &mut Bed,
4563 val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
4564 ) -> Result<(), Box<BedErrorPlus>> {
4565 let read_options = self.build()?;
4566 bed.read_and_fill_with_options(val, &read_options)
4567 }
4568
4569 /// Read genotype data from the cloud into a preallocated array.
4570 ///
4571 /// > Also see [`BedCloud::read_and_fill`](struct.BedCloud.html#method.read_and_fill) and
4572 /// > [`BedCloud::read_and_fill_with_options`](struct.BedCloud.html#method.read_and_fill_with_options).
4573 ///
4574 /// Note that options [`ReadOptions::f`](struct.ReadOptions.html#method.f),
4575 /// [`ReadOptions::c`](struct.ReadOptions.html#method.c), and [`ReadOptions::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
4576 /// are ignored. Instead, the order of the preallocated array is used.
4577 ///
4578 /// # Errors
4579 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4580 /// for all possible errors.
4581 ///
4582 /// # Example
4583 ///
4584 /// ```
4585 /// use ndarray as nd;
4586 /// use bed_reader::{BedCloud, ReadOptions};
4587 /// use bed_reader::assert_eq_nan;
4588 ///
4589 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
4590 /// // Read the SNPs indexed by 2.
4591 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
4592 /// let mut bed_cloud = BedCloud::new(&url).await?;
4593 /// let mut val = nd::Array2::<f64>::default((3, 1));
4594 /// ReadOptions::builder()
4595 /// .sid_index(2)
4596 /// .read_and_fill_cloud(&mut bed_cloud, &mut val.view_mut()).await?;
4597 ///
4598 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4599 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
4600 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
4601 /// ```
4602 pub async fn read_and_fill_cloud(
4603 &self,
4604 bed_cloud: &mut BedCloud,
4605 val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
4606 ) -> Result<(), Box<BedErrorPlus>> {
4607 let read_options = self.build()?;
4608 bed_cloud
4609 .read_and_fill_with_options(val, &read_options)
4610 .await
4611 }
4612
4613 /// Order of the output array, Fortran-style (default)
4614 ///
4615 /// Also called "column-major order" [Wikipedia](https://en.wikipedia.org/wiki/Row-_and_column-major_order).
4616 ///
4617 /// Also see [`is_f`](struct.ReadOptionsBuilder.html#method.is_f) and [`c`](struct.ReadOptionsBuilder.html#method.c).
4618 pub fn f(&mut self) -> &mut Self {
4619 self.is_f(true);
4620 self
4621 }
4622
4623 /// Order of the output array, C (default)
4624 ///
4625 /// Also called "row-major order" [Wikipedia](https://en.wikipedia.org/wiki/Row-_and_column-major_order).
4626 ///
4627 /// Also see [`is_f`](struct.ReadOptionsBuilder.html#method.is_f) and [`f`](struct.ReadOptionsBuilder.html#method.f).
4628 pub fn c(&mut self) -> &mut Self {
4629 self.is_f(false);
4630 self
4631 }
4632
4633 /// Count the number allele 1 (default and PLINK standard).
4634 ///
4635 /// Also see [`is_a1_counted`](struct.ReadOptionsBuilder.html#method.is_a1_counted) and [`count_a2`](struct.ReadOptionsBuilder.html#method.count_a2).
4636 ///
4637 /// # Example:
4638 /// ```
4639 /// use ndarray as nd;
4640 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4641 /// use bed_reader::assert_eq_nan;
4642 ///
4643 /// let file_name = sample_bed_file("small.bed")?;
4644 /// let mut bed = Bed::new(file_name)?;
4645 /// let val = ReadOptions::builder().count_a1().i8().read(&mut bed)?;
4646 ///
4647 /// assert_eq_nan(
4648 /// &val,
4649 /// &nd::array![
4650 /// [1, 0, -127, 0],
4651 /// [2, 0, -127, 2],
4652 /// [0, 1, 2, 0]
4653 /// ],
4654 /// );
4655 /// # use bed_reader::BedErrorPlus;
4656 /// # Ok::<(), Box<BedErrorPlus>>(())
4657 /// ```
4658 pub fn count_a1(&mut self) -> &mut Self {
4659 self.is_a1_counted = Some(true);
4660 self
4661 }
4662
4663 /// Count the number allele 2.
4664 ///
4665 /// Also see [`is_a1_counted`](struct.ReadOptionsBuilder.html#method.is_a1_counted) and [`count_a1`](struct.ReadOptionsBuilder.html#method.count_a1).
4666 ///
4667 /// # Example:
4668 /// ```
4669 /// use ndarray as nd;
4670 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4671 /// use bed_reader::assert_eq_nan;
4672 ///
4673 /// let file_name = sample_bed_file("small.bed")?;
4674 /// let mut bed = Bed::new(file_name)?;
4675 /// let val = ReadOptions::builder().count_a2().i8().read(&mut bed)?;
4676 ///
4677 /// assert_eq_nan(
4678 /// &val,
4679 /// &nd::array![
4680 /// [1, 2, -127, 2],
4681 /// [0, 2, -127, 0],
4682 /// [2, 1, 0, 2]
4683 /// ],
4684 /// );
4685 /// # use bed_reader::BedErrorPlus;
4686 /// # Ok::<(), Box<BedErrorPlus>>(())
4687 /// ```
4688 pub fn count_a2(&mut self) -> &mut Self {
4689 self.is_a1_counted = Some(false);
4690 self
4691 }
4692}
4693
4694impl ReadOptionsBuilder<i8> {
4695 /// Output an ndarray of i8.
4696 ///
4697 /// # Example:
4698 /// ```
4699 /// use ndarray as nd;
4700 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4701 /// use bed_reader::assert_eq_nan;
4702 ///
4703 /// let file_name = sample_bed_file("small.bed")?;
4704 /// let mut bed = Bed::new(file_name)?;
4705 /// let val = ReadOptions::builder().i8().read(&mut bed)?;
4706 ///
4707 /// assert_eq_nan(
4708 /// &val,
4709 /// &nd::array![
4710 /// [1, 0, -127, 0],
4711 /// [2, 0, -127, 2],
4712 /// [0, 1, 2, 0]
4713 /// ],
4714 /// );
4715 /// # use bed_reader::BedErrorPlus;
4716 /// # Ok::<(), Box<BedErrorPlus>>(())
4717 /// ```
4718 pub fn i8(&mut self) -> &mut Self {
4719 self
4720 }
4721}
4722
4723impl ReadOptionsBuilder<f32> {
4724 /// Output an ndarray of f32.
4725 ///
4726 /// # Example:
4727 /// ```
4728 /// use ndarray as nd;
4729 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4730 /// use bed_reader::assert_eq_nan;
4731 ///
4732 /// let file_name = sample_bed_file("small.bed")?;
4733 /// let mut bed = Bed::new(file_name)?;
4734 /// let val = ReadOptions::builder().f32().read(&mut bed)?;
4735 ///
4736 /// assert_eq_nan(
4737 /// &val,
4738 /// &nd::array![
4739 /// [1.0, 0.0, f32::NAN, 0.0],
4740 /// [2.0, 0.0, f32::NAN, 2.0],
4741 /// [0.0, 1.0, 2.0, 0.0]
4742 /// ],
4743 /// );
4744 /// # use bed_reader::BedErrorPlus;
4745 /// # Ok::<(), Box<BedErrorPlus>>(())
4746 /// ```
4747 pub fn f32(&mut self) -> &mut Self {
4748 self
4749 }
4750}
4751
4752impl ReadOptionsBuilder<f64> {
4753 /// Output an ndarray of f64.
4754 ///
4755 /// # Example:
4756 /// ```
4757 /// use ndarray as nd;
4758 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4759 /// use bed_reader::assert_eq_nan;
4760 ///
4761 /// let file_name = sample_bed_file("small.bed")?;
4762 /// let mut bed = Bed::new(file_name)?;
4763 /// let val = ReadOptions::builder().f64().read(&mut bed)?;
4764 ///
4765 /// assert_eq_nan(
4766 /// &val,
4767 /// &nd::array![
4768 /// [1.0, 0.0, f64::NAN, 0.0],
4769 /// [2.0, 0.0, f64::NAN, 2.0],
4770 /// [0.0, 1.0, 2.0, 0.0]
4771 /// ],
4772 /// );
4773 /// # use bed_reader::BedErrorPlus;
4774 /// # Ok::<(), Box<BedErrorPlus>>(())
4775 /// ```
4776 pub fn f64(&mut self) -> &mut Self {
4777 self
4778 }
4779}
4780
4781/// Represents options for writing genotype data and metadata to a PLINK .bed file.
4782///
4783/// Construct with [`WriteOptions::builder`](struct.WriteOptions.html#method.builder).
4784#[derive(Clone, Debug, Builder)]
4785#[builder(build_fn(skip))]
4786pub struct WriteOptions<TVal>
4787where
4788 TVal: BedVal,
4789{
4790 #[builder(setter(custom))]
4791 path: PathBuf,
4792
4793 #[builder(setter(custom))]
4794 fam_path: PathBuf,
4795
4796 #[builder(setter(custom))]
4797 bim_path: PathBuf,
4798
4799 #[builder(setter(custom))]
4800 metadata: Metadata,
4801
4802 #[builder(setter(custom), default = "true")]
4803 is_a1_counted: bool,
4804
4805 #[builder(default, setter(custom))]
4806 num_threads: Option<usize>,
4807
4808 #[builder(default = "TVal::missing()", setter(custom))]
4809 missing_value: TVal,
4810
4811 #[builder(setter(custom), default = "false")]
4812 skip_fam: bool,
4813
4814 #[builder(setter(custom), default = "false")]
4815 skip_bim: bool,
4816}
4817
4818impl<TVal> WriteOptions<TVal>
4819where
4820 TVal: BedVal,
4821{
4822 /// Write values to a file in PLINK .bed format. Supports metadata and options.
4823 ///
4824 /// > Also see [`Bed::write`](struct.Bed.html#method.write), which does not support metadata or options.
4825 ///
4826 /// The options, [listed here](struct.WriteOptionsBuilder.html#implementations), can specify the:
4827 /// * items of metadata, for example the individual ids or the SNP ids
4828 /// * a non-default path for the .fam and/or .bim files
4829 /// * a non-default value that represents missing data
4830 /// * whether the first allele is counted (default) or the second
4831 /// * number of threads to use for writing
4832 /// * a [`Metadata`](struct.Metadata.html)
4833 ///
4834 /// # Examples
4835 /// In this example, all metadata is given one item at a time.
4836 /// ```
4837 /// use ndarray as nd;
4838 /// use bed_reader::{Bed, WriteOptions};
4839 ///
4840 /// let output_folder = temp_testdir::TempDir::default();
4841 /// let output_file = output_folder.join("small.bed");
4842 /// let val = nd::array![
4843 /// [1.0, 0.0, f64::NAN, 0.0],
4844 /// [2.0, 0.0, f64::NAN, 2.0],
4845 /// [0.0, 1.0, 2.0, 0.0]
4846 /// ];
4847 /// WriteOptions::builder(output_file)
4848 /// .fid(["fid1", "fid1", "fid2"])
4849 /// .iid(["iid1", "iid2", "iid3"])
4850 /// .father(["iid23", "iid23", "iid22"])
4851 /// .mother(["iid34", "iid34", "iid33"])
4852 /// .sex([1, 2, 0])
4853 /// .pheno(["red", "red", "blue"])
4854 /// .chromosome(["1", "1", "5", "Y"])
4855 /// .sid(["sid1", "sid2", "sid3", "sid4"])
4856 /// .cm_position([100.4, 2000.5, 4000.7, 7000.9])
4857 /// .bp_position([1, 100, 1000, 1004])
4858 /// .allele_1(["A", "T", "A", "T"])
4859 /// .allele_2(["A", "C", "C", "G"])
4860 /// .write(&val)?;
4861 /// # use bed_reader::BedErrorPlus;
4862 /// # Ok::<(), Box<BedErrorPlus>>(())
4863 /// ```
4864 /// Here, no metadata is given, so default values are assigned.
4865 /// If we then read the new file and list the chromosome property,
4866 /// it is an array of zeros, the default chromosome value.
4867 /// ```
4868 /// # use ndarray as nd;
4869 /// # use bed_reader::{Bed, WriteOptions};
4870 /// # let output_folder = temp_testdir::TempDir::default();
4871 /// let output_file2 = output_folder.join("small2.bed");
4872 /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
4873 ///
4874 /// WriteOptions::builder(&output_file2).write(&val)?;
4875 ///
4876 /// let mut bed2 = Bed::new(&output_file2)?;
4877 /// println!("{:?}", bed2.chromosome()?); // Outputs ndarray ["0", "0", "0", "0"]
4878 /// # use bed_reader::BedErrorPlus;
4879 /// # Ok::<(), Box<BedErrorPlus>>(())
4880 /// ```
4881 #[anyinput]
4882 pub fn builder(path: AnyPath) -> WriteOptionsBuilder<TVal> {
4883 WriteOptionsBuilder::new(path)
4884 }
4885
4886 /// Family id of each of individual (sample). Defaults to "0"'s
4887 ///
4888 /// # Example
4889 /// ```
4890 /// use ndarray as nd;
4891 /// use bed_reader::{WriteOptions};
4892 /// let output_folder = temp_testdir::TempDir::default();
4893 /// let output_file = output_folder.join("small.bed");
4894 /// let write_options = WriteOptions::builder(output_file)
4895 /// .f64()
4896 /// .iid(["i1", "i2", "i3"])
4897 /// .sid(["s1", "s2", "s3", "s4"])
4898 /// .build(3, 4)?;
4899 ///
4900 /// println!("{0:?}", write_options.fid()); // Outputs ndarray ["0", "0", "0"]
4901 /// # use bed_reader::BedErrorPlus;
4902 /// # Ok::<(), Box<BedErrorPlus>>(())
4903 /// ```
4904 pub fn fid(&self) -> &nd::Array1<String> {
4905 // unwrap always works because the WriteOptions constructor fills all metadata.
4906 self.metadata.fid.as_ref().unwrap()
4907 }
4908
4909 /// Individual id of each of individual (sample). Defaults to "iid1", "iid2" ...
4910 ///
4911 /// # Example
4912 /// ```
4913 /// use ndarray as nd;
4914 /// use bed_reader::{Bed, WriteOptions};
4915 /// let output_folder = temp_testdir::TempDir::default();
4916 /// let output_file = output_folder.join("small.bed");
4917 /// let write_options = WriteOptions::builder(output_file)
4918 /// .f64()
4919 /// .iid(["i1", "i2", "i3"])
4920 /// .sid(["s1", "s2", "s3", "s4"])
4921 /// .build(3, 4)?;
4922 ///
4923 /// println!("{0:?}", write_options.iid()); // Outputs ndarray ["i1", "i2", "i3"]
4924 ///
4925 /// let val = nd::array![
4926 /// [1.0, 0.0, f64::NAN, 0.0],
4927 /// [2.0, 0.0, f64::NAN, 2.0],
4928 /// [0.0, 1.0, 2.0, 0.0]
4929 /// ];
4930 /// Bed::write_with_options(&val, &write_options)?;
4931 /// # use bed_reader::BedErrorPlus;
4932 /// # Ok::<(), Box<BedErrorPlus>>(())
4933 /// ```
4934 pub fn iid(&self) -> &nd::Array1<String> {
4935 // unwrap always works because the WriteOptions constructor fills all metadata.
4936 self.metadata.iid.as_ref().unwrap()
4937 }
4938
4939 /// Father id of each of individual (sample). Defaults to "0"'s
4940 ///
4941 /// # Example
4942 /// ```
4943 /// use ndarray as nd;
4944 /// use bed_reader::WriteOptions;
4945 /// let output_folder = temp_testdir::TempDir::default();
4946 /// let output_file = output_folder.join("small.bed");
4947 /// let write_options = WriteOptions::builder(output_file)
4948 /// .f64()
4949 /// .iid(["i1", "i2", "i3"])
4950 /// .sid(["s1", "s2", "s3", "s4"])
4951 /// .build(3, 4)?;
4952 ///
4953 /// println!("{0:?}", write_options.father()); // Outputs ndarray ["0", "0", "0"]
4954 /// # use bed_reader::BedErrorPlus;
4955 /// # Ok::<(), Box<BedErrorPlus>>(())
4956 /// ```
4957 pub fn father(&self) -> &nd::Array1<String> {
4958 // unwrap always works because the WriteOptions constructor fills all metadata.
4959 self.metadata.father.as_ref().unwrap()
4960 }
4961
4962 /// Mother id of each of individual (sample). Defaults to "0"'s
4963 ///
4964 /// # Example
4965 /// ```
4966 /// use ndarray as nd;
4967 /// use bed_reader::WriteOptions;
4968 /// let output_folder = temp_testdir::TempDir::default();
4969 /// let output_file = output_folder.join("small.bed");
4970 /// let write_options = WriteOptions::builder(output_file)
4971 /// .f64()
4972 /// .iid(["i1", "i2", "i3"])
4973 /// .sid(["s1", "s2", "s3", "s4"])
4974 /// .build(3, 4)?;
4975 ///
4976 /// println!("{0:?}", write_options.mother()); // Outputs ndarray ["0", "0", "0"]
4977 /// # use bed_reader::BedErrorPlus;
4978 /// # Ok::<(), Box<BedErrorPlus>>(())
4979 /// ```
4980 pub fn mother(&self) -> &nd::Array1<String> {
4981 // unwrap always works because the WriteOptions constructor fills all metadata.
4982 self.metadata.mother.as_ref().unwrap()
4983 }
4984
4985 /// Sex of each of individual (sample). Defaults to 0's
4986 ///
4987 /// 0 is unknown, 1 is male, 2 is female
4988 ///
4989 /// # Example
4990 /// ```
4991 /// use ndarray as nd;
4992 /// use bed_reader::WriteOptions;
4993 /// let output_folder = temp_testdir::TempDir::default();
4994 /// let output_file = output_folder.join("small.bed");
4995 /// let write_options = WriteOptions::builder(output_file)
4996 /// .f64()
4997 /// .iid(["i1", "i2", "i3"])
4998 /// .sid(["s1", "s2", "s3", "s4"])
4999 /// .build(3, 4)?;
5000 ///
5001 /// println!("{0:?}", write_options.sex()); // Outputs ndarray [0, 0, 0]
5002 /// # use bed_reader::BedErrorPlus;
5003 /// # Ok::<(), Box<BedErrorPlus>>(())
5004 /// ```
5005 pub fn sex(&self) -> &nd::Array1<i32> {
5006 // unwrap always works because the WriteOptions constructor fills all metadata.
5007 self.metadata.sex.as_ref().unwrap()
5008 }
5009
5010 /// Phenotype of each of individual (sample). Seldom used. Defaults to 0's
5011 ///
5012 /// # Example
5013 /// ```
5014 /// use ndarray as nd;
5015 /// use bed_reader::WriteOptions;
5016 /// let output_folder = temp_testdir::TempDir::default();
5017 /// let output_file = output_folder.join("small.bed");
5018 /// let write_options = WriteOptions::builder(output_file)
5019 /// .f64()
5020 /// .iid(["i1", "i2", "i3"])
5021 /// .sid(["s1", "s2", "s3", "s4"])
5022 /// .build(3, 4)?;
5023 ///
5024 /// println!("{0:?}", write_options.pheno()); // Outputs ndarray ["0", "0", "0"]
5025 /// # use bed_reader::BedErrorPlus;
5026 /// # Ok::<(), Box<BedErrorPlus>>(())
5027 /// ```
5028 pub fn pheno(&self) -> &nd::Array1<String> {
5029 // unwrap always works because the WriteOptions constructor fills all metadata.
5030 self.metadata.pheno.as_ref().unwrap()
5031 }
5032
5033 /// Chromosome of each of SNP (variant). Defaults to "0"'s
5034 ///
5035 /// # Example
5036 /// ```
5037 /// use ndarray as nd;
5038 /// use bed_reader::WriteOptions;
5039 /// let output_folder = temp_testdir::TempDir::default();
5040 /// let output_file = output_folder.join("small.bed");
5041 /// let write_options = WriteOptions::builder(output_file)
5042 /// .f64()
5043 /// .iid(["i1", "i2", "i3"])
5044 /// .sid(["s1", "s2", "s3", "s4"])
5045 /// .build(3, 4)?;
5046 ///
5047 /// println!("{0:?}", write_options.chromosome()); // Outputs ndarray ["0", "0", "0", "0"]
5048 /// # use bed_reader::BedErrorPlus;
5049 /// # Ok::<(), Box<BedErrorPlus>>(())
5050 /// ```
5051 pub fn chromosome(&self) -> &nd::Array1<String> {
5052 // unwrap always works because the WriteOptions constructor fills all metadata.
5053 self.metadata.chromosome.as_ref().unwrap()
5054 }
5055
5056 /// SNP id of each of SNP (variant). Defaults to "sid1", "sid2", ...
5057 ///
5058 /// # Example
5059 /// ```
5060 /// use ndarray as nd;
5061 /// use bed_reader::{Bed, WriteOptions};
5062 /// let output_folder = temp_testdir::TempDir::default();
5063 /// let output_file = output_folder.join("small.bed");
5064 /// let write_options = WriteOptions::builder(output_file)
5065 /// .f64()
5066 /// .iid(["i1", "i2", "i3"])
5067 /// .sid(["s1", "s2", "s3", "s4"])
5068 /// .build(3, 4)?;
5069 ///
5070 /// println!("{0:?}", write_options.sid()); // Outputs ndarray ["s1", "s2", "s3", "s4"]
5071 ///
5072 /// let val = nd::array![
5073 /// [1.0, 0.0, f64::NAN, 0.0],
5074 /// [2.0, 0.0, f64::NAN, 2.0],
5075 /// [0.0, 1.0, 2.0, 0.0]
5076 /// ];
5077 /// Bed::write_with_options(&val, &write_options)?;
5078 /// # use bed_reader::BedErrorPlus;
5079 /// # Ok::<(), Box<BedErrorPlus>>(())
5080 /// ```
5081 pub fn sid(&self) -> &nd::Array1<String> {
5082 // unwrap always works because the WriteOptions constructor fills all metadata.
5083 self.metadata.sid.as_ref().unwrap()
5084 }
5085
5086 /// Centimorgan position of each SNP (variant). Defaults to 0.0's.
5087 ///
5088 /// # Example
5089 /// ```
5090 /// use ndarray as nd;
5091 /// use bed_reader::WriteOptions;
5092 /// let output_folder = temp_testdir::TempDir::default();
5093 /// let output_file = output_folder.join("small.bed");
5094 /// let write_options = WriteOptions::builder(output_file)
5095 /// .f64()
5096 /// .iid(["i1", "i2", "i3"])
5097 /// .sid(["s1", "s2", "s3", "s4"])
5098 /// .build(3, 4)?;
5099 ///
5100 /// println!("{0:?}", write_options.cm_position()); // Outputs ndarray [0.0, 0.0, 0.0, 0.0]
5101 /// # use bed_reader::BedErrorPlus;
5102 /// # Ok::<(), Box<BedErrorPlus>>(())
5103 /// ```
5104 pub fn cm_position(&self) -> &nd::Array1<f32> {
5105 // unwrap always works because the WriteOptions constructor fills all metadata.
5106 self.metadata.cm_position.as_ref().unwrap()
5107 }
5108
5109 /// Base-pair position of each SNP (variant). Defaults to 0's.
5110 ///
5111 /// # Example
5112 /// ```
5113 /// use ndarray as nd;
5114 /// use bed_reader::{Bed, WriteOptions};
5115 /// let output_folder = temp_testdir::TempDir::default();
5116 /// let output_file = output_folder.join("small.bed");
5117 /// let write_options = WriteOptions::builder(output_file)
5118 /// .f64()
5119 /// .iid(["i1", "i2", "i3"])
5120 /// .sid(["s1", "s2", "s3", "s4"])
5121 /// .build(3, 4)?;
5122 ///
5123 /// println!("{0:?}", write_options.bp_position()); // Outputs ndarray [0, 0, 0, 0]
5124 /// # use bed_reader::BedErrorPlus;
5125 /// # Ok::<(), Box<BedErrorPlus>>(())
5126 /// ```
5127 pub fn bp_position(&self) -> &nd::Array1<i32> {
5128 // unwrap always works because the WriteOptions constructor fills all metadata.
5129 self.metadata.bp_position.as_ref().unwrap()
5130 }
5131
5132 /// First allele of each SNP (variant). Defaults to "A1"
5133 ///
5134 /// # Example
5135 /// ```
5136 /// use ndarray as nd;
5137 /// use bed_reader::{Bed, WriteOptions};
5138 /// let output_folder = temp_testdir::TempDir::default();
5139 /// let output_file = output_folder.join("small.bed");
5140 /// let write_options = WriteOptions::builder(output_file)
5141 /// .f64()
5142 /// .iid(["i1", "i2", "i3"])
5143 /// .sid(["s1", "s2", "s3", "s4"])
5144 /// .build(3, 4)?;
5145 ///
5146 /// println!("{0:?}", write_options.allele_1()); // Outputs ndarray ["A1", "A1", "A1", "A1"]
5147 /// println!("{0:?}", write_options.allele_2()); // Outputs ndarray ["A2", "A2", "A2", "A2"]
5148 /// # use bed_reader::BedErrorPlus;
5149 /// # Ok::<(), Box<BedErrorPlus>>(())
5150 /// ```
5151 pub fn allele_1(&self) -> &nd::Array1<String> {
5152 // unwrap always works because the WriteOptions constructor fills all metadata.
5153 self.metadata.allele_1.as_ref().unwrap()
5154 }
5155
5156 /// Second allele of each SNP (variant). Defaults to "A2"
5157 ///
5158 /// # Example
5159 /// ```
5160 /// use ndarray as nd;
5161 /// use bed_reader::{Bed, WriteOptions};
5162 /// let output_folder = temp_testdir::TempDir::default();
5163 /// let output_file = output_folder.join("small.bed");
5164 /// let write_options = WriteOptions::builder(output_file)
5165 /// .f64()
5166 /// .iid(["i1", "i2", "i3"])
5167 /// .sid(["s1", "s2", "s3", "s4"])
5168 /// .build(3, 4)?;
5169 ///
5170 /// println!("{0:?}", write_options.allele_1()); // Outputs ndarray ["A1", "A1", "A1", "A1"]
5171 /// println!("{0:?}", write_options.allele_2()); // Outputs ndarray ["A2", "A2", "A2", "A2"]
5172 /// # use bed_reader::BedErrorPlus;
5173 /// # Ok::<(), Box<BedErrorPlus>>(())
5174 /// ```
5175 pub fn allele_2(&self) -> &nd::Array1<String> {
5176 // unwrap always works because the WriteOptions constructor fills all metadata.
5177 self.metadata.allele_2.as_ref().unwrap()
5178 }
5179
5180 /// [`Metadata`](struct.Metadata.html) for this [`WriteOptions`](struct.WriteOptions.html), for example, the individual (sample) Ids.
5181 ///
5182 /// This returns a struct with 12 fields. Each field is a ndarray.
5183 /// The struct will always be new, but the 12 ndarrays will be
5184 /// shared with this [`WriteOptions`](struct.WriteOptions.html).
5185 ///
5186 /// If the needed, default values will be used.
5187 ///
5188 /// # Example
5189 /// ```
5190 /// use ndarray as nd;
5191 /// use bed_reader::{Bed, WriteOptions};
5192 /// let output_folder = temp_testdir::TempDir::default();
5193 /// let output_file = output_folder.join("small.bed");
5194 /// let write_options = WriteOptions::builder(output_file)
5195 /// .f64()
5196 /// .iid(["i1", "i2", "i3"])
5197 /// .sid(["s1", "s2", "s3", "s4"])
5198 /// .build(3, 4)?;
5199 ///
5200 /// let metadata = write_options.metadata();
5201 /// println!("{0:?}", metadata.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"])
5202 /// # use bed_reader::BedErrorPlus;
5203 /// # Ok::<(), Box<BedErrorPlus>>(())
5204 /// ```
5205 pub fn metadata(&self) -> Metadata {
5206 self.metadata.clone()
5207 }
5208
5209 /// The number of individuals (samples)
5210 ///
5211 /// # Example
5212 /// ```
5213 /// use ndarray as nd;
5214 /// use bed_reader::{Bed, WriteOptions};
5215 /// let output_folder = temp_testdir::TempDir::default();
5216 /// let output_file = output_folder.join("small.bed");
5217 /// let write_options = WriteOptions::builder(output_file)
5218 /// .f64()
5219 /// .iid(["i1", "i2", "i3"])
5220 /// .sid(["s1", "s2", "s3", "s4"])
5221 /// .build(3, 4)?;
5222 ///
5223 /// assert_eq!(write_options.iid_count(), 3);
5224 /// assert_eq!(write_options.sid_count(), 4);
5225 /// # use bed_reader::BedErrorPlus;
5226 /// # Ok::<(), Box<BedErrorPlus>>(())
5227 /// ```
5228 pub fn iid_count(&self) -> usize {
5229 self.iid().len()
5230 }
5231
5232 /// The number of SNPs (variants)
5233 ///
5234 /// # Example
5235 /// ```
5236 /// use ndarray as nd;
5237 /// use bed_reader::{Bed, WriteOptions};
5238 /// let output_folder = temp_testdir::TempDir::default();
5239 /// let output_file = output_folder.join("small.bed");
5240 /// let write_options = WriteOptions::builder(output_file)
5241 /// .f64()
5242 /// .iid(["i1", "i2", "i3"])
5243 /// .sid(["s1", "s2", "s3", "s4"])
5244 /// .build(3, 4)?;
5245 ///
5246 /// assert_eq!(write_options.iid_count(), 3);
5247 /// assert_eq!(write_options.sid_count(), 4);
5248 /// # use bed_reader::BedErrorPlus;
5249 /// # Ok::<(), Box<BedErrorPlus>>(())
5250 /// ```
5251 pub fn sid_count(&self) -> usize {
5252 self.sid().len()
5253 }
5254
5255 /// Number of individuals (samples) and SNPs (variants)
5256 ///
5257 /// # Example
5258 /// ```
5259 /// use ndarray as nd;
5260 /// use bed_reader::{Bed, WriteOptions};
5261 /// let output_folder = temp_testdir::TempDir::default();
5262 /// let output_file = output_folder.join("small.bed");
5263 /// let write_options = WriteOptions::builder(output_file)
5264 /// .f64()
5265 /// .iid(["i1", "i2", "i3"])
5266 /// .sid(["s1", "s2", "s3", "s4"])
5267 /// .build(3, 4)?;
5268 ///
5269 /// assert_eq!(write_options.dim(), (3, 4));
5270 /// # use bed_reader::BedErrorPlus;
5271 /// # Ok::<(), Box<BedErrorPlus>>(())
5272 /// ```
5273 pub fn dim(&self) -> (usize, usize) {
5274 (self.iid_count(), self.sid_count())
5275 }
5276
5277 /// Path to .bed file.
5278 ///
5279 /// # Example
5280 /// ```
5281 /// use ndarray as nd;
5282 /// use bed_reader::{Bed, WriteOptions};
5283 /// let output_folder = temp_testdir::TempDir::default();
5284 /// let output_file = output_folder.join("small.bed");
5285 /// let write_options = WriteOptions::builder(output_file)
5286 /// .f64()
5287 /// .iid(["i1", "i2", "i3"])
5288 /// .sid(["s1", "s2", "s3", "s4"])
5289 /// .build(3, 4)?;
5290 ///
5291 /// println!("{0:?}", write_options.path()); // Outputs "...small.bed"
5292 /// println!("{0:?}", write_options.fam_path()); // Outputs "...small.fam"
5293 /// println!("{0:?}", write_options.bim_path()); // Outputs "...small.bim"
5294 /// # use bed_reader::BedErrorPlus;
5295 /// # Ok::<(), Box<BedErrorPlus>>(())
5296 /// ```
5297 pub fn path(&self) -> &PathBuf {
5298 &self.path
5299 }
5300
5301 /// Path to .fam file.
5302 ///
5303 /// # Example
5304 /// ```
5305 /// use ndarray as nd;
5306 /// use bed_reader::{Bed, WriteOptions};
5307 /// let output_folder = temp_testdir::TempDir::default();
5308 /// let output_file = output_folder.join("small.bed");
5309 /// let write_options = WriteOptions::builder(output_file)
5310 /// .f64()
5311 /// .iid(["i1", "i2", "i3"])
5312 /// .sid(["s1", "s2", "s3", "s4"])
5313 /// .build(3, 4)?;
5314 ///
5315 /// println!("{0:?}", write_options.path()); // Outputs "...small.bed"
5316 /// println!("{0:?}", write_options.fam_path()); // Outputs "...small.fam"
5317 /// println!("{0:?}", write_options.bim_path()); // Outputs "...small.bim"
5318 /// # use bed_reader::BedErrorPlus;
5319 /// # Ok::<(), Box<BedErrorPlus>>(())
5320 /// ```
5321 pub fn fam_path(&self) -> &PathBuf {
5322 &self.fam_path
5323 }
5324
5325 /// Path to .bim file.
5326 ///
5327 /// # Example
5328 /// ```
5329 /// use ndarray as nd;
5330 /// use bed_reader::{Bed, WriteOptions};
5331 /// let output_folder = temp_testdir::TempDir::default();
5332 /// let output_file = output_folder.join("small.bed");
5333 /// let write_options = WriteOptions::builder(output_file)
5334 /// .f64()
5335 /// .iid(["i1", "i2", "i3"])
5336 /// .sid(["s1", "s2", "s3", "s4"])
5337 /// .build(3, 4)?;
5338 ///
5339 /// println!("{0:?}", write_options.path()); // Outputs "...small.bed"
5340 /// println!("{0:?}", write_options.fam_path()); // Outputs "...small.fam"
5341 /// println!("{0:?}", write_options.bim_path()); // Outputs "...small.bim"
5342 /// # use bed_reader::BedErrorPlus;
5343 /// # Ok::<(), Box<BedErrorPlus>>(())
5344 /// ```
5345 pub fn bim_path(&self) -> &PathBuf {
5346 &self.bim_path
5347 }
5348
5349 /// If allele 1 will be counted (defaults to true).
5350 ///
5351 /// # Example
5352 /// ```
5353 /// use ndarray as nd;
5354 /// use bed_reader::{Bed, WriteOptions};
5355 /// let output_folder = temp_testdir::TempDir::default();
5356 /// let output_file = output_folder.join("small.bed");
5357 /// let write_options = WriteOptions::builder(output_file)
5358 /// .i8()
5359 /// .iid(["i1", "i2", "i3"])
5360 /// .sid(["s1", "s2", "s3", "s4"])
5361 /// .build(3, 4)?;
5362 ///
5363 /// assert!(write_options.is_a1_counted());
5364 /// # use bed_reader::BedErrorPlus;
5365 /// # Ok::<(), Box<BedErrorPlus>>(())
5366 /// ```
5367 pub fn is_a1_counted(&self) -> bool {
5368 self.is_a1_counted
5369 }
5370
5371 /// Number of threads to be used (`None` means set with
5372 /// [Environment Variables](index.html#environment-variables) or use all processors).
5373 ///
5374 /// # Example
5375 /// ```
5376 /// use ndarray as nd;
5377 /// use bed_reader::{Bed, WriteOptions};
5378 /// let output_folder = temp_testdir::TempDir::default();
5379 /// let output_file = output_folder.join("small.bed");
5380 /// let write_options = WriteOptions::builder(output_file)
5381 /// .i8()
5382 /// .iid(["i1", "i2", "i3"])
5383 /// .sid(["s1", "s2", "s3", "s4"])
5384 /// .build(3, 4)?;
5385 ///
5386 /// assert!(write_options.num_threads().is_none());
5387 /// # use bed_reader::BedErrorPlus;
5388 /// # Ok::<(), Box<BedErrorPlus>>(())
5389 /// ```
5390 pub fn num_threads(&self) -> Option<usize> {
5391 self.num_threads
5392 }
5393
5394 /// Value to be used for missing values (defaults to -127 or NaN).
5395 ///
5396 /// # Example
5397 /// ```
5398 /// use ndarray as nd;
5399 /// use bed_reader::{Bed, WriteOptions};
5400 /// let output_folder = temp_testdir::TempDir::default();
5401 /// let output_file = output_folder.join("small.bed");
5402 /// let write_options = WriteOptions::builder(output_file)
5403 /// .i8()
5404 /// .iid(["i1", "i2", "i3"])
5405 /// .sid(["s1", "s2", "s3", "s4"])
5406 /// .build(3, 4)?;
5407 ///
5408 /// assert!(write_options.missing_value() == -127);
5409 /// # use bed_reader::BedErrorPlus;
5410 /// # Ok::<(), Box<BedErrorPlus>>(())
5411 /// ```
5412 pub fn missing_value(&self) -> TVal {
5413 self.missing_value
5414 }
5415
5416 /// If skipping writing .fam file.
5417 ///
5418 /// # Example
5419 /// ```
5420 /// use ndarray as nd;
5421 /// use bed_reader::{Bed, WriteOptions};
5422 /// let output_folder = temp_testdir::TempDir::default();
5423 /// let output_file = output_folder.join("small.bed");
5424 /// let write_options = WriteOptions::builder(output_file)
5425 /// .i8()
5426 /// .skip_fam()
5427 /// .skip_bim()
5428 /// .build(3, 4)?;
5429 /// assert!(write_options.skip_fam());
5430 /// assert!(write_options.skip_bim());
5431 /// # use bed_reader::BedErrorPlus;
5432 /// # Ok::<(), Box<BedErrorPlus>>(())
5433 /// ```
5434 pub fn skip_fam(&self) -> bool {
5435 self.skip_fam
5436 }
5437
    /// `true` if writing the .bim file is to be skipped.
    ///
    /// # Example
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, WriteOptions};
    /// let output_folder = temp_testdir::TempDir::default();
    /// let output_file = output_folder.join("small.bed");
    /// let write_options = WriteOptions::builder(output_file)
    ///     .i8()
    ///     .skip_fam()
    ///     .skip_bim()
    ///     .build(3, 4)?;
    /// assert!(write_options.skip_fam());
    /// assert!(write_options.skip_bim());
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    pub fn skip_bim(&self) -> bool {
        self.skip_bim
    }
5459}
5460
5461impl<TVal> WriteOptionsBuilder<TVal>
5462where
5463 TVal: BedVal,
5464{
5465 /// Creates a new [`WriteOptions`](struct.WriteOptions.html) with the options given and then writes a .bed (and .fam and .bim) file.
5466 ///
5467 /// See [`WriteOptions`](struct.WriteOptions.html) for details and examples.
5468 pub fn write<S: nd::Data<Elem = TVal>>(
5469 &mut self,
5470 val: &nd::ArrayBase<S, nd::Ix2>,
5471 ) -> Result<(), Box<BedErrorPlus>> {
5472 let (iid_count, sid_count) = val.dim();
5473 let write_options = self.build(iid_count, sid_count)?;
5474 Bed::write_with_options(val, &write_options)?;
5475
5476 Ok(())
5477 }
5478
5479 /// Set the family id (fid) values for each individual (sample).
5480 ///
5481 /// Defaults to zeros.
5482 ///
5483 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5484 ///
5485 #[anyinput]
5486 #[must_use]
5487 pub fn fid(mut self, fid: AnyIter<AnyString>) -> Self {
5488 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5489 self.metadata.as_mut().unwrap().set_fid(fid);
5490 self
5491 }
5492
5493 /// Set the individual id (iid) values for each individual (sample).
5494 ///
5495 /// Defaults to "iid1", "iid2", ...
5496 ///
5497 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5498 ///
5499 #[anyinput]
5500 #[must_use]
5501 pub fn iid(mut self, iid: AnyIter<AnyString>) -> Self {
5502 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5503 self.metadata.as_mut().unwrap().set_iid(iid);
5504 self
5505 }
5506
5507 /// Set the father id values for each individual (sample).
5508 ///
5509 /// Defaults to zeros.
5510 ///
5511 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5512 ///
5513 #[anyinput]
5514 #[must_use]
5515 pub fn father(mut self, father: AnyIter<AnyString>) -> Self {
5516 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5517 self.metadata.as_mut().unwrap().set_father(father);
5518 self
5519 }
5520
5521 /// Set the mother id values for each individual (sample).
5522 ///
5523 /// Defaults to zeros.
5524 ///
5525 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5526 ///
5527 #[anyinput]
5528 #[must_use]
5529 pub fn mother(mut self, mother: AnyIter<AnyString>) -> Self {
5530 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5531 self.metadata.as_mut().unwrap().set_mother(mother);
5532 self
5533 }
5534
5535 /// Set the sex for each individual (sample).
5536 ///
5537 /// 0 is unknown (default), 1 is male, 2 is female
5538 #[anyinput]
5539 #[must_use]
5540 pub fn sex(mut self, sex: AnyIter<i32>) -> Self {
5541 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5542 self.metadata.as_mut().unwrap().set_sex(sex);
5543 self
5544 }
5545
5546 /// Set a phenotype for each individual (sample). Seldom used.
5547 ///
5548 /// Defaults to zeros.
5549 ///
5550 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5551 ///
5552 #[anyinput]
5553 #[must_use]
5554 pub fn pheno(mut self, pheno: AnyIter<AnyString>) -> Self {
5555 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5556 self.metadata.as_mut().unwrap().set_pheno(pheno);
5557 self
5558 }
5559
5560 /// Set the chromosome for each SNP (variant).
5561 ///
5562 /// Defaults to zeros.
5563 #[anyinput]
5564 #[must_use]
5565 pub fn chromosome(mut self, chromosome: AnyIter<AnyString>) -> Self {
5566 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5567 self.metadata.as_mut().unwrap().set_chromosome(chromosome);
5568 self
5569 }
5570
5571 /// Set the SNP id (sid) for each SNP (variant).
5572 ///
5573 /// Defaults to "sid1", "sid2", ...
5574 ///
5575 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5576 ///
5577 #[anyinput]
5578 #[must_use]
5579 pub fn sid(mut self, sid: AnyIter<AnyString>) -> Self {
5580 self.metadata.as_mut().unwrap().set_sid(sid);
5581 self
5582 }
5583
5584 /// Set the centimorgan position for each SNP (variant).
5585 ///
5586 /// Defaults to zeros.
5587 #[anyinput]
5588 #[must_use]
5589 pub fn cm_position(mut self, cm_position: AnyIter<f32>) -> Self {
5590 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5591 self.metadata.as_mut().unwrap().set_cm_position(cm_position);
5592 self
5593 }
5594
5595 /// Set the base-pair position for each SNP (variant).
5596 ///
5597 /// Defaults to zeros.
5598 ///
5599 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5600 ///
5601 #[anyinput]
5602 #[must_use]
5603 pub fn bp_position(mut self, bp_position: AnyIter<i32>) -> Self {
5604 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5605 self.metadata.as_mut().unwrap().set_bp_position(bp_position);
5606 self
5607 }
5608
5609 /// Set the first allele for each SNP (variant).
5610 ///
5611 /// Defaults to "A1", A1" ...
5612 ///
5613 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5614 ///
5615 #[anyinput]
5616 #[must_use]
5617 pub fn allele_1(mut self, allele_1: AnyIter<AnyString>) -> Self {
5618 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5619 self.metadata.as_mut().unwrap().set_allele_1(allele_1);
5620 self
5621 }
5622
5623 /// Set the second allele for each SNP (variant).
5624 ///
5625 /// Defaults to "A2", A2" ...
5626 ///
5627 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5628 ///
5629 #[anyinput]
5630 #[must_use]
5631 pub fn allele_2(mut self, allele_2: AnyIter<AnyString>) -> Self {
5632 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5633 self.metadata.as_mut().unwrap().set_allele_2(allele_2);
5634 self
5635 }
5636
5637 /// Merge metadata from a [`Metadata`](struct.Metadata.html).
5638 ///
5639 /// If a field is set in both [`Metadata`](struct.Metadata.html)'s,
5640 /// it will be overridden.
5641 ///
5642 /// # Example
5643 ///
5644 /// Extract metadata from a file.
5645 /// Create a random file with the same metadata.
5646 /// ```
5647 /// use ndarray as nd;
5648 /// use bed_reader::{Bed, WriteOptions, sample_bed_file};
5649 /// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
5650 ///
5651 /// let mut bed = Bed::new(sample_bed_file("small.bed")?)?;
5652 /// let metadata = bed.metadata()?;
5653 /// let shape = bed.dim()?;
5654 ///
5655 /// let mut rng = StdRng::seed_from_u64(0);
5656 /// let val = nd::Array::random_using(shape, Uniform::from(-1..3), &mut rng);
5657 ///
5658 /// let temp_out = temp_testdir::TempDir::default();
5659 /// let output_file = temp_out.join("random.bed");
5660 /// WriteOptions::builder(output_file)
5661 /// .metadata(&metadata)
5662 /// .missing_value(-1)
5663 /// .write(&val)?;
5664 /// # use bed_reader::BedErrorPlus;
5665 /// # Ok::<(), Box<BedErrorPlus>>(())
5666 /// ```
5667 #[must_use]
5668 pub fn metadata(mut self, metadata: &Metadata) -> Self {
5669 self.metadata = Some(
5670 Metadata::builder()
5671 .metadata(&self.metadata.unwrap()) // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5672 .metadata(metadata)
5673 .build_no_file_check() // Don't need to check consistent counts here. Builder will do it.
5674 .unwrap(), // Unwrap will always work nothing can go wrong
5675 );
5676 self
5677 }
5678
5679 /// Set the path to the .fam file.
5680 ///
5681 /// If not set, the .fam file will be assumed
5682 /// to have the same name as the .bed file, but with the extension .fam.
5683 ///
5684 /// # Example:
5685 /// Write .bed, .fam, and .bim files with non-standard names.
5686 /// ```
5687 /// use ndarray as nd;
5688 /// use bed_reader::WriteOptions;
5689 /// let output_folder = temp_testdir::TempDir::default();
5690 /// let output_file = output_folder.join("small.deb");
5691 /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
5692
5693 /// WriteOptions::builder(output_file)
5694 /// .fam_path(output_folder.join("small.maf"))
5695 /// .bim_path(output_folder.join("small.mib"))
5696 /// .write(&val)?;
5697 /// # use bed_reader::BedErrorPlus;
5698 /// # Ok::<(), Box<BedErrorPlus>>(())
5699 /// ```
5700 #[anyinput]
5701 #[must_use]
5702 pub fn fam_path(mut self, path: AnyPath) -> Self {
5703 self.fam_path = Some(path.to_owned());
5704 self
5705 }
5706
5707 /// Set the path to the .bim file.
5708 ///
5709 /// If not set, the .bim file will be assumed
5710 /// to have the same name as the .bed file, but with the extension .bim.
5711 ///
5712 /// # Example:
5713 /// Write .bed, .fam, and .bim files with non-standard names.
5714 /// ```
5715 /// use ndarray as nd;
5716 /// use bed_reader::{WriteOptions};
5717 /// let output_folder = temp_testdir::TempDir::default();
5718 /// let output_file = output_folder.join("small.deb");
5719 /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
5720
5721 /// WriteOptions::builder(output_file)
5722 /// .fam_path(output_folder.join("small.maf"))
5723 /// .bim_path(output_folder.join("small.mib"))
5724 /// .write(&val)?;
5725 /// # use bed_reader::BedErrorPlus;
5726 /// # Ok::<(), Box<BedErrorPlus>>(())
5727 /// ```
5728 #[anyinput]
5729 #[must_use]
5730 pub fn bim_path(mut self, path: AnyPath) -> Self {
5731 self.bim_path = Some(path.to_owned());
5732 self
5733 }
5734
5735 /// Value used for missing values (defaults to -127 or NaN)
5736 ///
5737 /// -127 is the default for i8 and NaN is the default for f32 and f64.
5738 ///
5739 /// # Example
5740 ///
5741 /// Extract metadata from a file.
5742 /// Create a random file with the same metadata.
5743 /// ```
5744 /// use ndarray as nd;
5745 /// use bed_reader::{Bed, WriteOptions, sample_bed_file};
5746 /// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
5747 ///
5748 /// let mut bed = Bed::new(sample_bed_file("small.bed")?)?;
5749 /// let metadata = bed.metadata()?;
5750 /// let shape = bed.dim()?;
5751 ///
5752 /// let mut rng = StdRng::seed_from_u64(0);
5753 /// let val = nd::Array::random_using(shape, Uniform::from(-1..3), &mut rng);
5754 ///
5755 /// let temp_out = temp_testdir::TempDir::default();
5756 /// let output_file = temp_out.join("random.bed");
5757 /// WriteOptions::builder(output_file)
5758 /// .metadata(&metadata)
5759 /// .missing_value(-1)
5760 /// .write(&val)?;
5761 /// # use bed_reader::BedErrorPlus;
5762 /// # Ok::<(), Box<BedErrorPlus>>(())
5763 /// ```
    pub fn missing_value(&mut self, missing_value: TVal) -> &mut Self {
        // `Some` records an explicit choice; if this stays `None`, `build` uses `TVal::missing()`.
        self.missing_value = Some(missing_value);
        self
    }
5768
5769 /// Count the number allele 1 (default and PLINK standard).
5770 ///
5771 /// Also see [`is_a1_counted`](struct.WriteOptionsBuilder.html#method.is_a1_counted) and [`count_a2`](struct.WriteOptionsBuilder.html#method.count_a2).
5772 pub fn count_a1(&mut self) -> &mut Self {
5773 self.is_a1_counted = Some(true);
5774 self
5775 }
5776
5777 /// Count the number allele 2.
5778 ///
5779 /// Also see [`is_a1_counted`](struct.WriteOptionsBuilder.html#method.is_a1_counted) and [`count_a1`](struct.WriteOptionsBuilder.html#method.count_a1).
5780 pub fn count_a2(&mut self) -> &mut Self {
5781 self.is_a1_counted = Some(false);
5782 self
5783 }
5784
5785 /// Sets if allele 1 is counted. Default is true.
5786 ///
5787 /// Also see [`count_a1`](struct.WriteOptionsBuilder.html#method.count_a1) and [`count_a2`](struct.WriteOptionsBuilder.html#method.count_a2).
    pub fn is_a1_counted(&mut self, is_a1_counted: bool) -> &mut Self {
        // Stored as `Some` so that `build` can tell an explicit choice from the default (`true`).
        self.is_a1_counted = Some(is_a1_counted);
        self
    }
5792
5793 /// Number of threads to use (defaults to all processors)
5794 ///
5795 /// Can also be set with an environment variable.
5796 /// See [Environment Variables](index.html#environment-variables).
5797 ///
5798 ///
5799 /// # Example:
5800 ///
5801 /// Write using only one thread.
5802 /// ```
5803 /// use ndarray as nd;
5804 /// use bed_reader::WriteOptions;
5805 /// let output_folder = temp_testdir::TempDir::default();
5806 /// let output_file = output_folder.join("small.bed");
5807 /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
5808
5809 /// WriteOptions::builder(output_file)
5810 /// .num_threads(1)
5811 /// .write(&val)?;
5812 /// # use bed_reader::BedErrorPlus;
5813 /// # Ok::<(), Box<BedErrorPlus>>(())
5814 /// ```
    pub fn num_threads(&mut self, num_threads: usize) -> &mut Self {
        // Outer `Some`: the option was set on the builder. Inner `Some`: the requested thread count.
        // When the option is never set, `build` stores `None` in the resulting `WriteOptions`.
        self.num_threads = Some(Some(num_threads));
        self
    }
5819
5820 /// Skip writing .fam file.
5821 ///
5822 /// # Example
5823 /// ```
5824 /// use ndarray as nd;
5825 /// use bed_reader::{Bed, WriteOptions};
5826 /// let output_folder = temp_testdir::TempDir::default();
5827 /// let output_file = output_folder.join("small.bed");
5828 /// let write_options = WriteOptions::builder(output_file)
5829 /// .i8()
5830 /// .skip_fam()
5831 /// .skip_bim()
5832 /// .build(3, 4)?;
5833 /// assert!(write_options.skip_fam());
5834 /// assert!(write_options.skip_bim());
5835 /// # use bed_reader::BedErrorPlus;
5836 /// # Ok::<(), Box<BedErrorPlus>>(())
5837 /// ```
    pub fn skip_fam(&mut self) -> &mut Self {
        // `Some(true)` records the request; if left unset, `build` defaults it to `false`.
        self.skip_fam = Some(true);
        self
    }
5842
5843 /// Skip writing .bim file.
5844 ///
5845 /// # Example
5846 /// ```
5847 /// use ndarray as nd;
5848 /// use bed_reader::{Bed, WriteOptions};
5849 /// let output_folder = temp_testdir::TempDir::default();
5850 /// let output_file = output_folder.join("small.bed");
5851 /// let write_options = WriteOptions::builder(output_file)
5852 /// .i8()
5853 /// .skip_fam()
5854 /// .skip_bim()
5855 /// .build(3, 4)?;
5856 /// assert!(write_options.skip_fam());
5857 /// assert!(write_options.skip_bim());
5858 /// # use bed_reader::BedErrorPlus;
5859 /// # Ok::<(), Box<BedErrorPlus>>(())
5860 /// ```
    pub fn skip_bim(&mut self) -> &mut Self {
        // `Some(true)` records the request; if left unset, `build` defaults it to `false`.
        self.skip_bim = Some(true);
        self
    }
5865
5866 /// Creates a new [`WriteOptions`](struct.WriteOptions.html) with the options given.
5867 ///
5868 /// > Also see [`WriteOptionsBuilder::write`](struct.WriteOptionsBuilder.html#method.write), which creates
5869 /// > a [`WriteOptions`](struct.WriteOptions.html) and writes to file in one step.
5870 ///
5871 /// # Example
5872 /// Create a new [`WriteOptions`](struct.WriteOptions.html) with some given values and some
5873 /// default values. Then use it to write a .bed file.
5874 /// ```
5875 /// use ndarray as nd;
5876 /// use bed_reader::{WriteOptions, Bed};
5877 ///
5878 /// let output_folder = temp_testdir::TempDir::default();
5879 /// let output_file = output_folder.join("small.bed");
5880 /// let write_options = WriteOptions::builder(output_file)
5881 /// .f64()
5882 /// .iid(["i1", "i2", "i3"])
5883 /// .sid(["s1", "s2", "s3", "s4"])
5884 /// .build(3, 4)?;
5885 /// println!("{0:?}", write_options.fid()); // Outputs ndarray ["0", "0", "0"]
5886 /// println!("{0:?}", write_options.iid()); // Outputs ndarray ["i1", "i2", "i3"]
5887 ///
5888 /// let val = nd::array![
5889 /// [1.0, 0.0, f64::NAN, 0.0],
5890 /// [2.0, 0.0, f64::NAN, 2.0],
5891 /// [0.0, 1.0, 2.0, 0.0]
5892 /// ];
5893 /// Bed::write_with_options(&val, &write_options)?;
5894 /// # use bed_reader::BedErrorPlus;
5895 /// # Ok::<(), Box<BedErrorPlus>>(())
5896 /// ```
5897 pub fn build(
5898 &self,
5899 iid_count: usize,
5900 sid_count: usize,
5901 ) -> Result<WriteOptions<TVal>, Box<BedErrorPlus>> {
5902 let Some(path) = self.path.as_ref() else {
5903 Err(BedError::UninitializedField("path"))?
5904 };
5905
5906 // unwrap always works because the metadata builder always initializes metadata
5907 let metadata = self.metadata.as_ref().unwrap();
5908 let metadata = metadata.fill(iid_count, sid_count)?;
5909
5910 let write_options = WriteOptions {
5911 path: path.to_owned(),
5912 fam_path: to_metadata_path(path, &self.fam_path, "fam"),
5913 bim_path: to_metadata_path(path, &self.bim_path, "bim"),
5914 is_a1_counted: self.is_a1_counted.unwrap_or(true),
5915 num_threads: self.num_threads.unwrap_or(None),
5916 missing_value: self.missing_value.unwrap_or_else(|| TVal::missing()),
5917 skip_fam: self.skip_fam.unwrap_or(false),
5918 skip_bim: self.skip_bim.unwrap_or(false),
5919
5920 metadata,
5921 };
5922 Ok(write_options)
5923 }
5924
5925 #[anyinput]
5926 fn new(path: AnyPath) -> Self {
5927 Self {
5928 path: Some(path.to_owned()),
5929 fam_path: None,
5930 bim_path: None,
5931
5932 metadata: Some(Metadata::new()),
5933
5934 is_a1_counted: None,
5935 num_threads: None,
5936 missing_value: None,
5937 skip_fam: None,
5938 skip_bim: None,
5939 }
5940 }
5941}
5942
/// Conversion from an array of strings into an array of `Self`.
///
/// Implemented in this file for `String` (returned as is) and for `f32` and `i32`
/// (each element is parsed).
///
/// NOTE(review): the type parameter `T` does not appear in the method signature; every
/// implementation here passes its own type for it.
trait FromStringArray<T> {
    /// Consumes `string_array` and returns the converted array, or an error if a conversion fails.
    #[allow(dead_code)]
    fn from_string_array(
        string_array: nd::Array1<String>,
    ) -> Result<nd::Array1<Self>, Box<BedErrorPlus>>
    where
        Self: Sized;
}
5951
impl FromStringArray<String> for String {
    /// Strings need no conversion: the input array is returned unchanged and this never fails.
    fn from_string_array(
        string_array: nd::Array1<String>,
    ) -> Result<nd::Array1<String>, Box<BedErrorPlus>> {
        Ok(string_array)
    }
}
5959
5960impl FromStringArray<f32> for f32 {
5961 fn from_string_array(
5962 string_array: nd::Array1<String>,
5963 ) -> Result<nd::Array1<f32>, Box<BedErrorPlus>> {
5964 let result = string_array
5965 .iter()
5966 .map(|s| s.parse::<f32>())
5967 .collect::<Result<nd::Array1<f32>, _>>();
5968 match result {
5969 Ok(array) => Ok(array),
5970 Err(e) => Err(Box::new(BedErrorPlus::ParseFloatError(e))),
5971 }
5972 }
5973}
5974impl FromStringArray<i32> for i32 {
5975 fn from_string_array(
5976 string_array: nd::Array1<String>,
5977 ) -> Result<nd::Array1<i32>, Box<BedErrorPlus>> {
5978 let result = string_array
5979 .iter()
5980 .map(|s| s.parse::<i32>())
5981 .collect::<Result<nd::Array1<i32>, _>>();
5982 match result {
5983 Ok(array) => Ok(array),
5984 Err(e) => Err(Box::new(BedErrorPlus::ParseIntError(e))),
5985 }
5986 }
5987}
5988
/// Asserts two 2-D arrays are equal, treating NaNs as values.
///
/// The comparison is done with [`allclose`] using a tolerance of zero and with NaN
/// considered equal to NaN.
///
/// # Panics
/// Panics if the arrays differ (or, via [`allclose`], if their dimensions differ).
///
/// # Example
/// ```
/// use std::f64::NAN;
/// use ndarray as nd;
/// use bed_reader::assert_eq_nan;
/// let val1 = nd::arr2(&[[1.0, 2.0], [3.0, NAN]]);
/// let val2 = nd::arr2(&[[1.0, 2.0], [3.0, NAN]]);
/// assert_eq_nan(&val1, &val2);
/// # use bed_reader::BedErrorPlus;
/// # Ok::<(), Box<BedErrorPlus>>(())
/// ```
pub fn assert_eq_nan<T: 'static + Copy + PartialEq + PartialOrd + Signed + From<i8>>(
    val: &nd::ArrayBase<nd::OwnedRepr<T>, nd::Dim<[usize; 2]>>,
    answer: &nd::ArrayBase<nd::OwnedRepr<T>, nd::Dim<[usize; 2]>>,
) {
    // `0.into()` is the zero tolerance (exact match); `true` makes NaN compare equal to NaN.
    assert!(allclose::<T, T>(
        &val.view(),
        &answer.view(),
        0.into(),
        true
    ));
}
6013
/// Asserts that a result is an error and that the error is of a given variant.
///
/// `$result` is the expression to check and `$pattern` is the pattern its error must match.
/// The error is dereferenced before matching, so this is meant for results whose error is
/// boxed (presumably `Box<BedErrorPlus>`, as used throughout this crate).
///
/// Panics with "test failure" if `$result` is not an `Err` or if the error does not match.
#[macro_export]
macro_rules! assert_error_variant {
    ($result:expr, $pattern:pat) => {
        match $result {
            // `ref` borrows the error instead of moving it out of `$result`;
            // `**` goes through that borrow and then through the box to the error itself.
            Err(ref boxed_error) => match **boxed_error {
                $pattern => (),
                _ => panic!("test failure"),
            },
            _ => panic!("test failure"),
        }
    };
}
6027
6028/// True if and only if two 2-D arrays are equal, within a given tolerance and possibly treating NaNs as values.
6029///
6030/// # Example
6031/// ```
6032/// use std::f64::NAN;
6033/// use ndarray as nd;
6034/// use bed_reader::allclose;
6035/// let val1 = nd::arr2(&[[1.0, 2.000000000001], [3.0, NAN]]);
6036/// let val2 = nd::arr2(&[[1.0, 2.0], [3.0, NAN]]);
6037/// assert!(allclose(&val1.view(), &val2.view(), 1e-08, true));
6038/// # use bed_reader::BedErrorPlus;
6039/// # Ok::<(), Box<BedErrorPlus>>(())
6040/// ```
6041pub fn allclose<
6042 T1: 'static + Copy + PartialEq + PartialOrd + Signed,
6043 T2: 'static + Copy + PartialEq + PartialOrd + Signed + Into<T1>,
6044>(
6045 val1: &nd::ArrayView2<'_, T1>,
6046 val2: &nd::ArrayView2<'_, T2>,
6047 atol: T1,
6048 equal_nan: bool,
6049) -> bool {
6050 assert!(val1.dim() == val2.dim());
6051 // Could be run in parallel
6052
6053 nd::Zip::from(val1)
6054 .and(val2)
6055 .fold(true, |acc, ptr_a, ptr_b| -> bool {
6056 if !acc {
6057 return false;
6058 }
6059 // x != x is a generic nan check
6060 #[allow(clippy::eq_op)]
6061 let a_nan = *ptr_a != *ptr_a;
6062 #[allow(clippy::eq_op)]
6063 let b_nan = *ptr_b != *ptr_b;
6064
6065 if a_nan || b_nan {
6066 if equal_nan {
6067 a_nan == b_nan
6068 } else {
6069 false
6070 }
6071 } else {
6072 let c: T1 = abs(*ptr_a - T2::into(*ptr_b));
6073 c <= atol
6074 }
6075 })
6076}
6077
impl WriteOptionsBuilder<i8> {
    /// The input ndarray will be i8.
    ///
    /// Returns the builder unchanged. Because this method exists only on
    /// `WriteOptionsBuilder<i8>`, calling it fixes the builder's value type to `i8`.
    #[must_use]
    pub fn i8(self) -> Self {
        self
    }
}
6085
impl WriteOptionsBuilder<f32> {
    /// The input ndarray will be f32.
    ///
    /// Returns the builder unchanged. Because this method exists only on
    /// `WriteOptionsBuilder<f32>`, calling it fixes the builder's value type to `f32`.
    #[must_use]
    pub fn f32(self) -> Self {
        self
    }
}
6093
impl WriteOptionsBuilder<f64> {
    /// The input ndarray will be f64.
    ///
    /// Returns the builder unchanged. Because this method exists only on
    /// `WriteOptionsBuilder<f64>`, calling it fixes the builder's value type to `f64`.
    #[must_use]
    pub fn f64(self) -> Self {
        self
    }
}
6101
6102fn check_counts(
6103 count_vec: Vec<Option<usize>>,
6104 option_xid_count: &mut Option<usize>,
6105 prefix: &str,
6106) -> Result<(), Box<BedErrorPlus>> {
6107 for count in count_vec.into_iter().flatten() {
6108 if let Some(xid_count) = option_xid_count {
6109 if *xid_count != count {
6110 Err(BedError::InconsistentCount(
6111 prefix.to_string(),
6112 *xid_count,
6113 count,
6114 ))?;
6115 }
6116 } else {
6117 *option_xid_count = Some(count);
6118 }
6119 }
6120
6121 Ok(())
6122}
6123
6124// According to https://docs.rs/derive_builder/latest/derive_builder/
6125// "clone" is OK because "Luckily Rust is clever enough to optimize these
6126// clone-calls away in release builds for your every-day use cases.
6127// Thats quite a safe bet - we checked this for you. ;-)"
6128fn compute_field<T: Clone, F: Fn(usize) -> T>(
6129 field_name: &str,
6130 field: &mut Option<Rc<nd::Array1<T>>>,
6131 count: usize,
6132 lambda: F,
6133) -> Result<(), Box<BedErrorPlus>> {
6134 // let lambda = |_| "0".to_string();
6135 // let count = iid_count;
6136 // let field = &mut metadata.fid;
6137
6138 if let Some(array) = field {
6139 if array.len() != count {
6140 Err(BedError::InconsistentCount(
6141 field_name.to_string(),
6142 array.len(),
6143 count,
6144 ))?;
6145 }
6146 } else {
6147 let array = Rc::new((0..count).map(lambda).collect::<nd::Array1<T>>());
6148 *field = Some(array);
6149 }
6150 Ok(())
6151}
6152
6153impl MetadataBuilder {
6154 /// Create a [`Metadata`](struct.Metadata.html) from the builder.
6155 ///
6156 /// > See [`Metadata::builder()`](struct.Metadata.html#method.builder)
6157 pub fn build(&self) -> Result<Metadata, Box<BedErrorPlus>> {
6158 let metadata = self.build_no_file_check()?;
6159
6160 metadata.check_counts(None, None)?;
6161
6162 Ok(metadata)
6163 }
6164
6165 /// Set the family id (fid) values.
6166 #[anyinput]
6167 pub fn fid(&mut self, fid: AnyIter<AnyString>) -> &mut Self {
6168 self.fid = Some(Some(Rc::new(fid.map(|s| s.as_ref().to_string()).collect())));
6169 self
6170 }
6171
6172 /// Set the individual id (iid) values.
6173 /// ```
6174 /// use ndarray as nd;
6175 /// use bed_reader::{Metadata, assert_eq_nan};
6176 ///
6177 /// let metadata = Metadata::builder()
6178 /// .iid(["sample1", "sample2", "sample3"])
6179 /// .build()?;
6180 /// println!("{:?}", metadata.iid()); // Outputs ndarray Some(["sample1", "sample2", "sample3"])
6181 /// # use bed_reader::BedErrorPlus;
6182 /// # Ok::<(), Box<BedErrorPlus>>(())
6183 /// ```
6184 #[anyinput]
6185 pub fn iid(&mut self, iid: AnyIter<AnyString>) -> &mut Self {
6186 self.iid = Some(Some(Rc::new(iid.map(|s| s.as_ref().to_owned()).collect())));
6187 self
6188 }
6189
6190 /// Set the father values.
6191 #[anyinput]
6192 pub fn father(&mut self, father: AnyIter<AnyString>) -> &mut Self {
6193 self.father = Some(Some(Rc::new(
6194 father.map(|s| s.as_ref().to_owned()).collect(),
6195 )));
6196 self
6197 }
6198
6199 /// Override the mother values.
6200 #[anyinput]
6201 pub fn mother(&mut self, mother: AnyIter<AnyString>) -> &mut Self {
6202 self.mother = Some(Some(Rc::new(
6203 mother.map(|s| s.as_ref().to_owned()).collect(),
6204 )));
6205 self
6206 }
6207
6208 /// Override the sex values.
6209 #[anyinput]
6210 pub fn sex(&mut self, sex: AnyIter<i32>) -> &mut Self {
6211 self.sex = Some(Some(Rc::new(sex.collect())));
6212 self
6213 }
6214
6215 /// Override the phenotype values.
6216 #[anyinput]
6217 pub fn pheno(&mut self, pheno: AnyIter<AnyString>) -> &mut Self {
6218 self.pheno = Some(Some(Rc::new(
6219 pheno.map(|s| s.as_ref().to_owned()).collect(),
6220 )));
6221 self
6222 }
6223
6224 /// Override the chromosome values.
6225 #[anyinput]
6226 pub fn chromosome(&mut self, chromosome: AnyIter<AnyString>) -> &mut Self {
6227 self.chromosome = Some(Some(Rc::new(
6228 chromosome.map(|s| s.as_ref().to_owned()).collect(),
6229 )));
6230 self
6231 }
6232
6233 /// Override the SNP id (sid) values.
6234 /// ```
6235 /// use ndarray as nd;
6236 /// use bed_reader::{Metadata, assert_eq_nan};
6237 ///
6238 /// let metadata = Metadata::builder()
6239 /// .sid(["SNP1", "SNP2", "SNP3", "SNP4"])
6240 /// .build()?;
6241 /// println!("{:?}", metadata.sid()); // Outputs ndarray Some(["SNP1", "SNP2", "SNP3", "SNP4"])
6242 /// # use bed_reader::BedErrorPlus;
6243 /// # Ok::<(), Box<BedErrorPlus>>(())
6244 /// ```
6245 #[anyinput]
6246 pub fn sid(&mut self, sid: AnyIter<AnyString>) -> &mut Self {
6247 self.sid = Some(Some(Rc::new(
6248 sid.into_iter().map(|s| s.as_ref().to_owned()).collect(),
6249 )));
6250 self
6251 }
6252
6253 /// Override the centimorgan position values.
6254 #[anyinput]
6255 pub fn cm_position(&mut self, cm_position: AnyIter<f32>) -> &mut Self {
6256 self.cm_position = Some(Some(Rc::new(cm_position.into_iter().collect())));
6257 self
6258 }
6259
6260 /// Override the base-pair position values.
6261 #[anyinput]
6262 pub fn bp_position(&mut self, bp_position: AnyIter<i32>) -> &mut Self {
6263 self.bp_position = Some(Some(Rc::new(bp_position.into_iter().collect())));
6264 self
6265 }
6266
6267 /// Override the allele 1 values.
6268 #[anyinput]
6269 pub fn allele_1(&mut self, allele_1: AnyIter<AnyString>) -> &mut Self {
6270 self.allele_1 = Some(Some(Rc::new(
6271 allele_1
6272 .into_iter()
6273 .map(|s| s.as_ref().to_owned())
6274 .collect(),
6275 )));
6276 self
6277 }
6278
6279 /// Override the allele 2 values.
6280 #[anyinput]
6281 pub fn allele_2(&mut self, allele_2: AnyIter<AnyString>) -> &mut Self {
6282 self.allele_2 = Some(Some(Rc::new(
6283 allele_2
6284 .into_iter()
6285 .map(|s| s.as_ref().to_owned())
6286 .collect(),
6287 )));
6288 self
6289 }
6290
6291 /// Merge metadata from a [`Metadata`](struct.Metadata.html).
6292 ///
6293 /// # Example
6294 ///
6295 /// In the example, we create a [`Metadata`](struct.Metadata.html) with iid
6296 /// and sid arrays. Next, we use another [`MetadataBuilder`](struct.MetadataBuilder.html) to set an fid array
6297 /// and an iid array. Then, we add the first [`Metadata`](struct.Metadata.html)
6298 /// to the [`MetadataBuilder`](struct.MetadataBuilder.html),
6299 /// overwriting iid and setting sid. Finally, we print these
6300 /// three arrays and chromosome. Chromosome is `None`.
6301 ///```
6302 /// use ndarray as nd;
6303 /// use bed_reader::Metadata;
6304 ///
6305 /// let metadata1 = Metadata::builder()
6306 /// .iid(["i1", "i2", "i3"])
6307 /// .sid(["s1", "s2", "s3", "s4"])
6308 /// .build()?;
6309 /// let metadata2 = Metadata::builder()
6310 /// .fid(["f1", "f2", "f3"])
6311 /// .iid(["x1", "x2", "x3"])
6312 /// .metadata(&metadata1)
6313 /// .build()?;
6314 ///
6315 /// println!("{0:?}", metadata2.fid()); // Outputs optional ndarray Some(["f1", "f2", "f3"]...)
6316 /// println!("{0:?}", metadata2.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
6317 /// println!("{0:?}", metadata2.sid()); // Outputs optional ndarray Some(["s1", "s2", "s3", "s4"]...)
6318 /// println!("{0:?}", metadata2.chromosome()); // Outputs None
6319 /// # use bed_reader::BedErrorPlus;
6320 /// # Ok::<(), Box<BedErrorPlus>>(())
6321 /// ```
6322 pub fn metadata(&mut self, metadata: &Metadata) -> &mut Self {
6323 set_field(&metadata.fid, &mut self.fid);
6324 set_field(&metadata.iid, &mut self.iid);
6325 set_field(&metadata.father, &mut self.father);
6326 set_field(&metadata.mother, &mut self.mother);
6327 set_field(&metadata.sex, &mut self.sex);
6328 set_field(&metadata.pheno, &mut self.pheno);
6329
6330 set_field(&metadata.chromosome, &mut self.chromosome);
6331 set_field(&metadata.sid, &mut self.sid);
6332 set_field(&metadata.cm_position, &mut self.cm_position);
6333 set_field(&metadata.bp_position, &mut self.bp_position);
6334 set_field(&metadata.allele_1, &mut self.allele_1);
6335 set_field(&metadata.allele_2, &mut self.allele_2);
6336 self
6337 }
6338}
6339
impl Default for Metadata {
    /// The default is whatever [`Metadata::new`] returns.
    fn default() -> Self {
        Self::new()
    }
}
6345
6346impl Metadata {
6347 fn check_counts(
6348 &self,
6349 mut iid_count: Option<usize>,
6350 mut sid_count: Option<usize>,
6351 ) -> Result<(Option<usize>, Option<usize>), Box<BedErrorPlus>> {
6352 check_counts(
6353 vec![
6354 lazy_or_skip_count(&self.fid),
6355 lazy_or_skip_count(&self.iid),
6356 lazy_or_skip_count(&self.father),
6357 lazy_or_skip_count(&self.mother),
6358 lazy_or_skip_count(&self.sex),
6359 lazy_or_skip_count(&self.pheno),
6360 ],
6361 &mut iid_count,
6362 "iid",
6363 )?;
6364 check_counts(
6365 vec![
6366 lazy_or_skip_count(&self.chromosome),
6367 lazy_or_skip_count(&self.sid),
6368 lazy_or_skip_count(&self.cm_position),
6369 lazy_or_skip_count(&self.bp_position),
6370 lazy_or_skip_count(&self.allele_1),
6371 lazy_or_skip_count(&self.allele_2),
6372 ],
6373 &mut sid_count,
6374 "sid",
6375 )?;
6376 Ok((iid_count, sid_count))
6377 }
6378
6379 /// Create a [`Metadata`](struct.Metadata.html) using a builder.
6380 ///
6381 /// # Example
6382 /// Create metadata.
6383 /// Create a random file with the metadata.
6384 /// ```
6385 /// use ndarray as nd;
6386 /// use bed_reader::{Metadata, WriteOptions};
6387 /// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
6388 ///
6389 /// let metadata = Metadata::builder()
6390 /// .iid(["i1", "i2", "i3"])
6391 /// .sid(["s1", "s2", "s3", "s4"])
6392 /// .build()?;
6393 /// let mut rng = StdRng::seed_from_u64(0);
6394 /// let val = nd::Array::random_using((3, 4), Uniform::from(-1..3), &mut rng);
6395
6396 /// let temp_out = temp_testdir::TempDir::default();
6397 /// let output_file = temp_out.join("random.bed");
6398 /// WriteOptions::builder(output_file)
6399 /// .metadata(&metadata)
6400 /// .missing_value(-1)
6401 /// .write(&val)?;
6402 /// # use bed_reader::BedErrorPlus;
6403 /// # Ok::<(), Box<BedErrorPlus>>(())
6404 /// ```
6405 #[must_use]
    pub fn builder() -> MetadataBuilder {
        // NOTE(review): relies on `MetadataBuilder`'s `Default` impl (defined elsewhere) for the
        // initial state, presumably with every field unset.
        MetadataBuilder::default()
    }
6409
6410 /// Create an empty [`Metadata`](struct.Metadata.html).
6411 ///
6412 /// > See [`Metadata::builder()`](struct.Metadata.html#method.builder)
6413 #[must_use]
6414 pub fn new() -> Metadata {
6415 // Unwrap always works because an empty metadata builder always works.
6416 Metadata::builder().build().unwrap()
6417 }
6418
    /// Optional family id of each individual (sample)
    #[must_use]
    pub fn fid(&self) -> Option<&nd::Array1<String>> {
        option_rc_as_ref(&self.fid)
    }
6424
    /// Optional individual id of each individual (sample)
    ///
    /// # Example:
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::Metadata;
    /// let metadata = Metadata::builder().iid(["i1", "i2", "i3"]).build()?;
    /// println!("{0:?}", metadata.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
    /// println!("{0:?}", metadata.sid()); // Outputs None
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[must_use]
    pub fn iid(&self) -> Option<&nd::Array1<String>> {
        option_rc_as_ref(&self.iid)
    }
6440
    /// Optional father id of each individual (sample)
    ///
    /// Returns `None` when the father ids have not been set.
    #[must_use]
    pub fn father(&self) -> Option<&nd::Array1<String>> {
        option_rc_as_ref(&self.father)
    }
6446
    /// Optional mother id of each individual (sample)
    ///
    /// Returns `None` when the mother ids have not been set.
    #[must_use]
    pub fn mother(&self) -> Option<&nd::Array1<String>> {
        option_rc_as_ref(&self.mother)
    }
6452
    /// Optional sex of each individual (sample)
    ///
    /// Returns `None` when the sex values have not been set.
    #[must_use]
    pub fn sex(&self) -> Option<&nd::Array1<i32>> {
        option_rc_as_ref(&self.sex)
    }
6458
    /// Optional phenotype for each individual (seldom used)
    ///
    /// Returns `None` when the phenotypes have not been set.
    #[must_use]
    pub fn pheno(&self) -> Option<&nd::Array1<String>> {
        option_rc_as_ref(&self.pheno)
    }
6464
    /// Optional chromosome of each SNP (variant)
    ///
    /// Returns `None` when the chromosomes have not been set.
    #[must_use]
    pub fn chromosome(&self) -> Option<&nd::Array1<String>> {
        option_rc_as_ref(&self.chromosome)
    }
6470
    /// Optional SNP id of each SNP (variant)
    ///
    /// Returns `None` when the SNP ids have not been set.
    ///
    /// # Example:
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::Metadata;
    /// let metadata = Metadata::builder().sid(["s1", "s2", "s3", "s4"]).build()?;
    /// println!("{0:?}", metadata.sid()); // Outputs optional ndarray Some(["s1", "s2", "s3", "s4"]...)
    /// println!("{0:?}", metadata.iid()); // Outputs None
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[must_use]
    pub fn sid(&self) -> Option<&nd::Array1<String>> {
        option_rc_as_ref(&self.sid)
    }
6486
    /// Optional centimorgan position of each SNP (variant)
    ///
    /// Returns `None` when the centimorgan positions have not been set.
    #[must_use]
    pub fn cm_position(&self) -> Option<&nd::Array1<f32>> {
        option_rc_as_ref(&self.cm_position)
    }
6492
    /// Optional base-pair position of each SNP (variant)
    ///
    /// Returns `None` when the base-pair positions have not been set.
    #[must_use]
    pub fn bp_position(&self) -> Option<&nd::Array1<i32>> {
        option_rc_as_ref(&self.bp_position)
    }
6498
    /// Optional first allele of each SNP (variant)
    ///
    /// Returns `None` when the first alleles have not been set.
    #[must_use]
    pub fn allele_1(&self) -> Option<&nd::Array1<String>> {
        option_rc_as_ref(&self.allele_1)
    }
6504
    /// Optional second allele of each SNP (variant)
    ///
    /// Returns `None` when the second alleles have not been set.
    #[must_use]
    pub fn allele_2(&self) -> Option<&nd::Array1<String>> {
        option_rc_as_ref(&self.allele_2)
    }
6510
6511 /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty fields with a .fam file.
6512 ///
6513 /// # Example
6514 ///
6515 /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6516 /// Do not skip any fields.
6517 /// ```
6518 /// use ndarray as nd;
6519 /// use std::collections::HashSet;
6520 /// use bed_reader::{Metadata, MetadataFields, sample_file};
6521 ///
6522 /// let skip_set = HashSet::<MetadataFields>::new();
6523 /// let metadata_empty = Metadata::new();
6524 /// let (metadata_fam, iid_count) =
6525 /// metadata_empty.read_fam(sample_file("small.fam")?, &skip_set)?;
6526 /// let (metadata_bim, sid_count) =
6527 /// metadata_fam.read_bim(sample_file("small.bim")?, &skip_set)?;
6528 /// assert_eq!(iid_count, 3);
6529 /// assert_eq!(sid_count, 4);
6530 /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6531 /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6532 /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6533 /// # use bed_reader::BedErrorPlus;
6534 /// # Ok::<(), Box<BedErrorPlus>>(())
6535 /// ```
6536 #[anyinput]
6537 pub fn read_fam(
6538 &self,
6539 path: AnyPath,
6540 skip_set: &HashSet<MetadataFields>,
6541 ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6542 let mut field_vec: Vec<usize> = Vec::new();
6543
6544 if self.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6545 field_vec.push(0);
6546 }
6547 if self.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6548 field_vec.push(1);
6549 }
6550 if self.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6551 field_vec.push(2);
6552 }
6553 if self.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6554 field_vec.push(3);
6555 }
6556 if self.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6557 field_vec.push(4);
6558 }
6559 if self.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6560 field_vec.push(5);
6561 }
6562
6563 let (mut vec_of_vec, count) = Metadata::read_fam_or_bim(&field_vec, true, path)?;
6564
6565 let mut clone = self.clone();
6566
6567 // unwraps are safe because we pop once for every push
6568 if clone.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6569 clone.pheno = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6570 }
6571 if clone.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6572 let vec = vec_of_vec.pop().unwrap();
6573 let array = vec
6574 .iter()
6575 .map(|s| s.parse::<i32>())
6576 .collect::<Result<nd::Array1<i32>, _>>()?;
6577 clone.sex = Some(Rc::new(array));
6578 }
6579 if clone.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6580 clone.mother = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6581 }
6582 if clone.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6583 clone.father = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6584 }
6585 if clone.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6586 clone.iid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6587 }
6588 if clone.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6589 clone.fid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6590 }
6591
6592 clone.check_counts(Some(count), None)?;
6593
6594 Ok((clone, count))
6595 }
6596
6597 /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty
6598 /// fields with a .fam file in the cloud.
6599 ///
6600 /// # Example
6601 ///
6602 /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6603 /// Do not skip any fields.
6604 /// ```
6605 /// use ndarray as nd;
6606 /// use std::collections::HashSet;
6607 /// use bed_reader::{Metadata, MetadataFields, sample_url, CloudFile};
6608 ///
6609 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
6610 /// let skip_set = HashSet::<MetadataFields>::new();
6611 /// let fam_cloud_file = CloudFile::new(sample_url("small.fam")?)?;
6612 /// let bim_cloud_file = CloudFile::new(sample_url("small.bim")?)?;
6613 /// let metadata_empty = Metadata::new();
6614 /// let (metadata_fam, iid_count) =
6615 /// metadata_empty.read_fam_cloud(&fam_cloud_file, &skip_set).await?;
6616 /// let (metadata_bim, sid_count) =
6617 /// metadata_fam.read_bim_cloud(&bim_cloud_file, &skip_set).await?;
6618 /// assert_eq!(iid_count, 3);
6619 /// assert_eq!(sid_count, 4);
6620 /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6621 /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6622 /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6623 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
6624 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
6625 /// ```
6626 pub async fn read_fam_cloud(
6627 &self,
6628 cloud_file: &CloudFile,
6629 skip_set: &HashSet<MetadataFields>,
6630 ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6631 let mut field_vec: Vec<usize> = Vec::new();
6632
6633 if self.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6634 field_vec.push(0);
6635 }
6636 if self.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6637 field_vec.push(1);
6638 }
6639 if self.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6640 field_vec.push(2);
6641 }
6642 if self.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6643 field_vec.push(3);
6644 }
6645 if self.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6646 field_vec.push(4);
6647 }
6648 if self.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6649 field_vec.push(5);
6650 }
6651
6652 let (mut vec_of_vec, count) = self
6653 .read_fam_or_bim_cloud(&field_vec, true, cloud_file)
6654 .await?;
6655
6656 let mut clone = self.clone();
6657
6658 // unwraps are safe because we pop once for every push
6659 if clone.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6660 clone.pheno = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6661 }
6662 if clone.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6663 let vec = vec_of_vec.pop().unwrap();
6664 let array = vec
6665 .iter()
6666 .map(|s| s.parse::<i32>())
6667 .collect::<Result<nd::Array1<i32>, _>>()?;
6668 clone.sex = Some(Rc::new(array));
6669 }
6670 if clone.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6671 clone.mother = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6672 }
6673 if clone.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6674 clone.father = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6675 }
6676 if clone.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6677 clone.iid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6678 }
6679 if clone.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6680 clone.fid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6681 }
6682
6683 clone.check_counts(Some(count), None)?;
6684
6685 Ok((clone, count))
6686 }
6687
6688 /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty fields with a .bim file.
6689 ///
6690 /// # Example
6691 ///
6692 /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6693 /// Do not skip any fields.
6694 /// ```
6695 /// use ndarray as nd;
6696 /// use std::collections::HashSet;
6697 /// use bed_reader::{Metadata, MetadataFields, sample_file};
6698 ///
6699 /// let skip_set = HashSet::<MetadataFields>::new();
6700 /// let metadata_empty = Metadata::new();
6701 /// let (metadata_fam, iid_count) =
6702 /// metadata_empty.read_fam(sample_file("small.fam")?, &skip_set)?;
6703 /// let (metadata_bim, sid_count) =
6704 /// metadata_fam.read_bim(sample_file("small.bim")?, &skip_set)?;
6705 /// assert_eq!(iid_count, 3);
6706 /// assert_eq!(sid_count, 4);
6707 /// println!("{0:?}", metadata_bim.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6708 /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6709 /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6710 /// # use bed_reader::BedErrorPlus;
6711 /// # Ok::<(), Box<BedErrorPlus>>(())
6712 /// ```
6713 #[anyinput]
6714 pub fn read_bim(
6715 &self,
6716 path: AnyPath,
6717 skip_set: &HashSet<MetadataFields>,
6718 ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6719 let mut field_vec: Vec<usize> = Vec::new();
6720 if self.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6721 field_vec.push(0);
6722 }
6723 if self.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6724 field_vec.push(1);
6725 }
6726
6727 if self.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6728 field_vec.push(2);
6729 }
6730 if self.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6731 field_vec.push(3);
6732 }
6733 if self.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6734 field_vec.push(4);
6735 }
6736 if self.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6737 field_vec.push(5);
6738 }
6739
6740 let mut clone = self.clone();
6741 let (mut vec_of_vec, count) = Metadata::read_fam_or_bim(&field_vec, false, path)?;
6742
6743 // unwraps are safe because we pop once for every push
6744 if clone.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6745 clone.allele_2 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6746 }
6747 if clone.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6748 clone.allele_1 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6749 }
6750 if clone.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6751 let vec = vec_of_vec.pop().unwrap();
6752 let array = vec
6753 .iter()
6754 .map(|s| s.parse::<i32>())
6755 .collect::<Result<nd::Array1<i32>, _>>()?;
6756 clone.bp_position = Some(Rc::new(array));
6757 }
6758 if clone.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6759 let vec = vec_of_vec.pop().unwrap();
6760 let array = vec
6761 .iter()
6762 .map(|s| s.parse::<f32>())
6763 .collect::<Result<nd::Array1<f32>, _>>()?;
6764 clone.cm_position = Some(Rc::new(array));
6765 }
6766
6767 if clone.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6768 clone.sid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6769 }
6770 if clone.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6771 clone.chromosome = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6772 }
6773
6774 clone.check_counts(None, Some(count))?;
6775
6776 Ok((clone, count))
6777 }
6778
6779 /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty
6780 /// fields with a .bim file in the cloud.
6781 ///
6782 /// # Example
6783 ///
6784 /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6785 /// Do not skip any fields.
6786 /// ```
6787 /// use ndarray as nd;
6788 /// use std::collections::HashSet;
6789 /// use bed_reader::{Metadata, MetadataFields, sample_url, CloudFile};
6790 ///
6791 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
6792 /// let skip_set = HashSet::<MetadataFields>::new();
6793 /// let fam_cloud_file = CloudFile::new(sample_url("small.fam")?)?;
6794 /// let bim_cloud_file = CloudFile::new(sample_url("small.bim")?)?;
6795 /// let metadata_empty = Metadata::new();
6796 /// let (metadata_fam, iid_count) =
6797 /// metadata_empty.read_fam_cloud(&fam_cloud_file, &skip_set).await?;
6798 /// let (metadata_bim, sid_count) =
6799 /// metadata_fam.read_bim_cloud(&bim_cloud_file, &skip_set).await?;
6800 /// assert_eq!(iid_count, 3);
6801 /// assert_eq!(sid_count, 4);
6802 /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6803 /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6804 /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6805 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
6806 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
6807 /// ```
6808 pub async fn read_bim_cloud(
6809 &self,
6810 cloud_file: &CloudFile,
6811 skip_set: &HashSet<MetadataFields>,
6812 ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6813 let mut field_vec: Vec<usize> = Vec::new();
6814 if self.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6815 field_vec.push(0);
6816 }
6817 if self.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6818 field_vec.push(1);
6819 }
6820
6821 if self.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6822 field_vec.push(2);
6823 }
6824 if self.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6825 field_vec.push(3);
6826 }
6827 if self.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6828 field_vec.push(4);
6829 }
6830 if self.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6831 field_vec.push(5);
6832 }
6833
6834 let mut clone = self.clone();
6835 let (mut vec_of_vec, count) = self
6836 .read_fam_or_bim_cloud(&field_vec, false, cloud_file)
6837 .await?;
6838
6839 // unwraps are safe because we pop once for every push
6840 if clone.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6841 clone.allele_2 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6842 }
6843 if clone.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6844 clone.allele_1 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6845 }
6846 if clone.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6847 let vec = vec_of_vec.pop().unwrap();
6848 let array = vec
6849 .iter()
6850 .map(|s| s.parse::<i32>())
6851 .collect::<Result<nd::Array1<i32>, _>>()?;
6852 clone.bp_position = Some(Rc::new(array));
6853 }
6854 if clone.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6855 let vec = vec_of_vec.pop().unwrap();
6856 let array = vec
6857 .iter()
6858 .map(|s| s.parse::<f32>())
6859 .collect::<Result<nd::Array1<f32>, _>>()?;
6860 clone.cm_position = Some(Rc::new(array));
6861 }
6862
6863 if clone.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6864 clone.sid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6865 }
6866 if clone.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6867 clone.chromosome = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6868 }
6869
6870 clone.check_counts(None, Some(count))?;
6871
6872 Ok((clone, count))
6873 }
6874
6875 #[anyinput]
6876 fn read_fam_or_bim(
6877 field_vec: &[usize],
6878 is_split_whitespace: bool,
6879 path: AnyPath,
6880 ) -> Result<(Vec<Vec<String>>, usize), Box<BedErrorPlus>> {
6881 let mut vec_of_vec = vec![vec![]; field_vec.len()];
6882
6883 let file = File::open(path)?;
6884
6885 let reader = BufReader::new(file);
6886 let mut count = 0;
6887 for line in reader.lines() {
6888 let line = line?;
6889 count += 1;
6890
6891 let fields: Vec<&str> = if is_split_whitespace {
6892 line.split_whitespace().collect()
6893 } else {
6894 line.split('\t').collect()
6895 };
6896
6897 if fields.len() != 6 {
6898 Err(BedError::MetadataFieldCount(
6899 6,
6900 fields.len(),
6901 path_ref_to_string(path),
6902 ))?;
6903 }
6904
6905 let mut of_interest_count = 0;
6906 for (field_index, field) in fields.iter().enumerate() {
6907 if field_vec.contains(&field_index) {
6908 vec_of_vec[of_interest_count].push((*field).to_string());
6909 of_interest_count += 1;
6910 }
6911 }
6912 }
6913
6914 Ok((vec_of_vec, count))
6915 }
6916
6917 async fn read_fam_or_bim_cloud(
6918 &self,
6919 field_vec: &[usize],
6920 is_split_whitespace: bool,
6921 cloud_file: &CloudFile,
6922 ) -> Result<(Vec<Vec<String>>, usize), Box<BedErrorPlus>> {
6923 let mut vec_of_vec = vec![vec![]; field_vec.len()];
6924 let mut count = 0;
6925
6926 let mut line_chunks = cloud_file.stream_line_chunks().await?;
6927 while let Some(line_chunk) = line_chunks.next().await {
6928 let line_chunk = line_chunk.map_err(CloudFileError::ObjectStoreError)?;
6929 let lines = std::str::from_utf8(&line_chunk)?.lines();
6930 for line in lines {
6931 count += 1;
6932
6933 let fields: Vec<&str> = if is_split_whitespace {
6934 line.split_whitespace().collect()
6935 } else {
6936 line.split('\t').collect()
6937 };
6938
6939 if fields.len() != 6 {
6940 Err(BedError::MetadataFieldCount(
6941 6,
6942 fields.len(),
6943 cloud_file.to_string(),
6944 ))?;
6945 }
6946
6947 let mut of_interest_count = 0;
6948 for (field_index, field) in fields.iter().enumerate() {
6949 if field_vec.contains(&field_index) {
6950 vec_of_vec[of_interest_count].push((*field).to_string());
6951 of_interest_count += 1;
6952 }
6953 }
6954 }
6955 }
6956
6957 Ok((vec_of_vec, count))
6958 }
6959
6960 fn is_some_fam(&self) -> bool {
6961 self.fid.is_some()
6962 && self.iid.is_some()
6963 && self.father.is_some()
6964 && self.mother.is_some()
6965 && self.sex.is_some()
6966 && self.pheno.is_some()
6967 }
6968 fn is_some_bim(&self) -> bool {
6969 self.chromosome.is_some()
6970 && self.sid.is_some()
6971 && self.cm_position.is_some()
6972 && self.bp_position.is_some()
6973 && self.allele_1.is_some()
6974 && self.allele_2.is_some()
6975 }
6976
6977 /// Write the metadata related to individuals/samples to a .fam file.
6978 ///
6979 /// If any of the .fam metadata is not present, the function will return an error.
6980 ///
6981 /// # Example
6982 ///
6983 /// Create metadata with iid and sid arrays, then fill in the other
6984 /// fields with default arrays, finally write the .fam information
6985 /// to a file.
6986 ///```
6987 /// use ndarray as nd;
6988 /// use std::collections::HashSet;
6989 /// use bed_reader::Metadata;
6990 ///
6991 /// let metadata0 = Metadata::builder()
6992 /// .iid(["i1", "i2", "i3"])
6993 /// .sid(["s1", "s2", "s3", "s4"])
6994 /// .build()?;
6995 /// let metadata_filled = metadata0.fill(3, 4)?;
6996
6997 /// let temp_out = temp_testdir::TempDir::default();
6998 /// let output_file = temp_out.join("no_bed.fam");
6999 /// metadata_filled.write_fam(output_file)?;
7000 /// # use bed_reader::BedErrorPlus;
7001 /// # Ok::<(), Box<BedErrorPlus>>(())
7002 /// ```
7003 #[anyinput]
7004 pub fn write_fam(&self, path: AnyPath) -> Result<(), Box<BedErrorPlus>> {
7005 let file = File::create(path)?;
7006 let mut writer = BufWriter::new(file);
7007 let mut result: Result<(), Box<BedErrorPlus>> = Ok(());
7008
7009 if !self.is_some_fam() {
7010 Err(BedError::MetadataMissingForWrite("fam".to_string()))?;
7011 }
7012
7013 // 1st as_ref turns Option<Rc<Array>> into Option<&Rc<Array>>
7014 // unwrap always works because we checked that all the fields are present
7015 // 2nd as as_ref turns &Rc<Array> into &Array
7016 nd::azip!((fid in self.fid.as_ref().unwrap().as_ref(),
7017 iid in self.iid.as_ref().unwrap().as_ref(),
7018 father in self.father.as_ref().unwrap().as_ref(),
7019 mother in self.mother.as_ref().unwrap().as_ref(),
7020 sex in self.sex.as_ref().unwrap().as_ref(),
7021 pheno in self.pheno.as_ref().unwrap().as_ref(),
7022 )
7023 {
7024 if result.is_ok() {
7025 if let Err(e) = writeln!(
7026 writer,
7027 "{} {} {} {} {} {}",
7028 *fid, *iid, *father, *mother, *sex, *pheno
7029 )
7030 {
7031 result = Err(Box::new(BedErrorPlus::IOError(e)));
7032 }
7033 }});
7034 result?;
7035
7036 Ok(())
7037 }
7038
7039 /// Write the metadata related to SNPs/variants to a .bim file.
7040 ///
7041 /// If any of the .bim metadata is not present, the function will return an error.
7042 ///
7043 /// # Example
7044 ///
7045 /// Create metadata with iid and sid arrays, then fill in the other
7046 /// fields with default arrays, finally write the .bim information
7047 /// to a file.
7048 ///```
7049 /// use ndarray as nd;
7050 /// use std::collections::HashSet;
7051 /// use bed_reader::Metadata;
7052 ///
7053 /// let metadata0 = Metadata::builder()
7054 /// .iid(["i1", "i2", "i3"])
7055 /// .sid(["s1", "s2", "s3", "s4"])
7056 /// .build()?;
7057 /// let metadata_filled = metadata0.fill(3, 4)?;
7058
7059 /// let temp_out = temp_testdir::TempDir::default();
7060 /// let output_file = temp_out.join("no_bed.bim");
7061 /// metadata_filled.write_bim(output_file)?;
7062 /// # use bed_reader::BedErrorPlus;
7063 /// # Ok::<(), Box<BedErrorPlus>>(())
7064 /// ```
7065 #[anyinput]
7066 pub fn write_bim(&self, path: AnyPath) -> Result<(), Box<BedErrorPlus>> {
7067 let file = File::create(path)?;
7068 let mut writer = BufWriter::new(file);
7069 let mut result: Result<(), Box<BedErrorPlus>> = Ok(());
7070
7071 if !self.is_some_bim() {
7072 Err(BedError::MetadataMissingForWrite("bim".to_string()))?;
7073 }
7074
7075 // 1st as_ref turns Option<Rc<Array>> into Option<&Rc<Array>>
7076 // unwrap always works because we checked that all the fields are present
7077 // 2nd as as_ref turns &Rc<Array> into &Array
7078 nd::azip!((
7079 chromosome in self.chromosome.as_ref().unwrap().as_ref(),
7080 sid in self.sid.as_ref().unwrap().as_ref(),
7081 cm_position in self.cm_position.as_ref().unwrap().as_ref(),
7082 bp_position in self.bp_position.as_ref().unwrap().as_ref(),
7083 allele_1 in self.allele_1.as_ref().unwrap().as_ref(),
7084 allele_2 in self.allele_2.as_ref().unwrap().as_ref(),
7085 )
7086 {
7087 if result.is_ok() {
7088 if let Err(e) = writeln!(
7089 writer,
7090 "{}\t{}\t{}\t{}\t{}\t{}",
7091 *chromosome, *sid, *cm_position, *bp_position, *allele_1, *allele_2
7092 )
7093 {
7094 result = Err(Box::new(BedErrorPlus::IOError(e)));
7095 }
7096 }
7097 });
7098 result?;
7099
7100 Ok(())
7101 }
7102
    /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty fields with default values.
    ///
    /// `iid_count` is the count passed for the individual (sample) fields and
    /// `sid_count` the count passed for the SNP (variant) fields.
    ///
    /// The default generators supplied for each field are:
    ///
    /// | Field | Default |
    /// | ----- | ------- |
    /// | `fid`, `father`, `mother`, `pheno`, `chromosome` | `"0"` |
    /// | `iid` | `"iid1"`, `"iid2"`, ... |
    /// | `sid` | `"sid1"`, `"sid2"`, ... |
    /// | `sex`, `bp_position` | `0` |
    /// | `cm_position` | `0.0` |
    /// | `allele_1` | `"A1"` |
    /// | `allele_2` | `"A2"` |
    ///
    /// # Example
    /// ```
    /// use ndarray as nd;
    /// use std::collections::HashSet;
    /// use bed_reader::{Metadata, MetadataFields};
    ///
    /// let metadata0 = Metadata::builder()
    ///     .iid(["i1", "i2", "i3"])
    ///     .sid(["s1", "s2", "s3", "s4"])
    ///     .build()?;
    /// let metadata_filled = metadata0.fill(3, 4)?;
    ///
    /// println!("{0:?}", metadata_filled.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
    /// println!("{0:?}", metadata_filled.sid()); // Outputs optional ndarray Some(["s1", "s2", "s3", "s4"]...)
    /// println!("{0:?}", metadata_filled.chromosome()); // Outputs optional ndarray Some(["0", "0", "0", "0"]...)
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    pub fn fill(&self, iid_count: usize, sid_count: usize) -> Result<Metadata, Box<BedErrorPlus>> {
        // Work on a copy; `self` is never modified.
        let mut metadata = self.clone();

        // NOTE(review): `compute_field` is defined elsewhere. Presumably it keeps a
        // field that is already set (possibly checking its length against the count)
        // and otherwise generates `count` values with the closure -- confirm there.
        // The first failing call decides which error is returned.

        // Individual (sample) fields, sized by `iid_count`.
        compute_field("fid", &mut metadata.fid, iid_count, |_| "0".to_string())?;
        compute_field("iid", &mut metadata.iid, iid_count, |i| {
            format!("iid{}", i + 1)
        })?;
        compute_field("father", &mut metadata.father, iid_count, |_| {
            "0".to_string()
        })?;
        compute_field("mother", &mut metadata.mother, iid_count, |_| {
            "0".to_string()
        })?;
        compute_field("sex", &mut metadata.sex, iid_count, |_| 0)?;
        compute_field("pheno", &mut metadata.pheno, iid_count, |_| "0".to_string())?;
        // SNP (variant) fields, sized by `sid_count`.
        compute_field("chromosome", &mut metadata.chromosome, sid_count, |_| {
            "0".to_string()
        })?;
        compute_field("sid", &mut metadata.sid, sid_count, |i| {
            format!("sid{}", i + 1)
        })?;
        compute_field("cm_position", &mut metadata.cm_position, sid_count, |_| 0.0)?;
        compute_field("bp_position", &mut metadata.bp_position, sid_count, |_| 0)?;
        compute_field("allele_1", &mut metadata.allele_1, sid_count, |_| {
            "A1".to_string()
        })?;
        compute_field("allele_2", &mut metadata.allele_2, sid_count, |_| {
            "A2".to_string()
        })?;

        Ok(metadata)
    }
7155
7156 #[anyinput]
7157 fn set_fid(&mut self, fid: AnyIter<AnyString>) -> &Self {
7158 self.fid = Some(Rc::new(
7159 fid.into_iter().map(|s| s.as_ref().to_owned()).collect(),
7160 ));
7161 self
7162 }
7163
7164 #[anyinput]
7165 fn set_iid(&mut self, iid: AnyIter<AnyString>) -> &Self {
7166 self.iid = Some(Rc::new(
7167 iid.into_iter().map(|s| s.as_ref().to_owned()).collect(),
7168 ));
7169 self
7170 }
7171
7172 #[anyinput]
7173 fn set_father(&mut self, father: AnyIter<AnyString>) -> &Self {
7174 self.father = Some(Rc::new(father.map(|s| s.as_ref().to_owned()).collect()));
7175 self
7176 }
7177
7178 #[anyinput]
7179 fn set_mother(&mut self, mother: AnyIter<AnyString>) -> &Self {
7180 self.mother = Some(Rc::new(mother.map(|s| s.as_ref().to_owned()).collect()));
7181 self
7182 }
7183
7184 #[anyinput]
7185 fn set_sex(&mut self, sex: AnyIter<i32>) -> &Self {
7186 self.sex = Some(Rc::new(sex.collect()));
7187 self
7188 }
7189
7190 #[anyinput]
7191 fn set_pheno(&mut self, pheno: AnyIter<AnyString>) -> &Self {
7192 self.pheno = Some(Rc::new(pheno.map(|s| s.as_ref().to_owned()).collect()));
7193 self
7194 }
7195
7196 #[anyinput]
7197 fn set_chromosome(&mut self, chromosome: AnyIter<AnyString>) -> &Self {
7198 self.chromosome = Some(Rc::new(chromosome.map(|s| s.as_ref().to_owned()).collect()));
7199 self
7200 }
7201
7202 #[anyinput]
7203 fn set_sid(&mut self, sid: AnyIter<AnyString>) -> &Self {
7204 self.sid = Some(Rc::new(sid.map(|s| s.as_ref().to_owned()).collect()));
7205 self
7206 }
7207
7208 #[anyinput]
7209 fn set_cm_position(&mut self, cm_position: AnyIter<f32>) -> &Self {
7210 self.cm_position = Some(Rc::new(cm_position.into_iter().collect()));
7211 self
7212 }
7213
7214 #[anyinput]
7215 fn set_bp_position(&mut self, bp_position: AnyIter<i32>) -> &Self {
7216 self.bp_position = Some(Rc::new(bp_position.into_iter().collect()));
7217 self
7218 }
7219
7220 #[anyinput]
7221 fn set_allele_1(&mut self, allele_1: AnyIter<AnyString>) -> &Self {
7222 self.allele_1 = Some(Rc::new(allele_1.map(|s| s.as_ref().to_owned()).collect()));
7223 self
7224 }
7225
7226 #[anyinput]
7227 fn set_allele_2(&mut self, allele_2: AnyIter<AnyString>) -> &Self {
7228 self.allele_2 = Some(Rc::new(allele_2.map(|s| s.as_ref().to_owned()).collect()));
7229 self
7230 }
7231}
7232
7233#[allow(clippy::option_option)]
7234fn set_field<T>(
7235 field1: &Option<Rc<nd::Array1<T>>>,
7236 field2: &mut Option<Option<Rc<nd::Array1<T>>>>,
7237) {
7238 if let Some(array) = field1 {
7239 *field2 = Some(Some(array.clone()));
7240 }
7241}
7242
7243fn option_rc_as_ref<T>(field: &Option<Rc<nd::Array1<T>>>) -> Option<&nd::Array1<T>> {
7244 match field {
7245 Some(array) => Some(array.as_ref()),
7246 None => None,
7247 }
7248}
7249
7250#[allow(dead_code)]
7251fn matrix_subset_no_alloc<
7252 TIn: Copy + Default + Debug + Sync + Send + Sync + Sized,
7253 TOut: Copy + Default + Debug + Sync + Send + Sync + From<TIn>,
7254>(
7255 in_val: &nd::ArrayView3<'_, TIn>,
7256 iid_index: &[usize],
7257 sid_index: &[usize],
7258 out_val: &mut nd::ArrayViewMut3<'_, TOut>,
7259) -> Result<(), Box<BedErrorPlus>> {
7260 let out_iid_count = iid_index.len();
7261 let out_sid_count = sid_index.len();
7262 let did_count = in_val.dim().2;
7263
7264 if (out_iid_count, out_sid_count, did_count) != out_val.dim() {
7265 Err(BedError::SubsetMismatch(
7266 out_iid_count,
7267 out_sid_count,
7268 out_val.dim().0,
7269 out_val.dim().1,
7270 ))?;
7271 }
7272
7273 // If output is F-order (or in general if iid stride is no more than sid_stride)
7274 if out_val.stride_of(nd::Axis(0)) <= out_val.stride_of(nd::Axis(1)) {
7275 // (No error are possible in the par_azip, so don't have to collect and check them)
7276 nd::par_azip!((mut out_col in out_val.axis_iter_mut(nd::Axis(1)),
7277 in_sid_i_pr in sid_index) {
7278 let in_col = in_val.index_axis(nd::Axis(1), *in_sid_i_pr);
7279 for did_i in 0..did_count
7280 {
7281 for (out_iid_i, in_iid_i_ptr) in iid_index.iter().enumerate() {
7282 out_col[(out_iid_i,did_i)] = in_col[(*in_iid_i_ptr,did_i)].into();
7283 }
7284 }
7285 });
7286 Ok(())
7287 } else {
7288 //If output is C-order, transpose input and output and recurse
7289 let in_val_t = in_val.view().permuted_axes([1, 0, 2]);
7290 let mut out_val_t = out_val.view_mut().permuted_axes([1, 0, 2]);
7291 matrix_subset_no_alloc(&in_val_t, sid_index, iid_index, &mut out_val_t)
7292 }
7293}
7294
// Shared fetcher used by `sample_file` and `sample_files` to obtain sample data.
// Arguments, in order:
//   - the contents of `registry.txt`, embedded at compile time,
//   - the base URL the sample files are downloaded from,
//   - the name of the environment variable that can override the data directory,
//   - three identifier strings; NOTE(review): presumably the qualifier,
//     organization, and application used to pick the OS cache folder -- confirm
//     against the `fetch-data` documentation.
#[fetch_data::ctor]
static STATIC_FETCH_DATA: FetchData = FetchData::new(
    include_str!("../bed_reader/tests/registry.txt"),
    "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/",
    "BED_READER_DATA_DIR",
    "github.io",
    "fastlmm",
    "bed-reader",
);
7304
7305/// Returns the local path to a sample .bed file. If necessary, the file will be downloaded.
7306///
7307/// The .fam and .bim files will also be downloaded, if they are not already present.
7308/// SHA256 hashes are used to verify that the files are correct.
7309/// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
7310/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
7311#[anyinput]
7312pub fn sample_bed_file(bed_path: AnyPath) -> Result<PathBuf, Box<BedErrorPlus>> {
7313 let mut path_list: Vec<PathBuf> = Vec::new();
7314 for ext in &["bed", "bim", "fam"] {
7315 let file_path = bed_path.with_extension(ext);
7316 path_list.push(file_path);
7317 }
7318
7319 let vec = sample_files(path_list)?;
7320 assert!(vec.len() == 3);
7321 Ok(vec[0].clone())
7322}
7323
7324/// Returns the local path to a sample file. If necessary, the file will be downloaded.
7325///
7326/// A SHA256 hash is used to verify that the file is correct.
7327/// The file will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
7328/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
7329#[anyinput]
7330pub fn sample_file(path: AnyPath) -> Result<PathBuf, Box<BedErrorPlus>> {
7331 Ok(STATIC_FETCH_DATA
7332 .fetch_file(path)
7333 .map_err(|e| BedError::SampleFetch(e.to_string()))?)
7334}
7335
7336/// Returns the local paths to a list of files. If necessary, the files will be downloaded.
7337///
7338/// SHA256 hashes are used to verify that the files are correct.
7339/// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
7340/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
7341#[anyinput]
7342pub fn sample_files(path_list: AnyIter<AnyPath>) -> Result<Vec<PathBuf>, Box<BedErrorPlus>>
7343where
7344{
7345 Ok(STATIC_FETCH_DATA
7346 .fetch_files(path_list)
7347 .map_err(|e| BedError::SampleFetch(e.to_string()))?)
7348}
7349
// NOTE(review): the example below imports `EMPTY_OPTIONS` from `cloud_file`
// rather than using this constant -- confirm that this is intended.
/// An empty set of cloud options
///
/// This is a zero-length array of `(key, value)` pairs. Pass it where cloud
/// options are expected but none are needed, as in the call to
/// `CloudFile::new_with_options` in the example.
///
/// # Example
/// ```
/// use cloud_file::{EMPTY_OPTIONS, CloudFile};
///
/// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
/// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/plink_sim_10s_100v_10pmiss.bed";
/// let cloud_file = CloudFile::new_with_options(url, EMPTY_OPTIONS)?;
/// assert_eq!(cloud_file.read_file_size().await?, 303);
/// # Ok::<(), BedErrorPlus>(())}).unwrap();
/// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
/// ```
pub const EMPTY_OPTIONS: [(&str, String); 0] = [];
7364
// Documentation-only module: it declares no items. Its rustdoc page is the
// contents of `supplemental_documents/options_etc.md`, and it is compiled
// only when the `tokio` feature is enabled.
#[cfg(feature = "tokio")]
#[doc = include_str!("supplemental_documents/options_etc.md")]
pub mod supplemental_document_options {}
7369
// Documentation-only module: it declares no items. Its rustdoc page is the
// contents of `supplemental_documents/cloud_urls_etc.md`, and it is compiled
// only when the `tokio` feature is enabled.
#[cfg(feature = "tokio")]
#[doc = include_str!("supplemental_documents/cloud_urls_etc.md")]
pub mod supplemental_document_cloud_urls {}