bed_reader/lib.rs
1#![warn(missing_docs)]
2#![warn(clippy::pedantic)]
3#![allow(
4 clippy::missing_panics_doc, // LATER: add panics docs
5 clippy::missing_errors_doc, // LATER: add errors docs
6 clippy::similar_names,
7 clippy::cast_possible_truncation,
8 clippy::cast_possible_wrap,
9 clippy::cast_sign_loss,
10 clippy::cast_lossless
11)]
12// Inspired by C++ version by Chris Widmer and Carl Kadie
13
14// See: https://towardsdatascience.com/nine-rules-for-writing-python-extensions-in-rust-d35ea3a4ec29?sk=f8d808d5f414154fdb811e4137011437
15// for an article on how this project uses Rust to create a Python extension.
16
17// For Rust API tips see https://rust-lang.github.io/api-guidelines/necessities.html
18#![doc = include_str!("../README-rust.md")]
19//! ## Main Functions
20//!
21//! | Function | Description |
22//! | -------- | ----------- |
23//! | [`Bed::new`](struct.Bed.html#method.new) or [`Bed::builder`](struct.Bed.html#method.builder) | Open a local PLINK .bed file for reading genotype data and metadata. |
24//! | [`BedCloud::new`](struct.BedCloud.html#method.new), [`BedCloud::new_with_options`](struct.BedCloud.html#method.new_with_options),<br> [`BedCloud::builder`](struct.BedCloud.html#method.builder), [`BedCloud::builder_with_options`](struct.BedCloud.html#method.builder_with_options),<br> [`BedCloud::from_cloud_file`](struct.BedCloud.html#method.from_cloud_file), [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file) | Open a cloud PLINK .bed file for reading genotype data and metadata. |
25//! | [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) | Read genotype data from a local or cloud file. Supports indexing and options. |
26//! | [`WriteOptions::builder`](struct.WriteOptions.html#method.builder) | Write values to a local file in PLINK .bed format. Supports metadata and options. |
27//!
28//! ### `Bed` Metadata Methods
29//!
30//! After using [`Bed::new`](struct.Bed.html#method.new) or [`Bed::builder`](struct.Bed.html#method.builder) to open a PLINK .bed file for reading, use
31//! these methods to see metadata.
32//!
33//! | Method | Description |
34//! | -------- | ----------- |
35//! | [`iid_count`](struct.Bed.html#method.iid_count) | Number of individuals (samples) |
36//! | [`sid_count`](struct.Bed.html#method.sid_count) | Number of SNPs (variants) |
37//! | [`dim`](struct.Bed.html#method.dim) | Number of individuals and SNPs |
//! | [`fid`](struct.Bed.html#method.fid) | Family id of each individual (sample) |
//! | [`iid`](struct.Bed.html#method.iid) | Individual id of each individual (sample) |
//! | [`father`](struct.Bed.html#method.father) | Father id of each individual (sample) |
//! | [`mother`](struct.Bed.html#method.mother) | Mother id of each individual (sample) |
42//! | [`sex`](struct.Bed.html#method.sex) | Sex of each individual (sample) |
43//! | [`pheno`](struct.Bed.html#method.pheno) | A phenotype for each individual (seldom used) |
44//! | [`chromosome`](struct.Bed.html#method.chromosome) | Chromosome of each SNP (variant) |
45//! | [`sid`](struct.Bed.html#method.sid) | SNP Id of each SNP (variant) |
46//! | [`cm_position`](struct.Bed.html#method.cm_position) | Centimorgan position of each SNP (variant) |
47//! | [`bp_position`](struct.Bed.html#method.bp_position) | Base-pair position of each SNP (variant) |
48//! | [`allele_1`](struct.Bed.html#method.allele_1) | First allele of each SNP (variant) |
49//! | [`allele_2`](struct.Bed.html#method.allele_2) | Second allele of each SNP (variant) |
50//! | [`metadata`](struct.Bed.html#method.metadata) | All the metadata returned as a [`struct.Metadata`](struct.Metadata.html) |
51//!
52//! ### `ReadOptions`
53//!
54//! When using [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) to read genotype data, use these options to
55//! specify a desired numeric type,
56//! which individuals (samples) to read, which SNPs (variants) to read, etc.
57//!
58//! | Option | Description |
59//! | -------- | ----------- |
60//! | [`i8`](struct.ReadOptionsBuilder.html#method.i8) | Read values as i8 |
61//! | [`f32`](struct.ReadOptionsBuilder.html#method.f32) | Read values as f32 |
62//! | [`f64`](struct.ReadOptionsBuilder.html#method.f64) | Read values as f64 |
63//! | [`iid_index`](struct.ReadOptionsBuilder.html#method.iid_index) | Index of individuals (samples) to read (defaults to all)|
64//! | [`sid_index`](struct.ReadOptionsBuilder.html#method.sid_index) | Index of SNPs (variants) to read (defaults to all) |
65//! | [`f`](struct.ReadOptionsBuilder.html#method.f) | Order of the output array, Fortran-style (default) |
66//! | [`c`](struct.ReadOptionsBuilder.html#method.c) | Order of the output array, C-style |
67//! | [`is_f`](struct.ReadOptionsBuilder.html#method.is_f) | Is order of the output array Fortran-style? (defaults to true)|
68//! | [`missing_value`](struct.ReadOptionsBuilder.html#method.missing_value) | Value to use for missing values (defaults to -127 or NaN) |
//! | [`count_a1`](struct.ReadOptionsBuilder.html#method.count_a1) | Count the number of allele 1 (default) |
//! | [`count_a2`](struct.ReadOptionsBuilder.html#method.count_a2) | Count the number of allele 2 |
71//! | [`is_a1_counted`](struct.ReadOptionsBuilder.html#method.is_a1_counted) | Is allele 1 counted? (defaults to true) |
72//! | [`num_threads`](struct.ReadOptionsBuilder.html#method.num_threads) | Number of threads to use (defaults to all processors) |
73//! | [`max_concurrent_requests`](struct.ReadOptionsBuilder.html#method.max_concurrent_requests) | Maximum number of concurrent async requests (defaults to 10) -- Used by [`BedCloud`](struct.BedCloud.html). |
74//! | [`max_chunk_bytes`](struct.ReadOptionsBuilder.html#method.max_chunk_bytes) | Maximum chunk size of async requests (defaults to `8_000_000` bytes) -- Used by [`BedCloud`](struct.BedCloud.html). |
75//!
76//! ### [`Index`](enum.Index.html) Expressions
77//!
78//! Select which individuals (samples) and SNPs (variants) to read by using these
79//! [`iid_index`](struct.ReadOptionsBuilder.html#method.iid_index) and/or
80//! [`sid_index`](struct.ReadOptionsBuilder.html#method.sid_index) expressions.
81//!
82//! | Example | Type | Description |
83//! | -------- | --- | ----------- |
84//! | nothing | `()` | All |
85//! | `2` | `isize` | Index position 2 |
86//! | `-1` | `isize` | Last index position |
87//! | `vec![0, 10, -2]` | `Vec<isize>` | Index positions 0, 10, and 2nd from last |
88//! | `[0, 10, -2]` | `[isize]` and `[isize;n]` | Index positions 0, 10, and 2nd from last |
89//! | `ndarray::array![0, 10, -2]` | `ndarray::Array1<isize>` | Index positions 0, 10, and 2nd from last |
90//! | `10..20` | `Range<usize>` | Index positions 10 (inclusive) to 20 (exclusive). *Note: Rust ranges don't support negatives* |
91//! | `..=19` | `RangeInclusive<usize>` | Index positions 0 (inclusive) to 19 (inclusive). *Note: Rust ranges don't support negatives* |
92//! | *any Rust ranges* | `Range*<usize>` | *Note: Rust ranges don't support negatives* |
93//! | `s![10..20;2]` | `ndarray::SliceInfo1` | Index positions 10 (inclusive) to 20 (exclusive) in steps of 2 |
94//! | `s![-20..-10;-2]` | `ndarray::SliceInfo1` | 10th from last (exclusive) to 20th from last (inclusive), in steps of -2 |
95//! | `vec![true, false, true]` | `Vec<bool>`| Index positions 0 and 2. |
96//! | `[true, false, true]` | `[bool]` and `[bool;n]`| Index positions 0 and 2.|
97//! | `ndarray::array![true, false, true]` | `ndarray::Array1<bool>`| Index positions 0 and 2.|
98//!
99//! ### Environment Variables
100//!
101//! * `BED_READER_NUM_THREADS`
102//! * `NUM_THREADS`
103//!
//! If [`ReadOptionsBuilder::num_threads`](struct.ReadOptionsBuilder.html#method.num_threads)
//! or [`WriteOptionsBuilder::num_threads`](struct.WriteOptionsBuilder.html#method.num_threads) is not specified,
//! the number of threads to use is determined by the environment variables listed above (in order of priority).
//! If neither of these environment variables is set, all processors are used.
108//!
109//! * `BED_READER_DATA_DIR`
110//!
111//! Any requested sample file will be downloaded to this directory. If the environment variable is not set,
112//! a cache folder, appropriate to the OS, will be used.
113
114mod python_module;
115mod tests;
116use anyinput::anyinput;
117pub use bed_cloud::{sample_bed_url, sample_url, sample_urls, BedCloud, BedCloudBuilder};
118use byteorder::{LittleEndian, ReadBytesExt};
119pub use cloud_file::{CloudFile, CloudFileError};
120use core::fmt::Debug;
121use derive_builder::Builder;
122use dpc_pariter::{scope, IteratorExt};
123use fetch_data::FetchData;
124use futures_util::StreamExt;
125use nd::ShapeBuilder;
126use ndarray as nd;
127use num_traits::{abs, Float, FromPrimitive, Signed, ToPrimitive};
128use rayon::iter::{IntoParallelRefIterator, IntoParallelRefMutIterator, ParallelIterator};
129use rayon::{iter::ParallelBridge, ThreadPoolBuildError};
130use statrs::distribution::{Beta, Continuous};
131use std::cmp::Ordering;
132use std::collections::HashSet;
133use std::fs::{self};
134use std::io::Read;
135use std::io::Seek;
136use std::io::SeekFrom;
137use std::io::Write;
138use std::num::{ParseFloatError, ParseIntError};
139use std::ops::AddAssign;
140use std::ops::{Bound, Range, RangeBounds, RangeFrom, RangeInclusive, RangeTo, RangeToInclusive};
141use std::rc::Rc;
142use std::str::Utf8Error;
143use std::{
144 env,
145 fs::File,
146 io::{BufRead, BufReader, BufWriter},
147 ops::RangeFull,
148 path::{Path, PathBuf},
149};
150use thiserror::Error;
151mod bed_cloud;
152
153const BED_FILE_MAGIC1: u8 = 0x6C; // 0b01101100 or 'l' (lowercase 'L')
154const BED_FILE_MAGIC2: u8 = 0x1B; // 0b00011011 or <esc>
155const CB_HEADER_U64: u64 = 3;
156const CB_HEADER_USIZE: usize = 3;
157
158// About ndarray
159// https://docs.rs/ndarray/0.14.0/ndarray/parallel/index.html
160// https://rust-lang-nursery.github.io/rust-cookbook/concurrency/parallel.html
161// https://github.com/rust-ndarray/ndarray/blob/master/README-quick-start.md
162// https://datacrayon.com/posts/programming/rust-notebooks/multidimensional-arrays-and-operations-with-ndarray
163// https://docs.rs/ndarray/0.14.0/ndarray/doc/ndarray_for_numpy_users/index.html
164// https://docs.rs/ndarray-npy
165// https://rust-lang-nursery.github.io/rust-cookbook/science/mathematics/linear_algebra.html
166
/// All possible errors returned by this library and the libraries it depends on.
///
/// Every variant wraps exactly one source error. `#[error(transparent)]` forwards
/// the display message to the wrapped error, and `#[from]` generates the matching
/// `From` conversion into `BedErrorPlus`.
// Based on `<https://nick.groenen.me/posts/rust-error-handling/#the-library-error-type>`
#[derive(Error, Debug)]
pub enum BedErrorPlus {
    /// An error defined by this library (see [`BedError`]).
    #[allow(missing_docs)]
    #[error(transparent)]
    BedError(#[from] BedError),

    /// An I/O error from the standard library.
    #[allow(missing_docs)]
    #[error(transparent)]
    IOError(#[from] std::io::Error),

    /// A failure to build a rayon thread pool.
    #[allow(missing_docs)]
    #[error(transparent)]
    ThreadPoolError(#[from] ThreadPoolBuildError),

    /// A failure to parse text as an integer.
    #[allow(missing_docs)]
    #[error(transparent)]
    ParseIntError(#[from] ParseIntError),

    /// A failure to parse text as a floating-point number.
    #[allow(missing_docs)]
    #[error(transparent)]
    ParseFloatError(#[from] ParseFloatError),

    /// An error reported by the `cloud_file` crate.
    #[allow(missing_docs)]
    #[error(transparent)]
    CloudFileError(#[from] CloudFileError),

    /// A failure to interpret bytes as UTF-8.
    #[allow(missing_docs)]
    #[error(transparent)]
    Utf8Error(#[from] Utf8Error),
}
199// https://docs.rs/thiserror/1.0.23/thiserror/
200
/// All errors specific to this library.
///
/// The `#[error(...)]` string on each variant is its display message; the
/// variant's positional fields are interpolated as `{0}`, `{1}`, and so on.
#[derive(Error, Debug, Clone)]
pub enum BedError {
    /// The magic bytes or the file length are not what a .bed file requires.
    /// The readers in this file store the file's path in the field.
    #[allow(missing_docs)]
    #[error("Ill-formed BED file. BED file header is incorrect or length is wrong. '{0}'")]
    IllFormed(String),

    /// The header's mode byte is neither 0 nor 1.
    /// The readers in this file store the file's path in the field.
    #[allow(missing_docs)]
    #[error(
        "Ill-formed BED file. BED file header is incorrect. Expected mode to be 0 or 1. '{0}'"
    )]
    BadMode(String),

    /// A value other than 0, 1, 2, or the missing value was given for writing.
    /// The field is free-form detail text.
    #[allow(missing_docs)]
    #[error("Attempt to write illegal value to BED file. Only 0,1,2,missing allowed. '{0}'")]
    BadValue(String),

    /// One or more worker threads panicked.
    #[allow(missing_docs)]
    #[error("Multithreading resulted in panic(s)")]
    PanickedThread(),

    /// No individual was observed for a SNP.
    #[allow(missing_docs)]
    #[error("No individual observed for the SNP.")]
    NoIndividuals,

    /// A SNP mean was illegal.
    #[allow(missing_docs)]
    #[error("Illegal SNP mean.")]
    IllegalSnpMean,

    /// An individual index is out of range; the field is the offending (signed) index.
    #[allow(missing_docs)]
    #[error("Index to individual larger than the number of individuals. (Index value {0})")]
    IidIndexTooBig(isize),

    /// A SNP index is out of range; the field is the offending (signed) index.
    #[allow(missing_docs)]
    #[error("Index to SNP larger than the number of SNPs. (Index value {0})")]
    SidIndexTooBig(isize),

    /// Index lengths disagree with the output array.
    /// Fields: `iid_index` length, `sid_index` length, then the two output dimensions.
    #[allow(missing_docs)]
    #[error("Length of iid_index ({0}) and sid_index ({1}) must match dimensions of output array ({2},{3}).")]
    IndexMismatch(usize, usize, usize, usize),

    /// The two counts are too large for the resulting file size to be represented.
    #[allow(missing_docs)]
    #[error("Indexes ({0},{1}) too big for files")]
    IndexesTooBigForFiles(usize, usize),

    /// Same condition as `IndexMismatch`, reported with a "Subset:" prefix.
    /// Fields: `iid_index` length, `sid_index` length, then the two output dimensions.
    #[allow(missing_docs)]
    #[error("Subset: length of iid_index ({0}) and sid_index ({1}) must match dimensions of output array ({2},{3}).")]
    SubsetMismatch(usize, usize, usize, usize),

    /// A beta value could not be converted to or from `f64`.
    #[allow(missing_docs)]
    #[error("Cannot convert beta values to/from float 64")]
    CannotConvertBetaToFromF64,

    /// A Beta distribution could not be created; the fields are the two parameters.
    #[allow(missing_docs)]
    #[error("Cannot create Beta Dist with given parameters ({0},{1})")]
    CannotCreateBetaDist(f64, f64),

    /// Metadata that was skipped was later requested; the field names it.
    #[allow(missing_docs)]
    #[error("Cannot use skipped metadata '{0}'")]
    CannotUseSkippedMetadata(String),

    /// An index range has its start after its end. Fields: start, end.
    #[allow(missing_docs)]
    #[error("Index starts at {0} but ends at {1}")]
    StartGreaterThanEnd(usize, usize),

    /// An index step of zero was given.
    #[allow(missing_docs)]
    #[error("Step of zero not allowed")]
    StepZero,

    /// An index range starts past the available count. Fields: start, count.
    #[allow(missing_docs)]
    #[error("Index starts at {0} but count is {1}")]
    StartGreaterThanCount(usize, usize),

    /// An index range ends past the available count. Fields: end, count.
    #[allow(missing_docs)]
    #[error("Index ends at {0} but count is {1}")]
    EndGreaterThanCount(usize, usize),

    /// An index expression tried to add a new axis.
    #[allow(missing_docs)]
    #[error("Adding new axis not allowed")]
    NewAxis,

    /// An ndarray `SliceInfo` index was not one-dimensional.
    #[allow(missing_docs)]
    #[error("Expect 1-D NDArray SliceInfo")]
    NdSliceInfoNot1D,

    /// A metadata line has the wrong number of fields.
    /// Fields: expected count, found count, and a `String` that the message
    /// shows in quotes.
    #[allow(missing_docs)]
    #[error("Expect {0} fields but find only {1} in '{2}'")]
    MetadataFieldCount(usize, usize, String),

    /// Two sources disagree on a count. Fields: count-name prefix, and the two values.
    #[allow(missing_docs)]
    #[error("{0}_count values of {1} and {2} are inconsistent")]
    InconsistentCount(String, usize, usize),

    /// A boolean index has the wrong length. Fields: expected length, actual length.
    #[allow(missing_docs)]
    #[error("Expect bool arrays and vectors to be length {0}, not {1}")]
    BoolArrayVectorWrongLength(usize, usize),

    /// An ndarray has the wrong shape. Fields: expected (rows, cols), then found (rows, cols).
    #[allow(missing_docs)]
    #[error("Expect ndarray of shape ({0}, {1}), but found shape ({2}, {3})")]
    InvalidShape(usize, usize, usize, usize),

    /// Metadata cannot be written because some of its fields are `None`; the field names it.
    #[allow(missing_docs)]
    #[error("Can't write '{0}' metadata if some fields are None")]
    MetadataMissingForWrite(String),

    /// A requested sample file is unknown or bad; the field names it.
    #[allow(missing_docs)]
    #[error("Unknown or bad sample file '{0}'")]
    UnknownOrBadSampleFile(String),

    /// The registry of sample files is invalid.
    #[allow(missing_docs)]
    #[error("The registry of sample files is invalid")]
    SampleRegistryProblem(),

    /// Constructing the samples failed; the field is the underlying error text.
    #[allow(missing_docs)]
    #[error("Samples construction failed with error: {0}")]
    SamplesConstructionFailed(String),

    /// A sample file was downloaded but could not be found afterwards; the field names it.
    #[allow(missing_docs)]
    #[error("Downloaded sample file not seen: {0}")]
    DownloadedSampleFileNotSeen(String),

    /// A downloaded sample file failed its hash check.
    /// Fields: file, expected hash, actual hash.
    #[allow(missing_docs)]
    #[error("Downloaded sample file has wrong hash: {0},expected: {1}, actual: {2}")]
    DownloadedSampleFileWrongHash(String, String, String),

    /// The cache directory could not be created.
    #[allow(missing_docs)]
    #[error("Cannot create cache directory")]
    CannotCreateCacheDir(),

    /// A URL could not be parsed. Fields: the URL, and the parse error text.
    #[allow(missing_docs)]
    #[error("Cannot parse URL: '{0}': {1}")]
    CannotParseUrl(String, String),

    /// A builder was finished with a required field unset; the field is that field's name.
    #[allow(missing_docs)]
    #[error("UninitializedField: '{0}'")]
    UninitializedField(&'static str),

    /// Fetching a sample failed; the field is the underlying error text.
    #[allow(missing_docs)]
    #[error("Sample fetch error: {0}")]
    SampleFetch(String),

    /// The destination buffer for encoding is not contiguous.
    #[allow(missing_docs)]
    #[error("Encoding destination buffer must be contiguous.")]
    EncodingContiguous(),

    /// The destination buffer for encoding has the wrong length.
    /// Fields: required length, actual length.
    #[allow(missing_docs)]
    #[error("Encoding destination buffer have length {0}, (in_vector.len() - 1) // 4 + 1, but it has length {1}.")]
    EncodingLength(usize, usize),
}
350
351// Trait alias
352
353/// A trait alias, used internally, for the values of a .bed file, namely i8, f32, f64.
354pub trait BedVal:
355 Copy + Default + From<i8> + Debug + Sync + Send + Sync + Missing + PartialEq
356{
357}
358impl<T> BedVal for T where
359 T: Copy + Default + From<i8> + Debug + Sync + Send + Sync + Missing + PartialEq
360{
361}
362
363fn create_pool(num_threads: usize) -> Result<rayon::ThreadPool, Box<BedErrorPlus>> {
364 match rayon::ThreadPoolBuilder::new()
365 .num_threads(num_threads)
366 .build()
367 {
368 Err(e) => Err(Box::new(e.into())),
369 Ok(pool) => Ok(pool),
370 }
371}
372
373#[allow(clippy::too_many_arguments)]
374#[anyinput]
375fn read_no_alloc<TVal: BedVal>(
376 path: AnyPath,
377 iid_count: usize,
378 sid_count: usize,
379 is_a1_counted: bool,
380 iid_index: &[isize],
381 sid_index: &[isize],
382 missing_value: TVal,
383 num_threads: usize,
384 val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
385) -> Result<(), Box<BedErrorPlus>> {
386 create_pool(num_threads)?.install(|| {
387 let (buf_reader, bytes_vector) = open_and_check(path)?;
388
389 match bytes_vector[2] {
390 0 => {
391 // We swap 'iid' and 'sid' and then reverse the axes.
392 let mut val_t = val.view_mut().reversed_axes();
393 internal_read_no_alloc(
394 buf_reader,
395 path,
396 sid_count,
397 iid_count,
398 is_a1_counted,
399 sid_index,
400 iid_index,
401 missing_value,
402 &mut val_t,
403 )
404 }
405 1 => internal_read_no_alloc(
406 buf_reader,
407 path,
408 iid_count,
409 sid_count,
410 is_a1_counted,
411 iid_index,
412 sid_index,
413 missing_value,
414 val,
415 ),
416 _ => Err(Box::new(BedError::BadMode(path_ref_to_string(path)).into())),
417 }
418 })?;
419 Ok(())
420}
421
422#[anyinput]
423fn path_ref_to_string(path: AnyPath) -> String {
424 PathBuf::from(path).display().to_string()
425}
426
427impl From<BedError> for Box<BedErrorPlus> {
428 fn from(err: BedError) -> Self {
429 Box::new(BedErrorPlus::BedError(err))
430 }
431}
432impl From<std::io::Error> for Box<BedErrorPlus> {
433 fn from(err: std::io::Error) -> Self {
434 Box::new(BedErrorPlus::IOError(err))
435 }
436}
437impl From<ThreadPoolBuildError> for Box<BedErrorPlus> {
438 fn from(err: ThreadPoolBuildError) -> Self {
439 Box::new(BedErrorPlus::ThreadPoolError(err))
440 }
441}
442impl From<ParseIntError> for Box<BedErrorPlus> {
443 fn from(err: ParseIntError) -> Self {
444 Box::new(BedErrorPlus::ParseIntError(err))
445 }
446}
447
448impl From<ParseFloatError> for Box<BedErrorPlus> {
449 fn from(err: ParseFloatError) -> Self {
450 Box::new(BedErrorPlus::ParseFloatError(err))
451 }
452}
453
454impl From<::derive_builder::UninitializedFieldError> for BedErrorPlus {
455 fn from(err: ::derive_builder::UninitializedFieldError) -> Self {
456 BedError::UninitializedField(err.field_name()).into()
457 }
458}
459
460impl From<CloudFileError> for Box<BedErrorPlus> {
461 fn from(err: CloudFileError) -> Self {
462 Box::new(BedErrorPlus::CloudFileError(err))
463 }
464}
465
466impl From<Utf8Error> for Box<BedErrorPlus> {
467 fn from(err: Utf8Error) -> Self {
468 Box::new(BedErrorPlus::Utf8Error(err))
469 }
470}
471
472#[anyinput]
473fn open_and_check(
474 path: AnyPath,
475) -> Result<(BufReader<File>, [u8; CB_HEADER_USIZE]), Box<BedErrorPlus>> {
476 let mut buf_reader = BufReader::new(File::open(path)?);
477 let mut bytes_array: [u8; CB_HEADER_USIZE] = [0; CB_HEADER_USIZE];
478 buf_reader.read_exact(&mut bytes_array)?;
479 if (BED_FILE_MAGIC1 != bytes_array[0]) || (BED_FILE_MAGIC2 != bytes_array[1]) {
480 Err(BedError::IllFormed(path_ref_to_string(path)))?;
481 }
482 Ok((buf_reader, bytes_array))
483}
484
485// trait Max {
486// fn max() -> Self;
487// }
488
489// impl Max for u8 {
490// fn max() -> u8 {
491// u8::MAX
492// }
493// }
494
495// impl Max for u64 {
496// fn max() -> u64 {
497// u64::MAX
498// }
499// }
500
/// Supplies, for each supported value type, the default value that stands for
/// a missing genotype. Implemented here for `f64`, `f32`, and `i8`.
pub trait Missing {
    /// Returns the default missing value for this type.
    fn missing() -> Self;
}

/// `f64` marks a missing genotype with NaN.
impl Missing for f64 {
    fn missing() -> Self {
        Self::NAN
    }
}

/// `f32` marks a missing genotype with NaN.
impl Missing for f32 {
    fn missing() -> Self {
        Self::NAN
    }
}

/// `i8` marks a missing genotype with -127.
impl Missing for i8 {
    fn missing() -> Self {
        -127
    }
}
524
525#[cfg(not(target_pointer_width = "64"))]
526compile_error!("This code requires a 64-bit target architecture.");
527#[inline]
528fn try_div_4(in_iid_count: usize, in_sid_count: usize) -> Result<u64, Box<BedErrorPlus>> {
529 if in_iid_count == 0 {
530 return Ok(0);
531 }
532 let in_iid_count_div4_u64 = in_iid_count.checked_sub(1).map_or(0, |v| v / 4 + 1) as u64;
533 let in_sid_count_u64 = in_sid_count as u64;
534
535 if in_sid_count > 0 && (u64::MAX - CB_HEADER_U64) / in_sid_count_u64 < in_iid_count_div4_u64 {
536 Err(BedError::IndexesTooBigForFiles(in_iid_count, in_sid_count))?;
537 }
538
539 Ok(in_iid_count_div4_u64)
540}
541
/// Reads the requested values from an already-opened .bed file into `out_val`.
///
/// `buf_reader` must wrap the file at `path`; `path` is used only for error
/// messages. Steps:
///
/// 1. Verify that the file length equals
///    `ceil(in_iid_count / 4) * in_sid_count + CB_HEADER_U64`.
/// 2. Validate `iid_index` and precompute, per requested individual, which byte
///    and which bit shift hold its value.
/// 3. For each entry of `sid_index` (negative values count from the end), seek
///    to that SNP's bytes and read only the byte span covering the requested
///    individuals. Reads happen one after another on the single reader.
/// 4. Decode each byte span into the matching column (axis 1) of `out_val`,
///    with the decoding distributed via rayon's `par_bridge`.
///
/// NOTE(review): the SNP reads are zipped with the columns of `out_val`, and
/// rows are indexed by position in `iid_index`. This presumably relies on the
/// caller having sized `out_val` as `(iid_index.len(), sid_index.len())` -
/// confirm against the callers.
///
/// # Errors
///
/// * `BedError::IllFormed` if the file length is wrong.
/// * `BedError::IidIndexTooBig` / `BedError::SidIndexTooBig` for out-of-range indexes.
/// * `BedError::IndexesTooBigForFiles` if the counts overflow the size computation.
/// * I/O errors from querying metadata, seeking, or reading.
#[allow(clippy::too_many_arguments)]
#[anyinput]
fn internal_read_no_alloc<TVal: BedVal>(
    mut buf_reader: BufReader<File>,
    path: AnyPath,
    in_iid_count: usize,
    in_sid_count: usize,
    is_a1_counted: bool,
    iid_index: &[isize],
    sid_index: &[isize],
    missing_value: TVal,
    out_val: &mut nd::ArrayViewMut2<'_, TVal>, // a mutable view: elements can be overwritten, but the array cannot grow
) -> Result<(), Box<BedErrorPlus>> {
    // Check the file length

    let in_iid_count_div4_u64 = try_div_4(in_iid_count, in_sid_count)?;
    // "as" and math is safe because of early checks
    let file_len = buf_reader.get_ref().metadata()?.len();
    let file_len2 = in_iid_count_div4_u64 * (in_sid_count as u64) + CB_HEADER_U64;
    if file_len != file_len2 {
        Err(BedError::IllFormed(path_ref_to_string(path)))?;
    }

    // Check and precompute for each iid_index
    let (i_div_4_less_start_array, i_mod_4_times_2_array, i_div_4_start, i_div_4_len) =
        check_and_precompute_iid_index(in_iid_count, iid_index)?;

    // Check and compute work for each sid_index
    // Four-entry table that maps each two-bit code to its output value.
    let from_two_bits_to_value = set_up_two_bits_to_value(is_a1_counted, missing_value);
    // Valid signed indexes lie in [-in_sid_count, in_sid_count - 1].
    let lower_sid_count = -(in_sid_count as isize);
    let upper_sid_count: isize = (in_sid_count as isize) - 1;
    // See https://morestina.net/blog/1432/parallel-stream-processing-with-rayon
    // Possible optimization: We could read snp in their input order instead of their output order
    sid_index
        .iter()
        .map(|in_sid_i_signed| {
            // Turn signed sid_index into unsigned sid_index (or error)
            let in_sid_i = if (0..=upper_sid_count).contains(in_sid_i_signed) {
                *in_sid_i_signed as u64
            } else if (lower_sid_count..=-1).contains(in_sid_i_signed) {
                (in_sid_count - ((-in_sid_i_signed) as usize)) as u64
            } else {
                Err(BedError::SidIndexTooBig(*in_sid_i_signed))?
            };

            // Read the iid info for one snp from the disk
            // Only the bytes from the first to the last requested individual are read.
            let mut bytes_vector: Vec<u8> = vec![0; i_div_4_len as usize];
            let pos: u64 = in_sid_i * in_iid_count_div4_u64 + i_div_4_start + CB_HEADER_U64; // "as" and math is safe because of early checks
            buf_reader.seek(SeekFrom::Start(pos))?;
            buf_reader.read_exact(&mut bytes_vector)?;
            Ok::<_, Box<BedErrorPlus>>(bytes_vector)
        })
        // Zip in the column of the output array
        .zip(out_val.axis_iter_mut(nd::Axis(1)))
        // In parallel, decompress the iid info and put it in its column
        .par_bridge() // This seems faster than parallel zip
        .try_for_each(|(bytes_vector_result, mut col)| match bytes_vector_result {
            Err(e) => Err(e),
            Ok(bytes_vector) => {
                for out_iid_i in 0..iid_index.len() {
                    // Byte offset (relative to the bytes read) and bit shift for this individual.
                    let i_div_4_less_start = i_div_4_less_start_array[out_iid_i];
                    let i_mod_4_times_2 = i_mod_4_times_2_array[out_iid_i];
                    // Extract the two-bit code and translate it through the table.
                    let genotype_byte: u8 =
                        (bytes_vector[i_div_4_less_start] >> i_mod_4_times_2) & 0x03;
                    col[out_iid_i] = from_two_bits_to_value[genotype_byte as usize];
                }
                Ok(())
            }
        })?;

    Ok(())
}
614
615type Array1Usize = nd::ArrayBase<nd::OwnedRepr<usize>, nd::Dim<[usize; 1]>>;
616type Array1U8 = nd::ArrayBase<nd::OwnedRepr<u8>, nd::Dim<[usize; 1]>>;
617
/// Validates `iid_index` against `in_iid_count` and precomputes where each
/// requested individual's two-bit value lives within a SNP's bytes.
///
/// Indexes in `0..in_iid_count` are used as-is; indexes in `-in_iid_count..=-1`
/// count from the end. Anything else yields `BedError::IidIndexTooBig`.
///
/// Returns a tuple of:
///
/// 1. for each entry of `iid_index`, its byte offset (`index / 4`) relative to
///    the smallest such offset among all entries;
/// 2. for each entry, its bit shift within that byte (`index % 4 * 2`);
/// 3. the smallest byte offset (the first byte that must be read);
/// 4. the number of bytes spanning the smallest through the largest offset.
///
/// For an empty `iid_index`, items 3 and 4 are both 0.
#[allow(clippy::type_complexity)]
#[allow(clippy::range_plus_one)]
fn check_and_precompute_iid_index(
    in_iid_count: usize,
    iid_index: &[isize],
) -> Result<(Array1Usize, Array1U8, u64, u64), Box<BedErrorPlus>> {
    // Valid signed indexes lie in [-in_iid_count, in_iid_count - 1].
    let lower_iid_count = -(in_iid_count as isize);
    let upper_iid_count: isize = (in_iid_count as isize) - 1;
    let mut i_div_4_less_start_array = nd::Array1::<usize>::zeros(iid_index.len());
    let mut i_mod_4_times_2_array = nd::Array1::<u8>::zeros(iid_index.len());
    // One result slot per index, so the parallel loop below can record failures independently.
    let mut result_list: Vec<Result<(), BedError>> = vec![Ok(()); iid_index.len()];
    nd::par_azip!((in_iid_i_signed in iid_index,
        i_div_4_less_start in &mut i_div_4_less_start_array,
        i_mod_4_times_2 in &mut i_mod_4_times_2_array,
        result in &mut result_list
    )
    {
        let in_iid_i = if (0..=upper_iid_count).contains(in_iid_i_signed) {
            *result = Ok(());
            *in_iid_i_signed as usize
        } else if (lower_iid_count..=-1).contains(in_iid_i_signed) {
            *result = Ok(());
            in_iid_count - ((-in_iid_i_signed) as usize)
        } else {
            *result = Err(BedError::IidIndexTooBig(
                *in_iid_i_signed,
            ));
            // Placeholder only; the error recorded above is returned before it is used.
            0
        };

        // Which byte holds this individual, and how far to shift within it.
        *i_div_4_less_start = in_iid_i / 4 ;
        *i_mod_4_times_2 = (in_iid_i % 4 * 2) as u8;
    });
    // Return an error if any index was out of range.
    result_list
        .iter()
        .par_bridge()
        .try_for_each(|x| (*x).clone())?;

    // Find the span of bytes that covers every requested individual.
    let (i_div_4_start, i_div_4_len) =
        if let Some(min_value) = i_div_4_less_start_array.par_iter().min() {
            let max_value = *i_div_4_less_start_array.par_iter().max().unwrap(); // safe because of min
            (*min_value as u64, (max_value + 1 - *min_value) as u64)
        } else {
            (0, 0)
        };
    // Make the byte offsets relative to the start of the span; skip if min_value is 0
    if i_div_4_start > 0 {
        i_div_4_less_start_array
            .par_iter_mut()
            .for_each(|x| *x -= i_div_4_start as usize);
    }
    Ok((
        i_div_4_less_start_array,
        i_mod_4_times_2_array,
        i_div_4_start,
        i_div_4_len,
    ))
}
676
/// Builds the four-entry table that maps a two-bit .bed code (the array index)
/// to an output value.
///
/// Codes 1 and 2 always map to `missing_value` and to 1. Codes 0 and 3 map to
/// 2 and 0 when `count_a1` is true, and to 0 and 2 otherwise.
fn set_up_two_bits_to_value<TVal: From<i8>>(count_a1: bool, missing_value: TVal) -> [TVal; 4] {
    // Only the two homozygous codes depend on which allele is being counted.
    let (code_0_count, code_3_count): (i8, i8) = if count_a1 { (2, 0) } else { (0, 2) };

    [
        TVal::from(code_0_count), // look-up 0
        missing_value,            // look-up 1
        TVal::from(1),            // look-up 2 (heterozygous)
        TVal::from(code_3_count), // look-up 3
    ]
}
698
699// Thanks to Dawid for his dpc-pariter library that makes this function scale.
700// https://dpc.pw/adding-parallelism-to-your-rust-iterators
701#[anyinput]
702fn write_val<S, TVal>(
703 path: AnyPath,
704 val: &nd::ArrayBase<S, nd::Ix2>,
705 is_a1_counted: bool,
706 missing: TVal,
707 num_threads: usize,
708) -> Result<(), Box<BedErrorPlus>>
709where
710 S: nd::Data<Elem = TVal>,
711 TVal: BedVal,
712{
713 let (iid_count, sid_count) = val.dim();
714
715 // 4 genotypes per byte so round up
716 let iid_count_div4_u64 = try_div_4(iid_count, sid_count)?;
717
718 // We create and write to a file.
719 // If there is an error, we will delete it.
720 if let Err(e) = write_internal(
721 path,
722 iid_count_div4_u64,
723 val,
724 is_a1_counted,
725 missing,
726 num_threads,
727 ) {
728 // Clean up the file
729 let _ = fs::remove_file(path);
730 Err(e)
731 } else {
732 Ok(())
733 }
734}
735
736// https://www.reddit.com/r/rust/comments/mo4s8e/difference_between_reference_and_view_in_ndarray/
737#[anyinput]
738fn write_internal<S, TVal>(
739 path: AnyPath,
740 iid_count_div4_u64: u64,
741 val: &nd::ArrayBase<S, nd::Ix2>,
742 is_a1_counted: bool,
743 missing: TVal,
744 num_threads: usize,
745) -> Result<(), Box<BedErrorPlus>>
746where
747 S: nd::Data<Elem = TVal>,
748 TVal: BedVal,
749{
750 let mut writer = BufWriter::new(File::create(path)?);
751 // LATER: If this method is later changed
752 // to support major="individual", be sure to
753 // change write_f64, etc and python function 'to_bed' which
754 // currently uses a work-around.
755 writer.write_all(&[BED_FILE_MAGIC1, BED_FILE_MAGIC2, 0x01])?;
756
757 #[allow(clippy::eq_op)]
758 let use_nan = missing != missing; // generic NAN test
759 let zero_code = if is_a1_counted { 3u8 } else { 0u8 };
760 let two_code = if is_a1_counted { 0u8 } else { 3u8 };
761
762 let homozygous_primary_allele = TVal::from(0); // Major Allele
763 let heterozygous_allele = TVal::from(1);
764 let homozygous_secondary_allele = TVal::from(2); // Minor Allele
765
766 scope(|scope| {
767 val.axis_iter(nd::Axis(1))
768 .parallel_map_scoped(scope, {
769 move |column| {
770 // Convert each column into a bytes_vector
771 let mut bytes_vector: Vec<u8> = vec![0; iid_count_div4_u64 as usize]; // inits to 0
772 process_genomic_slice(
773 &column,
774 &mut bytes_vector,
775 homozygous_primary_allele,
776 heterozygous_allele,
777 homozygous_secondary_allele,
778 zero_code,
779 two_code,
780 use_nan,
781 missing,
782 )?;
783 Ok::<_, Box<BedErrorPlus>>(bytes_vector)
784 }
785 })
786 .threads(num_threads)
787 .try_for_each(|bytes_vector| {
788 // Write the bytes vector, they must be in order.
789 writer.write_all(&bytes_vector?)?;
790 Ok(())
791 })
792 })
793 .map_err(|_e| BedError::PanickedThread())?
794}
795
796#[allow(dead_code)]
797fn encode1<TVal>(
798 in_vector: &ndarray::ArrayView1<TVal>,
799 out_vector: &mut [u8],
800 is_a1_counted: bool,
801 missing: TVal,
802) -> Result<(), Box<BedErrorPlus>>
803where
804 TVal: BedVal,
805{
806 #[allow(clippy::eq_op)]
807 let use_nan = missing != missing; // generic NAN test
808 let zero_code = if is_a1_counted { 3u8 } else { 0u8 };
809 let two_code = if is_a1_counted { 0u8 } else { 3u8 };
810
811 let homozygous_primary_allele: TVal = TVal::from(0); // Major Allele
812 let heterozygous_allele = TVal::from(1);
813 let homozygous_secondary_allele = TVal::from(2); // Minor Allele
814
815 let minor_div4 = in_vector.len().checked_sub(1).map_or(0, |v| v / 4 + 1);
816 if minor_div4 != out_vector.len() {
817 return Err(Box::new(
818 BedError::EncodingLength(minor_div4, out_vector.len()).into(),
819 ));
820 }
821
822 process_genomic_slice(
823 in_vector,
824 out_vector,
825 homozygous_primary_allele,
826 heterozygous_allele,
827 homozygous_secondary_allele,
828 zero_code,
829 two_code,
830 use_nan,
831 missing,
832 )
833}
834
835#[inline]
836#[allow(clippy::eq_op)]
837#[allow(clippy::too_many_arguments)]
838fn encode_genotype_chunk<TVal>(
839 chunk: nd::ArrayView1<TVal>,
840 homozygous_primary_allele: TVal,
841 heterozygous_allele: TVal,
842 homozygous_secondary_allele: TVal,
843 zero_code: u8,
844 two_code: u8,
845 use_nan: bool,
846 missing: TVal,
847) -> Result<u8, BedError>
848where
849 TVal: PartialEq + Copy,
850{
851 // LATER: Think about unrolling this loop in the usual case of 4 elements
852 let mut output_byte = 0u8;
853 for (within_chunk_index, &v0) in chunk.iter().enumerate() {
854 let genotype_code = if v0 == homozygous_primary_allele {
855 zero_code
856 } else if v0 == heterozygous_allele {
857 2
858 } else if v0 == homozygous_secondary_allele {
859 two_code
860 } else if (use_nan && v0 != v0) || (!use_nan && v0 == missing) {
861 1
862 } else {
863 return Err(BedError::BadValue(
864 "Invalid genotype value encountered during encoding.".to_string(),
865 ));
866 };
867
868 output_byte |= genotype_code << (within_chunk_index * 2);
869 }
870 Ok(output_byte)
871}
872
873#[inline]
874#[allow(clippy::eq_op)]
875#[allow(clippy::too_many_arguments)]
876fn process_genomic_slice<TVal>(
877 in_vector: &ndarray::ArrayView1<TVal>,
878 out_vector: &mut [u8],
879 homozygous_primary_allele: TVal,
880 heterozygous_allele: TVal,
881 homozygous_secondary_allele: TVal,
882 zero_code: u8,
883 two_code: u8,
884 use_nan: bool,
885 missing: TVal,
886) -> Result<(), Box<BedErrorPlus>>
887where
888 TVal: PartialEq + Copy + Sync, // Ensure TVal supports equality check and can be copied
889{
890 // Calculate the number of full chunks and the remainder
891 let full_chunks = in_vector.len() / 4;
892 let remainder = in_vector.len() % 4;
893
894 // Ensure the output vector is correctly sized
895 assert_eq!(out_vector.len(), full_chunks + usize::from(remainder > 0));
896
897 // Zip the exact input chunks with output chunks and process in parallel
898 in_vector
899 .exact_chunks(4)
900 .into_iter()
901 .zip(out_vector.iter_mut())
902 .try_for_each(|(chunk, output_byte)| {
903 *output_byte = encode_genotype_chunk(
904 chunk,
905 homozygous_primary_allele,
906 heterozygous_allele,
907 homozygous_secondary_allele,
908 zero_code,
909 two_code,
910 use_nan,
911 missing,
912 )?;
913 Ok::<(), Box<BedErrorPlus>>(())
914 })?;
915
916 // Process the remainder sequentially if there is any
917 if remainder != 0 {
918 let start = full_chunks * 4;
919 let chunk = in_vector.slice(ndarray::s![start..]);
920 let output_byte = &mut out_vector[full_chunks];
921 *output_byte = encode_genotype_chunk(
922 chunk,
923 homozygous_primary_allele,
924 heterozygous_allele,
925 homozygous_secondary_allele,
926 zero_code,
927 two_code,
928 use_nan,
929 missing,
930 )?;
931 }
932
933 Ok::<(), Box<BedErrorPlus>>(())
934}
935// #[inline]
936// #[allow(clippy::eq_op)]
937// #[allow(clippy::too_many_arguments)]
938// fn process_genomic_slice<TVal>(
939// in_vector: &ndarray::ArrayView1<TVal>,
940// out_vector: &mut [u8],
941// homozygous_primary_allele: TVal,
942// heterozygous_allele: TVal,
943// homozygous_secondary_allele: TVal,
944// zero_code: u8,
945// two_code: u8,
946// use_nan: bool,
947// missing: TVal,
948// ) -> Result<(), Box<BedErrorPlus>>
949// where
950// TVal: PartialEq + Copy + Sync, // Ensure TVal supports equality check and can be copied
951// {
952// // Calculate the number of full chunks and the remainder
953// let full_chunks = in_vector.len() / 4;
954// let remainder = in_vector.len() % 4;
955
956// // Ensure the output vector is correctly sized
957// assert_eq!(out_vector.len(), full_chunks + usize::from(remainder > 0));
958
959// // Zip the exact input chunks with output chunks and process in parallel
960// in_vector
961// .exact_chunks(4)
962// .into_iter()
963// .zip(out_vector.iter_mut())
964// .par_bridge()
965// .try_for_each(|(chunk, output_byte)| {
966// *output_byte = encode_genotype_chunk(
967// chunk,
968// homozygous_primary_allele,
969// heterozygous_allele,
970// homozygous_secondary_allele,
971// zero_code,
972// two_code,
973// use_nan,
974// missing,
975// )?;
976// Ok::<(), Box<BedErrorPlus>>(())
977// })?;
978
979// // Process the remainder sequentially if there is any
980// if remainder != 0 {
981// let start = full_chunks * 4;
982// let chunk = in_vector.slice(ndarray::s![start..]);
983// let output_byte = &mut out_vector[full_chunks];
984// *output_byte = encode_genotype_chunk(
985// chunk,
986// homozygous_primary_allele,
987// heterozygous_allele,
988// homozygous_secondary_allele,
989// zero_code,
990// two_code,
991// use_nan,
992// missing,
993// )?;
994// }
995
996// Ok::<(), Box<BedErrorPlus>>(())
997// }
998
999#[anyinput]
1000fn count_lines(path: AnyPath) -> Result<usize, Box<BedErrorPlus>> {
1001 let file = File::open(path)?;
1002 let reader = BufReader::new(file);
1003 let count = reader.lines().count();
1004 Ok(count)
1005}
1006
/// Selects how `find_factor` computes the per-SNP scaling factor.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq)]
enum Dist {
    /// Scale by `1 / std`.
    Unit,
    /// Scale by the density of a Beta(`a`, `b`) distribution evaluated at the
    /// SNP's minor allele frequency.
    Beta {
        /// First shape parameter passed to `Beta::new`.
        a: f64,
        /// Second shape parameter passed to `Beta::new`.
        b: f64,
    },
}
1012
1013#[allow(dead_code)]
1014fn impute_and_zero_mean_snps<
1015 T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1016>(
1017 val: &mut nd::ArrayViewMut2<'_, T>,
1018 dist: &Dist,
1019 apply_in_place: bool,
1020 use_stats: bool,
1021 stats: &mut nd::ArrayViewMut2<'_, T>,
1022) -> Result<(), Box<BedErrorPlus>> {
1023 let two = T::one() + T::one();
1024
1025 // If output is F-order (or in general if iid stride is no more than sid_stride)
1026 if val.stride_of(nd::Axis(0)) <= val.stride_of(nd::Axis(1)) {
1027 let result_list = nd::Zip::from(val.axis_iter_mut(nd::Axis(1)))
1028 .and(stats.axis_iter_mut(nd::Axis(0)))
1029 .par_map_collect(|mut col, mut stats_row| {
1030 process_sid(
1031 &mut col,
1032 apply_in_place,
1033 use_stats,
1034 &mut stats_row,
1035 dist,
1036 two,
1037 )
1038 });
1039
1040 // Check the result list for errors
1041 result_list
1042 .iter()
1043 .par_bridge()
1044 .try_for_each(|x| (*x).clone())?;
1045
1046 Ok(())
1047 } else {
1048 //If C-order
1049 process_all_iids(val, apply_in_place, use_stats, stats, dist, two)
1050 }
1051}
1052
1053// Later move the other fast-lmm functions into their own package
1054#[allow(dead_code)]
1055fn find_factor<
1056 T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1057>(
1058 dist: &Dist,
1059 mean_s: T,
1060 std: T,
1061) -> Result<T, BedError> {
1062 if let Dist::Beta { a, b } = dist {
1063 // Try to create a beta dist
1064 let Ok(beta_dist) = Beta::new(*a, *b) else {
1065 Err(BedError::CannotCreateBetaDist(*a, *b))?
1066 };
1067
1068 // Try to an f64 maf
1069 let mut maf = if let Some(mean_u64) = mean_s.to_f64() {
1070 mean_u64 / 2.0
1071 } else {
1072 Err(BedError::CannotConvertBetaToFromF64)?
1073 };
1074 if maf > 0.5 {
1075 maf = 1.0 - maf;
1076 }
1077
1078 // Try to put the maf in the beta dist
1079 if let Some(b) = T::from_f64(beta_dist.pdf(maf)) {
1080 Ok(b)
1081 } else {
1082 Err(BedError::CannotConvertBetaToFromF64)
1083 }
1084 } else {
1085 Ok(T::one() / std)
1086 }
1087}
1088
1089#[allow(dead_code)]
1090fn process_sid<
1091 T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1092>(
1093 col: &mut nd::ArrayViewMut1<'_, T>,
1094 apply_in_place: bool,
1095 use_stats: bool,
1096 stats_row: &mut nd::ArrayViewMut1<'_, T>,
1097 dist: &Dist,
1098 two: T,
1099) -> Result<(), BedError> {
1100 if !use_stats {
1101 let mut n_observed = T::zero();
1102 let mut sum_s = T::zero(); // the sum of a SNP over all observed individuals
1103 let mut sum2_s = T::zero(); // the sum of the squares of the SNP over all observed individuals
1104
1105 for iid_i in 0..col.len() {
1106 let v = col[iid_i];
1107 if !v.is_nan() {
1108 sum_s = sum_s + v;
1109 sum2_s = sum2_s + v * v;
1110 n_observed = n_observed + T::one();
1111 }
1112 }
1113 if n_observed < T::one() {
1114 //LATER make it work (in some form) for n of 0
1115 Err(BedError::NoIndividuals)?;
1116 }
1117 let mean_s = sum_s / n_observed; //compute the mean over observed individuals for the current SNP
1118 let mean2_s: T = sum2_s / n_observed; //compute the mean of the squared SNP
1119
1120 if mean_s.is_nan()
1121 || (matches!(dist, Dist::Beta { a: _, b: _ })
1122 && ((mean_s > two) || (mean_s < T::zero())))
1123 {
1124 Err(BedError::IllegalSnpMean)?;
1125 }
1126
1127 let variance: T = mean2_s - mean_s * mean_s; //By the Cauchy Schwartz inequality this should always be positive
1128
1129 let mut std = variance.sqrt();
1130 if std.is_nan() || std <= T::zero() {
1131 // All "SNPs" have the same value (aka SNC)
1132 std = T::infinity(); //SNCs are still meaning full in QQ plots because they should be thought of as SNPs without enough data.
1133 }
1134
1135 stats_row[0] = mean_s;
1136 stats_row[1] = std;
1137 }
1138
1139 if apply_in_place {
1140 {
1141 let mean_s = stats_row[0];
1142 let std = stats_row[1];
1143 let is_snc = std.is_infinite();
1144
1145 let factor = find_factor(dist, mean_s, std)?;
1146
1147 for iid_i in 0..col.len() {
1148 //check for Missing (NAN) or SNC
1149 if col[iid_i].is_nan() || is_snc {
1150 col[iid_i] = T::zero();
1151 } else {
1152 col[iid_i] = (col[iid_i] - mean_s) * factor;
1153 }
1154 }
1155 }
1156 }
1157 Ok(())
1158}
1159
1160#[allow(dead_code)]
1161fn process_all_iids<
1162 T: Default + Copy + Debug + Sync + Send + Sync + Float + ToPrimitive + FromPrimitive,
1163>(
1164 val: &mut nd::ArrayViewMut2<'_, T>,
1165 apply_in_place: bool,
1166 use_stats: bool,
1167 stats: &mut nd::ArrayViewMut2<'_, T>,
1168 dist: &Dist,
1169 two: T,
1170) -> Result<(), Box<BedErrorPlus>> {
1171 let sid_count = val.dim().1;
1172
1173 if !use_stats {
1174 // O(iid_count * sid_count)
1175 // Serial that respects C-order is 3-times faster than parallel that doesn't
1176 // So we parallelize the inner loop instead of the outer loop
1177 let mut n_observed_array = nd::Array1::<T>::zeros(sid_count);
1178 let mut sum_s_array = nd::Array1::<T>::zeros(sid_count); //the sum of a SNP over all observed individuals
1179 let mut sum2_s_array = nd::Array1::<T>::zeros(sid_count); //the sum of the squares of the SNP over all observed individuals
1180 for row in val.axis_iter(nd::Axis(0)) {
1181 nd::par_azip!((&v in row,
1182 n_observed_ptr in &mut n_observed_array,
1183 sum_s_ptr in &mut sum_s_array,
1184 sum2_s_ptr in &mut sum2_s_array
1185 )
1186 if !v.is_nan() {
1187 *n_observed_ptr = *n_observed_ptr + T::one();
1188 *sum_s_ptr = *sum_s_ptr + v;
1189 *sum2_s_ptr = *sum2_s_ptr + v * v;
1190 }
1191 );
1192 }
1193
1194 // O(sid_count)
1195 let mut result_list: Vec<Result<(), BedError>> = vec![Ok(()); sid_count];
1196 nd::par_azip!((mut stats_row in stats.axis_iter_mut(nd::Axis(0)),
1197 &n_observed in &n_observed_array,
1198 &sum_s in &sum_s_array,
1199 &sum2_s in &sum2_s_array,
1200 result_ptr in &mut result_list)
1201 {
1202 if n_observed < T::one() {
1203 *result_ptr = Err(BedError::NoIndividuals);
1204 return;
1205 }
1206 let mean_s = sum_s / n_observed; //compute the mean over observed individuals for the current SNP
1207 let mean2_s: T = sum2_s / n_observed; //compute the mean of the squared SNP
1208
1209 if mean_s.is_nan()
1210 || (matches!(dist, Dist::Beta { a:_, b:_ }) && ((mean_s > two) || (mean_s < T::zero())))
1211 {
1212 *result_ptr = Err(BedError::IllegalSnpMean);
1213 return;
1214 }
1215
1216 let variance: T = mean2_s - mean_s * mean_s; //By the Cauchy Schwartz inequality this should always be positive
1217 let mut std = variance.sqrt();
1218 if std.is_nan() || std <= T::zero() {
1219 // All "SNPs" have the same value (aka SNC)
1220 std = T::infinity(); //SNCs are still meaning full in QQ plots because they should be thought of as SNPs without enough data.
1221 }
1222 stats_row[0] = mean_s;
1223 stats_row[1] = std;
1224 });
1225 // Check the result list for errors
1226 result_list.par_iter().try_for_each(|x| (*x).clone())?;
1227 }
1228
1229 if apply_in_place {
1230 // O(sid_count)
1231 let mut factor_array = nd::Array1::<T>::zeros(stats.dim().0);
1232
1233 stats
1234 .axis_iter_mut(nd::Axis(0))
1235 .zip(&mut factor_array)
1236 .par_bridge()
1237 .try_for_each(|(stats_row, factor_ptr)| {
1238 match find_factor(dist, stats_row[0], stats_row[1]) {
1239 Err(e) => Err(e),
1240 Ok(factor) => {
1241 *factor_ptr = factor;
1242 Ok(())
1243 }
1244 }
1245 })?;
1246
1247 // O(iid_count * sid_count)
1248 nd::par_azip!((mut row in val.axis_iter_mut(nd::Axis(0)))
1249 {
1250 for sid_i in 0..row.len() {
1251 //check for Missing (NAN) or SNC
1252 if row[sid_i].is_nan() || stats[(sid_i, 1)].is_infinite() {
1253 row[sid_i] = T::zero();
1254 } else {
1255 row[sid_i] = (row[sid_i] - stats[(sid_i, 0)]) * factor_array[sid_i];
1256 }
1257 }
1258 });
1259 }
1260 Ok(())
1261}
1262
1263#[allow(dead_code)]
1264#[anyinput]
1265fn file_b_less_aatbx(
1266 a_filename: AnyPath,
1267 offset: u64,
1268 iid_count: usize,
1269 b1: &mut nd::ArrayViewMut2<'_, f64>,
1270 aatb: &mut nd::ArrayViewMut2<'_, f64>,
1271 atb: &mut nd::ArrayViewMut2<'_, f64>,
1272 log_frequency: usize,
1273) -> Result<(), Box<BedErrorPlus>> {
1274 //speed idea from C++:
1275 //Are copies really needed?
1276 //is F, vc C order the best?
1277 //would bigger snp blocks be better
1278
1279 let (a_sid_count, b_sid_count) = atb.dim();
1280 if log_frequency > 0 {
1281 println!("file_b_less_aatbx: iid_count={iid_count}, {a_sid_count}x{b_sid_count} output");
1282 };
1283
1284 // Open the file and move to the starting sid
1285 let mut buf_reader = BufReader::new(File::open(a_filename)?);
1286 buf_reader.seek(SeekFrom::Start(offset))?;
1287
1288 let mut sid_reuse = vec![f64::NAN; iid_count];
1289 for (a_sid_index, mut atb_row) in atb.axis_iter_mut(nd::Axis(0)).enumerate() {
1290 if log_frequency > 0 && a_sid_index % log_frequency == 0 {
1291 println!(
1292 " working on train_sid_index={a_sid_index} of {a_sid_count} (iid_count={iid_count}, b_sid_count={b_sid_count})"
1293 );
1294 }
1295
1296 buf_reader.read_f64_into::<LittleEndian>(&mut sid_reuse)?;
1297
1298 nd::par_azip!(
1299 (mut atb_element in atb_row.axis_iter_mut(nd::Axis(0)),
1300 b1_col in b1.axis_iter(nd::Axis(1)),
1301 mut aatb_col in aatb.axis_iter_mut(nd::Axis(1)))
1302 {
1303 let mut atbi = 0.0;
1304 for iid_index in 0..iid_count {
1305 atbi += sid_reuse[iid_index] * b1_col[iid_index];
1306 }
1307 atb_element[()] = atbi;
1308 for iid_index in 0..iid_count {
1309 aatb_col[iid_index] -= sid_reuse[iid_index] * atbi;
1310 }
1311 });
1312 }
1313 Ok(())
1314}
1315
1316#[allow(dead_code)]
1317fn read_into_f64(src: &mut BufReader<File>, dst: &mut [f64]) -> std::io::Result<()> {
1318 src.read_f64_into::<LittleEndian>(dst)
1319}
1320
1321#[allow(dead_code)]
1322fn read_into_f32(src: &mut BufReader<File>, dst: &mut [f32]) -> std::io::Result<()> {
1323 src.read_f32_into::<LittleEndian>(dst)
1324}
1325
/* Here are Python algorithms that show how to do a low-memory multiply of A (or A.T) x B (or B.T).
 They are used by file_ata_piece and file_aat_piece, with some optimizations for the case where A and B are the same.
1328
1329output_list = [np.zeros((4,4)) for i in range(4)]
1330
1331# a.T.dot(b)
1332for a_col2 in range(0,4,2): # 1 pass through A, returning output chunk about the same size writing in one pass
1333 buffer_a2 = a[:,a_col2:a_col2+2]
1334 for b_col in range(4): # A1/a1 passes through B
1335 buffer_b = b[:,b_col]
1336 for i in range(4):
1337 b_val = buffer_b[i]
1338 a_slice = buffer_a2[i,:]
1339 for k in range(2): # A1/a1 * A0 passes through the output
1340 output_list[0][a_col2+k,b_col] += a_slice[k]*b_val
1341
1342# a.dot(b.T)
1343for out_col2 in range(0,4,2): # 1 pass through output, returning chunk on each pass
1344 for col in range(4): # O1/o1 passes through A and B
1345 buffer_a = a[:,col]
1346 buffer_b = b[:,col]
1347 for k in range(2):
1348 for i in range(4):
1349 output_list[1][i,out_col2+k] += buffer_a[i]*buffer_b[out_col2+k]
1350
1351# a.T.dot(b.T)
1352for a_col2 in range(0,4,2): # 1 pass through A, returning an output chunk on each pass
1353 buffer_a2 = a[:,a_col2:a_col2+2]
1354 for b_col in range(4):
1355 buffer_b = b[:,b_col]
1356 for i in range(4):
1357 b_val = buffer_b[i]
1358 for k in range(2):
1359 output_list[2][a_col2+k,i] += buffer_a2[b_col,k]*b_val
1360
1361# a.dot(b) - but should instead do (b.T.dot(a.T)).T
1362for b_col2 in range(0,4,2): #Transpose of preceding one
1363 buffer_b2 = b[:,b_col2:b_col2+2]
1364 for a_col in range(4):
1365 buffer_a = a[:,a_col]
1366 for i in range(4):
1367 a_val = buffer_a[i]
1368 for k in range(2):
1369 output_list[3][i,b_col2+k] += buffer_b2[a_col,k]*a_val
1370
1371
1372for output in output_list:
1373 print(output)
1374 */
1375
1376// Given A, a matrix in Fortran order in a file
1377// with row_count rows and col_count columns,
1378// and given a starting column,
1379// returns part of A.T x A, the column vs column product.
1380// The piece piece returned has dimensions
1381// (col_count-col_start) x ncols
1382// where ncols <= (col_count-col_start)
1383// Makes only one pass through the file.
1384#[allow(clippy::too_many_arguments)]
1385#[allow(dead_code)]
1386#[anyinput]
1387fn file_ata_piece<T: Float + Send + Sync + Sync + AddAssign>(
1388 path: AnyPath,
1389 offset: u64,
1390 row_count: usize,
1391 col_count: usize,
1392 col_start: usize,
1393 ata_piece: &mut nd::ArrayViewMut2<'_, T>,
1394 log_frequency: usize,
1395 read_into: fn(&mut BufReader<File>, &mut [T]) -> std::io::Result<()>,
1396) -> Result<(), Box<BedErrorPlus>> {
1397 let (nrows, ncols) = ata_piece.dim();
1398 if (col_start >= col_count)
1399 || (col_start + nrows != col_count)
1400 || (col_start + ncols > col_count)
1401 {
1402 Err(BedError::CannotConvertBetaToFromF64)?;
1403 }
1404
1405 file_ata_piece_internal(
1406 path,
1407 offset,
1408 row_count,
1409 col_start,
1410 ata_piece,
1411 log_frequency,
1412 read_into,
1413 )
1414}
1415
1416#[allow(dead_code)]
1417#[anyinput]
1418fn file_ata_piece_internal<T: Float + Send + Sync + Sync + AddAssign>(
1419 path: AnyPath,
1420 offset: u64,
1421 row_count: usize,
1422 col_start: usize,
1423 ata_piece: &mut nd::ArrayViewMut2<'_, T>,
1424 log_frequency: usize,
1425 read_into: fn(&mut BufReader<File>, &mut [T]) -> std::io::Result<()>,
1426) -> Result<(), Box<BedErrorPlus>> {
1427 let (nrows, ncols) = ata_piece.dim();
1428 if log_frequency > 0 {
1429 println!("file_ata_piece: col_start={col_start}, {nrows}x{ncols} output");
1430 };
1431
1432 // Open the file and move to the starting col
1433 let mut buf_reader = BufReader::new(File::open(path)?);
1434 buf_reader.seek(SeekFrom::Start(
1435 offset + col_start as u64 * row_count as u64 * std::mem::size_of::<T>() as u64,
1436 ))?;
1437
1438 let mut col_save_list: Vec<Vec<T>> = vec![];
1439 let mut col_reuse = vec![T::nan(); row_count];
1440
1441 for (col_rel_index, mut ata_row) in ata_piece.axis_iter_mut(nd::Axis(0)).enumerate() {
1442 if log_frequency > 0 && col_rel_index % log_frequency == 0 {
1443 println!(" working on {col_rel_index} of {nrows}");
1444 }
1445
1446 // Read next col and save if in range
1447 let col = if col_save_list.len() < ncols {
1448 let mut col_save = vec![T::nan(); row_count];
1449 read_into(&mut buf_reader, &mut col_save)?;
1450 col_save_list.push(col_save);
1451 col_save_list.last().unwrap() // unwrap is OK here
1452 } else {
1453 read_into(&mut buf_reader, &mut col_reuse)?;
1454 &col_reuse
1455 };
1456
1457 // Multiple saved sids with new sid
1458 let mut ata_row_trimmed = ata_row.slice_mut(nd::s![..col_save_list.len()]);
1459 nd::par_azip!((
1460 col_in_range in &col_save_list,
1461 mut ata_val in ata_row_trimmed.axis_iter_mut(nd::Axis(0))
1462 )
1463 {
1464 ata_val[()] = col_product(col_in_range, col);
1465 });
1466 }
1467
1468 // Reflect the new product values
1469 for row_index in 0usize..ncols - 1 {
1470 for col_index in row_index..ncols {
1471 ata_piece[(row_index, col_index)] = ata_piece[(col_index, row_index)];
1472 }
1473 }
1474 Ok(())
1475}
1476
1477#[allow(dead_code)]
1478fn col_product<T: Float + AddAssign>(col_i: &[T], col_j: &[T]) -> T {
1479 assert!(col_i.len() == col_j.len()); // real assert
1480 let mut product = T::zero();
1481 for row_index in 0..col_i.len() {
1482 product += col_i[row_index] * col_j[row_index];
1483 }
1484 product
1485}
1486
1487// Given A, a matrix in Fortran order in a file
1488// with row_count rows and col_count columns,
1489// and given a starting column,
1490// returns part of A x A.T, the row vs row product.
1491// The piece piece returned has dimensions
1492// (row_count-row_start) x ncols
1493// where ncols <= (row_count-row_start)
1494// Makes only one pass through the file.
1495#[allow(clippy::too_many_arguments)]
1496#[allow(dead_code)]
1497#[anyinput]
1498fn file_aat_piece<T: Float + Sync + Send + Sync + AddAssign>(
1499 path: AnyPath,
1500 offset: u64,
1501 row_count: usize,
1502 col_count: usize,
1503 row_start: usize,
1504 aat_piece: &mut nd::ArrayViewMut2<'_, T>,
1505 log_frequency: usize,
1506 read_into: fn(&mut BufReader<File>, &mut [T]) -> std::io::Result<()>,
1507) -> Result<(), Box<BedErrorPlus>> {
1508 let (nrows, ncols) = aat_piece.dim();
1509
1510 if log_frequency > 0 {
1511 println!("file_aat_piece: row_start={row_start}, {nrows}x{ncols} output");
1512 };
1513
1514 if (row_start >= row_count)
1515 || (row_start + nrows != row_count)
1516 || (row_start + ncols > row_count)
1517 {
1518 Err(BedError::CannotConvertBetaToFromF64)?;
1519 }
1520
1521 aat_piece.fill(T::zero());
1522
1523 // Open the file and move to the starting col
1524 let mut buf_reader = BufReader::new(File::open(path)?);
1525
1526 let mut col = vec![T::nan(); row_count - row_start];
1527
1528 for col_index in 0..col_count {
1529 if log_frequency > 0 && col_index % log_frequency == 0 {
1530 println!(" working on {col_index} of {col_count}");
1531 }
1532
1533 // Read next col
1534 buf_reader.seek(SeekFrom::Start(
1535 offset + (col_index * row_count + row_start) as u64 * std::mem::size_of::<T>() as u64,
1536 ))?;
1537 read_into(&mut buf_reader, &mut col)?;
1538
1539 nd::par_azip!(
1540 (index row_index1,
1541 mut aat_col in aat_piece.axis_iter_mut(nd::Axis(1))
1542 )
1543 {
1544 let val1 = col[row_index1];
1545 for row_index0 in row_index1..nrows {
1546 aat_col[row_index0] += val1 * col[row_index0];
1547 }
1548 });
1549 }
1550
1551 // Notice that ata reflects and aat doesn't. They don't need
1552 // to be the same, but they could be.
1553 Ok(())
1554}
1555
1556// References: https://www.youtube.com/watch?v=0zOg8_B71gE&t=22s
1557// https://deterministic.space/elegant-apis-in-rust.html
1558// https://rust-lang.github.io/api-guidelines/
1559// https://ricardomartins.cc/2016/08/03/convenient_and_idiomatic_conversions_in_rust
1560
1561/// Represents the metadata from PLINK .fam and .bim files.
1562///
1563/// Construct with [`Metadata::builder`](struct.Metadata.html#method.builder) or [`Metadata::new`](struct.Metadata.html#method.new).
1564///
1565/// # Example
1566///
1567/// Extract metadata from a file.
1568/// Create a random file with the same metadata.
1569/// ```
1570/// use ndarray as nd;
1571/// use bed_reader::{Bed, WriteOptions, sample_bed_file};
1572/// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
1573///
1574/// let mut bed = Bed::new(sample_bed_file("small.bed")?)?;
1575/// let metadata = bed.metadata()?;
1576/// let shape = bed.dim()?;
1577///
1578/// let mut rng = StdRng::seed_from_u64(0);
1579/// let val = nd::Array::random_using(shape, Uniform::from(-1..3), &mut rng);
1580///
1581/// let temp_out = temp_testdir::TempDir::default();
1582/// let output_file = temp_out.join("random.bed");
1583/// WriteOptions::builder(output_file)
1584/// .metadata(&metadata)
1585/// .missing_value(-1)
1586/// .write(&val)?;
1587/// # use bed_reader::BedErrorPlus;
1588/// # Ok::<(), Box<BedErrorPlus>>(())
1589/// ```
1590#[derive(Clone, Debug, Builder, PartialEq)]
1591#[builder(build_fn(private, name = "build_no_file_check", error = "BedErrorPlus"))]
1592pub struct Metadata {
1593 #[builder(setter(custom))]
1594 #[builder(default = "None")]
1595 fid: Option<Rc<nd::Array1<String>>>,
1596 #[builder(setter(custom))]
1597 #[builder(default = "None")]
1598 iid: Option<Rc<nd::Array1<String>>>,
1599 #[builder(setter(custom))]
1600 #[builder(default = "None")]
1601 father: Option<Rc<nd::Array1<String>>>,
1602 #[builder(setter(custom))]
1603 #[builder(default = "None")]
1604 mother: Option<Rc<nd::Array1<String>>>,
1605
1606 // i32 based on https://www.cog-genomics.org/plink2/formats#bim
1607 #[builder(setter(custom))]
1608 #[builder(default = "None")]
1609 sex: Option<Rc<nd::Array1<i32>>>,
1610 #[builder(setter(custom))]
1611 #[builder(default = "None")]
1612 pheno: Option<Rc<nd::Array1<String>>>,
1613
1614 #[builder(setter(custom))]
1615 #[builder(default = "None")]
1616 chromosome: Option<Rc<nd::Array1<String>>>,
1617 #[builder(setter(custom))]
1618 #[builder(default = "None")]
1619 sid: Option<Rc<nd::Array1<String>>>,
1620 #[builder(setter(custom))]
1621 #[builder(default = "None")]
1622 cm_position: Option<Rc<nd::Array1<f32>>>,
1623 #[builder(setter(custom))]
1624 #[builder(default = "None")]
1625 bp_position: Option<Rc<nd::Array1<i32>>>,
1626 #[builder(setter(custom))]
1627 #[builder(default = "None")]
1628 allele_1: Option<Rc<nd::Array1<String>>>,
1629 #[builder(setter(custom))]
1630 #[builder(default = "None")]
1631 allele_2: Option<Rc<nd::Array1<String>>>,
1632}
1633
1634fn lazy_or_skip_count<T>(array: Option<&Rc<nd::Array1<T>>>) -> Option<usize> {
1635 array.map(|array| array.len())
1636}
1637
1638/// Represents a PLINK .bed file that is open for reading genotype data and metadata.
1639///
1640/// Construct with [`Bed::new`](struct.Bed.html#method.new) or [`Bed::builder`](struct.Bed.html#method.builder).
1641///
1642/// > For reading cloud files, see [`BedCloud`](struct.BedCloud.html).
1643///
1644/// # Example
1645///
1646/// Open a file for reading. Then, read the individual (sample) ids
1647/// and all the genotype data.
1648/// ```
1649/// use ndarray as nd;
1650/// use bed_reader::{Bed, ReadOptions, sample_bed_file};
1651/// use bed_reader::assert_eq_nan;
1652///
1653/// let file_name = sample_bed_file("small.bed")?;
1654/// let mut bed = Bed::new(file_name)?;
1655/// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
1656/// let val = ReadOptions::builder().f64().read(&mut bed)?;
1657///
1658/// assert_eq_nan(
1659/// &val,
1660/// &nd::array![
1661/// [1.0, 0.0, f64::NAN, 0.0],
1662/// [2.0, 0.0, f64::NAN, 2.0],
1663/// [0.0, 1.0, 2.0, 0.0]
1664/// ],
1665/// );
1666/// # use bed_reader::BedErrorPlus;
1667/// # Ok::<(), Box<BedErrorPlus>>(())
1668/// ```
1669#[derive(Clone, Debug, Builder)]
1670#[builder(build_fn(private, name = "build_no_file_check", error = "BedErrorPlus"))]
1671pub struct Bed {
1672 // https://stackoverflow.com/questions/32730714/what-is-the-right-way-to-store-an-immutable-path-in-a-struct
1673 // don't emit a setter, but keep the field declaration on the builder
1674 /// The file name or path of the .bed file.
1675 #[builder(setter(custom))]
1676 path: PathBuf,
1677
1678 #[builder(setter(custom))]
1679 #[builder(default = "None")]
1680 fam_path: Option<PathBuf>,
1681
1682 #[builder(setter(custom))]
1683 #[builder(default = "None")]
1684 bim_path: Option<PathBuf>,
1685
1686 #[builder(setter(custom))]
1687 #[builder(default = "true")]
1688 is_checked_early: bool,
1689
1690 #[builder(setter(custom))]
1691 #[builder(default = "None")]
1692 iid_count: Option<usize>,
1693
1694 #[builder(setter(custom))]
1695 #[builder(default = "None")]
1696 sid_count: Option<usize>,
1697
1698 #[builder(setter(custom))]
1699 metadata: Metadata,
1700
1701 #[builder(setter(custom))]
1702 skip_set: HashSet<MetadataFields>,
1703}
1704
/// All Metadata fields.
///
/// Used by [`Metadata::read_fam`](struct.Metadata.html#method.read_fam) and
/// [`Metadata::read_bim`](struct.Metadata.html#method.read_bim) to skip reading
/// specified metadata fields.
///
/// The variants are ordered as declared: the six .fam fields first, then the
/// .bim fields.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Ord, PartialOrd, Hash)]
pub enum MetadataFields {
    /// The family id (`fid`) field of the .fam file.
    Fid,
    /// The individual id (`iid`) field of the .fam file.
    Iid,
    /// The `father` field of the .fam file.
    Father,
    /// The `mother` field of the .fam file.
    Mother,
    /// The `sex` field of the .fam file.
    Sex,
    /// The phenotype (`pheno`) field of the .fam file.
    Pheno,
    /// The `chromosome` field of the .bim file.
    Chromosome,
    /// The SNP id (`sid`) field of the .bim file.
    Sid,
    /// The centimorgan position (`cm_position`) field of the .bim file.
    CmPosition,
    /// The `bp_position` field.
    BpPosition,
    /// The `allele_1` field.
    Allele1,
    /// The `allele_2` field.
    Allele2,
}
1737
1738impl BedBuilder {
1739 #[anyinput]
1740 fn new(path: AnyPath) -> Self {
1741 Self {
1742 path: Some(path.to_owned()),
1743 fam_path: None,
1744 bim_path: None,
1745
1746 is_checked_early: None,
1747 iid_count: None,
1748 sid_count: None,
1749
1750 metadata: Some(Metadata::new()),
1751 skip_set: Some(HashSet::new()),
1752 }
1753 }
1754
1755 /// Create a [`Bed`](struct.Bed.html) from the builder.
1756 ///
1757 /// > See [`Bed::builder`](struct.Bed.html#method.builder) for more details and examples.
1758 pub fn build(&self) -> Result<Bed, Box<BedErrorPlus>> {
1759 let mut bed = self.build_no_file_check()?;
1760
1761 if bed.is_checked_early {
1762 open_and_check(&bed.path)?;
1763 }
1764
1765 (bed.iid_count, bed.sid_count) = bed.metadata.check_counts(bed.iid_count, bed.sid_count)?;
1766
1767 Ok(bed)
1768 }
1769
1770 // https://stackoverflow.com/questions/38183551/concisely-initializing-a-vector-of-strings
1771 // https://stackoverflow.com/questions/65250496/how-to-convert-intoiteratoritem-asrefstr-to-iteratoritem-str-in-rust
1772
1773 /// Override the family id (fid) values found in the .fam file.
1774 ///
1775 /// By default, if fid values are needed and haven't already been found,
1776 /// they will be read from the .fam file.
1777 /// Providing them here avoids that file read and provides a way to give different values.
1778 #[anyinput]
1779 #[must_use]
1780 pub fn fid(mut self, fid: AnyIter<AnyString>) -> Self {
1781 // Unwrap will always work because BedBuilder starting with some metadata
1782 self.metadata.as_mut().unwrap().set_fid(fid);
1783 self
1784 }
1785
1786 /// Override the individual id (iid) values found in the .fam file.
1787 ///
1788 /// By default, if iid values are needed and haven't already been found,
1789 /// they will be read from the .fam file.
1790 /// Providing them here avoids that file read and provides a way to give different values.
1791 /// ```
1792 /// use ndarray as nd;
1793 /// use bed_reader::{Bed, assert_eq_nan, sample_bed_file};
1794 /// let file_name = sample_bed_file("small.bed")?;
1795 /// use bed_reader::ReadOptions;
1796 ///
1797 /// let mut bed = Bed::builder(file_name)
1798 /// .iid(["sample1", "sample2", "sample3"])
1799 /// .build()?;
1800 /// println!("{:?}", bed.iid()?); // Outputs ndarray ["sample1", "sample2", "sample3"]
1801 /// # use bed_reader::BedErrorPlus;
1802 /// # Ok::<(), Box<BedErrorPlus>>(())
1803 /// ```
1804 #[anyinput]
1805 #[must_use]
1806 pub fn iid(mut self, iid: AnyIter<AnyString>) -> Self {
1807 // Unwrap will always work because BedBuilder starting with some metadata
1808 self.metadata.as_mut().unwrap().set_iid(iid);
1809 self
1810 }
1811
1812 /// Override the father values found in the .fam file.
1813 ///
1814 /// By default, if father values are needed and haven't already been found,
1815 /// they will be read from the .fam file.
1816 /// Providing them here avoids that file read and provides a way to gi&ve different values.
1817 #[anyinput]
1818 #[must_use]
1819 pub fn father(mut self, father: AnyIter<AnyString>) -> Self {
1820 // Unwrap will always work because BedBuilder starting with some metadata
1821 self.metadata.as_mut().unwrap().set_father(father);
1822 self
1823 }
1824
1825 /// Override the mother values found in the .fam file.
1826 ///
1827 /// By default, if mother values are needed and haven't already been found,
1828 /// they will be read from the .fam file.
1829 /// Providing them here avoids that file read and provides a way to give different values.
1830 #[anyinput]
1831 #[must_use]
1832 pub fn mother(mut self, mother: AnyIter<AnyString>) -> Self {
1833 // Unwrap will always work because BedBuilder starting with some metadata
1834 self.metadata.as_mut().unwrap().set_mother(mother);
1835 self
1836 }
1837
1838 /// Override the sex values found in the .fam file.
1839 ///
1840 /// By default, if sex values are needed and haven't already been found,
1841 /// they will be read from the .fam file.
1842 /// Providing them here avoids that file read and provides a way to give different values.
1843 #[anyinput]
1844 #[must_use]
1845 pub fn sex(mut self, sex: AnyIter<i32>) -> Self {
1846 // Unwrap will always work because BedBuilder starting with some metadata
1847 self.metadata.as_mut().unwrap().set_sex(sex);
1848 self
1849 }
1850
1851 /// Override the phenotype values found in the .fam file.
1852 ///
1853 /// Note that the phenotype values in the .fam file are seldom used.
1854 /// By default, if phenotype values are needed and haven't already been found,
1855 /// they will be read from the .fam file.
1856 /// Providing them here avoids that file read and provides a way to give different values.
1857 #[anyinput]
1858 #[must_use]
1859 pub fn pheno(mut self, pheno: AnyIter<AnyString>) -> Self {
1860 // Unwrap will always work because BedBuilder starting with some metadata
1861 self.metadata.as_mut().unwrap().set_pheno(pheno);
1862 self
1863 }
1864
1865 /// Override the chromosome values found in the .bim file.
1866 ///
1867 /// By default, if chromosome values are needed and haven't already been found,
1868 /// they will be read from the .bim file.
1869 /// Providing them here avoids that file read and provides a way to give different values.
1870 #[anyinput]
1871 #[must_use]
1872 pub fn chromosome(mut self, chromosome: AnyIter<AnyString>) -> Self {
1873 // Unwrap will always work because BedBuilder starting with some metadata
1874 self.metadata.as_mut().unwrap().set_chromosome(chromosome);
1875 self
1876 }
1877
1878 /// Override the SNP id (sid) values found in the .fam file.
1879 ///
1880 /// By default, if sid values are needed and haven't already been found,
1881 /// they will be read from the .bim file.
1882 /// Providing them here avoids that file read and provides a way to give different values.
1883 /// ```
1884 /// use ndarray as nd;
1885 /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
1886 /// let file_name = sample_bed_file("small.bed")?;
1887 ///
1888 /// let mut bed = Bed::builder(file_name)
1889 /// .sid(["SNP1", "SNP2", "SNP3", "SNP4"])
1890 /// .build()?;
1891 /// println!("{:?}", bed.sid()?); // Outputs ndarray ["SNP1", "SNP2", "SNP3", "SNP4"]
1892 /// # use bed_reader::BedErrorPlus;
1893 /// # Ok::<(), Box<BedErrorPlus>>(())
1894 /// ```
1895 #[anyinput]
1896 #[must_use]
1897 pub fn sid(mut self, sid: AnyIter<AnyString>) -> Self {
1898 self.metadata.as_mut().unwrap().set_sid(sid);
1899 self
1900 }
1901
1902 /// Override the centimorgan position values found in the .bim file.
1903 ///
1904 /// By default, if centimorgan position values are needed and haven't already been found,
1905 /// they will be read from the .bim file.
1906 /// Providing them here avoids that file read and provides a way to give different values.
1907 #[anyinput]
1908 #[must_use]
1909 pub fn cm_position(mut self, cm_position: AnyIter<f32>) -> Self {
1910 // Unwrap will always work because BedBuilder starting with some metadata
1911 self.metadata.as_mut().unwrap().set_cm_position(cm_position);
1912 self
1913 }
1914
1915 /// Override the base-pair position values found in the .bim file.
1916 ///
1917 /// By default, if base-pair position values are needed and haven't already been found,
1918 /// they will be read from the .bim file.
1919 /// Providing them here avoids that file read and provides a way to give different values.
1920 #[anyinput]
1921 #[must_use]
1922 pub fn bp_position(mut self, bp_position: AnyIter<i32>) -> Self {
1923 // Unwrap will always work because BedBuilder starting with some metadata
1924 self.metadata.as_mut().unwrap().set_bp_position(bp_position);
1925 self
1926 }
1927
1928 /// Override the allele 1 values found in the .bim file.
1929 ///
1930 /// By default, if allele 1 values are needed and haven't already been found,
1931 /// they will be read from the .bim file.
1932 /// Providing them here avoids that file read and provides a way to give different values.
1933 #[anyinput]
1934 #[must_use]
1935 pub fn allele_1(mut self, allele_1: AnyIter<AnyString>) -> Self {
1936 // Unwrap will always work because BedBuilder starting with some metadata
1937 self.metadata.as_mut().unwrap().set_allele_1(allele_1);
1938 self
1939 }
1940
1941 /// Override the allele 2 values found in the .bim file.
1942 ///
1943 /// By default, if allele 2 values are needed and haven't already been found,
1944 /// they will be read from the .bim file.
1945 /// Providing them here avoids that file read and provides a way to give different values.
1946 #[anyinput]
1947 #[must_use]
1948 pub fn allele_2(mut self, allele_2: AnyIter<AnyString>) -> Self {
1949 // Unwrap will always work because BedBuilder starting with some metadata
1950 self.metadata.as_mut().unwrap().set_allele_2(allele_2);
1951 self
1952 }
1953
1954 /// Set the number of individuals (samples) in the data.
1955 ///
1956 /// By default, if this number is needed, it will be found
1957 /// and remembered
1958 /// by opening the .fam file and quickly counting the number
1959 /// of lines. Providing the number thus avoids a file read.
1960 #[must_use]
1961 pub fn iid_count(mut self, count: usize) -> Self {
1962 self.iid_count = Some(Some(count));
1963 self
1964 }
1965
1966 /// Set the number of SNPs in the data.
1967 ///
1968 /// By default, if this number is needed, it will be found
1969 /// and remembered
1970 /// by opening the .bim file and quickly counting the number
1971 /// of lines. Providing the number thus avoids a file read.
1972 #[must_use]
1973 pub fn sid_count(mut self, count: usize) -> Self {
1974 self.sid_count = Some(Some(count));
1975 self
1976 }
1977
1978 /// Don't check the header of the .bed file until and unless the file is actually read.
1979 ///
1980 /// By default, when a [`Bed`](struct.Bed.html) struct is created, the .bed
1981 /// file header is checked. This stops that early check.
1982 #[must_use]
1983 pub fn skip_early_check(mut self) -> Self {
1984 self.is_checked_early = Some(false);
1985 self
1986 }
1987
1988 /// Set the path to the .fam file.
1989 ///
1990 /// If not set, the .fam file will be assumed
1991 /// to have the same name as the .bed file, but with the extension .fam.
1992 ///
1993 /// # Example:
1994 /// Read .bed, .fam, and .bim files with non-standard names.
1995 /// ```
1996 /// use bed_reader::{Bed, ReadOptions, sample_files};
1997 /// let deb_maf_mib = sample_files(["small.deb", "small.maf", "small.mib"])?;
1998 /// let mut bed = Bed::builder(&deb_maf_mib[0])
1999 /// .fam_path(&deb_maf_mib[1])
2000 /// .bim_path(&deb_maf_mib[2])
2001 /// .build()?;
2002 /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
2003 /// println!("{:?}", bed.sid()?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
2004 /// # use bed_reader::BedErrorPlus;
2005 /// # Ok::<(), Box<BedErrorPlus>>(())
2006 /// ```
2007 #[anyinput]
2008 #[must_use]
2009 pub fn fam_path(mut self, path: AnyPath) -> Self {
2010 self.fam_path = Some(Some(path.to_owned()));
2011 self
2012 }
2013
2014 /// Set the path to the .bim file.
2015 ///
2016 /// If not set, the .bim file will be assumed
2017 /// to have the same name as the .bed file, but with the extension .bim.
2018 ///
2019 /// # Example:
2020 /// Read .bed, .fam, and .bim files with non-standard names.
2021 /// ```
2022 /// use bed_reader::{Bed, ReadOptions, sample_files};
2023 /// let deb_maf_mib = sample_files(["small.deb", "small.maf", "small.mib"])?;
2024 /// let mut bed = Bed::builder(&deb_maf_mib[0])
2025 /// .fam_path(&deb_maf_mib[1])
2026 /// .bim_path(&deb_maf_mib[2])
2027 /// .build()?;
2028 /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
2029 /// println!("{:?}", bed.sid()?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
2030 /// # use bed_reader::BedErrorPlus;
2031 /// # Ok::<(), Box<BedErrorPlus>>(())
2032 /// ```
2033 #[must_use]
2034 #[anyinput]
2035 pub fn bim_path(mut self, path: AnyPath) -> Self {
2036 self.bim_path = Some(Some(path.to_owned()));
2037 self
2038 }
2039
2040 /// Don't read the fid information from the .fam file.
2041 ///
2042 /// By default, when the .fam is read, the fid (the family id) is recorded.
2043 /// This stops that recording. This is useful if the fid is not needed.
2044 /// Asking for the fid after skipping it results in an error.
2045 #[must_use]
2046 pub fn skip_fid(mut self) -> Self {
2047 // Unwrap will always work because BedBuilder starting with some skip_set
2048 self.skip_set.as_mut().unwrap().insert(MetadataFields::Fid);
2049 self
2050 }
2051
2052 /// Don't read the iid information from the .fam file.
2053 ///
2054 /// By default, when the .fam is read, the iid (the individual id) is recorded.
2055 /// This stops that recording. This is useful if the iid is not needed.
2056 /// Asking for the iid after skipping it results in an error.
2057 #[must_use]
2058 pub fn skip_iid(mut self) -> Self {
2059 // Unwrap will always work because BedBuilder starting with some skip_set
2060 self.skip_set.as_mut().unwrap().insert(MetadataFields::Iid);
2061 self
2062 }
2063
2064 /// Don't read the father information from the .fam file.
2065 ///
2066 /// By default, when the .fam is read, the father id is recorded.
2067 /// This stops that recording. This is useful if the father id is not needed.
2068 /// Asking for the father id after skipping it results in an error.
2069 #[must_use]
2070 pub fn skip_father(mut self) -> Self {
2071 // Unwrap will always work because BedBuilder starting with some skip_set
2072 self.skip_set
2073 .as_mut()
2074 .unwrap()
2075 .insert(MetadataFields::Father);
2076 self
2077 }
2078
2079 /// Don't read the mother information from the .fam file.
2080 ///
2081 /// By default, when the .fam is read, the mother id is recorded.
2082 /// This stops that recording. This is useful if the mother id is not needed.
2083 /// Asking for the mother id after skipping it results in an error.
2084 #[must_use]
2085 pub fn skip_mother(mut self) -> Self {
2086 // Unwrap will always work because BedBuilder starting with some skip_set
2087 self.skip_set
2088 .as_mut()
2089 .unwrap()
2090 .insert(MetadataFields::Mother);
2091 self
2092 }
2093
2094 /// Don't read the sex information from the .fam file.
2095 ///
2096 /// By default, when the .fam is read, the sex is recorded.
2097 /// This stops that recording. This is useful if sex is not needed.
2098 /// Asking for sex after skipping it results in an error.
2099 #[must_use]
2100 pub fn skip_sex(mut self) -> Self {
2101 // Unwrap will always work because BedBuilder starting with some skip_set
2102 self.skip_set.as_mut().unwrap().insert(MetadataFields::Sex);
2103 self
2104 }
2105
2106 /// Don't read the phenotype information from the .fam file.
2107 ///
2108 /// Note that the phenotype information in the .fam file is
2109 /// seldom used.
2110 ///
2111 /// By default, when the .fam is read, the phenotype is recorded.
2112 /// This stops that recording. This is useful if this phenotype
2113 /// information is not needed.
2114 /// Asking for the phenotype after skipping it results in an error.
2115 #[must_use]
2116 pub fn skip_pheno(mut self) -> Self {
2117 // Unwrap will always work because BedBuilder starting with some skip_set
2118 self.skip_set
2119 .as_mut()
2120 .unwrap()
2121 .insert(MetadataFields::Pheno);
2122 self
2123 }
2124
2125 /// Don't read the chromosome information from the .bim file.
2126 ///
2127 /// By default, when the .bim is read, the chromosome is recorded.
2128 /// This stops that recording. This is useful if the chromosome is not needed.
2129 /// Asking for the chromosome after skipping it results in an error.
2130 #[must_use]
2131 pub fn skip_chromosome(mut self) -> Self {
2132 // Unwrap will always work because BedBuilder starting with some skip_set
2133 self.skip_set
2134 .as_mut()
2135 .unwrap()
2136 .insert(MetadataFields::Chromosome);
2137 self
2138 }
2139
2140 /// Don't read the SNP id information from the .bim file.
2141 ///
2142 /// By default, when the .bim is read, the sid (SNP id) is recorded.
2143 /// This stops that recording. This is useful if the sid is not needed.
2144 /// Asking for the sid after skipping it results in an error.
2145 #[must_use]
2146 pub fn skip_sid(mut self) -> Self {
2147 // Unwrap will always work because BedBuilder starting with some skip_set
2148 self.skip_set.as_mut().unwrap().insert(MetadataFields::Sid);
2149 self
2150 }
2151
2152 /// Don't read the centimorgan position information from the .bim file.
2153 ///
2154 /// By default, when the .bim is read, the cm position is recorded.
2155 /// This stops that recording. This is useful if the cm position is not needed.
2156 /// Asking for the cm position after skipping it results in an error.
2157 #[must_use]
2158 pub fn skip_cm_position(mut self) -> Self {
2159 // Unwrap will always work because BedBuilder starting with some skip_set
2160 self.skip_set
2161 .as_mut()
2162 .unwrap()
2163 .insert(MetadataFields::CmPosition);
2164 self
2165 }
2166
2167 /// Don't read the base-pair position information from the .bim file.
2168 ///
2169 /// By default, when the .bim is read, the bp position is recorded.
2170 /// This stops that recording. This is useful if the bp position is not needed.
2171 /// Asking for the cp position after skipping it results in an error.
2172 #[must_use]
2173 pub fn skip_bp_position(mut self) -> Self {
2174 // Unwrap will always work because BedBuilder starting with some skip_set
2175 self.skip_set
2176 .as_mut()
2177 .unwrap()
2178 .insert(MetadataFields::BpPosition);
2179 self
2180 }
2181
2182 /// Don't read the allele 1 information from the .bim file.
2183 ///
2184 /// By default, when the .bim is read, allele 1 is recorded.
2185 /// This stops that recording. This is useful if allele 1 is not needed.
2186 /// Asking for allele 1 after skipping it results in an error.
2187 #[must_use]
2188 pub fn skip_allele_1(mut self) -> Self {
2189 // Unwrap will always work because BedBuilder starting with some skip_set
2190 self.skip_set
2191 .as_mut()
2192 .unwrap()
2193 .insert(MetadataFields::Allele1);
2194 self
2195 }
2196
2197 /// Don't read the allele 2 information from the .bim file.
2198 ///
2199 /// By default, when the .bim is read, allele 2 is recorded.
2200 /// This stops that recording. This is useful if allele 2 is not needed.
2201 /// Asking for allele 2 after skipping it results in an error.
2202 #[must_use]
2203 pub fn skip_allele_2(mut self) -> Self {
2204 // Unwrap will always work because BedBuilder starting with some skip_set
2205 self.skip_set
2206 .as_mut()
2207 .unwrap()
2208 .insert(MetadataFields::Allele2);
2209 self
2210 }
2211
2212 /// Override the metadata in the .fam and .bim files with info merged in from a [`Metadata`](struct.Metadata.html).
2213 ///
2214 /// # Example
2215 ///
2216 /// In the example, we create a [`Metadata`](struct.Metadata.html) with iid
2217 /// and sid arrays. Next, we use [`BedBuilder`](struct.BedBuilder.html) to override the fid array
2218 /// and an iid array. Then, we add the metadata to the [`BedBuilder`](struct.BedBuilder.html),
2219 /// overwriting iid (again) and overriding sid. Finally, we print these
2220 /// three arrays and chromosome. Chromosome was never overridden so
2221 /// it is read from the *.bim file.
2222 ///```
2223 /// use ndarray as nd;
2224 /// use bed_reader::{Bed, Metadata, sample_bed_file};
2225 ///
2226 /// let file_name = sample_bed_file("small.bed")?;
2227 /// let metadata = Metadata::builder()
2228 /// .iid(["i1", "i2", "i3"])
2229 /// .sid(["s1", "s2", "s3", "s4"])
2230 /// .build()?;
2231 /// let mut bed = Bed::builder(file_name)
2232 /// .fid(["f1", "f2", "f3"])
2233 /// .iid(["x1", "x2", "x3"])
2234 /// .metadata(&metadata)
2235 /// .build()?;
2236 /// println!("{0:?}", bed.fid()?); // Outputs ndarray ["f1", "f2", "f3"]
2237 /// println!("{0:?}", bed.iid()?); // Outputs ndarray ["i1", "i2", "i3"]
2238 /// println!("{0:?}", bed.sid()?); // Outputs ndarray ["s1", "s2", "s3", "s4"]
2239 /// println!("{0:?}", bed.chromosome()?); // Outputs ndarray ["1", "1", "5", "Y"]
2240 /// # use bed_reader::BedErrorPlus;
2241 /// # Ok::<(), Box<BedErrorPlus>>(())
2242 /// ```
2243 #[must_use]
2244 pub fn metadata(mut self, metadata: &Metadata) -> Self {
2245 self.metadata = Some(
2246 Metadata::builder()
2247 .metadata(&self.metadata.unwrap()) // unwrap is ok because we know we have metadata
2248 .metadata(metadata) // consistent counts will be check later by the BedBuilder
2249 .build_no_file_check()
2250 .unwrap(), // unwrap is ok because nothing can go wrong
2251 );
2252
2253 self
2254 }
2255}
2256
2257#[anyinput]
2258fn to_metadata_path(
2259 bed_path: AnyPath,
2260 metadata_path: Option<&PathBuf>,
2261 extension: AnyString,
2262) -> PathBuf {
2263 if let Some(metadata_path) = metadata_path {
2264 metadata_path.to_owned()
2265 } else {
2266 bed_path.with_extension(extension)
2267 }
2268}
2269
2270impl Bed {
2271 /// Attempts to open a local PLINK .bed file for reading. Supports options.
2272 ///
2273 /// > Also see [`Bed::new`](struct.Bed.html#method.new), which does not support options.
2274 /// > For reading from the cloud, see [`BedCloud`](struct.BedCloud.html).
2275 ///
2276 /// The options, [listed here](struct.BedBuilder.html#implementations), can:
2277 /// * set the path of the .fam and/or .bim file
2278 /// * override some metadata, for example, replace the individual ids.
2279 /// * set the number of individuals (samples) or SNPs (variants)
2280 /// * control checking the validity of the .bed file's header
2281 /// * skip reading selected metadata
2282 ///
2283 /// Note that this method is a lazy about holding files, so unlike `std::fs::File::open(&path)`, it
2284 /// will not necessarily lock the file(s).
2285 ///
2286 /// # Errors
2287 /// By default, this method will return an error if the file is missing or its header
2288 /// is ill-formed. It will also return an error if the options contradict each other.
2289 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2290 /// for all possible errors.
2291 ///
2292 /// # Examples
2293 /// List individual (sample) [`iid`](struct.Bed.html#method.iid) and
2294 /// SNP (variant) [`sid`](struct.Bed.html#method.sid),
2295 /// then [`read`](struct.Bed.html#method.read) the whole file.
2296 ///
2297 /// ```
2298 /// use ndarray as nd;
2299 /// use bed_reader::{Bed, assert_eq_nan, sample_bed_file};
2300 ///
2301 /// let file_name = sample_bed_file("small.bed")?;
2302 /// let mut bed = Bed::builder(file_name).build()?;
2303 /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
2304 /// println!("{:?}", bed.sid()?); // Outputs ndarray ["snp1", "snp2", "snp3", "snp4"]
2305 /// let val = bed.read::<f64>()?;
2306 ///
2307 /// assert_eq_nan(
2308 /// &val,
2309 /// &nd::array![
2310 /// [1.0, 0.0, f64::NAN, 0.0],
2311 /// [2.0, 0.0, f64::NAN, 2.0],
2312 /// [0.0, 1.0, 2.0, 0.0]
2313 /// ],
2314 /// );
2315 /// # use bed_reader::BedErrorPlus;
2316 /// # Ok::<(), Box<BedErrorPlus>>(())
2317 /// ```
2318 ///
2319 /// Replace [`iid`](struct.Bed.html#method.iid).
2320 /// ```
2321 /// # use ndarray as nd;
2322 /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2323 /// # let file_name = sample_bed_file("small.bed")?;
2324 /// let mut bed = Bed::builder(file_name)
2325 /// .iid(["sample1", "sample2", "sample3"])
2326 /// .build()?;
2327 /// println!("{:?}", bed.iid()?); // Outputs ndarray ["sample1", "sample2", "sample3"]
2328 /// # use bed_reader::BedErrorPlus;
2329 /// # Ok::<(), Box<BedErrorPlus>>(())
2330 /// ```
2331 /// Give the number of individuals (samples) and SNPs (variants) so that the .fam and
2332 /// .bim files need never be opened.
2333 /// ```
2334 /// # use ndarray as nd;
2335 /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2336 /// # let file_name = sample_bed_file("small.bed")?;
2337 /// let mut bed = Bed::builder(file_name).iid_count(3).sid_count(4).build()?;
2338 /// let val = bed.read::<f64>()?;
2339 ///
2340 /// assert_eq_nan(
2341 /// &val,
2342 /// &nd::array![
2343 /// [1.0, 0.0, f64::NAN, 0.0],
2344 /// [2.0, 0.0, f64::NAN, 2.0],
2345 /// [0.0, 1.0, 2.0, 0.0]
2346 /// ],
2347 /// );
2348 /// # use bed_reader::BedErrorPlus;
2349 /// # Ok::<(), Box<BedErrorPlus>>(())
2350 /// ```
2351 /// Mark some properties as "don’t read or offer".
2352 /// ```
2353 /// # use ndarray as nd;
2354 /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2355 /// # let file_name = sample_bed_file("small.bed")?;
2356 /// let mut bed = Bed::builder(file_name)
2357 /// .skip_father()
2358 /// .skip_mother()
2359 /// .skip_sex()
2360 /// .skip_pheno()
2361 /// .skip_allele_1()
2362 /// .skip_allele_2()
2363 /// .build()?;
2364 /// println!("{:?}", bed.iid()?); // Outputs ndarray ["iid1", "iid2", "iid3"]
2365 /// bed.allele_2().expect_err("Can't be read");
2366 /// # use bed_reader::BedErrorPlus;
2367 /// # Ok::<(), Box<BedErrorPlus>>(())
2368 /// ```
2369 ///
2370 #[anyinput]
2371 pub fn builder(path: AnyPath) -> BedBuilder {
2372 BedBuilder::new(path)
2373 }
2374
2375 /// Attempts to open a local PLINK .bed file for reading. Does not support options.
2376 ///
2377 /// > Also see [`Bed::builder`](struct.Bed.html#method.builder), which does support options.
2378 /// > For reading from the cloud, see [`BedCloud`](struct.BedCloud.html).
2379 ///
2380 /// Note that this method is a lazy about holding files, so unlike `std::fs::File::open(&path)`, it
2381 /// will not necessarily lock the file(s).
2382 ///
2383 /// # Errors
2384 /// By default, this method will return an error if the file is missing or its header
2385 /// is ill-formed. See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2386 /// for all possible errors.
2387 ///
2388 /// # Examples
2389 /// List individual (sample) [`iid`](struct.Bed.html#method.iid) and
2390 /// SNP (variant) [`sid`](struct.Bed.html#method.sid),
2391 /// then [`read`](struct.Bed.html#method.read) the whole file.
2392 ///
2393 /// ```
2394 /// use ndarray as nd;
2395 /// use bed_reader::{Bed, assert_eq_nan, sample_bed_file};
2396 ///
2397 /// let file_name = sample_bed_file("small.bed")?;
2398 /// let mut bed = Bed::new(file_name)?;
2399 /// println!("{:?}", bed.iid()?); // Outputs ndarray: ["iid1", "iid2", "iid3"]
2400 /// println!("{:?}", bed.sid()?); // Outputs ndarray: ["sid1", "sid2", "sid3", "sid4"]
2401 /// let val = bed.read::<f64>()?;
2402 ///
2403 /// assert_eq_nan(
2404 /// &val,
2405 /// &nd::array![
2406 /// [1.0, 0.0, f64::NAN, 0.0],
2407 /// [2.0, 0.0, f64::NAN, 2.0],
2408 /// [0.0, 1.0, 2.0, 0.0]
2409 /// ],
2410 /// );
2411 /// # use bed_reader::BedErrorPlus;
2412 /// # Ok::<(), Box<BedErrorPlus>>(())
2413 /// ```
2414 ///
2415 /// Open the file and read data for one SNP (variant)
2416 /// at index position 2.
2417 /// ```
2418 /// # use ndarray as nd;
2419 /// # use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2420 /// # let file_name = sample_bed_file("small.bed")?;
2421 ///
2422 /// let mut bed = Bed::new(file_name)?;
2423 /// let val = ReadOptions::builder().sid_index(2).f64().read(&mut bed)?;
2424 ///
2425 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
2426 /// # use bed_reader::BedErrorPlus;
2427 /// # Ok::<(), Box<BedErrorPlus>>(())
2428 /// ```
2429 #[anyinput]
2430 pub fn new(path: AnyPath) -> Result<Self, Box<BedErrorPlus>> {
2431 Bed::builder(path).build()
2432 }
2433
2434 /// Number of individuals (samples)
2435 ///
2436 /// If this number is needed, it will be found
2437 /// by opening the .fam file and quickly counting the number
2438 /// of lines. Once found, the number will be remembered.
2439 /// The file read can be avoided by setting the
2440 /// number with [`BedBuilder::iid_count`](struct.BedBuilder.html#method.iid_count)
2441 /// or, for example, [`BedBuilder::iid`](struct.BedBuilder.html#method.iid).
2442 ///
2443 /// # Example:
2444 /// ```
2445 /// use ndarray as nd;
2446 /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2447 ///
2448 /// let file_name = sample_bed_file("small.bed")?;
2449 /// let mut bed = Bed::new(file_name)?;
2450 /// let iid_count = bed.iid_count()?;
2451 ///
2452 /// assert!(iid_count == 3);
2453 /// # use bed_reader::BedErrorPlus;
2454 /// # Ok::<(), Box<BedErrorPlus>>(())
2455 pub fn iid_count(&mut self) -> Result<usize, Box<BedErrorPlus>> {
2456 if let Some(iid_count) = self.iid_count {
2457 Ok(iid_count)
2458 } else {
2459 let fam_path = self.fam_path();
2460 let iid_count = count_lines(fam_path)?;
2461 self.iid_count = Some(iid_count);
2462 Ok(iid_count)
2463 }
2464 }
2465
2466 /// Number of SNPs (variants)
2467 ///
2468 /// If this number is needed, it will be found
2469 /// by opening the .bim file and quickly counting the number
2470 /// of lines. Once found, the number will be remembered.
2471 /// The file read can be avoided by setting the
2472 /// number with [`BedBuilder::sid_count`](struct.BedBuilder.html#method.sid_count)
2473 /// or, for example, [`BedBuilder::sid`](struct.BedBuilder.html#method.sid).
2474 ///
2475 /// # Example:
2476 /// ```
2477 /// use ndarray as nd;
2478 /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
2479 ///
2480 /// let file_name = sample_bed_file("small.bed")?;
2481 /// let mut bed = Bed::new(file_name)?;
2482 /// let sid_count = bed.sid_count()?;
2483 ///
2484 /// assert!(sid_count == 4);
2485 /// # use bed_reader::BedErrorPlus;
2486 /// # Ok::<(), Box<BedErrorPlus>>(())
2487 pub fn sid_count(&mut self) -> Result<usize, Box<BedErrorPlus>> {
2488 if let Some(sid_count) = self.sid_count {
2489 Ok(sid_count)
2490 } else {
2491 let bim_path = self.bim_path();
2492 let sid_count = count_lines(bim_path)?;
2493 self.sid_count = Some(sid_count);
2494 Ok(sid_count)
2495 }
2496 }
2497
2498 /// Number of individuals (samples) and SNPs (variants)
2499 ///
2500 /// If these numbers aren't known, they will be found
2501 /// by opening the .fam and .bim files and quickly counting the number
2502 /// of lines. Once found, the numbers will be remembered.
2503 /// The file read can be avoided by setting the
2504 /// number with [`BedBuilder::iid_count`](struct.BedBuilder.html#method.iid_count)
2505 /// and [`BedBuilder::sid_count`](struct.BedBuilder.html#method.sid_count).
2506 ///
2507 /// # Example:
2508 /// ```
2509 /// use ndarray as nd;
2510 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2511 /// use bed_reader::assert_eq_nan;
2512 ///
2513 /// let file_name = sample_bed_file("small.bed")?;
2514 /// let mut bed = Bed::new(file_name)?;
2515 /// let dim = bed.dim()?;
2516 ///
2517 /// assert!(dim == (3,4));
2518 /// # use bed_reader::BedErrorPlus;
2519 /// # Ok::<(), Box<BedErrorPlus>>(())
2520 pub fn dim(&mut self) -> Result<(usize, usize), Box<BedErrorPlus>> {
2521 Ok((self.iid_count()?, self.sid_count()?))
2522 }
2523
2524 /// Family id of each of individual (sample)
2525 ///
2526 /// If this ndarray is needed, it will be found
2527 /// by reading the .fam file. Once found, this ndarray
2528 /// and other information in the .fam file will be remembered.
2529 /// The file read can be avoided by setting the
2530 /// array with [`BedBuilder::fid`](struct.BedBuilder.html#method.fid).
2531 ///
2532 /// # Example:
2533 /// ```
2534 /// use ndarray as nd;
2535 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2536 /// use bed_reader::assert_eq_nan;
2537 ///
2538 /// let file_name = sample_bed_file("small.bed")?;
2539 /// let mut bed = Bed::new(file_name)?;
2540 /// let fid = bed.fid()?;
2541 /// println!("{fid:?}"); // Outputs ndarray ["fid1", "fid1", "fid2"]
2542 /// # use bed_reader::BedErrorPlus;
2543 /// # Ok::<(), Box<BedErrorPlus>>(())
2544 pub fn fid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2545 self.unlazy_fam::<String>(self.metadata.fid.is_none(), MetadataFields::Fid, "fid")?;
2546 Ok(self.metadata.fid.as_ref().unwrap()) //unwrap always works because of lazy_fam
2547 }
2548
2549 /// Individual id of each of individual (sample)
2550 ///
2551 /// If this ndarray is needed, it will be found
2552 /// by reading the .fam file. Once found, this ndarray
2553 /// and other information in the .fam file will be remembered.
2554 /// The file read can be avoided by setting the
2555 /// array with [`BedBuilder::iid`](struct.BedBuilder.html#method.iid).
2556 ///
2557 /// # Example:
2558 /// ```
2559 /// use ndarray as nd;
2560 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2561 /// use bed_reader::assert_eq_nan;
2562 ///
2563 /// let file_name = sample_bed_file("small.bed")?;
2564 /// let mut bed = Bed::new(file_name)?;
2565 /// let iid = bed.iid()?; ///
2566 /// println!("{iid:?}"); // Outputs ndarray ["iid1", "iid2", "iid3"]
2567 /// # use bed_reader::BedErrorPlus;
2568 /// # Ok::<(), Box<BedErrorPlus>>(())
2569 pub fn iid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2570 self.unlazy_fam::<String>(self.metadata.iid.is_none(), MetadataFields::Iid, "iid")?;
2571 Ok(self.metadata.iid.as_ref().unwrap()) //unwrap always works because of lazy_fam
2572 }
2573
2574 /// Father id of each of individual (sample)
2575 ///
2576 /// If this ndarray is needed, it will be found
2577 /// by reading the .fam file. Once found, this ndarray
2578 /// and other information in the .fam file will be remembered.
2579 /// The file read can be avoided by setting the
2580 /// array with [`BedBuilder::father`](struct.BedBuilder.html#method.father).
2581 ///
2582 /// # Example:
2583 /// ```
2584 /// use ndarray as nd;
2585 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2586 /// use bed_reader::assert_eq_nan;
2587 ///
2588 /// let file_name = sample_bed_file("small.bed")?;
2589 /// let mut bed = Bed::new(file_name)?;
2590 /// let father = bed.father()?;
2591 /// println!("{father:?}"); // Outputs ndarray ["iid23", "iid23", "iid22"]
2592 /// # use bed_reader::BedErrorPlus;
2593 /// # Ok::<(), Box<BedErrorPlus>>(())
2594 pub fn father(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2595 self.unlazy_fam::<String>(
2596 self.metadata.father.is_none(),
2597 MetadataFields::Father,
2598 "father",
2599 )?;
2600 Ok(self.metadata.father.as_ref().unwrap()) //unwrap always works because of lazy_fam
2601 }
2602
2603 /// Mother id of each of individual (sample)
2604 ///
2605 /// If this ndarray is needed, it will be found
2606 /// by reading the .fam file. Once found, this ndarray
2607 /// and other information in the .fam file will be remembered.
2608 /// The file read can be avoided by setting the
2609 /// array with [`BedBuilder::mother`](struct.BedBuilder.html#method.mother).
2610 ///
2611 /// # Example:
2612 /// ```
2613 /// use ndarray as nd;
2614 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2615 /// use bed_reader::assert_eq_nan;
2616 ///
2617 /// let file_name = sample_bed_file("small.bed")?;
2618 /// let mut bed = Bed::new(file_name)?;
2619 /// let mother = bed.mother()?;
2620 /// println!("{mother:?}"); // Outputs ndarray ["iid34", "iid34", "iid33"]
2621 /// # use bed_reader::BedErrorPlus;
2622 /// # Ok::<(), Box<BedErrorPlus>>(())
2623 pub fn mother(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2624 self.unlazy_fam::<String>(
2625 self.metadata.mother.is_none(),
2626 MetadataFields::Mother,
2627 "mother",
2628 )?;
2629 Ok(self.metadata.mother.as_ref().unwrap()) //unwrap always works because of lazy_fam
2630 }
2631
2632 /// Sex each of individual (sample)
2633 ///
2634 /// 0 is unknown, 1 is male, 2 is female
2635 ///
2636 /// If this ndarray is needed, it will be found
2637 /// by reading the .fam file. Once found, this ndarray
2638 /// and other information in the .fam file will be remembered.
2639 /// The file read can be avoided by setting the
2640 /// array with [`BedBuilder::sex`](struct.BedBuilder.html#method.sex).
2641 ///
2642 /// # Example:
2643 /// ```
2644 /// use ndarray as nd;
2645 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2646 /// use bed_reader::assert_eq_nan;
2647 ///
2648 /// let file_name = sample_bed_file("small.bed")?;
2649 /// let mut bed = Bed::new(file_name)?;
2650 /// let sex = bed.sex()?;
2651 /// println!("{sex:?}"); // Outputs ndarray [1, 2, 0]
2652 /// # use bed_reader::BedErrorPlus;
2653 /// # Ok::<(), Box<BedErrorPlus>>(())
2654 pub fn sex(&mut self) -> Result<&nd::Array1<i32>, Box<BedErrorPlus>> {
2655 self.unlazy_fam::<String>(self.metadata.sex.is_none(), MetadataFields::Sex, "sex")?;
2656 Ok(self.metadata.sex.as_ref().unwrap()) //unwrap always works because of lazy_fam
2657 }
2658
2659 /// A phenotype for each individual (seldom used)
2660 ///
2661 /// If this ndarray is needed, it will be found
2662 /// by reading the .fam file. Once found, this ndarray
2663 /// and other information in the .fam file will be remembered.
2664 /// The file read can be avoided by setting the
2665 /// array with [`BedBuilder::pheno`](struct.BedBuilder.html#method.pheno).
2666 ///
2667 /// # Example:
2668 /// ```
2669 /// use ndarray as nd;
2670 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2671 /// use bed_reader::assert_eq_nan;
2672 ///
2673 /// let file_name = sample_bed_file("small.bed")?;
2674 /// let mut bed = Bed::new(file_name)?;
2675 /// let pheno = bed.pheno()?;
2676 /// println!("{pheno:?}"); // Outputs ndarray ["red", "red", "blue"]
2677 /// # use bed_reader::BedErrorPlus;
2678 /// # Ok::<(), Box<BedErrorPlus>>(())
2679 pub fn pheno(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2680 self.unlazy_fam::<String>(
2681 self.metadata.pheno.is_none(),
2682 MetadataFields::Pheno,
2683 "pheno",
2684 )?;
2685 Ok(self.metadata.pheno.as_ref().unwrap()) //unwrap always works because of lazy_fam
2686 }
2687
2688 /// Chromosome of each SNP (variant)
2689 ///
2690 /// If this ndarray is needed, it will be found
2691 /// by reading the .bim file. Once found, this ndarray
2692 /// and other information in the .bim file will be remembered.
2693 /// The file read can be avoided by setting the
2694 /// array with [`BedBuilder::chromosome`](struct.BedBuilder.html#method.chromosome).
2695 ///
2696 /// # Example:
2697 /// ```
2698 /// use ndarray as nd;
2699 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2700 /// use bed_reader::assert_eq_nan;
2701 ///
2702 /// let file_name = sample_bed_file("small.bed")?;
2703 /// let mut bed = Bed::new(file_name)?;
2704 /// let chromosome = bed.chromosome()?;
2705 /// println!("{chromosome:?}"); // Outputs ndarray ["1", "1", "5", "Y"]
2706 /// # use bed_reader::BedErrorPlus;
2707 /// # Ok::<(), Box<BedErrorPlus>>(())
2708 pub fn chromosome(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2709 self.unlazy_bim::<String>(
2710 self.metadata.chromosome.is_none(),
2711 MetadataFields::Chromosome,
2712 "chromosome",
2713 )?;
2714 Ok(self.metadata.chromosome.as_ref().unwrap()) //unwrap always works because of lazy_bim
2715 }
2716
2717 /// SNP id of each SNP (variant)
2718 ///
2719 /// If this ndarray is needed, it will be found
2720 /// by reading the .bim file. Once found, this ndarray
2721 /// and other information in the .bim file will be remembered.
2722 /// The file read can be avoided by setting the
2723 /// array with [`BedBuilder::sid`](struct.BedBuilder.html#method.sid).
2724 ///
2725 /// # Example:
2726 /// ```
2727 /// use ndarray as nd;
2728 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2729 /// use bed_reader::assert_eq_nan;
2730 ///
2731 /// let file_name = sample_bed_file("small.bed")?;
2732 /// let mut bed = Bed::new(file_name)?;
2733 /// let sid = bed.sid()?;
2734 /// println!("{sid:?}"); // Outputs ndarray "sid1", "sid2", "sid3", "sid4"]
2735 /// # use bed_reader::BedErrorPlus;
2736 /// # Ok::<(), Box<BedErrorPlus>>(())
2737 pub fn sid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2738 self.unlazy_bim::<String>(self.metadata.sid.is_none(), MetadataFields::Sid, "sid")?;
2739 Ok(self.metadata.sid.as_ref().unwrap()) //unwrap always works because of lazy_bim
2740 }
2741
2742 /// Centimorgan position of each SNP (variant)
2743 ///
2744 /// If this ndarray is needed, it will be found
2745 /// by reading the .bim file. Once found, this ndarray
2746 /// and other information in the .bim file will be remembered.
2747 /// The file read can be avoided by setting the
2748 /// array with [`BedBuilder::cm_position`](struct.BedBuilder.html#method.cm_position).
2749 ///
2750 /// # Example:
2751 /// ```
2752 /// use ndarray as nd;
2753 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2754 /// use bed_reader::assert_eq_nan;
2755 ///
2756 /// let file_name = sample_bed_file("small.bed")?;
2757 /// let mut bed = Bed::new(file_name)?;
2758 /// let cm_position = bed.cm_position()?;
2759 /// println!("{cm_position:?}"); // Outputs ndarray [100.4, 2000.5, 4000.7, 7000.9]
2760 /// # use bed_reader::BedErrorPlus;
2761 /// # Ok::<(), Box<BedErrorPlus>>(())
2762 pub fn cm_position(&mut self) -> Result<&nd::Array1<f32>, Box<BedErrorPlus>> {
2763 self.unlazy_bim::<String>(
2764 self.metadata.cm_position.is_none(),
2765 MetadataFields::CmPosition,
2766 "cm_position",
2767 )?;
2768 Ok(self.metadata.cm_position.as_ref().unwrap()) //unwrap always works because of lazy_bim
2769 }
2770
2771 /// Base-pair position of each SNP (variant)
2772 ///
2773 /// If this ndarray is needed, it will be found
2774 /// by reading the .bim file. Once found, this ndarray
2775 /// and other information in the .bim file will be remembered.
2776 /// The file read can be avoided by setting the
2777 /// array with [`BedBuilder::bp_position`](struct.BedBuilder.html#method.bp_position).
2778 ///
2779 /// # Example:
2780 /// ```
2781 /// use ndarray as nd;
2782 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2783 /// use bed_reader::assert_eq_nan;
2784 ///
2785 /// let file_name = sample_bed_file("small.bed")?;
2786 /// let mut bed = Bed::new(file_name)?;
2787 /// let bp_position = bed.bp_position()?;
2788 /// println!("{bp_position:?}"); // Outputs ndarray [1, 100, 1000, 1004]
2789 /// # use bed_reader::BedErrorPlus;
2790 /// # Ok::<(), Box<BedErrorPlus>>(())
2791 pub fn bp_position(&mut self) -> Result<&nd::Array1<i32>, Box<BedErrorPlus>> {
2792 self.unlazy_bim::<String>(
2793 self.metadata.bp_position.is_none(),
2794 MetadataFields::BpPosition,
2795 "bp_position",
2796 )?;
2797 Ok(self.metadata.bp_position.as_ref().unwrap()) //unwrap always works because of lazy_bim
2798 }
2799
2800 /// First allele of each SNP (variant)
2801 ///
2802 /// If this ndarray is needed, it will be found
2803 /// by reading the .bim file. Once found, this ndarray
2804 /// and other information in the .bim file will be remembered.
2805 /// The file read can be avoided by setting the
2806 /// array with [`BedBuilder::allele_1`](struct.BedBuilder.html#method.allele_1).
2807 ///
2808 /// # Example:
2809 /// ```
2810 /// use ndarray as nd;
2811 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2812 /// use bed_reader::assert_eq_nan;
2813 ///
2814 /// let file_name = sample_bed_file("small.bed")?;
2815 /// let mut bed = Bed::new(file_name)?;
2816 /// let allele_1 = bed.allele_1()?;
2817 /// println!("{allele_1:?}"); // Outputs ndarray ["A", "T", "A", "T"]
2818 /// # use bed_reader::BedErrorPlus;
2819 /// # Ok::<(), Box<BedErrorPlus>>(())
2820 pub fn allele_1(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2821 self.unlazy_bim::<String>(
2822 self.metadata.allele_1.is_none(),
2823 MetadataFields::Allele1,
2824 "allele_1",
2825 )?;
2826 Ok(self.metadata.allele_1.as_ref().unwrap()) //unwrap always works because of lazy_bim
2827 }
2828
2829 /// Second allele of each SNP (variant)
2830 ///
2831 /// If this ndarray is needed, it will be found
2832 /// by reading the .bim file. Once found, this ndarray
2833 /// and other information in the .bim file will be remembered.
2834 /// The file read can be avoided by setting the
2835 /// array with [`BedBuilder::allele_2`](struct.BedBuilder.html#method.allele_2).
2836 ///
2837 /// # Example:
2838 /// ```
2839 /// use ndarray as nd;
2840 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2841 /// use bed_reader::assert_eq_nan;
2842 ///
2843 /// let file_name = sample_bed_file("small.bed")?;
2844 /// let mut bed = Bed::new(file_name)?;
2845 /// let allele_2 = bed.allele_2()?;
2846 /// println!("{allele_2:?}"); // Outputs ndarray ["A", "C", "C", "G"]
2847 /// # use bed_reader::BedErrorPlus;
2848 /// # Ok::<(), Box<BedErrorPlus>>(())
2849 pub fn allele_2(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
2850 self.unlazy_bim::<String>(
2851 self.metadata.allele_2.is_none(),
2852 MetadataFields::Allele2,
2853 "allele_2",
2854 )?;
2855 Ok(self.metadata.allele_2.as_ref().unwrap()) //unwrap always works because of lazy_bim
2856 }
2857
2858 /// [`Metadata`](struct.Metadata.html) for this dataset, for example, the individual (sample) Ids.
2859 ///
2860 /// This returns a struct with 12 fields. Each field is a ndarray.
2861 /// The struct will always be new, but the 12 ndarrays will be
2862 /// shared with this [`Bed`](struct.Bed.html).
2863 ///
2864 /// If the needed, the metadata will be read from the .fam and/or .bim files.
2865 /// ```
2866 /// use ndarray as nd;
2867 /// use bed_reader::{Bed, sample_bed_file};
2868 ///
2869 /// let file_name = sample_bed_file("small.bed")?;
2870 /// let mut bed = Bed::new(file_name)?;
2871 /// let metadata = bed.metadata()?;
2872 /// println!("{0:?}", metadata.iid()); // Outputs Some(["iid1", "iid2", "iid3"] ...)
2873 /// println!("{0:?}", metadata.sid()); // Outputs Some(["sid1", "sid2", "sid3", "sid4"] ...)
2874 /// # use bed_reader::BedErrorPlus;
2875 /// # Ok::<(), Box<BedErrorPlus>>(())
2876 pub fn metadata(&mut self) -> Result<Metadata, Box<BedErrorPlus>> {
2877 self.fam()?;
2878 self.bim()?;
2879 Ok(self.metadata.clone())
2880 }
2881
2882 /// Return the path of the .bed file.
2883 #[must_use]
2884 pub fn path(&self) -> &Path {
2885 &self.path
2886 }
2887
2888 /// Return the path of the .fam file.
2889 pub fn fam_path(&mut self) -> PathBuf {
2890 // We need to clone the path because self might mutate later
2891 if let Some(path) = &self.fam_path {
2892 path.clone()
2893 } else {
2894 let path = to_metadata_path(&self.path, self.fam_path.as_ref(), "fam");
2895 self.fam_path = Some(path.clone());
2896 path
2897 }
2898 }
2899
2900 /// Return the path of the .bim file.
2901 pub fn bim_path(&mut self) -> PathBuf {
2902 // We need to clone the path because self might mutate later
2903 if let Some(path) = &self.bim_path {
2904 path.clone()
2905 } else {
2906 let path = to_metadata_path(&self.path, self.bim_path.as_ref(), "bim");
2907 self.bim_path = Some(path.clone());
2908 path
2909 }
2910 }
2911
2912 /// Read genotype data.
2913 ///
2914 /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) which supports selection and options.
2915 ///
2916 /// # Errors
2917 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2918 /// for all possible errors.
2919 ///
2920 /// # Examples
2921 /// Read all data in a .bed file.
2922 ///
2923 /// ```
2924 /// use ndarray as nd;
2925 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2926 /// use bed_reader::assert_eq_nan;
2927 ///
2928 /// let file_name = sample_bed_file("small.bed")?;
2929 /// let mut bed = Bed::new(file_name)?;
2930 /// let val = bed.read::<f64>()?;
2931 ///
2932 /// assert_eq_nan(
2933 /// &val,
2934 /// &nd::array![
2935 /// [1.0, 0.0, f64::NAN, 0.0],
2936 /// [2.0, 0.0, f64::NAN, 2.0],
2937 /// [0.0, 1.0, 2.0, 0.0]
2938 /// ],
2939 /// );
2940 ///
2941 /// // Your output array can be f32, f64, or i8
2942 /// let val = bed.read::<i8>()?;
2943 /// assert_eq_nan(
2944 /// &val,
2945 /// &nd::array![
2946 /// [1, 0, -127, 0],
2947 /// [2, 0, -127, 2],
2948 /// [0, 1, 2, 0]
2949 /// ],
2950 /// );
2951 /// # use bed_reader::BedErrorPlus;
2952 /// # Ok::<(), Box<BedErrorPlus>>(())
2953 /// ```
2954 pub fn read<TVal: BedVal>(&mut self) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
2955 let read_options = ReadOptions::<TVal>::builder().build()?;
2956 self.read_with_options(&read_options)
2957 }
2958
2959 /// Read genotype data with options, into a preallocated array.
2960 ///
2961 /// > Also see [`ReadOptionsBuilder::read_and_fill`](struct.ReadOptionsBuilder.html#method.read_and_fill).
2962 ///
2963 /// Note that options [`ReadOptions::f`](struct.ReadOptions.html#method.f),
2964 /// [`ReadOptions::c`](struct.ReadOptions.html#method.c), and [`ReadOptions::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
2965 /// are ignored. Instead, the order of the preallocated array is used.
2966 ///
2967 /// # Errors
2968 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2969 /// for all possible errors.
2970 ///
2971 /// # Example
2972 ///
2973 /// ```
2974 /// use ndarray as nd;
2975 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
2976 /// use bed_reader::assert_eq_nan;
2977 ///
2978 /// // Read the SNPs indexed by 2.
2979 /// let file_name = sample_bed_file("small.bed")?;
2980 /// let mut bed = Bed::new(file_name)?;
2981 /// let read_options = ReadOptions::builder().sid_index(2).build()?;
2982 /// let mut val = nd::Array2::<f64>::default((3, 1));
2983 /// bed.read_and_fill_with_options(&mut val.view_mut(), &read_options)?;
2984 ///
2985 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
2986 /// # use bed_reader::BedErrorPlus;
2987 /// # Ok::<(), Box<BedErrorPlus>>(())
2988 /// ```
2989 pub fn read_and_fill_with_options<TVal: BedVal>(
2990 &mut self,
2991 val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.,
2992 read_options: &ReadOptions<TVal>,
2993 ) -> Result<(), Box<BedErrorPlus>> {
2994 let iid_count = self.iid_count()?;
2995 let sid_count = self.sid_count()?;
2996
2997 let num_threads = compute_num_threads(read_options.num_threads)?;
2998
2999 // If we already have a Vec<isize>, reference it. If we don't, create one and reference it.
3000 let iid_hold = Hold::new(&read_options.iid_index, iid_count)?;
3001 let iid_index = iid_hold.as_ref();
3002 let sid_hold = Hold::new(&read_options.sid_index, sid_count)?;
3003 let sid_index = sid_hold.as_ref();
3004
3005 let dim = val.dim();
3006 if dim != (iid_index.len(), sid_index.len()) {
3007 Err(BedError::InvalidShape(
3008 iid_index.len(),
3009 sid_index.len(),
3010 dim.0,
3011 dim.1,
3012 ))?;
3013 }
3014
3015 read_no_alloc(
3016 &self.path,
3017 iid_count,
3018 sid_count,
3019 read_options.is_a1_counted,
3020 iid_index,
3021 sid_index,
3022 read_options.missing_value,
3023 num_threads,
3024 &mut val.view_mut(),
3025 )?;
3026
3027 Ok(())
3028 }
3029
3030 /// Read all genotype data into a preallocated array.
3031 ///
3032 /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
3033 ///
3034 /// # Errors
3035 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
3036 /// for all possible errors.
3037 ///
3038 /// # Example
3039 ///
3040 /// ```
3041 /// use ndarray as nd;
3042 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
3043 /// use bed_reader::assert_eq_nan;
3044 ///
3045 /// let file_name = sample_bed_file("small.bed")?;
3046 /// let mut bed = Bed::new(file_name)?;
3047 /// let mut val = nd::Array2::<i8>::default(bed.dim()?);
3048 /// bed.read_and_fill(&mut val.view_mut())?;
3049 ///
3050 /// assert_eq_nan(
3051 /// &val,
3052 /// &nd::array![
3053 /// [1, 0, -127, 0],
3054 /// [2, 0, -127, 2],
3055 /// [0, 1, 2, 0]
3056 /// ],
3057 /// );
3058 /// # use bed_reader::BedErrorPlus;
3059 /// # Ok::<(), Box<BedErrorPlus>>(())
3060 /// ```
3061 pub fn read_and_fill<TVal: BedVal>(
3062 &mut self,
3063 val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.,
3064 ) -> Result<(), Box<BedErrorPlus>> {
3065 let read_options = ReadOptions::<TVal>::builder().build()?;
3066 self.read_and_fill_with_options(val, &read_options)
3067 }
3068
3069 /// Read genotype data with options.
3070 ///
3071 /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
3072 ///
3073 /// # Errors
3074 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
3075 /// for all possible errors.
3076 ///
3077 /// # Example
3078 ///
3079 /// ```
3080 /// use ndarray as nd;
3081 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
3082 /// use bed_reader::assert_eq_nan;
3083 ///
3084 /// // Read the SNPs indexed by 2.
3085 /// let file_name = sample_bed_file("small.bed")?;
3086 /// let mut bed = Bed::new(file_name)?;
3087 /// let read_options = ReadOptions::builder().sid_index(2).f64().build()?;
3088 /// let val = bed.read_with_options(&read_options)?;
3089 ///
3090 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
3091 /// # use bed_reader::BedErrorPlus;
3092 /// # Ok::<(), Box<BedErrorPlus>>(())
3093 /// ```
3094 pub fn read_with_options<TVal: BedVal>(
3095 &mut self,
3096 read_options: &ReadOptions<TVal>,
3097 ) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
3098 let iid_count_in = self.iid_count()?;
3099 let sid_count_in = self.sid_count()?;
3100 let iid_count_out = read_options.iid_index.len(iid_count_in)?;
3101 let sid_count_out = read_options.sid_index.len(sid_count_in)?;
3102 let shape = ShapeBuilder::set_f((iid_count_out, sid_count_out), read_options.is_f);
3103 let mut val = nd::Array2::<TVal>::default(shape);
3104
3105 self.read_and_fill_with_options(&mut val.view_mut(), read_options)?;
3106
3107 Ok(val)
3108 }
3109 /// Write genotype data with default metadata.
3110 ///
3111 /// > Also see [`WriteOptions::builder`](struct.WriteOptions.html#method.builder), which supports metadata and options.
3112 ///
3113 /// # Errors
3114 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
3115 /// for all possible errors.
3116 ///
3117 /// # Example
3118 /// In this example, write genotype data using default metadata.
3119 /// ```
3120 /// use ndarray as nd;
3121 /// use bed_reader::{Bed, WriteOptions};
3122 ///
3123 /// let output_folder = temp_testdir::TempDir::default();
3124 /// let output_file = output_folder.join("small.bed");
3125 ///
3126 /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
3127 /// Bed::write(&val, &output_file)?;
3128 ///
3129 /// // If we then read the new file and list the chromosome property,
3130 /// // it is an array of zeros, the default chromosome value.
3131 /// let mut bed2 = Bed::new(&output_file)?;
3132 /// println!("{:?}", bed2.chromosome()?); // Outputs ndarray ["0", "0", "0", "0"]
3133 /// # use bed_reader::BedErrorPlus;
3134 /// # Ok::<(), Box<BedErrorPlus>>(())
3135 /// ```
3136 pub fn write<S: nd::Data<Elem = TVal>, TVal: BedVal>(
3137 val: &nd::ArrayBase<S, nd::Ix2>,
3138 path: &Path,
3139 ) -> Result<(), Box<BedErrorPlus>> {
3140 WriteOptions::builder(path).write(val)
3141 }
3142
3143 /// Given an 2D array of genotype data and a [`WriteOptions`](struct.WriteOptionsBuilder.html), write to a .bed file.
3144 ///
3145 /// > Also see [`WriteOptionsBuilder::write`](struct.WriteOptionsBuilder.html#method.write), which creates
3146 /// > a [`WriteOptions`](struct.WriteOptionsBuilder.html) and writes to file in one step.
3147 ///
3148 /// # Example
3149 /// ```
3150 /// use ndarray as nd;
3151 /// use bed_reader::{Bed, WriteOptions};
3152 ///
3153 /// let val = nd::array![
3154 /// [1.0, 0.0, f64::NAN, 0.0],
3155 /// [2.0, 0.0, f64::NAN, 2.0],
3156 /// [0.0, 1.0, 2.0, 0.0]
3157 /// ];
3158 ///
3159 /// let output_folder = temp_testdir::TempDir::default();
3160 /// let output_file = output_folder.join("small.bed");
3161 /// let write_options = WriteOptions::builder(output_file)
3162 /// .iid(["iid1", "iid2", "iid3"])
3163 /// .sid(["sid1", "sid2", "sid3", "sid4"])
3164 /// .build(3,4)?;
3165 ///
3166 /// Bed::write_with_options(&val, &write_options)?;
3167 /// # use bed_reader::BedErrorPlus;
3168 /// # Ok::<(), Box<BedErrorPlus>>(())
3169 /// ```
3170 pub fn write_with_options<S, TVal>(
3171 val: &nd::ArrayBase<S, nd::Ix2>,
3172 write_options: &WriteOptions<TVal>,
3173 ) -> Result<(), Box<BedErrorPlus>>
3174 where
3175 S: nd::Data<Elem = TVal>,
3176 TVal: BedVal,
3177 {
3178 let (iid_count, sid_count) = val.dim();
3179 if iid_count != write_options.iid_count() {
3180 Err(BedError::InconsistentCount(
3181 "iid".into(),
3182 write_options.iid_count(),
3183 iid_count,
3184 ))?;
3185 }
3186 if sid_count != write_options.sid_count() {
3187 Err(BedError::InconsistentCount(
3188 "sid".into(),
3189 write_options.sid_count(),
3190 sid_count,
3191 ))?;
3192 }
3193
3194 let num_threads = compute_num_threads(write_options.num_threads)?;
3195 write_val(
3196 &write_options.path,
3197 val,
3198 write_options.is_a1_counted,
3199 write_options.missing_value,
3200 num_threads,
3201 )?;
3202
3203 if !write_options.skip_fam() {
3204 if let Err(e) = write_options.metadata.write_fam(write_options.fam_path()) {
3205 // Clean up the file
3206 let _ = fs::remove_file(&write_options.fam_path);
3207 Err(e)?;
3208 }
3209 }
3210
3211 if !write_options.skip_bim() {
3212 if let Err(e) = write_options.metadata.write_bim(write_options.bim_path()) {
3213 // Clean up the file
3214 let _ = fs::remove_file(&write_options.bim_path);
3215 Err(e)?;
3216 }
3217 }
3218
3219 Ok(())
3220 }
3221
3222 fn unlazy_fam<T: FromStringArray<T>>(
3223 &mut self,
3224 is_none: bool,
3225 field_index: MetadataFields,
3226 name: &str,
3227 ) -> Result<(), Box<BedErrorPlus>> {
3228 if self.skip_set.contains(&field_index) {
3229 Err(BedError::CannotUseSkippedMetadata(name.to_string()))?;
3230 }
3231 if is_none {
3232 self.fam()?;
3233 }
3234 Ok(())
3235 }
3236
3237 fn unlazy_bim<T: FromStringArray<T>>(
3238 &mut self,
3239 is_none: bool,
3240 field_index: MetadataFields,
3241 name: &str,
3242 ) -> Result<(), Box<BedErrorPlus>> {
3243 if self.skip_set.contains(&field_index) {
3244 Err(BedError::CannotUseSkippedMetadata(name.to_string()))?;
3245 }
3246 if is_none {
3247 self.bim()?;
3248 }
3249 Ok(())
3250 }
3251
3252 fn fam(&mut self) -> Result<(), Box<BedErrorPlus>> {
3253 let fam_path = self.fam_path();
3254
3255 let (metadata, count) = self.metadata.read_fam(fam_path, &self.skip_set)?;
3256 self.metadata = metadata;
3257
3258 match self.iid_count {
3259 Some(iid_count) => {
3260 if iid_count != count {
3261 Err(BedError::InconsistentCount(
3262 "iid".to_string(),
3263 iid_count,
3264 count,
3265 ))?;
3266 }
3267 }
3268 None => {
3269 self.iid_count = Some(count);
3270 }
3271 }
3272 Ok(())
3273 }
3274
3275 fn bim(&mut self) -> Result<(), Box<BedErrorPlus>> {
3276 let bim_path = self.bim_path();
3277
3278 let (metadata, count) = self.metadata.read_bim(bim_path, &self.skip_set)?;
3279 self.metadata = metadata;
3280
3281 match self.sid_count {
3282 Some(sid_count) => {
3283 if sid_count != count {
3284 Err(BedError::InconsistentCount(
3285 "sid".to_string(),
3286 sid_count,
3287 count,
3288 ))?;
3289 }
3290 }
3291 None => {
3292 self.sid_count = Some(count);
3293 }
3294 }
3295 Ok(())
3296 }
3297}
3298
3299/// If we already have a Vec<isize> remember a reference to it.
3300/// If we don't, then create one.
3301enum Hold<'a> {
3302 Copy(Vec<isize>),
3303 Ref(&'a Vec<isize>),
3304}
3305
3306impl Hold<'_> {
3307 fn new(index: &Index, count: usize) -> Result<Hold<'_>, Box<BedErrorPlus>> {
3308 let hold = if let Index::Vec(vec) = index {
3309 Hold::Ref(vec)
3310 } else {
3311 Hold::Copy(index.to_vec(count)?)
3312 };
3313 Ok(hold)
3314 }
3315
3316 fn as_ref(&self) -> &Vec<isize> {
3317 match self {
3318 Hold::Ref(vec) => vec,
3319 Hold::Copy(ref vec) => vec,
3320 }
3321 }
3322}
3323
3324fn compute_num_threads(option_num_threads: Option<usize>) -> Result<usize, Box<BedErrorPlus>> {
3325 let num_threads = if let Some(num_threads) = option_num_threads {
3326 num_threads
3327 } else if let Ok(num_threads) = env::var("BED_READER_NUM_THREADS") {
3328 num_threads.parse::<usize>()?
3329 } else if let Ok(num_threads) = env::var("NUM_THREADS") {
3330 num_threads.parse::<usize>()?
3331 } else {
3332 0
3333 };
3334 Ok(num_threads)
3335}
3336
3337#[allow(clippy::unnecessary_wraps)]
3338fn compute_max_concurrent_requests(
3339 option_max_concurrent_requests: Option<usize>,
3340) -> Result<usize, Box<BedErrorPlus>> {
3341 // In the future, we might want to set this with an environment variable.
3342 let max_concurrent_requests = option_max_concurrent_requests.unwrap_or(10);
3343 Ok(max_concurrent_requests)
3344}
3345
3346#[allow(clippy::unnecessary_wraps)]
3347fn compute_max_chunk_bytes(
3348 option_max_chunk_bytes: Option<usize>,
3349) -> Result<usize, Box<BedErrorPlus>> {
3350 // In the future, we might want to set this with an environment variable.
3351 let max_chunk_bytes = option_max_chunk_bytes.unwrap_or(8_000_000);
3352 Ok(max_chunk_bytes)
3353}
3354
3355impl Index {
3356 // We can't define a 'From' because we want to add count at the last moment.
3357 // Later Would be nice to not always allocate a new vec, maybe with Rc<[T]>?
3358 // Even better would be to support an iterator from Index (an enum with fields).
3359
3360 /// Turns an [`Index`](enum.Index.html) into a vector of usize indexes. Negative means count from end.
3361 pub fn to_vec(&self, count: usize) -> Result<Vec<isize>, Box<BedErrorPlus>> {
3362 let count_signed = count as isize;
3363 match self {
3364 Index::All => Ok((0..count_signed).collect()),
3365 Index::Vec(vec) => Ok(vec.clone()),
3366 Index::NDArrayBool(nd_array_bool) => {
3367 if nd_array_bool.len() != count {
3368 Err(BedError::BoolArrayVectorWrongLength(
3369 count,
3370 nd_array_bool.len(),
3371 ))?;
3372 }
3373 Ok(nd_array_bool
3374 .iter()
3375 .enumerate()
3376 .filter(|(_, b)| **b)
3377 .map(|(i, _)| i as isize)
3378 .collect())
3379 }
3380 Index::NDSliceInfo(nd_slice_info) => {
3381 Ok(RangeNdSlice::new(nd_slice_info, count)?.to_vec())
3382 }
3383 Index::RangeAny(range_any) => {
3384 let range = range_any.to_range(count)?;
3385 Ok(range.map(|i| i as isize).collect::<Vec<isize>>())
3386 }
3387 Index::NDArray(nd_array) => Ok(nd_array.to_vec()),
3388 Index::One(one) => Ok(vec![*one]),
3389 Index::VecBool(vec_bool) => {
3390 if vec_bool.len() != count {
3391 Err(BedError::BoolArrayVectorWrongLength(count, vec_bool.len()))?;
3392 }
3393 Ok(vec_bool
3394 .iter()
3395 .enumerate()
3396 .filter(|(_, b)| **b)
3397 .map(|(i, _)| i as isize)
3398 .collect())
3399 }
3400 }
3401 }
3402}
3403
3404#[allow(clippy::doc_markdown)]
3405/// Type alias for 1-D slices of NDArrays.
3406pub type SliceInfo1 =
3407 nd::SliceInfo<[nd::SliceInfoElem; 1], nd::Dim<[usize; 1]>, nd::Dim<[usize; 1]>>;
3408
3409/// A specification of which individuals (samples) or SNPs (variants) to read.
3410///
3411/// See the [Table of Index Expressions](index.html#index-expressions)
3412/// for a list of expressions for selecting individuals (sample)
3413/// and SNPs (variants).
3414///
3415/// By default, all individuals or SNPs are read.
3416/// The indices can be specified as:
3417/// * an index (negative numbers count from the end)
3418/// * a vector or ndarray of indices
3419/// * a Rust range (negatives not allowed)
3420/// * a vector or ndarray of booleans
3421/// * an ndarray slice (negative indexing and steps allowed)
3422///
3423/// # Examples
3424/// ```
3425/// use ndarray as nd;
3426/// use bed_reader::{Bed, ReadOptions, sample_bed_file};
3427/// use bed_reader::assert_eq_nan;
3428/// use ndarray::s;
3429///
3430/// let file_name = sample_bed_file("some_missing.bed")?;
3431/// let mut bed = Bed::new(file_name)?;
3432/// println!("{:?}", bed.dim()?); // prints (100, 100)
3433///
3434/// // Read all individuals and all SNPs
3435/// let val = ReadOptions::builder().f64().read(&mut bed)?;
3436/// assert!(val.dim() == (100, 100));
3437///
3438/// // Read the individual at index position 10 and all SNPs
3439/// let val = ReadOptions::builder().iid_index(10).f64().read(&mut bed)?;
3440/// assert!(val.dim() == (1, 100));
3441///
3442/// // Read the individuals at index positions 0,5, 1st-from-the-end and
3443/// // the SNP at index position 3
3444/// let val = ReadOptions::builder()
3445/// .iid_index(vec![0, 5, -1])
3446/// .sid_index(3)
3447/// .f64()
3448/// .read(&mut bed)?;
3449/// assert!(val.dim() == (3, 1));
3450/// // Repeat, but with an ndarray
3451/// let val = ReadOptions::builder()
3452/// .iid_index(nd::array![0, 5, -1])
3453/// .sid_index(3)
3454/// .f64()
3455/// .read(&mut bed)?;
3456/// assert!(val.dim() == (3, 1));
3457/// // Repeat, but with an Rust array
3458/// let val = ReadOptions::builder()
3459/// .iid_index([0, 5, -1])
3460/// .sid_index(3)
3461/// .f64()
3462/// .read(&mut bed)?;
3463/// assert!(val.dim() == (3, 1));
3464/// // Create a boolean ndarray identifying SNPs in chromosome 5,
3465/// // then select those SNPs.
3466/// let chrom_5 = bed.chromosome()?.map(|elem| elem == "5");
3467/// let val = ReadOptions::builder()
3468/// .sid_index(chrom_5)
3469/// .f64()
3470/// .read(&mut bed)?;
3471/// assert!(val.dim() == (100, 6));
3472/// // Use ndarray's slice macro, [`s!`](https://docs.rs/ndarray/latest/ndarray/macro.s.html),
3473/// // to select every 2nd individual and every 3rd SNP.
3474/// let val = ReadOptions::builder()
3475/// .iid_index(s![..;2])
3476/// .sid_index(s![..;3])
3477/// .f64()
3478/// .read(&mut bed)?;
3479/// assert!(val.dim() == (50, 34));
3480/// // Use ndarray's slice macro, [`s!`](https://docs.rs/ndarray/latest/ndarray/macro.s.html),
3481/// // to select the 10th-from-last individual to the last, in reverse order,
3482/// // and every 3rd SNP in reverse order.)
3483/// let val = ReadOptions::builder()
3484/// .iid_index(s![-10..;-1])
3485/// .sid_index(s![..;-3])
3486/// .f64()
3487/// .read(&mut bed)?;
3488/// assert!(val.dim() == (10, 34));
3489/// # use bed_reader::BedErrorPlus;
3490/// # Ok::<(), Box<BedErrorPlus>>(())
3491/// ```
3492#[derive(Debug, Clone)]
3493pub enum Index {
3494 // Could implement an enumerator, but it is complex and requires a 'match' on each next()
3495 // https://stackoverflow.com/questions/65272613/how-to-implement-intoiterator-for-an-enum-of-iterable-variants
3496 #[allow(missing_docs)]
3497 All,
3498 #[allow(missing_docs)]
3499 One(isize),
3500 #[allow(missing_docs)]
3501 Vec(Vec<isize>),
3502 #[allow(missing_docs)]
3503 NDArray(nd::Array1<isize>),
3504 #[allow(missing_docs)]
3505 VecBool(Vec<bool>),
3506 #[allow(missing_docs)]
3507 NDArrayBool(nd::Array1<bool>),
3508 #[allow(missing_docs)]
3509 NDSliceInfo(SliceInfo1),
3510 #[allow(missing_docs)]
3511 RangeAny(RangeAny),
3512}
3513
3514#[doc(hidden)]
3515/// Used internally to represent Rust ranges such as `0..10`, `..10`, etc.
3516#[derive(Debug, Clone)]
3517pub struct RangeAny {
3518 start: Option<usize>,
3519 end: Option<usize>,
3520}
3521
3522impl RangeAny {
3523 fn new<T: RangeBounds<usize>>(range_thing: &T) -> RangeAny {
3524 let start_bound = range_thing.start_bound();
3525 let start = match start_bound {
3526 Bound::Included(&start) => Some(start),
3527 Bound::Excluded(&start) => Some(start + 1),
3528 Bound::Unbounded => None,
3529 };
3530
3531 let end_bound = range_thing.end_bound();
3532 let end = match end_bound {
3533 Bound::Included(&end) => Some(end + 1),
3534 Bound::Excluded(&end) => Some(end),
3535 Bound::Unbounded => None,
3536 };
3537 RangeAny { start, end }
3538 }
3539
3540 // https://stackoverflow.com/questions/55925523/array-cannot-be-indexed-by-rangefull
3541 fn to_range(&self, count: usize) -> Result<Range<usize>, Box<BedErrorPlus>> {
3542 let start = self.start.unwrap_or_default();
3543 let end = if let Some(end) = self.end { end } else { count };
3544 if start > end {
3545 Err(BedError::StartGreaterThanEnd(start, end).into())
3546 } else {
3547 Ok(Range { start, end })
3548 }
3549 }
3550
3551 fn len(&self, count: usize) -> Result<usize, Box<BedErrorPlus>> {
3552 let range = self.to_range(count)?;
3553 Ok(range.end - range.start)
3554 }
3555
3556 fn is_empty(&self, count: usize) -> Result<bool, Box<BedErrorPlus>> {
3557 Ok(self.len(count)? == 0)
3558 }
3559}
3560
#[doc(hidden)]
#[derive(Debug, Clone)]
/// Used internally to represent NDArray Slices such as s![..], s![0..;2], s![0..10;-1]
///
/// The selection is the half-open span `start..end` walked with stride
/// `step`, either upward from `start` or, when `is_reversed` is set,
/// downward from `end - 1`.
pub struct RangeNdSlice {
    // Inclusive lower bound of the span.
    start: usize,
    // Exclusive upper bound of the span.
    end: usize,
    // Distance between consecutive selected positions.
    step: usize,
    // When true, positions are produced from the top of the span downward.
    is_reversed: bool,
}
3570
3571impl RangeNdSlice {
3572 fn len(&self) -> usize {
3573 if self.start > self.end {
3574 0
3575 } else {
3576 (self.end - self.start).div_ceil(self.step)
3577 }
3578 }
3579
    // True when the slice selects no positions, i.e. `len()` is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
3583
3584 // https://docs.rs/ndarray/0.15.4/ndarray/struct.ArrayBase.html#slicing
3585 fn to_vec(&self) -> Vec<isize> {
3586 if self.start >= self.end {
3587 Vec::new()
3588 } else if !self.is_reversed {
3589 (self.start..self.end)
3590 .step_by(self.step)
3591 .map(|i| i as isize)
3592 .collect()
3593 } else {
3594 // https://docs.rs/ndarray/latest/ndarray/macro.s.html
3595 let size = self.len();
3596 let mut vec: Vec<isize> = Vec::<isize>::with_capacity(size);
3597 let mut i = self.end - 1;
3598 while i >= self.start {
3599 vec.push(i as isize);
3600 if i < self.step {
3601 break;
3602 }
3603 i -= self.step;
3604 }
3605 vec
3606 }
3607 }
3608
3609 fn new(nd_slice_info: &SliceInfo1, count: usize) -> Result<Self, Box<BedErrorPlus>> {
3610 // self.to_vec(count).len(),
3611 // https://docs.rs/ndarray/0.15.4/ndarray/struct.ArrayBase.html#method.slice_collapse
3612 // Error in the following cases
3613 // * SliceInfo is not a 1-dimensional or is a NewAxis
3614 // * Step is 0
3615 // * Start is greater than count
3616 // * End is greater than count
3617 // As with ndarray, Start can be greater than End is allowed
3618 // and means the slice is empty.
3619 if nd_slice_info.in_ndim() != 1 || nd_slice_info.out_ndim() != 1 {
3620 Err(BedError::NdSliceInfoNot1D)?;
3621 }
3622
3623 let slice_info_elem = nd_slice_info[0];
3624 match slice_info_elem {
3625 nd::SliceInfoElem::Slice { start, end, step } => {
3626 // https://docs.rs/ndarray/0.15.4/ndarray/enum.SliceInfoElem.html
3627 // s![..], 0,None,1
3628 // s![a..b;2] a,b,2
3629 // s![a..;-1], from a to end in reverse order
3630 // start index; negative are counted from the back of the axis
3631 // end index; negative are counted from the back of the axis; when not present the default is the full length of the axis.
3632 // step size in elements; the default is 1, for every element.
3633 // A range with step size. end is an exclusive index. Negative start or end indexes are counted from the back of the axis. If end is None, the slice extends to the end of the axis.
3634 let (step2, is_reverse2) = match step.cmp(&0) {
3635 Ordering::Greater => (step as usize, false),
3636 Ordering::Less => ((-step) as usize, true),
3637 Ordering::Equal => Err(BedError::StepZero)?,
3638 };
3639
3640 let start2 = if start >= 0 {
3641 let start3 = start as usize;
3642 if start3 > count {
3643 Err(BedError::StartGreaterThanCount(start3, count))?;
3644 }
3645 start3
3646 } else {
3647 let start3 = (-start) as usize;
3648 if start3 > count {
3649 Err(BedError::StartGreaterThanCount(start3, count))?;
3650 }
3651 count - start3
3652 };
3653
3654 let end2 = if let Some(end) = end {
3655 if end >= 0 {
3656 let end3 = end as usize;
3657 if end3 > count {
3658 Err(BedError::EndGreaterThanCount(end3, count))?;
3659 }
3660 end3
3661 } else {
3662 let end3 = (-end) as usize;
3663 if end3 > count {
3664 Err(BedError::EndGreaterThanCount(end3, count))?;
3665 }
3666 count - end3
3667 }
3668 } else {
3669 count
3670 };
3671
3672 Ok(RangeNdSlice {
3673 start: start2,
3674 end: end2,
3675 step: step2,
3676 is_reversed: is_reverse2,
3677 })
3678 }
3679 nd::SliceInfoElem::Index(index) => Ok(RangeNdSlice {
3680 start: index as usize,
3681 end: index as usize + 1,
3682 step: 1,
3683 is_reversed: false,
3684 }),
3685 nd::SliceInfoElem::NewAxis => Err(BedError::NewAxis.into()),
3686 }
3687 }
3688}
3689
3690impl Index {
3691 /// Returns the number of elements in an [`Index`](enum.Index.html).
3692 #[allow(clippy::len_without_is_empty)]
3693 pub fn len(&self, count: usize) -> Result<usize, Box<BedErrorPlus>> {
3694 match self {
3695 Index::All => Ok(count),
3696 Index::One(_) => Ok(1),
3697 Index::Vec(vec) => Ok(vec.len()),
3698 Index::NDArray(nd_array) => Ok(nd_array.len()),
3699 Index::VecBool(vec_bool) => Ok(vec_bool.iter().filter(|&b| *b).count()),
3700 Index::NDArrayBool(nd_array_bool) => Ok(nd_array_bool.iter().filter(|&b| *b).count()),
3701 Index::NDSliceInfo(nd_slice_info) => Ok(RangeNdSlice::new(nd_slice_info, count)?.len()),
3702 Index::RangeAny(range_any) => range_any.len(count),
3703 }
3704 }
3705
3706 /// Returns true if the [`Index`](enum.Index.html) is empty.
3707 pub fn is_empty(&self, count: usize) -> Result<bool, Box<BedErrorPlus>> {
3708 match self {
3709 Index::All => Ok(count == 0),
3710 Index::One(_) => Ok(false),
3711 Index::Vec(vec) => Ok(vec.is_empty()),
3712 Index::NDArray(nd_array) => Ok(nd_array.is_empty()),
3713 Index::VecBool(vec_bool) => Ok(!vec_bool.iter().any(|&b| b)),
3714 Index::NDArrayBool(nd_array_bool) => Ok(!nd_array_bool.iter().any(|&b| b)),
3715 Index::NDSliceInfo(nd_slice_info) => {
3716 Ok(RangeNdSlice::new(nd_slice_info, count)?.is_empty())
3717 }
3718 Index::RangeAny(range_any) => range_any.is_empty(count),
3719 }
3720 }
3721}
3722
3723impl From<SliceInfo1> for Index {
3724 fn from(slice_info: SliceInfo1) -> Index {
3725 Index::NDSliceInfo(slice_info)
3726 }
3727}
3728impl From<&SliceInfo1> for Index {
3729 fn from(slice_info: &SliceInfo1) -> Index {
3730 Index::NDSliceInfo(slice_info.to_owned())
3731 }
3732}
3733
3734impl From<RangeFull> for Index {
3735 fn from(range_thing: RangeFull) -> Index {
3736 Index::RangeAny(RangeAny::new(&range_thing))
3737 }
3738}
3739
3740impl From<&RangeFull> for Index {
3741 fn from(range_thing: &RangeFull) -> Index {
3742 Index::RangeAny(RangeAny::new(range_thing))
3743 }
3744}
3745
3746impl From<Range<usize>> for Index {
3747 fn from(range_thing: Range<usize>) -> Index {
3748 Index::RangeAny(RangeAny::new(&range_thing))
3749 }
3750}
3751
3752impl From<&Range<usize>> for Index {
3753 fn from(range_thing: &Range<usize>) -> Index {
3754 Index::RangeAny(RangeAny::new(range_thing))
3755 }
3756}
3757
3758impl From<RangeFrom<usize>> for Index {
3759 fn from(range_thing: RangeFrom<usize>) -> Index {
3760 Index::RangeAny(RangeAny::new(&range_thing))
3761 }
3762}
3763
3764impl From<&RangeFrom<usize>> for Index {
3765 fn from(range_thing: &RangeFrom<usize>) -> Index {
3766 Index::RangeAny(RangeAny::new(range_thing))
3767 }
3768}
3769
3770impl From<RangeInclusive<usize>> for Index {
3771 fn from(range_thing: RangeInclusive<usize>) -> Index {
3772 Index::RangeAny(RangeAny::new(&range_thing))
3773 }
3774}
3775
3776impl From<&RangeInclusive<usize>> for Index {
3777 fn from(range_thing: &RangeInclusive<usize>) -> Index {
3778 Index::RangeAny(RangeAny::new(range_thing))
3779 }
3780}
3781
3782impl From<RangeTo<usize>> for Index {
3783 fn from(range_thing: RangeTo<usize>) -> Index {
3784 Index::RangeAny(RangeAny::new(&range_thing))
3785 }
3786}
3787
3788impl From<&RangeTo<usize>> for Index {
3789 fn from(range_thing: &RangeTo<usize>) -> Index {
3790 Index::RangeAny(RangeAny::new(range_thing))
3791 }
3792}
3793
3794impl From<RangeToInclusive<usize>> for Index {
3795 fn from(range_thing: RangeToInclusive<usize>) -> Index {
3796 Index::RangeAny(RangeAny::new(&range_thing))
3797 }
3798}
3799
3800impl From<&RangeToInclusive<usize>> for Index {
3801 fn from(range_thing: &RangeToInclusive<usize>) -> Index {
3802 Index::RangeAny(RangeAny::new(range_thing))
3803 }
3804}
3805
3806impl From<&[isize]> for Index {
3807 fn from(array: &[isize]) -> Index {
3808 Index::Vec(array.to_vec())
3809 }
3810}
3811
3812impl<const N: usize> From<[isize; N]> for Index {
3813 fn from(array: [isize; N]) -> Index {
3814 Index::Vec(array.to_vec())
3815 }
3816}
3817
3818impl<const N: usize> From<&[isize; N]> for Index {
3819 fn from(array: &[isize; N]) -> Index {
3820 Index::Vec(array.to_vec())
3821 }
3822}
3823
3824impl From<&nd::ArrayView1<'_, isize>> for Index {
3825 fn from(view: &nd::ArrayView1<isize>) -> Index {
3826 Index::NDArray(view.to_owned())
3827 }
3828}
3829
3830impl From<nd::ArrayView1<'_, isize>> for Index {
3831 fn from(view: nd::ArrayView1<isize>) -> Index {
3832 Index::NDArray(view.to_owned())
3833 }
3834}
3835
3836impl From<Vec<isize>> for Index {
3837 fn from(vec: Vec<isize>) -> Index {
3838 Index::Vec(vec)
3839 }
3840}
3841impl From<&Vec<isize>> for Index {
3842 fn from(vec_ref: &Vec<isize>) -> Index {
3843 Index::Vec(vec_ref.clone())
3844 }
3845}
3846
3847impl From<nd::ArrayView1<'_, bool>> for Index {
3848 fn from(view: nd::ArrayView1<bool>) -> Index {
3849 Index::NDArrayBool(view.to_owned())
3850 }
3851}
3852
3853impl From<&nd::ArrayView1<'_, bool>> for Index {
3854 fn from(view: &nd::ArrayView1<bool>) -> Index {
3855 Index::NDArrayBool(view.to_owned())
3856 }
3857}
3858
3859impl From<&Vec<bool>> for Index {
3860 fn from(vec_ref: &Vec<bool>) -> Index {
3861 Index::VecBool(vec_ref.clone())
3862 }
3863}
3864
3865impl From<&[bool]> for Index {
3866 fn from(array: &[bool]) -> Index {
3867 Index::VecBool(array.to_vec())
3868 }
3869}
3870
3871impl<const N: usize> From<[bool; N]> for Index {
3872 fn from(array: [bool; N]) -> Index {
3873 Index::VecBool(array.to_vec())
3874 }
3875}
3876
3877impl<const N: usize> From<&[bool; N]> for Index {
3878 fn from(array: &[bool; N]) -> Index {
3879 Index::VecBool(array.to_vec())
3880 }
3881}
3882
3883impl From<isize> for Index {
3884 fn from(one: isize) -> Index {
3885 Index::One(one)
3886 }
3887}
3888impl From<&isize> for Index {
3889 fn from(one: &isize) -> Index {
3890 Index::One(one.to_owned())
3891 }
3892}
3893
3894impl From<nd::Array1<isize>> for Index {
3895 fn from(nd_array: nd::Array1<isize>) -> Index {
3896 Index::NDArray(nd_array)
3897 }
3898}
3899
3900impl From<&nd::Array1<isize>> for Index {
3901 fn from(nd_array: &nd::Array1<isize>) -> Index {
3902 Index::NDArray(nd_array.to_owned())
3903 }
3904}
3905
3906impl From<nd::Array1<bool>> for Index {
3907 fn from(nd_array_bool: nd::Array1<bool>) -> Index {
3908 Index::NDArrayBool(nd_array_bool)
3909 }
3910}
3911
3912impl From<&nd::Array1<bool>> for Index {
3913 fn from(nd_array_bool: &nd::Array1<bool>) -> Index {
3914 Index::NDArrayBool(nd_array_bool.clone())
3915 }
3916}
3917
3918impl From<Vec<bool>> for Index {
3919 fn from(vec_bool: Vec<bool>) -> Index {
3920 Index::VecBool(vec_bool)
3921 }
3922}
3923
3924impl From<()> for Index {
3925 fn from((): ()) -> Index {
3926 Index::All
3927 }
3928}
3929
3930// See https://nullderef.com/blog/rust-parameters/
3931
/// Represents options for reading genotype data from a PLINK .bed file.
///
/// Construct with [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
///
/// See the [Table of `ReadOptions`](index.html#readoptions)
/// for a list of the supported options.
/// See the [Table of Index Expressions](index.html#index-expressions)
/// for a list of expressions for selecting individuals (sample)
/// and SNPs (variants).
#[derive(Debug, Clone, Builder)]
#[builder(build_fn(error = "Box<BedErrorPlus>"))]
pub struct ReadOptions<TVal: BedVal> {
    /// Value to use for missing values (defaults to -127 or NaN)
    ///
    /// -127 is the default for i8 and NaN is the default for f32 and f64.
    ///
    /// In this example, the missing value is set to -1:
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
    /// use bed_reader::assert_eq_nan;
    ///
    /// let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    /// let val = ReadOptions::builder().missing_value(-1).i8().read(&mut bed)?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1, 0, -1, 0],
    ///         [2, 0, -1, 2],
    ///         [0, 1, 2, 0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[builder(default = "TVal::missing()")]
    missing_value: TVal,

    /// Select which individual (sample) values to read -- Defaults to all.
    ///
    /// Can select with a signed number, various lists of signed numbers,
    /// ranges, and various lists of booleans.
    ///
    /// See the [Table of Index Expressions](index.html#index-expressions)
    /// for a list of the supported index expressions.
    ///
    /// # Examples:
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
    /// use ndarray::s;
    ///
    /// let file_name = sample_bed_file("some_missing.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    ///
    /// // Read the individual at index position 3
    ///
    /// let val = ReadOptions::builder()
    ///     .iid_index(3)
    ///     .f64()
    ///     .read(&mut bed)?;
    /// assert!(val.dim() == (1, 100));
    ///
    /// // Read the individuals at index positions 0, 5, and 1st-from-last.
    ///
    /// let val = ReadOptions::builder()
    ///     .iid_index([0, 5, -1])
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (3, 100));
    ///
    /// // Read the individuals at index positions 20 (inclusive) to 30 (exclusive).
    ///
    /// let val = ReadOptions::builder()
    ///     .iid_index(20..30)
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (10, 100));
    ///
    /// // Read the individuals at every 2nd index position.
    ///
    /// let val = ReadOptions::builder()
    ///     .iid_index(s![..;2])
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (50, 100));
    ///
    /// // Read chromosome 5 of the female individuals.
    ///
    /// let female = bed.sex()?.map(|elem| *elem == 2);
    /// let chrom_5 = bed.chromosome()?.map(|elem| elem == "5");
    /// let val = ReadOptions::builder()
    ///     .iid_index(female)
    ///     .sid_index(chrom_5)
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (50, 6));
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[builder(default = "Index::All")]
    #[builder(setter(into))]
    iid_index: Index,

    /// Select which SNPs (variant) values to read -- Defaults to all.
    ///
    /// Can select with a signed number, various lists of signed numbers,
    /// ranges, and various lists of booleans.
    ///
    /// See the [Table of Index Expressions](index.html#index-expressions)
    /// for a list of the supported index expressions.
    ///
    /// # Examples:
    /// ```
    /// use ndarray as nd;
    /// use ndarray::s;
    /// use bed_reader::{Bed, ReadOptions, assert_eq_nan, sample_bed_file};
    ///
    /// let file_name = sample_bed_file("some_missing.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    ///
    /// // Read the SNP at index position 3
    ///
    /// let val = ReadOptions::builder()
    ///     .sid_index(3)
    ///     .f64()
    ///     .read(&mut bed)?;
    /// assert!(val.dim() == (100, 1));
    ///
    /// // Read the SNPs at index positions 0, 5, and 1st-from-last.
    ///
    /// let val = ReadOptions::builder()
    ///     .sid_index([0, 5, -1])
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (100, 3));
    ///
    /// // Read the SNPs at index positions 20 (inclusive) to 30 (exclusive).
    ///
    /// let val = ReadOptions::builder()
    ///     .sid_index(20..30)
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (100, 10));
    ///
    /// // Read the SNPs at every 2nd index position.
    ///
    /// let val = ReadOptions::builder()
    ///     .sid_index(s![..;2])
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (100, 50));
    ///
    /// // Read chromosome 5 of the female individuals.
    ///
    /// let female = bed.sex()?.map(|elem| *elem == 2);
    /// let chrom_5 = bed.chromosome()?.map(|elem| elem == "5");
    /// let val = ReadOptions::builder()
    ///     .iid_index(female)
    ///     .sid_index(chrom_5)
    ///     .f64()
    ///     .read(&mut bed)?;
    ///
    /// assert!(val.dim() == (50, 6));
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[builder(default = "Index::All")]
    #[builder(setter(into))]
    sid_index: Index,

    /// Sets if the order of the output array is Fortran-style -- Default is true.
    ///
    /// "Fortran order" is also called "column-major order" [Wikipedia](https://en.wikipedia.org/wiki/Row-_and_column-major_order).
    ///
    /// Also see [`f`](struct.ReadOptionsBuilder.html#method.f) and [`c`](struct.ReadOptionsBuilder.html#method.c).
    #[builder(default = "true")]
    is_f: bool,

    /// Sets if allele 1 is counted. Default is true.
    ///
    /// Also see [`count_a1`](struct.ReadOptionsBuilder.html#method.count_a1) and [`count_a2`](struct.ReadOptionsBuilder.html#method.count_a2).
    #[builder(default = "true")]
    is_a1_counted: bool,

    /// Number of threads to use (defaults to all processors)
    ///
    /// Can also be set with an environment variable.
    /// See [Environment Variables](index.html#environment-variables).
    ///
    /// In this example, we read using only one thread.
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
    /// use bed_reader::assert_eq_nan;
    ///
    /// let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    /// let val = ReadOptions::builder().num_threads(1).i8().read(&mut bed)?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1, 0, -127, 0],
    ///         [2, 0, -127, 2],
    ///         [0, 1, 2, 0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    #[builder(default, setter(strip_option))]
    num_threads: Option<usize>,

    // LATER: Allow this to be set with an environment variable.
    /// Maximum number of concurrent async requests (defaults to 10) --
    /// Used by [`BedCloud`](struct.BedCloud.html).
    ///
    /// In this example, we read using only one request at a time.
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{BedCloud, ReadOptions};
    /// use bed_reader::assert_eq_nan;
    ///
    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
    /// let mut bed_cloud = BedCloud::new(&url).await?;
    /// let val = ReadOptions::builder().max_concurrent_requests(1).i8().read_cloud(&mut bed_cloud).await?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1, 0, -127, 0],
    ///         [2, 0, -127, 2],
    ///         [0, 1, 2, 0]
    ///     ],
    /// );
    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
    /// ```
    #[builder(default, setter(strip_option))]
    #[allow(dead_code)]
    max_concurrent_requests: Option<usize>,

    // LATER: Allow this to be set with an environment variable.
    /// Maximum chunk size of async requests (defaults to `8_000_000` bytes) --
    /// Used by [`BedCloud`](struct.BedCloud.html).
    ///
    /// In this example, we read using only `1_000_000` bytes per request.
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{BedCloud, ReadOptions};
    /// use bed_reader::assert_eq_nan;
    ///
    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
    /// let mut bed_cloud = BedCloud::new(&url).await?;
    /// let val = ReadOptions::builder().max_chunk_bytes(1_000_000).i8().read_cloud(&mut bed_cloud).await?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1, 0, -127, 0],
    ///         [2, 0, -127, 2],
    ///         [0, 1, 2, 0]
    ///     ],
    /// );
    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
    /// ```
    #[builder(default, setter(strip_option))]
    #[allow(dead_code)]
    max_chunk_bytes: Option<usize>,
}
4214
4215impl<TVal: BedVal> ReadOptions<TVal> {
4216 /// Read genotype data. Supports selection and options.
4217 ///
4218 /// > Also see [`Bed::read`](struct.Bed.html#method.read) (read without options).
4219 /// > To fill a preallocated ndarray, see [`ReadOptionsBuilder::read_and_fill`](struct.ReadOptionsBuilder.html#method.read_and_fill).
4220 ///
4221 /// See the [Table of `ReadOptions`](index.html#readoptions)
4222 /// for a list of the supported options.
4223 /// See the [Table of Index Expressions](index.html#index-expressions)
4224 /// for a list of expressions for selecting individuals (sample)
4225 /// and SNPs (variants).
4226 ///
4227 /// # Errors
4228 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4229 /// for all possible errors.
4230 ///
4231 /// # Examples
4232 ///
4233 /// ```
4234 /// use ndarray as nd;
4235 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4236 /// use bed_reader::assert_eq_nan;
4237 ///
4238 /// // Read all data from a .bed file into an ndarray of f64.
4239 /// let file_name = sample_bed_file("small.bed")?;
4240 /// let mut bed = Bed::new(file_name)?;
4241 /// let val = ReadOptions::builder().f64().read(&mut bed)?;
4242 ///
4243 /// assert_eq_nan(
4244 /// &val,
4245 /// &nd::array![
4246 /// [1.0, 0.0, f64::NAN, 0.0],
4247 /// [2.0, 0.0, f64::NAN, 2.0],
4248 /// [0.0, 1.0, 2.0, 0.0]
4249 /// ],
4250 /// );
4251 ///
4252 /// // Read the SNPs indexed by 2.
4253 /// let val = ReadOptions::builder().sid_index(2).f64().read(&mut bed)?;
4254 ///
4255 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4256 ///
4257 /// // Read the SNPs indexed by 2, 3, and 4th from last.
4258 /// let val = ReadOptions::builder()
4259 /// .sid_index([2, 3, -4])
4260 /// .f64()
4261 /// .read(&mut bed)?;
4262 ///
4263 /// assert_eq_nan(
4264 /// &val,
4265 /// &nd::array![[f64::NAN, 0.0, 1.0], [f64::NAN, 2.0, 2.0], [2.0, 0.0, 0.0]],
4266 /// );
4267 ///
4268 /// // Read SNPs from 1 (inclusive) to 4 (exclusive).
4269 /// let val = ReadOptions::builder()
4270 /// .sid_index(1..4)
4271 /// .f64()
4272 /// .read(&mut bed)?;
4273 ///
4274 /// assert_eq_nan(
4275 /// &val,
4276 /// &nd::array![[0.0, f64::NAN, 0.0], [0.0, f64::NAN, 2.0], [1.0, 2.0, 0.0]],
4277 /// );
4278 ///
4279 /// // Print unique chrom values. Then, read all SNPs in chrom 5.
4280 /// use std::collections::HashSet;
4281 ///
4282 /// println!("{:?}", bed.chromosome()?.iter().collect::<HashSet<_>>());
4283 /// // This outputs: {"1", "5", "Y"}.
4284 /// let val = ReadOptions::builder()
4285 /// .sid_index(bed.chromosome()?.map(|elem| elem == "5"))
4286 /// .f64()
4287 /// .read(&mut bed)?;
4288 ///
4289 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4290 ///
4291 /// // Read 1st individual (across all SNPs).
4292 /// let val = ReadOptions::builder().iid_index(0).f64().read(&mut bed)?;
4293 /// assert_eq_nan(&val, &nd::array![[1.0, 0.0, f64::NAN, 0.0]]);
4294 ///
4295 /// // Read every 2nd individual.
4296 /// use ndarray::s;
4297 ///
4298 /// let val = ReadOptions::builder()
4299 /// .iid_index(s![..;2])
4300 /// .f64()
4301 /// .read(&mut bed)?;
4302 /// assert_eq_nan(
4303 /// &val,
4304 /// &nd::array![[1.0, 0.0, f64::NAN, 0.0], [0.0, 1.0, 2.0, 0.0]],
4305 /// );
4306 ///
4307 /// // Read last and 2nd-to-last individuals and the last SNP
4308 /// let val = ReadOptions::builder()
4309 /// .iid_index([-1,-2])
4310 /// .sid_index(-1)
4311 /// .f64()
4312 /// .read(&mut bed)?;
4313 ///
4314 /// assert_eq_nan(&val, &nd::array![[0.0],[2.0]]);
4315 ///
4316 /// // The output array can be f32, f64, or i8
4317 /// let val = ReadOptions::builder().i8().read(&mut bed)?;
4318 ///
4319 /// assert_eq_nan(
4320 /// &val,
4321 /// &nd::array![
4322 /// [1, 0, -127, 0],
4323 /// [2, 0, -127, 2],
4324 /// [0, 1, 2, 0]
4325 /// ],
4326 /// );
4327 /// # use bed_reader::BedErrorPlus;
4328 /// # Ok::<(), Box<BedErrorPlus>>(())
4329 /// ```
4330 #[must_use]
4331 pub fn builder() -> ReadOptionsBuilder<TVal> {
4332 ReadOptionsBuilder::default()
4333 }
4334
4335 /// Value to be used for missing values (defaults to -127 or NaN).
4336 ///
4337 /// # Example
4338 /// ```
4339 /// use ndarray as nd;
4340 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4341 /// use bed_reader::assert_eq_nan;
4342 ///
4343 /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4344 /// assert_eq!(read_options.missing_value(), -127);
4345 ///
4346 /// let file_name = sample_bed_file("small.bed")?;
4347 /// let mut bed = Bed::new(file_name)?;
4348 /// let val = bed.read_with_options(&read_options)?;
4349 /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4350 /// # use bed_reader::BedErrorPlus;
4351 /// # Ok::<(), Box<BedErrorPlus>>(())
4352 /// ```
4353 pub fn missing_value(&self) -> TVal {
4354 self.missing_value
4355 }
4356
4357 /// Index of individuals (samples) to read (defaults to all).
4358 ///
4359 /// # Example
4360 /// ```
4361 /// use ndarray as nd;
4362 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4363 /// use bed_reader::assert_eq_nan;
4364 ///
4365 /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4366 /// println!("{0:?}", read_options.iid_index()); // Outputs 'All'
4367 /// println!("{0:?}", read_options.sid_index()); // Outputs 'Vec([2, 3, 0])'
4368 ///
4369 /// let file_name = sample_bed_file("small.bed")?;
4370 /// let mut bed = Bed::new(file_name)?;
4371 /// let val = bed.read_with_options(&read_options)?;
4372 /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4373 /// # use bed_reader::BedErrorPlus;
4374 /// # Ok::<(), Box<BedErrorPlus>>(())
4375 /// ```
4376 pub fn iid_index(&self) -> &Index {
4377 &self.iid_index
4378 }
4379
4380 /// Index of SNPs (variants) to read (defaults to all).
4381 ///
4382 /// # Example
4383 /// ```
4384 /// use ndarray as nd;
4385 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4386 /// use bed_reader::assert_eq_nan;
4387 ///
4388 /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4389 /// println!("{0:?}", read_options.iid_index()); // Outputs 'All'
4390 /// println!("{0:?}", read_options.sid_index()); // Outputs 'Vec([2, 3, 0])'
4391 ///
4392 /// let file_name = sample_bed_file("small.bed")?;
4393 /// let mut bed = Bed::new(file_name)?;
4394 /// let val = bed.read_with_options(&read_options)?;
4395 /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4396 /// # use bed_reader::BedErrorPlus;
4397 /// # Ok::<(), Box<BedErrorPlus>>(())
4398 /// ```
4399 pub fn sid_index(&self) -> &Index {
4400 &self.sid_index
4401 }
4402
4403 /// Is the order of the output array Fortran-style (defaults to true).
4404 ///
4405 /// # Example
4406 /// ```
4407 /// use ndarray as nd;
4408 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4409 /// use bed_reader::assert_eq_nan;
4410 ///
4411 /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4412 /// assert_eq!(read_options.is_f(), true);
4413 ///
4414 /// let file_name = sample_bed_file("small.bed")?;
4415 /// let mut bed = Bed::new(file_name)?;
4416 /// let val = bed.read_with_options(&read_options)?;
4417 /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4418 /// # use bed_reader::BedErrorPlus;
4419 /// # Ok::<(), Box<BedErrorPlus>>(())
4420 /// ```
4421 pub fn is_f(&self) -> bool {
4422 self.is_f
4423 }
4424
4425 /// If allele 1 will be counted (defaults to true).
4426 ///
4427 /// # Example
4428 /// ```
4429 /// use ndarray as nd;
4430 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4431 /// use bed_reader::assert_eq_nan;
4432 ///
4433 /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4434 /// assert_eq!(read_options.is_a1_counted(), true);
4435 ///
4436 /// let file_name = sample_bed_file("small.bed")?;
4437 /// let mut bed = Bed::new(file_name)?;
4438 /// let val = bed.read_with_options(&read_options)?;
4439 /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4440 /// # use bed_reader::BedErrorPlus;
4441 /// # Ok::<(), Box<BedErrorPlus>>(())
4442 /// ```
4443 pub fn is_a1_counted(&self) -> bool {
4444 self.is_a1_counted
4445 }
4446
4447 /// Number of threads to be used (`None` means set with
4448 /// [Environment Variables](index.html#environment-variables) or use all processors).
4449 ///
4450 /// # Example
4451 /// ```
4452 /// use ndarray as nd;
4453 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4454 /// use bed_reader::assert_eq_nan;
4455 ///
4456 /// let read_options = ReadOptions::builder().sid_index([2, 3, 0]).i8().build()?;
4457 /// assert_eq!(read_options.num_threads(), None);
4458 ///
4459 /// let file_name = sample_bed_file("small.bed")?;
4460 /// let mut bed = Bed::new(file_name)?;
4461 /// let val = bed.read_with_options(&read_options)?;
4462 /// assert_eq_nan(&val, &nd::array![[-127, 0, 1], [-127, 2, 2], [2, 0, 0]]);
4463 /// # use bed_reader::BedErrorPlus;
4464 /// # Ok::<(), Box<BedErrorPlus>>(())
4465 /// ```
4466 pub fn num_threads(&self) -> Option<usize> {
4467 self.num_threads
4468 }
4469}
4470
4471impl<TVal: BedVal> ReadOptionsBuilder<TVal> {
4472 /// > See [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) for details and examples.
4473 pub fn read(&self, bed: &mut Bed) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
4474 let read_options = self.build()?;
4475 bed.read_with_options(&read_options)
4476 }
4477
4478 /// Read genotype data from the cloud.
4479 ///
4480 /// > Also see
4481 /// > [`BedCloud::read_with_options`](struct.BedCloud.html#method.read_with_options).
4482 ///
4483 /// # Errors
4484 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4485 /// for all possible errors.
4486 ///
4487 /// # Example
4488 ///
4489 /// ```
4490 /// use ndarray as nd;
4491 /// use bed_reader::{BedCloud, ReadOptions};
4492 /// use bed_reader::assert_eq_nan;
4493 ///
4494 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
4495 /// // Read the SNPs indexed by 2.
4496 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
4497 /// let mut bed_cloud = BedCloud::new(&url).await?;
4498 /// let mut val = ReadOptions::builder()
4499 /// .sid_index(2)
4500 /// .read_cloud(&mut bed_cloud).await?;
4501 ///
4502 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4503 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
4504 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
4505 /// ```
4506 pub async fn read_cloud(
4507 &self,
4508 bed_cloud: &mut BedCloud,
4509 ) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
4510 let read_options = self.build()?;
4511 bed_cloud.read_with_options(&read_options).await
4512 }
4513
4514 /// Read genotype data into a preallocated array.
4515 ///
4516 /// > Also see [`Bed::read_and_fill`](struct.Bed.html#method.read_and_fill) and
4517 /// > [`Bed::read_and_fill_with_options`](struct.Bed.html#method.read_and_fill_with_options).
4518 ///
4519 /// Note that options [`ReadOptions::f`](struct.ReadOptions.html#method.f),
4520 /// [`ReadOptions::c`](struct.ReadOptions.html#method.c), and [`ReadOptions::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
4521 /// are ignored. Instead, the order of the preallocated array is used.
4522 ///
4523 /// # Errors
4524 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4525 /// for all possible errors.
4526 ///
4527 /// # Example
4528 ///
4529 /// ```
4530 /// use ndarray as nd;
4531 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4532 /// use bed_reader::assert_eq_nan;
4533 ///
4534 /// // Read the SNPs indexed by 2.
4535 /// let file_name = sample_bed_file("small.bed")?;
4536 /// let mut bed = Bed::new(file_name)?;
4537 /// let mut val = nd::Array2::<f64>::default((3, 1));
4538 /// ReadOptions::builder()
4539 /// .sid_index(2)
4540 /// .read_and_fill(&mut bed, &mut val.view_mut())?;
4541 ///
4542 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4543 /// # use bed_reader::BedErrorPlus;
4544 /// # Ok::<(), Box<BedErrorPlus>>(())
4545 /// ```
4546 pub fn read_and_fill(
4547 &self,
4548 bed: &mut Bed,
4549 val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
4550 ) -> Result<(), Box<BedErrorPlus>> {
4551 let read_options = self.build()?;
4552 bed.read_and_fill_with_options(val, &read_options)
4553 }
4554
4555 /// Read genotype data from the cloud into a preallocated array.
4556 ///
4557 /// > Also see [`BedCloud::read_and_fill`](struct.BedCloud.html#method.read_and_fill) and
4558 /// > [`BedCloud::read_and_fill_with_options`](struct.BedCloud.html#method.read_and_fill_with_options).
4559 ///
4560 /// Note that options [`ReadOptions::f`](struct.ReadOptions.html#method.f),
4561 /// [`ReadOptions::c`](struct.ReadOptions.html#method.c), and [`ReadOptions::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
4562 /// are ignored. Instead, the order of the preallocated array is used.
4563 ///
4564 /// # Errors
4565 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
4566 /// for all possible errors.
4567 ///
4568 /// # Example
4569 ///
4570 /// ```
4571 /// use ndarray as nd;
4572 /// use bed_reader::{BedCloud, ReadOptions};
4573 /// use bed_reader::assert_eq_nan;
4574 ///
4575 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
4576 /// // Read the SNPs indexed by 2.
4577 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
4578 /// let mut bed_cloud = BedCloud::new(&url).await?;
4579 /// let mut val = nd::Array2::<f64>::default((3, 1));
4580 /// ReadOptions::builder()
4581 /// .sid_index(2)
4582 /// .read_and_fill_cloud(&mut bed_cloud, &mut val.view_mut()).await?;
4583 ///
4584 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
4585 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
4586 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
4587 /// ```
4588 pub async fn read_and_fill_cloud(
4589 &self,
4590 bed_cloud: &mut BedCloud,
4591 val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
4592 ) -> Result<(), Box<BedErrorPlus>> {
4593 let read_options = self.build()?;
4594 bed_cloud
4595 .read_and_fill_with_options(val, &read_options)
4596 .await
4597 }
4598
4599 /// Order of the output array, Fortran-style (default)
4600 ///
4601 /// Also called "column-major order" [Wikipedia](https://en.wikipedia.org/wiki/Row-_and_column-major_order).
4602 ///
4603 /// Also see [`is_f`](struct.ReadOptionsBuilder.html#method.is_f) and [`c`](struct.ReadOptionsBuilder.html#method.c).
4604 pub fn f(&mut self) -> &mut Self {
4605 self.is_f(true);
4606 self
4607 }
4608
4609 /// Order of the output array, C (default)
4610 ///
4611 /// Also called "row-major order" [Wikipedia](https://en.wikipedia.org/wiki/Row-_and_column-major_order).
4612 ///
4613 /// Also see [`is_f`](struct.ReadOptionsBuilder.html#method.is_f) and [`f`](struct.ReadOptionsBuilder.html#method.f).
4614 pub fn c(&mut self) -> &mut Self {
4615 self.is_f(false);
4616 self
4617 }
4618
4619 /// Count the number allele 1 (default and PLINK standard).
4620 ///
4621 /// Also see [`is_a1_counted`](struct.ReadOptionsBuilder.html#method.is_a1_counted) and [`count_a2`](struct.ReadOptionsBuilder.html#method.count_a2).
4622 ///
4623 /// # Example:
4624 /// ```
4625 /// use ndarray as nd;
4626 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4627 /// use bed_reader::assert_eq_nan;
4628 ///
4629 /// let file_name = sample_bed_file("small.bed")?;
4630 /// let mut bed = Bed::new(file_name)?;
4631 /// let val = ReadOptions::builder().count_a1().i8().read(&mut bed)?;
4632 ///
4633 /// assert_eq_nan(
4634 /// &val,
4635 /// &nd::array![
4636 /// [1, 0, -127, 0],
4637 /// [2, 0, -127, 2],
4638 /// [0, 1, 2, 0]
4639 /// ],
4640 /// );
4641 /// # use bed_reader::BedErrorPlus;
4642 /// # Ok::<(), Box<BedErrorPlus>>(())
4643 /// ```
4644 pub fn count_a1(&mut self) -> &mut Self {
4645 self.is_a1_counted = Some(true);
4646 self
4647 }
4648
4649 /// Count the number allele 2.
4650 ///
4651 /// Also see [`is_a1_counted`](struct.ReadOptionsBuilder.html#method.is_a1_counted) and [`count_a1`](struct.ReadOptionsBuilder.html#method.count_a1).
4652 ///
4653 /// # Example:
4654 /// ```
4655 /// use ndarray as nd;
4656 /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
4657 /// use bed_reader::assert_eq_nan;
4658 ///
4659 /// let file_name = sample_bed_file("small.bed")?;
4660 /// let mut bed = Bed::new(file_name)?;
4661 /// let val = ReadOptions::builder().count_a2().i8().read(&mut bed)?;
4662 ///
4663 /// assert_eq_nan(
4664 /// &val,
4665 /// &nd::array![
4666 /// [1, 2, -127, 2],
4667 /// [0, 2, -127, 0],
4668 /// [2, 1, 0, 2]
4669 /// ],
4670 /// );
4671 /// # use bed_reader::BedErrorPlus;
4672 /// # Ok::<(), Box<BedErrorPlus>>(())
4673 /// ```
4674 pub fn count_a2(&mut self) -> &mut Self {
4675 self.is_a1_counted = Some(false);
4676 self
4677 }
4678}
4679
impl ReadOptionsBuilder<i8> {
    /// Output an ndarray of i8.
    ///
    /// This method returns the builder unchanged. It is only defined for
    /// `ReadOptionsBuilder<i8>`, so calling it fixes the builder's value type to `i8`.
    ///
    /// # Example
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
    /// use bed_reader::assert_eq_nan;
    ///
    /// let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    /// let val = ReadOptions::builder().i8().read(&mut bed)?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1, 0, -127, 0],
    ///         [2, 0, -127, 2],
    ///         [0, 1, 2, 0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    pub fn i8(&mut self) -> &mut Self {
        self
    }
}
4708
impl ReadOptionsBuilder<f32> {
    /// Output an ndarray of f32.
    ///
    /// This method returns the builder unchanged. It is only defined for
    /// `ReadOptionsBuilder<f32>`, so calling it fixes the builder's value type to `f32`.
    ///
    /// # Example
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
    /// use bed_reader::assert_eq_nan;
    ///
    /// let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    /// let val = ReadOptions::builder().f32().read(&mut bed)?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1.0, 0.0, f32::NAN, 0.0],
    ///         [2.0, 0.0, f32::NAN, 2.0],
    ///         [0.0, 1.0, 2.0, 0.0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    pub fn f32(&mut self) -> &mut Self {
        self
    }
}
4737
impl ReadOptionsBuilder<f64> {
    /// Output an ndarray of f64.
    ///
    /// This method returns the builder unchanged. It is only defined for
    /// `ReadOptionsBuilder<f64>`, so calling it fixes the builder's value type to `f64`.
    ///
    /// # Example
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{Bed, ReadOptions, sample_bed_file};
    /// use bed_reader::assert_eq_nan;
    ///
    /// let file_name = sample_bed_file("small.bed")?;
    /// let mut bed = Bed::new(file_name)?;
    /// let val = ReadOptions::builder().f64().read(&mut bed)?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1.0, 0.0, f64::NAN, 0.0],
    ///         [2.0, 0.0, f64::NAN, 2.0],
    ///         [0.0, 1.0, 2.0, 0.0]
    ///     ],
    /// );
    /// # use bed_reader::BedErrorPlus;
    /// # Ok::<(), Box<BedErrorPlus>>(())
    /// ```
    pub fn f64(&mut self) -> &mut Self {
        self
    }
}
4766
/// Represents options for writing genotype data and metadata to a PLINK .bed file.
///
/// Construct with [`WriteOptions::builder`](struct.WriteOptions.html#method.builder).
// `build_fn(skip)` and the per-field `setter(custom)` attributes tell the derive
// not to generate a `build` method or setters for the companion
// `WriteOptionsBuilder`; those are written by hand elsewhere in this file.
#[derive(Clone, Debug, Builder)]
#[builder(build_fn(skip))]
pub struct WriteOptions<TVal>
where
    TVal: BedVal,
{
    // Path to the .bed file.
    #[builder(setter(custom))]
    path: PathBuf,

    // Path to the .fam file.
    #[builder(setter(custom))]
    fam_path: PathBuf,

    // Path to the .bim file.
    #[builder(setter(custom))]
    bim_path: PathBuf,

    // Metadata for the individuals (samples) and SNPs (variants); the accessor
    // methods on `WriteOptions` read their values from here.
    #[builder(setter(custom))]
    metadata: Metadata,

    // Whether allele 1 is the counted allele. Default: true.
    #[builder(setter(custom), default = "true")]
    is_a1_counted: bool,

    // Number of threads to use; `None` when not set explicitly.
    #[builder(default, setter(custom))]
    num_threads: Option<usize>,

    // Value that represents missing data. Default: `TVal::missing()`.
    #[builder(default = "TVal::missing()", setter(custom))]
    missing_value: TVal,

    // Whether writing the .fam file is skipped. Default: false.
    #[builder(setter(custom), default = "false")]
    skip_fam: bool,

    // Whether writing the .bim file is skipped. Default: false.
    #[builder(setter(custom), default = "false")]
    skip_bim: bool,
}
4803
4804impl<TVal> WriteOptions<TVal>
4805where
4806 TVal: BedVal,
4807{
4808 /// Write values to a file in PLINK .bed format. Supports metadata and options.
4809 ///
4810 /// > Also see [`Bed::write`](struct.Bed.html#method.write), which does not support metadata or options.
4811 ///
4812 /// The options, [listed here](struct.WriteOptionsBuilder.html#implementations), can specify the:
4813 /// * items of metadata, for example the individual ids or the SNP ids
4814 /// * a non-default path for the .fam and/or .bim files
4815 /// * a non-default value that represents missing data
4816 /// * whether the first allele is counted (default) or the second
4817 /// * number of threads to use for writing
4818 /// * a [`Metadata`](struct.Metadata.html)
4819 ///
4820 /// # Examples
4821 /// In this example, all metadata is given one item at a time.
4822 /// ```
4823 /// use ndarray as nd;
4824 /// use bed_reader::{Bed, WriteOptions};
4825 ///
4826 /// let output_folder = temp_testdir::TempDir::default();
4827 /// let output_file = output_folder.join("small.bed");
4828 /// let val = nd::array![
4829 /// [1.0, 0.0, f64::NAN, 0.0],
4830 /// [2.0, 0.0, f64::NAN, 2.0],
4831 /// [0.0, 1.0, 2.0, 0.0]
4832 /// ];
4833 /// WriteOptions::builder(output_file)
4834 /// .fid(["fid1", "fid1", "fid2"])
4835 /// .iid(["iid1", "iid2", "iid3"])
4836 /// .father(["iid23", "iid23", "iid22"])
4837 /// .mother(["iid34", "iid34", "iid33"])
4838 /// .sex([1, 2, 0])
4839 /// .pheno(["red", "red", "blue"])
4840 /// .chromosome(["1", "1", "5", "Y"])
4841 /// .sid(["sid1", "sid2", "sid3", "sid4"])
4842 /// .cm_position([100.4, 2000.5, 4000.7, 7000.9])
4843 /// .bp_position([1, 100, 1000, 1004])
4844 /// .allele_1(["A", "T", "A", "T"])
4845 /// .allele_2(["A", "C", "C", "G"])
4846 /// .write(&val)?;
4847 /// # use bed_reader::BedErrorPlus;
4848 /// # Ok::<(), Box<BedErrorPlus>>(())
4849 /// ```
4850 /// Here, no metadata is given, so default values are assigned.
4851 /// If we then read the new file and list the chromosome property,
4852 /// it is an array of zeros, the default chromosome value.
4853 /// ```
4854 /// # use ndarray as nd;
4855 /// # use bed_reader::{Bed, WriteOptions};
4856 /// # let output_folder = temp_testdir::TempDir::default();
4857 /// let output_file2 = output_folder.join("small2.bed");
4858 /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
4859 ///
4860 /// WriteOptions::builder(&output_file2).write(&val)?;
4861 ///
4862 /// let mut bed2 = Bed::new(&output_file2)?;
4863 /// println!("{:?}", bed2.chromosome()?); // Outputs ndarray ["0", "0", "0", "0"]
4864 /// # use bed_reader::BedErrorPlus;
4865 /// # Ok::<(), Box<BedErrorPlus>>(())
4866 /// ```
4867 #[anyinput]
4868 pub fn builder(path: AnyPath) -> WriteOptionsBuilder<TVal> {
4869 WriteOptionsBuilder::new(path)
4870 }
4871
4872 /// Family id of each of individual (sample). Defaults to "0"'s
4873 ///
4874 /// # Example
4875 /// ```
4876 /// use ndarray as nd;
4877 /// use bed_reader::{WriteOptions};
4878 /// let output_folder = temp_testdir::TempDir::default();
4879 /// let output_file = output_folder.join("small.bed");
4880 /// let write_options = WriteOptions::builder(output_file)
4881 /// .f64()
4882 /// .iid(["i1", "i2", "i3"])
4883 /// .sid(["s1", "s2", "s3", "s4"])
4884 /// .build(3, 4)?;
4885 ///
4886 /// println!("{0:?}", write_options.fid()); // Outputs ndarray ["0", "0", "0"]
4887 /// # use bed_reader::BedErrorPlus;
4888 /// # Ok::<(), Box<BedErrorPlus>>(())
4889 /// ```
4890 pub fn fid(&self) -> &nd::Array1<String> {
4891 // unwrap always works because the WriteOptions constructor fills all metadata.
4892 self.metadata.fid.as_ref().unwrap()
4893 }
4894
4895 /// Individual id of each of individual (sample). Defaults to "iid1", "iid2" ...
4896 ///
4897 /// # Example
4898 /// ```
4899 /// use ndarray as nd;
4900 /// use bed_reader::{Bed, WriteOptions};
4901 /// let output_folder = temp_testdir::TempDir::default();
4902 /// let output_file = output_folder.join("small.bed");
4903 /// let write_options = WriteOptions::builder(output_file)
4904 /// .f64()
4905 /// .iid(["i1", "i2", "i3"])
4906 /// .sid(["s1", "s2", "s3", "s4"])
4907 /// .build(3, 4)?;
4908 ///
4909 /// println!("{0:?}", write_options.iid()); // Outputs ndarray ["i1", "i2", "i3"]
4910 ///
4911 /// let val = nd::array![
4912 /// [1.0, 0.0, f64::NAN, 0.0],
4913 /// [2.0, 0.0, f64::NAN, 2.0],
4914 /// [0.0, 1.0, 2.0, 0.0]
4915 /// ];
4916 /// Bed::write_with_options(&val, &write_options)?;
4917 /// # use bed_reader::BedErrorPlus;
4918 /// # Ok::<(), Box<BedErrorPlus>>(())
4919 /// ```
4920 pub fn iid(&self) -> &nd::Array1<String> {
4921 // unwrap always works because the WriteOptions constructor fills all metadata.
4922 self.metadata.iid.as_ref().unwrap()
4923 }
4924
4925 /// Father id of each of individual (sample). Defaults to "0"'s
4926 ///
4927 /// # Example
4928 /// ```
4929 /// use ndarray as nd;
4930 /// use bed_reader::WriteOptions;
4931 /// let output_folder = temp_testdir::TempDir::default();
4932 /// let output_file = output_folder.join("small.bed");
4933 /// let write_options = WriteOptions::builder(output_file)
4934 /// .f64()
4935 /// .iid(["i1", "i2", "i3"])
4936 /// .sid(["s1", "s2", "s3", "s4"])
4937 /// .build(3, 4)?;
4938 ///
4939 /// println!("{0:?}", write_options.father()); // Outputs ndarray ["0", "0", "0"]
4940 /// # use bed_reader::BedErrorPlus;
4941 /// # Ok::<(), Box<BedErrorPlus>>(())
4942 /// ```
4943 pub fn father(&self) -> &nd::Array1<String> {
4944 // unwrap always works because the WriteOptions constructor fills all metadata.
4945 self.metadata.father.as_ref().unwrap()
4946 }
4947
4948 /// Mother id of each of individual (sample). Defaults to "0"'s
4949 ///
4950 /// # Example
4951 /// ```
4952 /// use ndarray as nd;
4953 /// use bed_reader::WriteOptions;
4954 /// let output_folder = temp_testdir::TempDir::default();
4955 /// let output_file = output_folder.join("small.bed");
4956 /// let write_options = WriteOptions::builder(output_file)
4957 /// .f64()
4958 /// .iid(["i1", "i2", "i3"])
4959 /// .sid(["s1", "s2", "s3", "s4"])
4960 /// .build(3, 4)?;
4961 ///
4962 /// println!("{0:?}", write_options.mother()); // Outputs ndarray ["0", "0", "0"]
4963 /// # use bed_reader::BedErrorPlus;
4964 /// # Ok::<(), Box<BedErrorPlus>>(())
4965 /// ```
4966 pub fn mother(&self) -> &nd::Array1<String> {
4967 // unwrap always works because the WriteOptions constructor fills all metadata.
4968 self.metadata.mother.as_ref().unwrap()
4969 }
4970
4971 /// Sex of each of individual (sample). Defaults to 0's
4972 ///
4973 /// 0 is unknown, 1 is male, 2 is female
4974 ///
4975 /// # Example
4976 /// ```
4977 /// use ndarray as nd;
4978 /// use bed_reader::WriteOptions;
4979 /// let output_folder = temp_testdir::TempDir::default();
4980 /// let output_file = output_folder.join("small.bed");
4981 /// let write_options = WriteOptions::builder(output_file)
4982 /// .f64()
4983 /// .iid(["i1", "i2", "i3"])
4984 /// .sid(["s1", "s2", "s3", "s4"])
4985 /// .build(3, 4)?;
4986 ///
4987 /// println!("{0:?}", write_options.sex()); // Outputs ndarray [0, 0, 0]
4988 /// # use bed_reader::BedErrorPlus;
4989 /// # Ok::<(), Box<BedErrorPlus>>(())
4990 /// ```
4991 pub fn sex(&self) -> &nd::Array1<i32> {
4992 // unwrap always works because the WriteOptions constructor fills all metadata.
4993 self.metadata.sex.as_ref().unwrap()
4994 }
4995
4996 /// Phenotype of each of individual (sample). Seldom used. Defaults to 0's
4997 ///
4998 /// # Example
4999 /// ```
5000 /// use ndarray as nd;
5001 /// use bed_reader::WriteOptions;
5002 /// let output_folder = temp_testdir::TempDir::default();
5003 /// let output_file = output_folder.join("small.bed");
5004 /// let write_options = WriteOptions::builder(output_file)
5005 /// .f64()
5006 /// .iid(["i1", "i2", "i3"])
5007 /// .sid(["s1", "s2", "s3", "s4"])
5008 /// .build(3, 4)?;
5009 ///
5010 /// println!("{0:?}", write_options.pheno()); // Outputs ndarray ["0", "0", "0"]
5011 /// # use bed_reader::BedErrorPlus;
5012 /// # Ok::<(), Box<BedErrorPlus>>(())
5013 /// ```
5014 pub fn pheno(&self) -> &nd::Array1<String> {
5015 // unwrap always works because the WriteOptions constructor fills all metadata.
5016 self.metadata.pheno.as_ref().unwrap()
5017 }
5018
5019 /// Chromosome of each of SNP (variant). Defaults to "0"'s
5020 ///
5021 /// # Example
5022 /// ```
5023 /// use ndarray as nd;
5024 /// use bed_reader::WriteOptions;
5025 /// let output_folder = temp_testdir::TempDir::default();
5026 /// let output_file = output_folder.join("small.bed");
5027 /// let write_options = WriteOptions::builder(output_file)
5028 /// .f64()
5029 /// .iid(["i1", "i2", "i3"])
5030 /// .sid(["s1", "s2", "s3", "s4"])
5031 /// .build(3, 4)?;
5032 ///
5033 /// println!("{0:?}", write_options.chromosome()); // Outputs ndarray ["0", "0", "0", "0"]
5034 /// # use bed_reader::BedErrorPlus;
5035 /// # Ok::<(), Box<BedErrorPlus>>(())
5036 /// ```
5037 pub fn chromosome(&self) -> &nd::Array1<String> {
5038 // unwrap always works because the WriteOptions constructor fills all metadata.
5039 self.metadata.chromosome.as_ref().unwrap()
5040 }
5041
5042 /// SNP id of each of SNP (variant). Defaults to "sid1", "sid2", ...
5043 ///
5044 /// # Example
5045 /// ```
5046 /// use ndarray as nd;
5047 /// use bed_reader::{Bed, WriteOptions};
5048 /// let output_folder = temp_testdir::TempDir::default();
5049 /// let output_file = output_folder.join("small.bed");
5050 /// let write_options = WriteOptions::builder(output_file)
5051 /// .f64()
5052 /// .iid(["i1", "i2", "i3"])
5053 /// .sid(["s1", "s2", "s3", "s4"])
5054 /// .build(3, 4)?;
5055 ///
5056 /// println!("{0:?}", write_options.sid()); // Outputs ndarray ["s1", "s2", "s3", "s4"]
5057 ///
5058 /// let val = nd::array![
5059 /// [1.0, 0.0, f64::NAN, 0.0],
5060 /// [2.0, 0.0, f64::NAN, 2.0],
5061 /// [0.0, 1.0, 2.0, 0.0]
5062 /// ];
5063 /// Bed::write_with_options(&val, &write_options)?;
5064 /// # use bed_reader::BedErrorPlus;
5065 /// # Ok::<(), Box<BedErrorPlus>>(())
5066 /// ```
5067 pub fn sid(&self) -> &nd::Array1<String> {
5068 // unwrap always works because the WriteOptions constructor fills all metadata.
5069 self.metadata.sid.as_ref().unwrap()
5070 }
5071
5072 /// Centimorgan position of each SNP (variant). Defaults to 0.0's.
5073 ///
5074 /// # Example
5075 /// ```
5076 /// use ndarray as nd;
5077 /// use bed_reader::WriteOptions;
5078 /// let output_folder = temp_testdir::TempDir::default();
5079 /// let output_file = output_folder.join("small.bed");
5080 /// let write_options = WriteOptions::builder(output_file)
5081 /// .f64()
5082 /// .iid(["i1", "i2", "i3"])
5083 /// .sid(["s1", "s2", "s3", "s4"])
5084 /// .build(3, 4)?;
5085 ///
5086 /// println!("{0:?}", write_options.cm_position()); // Outputs ndarray [0.0, 0.0, 0.0, 0.0]
5087 /// # use bed_reader::BedErrorPlus;
5088 /// # Ok::<(), Box<BedErrorPlus>>(())
5089 /// ```
5090 pub fn cm_position(&self) -> &nd::Array1<f32> {
5091 // unwrap always works because the WriteOptions constructor fills all metadata.
5092 self.metadata.cm_position.as_ref().unwrap()
5093 }
5094
5095 /// Base-pair position of each SNP (variant). Defaults to 0's.
5096 ///
5097 /// # Example
5098 /// ```
5099 /// use ndarray as nd;
5100 /// use bed_reader::{Bed, WriteOptions};
5101 /// let output_folder = temp_testdir::TempDir::default();
5102 /// let output_file = output_folder.join("small.bed");
5103 /// let write_options = WriteOptions::builder(output_file)
5104 /// .f64()
5105 /// .iid(["i1", "i2", "i3"])
5106 /// .sid(["s1", "s2", "s3", "s4"])
5107 /// .build(3, 4)?;
5108 ///
5109 /// println!("{0:?}", write_options.bp_position()); // Outputs ndarray [0, 0, 0, 0]
5110 /// # use bed_reader::BedErrorPlus;
5111 /// # Ok::<(), Box<BedErrorPlus>>(())
5112 /// ```
5113 pub fn bp_position(&self) -> &nd::Array1<i32> {
5114 // unwrap always works because the WriteOptions constructor fills all metadata.
5115 self.metadata.bp_position.as_ref().unwrap()
5116 }
5117
5118 /// First allele of each SNP (variant). Defaults to "A1"
5119 ///
5120 /// # Example
5121 /// ```
5122 /// use ndarray as nd;
5123 /// use bed_reader::{Bed, WriteOptions};
5124 /// let output_folder = temp_testdir::TempDir::default();
5125 /// let output_file = output_folder.join("small.bed");
5126 /// let write_options = WriteOptions::builder(output_file)
5127 /// .f64()
5128 /// .iid(["i1", "i2", "i3"])
5129 /// .sid(["s1", "s2", "s3", "s4"])
5130 /// .build(3, 4)?;
5131 ///
5132 /// println!("{0:?}", write_options.allele_1()); // Outputs ndarray ["A1", "A1", "A1", "A1"]
5133 /// println!("{0:?}", write_options.allele_2()); // Outputs ndarray ["A2", "A2", "A2", "A2"]
5134 /// # use bed_reader::BedErrorPlus;
5135 /// # Ok::<(), Box<BedErrorPlus>>(())
5136 /// ```
5137 pub fn allele_1(&self) -> &nd::Array1<String> {
5138 // unwrap always works because the WriteOptions constructor fills all metadata.
5139 self.metadata.allele_1.as_ref().unwrap()
5140 }
5141
5142 /// Second allele of each SNP (variant). Defaults to "A2"
5143 ///
5144 /// # Example
5145 /// ```
5146 /// use ndarray as nd;
5147 /// use bed_reader::{Bed, WriteOptions};
5148 /// let output_folder = temp_testdir::TempDir::default();
5149 /// let output_file = output_folder.join("small.bed");
5150 /// let write_options = WriteOptions::builder(output_file)
5151 /// .f64()
5152 /// .iid(["i1", "i2", "i3"])
5153 /// .sid(["s1", "s2", "s3", "s4"])
5154 /// .build(3, 4)?;
5155 ///
5156 /// println!("{0:?}", write_options.allele_1()); // Outputs ndarray ["A1", "A1", "A1", "A1"]
5157 /// println!("{0:?}", write_options.allele_2()); // Outputs ndarray ["A2", "A2", "A2", "A2"]
5158 /// # use bed_reader::BedErrorPlus;
5159 /// # Ok::<(), Box<BedErrorPlus>>(())
5160 /// ```
5161 pub fn allele_2(&self) -> &nd::Array1<String> {
5162 // unwrap always works because the WriteOptions constructor fills all metadata.
5163 self.metadata.allele_2.as_ref().unwrap()
5164 }
5165
5166 /// [`Metadata`](struct.Metadata.html) for this [`WriteOptions`](struct.WriteOptions.html), for example, the individual (sample) Ids.
5167 ///
5168 /// This returns a struct with 12 fields. Each field is a ndarray.
5169 /// The struct will always be new, but the 12 ndarrays will be
5170 /// shared with this [`WriteOptions`](struct.WriteOptions.html).
5171 ///
5172 /// If the needed, default values will be used.
5173 ///
5174 /// # Example
5175 /// ```
5176 /// use ndarray as nd;
5177 /// use bed_reader::{Bed, WriteOptions};
5178 /// let output_folder = temp_testdir::TempDir::default();
5179 /// let output_file = output_folder.join("small.bed");
5180 /// let write_options = WriteOptions::builder(output_file)
5181 /// .f64()
5182 /// .iid(["i1", "i2", "i3"])
5183 /// .sid(["s1", "s2", "s3", "s4"])
5184 /// .build(3, 4)?;
5185 ///
5186 /// let metadata = write_options.metadata();
5187 /// println!("{0:?}", metadata.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"])
5188 /// # use bed_reader::BedErrorPlus;
5189 /// # Ok::<(), Box<BedErrorPlus>>(())
5190 /// ```
5191 pub fn metadata(&self) -> Metadata {
5192 self.metadata.clone()
5193 }
5194
5195 /// The number of individuals (samples)
5196 ///
5197 /// # Example
5198 /// ```
5199 /// use ndarray as nd;
5200 /// use bed_reader::{Bed, WriteOptions};
5201 /// let output_folder = temp_testdir::TempDir::default();
5202 /// let output_file = output_folder.join("small.bed");
5203 /// let write_options = WriteOptions::builder(output_file)
5204 /// .f64()
5205 /// .iid(["i1", "i2", "i3"])
5206 /// .sid(["s1", "s2", "s3", "s4"])
5207 /// .build(3, 4)?;
5208 ///
5209 /// assert_eq!(write_options.iid_count(), 3);
5210 /// assert_eq!(write_options.sid_count(), 4);
5211 /// # use bed_reader::BedErrorPlus;
5212 /// # Ok::<(), Box<BedErrorPlus>>(())
5213 /// ```
5214 pub fn iid_count(&self) -> usize {
5215 self.iid().len()
5216 }
5217
5218 /// The number of SNPs (variants)
5219 ///
5220 /// # Example
5221 /// ```
5222 /// use ndarray as nd;
5223 /// use bed_reader::{Bed, WriteOptions};
5224 /// let output_folder = temp_testdir::TempDir::default();
5225 /// let output_file = output_folder.join("small.bed");
5226 /// let write_options = WriteOptions::builder(output_file)
5227 /// .f64()
5228 /// .iid(["i1", "i2", "i3"])
5229 /// .sid(["s1", "s2", "s3", "s4"])
5230 /// .build(3, 4)?;
5231 ///
5232 /// assert_eq!(write_options.iid_count(), 3);
5233 /// assert_eq!(write_options.sid_count(), 4);
5234 /// # use bed_reader::BedErrorPlus;
5235 /// # Ok::<(), Box<BedErrorPlus>>(())
5236 /// ```
5237 pub fn sid_count(&self) -> usize {
5238 self.sid().len()
5239 }
5240
5241 /// Number of individuals (samples) and SNPs (variants)
5242 ///
5243 /// # Example
5244 /// ```
5245 /// use ndarray as nd;
5246 /// use bed_reader::{Bed, WriteOptions};
5247 /// let output_folder = temp_testdir::TempDir::default();
5248 /// let output_file = output_folder.join("small.bed");
5249 /// let write_options = WriteOptions::builder(output_file)
5250 /// .f64()
5251 /// .iid(["i1", "i2", "i3"])
5252 /// .sid(["s1", "s2", "s3", "s4"])
5253 /// .build(3, 4)?;
5254 ///
5255 /// assert_eq!(write_options.dim(), (3, 4));
5256 /// # use bed_reader::BedErrorPlus;
5257 /// # Ok::<(), Box<BedErrorPlus>>(())
5258 /// ```
5259 pub fn dim(&self) -> (usize, usize) {
5260 (self.iid_count(), self.sid_count())
5261 }
5262
5263 /// Path to .bed file.
5264 ///
5265 /// # Example
5266 /// ```
5267 /// use ndarray as nd;
5268 /// use bed_reader::{Bed, WriteOptions};
5269 /// let output_folder = temp_testdir::TempDir::default();
5270 /// let output_file = output_folder.join("small.bed");
5271 /// let write_options = WriteOptions::builder(output_file)
5272 /// .f64()
5273 /// .iid(["i1", "i2", "i3"])
5274 /// .sid(["s1", "s2", "s3", "s4"])
5275 /// .build(3, 4)?;
5276 ///
5277 /// println!("{0:?}", write_options.path()); // Outputs "...small.bed"
5278 /// println!("{0:?}", write_options.fam_path()); // Outputs "...small.fam"
5279 /// println!("{0:?}", write_options.bim_path()); // Outputs "...small.bim"
5280 /// # use bed_reader::BedErrorPlus;
5281 /// # Ok::<(), Box<BedErrorPlus>>(())
5282 /// ```
5283 pub fn path(&self) -> &PathBuf {
5284 &self.path
5285 }
5286
5287 /// Path to .fam file.
5288 ///
5289 /// # Example
5290 /// ```
5291 /// use ndarray as nd;
5292 /// use bed_reader::{Bed, WriteOptions};
5293 /// let output_folder = temp_testdir::TempDir::default();
5294 /// let output_file = output_folder.join("small.bed");
5295 /// let write_options = WriteOptions::builder(output_file)
5296 /// .f64()
5297 /// .iid(["i1", "i2", "i3"])
5298 /// .sid(["s1", "s2", "s3", "s4"])
5299 /// .build(3, 4)?;
5300 ///
5301 /// println!("{0:?}", write_options.path()); // Outputs "...small.bed"
5302 /// println!("{0:?}", write_options.fam_path()); // Outputs "...small.fam"
5303 /// println!("{0:?}", write_options.bim_path()); // Outputs "...small.bim"
5304 /// # use bed_reader::BedErrorPlus;
5305 /// # Ok::<(), Box<BedErrorPlus>>(())
5306 /// ```
5307 pub fn fam_path(&self) -> &PathBuf {
5308 &self.fam_path
5309 }
5310
5311 /// Path to .bim file.
5312 ///
5313 /// # Example
5314 /// ```
5315 /// use ndarray as nd;
5316 /// use bed_reader::{Bed, WriteOptions};
5317 /// let output_folder = temp_testdir::TempDir::default();
5318 /// let output_file = output_folder.join("small.bed");
5319 /// let write_options = WriteOptions::builder(output_file)
5320 /// .f64()
5321 /// .iid(["i1", "i2", "i3"])
5322 /// .sid(["s1", "s2", "s3", "s4"])
5323 /// .build(3, 4)?;
5324 ///
5325 /// println!("{0:?}", write_options.path()); // Outputs "...small.bed"
5326 /// println!("{0:?}", write_options.fam_path()); // Outputs "...small.fam"
5327 /// println!("{0:?}", write_options.bim_path()); // Outputs "...small.bim"
5328 /// # use bed_reader::BedErrorPlus;
5329 /// # Ok::<(), Box<BedErrorPlus>>(())
5330 /// ```
5331 pub fn bim_path(&self) -> &PathBuf {
5332 &self.bim_path
5333 }
5334
5335 /// If allele 1 will be counted (defaults to true).
5336 ///
5337 /// # Example
5338 /// ```
5339 /// use ndarray as nd;
5340 /// use bed_reader::{Bed, WriteOptions};
5341 /// let output_folder = temp_testdir::TempDir::default();
5342 /// let output_file = output_folder.join("small.bed");
5343 /// let write_options = WriteOptions::builder(output_file)
5344 /// .i8()
5345 /// .iid(["i1", "i2", "i3"])
5346 /// .sid(["s1", "s2", "s3", "s4"])
5347 /// .build(3, 4)?;
5348 ///
5349 /// assert!(write_options.is_a1_counted());
5350 /// # use bed_reader::BedErrorPlus;
5351 /// # Ok::<(), Box<BedErrorPlus>>(())
5352 /// ```
5353 pub fn is_a1_counted(&self) -> bool {
5354 self.is_a1_counted
5355 }
5356
5357 /// Number of threads to be used (`None` means set with
5358 /// [Environment Variables](index.html#environment-variables) or use all processors).
5359 ///
5360 /// # Example
5361 /// ```
5362 /// use ndarray as nd;
5363 /// use bed_reader::{Bed, WriteOptions};
5364 /// let output_folder = temp_testdir::TempDir::default();
5365 /// let output_file = output_folder.join("small.bed");
5366 /// let write_options = WriteOptions::builder(output_file)
5367 /// .i8()
5368 /// .iid(["i1", "i2", "i3"])
5369 /// .sid(["s1", "s2", "s3", "s4"])
5370 /// .build(3, 4)?;
5371 ///
5372 /// assert!(write_options.num_threads().is_none());
5373 /// # use bed_reader::BedErrorPlus;
5374 /// # Ok::<(), Box<BedErrorPlus>>(())
5375 /// ```
5376 pub fn num_threads(&self) -> Option<usize> {
5377 self.num_threads
5378 }
5379
5380 /// Value to be used for missing values (defaults to -127 or NaN).
5381 ///
5382 /// # Example
5383 /// ```
5384 /// use ndarray as nd;
5385 /// use bed_reader::{Bed, WriteOptions};
5386 /// let output_folder = temp_testdir::TempDir::default();
5387 /// let output_file = output_folder.join("small.bed");
5388 /// let write_options = WriteOptions::builder(output_file)
5389 /// .i8()
5390 /// .iid(["i1", "i2", "i3"])
5391 /// .sid(["s1", "s2", "s3", "s4"])
5392 /// .build(3, 4)?;
5393 ///
5394 /// assert!(write_options.missing_value() == -127);
5395 /// # use bed_reader::BedErrorPlus;
5396 /// # Ok::<(), Box<BedErrorPlus>>(())
5397 /// ```
5398 pub fn missing_value(&self) -> TVal {
5399 self.missing_value
5400 }
5401
5402 /// If skipping writing .fam file.
5403 ///
5404 /// # Example
5405 /// ```
5406 /// use ndarray as nd;
5407 /// use bed_reader::{Bed, WriteOptions};
5408 /// let output_folder = temp_testdir::TempDir::default();
5409 /// let output_file = output_folder.join("small.bed");
5410 /// let write_options = WriteOptions::builder(output_file)
5411 /// .i8()
5412 /// .skip_fam()
5413 /// .skip_bim()
5414 /// .build(3, 4)?;
5415 /// assert!(write_options.skip_fam());
5416 /// assert!(write_options.skip_bim());
5417 /// # use bed_reader::BedErrorPlus;
5418 /// # Ok::<(), Box<BedErrorPlus>>(())
5419 /// ```
5420 pub fn skip_fam(&self) -> bool {
5421 self.skip_fam
5422 }
5423
5424 /// If skipping writing .bim file.
5425 ///
5426 /// # Example
5427 /// ```
5428 /// use ndarray as nd;
5429 /// use bed_reader::{Bed, WriteOptions};
5430 /// let output_folder = temp_testdir::TempDir::default();
5431 /// let output_file = output_folder.join("small.bed");
5432 /// let write_options = WriteOptions::builder(output_file)
5433 /// .i8()
5434 /// .skip_fam()
5435 /// .skip_bim()
5436 /// .build(3, 4)?;
5437 /// assert!(write_options.skip_fam());
5438 /// assert!(write_options.skip_bim());
5439 /// # use bed_reader::BedErrorPlus;
5440 /// # Ok::<(), Box<BedErrorPlus>>(())
5441 /// ```
5442 pub fn skip_bim(&self) -> bool {
5443 self.skip_bim
5444 }
5445}
5446
5447impl<TVal> WriteOptionsBuilder<TVal>
5448where
5449 TVal: BedVal,
5450{
5451 /// Creates a new [`WriteOptions`](struct.WriteOptions.html) with the options given and then writes a .bed (and .fam and .bim) file.
5452 ///
5453 /// See [`WriteOptions`](struct.WriteOptions.html) for details and examples.
5454 pub fn write<S: nd::Data<Elem = TVal>>(
5455 &mut self,
5456 val: &nd::ArrayBase<S, nd::Ix2>,
5457 ) -> Result<(), Box<BedErrorPlus>> {
5458 let (iid_count, sid_count) = val.dim();
5459 let write_options = self.build(iid_count, sid_count)?;
5460 Bed::write_with_options(val, &write_options)?;
5461
5462 Ok(())
5463 }
5464
5465 /// Set the family id (fid) values for each individual (sample).
5466 ///
5467 /// Defaults to zeros.
5468 ///
5469 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5470 ///
5471 #[anyinput]
5472 #[must_use]
5473 pub fn fid(mut self, fid: AnyIter<AnyString>) -> Self {
5474 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5475 self.metadata.as_mut().unwrap().set_fid(fid);
5476 self
5477 }
5478
5479 /// Set the individual id (iid) values for each individual (sample).
5480 ///
5481 /// Defaults to "iid1", "iid2", ...
5482 ///
5483 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5484 ///
5485 #[anyinput]
5486 #[must_use]
5487 pub fn iid(mut self, iid: AnyIter<AnyString>) -> Self {
5488 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5489 self.metadata.as_mut().unwrap().set_iid(iid);
5490 self
5491 }
5492
5493 /// Set the father id values for each individual (sample).
5494 ///
5495 /// Defaults to zeros.
5496 ///
5497 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5498 ///
5499 #[anyinput]
5500 #[must_use]
5501 pub fn father(mut self, father: AnyIter<AnyString>) -> Self {
5502 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5503 self.metadata.as_mut().unwrap().set_father(father);
5504 self
5505 }
5506
5507 /// Set the mother id values for each individual (sample).
5508 ///
5509 /// Defaults to zeros.
5510 ///
5511 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5512 ///
5513 #[anyinput]
5514 #[must_use]
5515 pub fn mother(mut self, mother: AnyIter<AnyString>) -> Self {
5516 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5517 self.metadata.as_mut().unwrap().set_mother(mother);
5518 self
5519 }
5520
5521 /// Set the sex for each individual (sample).
5522 ///
5523 /// 0 is unknown (default), 1 is male, 2 is female
5524 #[anyinput]
5525 #[must_use]
5526 pub fn sex(mut self, sex: AnyIter<i32>) -> Self {
5527 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5528 self.metadata.as_mut().unwrap().set_sex(sex);
5529 self
5530 }
5531
5532 /// Set a phenotype for each individual (sample). Seldom used.
5533 ///
5534 /// Defaults to zeros.
5535 ///
5536 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5537 ///
5538 #[anyinput]
5539 #[must_use]
5540 pub fn pheno(mut self, pheno: AnyIter<AnyString>) -> Self {
5541 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5542 self.metadata.as_mut().unwrap().set_pheno(pheno);
5543 self
5544 }
5545
5546 /// Set the chromosome for each SNP (variant).
5547 ///
5548 /// Defaults to zeros.
5549 #[anyinput]
5550 #[must_use]
5551 pub fn chromosome(mut self, chromosome: AnyIter<AnyString>) -> Self {
5552 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5553 self.metadata.as_mut().unwrap().set_chromosome(chromosome);
5554 self
5555 }
5556
5557 /// Set the SNP id (sid) for each SNP (variant).
5558 ///
5559 /// Defaults to "sid1", "sid2", ...
5560 ///
5561 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5562 ///
5563 #[anyinput]
5564 #[must_use]
5565 pub fn sid(mut self, sid: AnyIter<AnyString>) -> Self {
5566 self.metadata.as_mut().unwrap().set_sid(sid);
5567 self
5568 }
5569
5570 /// Set the centimorgan position for each SNP (variant).
5571 ///
5572 /// Defaults to zeros.
5573 #[anyinput]
5574 #[must_use]
5575 pub fn cm_position(mut self, cm_position: AnyIter<f32>) -> Self {
5576 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5577 self.metadata.as_mut().unwrap().set_cm_position(cm_position);
5578 self
5579 }
5580
5581 /// Set the base-pair position for each SNP (variant).
5582 ///
5583 /// Defaults to zeros.
5584 ///
5585 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5586 ///
5587 #[anyinput]
5588 #[must_use]
5589 pub fn bp_position(mut self, bp_position: AnyIter<i32>) -> Self {
5590 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5591 self.metadata.as_mut().unwrap().set_bp_position(bp_position);
5592 self
5593 }
5594
5595 /// Set the first allele for each SNP (variant).
5596 ///
5597 /// Defaults to "A1", A1" ...
5598 ///
5599 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5600 ///
5601 #[anyinput]
5602 #[must_use]
5603 pub fn allele_1(mut self, allele_1: AnyIter<AnyString>) -> Self {
5604 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5605 self.metadata.as_mut().unwrap().set_allele_1(allele_1);
5606 self
5607 }
5608
5609 /// Set the second allele for each SNP (variant).
5610 ///
5611 /// Defaults to "A2", A2" ...
5612 ///
5613 /// > See [`WriteOptions`](struct.WriteOptions.html) for examples.
5614 ///
5615 #[anyinput]
5616 #[must_use]
5617 pub fn allele_2(mut self, allele_2: AnyIter<AnyString>) -> Self {
5618 // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5619 self.metadata.as_mut().unwrap().set_allele_2(allele_2);
5620 self
5621 }
5622
5623 /// Merge metadata from a [`Metadata`](struct.Metadata.html).
5624 ///
5625 /// If a field is set in both [`Metadata`](struct.Metadata.html)'s,
5626 /// it will be overridden.
5627 ///
5628 /// # Example
5629 ///
5630 /// Extract metadata from a file.
5631 /// Create a random file with the same metadata.
5632 /// ```
5633 /// use ndarray as nd;
5634 /// use bed_reader::{Bed, WriteOptions, sample_bed_file};
5635 /// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
5636 ///
5637 /// let mut bed = Bed::new(sample_bed_file("small.bed")?)?;
5638 /// let metadata = bed.metadata()?;
5639 /// let shape = bed.dim()?;
5640 ///
5641 /// let mut rng = StdRng::seed_from_u64(0);
5642 /// let val = nd::Array::random_using(shape, Uniform::from(-1..3), &mut rng);
5643 ///
5644 /// let temp_out = temp_testdir::TempDir::default();
5645 /// let output_file = temp_out.join("random.bed");
5646 /// WriteOptions::builder(output_file)
5647 /// .metadata(&metadata)
5648 /// .missing_value(-1)
5649 /// .write(&val)?;
5650 /// # use bed_reader::BedErrorPlus;
5651 /// # Ok::<(), Box<BedErrorPlus>>(())
5652 /// ```
5653 #[must_use]
5654 pub fn metadata(mut self, metadata: &Metadata) -> Self {
5655 self.metadata = Some(
5656 Metadata::builder()
5657 .metadata(&self.metadata.unwrap()) // Unwrap will always work because WriteOptionsBuilder starting with some metadata
5658 .metadata(metadata)
5659 .build_no_file_check() // Don't need to check consistent counts here. Builder will do it.
5660 .unwrap(), // Unwrap will always work nothing can go wrong
5661 );
5662 self
5663 }
5664
5665 /// Set the path to the .fam file.
5666 ///
5667 /// If not set, the .fam file will be assumed
5668 /// to have the same name as the .bed file, but with the extension .fam.
5669 ///
5670 /// # Example:
5671 /// Write .bed, .fam, and .bim files with non-standard names.
5672 /// ```
5673 /// use ndarray as nd;
5674 /// use bed_reader::WriteOptions;
5675 /// let output_folder = temp_testdir::TempDir::default();
5676 /// let output_file = output_folder.join("small.deb");
5677 /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
5678 /// WriteOptions::builder(output_file)
5679 /// .fam_path(output_folder.join("small.maf"))
5680 /// .bim_path(output_folder.join("small.mib"))
5681 /// .write(&val)?;
5682 /// # use bed_reader::BedErrorPlus;
5683 /// # Ok::<(), Box<BedErrorPlus>>(())
5684 /// ```
5685 #[anyinput]
5686 #[must_use]
5687 pub fn fam_path(mut self, path: AnyPath) -> Self {
5688 self.fam_path = Some(path.to_owned());
5689 self
5690 }
5691
5692 /// Set the path to the .bim file.
5693 ///
5694 /// If not set, the .bim file will be assumed
5695 /// to have the same name as the .bed file, but with the extension .bim.
5696 ///
5697 /// # Example:
5698 /// Write .bed, .fam, and .bim files with non-standard names.
5699 /// ```
5700 /// use ndarray as nd;
5701 /// use bed_reader::{WriteOptions};
5702 /// let output_folder = temp_testdir::TempDir::default();
5703 /// let output_file = output_folder.join("small.deb");
5704 /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
5705 /// WriteOptions::builder(output_file)
5706 /// .fam_path(output_folder.join("small.maf"))
5707 /// .bim_path(output_folder.join("small.mib"))
5708 /// .write(&val)?;
5709 /// # use bed_reader::BedErrorPlus;
5710 /// # Ok::<(), Box<BedErrorPlus>>(())
5711 /// ```
5712 #[anyinput]
5713 #[must_use]
5714 pub fn bim_path(mut self, path: AnyPath) -> Self {
5715 self.bim_path = Some(path.to_owned());
5716 self
5717 }
5718
5719 /// Value used for missing values (defaults to -127 or NaN)
5720 ///
5721 /// -127 is the default for i8 and NaN is the default for f32 and f64.
5722 ///
5723 /// # Example
5724 ///
5725 /// Extract metadata from a file.
5726 /// Create a random file with the same metadata.
5727 /// ```
5728 /// use ndarray as nd;
5729 /// use bed_reader::{Bed, WriteOptions, sample_bed_file};
5730 /// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
5731 ///
5732 /// let mut bed = Bed::new(sample_bed_file("small.bed")?)?;
5733 /// let metadata = bed.metadata()?;
5734 /// let shape = bed.dim()?;
5735 ///
5736 /// let mut rng = StdRng::seed_from_u64(0);
5737 /// let val = nd::Array::random_using(shape, Uniform::from(-1..3), &mut rng);
5738 ///
5739 /// let temp_out = temp_testdir::TempDir::default();
5740 /// let output_file = temp_out.join("random.bed");
5741 /// WriteOptions::builder(output_file)
5742 /// .metadata(&metadata)
5743 /// .missing_value(-1)
5744 /// .write(&val)?;
5745 /// # use bed_reader::BedErrorPlus;
5746 /// # Ok::<(), Box<BedErrorPlus>>(())
5747 /// ```
5748 pub fn missing_value(&mut self, missing_value: TVal) -> &mut Self {
5749 self.missing_value = Some(missing_value);
5750 self
5751 }
5752
5753 /// Count the number allele 1 (default and PLINK standard).
5754 ///
5755 /// Also see [`is_a1_counted`](struct.WriteOptionsBuilder.html#method.is_a1_counted) and [`count_a2`](struct.WriteOptionsBuilder.html#method.count_a2).
5756 pub fn count_a1(&mut self) -> &mut Self {
5757 self.is_a1_counted = Some(true);
5758 self
5759 }
5760
5761 /// Count the number allele 2.
5762 ///
5763 /// Also see [`is_a1_counted`](struct.WriteOptionsBuilder.html#method.is_a1_counted) and [`count_a1`](struct.WriteOptionsBuilder.html#method.count_a1).
5764 pub fn count_a2(&mut self) -> &mut Self {
5765 self.is_a1_counted = Some(false);
5766 self
5767 }
5768
5769 /// Sets if allele 1 is counted. Default is true.
5770 ///
5771 /// Also see [`count_a1`](struct.WriteOptionsBuilder.html#method.count_a1) and [`count_a2`](struct.WriteOptionsBuilder.html#method.count_a2).
5772 pub fn is_a1_counted(&mut self, is_a1_counted: bool) -> &mut Self {
5773 self.is_a1_counted = Some(is_a1_counted);
5774 self
5775 }
5776
5777 /// Number of threads to use (defaults to all processors)
5778 ///
5779 /// Can also be set with an environment variable.
5780 /// See [Environment Variables](index.html#environment-variables).
5781 ///
5782 ///
5783 /// # Example:
5784 ///
5785 /// Write using only one thread.
5786 /// ```
5787 /// use ndarray as nd;
5788 /// use bed_reader::WriteOptions;
5789 /// let output_folder = temp_testdir::TempDir::default();
5790 /// let output_file = output_folder.join("small.bed");
5791 /// let val = nd::array![[1, 0, -127, 0], [2, 0, -127, 2], [0, 1, 2, 0]];
5792 /// WriteOptions::builder(output_file)
5793 /// .num_threads(1)
5794 /// .write(&val)?;
5795 /// # use bed_reader::BedErrorPlus;
5796 /// # Ok::<(), Box<BedErrorPlus>>(())
5797 /// ```
5798 pub fn num_threads(&mut self, num_threads: usize) -> &mut Self {
5799 self.num_threads = Some(Some(num_threads));
5800 self
5801 }
5802
5803 /// Skip writing .fam file.
5804 ///
5805 /// # Example
5806 /// ```
5807 /// use ndarray as nd;
5808 /// use bed_reader::{Bed, WriteOptions};
5809 /// let output_folder = temp_testdir::TempDir::default();
5810 /// let output_file = output_folder.join("small.bed");
5811 /// let write_options = WriteOptions::builder(output_file)
5812 /// .i8()
5813 /// .skip_fam()
5814 /// .skip_bim()
5815 /// .build(3, 4)?;
5816 /// assert!(write_options.skip_fam());
5817 /// assert!(write_options.skip_bim());
5818 /// # use bed_reader::BedErrorPlus;
5819 /// # Ok::<(), Box<BedErrorPlus>>(())
5820 /// ```
5821 pub fn skip_fam(&mut self) -> &mut Self {
5822 self.skip_fam = Some(true);
5823 self
5824 }
5825
5826 /// Skip writing .bim file.
5827 ///
5828 /// # Example
5829 /// ```
5830 /// use ndarray as nd;
5831 /// use bed_reader::{Bed, WriteOptions};
5832 /// let output_folder = temp_testdir::TempDir::default();
5833 /// let output_file = output_folder.join("small.bed");
5834 /// let write_options = WriteOptions::builder(output_file)
5835 /// .i8()
5836 /// .skip_fam()
5837 /// .skip_bim()
5838 /// .build(3, 4)?;
5839 /// assert!(write_options.skip_fam());
5840 /// assert!(write_options.skip_bim());
5841 /// # use bed_reader::BedErrorPlus;
5842 /// # Ok::<(), Box<BedErrorPlus>>(())
5843 /// ```
5844 pub fn skip_bim(&mut self) -> &mut Self {
5845 self.skip_bim = Some(true);
5846 self
5847 }
5848
5849 /// Creates a new [`WriteOptions`](struct.WriteOptions.html) with the options given.
5850 ///
5851 /// > Also see [`WriteOptionsBuilder::write`](struct.WriteOptionsBuilder.html#method.write), which creates
5852 /// > a [`WriteOptions`](struct.WriteOptions.html) and writes to file in one step.
5853 ///
5854 /// # Example
5855 /// Create a new [`WriteOptions`](struct.WriteOptions.html) with some given values and some
5856 /// default values. Then use it to write a .bed file.
5857 /// ```
5858 /// use ndarray as nd;
5859 /// use bed_reader::{WriteOptions, Bed};
5860 ///
5861 /// let output_folder = temp_testdir::TempDir::default();
5862 /// let output_file = output_folder.join("small.bed");
5863 /// let write_options = WriteOptions::builder(output_file)
5864 /// .f64()
5865 /// .iid(["i1", "i2", "i3"])
5866 /// .sid(["s1", "s2", "s3", "s4"])
5867 /// .build(3, 4)?;
5868 /// println!("{0:?}", write_options.fid()); // Outputs ndarray ["0", "0", "0"]
5869 /// println!("{0:?}", write_options.iid()); // Outputs ndarray ["i1", "i2", "i3"]
5870 ///
5871 /// let val = nd::array![
5872 /// [1.0, 0.0, f64::NAN, 0.0],
5873 /// [2.0, 0.0, f64::NAN, 2.0],
5874 /// [0.0, 1.0, 2.0, 0.0]
5875 /// ];
5876 /// Bed::write_with_options(&val, &write_options)?;
5877 /// # use bed_reader::BedErrorPlus;
5878 /// # Ok::<(), Box<BedErrorPlus>>(())
5879 /// ```
5880 pub fn build(
5881 &self,
5882 iid_count: usize,
5883 sid_count: usize,
5884 ) -> Result<WriteOptions<TVal>, Box<BedErrorPlus>> {
5885 let Some(path) = self.path.as_ref() else {
5886 Err(BedError::UninitializedField("path"))?
5887 };
5888
5889 // unwrap always works because the metadata builder always initializes metadata
5890 let metadata = self.metadata.as_ref().unwrap();
5891 let metadata = metadata.fill(iid_count, sid_count)?;
5892
5893 let write_options = WriteOptions {
5894 path: path.to_owned(),
5895 fam_path: to_metadata_path(path, self.fam_path.as_ref(), "fam"),
5896 bim_path: to_metadata_path(path, self.bim_path.as_ref(), "bim"),
5897 is_a1_counted: self.is_a1_counted.unwrap_or(true),
5898 num_threads: self.num_threads.unwrap_or(None),
5899 missing_value: self.missing_value.unwrap_or_else(|| TVal::missing()),
5900 skip_fam: self.skip_fam.unwrap_or(false),
5901 skip_bim: self.skip_bim.unwrap_or(false),
5902
5903 metadata,
5904 };
5905 Ok(write_options)
5906 }
5907
5908 #[anyinput]
5909 fn new(path: AnyPath) -> Self {
5910 Self {
5911 path: Some(path.to_owned()),
5912 fam_path: None,
5913 bim_path: None,
5914
5915 metadata: Some(Metadata::new()),
5916
5917 is_a1_counted: None,
5918 num_threads: None,
5919 missing_value: None,
5920 skip_fam: None,
5921 skip_bim: None,
5922 }
5923 }
5924}
5925
5926trait FromStringArray<T> {
5927 #[allow(dead_code)]
5928 fn from_string_array(
5929 string_array: nd::Array1<String>,
5930 ) -> Result<nd::Array1<Self>, Box<BedErrorPlus>>
5931 where
5932 Self: Sized;
5933}
5934
5935impl FromStringArray<String> for String {
5936 fn from_string_array(
5937 string_array: nd::Array1<String>,
5938 ) -> Result<nd::Array1<String>, Box<BedErrorPlus>> {
5939 Ok(string_array)
5940 }
5941}
5942
5943impl FromStringArray<f32> for f32 {
5944 fn from_string_array(
5945 string_array: nd::Array1<String>,
5946 ) -> Result<nd::Array1<f32>, Box<BedErrorPlus>> {
5947 let result = string_array
5948 .iter()
5949 .map(|s| s.parse::<f32>())
5950 .collect::<Result<nd::Array1<f32>, _>>();
5951 match result {
5952 Ok(array) => Ok(array),
5953 Err(e) => Err(Box::new(BedErrorPlus::ParseFloatError(e))),
5954 }
5955 }
5956}
5957impl FromStringArray<i32> for i32 {
5958 fn from_string_array(
5959 string_array: nd::Array1<String>,
5960 ) -> Result<nd::Array1<i32>, Box<BedErrorPlus>> {
5961 let result = string_array
5962 .iter()
5963 .map(|s| s.parse::<i32>())
5964 .collect::<Result<nd::Array1<i32>, _>>();
5965 match result {
5966 Ok(array) => Ok(array),
5967 Err(e) => Err(Box::new(BedErrorPlus::ParseIntError(e))),
5968 }
5969 }
5970}
5971
5972/// Asserts two 2-D arrays are equal, treating NaNs as values.
5973///
5974/// # Example
5975/// ```
5976/// use std::f64::NAN;
5977/// use ndarray as nd;
5978/// use bed_reader::assert_eq_nan;
5979/// let val1 = nd::arr2(&[[1.0, 2.0], [3.0, NAN]]);
5980/// let val2 = nd::arr2(&[[1.0, 2.0], [3.0, NAN]]);
5981/// assert_eq_nan(&val1, &val2);
5982/// # use bed_reader::BedErrorPlus;
5983/// # Ok::<(), Box<BedErrorPlus>>(())
5984/// ```
5985pub fn assert_eq_nan<T: 'static + Copy + PartialEq + PartialOrd + Signed + From<i8>>(
5986 val: &nd::ArrayBase<nd::OwnedRepr<T>, nd::Dim<[usize; 2]>>,
5987 answer: &nd::ArrayBase<nd::OwnedRepr<T>, nd::Dim<[usize; 2]>>,
5988) {
5989 assert!(allclose::<T, T>(
5990 &val.view(),
5991 &answer.view(),
5992 0.into(),
5993 true
5994 ));
5995}
5996
/// Asserts that a result is an `Err` holding a boxed error that matches the given pattern.
///
/// Panics with "test failure" if the result is not an `Err` or if the boxed
/// error does not match the pattern.
#[macro_export]
macro_rules! assert_error_variant {
    ($result:expr, $pattern:pat) => {
        match $result {
            // Look through the box and compare the error itself with the pattern.
            Err(ref err_box) if matches!(**err_box, $pattern) => (),
            _ => panic!("test failure"),
        }
    };
}
6010
6011/// True if and only if two 2-D arrays are equal, within a given tolerance and possibly treating NaNs as values.
6012///
6013/// # Example
6014/// ```
6015/// use std::f64::NAN;
6016/// use ndarray as nd;
6017/// use bed_reader::allclose;
6018/// let val1 = nd::arr2(&[[1.0, 2.000000000001], [3.0, NAN]]);
6019/// let val2 = nd::arr2(&[[1.0, 2.0], [3.0, NAN]]);
6020/// assert!(allclose(&val1.view(), &val2.view(), 1e-08, true));
6021/// # use bed_reader::BedErrorPlus;
6022/// # Ok::<(), Box<BedErrorPlus>>(())
6023/// ```
6024pub fn allclose<
6025 T1: 'static + Copy + PartialEq + PartialOrd + Signed,
6026 T2: 'static + Copy + PartialEq + PartialOrd + Signed + Into<T1>,
6027>(
6028 val1: &nd::ArrayView2<'_, T1>,
6029 val2: &nd::ArrayView2<'_, T2>,
6030 atol: T1,
6031 equal_nan: bool,
6032) -> bool {
6033 assert!(val1.dim() == val2.dim());
6034 // Could be run in parallel
6035
6036 nd::Zip::from(val1)
6037 .and(val2)
6038 .fold(true, |acc, ptr_a, ptr_b| -> bool {
6039 if !acc {
6040 return false;
6041 }
6042 // x != x is a generic nan check
6043 #[allow(clippy::eq_op)]
6044 let a_nan = *ptr_a != *ptr_a;
6045 #[allow(clippy::eq_op)]
6046 let b_nan = *ptr_b != *ptr_b;
6047
6048 if a_nan || b_nan {
6049 if equal_nan {
6050 a_nan == b_nan
6051 } else {
6052 false
6053 }
6054 } else {
6055 let c: T1 = abs(*ptr_a - T2::into(*ptr_b));
6056 c <= atol
6057 }
6058 })
6059}
6060
6061impl WriteOptionsBuilder<i8> {
6062 /// The input ndarray will be i8.
6063 #[must_use]
6064 pub fn i8(self) -> Self {
6065 self
6066 }
6067}
6068
6069impl WriteOptionsBuilder<f32> {
6070 /// The input ndarray will be f32.
6071 #[must_use]
6072 pub fn f32(self) -> Self {
6073 self
6074 }
6075}
6076
6077impl WriteOptionsBuilder<f64> {
6078 /// The input ndarray will be f64.
6079 #[must_use]
6080 pub fn f64(self) -> Self {
6081 self
6082 }
6083}
6084
6085fn check_counts(
6086 count_vec: Vec<Option<usize>>,
6087 option_xid_count: &mut Option<usize>,
6088 prefix: &str,
6089) -> Result<(), Box<BedErrorPlus>> {
6090 for count in count_vec.into_iter().flatten() {
6091 if let Some(xid_count) = option_xid_count {
6092 if *xid_count != count {
6093 Err(BedError::InconsistentCount(
6094 prefix.to_string(),
6095 *xid_count,
6096 count,
6097 ))?;
6098 }
6099 } else {
6100 *option_xid_count = Some(count);
6101 }
6102 }
6103
6104 Ok(())
6105}
6106
6107// According to https://docs.rs/derive_builder/latest/derive_builder/
6108// "clone" is OK because "Luckily Rust is clever enough to optimize these
6109// clone-calls away in release builds for your every-day use cases.
6110// Thats quite a safe bet - we checked this for you. ;-)"
6111fn compute_field<T: Clone, F: Fn(usize) -> T>(
6112 field_name: &str,
6113 field: &mut Option<Rc<nd::Array1<T>>>,
6114 count: usize,
6115 lambda: F,
6116) -> Result<(), Box<BedErrorPlus>> {
6117 // let lambda = |_| "0".to_string();
6118 // let count = iid_count;
6119 // let field = &mut metadata.fid;
6120
6121 if let Some(array) = field {
6122 if array.len() != count {
6123 Err(BedError::InconsistentCount(
6124 field_name.to_string(),
6125 array.len(),
6126 count,
6127 ))?;
6128 }
6129 } else {
6130 let array = Rc::new((0..count).map(lambda).collect::<nd::Array1<T>>());
6131 *field = Some(array);
6132 }
6133 Ok(())
6134}
6135
6136impl MetadataBuilder {
6137 /// Create a [`Metadata`](struct.Metadata.html) from the builder.
6138 ///
6139 /// > See [`Metadata::builder()`](struct.Metadata.html#method.builder)
6140 pub fn build(&self) -> Result<Metadata, Box<BedErrorPlus>> {
6141 let metadata = self.build_no_file_check()?;
6142
6143 metadata.check_counts(None, None)?;
6144
6145 Ok(metadata)
6146 }
6147
6148 /// Set the family id (fid) values.
6149 #[anyinput]
6150 pub fn fid(&mut self, fid: AnyIter<AnyString>) -> &mut Self {
6151 self.fid = Some(Some(Rc::new(fid.map(|s| s.as_ref().to_string()).collect())));
6152 self
6153 }
6154
6155 /// Set the individual id (iid) values.
6156 /// ```
6157 /// use ndarray as nd;
6158 /// use bed_reader::{Metadata, assert_eq_nan};
6159 ///
6160 /// let metadata = Metadata::builder()
6161 /// .iid(["sample1", "sample2", "sample3"])
6162 /// .build()?;
6163 /// println!("{:?}", metadata.iid()); // Outputs ndarray Some(["sample1", "sample2", "sample3"])
6164 /// # use bed_reader::BedErrorPlus;
6165 /// # Ok::<(), Box<BedErrorPlus>>(())
6166 /// ```
6167 #[anyinput]
6168 pub fn iid(&mut self, iid: AnyIter<AnyString>) -> &mut Self {
6169 self.iid = Some(Some(Rc::new(iid.map(|s| s.as_ref().to_owned()).collect())));
6170 self
6171 }
6172
6173 /// Set the father values.
6174 #[anyinput]
6175 pub fn father(&mut self, father: AnyIter<AnyString>) -> &mut Self {
6176 self.father = Some(Some(Rc::new(
6177 father.map(|s| s.as_ref().to_owned()).collect(),
6178 )));
6179 self
6180 }
6181
6182 /// Override the mother values.
6183 #[anyinput]
6184 pub fn mother(&mut self, mother: AnyIter<AnyString>) -> &mut Self {
6185 self.mother = Some(Some(Rc::new(
6186 mother.map(|s| s.as_ref().to_owned()).collect(),
6187 )));
6188 self
6189 }
6190
6191 /// Override the sex values.
6192 #[anyinput]
6193 pub fn sex(&mut self, sex: AnyIter<i32>) -> &mut Self {
6194 self.sex = Some(Some(Rc::new(sex.collect())));
6195 self
6196 }
6197
6198 /// Override the phenotype values.
6199 #[anyinput]
6200 pub fn pheno(&mut self, pheno: AnyIter<AnyString>) -> &mut Self {
6201 self.pheno = Some(Some(Rc::new(
6202 pheno.map(|s| s.as_ref().to_owned()).collect(),
6203 )));
6204 self
6205 }
6206
6207 /// Override the chromosome values.
6208 #[anyinput]
6209 pub fn chromosome(&mut self, chromosome: AnyIter<AnyString>) -> &mut Self {
6210 self.chromosome = Some(Some(Rc::new(
6211 chromosome.map(|s| s.as_ref().to_owned()).collect(),
6212 )));
6213 self
6214 }
6215
6216 /// Override the SNP id (sid) values.
6217 /// ```
6218 /// use ndarray as nd;
6219 /// use bed_reader::{Metadata, assert_eq_nan};
6220 ///
6221 /// let metadata = Metadata::builder()
6222 /// .sid(["SNP1", "SNP2", "SNP3", "SNP4"])
6223 /// .build()?;
6224 /// println!("{:?}", metadata.sid()); // Outputs ndarray Some(["SNP1", "SNP2", "SNP3", "SNP4"])
6225 /// # use bed_reader::BedErrorPlus;
6226 /// # Ok::<(), Box<BedErrorPlus>>(())
6227 /// ```
6228 #[anyinput]
6229 pub fn sid(&mut self, sid: AnyIter<AnyString>) -> &mut Self {
6230 self.sid = Some(Some(Rc::new(
6231 sid.into_iter().map(|s| s.as_ref().to_owned()).collect(),
6232 )));
6233 self
6234 }
6235
6236 /// Override the centimorgan position values.
6237 #[anyinput]
6238 pub fn cm_position(&mut self, cm_position: AnyIter<f32>) -> &mut Self {
6239 self.cm_position = Some(Some(Rc::new(cm_position.into_iter().collect())));
6240 self
6241 }
6242
6243 /// Override the base-pair position values.
6244 #[anyinput]
6245 pub fn bp_position(&mut self, bp_position: AnyIter<i32>) -> &mut Self {
6246 self.bp_position = Some(Some(Rc::new(bp_position.into_iter().collect())));
6247 self
6248 }
6249
6250 /// Override the allele 1 values.
6251 #[anyinput]
6252 pub fn allele_1(&mut self, allele_1: AnyIter<AnyString>) -> &mut Self {
6253 self.allele_1 = Some(Some(Rc::new(
6254 allele_1
6255 .into_iter()
6256 .map(|s| s.as_ref().to_owned())
6257 .collect(),
6258 )));
6259 self
6260 }
6261
6262 /// Override the allele 2 values.
6263 #[anyinput]
6264 pub fn allele_2(&mut self, allele_2: AnyIter<AnyString>) -> &mut Self {
6265 self.allele_2 = Some(Some(Rc::new(
6266 allele_2
6267 .into_iter()
6268 .map(|s| s.as_ref().to_owned())
6269 .collect(),
6270 )));
6271 self
6272 }
6273
6274 /// Merge metadata from a [`Metadata`](struct.Metadata.html).
6275 ///
6276 /// # Example
6277 ///
6278 /// In the example, we create a [`Metadata`](struct.Metadata.html) with iid
6279 /// and sid arrays. Next, we use another [`MetadataBuilder`](struct.MetadataBuilder.html) to set an fid array
6280 /// and an iid array. Then, we add the first [`Metadata`](struct.Metadata.html)
6281 /// to the [`MetadataBuilder`](struct.MetadataBuilder.html),
6282 /// overwriting iid and setting sid. Finally, we print these
6283 /// three arrays and chromosome. Chromosome is `None`.
6284 ///```
6285 /// use ndarray as nd;
6286 /// use bed_reader::Metadata;
6287 ///
6288 /// let metadata1 = Metadata::builder()
6289 /// .iid(["i1", "i2", "i3"])
6290 /// .sid(["s1", "s2", "s3", "s4"])
6291 /// .build()?;
6292 /// let metadata2 = Metadata::builder()
6293 /// .fid(["f1", "f2", "f3"])
6294 /// .iid(["x1", "x2", "x3"])
6295 /// .metadata(&metadata1)
6296 /// .build()?;
6297 ///
6298 /// println!("{0:?}", metadata2.fid()); // Outputs optional ndarray Some(["f1", "f2", "f3"]...)
6299 /// println!("{0:?}", metadata2.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
6300 /// println!("{0:?}", metadata2.sid()); // Outputs optional ndarray Some(["s1", "s2", "s3", "s4"]...)
6301 /// println!("{0:?}", metadata2.chromosome()); // Outputs None
6302 /// # use bed_reader::BedErrorPlus;
6303 /// # Ok::<(), Box<BedErrorPlus>>(())
6304 /// ```
6305 pub fn metadata(&mut self, metadata: &Metadata) -> &mut Self {
6306 set_field(metadata.fid.as_ref(), &mut self.fid);
6307 set_field(metadata.iid.as_ref(), &mut self.iid);
6308 set_field(metadata.father.as_ref(), &mut self.father);
6309 set_field(metadata.mother.as_ref(), &mut self.mother);
6310 set_field(metadata.sex.as_ref(), &mut self.sex);
6311 set_field(metadata.pheno.as_ref(), &mut self.pheno);
6312
6313 set_field(metadata.chromosome.as_ref(), &mut self.chromosome);
6314 set_field(metadata.sid.as_ref(), &mut self.sid);
6315 set_field(metadata.cm_position.as_ref(), &mut self.cm_position);
6316 set_field(metadata.bp_position.as_ref(), &mut self.bp_position);
6317 set_field(metadata.allele_1.as_ref(), &mut self.allele_1);
6318 set_field(metadata.allele_2.as_ref(), &mut self.allele_2);
6319 self
6320 }
6321}
6322
6323impl Default for Metadata {
6324 fn default() -> Self {
6325 Self::new()
6326 }
6327}
6328
6329impl Metadata {
6330 fn check_counts(
6331 &self,
6332 mut iid_count: Option<usize>,
6333 mut sid_count: Option<usize>,
6334 ) -> Result<(Option<usize>, Option<usize>), Box<BedErrorPlus>> {
6335 check_counts(
6336 vec![
6337 lazy_or_skip_count(self.fid.as_ref()),
6338 lazy_or_skip_count(self.iid.as_ref()),
6339 lazy_or_skip_count(self.father.as_ref()),
6340 lazy_or_skip_count(self.mother.as_ref()),
6341 lazy_or_skip_count(self.sex.as_ref()),
6342 lazy_or_skip_count(self.pheno.as_ref()),
6343 ],
6344 &mut iid_count,
6345 "iid",
6346 )?;
6347 check_counts(
6348 vec![
6349 lazy_or_skip_count(self.chromosome.as_ref()),
6350 lazy_or_skip_count(self.sid.as_ref()),
6351 lazy_or_skip_count(self.cm_position.as_ref()),
6352 lazy_or_skip_count(self.bp_position.as_ref()),
6353 lazy_or_skip_count(self.allele_1.as_ref()),
6354 lazy_or_skip_count(self.allele_2.as_ref()),
6355 ],
6356 &mut sid_count,
6357 "sid",
6358 )?;
6359 Ok((iid_count, sid_count))
6360 }
6361
6362 /// Create a [`Metadata`](struct.Metadata.html) using a builder.
6363 ///
6364 /// # Example
6365 /// Create metadata.
6366 /// Create a random file with the metadata.
6367 /// ```
6368 /// use ndarray as nd;
6369 /// use bed_reader::{Metadata, WriteOptions};
6370 /// use ndarray_rand::{rand::prelude::StdRng, rand::SeedableRng, rand_distr::Uniform, RandomExt};
6371 ///
6372 /// let metadata = Metadata::builder()
6373 /// .iid(["i1", "i2", "i3"])
6374 /// .sid(["s1", "s2", "s3", "s4"])
6375 /// .build()?;
6376 /// let mut rng = StdRng::seed_from_u64(0);
6377 /// let val = nd::Array::random_using((3, 4), Uniform::from(-1..3), &mut rng);
6378 /// let temp_out = temp_testdir::TempDir::default();
6379 /// let output_file = temp_out.join("random.bed");
6380 /// WriteOptions::builder(output_file)
6381 /// .metadata(&metadata)
6382 /// .missing_value(-1)
6383 /// .write(&val)?;
6384 /// # use bed_reader::BedErrorPlus;
6385 /// # Ok::<(), Box<BedErrorPlus>>(())
6386 /// ```
6387 #[must_use]
6388 pub fn builder() -> MetadataBuilder {
6389 MetadataBuilder::default()
6390 }
6391
6392 /// Create an empty [`Metadata`](struct.Metadata.html).
6393 ///
6394 /// > See [`Metadata::builder()`](struct.Metadata.html#method.builder)
6395 #[must_use]
6396 pub fn new() -> Metadata {
6397 // Unwrap always works because an empty metadata builder always works.
6398 Metadata::builder().build().unwrap()
6399 }
6400
6401 /// Optional family id of each of individual (sample)
6402 #[must_use]
6403 pub fn fid(&self) -> Option<&nd::Array1<String>> {
6404 option_rc_as_ref(self.fid.as_ref())
6405 }
6406
6407 /// Optional individual id of each of individual (sample)
6408 ///
6409 /// # Example:
6410 /// ```
6411 /// use ndarray as nd;
6412 /// use bed_reader::Metadata;
6413 /// let metadata = Metadata::builder().iid(["i1", "i2", "i3"]).build()?;
6414 /// println!("{0:?}", metadata.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
6415 /// println!("{0:?}", metadata.sid()); // Outputs None
6416 /// # use bed_reader::BedErrorPlus;
6417 /// # Ok::<(), Box<BedErrorPlus>>(())
6418 #[must_use]
6419 pub fn iid(&self) -> Option<&nd::Array1<String>> {
6420 option_rc_as_ref(self.iid.as_ref())
6421 }
6422
6423 /// Optional father id of each of individual (sample)
6424 #[must_use]
6425 pub fn father(&self) -> Option<&nd::Array1<String>> {
6426 option_rc_as_ref(self.father.as_ref())
6427 }
6428
6429 /// Optional mother id of each of individual (sample)
6430 #[must_use]
6431 pub fn mother(&self) -> Option<&nd::Array1<String>> {
6432 option_rc_as_ref(self.mother.as_ref())
6433 }
6434
6435 /// Optional sex each of individual (sample)
6436 #[must_use]
6437 pub fn sex(&self) -> Option<&nd::Array1<i32>> {
6438 option_rc_as_ref(self.sex.as_ref())
6439 }
6440
6441 /// Optional phenotype for each individual (seldom used)
6442 #[must_use]
6443 pub fn pheno(&self) -> Option<&nd::Array1<String>> {
6444 option_rc_as_ref(self.pheno.as_ref())
6445 }
6446
6447 /// Optional chromosome of each SNP (variant)
6448 #[must_use]
6449 pub fn chromosome(&self) -> Option<&nd::Array1<String>> {
6450 option_rc_as_ref(self.chromosome.as_ref())
6451 }
6452
6453 /// Optional SNP id of each SNP (variant)
6454 ///
6455 /// # Example:
6456 /// ```
6457 /// use ndarray as nd;
6458 /// use bed_reader::Metadata;
6459 /// let metadata = Metadata::builder().iid(["i1", "i2", "i3"]).build()?;
6460 /// println!("{0:?}", metadata.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
6461 /// println!("{0:?}", metadata.sid()); // Outputs None
6462 /// # use bed_reader::BedErrorPlus;
6463 /// # Ok::<(), Box<BedErrorPlus>>(())
6464 #[must_use]
6465 pub fn sid(&self) -> Option<&nd::Array1<String>> {
6466 option_rc_as_ref(self.sid.as_ref())
6467 }
6468
6469 /// Optional centimorgan position of each SNP (variant)
6470 #[must_use]
6471 pub fn cm_position(&self) -> Option<&nd::Array1<f32>> {
6472 option_rc_as_ref(self.cm_position.as_ref())
6473 }
6474
6475 /// Optional base-pair position of each SNP (variant)
6476 #[must_use]
6477 pub fn bp_position(&self) -> Option<&nd::Array1<i32>> {
6478 option_rc_as_ref(self.bp_position.as_ref())
6479 }
6480
6481 /// Optional first allele of each SNP (variant)
6482 #[must_use]
6483 pub fn allele_1(&self) -> Option<&nd::Array1<String>> {
6484 option_rc_as_ref(self.allele_1.as_ref())
6485 }
6486
6487 /// Optional second allele of each SNP (variant)
6488 #[must_use]
6489 pub fn allele_2(&self) -> Option<&nd::Array1<String>> {
6490 option_rc_as_ref(self.allele_2.as_ref())
6491 }
6492
6493 /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty fields with a .fam file.
6494 ///
6495 /// # Example
6496 ///
6497 /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6498 /// Do not skip any fields.
6499 /// ```
6500 /// use ndarray as nd;
6501 /// use std::collections::HashSet;
6502 /// use bed_reader::{Metadata, MetadataFields, sample_file};
6503 ///
6504 /// let skip_set = HashSet::<MetadataFields>::new();
6505 /// let metadata_empty = Metadata::new();
6506 /// let (metadata_fam, iid_count) =
6507 /// metadata_empty.read_fam(sample_file("small.fam")?, &skip_set)?;
6508 /// let (metadata_bim, sid_count) =
6509 /// metadata_fam.read_bim(sample_file("small.bim")?, &skip_set)?;
6510 /// assert_eq!(iid_count, 3);
6511 /// assert_eq!(sid_count, 4);
6512 /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6513 /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6514 /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6515 /// # use bed_reader::BedErrorPlus;
6516 /// # Ok::<(), Box<BedErrorPlus>>(())
6517 /// ```
6518 #[anyinput]
6519 pub fn read_fam(
6520 &self,
6521 path: AnyPath,
6522 skip_set: &HashSet<MetadataFields>,
6523 ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6524 let mut field_vec: Vec<usize> = Vec::new();
6525
6526 if self.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6527 field_vec.push(0);
6528 }
6529 if self.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6530 field_vec.push(1);
6531 }
6532 if self.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6533 field_vec.push(2);
6534 }
6535 if self.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6536 field_vec.push(3);
6537 }
6538 if self.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6539 field_vec.push(4);
6540 }
6541 if self.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6542 field_vec.push(5);
6543 }
6544
6545 let (mut vec_of_vec, count) = Metadata::read_fam_or_bim(&field_vec, true, path)?;
6546
6547 let mut clone = self.clone();
6548
6549 // unwraps are safe because we pop once for every push
6550 if clone.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6551 clone.pheno = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6552 }
6553 if clone.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6554 let vec = vec_of_vec.pop().unwrap();
6555 let array = vec
6556 .iter()
6557 .map(|s| s.parse::<i32>())
6558 .collect::<Result<nd::Array1<i32>, _>>()?;
6559 clone.sex = Some(Rc::new(array));
6560 }
6561 if clone.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6562 clone.mother = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6563 }
6564 if clone.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6565 clone.father = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6566 }
6567 if clone.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6568 clone.iid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6569 }
6570 if clone.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6571 clone.fid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6572 }
6573
6574 clone.check_counts(Some(count), None)?;
6575
6576 Ok((clone, count))
6577 }
6578
6579 /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty
6580 /// fields with a .fam file in the cloud.
6581 ///
6582 /// # Example
6583 ///
6584 /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6585 /// Do not skip any fields.
6586 /// ```
6587 /// use ndarray as nd;
6588 /// use std::collections::HashSet;
6589 /// use bed_reader::{Metadata, MetadataFields, sample_url, CloudFile};
6590 ///
6591 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
6592 /// let skip_set = HashSet::<MetadataFields>::new();
6593 /// let fam_cloud_file = CloudFile::new(sample_url("small.fam")?)?;
6594 /// let bim_cloud_file = CloudFile::new(sample_url("small.bim")?)?;
6595 /// let metadata_empty = Metadata::new();
6596 /// let (metadata_fam, iid_count) =
6597 /// metadata_empty.read_fam_cloud(&fam_cloud_file, &skip_set).await?;
6598 /// let (metadata_bim, sid_count) =
6599 /// metadata_fam.read_bim_cloud(&bim_cloud_file, &skip_set).await?;
6600 /// assert_eq!(iid_count, 3);
6601 /// assert_eq!(sid_count, 4);
6602 /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6603 /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6604 /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6605 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
6606 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
6607 /// ```
6608 pub async fn read_fam_cloud(
6609 &self,
6610 cloud_file: &CloudFile,
6611 skip_set: &HashSet<MetadataFields>,
6612 ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6613 let mut field_vec: Vec<usize> = Vec::new();
6614
6615 if self.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6616 field_vec.push(0);
6617 }
6618 if self.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6619 field_vec.push(1);
6620 }
6621 if self.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6622 field_vec.push(2);
6623 }
6624 if self.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6625 field_vec.push(3);
6626 }
6627 if self.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6628 field_vec.push(4);
6629 }
6630 if self.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6631 field_vec.push(5);
6632 }
6633
6634 let (mut vec_of_vec, count) = self
6635 .read_fam_or_bim_cloud(&field_vec, true, cloud_file)
6636 .await?;
6637
6638 let mut clone = self.clone();
6639
6640 // unwraps are safe because we pop once for every push
6641 if clone.pheno.is_none() && !skip_set.contains(&MetadataFields::Pheno) {
6642 clone.pheno = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6643 }
6644 if clone.sex.is_none() && !skip_set.contains(&MetadataFields::Sex) {
6645 let vec = vec_of_vec.pop().unwrap();
6646 let array = vec
6647 .iter()
6648 .map(|s| s.parse::<i32>())
6649 .collect::<Result<nd::Array1<i32>, _>>()?;
6650 clone.sex = Some(Rc::new(array));
6651 }
6652 if clone.mother.is_none() && !skip_set.contains(&MetadataFields::Mother) {
6653 clone.mother = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6654 }
6655 if clone.father.is_none() && !skip_set.contains(&MetadataFields::Father) {
6656 clone.father = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6657 }
6658 if clone.iid.is_none() && !skip_set.contains(&MetadataFields::Iid) {
6659 clone.iid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6660 }
6661 if clone.fid.is_none() && !skip_set.contains(&MetadataFields::Fid) {
6662 clone.fid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6663 }
6664
6665 clone.check_counts(Some(count), None)?;
6666
6667 Ok((clone, count))
6668 }
6669
6670 /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty fields with a .bim file.
6671 ///
6672 /// # Example
6673 ///
6674 /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6675 /// Do not skip any fields.
6676 /// ```
6677 /// use ndarray as nd;
6678 /// use std::collections::HashSet;
6679 /// use bed_reader::{Metadata, MetadataFields, sample_file};
6680 ///
6681 /// let skip_set = HashSet::<MetadataFields>::new();
6682 /// let metadata_empty = Metadata::new();
6683 /// let (metadata_fam, iid_count) =
6684 /// metadata_empty.read_fam(sample_file("small.fam")?, &skip_set)?;
6685 /// let (metadata_bim, sid_count) =
6686 /// metadata_fam.read_bim(sample_file("small.bim")?, &skip_set)?;
6687 /// assert_eq!(iid_count, 3);
6688 /// assert_eq!(sid_count, 4);
6689 /// println!("{0:?}", metadata_bim.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6690 /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6691 /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6692 /// # use bed_reader::BedErrorPlus;
6693 /// # Ok::<(), Box<BedErrorPlus>>(())
6694 /// ```
6695 #[anyinput]
6696 pub fn read_bim(
6697 &self,
6698 path: AnyPath,
6699 skip_set: &HashSet<MetadataFields>,
6700 ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6701 let mut field_vec: Vec<usize> = Vec::new();
6702 if self.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6703 field_vec.push(0);
6704 }
6705 if self.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6706 field_vec.push(1);
6707 }
6708
6709 if self.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6710 field_vec.push(2);
6711 }
6712 if self.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6713 field_vec.push(3);
6714 }
6715 if self.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6716 field_vec.push(4);
6717 }
6718 if self.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6719 field_vec.push(5);
6720 }
6721
6722 let mut clone = self.clone();
6723 let (mut vec_of_vec, count) = Metadata::read_fam_or_bim(&field_vec, false, path)?;
6724
6725 // unwraps are safe because we pop once for every push
6726 if clone.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6727 clone.allele_2 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6728 }
6729 if clone.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6730 clone.allele_1 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6731 }
6732 if clone.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6733 let vec = vec_of_vec.pop().unwrap();
6734 let array = vec
6735 .iter()
6736 .map(|s| s.parse::<i32>())
6737 .collect::<Result<nd::Array1<i32>, _>>()?;
6738 clone.bp_position = Some(Rc::new(array));
6739 }
6740 if clone.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6741 let vec = vec_of_vec.pop().unwrap();
6742 let array = vec
6743 .iter()
6744 .map(|s| s.parse::<f32>())
6745 .collect::<Result<nd::Array1<f32>, _>>()?;
6746 clone.cm_position = Some(Rc::new(array));
6747 }
6748
6749 if clone.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6750 clone.sid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6751 }
6752 if clone.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6753 clone.chromosome = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6754 }
6755
6756 clone.check_counts(None, Some(count))?;
6757
6758 Ok((clone, count))
6759 }
6760
6761 /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty
6762 /// fields with a .bim file in the cloud.
6763 ///
6764 /// # Example
6765 ///
6766 /// Read .fam and .bim information into a [`Metadata`](struct.Metadata.html).
6767 /// Do not skip any fields.
6768 /// ```
6769 /// use ndarray as nd;
6770 /// use std::collections::HashSet;
6771 /// use bed_reader::{Metadata, MetadataFields, sample_url, CloudFile};
6772 ///
6773 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
6774 /// let skip_set = HashSet::<MetadataFields>::new();
6775 /// let fam_cloud_file = CloudFile::new(sample_url("small.fam")?)?;
6776 /// let bim_cloud_file = CloudFile::new(sample_url("small.bim")?)?;
6777 /// let metadata_empty = Metadata::new();
6778 /// let (metadata_fam, iid_count) =
6779 /// metadata_empty.read_fam_cloud(&fam_cloud_file, &skip_set).await?;
6780 /// let (metadata_bim, sid_count) =
6781 /// metadata_fam.read_bim_cloud(&bim_cloud_file, &skip_set).await?;
6782 /// assert_eq!(iid_count, 3);
6783 /// assert_eq!(sid_count, 4);
6784 /// println!("{0:?}", metadata_fam.iid()); // Outputs optional ndarray Some(["iid1", "iid2", "iid3"]...)
6785 /// println!("{0:?}", metadata_bim.sid()); // Outputs optional ndarray Some(["sid1", "sid2", "sid3", "sid4"]...)
6786 /// println!("{0:?}", metadata_bim.chromosome()); // Outputs optional ndarray Some(["1", "1", "5", "Y"]...)
6787 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
6788 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
6789 /// ```
6790 pub async fn read_bim_cloud(
6791 &self,
6792 cloud_file: &CloudFile,
6793 skip_set: &HashSet<MetadataFields>,
6794 ) -> Result<(Metadata, usize), Box<BedErrorPlus>> {
6795 let mut field_vec: Vec<usize> = Vec::new();
6796 if self.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6797 field_vec.push(0);
6798 }
6799 if self.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6800 field_vec.push(1);
6801 }
6802
6803 if self.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6804 field_vec.push(2);
6805 }
6806 if self.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6807 field_vec.push(3);
6808 }
6809 if self.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6810 field_vec.push(4);
6811 }
6812 if self.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6813 field_vec.push(5);
6814 }
6815
6816 let mut clone = self.clone();
6817 let (mut vec_of_vec, count) = self
6818 .read_fam_or_bim_cloud(&field_vec, false, cloud_file)
6819 .await?;
6820
6821 // unwraps are safe because we pop once for every push
6822 if clone.allele_2.is_none() && !skip_set.contains(&MetadataFields::Allele2) {
6823 clone.allele_2 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6824 }
6825 if clone.allele_1.is_none() && !skip_set.contains(&MetadataFields::Allele1) {
6826 clone.allele_1 = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6827 }
6828 if clone.bp_position.is_none() && !skip_set.contains(&MetadataFields::BpPosition) {
6829 let vec = vec_of_vec.pop().unwrap();
6830 let array = vec
6831 .iter()
6832 .map(|s| s.parse::<i32>())
6833 .collect::<Result<nd::Array1<i32>, _>>()?;
6834 clone.bp_position = Some(Rc::new(array));
6835 }
6836 if clone.cm_position.is_none() && !skip_set.contains(&MetadataFields::CmPosition) {
6837 let vec = vec_of_vec.pop().unwrap();
6838 let array = vec
6839 .iter()
6840 .map(|s| s.parse::<f32>())
6841 .collect::<Result<nd::Array1<f32>, _>>()?;
6842 clone.cm_position = Some(Rc::new(array));
6843 }
6844
6845 if clone.sid.is_none() && !skip_set.contains(&MetadataFields::Sid) {
6846 clone.sid = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6847 }
6848 if clone.chromosome.is_none() && !skip_set.contains(&MetadataFields::Chromosome) {
6849 clone.chromosome = Some(Rc::new(nd::Array::from_vec(vec_of_vec.pop().unwrap())));
6850 }
6851
6852 clone.check_counts(None, Some(count))?;
6853
6854 Ok((clone, count))
6855 }
6856
6857 #[anyinput]
6858 fn read_fam_or_bim(
6859 field_vec: &[usize],
6860 is_split_whitespace: bool,
6861 path: AnyPath,
6862 ) -> Result<(Vec<Vec<String>>, usize), Box<BedErrorPlus>> {
6863 let mut vec_of_vec = vec![vec![]; field_vec.len()];
6864
6865 let file = File::open(path)?;
6866
6867 let reader = BufReader::new(file);
6868 let mut count = 0;
6869 for line in reader.lines() {
6870 let line = line?;
6871 count += 1;
6872
6873 let fields: Vec<&str> = if is_split_whitespace {
6874 line.split_whitespace().collect()
6875 } else {
6876 line.split('\t').collect()
6877 };
6878
6879 if fields.len() != 6 {
6880 Err(BedError::MetadataFieldCount(
6881 6,
6882 fields.len(),
6883 path_ref_to_string(path),
6884 ))?;
6885 }
6886
6887 let mut of_interest_count = 0;
6888 for (field_index, field) in fields.iter().enumerate() {
6889 if field_vec.contains(&field_index) {
6890 vec_of_vec[of_interest_count].push((*field).to_string());
6891 of_interest_count += 1;
6892 }
6893 }
6894 }
6895
6896 Ok((vec_of_vec, count))
6897 }
6898
6899 async fn read_fam_or_bim_cloud(
6900 &self,
6901 field_vec: &[usize],
6902 is_split_whitespace: bool,
6903 cloud_file: &CloudFile,
6904 ) -> Result<(Vec<Vec<String>>, usize), Box<BedErrorPlus>> {
6905 let mut vec_of_vec = vec![vec![]; field_vec.len()];
6906 let mut count = 0;
6907
6908 let mut line_chunks = cloud_file.stream_line_chunks().await?;
6909 while let Some(line_chunk) = line_chunks.next().await {
6910 let line_chunk = line_chunk.map_err(CloudFileError::ObjectStoreError)?;
6911 let lines = std::str::from_utf8(&line_chunk)?.lines();
6912 for line in lines {
6913 count += 1;
6914
6915 let fields: Vec<&str> = if is_split_whitespace {
6916 line.split_whitespace().collect()
6917 } else {
6918 line.split('\t').collect()
6919 };
6920
6921 if fields.len() != 6 {
6922 Err(BedError::MetadataFieldCount(
6923 6,
6924 fields.len(),
6925 cloud_file.to_string(),
6926 ))?;
6927 }
6928
6929 let mut of_interest_count = 0;
6930 for (field_index, field) in fields.iter().enumerate() {
6931 if field_vec.contains(&field_index) {
6932 vec_of_vec[of_interest_count].push((*field).to_string());
6933 of_interest_count += 1;
6934 }
6935 }
6936 }
6937 }
6938
6939 Ok((vec_of_vec, count))
6940 }
6941
6942 fn is_some_fam(&self) -> bool {
6943 self.fid.is_some()
6944 && self.iid.is_some()
6945 && self.father.is_some()
6946 && self.mother.is_some()
6947 && self.sex.is_some()
6948 && self.pheno.is_some()
6949 }
6950 fn is_some_bim(&self) -> bool {
6951 self.chromosome.is_some()
6952 && self.sid.is_some()
6953 && self.cm_position.is_some()
6954 && self.bp_position.is_some()
6955 && self.allele_1.is_some()
6956 && self.allele_2.is_some()
6957 }
6958
6959 /// Write the metadata related to individuals/samples to a .fam file.
6960 ///
6961 /// If any of the .fam metadata is not present, the function will return an error.
6962 ///
6963 /// # Example
6964 ///
6965 /// Create metadata with iid and sid arrays, then fill in the other
6966 /// fields with default arrays, finally write the .fam information
6967 /// to a file.
6968 ///```
6969 /// use ndarray as nd;
6970 /// use std::collections::HashSet;
6971 /// use bed_reader::Metadata;
6972 ///
6973 /// let metadata0 = Metadata::builder()
6974 /// .iid(["i1", "i2", "i3"])
6975 /// .sid(["s1", "s2", "s3", "s4"])
6976 /// .build()?;
6977 /// let metadata_filled = metadata0.fill(3, 4)?;
6978 /// let temp_out = temp_testdir::TempDir::default();
6979 /// let output_file = temp_out.join("no_bed.fam");
6980 /// metadata_filled.write_fam(output_file)?;
6981 /// # use bed_reader::BedErrorPlus;
6982 /// # Ok::<(), Box<BedErrorPlus>>(())
6983 /// ```
6984 #[anyinput]
6985 pub fn write_fam(&self, path: AnyPath) -> Result<(), Box<BedErrorPlus>> {
6986 let file = File::create(path)?;
6987 let mut writer = BufWriter::new(file);
6988 let mut result: Result<(), Box<BedErrorPlus>> = Ok(());
6989
6990 if !self.is_some_fam() {
6991 Err(BedError::MetadataMissingForWrite("fam".to_string()))?;
6992 }
6993
6994 // 1st as_ref turns Option<Rc<Array>> into Option<&Rc<Array>>
6995 // unwrap always works because we checked that all the fields are present
6996 // 2nd as as_ref turns &Rc<Array> into &Array
6997 nd::azip!((fid in self.fid.as_ref().unwrap().as_ref(),
6998 iid in self.iid.as_ref().unwrap().as_ref(),
6999 father in self.father.as_ref().unwrap().as_ref(),
7000 mother in self.mother.as_ref().unwrap().as_ref(),
7001 sex in self.sex.as_ref().unwrap().as_ref(),
7002 pheno in self.pheno.as_ref().unwrap().as_ref(),
7003 )
7004 {
7005 if result.is_ok() {
7006 if let Err(e) = writeln!(
7007 writer,
7008 "{} {} {} {} {} {}",
7009 *fid, *iid, *father, *mother, *sex, *pheno
7010 )
7011 {
7012 result = Err(Box::new(BedErrorPlus::IOError(e)));
7013 }
7014 }});
7015 result?;
7016
7017 Ok(())
7018 }
7019
7020 /// Write the metadata related to SNPs/variants to a .bim file.
7021 ///
7022 /// If any of the .bim metadata is not present, the function will return an error.
7023 ///
7024 /// # Example
7025 ///
7026 /// Create metadata with iid and sid arrays, then fill in the other
7027 /// fields with default arrays, finally write the .bim information
7028 /// to a file.
7029 ///```
7030 /// use ndarray as nd;
7031 /// use std::collections::HashSet;
7032 /// use bed_reader::Metadata;
7033 ///
7034 /// let metadata0 = Metadata::builder()
7035 /// .iid(["i1", "i2", "i3"])
7036 /// .sid(["s1", "s2", "s3", "s4"])
7037 /// .build()?;
7038 /// let metadata_filled = metadata0.fill(3, 4)?;
7039 /// let temp_out = temp_testdir::TempDir::default();
7040 /// let output_file = temp_out.join("no_bed.bim");
7041 /// metadata_filled.write_bim(output_file)?;
7042 /// # use bed_reader::BedErrorPlus;
7043 /// # Ok::<(), Box<BedErrorPlus>>(())
7044 /// ```
7045 #[anyinput]
7046 pub fn write_bim(&self, path: AnyPath) -> Result<(), Box<BedErrorPlus>> {
7047 let file = File::create(path)?;
7048 let mut writer = BufWriter::new(file);
7049 let mut result: Result<(), Box<BedErrorPlus>> = Ok(());
7050
7051 if !self.is_some_bim() {
7052 Err(BedError::MetadataMissingForWrite("bim".to_string()))?;
7053 }
7054
7055 // 1st as_ref turns Option<Rc<Array>> into Option<&Rc<Array>>
7056 // unwrap always works because we checked that all the fields are present
7057 // 2nd as as_ref turns &Rc<Array> into &Array
7058 nd::azip!((
7059 chromosome in self.chromosome.as_ref().unwrap().as_ref(),
7060 sid in self.sid.as_ref().unwrap().as_ref(),
7061 cm_position in self.cm_position.as_ref().unwrap().as_ref(),
7062 bp_position in self.bp_position.as_ref().unwrap().as_ref(),
7063 allele_1 in self.allele_1.as_ref().unwrap().as_ref(),
7064 allele_2 in self.allele_2.as_ref().unwrap().as_ref(),
7065 )
7066 {
7067 if result.is_ok() {
7068 if let Err(e) = writeln!(
7069 writer,
7070 "{}\t{}\t{}\t{}\t{}\t{}",
7071 *chromosome, *sid, *cm_position, *bp_position, *allele_1, *allele_2
7072 )
7073 {
7074 result = Err(Box::new(BedErrorPlus::IOError(e)));
7075 }
7076 }
7077 });
7078 result?;
7079
7080 Ok(())
7081 }
7082
7083 /// Create a new [`Metadata`](struct.Metadata.html) by filling in empty fields with default values.
7084 ///
7085 /// # Example
7086 /// ```
7087 /// use ndarray as nd;
7088 /// use std::collections::HashSet;
7089 /// use bed_reader::{Metadata, MetadataFields};
7090 ///
7091 /// let metadata0 = Metadata::builder()
7092 /// .iid(["i1", "i2", "i3"])
7093 /// .sid(["s1", "s2", "s3", "s4"])
7094 /// .build()?;
7095 /// let metadata_filled = metadata0.fill(3, 4)?;
7096 ///
7097 /// println!("{0:?}", metadata_filled.iid()); // Outputs optional ndarray Some(["i1", "i2", "i3"]...)
7098 /// println!("{0:?}", metadata_filled.sid()); // Outputs optional ndarray Some(["s1", "s2", "s3", "s4"]...)
7099 /// println!("{0:?}", metadata_filled.chromosome()); // Outputs optional ndarray Some(["0", "0", "0", "0"]...)
7100 /// # use bed_reader::BedErrorPlus;
7101 /// # Ok::<(), Box<BedErrorPlus>>(())
7102 /// ```
7103 pub fn fill(&self, iid_count: usize, sid_count: usize) -> Result<Metadata, Box<BedErrorPlus>> {
7104 let mut metadata = self.clone();
7105
7106 compute_field("fid", &mut metadata.fid, iid_count, |_| "0".to_string())?;
7107 compute_field("iid", &mut metadata.iid, iid_count, |i| {
7108 format!("iid{}", i + 1)
7109 })?;
7110 compute_field("father", &mut metadata.father, iid_count, |_| {
7111 "0".to_string()
7112 })?;
7113 compute_field("mother", &mut metadata.mother, iid_count, |_| {
7114 "0".to_string()
7115 })?;
7116 compute_field("sex", &mut metadata.sex, iid_count, |_| 0)?;
7117 compute_field("pheno", &mut metadata.pheno, iid_count, |_| "0".to_string())?;
7118 compute_field("chromosome", &mut metadata.chromosome, sid_count, |_| {
7119 "0".to_string()
7120 })?;
7121 compute_field("sid", &mut metadata.sid, sid_count, |i| {
7122 format!("sid{}", i + 1)
7123 })?;
7124 compute_field("cm_position", &mut metadata.cm_position, sid_count, |_| 0.0)?;
7125 compute_field("bp_position", &mut metadata.bp_position, sid_count, |_| 0)?;
7126 compute_field("allele_1", &mut metadata.allele_1, sid_count, |_| {
7127 "A1".to_string()
7128 })?;
7129 compute_field("allele_2", &mut metadata.allele_2, sid_count, |_| {
7130 "A2".to_string()
7131 })?;
7132
7133 Ok(metadata)
7134 }
7135
7136 #[anyinput]
7137 fn set_fid(&mut self, fid: AnyIter<AnyString>) -> &Self {
7138 self.fid = Some(Rc::new(
7139 fid.into_iter().map(|s| s.as_ref().to_owned()).collect(),
7140 ));
7141 self
7142 }
7143
7144 #[anyinput]
7145 fn set_iid(&mut self, iid: AnyIter<AnyString>) -> &Self {
7146 self.iid = Some(Rc::new(
7147 iid.into_iter().map(|s| s.as_ref().to_owned()).collect(),
7148 ));
7149 self
7150 }
7151
7152 #[anyinput]
7153 fn set_father(&mut self, father: AnyIter<AnyString>) -> &Self {
7154 self.father = Some(Rc::new(father.map(|s| s.as_ref().to_owned()).collect()));
7155 self
7156 }
7157
7158 #[anyinput]
7159 fn set_mother(&mut self, mother: AnyIter<AnyString>) -> &Self {
7160 self.mother = Some(Rc::new(mother.map(|s| s.as_ref().to_owned()).collect()));
7161 self
7162 }
7163
7164 #[anyinput]
7165 fn set_sex(&mut self, sex: AnyIter<i32>) -> &Self {
7166 self.sex = Some(Rc::new(sex.collect()));
7167 self
7168 }
7169
7170 #[anyinput]
7171 fn set_pheno(&mut self, pheno: AnyIter<AnyString>) -> &Self {
7172 self.pheno = Some(Rc::new(pheno.map(|s| s.as_ref().to_owned()).collect()));
7173 self
7174 }
7175
7176 #[anyinput]
7177 fn set_chromosome(&mut self, chromosome: AnyIter<AnyString>) -> &Self {
7178 self.chromosome = Some(Rc::new(chromosome.map(|s| s.as_ref().to_owned()).collect()));
7179 self
7180 }
7181
7182 #[anyinput]
7183 fn set_sid(&mut self, sid: AnyIter<AnyString>) -> &Self {
7184 self.sid = Some(Rc::new(sid.map(|s| s.as_ref().to_owned()).collect()));
7185 self
7186 }
7187
7188 #[anyinput]
7189 fn set_cm_position(&mut self, cm_position: AnyIter<f32>) -> &Self {
7190 self.cm_position = Some(Rc::new(cm_position.into_iter().collect()));
7191 self
7192 }
7193
7194 #[anyinput]
7195 fn set_bp_position(&mut self, bp_position: AnyIter<i32>) -> &Self {
7196 self.bp_position = Some(Rc::new(bp_position.into_iter().collect()));
7197 self
7198 }
7199
7200 #[anyinput]
7201 fn set_allele_1(&mut self, allele_1: AnyIter<AnyString>) -> &Self {
7202 self.allele_1 = Some(Rc::new(allele_1.map(|s| s.as_ref().to_owned()).collect()));
7203 self
7204 }
7205
7206 #[anyinput]
7207 fn set_allele_2(&mut self, allele_2: AnyIter<AnyString>) -> &Self {
7208 self.allele_2 = Some(Rc::new(allele_2.map(|s| s.as_ref().to_owned()).collect()));
7209 self
7210 }
7211}
7212
7213#[allow(clippy::option_option)]
7214fn set_field<T>(
7215 field1: Option<&Rc<nd::Array1<T>>>,
7216 field2: &mut Option<Option<Rc<nd::Array1<T>>>>,
7217) {
7218 if let Some(array) = field1 {
7219 *field2 = Some(Some(array.clone()));
7220 }
7221}
7222
7223fn option_rc_as_ref<T>(field: Option<&Rc<nd::Array1<T>>>) -> Option<&nd::Array1<T>> {
7224 match field {
7225 Some(array) => Some(array.as_ref()),
7226 None => None,
7227 }
7228}
7229
7230#[allow(dead_code)]
7231fn matrix_subset_no_alloc<
7232 TIn: Copy + Default + Debug + Sync + Send + Sync + Sized,
7233 TOut: Copy + Default + Debug + Sync + Send + Sync + From<TIn>,
7234>(
7235 in_val: &nd::ArrayView3<'_, TIn>,
7236 iid_index: &[usize],
7237 sid_index: &[usize],
7238 out_val: &mut nd::ArrayViewMut3<'_, TOut>,
7239) -> Result<(), Box<BedErrorPlus>> {
7240 let out_iid_count = iid_index.len();
7241 let out_sid_count = sid_index.len();
7242 let did_count = in_val.dim().2;
7243
7244 if (out_iid_count, out_sid_count, did_count) != out_val.dim() {
7245 Err(BedError::SubsetMismatch(
7246 out_iid_count,
7247 out_sid_count,
7248 out_val.dim().0,
7249 out_val.dim().1,
7250 ))?;
7251 }
7252
7253 // If output is F-order (or in general if iid stride is no more than sid_stride)
7254 if out_val.stride_of(nd::Axis(0)) <= out_val.stride_of(nd::Axis(1)) {
7255 // (No error are possible in the par_azip, so don't have to collect and check them)
7256 nd::par_azip!((mut out_col in out_val.axis_iter_mut(nd::Axis(1)),
7257 in_sid_i_pr in sid_index) {
7258 let in_col = in_val.index_axis(nd::Axis(1), *in_sid_i_pr);
7259 for did_i in 0..did_count
7260 {
7261 for (out_iid_i, in_iid_i_ptr) in iid_index.iter().enumerate() {
7262 out_col[(out_iid_i,did_i)] = in_col[(*in_iid_i_ptr,did_i)].into();
7263 }
7264 }
7265 });
7266 Ok(())
7267 } else {
7268 //If output is C-order, transpose input and output and recurse
7269 let in_val_t = in_val.view().permuted_axes([1, 0, 2]);
7270 let mut out_val_t = out_val.view_mut().permuted_axes([1, 0, 2]);
7271 matrix_subset_no_alloc(&in_val_t, sid_index, iid_index, &mut out_val_t)
7272 }
7273}
7274
// Shared `FetchData` instance behind `sample_file` and `sample_files`,
// which use it to fetch sample files into a local directory.
#[fetch_data::ctor]
static STATIC_FETCH_DATA: FetchData = FetchData::new(
    // Registry text, embedded at compile time.
    // NOTE(review): presumably lists each sample file with its SHA256 hash — confirm.
    include_str!("../bed_reader/tests/registry.txt"),
    // NOTE(review): presumably the base URL the sample files are downloaded from — confirm.
    "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/",
    // Environment variable that selects the local data directory.
    "BED_READER_DATA_DIR",
    // NOTE(review): the next three look like the qualifier, organization and
    // application names used to pick the OS cache folder when the environment
    // variable is unset — confirm against the fetch_data documentation.
    "github.io",
    "fastlmm",
    "bed-reader",
);
7284
7285/// Returns the local path to a sample .bed file. If necessary, the file will be downloaded.
7286///
7287/// The .fam and .bim files will also be downloaded, if they are not already present.
7288/// SHA256 hashes are used to verify that the files are correct.
7289/// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
7290/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
7291#[anyinput]
7292pub fn sample_bed_file(bed_path: AnyPath) -> Result<PathBuf, Box<BedErrorPlus>> {
7293 let mut path_list: Vec<PathBuf> = Vec::new();
7294 for ext in &["bed", "bim", "fam"] {
7295 let file_path = bed_path.with_extension(ext);
7296 path_list.push(file_path);
7297 }
7298
7299 let vec = sample_files(path_list)?;
7300 assert!(vec.len() == 3);
7301 Ok(vec[0].clone())
7302}
7303
7304/// Returns the local path to a sample file. If necessary, the file will be downloaded.
7305///
7306/// A SHA256 hash is used to verify that the file is correct.
7307/// The file will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
7308/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
7309#[anyinput]
7310pub fn sample_file(path: AnyPath) -> Result<PathBuf, Box<BedErrorPlus>> {
7311 Ok(STATIC_FETCH_DATA
7312 .fetch_file(path)
7313 .map_err(|e| BedError::SampleFetch(e.to_string()))?)
7314}
7315
7316/// Returns the local paths to a list of files. If necessary, the files will be downloaded.
7317///
7318/// SHA256 hashes are used to verify that the files are correct.
7319/// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
7320/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
7321#[anyinput]
7322pub fn sample_files(path_list: AnyIter<AnyPath>) -> Result<Vec<PathBuf>, Box<BedErrorPlus>>
7323where
7324{
7325 Ok(STATIC_FETCH_DATA
7326 .fetch_files(path_list)
7327 .map_err(|e| BedError::SampleFetch(e.to_string()))?)
7328}
7329
// NOTE(review): the example below imports `EMPTY_OPTIONS` from the `cloud_file` crate rather
// than using this crate's constant — confirm that is intended.
/// An empty set of cloud options
///
/// This is a zero-length array of `(&str, String)` pairs. The example passes an
/// empty options value as the second argument of `CloudFile::new_with_options`.
///
/// # Example
/// ```
/// use cloud_file::{EMPTY_OPTIONS, CloudFile};
///
/// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
/// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/plink_sim_10s_100v_10pmiss.bed";
/// let cloud_file = CloudFile::new_with_options(url, EMPTY_OPTIONS)?;
/// assert_eq!(cloud_file.read_file_size().await?, 303);
/// # Ok::<(), BedErrorPlus>(())}).unwrap();
/// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
/// ```
pub const EMPTY_OPTIONS: [(&str, String); 0] = [];
7344
// Documentation-only module: its body is a single inner `doc` attribute that pulls in
// `supplemental_documents/options_etc.md`, so the module exists purely to publish that
// text as rustdoc. Compiled only when the `tokio` feature is enabled.
#[cfg(feature = "tokio")]
pub mod supplemental_document_options {
    #![doc = include_str!("supplemental_documents/options_etc.md")]
}
7349
// Documentation-only module: its body is a single inner `doc` attribute that pulls in
// `supplemental_documents/cloud_urls_etc.md`, so the module exists purely to publish that
// text as rustdoc. Compiled only when the `tokio` feature is enabled.
#[cfg(feature = "tokio")]
pub mod supplemental_document_cloud_urls {
    #![doc = include_str!("supplemental_documents/cloud_urls_etc.md")]
}