bed_reader/bed_cloud.rs
// Refuse to build on anything but a 64-bit target.
// NOTE(review): presumably required because this file converts between `u64` and `usize`
// with plain `as` casts when computing byte ranges — confirm.
#[cfg(not(target_pointer_width = "64"))]
compile_error!("This code requires a 64-bit target architecture.");
3
4use anyinput::anyinput;
5use bytes::Bytes;
6use cloud_file::{abs_path_to_url_string, CloudFile};
7use derive_builder::Builder;
8use futures_util::StreamExt;
9use itertools::Itertools;
10use nd::ShapeBuilder;
11use ndarray as nd;
12use std::cmp::max;
13use std::collections::HashSet;
14use std::ops::Range;
15use std::path::PathBuf;
16
17use crate::{
18 check_and_precompute_iid_index, compute_max_chunk_bytes, compute_max_concurrent_requests,
19 set_up_two_bits_to_value, try_div_4, BedError, BedErrorPlus, BedVal, FromStringArray, Hold,
20 Metadata, ReadOptions, BED_FILE_MAGIC1, BED_FILE_MAGIC2, EMPTY_OPTIONS, STATIC_FETCH_DATA,
21};
22use crate::{MetadataFields, CB_HEADER_U64};
23
/// Represents a PLINK .bed file in the cloud that is open for reading genotype data and metadata.
///
/// Construct with [`BedCloud::new`](struct.BedCloud.html#method.new), [`BedCloud::builder`](struct.BedCloud.html#method.builder),
/// [`BedCloud::from_cloud_file`](struct.BedCloud.html#method.from_cloud_file), or
/// [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file).
///
/// > For reading local files, see [`Bed`](struct.Bed.html).
///
/// # Example
///
/// Open a file for reading. Then, read the individual (sample) ids
/// and all the genotype data.
/// ```
/// use ndarray as nd;
/// use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
///
/// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
/// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
/// let mut bed_cloud = BedCloud::new(url).await?;
/// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
/// let val = ReadOptions::builder().f64().read_cloud(&mut bed_cloud).await?;
///
/// assert_eq_nan(
///     &val,
///     &nd::array![
///         [1.0, 0.0, f64::NAN, 0.0],
///         [2.0, 0.0, f64::NAN, 2.0],
///         [0.0, 1.0, 2.0, 0.0]
///     ],
/// );
/// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
/// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
/// ```
// `build_fn(skip)` turns off the generated build function; this file supplies
// `build_no_file_check` and `build` by hand. Every setter is `custom`, i.e. hand-written too.
#[derive(Clone, Debug, Builder)]
#[builder(build_fn(skip))]
pub struct BedCloud {
    // Cloud location of the .bed file; its header is what `open_and_check` validates.
    #[builder(setter(custom))]
    cloud_file: CloudFile,

    // Explicit location of the .fam file; stays `None` unless set through the builder's
    // `fam` / `fam_cloud_file`.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    fam_cloud_file: Option<CloudFile>,

    // Explicit location of the .bim file; stays `None` unless set through the builder's
    // `bim` / `bim_cloud_file`.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    bim_cloud_file: Option<CloudFile>,

    // When true (the default), `BedCloudBuilder::build` checks the .bed header right away.
    #[builder(setter(custom))]
    #[builder(default = "true")]
    is_checked_early: bool,

    // Number of individuals (samples), when known.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    iid_count: Option<usize>,

    // Number of SNPs, when known.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    sid_count: Option<usize>,

    // Metadata values; the builder's override methods (`fid`, `iid`, `sid`, ...) write here.
    #[builder(setter(custom))]
    metadata: Metadata,

    // Metadata fields the builder's `skip_*` methods asked not to record.
    #[builder(setter(custom))]
    skip_set: HashSet<MetadataFields>,
}
89
90// We need to define our own build_no_file_check
91// because otherwise derive_builder (needlessly) requires ObjectStore: Clone
92impl BedCloudBuilder {
93 fn build_no_file_check(&self) -> Result<BedCloud, Box<BedErrorPlus>> {
94 Ok(BedCloud {
95 cloud_file: match self.cloud_file {
96 Some(ref value) => Clone::clone(value),
97 None => Err(BedError::UninitializedField("cloud_file"))?,
98 },
99 fam_cloud_file: match self.fam_cloud_file {
100 Some(ref value) => Clone::clone(value),
101 None => None,
102 },
103 bim_cloud_file: match self.bim_cloud_file {
104 Some(ref value) => Clone::clone(value),
105 None => None,
106 },
107 is_checked_early: match self.is_checked_early {
108 Some(ref value) => Clone::clone(value),
109 None => true,
110 },
111 iid_count: match self.iid_count {
112 Some(ref value) => Clone::clone(value),
113 None => None,
114 },
115 sid_count: match self.sid_count {
116 Some(ref value) => Clone::clone(value),
117 None => None,
118 },
119 metadata: match self.metadata {
120 Some(ref value) => Clone::clone(value),
121 None => Err(BedError::UninitializedField("metadata"))?,
122 },
123 skip_set: match self.skip_set {
124 Some(ref value) => Clone::clone(value),
125 None => Err(BedError::UninitializedField("skip_set"))?,
126 },
127 })
128 }
129}
130
131fn convert_negative_sid_index(
132 in_sid_i_signed: isize,
133 upper_sid_count: isize,
134 lower_sid_count: isize,
135) -> Result<u64, Box<BedErrorPlus>> {
136 if (0..=upper_sid_count).contains(&in_sid_i_signed) {
137 #[allow(clippy::cast_sign_loss)]
138 Ok(in_sid_i_signed as u64)
139 } else if (lower_sid_count..=-1).contains(&in_sid_i_signed) {
140 #[allow(clippy::cast_sign_loss)]
141 Ok((in_sid_i_signed - lower_sid_count) as u64)
142 } else {
143 Err(Box::new(BedErrorPlus::BedError(BedError::SidIndexTooBig(
144 in_sid_i_signed,
145 ))))
146 }
147}
148
/// Reads the requested columns of the file into `out_val`, fetching them in chunks with
/// up to `max_concurrent_requests` requests in flight.
///
/// `sid_index` selects the columns to fetch; negative entries are resolved by
/// `convert_negative_sid_index`. `size` is the file length that `check_file_length`
/// compares against the length implied by `in_iid_count` and `in_sid_count`.
///
/// # Errors
/// Propagates errors from `check_file_length`, `check_and_precompute_iid_index`,
/// `extract_ranges`, and from `cloud_file.read_ranges`.
#[allow(clippy::too_many_arguments)]
#[allow(clippy::similar_names)]
async fn internal_read_no_alloc<TVal: BedVal>(
    cloud_file: &CloudFile,
    size: usize,
    in_iid_count: usize,
    in_sid_count: usize,
    is_a1_counted: bool,
    iid_index: &[isize],
    sid_index: &[isize],
    missing_value: TVal,
    max_concurrent_requests: usize,
    max_chunk_bytes: usize,
    out_val: &mut nd::ArrayViewMut2<'_, TVal>,
) -> Result<(), Box<BedErrorPlus>> {
    // compute numbers outside of the loop
    let in_iid_count_div4_u64 = check_file_length(in_iid_count, in_sid_count, size, cloud_file)?;
    // NOTE(review): judging by the names, these are per-output-row byte offsets and bit
    // shifts plus the start/length of the byte window needed from each column — confirm
    // against `check_and_precompute_iid_index`.
    let (i_div_4_less_start_array, i_mod_4_times_2_array, i_div_4_start, i_div_4_len) =
        check_and_precompute_iid_index(in_iid_count, iid_index)?;
    if i_div_4_len == 0 {
        return Ok(()); // we must return early because the chunks method doesn't work with size 0
    }
    // Number of columns per request: as many `i_div_4_len`-byte windows as fit into
    // `max_chunk_bytes`, but always at least one.
    let chunk_count = max(1, max_chunk_bytes / i_div_4_len as usize);
    let from_two_bits_to_value = set_up_two_bits_to_value(is_a1_counted, missing_value);
    // Bounds handed to `convert_negative_sid_index`: valid indices are
    // `-in_sid_count ..= in_sid_count - 1`.
    let lower_sid_count = -(in_sid_count as isize);
    let upper_sid_count: isize = (in_sid_count as isize) - 1;

    // sid_index is a slice that tells us which columns to read from the (column-major) file.
    // out_val is a column-major array to fill the decode results.

    // For each chunk of columns to read ...

    let chunks = sid_index.iter().chunks(chunk_count);
    let iterator = chunks.into_iter().enumerate().map(|(chunk_index, chunk)| {
        // The ranges are computed eagerly, outside the future; only the fetch is async.
        let result = extract_ranges(
            chunk_count,
            chunk,
            chunk_index,
            upper_sid_count,
            lower_sid_count,
            in_iid_count_div4_u64,
            i_div_4_start,
            i_div_4_len,
        );
        async move {
            let (ranges, out_sid_i_vec) = result?;
            let vec_bytes = cloud_file.read_ranges(&ranges).await?;
            Result::<_, Box<BedErrorPlus>>::Ok((vec_bytes, out_sid_i_vec))
        }
    });

    // `buffer_unordered` yields chunks in completion order; that is fine because every
    // chunk carries the output column indices it belongs to.
    let mut stream = futures_util::stream::iter(iterator).buffer_unordered(max_concurrent_requests);

    while let Some(result) = stream.next().await {
        let (vec_bytes, out_sid_i_vec) = result?;
        decode_bytes_into_columns(
            &vec_bytes,
            out_sid_i_vec,
            iid_index,
            &i_div_4_less_start_array,
            &i_mod_4_times_2_array,
            out_val,
            from_two_bits_to_value,
        );
    }

    Ok(())
}
217
218#[inline]
219#[allow(clippy::type_complexity)]
220#[allow(clippy::too_many_arguments)]
221fn extract_ranges(
222 chunk_count: usize,
223 chunk: itertools::Chunk<'_, std::slice::Iter<'_, isize>>,
224 chunk_index: usize,
225 upper_sid_count: isize,
226 lower_sid_count: isize,
227 in_iid_count_div4_u64: u64,
228 i_div_4_start: u64,
229 i_div_4_len: u64,
230) -> Result<(Vec<Range<usize>>, Vec<usize>), Box<BedErrorPlus>> {
231 let mut ranges = Vec::with_capacity(chunk_count);
232 let mut out_sid_i_vec = Vec::with_capacity(chunk_count);
233 for (inner_index, in_sid_i_signed) in chunk.enumerate() {
234 let out_sid_i = chunk_index * chunk_count + inner_index;
235 let in_sid_i =
236 convert_negative_sid_index(*in_sid_i_signed, upper_sid_count, lower_sid_count)?;
237 let pos: usize =
238 (in_sid_i * in_iid_count_div4_u64 + i_div_4_start + CB_HEADER_U64) as usize; // "as" and math is safe because of early checks
239 let range = pos..pos + i_div_4_len as usize;
240 debug_assert!(range.end - range.start == i_div_4_len as usize); // real assert
241 ranges.push(range);
242 out_sid_i_vec.push(out_sid_i);
243 }
244 Ok((ranges, out_sid_i_vec))
245}
246
247#[inline]
248fn decode_bytes_into_columns<TVal: BedVal>(
249 bytes_slice: &[Bytes],
250 out_sid_i_vec: Vec<usize>,
251 iid_index: &[isize],
252 i_div_4_less_start_array: &nd::prelude::ArrayBase<
253 nd::OwnedRepr<usize>,
254 nd::prelude::Dim<[usize; 1]>,
255 >,
256 i_mod_4_times_2_array: &nd::prelude::ArrayBase<nd::OwnedRepr<u8>, nd::prelude::Dim<[usize; 1]>>,
257 out_val: &mut nd::prelude::ArrayBase<nd::ViewRepr<&mut TVal>, nd::prelude::Dim<[usize; 2]>>,
258 from_two_bits_to_value: [TVal; 4],
259) {
260 for (bytes, out_sid_i) in bytes_slice.iter().zip(out_sid_i_vec.into_iter()) {
261 let mut col = out_val.column_mut(out_sid_i);
262 // LATER: Consider doing this in parallel as in the non-cloud version.
263 for out_iid_i in 0..iid_index.len() {
264 let i_div_4_less_start = i_div_4_less_start_array[out_iid_i];
265 let i_mod_4_times_2: u8 = i_mod_4_times_2_array[out_iid_i];
266 let encoded: u8 = bytes[i_div_4_less_start];
267 let genotype_byte: u8 = (encoded >> i_mod_4_times_2) & 0x03;
268 col[out_iid_i] = from_two_bits_to_value[genotype_byte as usize];
269 }
270 }
271}
272
273#[allow(clippy::similar_names)]
274fn check_file_length(
275 in_iid_count: usize,
276 in_sid_count: usize,
277 size: usize,
278 cloud_file: &CloudFile,
279) -> Result<u64, Box<BedErrorPlus>> {
280 let in_iid_count_div4_u64 = try_div_4(in_iid_count, in_sid_count)?;
281 let file_len = size as u64;
282 let file_len2 = in_iid_count_div4_u64 * (in_sid_count as u64) + CB_HEADER_U64;
283 if file_len != file_len2 {
284 Err(BedError::IllFormed(cloud_file.to_string()))?;
285 }
286 Ok(in_iid_count_div4_u64)
287}
288
289#[inline]
290#[allow(clippy::too_many_arguments)]
291#[allow(clippy::similar_names)]
292async fn read_no_alloc<TVal: BedVal>(
293 cloud_file: &CloudFile,
294 iid_count: usize,
295 sid_count: usize,
296 is_a1_counted: bool,
297 iid_index: &[isize],
298 sid_index: &[isize],
299 missing_value: TVal,
300 max_concurrent_requests: usize,
301 max_chunk_bytes: usize,
302
303 val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
304) -> Result<(), Box<BedErrorPlus>> {
305 let (size, bytes) = open_and_check(cloud_file).await?;
306
307 match bytes[2] {
308 0 => {
309 // We swap 'iid' and 'sid' and then reverse the axes.
310 let mut val_t = val.view_mut().reversed_axes();
311
312 internal_read_no_alloc(
313 cloud_file,
314 size,
315 sid_count,
316 iid_count,
317 is_a1_counted,
318 sid_index,
319 iid_index,
320 missing_value,
321 max_concurrent_requests,
322 max_chunk_bytes,
323 &mut val_t,
324 )
325 .await?;
326 }
327 1 => {
328 internal_read_no_alloc(
329 cloud_file,
330 size,
331 iid_count,
332 sid_count,
333 is_a1_counted,
334 iid_index,
335 sid_index,
336 missing_value,
337 max_concurrent_requests,
338 max_chunk_bytes,
339 val,
340 )
341 .await?;
342 }
343 _ => Err(BedError::BadMode(cloud_file.to_string()))?,
344 };
345 Ok(())
346}
347
348async fn open_and_check(cloud_file: &CloudFile) -> Result<(usize, Bytes), Box<BedErrorPlus>> {
349 let (bytes, size) = cloud_file
350 .read_range_and_file_size(0..CB_HEADER_U64 as usize)
351 .await?;
352 if (bytes.len() as u64) < CB_HEADER_U64
353 || BED_FILE_MAGIC1 != bytes[0]
354 || BED_FILE_MAGIC2 != bytes[1]
355 || (0 != bytes[2] && 1 != bytes[2])
356 {
357 Err(BedError::IllFormed(cloud_file.to_string()))?;
358 }
359 Ok((size, bytes))
360}
361
362impl BedCloudBuilder {
363 fn new<I, K, V>(url: impl AsRef<str>, options: I) -> Result<Self, Box<BedErrorPlus>>
364 where
365 I: IntoIterator<Item = (K, V)>,
366 K: AsRef<str>,
367 V: Into<String>,
368 {
369 let cloud_file = CloudFile::new_with_options(url, options)?;
370 Ok(BedCloudBuilder::from(cloud_file))
371 }
372
373 /// Set the cloud location of the .fam file. Specify the file with a URL string.
374 ///
375 /// If not set, the .fam file will be assumed
376 /// to have the same location as the .bed file, but with the extension .fam.
377 ///
378 /// > See [`BedCloudBuilder::fam_cloud_file`](struct.BedCloudBuilder.html#method.fam_cloud_file) to specify the file with an [`CloudFile`](struct.CloudFile.html)
379 /// > instead of a URL string.
380 ///
381 /// # Example:
382 /// Read .bed, .fam, and .bim files with non-standard names.
383 /// ```
384 /// use bed_reader::{BedCloud, ReadOptions, sample_urls, EMPTY_OPTIONS};
385 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
386 /// let deb_maf_mib = sample_urls(["small.deb", "small.maf", "small.mib"])?;
387 /// let mut bed_cloud = BedCloud::builder(&deb_maf_mib[0])?
388 /// .fam(&deb_maf_mib[1], EMPTY_OPTIONS)?
389 /// .bim(&deb_maf_mib[2], EMPTY_OPTIONS)?
390 /// .build().await?;
391 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
392 /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
393 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
394 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
395 /// ```
396 pub fn fam<I, K, V>(
397 mut self,
398 url: impl AsRef<str>,
399 options: I,
400 ) -> Result<Self, Box<BedErrorPlus>>
401 where
402 I: IntoIterator<Item = (K, V)>,
403 K: AsRef<str>,
404 V: Into<String>,
405 {
406 let cloud_file = CloudFile::new_with_options(url, options)?;
407 self.fam_cloud_file = Some(Some(cloud_file));
408 Ok(self)
409 }
410
411 /// Set the cloud location of the .bim file. Specify the file with a URL string.
412 ///
413 /// If not set, the .bim file will be assumed
414 /// to have the same location as the .bed file, but with the extension .bim.
415 ///
416 /// > See [`BedCloudBuilder::fam_cloud_file`](struct.BedCloudBuilder.html#method.bim_cloud_file) to specify the file with an [`CloudFile`](struct.CloudFile.html)
417 /// > instead of a URL string.
418 ///
419 /// # Example:
420 /// Read .bed, .fam, and .bim files with non-standard names.
421 /// ```
422 /// use bed_reader::{BedCloud, ReadOptions, sample_urls, EMPTY_OPTIONS};
423 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
424 /// let deb_maf_mib = sample_urls(["small.deb", "small.maf", "small.mib"])?;
425 /// let mut bed_cloud = BedCloud::builder(&deb_maf_mib[0])?
426 /// .fam(&deb_maf_mib[1], EMPTY_OPTIONS)?
427 /// .bim(&deb_maf_mib[2], EMPTY_OPTIONS)?
428 /// .build().await?;
429 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
430 /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
431 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
432 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
433 /// ```
434 pub fn bim<I, K, V>(
435 mut self,
436 url: impl AsRef<str>,
437 options: I,
438 ) -> Result<Self, Box<BedErrorPlus>>
439 where
440 I: IntoIterator<Item = (K, V)>,
441 K: AsRef<str>,
442 V: Into<String>,
443 {
444 let cloud_file = CloudFile::new_with_options(url, options)?;
445 self.bim_cloud_file = Some(Some(cloud_file));
446 Ok(self)
447 }
448}
449
450impl From<&CloudFile> for BedCloudBuilder {
451 fn from(cloud_file: &CloudFile) -> Self {
452 Self {
453 cloud_file: Some(cloud_file.clone()), // Cloned here.
454 fam_cloud_file: None,
455 bim_cloud_file: None,
456
457 is_checked_early: None,
458 iid_count: None,
459 sid_count: None,
460
461 metadata: Some(Metadata::new()),
462 skip_set: Some(HashSet::new()),
463 }
464 }
465}
466
467impl From<CloudFile> for BedCloudBuilder {
468 fn from(cloud_file: CloudFile) -> Self {
469 Self {
470 cloud_file: Some(cloud_file), // Cloned here.
471 fam_cloud_file: None,
472 bim_cloud_file: None,
473
474 is_checked_early: None,
475 iid_count: None,
476 sid_count: None,
477
478 metadata: Some(Metadata::new()),
479 skip_set: Some(HashSet::new()),
480 }
481 }
482}
483
484impl BedCloudBuilder {
485 /// Create a [`BedCloud`](struct.BedCloud.html) from the builder.
486 ///
487 /// > See [`BedCloud::builder`](struct.BedCloud.html#method.builder) for more details and examples.
488 pub async fn build(&self) -> Result<BedCloud, Box<BedErrorPlus>> {
489 let mut bed_cloud = self.build_no_file_check()?;
490
491 // Unwrap is allowed because we can't construct BedCloudBuilder without cloud_file
492 if bed_cloud.is_checked_early {
493 let cloud_file = self.cloud_file.as_ref().unwrap().clone();
494 open_and_check(&cloud_file).await?;
495 }
496
497 (bed_cloud.iid_count, bed_cloud.sid_count) = bed_cloud
498 .metadata
499 .check_counts(bed_cloud.iid_count, bed_cloud.sid_count)?;
500
501 Ok(bed_cloud)
502 }
503
504 /// Override the family id (fid) values found in the .fam file.
505 ///
506 /// By default, if fid values are needed and haven't already been found,
507 /// they will be read from the .fam file.
508 /// Providing them here avoids that file read and provides a way to give different values.
509 #[anyinput]
510 #[must_use]
511 pub fn fid(mut self, fid: AnyIter<AnyString>) -> Self {
512 // Unwrap will always work because BedCloudBuilder starting with some metadata
513 self.metadata.as_mut().unwrap().set_fid(fid);
514 self
515 }
516
517 /// Override the individual id (iid) values found in the .fam file.
518 ///
519 /// By default, if iid values are needed and haven't already been found,
520 /// they will be read from the .fam file.
521 /// Providing them here avoids that file read and provides a way to give different values.
522 /// ```
523 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
524 /// use ndarray as nd;
525 /// use bed_reader::{BedCloud, assert_eq_nan};
526 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
527 /// use bed_reader::ReadOptions;
528 ///
529 /// let mut bed_cloud = BedCloud::builder(url)?
530 /// .iid(["sample1", "sample2", "sample3"])
531 /// .build().await?;
532 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["sample1", "sample2", "sample3"]
533 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
534 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
535 /// ```
536 #[anyinput]
537 #[must_use]
538 pub fn iid(mut self, iid: AnyIter<AnyString>) -> Self {
539 // Unwrap will always work because BedCloudBuilder starting with some metadata
540 self.metadata.as_mut().unwrap().set_iid(iid);
541 self
542 }
543
544 /// Override the father values found in the .fam file.
545 ///
546 /// By default, if father values are needed and haven't already been found,
547 /// they will be read from the .fam file.
548 /// Providing them here avoids that file read and provides a way to gi&ve different values.
549 #[anyinput]
550 #[must_use]
551 pub fn father(mut self, father: AnyIter<AnyString>) -> Self {
552 // Unwrap will always work because BedCloudBuilder starting with some metadata
553 self.metadata.as_mut().unwrap().set_father(father);
554 self
555 }
556
557 /// Override the mother values found in the .fam file.
558 ///
559 /// By default, if mother values are needed and haven't already been found,
560 /// they will be read from the .fam file.
561 /// Providing them here avoids that file read and provides a way to give different values.
562 #[anyinput]
563 #[must_use]
564 pub fn mother(mut self, mother: AnyIter<AnyString>) -> Self {
565 // Unwrap will always work because BedCloudBuilder starting with some metadata
566 self.metadata.as_mut().unwrap().set_mother(mother);
567 self
568 }
569
570 /// Override the sex values found in the .fam file.
571 ///
572 /// By default, if sex values are needed and haven't already been found,
573 /// they will be read from the .fam file.
574 /// Providing them here avoids that file read and provides a way to give different values.
575 #[anyinput]
576 #[must_use]
577 pub fn sex(mut self, sex: AnyIter<i32>) -> Self {
578 // Unwrap will always work because BedCloudBuilder starting with some metadata
579 self.metadata.as_mut().unwrap().set_sex(sex);
580 self
581 }
582
583 /// Override the phenotype values found in the .fam file.
584 ///
585 /// Note that the phenotype values in the .fam file are seldom used.
586 /// By default, if phenotype values are needed and haven't already been found,
587 /// they will be read from the .fam file.
588 /// Providing them here avoids that file read and provides a way to give different values.
589 #[anyinput]
590 #[must_use]
591 pub fn pheno(mut self, pheno: AnyIter<AnyString>) -> Self {
592 // Unwrap will always work because BedCloudBuilder starting with some metadata
593 self.metadata.as_mut().unwrap().set_pheno(pheno);
594 self
595 }
596
597 /// Override the chromosome values found in the .bim file.
598 ///
599 /// By default, if chromosome values are needed and haven't already been found,
600 /// they will be read from the .bim file.
601 /// Providing them here avoids that file read and provides a way to give different values.
602 #[anyinput]
603 #[must_use]
604 pub fn chromosome(mut self, chromosome: AnyIter<AnyString>) -> Self {
605 // Unwrap will always work because BedCloudBuilder starting with some metadata
606 self.metadata.as_mut().unwrap().set_chromosome(chromosome);
607 self
608 }
609
610 /// Override the SNP id (sid) values found in the .fam file.
611 ///
612 /// By default, if sid values are needed and haven't already been found,
613 /// they will be read from the .bim file.
614 /// Providing them here avoids that file read and provides a way to give different values.
615 /// ```
616 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
617 /// use ndarray as nd;
618 /// use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
619 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
620 ///
621 /// let mut bed_cloud = BedCloud::builder(url)?
622 /// .sid(["SNP1", "SNP2", "SNP3", "SNP4"])
623 /// .build().await?;
624 /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["SNP1", "SNP2", "SNP3", "SNP4"]
625 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
626 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
627 /// ```
628 #[anyinput]
629 #[must_use]
630 pub fn sid(mut self, sid: AnyIter<AnyString>) -> Self {
631 self.metadata.as_mut().unwrap().set_sid(sid);
632 self
633 }
634
635 /// Override the centimorgan position values found in the .bim file.
636 ///
637 /// By default, if centimorgan position values are needed and haven't already been found,
638 /// they will be read from the .bim file.
639 /// Providing them here avoids that file read and provides a way to give different values.
640 #[anyinput]
641 #[must_use]
642 pub fn cm_position(mut self, cm_position: AnyIter<f32>) -> Self {
643 // Unwrap will always work because BedCloudBuilder starting with some metadata
644 self.metadata.as_mut().unwrap().set_cm_position(cm_position);
645 self
646 }
647
648 /// Override the base-pair position values found in the .bim file.
649 ///
650 /// By default, if base-pair position values are needed and haven't already been found,
651 /// they will be read from the .bim file.
652 /// Providing them here avoids that file read and provides a way to give different values.
653 #[anyinput]
654 #[must_use]
655 pub fn bp_position(mut self, bp_position: AnyIter<i32>) -> Self {
656 // Unwrap will always work because BedCloudBuilder starting with some metadata
657 self.metadata.as_mut().unwrap().set_bp_position(bp_position);
658 self
659 }
660
661 /// Override the allele 1 values found in the .bim file.
662 ///
663 /// By default, if allele 1 values are needed and haven't already been found,
664 /// they will be read from the .bim file.
665 /// Providing them here avoids that file read and provides a way to give different values.
666 #[anyinput]
667 #[must_use]
668 pub fn allele_1(mut self, allele_1: AnyIter<AnyString>) -> Self {
669 // Unwrap will always work because BedCloudBuilder starting with some metadata
670 self.metadata.as_mut().unwrap().set_allele_1(allele_1);
671 self
672 }
673
674 /// Override the allele 2 values found in the .bim file.
675 ///
676 /// By default, if allele 2 values are needed and haven't already been found,
677 /// they will be read from the .bim file.
678 /// Providing them here avoids that file read and provides a way to give different values.
679 #[anyinput]
680 #[must_use]
681 pub fn allele_2(mut self, allele_2: AnyIter<AnyString>) -> Self {
682 // Unwrap will always work because BedCloudBuilder starting with some metadata
683 self.metadata.as_mut().unwrap().set_allele_2(allele_2);
684 self
685 }
686
687 /// Set the number of individuals (samples) in the data.
688 ///
689 /// By default, if this number is needed, it will be found
690 /// and remembered
691 /// by opening the .fam file and quickly counting the number
692 /// of lines. Providing the number thus avoids a file read.
693 #[must_use]
694 pub fn iid_count(mut self, count: usize) -> Self {
695 self.iid_count = Some(Some(count));
696 self
697 }
698
699 /// Set the number of SNPs in the data.
700 ///
701 /// By default, if this number is needed, it will be found
702 /// and remembered
703 /// by opening the .bim file and quickly counting the number
704 /// of lines. Providing the number thus avoids a file read.
705 #[must_use]
706 pub fn sid_count(mut self, count: usize) -> Self {
707 self.sid_count = Some(Some(count));
708 self
709 }
710
711 /// Don't check the header of the .bed file until and unless the file is actually read.
712 ///
713 /// By default, when a [`BedCloud`](struct.BedCloud.html) struct is created, the .bed
714 /// file header is checked. This stops that early check.
715 /// ```
716 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
717 /// # use ndarray as nd;
718 /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
719 /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
720 /// let mut bed_cloud = BedCloud::builder(url)?.skip_early_check().build().await?;
721 /// let val = bed_cloud.read::<f64>().await?;
722 ///
723 /// assert_eq_nan(
724 /// &val,
725 /// &nd::array![
726 /// [1.0, 0.0, f64::NAN, 0.0],
727 /// [2.0, 0.0, f64::NAN, 2.0],
728 /// [0.0, 1.0, 2.0, 0.0]
729 /// ],
730 /// );
731 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
732 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
733 /// ```
734 #[must_use]
735 pub fn skip_early_check(mut self) -> Self {
736 self.is_checked_early = Some(false);
737 self
738 }
739
740 /// Set the cloud location of the .fam file.
741 ///
742 /// If not set, the .fam file will be assumed
743 /// to have the same location as the .bed file, but with the extension .fam.
744 ///
745 /// # Example:
746 /// Read .bed, .fam, and .bim files with non-standard names.
747 /// ```
748 /// use bed_reader::{BedCloud, ReadOptions, sample_urls, EMPTY_OPTIONS};
749 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
750 /// let deb_maf_mib = sample_urls(["small.deb", "small.maf", "small.mib"])?;
751 /// let mut bed_cloud = BedCloud::builder(&deb_maf_mib[0])?
752 /// .fam(&deb_maf_mib[1], EMPTY_OPTIONS)?
753 /// .bim(&deb_maf_mib[2], EMPTY_OPTIONS)?
754 /// .build().await?;
755 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
756 /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
757 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
758 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
759 /// ```
760 #[must_use]
761 pub fn fam_cloud_file(mut self, cloud_file: &CloudFile) -> Self {
762 self.fam_cloud_file = Some(Some(cloud_file.clone()));
763 self
764 }
765
766 /// Set the cloud location of the .bim file.
767 ///
768 /// If not set, the .bim file will be assumed
769 /// to have the same location as the .bed file, but with the extension .bim.
770 ///
771 /// # Example:
772 /// Read .bed, .fam, and .bim files with non-standard names.
773 /// ```
774 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
775 /// use bed_reader::{BedCloud, ReadOptions, sample_urls, CloudFile};
776 ///
777 /// let deb_maf_mib = sample_urls(["small.deb", "small.maf", "small.mib"])?
778 /// .iter()
779 /// .map(|url| CloudFile::new(url))
780 /// .collect::<Result<Vec<CloudFile>, _>>()?;
781 /// let mut bed_cloud = BedCloud::builder_from_cloud_file(&deb_maf_mib[0])
782 /// .fam_cloud_file(&deb_maf_mib[1])
783 /// .bim_cloud_file(&deb_maf_mib[2])
784 /// .build().await?;
785 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
786 /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
787 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
788 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
789 /// ```
790 #[must_use]
791 pub fn bim_cloud_file(mut self, cloud_file: &CloudFile) -> Self {
792 let cloud_file = cloud_file.clone();
793 self.bim_cloud_file = Some(Some(cloud_file));
794 self
795 }
796
797 /// Don't read the fid information from the .fam file.
798 ///
799 /// By default, when the .fam is read, the fid (the family id) is recorded.
800 /// This stops that recording. This is useful if the fid is not needed.
801 /// Asking for the fid after skipping it results in an error.
802 #[must_use]
803 pub fn skip_fid(mut self) -> Self {
804 // Unwrap will always work because BedCloudBuilder starting with some skip_set
805 self.skip_set.as_mut().unwrap().insert(MetadataFields::Fid);
806 self
807 }
808
809 /// Don't read the iid information from the .fam file.
810 ///
811 /// By default, when the .fam is read, the iid (the individual id) is recorded.
812 /// This stops that recording. This is useful if the iid is not needed.
813 /// Asking for the iid after skipping it results in an error.
814 #[must_use]
815 pub fn skip_iid(mut self) -> Self {
816 // Unwrap will always work because BedCloudBuilder starting with some skip_set
817 self.skip_set.as_mut().unwrap().insert(MetadataFields::Iid);
818 self
819 }
820
821 /// Don't read the father information from the .fam file.
822 ///
823 /// By default, when the .fam is read, the father id is recorded.
824 /// This stops that recording. This is useful if the father id is not needed.
825 /// Asking for the father id after skipping it results in an error.
826 #[must_use]
827 pub fn skip_father(mut self) -> Self {
828 // Unwrap will always work because BedCloudBuilder starting with some skip_set
829 self.skip_set
830 .as_mut()
831 .unwrap()
832 .insert(MetadataFields::Father);
833 self
834 }
835
836 /// Don't read the mother information from the .fam file.
837 ///
838 /// By default, when the .fam is read, the mother id is recorded.
839 /// This stops that recording. This is useful if the mother id is not needed.
840 /// Asking for the mother id after skipping it results in an error.
841 #[must_use]
842 pub fn skip_mother(mut self) -> Self {
843 // Unwrap will always work because BedCloudBuilder starting with some skip_set
844 self.skip_set
845 .as_mut()
846 .unwrap()
847 .insert(MetadataFields::Mother);
848 self
849 }
850
851 /// Don't read the sex information from the .fam file.
852 ///
853 /// By default, when the .fam is read, the sex is recorded.
854 /// This stops that recording. This is useful if sex is not needed.
855 /// Asking for sex after skipping it results in an error.
856 #[must_use]
857 pub fn skip_sex(mut self) -> Self {
858 // Unwrap will always work because BedCloudBuilder starting with some skip_set
859 self.skip_set.as_mut().unwrap().insert(MetadataFields::Sex);
860 self
861 }
862
863 /// Don't read the phenotype information from the .fam file.
864 ///
865 /// Note that the phenotype information in the .fam file is
866 /// seldom used.
867 ///
868 /// By default, when the .fam is read, the phenotype is recorded.
869 /// This stops that recording. This is useful if this phenotype
870 /// information is not needed.
871 /// Asking for the phenotype after skipping it results in an error.
872 #[must_use]
873 pub fn skip_pheno(mut self) -> Self {
874 // Unwrap will always work because BedCloudBuilder starting with some skip_set
875 self.skip_set
876 .as_mut()
877 .unwrap()
878 .insert(MetadataFields::Pheno);
879 self
880 }
881
882 /// Don't read the chromosome information from the .bim file.
883 ///
884 /// By default, when the .bim is read, the chromosome is recorded.
885 /// This stops that recording. This is useful if the chromosome is not needed.
886 /// Asking for the chromosome after skipping it results in an error.
887 #[must_use]
888 pub fn skip_chromosome(mut self) -> Self {
889 // Unwrap will always work because BedCloudBuilder starting with some skip_set
890 self.skip_set
891 .as_mut()
892 .unwrap()
893 .insert(MetadataFields::Chromosome);
894 self
895 }
896
897 /// Don't read the SNP id information from the .bim file.
898 ///
899 /// By default, when the .bim is read, the sid (SNP id) is recorded.
900 /// This stops that recording. This is useful if the sid is not needed.
901 /// Asking for the sid after skipping it results in an error.
902 #[must_use]
903 pub fn skip_sid(mut self) -> Self {
904 // Unwrap will always work because BedCloudBuilder starting with some skip_set
905 self.skip_set.as_mut().unwrap().insert(MetadataFields::Sid);
906 self
907 }
908
909 /// Don't read the centimorgan position information from the .bim file.
910 ///
911 /// By default, when the .bim is read, the cm position is recorded.
912 /// This stops that recording. This is useful if the cm position is not needed.
913 /// Asking for the cm position after skipping it results in an error.
914 #[must_use]
915 pub fn skip_cm_position(mut self) -> Self {
916 // Unwrap will always work because BedCloudBuilder starting with some skip_set
917 self.skip_set
918 .as_mut()
919 .unwrap()
920 .insert(MetadataFields::CmPosition);
921 self
922 }
923
924 /// Don't read the base-pair position information from the .bim file.
925 ///
926 /// By default, when the .bim is read, the bp position is recorded.
927 /// This stops that recording. This is useful if the bp position is not needed.
    /// Asking for the bp position after skipping it results in an error.
929 #[must_use]
930 pub fn skip_bp_position(mut self) -> Self {
931 // Unwrap will always work because BedCloudBuilder starting with some skip_set
932 self.skip_set
933 .as_mut()
934 .unwrap()
935 .insert(MetadataFields::BpPosition);
936 self
937 }
938
939 /// Don't read the allele 1 information from the .bim file.
940 ///
941 /// By default, when the .bim is read, allele 1 is recorded.
942 /// This stops that recording. This is useful if allele 1 is not needed.
943 /// Asking for allele 1 after skipping it results in an error.
944 #[must_use]
945 pub fn skip_allele_1(mut self) -> Self {
946 // Unwrap will always work because BedCloudBuilder starting with some skip_set
947 self.skip_set
948 .as_mut()
949 .unwrap()
950 .insert(MetadataFields::Allele1);
951 self
952 }
953
954 /// Don't read the allele 2 information from the .bim file.
955 ///
956 /// By default, when the .bim is read, allele 2 is recorded.
957 /// This stops that recording. This is useful if allele 2 is not needed.
958 /// Asking for allele 2 after skipping it results in an error.
959 #[must_use]
960 pub fn skip_allele_2(mut self) -> Self {
961 // Unwrap will always work because BedCloudBuilder starting with some skip_set
962 self.skip_set
963 .as_mut()
964 .unwrap()
965 .insert(MetadataFields::Allele2);
966 self
967 }
968
969 /// Override the metadata in the .fam and .bim files with info merged in from a [`Metadata`](struct.Metadata.html).
970 ///
971 /// # Example
972 ///
973 /// In the example, we create a [`Metadata`](struct.Metadata.html) with iid
974 /// and sid arrays. Next, we use [`BedCloudBuilder`](struct.BedCloudBuilder.html) to override the fid array
975 /// and an iid array. Then, we add the metadata to the [`BedCloudBuilder`](struct.BedCloudBuilder.html),
976 /// overwriting iid (again) and overriding sid. Finally, we print these
977 /// three arrays and chromosome. Chromosome was never overridden so
978 /// it is read from the *.bim file.
979 ///```
980 /// use ndarray as nd;
981 /// use bed_reader::{BedCloud, Metadata};
982 ///
983 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
984 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
985 /// let metadata = Metadata::builder()
986 /// .iid(["i1", "i2", "i3"])
987 /// .sid(["s1", "s2", "s3", "s4"])
988 /// .build()?;
989 /// let mut bed_cloud = BedCloud::builder(url)?
990 /// .fid(["f1", "f2", "f3"])
991 /// .iid(["x1", "x2", "x3"])
992 /// .metadata(&metadata)
993 /// .build().await?;
994 /// println!("{0:?}", bed_cloud.fid().await?); // Outputs ndarray ["f1", "f2", "f3"]
995 /// println!("{0:?}", bed_cloud.iid().await?); // Outputs ndarray ["i1", "i2", "i3"]
996 /// println!("{0:?}", bed_cloud.sid().await?); // Outputs ndarray ["s1", "s2", "s3", "s4"]
997 /// println!("{0:?}", bed_cloud.chromosome().await?); // Outputs ndarray ["1", "1", "5", "Y"]
998 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
999 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1000 /// ```
1001 #[must_use]
1002 pub fn metadata(mut self, metadata: &Metadata) -> Self {
1003 self.metadata = Some(
1004 Metadata::builder()
1005 .metadata(&self.metadata.unwrap()) // unwrap is ok because we know we have metadata
1006 .metadata(metadata) // consistent counts will be check later by the BedCloudBuilder
1007 .build_no_file_check()
1008 .unwrap(), // unwrap is ok because nothing can go wrong
1009 );
1010
1011 self
1012 }
1013}
1014
1015impl BedCloud {
1016 #[allow(clippy::doc_link_with_quotes)]
1017 /// Attempts to open a PLINK .bed file in the cloud for reading. The file is specified with a URL string and cloud options can be given.
1018 ///
    /// See ["Cloud URLs and `CloudFile` Examples"](supplemental_document_cloud_urls/index.html) for details of specifying a file.
1020 ///
1021 /// You may give [cloud options](supplemental_document_options/index.html#cloud-options) but not
1022 /// [`BedCloud` options](supplemental_document_options/index.html#bedbedcloud-options) or
1023 /// [`ReadOptions`](supplemental_document_options/index.html#readoptions).
1024 /// See ["Options, Options, Options"](supplemental_document_options/index.html) for details.
1025 ///
    /// > Also see [`BedCloud::new`](struct.BedCloud.html#method.new), which does not support cloud options.
    /// > See [`BedCloud::builder`](struct.BedCloud.html#method.builder) and
    /// > [`BedCloud::builder_with_options`](struct.BedCloud.html#method.builder_with_options), which do support
    /// > `BedCloud` options.
    /// > Alternatively, you can use [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file)
    /// > to specify the cloud file via a [`CloudFile`](struct.CloudFile.html). For reading local files,
    /// > see [`Bed`](struct.Bed.html).
1033 ///
1034 /// # Errors
1035 /// URL parsing may return an error.
1036 /// Also, by default, this method will return an error if the file is missing or its header
1037 /// is ill-formed. See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
1038 /// for all possible errors.
1039 ///
1040 /// # Examples
1041 /// List individual (sample) [`iid`](struct.BedCloud.html#method.iid) and
1042 /// SNP (variant) [`sid`](struct.BedCloud.html#method.sid),
1043 /// then [`read`](struct.BedCloud.html#method.read) the whole file.
1044 ///
1045 /// ```
1046 /// use ndarray as nd;
1047 /// use bed_reader::{BedCloud, assert_eq_nan};
1048 ///
1049 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1050 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1051 /// let cloud_options = [("timeout", "10s")];
1052 /// let mut bed_cloud = BedCloud::new_with_options(url, cloud_options).await?;
1053 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray: ["iid1", "iid2", "iid3"]
1054 /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray: ["sid1", "sid2", "sid3", "sid4"]
1055 /// let val = bed_cloud.read::<f64>().await?;
1056 ///
1057 /// assert_eq_nan(
1058 /// &val,
1059 /// &nd::array![
1060 /// [1.0, 0.0, f64::NAN, 0.0],
1061 /// [2.0, 0.0, f64::NAN, 2.0],
1062 /// [0.0, 1.0, 2.0, 0.0]
1063 /// ],
1064 /// );
1065 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1066 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1067 /// ```
1068 ///
1069 /// Open the file and read data for one SNP (variant)
1070 /// at index position 2.
1071 /// ```
1072 /// # use ndarray as nd;
1073 /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1074 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1075 /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1076 /// # let cloud_options = [("timeout", "10s")];
1077 /// let mut bed_cloud = BedCloud::new_with_options(url, cloud_options).await?;
1078 /// let val = ReadOptions::builder().sid_index(2).f64().read_cloud(&mut bed_cloud).await?;
1079 ///
1080 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
1081 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1082 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1083 /// ```
1084 pub async fn new_with_options<I, K, V>(
1085 url: impl AsRef<str>,
1086 cloud_options: I,
1087 ) -> Result<Self, Box<BedErrorPlus>>
1088 where
1089 I: IntoIterator<Item = (K, V)>,
1090 K: AsRef<str>,
1091 V: Into<String>,
1092 {
1093 let cloud_file = CloudFile::new_with_options(url, cloud_options)?;
1094 let bed_cloud = BedCloud::from_cloud_file(&cloud_file).await?;
1095 Ok(bed_cloud)
1096 }
1097
1098 #[allow(clippy::doc_link_with_quotes)]
1099 /// Attempts to open a PLINK .bed file in the cloud for reading. The file is specified with a URL string.
1100 ///
    /// See ["Cloud URLs and `CloudFile` Examples"](supplemental_document_cloud_urls/index.html) for details of specifying a file.
1102 ///
1103 /// See ["Options, Options, Options"](supplemental_document_options/index.html) for details of the different option types.
1104 ///
    /// > Also see [`BedCloud::new_with_options`](struct.BedCloud.html#method.new_with_options), which supports cloud options.
    /// > See [`BedCloud::builder`](struct.BedCloud.html#method.builder) and
    /// > [`BedCloud::builder_with_options`](struct.BedCloud.html#method.builder_with_options), which do support
    /// > `BedCloud` options.
    /// > Alternatively, you can use [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file)
    /// > to specify the cloud file via a [`CloudFile`](struct.CloudFile.html). For reading local files,
    /// > see [`Bed`](struct.Bed.html).
1112 ///
1113 /// # Errors
1114 /// URL parsing may return an error.
1115 /// Also, by default, this method will return an error if the file is missing or its header
1116 /// is ill-formed. See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
1117 /// for all possible errors.
1118 ///
1119 /// # Examples
1120 /// List individual (sample) [`iid`](struct.BedCloud.html#method.iid) and
1121 /// SNP (variant) [`sid`](struct.BedCloud.html#method.sid),
1122 /// then [`read`](struct.BedCloud.html#method.read) the whole file.
1123 ///
1124 /// ```
1125 /// use ndarray as nd;
1126 /// use bed_reader::{BedCloud, assert_eq_nan};
1127 ///
1128 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1129 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1130 /// let mut bed_cloud = BedCloud::new(url).await?;
1131 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray: ["iid1", "iid2", "iid3"]
1132 /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray: ["sid1", "sid2", "sid3", "sid4"]
1133 /// let val = bed_cloud.read::<f64>().await?;
1134 ///
1135 /// assert_eq_nan(
1136 /// &val,
1137 /// &nd::array![
1138 /// [1.0, 0.0, f64::NAN, 0.0],
1139 /// [2.0, 0.0, f64::NAN, 2.0],
1140 /// [0.0, 1.0, 2.0, 0.0]
1141 /// ],
1142 /// );
1143 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1144 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1145 /// ```
1146 ///
1147 /// Open the file and read data for one SNP (variant)
1148 /// at index position 2.
1149 /// ```
1150 /// # use ndarray as nd;
1151 /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1152 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1153 /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1154 /// let mut bed_cloud = BedCloud::new(url).await?;
1155 /// let val = ReadOptions::builder().sid_index(2).f64().read_cloud(&mut bed_cloud).await?;
1156 ///
1157 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
1158 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1159 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1160 /// ```
1161 pub async fn new(url: impl AsRef<str>) -> Result<Self, Box<BedErrorPlus>> {
1162 let cloud_file = CloudFile::new(url)?;
1163 let bed_cloud = BedCloud::from_cloud_file(&cloud_file).await?;
1164 Ok(bed_cloud)
1165 }
1166
1167 #[allow(clippy::doc_link_with_quotes)]
1168 /// Attempts to open a PLINK .bed file in the cloud for reading. The file is specified with a URL string.
1169 /// Supports [`BedCloud` options](supplemental_document_options/index.html#bedbedcloud-options) but not
1170 /// [cloud options](supplemental_document_options/index.html#cloud-options).
1171 ///
1172 /// See ["Cloud URLs and `CloudFile` Examples"](supplemental_document_cloud_urls/index.html) for details of specifying a file.
1173 /// See ["Options, Options, Options"](supplemental_document_options/index.html) for an overview of options types.
1174 ///
1175 /// > Also see [`BedCloud::new`](struct.BedCloud.html#method.new) and [`BedCloud::new_with_options`](struct.BedCloud.html#method.new_with_options),
1176 /// > which do not support `BedCloud` options.
1177 /// > Alternatively, you can use [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file)
    /// > to specify the cloud file via a [`CloudFile`](struct.CloudFile.html). For reading local files,
1179 /// > see [`Bed`](struct.Bed.html).
1180 ///
1181 /// The `BedCloud` options, [listed here](struct.BedCloudBuilder.html#implementations), can:
1182 /// * set the cloud location of the .fam and/or .bim file
1183 /// * override some metadata, for example, replace the individual ids.
1184 /// * set the number of individuals (samples) or SNPs (variants)
1185 /// * control checking the validity of the .bed file's header
1186 /// * skip reading selected metadata
1187 ///
1188 /// # Errors
1189 /// URL parsing may return an error.
1190 /// Also, by default, this method will return an error if the file is missing or its header
1191 /// is ill-formed. It will also return an error if the options contradict each other.
1192 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
1193 /// for all possible errors.
1194 ///
1195 /// # Examples
1196 /// List individual (sample) [`iid`](struct.BedCloud.html#method.iid) and
1197 /// SNP (variant) [`sid`](struct.BedCloud.html#method.sid),
1198 /// then [`read`](struct.BedCloud.html#method.read) the whole file.
1199 ///
1200 /// ```
1201 /// use ndarray as nd;
1202 /// use bed_reader::{BedCloud, assert_eq_nan};
1203 ///
1204 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1205 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1206 /// let mut bed_cloud = BedCloud::builder(url)?.build().await?;
1207 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
1209 /// let val = bed_cloud.read::<f64>().await?;
1210 ///
1211 /// assert_eq_nan(
1212 /// &val,
1213 /// &nd::array![
1214 /// [1.0, 0.0, f64::NAN, 0.0],
1215 /// [2.0, 0.0, f64::NAN, 2.0],
1216 /// [0.0, 1.0, 2.0, 0.0]
1217 /// ],
1218 /// );
1219 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1220 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1221 /// ```
1222 ///
1223 /// Replace [`iid`](struct.BedCloud.html#method.iid).
1224 /// ```
1225 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1226 /// # use ndarray as nd;
1227 /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1228 /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1229 /// let mut bed_cloud = BedCloud::builder(url)?
1230 /// .iid(["sample1", "sample2", "sample3"])
1231 /// .build().await?;
1232 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["sample1", "sample2", "sample3"]
1233 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1234 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1235 /// ```
1236 /// Give the number of individuals (samples) and SNPs (variants) so that the .fam and
1237 /// .bim files need never be opened. Use `.skip_early_check()` to avoid opening the
1238 /// .bed before the first read.
1239 /// ```
1240 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1241 /// # use ndarray as nd;
1242 /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1243 /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1244 /// let mut bed_cloud = BedCloud::builder(url)?
1245 /// .iid_count(3)
1246 /// .sid_count(4)
1247 /// .skip_early_check()
1248 /// .build()
1249 /// .await?;
1250 /// let val = bed_cloud.read::<f64>().await?;
1251 ///
1252 /// assert_eq_nan(
1253 /// &val,
1254 /// &nd::array![
1255 /// [1.0, 0.0, f64::NAN, 0.0],
1256 /// [2.0, 0.0, f64::NAN, 2.0],
1257 /// [0.0, 1.0, 2.0, 0.0]
1258 /// ],
1259 /// );
1260 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1261 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1262 /// ```
1263 /// Mark some properties as "don’t read or offer".
1264 /// ```
1265 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1266 /// # use ndarray as nd;
1267 /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1268 /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1269 /// let mut bed_cloud = BedCloud::builder(url)?
1270 /// .skip_father()
1271 /// .skip_mother()
1272 /// .skip_sex()
1273 /// .skip_pheno()
1274 /// .skip_allele_1()
1275 /// .skip_allele_2()
1276 /// .build().await?;
1277 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
1278 /// bed_cloud.allele_2().await.expect_err("Can't be read");
1279 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1280 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1281 /// ```
1282 pub fn builder(url: impl AsRef<str>) -> Result<BedCloudBuilder, Box<BedErrorPlus>> {
1283 BedCloudBuilder::new(url, EMPTY_OPTIONS)
1284 }
1285
1286 #[allow(clippy::doc_link_with_quotes)]
1287 /// Attempts to open a PLINK .bed file in the cloud for reading. The file is specified with a URL string and cloud options can be given.
1288 /// Supports both [cloud options](supplemental_document_options/index.html#cloud-options) and
1289 /// [`BedCloud` options](supplemental_document_options/index.html#bedbedcloud-options).
1290 ///
1291 /// See ["Cloud URLs and `CloudFile` Examples"](supplemental_document_cloud_urls/index.html) for details of specifying a file.
1292 /// See ["Options, Options, Options"](supplemental_document_options/index.html) for an overview of options types.
1293 ///
1294 /// > Also see [`BedCloud::new`](struct.BedCloud.html#method.new) and [`BedCloud::new_with_options`](struct.BedCloud.html#method.new_with_options),
1295 /// > which do not support `BedCloud` options.
1296 /// > Alternatively, you can use [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file)
    /// > to specify the cloud file via a [`CloudFile`](struct.CloudFile.html). For reading local files,
1298 /// > see [`Bed`](struct.Bed.html).
1299 ///
1300 /// The `BedCloud` options, [listed here](struct.BedCloudBuilder.html#implementations), can:
1301 /// * set the cloud location of the .fam and/or .bim file
1302 /// * override some metadata, for example, replace the individual ids.
1303 /// * set the number of individuals (samples) or SNPs (variants)
1304 /// * control checking the validity of the .bed file's header
1305 /// * skip reading selected metadata
1306 ///
1307 /// # Errors
1308 /// URL parsing may return an error.
1309 /// Also, by default, this method will return an error if the file is missing or its header
1310 /// is ill-formed. It will also return an error if the options contradict each other.
1311 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
1312 /// for all possible errors.
1313 ///
1314 /// # Examples
1315 /// List individual (sample) [`iid`](struct.BedCloud.html#method.iid) and
1316 /// SNP (variant) [`sid`](struct.BedCloud.html#method.sid),
1317 /// then [`read`](struct.BedCloud.html#method.read) the whole file.
1318 ///
1319 /// ```
1320 /// use ndarray as nd;
1321 /// use bed_reader::{BedCloud, assert_eq_nan};
1322 ///
1323 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1324 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1325 /// let cloud_options = [("timeout", "10s")];
1326 /// let mut bed_cloud = BedCloud::builder_with_options(url, cloud_options)?.build().await?;
1327 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
1329 /// let val = bed_cloud.read::<f64>().await?;
1330 ///
1331 /// assert_eq_nan(
1332 /// &val,
1333 /// &nd::array![
1334 /// [1.0, 0.0, f64::NAN, 0.0],
1335 /// [2.0, 0.0, f64::NAN, 2.0],
1336 /// [0.0, 1.0, 2.0, 0.0]
1337 /// ],
1338 /// );
1339 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1340 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1341 /// ```
1342 ///
1343 /// Replace [`iid`](struct.BedCloud.html#method.iid).
1344 /// ```
1345 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1346 /// # use ndarray as nd;
1347 /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1348 /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1349 /// # let cloud_options = [("timeout", "10s")];
1350 /// let mut bed_cloud = BedCloud::builder_with_options(url, cloud_options)?
1351 /// .iid(["sample1", "sample2", "sample3"])
1352 /// .build().await?;
1353 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["sample1", "sample2", "sample3"]
1354 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1355 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1356 /// ```
1357 /// Give the number of individuals (samples) and SNPs (variants) so that the .fam and
1358 /// .bim files need never be opened. Use `.skip_early_check()` to avoid opening the
1359 /// .bed before the first read.
1360 /// ```
1361 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1362 /// # use ndarray as nd;
1363 /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1364 /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1365 /// # let cloud_options = [("timeout", "10s")];
1366 /// let mut bed_cloud = BedCloud::builder_with_options(url, cloud_options)?
1367 /// .iid_count(3)
1368 /// .sid_count(4)
1369 /// .skip_early_check()
1370 /// .build()
1371 /// .await?;
1372 /// let val = bed_cloud.read::<f64>().await?;
1373 ///
1374 /// assert_eq_nan(
1375 /// &val,
1376 /// &nd::array![
1377 /// [1.0, 0.0, f64::NAN, 0.0],
1378 /// [2.0, 0.0, f64::NAN, 2.0],
1379 /// [0.0, 1.0, 2.0, 0.0]
1380 /// ],
1381 /// );
1382 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1383 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1384 /// ```
1385 /// Mark some properties as "don’t read or offer".
1386 /// ```
1387 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1388 /// # use ndarray as nd;
1389 /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1390 /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1391 /// # let cloud_options = [("timeout", "10s")];
1392 /// let mut bed_cloud = BedCloud::builder_with_options(url, cloud_options)?
1393 /// .skip_father()
1394 /// .skip_mother()
1395 /// .skip_sex()
1396 /// .skip_pheno()
1397 /// .skip_allele_1()
1398 /// .skip_allele_2()
1399 /// .build().await?;
1400 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
1401 /// bed_cloud.allele_2().await.expect_err("Can't be read");
1402 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1403 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1404 /// ```
1405 pub fn builder_with_options<I, K, V>(
1406 url: impl AsRef<str>,
1407 options: I,
1408 ) -> Result<BedCloudBuilder, Box<BedErrorPlus>>
1409 where
1410 I: IntoIterator<Item = (K, V)>,
1411 K: AsRef<str>,
1412 V: Into<String>,
1413 {
1414 BedCloudBuilder::new(url, options)
1415 }
1416}
1417
1418impl BedCloud {
    /// Attempts to open a PLINK .bed file in the cloud for reading. Specify the file with a [`CloudFile`](https://docs.rs/cloud-file/).
1420 /// Supports [`BedCloud` options](supplemental_document_options/index.html#bedbedcloud-options).
1421 ///
1422 /// > Alternatively, you can use [`BedCloud::new`](struct.BedCloud.html#method.new) or [`BedCloud::builder`](struct.BedCloud.html#method.builder)
1423 /// > to specify the cloud file via a URL string. For reading local files,
1424 /// > see [`Bed`](struct.Bed.html).
1425 ///
1426 /// The `BedCloud` options, [listed here](struct.BedCloudBuilder.html#implementations), can:
1427 /// * set the cloud location of the .fam and/or .bim file
1428 /// * override some metadata, for example, replace the individual ids.
1429 /// * set the number of individuals (samples) or SNPs (variants)
1430 /// * control checking the validity of the .bed file's header
1431 /// * skip reading selected metadata
1432 ///
1433 /// # Errors
1434 /// By default, this method will return an error if the file is missing or its header
1435 /// is ill-formed. It will also return an error if the options contradict each other.
1436 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
1437 /// for all possible errors.
1438 ///
1439 /// # Examples
1440 /// List individual (sample) [`iid`](struct.BedCloud.html#method.iid) and
1441 /// SNP (variant) [`sid`](struct.BedCloud.html#method.sid),
1442 /// then [`read`](struct.BedCloud.html#method.read) the whole file.
1443 ///
1444 /// ```
1445 /// use ndarray as nd;
1446 /// use bed_reader::{BedCloud, assert_eq_nan};
1447 /// use cloud_file::CloudFile;
1448 ///
1449 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1450 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1451 /// let cloud_file = CloudFile::new(url)?;
1452 /// let mut bed_cloud = BedCloud::builder_from_cloud_file(&cloud_file).build().await?;
1453 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
1455 /// let val = bed_cloud.read::<f64>().await?;
1456 ///
1457 /// assert_eq_nan(
1458 /// &val,
1459 /// &nd::array![
1460 /// [1.0, 0.0, f64::NAN, 0.0],
1461 /// [2.0, 0.0, f64::NAN, 2.0],
1462 /// [0.0, 1.0, 2.0, 0.0]
1463 /// ],
1464 /// );
1465 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1466 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1467 /// ```
1468 #[must_use]
1469 pub fn builder_from_cloud_file(cloud_file: &CloudFile) -> BedCloudBuilder {
1470 BedCloudBuilder::from(cloud_file)
1471 }
1472
    /// Attempts to open a PLINK .bed file in the cloud for reading. Specify the file with a [`CloudFile`].
1474 ///
1475 /// You may not give
1476 /// [`BedCloud` options](supplemental_document_options/index.html#bedbedcloud-options).
1477 /// See [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file), which does support
1478 /// `BedCloud` options.
1479 ///
1480 /// > Also see, [`BedCloud::new`](struct.BedCloud.html#method.new) and [`BedCloud::builder`](struct.BedCloud.html#method.builder)
1481 /// > to specify the cloud file via a URL string. For reading local files,
1482 /// > see [`Bed`](struct.Bed.html).
1483 ///
1484 /// # Errors
1485 /// By default, this method will return an error if the file is missing or its header
1486 /// is ill-formed. See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
1487 /// for all possible errors.
1488 ///
1489 /// # Examples
1490 /// List individual (sample) [`iid`](struct.BedCloud.html#method.iid) and
1491 /// SNP (variant) [`sid`](struct.BedCloud.html#method.sid),
1492 /// then [`read`](struct.BedCloud.html#method.read) the whole file.
1493 ///
1494 /// ```
1495 /// use ndarray as nd;
1496 /// use bed_reader::{BedCloud, assert_eq_nan};
1497 /// use cloud_file::CloudFile;
1498 ///
1499 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1500 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1501 /// let cloud_file = CloudFile::new(url)?;
1502 /// let mut bed_cloud = BedCloud::from_cloud_file(&cloud_file).await?;
1503 /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray: ["iid1", "iid2", "iid3"]
1504 /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray: ["sid1", "sid2", "sid3", "sid4"]
1505 /// let val = bed_cloud.read::<f64>().await?;
1506 ///
1507 /// assert_eq_nan(
1508 /// &val,
1509 /// &nd::array![
1510 /// [1.0, 0.0, f64::NAN, 0.0],
1511 /// [2.0, 0.0, f64::NAN, 2.0],
1512 /// [0.0, 1.0, 2.0, 0.0]
1513 /// ],
1514 /// );
1515 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1516 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1517 /// ```
1518 pub async fn from_cloud_file(cloud_file: &CloudFile) -> Result<Self, Box<BedErrorPlus>> {
1519 BedCloudBuilder::from(cloud_file).build().await
1520 }
1521
1522 /// Number of individuals (samples)
1523 ///
1524 /// If this number is needed, it will be found
1525 /// by opening the .fam file and quickly counting the number
1526 /// of lines. Once found, the number will be remembered.
1527 /// The file read can be avoided by setting the
1528 /// number with [`BedCloudBuilder::iid_count`](struct.BedCloudBuilder.html#method.iid_count)
1529 /// or, for example, [`BedCloudBuilder::iid`](struct.BedCloudBuilder.html#method.iid).
1530 ///
1531 /// # Example:
1532 /// ```
1533 /// use ndarray as nd;
1534 /// use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1535 ///
1536 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1537 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1538 /// let mut bed_cloud = BedCloud::new(url).await?;
1539 /// let iid_count = bed_cloud.iid_count().await?;
1540 ///
1541 /// assert!(iid_count == 3);
1542 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1543 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1544 pub async fn iid_count(&mut self) -> Result<usize, Box<BedErrorPlus>> {
1545 if let Some(iid_count) = self.iid_count {
1546 Ok(iid_count)
1547 } else {
1548 let fam_cloud_file = self.fam_cloud_file()?;
1549 let iid_count = fam_cloud_file.count_lines().await?;
1550 self.iid_count = Some(iid_count);
1551 Ok(iid_count)
1552 }
1553 }
1554
1555 /// Number of SNPs (variants)
1556 ///
1557 /// If this number is needed, it will be found
1558 /// by opening the .bim file and quickly counting the number
1559 /// of lines. Once found, the number will be remembered.
1560 /// The file read can be avoided by setting the
1561 /// number with [`BedCloudBuilder::sid_count`](struct.BedCloudBuilder.html#method.sid_count)
1562 /// or, for example, [`BedCloudBuilder::sid`](struct.BedCloudBuilder.html#method.sid).
1563 ///
1564 /// # Example:
1565 /// ```
1566 /// use ndarray as nd;
1567 /// use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1568 ///
1569 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1570 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1571 /// let mut bed_cloud = BedCloud::new(url).await?;
1572 /// let sid_count = bed_cloud.sid_count().await?;
1573 ///
1574 /// assert!(sid_count == 4);
1575 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1576 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1577 pub async fn sid_count(&mut self) -> Result<usize, Box<BedErrorPlus>> {
1578 if let Some(sid_count) = self.sid_count {
1579 Ok(sid_count)
1580 } else {
1581 let bim_cloud_file = self.bim_cloud_file()?;
1582 let sid_count = bim_cloud_file.count_lines().await?;
1583 self.sid_count = Some(sid_count);
1584 Ok(sid_count)
1585 }
1586 }
1587
1588 /// Number of individuals (samples) and SNPs (variants)
1589 ///
1590 /// If these numbers aren't known, they will be found
1591 /// by opening the .fam and .bim files and quickly counting the number
1592 /// of lines. Once found, the numbers will be remembered.
1593 /// The file read can be avoided by setting the
1594 /// number with [`BedCloudBuilder::iid_count`](struct.BedCloudBuilder.html#method.iid_count)
1595 /// and [`BedCloudBuilder::sid_count`](struct.BedCloudBuilder.html#method.sid_count).
1596 ///
1597 /// # Example:
1598 /// ```
1599 /// use ndarray as nd;
1600 /// use bed_reader::{BedCloud, ReadOptions};
1601 /// use bed_reader::assert_eq_nan;
1602 ///
1603 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1604 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1605 /// let mut bed_cloud = BedCloud::new(url).await?;
1606 /// let dim = bed_cloud.dim().await?;
1607 ///
1608 /// assert!(dim == (3,4));
1609 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1610 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1611 // LATER: Could these be called at the same time, async?
1612 pub async fn dim(&mut self) -> Result<(usize, usize), Box<BedErrorPlus>> {
1613 Ok((self.iid_count().await?, self.sid_count().await?))
1614 }
1615
1616 /// Family id of each of individual (sample)
1617 ///
1618 /// If this ndarray is needed, it will be found
1619 /// by reading the .fam file. Once found, this ndarray
1620 /// and other information in the .fam file will be remembered.
1621 /// The file read can be avoided by setting the
1622 /// array with [`BedCloudBuilder::fid`](struct.BedCloudBuilder.html#method.fid).
1623 ///
1624 /// # Example:
1625 /// ```
1626 /// use ndarray as nd;
1627 /// use bed_reader::{BedCloud, ReadOptions};
1628 /// use bed_reader::assert_eq_nan;
1629 ///
1630 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1631 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1632 /// let mut bed_cloud = BedCloud::new(url).await?;
1633 /// let fid = bed_cloud.fid().await?;
1634 /// println!("{fid:?}"); // Outputs ndarray ["fid1", "fid1", "fid2"]
1635 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1636 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1637 pub async fn fid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1638 self.unlazy_fam::<String>(self.metadata.fid.is_none(), MetadataFields::Fid, "fid")
1639 .await?;
1640 Ok(self.metadata.fid.as_ref().unwrap()) //unwrap always works because of lazy_fam
1641 }
1642
1643 /// Individual id of each of individual (sample)
1644 ///
1645 /// If this ndarray is needed, it will be found
1646 /// by reading the .fam file. Once found, this ndarray
1647 /// and other information in the .fam file will be remembered.
1648 /// The file read can be avoided by setting the
1649 /// array with [`BedCloudBuilder::iid`](struct.BedCloudBuilder.html#method.iid).
1650 ///
1651 /// # Example:
1652 /// ```
1653 /// use ndarray as nd;
1654 /// use bed_reader::{BedCloud, ReadOptions};
1655 /// use bed_reader::assert_eq_nan;
1656 ///
1657 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1658 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1659 /// let mut bed_cloud = BedCloud::new(url).await?;
1660 /// let iid = bed_cloud.iid().await?; ///
1661 /// println!("{iid:?}"); // Outputs ndarray ["iid1", "iid2", "iid3"]
1662 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1663 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1664 pub async fn iid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1665 self.unlazy_fam::<String>(self.metadata.iid.is_none(), MetadataFields::Iid, "iid")
1666 .await?;
1667 Ok(self.metadata.iid.as_ref().unwrap()) //unwrap always works because of lazy_fam
1668 }
1669
1670 /// Father id of each of individual (sample)
1671 ///
1672 /// If this ndarray is needed, it will be found
1673 /// by reading the .fam file. Once found, this ndarray
1674 /// and other information in the .fam file will be remembered.
1675 /// The file read can be avoided by setting the
1676 /// array with [`BedCloudBuilder::father`](struct.BedCloudBuilder.html#method.father).
1677 ///
1678 /// # Example:
1679 /// ```
1680 /// use ndarray as nd;
1681 /// use bed_reader::{BedCloud, ReadOptions};
1682 /// use bed_reader::assert_eq_nan;
1683 ///
1684 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1685 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1686 /// let mut bed_cloud = BedCloud::new(url).await?;
1687 /// let father = bed_cloud.father().await?;
1688 /// println!("{father:?}"); // Outputs ndarray ["iid23", "iid23", "iid22"]
1689 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1690 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1691 pub async fn father(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1692 self.unlazy_fam::<String>(
1693 self.metadata.father.is_none(),
1694 MetadataFields::Father,
1695 "father",
1696 )
1697 .await?;
1698 Ok(self.metadata.father.as_ref().unwrap()) //unwrap always works because of lazy_fam
1699 }
1700
1701 /// Mother id of each of individual (sample)
1702 ///
1703 /// If this ndarray is needed, it will be found
1704 /// by reading the .fam file. Once found, this ndarray
1705 /// and other information in the .fam file will be remembered.
1706 /// The file read can be avoided by setting the
1707 /// array with [`BedCloudBuilder::mother`](struct.BedCloudBuilder.html#method.mother).
1708 ///
1709 /// # Example:
1710 /// ```
1711 /// use ndarray as nd;
1712 /// use bed_reader::{BedCloud, ReadOptions};
1713 /// use bed_reader::assert_eq_nan;
1714 ///
1715 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1716 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1717 /// let mut bed_cloud = BedCloud::new(url).await?;
1718 /// let mother = bed_cloud.mother().await?;
1719 /// println!("{mother:?}"); // Outputs ndarray ["iid34", "iid34", "iid33"]
1720 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1721 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1722 pub async fn mother(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1723 self.unlazy_fam::<String>(
1724 self.metadata.mother.is_none(),
1725 MetadataFields::Mother,
1726 "mother",
1727 )
1728 .await?;
1729 Ok(self.metadata.mother.as_ref().unwrap()) //unwrap always works because of lazy_fam
1730 }
1731
1732 /// Sex each of individual (sample)
1733 ///
1734 /// 0 is unknown, 1 is male, 2 is female
1735 ///
1736 /// If this ndarray is needed, it will be found
1737 /// by reading the .fam file. Once found, this ndarray
1738 /// and other information in the .fam file will be remembered.
1739 /// The file read can be avoided by setting the
1740 /// array with [`BedCloudBuilder::sex`](struct.BedCloudBuilder.html#method.sex).
1741 ///
1742 /// # Example:
1743 /// ```
1744 /// use ndarray as nd;
1745 /// use bed_reader::{BedCloud, ReadOptions};
1746 /// use bed_reader::assert_eq_nan;
1747 ///
1748 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1749 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1750 /// let mut bed_cloud = BedCloud::new(url).await?;
1751 /// let sex = bed_cloud.sex().await?;
1752 /// println!("{sex:?}"); // Outputs ndarray [1, 2, 0]
1753 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1754 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1755 pub async fn sex(&mut self) -> Result<&nd::Array1<i32>, Box<BedErrorPlus>> {
1756 self.unlazy_fam::<String>(self.metadata.sex.is_none(), MetadataFields::Sex, "sex")
1757 .await?;
1758 Ok(self.metadata.sex.as_ref().unwrap()) //unwrap always works because of lazy_fam
1759 }
1760
1761 /// A phenotype for each individual (seldom used)
1762 ///
1763 /// If this ndarray is needed, it will be found
1764 /// by reading the .fam file. Once found, this ndarray
1765 /// and other information in the .fam file will be remembered.
1766 /// The file read can be avoided by setting the
1767 /// array with [`BedCloudBuilder::pheno`](struct.BedCloudBuilder.html#method.pheno).
1768 ///
1769 /// # Example:
1770 /// ```
1771 /// use ndarray as nd;
1772 /// use bed_reader::{BedCloud, ReadOptions};
1773 /// use bed_reader::assert_eq_nan;
1774 ///
1775 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1776 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1777 /// let mut bed_cloud = BedCloud::new(url).await?;
1778 /// let pheno = bed_cloud.pheno().await?;
1779 /// println!("{pheno:?}"); // Outputs ndarray ["red", "red", "blue"]
1780 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1781 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1782 pub async fn pheno(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1783 self.unlazy_fam::<String>(
1784 self.metadata.pheno.is_none(),
1785 MetadataFields::Pheno,
1786 "pheno",
1787 )
1788 .await?;
1789 Ok(self.metadata.pheno.as_ref().unwrap()) //unwrap always works because of lazy_fam
1790 }
1791
1792 /// Chromosome of each SNP (variant)
1793 ///
1794 /// If this ndarray is needed, it will be found
1795 /// by reading the .bim file. Once found, this ndarray
1796 /// and other information in the .bim file will be remembered.
1797 /// The file read can be avoided by setting the
1798 /// array with [`BedCloudBuilder::chromosome`](struct.BedCloudBuilder.html#method.chromosome).
1799 ///
1800 /// # Example:
1801 /// ```
1802 /// use ndarray as nd;
1803 /// use bed_reader::{BedCloud, ReadOptions};
1804 /// use bed_reader::assert_eq_nan;
1805 ///
1806 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1807 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1808 /// let mut bed_cloud = BedCloud::new(url).await?;
1809 /// let chromosome = bed_cloud.chromosome().await?;
1810 /// println!("{chromosome:?}"); // Outputs ndarray ["1", "1", "5", "Y"]
1811 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1812 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1813 /// ```
1814 pub async fn chromosome(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1815 self.unlazy_bim::<String>(
1816 self.metadata.chromosome.is_none(),
1817 MetadataFields::Chromosome,
1818 "chromosome",
1819 )
1820 .await?;
1821 Ok(self.metadata.chromosome.as_ref().unwrap()) //unwrap always works because of lazy_bim
1822 }
1823
1824 /// SNP id of each SNP (variant)
1825 ///
1826 /// If this ndarray is needed, it will be found
1827 /// by reading the .bim file. Once found, this ndarray
1828 /// and other information in the .bim file will be remembered.
1829 /// The file read can be avoided by setting the
1830 /// array with [`BedCloudBuilder::sid`](struct.BedCloudBuilder.html#method.sid).
1831 ///
1832 /// # Example:
1833 /// ```
1834 /// use ndarray as nd;
1835 /// use bed_reader::{BedCloud, ReadOptions};
1836 /// use bed_reader::assert_eq_nan;
1837 ///
1838 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1839 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1840 /// let mut bed_cloud = BedCloud::new(url).await?;
1841 /// let sid = bed_cloud.sid().await?;
1842 /// println!("{sid:?}"); // Outputs ndarray "sid1", "sid2", "sid3", "sid4"]
1843 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1844 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1845 pub async fn sid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1846 self.unlazy_bim::<String>(self.metadata.sid.is_none(), MetadataFields::Sid, "sid")
1847 .await?;
1848 Ok(self.metadata.sid.as_ref().unwrap()) //unwrap always works because of lazy_bim
1849 }
1850
1851 /// Centimorgan position of each SNP (variant)
1852 ///
1853 /// If this ndarray is needed, it will be found
1854 /// by reading the .bim file. Once found, this ndarray
1855 /// and other information in the .bim file will be remembered.
1856 /// The file read can be avoided by setting the
1857 /// array with [`BedCloudBuilder::cm_position`](struct.BedCloudBuilder.html#method.cm_position).
1858 ///
1859 /// # Example:
1860 /// ```
1861 /// use ndarray as nd;
1862 /// use bed_reader::{BedCloud, ReadOptions};
1863 /// use bed_reader::assert_eq_nan;
1864 ///
1865 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1866 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1867 /// let mut bed_cloud = BedCloud::new(url).await?;
1868 /// let cm_position = bed_cloud.cm_position().await?;
1869 /// println!("{cm_position:?}"); // Outputs ndarray [100.4, 2000.5, 4000.7, 7000.9]
1870 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1871 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1872 pub async fn cm_position(&mut self) -> Result<&nd::Array1<f32>, Box<BedErrorPlus>> {
1873 self.unlazy_bim::<String>(
1874 self.metadata.cm_position.is_none(),
1875 MetadataFields::CmPosition,
1876 "cm_position",
1877 )
1878 .await?;
1879 Ok(self.metadata.cm_position.as_ref().unwrap()) //unwrap always works because of lazy_bim
1880 }
1881
1882 /// Base-pair position of each SNP (variant)
1883 ///
1884 /// If this ndarray is needed, it will be found
1885 /// by reading the .bim file. Once found, this ndarray
1886 /// and other information in the .bim file will be remembered.
1887 /// The file read can be avoided by setting the
1888 /// array with [`BedCloudBuilder::bp_position`](struct.BedCloudBuilder.html#method.bp_position).
1889 ///
1890 /// # Example:
1891 /// ```
1892 /// use ndarray as nd;
1893 /// use bed_reader::{BedCloud, ReadOptions};
1894 /// use bed_reader::assert_eq_nan;
1895 ///
1896 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1897 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1898 /// let mut bed_cloud = BedCloud::new(url).await?;
1899 /// let bp_position = bed_cloud.bp_position().await?;
1900 /// println!("{bp_position:?}"); // Outputs ndarray [1, 100, 1000, 1004]
1901 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1902 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1903 pub async fn bp_position(&mut self) -> Result<&nd::Array1<i32>, Box<BedErrorPlus>> {
1904 self.unlazy_bim::<String>(
1905 self.metadata.bp_position.is_none(),
1906 MetadataFields::BpPosition,
1907 "bp_position",
1908 )
1909 .await?;
1910 Ok(self.metadata.bp_position.as_ref().unwrap()) //unwrap always works because of lazy_bim
1911 }
1912
1913 /// First allele of each SNP (variant)
1914 ///
1915 /// If this ndarray is needed, it will be found
1916 /// by reading the .bim file. Once found, this ndarray
1917 /// and other information in the .bim file will be remembered.
1918 /// The file read can be avoided by setting the
1919 /// array with [`BedCloudBuilder::allele_1`](struct.BedCloudBuilder.html#method.allele_1).
1920 ///
1921 /// # Example:
1922 /// ```
1923 /// use ndarray as nd;
1924 /// use bed_reader::{BedCloud, ReadOptions};
1925 /// use bed_reader::assert_eq_nan;
1926 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1927 ///
1928 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1929 /// let mut bed_cloud = BedCloud::new(url).await?;
1930 /// let allele_1 = bed_cloud.allele_1().await?;
1931 /// println!("{allele_1:?}"); // Outputs ndarray ["A", "T", "A", "T"]
1932 /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1933 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1934 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1935 pub async fn allele_1(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1936 self.unlazy_bim::<String>(
1937 self.metadata.allele_1.is_none(),
1938 MetadataFields::Allele1,
1939 "allele_1",
1940 )
1941 .await?;
1942 Ok(self.metadata.allele_1.as_ref().unwrap()) //unwrap always works because of lazy_bim
1943 }
1944
1945 /// Second allele of each SNP (variant)
1946 ///
1947 /// If this ndarray is needed, it will be found
1948 /// by reading the .bim file. Once found, this ndarray
1949 /// and other information in the .bim file will be remembered.
1950 /// The file read can be avoided by setting the
1951 /// array with [`BedCloudBuilder::allele_2`](struct.BedCloudBuilder.html#method.allele_2).
1952 ///
1953 /// # Example:
1954 /// ```
1955 /// use ndarray as nd;
1956 /// use bed_reader::{BedCloud, ReadOptions};
1957 /// use bed_reader::assert_eq_nan;
1958 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1959 ///
1960 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1961 /// let mut bed_cloud = BedCloud::new(url).await?;
1962 /// let allele_2 = bed_cloud.allele_2().await?;
1963 /// println!("{allele_2:?}"); // Outputs ndarray ["A", "C", "C", "G"]
1964 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1965 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1966 /// ```
1967 pub async fn allele_2(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1968 self.unlazy_bim::<String>(
1969 self.metadata.allele_2.is_none(),
1970 MetadataFields::Allele2,
1971 "allele_2",
1972 )
1973 .await?;
1974 Ok(self.metadata.allele_2.as_ref().unwrap()) //unwrap always works because of lazy_bim
1975 }
1976
1977 /// [`Metadata`](struct.Metadata.html) for this dataset, for example, the individual (sample) Ids.
1978 ///
1979 /// This returns a struct with 12 fields. Each field is a ndarray.
1980 /// The struct will always be new, but the 12 ndarrays will be
1981 /// shared with this [`BedCloud`](struct.BedCloud.html).
1982 ///
1983 /// If the needed, the metadata will be read from the .fam and/or .bim files.
1984 /// ```
1985 /// use ndarray as nd;
1986 /// use bed_reader::{BedCloud};
1987 ///
1988 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1989 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1990 /// let mut bed_cloud = BedCloud::new(url).await?;
1991 /// let metadata = bed_cloud.metadata().await?;
1992 /// println!("{0:?}", metadata.iid()); // Outputs Some(["iid1", "iid2", "iid3"] ...)
1993 /// println!("{0:?}", metadata.sid()); // Outputs Some(["sid1", "sid2", "sid3", "sid4"] ...)
1994 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1995 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1996 pub async fn metadata(&mut self) -> Result<Metadata, Box<BedErrorPlus>> {
1997 self.fam().await?;
1998 self.bim().await?;
1999 Ok(self.metadata.clone())
2000 }
2001
2002 /// Return the `CloudFile` of the .bed file.
2003 #[must_use]
2004 pub fn cloud_file(&self) -> CloudFile {
2005 self.cloud_file.clone()
2006 }
2007
2008 /// Return the cloud location of the .fam file.
2009 pub fn fam_cloud_file(&mut self) -> Result<CloudFile, Box<BedErrorPlus>> {
2010 // We need to clone the cloud_file because self might mutate later
2011 if let Some(fam_cloud_file) = &self.fam_cloud_file {
2012 Ok(fam_cloud_file.clone())
2013 } else {
2014 let fam_cloud_file = to_metadata_path(&self.cloud_file, &self.fam_cloud_file, "fam")?;
2015 self.fam_cloud_file = Some(fam_cloud_file.clone());
2016 Ok(fam_cloud_file)
2017 }
2018 }
2019
2020 /// Return the cloud location of the .bim file.
2021 pub fn bim_cloud_file(&mut self) -> Result<CloudFile, Box<BedErrorPlus>> {
2022 // We need to clone the cloud_file because self might mutate later
2023 if let Some(bim_cloud_file) = &self.bim_cloud_file {
2024 Ok(bim_cloud_file.clone())
2025 } else {
2026 let bim_cloud_file = to_metadata_path(&self.cloud_file, &self.bim_cloud_file, "bim")?;
2027 self.bim_cloud_file = Some(bim_cloud_file.clone());
2028 Ok(bim_cloud_file)
2029 }
2030 }
2031
2032 /// Read genotype data.
2033 ///
2034 /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) which supports selection and options.
2035 ///
2036 /// # Errors
2037 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2038 /// for all possible errors.
2039 ///
2040 /// # Examples
2041 /// Read all data in a .bed file.
2042 ///
2043 /// ```
2044 /// use ndarray as nd;
2045 /// use bed_reader::{BedCloud, ReadOptions};
2046 /// use bed_reader::assert_eq_nan;
2047 ///
2048 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
2049 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
2050 /// let mut bed_cloud = BedCloud::new(url).await?;
2051 /// let val = bed_cloud.read::<f64>().await?;
2052 ///
2053 /// assert_eq_nan(
2054 /// &val,
2055 /// &nd::array![
2056 /// [1.0, 0.0, f64::NAN, 0.0],
2057 /// [2.0, 0.0, f64::NAN, 2.0],
2058 /// [0.0, 1.0, 2.0, 0.0]
2059 /// ],
2060 /// );
2061 ///
2062 /// // Your output array can be f32, f64, or i8
2063 /// let val = bed_cloud.read::<i8>().await?;
2064 /// assert_eq_nan(
2065 /// &val,
2066 /// &nd::array![
2067 /// [1, 0, -127, 0],
2068 /// [2, 0, -127, 2],
2069 /// [0, 1, 2, 0]
2070 /// ],
2071 /// );
2072 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
2073 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
2074 /// ```
2075 pub async fn read<TVal: BedVal>(&mut self) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
2076 let read_options = ReadOptions::<TVal>::builder().build()?;
2077 self.read_with_options(&read_options).await
2078 }
2079
2080 /// Read genotype data with options, into a preallocated array.
2081 ///
2082 /// > Also see [`ReadOptionsBuilder::read_and_fill`](struct.ReadOptionsBuilder.html#method.read_and_fill).
2083 ///
2084 /// Note that options [`ReadOptions::f`](struct.ReadOptions.html#method.f),
2085 /// [`ReadOptions::c`](struct.ReadOptions.html#method.c), and [`ReadOptions::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
2086 /// are ignored. Instead, the order of the preallocated array is used.
2087 ///
2088 /// # Errors
2089 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2090 /// for all possible errors.
2091 ///
2092 /// # Example
2093 ///
2094 /// ```
2095 /// use ndarray as nd;
2096 /// use bed_reader::{BedCloud, ReadOptions};
2097 /// use bed_reader::assert_eq_nan;
2098 ///
2099 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
2100 /// // Read the SNPs indexed by 2.
2101 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
2102 /// let mut bed_cloud = BedCloud::new(url).await?;
2103 /// let read_options = ReadOptions::builder().sid_index(2).build()?;
2104 /// let mut val = nd::Array2::<f64>::default((3, 1));
2105 /// bed_cloud.read_and_fill_with_options(&mut val.view_mut(), &read_options).await?;
2106 ///
2107 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
2108 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
2109 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
2110 /// ```
2111 #[allow(clippy::similar_names)]
2112 pub async fn read_and_fill_with_options<TVal: BedVal>(
2113 &mut self,
2114 val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.,
2115 read_options: &ReadOptions<TVal>,
2116 ) -> Result<(), Box<BedErrorPlus>> {
2117 // must do these one-at-a-time because they mutate self to cache the results
2118 let iid_count = self.iid_count().await?;
2119 let sid_count = self.sid_count().await?;
2120
2121 let max_concurrent_requests =
2122 compute_max_concurrent_requests(read_options.max_concurrent_requests)?;
2123
2124 let max_chunk_bytes = compute_max_chunk_bytes(read_options.max_chunk_bytes)?;
2125
2126 // If we already have a Vec<isize>, reference it. If we don't, create one and reference it.
2127 let iid_hold = Hold::new(&read_options.iid_index, iid_count)?;
2128 let iid_index = iid_hold.as_ref();
2129 let sid_hold = Hold::new(&read_options.sid_index, sid_count)?;
2130 let sid_index = sid_hold.as_ref();
2131
2132 let dim = val.dim();
2133 if dim != (iid_index.len(), sid_index.len()) {
2134 Err(BedError::InvalidShape(
2135 iid_index.len(),
2136 sid_index.len(),
2137 dim.0,
2138 dim.1,
2139 ))?;
2140 }
2141
2142 read_no_alloc(
2143 &self.cloud_file,
2144 iid_count,
2145 sid_count,
2146 read_options.is_a1_counted,
2147 iid_index,
2148 sid_index,
2149 read_options.missing_value,
2150 max_concurrent_requests,
2151 max_chunk_bytes,
2152 &mut val.view_mut(),
2153 )
2154 .await
2155 }
2156
2157 /// Read all genotype data into a preallocated array.
2158 ///
2159 /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
2160 ///
2161 /// # Errors
2162 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2163 /// for all possible errors.
2164 ///
2165 /// # Example
2166 ///
2167 /// ```
2168 /// use ndarray as nd;
2169 /// use bed_reader::{BedCloud, ReadOptions};
2170 /// use bed_reader::assert_eq_nan;
2171 ///
2172 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
2173 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
2174 /// let mut bed_cloud = BedCloud::new(url).await?;
2175 /// let mut val = nd::Array2::<i8>::default(bed_cloud.dim().await?);
2176 /// bed_cloud.read_and_fill(&mut val.view_mut()).await?;
2177 ///
2178 /// assert_eq_nan(
2179 /// &val,
2180 /// &nd::array![
2181 /// [1, 0, -127, 0],
2182 /// [2, 0, -127, 2],
2183 /// [0, 1, 2, 0]
2184 /// ],
2185 /// );
2186 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
2187 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
2188 /// ```
2189 pub async fn read_and_fill<TVal: BedVal>(
2190 &mut self,
2191 val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.,
2192 ) -> Result<(), Box<BedErrorPlus>> {
2193 let read_options = ReadOptions::<TVal>::builder().build()?;
2194 self.read_and_fill_with_options(val, &read_options).await
2195 }
2196
2197 /// Read genotype data with options.
2198 ///
2199 /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
2200 ///
2201 /// # Errors
2202 /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2203 /// for all possible errors.
2204 ///
2205 /// # Example
2206 ///
2207 /// ```
2208 /// use ndarray as nd;
2209 /// use bed_reader::{BedCloud, ReadOptions};
2210 /// use bed_reader::assert_eq_nan;
2211 ///
2212 /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
2213 /// // Read the SNPs indexed by 2.
2214 /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
2215 /// let mut bed_cloud = BedCloud::new(url).await?;
2216 /// let read_options = ReadOptions::builder().sid_index(2).f64().build()?;
2217 /// let val = bed_cloud.read_with_options(&read_options).await?;
2218 ///
2219 /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
2220 /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
2221 /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
2222 /// ```
2223 pub async fn read_with_options<TVal: BedVal>(
2224 &mut self,
2225 read_options: &ReadOptions<TVal>,
2226 ) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
2227 let iid_count_in = self.iid_count().await?;
2228 let sid_count_in = self.sid_count().await?;
2229 let iid_count_out = read_options.iid_index.len(iid_count_in)?;
2230 let sid_count_out = read_options.sid_index.len(sid_count_in)?;
2231 let shape = ShapeBuilder::set_f((iid_count_out, sid_count_out), read_options.is_f);
2232 let mut val = nd::Array2::<TVal>::default(shape);
2233
2234 self.read_and_fill_with_options(&mut val.view_mut(), read_options)
2235 .await?;
2236
2237 Ok(val)
2238 }
2239
2240 // LATER: Support writing to a BedCloud
2241
2242 async fn unlazy_fam<T: FromStringArray<T>>(
2243 &mut self,
2244 is_none: bool,
2245 field_index: MetadataFields,
2246 name: &str,
2247 ) -> Result<(), Box<BedErrorPlus>> {
2248 if self.skip_set.contains(&field_index) {
2249 Err(BedError::CannotUseSkippedMetadata(name.into()))?;
2250 }
2251 if is_none {
2252 self.fam().await?;
2253 }
2254 Ok(())
2255 }
2256
2257 async fn unlazy_bim<T: FromStringArray<T>>(
2258 &mut self,
2259 is_none: bool,
2260 field_index: MetadataFields,
2261 name: &str,
2262 ) -> Result<(), Box<BedErrorPlus>> {
2263 if self.skip_set.contains(&field_index) {
2264 Err(BedError::CannotUseSkippedMetadata(name.into()))?;
2265 }
2266 if is_none {
2267 self.bim().await?;
2268 }
2269 Ok(())
2270 }
2271
2272 async fn fam(&mut self) -> Result<(), Box<BedErrorPlus>> {
2273 let fam_cloud_file = self.fam_cloud_file()?.clone();
2274
2275 let (metadata, count) = self
2276 .metadata
2277 .read_fam_cloud(&fam_cloud_file, &self.skip_set)
2278 .await?;
2279 self.metadata = metadata;
2280
2281 match self.iid_count {
2282 Some(iid_count) => {
2283 if iid_count != count {
2284 Err(BedError::InconsistentCount("iid".into(), iid_count, count))?;
2285 }
2286 }
2287 None => {
2288 self.iid_count = Some(count);
2289 }
2290 }
2291 Ok(())
2292 }
2293
2294 async fn bim(&mut self) -> Result<(), Box<BedErrorPlus>> {
2295 let bim_cloud_file = self.bim_cloud_file()?.clone();
2296
2297 let (metadata, count) = self
2298 .metadata
2299 .read_bim_cloud(&bim_cloud_file, &self.skip_set)
2300 .await?;
2301 self.metadata = metadata;
2302
2303 match self.sid_count {
2304 Some(sid_count) => {
2305 if sid_count != count {
2306 Err(BedError::InconsistentCount("sid".into(), sid_count, count))?;
2307 }
2308 }
2309 None => {
2310 self.sid_count = Some(count);
2311 }
2312 }
2313 Ok(())
2314 }
2315}
2316
2317/// Returns the cloud location of a sample .bed file as a URL string.
2318///
2319/// Behind the scenes, the "cloud location" will actually be local.
2320/// If necessary, the file will be downloaded.
2321/// The .fam and .bim files will also be downloaded, if they are not already present.
2322/// SHA256 hashes are used to verify that the files are correct.
2323/// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
2324/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
2325#[anyinput]
2326pub fn sample_bed_url(bed_path: AnyPath) -> Result<String, Box<BedErrorPlus>> {
2327 let mut path_list: Vec<PathBuf> = Vec::new();
2328 for ext in &["bed", "bim", "fam"] {
2329 let file_path = bed_path.with_extension(ext);
2330 path_list.push(file_path);
2331 }
2332
2333 let mut vec = sample_urls(path_list)?;
2334 Ok(vec.swap_remove(0))
2335}
2336
2337/// Returns the cloud location of a sample file as a URL string.
2338///
2339/// Behind the scenes, the "cloud location" will actually be local.
2340/// If necessary, the file will be downloaded.
2341/// A SHA256 hash is used to verify that the file is correct.
2342/// The file will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
2343/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
2344#[anyinput]
2345pub fn sample_url(path: AnyPath) -> Result<String, Box<BedErrorPlus>> {
2346 let file_path = STATIC_FETCH_DATA
2347 .fetch_file(path)
2348 .map_err(|e| BedError::SampleFetch(e.to_string()))?;
2349 let url = abs_path_to_url_string(file_path)?;
2350 Ok(url)
2351}
2352
2353/// Returns the cloud locations of a list of files as URL strings.
2354///
2355/// Behind the scenes, the "cloud location" will actually be local.
2356/// If necessary, the file will be downloaded.
2357/// SHA256 hashes are used to verify that the files are correct.
2358/// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
2359/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
2360#[anyinput]
2361pub fn sample_urls(path_list: AnyIter<AnyPath>) -> Result<Vec<String>, Box<BedErrorPlus>> {
2362 let file_paths = STATIC_FETCH_DATA
2363 .fetch_files(path_list)
2364 .map_err(|e| BedError::SampleFetch(e.to_string()))?;
2365 file_paths
2366 .iter()
2367 .map(|file_path| {
2368 let url = abs_path_to_url_string(file_path)?;
2369 Ok(url)
2370 })
2371 .collect()
2372}
2373
2374fn to_metadata_path(
2375 bed_cloud_file: &CloudFile,
2376 metadata_cloud_file: &Option<CloudFile>,
2377 extension: &str,
2378) -> Result<CloudFile, Box<BedErrorPlus>> {
2379 if let Some(metadata_cloud_file) = metadata_cloud_file {
2380 Ok(metadata_cloud_file.clone())
2381 } else {
2382 let mut meta_cloud_file = bed_cloud_file.clone();
2383 meta_cloud_file.set_extension(extension)?;
2384 Ok(meta_cloud_file)
2385 }
2386}