bed_reader/
bed_cloud.rs

// Refuse to build on anything other than a 64-bit target.
// NOTE(review): presumably required because the code below casts freely between
// `u64` file offsets and `usize` indices — confirm before relaxing.
#[cfg(not(target_pointer_width = "64"))]
compile_error!("This code requires a 64-bit target architecture.");
3
4use anyinput::anyinput;
5use bytes::Bytes;
6use cloud_file::{abs_path_to_url_string, CloudFile};
7use derive_builder::Builder;
8use futures_util::StreamExt;
9use itertools::Itertools;
10use nd::ShapeBuilder;
11use ndarray as nd;
12use std::cmp::max;
13use std::collections::HashSet;
14use std::ops::Range;
15use std::path::PathBuf;
16
17use crate::{
18    check_and_precompute_iid_index, compute_max_chunk_bytes, compute_max_concurrent_requests,
19    set_up_two_bits_to_value, try_div_4, BedError, BedErrorPlus, BedVal, FromStringArray, Hold,
20    Metadata, ReadOptions, BED_FILE_MAGIC1, BED_FILE_MAGIC2, EMPTY_OPTIONS, STATIC_FETCH_DATA,
21};
22use crate::{MetadataFields, CB_HEADER_U64};
23
/// Represents a PLINK .bed file in the cloud that is open for reading genotype data and metadata.
///
/// Construct with [`BedCloud::new`](struct.BedCloud.html#method.new), [`BedCloud::builder`](struct.BedCloud.html#method.builder),
/// [`BedCloud::from_cloud_file`](struct.BedCloud.html#method.from_cloud_file), or
/// [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file).
///
/// > For reading local files, see [`Bed`](struct.Bed.html).
///
/// # Example
///
/// Open a file for reading. Then, read the individual (sample) ids
/// and all the genotype data.
/// ```
/// use ndarray as nd;
/// use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
///
/// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
/// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
/// let mut bed_cloud = BedCloud::new(url).await?;
/// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
/// let val = ReadOptions::builder().f64().read_cloud(&mut bed_cloud).await?;
///
/// assert_eq_nan(
///     &val,
///     &nd::array![
///         [1.0, 0.0, f64::NAN, 0.0],
///         [2.0, 0.0, f64::NAN, 2.0],
///         [0.0, 1.0, 2.0, 0.0]
///     ],
/// );
/// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
/// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
/// ```
// `build_fn(skip)`: the derive only generates the builder struct; the build
// function is written by hand (see `BedCloudBuilder::build_no_file_check`).
#[derive(Clone, Debug, Builder)]
#[builder(build_fn(skip))]
pub struct BedCloud {
    // Cloud location of the .bed file itself.
    #[builder(setter(custom))]
    cloud_file: CloudFile,

    // Explicit cloud location of the .fam file, when one was given through the
    // builder (`fam` / `fam_cloud_file`); `None` otherwise.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    fam_cloud_file: Option<CloudFile>,

    // Explicit cloud location of the .bim file, when one was given through the
    // builder (`bim` / `bim_cloud_file`); `None` otherwise.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    bim_cloud_file: Option<CloudFile>,

    // When true (the default), `BedCloudBuilder::build` reads and validates the
    // .bed header immediately; `skip_early_check` turns this off.
    #[builder(setter(custom))]
    #[builder(default = "true")]
    is_checked_early: bool,

    // Number of individuals (samples), when known. `BedCloudBuilder::build`
    // overwrites it with the value returned by `Metadata::check_counts`.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    iid_count: Option<usize>,

    // Number of SNPs, when known. `BedCloudBuilder::build` overwrites it with
    // the value returned by `Metadata::check_counts`.
    #[builder(setter(custom))]
    #[builder(default = "None")]
    sid_count: Option<usize>,

    // Metadata values (fid, iid, sid, ...) collected so far, including any
    // overrides supplied through the builder's setter methods.
    #[builder(setter(custom))]
    metadata: Metadata,

    // Metadata fields that the builder's `skip_*` methods asked not to record.
    #[builder(setter(custom))]
    skip_set: HashSet<MetadataFields>,
}
89
90// We need to define our own build_no_file_check
91// because otherwise derive_builder (needlessly) requires ObjectStore: Clone
92impl BedCloudBuilder {
93    fn build_no_file_check(&self) -> Result<BedCloud, Box<BedErrorPlus>> {
94        Ok(BedCloud {
95            cloud_file: match self.cloud_file {
96                Some(ref value) => Clone::clone(value),
97                None => Err(BedError::UninitializedField("cloud_file"))?,
98            },
99            fam_cloud_file: match self.fam_cloud_file {
100                Some(ref value) => Clone::clone(value),
101                None => None,
102            },
103            bim_cloud_file: match self.bim_cloud_file {
104                Some(ref value) => Clone::clone(value),
105                None => None,
106            },
107            is_checked_early: match self.is_checked_early {
108                Some(ref value) => Clone::clone(value),
109                None => true,
110            },
111            iid_count: match self.iid_count {
112                Some(ref value) => Clone::clone(value),
113                None => None,
114            },
115            sid_count: match self.sid_count {
116                Some(ref value) => Clone::clone(value),
117                None => None,
118            },
119            metadata: match self.metadata {
120                Some(ref value) => Clone::clone(value),
121                None => Err(BedError::UninitializedField("metadata"))?,
122            },
123            skip_set: match self.skip_set {
124                Some(ref value) => Clone::clone(value),
125                None => Err(BedError::UninitializedField("skip_set"))?,
126            },
127        })
128    }
129}
130
131fn convert_negative_sid_index(
132    in_sid_i_signed: isize,
133    upper_sid_count: isize,
134    lower_sid_count: isize,
135) -> Result<u64, Box<BedErrorPlus>> {
136    if (0..=upper_sid_count).contains(&in_sid_i_signed) {
137        #[allow(clippy::cast_sign_loss)]
138        Ok(in_sid_i_signed as u64)
139    } else if (lower_sid_count..=-1).contains(&in_sid_i_signed) {
140        #[allow(clippy::cast_sign_loss)]
141        Ok((in_sid_i_signed - lower_sid_count) as u64)
142    } else {
143        Err(Box::new(BedErrorPlus::BedError(BedError::SidIndexTooBig(
144            in_sid_i_signed,
145        ))))
146    }
147}
148
149#[allow(clippy::too_many_arguments)]
150#[allow(clippy::similar_names)]
151async fn internal_read_no_alloc<TVal: BedVal>(
152    cloud_file: &CloudFile,
153    size: usize,
154    in_iid_count: usize,
155    in_sid_count: usize,
156    is_a1_counted: bool,
157    iid_index: &[isize],
158    sid_index: &[isize],
159    missing_value: TVal,
160    max_concurrent_requests: usize,
161    max_chunk_bytes: usize,
162    out_val: &mut nd::ArrayViewMut2<'_, TVal>,
163) -> Result<(), Box<BedErrorPlus>> {
164    // compute numbers outside of the loop
165    let in_iid_count_div4_u64 = check_file_length(in_iid_count, in_sid_count, size, cloud_file)?;
166    let (i_div_4_less_start_array, i_mod_4_times_2_array, i_div_4_start, i_div_4_len) =
167        check_and_precompute_iid_index(in_iid_count, iid_index)?;
168    if i_div_4_len == 0 {
169        return Ok(()); // we must return early because the chucks method doesn't work with size 0
170    }
171    let chunk_count = max(1, max_chunk_bytes / i_div_4_len as usize);
172    let from_two_bits_to_value = set_up_two_bits_to_value(is_a1_counted, missing_value);
173    let lower_sid_count = -(in_sid_count as isize);
174    let upper_sid_count: isize = (in_sid_count as isize) - 1;
175
176    // sid_index is a slice that tells us which columns to read from the (column-major) file.
177    // out_val is a column-major array to fill the decode results.
178
179    // For each chunk of columns to read ...
180
181    let chunks = sid_index.iter().chunks(chunk_count);
182    let iterator = chunks.into_iter().enumerate().map(|(chunk_index, chunk)| {
183        let result = extract_ranges(
184            chunk_count,
185            chunk,
186            chunk_index,
187            upper_sid_count,
188            lower_sid_count,
189            in_iid_count_div4_u64,
190            i_div_4_start,
191            i_div_4_len,
192        );
193        async move {
194            let (ranges, out_sid_i_vec) = result?;
195            let vec_bytes = cloud_file.read_ranges(&ranges).await?;
196            Result::<_, Box<BedErrorPlus>>::Ok((vec_bytes, out_sid_i_vec))
197        }
198    });
199
200    let mut stream = futures_util::stream::iter(iterator).buffer_unordered(max_concurrent_requests);
201
202    while let Some(result) = stream.next().await {
203        let (vec_bytes, out_sid_i_vec) = result?;
204        decode_bytes_into_columns(
205            &vec_bytes,
206            out_sid_i_vec,
207            iid_index,
208            &i_div_4_less_start_array,
209            &i_mod_4_times_2_array,
210            out_val,
211            from_two_bits_to_value,
212        );
213    }
214
215    Ok(())
216}
217
218#[inline]
219#[allow(clippy::type_complexity)]
220#[allow(clippy::too_many_arguments)]
221fn extract_ranges(
222    chunk_count: usize,
223    chunk: itertools::Chunk<'_, std::slice::Iter<'_, isize>>,
224    chunk_index: usize,
225    upper_sid_count: isize,
226    lower_sid_count: isize,
227    in_iid_count_div4_u64: u64,
228    i_div_4_start: u64,
229    i_div_4_len: u64,
230) -> Result<(Vec<Range<usize>>, Vec<usize>), Box<BedErrorPlus>> {
231    let mut ranges = Vec::with_capacity(chunk_count);
232    let mut out_sid_i_vec = Vec::with_capacity(chunk_count);
233    for (inner_index, in_sid_i_signed) in chunk.enumerate() {
234        let out_sid_i = chunk_index * chunk_count + inner_index;
235        let in_sid_i =
236            convert_negative_sid_index(*in_sid_i_signed, upper_sid_count, lower_sid_count)?;
237        let pos: usize =
238            (in_sid_i * in_iid_count_div4_u64 + i_div_4_start + CB_HEADER_U64) as usize; // "as" and math is safe because of early checks
239        let range = pos..pos + i_div_4_len as usize;
240        debug_assert!(range.end - range.start == i_div_4_len as usize); // real assert
241        ranges.push(range);
242        out_sid_i_vec.push(out_sid_i);
243    }
244    Ok((ranges, out_sid_i_vec))
245}
246
247#[inline]
248fn decode_bytes_into_columns<TVal: BedVal>(
249    bytes_slice: &[Bytes],
250    out_sid_i_vec: Vec<usize>,
251    iid_index: &[isize],
252    i_div_4_less_start_array: &nd::prelude::ArrayBase<
253        nd::OwnedRepr<usize>,
254        nd::prelude::Dim<[usize; 1]>,
255    >,
256    i_mod_4_times_2_array: &nd::prelude::ArrayBase<nd::OwnedRepr<u8>, nd::prelude::Dim<[usize; 1]>>,
257    out_val: &mut nd::prelude::ArrayBase<nd::ViewRepr<&mut TVal>, nd::prelude::Dim<[usize; 2]>>,
258    from_two_bits_to_value: [TVal; 4],
259) {
260    for (bytes, out_sid_i) in bytes_slice.iter().zip(out_sid_i_vec.into_iter()) {
261        let mut col = out_val.column_mut(out_sid_i);
262        // LATER: Consider doing this in parallel as in the non-cloud version.
263        for out_iid_i in 0..iid_index.len() {
264            let i_div_4_less_start = i_div_4_less_start_array[out_iid_i];
265            let i_mod_4_times_2: u8 = i_mod_4_times_2_array[out_iid_i];
266            let encoded: u8 = bytes[i_div_4_less_start];
267            let genotype_byte: u8 = (encoded >> i_mod_4_times_2) & 0x03;
268            col[out_iid_i] = from_two_bits_to_value[genotype_byte as usize];
269        }
270    }
271}
272
273#[allow(clippy::similar_names)]
274fn check_file_length(
275    in_iid_count: usize,
276    in_sid_count: usize,
277    size: usize,
278    cloud_file: &CloudFile,
279) -> Result<u64, Box<BedErrorPlus>> {
280    let in_iid_count_div4_u64 = try_div_4(in_iid_count, in_sid_count)?;
281    let file_len = size as u64;
282    let file_len2 = in_iid_count_div4_u64 * (in_sid_count as u64) + CB_HEADER_U64;
283    if file_len != file_len2 {
284        Err(BedError::IllFormed(cloud_file.to_string()))?;
285    }
286    Ok(in_iid_count_div4_u64)
287}
288
289#[inline]
290#[allow(clippy::too_many_arguments)]
291#[allow(clippy::similar_names)]
292async fn read_no_alloc<TVal: BedVal>(
293    cloud_file: &CloudFile,
294    iid_count: usize,
295    sid_count: usize,
296    is_a1_counted: bool,
297    iid_index: &[isize],
298    sid_index: &[isize],
299    missing_value: TVal,
300    max_concurrent_requests: usize,
301    max_chunk_bytes: usize,
302
303    val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.
304) -> Result<(), Box<BedErrorPlus>> {
305    let (size, bytes) = open_and_check(cloud_file).await?;
306
307    match bytes[2] {
308        0 => {
309            // We swap 'iid' and 'sid' and then reverse the axes.
310            let mut val_t = val.view_mut().reversed_axes();
311
312            internal_read_no_alloc(
313                cloud_file,
314                size,
315                sid_count,
316                iid_count,
317                is_a1_counted,
318                sid_index,
319                iid_index,
320                missing_value,
321                max_concurrent_requests,
322                max_chunk_bytes,
323                &mut val_t,
324            )
325            .await?;
326        }
327        1 => {
328            internal_read_no_alloc(
329                cloud_file,
330                size,
331                iid_count,
332                sid_count,
333                is_a1_counted,
334                iid_index,
335                sid_index,
336                missing_value,
337                max_concurrent_requests,
338                max_chunk_bytes,
339                val,
340            )
341            .await?;
342        }
343        _ => Err(BedError::BadMode(cloud_file.to_string()))?,
344    };
345    Ok(())
346}
347
348async fn open_and_check(cloud_file: &CloudFile) -> Result<(usize, Bytes), Box<BedErrorPlus>> {
349    let (bytes, size) = cloud_file
350        .read_range_and_file_size(0..CB_HEADER_U64 as usize)
351        .await?;
352    if (bytes.len() as u64) < CB_HEADER_U64
353        || BED_FILE_MAGIC1 != bytes[0]
354        || BED_FILE_MAGIC2 != bytes[1]
355        || (0 != bytes[2] && 1 != bytes[2])
356    {
357        Err(BedError::IllFormed(cloud_file.to_string()))?;
358    }
359    Ok((size, bytes))
360}
361
362impl BedCloudBuilder {
363    fn new<I, K, V>(url: impl AsRef<str>, options: I) -> Result<Self, Box<BedErrorPlus>>
364    where
365        I: IntoIterator<Item = (K, V)>,
366        K: AsRef<str>,
367        V: Into<String>,
368    {
369        let cloud_file = CloudFile::new_with_options(url, options)?;
370        Ok(BedCloudBuilder::from(cloud_file))
371    }
372
373    /// Set the cloud location of the .fam file. Specify the file with a URL string.
374    ///
375    /// If not set, the .fam file will be assumed
376    /// to have the same location as the .bed file, but with the extension .fam.
377    ///
378    /// > See [`BedCloudBuilder::fam_cloud_file`](struct.BedCloudBuilder.html#method.fam_cloud_file) to specify the file with an [`CloudFile`](struct.CloudFile.html)
379    /// > instead of a URL string.
380    ///
381    /// # Example:
382    /// Read .bed, .fam, and .bim files with non-standard names.
383    /// ```
384    /// use bed_reader::{BedCloud, ReadOptions, sample_urls, EMPTY_OPTIONS};
385    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
386    /// let deb_maf_mib = sample_urls(["small.deb", "small.maf", "small.mib"])?;
387    /// let mut bed_cloud = BedCloud::builder(&deb_maf_mib[0])?
388    ///    .fam(&deb_maf_mib[1], EMPTY_OPTIONS)?
389    ///    .bim(&deb_maf_mib[2], EMPTY_OPTIONS)?
390    ///    .build().await?;
391    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
392    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
393    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
394    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
395    /// ```
396    pub fn fam<I, K, V>(
397        mut self,
398        url: impl AsRef<str>,
399        options: I,
400    ) -> Result<Self, Box<BedErrorPlus>>
401    where
402        I: IntoIterator<Item = (K, V)>,
403        K: AsRef<str>,
404        V: Into<String>,
405    {
406        let cloud_file = CloudFile::new_with_options(url, options)?;
407        self.fam_cloud_file = Some(Some(cloud_file));
408        Ok(self)
409    }
410
411    /// Set the cloud location of the .bim file. Specify the file with a URL string.
412    ///
413    /// If not set, the .bim file will be assumed
414    /// to have the same location as the .bed file, but with the extension .bim.
415    ///
416    /// > See [`BedCloudBuilder::fam_cloud_file`](struct.BedCloudBuilder.html#method.bim_cloud_file) to specify the file with an [`CloudFile`](struct.CloudFile.html)
417    /// > instead of a URL string.
418    ///
419    /// # Example:
420    /// Read .bed, .fam, and .bim files with non-standard names.
421    /// ```
422    /// use bed_reader::{BedCloud, ReadOptions, sample_urls, EMPTY_OPTIONS};
423    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
424    /// let deb_maf_mib = sample_urls(["small.deb", "small.maf", "small.mib"])?;
425    /// let mut bed_cloud = BedCloud::builder(&deb_maf_mib[0])?
426    ///    .fam(&deb_maf_mib[1], EMPTY_OPTIONS)?
427    ///    .bim(&deb_maf_mib[2], EMPTY_OPTIONS)?
428    ///    .build().await?;
429    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
430    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
431    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
432    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
433    /// ```
434    pub fn bim<I, K, V>(
435        mut self,
436        url: impl AsRef<str>,
437        options: I,
438    ) -> Result<Self, Box<BedErrorPlus>>
439    where
440        I: IntoIterator<Item = (K, V)>,
441        K: AsRef<str>,
442        V: Into<String>,
443    {
444        let cloud_file = CloudFile::new_with_options(url, options)?;
445        self.bim_cloud_file = Some(Some(cloud_file));
446        Ok(self)
447    }
448}
449
450impl From<&CloudFile> for BedCloudBuilder {
451    fn from(cloud_file: &CloudFile) -> Self {
452        Self {
453            cloud_file: Some(cloud_file.clone()), // Cloned here.
454            fam_cloud_file: None,
455            bim_cloud_file: None,
456
457            is_checked_early: None,
458            iid_count: None,
459            sid_count: None,
460
461            metadata: Some(Metadata::new()),
462            skip_set: Some(HashSet::new()),
463        }
464    }
465}
466
467impl From<CloudFile> for BedCloudBuilder {
468    fn from(cloud_file: CloudFile) -> Self {
469        Self {
470            cloud_file: Some(cloud_file), // Cloned here.
471            fam_cloud_file: None,
472            bim_cloud_file: None,
473
474            is_checked_early: None,
475            iid_count: None,
476            sid_count: None,
477
478            metadata: Some(Metadata::new()),
479            skip_set: Some(HashSet::new()),
480        }
481    }
482}
483
484impl BedCloudBuilder {
485    /// Create a [`BedCloud`](struct.BedCloud.html) from the builder.
486    ///
487    /// > See [`BedCloud::builder`](struct.BedCloud.html#method.builder) for more details and examples.
488    pub async fn build(&self) -> Result<BedCloud, Box<BedErrorPlus>> {
489        let mut bed_cloud = self.build_no_file_check()?;
490
491        // Unwrap is allowed because we can't construct BedCloudBuilder without cloud_file
492        if bed_cloud.is_checked_early {
493            let cloud_file = self.cloud_file.as_ref().unwrap().clone();
494            open_and_check(&cloud_file).await?;
495        }
496
497        (bed_cloud.iid_count, bed_cloud.sid_count) = bed_cloud
498            .metadata
499            .check_counts(bed_cloud.iid_count, bed_cloud.sid_count)?;
500
501        Ok(bed_cloud)
502    }
503
504    /// Override the family id (fid) values found in the .fam file.
505    ///
506    /// By default, if fid values are needed and haven't already been found,
507    /// they will be read from the .fam file.
508    /// Providing them here avoids that file read and provides a way to give different values.
509    #[anyinput]
510    #[must_use]
511    pub fn fid(mut self, fid: AnyIter<AnyString>) -> Self {
512        // Unwrap will always work because BedCloudBuilder starting with some metadata
513        self.metadata.as_mut().unwrap().set_fid(fid);
514        self
515    }
516
517    /// Override the individual id (iid) values found in the .fam file.
518    ///
519    /// By default, if iid values are needed and haven't already been found,
520    /// they will be read from the .fam file.
521    /// Providing them here avoids that file read and provides a way to give different values.
522    /// ```
523    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
524    /// use ndarray as nd;
525    /// use bed_reader::{BedCloud, assert_eq_nan};
526    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
527    /// use bed_reader::ReadOptions;
528    ///
529    /// let mut bed_cloud = BedCloud::builder(url)?
530    ///    .iid(["sample1", "sample2", "sample3"])
531    ///    .build().await?;
532    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["sample1", "sample2", "sample3"]
533    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
534    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
535    /// ```
536    #[anyinput]
537    #[must_use]
538    pub fn iid(mut self, iid: AnyIter<AnyString>) -> Self {
539        // Unwrap will always work because BedCloudBuilder starting with some metadata
540        self.metadata.as_mut().unwrap().set_iid(iid);
541        self
542    }
543
544    /// Override the father values found in the .fam file.
545    ///
546    /// By default, if father values are needed and haven't already been found,
547    /// they will be read from the .fam file.
548    /// Providing them here avoids that file read and provides a way to gi&ve different values.
549    #[anyinput]
550    #[must_use]
551    pub fn father(mut self, father: AnyIter<AnyString>) -> Self {
552        // Unwrap will always work because BedCloudBuilder starting with some metadata
553        self.metadata.as_mut().unwrap().set_father(father);
554        self
555    }
556
557    /// Override the mother values found in the .fam file.
558    ///
559    /// By default, if mother values are needed and haven't already been found,
560    /// they will be read from the .fam file.
561    /// Providing them here avoids that file read and provides a way to give different values.
562    #[anyinput]
563    #[must_use]
564    pub fn mother(mut self, mother: AnyIter<AnyString>) -> Self {
565        // Unwrap will always work because BedCloudBuilder starting with some metadata
566        self.metadata.as_mut().unwrap().set_mother(mother);
567        self
568    }
569
570    /// Override the sex values found in the .fam file.
571    ///
572    /// By default, if sex values are needed and haven't already been found,
573    /// they will be read from the .fam file.
574    /// Providing them here avoids that file read and provides a way to give different values.
575    #[anyinput]
576    #[must_use]
577    pub fn sex(mut self, sex: AnyIter<i32>) -> Self {
578        // Unwrap will always work because BedCloudBuilder starting with some metadata
579        self.metadata.as_mut().unwrap().set_sex(sex);
580        self
581    }
582
583    /// Override the phenotype values found in the .fam file.
584    ///
585    /// Note that the phenotype values in the .fam file are seldom used.
586    /// By default, if phenotype values are needed and haven't already been found,
587    /// they will be read from the .fam file.
588    /// Providing them here avoids that file read and provides a way to give different values.
589    #[anyinput]
590    #[must_use]
591    pub fn pheno(mut self, pheno: AnyIter<AnyString>) -> Self {
592        // Unwrap will always work because BedCloudBuilder starting with some metadata
593        self.metadata.as_mut().unwrap().set_pheno(pheno);
594        self
595    }
596
597    /// Override the chromosome values found in the .bim file.
598    ///
599    /// By default, if chromosome values are needed and haven't already been found,
600    /// they will be read from the .bim file.
601    /// Providing them here avoids that file read and provides a way to give different values.
602    #[anyinput]
603    #[must_use]
604    pub fn chromosome(mut self, chromosome: AnyIter<AnyString>) -> Self {
605        // Unwrap will always work because BedCloudBuilder starting with some metadata
606        self.metadata.as_mut().unwrap().set_chromosome(chromosome);
607        self
608    }
609
610    /// Override the SNP id (sid) values found in the .fam file.
611    ///
612    /// By default, if sid values are needed and haven't already been found,
613    /// they will be read from the .bim file.
614    /// Providing them here avoids that file read and provides a way to give different values.
615    /// ```
616    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
617    /// use ndarray as nd;
618    /// use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
619    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
620    ///
621    /// let mut bed_cloud = BedCloud::builder(url)?
622    ///    .sid(["SNP1", "SNP2", "SNP3", "SNP4"])
623    ///    .build().await?;
624    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["SNP1", "SNP2", "SNP3", "SNP4"]
625    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
626    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
627    /// ```
628    #[anyinput]
629    #[must_use]
630    pub fn sid(mut self, sid: AnyIter<AnyString>) -> Self {
631        self.metadata.as_mut().unwrap().set_sid(sid);
632        self
633    }
634
635    /// Override the centimorgan position values found in the .bim file.
636    ///
637    /// By default, if centimorgan position values are needed and haven't already been found,
638    /// they will be read from the .bim file.
639    /// Providing them here avoids that file read and provides a way to give different values.
640    #[anyinput]
641    #[must_use]
642    pub fn cm_position(mut self, cm_position: AnyIter<f32>) -> Self {
643        // Unwrap will always work because BedCloudBuilder starting with some metadata
644        self.metadata.as_mut().unwrap().set_cm_position(cm_position);
645        self
646    }
647
648    /// Override the base-pair position values found in the .bim file.
649    ///
650    /// By default, if base-pair position values are needed and haven't already been found,
651    /// they will be read from the .bim file.
652    /// Providing them here avoids that file read and provides a way to give different values.
653    #[anyinput]
654    #[must_use]
655    pub fn bp_position(mut self, bp_position: AnyIter<i32>) -> Self {
656        // Unwrap will always work because BedCloudBuilder starting with some metadata
657        self.metadata.as_mut().unwrap().set_bp_position(bp_position);
658        self
659    }
660
661    /// Override the allele 1 values found in the .bim file.
662    ///
663    /// By default, if allele 1 values are needed and haven't already been found,
664    /// they will be read from the .bim file.
665    /// Providing them here avoids that file read and provides a way to give different values.
666    #[anyinput]
667    #[must_use]
668    pub fn allele_1(mut self, allele_1: AnyIter<AnyString>) -> Self {
669        // Unwrap will always work because BedCloudBuilder starting with some metadata
670        self.metadata.as_mut().unwrap().set_allele_1(allele_1);
671        self
672    }
673
674    /// Override the allele 2 values found in the .bim file.
675    ///
676    /// By default, if allele 2 values are needed and haven't already been found,
677    /// they will be read from the .bim file.
678    /// Providing them here avoids that file read and provides a way to give different values.
679    #[anyinput]
680    #[must_use]
681    pub fn allele_2(mut self, allele_2: AnyIter<AnyString>) -> Self {
682        // Unwrap will always work because BedCloudBuilder starting with some metadata
683        self.metadata.as_mut().unwrap().set_allele_2(allele_2);
684        self
685    }
686
687    /// Set the number of individuals (samples) in the data.
688    ///
689    /// By default, if this number is needed, it will be found
690    /// and remembered
691    /// by opening the .fam file and quickly counting the number
692    /// of lines. Providing the number thus avoids a file read.
693    #[must_use]
694    pub fn iid_count(mut self, count: usize) -> Self {
695        self.iid_count = Some(Some(count));
696        self
697    }
698
699    /// Set the number of SNPs in the data.
700    ///
701    /// By default, if this number is needed, it will be found
702    /// and remembered
703    /// by opening the .bim file and quickly counting the number
704    /// of lines. Providing the number thus avoids a file read.
705    #[must_use]
706    pub fn sid_count(mut self, count: usize) -> Self {
707        self.sid_count = Some(Some(count));
708        self
709    }
710
711    /// Don't check the header of the .bed file until and unless the file is actually read.
712    ///
713    /// By default, when a [`BedCloud`](struct.BedCloud.html) struct is created, the .bed
714    /// file header is checked. This stops that early check.
715    /// ```
716    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
717    /// # use ndarray as nd;
718    /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
719    /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
720    /// let mut bed_cloud = BedCloud::builder(url)?.skip_early_check().build().await?;
721    /// let val = bed_cloud.read::<f64>().await?;
722    ///
723    /// assert_eq_nan(
724    ///     &val,
725    ///     &nd::array![
726    ///         [1.0, 0.0, f64::NAN, 0.0],
727    ///         [2.0, 0.0, f64::NAN, 2.0],
728    ///         [0.0, 1.0, 2.0, 0.0]
729    ///     ],
730    /// );
731    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
732    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
733    /// ```
734    #[must_use]
735    pub fn skip_early_check(mut self) -> Self {
736        self.is_checked_early = Some(false);
737        self
738    }
739
740    /// Set the cloud location of the .fam file.
741    ///
742    /// If not set, the .fam file will be assumed
743    /// to have the same location as the .bed file, but with the extension .fam.
744    ///
745    /// # Example:
746    /// Read .bed, .fam, and .bim files with non-standard names.
747    /// ```
748    /// use bed_reader::{BedCloud, ReadOptions, sample_urls, EMPTY_OPTIONS};
749    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
750    /// let deb_maf_mib = sample_urls(["small.deb", "small.maf", "small.mib"])?;
751    /// let mut bed_cloud = BedCloud::builder(&deb_maf_mib[0])?
752    ///    .fam(&deb_maf_mib[1], EMPTY_OPTIONS)?
753    ///    .bim(&deb_maf_mib[2], EMPTY_OPTIONS)?
754    ///    .build().await?;
755    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
756    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
757    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
758    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
759    /// ```
760    #[must_use]
761    pub fn fam_cloud_file(mut self, cloud_file: &CloudFile) -> Self {
762        self.fam_cloud_file = Some(Some(cloud_file.clone()));
763        self
764    }
765
766    /// Set the cloud location of the .bim file.
767    ///
768    /// If not set, the .bim file will be assumed
769    /// to have the same location as the .bed file, but with the extension .bim.
770    ///
771    /// # Example:
772    /// Read .bed, .fam, and .bim files with non-standard names.
773    /// ```
774    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
775    /// use bed_reader::{BedCloud, ReadOptions, sample_urls, CloudFile};
776    ///
777    /// let deb_maf_mib = sample_urls(["small.deb", "small.maf", "small.mib"])?
778    ///    .iter()
779    ///    .map(|url| CloudFile::new(url))
780    ///    .collect::<Result<Vec<CloudFile>, _>>()?;
781    /// let mut bed_cloud = BedCloud::builder_from_cloud_file(&deb_maf_mib[0])
782    ///    .fam_cloud_file(&deb_maf_mib[1])
783    ///    .bim_cloud_file(&deb_maf_mib[2])
784    ///    .build().await?;
785    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
786    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
787    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
788    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
789    /// ```
790    #[must_use]
791    pub fn bim_cloud_file(mut self, cloud_file: &CloudFile) -> Self {
792        let cloud_file = cloud_file.clone();
793        self.bim_cloud_file = Some(Some(cloud_file));
794        self
795    }
796
797    /// Don't read the fid information from the .fam file.
798    ///
799    /// By default, when the .fam is read, the fid (the family id) is recorded.
800    /// This stops that recording. This is useful if the fid is not needed.
801    /// Asking for the fid after skipping it results in an error.    
802    #[must_use]
803    pub fn skip_fid(mut self) -> Self {
804        // Unwrap will always work because BedCloudBuilder starting with some skip_set
805        self.skip_set.as_mut().unwrap().insert(MetadataFields::Fid);
806        self
807    }
808
809    /// Don't read the iid information from the .fam file.
810    ///
811    /// By default, when the .fam is read, the iid (the individual id) is recorded.
812    /// This stops that recording. This is useful if the iid is not needed.
813    /// Asking for the iid after skipping it results in an error.
814    #[must_use]
815    pub fn skip_iid(mut self) -> Self {
816        // Unwrap will always work because BedCloudBuilder starting with some skip_set
817        self.skip_set.as_mut().unwrap().insert(MetadataFields::Iid);
818        self
819    }
820
821    /// Don't read the father information from the .fam file.
822    ///
823    /// By default, when the .fam is read, the father id is recorded.
824    /// This stops that recording. This is useful if the father id is not needed.
825    /// Asking for the father id after skipping it results in an error.    
826    #[must_use]
827    pub fn skip_father(mut self) -> Self {
828        // Unwrap will always work because BedCloudBuilder starting with some skip_set
829        self.skip_set
830            .as_mut()
831            .unwrap()
832            .insert(MetadataFields::Father);
833        self
834    }
835
836    /// Don't read the mother information from the .fam file.
837    ///
838    /// By default, when the .fam is read, the mother id is recorded.
839    /// This stops that recording. This is useful if the mother id is not needed.
840    /// Asking for the mother id after skipping it results in an error.    
841    #[must_use]
842    pub fn skip_mother(mut self) -> Self {
843        // Unwrap will always work because BedCloudBuilder starting with some skip_set
844        self.skip_set
845            .as_mut()
846            .unwrap()
847            .insert(MetadataFields::Mother);
848        self
849    }
850
851    /// Don't read the sex information from the .fam file.
852    ///
853    /// By default, when the .fam is read, the sex is recorded.
854    /// This stops that recording. This is useful if sex is not needed.
855    /// Asking for sex after skipping it results in an error.    
856    #[must_use]
857    pub fn skip_sex(mut self) -> Self {
858        // Unwrap will always work because BedCloudBuilder starting with some skip_set
859        self.skip_set.as_mut().unwrap().insert(MetadataFields::Sex);
860        self
861    }
862
863    /// Don't read the phenotype information from the .fam file.
864    ///
865    /// Note that the phenotype information in the .fam file is
866    /// seldom used.
867    ///
868    /// By default, when the .fam is read, the phenotype is recorded.
869    /// This stops that recording. This is useful if this phenotype
870    /// information is not needed.
871    /// Asking for the phenotype after skipping it results in an error.    
872    #[must_use]
873    pub fn skip_pheno(mut self) -> Self {
874        // Unwrap will always work because BedCloudBuilder starting with some skip_set
875        self.skip_set
876            .as_mut()
877            .unwrap()
878            .insert(MetadataFields::Pheno);
879        self
880    }
881
882    /// Don't read the chromosome information from the .bim file.
883    ///
884    /// By default, when the .bim is read, the chromosome is recorded.
885    /// This stops that recording. This is useful if the chromosome is not needed.
886    /// Asking for the chromosome after skipping it results in an error.    
887    #[must_use]
888    pub fn skip_chromosome(mut self) -> Self {
889        // Unwrap will always work because BedCloudBuilder starting with some skip_set
890        self.skip_set
891            .as_mut()
892            .unwrap()
893            .insert(MetadataFields::Chromosome);
894        self
895    }
896
897    /// Don't read the SNP id information from the .bim file.
898    ///
899    /// By default, when the .bim is read, the sid (SNP id) is recorded.
900    /// This stops that recording. This is useful if the sid is not needed.
901    /// Asking for the sid after skipping it results in an error.    
902    #[must_use]
903    pub fn skip_sid(mut self) -> Self {
904        // Unwrap will always work because BedCloudBuilder starting with some skip_set
905        self.skip_set.as_mut().unwrap().insert(MetadataFields::Sid);
906        self
907    }
908
909    /// Don't read the centimorgan position information from the .bim file.
910    ///
911    /// By default, when the .bim is read, the cm position is recorded.
912    /// This stops that recording. This is useful if the cm position is not needed.
913    /// Asking for the cm position after skipping it results in an error.    
914    #[must_use]
915    pub fn skip_cm_position(mut self) -> Self {
916        // Unwrap will always work because BedCloudBuilder starting with some skip_set
917        self.skip_set
918            .as_mut()
919            .unwrap()
920            .insert(MetadataFields::CmPosition);
921        self
922    }
923
924    /// Don't read the base-pair position information from the .bim file.
925    ///
926    /// By default, when the .bim is read, the bp position is recorded.
927    /// This stops that recording. This is useful if the bp position is not needed.
928    /// Asking for the cp position after skipping it results in an error.    
929    #[must_use]
930    pub fn skip_bp_position(mut self) -> Self {
931        // Unwrap will always work because BedCloudBuilder starting with some skip_set
932        self.skip_set
933            .as_mut()
934            .unwrap()
935            .insert(MetadataFields::BpPosition);
936        self
937    }
938
939    /// Don't read the allele 1 information from the .bim file.
940    ///
941    /// By default, when the .bim is read, allele 1 is recorded.
942    /// This stops that recording. This is useful if allele 1 is not needed.
943    /// Asking for allele 1 after skipping it results in an error.    
944    #[must_use]
945    pub fn skip_allele_1(mut self) -> Self {
946        // Unwrap will always work because BedCloudBuilder starting with some skip_set
947        self.skip_set
948            .as_mut()
949            .unwrap()
950            .insert(MetadataFields::Allele1);
951        self
952    }
953
954    /// Don't read the allele 2 information from the .bim file.
955    ///
956    /// By default, when the .bim is read, allele 2 is recorded.
957    /// This stops that recording. This is useful if allele 2 is not needed.
958    /// Asking for allele 2 after skipping it results in an error.    
959    #[must_use]
960    pub fn skip_allele_2(mut self) -> Self {
961        // Unwrap will always work because BedCloudBuilder starting with some skip_set
962        self.skip_set
963            .as_mut()
964            .unwrap()
965            .insert(MetadataFields::Allele2);
966        self
967    }
968
969    /// Override the metadata in the .fam and .bim files with info merged in from a [`Metadata`](struct.Metadata.html).
970    ///
971    /// # Example
972    ///
973    /// In the example, we create a [`Metadata`](struct.Metadata.html) with iid
974    /// and sid arrays. Next, we use [`BedCloudBuilder`](struct.BedCloudBuilder.html) to override the fid array
975    /// and an iid array. Then, we add the metadata to the [`BedCloudBuilder`](struct.BedCloudBuilder.html),
976    /// overwriting iid (again) and overriding sid. Finally, we print these
977    /// three arrays and chromosome. Chromosome was never overridden so
978    /// it is read from the *.bim file.
979    ///```
980    /// use ndarray as nd;
981    /// use bed_reader::{BedCloud, Metadata};
982    ///
983    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
984    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
985    /// let metadata = Metadata::builder()
986    ///     .iid(["i1", "i2", "i3"])
987    ///     .sid(["s1", "s2", "s3", "s4"])
988    ///     .build()?;
989    /// let mut bed_cloud = BedCloud::builder(url)?
990    ///     .fid(["f1", "f2", "f3"])
991    ///     .iid(["x1", "x2", "x3"])
992    ///     .metadata(&metadata)
993    ///     .build().await?;
994    /// println!("{0:?}", bed_cloud.fid().await?);  // Outputs ndarray ["f1", "f2", "f3"]
995    /// println!("{0:?}", bed_cloud.iid().await?);  // Outputs ndarray ["i1", "i2", "i3"]
996    /// println!("{0:?}", bed_cloud.sid().await?);  // Outputs ndarray ["s1", "s2", "s3", "s4"]
997    /// println!("{0:?}", bed_cloud.chromosome().await?);  // Outputs ndarray ["1", "1", "5", "Y"]
998    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
999    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1000    /// ```
1001    #[must_use]
1002    pub fn metadata(mut self, metadata: &Metadata) -> Self {
1003        self.metadata = Some(
1004            Metadata::builder()
1005                .metadata(&self.metadata.unwrap()) // unwrap is ok because we know we have metadata
1006                .metadata(metadata) // consistent counts will be check later by the BedCloudBuilder
1007                .build_no_file_check()
1008                .unwrap(), // unwrap is ok because nothing can go wrong
1009        );
1010
1011        self
1012    }
1013}
1014
1015impl BedCloud {
1016    #[allow(clippy::doc_link_with_quotes)]
1017    /// Attempts to open a PLINK .bed file in the cloud for reading. The file is specified with a URL string and cloud options can be given.
1018    ///
    /// See ["Cloud URLs and `CloudFile` Examples"](supplemental_document_cloud_urls/index.html) for details of specifying a file.
1020    ///
1021    /// You may give [cloud options](supplemental_document_options/index.html#cloud-options) but not
1022    /// [`BedCloud` options](supplemental_document_options/index.html#bedbedcloud-options) or
1023    /// [`ReadOptions`](supplemental_document_options/index.html#readoptions).
1024    /// See ["Options, Options, Options"](supplemental_document_options/index.html) for details.
1025    ///
1026    /// > Also see [`BedCloud::new`](struct.BedCloud.html#method.new), which does not support cloud options.
    /// > See [`BedCloud::builder`](struct.BedCloud.html#method.builder) and
    /// > [`BedCloud::builder_with_options`](struct.BedCloud.html#method.builder_with_options), which do support
    /// > `BedCloud` options.
    /// > Alternatively, you can use [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file)
    /// > to specify the cloud file via a [`CloudFile`](struct.CloudFile.html). For reading local files,
1032    /// > see [`Bed`](struct.Bed.html).
1033    ///
1034    /// # Errors
1035    /// URL parsing may return an error.
1036    /// Also, by default, this method will return an error if the file is missing or its header
1037    /// is ill-formed. See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
1038    /// for all possible errors.
1039    ///
1040    /// # Examples
1041    /// List individual (sample) [`iid`](struct.BedCloud.html#method.iid) and
1042    /// SNP (variant) [`sid`](struct.BedCloud.html#method.sid),
1043    /// then [`read`](struct.BedCloud.html#method.read) the whole file.
1044    ///
1045    /// ```
1046    /// use ndarray as nd;
1047    /// use bed_reader::{BedCloud, assert_eq_nan};
1048    ///
1049    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1050    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1051    /// let cloud_options = [("timeout", "10s")];
1052    /// let mut bed_cloud = BedCloud::new_with_options(url, cloud_options).await?;
1053    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray: ["iid1", "iid2", "iid3"]
1054    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray: ["sid1", "sid2", "sid3", "sid4"]
1055    /// let val = bed_cloud.read::<f64>().await?;
1056    ///
1057    /// assert_eq_nan(
1058    ///     &val,
1059    ///     &nd::array![
1060    ///         [1.0, 0.0, f64::NAN, 0.0],
1061    ///         [2.0, 0.0, f64::NAN, 2.0],
1062    ///         [0.0, 1.0, 2.0, 0.0]
1063    ///     ],
1064    /// );
1065    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1066    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1067    /// ```
1068    ///
1069    /// Open the file and read data for one SNP (variant)
1070    /// at index position 2.
1071    /// ```
1072    /// # use ndarray as nd;
1073    /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1074    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1075    /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1076    /// # let cloud_options = [("timeout", "10s")];
1077    /// let mut bed_cloud = BedCloud::new_with_options(url, cloud_options).await?;
1078    /// let val = ReadOptions::builder().sid_index(2).f64().read_cloud(&mut bed_cloud).await?;
1079    ///
1080    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
1081    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1082    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1083    /// ```
1084    pub async fn new_with_options<I, K, V>(
1085        url: impl AsRef<str>,
1086        cloud_options: I,
1087    ) -> Result<Self, Box<BedErrorPlus>>
1088    where
1089        I: IntoIterator<Item = (K, V)>,
1090        K: AsRef<str>,
1091        V: Into<String>,
1092    {
1093        let cloud_file = CloudFile::new_with_options(url, cloud_options)?;
1094        let bed_cloud = BedCloud::from_cloud_file(&cloud_file).await?;
1095        Ok(bed_cloud)
1096    }
1097
1098    #[allow(clippy::doc_link_with_quotes)]
1099    /// Attempts to open a PLINK .bed file in the cloud for reading. The file is specified with a URL string.
1100    ///
    /// See ["Cloud URLs and `CloudFile` Examples"](supplemental_document_cloud_urls/index.html) for details of specifying a file.
1102    ///
1103    /// See ["Options, Options, Options"](supplemental_document_options/index.html) for details of the different option types.
1104    ///
1105    /// > Also see [`BedCloud::new_with_options`](struct.BedCloud.html#method.new_with_options), which supports cloud options.
    /// > See [`BedCloud::builder`](struct.BedCloud.html#method.builder) and
    /// > [`BedCloud::builder_with_options`](struct.BedCloud.html#method.builder_with_options), which do support
    /// > `BedCloud` options.
    /// > Alternatively, you can use [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file)
    /// > to specify the cloud file via a [`CloudFile`](struct.CloudFile.html). For reading local files,
1111    /// > see [`Bed`](struct.Bed.html).
1112    ///
1113    /// # Errors
1114    /// URL parsing may return an error.
1115    /// Also, by default, this method will return an error if the file is missing or its header
1116    /// is ill-formed. See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
1117    /// for all possible errors.
1118    ///
1119    /// # Examples
1120    /// List individual (sample) [`iid`](struct.BedCloud.html#method.iid) and
1121    /// SNP (variant) [`sid`](struct.BedCloud.html#method.sid),
1122    /// then [`read`](struct.BedCloud.html#method.read) the whole file.
1123    ///
1124    /// ```
1125    /// use ndarray as nd;
1126    /// use bed_reader::{BedCloud, assert_eq_nan};
1127    ///
1128    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1129    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1130    /// let mut bed_cloud = BedCloud::new(url).await?;
1131    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray: ["iid1", "iid2", "iid3"]
1132    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray: ["sid1", "sid2", "sid3", "sid4"]
1133    /// let val = bed_cloud.read::<f64>().await?;
1134    ///
1135    /// assert_eq_nan(
1136    ///     &val,
1137    ///     &nd::array![
1138    ///         [1.0, 0.0, f64::NAN, 0.0],
1139    ///         [2.0, 0.0, f64::NAN, 2.0],
1140    ///         [0.0, 1.0, 2.0, 0.0]
1141    ///     ],
1142    /// );
1143    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1144    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1145    /// ```
1146    ///
1147    /// Open the file and read data for one SNP (variant)
1148    /// at index position 2.
1149    /// ```
1150    /// # use ndarray as nd;
1151    /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1152    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1153    /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1154    /// let mut bed_cloud = BedCloud::new(url).await?;
1155    /// let val = ReadOptions::builder().sid_index(2).f64().read_cloud(&mut bed_cloud).await?;
1156    ///
1157    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
1158    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1159    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1160    /// ```
1161    pub async fn new(url: impl AsRef<str>) -> Result<Self, Box<BedErrorPlus>> {
1162        let cloud_file = CloudFile::new(url)?;
1163        let bed_cloud = BedCloud::from_cloud_file(&cloud_file).await?;
1164        Ok(bed_cloud)
1165    }
1166
1167    #[allow(clippy::doc_link_with_quotes)]
1168    /// Attempts to open a PLINK .bed file in the cloud for reading. The file is specified with a URL string.
1169    /// Supports [`BedCloud` options](supplemental_document_options/index.html#bedbedcloud-options) but not
1170    /// [cloud options](supplemental_document_options/index.html#cloud-options).
1171    ///
1172    /// See ["Cloud URLs and `CloudFile` Examples"](supplemental_document_cloud_urls/index.html) for details of specifying a file.
1173    /// See ["Options, Options, Options"](supplemental_document_options/index.html) for an overview of options types.
1174    ///
1175    /// > Also see [`BedCloud::new`](struct.BedCloud.html#method.new) and [`BedCloud::new_with_options`](struct.BedCloud.html#method.new_with_options),
1176    /// > which do not support `BedCloud` options.
1177    /// > Alternatively, you can use [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file)
    /// > to specify the cloud file via a [`CloudFile`](struct.CloudFile.html). For reading local files,
1179    /// > see [`Bed`](struct.Bed.html).
1180    ///
1181    /// The `BedCloud` options, [listed here](struct.BedCloudBuilder.html#implementations), can:
1182    ///  * set the cloud location of the .fam and/or .bim file
1183    ///  * override some metadata, for example, replace the individual ids.
1184    ///  * set the number of individuals (samples) or SNPs (variants)
1185    ///  * control checking the validity of the .bed file's header
1186    ///  * skip reading selected metadata
1187    ///
1188    /// # Errors
1189    /// URL parsing may return an error.
1190    /// Also, by default, this method will return an error if the file is missing or its header
1191    /// is ill-formed. It will also return an error if the options contradict each other.
1192    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
1193    /// for all possible errors.
1194    ///
1195    /// # Examples
1196    /// List individual (sample) [`iid`](struct.BedCloud.html#method.iid) and
1197    /// SNP (variant) [`sid`](struct.BedCloud.html#method.sid),
1198    /// then [`read`](struct.BedCloud.html#method.read) the whole file.
1199    ///
1200    /// ```
1201    /// use ndarray as nd;
1202    /// use bed_reader::{BedCloud, assert_eq_nan};
1203    ///
1204    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1205    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1206    /// let mut bed_cloud = BedCloud::builder(url)?.build().await?;
1207    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
1209    /// let val = bed_cloud.read::<f64>().await?;
1210    ///
1211    /// assert_eq_nan(
1212    ///     &val,
1213    ///     &nd::array![
1214    ///         [1.0, 0.0, f64::NAN, 0.0],
1215    ///         [2.0, 0.0, f64::NAN, 2.0],
1216    ///         [0.0, 1.0, 2.0, 0.0]
1217    ///     ],
1218    /// );
1219    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1220    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1221    /// ```
1222    ///
1223    /// Replace [`iid`](struct.BedCloud.html#method.iid).
1224    /// ```
1225    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1226    /// # use ndarray as nd;
1227    /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1228    /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1229    /// let mut bed_cloud = BedCloud::builder(url)?
1230    ///    .iid(["sample1", "sample2", "sample3"])
1231    ///    .build().await?;
1232    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["sample1", "sample2", "sample3"]
1233    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1234    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1235    /// ```
1236    /// Give the number of individuals (samples) and SNPs (variants) so that the .fam and
1237    /// .bim files need never be opened. Use `.skip_early_check()` to avoid opening the
1238    /// .bed before the first read.
1239    /// ```
1240    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1241    /// # use ndarray as nd;
1242    /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1243    /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1244    /// let mut bed_cloud = BedCloud::builder(url)?
1245    ///     .iid_count(3)
1246    ///     .sid_count(4)
1247    ///     .skip_early_check()
1248    ///     .build()
1249    ///     .await?;
1250    /// let val = bed_cloud.read::<f64>().await?;
1251    ///
1252    /// assert_eq_nan(
1253    ///     &val,
1254    ///     &nd::array![
1255    ///         [1.0, 0.0, f64::NAN, 0.0],
1256    ///         [2.0, 0.0, f64::NAN, 2.0],
1257    ///         [0.0, 1.0, 2.0, 0.0]
1258    ///     ],
1259    /// );
1260    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1261    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1262    /// ```
1263    /// Mark some properties as "don’t read or offer".
1264    /// ```
1265    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1266    /// # use ndarray as nd;
1267    /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1268    /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1269    /// let mut bed_cloud = BedCloud::builder(url)?
1270    ///     .skip_father()
1271    ///     .skip_mother()
1272    ///     .skip_sex()
1273    ///     .skip_pheno()
1274    ///     .skip_allele_1()
1275    ///     .skip_allele_2()
1276    ///     .build().await?;
1277    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
1278    /// bed_cloud.allele_2().await.expect_err("Can't be read");
1279    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1280    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1281    /// ```
1282    pub fn builder(url: impl AsRef<str>) -> Result<BedCloudBuilder, Box<BedErrorPlus>> {
1283        BedCloudBuilder::new(url, EMPTY_OPTIONS)
1284    }
1285
1286    #[allow(clippy::doc_link_with_quotes)]
1287    /// Attempts to open a PLINK .bed file in the cloud for reading. The file is specified with a URL string and cloud options can be given.
1288    /// Supports both [cloud options](supplemental_document_options/index.html#cloud-options) and
1289    /// [`BedCloud` options](supplemental_document_options/index.html#bedbedcloud-options).
1290    ///
1291    /// See ["Cloud URLs and `CloudFile` Examples"](supplemental_document_cloud_urls/index.html) for details of specifying a file.
1292    /// See ["Options, Options, Options"](supplemental_document_options/index.html) for an overview of options types.
1293    ///
1294    /// > Also see [`BedCloud::new`](struct.BedCloud.html#method.new) and [`BedCloud::new_with_options`](struct.BedCloud.html#method.new_with_options),
1295    /// > which do not support `BedCloud` options.
1296    /// > Alternatively, you can use [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file)
    /// > to specify the cloud file via a [`CloudFile`](struct.CloudFile.html). For reading local files,
1298    /// > see [`Bed`](struct.Bed.html).
1299    ///
1300    /// The `BedCloud` options, [listed here](struct.BedCloudBuilder.html#implementations), can:
1301    ///  * set the cloud location of the .fam and/or .bim file
1302    ///  * override some metadata, for example, replace the individual ids.
1303    ///  * set the number of individuals (samples) or SNPs (variants)
1304    ///  * control checking the validity of the .bed file's header
1305    ///  * skip reading selected metadata
1306    ///
1307    /// # Errors
1308    /// URL parsing may return an error.
1309    /// Also, by default, this method will return an error if the file is missing or its header
1310    /// is ill-formed. It will also return an error if the options contradict each other.
1311    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
1312    /// for all possible errors.
1313    ///
1314    /// # Examples
1315    /// List individual (sample) [`iid`](struct.BedCloud.html#method.iid) and
1316    /// SNP (variant) [`sid`](struct.BedCloud.html#method.sid),
1317    /// then [`read`](struct.BedCloud.html#method.read) the whole file.
1318    ///
1319    /// ```
1320    /// use ndarray as nd;
1321    /// use bed_reader::{BedCloud, assert_eq_nan};
1322    ///
1323    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1324    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1325    /// let cloud_options = [("timeout", "10s")];
1326    /// let mut bed_cloud = BedCloud::builder_with_options(url, cloud_options)?.build().await?;
1327    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
1329    /// let val = bed_cloud.read::<f64>().await?;
1330    ///
1331    /// assert_eq_nan(
1332    ///     &val,
1333    ///     &nd::array![
1334    ///         [1.0, 0.0, f64::NAN, 0.0],
1335    ///         [2.0, 0.0, f64::NAN, 2.0],
1336    ///         [0.0, 1.0, 2.0, 0.0]
1337    ///     ],
1338    /// );
1339    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1340    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1341    /// ```
1342    ///
1343    /// Replace [`iid`](struct.BedCloud.html#method.iid).
1344    /// ```
1345    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1346    /// # use ndarray as nd;
1347    /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1348    /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1349    /// # let cloud_options = [("timeout", "10s")];
1350    /// let mut bed_cloud = BedCloud::builder_with_options(url, cloud_options)?
1351    ///    .iid(["sample1", "sample2", "sample3"])
1352    ///    .build().await?;
1353    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["sample1", "sample2", "sample3"]
1354    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1355    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1356    /// ```
1357    /// Give the number of individuals (samples) and SNPs (variants) so that the .fam and
1358    /// .bim files need never be opened. Use `.skip_early_check()` to avoid opening the
1359    /// .bed before the first read.
1360    /// ```
1361    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1362    /// # use ndarray as nd;
1363    /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1364    /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1365    /// # let cloud_options = [("timeout", "10s")];
1366    /// let mut bed_cloud = BedCloud::builder_with_options(url, cloud_options)?
1367    ///     .iid_count(3)
1368    ///     .sid_count(4)
1369    ///     .skip_early_check()
1370    ///     .build()
1371    ///     .await?;
1372    /// let val = bed_cloud.read::<f64>().await?;
1373    ///
1374    /// assert_eq_nan(
1375    ///     &val,
1376    ///     &nd::array![
1377    ///         [1.0, 0.0, f64::NAN, 0.0],
1378    ///         [2.0, 0.0, f64::NAN, 2.0],
1379    ///         [0.0, 1.0, 2.0, 0.0]
1380    ///     ],
1381    /// );
1382    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1383    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1384    /// ```
1385    /// Mark some properties as "don’t read or offer".
1386    /// ```
1387    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1388    /// # use ndarray as nd;
1389    /// # use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1390    /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1391    /// # let cloud_options = [("timeout", "10s")];
1392    /// let mut bed_cloud = BedCloud::builder_with_options(url, cloud_options)?
1393    ///     .skip_father()
1394    ///     .skip_mother()
1395    ///     .skip_sex()
1396    ///     .skip_pheno()
1397    ///     .skip_allele_1()
1398    ///     .skip_allele_2()
1399    ///     .build().await?;
1400    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
1401    /// bed_cloud.allele_2().await.expect_err("Can't be read");
1402    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1403    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1404    /// ```
    pub fn builder_with_options<I, K, V>(
        url: impl AsRef<str>,
        options: I,
    ) -> Result<BedCloudBuilder, Box<BedErrorPlus>>
    where
        // `options` is any iterable of (key, value) pairs, e.g. `[("timeout", "10s")]`.
        I: IntoIterator<Item = (K, V)>,
        K: AsRef<str>,
        V: Into<String>,
    {
        // Pure delegation: whatever `BedCloudBuilder::new` produces (a builder or an
        // error) is returned unchanged.
        BedCloudBuilder::new(url, options)
    }
1416}
1417
1418impl BedCloud {
    /// Attempts to open a PLINK .bed file in the cloud for reading. Specify the file with a [`CloudFile`](https://docs.rs/cloud-file/).
    /// Supports [`BedCloud` options](supplemental_document_options/index.html#bedbedcloud-options).
    ///
    /// This function returns a [`BedCloudBuilder`](struct.BedCloudBuilder.html); call `.build().await` on it
    /// (as in the example below) to obtain the [`BedCloud`](struct.BedCloud.html).
    ///
    /// > Alternatively, you can use [`BedCloud::new`](struct.BedCloud.html#method.new) or [`BedCloud::builder`](struct.BedCloud.html#method.builder)
    /// > to specify the cloud file via a URL string. For reading local files,
    /// > see [`Bed`](struct.Bed.html).
    ///
    /// The `BedCloud` options, [listed here](struct.BedCloudBuilder.html#implementations), can:
    ///  * set the cloud location of the .fam and/or .bim file
    ///  * override some metadata, for example, replace the individual ids.
    ///  * set the number of individuals (samples) or SNPs (variants)
    ///  * control checking the validity of the .bed file's header
    ///  * skip reading selected metadata
    ///
    /// # Errors
    /// By default, this method will return an error if the file is missing or its header
    /// is ill-formed. It will also return an error if the options contradict each other.
    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
    /// for all possible errors.
    ///
    /// # Examples
    /// List individual (sample) [`iid`](struct.BedCloud.html#method.iid) and
    /// SNP (variant) [`sid`](struct.BedCloud.html#method.sid),
    /// then [`read`](struct.BedCloud.html#method.read) the whole file.
    ///
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{BedCloud, assert_eq_nan};
    /// use cloud_file::CloudFile;
    ///
    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
    /// let cloud_file = CloudFile::new(url)?;
    /// let mut bed_cloud = BedCloud::builder_from_cloud_file(&cloud_file).build().await?;
    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray ["iid1", "iid2", "iid3"]
    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray ["sid1", "sid2", "sid3", "sid4"]
    /// let val = bed_cloud.read::<f64>().await?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1.0, 0.0, f64::NAN, 0.0],
    ///         [2.0, 0.0, f64::NAN, 2.0],
    ///         [0.0, 1.0, 2.0, 0.0]
    ///     ],
    /// );
    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
    /// ```
    #[must_use]
    pub fn builder_from_cloud_file(cloud_file: &CloudFile) -> BedCloudBuilder {
        // The `CloudFile` is only borrowed; the caller keeps ownership of it.
        BedCloudBuilder::from(cloud_file)
    }
1472
1473    /// Attempts to open a PLINK .bed file in the cloud for reading. Specify the file with an [`CloudFile`].
1474    ///
1475    /// You may not give
1476    /// [`BedCloud` options](supplemental_document_options/index.html#bedbedcloud-options).
1477    /// See [`BedCloud::builder_from_cloud_file`](struct.BedCloud.html#method.builder_from_cloud_file), which does support
1478    /// `BedCloud` options.
1479    ///
1480    /// > Also see, [`BedCloud::new`](struct.BedCloud.html#method.new) and [`BedCloud::builder`](struct.BedCloud.html#method.builder)
1481    /// > to specify the cloud file via a URL string. For reading local files,
1482    /// > see [`Bed`](struct.Bed.html).
1483    ///
1484    /// # Errors
1485    /// By default, this method will return an error if the file is missing or its header
1486    /// is ill-formed. See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
1487    /// for all possible errors.
1488    ///
1489    /// # Examples
1490    /// List individual (sample) [`iid`](struct.BedCloud.html#method.iid) and
1491    /// SNP (variant) [`sid`](struct.BedCloud.html#method.sid),
1492    /// then [`read`](struct.BedCloud.html#method.read) the whole file.
1493    ///
1494    /// ```
1495    /// use ndarray as nd;
1496    /// use bed_reader::{BedCloud, assert_eq_nan};
1497    /// use cloud_file::CloudFile;
1498    ///
1499    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1500    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1501    /// let cloud_file = CloudFile::new(url)?;
1502    /// let mut bed_cloud = BedCloud::from_cloud_file(&cloud_file).await?;
1503    /// println!("{:?}", bed_cloud.iid().await?); // Outputs ndarray: ["iid1", "iid2", "iid3"]
1504    /// println!("{:?}", bed_cloud.sid().await?); // Outputs ndarray: ["sid1", "sid2", "sid3", "sid4"]
1505    /// let val = bed_cloud.read::<f64>().await?;
1506    ///
1507    /// assert_eq_nan(
1508    ///     &val,
1509    ///     &nd::array![
1510    ///         [1.0, 0.0, f64::NAN, 0.0],
1511    ///         [2.0, 0.0, f64::NAN, 2.0],
1512    ///         [0.0, 1.0, 2.0, 0.0]
1513    ///     ],
1514    /// );
1515    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1516    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1517    /// ```
1518    pub async fn from_cloud_file(cloud_file: &CloudFile) -> Result<Self, Box<BedErrorPlus>> {
1519        BedCloudBuilder::from(cloud_file).build().await
1520    }
1521
1522    /// Number of individuals (samples)
1523    ///
1524    /// If this number is needed, it will be found
1525    /// by opening the .fam file and quickly counting the number
1526    /// of lines. Once found, the number will be remembered.
1527    /// The file read can be avoided by setting the
1528    /// number with [`BedCloudBuilder::iid_count`](struct.BedCloudBuilder.html#method.iid_count)
1529    /// or, for example, [`BedCloudBuilder::iid`](struct.BedCloudBuilder.html#method.iid).
1530    ///
1531    /// # Example:
1532    /// ```
1533    /// use ndarray as nd;
1534    /// use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1535    ///
1536    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1537    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1538    /// let mut bed_cloud = BedCloud::new(url).await?;
1539    /// let iid_count = bed_cloud.iid_count().await?;
1540    ///
1541    /// assert!(iid_count == 3);
1542    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1543    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1544    pub async fn iid_count(&mut self) -> Result<usize, Box<BedErrorPlus>> {
1545        if let Some(iid_count) = self.iid_count {
1546            Ok(iid_count)
1547        } else {
1548            let fam_cloud_file = self.fam_cloud_file()?;
1549            let iid_count = fam_cloud_file.count_lines().await?;
1550            self.iid_count = Some(iid_count);
1551            Ok(iid_count)
1552        }
1553    }
1554
1555    /// Number of SNPs (variants)
1556    ///
1557    /// If this number is needed, it will be found
1558    /// by opening the .bim file and quickly counting the number
1559    /// of lines. Once found, the number will be remembered.
1560    /// The file read can be avoided by setting the
1561    /// number with [`BedCloudBuilder::sid_count`](struct.BedCloudBuilder.html#method.sid_count)
1562    /// or, for example, [`BedCloudBuilder::sid`](struct.BedCloudBuilder.html#method.sid).
1563    ///
1564    /// # Example:
1565    /// ```
1566    /// use ndarray as nd;
1567    /// use bed_reader::{BedCloud, ReadOptions, assert_eq_nan};
1568    ///
1569    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1570    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1571    /// let mut bed_cloud = BedCloud::new(url).await?;
1572    /// let sid_count = bed_cloud.sid_count().await?;
1573    ///
1574    /// assert!(sid_count == 4);
1575    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1576    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1577    pub async fn sid_count(&mut self) -> Result<usize, Box<BedErrorPlus>> {
1578        if let Some(sid_count) = self.sid_count {
1579            Ok(sid_count)
1580        } else {
1581            let bim_cloud_file = self.bim_cloud_file()?;
1582            let sid_count = bim_cloud_file.count_lines().await?;
1583            self.sid_count = Some(sid_count);
1584            Ok(sid_count)
1585        }
1586    }
1587
1588    /// Number of individuals (samples) and SNPs (variants)
1589    ///
1590    /// If these numbers aren't known, they will be found
1591    /// by opening the .fam and .bim files and quickly counting the number
1592    /// of lines. Once found, the numbers will be remembered.
1593    /// The file read can be avoided by setting the
1594    /// number with [`BedCloudBuilder::iid_count`](struct.BedCloudBuilder.html#method.iid_count)
1595    /// and [`BedCloudBuilder::sid_count`](struct.BedCloudBuilder.html#method.sid_count).
1596    ///
1597    /// # Example:
1598    /// ```
1599    /// use ndarray as nd;
1600    /// use bed_reader::{BedCloud, ReadOptions};
1601    /// use bed_reader::assert_eq_nan;
1602    ///
1603    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1604    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1605    /// let mut bed_cloud = BedCloud::new(url).await?;
1606    /// let dim = bed_cloud.dim().await?;
1607    ///
1608    /// assert!(dim == (3,4));
1609    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1610    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1611    // LATER: Could these be called at the same time, async?
1612    pub async fn dim(&mut self) -> Result<(usize, usize), Box<BedErrorPlus>> {
1613        Ok((self.iid_count().await?, self.sid_count().await?))
1614    }
1615
1616    /// Family id of each of individual (sample)
1617    ///
1618    /// If this ndarray is needed, it will be found
1619    /// by reading the .fam file. Once found, this ndarray
1620    /// and other information in the .fam file will be remembered.
1621    /// The file read can be avoided by setting the
1622    /// array with [`BedCloudBuilder::fid`](struct.BedCloudBuilder.html#method.fid).
1623    ///
1624    /// # Example:
1625    /// ```
1626    /// use ndarray as nd;
1627    /// use bed_reader::{BedCloud, ReadOptions};
1628    /// use bed_reader::assert_eq_nan;
1629    ///
1630    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1631    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1632    /// let mut bed_cloud = BedCloud::new(url).await?;
1633    /// let fid = bed_cloud.fid().await?;
1634    /// println!("{fid:?}"); // Outputs ndarray ["fid1", "fid1", "fid2"]
1635    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1636    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1637    pub async fn fid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1638        self.unlazy_fam::<String>(self.metadata.fid.is_none(), MetadataFields::Fid, "fid")
1639            .await?;
1640        Ok(self.metadata.fid.as_ref().unwrap()) //unwrap always works because of lazy_fam
1641    }
1642
1643    /// Individual id of each of individual (sample)
1644    ///
1645    /// If this ndarray is needed, it will be found
1646    /// by reading the .fam file. Once found, this ndarray
1647    /// and other information in the .fam file will be remembered.
1648    /// The file read can be avoided by setting the
1649    /// array with [`BedCloudBuilder::iid`](struct.BedCloudBuilder.html#method.iid).
1650    ///
1651    /// # Example:
1652    /// ```
1653    /// use ndarray as nd;
1654    /// use bed_reader::{BedCloud, ReadOptions};
1655    /// use bed_reader::assert_eq_nan;
1656    ///
1657    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1658    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1659    /// let mut bed_cloud = BedCloud::new(url).await?;
1660    /// let iid = bed_cloud.iid().await?;    ///
1661    /// println!("{iid:?}"); // Outputs ndarray ["iid1", "iid2", "iid3"]
1662    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1663    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1664    pub async fn iid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1665        self.unlazy_fam::<String>(self.metadata.iid.is_none(), MetadataFields::Iid, "iid")
1666            .await?;
1667        Ok(self.metadata.iid.as_ref().unwrap()) //unwrap always works because of lazy_fam
1668    }
1669
1670    /// Father id of each of individual (sample)
1671    ///
1672    /// If this ndarray is needed, it will be found
1673    /// by reading the .fam file. Once found, this ndarray
1674    /// and other information in the .fam file will be remembered.
1675    /// The file read can be avoided by setting the
1676    /// array with [`BedCloudBuilder::father`](struct.BedCloudBuilder.html#method.father).
1677    ///
1678    /// # Example:
1679    /// ```
1680    /// use ndarray as nd;
1681    /// use bed_reader::{BedCloud, ReadOptions};
1682    /// use bed_reader::assert_eq_nan;
1683    ///
1684    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1685    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1686    /// let mut bed_cloud = BedCloud::new(url).await?;
1687    /// let father = bed_cloud.father().await?;
1688    /// println!("{father:?}"); // Outputs ndarray ["iid23", "iid23", "iid22"]
1689    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1690    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1691    pub async fn father(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1692        self.unlazy_fam::<String>(
1693            self.metadata.father.is_none(),
1694            MetadataFields::Father,
1695            "father",
1696        )
1697        .await?;
1698        Ok(self.metadata.father.as_ref().unwrap()) //unwrap always works because of lazy_fam
1699    }
1700
1701    /// Mother id of each of individual (sample)
1702    ///
1703    /// If this ndarray is needed, it will be found
1704    /// by reading the .fam file. Once found, this ndarray
1705    /// and other information in the .fam file will be remembered.
1706    /// The file read can be avoided by setting the
1707    /// array with [`BedCloudBuilder::mother`](struct.BedCloudBuilder.html#method.mother).
1708    ///
1709    /// # Example:
1710    /// ```
1711    /// use ndarray as nd;
1712    /// use bed_reader::{BedCloud, ReadOptions};
1713    /// use bed_reader::assert_eq_nan;
1714    ///
1715    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1716    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1717    /// let mut bed_cloud = BedCloud::new(url).await?;
1718    /// let mother = bed_cloud.mother().await?;
1719    /// println!("{mother:?}"); // Outputs ndarray ["iid34", "iid34", "iid33"]
1720    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1721    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1722    pub async fn mother(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1723        self.unlazy_fam::<String>(
1724            self.metadata.mother.is_none(),
1725            MetadataFields::Mother,
1726            "mother",
1727        )
1728        .await?;
1729        Ok(self.metadata.mother.as_ref().unwrap()) //unwrap always works because of lazy_fam
1730    }
1731
1732    /// Sex each of individual (sample)
1733    ///
1734    /// 0 is unknown, 1 is male, 2 is female
1735    ///
1736    /// If this ndarray is needed, it will be found
1737    /// by reading the .fam file. Once found, this ndarray
1738    /// and other information in the .fam file will be remembered.
1739    /// The file read can be avoided by setting the
1740    /// array with [`BedCloudBuilder::sex`](struct.BedCloudBuilder.html#method.sex).
1741    ///
1742    /// # Example:
1743    /// ```
1744    /// use ndarray as nd;
1745    /// use bed_reader::{BedCloud, ReadOptions};
1746    /// use bed_reader::assert_eq_nan;
1747    ///
1748    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1749    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1750    /// let mut bed_cloud = BedCloud::new(url).await?;
1751    /// let sex = bed_cloud.sex().await?;
1752    /// println!("{sex:?}"); // Outputs ndarray [1, 2, 0]
1753    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1754    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1755    pub async fn sex(&mut self) -> Result<&nd::Array1<i32>, Box<BedErrorPlus>> {
1756        self.unlazy_fam::<String>(self.metadata.sex.is_none(), MetadataFields::Sex, "sex")
1757            .await?;
1758        Ok(self.metadata.sex.as_ref().unwrap()) //unwrap always works because of lazy_fam
1759    }
1760
1761    /// A phenotype for each individual (seldom used)
1762    ///
1763    /// If this ndarray is needed, it will be found
1764    /// by reading the .fam file. Once found, this ndarray
1765    /// and other information in the .fam file will be remembered.
1766    /// The file read can be avoided by setting the
1767    /// array with [`BedCloudBuilder::pheno`](struct.BedCloudBuilder.html#method.pheno).
1768    ///
1769    /// # Example:
1770    /// ```
1771    /// use ndarray as nd;
1772    /// use bed_reader::{BedCloud, ReadOptions};
1773    /// use bed_reader::assert_eq_nan;
1774    ///
1775    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1776    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1777    /// let mut bed_cloud = BedCloud::new(url).await?;
1778    /// let pheno = bed_cloud.pheno().await?;
1779    /// println!("{pheno:?}"); // Outputs ndarray ["red", "red", "blue"]
1780    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1781    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1782    pub async fn pheno(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1783        self.unlazy_fam::<String>(
1784            self.metadata.pheno.is_none(),
1785            MetadataFields::Pheno,
1786            "pheno",
1787        )
1788        .await?;
1789        Ok(self.metadata.pheno.as_ref().unwrap()) //unwrap always works because of lazy_fam
1790    }
1791
1792    /// Chromosome of each SNP (variant)
1793    ///
1794    /// If this ndarray is needed, it will be found
1795    /// by reading the .bim file. Once found, this ndarray
1796    /// and other information in the .bim file will be remembered.
1797    /// The file read can be avoided by setting the
1798    /// array with [`BedCloudBuilder::chromosome`](struct.BedCloudBuilder.html#method.chromosome).
1799    ///
1800    /// # Example:
1801    /// ```
1802    /// use ndarray as nd;
1803    /// use bed_reader::{BedCloud, ReadOptions};
1804    /// use bed_reader::assert_eq_nan;
1805    ///
1806    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1807    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1808    /// let mut bed_cloud = BedCloud::new(url).await?;
1809    /// let chromosome = bed_cloud.chromosome().await?;
1810    /// println!("{chromosome:?}"); // Outputs ndarray ["1", "1", "5", "Y"]
1811    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1812    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1813    /// ```
1814    pub async fn chromosome(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1815        self.unlazy_bim::<String>(
1816            self.metadata.chromosome.is_none(),
1817            MetadataFields::Chromosome,
1818            "chromosome",
1819        )
1820        .await?;
1821        Ok(self.metadata.chromosome.as_ref().unwrap()) //unwrap always works because of lazy_bim
1822    }
1823
1824    /// SNP id of each SNP (variant)
1825    ///
1826    /// If this ndarray is needed, it will be found
1827    /// by reading the .bim file. Once found, this ndarray
1828    /// and other information in the .bim file will be remembered.
1829    /// The file read can be avoided by setting the
1830    /// array with [`BedCloudBuilder::sid`](struct.BedCloudBuilder.html#method.sid).
1831    ///
1832    /// # Example:
1833    /// ```
1834    /// use ndarray as nd;
1835    /// use bed_reader::{BedCloud, ReadOptions};
1836    /// use bed_reader::assert_eq_nan;
1837    ///
1838    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1839    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1840    /// let mut bed_cloud = BedCloud::new(url).await?;
1841    /// let sid = bed_cloud.sid().await?;
1842    /// println!("{sid:?}"); // Outputs ndarray "sid1", "sid2", "sid3", "sid4"]
1843    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1844    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1845    pub async fn sid(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1846        self.unlazy_bim::<String>(self.metadata.sid.is_none(), MetadataFields::Sid, "sid")
1847            .await?;
1848        Ok(self.metadata.sid.as_ref().unwrap()) //unwrap always works because of lazy_bim
1849    }
1850
1851    /// Centimorgan position of each SNP (variant)
1852    ///
1853    /// If this ndarray is needed, it will be found
1854    /// by reading the .bim file. Once found, this ndarray
1855    /// and other information in the .bim file will be remembered.
1856    /// The file read can be avoided by setting the
1857    /// array with [`BedCloudBuilder::cm_position`](struct.BedCloudBuilder.html#method.cm_position).
1858    ///
1859    /// # Example:
1860    /// ```
1861    /// use ndarray as nd;
1862    /// use bed_reader::{BedCloud, ReadOptions};
1863    /// use bed_reader::assert_eq_nan;
1864    ///
1865    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1866    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1867    /// let mut bed_cloud = BedCloud::new(url).await?;
1868    /// let cm_position = bed_cloud.cm_position().await?;
1869    /// println!("{cm_position:?}"); // Outputs ndarray [100.4, 2000.5, 4000.7, 7000.9]
1870    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1871    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1872    pub async fn cm_position(&mut self) -> Result<&nd::Array1<f32>, Box<BedErrorPlus>> {
1873        self.unlazy_bim::<String>(
1874            self.metadata.cm_position.is_none(),
1875            MetadataFields::CmPosition,
1876            "cm_position",
1877        )
1878        .await?;
1879        Ok(self.metadata.cm_position.as_ref().unwrap()) //unwrap always works because of lazy_bim
1880    }
1881
1882    /// Base-pair position of each SNP (variant)
1883    ///
1884    /// If this ndarray is needed, it will be found
1885    /// by reading the .bim file. Once found, this ndarray
1886    /// and other information in the .bim file will be remembered.
1887    /// The file read can be avoided by setting the
1888    /// array with [`BedCloudBuilder::bp_position`](struct.BedCloudBuilder.html#method.bp_position).
1889    ///
1890    /// # Example:
1891    /// ```
1892    /// use ndarray as nd;
1893    /// use bed_reader::{BedCloud, ReadOptions};
1894    /// use bed_reader::assert_eq_nan;
1895    ///
1896    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1897    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1898    /// let mut bed_cloud = BedCloud::new(url).await?;
1899    /// let bp_position = bed_cloud.bp_position().await?;
1900    /// println!("{bp_position:?}"); // Outputs ndarray [1, 100, 1000, 1004]
1901    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1902    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1903    pub async fn bp_position(&mut self) -> Result<&nd::Array1<i32>, Box<BedErrorPlus>> {
1904        self.unlazy_bim::<String>(
1905            self.metadata.bp_position.is_none(),
1906            MetadataFields::BpPosition,
1907            "bp_position",
1908        )
1909        .await?;
1910        Ok(self.metadata.bp_position.as_ref().unwrap()) //unwrap always works because of lazy_bim
1911    }
1912
1913    /// First allele of each SNP (variant)
1914    ///
1915    /// If this ndarray is needed, it will be found
1916    /// by reading the .bim file. Once found, this ndarray
1917    /// and other information in the .bim file will be remembered.
1918    /// The file read can be avoided by setting the
1919    /// array with [`BedCloudBuilder::allele_1`](struct.BedCloudBuilder.html#method.allele_1).
1920    ///
1921    /// # Example:
1922    /// ```
1923    /// use ndarray as nd;
1924    /// use bed_reader::{BedCloud, ReadOptions};
1925    /// use bed_reader::assert_eq_nan;
1926    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1927    ///
1928    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1929    /// let mut bed_cloud = BedCloud::new(url).await?;
1930    /// let allele_1 = bed_cloud.allele_1().await?;
1931    /// println!("{allele_1:?}"); // Outputs ndarray ["A", "T", "A", "T"]
1932    /// # let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1933    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1934    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1935    pub async fn allele_1(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1936        self.unlazy_bim::<String>(
1937            self.metadata.allele_1.is_none(),
1938            MetadataFields::Allele1,
1939            "allele_1",
1940        )
1941        .await?;
1942        Ok(self.metadata.allele_1.as_ref().unwrap()) //unwrap always works because of lazy_bim
1943    }
1944
1945    /// Second allele of each SNP (variant)
1946    ///
1947    /// If this ndarray is needed, it will be found
1948    /// by reading the .bim file. Once found, this ndarray
1949    /// and other information in the .bim file will be remembered.
1950    /// The file read can be avoided by setting the
1951    /// array with [`BedCloudBuilder::allele_2`](struct.BedCloudBuilder.html#method.allele_2).
1952    ///
1953    /// # Example:
1954    /// ```
1955    /// use ndarray as nd;
1956    /// use bed_reader::{BedCloud, ReadOptions};
1957    /// use bed_reader::assert_eq_nan;
1958    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
1959    ///
1960    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
1961    /// let mut bed_cloud = BedCloud::new(url).await?;
1962    /// let allele_2 = bed_cloud.allele_2().await?;
1963    /// println!("{allele_2:?}"); // Outputs ndarray ["A", "C", "C", "G"]
1964    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
1965    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
1966    /// ```        
1967    pub async fn allele_2(&mut self) -> Result<&nd::Array1<String>, Box<BedErrorPlus>> {
1968        self.unlazy_bim::<String>(
1969            self.metadata.allele_2.is_none(),
1970            MetadataFields::Allele2,
1971            "allele_2",
1972        )
1973        .await?;
1974        Ok(self.metadata.allele_2.as_ref().unwrap()) //unwrap always works because of lazy_bim
1975    }
1976
    /// [`Metadata`](struct.Metadata.html) for this dataset, for example, the individual (sample) Ids.
    ///
    /// This returns a struct with 12 fields. Each field is a ndarray.
    /// The struct will always be new, but the 12 ndarrays will be
    /// shared with this [`BedCloud`](struct.BedCloud.html).
    ///
    /// If needed, the metadata will be read from the .fam and/or .bim files.
    ///
    /// # Errors
    /// Returns any error produced by the .fam or .bim step; the .fam step runs first.
    ///
    /// # Example
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{BedCloud};
    ///
    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
    /// let mut bed_cloud = BedCloud::new(url).await?;
    /// let metadata = bed_cloud.metadata().await?;
    /// println!("{0:?}", metadata.iid()); // Outputs Some(["iid1", "iid2", "iid3"] ...)
    /// println!("{0:?}", metadata.sid()); // Outputs Some(["sid1", "sid2", "sid3", "sid4"] ...)
    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
    /// ```
    pub async fn metadata(&mut self) -> Result<Metadata, Box<BedErrorPlus>> {
        // NOTE(review): `fam`/`bim` are defined outside this view; presumably they
        // load the .fam/.bim metadata into `self.metadata` when it is missing.
        self.fam().await?;
        self.bim().await?;
        // Hand back a fresh `Metadata` value cloned from the one held by `self`.
        Ok(self.metadata.clone())
    }
2001
    /// Return the `CloudFile` of the .bed file.
    ///
    /// The returned value is a clone of the `CloudFile` held by this `BedCloud`.
    #[must_use]
    pub fn cloud_file(&self) -> CloudFile {
        self.cloud_file.clone()
    }
2007
2008    /// Return the cloud location of the .fam file.
2009    pub fn fam_cloud_file(&mut self) -> Result<CloudFile, Box<BedErrorPlus>> {
2010        // We need to clone the cloud_file because self might mutate later
2011        if let Some(fam_cloud_file) = &self.fam_cloud_file {
2012            Ok(fam_cloud_file.clone())
2013        } else {
2014            let fam_cloud_file = to_metadata_path(&self.cloud_file, &self.fam_cloud_file, "fam")?;
2015            self.fam_cloud_file = Some(fam_cloud_file.clone());
2016            Ok(fam_cloud_file)
2017        }
2018    }
2019
2020    /// Return the cloud location of the .bim file.
2021    pub fn bim_cloud_file(&mut self) -> Result<CloudFile, Box<BedErrorPlus>> {
2022        // We need to clone the cloud_file because self might mutate later
2023        if let Some(bim_cloud_file) = &self.bim_cloud_file {
2024            Ok(bim_cloud_file.clone())
2025        } else {
2026            let bim_cloud_file = to_metadata_path(&self.cloud_file, &self.bim_cloud_file, "bim")?;
2027            self.bim_cloud_file = Some(bim_cloud_file.clone());
2028            Ok(bim_cloud_file)
2029        }
2030    }
2031
    /// Read genotype data.
    ///
    /// This is equivalent to calling `read_with_options` with a
    /// [`ReadOptions`](struct.ReadOptions.html) built without setting any option.
    ///
    /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder) which supports selection and options.
    ///
    /// # Errors
    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
    /// for all possible errors.
    ///
    /// # Examples
    /// Read all data in a .bed file.
    ///
    /// ```
    /// use ndarray as nd;
    /// use bed_reader::{BedCloud, ReadOptions};
    /// use bed_reader::assert_eq_nan;
    ///
    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
    /// let mut bed_cloud = BedCloud::new(url).await?;
    /// let val = bed_cloud.read::<f64>().await?;
    ///
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1.0, 0.0, f64::NAN, 0.0],
    ///         [2.0, 0.0, f64::NAN, 2.0],
    ///         [0.0, 1.0, 2.0, 0.0]
    ///     ],
    /// );
    ///
    /// // Your output array can be f32, f64, or i8
    /// let val = bed_cloud.read::<i8>().await?;
    /// assert_eq_nan(
    ///     &val,
    ///     &nd::array![
    ///         [1, 0, -127, 0],
    ///         [2, 0, -127, 2],
    ///         [0, 1, 2, 0]
    ///     ],
    /// );
    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
    /// ```
    pub async fn read<TVal: BedVal>(&mut self) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
        // Build options with nothing customized, then reuse the general read path.
        let read_options = ReadOptions::<TVal>::builder().build()?;
        self.read_with_options(&read_options).await
    }
2079
2080    /// Read genotype data with options, into a preallocated array.
2081    ///
2082    /// > Also see [`ReadOptionsBuilder::read_and_fill`](struct.ReadOptionsBuilder.html#method.read_and_fill).
2083    ///
2084    /// Note that options [`ReadOptions::f`](struct.ReadOptions.html#method.f),
2085    /// [`ReadOptions::c`](struct.ReadOptions.html#method.c), and [`ReadOptions::is_f`](struct.ReadOptionsBuilder.html#method.is_f)
2086    /// are ignored. Instead, the order of the preallocated array is used.
2087    ///
2088    /// # Errors
2089    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2090    /// for all possible errors.
2091    ///
2092    /// # Example
2093    ///
2094    /// ```
2095    /// use ndarray as nd;
2096    /// use bed_reader::{BedCloud, ReadOptions};
2097    /// use bed_reader::assert_eq_nan;
2098    ///
2099    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
2100    /// // Read the SNPs indexed by 2.
2101    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
2102    /// let mut bed_cloud = BedCloud::new(url).await?;
2103    /// let read_options = ReadOptions::builder().sid_index(2).build()?;
2104    /// let mut val = nd::Array2::<f64>::default((3, 1));
2105    /// bed_cloud.read_and_fill_with_options(&mut val.view_mut(), &read_options).await?;
2106    ///
2107    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
2108    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
2109    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
2110    /// ```  
2111    #[allow(clippy::similar_names)]
2112    pub async fn read_and_fill_with_options<TVal: BedVal>(
2113        &mut self,
2114        val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.,
2115        read_options: &ReadOptions<TVal>,
2116    ) -> Result<(), Box<BedErrorPlus>> {
2117        // must do these one-at-a-time because they mutate self to cache the results
2118        let iid_count = self.iid_count().await?;
2119        let sid_count = self.sid_count().await?;
2120
2121        let max_concurrent_requests =
2122            compute_max_concurrent_requests(read_options.max_concurrent_requests)?;
2123
2124        let max_chunk_bytes = compute_max_chunk_bytes(read_options.max_chunk_bytes)?;
2125
2126        // If we already have a Vec<isize>, reference it. If we don't, create one and reference it.
2127        let iid_hold = Hold::new(&read_options.iid_index, iid_count)?;
2128        let iid_index = iid_hold.as_ref();
2129        let sid_hold = Hold::new(&read_options.sid_index, sid_count)?;
2130        let sid_index = sid_hold.as_ref();
2131
2132        let dim = val.dim();
2133        if dim != (iid_index.len(), sid_index.len()) {
2134            Err(BedError::InvalidShape(
2135                iid_index.len(),
2136                sid_index.len(),
2137                dim.0,
2138                dim.1,
2139            ))?;
2140        }
2141
2142        read_no_alloc(
2143            &self.cloud_file,
2144            iid_count,
2145            sid_count,
2146            read_options.is_a1_counted,
2147            iid_index,
2148            sid_index,
2149            read_options.missing_value,
2150            max_concurrent_requests,
2151            max_chunk_bytes,
2152            &mut val.view_mut(),
2153        )
2154        .await
2155    }
2156
2157    /// Read all genotype data into a preallocated array.
2158    ///
2159    /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
2160    ///
2161    /// # Errors
2162    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2163    /// for all possible errors.
2164    ///
2165    /// # Example
2166    ///
2167    /// ```
2168    /// use ndarray as nd;
2169    /// use bed_reader::{BedCloud, ReadOptions};
2170    /// use bed_reader::assert_eq_nan;
2171    ///
2172    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
2173    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
2174    /// let mut bed_cloud = BedCloud::new(url).await?;
2175    /// let mut val = nd::Array2::<i8>::default(bed_cloud.dim().await?);
2176    /// bed_cloud.read_and_fill(&mut val.view_mut()).await?;
2177    ///
2178    /// assert_eq_nan(
2179    ///     &val,
2180    ///     &nd::array![
2181    ///         [1, 0, -127, 0],
2182    ///         [2, 0, -127, 2],
2183    ///         [0, 1, 2, 0]
2184    ///     ],
2185    /// );
2186    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
2187    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
2188    /// ```
2189    pub async fn read_and_fill<TVal: BedVal>(
2190        &mut self,
2191        val: &mut nd::ArrayViewMut2<'_, TVal>, //mutable slices additionally allow to modify elements. But slices cannot grow - they are just a view into some vector.,
2192    ) -> Result<(), Box<BedErrorPlus>> {
2193        let read_options = ReadOptions::<TVal>::builder().build()?;
2194        self.read_and_fill_with_options(val, &read_options).await
2195    }
2196
2197    /// Read genotype data with options.
2198    ///
2199    /// > Also see [`ReadOptions::builder`](struct.ReadOptions.html#method.builder).
2200    ///
2201    /// # Errors
2202    /// See [`BedError`](enum.BedError.html) and [`BedErrorPlus`](enum.BedErrorPlus.html)
2203    /// for all possible errors.
2204    ///
2205    /// # Example
2206    ///
2207    /// ```
2208    /// use ndarray as nd;
2209    /// use bed_reader::{BedCloud, ReadOptions};
2210    /// use bed_reader::assert_eq_nan;
2211    ///
2212    /// # #[cfg(feature = "tokio")] Runtime::new().unwrap().block_on(async {
2213    /// // Read the SNPs indexed by 2.
2214    /// let url = "https://raw.githubusercontent.com/fastlmm/bed-sample-files/main/small.bed";
2215    /// let mut bed_cloud = BedCloud::new(url).await?;
2216    /// let read_options = ReadOptions::builder().sid_index(2).f64().build()?;
2217    /// let val = bed_cloud.read_with_options(&read_options).await?;
2218    ///
2219    /// assert_eq_nan(&val, &nd::array![[f64::NAN], [f64::NAN], [2.0]]);
2220    /// # Ok::<(), Box<BedErrorPlus>>(())}).unwrap();
2221    /// # #[cfg(feature = "tokio")] use {tokio::runtime::Runtime, bed_reader::BedErrorPlus};
2222    /// ```  
2223    pub async fn read_with_options<TVal: BedVal>(
2224        &mut self,
2225        read_options: &ReadOptions<TVal>,
2226    ) -> Result<nd::Array2<TVal>, Box<BedErrorPlus>> {
2227        let iid_count_in = self.iid_count().await?;
2228        let sid_count_in = self.sid_count().await?;
2229        let iid_count_out = read_options.iid_index.len(iid_count_in)?;
2230        let sid_count_out = read_options.sid_index.len(sid_count_in)?;
2231        let shape = ShapeBuilder::set_f((iid_count_out, sid_count_out), read_options.is_f);
2232        let mut val = nd::Array2::<TVal>::default(shape);
2233
2234        self.read_and_fill_with_options(&mut val.view_mut(), read_options)
2235            .await?;
2236
2237        Ok(val)
2238    }
2239
2240    // LATER: Support writing to a BedCloud
2241
2242    async fn unlazy_fam<T: FromStringArray<T>>(
2243        &mut self,
2244        is_none: bool,
2245        field_index: MetadataFields,
2246        name: &str,
2247    ) -> Result<(), Box<BedErrorPlus>> {
2248        if self.skip_set.contains(&field_index) {
2249            Err(BedError::CannotUseSkippedMetadata(name.into()))?;
2250        }
2251        if is_none {
2252            self.fam().await?;
2253        }
2254        Ok(())
2255    }
2256
2257    async fn unlazy_bim<T: FromStringArray<T>>(
2258        &mut self,
2259        is_none: bool,
2260        field_index: MetadataFields,
2261        name: &str,
2262    ) -> Result<(), Box<BedErrorPlus>> {
2263        if self.skip_set.contains(&field_index) {
2264            Err(BedError::CannotUseSkippedMetadata(name.into()))?;
2265        }
2266        if is_none {
2267            self.bim().await?;
2268        }
2269        Ok(())
2270    }
2271
2272    async fn fam(&mut self) -> Result<(), Box<BedErrorPlus>> {
2273        let fam_cloud_file = self.fam_cloud_file()?.clone();
2274
2275        let (metadata, count) = self
2276            .metadata
2277            .read_fam_cloud(&fam_cloud_file, &self.skip_set)
2278            .await?;
2279        self.metadata = metadata;
2280
2281        match self.iid_count {
2282            Some(iid_count) => {
2283                if iid_count != count {
2284                    Err(BedError::InconsistentCount("iid".into(), iid_count, count))?;
2285                }
2286            }
2287            None => {
2288                self.iid_count = Some(count);
2289            }
2290        }
2291        Ok(())
2292    }
2293
2294    async fn bim(&mut self) -> Result<(), Box<BedErrorPlus>> {
2295        let bim_cloud_file = self.bim_cloud_file()?.clone();
2296
2297        let (metadata, count) = self
2298            .metadata
2299            .read_bim_cloud(&bim_cloud_file, &self.skip_set)
2300            .await?;
2301        self.metadata = metadata;
2302
2303        match self.sid_count {
2304            Some(sid_count) => {
2305                if sid_count != count {
2306                    Err(BedError::InconsistentCount("sid".into(), sid_count, count))?;
2307                }
2308            }
2309            None => {
2310                self.sid_count = Some(count);
2311            }
2312        }
2313        Ok(())
2314    }
2315}
2316
2317/// Returns the cloud location of a sample .bed file as a URL string.
2318///
2319/// Behind the scenes, the "cloud location" will actually be local.
2320/// If necessary, the file will be downloaded.
2321/// The .fam and .bim files will also be downloaded, if they are not already present.
2322/// SHA256 hashes are used to verify that the files are correct.
2323/// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
2324/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
2325#[anyinput]
2326pub fn sample_bed_url(bed_path: AnyPath) -> Result<String, Box<BedErrorPlus>> {
2327    let mut path_list: Vec<PathBuf> = Vec::new();
2328    for ext in &["bed", "bim", "fam"] {
2329        let file_path = bed_path.with_extension(ext);
2330        path_list.push(file_path);
2331    }
2332
2333    let mut vec = sample_urls(path_list)?;
2334    Ok(vec.swap_remove(0))
2335}
2336
2337/// Returns the cloud location of a sample file as a URL string.
2338///
2339/// Behind the scenes, the "cloud location" will actually be local.
2340/// If necessary, the file will be downloaded.
2341/// A SHA256 hash is used to verify that the file is correct.
2342/// The file will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
2343/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
2344#[anyinput]
2345pub fn sample_url(path: AnyPath) -> Result<String, Box<BedErrorPlus>> {
2346    let file_path = STATIC_FETCH_DATA
2347        .fetch_file(path)
2348        .map_err(|e| BedError::SampleFetch(e.to_string()))?;
2349    let url = abs_path_to_url_string(file_path)?;
2350    Ok(url)
2351}
2352
2353/// Returns the cloud locations of a list of files as URL strings.
2354///
2355/// Behind the scenes, the "cloud location" will actually be local.
2356/// If necessary, the file will be downloaded.
2357/// SHA256 hashes are used to verify that the files are correct.
2358/// The files will be in a directory determined by environment variable `BED_READER_DATA_DIR`.
2359/// If that environment variable is not set, a cache folder, appropriate to the OS, will be used.
2360#[anyinput]
2361pub fn sample_urls(path_list: AnyIter<AnyPath>) -> Result<Vec<String>, Box<BedErrorPlus>> {
2362    let file_paths = STATIC_FETCH_DATA
2363        .fetch_files(path_list)
2364        .map_err(|e| BedError::SampleFetch(e.to_string()))?;
2365    file_paths
2366        .iter()
2367        .map(|file_path| {
2368            let url = abs_path_to_url_string(file_path)?;
2369            Ok(url)
2370        })
2371        .collect()
2372}
2373
2374fn to_metadata_path(
2375    bed_cloud_file: &CloudFile,
2376    metadata_cloud_file: &Option<CloudFile>,
2377    extension: &str,
2378) -> Result<CloudFile, Box<BedErrorPlus>> {
2379    if let Some(metadata_cloud_file) = metadata_cloud_file {
2380        Ok(metadata_cloud_file.clone())
2381    } else {
2382        let mut meta_cloud_file = bed_cloud_file.clone();
2383        meta_cloud_file.set_extension(extension)?;
2384        Ok(meta_cloud_file)
2385    }
2386}