frust5_api/
lib.rs

1#![deny(missing_docs)]
//! This crate wraps the most basic functionality of the Python [`ONT-FAST5-API`], but in Rust!
//! # Warning
//! Very much in alpha and a WIP; I wrote this for one specific use case that I had.
//!
//! Currently it is only possible to read and write FAST5. It uses the HDF5 crate to deal with HDF5 files.
//! It does apply the VBZ plugin to the files.
//!
//! [`ONT-FAST5-API`]: https://github.com/nanoporetech/ont_fast5_api
10
11mod utils;
12
13#[cfg(feature = "blosc")]
14use hdf5::filters::blosc_set_nthreads;
15use hdf5::types::VarLenAscii;
16use hdf5::{Error, File, Group, Result};
17use resolve_path::PathResolveExt;
18use std::collections::HashMap;
19
#[cfg(test)]
mod tests {
    /// Smoke test: only confirms that the crate's test harness builds and runs.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}
27
/// Typed values for the attributes of a read's `Raw` group, which in turn contains the
/// `Signal` dataset.
///
/// Each variant carries its payload in the Rust type that should be handed to HDF5, so code
/// writing the attributes can `match` on the variant and provide the correct type.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum RawAttrsOpts<'a> {
    /// The duration of the read.
    /// NOTE(review): previously documented as seconds; the unit is not established anywhere
    /// in this file - confirm against the FAST5 specification.
    Duration(u32),
    /// Code for the reason that the read stopped.
    EndReason(u8),
    /// The `median_before` value for the read.
    /// NOTE(review): its meaning is not established in this file - confirm against the
    /// FAST5 specification.
    MedianBefore(f64),
    /// UUID-4 identifier for the read (36 characters in the canonical hyphenated form).
    ReadId(&'a str),
    /// Read number - the number the read is in the run.
    ReadNumber(i32),
    /// The `start_mux` value for the read.
    /// NOTE(review): its meaning is not established in this file - confirm against the
    /// FAST5 specification.
    StartMux(u8),
    /// The start time of the read.
    /// NOTE(review): previously guessed to be milliseconds; the unit is not established in
    /// this file - confirm against the FAST5 specification.
    StartTime(u64),
}
46
47/// Open a fast5 and return a Vec containing the groups.
48/// 
49/// # Panics
50/// Will panic if no Groups are found, or there is an issue reading the FAST5 file.
51/// 
52/// # Example
53/// ```rust
54/// use frust5_api::read_fast5;
55
56/// ```
57pub fn read_fast5(file_name: &str) -> Result<Vec<Group>, hdf5::Error> {
58    let file = File::open(file_name)?; // open for reading
59    let gs = file.groups()?; // open the dataset;
60    Ok(gs.clone())
61}
62
63/// Struct representing a "Multi" Fast5 file. 
64pub struct MultiFast5File {
65    /// The filename of the MultiFast5 opened.
66    filename: String,
67    /// The mode that the file was opened as.
68    mode: OpenMode,
69    /// The handle to the HDF5 file.
70    handle: File,
71    /// A hashmap of run_id to the read_id of the first read - used to hardlink all the attributes of read groups together.
72    _run_id_map: HashMap<String, String>,
73}
74
/// Struct to represent the channel info attributes for each read.
#[derive(Debug, Clone, PartialEq)]
pub struct ChannelInfo {
    digitisation: f64,
    offset: f64,
    range: f64,
    sampling_rate: f64,
    // Kept as a string; it is the only field that is not yielded by `into_iter`.
    channel_number: String,
}

#[doc(hidden)]
impl IntoIterator for ChannelInfo {
    type Item = (String, f64);
    type IntoIter = std::array::IntoIter<(String, f64), 4>;

    /// Consume the struct and yield its four numeric fields as `(attribute name, value)`
    /// pairs, in the order `digitisation`, `offset`, `range`, `sampling_rate`.
    ///
    /// `channel_number` is not part of the iteration because it is not an `f64`.
    fn into_iter(self) -> Self::IntoIter {
        // By-value array iteration; replaces the deprecated `std::array::IntoIter::new`.
        IntoIterator::into_iter([
            ("digitisation".to_string(), self.digitisation),
            ("offset".to_string(), self.offset),
            ("range".to_string(), self.range),
            ("sampling_rate".to_string(), self.sampling_rate),
        ])
    }
}

impl ChannelInfo {
    /// Return a new `ChannelInfo` built from the given values, stored unchanged.
    pub fn new(
        digitisation: f64,
        offset: f64,
        range: f64,
        sampling_rate: f64,
        channel_number: String,
    ) -> ChannelInfo {
        ChannelInfo {
            digitisation,
            offset,
            range,
            sampling_rate,
            channel_number,
        }
    }
}
117
/// The mode to open a file with.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OpenMode {
    /// Open a FAST5 file for writing, so that reads can be added to it.
    Append,
    /// Open a FAST5 file read-only.
    Read,
}
/// Names of the per-read sub-groups that are shared between reads of the same run by
/// hard-linking them to the first read's copy instead of writing them again.
const HARDLINK_GROUPS: [&str; 2] = ["context_tags", "tracking_id"];
126
127impl MultiFast5File {
128    /// Create a new MultiFast5 file - for either reading or writing.
129    /// 
130    /// # Panics
131    /// 
132    /// - Currently if opening for writing and the file already exists, as tries to write attributes that already exist
133    /// - If opening for reading and the file doesn't already exist.
134    /// 
135    /// # Examples
136    /// ```
137    /// ```
138    pub fn new(filename: String, mode: OpenMode) -> MultiFast5File {
139        let file = match mode {
140            OpenMode::Append => {
141                let file = File::with_options()
142                    .with_fapl(|p| p.core().core_filebacked(true))
143                    .append(&filename)
144                    .unwrap();
145                // default attributes for now
146                let file_type = VarLenAscii::from_ascii("multi-read").unwrap();
147                let file_version = VarLenAscii::from_ascii("2.2").unwrap();
148                file.new_attr::<VarLenAscii>()
149                    .create("file_type")
150                    .unwrap()
151                    .write_scalar(&file_type)
152                    .unwrap();
153                file.new_attr::<VarLenAscii>()
154                    .create("file_version")
155                    .unwrap()
156                    .write_scalar(&file_version)
157                    .unwrap();
158                file
159            }
160            OpenMode::Read => {
161                File::open("FAL37440_pass_5e83140e_100.fast5").unwrap() // open for reading
162            }
163        };
164        MultiFast5File {
165            filename: filename.clone(),
166            mode,
167            handle: file,
168            _run_id_map: HashMap::new(),
169        }
170    }
171    /// Diverged from ONT come back and straight up rework
172    /// Create and empty read group, and populate all the fields for it. MISNOMER - doesn't return an empty read, returns a populated read.
173    pub fn create_empty_read(
174        &mut self,
175        read_id: String,
176        run_id: String,
177        tracking_id: &HashMap<&str, &str>,
178        context_tags: &HashMap<&str, &str>,
179        channel_info: ChannelInfo,
180        raw_attrs: &HashMap<&str, RawAttrsOpts>,
181        signal: Vec<i16>
182    ) -> Result<Group, Error> {
183        // plz work
184        std::env::set_var("HDF5_PLUGIN_PATH", "./vbz_plugin".resolve().as_os_str());
185        let group_name = format!("read_{}", read_id);
186        let group = self.handle.create_group(&group_name).unwrap();
187        let s = VarLenAscii::from_ascii(run_id.as_str()).unwrap();
188        group
189            .new_attr::<VarLenAscii>()
190            .create("run_id")?
191            .write_scalar(&s).expect(format!("{} group is {:#?}", &s, group).as_str());
192        // set the shared groups for every read - namely the contstant Dict attributes
193        if self._run_id_map.contains_key(&run_id) {
194            for shared_group in HARDLINK_GROUPS {
195                self.handle
196                    .link_hard(
197                        format!("read_{}/{}", self._run_id_map[&run_id], shared_group).as_str(),
198                        format!("{}/{}", group_name, shared_group).as_str(),
199                    )
200                    .expect(format!("{}/{}", self._run_id_map[&run_id], shared_group).as_str());
201            }
202            // populate all the fields on the read.
203        } else {
204            self._run_id_map.insert(run_id, read_id);
205            let context_group = group.create_group("context_tags")?;
206            let tracking_group = group.create_group("tracking_id")?;
207            utils::add_tracking_info(tracking_group, &tracking_id)?;
208            utils::add_context_tags(context_group, context_tags)?;
209        }
210        let channel_group = group.create_group("channel_id")?;
211        let raw_data_group = group.create_group("Raw")?;
212        utils::add_channel_info(channel_group, channel_info)?;
213        utils::add_raw_data(raw_data_group, signal, raw_attrs)?;
214        Ok(group)
215    }
216}