1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#![deny(missing_docs)]
//!This crate wraps the most basic functionality of the [`ONT-FAST5-API`] for python, but in Rust!
//!# Warning
//!Very much in alpha and a WIP; I wrote this for one specific use case that I had.
//!
//! Currently it is only possible to read and write FAST5. It uses the HDF5 crate to deal with HDF5 files.
//! It does apply the VBZ plugin to the files.
//!
//!  [`ONT-FAST5-API`]: https://github.com/nanoporetech/ont_fast5_api

mod utils;

#[cfg(feature = "blosc")]
use hdf5::filters::blosc_set_nthreads;
use hdf5::types::VarLenAscii;
use hdf5::{Error, File, Group, Result};
use resolve_path::PathResolveExt;
use std::collections::HashMap;

#[cfg(test)]
mod tests {
    /// Sanity check that the test harness is wired up.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}

///Provides the Attributes for the Raw Group - which in turn contains the Signal dataset.
///This allows us to match for each attribute field, and provide the correct type to HDF5.
pub enum RawAttrsOpts<'a> {
    /// The duration of the read (units per ONT spec — presumably samples or seconds; TODO confirm).
    Duration(u32),
    /// The reason that the read stopped.
    EndReason(u8),
    /// NOTE(review): presumably the median current level before the read began — confirm against ONT docs.
    MedianBefore(f64),
    /// 37 character UUID-4 identifier for the read.
    ReadId(&'a str),
    /// Read number - the number the read is in the run.
    ReadNumber(i32),
    /// NOTE(review): presumably the multiplexer setting at read start — confirm against ONT docs.
    StartMux(u8),
    /// The start time of the read in milliseconds (I guess)
    StartTime(u64),
}

/// Open a FAST5 file and return a `Vec` containing its top-level groups.
///
/// # Errors
/// Returns an [`hdf5::Error`] if the file cannot be opened or its groups
/// cannot be read.
///
/// # Example
/// ```no_run
/// use frust5_api::read_fast5;
///
/// for group in read_fast5("FAL37440_pass_5e83140e_100.fast5").unwrap() {
///     println!("{:?}", group);
/// }
/// ```
pub fn read_fast5(file_name: &str) -> Result<Vec<Group>, hdf5::Error> {
    let file = File::open(file_name)?; // open for reading
    // `groups()` already yields an owned Vec<Group>; the previous `.clone()`
    // duplicated the whole Vec for nothing.
    file.groups()
}

/// Struct representing a "Multi" FAST5 file (one HDF5 file holding many reads).
pub struct MultiFast5File {
    /// The filename of the MultiFast5 opened.
    filename: String,
    /// The mode that the file was opened as.
    mode: OpenMode,
    /// The handle to the HDF5 file.
    handle: File,
    /// Maps run_id -> read_id of the first read written for that run; used to
    /// hard-link the shared attribute groups of subsequent reads together.
    _run_id_map: HashMap<String, String>,
}

/// Struct to represent the channel info attributes for each read.
pub struct ChannelInfo {
    // Digitisation value for the channel's ADC.
    digitisation: f64,
    // Offset applied to the raw signal.
    offset: f64,
    // Range of the signal.
    range: f64,
    // Sampling rate of the channel (presumably Hz — TODO confirm).
    sampling_rate: f64,
    // Channel number, stored as a string (kept out of the numeric iterator below).
    channel_number: String,
}

#[doc(hidden)]
impl IntoIterator for ChannelInfo {
    type Item = (String, f64);
    type IntoIter = std::array::IntoIter<(String, f64), 4>;

    fn into_iter(self) -> Self::IntoIter {
        std::array::IntoIter::new([
            ("digitisation".to_string(), self.digitisation),
            ("offset".to_string(), self.offset),
            ("range".to_string(), self.range),
            ("sampling_rate".to_string(), self.sampling_rate),
        ])
    }
}

impl ChannelInfo {
    /// Return a new Channel Info struct.
    pub fn new(
        digitisation: f64,
        offset: f64,
        range: f64,
        sampling_rate: f64,
        channel_number: String,
    ) -> ChannelInfo {
        ChannelInfo {
            digitisation,
            offset,
            range,
            sampling_rate,
            channel_number,
        }
    }
}

/// The mode to open a file with.
pub enum OpenMode {
    /// Open a fast5 for writing; new reads are appended to the file.
    Append,
    /// Open a fast5 for read only.
    Read,
}
// Group names whose contents are constant across a run; they are hard-linked
// to the first read of the run instead of being duplicated for every read.
const HARDLINK_GROUPS: [&str; 2] = ["context_tags", "tracking_id"];

impl MultiFast5File {
    /// Create a new MultiFast5 file - for either reading or writing.
    ///
    /// # Panics
    ///
    /// - Currently if opening for writing and the file already exists, as it tries to
    ///   write attributes that already exist.
    /// - If opening for reading and the file doesn't already exist.
    ///
    /// # Examples
    /// ```no_run
    /// use frust5_api::{MultiFast5File, OpenMode};
    ///
    /// let mut multi = MultiFast5File::new("test.fast5".to_string(), OpenMode::Append);
    /// ```
    pub fn new(filename: String, mode: OpenMode) -> MultiFast5File {
        let file = match mode {
            OpenMode::Append => {
                let file = File::with_options()
                    .with_fapl(|p| p.core().core_filebacked(true))
                    .append(&filename)
                    .unwrap();
                // Default top-level attributes for a multi-read FAST5.
                let file_type = VarLenAscii::from_ascii("multi-read").unwrap();
                let file_version = VarLenAscii::from_ascii("2.2").unwrap();
                file.new_attr::<VarLenAscii>()
                    .create("file_type")
                    .unwrap()
                    .write_scalar(&file_type)
                    .unwrap();
                file.new_attr::<VarLenAscii>()
                    .create("file_version")
                    .unwrap()
                    .write_scalar(&file_version)
                    .unwrap();
                file
            }
            OpenMode::Read => {
                // BUGFIX: previously this opened a hardcoded test filename
                // ("FAL37440_pass_5e83140e_100.fast5") instead of `filename`.
                File::open(&filename).unwrap() // open for reading
            }
        };
        MultiFast5File {
            // `filename` is only borrowed above, so it can be moved in
            // directly — no clone needed.
            filename,
            mode,
            handle: file,
            _run_id_map: HashMap::new(),
        }
    }

    /// Diverged from ONT; come back and straight up rework.
    /// Create an empty read group and populate all the fields for it.
    /// MISNOMER - doesn't return an empty read, returns a populated read.
    ///
    /// For reads after the first of a given `run_id`, the run-constant groups
    /// (`context_tags`, `tracking_id`) are hard-linked to the first read
    /// instead of being re-created.
    ///
    /// # Errors
    /// Returns an [`hdf5::Error`] if a group or attribute cannot be created.
    ///
    /// # Panics
    /// Panics if `run_id` is not ASCII, or if a hard link to a shared group
    /// cannot be created.
    pub fn create_empty_read(
        &mut self,
        read_id: String,
        run_id: String,
        tracking_id: &HashMap<&str, &str>,
        context_tags: &HashMap<&str, &str>,
        channel_info: ChannelInfo,
        raw_attrs: &HashMap<&str, RawAttrsOpts>,
    ) -> Result<Group, Error> {
        // Point HDF5 at the bundled VBZ compression plugin before writing.
        std::env::set_var("HDF5_PLUGIN_PATH", "./vbz_plugin".resolve().as_os_str());
        let group_name = format!("read_{}", read_id);
        // Propagate the error like the rest of this function instead of panicking.
        let group = self.handle.create_group(&group_name)?;
        let s = VarLenAscii::from_ascii(run_id.as_str()).unwrap();
        group
            .new_attr::<VarLenAscii>()
            .create("run_id")?
            .write_scalar(&s)?;
        if self._run_id_map.contains_key(&run_id) {
            // Run already seen: hard-link the shared groups to its first read.
            for shared_group in HARDLINK_GROUPS {
                let target = format!("read_{}/{}", self._run_id_map[&run_id], shared_group);
                let link = format!("{}/{}", group_name, shared_group);
                // unwrap_or_else avoids building the panic message on the
                // success path (clippy::expect_fun_call).
                self.handle
                    .link_hard(&target, &link)
                    .unwrap_or_else(|e| panic!("failed to hardlink {}: {}", target, e));
            }
        } else {
            // First read of this run: remember it, then populate every group.
            self._run_id_map.insert(run_id, read_id);
            let context_group = group.create_group("context_tags")?;
            let tracking_group = group.create_group("tracking_id")?;
            let channel_group = group.create_group("channel_id")?;
            let raw_data_group = group.create_group("Raw")?;
            // `tracking_id` is already a reference; `&tracking_id` was a
            // needless double reference.
            utils::add_tracking_info(tracking_group, tracking_id)?;
            utils::add_context_tags(context_group, context_tags)?;
            utils::add_channel_info(channel_group, channel_info)?;
            utils::add_raw_data(raw_data_group, vec![1, 2, 3, 4, 5, 6], raw_attrs)?;
        }
        Ok(group)
    }
}