1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
use std::io::{BufRead, Read, Seek};

use super::*;
use crate::error::*;
use crate::structs::PDB;
use crate::StrictnessLevel;

#[cfg(feature = "compression")]
use super::mmcif::open_mmcif_bufread;
#[cfg(feature = "compression")]
use flate2::read::GzDecoder;
#[cfg(feature = "compression")]
use std::fs;

/// Open an atomic data file in either PDB or mmCIF/PDBx format. The parser is selected
/// from the file extension: `pdb` is handed to [`crate::open_pdb`] and `cif` to [`crate::open_mmcif`].
///
/// # Errors
/// Returns a `PDBError` if a `BreakingError` is found. Otherwise it returns the PDB with all errors/warnings found while parsing it.
/// A filename whose extension is neither `pdb` nor `cif` results in a breaking "Incorrect extension" error.
///
/// # Related
/// If you want to open a file from memory see [`open_raw`]. There are also functions to open a specified file type directly,
/// see [`crate::open_pdb`] and [`crate::open_mmcif`] respectively.
pub fn open(
    filename: impl AsRef<str>,
    level: StrictnessLevel,
) -> Result<(PDB, Vec<PDBError>), Vec<PDBError>> {
    // Borrow the name once; every check and parser below works on the same `&str`.
    let path = filename.as_ref();

    if check_extension(path, "pdb") {
        return open_pdb(path, level);
    }
    if check_extension(path, "cif") {
        return open_mmcif(path, level);
    }

    // Neither supported extension matched, so no parser is attempted.
    Err(vec![PDBError::new(
        ErrorLevel::BreakingError,
        "Incorrect extension",
        "Could not determine the type of the given file, make it .pdb or .cif",
        Context::show(path),
    )])
}

/// Open a gzip-compressed atomic data file, either PDB or mmCIF/PDBx. The correct type will be
/// determined based on the extension in front of `.gz` (so `.pdb.gz` or `.cif.gz`).
///
/// # Errors
/// Returns a `PDBError` if a `BreakingError` is found. Otherwise it returns the PDB with all errors/warnings found while parsing it.
/// A breaking error is also returned if the filename does not have a `gz` extension, if the file
/// could not be opened, or if the part in front of `.gz` has neither a `pdb` nor a `cif` extension.
///
/// # Related
/// If you want to open a file from memory see [`open_raw`]. There are also functions to open a specified file type directly,
/// see [`crate::open_pdb`] and [`crate::open_mmcif`] respectively.
#[cfg(feature = "compression")]
pub fn open_gz(
    filename: impl AsRef<str>,
    level: StrictnessLevel,
) -> Result<(PDB, Vec<PDBError>), Vec<PDBError>> {
    let filename = filename.as_ref();

    // Single source for the error returned whenever the name is not `.pdb.gz` or `.cif.gz`.
    let incorrect_extension = || {
        vec![PDBError::new(
            ErrorLevel::BreakingError,
            "Incorrect extension",
            "Could not determine the type of the given file, make it .pdb.gz or .cif.gz",
            Context::show(filename),
        )]
    };

    if !check_extension(filename, "gz") {
        return Err(incorrect_extension());
    }

    // The file is opened before the inner extension is inspected, so a file that cannot be
    // opened is reported as such regardless of what precedes `.gz`.
    let file = fs::File::open(filename).map_err(|_| {
        vec![PDBError::new(
            ErrorLevel::BreakingError,
            "Could not open file",
            "Could not open the given file, make sure it exists and you have the correct permissions",
            Context::show(filename),
        )]
    })?;

    // Decompress on the fly while reading.
    let reader = std::io::BufReader::new(GzDecoder::new(file));

    // Take everything in front of the last dot instead of slicing off three bytes:
    // `filename.len() - 3` would underflow and panic if `check_extension` accepts a name
    // shorter than three bytes (for example a bare `gz`). Without any dot there is no
    // inner extension, so the empty stem falls through to the error below.
    let stem = filename.rsplit_once('.').map_or("", |(stem, _)| stem);

    if check_extension(stem, "pdb") {
        open_pdb_raw(reader, Context::show(filename), level)
    } else if check_extension(stem, "cif") {
        open_mmcif_bufread(reader, level)
    } else {
        Err(incorrect_extension())
    }
}

/// Open a stream containing either PDB or mmCIF data. The format is detected from the start of the first line:
/// a line starting with `HEADER` is parsed as PDB, a line starting with `data_` is parsed as mmCIF.
/// After inspecting the first line the stream is rewound, so the parser sees the data from the start.
/// For mmCIF the remaining stream is read into a single string before parsing.
///
/// # Errors
/// Returns a `PDBError` if a `BreakingError` is found. Otherwise it returns the PDB with all errors/warnings found while parsing it.
/// It returns a breaking error if the buffer could not be read or rewound, the file type could not be determined from the first line,
/// or there was a breaking error in the file itself.
/// See the `PDBError` for more details.
///
/// # Related
/// If you want to open a file see [`open`]. There are also functions to open a specified file type directly,
/// see [`crate::open_pdb_raw`] and [`crate::open_mmcif_raw`] respectively.
pub fn open_raw<T: std::io::Read + std::io::Seek>(
    mut input: std::io::BufReader<T>,
    level: StrictnessLevel,
) -> Result<(PDB, Vec<PDBError>), Vec<PDBError>> {
    // Peek at the first line to find out which format is being supplied.
    let mut first_line = String::new();
    input.read_line(&mut first_line).map_err(|_| {
        vec![PDBError::new(
            ErrorLevel::BreakingError,
            "Buffer could not be read",
            "The buffer provided to `open_raw` could not be read.",
            Context::None,
        )]
    })?;

    // Undo the peek so the chosen parser starts at the beginning of the stream.
    input.rewind().map_err(|_| {
        vec![PDBError::new(
            ErrorLevel::BreakingError,
            "Buffer could not be read",
            "The buffer provided to `open_raw` could not be rewound to the start.",
            Context::None,
        )]
    })?;

    if first_line.starts_with("HEADER") {
        return open_pdb_raw(input, Context::None, level);
    }

    if !first_line.starts_with("data_") {
        return Err(vec![PDBError::new(
            ErrorLevel::BreakingError,
            "Could not determine file type",
            "Could not determine the type of the given file, make it .pdb or .cif",
            Context::show(&first_line),
        )]);
    }

    // The mmCIF parser works on a string slice, so the stream is read in full first.
    let mut contents = String::new();
    match input.read_to_string(&mut contents) {
        Ok(_) => open_mmcif_raw(&contents, level),
        Err(_) => Err(vec![PDBError::new(
            ErrorLevel::BreakingError,
            "Buffer could not be read",
            "The buffer provided to `open_raw` could not be read to end.",
            Context::show(&first_line),
        )]),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Names with an unsupported or truncated extension have to be rejected by `open`.
    #[test]
    fn open_invalid() {
        let rejected = |name: &str| open(name, StrictnessLevel::Medium).is_err();

        assert!(rejected("file.png"));
        assert!(rejected("file.mmcif"));
        assert!(rejected("file.pdbml"));
        assert!(rejected("file.pd"));
    }

    /// A supported extension on a file that is not present has to report that the file
    /// could not be opened, for both the PDB and the mmCIF route.
    #[test]
    fn open_not_existing() {
        for name in ["file.pdb", "file.cif"] {
            let errors =
                open(name, StrictnessLevel::Medium).expect_err("This file should not exist.");
            assert_eq!(errors[0].short_description(), "Could not open file");
        }
    }
}