1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
// [[file:../gchemol-readwrite.note::*imports][imports:1]]
use gut::fs::*;
use gut::prelude::*;

use gchemol_core::Molecule;
// imports:1 ends here

// [[file:../gchemol-readwrite.note::*traits][traits:1]]
/// Build a value of the implementing type from a file on disk.
pub trait FromFile: Sized {
    /// Read the file at `path` and construct `Self` from it.
    ///
    /// The result is produced in one go, so do not use this to read large
    /// files.
    ///
    /// # Errors
    ///
    /// Returns an error when the implementation cannot produce a value from
    /// `path`.
    fn from_file<P>(path: P) -> Result<Self>
    where
        P: AsRef<Path>;
}

/// Save a value to a file on disk.
pub trait ToFile {
    /// Write `self` to the external file at `path`.
    ///
    /// _Note:_ Replaces the current file content if the file already exists.
    ///
    /// # Errors
    ///
    /// Returns an error when the implementation fails to write to `path`.
    fn to_file<P>(&self, path: P) -> Result<()>
    where
        P: AsRef<Path>;
}

/// Conversion between molecules and their textual representation.
pub trait StringIO {
    /// Render `self` as a string in the format named by `fmt`.
    fn format_as<S>(&self, fmt: S) -> Result<String>
    where
        S: AsRef<str>;

    /// Parse a molecule from the seekable reader `s`, interpreting its
    /// content in the format named by `fmt`.
    fn parse_from<R, S>(s: R, fmt: S) -> Result<Molecule>
    where
        R: Read + Seek,
        S: AsRef<str>;

    /// Parse a molecule from the in-memory text `s` in the format named by
    /// `fmt`.
    ///
    /// Provided method: wraps the bytes of `s` in an in-memory cursor and
    /// forwards to [`StringIO::parse_from`].
    fn from_str<S>(s: &str, fmt: S) -> Result<Molecule>
    where
        S: AsRef<str>,
    {
        Self::parse_from(std::io::Cursor::new(s.as_bytes()), fmt)
    }
}
// traits:1 ends here

// [[file:../gchemol-readwrite.note::*file][file:1]]
impl FromFile for String {
    /// Load the file at `path` into a `String` by delegating to
    /// `gut::fs::read_file`.
    fn from_file<P>(path: P) -> Result<Self>
    where
        P: AsRef<Path>,
    {
        gut::fs::read_file(path)
    }
}

impl ToFile for str {
    /// Write this string slice to the file at `path` by delegating to
    /// `gut::fs::write_to_file`.
    fn to_file<P>(&self, path: P) -> Result<()>
    where
        P: AsRef<Path>,
    {
        // `self` is already a `&str`; hand it over as is.
        gut::fs::write_to_file(path, self)
    }
}
// file:1 ends here

// [[file:../gchemol-readwrite.note::*molecule][molecule:1]]
impl FromFile for Molecule {
    /// Construct a molecule from an external text file.
    ///
    /// When the file holds several molecules, the last one read is returned.
    ///
    /// # Errors
    ///
    /// Fails when `read` fails for `path`, or when the file yields no
    /// molecule at all.
    fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
        match read(path)?.last() {
            Some(mol) => Ok(mol),
            None => bail!("No molecule found!"),
        }
    }
}

impl ToFile for Molecule {
    /// Save this molecule to the external file at `path`.
    ///
    /// Delegates to `write`, passing this molecule as the only item.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `write`.
    fn to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
        // A one-item iterator avoids allocating a temporary `Vec`.
        write(path, std::iter::once(self))
    }
}
// molecule:1 ends here

// [[file:../gchemol-readwrite.note::*string][string:1]]
impl StringIO for Molecule {
    /// Format molecule as string in specific molecular file format. Return
    /// error if cannot format molecule in `fmt`.
    fn format_as<S: AsRef<str>>(&self, fmt: S) -> Result<String> {
        crate::formats::format_as_chemical_file(self, fmt.as_ref())
    }

    /// Construct molecule from a readable source in specific molecular file
    /// format.
    ///
    /// When the source holds several molecules, the last one parsed is
    /// returned.
    ///
    /// # Errors
    ///
    /// Fails when `read_from` fails, or when no molecule could be parsed
    /// from the source.
    fn parse_from<R: Read + Seek, S: AsRef<str>>(s: R, fmt: S) -> Result<Molecule> {
        read_from(s, &fmt)?
            .last()
            // Build the error lazily: it is only needed when nothing was parsed.
            .ok_or_else(|| format_err!("Parse molecule failure in format: {}", fmt.as_ref()))
    }
}
// string:1 ends here

// [[file:../gchemol-readwrite.note::d500136e][d500136e]]
mod find {
    use super::*;

    use walkdir::{DirEntry, WalkDir};

    // // allow walk into ".", "..", or "./", "../" but not ".foo"
    // fn is_hidden(entry: &DirEntry) -> bool {
    //     entry
    //         .file_name()
    //         .to_str()
    //         .map(|s| s.starts_with(".") && s != "." && s != ".." && s != "./" && s != "../")
    //         .unwrap_or(false)
    // }

    // regular file name matching `pattern`
    fn matching(pattern: &str, entry: Option<DirEntry>) -> Option<PathBuf> {
        let entry = entry?;
        if entry.file_type().is_file() {
            let rx = regex::Regex::new(pattern).ok()?;
            let s = entry.file_name().to_str()?;
            if rx.find(s).is_some() {
                return entry.into_path().into();
            }
        }
        None
    }

    /// Recursively find all files in `root` dir with given file name
    /// matching regex `pattern`. If not recursive, only files in
    /// `root` dir will be returned.
    pub fn find_files<'a>(pattern: &'a str, root: &Path, recursive: bool) -> impl Iterator<Item = PathBuf> + 'a {
        let mut walk = WalkDir::new(root).follow_links(false).sort_by_file_name();
        if !recursive {
            walk = walk.max_depth(1);
        }
        walk.into_iter()
            // do not walk into hidden directories
            // .filter_entry(|e| !is_hidden(e))
            .filter_map(|entry| matching(pattern, entry.ok()))
    }

    #[test]
    fn test_find() -> Result<()> {
        let root = "./tests/files";
        let files = find_files(r"\.xyz$", root.as_ref(), true).collect_vec();
        assert!(!files.is_empty());
        for file in files {
            assert!(file.to_string_lossy().ends_with(".xyz"));
        }

        let root = "./tests/files";
        let files = find_files(r"\.cif$", root.as_ref(), false).collect_vec();
        assert!(files.is_empty());
        let root = "./tests/files/cif";
        let files = find_files(r"\.cif$", root.as_ref(), false).collect_vec();
        assert!(!files.is_empty());
        for file in files {
            assert!(file.to_string_lossy().ends_with(".cif"));
        }

        Ok(())
    }
}
// d500136e ends here

// [[file:../gchemol-readwrite.note::80c178b0][80c178b0]]
pub use self::find::find_files;

/// Read an iterator over `Molecule` from file.
///
/// The file format is determined according to `path`.
///
/// # Errors
///
/// Fails when no parser can be guessed for `path`, or when the selected
/// parser fails to start parsing the file.
pub fn read<P: AsRef<Path>>(path: P) -> Result<impl Iterator<Item = Molecule>> {
    let path = path.as_ref();
    crate::formats::ChemicalFileParser::guess_from_path(path)
        // Build the error lazily: it is only needed when no parser matches.
        .ok_or_else(|| format_err!("No parser for path: {:?}", path))?
        .parse_molecules(path)
}

// https://stackoverflow.com/questions/26368288/how-do-i-stop-iteration-and-return-an-error-when-iteratormap-returns-a-result
/// Eagerly read every molecule found in `path` and return them as a `Vec`.
///
/// # Errors
///
/// Propagates any error returned by `read`.
pub fn read_all<P: AsRef<Path>>(path: P) -> Result<Vec<Molecule>> {
    read(path).map(|mols| mols.collect::<Vec<_>>())
}

/// Read molecules from a readable, seekable `source` whose content is in the
/// chemical file format named by `fmt`.
///
/// # Errors
///
/// Propagates any error returned by the parser for `fmt`.
pub fn read_from<R: Read + Seek, S: AsRef<str>>(source: R, fmt: S) -> Result<impl Iterator<Item = Molecule>> {
    let parser = crate::formats::ChemicalFileParser::new(fmt.as_ref());
    let reader = gchemol_parser::TextReader::new(source);
    parser.parse_molecules_from(reader)
}

/// Guess chemical file format from `path`
pub fn guess_format_from_path(path: &Path) -> Option<String> {
    crate::formats::ChemicalFileParser::guess_format_from_path(path)
}

/// Write `mols` into the file at `path`.
///
/// No explicit format is passed on, so the file format will be determined
/// according to the path.
///
/// # Errors
///
/// Propagates any error returned by the chemical file writer.
pub fn write<'a, P>(path: P, mols: impl IntoIterator<Item = &'a Molecule>) -> Result<()>
where
    P: AsRef<Path>,
{
    let target: &Path = path.as_ref();
    crate::formats::write_chemical_file(target, mols, None)
}

/// Write `mols` into the file at `path` using the chemical file format named
/// by `fmt`.
///
/// # Errors
///
/// Propagates any error returned by the chemical file writer.
pub fn write_format<'a, P>(path: P, mols: impl IntoIterator<Item = &'a Molecule>, fmt: &str) -> Result<()>
where
    P: AsRef<Path>,
{
    let target: &Path = path.as_ref();
    crate::formats::write_chemical_file(target, mols, Some(fmt))
}
// 80c178b0 ends here