gchemol_readwrite/
io.rs

1// [[file:../gchemol-readwrite.note::*imports][imports:1]]
2use gut::fs::*;
3use gut::prelude::*;
4
5use gchemol_core::Molecule;
6// imports:1 ends here
7
8// [[file:../gchemol-readwrite.note::*traits][traits:1]]
9pub trait FromFile: Sized {
10    /// Return content of text file in string.
11    ///
12    /// Do not use this to read large file.
13    ///
14    fn from_file<P: AsRef<Path>>(path: P) -> Result<Self>;
15}
16
17pub trait ToFile {
18    /// Write string content to an external file.
19    ///
20    /// _Note:_ Replaces the current file content if the file already exists.
21    ///
22    fn to_file<P: AsRef<Path>>(&self, path: P) -> Result<()>;
23}
24
25pub trait StringIO {
26    /// Format molecule as string in specific `fmt`.
27    fn format_as<S: AsRef<str>>(&self, fmt: S) -> Result<String>;
28
29    /// Parse molecule from string in specific `fmt`.
30    fn parse_from<R: Read + Seek, S: AsRef<str>>(s: R, fmt: S) -> Result<Molecule>;
31
32    fn from_str<S: AsRef<str>>(s: &str, fmt: S) -> Result<Molecule> {
33        let f = std::io::Cursor::new(s.as_bytes());
34        Self::parse_from(f, fmt)
35    }
36}
37// traits:1 ends here
38
39// [[file:../gchemol-readwrite.note::*file][file:1]]
40impl FromFile for String {
41    fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
42        gut::fs::read_file(path)
43    }
44}
45
46impl ToFile for str {
47    fn to_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
48        gut::fs::write_to_file(path, &self)
49    }
50}
51// file:1 ends here
52
53// [[file:../gchemol-readwrite.note::*molecule][molecule:1]]
54impl FromFile for Molecule {
55    /// Construct molecule from external text file
56    fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
57        if let Some(mol) = read(path)?.last() {
58            return Ok(mol);
59        }
60        bail!("No molecule found!");
61    }
62}
63
64impl ToFile for Molecule {
65    /// Save molecule to an external file
66    fn to_file<T: AsRef<Path>>(&self, path: T) -> Result<()> {
67        write(path, vec![self])
68    }
69}
70// molecule:1 ends here
71
72// [[file:../gchemol-readwrite.note::*string][string:1]]
73impl StringIO for Molecule {
74    /// Format molecule as string in specific molecular file format. Return
75    /// error if cannot format molecule in `fmt`.
76    fn format_as<S: AsRef<str>>(&self, fmt: S) -> Result<String> {
77        let fmt = fmt.as_ref();
78        crate::formats::format_as_chemical_file(&self, fmt)
79    }
80
81    /// construct molecule from string in specific molecular file format.
82    fn parse_from<R: Read + Seek, S: AsRef<str>>(s: R, fmt: S) -> Result<Molecule> {
83        read_from(s, &fmt)?
84            .last()
85            .ok_or(format_err!("Parse molecule failure in format: {}", fmt.as_ref()))
86    }
87}
88// string:1 ends here
89
90// [[file:../gchemol-readwrite.note::d500136e][d500136e]]
91mod find {
92    use super::*;
93
94    use walkdir::{DirEntry, WalkDir};
95
96    // // allow walk into ".", "..", or "./", "../" but not ".foo"
97    // fn is_hidden(entry: &DirEntry) -> bool {
98    //     entry
99    //         .file_name()
100    //         .to_str()
101    //         .map(|s| s.starts_with(".") && s != "." && s != ".." && s != "./" && s != "../")
102    //         .unwrap_or(false)
103    // }
104
105    // regular file name matching `pattern`
106    fn matching(pattern: &str, entry: Option<DirEntry>) -> Option<PathBuf> {
107        let entry = entry?;
108        if entry.file_type().is_file() {
109            let rx = regex::Regex::new(pattern).ok()?;
110            let s = entry.file_name().to_str()?;
111            if rx.find(s).is_some() {
112                return entry.into_path().into();
113            }
114        }
115        None
116    }
117
118    /// Recursively find all files in `root` dir with given file name
119    /// matching regex `pattern`. If not recursive, only files in
120    /// `root` dir will be returned.
121    pub fn find_files<'a>(pattern: &'a str, root: &Path, recursive: bool) -> impl Iterator<Item = PathBuf> + 'a {
122        let mut walk = WalkDir::new(root).follow_links(false).sort_by_file_name();
123        if !recursive {
124            walk = walk.max_depth(1);
125        }
126        walk.into_iter()
127            // do not walk into hidden directories
128            // .filter_entry(|e| !is_hidden(e))
129            .filter_map(|entry| matching(pattern, entry.ok()))
130    }
131
132    #[test]
133    fn test_find() -> Result<()> {
134        let root = "./tests/files";
135        let files = find_files(r"\.xyz$", root.as_ref(), true).collect_vec();
136        assert!(!files.is_empty());
137        for file in files {
138            assert!(file.to_string_lossy().ends_with(".xyz"));
139        }
140
141        let root = "./tests/files";
142        let files = find_files(r"\.cif$", root.as_ref(), false).collect_vec();
143        assert!(files.is_empty());
144        let root = "./tests/files/cif";
145        let files = find_files(r"\.cif$", root.as_ref(), false).collect_vec();
146        assert!(!files.is_empty());
147        for file in files {
148            assert!(file.to_string_lossy().ends_with(".cif"));
149        }
150
151        Ok(())
152    }
153}
154// d500136e ends here
155
156// [[file:../gchemol-readwrite.note::80c178b0][80c178b0]]
157pub use self::find::find_files;
158
159/// Read an iterator over `Molecule` from file.
160/// file format will be determined according to the path
161pub fn read<P: AsRef<Path>>(path: P) -> Result<impl Iterator<Item = Molecule>> {
162    use crate::formats::ExtxyzFile;
163
164    let path = path.as_ref();
165    // FIXME: rewrite below
166    let mut mols_extxyz = None;
167    if ExtxyzFile::parsable(path)? {
168        let mols: Vec<_> = ExtxyzFile::read_molecules_from(path)?.collect();
169        mols_extxyz = Some(mols.into_iter());
170    }
171    let mut mols_alt = None;
172    if mols_extxyz.is_none() {
173        mols_alt = crate::formats::ChemicalFileParser::guess_from_path(path)
174            .ok_or(format_err!("No parser for path: {:?}", path))?
175            .parse_molecules(path.as_ref())
176            .ok();
177    }
178
179    Ok(mols_extxyz.into_iter().flatten().chain(mols_alt.into_iter().flatten()))
180}
181
182// https://stackoverflow.com/questions/26368288/how-do-i-stop-iteration-and-return-an-error-when-iteratormap-returns-a-result
183/// Read all molecules into a Vec from `path`.
184pub fn read_all<P: AsRef<Path>>(path: P) -> Result<Vec<Molecule>> {
185    let mols: Vec<_> = read(path)?.collect();
186    Ok(mols)
187}
188
189/// Read molecules from readable source in specific chemical file format.
190pub fn read_from<R: Read + Seek, S: AsRef<str>>(source: R, fmt: S) -> Result<impl Iterator<Item = Molecule>> {
191    let cf = crate::formats::ChemicalFileParser::new(fmt.as_ref());
192    let r = gchemol_parser::TextReader::new(source);
193    cf.parse_molecules_from(r)
194}
195
196/// Guess chemical file format from `path`
197pub fn guess_format_from_path(path: &Path) -> Option<String> {
198    crate::formats::ChemicalFileParser::guess_format_from_path(path)
199}
200
201/// Write molecules into path. File format will be determined according to the
202/// path
203pub fn write<'a, P: AsRef<Path>>(path: P, mols: impl IntoIterator<Item = &'a Molecule>) -> Result<()> {
204    crate::formats::write_chemical_file(path.as_ref(), mols, None)
205}
206
207/// Write molecules into path in specific chemical file format.
208pub fn write_format<'a, P: AsRef<Path>>(path: P, mols: impl IntoIterator<Item = &'a Molecule>, fmt: &str) -> Result<()> {
209    crate::formats::write_chemical_file(path.as_ref(), mols, Some(fmt))
210}
211// 80c178b0 ends here