stam/
file.rs

1/*
2    STAM Library (Stand-off Text Annotation Model)
3        by Maarten van Gompel <proycon@anaproy.nl>
        Digital Infrastructure, KNAW Humanities Cluster
5
6        Licensed under the GNU General Public License v3
7
8        https://github.com/annotation/stam-rust
9*/
10
11//! This module contains some common helper functions for dealing with file I/O
12
13use sealed::sealed;
14use std::fs::File;
15use std::io::{BufRead, BufReader, BufWriter, Write};
16use std::path::{Path, PathBuf};
17use std::sync::{Arc, RwLock};
18
19use crate::config::{Config, Configurable};
20use crate::error::StamError;
21use crate::types::*;
22
/// File extensions that the helpers in this module recognise and strip.
///
/// The order is significant: consumers walk this list front to back and test each entry as a
/// suffix, so every compound extension (e.g. `.store.stam.json`) is listed before the shorter
/// extensions it ends with (`.stam.json`, `.json`). New entries must keep that property, and
/// the array length in the type has to be updated along with the list.
const KNOWN_EXTENSIONS: &[&str; 14] = &[
    // STAM JSON
    ".store.stam.json",
    ".annotationset.stam.json",
    ".stam.json",
    // STAM CBOR
    ".store.stam.cbor",
    ".stam.cbor",
    // STAM CSV
    ".store.stam.csv",
    ".annotationset.stam.csv",
    ".annotations.stam.csv",
    ".stam.csv",
    // generic fallbacks, tried only after all compound extensions above
    ".json",
    ".cbor",
    ".csv",
    ".txt",
    ".md",
];
39
40/// Get a file for reading or writing, this resolves relative files more intelligently
41/// It does not test whether a file exists or not.
42pub(crate) fn get_filepath(filename: &str, workdir: Option<&Path>) -> Result<PathBuf, StamError> {
43    if filename == "-" {
44        //designates stdin or stdout
45        return Ok(filename.into());
46    }
47    if filename.starts_with("https://") || filename.starts_with("http://") {
48        //TODO: implement downloading of remote URLs and storing them locally
49        return Err(StamError::OtherError("Loading URLs is not supported yet"));
50    }
51
52    let path = if filename.starts_with("file://") {
53        //strip the file:// prefix
54        PathBuf::from(&filename[7..])
55    } else {
56        PathBuf::from(filename)
57    };
58    if path.is_absolute() {
59        Ok(path)
60    } else {
61        //if a workdir is set, always use that:
62        if let Some(workdir) = workdir {
63            let path = workdir.join(&path);
64            //Does not check for existence! (needs to work for writing too)
65            Ok(path)
66        } else {
67            //No workdir means just current working directly
68            // we don't test for existence here
69            Ok(path)
70        }
71    }
72}
73
74/// Auxiliary function to help open files
75pub(crate) fn open_file(filename: &str, config: &Config) -> Result<File, StamError> {
76    let found_filename = get_filepath(filename, config.workdir())?;
77    debug(config, || {
78        format!("open_file: {:?} at {:?}", filename, found_filename)
79    });
80    File::open(found_filename.as_path()).map_err(|e| {
81        StamError::IOError(
82            e,
83            found_filename
84                .as_path()
85                .to_str()
86                .expect("path must be valid unicode")
87                .to_owned(),
88            "Opening file for reading failed",
89        )
90    })
91}
92
93/// Auxiliary function to help open files
94pub(crate) fn create_file(filename: &str, config: &Config) -> Result<File, StamError> {
95    let found_filename = get_filepath(filename, config.workdir())?;
96    debug(config, || {
97        format!(
98            "create_file: {:?}, workdir: {:?}",
99            found_filename,
100            config.workdir()
101        )
102    });
103    File::create(found_filename.as_path()).map_err(|e| {
104        StamError::IOError(
105            e,
106            found_filename
107                .as_path()
108                .to_str()
109                .expect("path must be valid unicode")
110                .to_owned(),
111            "Opening file for reading failed",
112        )
113    })
114}
115
116/// Auxiliary function to help open files
117pub(crate) fn open_file_reader(
118    filename: &str,
119    config: &Config,
120) -> Result<Box<dyn BufRead>, StamError> {
121    if filename == "-" {
122        //read from stdin
123        Ok(Box::new(std::io::stdin().lock()))
124    } else {
125        Ok(Box::new(BufReader::new(open_file(filename, config)?)))
126    }
127}
128
129/// Auxiliary function to help open files
130pub(crate) fn open_file_writer(
131    filename: &str,
132    config: &Config,
133) -> Result<Box<dyn Write>, StamError> {
134    if filename == "-" {
135        Ok(Box::new(std::io::stdout()))
136    } else {
137        Ok(Box::new(BufWriter::new(create_file(filename, config)?)))
138    }
139}
140
141/// Returns the filename without (known!) extension. The extension must be a known extension used by STAM for this to work.
142pub(crate) fn strip_known_extension(s: &str) -> &str {
143    for extension in KNOWN_EXTENSIONS.iter() {
144        if s.ends_with(extension) {
145            return &s[0..s.len() - extension.len()];
146        }
147    }
148    s
149}
150
151/// Helper function to replace some symbols that may not be valid in a filename
152/// Only the actual file name part, without any directories, should be passed here.
153/// It is mainly useful in converting public IDs to filenames
154pub(crate) fn sanitize_id_to_filename(id: &str) -> String {
155    let mut id = id.replace("://", ".").replace(&['/', '\\', ':', '?'], ".");
156    for extension in KNOWN_EXTENSIONS.iter() {
157        if id.ends_with(extension) {
158            id.truncate(id.len() - extension.len());
159        }
160    }
161    id
162}
163
164pub(crate) fn filename_without_workdir<'a>(filename: &'a str, config: &Config) -> &'a str {
165    //MAYBE TODO: use proper PathBuf, this probably won't work on Windows
166    if let Some(workdir) = config.workdir().map(|x| x.to_str().expect("valid utf-8")) {
167        if filename.starts_with(workdir) {
168            let filename = &filename[workdir.len()..];
169            if filename.starts_with(&['/', '\\']) {
170                return &filename[1..];
171            } else {
172                return filename;
173            }
174        }
175    }
176    filename
177}
178
179#[sealed(pub(crate))] //<-- this ensures nobody outside this crate can implement the trait
180#[allow(private_bounds)]
181pub trait AssociatedFile: Configurable + ChangeMarker {
182    fn filename(&self) -> Option<&str>;
183
184    //Set the associated filename for this structure.
185    fn set_filename(&mut self, filename: &str) -> &mut Self;
186
187    //Set the associated filename for this annotation store. Also sets the working directory. Builder pattern.
188    fn with_filename(mut self, filename: &str) -> Self
189    where
190        Self: Sized,
191    {
192        self.set_filename(filename);
193        self
194    }
195
196    /// Get the directory this file is stored, if any
197    /// This may return a relative or absolute directory. If it returns `None` the current working directory just applies
198    fn dirname(&self) -> Option<PathBuf> {
199        if let Some(mut storedir) = self.filename().map(|s| {
200            let pb: PathBuf = s.into();
201            pb
202        }) {
203            storedir.pop();
204            if let Some(workdir) = self.config().workdir.as_ref() {
205                let mut workdir = workdir.clone();
206                workdir.extend(&storedir);
207                debug(self.config(), || {
208                    format!("dirname(): workdir + storedir = {:?}", workdir)
209                });
210                return Some(workdir);
211            } else {
212                debug(self.config(), || {
213                    format!("dirname(): storedir = {:?}", storedir)
214                });
215                return Some(storedir);
216            }
217        } else if let Some(workdir) = self.config().workdir.as_ref() {
218            debug(self.config(), || {
219                format!("dirname(): workdir = {:?}", workdir)
220            });
221            return Some(workdir.clone());
222        }
223        debug(self.config(), || format!("dirname(): none"));
224        None
225    }
226
227    /// Returns the filename without (known!) extension. The extension must be a known extension used by STAM for this to work.
228    fn filename_without_extension(&self) -> Option<&str> {
229        if let Some(filename) = self.filename() {
230            Some(strip_known_extension(filename))
231        } else {
232            None
233        }
234    }
235
236    /// Serializes the filename ready for use with STAM JSON's @include or STAM CSV.
237    /// It basically only strips the workdir component, if any.
238    fn filename_without_workdir(&self) -> Option<&str> {
239        if let Some(filename) = self.filename() {
240            Some(filename_without_workdir(filename, self.config()))
241        } else {
242            None
243        }
244    }
245}
246
247#[sealed(pub(crate))] //<-- this ensures nobody outside this crate can implement the trait
248pub(crate) trait ChangeMarker {
249    fn change_marker(&self) -> &Arc<RwLock<bool>>;
250
251    fn changed(&self) -> bool {
252        let mut result = true;
253        if let Ok(changed) = self.change_marker().read() {
254            result = *changed;
255        }
256        result
257    }
258
259    fn mark_changed(&self) {
260        if let Ok(mut changed) = self.change_marker().write() {
261            *changed = true;
262        }
263    }
264
265    fn mark_unchanged(&self) {
266        if let Ok(mut changed) = self.change_marker().write() {
267            *changed = false;
268        }
269    }
270}