libmathcat/
shim_filesystem.rs

1#![allow(clippy::needless_return)]
2//! This is used to paste over normal reading of the Rules files and building them into the code for web assembly (WASM) which
3//! can't do file system access. For the latter, the Rules directory is zipped up.
4
5use std::path::{Path, PathBuf};
6use crate::errors::*;
7
8
9// The zipped files are needed by WASM builds.
10// However, they are also useful for other builds because there really isn't another good way to get at the rules.
11// Other build scripts can extract these files and unzip to their needed locations.
// I'm not thrilled with this solution as it seems hacky, but I don't know another way for crates to allow for easy access to data.
13cfg_if! {
14    if #[cfg(any(target_family = "wasm", feature = "include-zip"))] {
15        // For the include-zip builds, we build a fake file system based on ZIPPED_RULE_FILES.
16        // That stream encodes other zip files that must be unzipped.
17        // Only one level of embedded zip files is supported.
18        use zip::ZipArchive;
19        pub static ZIPPED_RULE_FILES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"),"/rules.zip"));
20
/// Records where a file lives inside the zip archive(s).
#[derive(Debug, Copy, Clone)]
struct ArchivePath {
    /// Entry index into the top-level archive (`ZIPPED_RULE_FILES`).
    main: usize,
    /// `Some(i)`: the file is entry `i` of the embedded zip stored at `main`.
    /// `None`: the file is entry `main` of the top-level archive itself.
    zipped: Option<usize>,
}
27
28        use std::cell::RefCell;
29        use std::io::Cursor;
30        use std::io::Read;
31        use std::collections::{HashMap, HashSet};
32        thread_local! {
33            // mapping the file names to whether they are are directory or a file
34            // Note: these are always stored with "/" as the path separator
35            static DIRECTORIES: RefCell<HashSet<String>> = RefCell::new(HashSet::with_capacity(127));
36            // if a file, we note whether it is in ZIPPED_RULE_FILES or the index of a zipped file within ZIPPED_RULE_FILES
37            static FILES: RefCell<HashMap<String, ArchivePath>> = RefCell::new(HashMap::with_capacity(1023));
38        }
39
/// Render `path` as a `String` in which every "\" has been turned into "/".
/// A path that is not valid UTF-8 produces the empty string.
fn canonicalize_path_separators(path: &Path) -> String {
    match path.to_str() {
        Some(text) => text.replace('\\', "/"),
        None => String::new(),
    }
}
44        
45        /// Return a zip archive given the zip bytes
46        fn get_zip_archive(zip_bytes: &[u8]) -> Result<ZipArchive<Cursor<&[u8]>>> {
47            let buf_reader = Cursor::new(zip_bytes);
48            let archive = match zip::ZipArchive::new(buf_reader) {
49                Err(e) => bail!("get_zip_archive: failed to create ZipArchive: {}", e),
50                Ok(archive) => archive,
51            };
52            return Ok(archive);
53        }
54
55        /// Read ZIPPED_RULE_FILES and build up the FILES and DIRECTORIES static variables.
56        /// This is called lazily when the first file or directory check is done.
57        fn initialize_static_vars() -> Result<()> {
58            let mut archive = get_zip_archive(ZIPPED_RULE_FILES)?;
59            read_zip_file("", &mut archive, None)?;
60
61            // Because of Rust's borrow checker, we can't recursively unzip contained zip files (FILES, etc., are borrowed mut)
62            // Here we gather up the zip files that were found and iterate over them non-recursively.
63            // Note: there shouldn't be embedded zip files in these files (if there are, they won't be unzipped)
64            let zip_files = FILES.with(|files| files.borrow().iter()
65                .filter_map(|(name, archive_path)| if name.ends_with(".zip") { Some((name.clone(), archive_path.clone())) } else { None } )
66                .collect::<Vec<_>>()
67            );
68            // debug!("Found {:?} embedded zip files", zip_files);
69            for (zip_file_name, archive_path) in zip_files.iter() {
70                let bytes = get_bytes_from_index(&mut archive, archive_path.main)?;
71                let mut inner_archive = get_zip_archive(bytes.as_slice())?;
72                // debug!("  internal zip file {} has {} files", zip_file_name, inner_archive.len());
73                let new_containing_dir = zip_file_name.rsplit_once("/").map(|(before, _)| before).unwrap_or("");
74                read_zip_file(&new_containing_dir, &mut inner_archive, Some(archive_path.main))?;
75            }
76            // FILES.with(|files| {
77            //     let files = files.borrow();
78            //     debug!("{} files={:?}", files.len(), files);
79            // });
80            return Ok(());
81        }
82
83        /// Get the bytes for a file in the zip archive (intended for embedded zip files)
84        fn get_bytes_from_index<'a>(archive: &mut ZipArchive<Cursor<&[u8]>>, index: usize) -> Result<Vec<u8>> {
85            let mut file = archive.by_index(index)
86                .map_err(|e| format!("Error getting index={} from zip archive: {}", index, e) )?;
87            let mut contents = Vec::new();
88            file.read_to_end(&mut contents)
89                .map_err(|e| format!("Error reading index={} from zip archive: {}", index, e) )?;
90            return Ok(contents);
91        }
92        /// Unzip the zip file (given by zip_archive) and record the file and dir names
93        /// 'containing_dir' is the rule dir (RulesDir or a subdir) and establishes a full path for unzipped file(s)
94        /// embedded_zip_file is index into ZIPPED_RULE_FILES if this is an embedded zip file, None if it is the top-level zip file
95        fn read_zip_file(containing_dir: &str, zip_archive: &mut ZipArchive<Cursor<&[u8]>>, embedded_zip_file: Option<usize>) -> Result<()> {
96            // debug!("read_zip_file: containing_dir='{}', zip_archive.len()={}", containing_dir, zip_archive.len());
97            return FILES.with(|files| {
98                let mut files = files.borrow_mut();
99                return DIRECTORIES.with(|dirs| {
100                    let mut dirs = dirs.borrow_mut();
101                    for i in 0..zip_archive.len() {
102                        let file = zip_archive.by_index(i).unwrap();
103                        // A little bit of safety/sanity checking
104                        let path = match file.enclosed_name() {
105                            Some(path) => PathBuf::from(containing_dir).join(path),
106                            None => {
107                                bail!("Entry {} has a suspicious path (outside of archive)", file.name());
108                            }
109                        };
110                        // debug!("read_zip_file: file path='{}'", path.display());
111                        // add all the dirs up to the containing dir -- skip the first one as that is a file
112                        // for files like unicode.yaml, this loop is a no-op, but for files in the Shared folder, it will go one time.
113                        for parent in path.ancestors().skip(1) {
114                            if parent.to_str().unwrap_or_default() == containing_dir {
115                                break;
116                            }
117                            dirs.insert(canonicalize_path_separators(parent));
118                        }
119                        let file_name = canonicalize_path_separators(&path);
120                        if file.is_file() {
121                            let archive_path = match embedded_zip_file {
122                                None => ArchivePath{ main: i, zipped: None },
123                                Some(main) => ArchivePath{ main, zipped: Some(i) },
124                            };
125                            files.insert(file_name, archive_path);
126                        } else if file.is_dir() {
127                            dirs.insert(file_name);
128                        } else {
129                            bail!("read_zip_file: {} is neither a file nor a directory", path.display());
130                        }
131                    };
132                    // debug!("{} files={:?}", files.len(), files);
133                    // debug!("{} dirs={:?}", dirs.len(), dirs);
134                    return Ok::<(), Error>( () );
135                });
136            });
137        }
138
139        pub fn is_file_shim(path: &Path) -> bool {
140            if FILES.with(|files| files.borrow().is_empty()) {
141                let _ignore_result = initialize_static_vars();
142            }
143            return FILES.with(|files| files.borrow().contains_key(&canonicalize_path_separators(&path)) );
144        }
145        
146        pub fn is_dir_shim(path: &Path) -> bool {
147            if FILES.with(|files| files.borrow().is_empty()) {
148                let _ignore_result = initialize_static_vars();
149            }
150            return DIRECTORIES.with(|dirs| dirs.borrow().contains(&canonicalize_path_separators(&path)) );
151        }
152
153        /// Find files in 'dir' that end with 'ending' (e.g., "_Rules.yaml")
154        pub fn find_files_in_dir_that_ends_with_shim(dir: &Path, ending: &str) -> Vec<String> {
155            // FIX: this is very inefficient because it looks through all the files -- maybe dirs should list the files in them?
156            // look for files that have 'path' as a prefix
157            return FILES.with(|files| {
158                let files = files.borrow();
159                let mut answer = Vec::new();
160
161                let dir_name = canonicalize_path_separators(dir);
162                for file_name in files.keys() {
163                    if let Some(dir_relative_name) = file_name.strip_prefix(&dir_name) {
164                        if file_name.ends_with(ending) {
165                            // this could be (e.g.) xxx_Rules.yaml or it could be subdir/xxx_Rules.yaml
166                            let file_name = dir_relative_name.split_once("/").map(|(_, after)| after).unwrap_or(dir_relative_name);
167                            answer.push( file_name.to_string() );
168                        }
169                    }
170                }
171                // debug!("find_files_in_dir_that_ends_with_shim: in dir '{}' found {:?}", dir.display(), answer);
172                return answer;
173            });
174        }
175        
176
177        pub fn find_all_dirs_shim(dir: &Path, found_dirs: &mut Vec<PathBuf> ) {
178            return DIRECTORIES.with(|dirs| {
179                let dirs = dirs.borrow();
180
181                let common_dir_name = canonicalize_path_separators(dir);
182                for dir_name in dirs.iter() {
183                    if dir_name.starts_with(&common_dir_name) && !dir_name.contains("SharedRules") {
184                        found_dirs.push(PathBuf::from(&dir_name));
185                    };
186                }
187            });
188        }
189
190        
/// Purely lexical stand-in for `Path::canonicalize`: each ".." component removes the component
/// collected before it (if any); every other component is kept as is.
/// The file system is never consulted, so this always returns `Ok`.
pub fn canonicalize_shim(path: &Path) -> std::io::Result<PathBuf> {
    let mut resolved = PathBuf::new();
    for component in path.components() {
        match component {
            std::path::Component::ParentDir => {
                resolved.pop();
            },
            other => resolved.push(other.as_os_str()),
        }
    }
    Ok(resolved)
}
204        
205        /// Read the file at 'path' and return its contents as a String
206        pub fn read_to_string_shim(path: &Path) -> Result<String> {
207            let path = canonicalize_shim(path).unwrap();        // can't fail
208            let file_name = canonicalize_path_separators(&path);
209            // Is this the debugging override?
210            if let Some(contents) = OVERRIDE_FILE_NAME.with(|override_name| {
211                if file_name == override_name.borrow().as_str() {
212                    // debug!("override read_to_string_shim: {}",file_name);
213                    return OVERRIDE_FILE_CONTENTS.with(|contents| return Some(contents.borrow().clone()));
214                } else {
215                    return None;
216                }
217            }) {
218                return Ok(contents);
219            };
220
221            let file_name = file_name.replace('\\', "/"); // zip files always use forward slash
222            // top-level zip file or embedded zip file
223            return FILES.with(|files| {
224                let files = files.borrow();
225                let inner_bytes;
226                let (bytes, index) = match files.get(&file_name) {
227                    Some(archive_path) => {
228                        match &archive_path.zipped {
229                            None => (ZIPPED_RULE_FILES, archive_path.main),
230                            Some(i) => {
231                                // debug!("read_to_string_shim: reading embedded zip file {} at index {}", file_name, *i);
232                                let mut archive = get_zip_archive(ZIPPED_RULE_FILES)?;
233                                inner_bytes = get_bytes_from_index(&mut archive, archive_path.main)?;  // need to hold temp value
234                                (inner_bytes.as_slice(), *i)
235                            }
236                        }
237                    },
238                    None => bail!("read_to_string_shim: didn't find {} in zip archive", file_name),
239                };
240                let mut archive = get_zip_archive(bytes)?;
241                let mut file = match archive.by_index(index) {
242                    Ok(file) => {
243                        // debug!("read_to_string_shim: want {}; name of zipped file={:?}", file_name, file.enclosed_name().unwrap());
244                        file
245                    },
246                    Err(..) => {
247                        panic!("Didn't find {} in zip archive", file_name);
248                    }
249                };
250
251                let mut contents = String::new();
252                if let Err(e) = file.read_to_string(&mut contents) {
253                    bail!("read_to_string: {}", e);
254                }
255                return Ok(contents);
256            });
257        }
258
259        pub fn zip_extract_shim(dir: &Path, zip_file_name: &str) -> Result<bool> {
260            let zip_file_path = dir.join(zip_file_name);
261            let full_zip_file_name = canonicalize_path_separators(&zip_file_path);
262            match FILES.with(|files| files.borrow().contains_key(full_zip_file_name.as_str()) ) {
263                true => Ok(true),
264                false => bail!("zip_extract_shim: didn't find {} in zip archive", full_zip_file_name),
265            }
266        }
267
thread_local! {
    // For debugging rules files (mainly nav file) via MathCATDemo:
    // the name of the file being overridden (empty until an override is set) ...
    static OVERRIDE_FILE_NAME: RefCell<String> = RefCell::new(String::new());
    // ... and the contents to hand back in place of that file
    static OVERRIDE_FILE_CONTENTS: RefCell<String> = RefCell::new(String::new());
}
273        pub fn override_file_for_debugging_rules(file_name: &str, file_contents: &str) {
274            // file_name should be path name starting at Rules dir: e.g, "Rules/en/navigate.yaml"
275            OVERRIDE_FILE_NAME.with(|name| *name.borrow_mut() = file_name.to_string().replace("/", "\\"));
276            OVERRIDE_FILE_CONTENTS.with(|contents| *contents.borrow_mut() = file_contents.to_string());
277            crate::interface::set_rules_dir("Rules".to_string()).unwrap();       // force reinitialization after the change
278        }
279    } else {
/// Return true if `path` is an existing regular file on the real file system.
pub fn is_file_shim(path: &Path) -> bool {
    path.is_file()
}
283        
/// Return true if `path` is an existing directory on the real file system.
pub fn is_dir_shim(path: &Path) -> bool {
    path.is_dir()
}
287        
/// List the names of the entries directly inside 'dir' whose name ends with 'ending' (e.g., "_Rules.yaml").
/// Returns an empty list if 'dir' can't be read; entries that can't be read are skipped.
pub fn find_files_in_dir_that_ends_with_shim(dir: &Path, ending: &str) ->  Vec<String> {
    match dir.read_dir() {
        Err(_) => Vec::new(),
        Ok(entries) => entries
            .flatten()
            .map(|entry| entry.file_name().to_string_lossy().to_string())
            .filter(|name| name.ends_with(ending))
            .map(|name| {
                // this could be (e.g.) xxx_Rules.yaml or it could be subdir/xxx_Rules.yaml
                let tail = name.split_once(std::path::MAIN_SEPARATOR).map(|(_, after)| after).unwrap_or(&name);
                tail.to_string()
            })
            .collect(),
    }
}
306
/// Recursively collect into `found_dirs` each directory at or below `dir` that directly contains a rules-like file:
/// a name starting with "unicode" or "definitions", or ending with "_Rules.yaml" or ".zip".
/// A directory is added at most once per visit; directories named "SharedRules" are not descended into.
///
/// Panics if `dir` is not a directory.
pub fn find_all_dirs_shim(dir: &Path, found_dirs: &mut Vec<PathBuf> ) {
    // FIX: this doesn't work for subdirectories that haven't been unzipped yet
    assert!(dir.is_dir(), "find_all_dirs_shim called with non-directory path: {}", dir.display());
    let entries = match std::fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(_) => return,
    };

    let mut dir_recorded = false;
    for child in entries.flatten().map(|entry| entry.path()) {
        if child.is_dir() {
            // skip "SharedRules" directory (and anything without a final path component)
            let skip = match child.file_name() {
                Some(name) => name.to_str().unwrap_or_default() == "SharedRules",
                None => true,
            };
            if !skip {
                find_all_dirs_shim(&child, found_dirs);
            }
            continue;
        }
        if dir_recorded {
            continue;       // this dir is already in the list -- only subdirectories still matter
        }

        let file_name = child.file_name().and_then(|name| name.to_str()).unwrap_or_default();
        let is_rules_like = file_name.starts_with("unicode")
            || file_name.starts_with("definitions")
            || file_name.ends_with("_Rules.yaml")
            || file_name.ends_with(".zip");
        if !is_rules_like {
            continue;
        }

        let parent = child.parent().unwrap();
        found_dirs.push(parent.to_path_buf());
        // FIX: hack to get around not unzipping files and having zh/tw not found
        if file_name == "zh.zip" {
            let tw_dir = parent.join("tw");
            if !found_dirs.contains(&tw_dir) {
                found_dirs.push(tw_dir);
            }
        }
        dir_recorded = true;
    }
}
339        
/// Canonicalize `path` using the real file system (see `Path::canonicalize`).
pub fn canonicalize_shim(path: &Path) -> std::io::Result<PathBuf> {
    path.canonicalize()
}
343        
344        pub fn read_to_string_shim(path: &Path) -> Result<String> {
345            let path = match path.canonicalize() {
346                Ok(path) => path,
347                Err(e) => bail!("Read error while trying to canonicalize in read_to_string_shim {}: {}", path.display(), e),
348            };
349            debug!("Reading file '{}'", &path.display());
350            match std::fs::read_to_string(&path) {
351                Ok(str) => return Ok(str),
352                Err(e) => bail!("Read error while trying to read {}: {}", &path.display(), e),
353            }
354        }
355
356        pub fn zip_extract_shim(dir: &Path, zip_file_name: &str) -> Result<bool> {
357            let zip_file = dir.join(zip_file_name);
358            return match std::fs::read(zip_file) {
359                Err(e) => {
360                    // no zip file? -- maybe started out with all the files unzipped? See if there is a .yaml file
361                    let yaml_files = find_files_in_dir_that_ends_with_shim(dir, ".yaml");
362                    if yaml_files.is_empty() {
363                        bail!("{}", e)
364                    } else {
365                        Ok(false)
366                    }
367                },
368                Ok(contents) => {
369                    let archive = std::io::Cursor::new(contents);
370                    let mut zip_archive = zip::ZipArchive::new(archive).unwrap();
371                    zip_archive.extract(dir).expect("Zip extraction failed");
372                    Ok(true)
373                },
374            };
375        }
376    }
377}