liboskar/walk_parallel/
single_threaded.rs

1extern crate glob;
2
3use self::glob::glob;
4use colored::*;
5use error::*;
6use regex::{Regex, RegexSet};
7use std::fs;
8use std::fs::Metadata;
9use std::path::PathBuf;
10use std::process::exit;
11use std::result::Result;
12use types::*;
13use utils::*;
14
15#[cfg(not(target_os = "windows"))]
16use std::os::unix::fs::PermissionsExt;
17
18pub fn glob_exists(s: &str) -> bool {
19    glob(s).unwrap().filter_map(Result::ok).count() != 0 // ok because panic on IO Errors shouldn't happen.
20}
21
22/// Helper function to identify project directories. The heuristic is as follows:
23///
24/// 1. For `.stack-work`, look for a `.cabal` file or a `package.yaml` file in the parent
25///    directory.
26/// 2. For `target`, look for a `Cargo.toml` file in the parent directory.
27/// 3. For `elm-stuff`, look for `elm-package.json` in the parent directory.
28/// 4. For `build`, `dist`, look for a `.cabal`, `setup.py` or `cabal.project` file.
29/// 5. For `dist-newstyle`, look for a `.cabal` or `cabal.project` file.
30/// 6. For `nimcache`, look for a `.nim` file in the parent directory.
31/// 6. Otherwise, if `setup.py` is in the parent directory and it ends with `.egg-info`, return
32///    true.
33/// 7. In all other cases, return false, but still proceed into the directory to search files by
34///    extension.
35pub fn is_project_dir(p: &str, name: &str) -> bool {
36    // for project directories
37    lazy_static! {
38        static ref REGEX_PROJECT_DIR: Regex =
39            Regex::new(r"_minted|((\.stack-work|build|gen|cbits|ats-deps|\.atspkg|target|\.reco-work|\.cabal-sandbox|dist|\.criterion|dist-newstyle.*|target|\.egg-info|elm-stuff|\.pulp-cache|\.psc-package|output|bower_components|node_modules|__pycache__|lib|\.liquid)$)")
40            .unwrap();
41    }
42
43    if REGEX_PROJECT_DIR.is_match(name) {
44        let mut parent_path = PathBuf::from(p);
45        let mut parent_string = p.to_owned();
46        match name {
47            ".stack-work" => {
48                let mut hpack = parent_path.clone();
49                parent_path.push("../cabal.project");
50                hpack.push("package.yaml");
51                parent_string.push_str("/../*.cabal");
52                parent_path.exists() || hpack.exists() || glob_exists(&parent_string)
53            }
54            "nimcache" => {
55                parent_string.push_str("/../*.nim");
56                glob_exists(&parent_string)
57            }
58            "target" => {
59                let mut dhall = parent_path.clone();
60                dhall.push("../atspkg.dhall");
61                let mut shake = parent_path.clone();
62                shake.push("../shake.hs");
63                let mut elba = parent_path.clone();
64                elba.push("../elba.toml");
65                parent_path.push("../Cargo.toml");
66                parent_path.exists() || dhall.exists() || shake.exists() || elba.exists()
67            }
68            ".atspkg" | "ats-deps" | "cbits" | "gen" => {
69                parent_path.push("../atspkg.dhall");
70                parent_path.exists()
71            }
72            ".criterion" => {
73                parent_path.push("../Cargo.toml");
74                parent_path.exists()
75            }
76            ".liquid" => {
77                parent_string.push_str("/../*.hs");
78                glob_exists(&parent_string)
79            }
80            ".reco-work" => {
81                parent_path.push("../main.go");
82                parent_path.exists()
83            }
84            // "lib" => {
85            // parent_path.push("../futhark.pkg");
86            // parent_path.exists()
87            // }
88            "elm-stuff" => {
89                let mut package_path = PathBuf::from(p);
90                package_path.push("../elm-package.json");
91                package_path.exists()
92            }
93            ".pulp-cache" | "output" | ".psc-package" => {
94                let mut package_path = PathBuf::from(p);
95                package_path.push("../psc-package.json");
96                package_path.exists()
97            }
98            "build" | "dist" | ".cabal-sandbox" | "dist-newstyle" | "dist-newstyle-meta" => {
99                let mut cabal_project = parent_path.clone();
100                let mut parent_string_blod = parent_string.clone();
101                let mut parent_string_idr2 = parent_string.clone();
102                parent_path.push("../setup.py");
103                parent_string.push_str("/../*.cabal");
104                cabal_project.push("../cabal.project");
105                parent_string_blod.push_str("/../*.blod");
106                parent_string_idr2.push_str("/../*.ipkg");
107                parent_path.exists()
108                    || glob_exists(&parent_string)
109                    || cabal_project.exists()
110                    || glob_exists(&parent_string_blod)
111                    || glob_exists(&parent_string_idr2)
112            }
113            "bower_components" => {
114                let mut package_path = PathBuf::from(p);
115                package_path.push("../bower.json");
116                package_path.exists()
117            }
118            "__pycache__" => true,
119            "node_modules" => true,
120            _ => {
121                let mut parent_path_latex = parent_path.clone();
122                parent_path.push("../setup.py");
123                parent_path_latex.push("../*.tex");
124                (parent_path.exists() && str::ends_with(name, ".egg-info"))
125                    || (glob_exists(&parent_path_latex.to_string_lossy())
126                        && str::starts_with(name, "_minted"))
127            }
128        }
129    } else {
130        false
131    }
132}
133
134/// Helper function to determine whether a path points
135///
136/// Rules:
137/// - if the file extension of that is that of an artifact, return true
138/// - if the file is executable and included in the .gitignore, return true
139/// - return false otherwise
140///
141/// Explanation of extensions:
142/// - `.a`, `.la`, `.o`, `.lo`, `.so.*`:
143/// - `.S`: assembly
144/// - `.ll`, `.bc`: llvm
145/// - `.keter`: keter
146/// - `.d`: make
147/// - `.c`: ATS
148/// - `.rlib`, `.crate`: rust
149/// - `.hi`, `.hc`, `.chi`, `.dyn_hi`, `.dyn_o`, `.p_hi`, `.p_o`, `.prof`, `.dump-.*`, `.tix`,
150/// `.mix`, `.pix`: GHC
151/// - `.webapp`: Web app manifest
152/// - `.js.externs`, `.jsexe`, `.min.js`:
153/// - `.ibc`: Idris
154/// - `.toc`, `.aux`, `.fdb_latexmk`, `.fls`, `.bbl`, `.bbg`: TeX
155/// - `.egg-info`, `.whl`, `.pyc`: python
156/// - `.js_a`, `.js_hi`, `.js_o`: GHCJS
157/// - `.vmb`: Vim
158/// - `.crx`: chrome
159/// - `.elmo`, `.elmi`: Elm
160/// - `.mod`: FORTRAN
161/// - `.ji`, `.jld`: julia
162/// - `.exe`: Windows executable
163/// - `.sandbox.config`: Cabal sandbox configuration
164/// - `.eventlog`: GHC event log
165/// - `.ipa`: iOS applicative archive
166/// - `.ttc`: Blodwen compiled module
167/// - `.chs.h`, `.chi`: c2hs
168/// - `.1.expected`, `.1.actual`: Futhark test results
169pub fn is_artifact(
170    path_str: &str,
171    full_path: &str,
172    metadata: &Metadata,
173    vimtags: bool,
174    gitignore: &Option<RegexSet>,
175) -> bool {
176    lazy_static! {
177        static ref REGEX_GITIGNORE: Regex =
178            Regex::new(r"\.(stats|conf|h|c|out|cache.*|dat|pc|info|ll|js)$").unwrap();
179    }
180
181    // otherwise, use builtin expressions
182    {
183        lazy_static! {
184            static ref REGEX: Regex =
185                Regex::new(r"\.(a|i|ii|la|lo|o|keter|bc|dyn_o|d|rlib|crate|hi|hc|chi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|spl|bbl|blg|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|hspec-failures|pyc|vo|agdai|beam|mod|go\.(v|teak|xmldef|rewrittenast|rewrittengo|simplego|tree-(bind|eval|finish|parse))|p_hi|p_o|prof|hide-cache|ghc\.environment\..*\d.\d.\d|(t|p|m)ix|synctex\.gz|hl|hp|sandbox\.config|exe|eventlog|ipa|ttc|chs\.h|chi|\d+\.actual|\d+\.expected)$")
186                .unwrap();
187        }
188
189        if REGEX.is_match(path_str) || (path_str == "tags" && vimtags) {
190            true
191        } else if let Some(ref x) = *gitignore {
192            if metadata.permissions().mode() == 0o755 || REGEX_GITIGNORE.is_match(path_str) {
193                x.is_match(full_path)
194            } else {
195                false
196            }
197        } else {
198            path_str == "flxg_stats.txt"
199        }
200    }
201}
202
203/// Function to process directory contents and return a `FileTree` struct.
204pub fn read_size(
205    in_paths: &PathBuf,
206    excludes: Option<&Regex>,
207    maybe_gitignore: &Option<RegexSet>,
208    vimtags: bool,
209    artifacts_only: bool,
210) -> FileSize {
211    // attempt to read the .gitignore
212    let mut size = FileSize::new(0);
213    let gitignore = if artifacts_only {
214        mk_ignores(in_paths, maybe_gitignore)
215    } else {
216        None
217    };
218
219    // try to read directory contents
220    if let Ok(paths) = fs::read_dir(in_paths) {
221        // iterate over all the entries in the directory
222        for p in paths {
223            let val = match p {
224                Ok(x) => x,
225                _ => {
226                    panic!("{}", Internal::IoError);
227                }
228            };
229            let path = val.path();
230            let (path_string, bool_loop): (&str, bool) = if let Some(x) = path.as_path().to_str() {
231                let bool_loop = match excludes {
232                    Some(ex) => !ex.is_match(x),
233                    _ => true,
234                };
235                (x, bool_loop)
236            } else {
237                eprintln!(
238                    "{}: skipping invalid unicode filepath at {:?}",
239                    "Warning".yellow(),
240                    path
241                );
242                ("", false)
243            };
244
245            // only consider path if we're not using regex excludes or
246            // if they don't match the exclusion regex
247            if bool_loop {
248                let path_type = val.file_type().unwrap(); // ok because we already checked
249
250                // append file size/name for a file
251                if path_type.is_file() {
252                    // if this fails, it's probably because `path` is a broken symlink
253                    if let Ok(metadata) = val.metadata() {
254                        if !artifacts_only || {
255                            is_artifact(
256                                val.file_name().to_str().unwrap(), // ok because we already checked
257                                path_string,
258                                &metadata, // FIXME check metadata only when we know it matches gitignore
259                                vimtags,
260                                &gitignore,
261                            )
262                        } {
263                            // should check size before whether it's an artifact?
264                            let file_size = FileSize::new(metadata.len());
265                            size.add(file_size);
266                        }
267                    }
268                }
269                // otherwise, go deeper
270                else if path_type.is_dir() {
271                    let dir_size = if artifacts_only
272                        && is_project_dir(path_string, val.file_name().to_str().unwrap())
273                    {
274                        read_size(&path, excludes, &gitignore, vimtags, false)
275                    } else {
276                        read_size(&path, excludes, &gitignore, vimtags, artifacts_only)
277                    };
278                    size.add(dir_size);
279                }
280            }
281            /*else {
282            eprintln!(
283                    "{}: ignoring symlink at {}",
284                    "Warning".yellow(),
285                    path.display()
286                );
287            }*/
288        }
289    }
290    // if we can't read the directory contents, figure out why
291    // 1: check the path exists
292    else if !in_paths.exists() {
293        eprintln!(
294            "{}: path '{}' does not exist, or you do not have permission to enter.",
295            "Error".red(),
296            &in_paths.display()
297        );
298    }
299    // 2: check the path is actually a directory
300    else if !in_paths.is_dir() {
301        eprintln!(
302            "{}: {} is not a directory.",
303            "Error".red(),
304            &in_paths.display()
305        );
306        exit(0x0001);
307    }
308    // 3: otherwise, give a warning about permissions
309    else {
310        eprintln!(
311            "{}: permission denied for directory: {}",
312            "Warning".yellow(),
313            &in_paths.display()
314        );
315    }
316
317    size
318}
319
320/// Function to process directory contents and return a `FileTree` struct.
321pub fn read_all(
322    in_paths: &PathBuf,
323    depth: u8,
324    max_depth: Option<u8>,
325    excludes: Option<&Regex>,
326    maybe_gitignore: &Option<RegexSet>,
327    vimtags: bool,
328    artifacts_only: bool,
329) -> FileTree {
330    // attempt to read the .gitignore
331    let mut tree = FileTree::new();
332    let gitignore = if artifacts_only {
333        mk_ignores(in_paths, maybe_gitignore)
334    } else {
335        None
336    };
337
338    // try to read directory contents
339    if let Ok(paths) = fs::read_dir(in_paths) {
340        // iterate over all the entries in the directory
341        for p in paths {
342            let val = match p {
343                Ok(x) => x,
344                _ => {
345                    eprintln!("{}:  {:?}.", "Error".red(), p);
346                    exit(0x0001)
347                }
348            };
349            let path = val.path();
350            let (path_string, bool_loop): (&str, bool) = if let Some(x) = path.as_path().to_str() {
351                let bool_loop = match excludes {
352                    Some(ex) => !ex.is_match(x),
353                    _ => true,
354                };
355
356                (x, bool_loop)
357            } else {
358                eprintln!(
359                    "{}: skipping invalid unicode filepath at {:?}",
360                    "Warning".yellow(),
361                    path
362                );
363                ("", false)
364            };
365
366            // only consider path if we're not using regex excludes or if they don't match the
367            // exclusion regex
368            if bool_loop {
369                let path_type = val.file_type().unwrap(); // ok because we already checked
370
371                // append file size/name for a file
372                if path_type.is_file() {
373                    // if this fails, it's probably because `path` is a broken symlink
374                    if let Ok(metadata) = val.metadata() {
375                        // faster on Windows
376                        if !artifacts_only || {
377                            is_artifact(
378                                val.file_name().to_str().unwrap(), // ok because we already checked
379                                path_string,
380                                &metadata,
381                                vimtags,
382                                &gitignore,
383                            )
384                        } {
385                            let file_size = FileSize::new(metadata.len());
386                            tree.push(path_string.to_string(), file_size, None, depth + 1, false);
387                        }
388                    }
389                }
390                // otherwise, go deeper
391                else if path_type.is_dir() {
392                    if let Some(d) = max_depth {
393                        if depth + 1 >= d && !artifacts_only {
394                            let dir_size =
395                                { read_size(&path, excludes, &gitignore, vimtags, artifacts_only) };
396                            tree.push(path_string.to_string(), dir_size, None, depth + 1, true);
397                        } else if artifacts_only
398                            && is_project_dir(path_string, val.file_name().to_str().unwrap())
399                        {
400                            let dir_size =
401                                { read_size(&path, excludes, &gitignore, vimtags, false) };
402                            tree.push(path_string.to_string(), dir_size, None, depth + 1, true);
403                        } else {
404                            let mut subtree = read_all(
405                                &path,
406                                depth + 1,
407                                max_depth,
408                                excludes,
409                                &gitignore,
410                                vimtags,
411                                artifacts_only,
412                            );
413                            let dir_size = subtree.file_size;
414                            tree.push(
415                                path_string.to_string(),
416                                dir_size,
417                                Some(&mut subtree),
418                                depth + 1,
419                                true,
420                            );
421                        }
422                    } else if artifacts_only
423                        && is_project_dir(path_string, val.file_name().to_str().unwrap())
424                    {
425                        let dir_size = { read_size(&path, excludes, &gitignore, vimtags, false) };
426                        tree.push(path_string.to_string(), dir_size, None, depth + 1, true);
427                    } else {
428                        let mut subtree = read_all(
429                            &path,
430                            depth + 1,
431                            max_depth,
432                            excludes,
433                            &gitignore,
434                            vimtags,
435                            artifacts_only,
436                        );
437                        let dir_size = subtree.file_size;
438                        tree.push(
439                            path_string.to_string(),
440                            dir_size,
441                            Some(&mut subtree),
442                            depth + 1,
443                            true,
444                        );
445                    }
446                }
447            }
448        }
449    /*else {
450        eprintln!(
451            "{}: ignoring symlink at {}",
452            "Warning".yellow(),
453            path.display()
454        );
455    }*/
456    }
457    // if we can't read the directory contents, figure out why
458    // 1: check the path exists
459    else if !in_paths.exists() {
460        eprintln!(
461            "{}: path '{}' does not exist, or you do not have permission to enter.",
462            "Error".red(),
463            &in_paths.display()
464        );
465    }
466    // 2: check the path is actually a directory
467    else if !in_paths.is_dir() {
468        if artifacts_only {
469            eprintln!(
470                "{}: {} is not a directory; not searching for artifacts",
471                "Warning".yellow(),
472                &in_paths.display()
473            );
474        }
475
476        if let Ok(l) = in_paths.metadata() {
477            let size = l.len();
478            let to_formatted = format!("{}", FileSize::new(size));
479            println!("{}\t {}", &to_formatted.green(), in_paths.display());
480        } else {
481            panic!("{}", Internal::IoError);
482        }
483    }
484    // 3: otherwise, give a warning about permissions
485    else {
486        eprintln!(
487            "{}: permission denied for directory: {}",
488            "Warning".yellow(),
489            &in_paths.display()
490        );
491    }
492
493    tree
494}
495
496/// Function to process directory contents and return a `FileTree` struct.
497pub fn read_no_excludes(
498    in_paths: &PathBuf,
499    _: Option<&Regex>,
500    _: &Option<RegexSet>,
501    _: bool,
502) -> FileSize {
503    // attempt to read the .gitignore
504    let mut size = FileSize::new(0);
505
506    // try to read directory contents
507    if let Ok(paths) = fs::read_dir(in_paths) {
508        // iterate over all the entries in the directory
509        for p in paths {
510            let val = match p {
511                Ok(x) => x,
512                _ => {
513                    panic!("{}", Internal::IoError);
514                }
515            };
516            // only consider path if we're not using regex excludes or
517            // if they don't match the exclusion regex
518            let path_type = val.file_type().unwrap(); // ok because we already checked
519
520            // append file size/name for a file
521            if path_type.is_file() {
522                // if this fails, it's probably because `path` is a broken symlink
523                if let Ok(metadata) = val.metadata() {
524                    let file_size = FileSize::new(metadata.len());
525                    size.add(file_size);
526                }
527            }
528            // otherwise, go deeper
529            else if path_type.is_dir() {
530                let dir_size = {
531                    let path = val.path();
532                    read_no_excludes(&path, None, &None, false)
533                };
534                size.add(dir_size);
535            }
536        }
537    }
538    // if we can't read the directory contents, figure out why
539    // 1: check the path exists
540    else if !in_paths.exists() {
541        eprintln!(
542            "{}: path '{}' does not exist, or you do not have permission to enter.",
543            "Error".red(),
544            &in_paths.display()
545        );
546    }
547    // 2: check the path is actually a directory
548    else if !in_paths.is_dir() {
549        eprintln!(
550            "{}: {} is not a directory.",
551            "Error".red(),
552            &in_paths.display()
553        );
554        exit(0x0001);
555    }
556    // 3: otherwise, give a warning about permissions
557    else {
558        eprintln!(
559            "{}: permission denied for directory: {}",
560            "Warning".yellow(),
561            &in_paths.display()
562        );
563    }
564
565    size
566}
567
568/// Function to process directory contents and return a `FileTree` struct.
569pub fn read_all_fast(in_paths: &PathBuf, depth: u8, max_depth: Option<u8>) -> FileTree {
570    // attempt to read the .gitignore
571    let mut tree = FileTree::new();
572
573    // try to read directory contents
574    if let Ok(paths) = fs::read_dir(in_paths) {
575        // iterate over all the entries in the directory
576        for p in paths {
577            let val = match p {
578                Ok(x) => x,
579                _ => {
580                    eprintln!("{}: unexpected failure on {:?} failed.", "Error".red(), p);
581                    exit(0x0001)
582                }
583            };
584
585            // only consider path if we're not using regex excludes or if they don't match the
586            // exclusion regex
587            let path_type = val.file_type().unwrap(); // ok because we already checked
588
589            // append file size/name for a file
590            if path_type.is_file() {
591                // if this fails, it's probably because `path` is a broken symlink
592                if let Ok(metadata) = val.metadata() {
593                    // faster on Windows
594                    {
595                        let path = val.path();
596                        let path_string: &str = if let Some(x) = path.as_path().to_str() {
597                            x
598                        } else {
599                            eprintln!(
600                                "{}: skipping invalid unicode filepath at {:?}",
601                                "Warning".yellow(),
602                                path
603                            );
604                            ""
605                        };
606                        let file_size = FileSize::new(metadata.len());
607                        tree.push(path_string.to_string(), file_size, None, depth + 1, false);
608                    }
609                }
610            }
611            // otherwise, go deeper
612            else if path_type.is_dir() {
613                if let Some(d) = max_depth {
614                    if depth + 1 >= d {
615                        let path = val.path();
616                        let path_string: &str = if let Some(x) = path.as_path().to_str() {
617                            x
618                        } else {
619                            eprintln!(
620                                "{}: skipping invalid unicode filepath at {:?}",
621                                "Warning".yellow(),
622                                path
623                            );
624                            ""
625                        };
626                        let dir_size = { read_no_excludes(&path, None, &None, false) };
627                        tree.push(path_string.to_string(), dir_size, None, depth + 1, true);
628                    } else {
629                        let path = val.path();
630                        let path_string: &str = if let Some(x) = path.as_path().to_str() {
631                            x
632                        } else {
633                            eprintln!(
634                                "{}: skipping invalid unicode filepath at {:?}",
635                                "Warning".yellow(),
636                                path
637                            );
638                            ""
639                        };
640                        let mut subtree = read_all_fast(&path, depth + 1, max_depth);
641                        let dir_size = subtree.file_size;
642                        tree.push(
643                            path_string.to_string(),
644                            dir_size,
645                            Some(&mut subtree),
646                            depth + 1,
647                            true,
648                        );
649                    }
650                } else {
651                    let path = val.path();
652                    let path_string: &str = if let Some(x) = path.as_path().to_str() {
653                        x
654                    } else {
655                        eprintln!(
656                            "{}: skipping invalid unicode filepath at {:?}",
657                            "Warning".yellow(),
658                            path
659                        );
660                        ""
661                    };
662                    let mut subtree = read_all_fast(&path, depth + 1, max_depth);
663                    let dir_size = subtree.file_size;
664                    tree.push(
665                        path_string.to_string(),
666                        dir_size,
667                        Some(&mut subtree),
668                        depth + 1,
669                        true,
670                    );
671                }
672            }
673        }
674    }
675    // if we can't read the directory contents, figure out why
676    // 1: check the path exists
677    else if !in_paths.exists() {
678        eprintln!(
679            "{}: path '{}' does not exist, or you do not have permission to enter.",
680            "Error".red(),
681            &in_paths.display()
682        );
683    }
684    // 2: check the path is actually a directory
685    else if !in_paths.is_dir() {
686        if let Ok(l) = in_paths.metadata() {
687            let size = l.len();
688            let to_formatted = format!("{}", FileSize::new(size));
689            println!("{}\t {}", &to_formatted.green(), in_paths.display());
690        } else {
691            panic!("{}", Internal::IoError);
692        }
693    }
694    // 3: otherwise, give a warning about permissions
695    else {
696        eprintln!(
697            "{}: permission denied for directory: {}",
698            "Warning".yellow(),
699            &in_paths.display()
700        );
701    }
702
703    tree
704}