1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
//! The structure responsible for managing file IO, together with its implementation of codespan's `Files` trait.

use codespan_reporting::files::Files;
use glob::Paths;
use hashbrown::HashMap;
use std::borrow::Cow;
use std::fs::read_to_string;
use std::ops::Range;
use std::path::PathBuf;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread::Builder;
use walkdir::WalkDir;

// Global source of file ids; every `JsFile::new_concrete` call takes the next value.
// The counter starts at 1 because 0 is reserved for "no file id" (virtual files).
static FILE_ID_COUNTER: AtomicUsize = AtomicUsize::new(1);

/// Directory/file names that are ignored by default when collecting files.
const IGNORED: [&str; 1] = ["node_modules"];
/// The extensions (without the leading dot) of the files that are linted.
const LINTED_FILES: [&str; 2] = ["js", "mjs"];

/// The structure for managing IO to and from the core runner.
///
/// The walker uses multithreaded IO, spawning a thread for every file being loaded.
// TODO: use IO_Uring for linux
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileWalker {
    /// Every loaded file, keyed by the file's id (`JsFile::id`).
    pub files: HashMap<usize, JsFile>,
}

/// Lets codespan look up names, sources and line information of the loaded files by id.
impl<'a> Files<'a> for FileWalker {
    type Name = Cow<'a, str>;
    type Source = Cow<'a, str>;
    type FileId = usize;

    /// The display name of file `id`: its (lossily converted) path when it has one,
    /// otherwise its plain name. `None` if no file has that id.
    fn name(&'a self, id: Self::FileId) -> Option<Cow<'a, str>> {
        self.files.get(&id).map(|file| match &file.path {
            Some(path) => path.to_string_lossy(),
            None => Cow::Borrowed(file.name.as_str()),
        })
    }

    /// The borrowed source text of file `id`, or `None` if no file has that id.
    fn source(&'a self, id: Self::FileId) -> Option<Cow<'a, str>> {
        self.files
            .get(&id)
            .map(|file| Cow::Borrowed(file.source.as_str()))
    }

    /// The index of the line containing `byte_index` in file `id`.
    fn line_index(&self, id: Self::FileId, byte_index: usize) -> Option<usize> {
        self.files
            .get(&id)
            .and_then(|file| file.line_index(byte_index))
    }

    /// The byte range spanning line `line_index` of file `id`, from the start of that line
    /// up to the start of the following line.
    fn line_range(&self, id: Self::FileId, line_index: usize) -> Option<Range<usize>> {
        // The end is only looked up once the start is known to exist.
        self.line_start(id, line_index).and_then(|begin| {
            self.line_start(id, line_index + 1)
                .map(|end| begin..end)
        })
    }
}

impl FileWalker {
    /// Creates a walker that holds no files.
    pub fn empty() -> Self {
        Self {
            files: HashMap::new(),
        }
    }

    /// Make a new file walker from a compiled glob pattern.
    ///
    /// Every path yielded by the glob is walked recursively and each file whose
    /// extension is in `LINTED_FILES` is loaded on its own named IO thread.
    /// Entries whose name is in `IGNORED` are skipped, and ignored directories are
    /// not descended into. Unreadable files/dirs are silently skipped.
    ///
    /// # Panics
    ///
    /// Panics if the operating system fails to spawn an IO thread.
    pub fn from_glob(paths: Paths) -> Self {
        let mut threads = Vec::new();
        for entry in paths.filter_map(Result::ok) {
            if entry
                .file_name()
                .map_or(false, |name| Self::is_ignored(name))
            {
                continue;
            }

            // `filter_entry` prunes whole subtrees, so nothing below an ignored
            // directory (e.g. `node_modules`) is ever visited.
            let walker = WalkDir::new(entry)
                .into_iter()
                .filter_entry(|candidate| !Self::is_ignored(candidate.file_name()))
                .filter_map(Result::ok);

            for file in walker {
                // A directory can never be read as source text, even if it is named `foo.js`.
                if file.file_type().is_dir() {
                    continue;
                }
                let linted = file
                    .path()
                    .extension()
                    .and_then(|ext| ext.to_str())
                    .map_or(false, |ext| LINTED_FILES.contains(&ext));
                if !linted {
                    continue;
                }

                // Give each io thread a name so we can potentially debug any io failures easily
                let thread = Builder::new()
                    .name(format!("io-{}", file.file_name().to_string_lossy()))
                    .spawn(move || {
                        let path = file.path().to_owned();
                        // A failed read yields `None` so the file is skipped without panicking.
                        read_to_string(&path).ok().map(|source| (source, path))
                    })
                    .expect("Failed to spawn IO thread");
                threads.push(thread);
            }
        }

        let files = threads
            .into_iter()
            // Drop both panicked threads and files that could not be read.
            .filter_map(|handle| handle.join().ok().flatten())
            .map(|(source, path)| JsFile::new_concrete(source, path))
            .map(|file| (file.id, file))
            .collect();

        Self { files }
    }

    /// The byte offset at which line `line_index` of file `id` starts, or `None` if
    /// there is no such file or line.
    pub fn line_start(&self, id: usize, line_index: usize) -> Option<usize> {
        self.files.get(&id)?.line_start(line_index)
    }

    /// Whether `name` is one of the names in `IGNORED`.
    fn is_ignored(name: &std::ffi::OsStr) -> bool {
        // Names that are not valid UTF-8 can never equal one of the (UTF-8) ignored names.
        name.to_str().map_or(false, |name| IGNORED.contains(&name))
    }
}

/// A structure representing either a concrete (on-disk) or virtual (temporary/non-disk) js or mjs file.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct JsFile {
    /// The full source text of the file.
    pub source: String,
    /// The name of the file.
    pub name: String,
    /// The path on disk if this is a concrete file.
    pub path: Option<PathBuf>,
    /// The codespan id assigned to this file, used to refer back to it.
    pub id: usize,
    /// Whether this is a js or mjs file (script vs module).
    pub kind: JsFileKind,
    /// The cached byte offsets at which each line of `source` starts.
    pub line_starts: Vec<usize>,
}

/// Whether a `JsFile` is treated as a script or as a module.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum JsFileKind {
    /// A script; `JsFile::new_concrete` picks this for any extension other than `mjs`.
    Script,
    /// A module; `JsFile::new_concrete` picks this for the `mjs` extension.
    Module,
}

impl JsFile {
    /// Builds a concrete (on-disk) file from its already loaded `source` and its `path`.
    ///
    /// The file receives the next id from the global `FILE_ID_COUNTER`, is classified as a
    /// module when the extension is `mjs` (a script otherwise), is named after the last
    /// component of `path`, and has its line start offsets computed up front.
    pub fn new_concrete(source: String, path: PathBuf) -> Self {
        let id = FILE_ID_COUNTER.fetch_add(1, Ordering::SeqCst);
        let kind = if path.extension().and_then(|ext| ext.to_str()) == Some("mjs") {
            JsFileKind::Module
        } else {
            JsFileKind::Script
        };
        let line_starts = Self::line_starts(&source).collect();
        let name = path
            .file_name()
            .map_or_else(String::new, |name| name.to_string_lossy().into_owned());

        Self {
            source,
            name,
            path: Some(path),
            id,
            kind,
            line_starts,
        }
    }

    /// Yields the byte offset of the start of every line in `source`: `0`, followed by the
    /// offset just past each `'\n'`.
    fn line_starts(source: &str) -> impl Iterator<Item = usize> + '_ {
        std::iter::once(0).chain(source.match_indices('\n').map(|(i, _)| i + 1))
    }

    /// The byte offset at which line `line_index` starts.
    ///
    /// The index one past the last line maps to the end of the source, so that the end of
    /// the final line can be computed; anything beyond that is `None`.
    pub fn line_start(&self, line_index: usize) -> Option<usize> {
        use std::cmp::Ordering;

        match line_index.cmp(&self.line_starts.len()) {
            Ordering::Less => self.line_starts.get(line_index).copied(),
            Ordering::Equal => Some(self.source.len()),
            Ordering::Greater => None,
        }
    }

    /// The index of the line that contains `byte_index`.
    ///
    /// Returns `None` only when no line precedes `byte_index`, which can happen solely if
    /// `line_starts` is empty.
    pub fn line_index(&self, byte_index: usize) -> Option<usize> {
        match self.line_starts.binary_search(&byte_index) {
            Ok(line) => Some(line),
            // `next_line` is where the offset would be inserted, so the containing line is
            // the one before it; `checked_sub` guards against an empty line table.
            Err(next_line) => next_line.checked_sub(1),
        }
    }

    /// Converts a line index plus a byte column within that line into a byte offset.
    ///
    /// Returns `None` if the line does not exist or the resulting offset would overflow.
    /// The column is not checked against the length of the line.
    pub fn line_col_to_index(&self, line: usize, column: usize) -> Option<usize> {
        self.line_start(line)?.checked_add(column)
    }
}