linguist_build/
lib.rs

1use std::{
2    io::Write,
3    path::{Path, PathBuf},
4};
5
6use linguist::{
7    github::{
8        load_github_documentation, load_github_linguist_heuristics, load_github_linguist_languages,
9        load_github_vendors,
10    },
11    resolver::{HeuristicRule, Language},
12};
13use tempfile::tempdir;
14
/// Upstream location of GitHub Linguist's `languages.yml` (raw file on the `master` branch).
pub static GITHUB_LINGUIST_LANGUAGES_URL: &str =
    "https://raw.githubusercontent.com/github-linguist/linguist/master/lib/linguist/languages.yml";

/// Upstream location of GitHub Linguist's `heuristics.yml` (raw file on the `master` branch).
pub static GITHUB_LINGUIST_HEURISTICS_URL: &str =
    "https://raw.githubusercontent.com/github-linguist/linguist/master/lib/linguist/heuristics.yml";

/// Upstream location of GitHub Linguist's `vendor.yml` (raw file on the `master` branch).
pub static GITHUB_LINGUIST_VENDORS_URL: &str =
    "https://raw.githubusercontent.com/github-linguist/linguist/master/lib/linguist/vendor.yml";

/// Upstream location of GitHub Linguist's `documentation.yml` (raw file on the `master` branch).
pub static GITHUB_LINGUIST_DOCUMENTATION_URL: &str =
    "https://raw.githubusercontent.com/github-linguist/linguist/master/lib/linguist/documentation.yml";
23
24/// The `Config` is used to configure the build process. It can be used to specify the `output path` and
25/// the `definitions` to be generated.
26#[derive(Clone, PartialEq, Eq)]
27pub struct Config {
28    /// The `out_path` is used to specify the path where the generated files will be written to.
29    out_path: PathBuf,
30    /// The `definitions` are used to specify which definitions should be generated.
31    definitions: Vec<Definition>,
32}
33
34impl Default for Config {
35    fn default() -> Self {
36        Config {
37            out_path: PathBuf::from(std::env::var_os("OUT_DIR").unwrap()),
38            definitions: vec![],
39        }
40    }
41}
42
43/// A `Definition` is used to specify the `name`, [`Location`], and the [`Kind`] of an artifact
44/// to generate. The `Location` can either be a `URL` or a `Path`. The `Kind` specifies the type of
45/// artifact to generate, e.g., Languages, Heuristics, Vendors, or Documentation.
46#[derive(Debug, PartialEq, Eq, Clone)]
47pub struct Definition {
48    pub name: String,
49    pub location: Location,
50    pub kind: Kind,
51}
52
/// Where the data of a [`Definition`] lives.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Location {
    /// A file that is already present on the local filesystem.
    Path(PathBuf),
    /// A remote resource; it is downloaded from this URL before being read.
    URL(String),
}
63
/// The type of artifact a [`Definition`] produces.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Kind {
    /// Language definitions.
    Languages,
    /// Heuristic rules.
    Heuristics,
    /// Vendor patterns.
    Vendors,
    /// Documentation patterns.
    Documentation,
}
73
74impl Config {
75    /// Creates a new `Config` with default options.
76    pub fn new() -> Self {
77        Self::default()
78    }
79
80    /// Add a [`Definition`] to the `Config`.
81    pub fn add_definition(&mut self, definition: Definition) -> &mut Self {
82        self.definitions.push(definition);
83        self
84    }
85
86    /// Used internally to download a definition from the given `url` and write it to the given
87    /// `out_dir`.
88    fn download_from_url(&self, out_dir: &Path, url: &str) -> Result<PathBuf, ()> {
89        match reqwest::blocking::get(url) {
90            Ok(result) => {
91                let path = out_dir.join("file.yml");
92                let mut file = std::fs::File::create(path.clone()).expect("cannot create tempfile");
93                std::io::copy(
94                    &mut result
95                        .text()
96                        .expect("cannot unwrap response file")
97                        .as_bytes(),
98                    &mut file,
99                )
100                .expect("cannot copy reponse into file");
101                Ok(path)
102            }
103            Err(_) => Err(()),
104        }
105    }
106
107    /// Generate a [`Language`] definition and writes it to the `out_path`.
108    fn generate_language(&self, name: &str, location: Location) {
109        let tmpdir = tempdir().expect("failed to create a tempdir");
110        let def_file = match location {
111            Location::URL(url) => self.download_from_url(tmpdir.path(), &url).unwrap(),
112            Location::Path(path) => path,
113        };
114
115        let data = load_github_linguist_languages(def_file).unwrap();
116        let mut entries: Vec<String> = Vec::new();
117        for item in data.iter() {
118            entries.push(write_language_definition(item));
119        }
120
121        let target_path = self.out_path.clone();
122        let mut target_file = std::fs::File::create(target_path.join(name)).unwrap();
123        _ = target_file.write_all("use linguist::serde::StaticLanguage;\n\npub static LANGUAGES: &[&StaticLanguage] = &[\n".to_string().as_bytes());
124        for str in entries {
125            _ = target_file.write_all(format!("    {},\n", str).as_bytes());
126        }
127        _ = target_file.write_all("];\n".to_string().as_bytes());
128        _ = target_file.flush();
129    }
130
131    /// Generate a [`HeuristicRule`] definition and writes it to the `out_path`.
132    fn generate_heuristics(&self, name: &str, location: Location) {
133        let tmpdir = tempdir().expect("failed to create a tempdir");
134        let def_file = match location {
135            Location::URL(url) => self.download_from_url(tmpdir.path(), &url).unwrap(),
136            Location::Path(path) => path,
137        };
138
139        let data = load_github_linguist_heuristics(def_file).unwrap();
140        let mut entries: Vec<String> = Vec::new();
141        for item in data.iter() {
142            entries.push(write_heuristic_definition(item));
143        }
144
145        let target_path = self.out_path.clone();
146        let mut target_file = std::fs::File::create(target_path.join(name)).unwrap();
147        _ = target_file.write_all("use linguist::serde::StaticHeuristicRule;\n\npub static HEURISTICS: &[&StaticHeuristicRule] = &[\n".to_string().as_bytes());
148        for str in entries {
149            _ = target_file.write_all(format!("    {},\n", str).as_bytes());
150        }
151        _ = target_file.write_all("];\n".to_string().as_bytes());
152        _ = target_file.flush();
153    }
154
155    /// Generate a `Vendor` definition and writes it to the `out_path`.
156    fn generate_vendors(&self, name: &str, location: Location) {
157        let tmpdir = tempdir().expect("failed to create a tempdir");
158        let def_file = match location {
159            Location::URL(url) => self.download_from_url(tmpdir.path(), &url).unwrap(),
160            Location::Path(path) => path,
161        };
162
163        let data = load_github_vendors(def_file).unwrap();
164
165        let target_path = self.out_path.clone();
166        let mut target_file = std::fs::File::create(target_path.join(name)).unwrap();
167        _ = target_file
168            .write_all(format!("pub static VENDORS: &[&str; {}] = &[", data.len()).as_bytes());
169        for str in data {
170            _ = target_file.write_all(format!("    r\"{}\",\n", str).as_bytes());
171        }
172
173        _ = target_file.write_all("];\n".to_string().as_bytes());
174        _ = target_file.flush();
175    }
176
177    /// Generate a `Documentation` definition and writes it to the `out_path`.
178    fn generate_documentation(&self, name: &str, location: Location) {
179        let tmpdir = tempdir().expect("failed to create a tempdir");
180        let def_file = match location {
181            Location::URL(url) => self.download_from_url(tmpdir.path(), &url).unwrap(),
182            Location::Path(path) => path,
183        };
184
185        let data = load_github_documentation(def_file).unwrap();
186
187        let target_path = self.out_path.clone();
188        let mut target_file = std::fs::File::create(target_path.join(name)).unwrap();
189        _ = target_file.write_all(
190            format!("pub static DOCUMENTATION: &[&str; {}] = &[", data.len()).as_bytes(),
191        );
192        for str in data {
193            _ = target_file.write_all(format!("    r\"{}\",\n", str).as_bytes());
194        }
195
196        _ = target_file.write_all("];\n".to_string().as_bytes());
197        _ = target_file.flush();
198    }
199
200    /// Generates all configured definitions and writes them to the `out_path`.
201    pub fn generate(&self) {
202        for def in self.definitions.iter() {
203            match def.kind {
204                Kind::Languages => self.generate_language(&def.name, def.location.clone()),
205                Kind::Heuristics => self.generate_heuristics(&def.name, def.location.clone()),
206                Kind::Vendors => self.generate_vendors(&def.name, def.location.clone()),
207                Kind::Documentation => self.generate_documentation(&def.name, def.location.clone()),
208            };
209        }
210    }
211}
212
213impl std::fmt::Debug for Config {
214    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
215        f.debug_struct("Config")
216            .field("definitions", &self.definitions)
217            .finish()
218    }
219}
220
221fn write_language_definition(lang: &Language) -> String {
222    let mut str = String::new();
223    str.push_str("&StaticLanguage {");
224
225    if let Some(parent) = &lang.parent {
226        str.push_str(format!("parent: Some(\"{}\"), ", parent).as_str());
227    } else {
228        str.push_str("parent: None, ");
229    }
230
231    str.push_str(format!("name: \"{}\", ", &lang.name).as_str());
232
233    if !lang.aliases.is_empty() {
234        str.push_str(
235            format!(
236                "aliases: Some(&[{}]), ",
237                &lang
238                    .aliases
239                    .iter()
240                    .map(|s| format!("\"{}\"", s))
241                    .collect::<Vec<String>>()
242                    .join(", ")
243            )
244            .as_str(),
245        );
246    } else {
247        str.push_str("aliases: None, ");
248    }
249
250    // str.push_str(format!("scope: Scope::{}, ", &lang.scope.to_string()).as_str());
251    str.push_str(format!("scope: \"{}\", ", &lang.scope.to_string()).as_str());
252
253    if !lang.extensions.is_empty() {
254        str.push_str(
255            format!(
256                "extensions: Some(&[{}]), ",
257                &lang
258                    .extensions
259                    .iter()
260                    .map(|s| format!("\"{}\"", s.to_str().expect("cannot unwrap extension")))
261                    .collect::<Vec<String>>()
262                    .join(", ")
263            )
264            .as_str(),
265        );
266    } else {
267        str.push_str("extensions: None, ");
268    }
269
270    if !lang.filenames.is_empty() {
271        str.push_str(
272            format!(
273                "filenames: Some(&[{}]), ",
274                &lang
275                    .filenames
276                    .iter()
277                    .map(|s| format!("\"{}\"", s.to_str().expect("cannot unwrap filename")))
278                    .collect::<Vec<String>>()
279                    .join(", ")
280            )
281            .as_str(),
282        );
283    } else {
284        str.push_str("filenames: None, ");
285    }
286
287    if !lang.interpreters.is_empty() {
288        str.push_str(
289            format!(
290                "interpreters: Some(&[{}]), ",
291                &lang
292                    .interpreters
293                    .iter()
294                    .map(|s| format!("\"{}\"", s))
295                    .collect::<Vec<String>>()
296                    .join(", ")
297            )
298            .as_str(),
299        );
300    } else {
301        str.push_str("interpreters: None, ");
302    }
303
304    if let Some(color) = &lang.color {
305        str.push_str(format!("color: Some(\"{}\") ", color).as_str());
306    } else {
307        str.push_str("color: None ");
308    }
309
310    str.push('}');
311    str
312}
313
314/// Convert a [`HeuristicRule`] into a string representation (as rust code).
315fn write_heuristic_definition(rule: &HeuristicRule) -> String {
316    let mut str = String::new();
317    str.push_str("&StaticHeuristicRule {");
318
319    str.push_str(format!("language: \"{}\", ", &rule.language).as_str());
320
321    if !rule.extensions.is_empty() {
322        str.push_str(
323            format!(
324                "extensions: &[{}], ",
325                &rule
326                    .extensions
327                    .iter()
328                    .map(|s| format!("\"{}\"", s.to_str().expect("cannot unwrap extension")))
329                    .collect::<Vec<String>>()
330                    .join(", ")
331            )
332            .as_str(),
333        );
334    } else {
335        str.push_str("extensions: &[], ");
336    }
337
338    if !rule.patterns.is_empty() {
339        str.push_str(
340            format!(
341                "patterns: &[{}], ",
342                &rule
343                    .patterns
344                    .iter()
345                    .map(|s| format!("\"{}\"", s.replace('\\', "\\\\").replace('\"', "\\\"")))
346                    .collect::<Vec<String>>()
347                    .join(", ")
348            )
349            .as_str(),
350        );
351    } else {
352        str.push_str("patterns: &[] ");
353    }
354
355    str.push('}');
356    str
357}