fennec_common/
import_path.rs

1// Copyright 2023 Gregory Petrosyan <pgregory@pgregory.net>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at https://mozilla.org/MPL/2.0/.
6
7use anyhow::anyhow;
8use once_cell::sync::Lazy;
9use regex::Regex;
10use std::fmt;
11
12use crate::util;
13
/// A validated import path together with the package name derived from it.
///
/// Values are only created by the parsing functions of this type, so every
/// instance has already passed the path-element, version-suffix and
/// package-name checks.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ImportPath {
    /// The complete slash-separated path, exactly as it was parsed.
    path: String,
    /// The last path element that is not a version suffix.
    package: String,
    /// Whether the first path element passed the domain check.
    has_domain: bool,
}
20
21pub(crate) static PACKAGE_RE: Lazy<Regex> =
22    Lazy::new(|| Regex::new(r"^[a-z][a-z0-9_]*$").expect(BAD_RE));
23static PATH_ELEM_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-zA-Z0-9._\-~]+$").expect(BAD_RE));
24static PATH_ELEM_DENY_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"~[0-9]+$").expect(BAD_RE));
25static DOMAIN_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[a-z0-9.-]+$").expect(BAD_RE));
26static VERSION_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^v[0-9.]+$").expect(BAD_RE));
27const BAD_RE: &str = "invalid regex literal";
28
29impl ImportPath {
30    // From https://go.dev/ref/mod#go-mod-file-ident:
31    //
32    // A module path must satisfy the following requirements:
33    //
34    // - The path must consist of one or more path elements separated by slashes
35    //   (/, U+002F). It must not begin or end with a slash.
36    // - Each path element is a non-empty string made of up ASCII letters, ASCII
37    //   digits, and limited ASCII punctuation (-, ., _, and ~).
38    // - A path element may not begin or end with a dot (., U+002E).
39    // - The element prefix up to the first dot must not be a reserved file name
40    //   on Windows, regardless of case (CON, com1, NuL, and so on).
41    // - The element prefix up to the first dot must not end with a tilde
42    //   followed by one or more digits (like EXAMPLE~1.COM).
43    //
44    // If the module path appears in a require directive and is not replaced, or
45    // if the module paths appears on the right side of a replace directive, the
46    // go command may need to download modules with that path, and some
47    // additional requirements must be satisfied.
48    //
49    // - The leading path element (up to the first slash, if any), by convention
50    //   a domain name, must contain only lower-case ASCII letters, ASCII digits,
51    //   dots (., U+002E), and dashes (-, U+002D); it must contain at least one
52    //   dot and cannot start with a dash.
53    // - For a final path element of the form /vN where N looks numeric (ASCII
54    //   digits and dots), N must not begin with a leading zero, must not be /v1,
55    //   and must not contain any dots.
56
57    pub fn parse(path: &str) -> Result<ImportPath, anyhow::Error> {
58        Self::do_parse(path, false)
59    }
60
61    pub fn parse_external_dep(path: &str) -> Result<ImportPath, anyhow::Error> {
62        Self::do_parse(path, true)
63    }
64
65    fn do_parse(path: &str, expect_domain: bool) -> Result<ImportPath, anyhow::Error> {
66        let mut last: Option<&str> = None;
67        let mut before_last: Option<&str> = None;
68        let mut has_domain = false;
69        for elem in path.split('/') {
70            Self::check_path_element(elem)?;
71            if last.is_none() {
72                let d = Self::check_domain(elem);
73                has_domain = d.is_ok();
74                if expect_domain {
75                    d?;
76                }
77            } else {
78                before_last = last;
79            }
80            last = Some(elem);
81        }
82
83        let last = last.ok_or(anyhow!(
84            "import path must consist of at least one valid path element"
85        ))?;
86
87        // For consistency, we require version suffix to always be valid.
88        let has_version = Self::check_version_suffix(last)?;
89
90        // On top of Go rules, we additionally require that last
91        // non-version path element is a valid identifier (package name).
92        let package = if has_version {
93            before_last.ok_or(anyhow!("import path must contain a non-version element"))?
94        } else {
95            last
96        };
97        Self::check_package(package)?;
98
99        Ok(ImportPath {
100            path: path.into(),
101            package: package.into(),
102            has_domain,
103        })
104    }
105
106    #[must_use]
107    pub fn as_str(&self) -> &str {
108        &self.path
109    }
110
111    #[must_use]
112    pub fn package(&self) -> &str {
113        &self.package
114    }
115
116    #[must_use]
117    pub fn has_domain(&self) -> bool {
118        self.has_domain
119    }
120
121    pub fn join(&self, rel: &str) -> Result<ImportPath, anyhow::Error> {
122        let mut path = self.path.clone();
123        path += "/";
124        path += rel;
125        Self::do_parse(&path, false)
126    }
127
128    fn check_path_element(elem: &str) -> Result<(), anyhow::Error> {
129        if elem.is_empty() {
130            return Err(anyhow!("import path element must be non-empty"));
131        }
132
133        if elem.starts_with('.') || elem.ends_with('.') {
134            return Err(anyhow!(
135                "import path element must not start nor end with a dot"
136            ));
137        }
138
139        if elem.contains("..") {
140            return Err(anyhow!(
141                "import path element must not contain two dots in a row"
142            ));
143        }
144
145        if !PATH_ELEM_RE.is_match(elem) {
146            let re = PATH_ELEM_RE.as_str();
147            return Err(anyhow!("import path element must match {re}"));
148        }
149
150        let prefix = match elem.split_once('.') {
151            Some(s) => s.0,
152            None => elem,
153        };
154
155        if PATH_ELEM_DENY_RE.is_match(prefix) {
156            let re = PATH_ELEM_DENY_RE.as_str();
157            return Err(anyhow!("import path element prefix must not match {re}"));
158        }
159
160        if util::is_reserved_windows_filename(prefix) {
161            return Err(anyhow!("import path element must not have {prefix} prefix"));
162        }
163
164        Ok(())
165    }
166
167    fn check_domain(elem: &str) -> Result<(), anyhow::Error> {
168        if !DOMAIN_RE.is_match(elem) {
169            let re = DOMAIN_RE.as_str();
170            return Err(anyhow!("import path domain must match {re}"));
171        }
172
173        if elem.starts_with('-') {
174            return Err(anyhow!("import path domain must not start with a dash"));
175        }
176
177        if !elem.contains('.') {
178            return Err(anyhow!("import path domain must contain a dot"));
179        }
180
181        Ok(())
182    }
183
184    fn check_version_suffix(elem: &str) -> Result<bool, anyhow::Error> {
185        if !VERSION_RE.is_match(elem) {
186            return Ok(false);
187        }
188
189        if elem == "v1" {
190            return Err(anyhow!("import path version suffix must not be v1"));
191        }
192
193        if elem.starts_with("v0") {
194            return Err(anyhow!("import path version suffix must not start with v0"));
195        }
196
197        if elem.contains('.') {
198            return Err(anyhow!("import path version suffix must not contain dots"));
199        }
200
201        Ok(true)
202    }
203
204    fn check_package(elem: &str) -> Result<(), anyhow::Error> {
205        if !PACKAGE_RE.is_match(elem) {
206            let re = PACKAGE_RE.as_str();
207            return Err(anyhow!("import path package name must match {re}"));
208        }
209
210        Ok(())
211    }
212}
213
214impl fmt::Display for ImportPath {
215    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
216        self.as_str().fmt(f)
217    }
218}
219
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the value a successful parse of `path` is expected to produce.
    fn expected(path: &str, package: &str, has_domain: bool) -> ImportPath {
        ImportPath {
            path: path.into(),
            package: package.into(),
            has_domain,
        }
    }

    #[test]
    fn parse_path() -> Result<(), anyhow::Error> {
        // (path, expected package, expected has_domain)
        let valid = [
            ("fmt", "fmt", false),
            ("math/bits", "bits", false),
            ("math/bits/v2", "bits", false),
            ("example/hello", "hello", false),
            ("example.org/hello", "hello", true),
            ("github.com/fennec-lang/fennec", "fennec", true),
            ("github.com/fennec-lang/fennec/v2/test", "test", true),
        ];

        for (path, package, has_domain) in valid {
            let parsed = ImportPath::parse(path)?;
            assert_eq!(
                parsed,
                expected(path, package, has_domain),
                "unexpected result for {path:?}"
            );
        }

        let errors = [
            "",
            "/test",
            "test/",
            "/test/",
            "test.mod",
            "v2",
            "test/v1",
            "con",
            "test/com1",
        ];

        for e in errors {
            let r = ImportPath::parse(e);
            assert!(r.is_err(), "expected parse({e:?}) to fail, got {r:?}");
        }

        let dep_errors = [
            "test",
            "test/hello",
            "example..org/test",
            "example.org//test",
            "example.org/CON.2/test",
            "example.org/hello~0/test",
            "example.org/hello~0.com/test",
            "example.org/hello/test~",
            "example.org/test/v1",
            "example.org/test/v0.1",
            "example.org/test/v2.5",
        ];

        for e in dep_errors {
            let r = ImportPath::parse_external_dep(e);
            assert!(
                r.is_err(),
                "expected parse_external_dep({e:?}) to fail, got {r:?}"
            );
        }

        Ok(())
    }

    #[test]
    fn parse_external_dep_accepts_domain_paths() -> Result<(), anyhow::Error> {
        let valid = [
            ("example.org/hello", "hello"),
            ("github.com/fennec-lang/fennec", "fennec"),
            ("github.com/fennec-lang/fennec/v2", "fennec"),
        ];

        for (path, package) in valid {
            let parsed = ImportPath::parse_external_dep(path)?;
            assert_eq!(
                parsed,
                expected(path, package, true),
                "unexpected result for {path:?}"
            );
        }

        Ok(())
    }

    #[test]
    fn join_appends_and_revalidates() -> Result<(), anyhow::Error> {
        let base = ImportPath::parse("math")?;
        assert_eq!(base.join("bits")?, expected("math/bits", "bits", false));
        assert_eq!(
            base.join("bits/v2")?,
            expected("math/bits/v2", "bits", false)
        );

        let with_domain = ImportPath::parse("example.org/hello")?;
        assert_eq!(
            with_domain.join("world")?,
            expected("example.org/hello/world", "world", true)
        );

        for rel in ["", "/bits", "bits/", "v1"] {
            let r = base.join(rel);
            assert!(r.is_err(), "expected join({rel:?}) to fail, got {r:?}");
        }

        Ok(())
    }

    #[test]
    fn accessors_and_display() -> Result<(), anyhow::Error> {
        let p = ImportPath::parse("math/bits/v2")?;
        assert_eq!(p.as_str(), "math/bits/v2");
        assert_eq!(p.package(), "bits");
        assert!(!p.has_domain());
        assert_eq!(p.to_string(), "math/bits/v2");

        Ok(())
    }
}