use crate::detectors::network::patterns::RE_HTTP_URL;
/// Characters removed from the end of every match returned by
/// [`extract_http_urls`].
const URL_TRIM_TRAILING: &[char] = &[
    // Quote characters.
    '"',
    '\'',
    // Closing parenthesis and angle brackets.
    ')',
    '>',
    '<',
    // Sentence punctuation.
    ',',
    '.',
    ';',
    ':',
    '!',
    '?',
    // Closing square bracket and brace.
    ']',
    '}',
];
/// Collects every `RE_HTTP_URL` match found in `content` as an owned string.
///
/// Each match has any run of trailing `URL_TRIM_TRAILING` characters removed
/// before it is stored; matches are returned in the order the pattern yields
/// them.
pub(crate) fn extract_http_urls(content: &str) -> Vec<String> {
    let mut urls = Vec::new();
    for found in RE_HTTP_URL.find_matches(content) {
        // Drop trailing delimiters/punctuation that the pattern captured
        // along with the URL itself.
        let cleaned = found.matched_text.trim_end_matches(URL_TRIM_TRAILING);
        urls.push(cleaned.to_string());
    }
    urls
}
/// Reports whether `url` points at one of the package-registry hosts listed
/// below.
///
/// The per-host comparison is delegated to `host_matches_url`; the first host
/// that matches ends the search.
pub(crate) fn is_common_lockfile_source(url: &str) -> bool {
    const KNOWN_REGISTRY_HOSTS: [&str; 7] = [
        "registry.npmjs.org",
        "registry.yarnpkg.com",
        "repo.yarnpkg.com",
        "mirrors.tencentyun.com",
        "registry.npmmirror.com",
        "registry.yarnpkg.cn",
        "npm.pkg.github.com",
    ];

    for known_host in KNOWN_REGISTRY_HOSTS.iter() {
        if host_matches_url(known_host, url) {
            return true;
        }
    }
    false
}
/// Returns `true` when the authority component of `url` names exactly `host`.
///
/// The URL is decomposed rather than substring-searched, so a known host that
/// merely appears in the path, query, fragment or userinfo of a URL pointing
/// somewhere else does not match (for example
/// `https://attacker.example/@registry.npmjs.org/x.tgz` or
/// `https://registry.npmjs.org:@attacker.example/x.tgz`).
///
/// Matching rules:
/// - a leading `scheme://` is optional and skipped when present;
/// - the authority ends at the first `/`, `\`, `?` or `#` (backslash is
///   included because WHATWG URL parsers treat it like `/` for http(s));
/// - userinfo (everything up to the last `@` in the authority) is ignored;
/// - an optional `:port` is accepted only if it is empty or all ASCII digits;
/// - the host comparison is ASCII case-insensitive.
fn host_matches_url(host: &str, url: &str) -> bool {
    // Only treat `://` as the scheme separator when what precedes it really is
    // a scheme; otherwise a `://` buried in a path or query could redirect the
    // parse to attacker-chosen text.
    let after_scheme = match url.split_once("://") {
        Some((scheme, rest)) if is_url_scheme(scheme) => rest,
        _ => url,
    };

    // `split` always yields at least one piece, so the fallback is never used.
    let authority = after_scheme
        .split(|c: char| matches!(c, '/' | '\\' | '?' | '#'))
        .next()
        .unwrap_or(after_scheme);

    // Userinfo may itself contain `:`; the real host follows the last `@`.
    let host_and_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_userinfo, tail)| tail);

    let (url_host, port) = host_and_port
        .split_once(':')
        .unwrap_or((host_and_port, ""));

    port.bytes().all(|b| b.is_ascii_digit()) && url_host.eq_ignore_ascii_case(host)
}

/// Checks that `candidate` has the shape of a URL scheme: an ASCII letter
/// followed by ASCII letters, digits, `+`, `-` or `.`.
fn is_url_scheme(candidate: &str) -> bool {
    let mut chars = candidate.chars();
    matches!(chars.next(), Some(first) if first.is_ascii_alphabetic())
        && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
}
#[cfg(test)]
mod tests {
    use super::*;

    // A URL wrapped in `<...>` must come back without the closing `>`.
    #[test]
    fn extract_http_urls_strips_trailing_markdown_reference_bracket() {
        let urls = extract_http_urls("see <https://example.com/path> for details");

        let has_clean_url = urls.iter().any(|found| found == "https://example.com/path");
        assert!(has_clean_url, "trailing `>` must be stripped; got {urls:?}");

        let any_ends_with_bracket = urls.iter().any(|found| found.ends_with('>'));
        assert!(
            !any_ends_with_bracket,
            "no extracted URL may end with `>`; got {urls:?}"
        );
    }

    // The GitHub Packages npm host is on the known-registry list.
    #[test]
    fn is_common_lockfile_source_allows_github_packages() {
        let github_packages_url = "https://npm.pkg.github.com/@myorg/tool/-/tool-1.0.0.tgz";
        assert!(is_common_lockfile_source(github_packages_url));
    }

    // A host that is not on the list is rejected.
    #[test]
    fn is_common_lockfile_source_rejects_arbitrary_hosts() {
        let unknown_host_url = "https://attacker.example.com/-/x.tgz";
        assert!(!is_common_lockfile_source(unknown_host_url));
    }

    // A known host embedded inside a longer, different hostname is rejected.
    #[test]
    fn is_common_lockfile_source_rejects_substring_host_evasion() {
        let npmjs_lookalike = "https://evil.registry.npmjs.org.attacker.com/pkg/-/pkg-1.0.0.tgz";
        assert!(!is_common_lockfile_source(npmjs_lookalike));

        let npmmirror_lookalike = "https://my-registry.npmmirror.com.evil.com/pkg/-/pkg-1.0.0.tgz";
        assert!(!is_common_lockfile_source(npmmirror_lookalike));
    }

    // URLs carrying `user:pass@` credentials still match, with or without a port.
    #[test]
    fn host_matches_url_matches_authenticated_registry_urls() {
        let known_host = "registry.npmjs.org";

        let with_credentials =
            host_matches_url(known_host, "https://user:pass@registry.npmjs.org/pkg");
        assert!(
            with_credentials,
            "authenticated URL with @ must match known host"
        );

        let with_credentials_and_port =
            host_matches_url(known_host, "https://user:pass@registry.npmjs.org:443/pkg");
        assert!(
            with_credentials_and_port,
            "authenticated URL with @ and port must match known host"
        );
    }
}