use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use super::types::{CrawledPackage, CrawlerOptions};
/// Extracts the crate `name` and `version` from the text of a `Cargo.toml`.
///
/// This is a small line-oriented scanner, not a full TOML parser: it only
/// inspects `key = "value"` / `key = 'value'` lines inside the `[package]`
/// table and stops at the next table header.
///
/// Returns `None` when:
/// - there is no `[package]` table,
/// - `name` or `version` is missing, empty, or not a plain string, or
/// - the version line mentions `workspace` (e.g. `version.workspace = true`),
///   because the concrete version is then not present in this file.
pub fn parse_cargo_toml_name_version(content: &str) -> Option<(String, String)> {
    let mut in_package = false;
    let mut name: Option<String> = None;
    let mut version: Option<String> = None;

    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        if trimmed.starts_with('[') {
            if is_package_header(trimmed) {
                in_package = true;
            } else if in_package {
                // Any other table header terminates the `[package]` table.
                break;
            }
            continue;
        }

        if !in_package {
            continue;
        }

        if let Some(val) = extract_string_value(trimmed, "name") {
            name = Some(val);
        } else if let Some(val) = extract_string_value(trimmed, "version") {
            version = Some(val);
        } else if trimmed.starts_with("version") && trimmed.contains("workspace") {
            // Workspace-inherited version: nothing usable in this manifest.
            return None;
        }

        if name.is_some() && version.is_some() {
            break;
        }
    }

    match (name, version) {
        (Some(n), Some(v)) if !n.is_empty() && !v.is_empty() => Some((n, v)),
        _ => None,
    }
}

/// Returns `true` when `line` is the `[package]` table header.
///
/// Tolerates whitespace inside the brackets (`[ package ]`) and a trailing
/// `# comment`, both of which are valid TOML. Array-of-tables headers such as
/// `[[package]]` are not accepted.
fn is_package_header(line: &str) -> bool {
    // The bare key `package` cannot contain `#`, so cutting at the first `#`
    // is safe for the only header this function needs to recognise.
    let header = line.split('#').next().unwrap_or(line).trim();
    header
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .map_or(false, |table| table.trim() == "package")
}

/// Reads the string value of `key = "value"` (or `key = 'value'`) from `line`.
///
/// `line` must start with `key`; anything after the closing quote (such as an
/// inline comment) is ignored. Returns `None` when the line is for a different
/// key, has no `=`, or the value is not a quoted string.
fn extract_string_value(line: &str, key: &str) -> Option<String> {
    let rest = line.strip_prefix(key)?.trim_start();
    let rest = rest.strip_prefix('=')?.trim_start();

    // Accept both TOML basic strings ("...") and literal strings ('...').
    let mut chars = rest.chars();
    let quote = chars.next().filter(|&c| c == '"' || c == '\'')?;
    let body = chars.as_str();

    let end = body.find(quote)?;
    Some(body[..end].to_string())
}
/// Stateless crawler that discovers Rust crates on disk by reading the
/// `Cargo.toml` of each directory under a crate source root.
pub struct CargoCrawler;
impl CargoCrawler {
pub fn new() -> Self {
Self
}
pub async fn get_crate_source_paths(
&self,
options: &CrawlerOptions,
) -> Result<Vec<PathBuf>, std::io::Error> {
if options.global || options.global_prefix.is_some() {
if let Some(ref custom) = options.global_prefix {
return Ok(vec![custom.clone()]);
}
return Ok(Self::get_registry_src_paths().await);
}
let vendor_dir = options.cwd.join("vendor");
if is_dir(&vendor_dir).await {
return Ok(vec![vendor_dir]);
}
let has_cargo_toml = tokio::fs::metadata(options.cwd.join("Cargo.toml"))
.await
.is_ok();
let has_cargo_lock = tokio::fs::metadata(options.cwd.join("Cargo.lock"))
.await
.is_ok();
if has_cargo_toml || has_cargo_lock {
return Ok(Self::get_registry_src_paths().await);
}
Ok(Vec::new())
}
/// Scans every crate source root selected by `options` and returns the
/// crates found, with each purl reported at most once across all roots.
///
/// A failure to resolve the source roots is treated as "no roots".
pub async fn crawl_all(&self, options: &CrawlerOptions) -> Vec<CrawledPackage> {
    let roots = match self.get_crate_source_paths(options).await {
        Ok(paths) => paths,
        Err(_) => Vec::new(),
    };

    // Shared across roots so the same purl is not reported twice.
    let mut already_seen: HashSet<String> = HashSet::new();
    let mut collected = Vec::new();
    for root in roots.iter() {
        collected.extend(self.scan_crate_source(root, &mut already_seen).await);
    }
    collected
}
/// Looks up the given cargo purls under `src_path`.
///
/// For each purl that parses as a cargo purl, two directory layouts are
/// tried in order and the first one whose crate matches wins:
/// 1. registry layout: `<src_path>/<name>-<version>`
/// 2. vendor layout: `<src_path>/<name>`
///
/// Purls that do not parse, or whose crate is not found, are omitted from the
/// returned map. The map is keyed by the purl string as passed in.
///
/// The current implementation never returns `Err`.
pub async fn find_by_purls(
    &self,
    src_path: &Path,
    purls: &[String],
) -> Result<HashMap<String, CrawledPackage>, std::io::Error> {
    let mut result: HashMap<String, CrawledPackage> = HashMap::new();

    for purl in purls {
        let (name, version) = match crate::utils::purl::parse_cargo_purl(purl) {
            Some(parts) => parts,
            None => continue,
        };

        let registry_dir = src_path.join(format!("{name}-{version}"));
        let vendor_dir = src_path.join(name);

        // Registry layout is checked first; the first verified match wins.
        for candidate in [registry_dir, vendor_dir] {
            if self.verify_crate_at_path(&candidate, name, version).await {
                result.insert(
                    purl.clone(),
                    CrawledPackage {
                        name: name.to_string(),
                        version: version.to_string(),
                        namespace: None,
                        purl: purl.clone(),
                        path: candidate,
                    },
                );
                break;
            }
        }
    }

    Ok(result)
}
async fn get_registry_src_paths() -> Vec<PathBuf> {
let cargo_home = Self::cargo_home();
let registry_src = cargo_home.join("registry").join("src");
let mut paths = Vec::new();
let mut entries = match tokio::fs::read_dir(®istry_src).await {
Ok(rd) => rd,
Err(_) => return paths,
};
while let Ok(Some(entry)) = entries.next_entry().await {
let ft = match entry.file_type().await {
Ok(ft) => ft,
Err(_) => continue,
};
if ft.is_dir() {
paths.push(registry_src.join(entry.file_name()));
}
}
paths
}
/// Scans the immediate subdirectories of `src_path` for crates.
///
/// Hidden directories (names starting with `.`) and non-directories are
/// skipped. Crates whose purl is already in `seen` are not returned again;
/// newly found purls are recorded in `seen`.
///
/// Returns an empty list when `src_path` cannot be read; iteration stops at
/// the first directory-read error.
async fn scan_crate_source(
    &self,
    src_path: &Path,
    seen: &mut HashSet<String>,
) -> Vec<CrawledPackage> {
    let mut results = Vec::new();

    let mut entries = match tokio::fs::read_dir(src_path).await {
        Ok(rd) => rd,
        Err(_) => return results,
    };

    while let Ok(Some(entry)) = entries.next_entry().await {
        let ft = match entry.file_type().await {
            Ok(ft) => ft,
            Err(_) => continue,
        };
        if !ft.is_dir() {
            continue;
        }

        let dir_name = entry.file_name();
        // Lossy text is only used for the hidden-dir check and as the
        // name/version fallback; it must not be used to build the path.
        let dir_name_str = dir_name.to_string_lossy();
        if dir_name_str.starts_with('.') {
            continue;
        }

        // Use the entry's real path so non-UTF-8 directory names still resolve.
        let crate_path = entry.path();
        if let Some(pkg) = self
            .read_crate_cargo_toml(&crate_path, &dir_name_str, seen)
            .await
        {
            results.push(pkg);
        }
    }

    results
}
/// Builds a `CrawledPackage` for the crate located at `crate_path`.
///
/// The name and version come from `<crate_path>/Cargo.toml`; when the
/// manifest does not yield both, they are derived from `dir_name` instead.
///
/// Returns `None` when the manifest cannot be read, when neither source
/// yields a name and version, or when the resulting purl is already in
/// `seen`. Otherwise the purl is added to `seen`.
async fn read_crate_cargo_toml(
    &self,
    crate_path: &Path,
    dir_name: &str,
    seen: &mut HashSet<String>,
) -> Option<CrawledPackage> {
    let manifest = tokio::fs::read_to_string(crate_path.join("Cargo.toml"))
        .await
        .ok()?;

    let (name, version) = parse_cargo_toml_name_version(&manifest)
        .or_else(|| Self::parse_dir_name_version(dir_name))?;

    let purl = crate::utils::purl::build_cargo_purl(&name, &version);
    // `insert` reports `false` when the purl was recorded earlier.
    if !seen.insert(purl.clone()) {
        return None;
    }

    Some(CrawledPackage {
        name,
        version,
        namespace: None,
        purl,
        path: crate_path.to_path_buf(),
    })
}
/// Checks whether the directory `path` holds crate `name` at `version`.
///
/// The identity is taken from `<path>/Cargo.toml`; if the manifest does not
/// yield a name and version, the directory's own file name is parsed as
/// `<name>-<version>` instead. Returns `false` when the manifest cannot be
/// read or no identity can be determined.
async fn verify_crate_at_path(&self, path: &Path, name: &str, version: &str) -> bool {
    let manifest = match tokio::fs::read_to_string(path.join("Cargo.toml")).await {
        Ok(text) => text,
        Err(_) => return false,
    };

    let identity = parse_cargo_toml_name_version(&manifest).or_else(|| {
        // Fall back to the directory name when the manifest is inconclusive.
        let dir_name = path
            .file_name()
            .map(|n| n.to_string_lossy().to_string())
            .unwrap_or_default();
        Self::parse_dir_name_version(&dir_name)
    });

    identity.map_or(false, |(found_name, found_version)| {
        found_name == name && found_version == version
    })
}
/// Splits a `<name>-<version>` directory name into its two parts.
///
/// The split happens at the last `-` that is followed by an ASCII digit and
/// lies before the first `.` in the string. Cargo package names cannot
/// contain `.`, so the first `.` always belongs to the version; restricting
/// the search this way keeps hyphens inside a pre-release tag (for example
/// `1.0.0-alpha-2`) from being mistaken for the name/version boundary, while
/// still handling names such as `sha-1`.
///
/// When the string contains no `.`, every `-` is a candidate.
/// Returns `None` when no such split exists or the name part would be empty.
fn parse_dir_name_version(dir_name: &str) -> Option<(String, String)> {
    let name_region_end = dir_name.find('.').unwrap_or(dir_name.len());

    let idx = dir_name[..name_region_end]
        .rmatch_indices('-')
        .map(|(i, _)| i)
        .find(|&i| dir_name[i + 1..].starts_with(|c: char| c.is_ascii_digit()))?;

    let name = &dir_name[..idx];
    let version = &dir_name[idx + 1..];
    if name.is_empty() || version.is_empty() {
        return None;
    }
    Some((name.to_string(), version.to_string()))
}
/// Resolves the Cargo home directory.
///
/// Uses `CARGO_HOME` when it is set and non-empty; otherwise `.cargo` under
/// `HOME`, then `USERPROFILE`. If none of these is usable, the literal path
/// `~/.cargo` is returned (the `~` is not expanded).
///
/// Values are read with `var_os` so paths that are not valid Unicode are
/// honoured, and empty values are treated as unset so they cannot produce a
/// path relative to the current directory.
fn cargo_home() -> PathBuf {
    if let Some(cargo_home) = std::env::var_os("CARGO_HOME").filter(|v| !v.is_empty()) {
        return PathBuf::from(cargo_home);
    }

    let home = std::env::var_os("HOME")
        .filter(|v| !v.is_empty())
        .or_else(|| std::env::var_os("USERPROFILE").filter(|v| !v.is_empty()))
        .unwrap_or_else(|| std::ffi::OsString::from("~"));

    PathBuf::from(home).join(".cargo")
}
}
impl Default for CargoCrawler {
fn default() -> Self {
Self::new()
}
}
/// Returns `true` when metadata for `path` can be read and reports a
/// directory; any error is treated as "not a directory".
async fn is_dir(path: &Path) -> bool {
    match tokio::fs::metadata(path).await {
        Ok(meta) => meta.is_dir(),
        Err(_) => false,
    }
}
#[cfg(test)]
mod tests {
use super::*;
// A plain `[package]` table yields its name and version.
#[test]
fn test_parse_cargo_toml_basic() {
    let manifest = "\n[package]\nname = \"serde\"\nversion = \"1.0.200\"\nedition = \"2021\"\n";
    let parsed = parse_cargo_toml_name_version(manifest);
    assert_eq!(parsed, Some(("serde".to_string(), "1.0.200".to_string())));
}
// Full-line comments are skipped and text after the closing quote is ignored.
#[test]
fn test_parse_cargo_toml_with_comments() {
    let manifest = "\n# This is a comment\n[package]\nname = \"tokio\" # inline comment ignored since we stop at first \"\nversion = \"1.38.0\"\n";
    let parsed = parse_cargo_toml_name_version(manifest);
    assert_eq!(parsed, Some(("tokio".to_string(), "1.38.0".to_string())));
}
// A workspace-inherited version cannot be resolved from the manifest alone.
#[test]
fn test_parse_cargo_toml_workspace_version() {
    let manifest = "\n[package]\nname = \"my-crate\"\nversion.workspace = true\n";
    let parsed = parse_cargo_toml_name_version(manifest);
    assert!(parsed.is_none());
}
// A `[package]` table without a version yields nothing.
#[test]
fn test_parse_cargo_toml_missing_fields() {
    let manifest = "\n[package]\nname = \"incomplete\"\n";
    let parsed = parse_cargo_toml_name_version(manifest);
    assert!(parsed.is_none());
}
// Keys outside a `[package]` table are never considered.
#[test]
fn test_parse_cargo_toml_no_package_section() {
    let manifest = "\n[dependencies]\nserde = \"1.0\"\n";
    let parsed = parse_cargo_toml_name_version(manifest);
    assert!(parsed.is_none());
}
// A `version` key in a later table must not be attributed to `[package]`.
#[test]
fn test_parse_cargo_toml_stops_at_next_section() {
    let manifest = "\n[package]\nname = \"foo\"\n[dependencies]\nversion = \"fake\"\n";
    let parsed = parse_cargo_toml_name_version(manifest);
    assert!(parsed.is_none());
}
// Directory names split into name and version; names without a version part
// are rejected.
#[test]
fn test_parse_dir_name_version() {
    let splittable = [
        ("serde-1.0.200", "serde", "1.0.200"),
        ("serde-json-1.0.120", "serde-json", "1.0.120"),
        ("tokio-1.38.0", "tokio", "1.38.0"),
    ];
    for &(dir_name, name, version) in splittable.iter() {
        assert_eq!(
            CargoCrawler::parse_dir_name_version(dir_name),
            Some((name.to_string(), version.to_string()))
        );
    }

    let unsplittable = ["no-version-here", "noversion"];
    for &dir_name in unsplittable.iter() {
        assert!(CargoCrawler::parse_dir_name_version(dir_name).is_none());
    }
}
// Registry layout: the crate lives in `<root>/<name>-<version>`.
#[tokio::test]
async fn test_find_by_purls_registry_layout() {
    let tmp = tempfile::tempdir().unwrap();
    let crate_dir = tmp.path().join("serde-1.0.200");
    tokio::fs::create_dir_all(&crate_dir).await.unwrap();
    let manifest = "[package]\nname = \"serde\"\nversion = \"1.0.200\"\n";
    tokio::fs::write(crate_dir.join("Cargo.toml"), manifest)
        .await
        .unwrap();

    // Only `serde` exists on disk; `tokio` must be absent from the result.
    let wanted = vec![
        "pkg:cargo/serde@1.0.200".to_string(),
        "pkg:cargo/tokio@1.38.0".to_string(),
    ];
    let found = CargoCrawler::new()
        .find_by_purls(tmp.path(), &wanted)
        .await
        .unwrap();

    assert_eq!(found.len(), 1);
    assert!(found.contains_key("pkg:cargo/serde@1.0.200"));
    assert!(!found.contains_key("pkg:cargo/tokio@1.38.0"));
}
// Vendor layout: the crate lives in `<root>/<name>` without a version suffix.
#[tokio::test]
async fn test_find_by_purls_vendor_layout() {
    let tmp = tempfile::tempdir().unwrap();
    let crate_dir = tmp.path().join("serde");
    tokio::fs::create_dir_all(&crate_dir).await.unwrap();
    let manifest = "[package]\nname = \"serde\"\nversion = \"1.0.200\"\n";
    tokio::fs::write(crate_dir.join("Cargo.toml"), manifest)
        .await
        .unwrap();

    let wanted = vec!["pkg:cargo/serde@1.0.200".to_string()];
    let found = CargoCrawler::new()
        .find_by_purls(tmp.path(), &wanted)
        .await
        .unwrap();

    assert_eq!(found.len(), 1);
    assert!(found.contains_key("pkg:cargo/serde@1.0.200"));
}
// Crawling a root with two crates reports both of them.
#[tokio::test]
async fn test_crawl_all_tempdir() {
    let tmp = tempfile::tempdir().unwrap();

    let fixtures = [
        (
            "serde-1.0.200",
            "[package]\nname = \"serde\"\nversion = \"1.0.200\"\n",
        ),
        (
            "tokio-1.38.0",
            "[package]\nname = \"tokio\"\nversion = \"1.38.0\"\n",
        ),
    ];
    for &(dir_name, manifest) in fixtures.iter() {
        let crate_dir = tmp.path().join(dir_name);
        tokio::fs::create_dir_all(&crate_dir).await.unwrap();
        tokio::fs::write(crate_dir.join("Cargo.toml"), manifest)
            .await
            .unwrap();
    }

    // `global_prefix` points the crawler straight at the temp directory.
    let options = CrawlerOptions {
        cwd: tmp.path().to_path_buf(),
        global: false,
        global_prefix: Some(tmp.path().to_path_buf()),
        batch_size: 100,
    };
    let packages = CargoCrawler::new().crawl_all(&options).await;

    assert_eq!(packages.len(), 2);
    let found: HashSet<_> = packages.iter().map(|p| p.purl.as_str()).collect();
    assert!(found.contains("pkg:cargo/serde@1.0.200"));
    assert!(found.contains("pkg:cargo/tokio@1.38.0"));
}
// Two directories holding the same crate (same name and version) must be
// reported once. Both a registry-style and a vendor-style directory are
// created so the deduplication path is actually exercised.
#[tokio::test]
async fn test_crawl_all_deduplication() {
    let dir = tempfile::tempdir().unwrap();
    let manifest = "[package]\nname = \"serde\"\nversion = \"1.0.200\"\n";

    let dir1 = dir.path().join("serde-1.0.200");
    tokio::fs::create_dir_all(&dir1).await.unwrap();
    tokio::fs::write(dir1.join("Cargo.toml"), manifest)
        .await
        .unwrap();

    // Same package under a second directory name.
    let dir2 = dir.path().join("serde");
    tokio::fs::create_dir_all(&dir2).await.unwrap();
    tokio::fs::write(dir2.join("Cargo.toml"), manifest)
        .await
        .unwrap();

    let crawler = CargoCrawler::new();
    let options = CrawlerOptions {
        cwd: dir.path().to_path_buf(),
        global: false,
        global_prefix: Some(dir.path().to_path_buf()),
        batch_size: 100,
    };
    let packages = crawler.crawl_all(&options).await;

    assert_eq!(packages.len(), 1);
    assert_eq!(packages[0].purl, "pkg:cargo/serde@1.0.200");
}
// When the manifest inherits its version from the workspace, the version is
// taken from the directory name instead.
#[tokio::test]
async fn test_crawl_workspace_version_fallback() {
    let tmp = tempfile::tempdir().unwrap();
    let crate_dir = tmp.path().join("my-crate-0.5.0");
    tokio::fs::create_dir_all(&crate_dir).await.unwrap();
    let manifest = "[package]\nname = \"my-crate\"\nversion.workspace = true\n";
    tokio::fs::write(crate_dir.join("Cargo.toml"), manifest)
        .await
        .unwrap();

    let options = CrawlerOptions {
        cwd: tmp.path().to_path_buf(),
        global: false,
        global_prefix: Some(tmp.path().to_path_buf()),
        batch_size: 100,
    };
    let packages = CargoCrawler::new().crawl_all(&options).await;

    assert_eq!(packages.len(), 1);
    assert_eq!(packages[0].purl, "pkg:cargo/my-crate@0.5.0");
}
// A `vendor` directory under `cwd` is selected as the only source root.
#[tokio::test]
async fn test_vendor_layout_via_get_crate_source_paths() {
    let tmp = tempfile::tempdir().unwrap();
    let vendor_root = tmp.path().join("vendor");
    tokio::fs::create_dir_all(&vendor_root).await.unwrap();

    let crate_dir = vendor_root.join("serde");
    tokio::fs::create_dir_all(&crate_dir).await.unwrap();
    let manifest = "[package]\nname = \"serde\"\nversion = \"1.0.200\"\n";
    tokio::fs::write(crate_dir.join("Cargo.toml"), manifest)
        .await
        .unwrap();

    let options = CrawlerOptions {
        cwd: tmp.path().to_path_buf(),
        global: false,
        global_prefix: None,
        batch_size: 100,
    };
    let roots = CargoCrawler::new()
        .get_crate_source_paths(&options)
        .await
        .unwrap();

    assert_eq!(roots.len(), 1);
    assert_eq!(roots[0], vendor_root);
}
}