use std::collections::HashMap;
use std::path::{Path, PathBuf};
use log::warn;
use crate::models::{DatasourceId, FileInfo, Package, PackageData, TopLevelDependency};
/// Assembles every npm-style workspace found in `files`.
///
/// Each workspace root reported by `find_workspace_roots` is passed to
/// `process_workspace`, which rewrites `packages`, `dependencies` and the
/// files' package assignments in place. When no workspace root is found the
/// inputs are left untouched.
pub fn assemble_npm_workspaces(
    files: &mut [FileInfo],
    packages: &mut Vec<Package>,
    dependencies: &mut Vec<TopLevelDependency>,
) {
    let discovered_roots = find_workspace_roots(files);
    for root in &discovered_roots {
        process_workspace(files, packages, dependencies, root);
    }
}
/// A detected workspace root together with the manifests that declared it.
struct WorkspaceRoot {
/// Directory that contains the root `package.json` and/or `pnpm-workspace.yaml`.
root_dir: PathBuf,
/// Index into the scanned files slice of the root `package.json`, when one was found.
root_package_json_idx: Option<usize>,
/// Index into the scanned files slice of `pnpm-workspace.yaml`, when one was found.
pnpm_workspace_yaml_idx: Option<usize>,
/// Workspace patterns as extracted from the manifest; entries starting with `!`
/// are treated as exclusions during member discovery.
patterns: Vec<String>,
}
/// Scans `files` for workspace declarations and returns one `WorkspaceRoot`
/// per directory, sorted by directory path.
///
/// Two passes are made: first over `package.json` manifests that carry a
/// `workspaces` entry, then over `pnpm-workspace.yaml` files. A pnpm file in a
/// directory that already has a root from the first pass only augments that
/// root (its patterns are used only if the existing root has none).
fn find_workspace_roots(files: &[FileInfo]) -> Vec<WorkspaceRoot> {
    let mut by_dir: HashMap<PathBuf, WorkspaceRoot> = HashMap::new();

    // Pass 1: package.json manifests declaring workspaces.
    for (idx, file) in files.iter().enumerate() {
        let manifest_path = Path::new(&file.path);
        if manifest_path.file_name().and_then(|n| n.to_str()) != Some("package.json") {
            continue;
        }
        let Some(dir) = manifest_path.parent() else {
            continue;
        };
        let npm_entries = file
            .package_data
            .iter()
            .filter(|data| data.datasource_id == Some(DatasourceId::NpmPackageJson));
        for pkg_data in npm_entries {
            let Some(patterns) = extract_workspaces(pkg_data) else {
                continue;
            };
            // A later entry for the same directory replaces an earlier one.
            by_dir.insert(
                dir.to_path_buf(),
                WorkspaceRoot {
                    root_dir: dir.to_path_buf(),
                    root_package_json_idx: Some(idx),
                    pnpm_workspace_yaml_idx: None,
                    patterns,
                },
            );
        }
    }

    // Pass 2: pnpm-workspace.yaml files.
    for (idx, file) in files.iter().enumerate() {
        let yaml_path = Path::new(&file.path);
        if yaml_path.file_name().and_then(|n| n.to_str()) != Some("pnpm-workspace.yaml") {
            continue;
        }
        let Some(dir) = yaml_path.parent() else {
            continue;
        };
        let pnpm_entries = file
            .package_data
            .iter()
            .filter(|data| data.datasource_id == Some(DatasourceId::PnpmWorkspaceYaml));
        for pkg_data in pnpm_entries {
            let Some(patterns) = extract_workspaces(pkg_data) else {
                continue;
            };
            let sibling_manifest_idx = find_root_package_json_index(files, dir);
            match by_dir.get_mut(dir) {
                Some(known) => {
                    known.pnpm_workspace_yaml_idx = Some(idx);
                    if known.root_package_json_idx.is_none() {
                        known.root_package_json_idx = sibling_manifest_idx;
                    }
                    if known.patterns.is_empty() {
                        known.patterns = patterns;
                    }
                }
                None => {
                    by_dir.insert(
                        dir.to_path_buf(),
                        WorkspaceRoot {
                            root_dir: dir.to_path_buf(),
                            root_package_json_idx: sibling_manifest_idx,
                            pnpm_workspace_yaml_idx: Some(idx),
                            patterns,
                        },
                    );
                }
            }
        }
    }

    // HashMap iteration order is arbitrary, so sort for deterministic output.
    let mut roots: Vec<WorkspaceRoot> = by_dir.into_values().collect();
    roots.sort_by(|a, b| a.root_dir.cmp(&b.root_dir));
    roots
}
/// Returns the index of the first file named `package.json` whose parent
/// directory is exactly `root_dir`, or `None` when there is no such file.
fn find_root_package_json_index(files: &[FileInfo], root_dir: &Path) -> Option<usize> {
    for (idx, file) in files.iter().enumerate() {
        let candidate = Path::new(&file.path);
        let is_manifest =
            candidate.file_name().and_then(|name| name.to_str()) == Some("package.json");
        if candidate.parent() == Some(root_dir) && is_manifest {
            return Some(idx);
        }
    }
    None
}
fn extract_workspaces(pkg_data: &PackageData) -> Option<Vec<String>> {
let extra_data = pkg_data.extra_data.as_ref()?;
let workspaces_value = extra_data.get("workspaces")?;
extract_workspace_patterns(workspaces_value)
}
/// Converts a JSON `workspaces` value into a non-empty list of patterns.
///
/// Accepted shapes:
/// * a string — a single pattern;
/// * an array — every string element is kept, other elements are skipped;
/// * an object — its `packages` member is interpreted recursively.
///
/// Any other shape, or a shape that yields no patterns, gives `None`.
fn extract_workspace_patterns(value: &serde_json::Value) -> Option<Vec<String>> {
    let collected: Vec<String> = match value {
        serde_json::Value::String(single) => vec![single.to_owned()],
        serde_json::Value::Array(items) => items
            .iter()
            .filter_map(serde_json::Value::as_str)
            .map(String::from)
            .collect(),
        // The recursive call already maps "no patterns" to `None`.
        serde_json::Value::Object(members) => {
            return members.get("packages").and_then(extract_workspace_patterns);
        }
        _ => return None,
    };
    if collected.is_empty() {
        return None;
    }
    Some(collected)
}
/// Rewrites `packages`, `dependencies` and file assignments for one workspace.
///
/// Steps, in order:
/// 1. discover the member manifests; bail out with a warning when none match;
/// 2. decide whether the root package is kept (pnpm workspace whose root
///    `package.json` has a purl and is not private) or removed;
/// 3. drop the previously assembled member packages and rebuild them;
/// 4. when the root package is not kept, replace root-level dependencies with
///    hoisted ones that belong to no package;
/// 5. assign `for_packages` on the files and resolve `workspace:` requirements.
fn process_workspace(
files: &mut [FileInfo],
packages: &mut Vec<Package>,
dependencies: &mut Vec<TopLevelDependency>,
workspace_root: &WorkspaceRoot,
) {
let member_indices = discover_members(files, workspace_root);
if member_indices.is_empty() {
warn!(
"No workspace members found for patterns {:?} in {:?}",
workspace_root.patterns, workspace_root.root_dir
);
return;
}
// True only when a pnpm-workspace.yaml exists AND the root package.json holds
// npm package data with a purl that is not marked private.
let is_pnpm_with_root_package = workspace_root.pnpm_workspace_yaml_idx.is_some()
&& workspace_root.root_package_json_idx.is_some_and(|idx| {
files[idx].package_data.iter().any(|pkg| {
pkg.datasource_id == Some(DatasourceId::NpmPackageJson)
&& pkg.purl.is_some()
&& !pkg.is_private
})
});
// Kept root: look up the uid of the already-assembled package that lists the
// root manifest among its datafile paths. Otherwise the root package (if a
// root manifest is known) is removed and there is no root uid.
let root_package_uid = if is_pnpm_with_root_package {
packages.iter().find_map(|pkg| {
if let Some(idx) = workspace_root.root_package_json_idx
&& pkg.datafile_paths.contains(&files[idx].path)
{
Some(pkg.package_uid.clone())
} else {
None
}
})
} else if let Some(idx) = workspace_root.root_package_json_idx {
remove_root_package(&files[idx], packages, dependencies);
None
} else {
None
};
// Replace whatever was assembled for the member manifests with fresh packages.
remove_member_packages(files, &member_indices, packages, dependencies);
let member_packages = create_member_packages(files, &member_indices);
// Member name -> version, used to resolve `workspace:` requirements.
let mut member_versions: HashMap<String, String> = HashMap::new();
for (pkg, _deps) in &member_packages {
if let (Some(name), Some(version)) = (workspace_member_name(pkg), &pkg.version) {
member_versions.insert(name, version.clone());
}
}
let member_uids: Vec<String> = member_packages
.iter()
.map(|(pkg, _deps)| pkg.package_uid.clone())
.collect();
// Root package not kept: drop the root-level dependencies and re-add them
// without an owning package (`for_package_uid` is `None`).
if let Some(idx) = workspace_root.root_package_json_idx
&& !is_pnpm_with_root_package
{
remove_root_level_dependencies(dependencies, &workspace_root.root_dir);
hoist_root_dependencies(
files,
idx,
&workspace_root.root_dir,
dependencies,
&member_versions,
None,
);
}
for (pkg, deps) in member_packages {
packages.push(pkg);
dependencies.extend(deps);
}
assign_for_packages(
files,
workspace_root,
&member_indices,
&member_uids,
root_package_uid.as_deref(),
);
// Final pass over all dependencies, including the member dependencies just added.
resolve_workspace_versions(dependencies, &member_versions);
}
/// Returns the indices (into `files`) of the `package.json` manifests that are
/// members of `workspace_root`, sorted by file path.
///
/// A manifest is a member when all of the following hold:
/// * it lives under the workspace root and is not the root manifest itself;
/// * no component of its root-relative path is `node_modules` — installed
///   dependencies are never workspace members, even when a recursive pattern
///   such as `packages/**` would otherwise reach them;
/// * it carries npm `package.json` data with a purl;
/// * its root-relative path matches at least one include pattern and none of
///   the `!`-prefixed exclude patterns.
fn discover_members(files: &[FileInfo], workspace_root: &WorkspaceRoot) -> Vec<usize> {
    // Split the patterns once instead of re-checking the `!` prefix per file.
    let mut include_patterns: Vec<&str> = Vec::new();
    let mut exclude_patterns: Vec<&str> = Vec::new();
    for pattern in &workspace_root.patterns {
        match pattern.strip_prefix('!') {
            Some(negated) => exclude_patterns.push(negated),
            None => include_patterns.push(pattern.as_str()),
        }
    }

    let mut member_indices = Vec::new();
    for (idx, file) in files.iter().enumerate() {
        let path = Path::new(&file.path);
        if path.file_name().and_then(|n| n.to_str()) != Some("package.json") {
            continue;
        }
        if Some(idx) == workspace_root.root_package_json_idx {
            continue;
        }
        // `strip_prefix` fails exactly when the file is outside the workspace root.
        let Ok(relative_path) = path.strip_prefix(&workspace_root.root_dir) else {
            continue;
        };
        if relative_path
            .components()
            .any(|component| component.as_os_str() == "node_modules")
        {
            continue;
        }
        let has_valid_package = file.package_data.iter().any(|pkg| {
            pkg.datasource_id == Some(DatasourceId::NpmPackageJson) && pkg.purl.is_some()
        });
        if !has_valid_package {
            continue;
        }

        let included = include_patterns
            .iter()
            .any(|pattern| matches_workspace_pattern(relative_path, pattern));
        if !included {
            continue;
        }
        let excluded = exclude_patterns
            .iter()
            .any(|pattern| matches_workspace_pattern(relative_path, pattern));
        if !excluded {
            member_indices.push(idx);
        }
    }

    member_indices.sort_by(|left, right| files[*left].path.cmp(&files[*right].path));
    member_indices
}
/// Tells whether `path` (a `package.json` path relative to the workspace
/// root) is selected by the workspace `pattern`.
///
/// The pattern names a *directory*; `/package.json` is appended before
/// matching. A leading `./` and trailing `/` on the pattern are ignored, so
/// `./packages/*` and `packages/foo/` behave like `packages/*` and
/// `packages/foo`.
///
/// * Patterns without `*` or `?` must match the manifest path exactly.
/// * `literal/*` matches manifests exactly one directory below `literal`.
/// * Everything else is delegated to `glob::Pattern`; an invalid glob matches
///   nothing.
///
/// Paths that are not valid UTF-8 never match.
fn matches_workspace_pattern(path: &Path, pattern: &str) -> bool {
    let path_str = path.to_str().unwrap_or("");
    let pattern = pattern
        .strip_prefix("./")
        .unwrap_or(pattern)
        .trim_end_matches('/');
    let manifest_pattern = format!("{pattern}/package.json");

    if !pattern.contains('*') && !pattern.contains('?') {
        return path_str == manifest_pattern;
    }

    if let Some(prefix) = pattern.strip_suffix("/*") {
        // Fast path only when the prefix is a literal directory; a prefix with
        // wildcards of its own must go through the glob matcher below.
        if !prefix.contains('*') && !prefix.contains('?') {
            // Require a `/` right after the prefix so that `packages/*` does
            // not accept `packagesfoo/package.json`.
            let Some(rest) = path_str
                .strip_prefix(prefix)
                .and_then(|after_prefix| after_prefix.strip_prefix('/'))
            else {
                return false;
            };
            let mut segments = rest.split('/');
            let member_dir = segments.next();
            let manifest = segments.next();
            return member_dir.is_some_and(|dir| !dir.is_empty())
                && manifest == Some("package.json")
                && segments.next().is_none();
        }
    }

    glob::Pattern::new(&manifest_pattern)
        .map(|compiled| compiled.matches(path_str))
        .unwrap_or(false)
}
/// Removes every package that was assembled from one of the member manifests,
/// together with the dependencies attached to those packages.
///
/// A package is removed when any of its `datafile_paths` equals the path of a
/// file listed in `member_indices`. Dependencies without a `for_package_uid`
/// are always kept.
fn remove_member_packages(
    files: &[FileInfo],
    member_indices: &[usize],
    packages: &mut Vec<Package>,
    dependencies: &mut Vec<TopLevelDependency>,
) {
    let is_member_manifest = |datafile: &str| {
        member_indices
            .iter()
            .any(|&idx| files[idx].path.as_str() == datafile)
    };

    let mut removed_uids: Vec<String> = Vec::new();
    for pkg in packages.iter() {
        if pkg
            .datafile_paths
            .iter()
            .any(|datafile| is_member_manifest(datafile.as_str()))
        {
            removed_uids.push(pkg.package_uid.clone());
        }
    }

    packages.retain(|pkg| !removed_uids.contains(&pkg.package_uid));
    dependencies.retain(|dep| match dep.for_package_uid.as_ref() {
        Some(owner_uid) => !removed_uids.contains(owner_uid),
        None => true,
    });
}
/// Removes the package(s) assembled for the workspace root manifest and the
/// dependencies attached to them.
///
/// The root package is identified by the purl of the first npm `package.json`
/// entry in `root_file`; nothing happens when that entry is missing or has no
/// purl. Every package with that purl is removed, and the dependencies of
/// *all* removed packages are dropped so that none is left pointing at a
/// package uid that no longer exists.
fn remove_root_package(
    root_file: &FileInfo,
    packages: &mut Vec<Package>,
    dependencies: &mut Vec<TopLevelDependency>,
) {
    let Some(root_purl) = root_file
        .package_data
        .iter()
        .find(|pkg| pkg.datasource_id == Some(DatasourceId::NpmPackageJson))
        .and_then(|pkg| pkg.purl.as_ref())
    else {
        return;
    };

    // Remember every removed uid, not only the last one.
    let mut removed_uids: Vec<String> = Vec::new();
    packages.retain(|pkg| {
        let is_root = pkg.purl.as_ref() == Some(root_purl);
        if is_root {
            removed_uids.push(pkg.package_uid.clone());
        }
        !is_root
    });

    if removed_uids.is_empty() {
        return;
    }
    dependencies.retain(|dep| match dep.for_package_uid.as_ref() {
        Some(owner_uid) => !removed_uids.contains(owner_uid),
        None => true,
    });
}
/// Drops the dependencies whose datafile sits directly in `root_dir` and whose
/// datasource is the npm manifest or one of the bun/npm/yarn/pnpm lockfiles.
/// All other dependencies are kept.
fn remove_root_level_dependencies(dependencies: &mut Vec<TopLevelDependency>, root_dir: &Path) {
    dependencies.retain(|dependency| {
        // Anything not located directly in the root directory is kept as is.
        if Path::new(&dependency.datafile_path).parent() != Some(root_dir) {
            return true;
        }
        match dependency.datasource_id {
            DatasourceId::NpmPackageJson
            | DatasourceId::BunLock
            | DatasourceId::BunLockb
            | DatasourceId::NpmPackageLockJson
            | DatasourceId::YarnLock
            | DatasourceId::YarnLockV1
            | DatasourceId::YarnLockV2
            | DatasourceId::PnpmLockYaml => false,
            _ => true,
        }
    });
}
/// Builds a `Package` plus its top-level dependencies for every member
/// manifest listed in `member_indices`.
///
/// For each file the first npm `package.json` entry that has a purl is used;
/// files without such an entry are skipped. Only dependencies that have a purl
/// are converted, and each is attributed to the freshly created package.
fn create_member_packages(
    files: &[FileInfo],
    member_indices: &[usize],
) -> Vec<(Package, Vec<TopLevelDependency>)> {
    member_indices
        .iter()
        .filter_map(|&idx| {
            let file = &files[idx];
            let manifest = file.package_data.iter().find(|candidate| {
                candidate.datasource_id == Some(DatasourceId::NpmPackageJson)
                    && candidate.purl.is_some()
            })?;

            let package = Package::from_package_data(manifest, file.path.clone());

            let mut member_deps: Vec<TopLevelDependency> = Vec::new();
            for dep in &manifest.dependencies {
                if dep.purl.is_none() {
                    continue;
                }
                member_deps.push(TopLevelDependency::from_dependency(
                    dep,
                    file.path.clone(),
                    DatasourceId::NpmPackageJson,
                    Some(package.package_uid.clone()),
                ));
            }
            Some((package, member_deps))
        })
        .collect()
}
/// Appends the workspace root's dependencies to `dependencies`, attributing
/// them to `for_package_uid` (or to no package when it is `None`).
///
/// Sources, in order:
/// 1. the first npm `package.json` entry of `files[root_idx]` — when there is
///    none the function returns without adding anything;
/// 2. lockfile data of files located directly in `root_dir`, accepted only
///    when the file name and the datasource agree (`bun.lock`, `bun.lockb`,
///    `package-lock.json` / `.package-lock.json` / `npm-shrinkwrap.json` /
///    `.npm-shrinkwrap.json`, `yarn.lock`, `pnpm-lock.yaml` /
///    `shrinkwrap.yaml`).
///
/// Only dependencies with a purl are added. A `workspace:` requirement is
/// replaced by its resolved form when `resolve_workspace_requirement` can
/// resolve it against `member_versions`.
fn hoist_root_dependencies(
    files: &[FileInfo],
    root_idx: usize,
    root_dir: &Path,
    dependencies: &mut Vec<TopLevelDependency>,
    member_versions: &HashMap<String, String>,
    for_package_uid: Option<&str>,
) {
    let root_file = &files[root_idx];
    let Some(root_pkg_data) = root_file
        .package_data
        .iter()
        .find(|pkg| pkg.datasource_id == Some(DatasourceId::NpmPackageJson))
    else {
        return;
    };

    // (file, datasource, package data) triples to hoist from, manifest first.
    let mut sources = vec![(root_file, DatasourceId::NpmPackageJson, root_pkg_data)];

    for file in files {
        let path = Path::new(&file.path);
        if path.parent() != Some(root_dir) {
            continue;
        }
        let Some(file_name) = path.file_name().and_then(|n| n.to_str()) else {
            continue;
        };
        for pkg_data in &file.package_data {
            let Some(dsid) = pkg_data.datasource_id else {
                continue;
            };
            let is_root_lockfile_data = match file_name {
                "bun.lock" => dsid == DatasourceId::BunLock,
                "bun.lockb" => dsid == DatasourceId::BunLockb,
                // npm writes the shrinkwrap as `npm-shrinkwrap.json`; the
                // dotted spelling is kept for backward compatibility.
                ".package-lock.json"
                | "package-lock.json"
                | "npm-shrinkwrap.json"
                | ".npm-shrinkwrap.json" => dsid == DatasourceId::NpmPackageLockJson,
                "yarn.lock" => matches!(
                    dsid,
                    DatasourceId::YarnLock | DatasourceId::YarnLockV1 | DatasourceId::YarnLockV2
                ),
                "pnpm-lock.yaml" | "shrinkwrap.yaml" => dsid == DatasourceId::PnpmLockYaml,
                _ => false,
            };
            if is_root_lockfile_data {
                sources.push((file, dsid, pkg_data));
            }
        }
    }

    for (file, datasource_id, pkg_data) in sources {
        for dep in pkg_data.dependencies.iter().filter(|dep| dep.purl.is_some()) {
            let mut top_dep = TopLevelDependency::from_dependency(
                dep,
                file.path.clone(),
                datasource_id,
                for_package_uid.map(str::to_string),
            );
            let resolved = top_dep
                .extracted_requirement
                .as_deref()
                .filter(|req| req.starts_with("workspace:"))
                .and_then(|req| resolve_workspace_requirement(req, &top_dep.purl, member_versions));
            if let Some(resolved) = resolved {
                top_dep.extracted_requirement = Some(resolved);
            }
            dependencies.push(top_dep);
        }
    }
}
fn assign_for_packages(
files: &mut [FileInfo],
workspace_root: &WorkspaceRoot,
member_indices: &[usize],
member_uids: &[String],
root_package_uid: Option<&str>,
) {
let mut member_dirs: Vec<PathBuf> = Vec::new();
for &idx in member_indices {
if let Some(parent) = Path::new(&files[idx].path).parent() {
member_dirs.push(parent.to_path_buf());
}
}
for file in files.iter_mut() {
let path = Path::new(&file.path);
if !path.starts_with(&workspace_root.root_dir) {
continue;
}
file.for_packages.clear();
let mut assigned = false;
for (i, member_dir) in member_dirs.iter().enumerate() {
if path.starts_with(member_dir) {
file.for_packages.push(member_uids[i].clone());
assigned = true;
break;
}
}
if assigned {
continue;
}
if let Ok(rel) = path.strip_prefix(&workspace_root.root_dir)
&& let Some(first_component) = rel.components().next()
&& first_component.as_os_str() == "node_modules"
{
continue;
}
if let Some(root_uid) = root_package_uid {
file.for_packages.push(root_uid.to_string());
} else {
for uid in member_uids {
file.for_packages.push(uid.clone());
}
}
}
}
/// Rewrites, in place, every dependency requirement that starts with
/// `workspace:` and that `resolve_workspace_requirement` can resolve against
/// `member_versions`. Requirements that cannot be resolved are left unchanged.
fn resolve_workspace_versions(
    dependencies: &mut [TopLevelDependency],
    member_versions: &HashMap<String, String>,
) {
    for dep in dependencies.iter_mut() {
        let Some(requirement) = dep.extracted_requirement.as_deref() else {
            continue;
        };
        if !requirement.starts_with("workspace:") {
            continue;
        }
        let resolved = resolve_workspace_requirement(requirement, &dep.purl, member_versions);
        if let Some(resolved) = resolved {
            dep.extracted_requirement = Some(resolved);
        }
    }
}
/// Resolves a `workspace:` requirement against the versions of the workspace
/// members.
///
/// The member is looked up by the package name encoded in `dep_purl`. Returns
/// `None` when the purl is missing or not an npm purl, when the package is not
/// a known member, or when `requirement` does not start with `workspace:`.
///
/// Resolution of the part after `workspace:`:
/// * empty or `*` — the member's exact version;
/// * a bare operator (`^`, `~`, `>=`, ...) — the operator followed by the
///   member's version;
/// * anything else (e.g. `^1.2.0`, `1.2.3`) — returned unchanged, because it
///   already is a complete range.
fn resolve_workspace_requirement(
    requirement: &str,
    dep_purl: &Option<String>,
    member_versions: &HashMap<String, String>,
) -> Option<String> {
    let package_name = dep_purl
        .as_deref()
        .and_then(extract_package_name_from_purl)?;
    let version = member_versions.get(&package_name)?;
    let workspace_spec = requirement.strip_prefix("workspace:")?;

    let resolved = match workspace_spec {
        "" | "*" => version.clone(),
        // Only a spec made up solely of operator characters gets the member
        // version appended; `^1.2.0` must not become `^1.2.0<version>`.
        spec if spec
            .chars()
            .all(|c| matches!(c, '^' | '~' | '>' | '<' | '=')) =>
        {
            format!("{spec}{version}")
        }
        spec => spec.to_string(),
    };
    Some(resolved)
}

/// Extracts the (decoded) npm package name from a `pkg:npm/...` purl.
///
/// Qualifiers (`?...`) and subpath (`#...`) are dropped, then the version
/// after the last `@` is removed — an `@` in first position is the scope
/// marker of an unencoded scoped name and is kept. Finally `%40` and `%2F`
/// are decoded. Returns `None` for purls of any other type.
fn extract_package_name_from_purl(purl: &str) -> Option<String> {
    let after_type = purl.strip_prefix("pkg:npm/")?;
    let end = after_type
        .find(|c: char| c == '?' || c == '#')
        .unwrap_or(after_type.len());
    let coordinates = &after_type[..end];

    let name_part = match coordinates.rfind('@') {
        Some(at_pos) if at_pos > 0 => &coordinates[..at_pos],
        _ => coordinates,
    };

    Some(
        name_part
            .replace("%40", "@")
            .replace("%2F", "/")
            .replace("%2f", "/"),
    )
}
/// Returns the name under which a workspace member is referenced:
/// `namespace/name` when the package has a namespace, plain `name` otherwise,
/// and `None` when the package has no name.
fn workspace_member_name(package: &Package) -> Option<String> {
    let name = package.name.as_deref()?;
    let full_name = match package.namespace.as_deref() {
        Some(namespace) => format!("{namespace}/{name}"),
        None => name.to_string(),
    };
    Some(full_name)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::models::PackageType;
#[test]
fn test_matches_workspace_pattern_exact() {
let path = Path::new("packages/foo/package.json");
assert!(matches_workspace_pattern(path, "packages/foo"));
assert!(!matches_workspace_pattern(path, "packages/bar"));
}
#[test]
fn test_matches_workspace_pattern_single_star() {
let path = Path::new("packages/foo/package.json");
assert!(matches_workspace_pattern(path, "packages/*"));
let nested = Path::new("packages/foo/bar/package.json");
assert!(!matches_workspace_pattern(nested, "packages/*"));
let wrong_dir = Path::new("apps/foo/package.json");
assert!(!matches_workspace_pattern(wrong_dir, "packages/*"));
}
#[test]
fn test_matches_workspace_pattern_double_star() {
let path = Path::new("packages/foo/package.json");
assert!(matches_workspace_pattern(path, "packages/*"));
let nested = Path::new("packages/foo/bar/package.json");
assert!(matches_workspace_pattern(nested, "packages/**"));
}
#[test]
fn test_extract_package_name_from_purl() {
assert_eq!(
extract_package_name_from_purl("pkg:npm/lodash@4.17.21"),
Some("lodash".to_string())
);
assert_eq!(
extract_package_name_from_purl("pkg:npm/@types/node@18.0.0"),
Some("@types/node".to_string())
);
assert_eq!(
extract_package_name_from_purl("pkg:npm/package@1.0.0?uuid=abc"),
Some("package".to_string())
);
assert_eq!(extract_package_name_from_purl("pkg:pypi/django@3.2"), None);
assert_eq!(
extract_package_name_from_purl("pkg:npm/%40myorg%2Fcore"),
Some("@myorg/core".to_string())
);
assert_eq!(
extract_package_name_from_purl("pkg:npm/%40myorg%2Fcore@1.0.0"),
Some("@myorg/core".to_string())
);
assert_eq!(
extract_package_name_from_purl("pkg:npm/simple-pkg"),
Some("simple-pkg".to_string())
);
}
#[test]
fn test_resolve_workspace_requirement() {
let mut versions = HashMap::new();
versions.insert("my-package".to_string(), "1.2.3".to_string());
versions.insert("@myorg/core".to_string(), "1.0.0".to_string());
let purl = Some("pkg:npm/my-package@1.2.3".to_string());
assert_eq!(
resolve_workspace_requirement("workspace:*", &purl, &versions),
Some("1.2.3".to_string())
);
assert_eq!(
resolve_workspace_requirement("workspace:^", &purl, &versions),
Some("^1.2.3".to_string())
);
assert_eq!(
resolve_workspace_requirement("workspace:~", &purl, &versions),
Some("~1.2.3".to_string())
);
assert_eq!(
resolve_workspace_requirement("workspace:", &purl, &versions),
Some("1.2.3".to_string())
);
let scoped_purl = Some("pkg:npm/%40myorg%2Fcore@1.0.0".to_string());
assert_eq!(
resolve_workspace_requirement("workspace:^", &scoped_purl, &versions),
Some("^1.0.0".to_string())
);
}
#[test]
fn test_extract_workspaces() {
let mut extra_data = std::collections::HashMap::new();
extra_data.insert(
"workspaces".to_string(),
serde_json::json!(["packages/*", "apps/*"]),
);
let pkg_data = PackageData {
package_type: Some(PackageType::Npm),
datasource_id: Some(DatasourceId::NpmPackageJson),
extra_data: Some(extra_data),
..Default::default()
};
let workspaces = extract_workspaces(&pkg_data).unwrap();
assert_eq!(workspaces.len(), 2);
assert_eq!(workspaces[0], "packages/*");
assert_eq!(workspaces[1], "apps/*");
}
#[test]
fn test_extract_workspaces_string() {
let pkg_data = PackageData {
package_type: Some(PackageType::Npm),
datasource_id: Some(DatasourceId::NpmPackageJson),
extra_data: Some(std::collections::HashMap::from([(
"workspaces".to_string(),
serde_json::Value::String("packages/*".to_string()),
)])),
..Default::default()
};
let workspaces = extract_workspaces(&pkg_data).unwrap();
assert_eq!(workspaces, vec!["packages/*"]);
}
#[test]
fn test_extract_workspaces_object_packages() {
let pkg_data = PackageData {
package_type: Some(PackageType::Npm),
datasource_id: Some(DatasourceId::NpmPackageJson),
extra_data: Some(std::collections::HashMap::from([(
"workspaces".to_string(),
serde_json::json!({ "packages": ["packages/*", "apps/*"] }),
)])),
..Default::default()
};
let workspaces = extract_workspaces(&pkg_data).unwrap();
assert_eq!(workspaces, vec!["packages/*", "apps/*"]);
}
#[test]
fn test_extract_workspaces_empty() {
let pkg_data = PackageData {
package_type: Some(PackageType::Npm),
datasource_id: Some(DatasourceId::NpmPackageJson),
..Default::default()
};
assert_eq!(extract_workspaces(&pkg_data), None);
}
}