Skip to main content

provenant/parsers/
npm_workspace.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for npm/pnpm workspace configuration files.
5//!
6//! Extracts workspace package patterns and monorepo structure from workspace
7//! configuration files used by npm, yarn, and pnpm to define workspaces.
8//!
9//! # Supported Formats
10//! - pnpm-workspace.yaml (YAML workspace configuration)
11//!
12//! # Key Features
13//! - Workspace package pattern extraction (glob patterns for package locations)
14//! - Monorepo structure detection and documentation
15//! - Package discovery from workspace configurations
16//!
17//! # Implementation Notes
//! - Parses the file as YAML and reads its top-level `packages` sequence
19//! - Package patterns are glob expressions (e.g., `packages/*`, `@scoped/**`)
20//! - Returns package data representing the workspace configuration itself
21
22use crate::models::PackageData;
23use crate::models::{DatasourceId, PackageType};
24use crate::parser_warn as warn;
25use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
26use std::path::Path;
27use yaml_serde::Value;
28
29use super::PackageParser;
30
31/// npm workspace parser for pnpm-workspace.yaml files.
32///
33/// Extracts workspace package patterns for monorepo configurations.
34pub struct NpmWorkspaceParser;
35
36impl PackageParser for NpmWorkspaceParser {
37    const PACKAGE_TYPE: PackageType = PackageType::Npm;
38
39    fn is_match(path: &Path) -> bool {
40        path.file_name()
41            .and_then(|name| name.to_str())
42            .map(|name| name == "pnpm-workspace.yaml")
43            .unwrap_or(false)
44    }
45
46    fn extract_packages(path: &Path) -> Vec<PackageData> {
47        let content = match read_file_to_string(path, None) {
48            Ok(content) => content,
49            Err(e) => {
50                warn!("Failed to read npm workspace file at {:?}: {}", path, e);
51                return vec![default_package_data()];
52            }
53        };
54
55        let workspace_data: Value = match yaml_serde::from_str(&content) {
56            Ok(data) => data,
57            Err(e) => {
58                crate::parser_warn!("Failed to parse npm workspace file at {:?}: {}", path, e);
59                return vec![default_package_data()];
60            }
61        };
62
63        vec![parse_workspace_file(&workspace_data)]
64    }
65}
66
67/// Returns a default empty PackageData for error cases
68fn default_package_data() -> PackageData {
69    PackageData {
70        package_type: Some(NpmWorkspaceParser::PACKAGE_TYPE),
71        datasource_id: Some(DatasourceId::PnpmWorkspaceYaml),
72        ..Default::default()
73    }
74}
75
76/// Parse a pnpm-workspace.yaml file and extract workspace configuration
77fn parse_workspace_file(workspace_data: &Value) -> PackageData {
78    // Extract the `packages` field which contains workspace patterns
79    let workspaces = workspace_data.get("packages").and_then(|v| v.as_sequence());
80
81    match workspaces {
82        Some(workspace_patterns) => {
83            let workspaces_vec: Vec<String> = workspace_patterns
84                .iter()
85                .take(MAX_ITERATION_COUNT)
86                .filter_map(|v| v.as_str())
87                .map(|s| truncate_field(s.to_string()))
88                .collect();
89
90            PackageData {
91                package_type: Some(NpmWorkspaceParser::PACKAGE_TYPE),
92                extra_data: if workspaces_vec.is_empty() {
93                    None
94                } else {
95                    let mut extra = std::collections::HashMap::new();
96                    extra.insert(
97                        "datasource_id".to_string(),
98                        serde_json::Value::String("pnpm_workspace_yaml".to_string()),
99                    );
100                    extra.insert(
101                        "workspaces".to_string(),
102                        serde_json::Value::Array(
103                            workspaces_vec
104                                .into_iter()
105                                .map(serde_json::Value::String)
106                                .collect(),
107                        ),
108                    );
109                    Some(extra)
110                },
111                ..default_package_data()
112            }
113        }
114        None => {
115            // No workspaces found, return basic package data
116            PackageData {
117                package_type: Some(NpmWorkspaceParser::PACKAGE_TYPE),
118                extra_data: {
119                    let mut extra = std::collections::HashMap::new();
120                    extra.insert(
121                        "datasource_id".to_string(),
122                        serde_json::Value::String("pnpm_workspace_yaml".to_string()),
123                    );
124                    Some(extra)
125                },
126                ..default_package_data()
127            }
128        }
129    }
130}
131
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `yaml` and feeds the document to `parse_workspace_file`.
    /// Panics if `yaml` is not valid YAML.
    fn parse_yaml(yaml: &str) -> PackageData {
        let document: Value = yaml_serde::from_str(yaml).unwrap();
        parse_workspace_file(&document)
    }

    /// Returns the string stored under `extra_data["datasource_id"]`,
    /// panicking if any layer is missing or has the wrong type.
    fn datasource_id_entry(package: &PackageData) -> &str {
        package
            .extra_data
            .as_ref()
            .unwrap()
            .get("datasource_id")
            .unwrap()
            .as_str()
            .unwrap()
    }

    /// Returns the patterns stored under `extra_data["workspaces"]`,
    /// panicking if any layer is missing or an entry is not a string.
    fn workspace_patterns(package: &PackageData) -> Vec<&str> {
        package
            .extra_data
            .as_ref()
            .unwrap()
            .get("workspaces")
            .unwrap()
            .as_array()
            .unwrap()
            .iter()
            .map(|pattern| pattern.as_str().unwrap())
            .collect()
    }

    #[test]
    fn test_is_match() {
        assert!(NpmWorkspaceParser::is_match(Path::new(
            "pnpm-workspace.yaml"
        )));

        for other in ["package.json", "pnpm-lock.yaml", "README.md"] {
            assert!(!NpmWorkspaceParser::is_match(Path::new(other)));
        }
    }

    #[test]
    fn test_parse_workspace_with_single_package() {
        let result = parse_yaml(
            r#"
packages:
  - "packages/*"
"#,
        );

        assert_eq!(result.package_type, Some(PackageType::Npm));
        assert_eq!(datasource_id_entry(&result), "pnpm_workspace_yaml");
        assert_eq!(workspace_patterns(&result), ["packages/*"]);
    }

    #[test]
    fn test_parse_workspace_with_multiple_packages() {
        let result = parse_yaml(
            r#"
packages:
  - "packages/*"
  - "apps/*"
  - "tools/*"
"#,
        );

        assert_eq!(
            workspace_patterns(&result),
            ["packages/*", "apps/*", "tools/*"]
        );
    }

    #[test]
    fn test_parse_workspace_with_wildcard_pattern() {
        let result = parse_yaml(
            r#"
packages:
  - "*"
"#,
        );

        assert_eq!(workspace_patterns(&result), ["*"]);
    }

    #[test]
    fn test_parse_workspace_with_negated_pattern() {
        let result = parse_yaml(
            r#"
packages:
  - "packages/*"
  - "!packages/dont-scan-me"
"#,
        );

        assert_eq!(
            workspace_patterns(&result),
            ["packages/*", "!packages/dont-scan-me"]
        );
    }

    #[test]
    fn test_parse_workspace_with_depth_pattern() {
        let result = parse_yaml(
            r#"
packages:
  - "**/components/*"
"#,
        );

        assert_eq!(workspace_patterns(&result), ["**/components/*"]);
    }

    #[test]
    fn test_parse_workspace_with_no_packages() {
        let result = parse_yaml(
            r#"
name: my-workspace
"#,
        );

        assert_eq!(result.package_type, Some(PackageType::Npm));
        assert!(result.extra_data.is_some());
        assert_eq!(datasource_id_entry(&result), "pnpm_workspace_yaml");
        assert!(!result.extra_data.unwrap().contains_key("workspaces"));
    }

    #[test]
    fn test_parse_workspace_with_empty_packages_array() {
        let result = parse_yaml(
            r#"
packages: []
"#,
        );

        assert_eq!(result.package_type, Some(PackageType::Npm));
        // Either no extra data at all, or extra data without a `workspaces` entry.
        if let Some(extra) = result.extra_data {
            assert!(!extra.contains_key("workspaces"));
        }
    }

    #[test]
    fn test_default_package_data() {
        let result = default_package_data();

        assert_eq!(result.package_type, Some(PackageType::Npm));
        assert!(result.name.is_none());
        assert!(result.version.is_none());
        assert!(result.extra_data.is_none());
    }
}
282
283crate::register_parser!(
284    "pnpm workspace yaml file",
285    &["**/pnpm-workspace.yaml"],
286    "npm",
287    "JavaScript",
288    Some("https://pnpm.io/pnpm-workspace_yaml"),
289);