Skip to main content

provenant/parsers/
npm_workspace.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for npm/pnpm workspace configuration files.
5//!
6//! Extracts workspace package patterns and monorepo structure from workspace
7//! configuration files used by npm, yarn, and pnpm to define workspaces.
8//!
9//! # Supported Formats
10//! - pnpm-workspace.yaml (YAML workspace configuration)
11//!
12//! # Key Features
13//! - Workspace package pattern extraction (glob patterns for package locations)
14//! - Monorepo structure detection and documentation
15//! - Package discovery from workspace configurations
16//!
17//! # Implementation Notes
//! - Reads the top-level `packages` sequence of the YAML document
19//! - Package patterns are glob expressions (e.g., `packages/*`, `@scoped/**`)
20//! - Returns package data representing the workspace configuration itself
21
22use crate::models::PackageData;
23use crate::models::{DatasourceId, PackageType};
24use crate::parser_warn as warn;
25use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
26use std::path::Path;
27use yaml_serde::Value;
28
29use super::PackageParser;
30use super::metadata::ParserMetadata;
31
/// Parser for pnpm workspace manifests (`pnpm-workspace.yaml`).
///
/// This is a stateless unit type; all behavior lives in its `PackageParser`
/// implementation. It matches files named exactly `pnpm-workspace.yaml` and
/// records the glob patterns listed under the top-level `packages` key in the
/// resulting package data's `extra_data` under the `workspaces` key.
pub struct NpmWorkspaceParser;
36
37impl PackageParser for NpmWorkspaceParser {
38    const PACKAGE_TYPE: PackageType = PackageType::Npm;
39
40    fn metadata() -> Vec<ParserMetadata> {
41        vec![ParserMetadata {
42            description: "pnpm workspace yaml file",
43            file_patterns: &["**/pnpm-workspace.yaml"],
44            package_type: "npm",
45            primary_language: "JavaScript",
46            documentation_url: Some("https://pnpm.io/pnpm-workspace_yaml"),
47        }]
48    }
49
50    fn is_match(path: &Path) -> bool {
51        path.file_name()
52            .and_then(|name| name.to_str())
53            .map(|name| name == "pnpm-workspace.yaml")
54            .unwrap_or(false)
55    }
56
57    fn extract_packages(path: &Path) -> Vec<PackageData> {
58        let content = match read_file_to_string(path, None) {
59            Ok(content) => content,
60            Err(e) => {
61                warn!("Failed to read npm workspace file at {:?}: {}", path, e);
62                return vec![default_package_data()];
63            }
64        };
65
66        let workspace_data: Value = match yaml_serde::from_str(&content) {
67            Ok(data) => data,
68            Err(e) => {
69                crate::parser_warn!("Failed to parse npm workspace file at {:?}: {}", path, e);
70                return vec![default_package_data()];
71            }
72        };
73
74        vec![parse_workspace_file(&workspace_data)]
75    }
76}
77
78/// Returns a default empty PackageData for error cases
79fn default_package_data() -> PackageData {
80    PackageData {
81        package_type: Some(NpmWorkspaceParser::PACKAGE_TYPE),
82        datasource_id: Some(DatasourceId::PnpmWorkspaceYaml),
83        ..Default::default()
84    }
85}
86
87/// Parse a pnpm-workspace.yaml file and extract workspace configuration
88fn parse_workspace_file(workspace_data: &Value) -> PackageData {
89    // Extract the `packages` field which contains workspace patterns
90    let workspaces = workspace_data.get("packages").and_then(|v| v.as_sequence());
91
92    match workspaces {
93        Some(workspace_patterns) => {
94            let workspaces_vec: Vec<String> = workspace_patterns
95                .iter()
96                .take(MAX_ITERATION_COUNT)
97                .filter_map(|v| v.as_str())
98                .map(|s| truncate_field(s.to_string()))
99                .collect();
100
101            PackageData {
102                package_type: Some(NpmWorkspaceParser::PACKAGE_TYPE),
103                extra_data: if workspaces_vec.is_empty() {
104                    None
105                } else {
106                    let mut extra = std::collections::HashMap::new();
107                    extra.insert(
108                        "datasource_id".to_string(),
109                        serde_json::Value::String("pnpm_workspace_yaml".to_string()),
110                    );
111                    extra.insert(
112                        "workspaces".to_string(),
113                        serde_json::Value::Array(
114                            workspaces_vec
115                                .into_iter()
116                                .map(serde_json::Value::String)
117                                .collect(),
118                        ),
119                    );
120                    Some(extra)
121                },
122                ..default_package_data()
123            }
124        }
125        None => {
126            // No workspaces found, return basic package data
127            PackageData {
128                package_type: Some(NpmWorkspaceParser::PACKAGE_TYPE),
129                extra_data: {
130                    let mut extra = std::collections::HashMap::new();
131                    extra.insert(
132                        "datasource_id".to_string(),
133                        serde_json::Value::String("pnpm_workspace_yaml".to_string()),
134                    );
135                    Some(extra)
136                },
137                ..default_package_data()
138            }
139        }
140    }
141}
142
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `yaml` and feeds the resulting document to `parse_workspace_file`.
    fn parse_yaml(yaml: &str) -> PackageData {
        let document: Value = yaml_serde::from_str(yaml).unwrap();
        parse_workspace_file(&document)
    }

    /// Returns the `workspaces` patterns stored in `extra_data`.
    ///
    /// Panics when `extra_data`, the `workspaces` key, or a string entry is
    /// missing, which fails the calling test.
    fn workspace_patterns(package: &PackageData) -> Vec<String> {
        package
            .extra_data
            .as_ref()
            .expect("extra_data should be present")
            .get("workspaces")
            .expect("workspaces key should be present")
            .as_array()
            .expect("workspaces should be an array")
            .iter()
            .map(|pattern| {
                pattern
                    .as_str()
                    .expect("workspace pattern should be a string")
                    .to_string()
            })
            .collect()
    }

    /// Returns the `datasource_id` string stored in `extra_data`.
    fn extra_datasource_id(package: &PackageData) -> &str {
        package
            .extra_data
            .as_ref()
            .expect("extra_data should be present")
            .get("datasource_id")
            .expect("datasource_id key should be present")
            .as_str()
            .expect("datasource_id should be a string")
    }

    #[test]
    fn test_is_match() {
        let matching = Path::new("pnpm-workspace.yaml");
        assert!(NpmWorkspaceParser::is_match(matching));

        for other in ["package.json", "pnpm-lock.yaml", "README.md"] {
            assert!(!NpmWorkspaceParser::is_match(Path::new(other)));
        }
    }

    #[test]
    fn test_parse_workspace_with_single_package() {
        let result = parse_yaml(
            r#"
packages:
  - "packages/*"
"#,
        );

        assert_eq!(result.package_type, Some(PackageType::Npm));
        assert_eq!(extra_datasource_id(&result), "pnpm_workspace_yaml");
        assert_eq!(workspace_patterns(&result), ["packages/*"]);
    }

    #[test]
    fn test_parse_workspace_with_multiple_packages() {
        let result = parse_yaml(
            r#"
packages:
  - "packages/*"
  - "apps/*"
  - "tools/*"
"#,
        );

        assert_eq!(
            workspace_patterns(&result),
            ["packages/*", "apps/*", "tools/*"]
        );
    }

    #[test]
    fn test_parse_workspace_with_wildcard_pattern() {
        let result = parse_yaml(
            r#"
packages:
  - "*"
"#,
        );

        assert_eq!(workspace_patterns(&result), ["*"]);
    }

    #[test]
    fn test_parse_workspace_with_negated_pattern() {
        let result = parse_yaml(
            r#"
packages:
  - "packages/*"
  - "!packages/dont-scan-me"
"#,
        );

        assert_eq!(
            workspace_patterns(&result),
            ["packages/*", "!packages/dont-scan-me"]
        );
    }

    #[test]
    fn test_parse_workspace_with_depth_pattern() {
        let result = parse_yaml(
            r#"
packages:
  - "**/components/*"
"#,
        );

        assert_eq!(workspace_patterns(&result), ["**/components/*"]);
    }

    #[test]
    fn test_parse_workspace_with_no_packages() {
        let result = parse_yaml(
            r#"
name: my-workspace
"#,
        );

        assert_eq!(result.package_type, Some(PackageType::Npm));
        assert!(result.extra_data.is_some());
        assert_eq!(extra_datasource_id(&result), "pnpm_workspace_yaml");

        let extra_data = result.extra_data.unwrap();
        assert!(!extra_data.contains_key("workspaces"));
    }

    #[test]
    fn test_parse_workspace_with_empty_packages_array() {
        let result = parse_yaml(
            r#"
packages: []
"#,
        );

        assert_eq!(result.package_type, Some(PackageType::Npm));
        // Either no extra data at all, or extra data without a `workspaces` key.
        if let Some(extra_data) = result.extra_data {
            assert!(!extra_data.contains_key("workspaces"));
        }
    }

    #[test]
    fn test_default_package_data() {
        let result = default_package_data();

        assert_eq!(result.package_type, Some(PackageType::Npm));
        assert!(result.name.is_none());
        assert!(result.version.is_none());
        assert!(result.extra_data.is_none());
    }
}