Skip to main content

provenant/parsers/
npm_workspace.rs

//! Parser for pnpm workspace configuration files.
//!
//! Extracts the workspace package patterns that describe a monorepo's layout
//! from the workspace configuration file. npm and yarn declare workspaces
//! elsewhere; only the pnpm file is handled by this module.
//!
//! # Supported Formats
//! - pnpm-workspace.yaml (YAML workspace configuration)
//!
//! # Key Features
//! - Workspace package pattern extraction (glob patterns for package locations)
//! - Monorepo structure detection and documentation (patterns are recorded
//!   as-is under `extra_data["workspaces"]`)
//! - Package discovery from workspace configurations (the patterns are not
//!   expanded against the file system here)
//!
//! # Implementation Notes
//! - Parses the YAML document and reads its top-level `packages` sequence
//! - Package patterns are glob expressions (e.g., `packages/*`, `@scoped/**`)
//! - Returns package data representing the workspace configuration itself
18
19use crate::models::PackageData;
20use crate::models::{DatasourceId, PackageType};
21use serde_yaml::Value;
22use std::fs;
23use std::path::Path;
24
25use super::PackageParser;
26
/// Parser for `pnpm-workspace.yaml` workspace configuration files.
///
/// A field-less marker type: all behavior lives in its [`PackageParser`]
/// implementation, which records the workspace package patterns declared in
/// the file.
pub struct NpmWorkspaceParser;
31
32impl PackageParser for NpmWorkspaceParser {
33    const PACKAGE_TYPE: PackageType = PackageType::Npm;
34
35    fn is_match(path: &Path) -> bool {
36        path.file_name()
37            .and_then(|name| name.to_str())
38            .map(|name| name == "pnpm-workspace.yaml")
39            .unwrap_or(false)
40    }
41
42    fn extract_packages(path: &Path) -> Vec<PackageData> {
43        let content = match fs::read_to_string(path) {
44            Ok(content) => content,
45            Err(e) => {
46                log::warn!("Failed to read npm workspace file at {:?}: {}", path, e);
47                return vec![default_package_data()];
48            }
49        };
50
51        let workspace_data: Value = match serde_yaml::from_str(&content) {
52            Ok(data) => data,
53            Err(e) => {
54                log::warn!("Failed to parse npm workspace file at {:?}: {}", path, e);
55                return vec![default_package_data()];
56            }
57        };
58
59        vec![parse_workspace_file(&workspace_data)]
60    }
61}
62
63/// Returns a default empty PackageData for error cases
64fn default_package_data() -> PackageData {
65    PackageData {
66        package_type: Some(NpmWorkspaceParser::PACKAGE_TYPE),
67        datasource_id: Some(DatasourceId::PnpmWorkspaceYaml),
68        ..Default::default()
69    }
70}
71
72/// Parse a pnpm-workspace.yaml file and extract workspace configuration
73fn parse_workspace_file(workspace_data: &Value) -> PackageData {
74    // Extract the `packages` field which contains workspace patterns
75    let workspaces = workspace_data.get("packages").and_then(|v| v.as_sequence());
76
77    match workspaces {
78        Some(workspace_patterns) => {
79            let workspaces_vec: Vec<String> = workspace_patterns
80                .iter()
81                .filter_map(|v| v.as_str())
82                .map(|s| s.to_string())
83                .collect();
84
85            PackageData {
86                package_type: Some(NpmWorkspaceParser::PACKAGE_TYPE),
87                extra_data: if workspaces_vec.is_empty() {
88                    None
89                } else {
90                    let mut extra = std::collections::HashMap::new();
91                    extra.insert(
92                        "datasource_id".to_string(),
93                        serde_json::Value::String("pnpm_workspace_yaml".to_string()),
94                    );
95                    extra.insert(
96                        "workspaces".to_string(),
97                        serde_json::Value::Array(
98                            workspaces_vec
99                                .into_iter()
100                                .map(serde_json::Value::String)
101                                .collect(),
102                        ),
103                    );
104                    Some(extra)
105                },
106                ..default_package_data()
107            }
108        }
109        None => {
110            // No workspaces found, return basic package data
111            PackageData {
112                package_type: Some(NpmWorkspaceParser::PACKAGE_TYPE),
113                extra_data: {
114                    let mut extra = std::collections::HashMap::new();
115                    extra.insert(
116                        "datasource_id".to_string(),
117                        serde_json::Value::String("pnpm_workspace_yaml".to_string()),
118                    );
119                    Some(extra)
120                },
121                ..default_package_data()
122            }
123        }
124    }
125}
126
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `yaml` as a YAML document and feeds it to `parse_workspace_file`.
    fn parse(yaml: &str) -> PackageData {
        let document: Value = serde_yaml::from_str(yaml).unwrap();
        parse_workspace_file(&document)
    }

    /// Returns the `workspaces` array stored in `extra_data`, panicking when
    /// `extra_data` or the `workspaces` entry is missing.
    fn workspace_patterns(package: PackageData) -> Vec<serde_json::Value> {
        let extra = package.extra_data.unwrap();
        extra.get("workspaces").unwrap().as_array().unwrap().clone()
    }

    #[test]
    fn test_is_match() {
        let accepted = Path::new("pnpm-workspace.yaml");
        assert!(NpmWorkspaceParser::is_match(accepted));

        for rejected in ["package.json", "pnpm-lock.yaml", "README.md"] {
            assert!(!NpmWorkspaceParser::is_match(Path::new(rejected)));
        }
    }

    #[test]
    fn test_parse_workspace_with_single_package() {
        let package = parse(
            r#"
packages:
  - "packages/*"
"#,
        );

        assert_eq!(package.package_type, Some(PackageType::Npm));

        let extra = package.extra_data.unwrap();
        let datasource = extra.get("datasource_id").unwrap().as_str().unwrap();
        assert_eq!(datasource, "pnpm_workspace_yaml");

        let patterns = extra.get("workspaces").unwrap().as_array().unwrap();
        assert_eq!(patterns.len(), 1);
        assert_eq!(patterns[0], "packages/*");
    }

    #[test]
    fn test_parse_workspace_with_multiple_packages() {
        let package = parse(
            r#"
packages:
  - "packages/*"
  - "apps/*"
  - "tools/*"
"#,
        );

        assert_eq!(
            workspace_patterns(package),
            vec!["packages/*", "apps/*", "tools/*"]
        );
    }

    #[test]
    fn test_parse_workspace_with_wildcard_pattern() {
        let package = parse(
            r#"
packages:
  - "*"
"#,
        );

        assert_eq!(workspace_patterns(package), vec!["*"]);
    }

    #[test]
    fn test_parse_workspace_with_negated_pattern() {
        let package = parse(
            r#"
packages:
  - "packages/*"
  - "!packages/dont-scan-me"
"#,
        );

        assert_eq!(
            workspace_patterns(package),
            vec!["packages/*", "!packages/dont-scan-me"]
        );
    }

    #[test]
    fn test_parse_workspace_with_depth_pattern() {
        let package = parse(
            r#"
packages:
  - "**/components/*"
"#,
        );

        assert_eq!(workspace_patterns(package), vec!["**/components/*"]);
    }

    #[test]
    fn test_parse_workspace_with_no_packages() {
        let package = parse(
            r#"
name: my-workspace
"#,
        );

        assert_eq!(package.package_type, Some(PackageType::Npm));
        assert!(package.extra_data.is_some());

        let extra = package.extra_data.unwrap();
        let datasource = extra.get("datasource_id").unwrap().as_str().unwrap();
        assert_eq!(datasource, "pnpm_workspace_yaml");
        assert!(!extra.contains_key("workspaces"));
    }

    #[test]
    fn test_parse_workspace_with_empty_packages_array() {
        let package = parse(
            r#"
packages: []
"#,
        );

        assert_eq!(package.package_type, Some(PackageType::Npm));

        // Either no extra data at all, or extra data without a `workspaces` key.
        let lists_workspaces = match package.extra_data {
            Some(extra) => extra.contains_key("workspaces"),
            None => false,
        };
        assert!(!lists_workspaces);
    }

    #[test]
    fn test_default_package_data() {
        let package = default_package_data();

        assert_eq!(package.package_type, Some(PackageType::Npm));
        assert!(package.name.is_none());
        assert!(package.version.is_none());
        assert!(package.extra_data.is_none());
    }
}
277
// Invokes the crate-level `register_parser!` macro for this parser.
// NOTE(review): the macro is defined outside this file; judging by their
// values the arguments are a human-readable description, the path glob(s),
// the package type, the primary language, and a documentation URL — confirm
// against the macro definition.
crate::register_parser!(
    "pnpm workspace yaml file",
    &["**/pnpm-workspace.yaml"],
    "npm",
    "JavaScript",
    Some("https://pnpm.io/pnpm-workspace_yaml"),
);