Skip to main content

provenant/parsers/
npm_workspace.rs

1//! Parser for npm/pnpm workspace configuration files.
2//!
3//! Extracts workspace package patterns and monorepo structure from workspace
4//! configuration files used by npm, yarn, and pnpm to define workspaces.
5//!
6//! # Supported Formats
7//! - pnpm-workspace.yaml (YAML workspace configuration)
8//!
9//! # Key Features
10//! - Workspace package pattern extraction (glob patterns for package locations)
11//! - Monorepo structure detection and documentation
12//! - Package discovery from workspace configurations
13//!
14//! # Implementation Notes
//! - Parses the YAML document and reads its top-level `packages` sequence
16//! - Package patterns are glob expressions (e.g., `packages/*`, `@scoped/**`)
17//! - Returns package data representing the workspace configuration itself
18
19use crate::models::PackageData;
20use crate::models::{DatasourceId, PackageType};
21use crate::parser_warn as warn;
22use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
23use std::path::Path;
24use yaml_serde::Value;
25
26use super::PackageParser;
27
28/// npm workspace parser for pnpm-workspace.yaml files.
29///
30/// Extracts workspace package patterns for monorepo configurations.
31pub struct NpmWorkspaceParser;
32
33impl PackageParser for NpmWorkspaceParser {
34    const PACKAGE_TYPE: PackageType = PackageType::Npm;
35
36    fn is_match(path: &Path) -> bool {
37        path.file_name()
38            .and_then(|name| name.to_str())
39            .map(|name| name == "pnpm-workspace.yaml")
40            .unwrap_or(false)
41    }
42
43    fn extract_packages(path: &Path) -> Vec<PackageData> {
44        let content = match read_file_to_string(path, None) {
45            Ok(content) => content,
46            Err(e) => {
47                warn!("Failed to read npm workspace file at {:?}: {}", path, e);
48                return vec![default_package_data()];
49            }
50        };
51
52        let workspace_data: Value = match yaml_serde::from_str(&content) {
53            Ok(data) => data,
54            Err(e) => {
55                crate::parser_warn!("Failed to parse npm workspace file at {:?}: {}", path, e);
56                return vec![default_package_data()];
57            }
58        };
59
60        vec![parse_workspace_file(&workspace_data)]
61    }
62}
63
64/// Returns a default empty PackageData for error cases
65fn default_package_data() -> PackageData {
66    PackageData {
67        package_type: Some(NpmWorkspaceParser::PACKAGE_TYPE),
68        datasource_id: Some(DatasourceId::PnpmWorkspaceYaml),
69        ..Default::default()
70    }
71}
72
73/// Parse a pnpm-workspace.yaml file and extract workspace configuration
74fn parse_workspace_file(workspace_data: &Value) -> PackageData {
75    // Extract the `packages` field which contains workspace patterns
76    let workspaces = workspace_data.get("packages").and_then(|v| v.as_sequence());
77
78    match workspaces {
79        Some(workspace_patterns) => {
80            let workspaces_vec: Vec<String> = workspace_patterns
81                .iter()
82                .take(MAX_ITERATION_COUNT)
83                .filter_map(|v| v.as_str())
84                .map(|s| truncate_field(s.to_string()))
85                .collect();
86
87            PackageData {
88                package_type: Some(NpmWorkspaceParser::PACKAGE_TYPE),
89                extra_data: if workspaces_vec.is_empty() {
90                    None
91                } else {
92                    let mut extra = std::collections::HashMap::new();
93                    extra.insert(
94                        "datasource_id".to_string(),
95                        serde_json::Value::String("pnpm_workspace_yaml".to_string()),
96                    );
97                    extra.insert(
98                        "workspaces".to_string(),
99                        serde_json::Value::Array(
100                            workspaces_vec
101                                .into_iter()
102                                .map(serde_json::Value::String)
103                                .collect(),
104                        ),
105                    );
106                    Some(extra)
107                },
108                ..default_package_data()
109            }
110        }
111        None => {
112            // No workspaces found, return basic package data
113            PackageData {
114                package_type: Some(NpmWorkspaceParser::PACKAGE_TYPE),
115                extra_data: {
116                    let mut extra = std::collections::HashMap::new();
117                    extra.insert(
118                        "datasource_id".to_string(),
119                        serde_json::Value::String("pnpm_workspace_yaml".to_string()),
120                    );
121                    Some(extra)
122                },
123                ..default_package_data()
124            }
125        }
126    }
127}
128
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserializes `yaml` and feeds the resulting document to
    /// `parse_workspace_file`.
    fn parse_yaml(yaml: &str) -> PackageData {
        let document: Value = yaml_serde::from_str(yaml).unwrap();
        parse_workspace_file(&document)
    }

    /// Collects the `workspaces` strings from `extra_data`, panicking if the
    /// map, the key, the array, or a string entry is missing.
    fn workspace_patterns(package: &PackageData) -> Vec<&str> {
        package
            .extra_data
            .as_ref()
            .unwrap()
            .get("workspaces")
            .unwrap()
            .as_array()
            .unwrap()
            .iter()
            .map(|entry| entry.as_str().unwrap())
            .collect()
    }

    /// Reads the `datasource_id` string recorded in `extra_data`.
    fn recorded_datasource_id(package: &PackageData) -> &str {
        package
            .extra_data
            .as_ref()
            .unwrap()
            .get("datasource_id")
            .unwrap()
            .as_str()
            .unwrap()
    }

    #[test]
    fn test_is_match() {
        assert!(NpmWorkspaceParser::is_match(Path::new(
            "pnpm-workspace.yaml"
        )));

        for other in ["package.json", "pnpm-lock.yaml", "README.md"] {
            assert!(!NpmWorkspaceParser::is_match(Path::new(other)));
        }
    }

    #[test]
    fn test_parse_workspace_with_single_package() {
        let yaml = r#"
packages:
  - "packages/*"
"#;
        let package = parse_yaml(yaml);

        assert_eq!(package.package_type, Some(PackageType::Npm));
        assert_eq!(recorded_datasource_id(&package), "pnpm_workspace_yaml");
        assert_eq!(workspace_patterns(&package), ["packages/*"]);
    }

    #[test]
    fn test_parse_workspace_with_multiple_packages() {
        let yaml = r#"
packages:
  - "packages/*"
  - "apps/*"
  - "tools/*"
"#;
        let package = parse_yaml(yaml);

        assert_eq!(
            workspace_patterns(&package),
            ["packages/*", "apps/*", "tools/*"]
        );
    }

    #[test]
    fn test_parse_workspace_with_wildcard_pattern() {
        let yaml = r#"
packages:
  - "*"
"#;
        let package = parse_yaml(yaml);

        assert_eq!(workspace_patterns(&package), ["*"]);
    }

    #[test]
    fn test_parse_workspace_with_negated_pattern() {
        let yaml = r#"
packages:
  - "packages/*"
  - "!packages/dont-scan-me"
"#;
        let package = parse_yaml(yaml);

        assert_eq!(
            workspace_patterns(&package),
            ["packages/*", "!packages/dont-scan-me"]
        );
    }

    #[test]
    fn test_parse_workspace_with_depth_pattern() {
        let yaml = r#"
packages:
  - "**/components/*"
"#;
        let package = parse_yaml(yaml);

        assert_eq!(workspace_patterns(&package), ["**/components/*"]);
    }

    #[test]
    fn test_parse_workspace_with_no_packages() {
        let yaml = r#"
name: my-workspace
"#;
        let package = parse_yaml(yaml);

        assert_eq!(package.package_type, Some(PackageType::Npm));
        assert!(package.extra_data.is_some());
        assert_eq!(recorded_datasource_id(&package), "pnpm_workspace_yaml");
        assert!(!package.extra_data.unwrap().contains_key("workspaces"));
    }

    #[test]
    fn test_parse_workspace_with_empty_packages_array() {
        let yaml = r#"
packages: []
"#;
        let package = parse_yaml(yaml);

        assert_eq!(package.package_type, Some(PackageType::Npm));
        // Either no extra data at all, or extra data without a `workspaces` key.
        if let Some(extra) = package.extra_data {
            assert!(!extra.contains_key("workspaces"));
        }
    }

    #[test]
    fn test_default_package_data() {
        let package = default_package_data();

        assert_eq!(package.package_type, Some(PackageType::Npm));
        assert!(package.name.is_none());
        assert!(package.version.is_none());
        assert!(package.extra_data.is_none());
    }
}
279
// Invokes the crate's `register_parser!` macro with this parser's metadata.
// NOTE(review): the positional arguments look like a human-readable
// description, the path globs the parser applies to, the package type, the
// primary language, and a documentation URL — confirm against the macro
// definition, which is not visible from this file.
crate::register_parser!(
    "pnpm workspace yaml file",
    &["**/pnpm-workspace.yaml"],
    "npm",
    "JavaScript",
    Some("https://pnpm.io/pnpm-workspace_yaml"),
);