provenant/parsers/
yarn_pnp.rs1use std::collections::{HashMap, HashSet};
2use std::fs;
3use std::path::Path;
4
5use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
6use crate::parser_warn as warn;
7use crate::parsers::utils::npm_purl;
8
9use super::PackageParser;
10
11pub struct YarnPnpParser;
12
13impl PackageParser for YarnPnpParser {
14 const PACKAGE_TYPE: PackageType = PackageType::Npm;
15
16 fn is_match(path: &Path) -> bool {
17 path.file_name().and_then(|name| name.to_str()) == Some(".pnp.cjs")
18 }
19
20 fn extract_packages(path: &Path) -> Vec<PackageData> {
21 let content = match fs::read_to_string(path) {
22 Ok(content) => content,
23 Err(error) => {
24 warn!("Failed to read .pnp.cjs at {:?}: {}", path, error);
25 return vec![default_package_data()];
26 }
27 };
28
29 match parse_yarn_pnp(&content) {
30 Ok(package_data) => vec![package_data],
31 Err(error) => {
32 warn!("Failed to parse .pnp.cjs at {:?}: {}", path, error);
33 vec![default_package_data()]
34 }
35 }
36 }
37}
38
39fn default_package_data() -> PackageData {
40 PackageData {
41 package_type: Some(YarnPnpParser::PACKAGE_TYPE),
42 primary_language: Some("JavaScript".to_string()),
43 datasource_id: Some(DatasourceId::YarnPnpCjs),
44 ..Default::default()
45 }
46}
47
48fn parse_yarn_pnp(content: &str) -> Result<PackageData, String> {
49 let json_text = extract_raw_runtime_state_json(content)
50 .ok_or_else(|| "RAW_RUNTIME_STATE object not found in .pnp.cjs".to_string())?;
51 let runtime_state: serde_json::Value = serde_json::from_str(json_text)
52 .map_err(|error| format!("invalid RAW_RUNTIME_STATE JSON: {error}"))?;
53
54 let registry_entries = runtime_state
55 .get("packageRegistryData")
56 .and_then(serde_json::Value::as_array)
57 .ok_or_else(|| "packageRegistryData missing from RAW_RUNTIME_STATE".to_string())?;
58
59 let root_refs = registry_entries
60 .iter()
61 .find_map(parse_root_dependency_map)
62 .unwrap_or_default();
63 let mut seen_locators = HashSet::new();
64 let mut dependencies = Vec::new();
65
66 for entry in registry_entries {
67 let Some(locator) = entry.get(0).and_then(serde_json::Value::as_str) else {
68 continue;
69 };
70 if !seen_locators.insert(locator.to_string()) {
71 continue;
72 }
73 let Some((name, reference)) = split_locator(locator) else {
74 continue;
75 };
76
77 let version = reference.strip_prefix("npm:");
78 dependencies.push(Dependency {
79 purl: npm_purl(name, version),
80 extracted_requirement: Some(reference.to_string()),
81 scope: Some("dependencies".to_string()),
82 is_runtime: Some(true),
83 is_optional: Some(false),
84 is_pinned: Some(version.is_some()),
85 is_direct: Some(
86 root_refs
87 .get(name)
88 .is_some_and(|root_ref| root_ref == reference),
89 ),
90 resolved_package: None,
91 extra_data: Some(HashMap::from([(
92 "locator".to_string(),
93 serde_json::Value::String(locator.to_string()),
94 )])),
95 });
96 }
97
98 let mut package = default_package_data();
99 package.dependencies = dependencies;
100 package.extra_data = Some(HashMap::from([(
101 "package_registry_entries".to_string(),
102 serde_json::Value::from(registry_entries.len()),
103 )]));
104 Ok(package)
105}
106
107fn parse_root_dependency_map(entry: &serde_json::Value) -> Option<HashMap<String, String>> {
108 if !entry.get(0).is_some_and(serde_json::Value::is_null) {
109 return None;
110 }
111
112 let dependencies = entry.get(1)?.get("packageDependencies")?;
113 Some(parse_dependency_pairs(dependencies))
114}
115
116fn parse_dependency_pairs(value: &serde_json::Value) -> HashMap<String, String> {
117 if let Some(array) = value.as_array() {
118 return array
119 .iter()
120 .filter_map(|pair| {
121 let pair = pair.as_array()?;
122 let name = pair.first()?.as_str()?;
123 let reference = pair.get(1)?.as_str()?;
124 Some((name.to_string(), reference.to_string()))
125 })
126 .collect();
127 }
128
129 value
130 .as_object()
131 .into_iter()
132 .flatten()
133 .filter_map(|(name, reference)| {
134 reference
135 .as_str()
136 .map(|reference| (name.clone(), reference.to_string()))
137 })
138 .collect()
139}
140
/// Splits a locator of the form `name@reference` into `(name, reference)`.
///
/// The separator is the first `@` that is not the very first character, so a
/// leading scope sigil (`@scope/name@…`) stays part of the name. Everything
/// after the separator belongs to the reference, even if it contains further
/// `@` characters (for example `patch:pkg@npm%3A1.0.0#…` or
/// `git@github.com:…`).
///
/// Returns `None` when no separator exists, i.e. the locator is empty, has no
/// `@`, or its only `@` is at index 0.
fn split_locator(locator: &str) -> Option<(&str, &str)> {
    // Skipping one character rules out a scope sigil at index 0 and keeps the
    // returned index on a char boundary.
    let split_at = locator
        .char_indices()
        .skip(1)
        .find_map(|(index, ch)| (ch == '@').then_some(index))?;

    // '@' is one byte long, so the reference starts right after it.
    Some((&locator[..split_at], &locator[split_at + 1..]))
}
148
/// Returns the brace-balanced object text that follows
/// `const RAW_RUNTIME_STATE =` in `content`.
///
/// Scanning starts at the first `{` after the marker and stops at the `}` that
/// brings the nesting depth back to zero. Braces inside double-quoted strings
/// are ignored, and a backslash inside a string escapes the next character.
///
/// Returns `None` when the marker is absent, no `{` follows it, or the object
/// is never closed.
fn extract_raw_runtime_state_json(content: &str) -> Option<&str> {
    const MARKER: &str = "const RAW_RUNTIME_STATE =";

    let search_from = content.find(MARKER)? + MARKER.len();
    let json_start = search_from + content[search_from..].find('{')?;

    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;

    // Every delimiter of interest is ASCII, and UTF-8 continuation bytes can
    // never equal an ASCII byte, so scanning bytes is equivalent to scanning chars.
    for (offset, &byte) in content.as_bytes()[json_start..].iter().enumerate() {
        match (in_string, byte) {
            // Inside a string: only escapes and the closing quote matter.
            (true, _) if escaped => escaped = false,
            (true, b'\\') => escaped = true,
            (true, b'"') => in_string = false,
            (true, _) => {}
            // Outside a string: track quotes and brace depth.
            (false, b'"') => in_string = true,
            (false, b'{') => depth += 1,
            (false, b'}') => {
                depth = depth.saturating_sub(1);
                if depth == 0 {
                    return Some(&content[json_start..=json_start + offset]);
                }
            }
            (false, _) => {}
        }
    }

    None
}
188
189crate::register_parser!(
190 "yarn plug and play runtime state",
191 &["**/.pnp.cjs"],
192 "npm",
193 "JavaScript",
194 Some("https://yarnpkg.com/features/pnp"),
195);