provenant/parsers/
yarn_pnp.rs1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
5use crate::parser_warn as warn;
6use crate::parsers::utils::{MAX_ITERATION_COUNT, npm_purl, truncate_field};
7
8use super::PackageParser;
9
10pub struct YarnPnpParser;
11
12impl PackageParser for YarnPnpParser {
13 const PACKAGE_TYPE: PackageType = PackageType::Npm;
14
15 fn is_match(path: &Path) -> bool {
16 path.file_name().and_then(|name| name.to_str()) == Some(".pnp.cjs")
17 }
18
19 fn extract_packages(path: &Path) -> Vec<PackageData> {
20 let content = match crate::parsers::utils::read_file_to_string(path, None) {
21 Ok(content) => content,
22 Err(error) => {
23 warn!("Failed to read .pnp.cjs at {:?}: {}", path, error);
24 return vec![default_package_data()];
25 }
26 };
27
28 match parse_yarn_pnp(&content) {
29 Ok(package_data) => vec![package_data],
30 Err(error) => {
31 warn!("Failed to parse .pnp.cjs at {:?}: {}", path, error);
32 vec![default_package_data()]
33 }
34 }
35 }
36}
37
38fn default_package_data() -> PackageData {
39 PackageData {
40 package_type: Some(YarnPnpParser::PACKAGE_TYPE),
41 primary_language: Some("JavaScript".to_string()),
42 datasource_id: Some(DatasourceId::YarnPnpCjs),
43 ..Default::default()
44 }
45}
46
47fn parse_yarn_pnp(content: &str) -> Result<PackageData, String> {
48 let json_text = extract_raw_runtime_state_json(content)
49 .ok_or_else(|| "RAW_RUNTIME_STATE object not found in .pnp.cjs".to_string())?;
50 let runtime_state: serde_json::Value = serde_json::from_str(json_text)
51 .map_err(|error| format!("invalid RAW_RUNTIME_STATE JSON: {error}"))?;
52
53 let registry_entries = runtime_state
54 .get("packageRegistryData")
55 .and_then(serde_json::Value::as_array)
56 .ok_or_else(|| "packageRegistryData missing from RAW_RUNTIME_STATE".to_string())?;
57
58 let root_refs = registry_entries
59 .iter()
60 .find_map(parse_root_dependency_map)
61 .unwrap_or_default();
62 let mut seen_locators = HashSet::new();
63 let mut dependencies = Vec::new();
64
65 for entry in registry_entries.iter().take(MAX_ITERATION_COUNT) {
66 let Some(locator) = entry.get(0).and_then(serde_json::Value::as_str) else {
67 continue;
68 };
69 if !seen_locators.insert(locator.to_string()) {
70 continue;
71 }
72 let Some((name, reference)) = split_locator(locator) else {
73 continue;
74 };
75
76 let version = reference
77 .strip_prefix("npm:")
78 .map(|v| truncate_field(v.to_string()));
79 dependencies.push(Dependency {
80 purl: npm_purl(
81 truncate_field(name.to_string()).as_str(),
82 version.as_deref(),
83 ),
84 extracted_requirement: Some(truncate_field(reference.to_string())),
85 scope: Some("dependencies".to_string()),
86 is_runtime: Some(true),
87 is_optional: Some(false),
88 is_pinned: Some(version.is_some()),
89 is_direct: Some(
90 root_refs
91 .get(name)
92 .is_some_and(|root_ref| root_ref == reference),
93 ),
94 resolved_package: None,
95 extra_data: Some(HashMap::from([(
96 "locator".to_string(),
97 serde_json::Value::String(truncate_field(locator.to_string())),
98 )])),
99 });
100 }
101
102 let mut package = default_package_data();
103 package.dependencies = dependencies;
104 package.extra_data = Some(HashMap::from([(
105 "package_registry_entries".to_string(),
106 serde_json::Value::from(registry_entries.len()),
107 )]));
108 Ok(package)
109}
110
111fn parse_root_dependency_map(entry: &serde_json::Value) -> Option<HashMap<String, String>> {
112 if !entry.get(0).is_some_and(serde_json::Value::is_null) {
113 return None;
114 }
115
116 let dependencies = entry.get(1)?.get("packageDependencies")?;
117 Some(parse_dependency_pairs(dependencies))
118}
119
120fn parse_dependency_pairs(value: &serde_json::Value) -> HashMap<String, String> {
121 if let Some(array) = value.as_array() {
122 return array
123 .iter()
124 .take(MAX_ITERATION_COUNT)
125 .filter_map(|pair| {
126 let pair = pair.as_array()?;
127 let name = pair.first()?.as_str()?;
128 let reference = pair.get(1)?.as_str()?;
129 Some((
130 truncate_field(name.to_string()),
131 truncate_field(reference.to_string()),
132 ))
133 })
134 .collect();
135 }
136
137 value
138 .as_object()
139 .into_iter()
140 .flatten()
141 .take(MAX_ITERATION_COUNT)
142 .filter_map(|(name, reference)| {
143 reference.as_str().map(|reference| {
144 (
145 truncate_field(name.clone()),
146 truncate_field(reference.to_string()),
147 )
148 })
149 })
150 .collect()
151}
152
/// Splits a locator of the form `name@reference` into `(name, reference)`.
///
/// The separator is the first `@` found after the leading character. That
/// keeps the `@` of a scoped name (`@scope/pkg@npm:1.0.0`) inside the name,
/// and keeps any `@` that occurs inside the reference — for example a
/// `patch:` reference embedding another locator, or a `workspace:` path that
/// contains a scoped directory — inside the reference instead of splitting
/// the locator in the wrong place.
///
/// Returns `None` when no such separator exists (empty input, a bare name, or
/// a scoped name without a reference). The reference half may be empty when
/// the locator ends with `@`.
fn split_locator(locator: &str) -> Option<(&str, &str)> {
    // Skipping the first character means a leading scope marker is never
    // mistaken for the separator; `char_indices` keeps the offsets on valid
    // UTF-8 boundaries even when the name starts with a multi-byte character.
    let (split_at, _) = locator
        .char_indices()
        .skip(1)
        .find(|&(_, ch)| ch == '@')?;

    // '@' is a single byte, so `split_at + 1` is also a character boundary.
    Some((&locator[..split_at], &locator[split_at + 1..]))
}
160
/// Returns the brace-balanced object that follows `const RAW_RUNTIME_STATE =`.
///
/// Scanning starts at the first `{` after the marker and stops at the `}`
/// that brings the nesting depth back to zero. Braces inside double-quoted
/// strings are ignored, and a backslash inside such a string escapes the next
/// character. Returns `None` when the marker or an opening brace is missing,
/// or when the object is never closed.
///
/// NOTE(review): the returned slice is the raw text between the braces. If an
/// input stores the state as a quoted JavaScript string with line
/// continuations, those extra backslashes end up in the slice — confirm
/// against real inputs.
fn extract_raw_runtime_state_json(content: &str) -> Option<&str> {
    const MARKER: &str = "const RAW_RUNTIME_STATE =";

    let tail_start = content.find(MARKER)? + MARKER.len();
    let json_start = tail_start + content[tail_start..].find('{')?;
    let candidate = &content[json_start..];

    let mut open_braces = 0usize;
    let mut inside_string = false;
    let mut after_backslash = false;

    // Every delimiter of interest is ASCII, and ASCII bytes never occur inside
    // a multi-byte UTF-8 sequence, so a byte-wise scan sees the same
    // delimiters at the same offsets as a char-wise one.
    for (index, byte) in candidate.bytes().enumerate() {
        if inside_string {
            if after_backslash {
                after_backslash = false;
            } else {
                match byte {
                    b'\\' => after_backslash = true,
                    b'"' => inside_string = false,
                    _ => {}
                }
            }
            continue;
        }

        match byte {
            b'"' => inside_string = true,
            b'{' => open_braces += 1,
            b'}' => {
                open_braces = open_braces.saturating_sub(1);
                if open_braces == 0 {
                    // Inclusive of the closing brace itself.
                    return Some(&candidate[..=index]);
                }
            }
            _ => {}
        }
    }

    None
}
200
// Registers metadata for this parser through the crate's `register_parser!`
// macro. NOTE(review): the arguments appear to be, in order, a description,
// the path globs handled (matching the `.pnp.cjs` name checked in
// `is_match`), the package type, the primary language, and a documentation
// URL — confirm against the macro definition.
crate::register_parser!(
    "yarn plug and play runtime state",
    &["**/.pnp.cjs"],
    "npm",
    "JavaScript",
    Some("https://yarnpkg.com/features/pnp"),
);