provenant/parsers/
yarn_pnp.rs1use std::collections::{HashMap, HashSet};
5use std::path::Path;
6
7use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
8use crate::parser_warn as warn;
9use crate::parsers::utils::{MAX_ITERATION_COUNT, npm_purl, truncate_field};
10
11use super::PackageParser;
12
13pub struct YarnPnpParser;
14
15impl PackageParser for YarnPnpParser {
16 const PACKAGE_TYPE: PackageType = PackageType::Npm;
17
18 fn is_match(path: &Path) -> bool {
19 path.file_name().and_then(|name| name.to_str()) == Some(".pnp.cjs")
20 }
21
22 fn extract_packages(path: &Path) -> Vec<PackageData> {
23 let content = match crate::parsers::utils::read_file_to_string(path, None) {
24 Ok(content) => content,
25 Err(error) => {
26 warn!("Failed to read .pnp.cjs at {:?}: {}", path, error);
27 return vec![default_package_data()];
28 }
29 };
30
31 match parse_yarn_pnp(&content) {
32 Ok(package_data) => vec![package_data],
33 Err(error) => {
34 warn!("Failed to parse .pnp.cjs at {:?}: {}", path, error);
35 vec![default_package_data()]
36 }
37 }
38 }
39}
40
41fn default_package_data() -> PackageData {
42 PackageData {
43 package_type: Some(YarnPnpParser::PACKAGE_TYPE),
44 primary_language: Some("JavaScript".to_string()),
45 datasource_id: Some(DatasourceId::YarnPnpCjs),
46 ..Default::default()
47 }
48}
49
50fn parse_yarn_pnp(content: &str) -> Result<PackageData, String> {
51 let json_text = extract_raw_runtime_state_json(content)
52 .ok_or_else(|| "RAW_RUNTIME_STATE object not found in .pnp.cjs".to_string())?;
53 let runtime_state: serde_json::Value = serde_json::from_str(json_text)
54 .map_err(|error| format!("invalid RAW_RUNTIME_STATE JSON: {error}"))?;
55
56 let registry_entries = runtime_state
57 .get("packageRegistryData")
58 .and_then(serde_json::Value::as_array)
59 .ok_or_else(|| "packageRegistryData missing from RAW_RUNTIME_STATE".to_string())?;
60
61 let root_refs = registry_entries
62 .iter()
63 .find_map(parse_root_dependency_map)
64 .unwrap_or_default();
65 let mut seen_locators = HashSet::new();
66 let mut dependencies = Vec::new();
67
68 for entry in registry_entries.iter().take(MAX_ITERATION_COUNT) {
69 let Some(locator) = entry.get(0).and_then(serde_json::Value::as_str) else {
70 continue;
71 };
72 if !seen_locators.insert(locator.to_string()) {
73 continue;
74 }
75 let Some((name, reference)) = split_locator(locator) else {
76 continue;
77 };
78
79 let version = reference
80 .strip_prefix("npm:")
81 .map(|v| truncate_field(v.to_string()));
82 dependencies.push(Dependency {
83 purl: npm_purl(
84 truncate_field(name.to_string()).as_str(),
85 version.as_deref(),
86 ),
87 extracted_requirement: Some(truncate_field(reference.to_string())),
88 scope: Some("dependencies".to_string()),
89 is_runtime: Some(true),
90 is_optional: Some(false),
91 is_pinned: Some(version.is_some()),
92 is_direct: Some(
93 root_refs
94 .get(name)
95 .is_some_and(|root_ref| root_ref == reference),
96 ),
97 resolved_package: None,
98 extra_data: Some(HashMap::from([(
99 "locator".to_string(),
100 serde_json::Value::String(truncate_field(locator.to_string())),
101 )])),
102 });
103 }
104
105 let mut package = default_package_data();
106 package.dependencies = dependencies;
107 package.extra_data = Some(HashMap::from([(
108 "package_registry_entries".to_string(),
109 serde_json::Value::from(registry_entries.len()),
110 )]));
111 Ok(package)
112}
113
114fn parse_root_dependency_map(entry: &serde_json::Value) -> Option<HashMap<String, String>> {
115 if !entry.get(0).is_some_and(serde_json::Value::is_null) {
116 return None;
117 }
118
119 let dependencies = entry.get(1)?.get("packageDependencies")?;
120 Some(parse_dependency_pairs(dependencies))
121}
122
123fn parse_dependency_pairs(value: &serde_json::Value) -> HashMap<String, String> {
124 if let Some(array) = value.as_array() {
125 return array
126 .iter()
127 .take(MAX_ITERATION_COUNT)
128 .filter_map(|pair| {
129 let pair = pair.as_array()?;
130 let name = pair.first()?.as_str()?;
131 let reference = pair.get(1)?.as_str()?;
132 Some((
133 truncate_field(name.to_string()),
134 truncate_field(reference.to_string()),
135 ))
136 })
137 .collect();
138 }
139
140 value
141 .as_object()
142 .into_iter()
143 .flatten()
144 .take(MAX_ITERATION_COUNT)
145 .filter_map(|(name, reference)| {
146 reference.as_str().map(|reference| {
147 (
148 truncate_field(name.clone()),
149 truncate_field(reference.to_string()),
150 )
151 })
152 })
153 .collect()
154}
155
/// Splits a `name@reference` locator into its name and reference parts.
///
/// The split happens at the first `@` that is not the leading character, so a
/// scoped name keeps its `@scope/` prefix and a reference that itself contains
/// `@` (for example `patch:resolve@npm%3A1.22.0#...`) stays intact.
///
/// Returns `None` when the locator has no `@` after its first character.
fn split_locator(locator: &str) -> Option<(&str, &str)> {
    let split_at = locator
        .match_indices('@')
        .map(|(index, _)| index)
        // Index 0 can only be the scope marker of a scoped package name.
        .find(|&index| index > 0)?;
    Some((&locator[..split_at], &locator[split_at + 1..]))
}
163
/// Returns the first brace-balanced `{ ... }` span that follows the
/// `const RAW_RUNTIME_STATE =` marker in `content`.
///
/// Braces inside double-quoted strings are ignored, and a backslash inside a
/// string escapes the character after it. Returns `None` when the marker is
/// absent, no `{` follows it, or the braces never balance.
fn extract_raw_runtime_state_json(content: &str) -> Option<&str> {
    const MARKER: &str = "const RAW_RUNTIME_STATE =";

    let (_, tail) = content.split_once(MARKER)?;
    let candidate = &tail[tail.find('{')?..];

    let mut nesting = 0usize;
    let mut inside_string = false;
    let mut skip_next = false;

    // Every delimiter of interest is ASCII, and no byte of a multi-byte UTF-8
    // sequence equals an ASCII byte, so a byte-wise scan is sufficient.
    for (index, byte) in candidate.bytes().enumerate() {
        match byte {
            _ if skip_next => skip_next = false,
            b'\\' if inside_string => skip_next = true,
            b'"' => inside_string = !inside_string,
            _ if inside_string => {}
            b'{' => nesting += 1,
            b'}' => {
                nesting = nesting.saturating_sub(1);
                if nesting == 0 {
                    return Some(&candidate[..=index]);
                }
            }
            _ => {}
        }
    }

    None
}
203
// Registers metadata for this parser via the crate-level `register_parser!`
// macro. NOTE(review): the arguments appear to be, in order, a description,
// the matching path globs, the package type, the primary language and a
// documentation URL — confirm against the macro definition.
crate::register_parser!(
    "yarn plug and play runtime state",
    &["**/.pnp.cjs"],
    "npm",
    "JavaScript",
    Some("https://yarnpkg.com/features/pnp"),
);