provenant/parsers/
yarn_pnp.rs1use std::collections::{HashMap, HashSet};
5use std::path::Path;
6
7use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
8use crate::parser_warn as warn;
9use crate::parsers::utils::{MAX_ITERATION_COUNT, npm_purl, truncate_field};
10
11use super::PackageParser;
12
13pub struct YarnPnpParser;
14
15impl PackageParser for YarnPnpParser {
16 const PACKAGE_TYPE: PackageType = PackageType::Npm;
17
18 fn is_match(path: &Path) -> bool {
19 path.file_name().and_then(|name| name.to_str()) == Some(".pnp.cjs")
20 }
21
22 fn extract_packages(path: &Path) -> Vec<PackageData> {
23 let content = match crate::parsers::utils::read_file_to_string(path, None) {
24 Ok(content) => content,
25 Err(error) => {
26 warn!("Failed to read .pnp.cjs at {:?}: {}", path, error);
27 return vec![default_package_data()];
28 }
29 };
30
31 match parse_yarn_pnp(&content) {
32 Ok(package_data) => vec![package_data],
33 Err(error) => {
34 warn!("Failed to parse .pnp.cjs at {:?}: {}", path, error);
35 vec![default_package_data()]
36 }
37 }
38 }
39
40 fn metadata() -> Vec<super::metadata::ParserMetadata> {
41 vec![super::metadata::ParserMetadata {
42 description: "yarn plug and play runtime state",
43 file_patterns: &["**/.pnp.cjs"],
44 package_type: "npm",
45 primary_language: "JavaScript",
46 documentation_url: Some("https://yarnpkg.com/features/pnp"),
47 }]
48 }
49}
50
51fn default_package_data() -> PackageData {
52 PackageData {
53 package_type: Some(YarnPnpParser::PACKAGE_TYPE),
54 primary_language: Some("JavaScript".to_string()),
55 datasource_id: Some(DatasourceId::YarnPnpCjs),
56 ..Default::default()
57 }
58}
59
60fn parse_yarn_pnp(content: &str) -> Result<PackageData, String> {
61 let json_text = extract_raw_runtime_state_json(content)
62 .ok_or_else(|| "RAW_RUNTIME_STATE object not found in .pnp.cjs".to_string())?;
63 let runtime_state: serde_json::Value = serde_json::from_str(json_text)
64 .map_err(|error| format!("invalid RAW_RUNTIME_STATE JSON: {error}"))?;
65
66 let registry_entries = runtime_state
67 .get("packageRegistryData")
68 .and_then(serde_json::Value::as_array)
69 .ok_or_else(|| "packageRegistryData missing from RAW_RUNTIME_STATE".to_string())?;
70
71 let root_refs = registry_entries
72 .iter()
73 .find_map(parse_root_dependency_map)
74 .unwrap_or_default();
75 let mut seen_locators = HashSet::new();
76 let mut dependencies = Vec::new();
77
78 for entry in registry_entries.iter().take(MAX_ITERATION_COUNT) {
79 let Some(locator) = entry.get(0).and_then(serde_json::Value::as_str) else {
80 continue;
81 };
82 if !seen_locators.insert(locator.to_string()) {
83 continue;
84 }
85 let Some((name, reference)) = split_locator(locator) else {
86 continue;
87 };
88
89 let version = reference
90 .strip_prefix("npm:")
91 .map(|v| truncate_field(v.to_string()));
92 dependencies.push(Dependency {
93 purl: npm_purl(
94 truncate_field(name.to_string()).as_str(),
95 version.as_deref(),
96 ),
97 extracted_requirement: Some(truncate_field(reference.to_string())),
98 scope: Some("dependencies".to_string()),
99 is_runtime: Some(true),
100 is_optional: Some(false),
101 is_pinned: Some(version.is_some()),
102 is_direct: Some(
103 root_refs
104 .get(name)
105 .is_some_and(|root_ref| root_ref == reference),
106 ),
107 resolved_package: None,
108 extra_data: Some(HashMap::from([(
109 "locator".to_string(),
110 serde_json::Value::String(truncate_field(locator.to_string())),
111 )])),
112 });
113 }
114
115 let mut package = default_package_data();
116 package.dependencies = dependencies;
117 package.extra_data = Some(HashMap::from([(
118 "package_registry_entries".to_string(),
119 serde_json::Value::from(registry_entries.len()),
120 )]));
121 Ok(package)
122}
123
124fn parse_root_dependency_map(entry: &serde_json::Value) -> Option<HashMap<String, String>> {
125 if !entry.get(0).is_some_and(serde_json::Value::is_null) {
126 return None;
127 }
128
129 let dependencies = entry.get(1)?.get("packageDependencies")?;
130 Some(parse_dependency_pairs(dependencies))
131}
132
133fn parse_dependency_pairs(value: &serde_json::Value) -> HashMap<String, String> {
134 if let Some(array) = value.as_array() {
135 return array
136 .iter()
137 .take(MAX_ITERATION_COUNT)
138 .filter_map(|pair| {
139 let pair = pair.as_array()?;
140 let name = pair.first()?.as_str()?;
141 let reference = pair.get(1)?.as_str()?;
142 Some((
143 truncate_field(name.to_string()),
144 truncate_field(reference.to_string()),
145 ))
146 })
147 .collect();
148 }
149
150 value
151 .as_object()
152 .into_iter()
153 .flatten()
154 .take(MAX_ITERATION_COUNT)
155 .filter_map(|(name, reference)| {
156 reference.as_str().map(|reference| {
157 (
158 truncate_field(name.clone()),
159 truncate_field(reference.to_string()),
160 )
161 })
162 })
163 .collect()
164}
165
/// Splits a Yarn locator of the form `name@reference` into `(name, reference)`.
///
/// A leading `@` belongs to a scoped package name (`@scope/pkg`), so the
/// separator is the first `@` found after an optional scope marker. The split
/// is deliberately not taken at the last `@`: references may contain `@`
/// themselves (for example `patch:pkg@npm%3A1.0.0#...` or
/// `workspace:packages/@scope/pkg`), and splitting there would corrupt both
/// the name and the reference.
///
/// Returns `None` when the locator has no separator, which also covers empty
/// input and a bare scoped name such as `@scope/pkg`.
fn split_locator(locator: &str) -> Option<(&str, &str)> {
    // Skip the scope marker so `@scope/pkg@ref` is not split at index 0.
    // `@` is a single byte, so offset 1 is always a valid char boundary.
    let search_from = usize::from(locator.starts_with('@'));
    let split_at = search_from + locator[search_from..].find('@')?;

    // `split_at` is at least 1 here, so the name is never empty.
    Some((&locator[..split_at], &locator[split_at + 1..]))
}
173
/// Returns the slice of `content` holding the object assigned to
/// `RAW_RUNTIME_STATE`, from its opening `{` through the matching `}`.
///
/// The scan starts at the first `{` after the text
/// `const RAW_RUNTIME_STATE =` and tracks brace depth. Braces inside
/// double-quoted strings are ignored, and backslash escapes within those
/// strings are honoured. Returns `None` when the marker or an opening brace is
/// missing, or when the braces never balance.
///
/// NOTE(review): only double-quoted string syntax is understood. If the state
/// is emitted as a quoted and escaped string literal, the returned slice would
/// still carry those escapes — confirm against real `.pnp.cjs` files.
fn extract_raw_runtime_state_json(content: &str) -> Option<&str> {
    const MARKER: &str = "const RAW_RUNTIME_STATE =";

    let after_marker = content.find(MARKER)? + MARKER.len();
    let json_start = after_marker + content[after_marker..].find('{')?;
    let candidate = &content[json_start..];

    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;

    // Every delimiter of interest is ASCII, and ASCII bytes never occur inside
    // a multi-byte UTF-8 sequence, so scanning bytes matches scanning chars.
    for (offset, byte) in candidate.bytes().enumerate() {
        if in_string {
            match byte {
                _ if escaped => escaped = false,
                b'\\' => escaped = true,
                b'"' => in_string = false,
                _ => {}
            }
            continue;
        }

        match byte {
            b'"' => in_string = true,
            b'{' => depth += 1,
            b'}' => {
                depth = depth.saturating_sub(1);
                if depth == 0 {
                    // `offset` addresses the closing brace; include it.
                    return Some(&candidate[..=offset]);
                }
            }
            _ => {}
        }
    }

    None
}