Skip to main content

provenant/parsers/
yarn_pnp.rs

1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
5use crate::parser_warn as warn;
6use crate::parsers::utils::{MAX_ITERATION_COUNT, npm_purl, truncate_field};
7
8use super::PackageParser;
9
10pub struct YarnPnpParser;
11
12impl PackageParser for YarnPnpParser {
13    const PACKAGE_TYPE: PackageType = PackageType::Npm;
14
15    fn is_match(path: &Path) -> bool {
16        path.file_name().and_then(|name| name.to_str()) == Some(".pnp.cjs")
17    }
18
19    fn extract_packages(path: &Path) -> Vec<PackageData> {
20        let content = match crate::parsers::utils::read_file_to_string(path, None) {
21            Ok(content) => content,
22            Err(error) => {
23                warn!("Failed to read .pnp.cjs at {:?}: {}", path, error);
24                return vec![default_package_data()];
25            }
26        };
27
28        match parse_yarn_pnp(&content) {
29            Ok(package_data) => vec![package_data],
30            Err(error) => {
31                warn!("Failed to parse .pnp.cjs at {:?}: {}", path, error);
32                vec![default_package_data()]
33            }
34        }
35    }
36}
37
38fn default_package_data() -> PackageData {
39    PackageData {
40        package_type: Some(YarnPnpParser::PACKAGE_TYPE),
41        primary_language: Some("JavaScript".to_string()),
42        datasource_id: Some(DatasourceId::YarnPnpCjs),
43        ..Default::default()
44    }
45}
46
47fn parse_yarn_pnp(content: &str) -> Result<PackageData, String> {
48    let json_text = extract_raw_runtime_state_json(content)
49        .ok_or_else(|| "RAW_RUNTIME_STATE object not found in .pnp.cjs".to_string())?;
50    let runtime_state: serde_json::Value = serde_json::from_str(json_text)
51        .map_err(|error| format!("invalid RAW_RUNTIME_STATE JSON: {error}"))?;
52
53    let registry_entries = runtime_state
54        .get("packageRegistryData")
55        .and_then(serde_json::Value::as_array)
56        .ok_or_else(|| "packageRegistryData missing from RAW_RUNTIME_STATE".to_string())?;
57
58    let root_refs = registry_entries
59        .iter()
60        .find_map(parse_root_dependency_map)
61        .unwrap_or_default();
62    let mut seen_locators = HashSet::new();
63    let mut dependencies = Vec::new();
64
65    for entry in registry_entries.iter().take(MAX_ITERATION_COUNT) {
66        let Some(locator) = entry.get(0).and_then(serde_json::Value::as_str) else {
67            continue;
68        };
69        if !seen_locators.insert(locator.to_string()) {
70            continue;
71        }
72        let Some((name, reference)) = split_locator(locator) else {
73            continue;
74        };
75
76        let version = reference
77            .strip_prefix("npm:")
78            .map(|v| truncate_field(v.to_string()));
79        dependencies.push(Dependency {
80            purl: npm_purl(
81                truncate_field(name.to_string()).as_str(),
82                version.as_deref(),
83            ),
84            extracted_requirement: Some(truncate_field(reference.to_string())),
85            scope: Some("dependencies".to_string()),
86            is_runtime: Some(true),
87            is_optional: Some(false),
88            is_pinned: Some(version.is_some()),
89            is_direct: Some(
90                root_refs
91                    .get(name)
92                    .is_some_and(|root_ref| root_ref == reference),
93            ),
94            resolved_package: None,
95            extra_data: Some(HashMap::from([(
96                "locator".to_string(),
97                serde_json::Value::String(truncate_field(locator.to_string())),
98            )])),
99        });
100    }
101
102    let mut package = default_package_data();
103    package.dependencies = dependencies;
104    package.extra_data = Some(HashMap::from([(
105        "package_registry_entries".to_string(),
106        serde_json::Value::from(registry_entries.len()),
107    )]));
108    Ok(package)
109}
110
111fn parse_root_dependency_map(entry: &serde_json::Value) -> Option<HashMap<String, String>> {
112    if !entry.get(0).is_some_and(serde_json::Value::is_null) {
113        return None;
114    }
115
116    let dependencies = entry.get(1)?.get("packageDependencies")?;
117    Some(parse_dependency_pairs(dependencies))
118}
119
120fn parse_dependency_pairs(value: &serde_json::Value) -> HashMap<String, String> {
121    if let Some(array) = value.as_array() {
122        return array
123            .iter()
124            .take(MAX_ITERATION_COUNT)
125            .filter_map(|pair| {
126                let pair = pair.as_array()?;
127                let name = pair.first()?.as_str()?;
128                let reference = pair.get(1)?.as_str()?;
129                Some((
130                    truncate_field(name.to_string()),
131                    truncate_field(reference.to_string()),
132                ))
133            })
134            .collect();
135    }
136
137    value
138        .as_object()
139        .into_iter()
140        .flatten()
141        .take(MAX_ITERATION_COUNT)
142        .filter_map(|(name, reference)| {
143            reference.as_str().map(|reference| {
144                (
145                    truncate_field(name.clone()),
146                    truncate_field(reference.to_string()),
147                )
148            })
149        })
150        .collect()
151}
152
/// Splits a `name@reference` locator into `(name, reference)`.
///
/// The separator is the first `@` that is not the very first character: a
/// leading `@` is the scope marker of a scoped package name (`@scope/name`),
/// and package names contain no other `@`. Everything after the separator is
/// the reference, which may itself contain `@` (for example
/// `patch:resolve@npm%3A1.22.0#...` or `npm:@other/pkg@1.0.0`).
///
/// Returns `None` when the locator has no separator, i.e. it is empty, a bare
/// name, or a bare scoped name.
fn split_locator(locator: &str) -> Option<(&str, &str)> {
    // Splitting at the *last* `@` would cut inside references that embed
    // another locator, so take the first `@` past index 0 instead.
    let split_at = locator
        .match_indices('@')
        .map(|(index, _)| index)
        .find(|&index| index > 0)?;

    Some((&locator[..split_at], &locator[split_at + 1..]))
}
160
/// Returns the brace-balanced `{ ... }` text that follows the first
/// `const RAW_RUNTIME_STATE =` in `content`.
///
/// Scanning starts at the first `{` after the marker and stops at the `}` that
/// closes it. Braces inside double-quoted strings are ignored, and a backslash
/// inside such a string escapes the next character. Returns `None` when the
/// marker or an opening brace is missing, or when the object is never closed.
///
/// NOTE(review): the slice is returned verbatim with no unescaping. If the
/// state is embedded as a quoted JavaScript string rather than an object
/// literal, the result may not be valid JSON; confirm against real files.
fn extract_raw_runtime_state_json(content: &str) -> Option<&str> {
    const MARKER: &str = "const RAW_RUNTIME_STATE =";

    let (_, tail) = content.split_once(MARKER)?;
    let candidate = &tail[tail.find('{')?..];

    let mut nesting = 0usize;
    let mut inside_string = false;
    let mut skip_next = false;

    for (index, symbol) in candidate.char_indices() {
        if inside_string {
            match symbol {
                // The character right after a backslash is consumed blindly.
                _ if skip_next => skip_next = false,
                '\\' => skip_next = true,
                '"' => inside_string = false,
                _ => {}
            }
            continue;
        }

        match symbol {
            '"' => inside_string = true,
            '{' => nesting += 1,
            '}' => {
                nesting = nesting.saturating_sub(1);
                if nesting == 0 {
                    // `}` is one byte wide, so an inclusive range ends right after it.
                    return Some(&candidate[..=index]);
                }
            }
            _ => {}
        }
    }

    None
}
200
201crate::register_parser!(
202    "yarn plug and play runtime state",
203    &["**/.pnp.cjs"],
204    "npm",
205    "JavaScript",
206    Some("https://yarnpkg.com/features/pnp"),
207);