Skip to main content

provenant/parsers/
yarn_pnp.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5use std::path::Path;
6
7use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
8use crate::parser_warn as warn;
9use crate::parsers::utils::{MAX_ITERATION_COUNT, npm_purl, truncate_field};
10
11use super::PackageParser;
12
13pub struct YarnPnpParser;
14
15impl PackageParser for YarnPnpParser {
16    const PACKAGE_TYPE: PackageType = PackageType::Npm;
17
18    fn is_match(path: &Path) -> bool {
19        path.file_name().and_then(|name| name.to_str()) == Some(".pnp.cjs")
20    }
21
22    fn extract_packages(path: &Path) -> Vec<PackageData> {
23        let content = match crate::parsers::utils::read_file_to_string(path, None) {
24            Ok(content) => content,
25            Err(error) => {
26                warn!("Failed to read .pnp.cjs at {:?}: {}", path, error);
27                return vec![default_package_data()];
28            }
29        };
30
31        match parse_yarn_pnp(&content) {
32            Ok(package_data) => vec![package_data],
33            Err(error) => {
34                warn!("Failed to parse .pnp.cjs at {:?}: {}", path, error);
35                vec![default_package_data()]
36            }
37        }
38    }
39
40    fn metadata() -> Vec<super::metadata::ParserMetadata> {
41        vec![super::metadata::ParserMetadata {
42            description: "yarn plug and play runtime state",
43            file_patterns: &["**/.pnp.cjs"],
44            package_type: "npm",
45            primary_language: "JavaScript",
46            documentation_url: Some("https://yarnpkg.com/features/pnp"),
47        }]
48    }
49}
50
51fn default_package_data() -> PackageData {
52    PackageData {
53        package_type: Some(YarnPnpParser::PACKAGE_TYPE),
54        primary_language: Some("JavaScript".to_string()),
55        datasource_id: Some(DatasourceId::YarnPnpCjs),
56        ..Default::default()
57    }
58}
59
60fn parse_yarn_pnp(content: &str) -> Result<PackageData, String> {
61    let json_text = extract_raw_runtime_state_json(content)
62        .ok_or_else(|| "RAW_RUNTIME_STATE object not found in .pnp.cjs".to_string())?;
63    let runtime_state: serde_json::Value = serde_json::from_str(json_text)
64        .map_err(|error| format!("invalid RAW_RUNTIME_STATE JSON: {error}"))?;
65
66    let registry_entries = runtime_state
67        .get("packageRegistryData")
68        .and_then(serde_json::Value::as_array)
69        .ok_or_else(|| "packageRegistryData missing from RAW_RUNTIME_STATE".to_string())?;
70
71    let root_refs = registry_entries
72        .iter()
73        .find_map(parse_root_dependency_map)
74        .unwrap_or_default();
75    let mut seen_locators = HashSet::new();
76    let mut dependencies = Vec::new();
77
78    for entry in registry_entries.iter().take(MAX_ITERATION_COUNT) {
79        let Some(locator) = entry.get(0).and_then(serde_json::Value::as_str) else {
80            continue;
81        };
82        if !seen_locators.insert(locator.to_string()) {
83            continue;
84        }
85        let Some((name, reference)) = split_locator(locator) else {
86            continue;
87        };
88
89        let version = reference
90            .strip_prefix("npm:")
91            .map(|v| truncate_field(v.to_string()));
92        dependencies.push(Dependency {
93            purl: npm_purl(
94                truncate_field(name.to_string()).as_str(),
95                version.as_deref(),
96            ),
97            extracted_requirement: Some(truncate_field(reference.to_string())),
98            scope: Some("dependencies".to_string()),
99            is_runtime: Some(true),
100            is_optional: Some(false),
101            is_pinned: Some(version.is_some()),
102            is_direct: Some(
103                root_refs
104                    .get(name)
105                    .is_some_and(|root_ref| root_ref == reference),
106            ),
107            resolved_package: None,
108            extra_data: Some(HashMap::from([(
109                "locator".to_string(),
110                serde_json::Value::String(truncate_field(locator.to_string())),
111            )])),
112        });
113    }
114
115    let mut package = default_package_data();
116    package.dependencies = dependencies;
117    package.extra_data = Some(HashMap::from([(
118        "package_registry_entries".to_string(),
119        serde_json::Value::from(registry_entries.len()),
120    )]));
121    Ok(package)
122}
123
124fn parse_root_dependency_map(entry: &serde_json::Value) -> Option<HashMap<String, String>> {
125    if !entry.get(0).is_some_and(serde_json::Value::is_null) {
126        return None;
127    }
128
129    let dependencies = entry.get(1)?.get("packageDependencies")?;
130    Some(parse_dependency_pairs(dependencies))
131}
132
133fn parse_dependency_pairs(value: &serde_json::Value) -> HashMap<String, String> {
134    if let Some(array) = value.as_array() {
135        return array
136            .iter()
137            .take(MAX_ITERATION_COUNT)
138            .filter_map(|pair| {
139                let pair = pair.as_array()?;
140                let name = pair.first()?.as_str()?;
141                let reference = pair.get(1)?.as_str()?;
142                Some((
143                    truncate_field(name.to_string()),
144                    truncate_field(reference.to_string()),
145                ))
146            })
147            .collect();
148    }
149
150    value
151        .as_object()
152        .into_iter()
153        .flatten()
154        .take(MAX_ITERATION_COUNT)
155        .filter_map(|(name, reference)| {
156            reference.as_str().map(|reference| {
157                (
158                    truncate_field(name.clone()),
159                    truncate_field(reference.to_string()),
160                )
161            })
162        })
163        .collect()
164}
165
/// Splits a `name@reference` locator into its `(name, reference)` halves.
///
/// The split happens at the first `@` that is not the very first character:
/// a leading `@` is the scope marker of names such as `@scope/pkg`, and a
/// package name contains no other `@`. Everything after that separator is
/// the reference, which may itself contain `@` (for example
/// `patch:typescript@npm%3A5.0.4#...` or git SSH URLs) and is returned intact.
///
/// Returns `None` when the locator has no separator, i.e. it is empty, has no
/// `@` at all, or its only `@` is the leading scope marker.
fn split_locator(locator: &str) -> Option<(&str, &str)> {
    // `char_indices` yields byte offsets on character boundaries, so skipping
    // the first character stays safe even if it is multi-byte.
    let split_at = locator
        .char_indices()
        .skip(1)
        .find_map(|(index, ch)| (ch == '@').then_some(index))?;

    // '@' is one byte wide, so `split_at + 1` is also a character boundary.
    Some((&locator[..split_at], &locator[split_at + 1..]))
}
173
/// Returns the brace-balanced `{ ... }` slice that follows the first
/// `const RAW_RUNTIME_STATE =` in `content`.
///
/// The slice starts at the first `{` after the marker and ends at the `}`
/// that brings the nesting depth back to zero. Braces inside double-quoted
/// strings are ignored, and a backslash inside a string shields the character
/// that follows it. Returns `None` when the marker is missing, no `{` follows
/// it, or the braces never balance.
///
/// NOTE(review): only double-quoted strings are recognised and the slice is
/// returned verbatim. If the state is emitted as a quoted JavaScript string
/// literal with escaped line breaks, the result would presumably not be valid
/// JSON as-is — confirm against real inputs.
fn extract_raw_runtime_state_json(content: &str) -> Option<&str> {
    const MARKER: &str = "const RAW_RUNTIME_STATE =";

    /// Where the scanner currently is relative to JSON string literals.
    #[derive(Clone, Copy)]
    enum Scan {
        /// Outside any string literal.
        Code,
        /// Inside a string literal.
        Text,
        /// Inside a string literal, right after a backslash.
        TextEscape,
    }

    let (_, tail) = content.split_once(MARKER)?;
    let object = &tail[tail.find('{')?..];

    let mut nesting = 0usize;
    let mut state = Scan::Code;

    for (pos, symbol) in object.char_indices() {
        state = match (state, symbol) {
            // Whatever follows a backslash is consumed without interpretation.
            (Scan::TextEscape, _) => Scan::Text,
            (Scan::Text, '\\') => Scan::TextEscape,
            (Scan::Text, '"') => Scan::Code,
            (Scan::Text, _) => Scan::Text,
            (Scan::Code, '"') => Scan::Text,
            (Scan::Code, '{') => {
                nesting += 1;
                Scan::Code
            }
            (Scan::Code, '}') => {
                nesting = nesting.saturating_sub(1);
                if nesting == 0 {
                    return Some(&object[..pos + symbol.len_utf8()]);
                }
                Scan::Code
            }
            (Scan::Code, _) => Scan::Code,
        };
    }

    None
}