Skip to main content

provenant/parsers/
yarn_pnp.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet};
5use std::path::Path;
6
7use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
8use crate::parser_warn as warn;
9use crate::parsers::utils::{MAX_ITERATION_COUNT, npm_purl, truncate_field};
10
11use super::PackageParser;
12
/// Parser for Yarn Plug'n'Play `.pnp.cjs` files.
///
/// Stateless marker type: all behaviour lives in its [`PackageParser`]
/// implementation, which matches files named exactly `.pnp.cjs`.
pub struct YarnPnpParser;
14
15impl PackageParser for YarnPnpParser {
16    const PACKAGE_TYPE: PackageType = PackageType::Npm;
17
18    fn is_match(path: &Path) -> bool {
19        path.file_name().and_then(|name| name.to_str()) == Some(".pnp.cjs")
20    }
21
22    fn extract_packages(path: &Path) -> Vec<PackageData> {
23        let content = match crate::parsers::utils::read_file_to_string(path, None) {
24            Ok(content) => content,
25            Err(error) => {
26                warn!("Failed to read .pnp.cjs at {:?}: {}", path, error);
27                return vec![default_package_data()];
28            }
29        };
30
31        match parse_yarn_pnp(&content) {
32            Ok(package_data) => vec![package_data],
33            Err(error) => {
34                warn!("Failed to parse .pnp.cjs at {:?}: {}", path, error);
35                vec![default_package_data()]
36            }
37        }
38    }
39}
40
41fn default_package_data() -> PackageData {
42    PackageData {
43        package_type: Some(YarnPnpParser::PACKAGE_TYPE),
44        primary_language: Some("JavaScript".to_string()),
45        datasource_id: Some(DatasourceId::YarnPnpCjs),
46        ..Default::default()
47    }
48}
49
50fn parse_yarn_pnp(content: &str) -> Result<PackageData, String> {
51    let json_text = extract_raw_runtime_state_json(content)
52        .ok_or_else(|| "RAW_RUNTIME_STATE object not found in .pnp.cjs".to_string())?;
53    let runtime_state: serde_json::Value = serde_json::from_str(json_text)
54        .map_err(|error| format!("invalid RAW_RUNTIME_STATE JSON: {error}"))?;
55
56    let registry_entries = runtime_state
57        .get("packageRegistryData")
58        .and_then(serde_json::Value::as_array)
59        .ok_or_else(|| "packageRegistryData missing from RAW_RUNTIME_STATE".to_string())?;
60
61    let root_refs = registry_entries
62        .iter()
63        .find_map(parse_root_dependency_map)
64        .unwrap_or_default();
65    let mut seen_locators = HashSet::new();
66    let mut dependencies = Vec::new();
67
68    for entry in registry_entries.iter().take(MAX_ITERATION_COUNT) {
69        let Some(locator) = entry.get(0).and_then(serde_json::Value::as_str) else {
70            continue;
71        };
72        if !seen_locators.insert(locator.to_string()) {
73            continue;
74        }
75        let Some((name, reference)) = split_locator(locator) else {
76            continue;
77        };
78
79        let version = reference
80            .strip_prefix("npm:")
81            .map(|v| truncate_field(v.to_string()));
82        dependencies.push(Dependency {
83            purl: npm_purl(
84                truncate_field(name.to_string()).as_str(),
85                version.as_deref(),
86            ),
87            extracted_requirement: Some(truncate_field(reference.to_string())),
88            scope: Some("dependencies".to_string()),
89            is_runtime: Some(true),
90            is_optional: Some(false),
91            is_pinned: Some(version.is_some()),
92            is_direct: Some(
93                root_refs
94                    .get(name)
95                    .is_some_and(|root_ref| root_ref == reference),
96            ),
97            resolved_package: None,
98            extra_data: Some(HashMap::from([(
99                "locator".to_string(),
100                serde_json::Value::String(truncate_field(locator.to_string())),
101            )])),
102        });
103    }
104
105    let mut package = default_package_data();
106    package.dependencies = dependencies;
107    package.extra_data = Some(HashMap::from([(
108        "package_registry_entries".to_string(),
109        serde_json::Value::from(registry_entries.len()),
110    )]));
111    Ok(package)
112}
113
114fn parse_root_dependency_map(entry: &serde_json::Value) -> Option<HashMap<String, String>> {
115    if !entry.get(0).is_some_and(serde_json::Value::is_null) {
116        return None;
117    }
118
119    let dependencies = entry.get(1)?.get("packageDependencies")?;
120    Some(parse_dependency_pairs(dependencies))
121}
122
123fn parse_dependency_pairs(value: &serde_json::Value) -> HashMap<String, String> {
124    if let Some(array) = value.as_array() {
125        return array
126            .iter()
127            .take(MAX_ITERATION_COUNT)
128            .filter_map(|pair| {
129                let pair = pair.as_array()?;
130                let name = pair.first()?.as_str()?;
131                let reference = pair.get(1)?.as_str()?;
132                Some((
133                    truncate_field(name.to_string()),
134                    truncate_field(reference.to_string()),
135                ))
136            })
137            .collect();
138    }
139
140    value
141        .as_object()
142        .into_iter()
143        .flatten()
144        .take(MAX_ITERATION_COUNT)
145        .filter_map(|(name, reference)| {
146            reference.as_str().map(|reference| {
147                (
148                    truncate_field(name.clone()),
149                    truncate_field(reference.to_string()),
150                )
151            })
152        })
153        .collect()
154}
155
/// Splits a `name@reference` locator into its name and reference parts.
///
/// The split happens at the first `@` that is not the very first character,
/// so a scoped name such as `@scope/pkg` keeps its leading `@`. Splitting at
/// the first separator rather than the last keeps references that themselves
/// contain `@` intact, e.g. `patch:typescript@npm%3A5.3.3#...` or
/// `npm:@scope/pkg@1.0.0`.
///
/// Returns `None` when the locator has no separator after its first
/// character (including the empty string and a bare scoped name).
fn split_locator(locator: &str) -> Option<(&str, &str)> {
    // Skip the first character: an `@` there starts a scope, it is not a separator.
    let split_at = locator
        .char_indices()
        .skip(1)
        .find_map(|(index, ch)| (ch == '@').then_some(index))?;

    Some((&locator[..split_at], &locator[split_at + 1..]))
}
163
/// Returns the brace-balanced object that follows `const RAW_RUNTIME_STATE =`.
///
/// Scanning starts at the first `{` after the first occurrence of the marker
/// and stops at the `}` that closes it. Braces inside double-quoted strings
/// are ignored, and a backslash inside a string escapes the next character.
/// The returned slice borrows from `content` and includes both outer braces.
///
/// Returns `None` when the marker is absent, no `{` follows it, or the object
/// is never closed.
fn extract_raw_runtime_state_json(content: &str) -> Option<&str> {
    const MARKER: &str = "const RAW_RUNTIME_STATE =";

    let (_, after_marker) = content.split_once(MARKER)?;
    let candidate = &after_marker[after_marker.find('{')?..];

    let mut open_braces = 0usize;
    let mut inside_string = false;
    let mut skip_next = false;

    // Every significant character is ASCII, so scanning bytes is safe: UTF-8
    // continuation bytes can never be mistaken for a quote, brace or backslash.
    for (index, byte) in candidate.bytes().enumerate() {
        if inside_string {
            if skip_next {
                skip_next = false;
            } else {
                match byte {
                    b'\\' => skip_next = true,
                    b'"' => inside_string = false,
                    _ => {}
                }
            }
            continue;
        }

        match byte {
            b'"' => inside_string = true,
            b'{' => open_braces += 1,
            b'}' => {
                open_braces = open_braces.saturating_sub(1);
                if open_braces == 0 {
                    return Some(&candidate[..=index]);
                }
            }
            _ => {}
        }
    }

    None
}
203
// Registers this parser with the crate-wide parser registry.
// NOTE(review): the macro definition is not visible here; the arguments appear
// to be, in order, a human-readable description, the path globs handled, the
// package type, the primary language, and an optional documentation URL —
// confirm against `register_parser!`.
crate::register_parser!(
    "yarn plug and play runtime state",
    &["**/.pnp.cjs"],
    "npm",
    "JavaScript",
    Some("https://yarnpkg.com/features/pnp"),
);