Skip to main content

provenant/parsers/
yarn_pnp.rs

1use std::collections::{HashMap, HashSet};
2use std::fs;
3use std::path::Path;
4
5use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
6use crate::parser_warn as warn;
7use crate::parsers::utils::npm_purl;
8
9use super::PackageParser;
10
11pub struct YarnPnpParser;
12
13impl PackageParser for YarnPnpParser {
14    const PACKAGE_TYPE: PackageType = PackageType::Npm;
15
16    fn is_match(path: &Path) -> bool {
17        path.file_name().and_then(|name| name.to_str()) == Some(".pnp.cjs")
18    }
19
20    fn extract_packages(path: &Path) -> Vec<PackageData> {
21        let content = match fs::read_to_string(path) {
22            Ok(content) => content,
23            Err(error) => {
24                warn!("Failed to read .pnp.cjs at {:?}: {}", path, error);
25                return vec![default_package_data()];
26            }
27        };
28
29        match parse_yarn_pnp(&content) {
30            Ok(package_data) => vec![package_data],
31            Err(error) => {
32                warn!("Failed to parse .pnp.cjs at {:?}: {}", path, error);
33                vec![default_package_data()]
34            }
35        }
36    }
37}
38
39fn default_package_data() -> PackageData {
40    PackageData {
41        package_type: Some(YarnPnpParser::PACKAGE_TYPE),
42        primary_language: Some("JavaScript".to_string()),
43        datasource_id: Some(DatasourceId::YarnPnpCjs),
44        ..Default::default()
45    }
46}
47
48fn parse_yarn_pnp(content: &str) -> Result<PackageData, String> {
49    let json_text = extract_raw_runtime_state_json(content)
50        .ok_or_else(|| "RAW_RUNTIME_STATE object not found in .pnp.cjs".to_string())?;
51    let runtime_state: serde_json::Value = serde_json::from_str(json_text)
52        .map_err(|error| format!("invalid RAW_RUNTIME_STATE JSON: {error}"))?;
53
54    let registry_entries = runtime_state
55        .get("packageRegistryData")
56        .and_then(serde_json::Value::as_array)
57        .ok_or_else(|| "packageRegistryData missing from RAW_RUNTIME_STATE".to_string())?;
58
59    let root_refs = registry_entries
60        .iter()
61        .find_map(parse_root_dependency_map)
62        .unwrap_or_default();
63    let mut seen_locators = HashSet::new();
64    let mut dependencies = Vec::new();
65
66    for entry in registry_entries {
67        let Some(locator) = entry.get(0).and_then(serde_json::Value::as_str) else {
68            continue;
69        };
70        if !seen_locators.insert(locator.to_string()) {
71            continue;
72        }
73        let Some((name, reference)) = split_locator(locator) else {
74            continue;
75        };
76
77        let version = reference.strip_prefix("npm:");
78        dependencies.push(Dependency {
79            purl: npm_purl(name, version),
80            extracted_requirement: Some(reference.to_string()),
81            scope: Some("dependencies".to_string()),
82            is_runtime: Some(true),
83            is_optional: Some(false),
84            is_pinned: Some(version.is_some()),
85            is_direct: Some(
86                root_refs
87                    .get(name)
88                    .is_some_and(|root_ref| root_ref == reference),
89            ),
90            resolved_package: None,
91            extra_data: Some(HashMap::from([(
92                "locator".to_string(),
93                serde_json::Value::String(locator.to_string()),
94            )])),
95        });
96    }
97
98    let mut package = default_package_data();
99    package.dependencies = dependencies;
100    package.extra_data = Some(HashMap::from([(
101        "package_registry_entries".to_string(),
102        serde_json::Value::from(registry_entries.len()),
103    )]));
104    Ok(package)
105}
106
107fn parse_root_dependency_map(entry: &serde_json::Value) -> Option<HashMap<String, String>> {
108    if !entry.get(0).is_some_and(serde_json::Value::is_null) {
109        return None;
110    }
111
112    let dependencies = entry.get(1)?.get("packageDependencies")?;
113    Some(parse_dependency_pairs(dependencies))
114}
115
116fn parse_dependency_pairs(value: &serde_json::Value) -> HashMap<String, String> {
117    if let Some(array) = value.as_array() {
118        return array
119            .iter()
120            .filter_map(|pair| {
121                let pair = pair.as_array()?;
122                let name = pair.first()?.as_str()?;
123                let reference = pair.get(1)?.as_str()?;
124                Some((name.to_string(), reference.to_string()))
125            })
126            .collect();
127    }
128
129    value
130        .as_object()
131        .into_iter()
132        .flatten()
133        .filter_map(|(name, reference)| {
134            reference
135                .as_str()
136                .map(|reference| (name.clone(), reference.to_string()))
137        })
138        .collect()
139}
140
/// Splits a Yarn locator of the form `name@reference` into `(name, reference)`.
///
/// The split happens at the first `@` that follows the package name. A leading
/// `@` is part of a scoped name (`@scope/pkg`) and is never treated as the
/// separator. Splitting at the *first* separator matters because references
/// may themselves contain `@`, e.g.
/// `typescript@patch:typescript@npm%3A5.3.3#...` must yield the name
/// `typescript`, not `typescript@patch:typescript`.
///
/// Returns `None` when the locator has no separator (including the empty
/// string and a bare scoped name such as `@scope/pkg`).
fn split_locator(locator: &str) -> Option<(&str, &str)> {
    // Skip the scope marker, if any; '@' is one byte so index 1 is a char boundary.
    let search_from = usize::from(locator.starts_with('@'));
    // The separator can never be at index 0: either index 0 is the skipped
    // scope marker, or the string does not start with '@' at all.
    let split_at = search_from + locator[search_from..].find('@')?;
    Some((&locator[..split_at], &locator[split_at + 1..]))
}
148
/// Returns the brace-balanced object text that follows the first
/// `const RAW_RUNTIME_STATE =` marker in `content`.
///
/// The scan starts at the first `{` after the marker and ends at its matching
/// `}`. Braces inside double-quoted strings are ignored, and a backslash inside
/// a string escapes the following character. Returns `None` when the marker or
/// an opening brace is missing, or when the object is never closed.
fn extract_raw_runtime_state_json(content: &str) -> Option<&str> {
    const MARKER: &str = "const RAW_RUNTIME_STATE =";

    let (_, after_marker) = content.split_once(MARKER)?;
    let candidate = &after_marker[after_marker.find('{')?..];

    let mut open_braces = 0usize;
    let mut inside_string = false;
    let mut skip_next = false;

    // Every delimiter of interest is ASCII, so scanning bytes is equivalent to
    // scanning chars and the byte index doubles as a valid slice boundary.
    for (index, byte) in candidate.bytes().enumerate() {
        if inside_string {
            match byte {
                _ if skip_next => skip_next = false,
                b'\\' => skip_next = true,
                b'"' => inside_string = false,
                _ => {}
            }
            continue;
        }

        match byte {
            b'"' => inside_string = true,
            b'{' => open_braces += 1,
            b'}' => {
                open_braces = open_braces.saturating_sub(1);
                if open_braces == 0 {
                    return Some(&candidate[..=index]);
                }
            }
            _ => {}
        }
    }

    None
}
188
// Registers this parser through the crate-level `register_parser!` macro.
// NOTE(review): the positional arguments look like a human-readable
// description, the path glob(s) to match, the package type, the primary
// language, and an optional documentation URL — confirm against the macro
// definition.
crate::register_parser!(
    "yarn plug and play runtime state",
    &["**/.pnp.cjs"],
    "npm",
    "JavaScript",
    Some("https://yarnpkg.com/features/pnp"),
);