use crate::plugin::{ArchiveTypePlugin, ExtensionRow, ExtensionValue, HandlerCommand, HandlerMeta};
use arrow::datatypes::{DataType, Field};
use std::collections::HashMap;
/// Size cap (256 MiB) passed to the tar.gz decompressor when `package.json` is read
/// out of an npm tarball. Only compiled with the `host-decompressors` feature.
///
/// NOTE(review): presumably this bounds the number of *decompressed* bytes — confirm
/// against the decompressor's contract.
#[cfg(feature = "host-decompressors")]
pub(crate) const MAX_INGEST_DECOMPRESS: usize = 256 * 1024 * 1024;
/// Archive-type plugin for npm package tarballs (`.tgz`).
pub struct NpmPlugin;

impl NpmPlugin {
    /// Returns `true` when `s` begins with a numeric `MAJOR.MINOR.PATCH` core
    /// (e.g. `"18.3.0-canary"`); anything may follow the third number.
    fn starts_with_semver_core(s: &str) -> bool {
        let mut rest = s;
        for part in 0..3 {
            let digits = rest.bytes().take_while(u8::is_ascii_digit).count();
            if digits == 0 {
                return false;
            }
            // ASCII digits are single bytes, so this is always a char boundary.
            rest = &rest[digits..];
            if part < 2 {
                match rest.strip_prefix('.') {
                    Some(after_dot) => rest = after_dot,
                    None => return false,
                }
            }
        }
        true
    }

    /// Derives `(name, version)` from a tarball path such as
    /// `react/-/react-18.2.0.tgz`.
    ///
    /// Only the last `/`-separated component is considered and a trailing `.tgz`
    /// is removed. The split point is chosen as follows:
    ///
    /// 1. the first `-` that is followed by a `MAJOR.MINOR.PATCH` core, so that
    ///    prerelease tags containing `-<digit>` (for example
    ///    `18.3.0-canary-546178f91-20231005`) stay part of the version and names
    ///    such as `node-7z` stay intact;
    /// 2. otherwise the last `-` that is followed by an ASCII digit (legacy rule,
    ///    kept for non-semver file names like `foo-1.tgz`).
    ///
    /// When neither rule matches, the whole stem is the name and the version is
    /// `None`.
    fn parse_filename(path: &str) -> (String, Option<String>) {
        let filename = path.rsplit('/').next().unwrap_or(path);
        let stem = filename.strip_suffix(".tgz").unwrap_or(filename);

        // '-' is a single byte, so `i + 1` is always a valid slice boundary.
        let split_pos = stem
            .match_indices('-')
            .map(|(i, _)| i)
            .find(|&i| Self::starts_with_semver_core(&stem[i + 1..]))
            .or_else(|| {
                stem.rmatch_indices('-').map(|(i, _)| i).find(|&i| {
                    stem.as_bytes()
                        .get(i + 1)
                        .map_or(false, |b| b.is_ascii_digit())
                })
            });

        match split_pos {
            Some(pos) => (stem[..pos].to_string(), Some(stem[pos + 1..].to_string())),
            None => (stem.to_string(), None),
        }
    }

    /// Reads `name` and `version` from the tarball's `package.json`.
    ///
    /// Returns `None` when decompression fails, no entry is returned, the JSON
    /// does not parse, or either field is missing, not a string, or empty.
    ///
    /// Among the returned entries, the shallowest path matching
    /// `…package/package.json` or `package.json` is preferred, so that a bundled
    /// dependency's manifest (e.g. `package/node_modules/x-package/package.json`)
    /// cannot shadow the root manifest. If nothing matches, the shallowest entry
    /// overall is used. Ties keep archive order.
    ///
    /// NOTE(review): assumes the decompressor yields `(path, bytes)` pairs for
    /// entries matching the `"package.json"` filter — confirm against `lgz`.
    #[cfg(feature = "host-decompressors")]
    fn parse_package_json(data: &[u8]) -> Option<(String, String)> {
        let entries =
            lgz::decompress_tar_gz_filter_capped(data, "package.json", MAX_INGEST_DECOMPRESS)
                .ok()?;

        // Number of meaningful path components; lower means closer to the root.
        let depth = |p: &str| {
            p.split('/')
                .filter(|seg| !seg.is_empty() && *seg != ".")
                .count()
        };
        let is_manifest = |p: &str| p.ends_with("package/package.json") || p == "package.json";

        let (_, bytes) = entries
            .iter()
            .filter(|(p, _)| is_manifest(p.as_str()))
            .min_by_key(|(p, _)| depth(p.as_str()))
            .or_else(|| entries.iter().min_by_key(|(p, _)| depth(p.as_str())))?;

        let v: serde_json::Value = serde_json::from_slice(bytes).ok()?;
        let name = v.get("name")?.as_str()?.to_string();
        let version = v.get("version")?.as_str()?.to_string();
        if name.is_empty() || version.is_empty() {
            return None;
        }
        Some((name, version))
    }

    /// Resolves package coordinates for the tarball at `path` with contents `_data`.
    ///
    /// With the `host-decompressors` feature, `package.json` inside the tarball is
    /// tried first; otherwise (or when that fails) the file name is parsed.
    fn resolve_coords(path: &str, _data: &[u8]) -> (String, Option<String>) {
        #[cfg(feature = "host-decompressors")]
        {
            if let Some((name, version)) = Self::parse_package_json(_data) {
                return (name, Some(version));
            }
        }
        Self::parse_filename(path)
    }
}
impl ArchiveTypePlugin for NpmPlugin {
    /// Handler name: `"npm"`.
    fn name(&self) -> &str {
        "npm"
    }

    /// Numeric type id of this handler (`6`), matching `meta().type_id`.
    fn type_id(&self) -> i8 {
        6
    }

    /// Static description of the handler: name, aliases, claimed extension
    /// (`.tgz`) and the single `coords` subcommand.
    fn meta(&self) -> HandlerMeta {
        HandlerMeta {
            name: "npm".into(),
            aliases: vec!["node".into(), "yarn".into(), "pnpm".into()],
            type_id: 6,
            ecosystem: "JavaScript / npm (registry.npmjs.org)".into(),
            extensions: vec![".tgz".into()],
            description:
                "npm package tarballs — authoritative name (incl. @scope) + version from package.json"
                    .into(),
            commands: vec![HandlerCommand::new(
                "coords",
                "Print npm package name + version (package.json if readable, else filename)",
            )],
        }
    }

    /// Runs a handler subcommand.
    ///
    /// `coords <file.tgz>` prints `name version` (or just `name` when no version
    /// could be determined) to stdout.
    ///
    /// # Errors
    ///
    /// Fails when `coords` is called without a path argument, or when `cmd` is
    /// not a known subcommand.
    fn run_command(&self, cmd: &str, args: &[String]) -> anyhow::Result<()> {
        match cmd {
            "coords" => {
                let path = args
                    .first()
                    .ok_or_else(|| anyhow::anyhow!("usage: npm coords <file.tgz>"))?;
                // The command is advertised as "package.json if readable, else
                // filename", so try to read the tarball. An unreadable file is not
                // an error: empty data makes `resolve_coords` fall back to the
                // file name. Without the decompressor feature the bytes would be
                // ignored anyway, so the read is skipped.
                let data = if cfg!(feature = "host-decompressors") {
                    std::fs::read(path).unwrap_or_default()
                } else {
                    Vec::new()
                };
                let (name, version) = Self::resolve_coords(path, &data);
                match version {
                    Some(v) => println!("{} {}", name, v),
                    None => println!("{}", name),
                }
                Ok(())
            }
            other => anyhow::bail!("npm: unknown subcommand '{}'", other),
        }
    }

    /// Claims any path ending in `.tgz` (case-sensitive).
    fn matches_path(&self, path: &str) -> bool {
        path.ends_with(".tgz")
    }

    /// Extension columns contributed by this plugin: nullable UTF-8 `name` and
    /// `version`.
    fn schema_fields(&self) -> Vec<Field> {
        vec![
            Field::new("name", DataType::Utf8, true),
            Field::new("version", DataType::Utf8, true),
        ]
    }

    /// Builds the extension row for one tarball: `name` is always set, `version`
    /// is optional. Always returns `Some`.
    fn extract_metadata(&self, path: &str, data: &[u8]) -> Option<ExtensionRow> {
        let (name, version) = Self::resolve_coords(path, data);
        let mut fields = HashMap::new();
        fields.insert("name".into(), ExtensionValue::Str(name));
        fields.insert("version".into(), ExtensionValue::OptStr(version));
        Some(ExtensionRow { fields })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Only paths ending in `.tgz` are claimed by the plugin.
    #[test]
    fn matches_tgz_only() {
        let plugin = NpmPlugin;
        let expectations = [
            ("react/-/react-18.2.0.tgz", true),
            ("react/-/react-18.2.0.tar.gz", false),
            ("foo.jar", false),
        ];
        for (path, claimed) in expectations.iter() {
            assert_eq!(plugin.matches_path(path), *claimed, "path: {}", path);
        }
    }

    /// The file-name fallback drops the directory part and splits name from version.
    #[test]
    fn filename_fallback_splits_at_version() {
        let (name, version) = NpmPlugin::parse_filename("@types/node/-/node-20.11.5.tgz");
        assert_eq!(name.as_str(), "node");
        assert_eq!(version, Some(String::from("20.11.5")));
    }

    /// The plugin contributes exactly the `name` and `version` columns, in that order.
    #[test]
    fn schema_is_name_version() {
        let fields = NpmPlugin.schema_fields();
        let column_names: Vec<&str> = fields.iter().map(|f| f.name().as_str()).collect();
        assert_eq!(column_names, ["name", "version"]);
    }
}