use anyhow::Result;
use std::collections::{HashMap, HashSet};
use std::path::Path;
/// Outcome of dependency resolution for a skill.
#[derive(Debug, Clone)]
pub struct ResolvedDependencies {
    /// Package names to install (pip or npm names, per the skill's language).
    pub packages: Vec<String>,
    /// Which strategy produced `packages`.
    pub resolver: ResolverKind,
    /// Resolved packages that are not in the built-in whitelist; left empty
    /// when unknown packages are allowed (validation skipped).
    pub unknown_packages: Vec<String>,
}
/// Strategy that produced a set of resolved packages.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResolverKind {
    /// Read from a fresh `.skilllite.lock` file.
    Lock,
    /// Extracted by an LLM (requires the `async-resolve` feature).
    Llm,
    /// Matched against the built-in package whitelist.
    Whitelist,
    /// Nothing resolved.
    None,
}
impl std::fmt::Display for ResolverKind {
    /// Renders the resolver kind as the lowercase token persisted in the
    /// lock file's `resolver` field.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Lock => "lock",
            Self::Llm => "llm",
            Self::Whitelist => "whitelist",
            Self::None => "none",
        };
        f.write_str(label)
    }
}
/// Attempts to load resolved packages from the skill's `.skilllite.lock`.
///
/// Returns `None` when the lock file is absent/unreadable, is not valid
/// JSON, was written for a different compatibility string (hash mismatch),
/// or contains no string package entries.
pub fn resolve_from_lock(skill_dir: &Path, compatibility: Option<&str>) -> Option<Vec<String>> {
    let lock_path = skill_dir.join(".skilllite.lock");
    let raw = skilllite_fs::read_file(&lock_path).ok()?;
    let lock: serde_json::Value = serde_json::from_str(&raw).ok()?;

    // The lock is only valid for the exact compatibility string it was
    // written against.
    let stored_hash = lock.get("compatibility_hash")?.as_str()?;
    if stored_hash != compatibility_hash(compatibility) {
        tracing::debug!("Lock file stale: hash mismatch");
        return None;
    }

    // Keep string entries only; non-string values are silently skipped.
    let mut packages = Vec::new();
    for entry in lock.get("resolved_packages")?.as_array()? {
        if let Some(name) = entry.as_str() {
            packages.push(name.to_owned());
        }
    }
    (!packages.is_empty()).then_some(packages)
}
/// Persists a resolution result to `.skilllite.lock` inside `skill_dir`.
///
/// Packages are written sorted so the file is deterministic for a given
/// resolution. The stored `compatibility_hash` lets `resolve_from_lock`
/// detect staleness later.
pub fn write_lock(
    skill_dir: &Path,
    compatibility: Option<&str>,
    language: &str,
    packages: &[String],
    resolver: &ResolverKind,
) -> Result<()> {
    let mut resolved = packages.to_vec();
    resolved.sort();

    let payload = serde_json::json!({
        "compatibility_hash": compatibility_hash(compatibility),
        "language": language,
        "resolved_packages": resolved,
        "resolved_at": chrono::Utc::now().to_rfc3339(),
        "resolver": resolver.to_string(),
    });

    // Trailing newline keeps the file friendly to diff/append tooling.
    let body = serde_json::to_string_pretty(&payload)? + "\n";
    skilllite_fs::write_file(&skill_dir.join(".skilllite.lock"), &body)?;
    Ok(())
}
/// Hex-encoded SHA-256 of the compatibility string (`None` hashes as "").
/// Used to detect when a lock file was written for a different string.
fn compatibility_hash(compat: Option<&str>) -> String {
    use sha2::{Digest, Sha256};
    let digest = Sha256::digest(compat.unwrap_or_default().as_bytes());
    hex::encode(digest)
}
/// Scans a compatibility string for known package names and aliases.
///
/// Matching is case-insensitive and on word boundaries only, so "request
/// handling" does not match the "requests" package. Aliases contribute
/// their canonical name, deduplicated against direct matches. Unsupported
/// languages yield an empty list.
pub fn resolve_from_whitelist(compatibility: &str, language: &str) -> Vec<String> {
    let catalog = get_whitelist();
    let haystack = compatibility.to_lowercase();

    let (names, aliases) = match language {
        "python" => (&catalog.python_packages, &catalog.python_aliases),
        "node" => (&catalog.node_packages, &catalog.node_aliases),
        _ => return Vec::new(),
    };

    // Direct mentions of canonical package names.
    let mut found: Vec<String> = names
        .iter()
        .filter(|name| is_word_boundary_match(&haystack, &name.to_lowercase()))
        .cloned()
        .collect();

    // Alias mentions map to their canonical name, skipping duplicates.
    for (alias, canonical) in aliases {
        let mentioned = is_word_boundary_match(&haystack, &alias.to_lowercase());
        if mentioned && !found.contains(canonical) {
            found.push(canonical.clone());
        }
    }
    found
}
/// Returns true when `word` occurs in `text` delimited by non-alphanumeric
/// characters (or the ends of `text`). Comparison is by Unicode scalar
/// values; callers lowercase both sides beforehand. Characters like '-',
/// '@', '.' and '/' count as boundaries, which lets names such as
/// "@scope/pkg" or "socket.io" be matched as a whole.
fn is_word_boundary_match(text: &str, word: &str) -> bool {
    let text: Vec<char> = text.chars().collect();
    let word: Vec<char> = word.chars().collect();
    if word.len() > text.len() {
        return false;
    }
    for start in 0..=(text.len() - word.len()) {
        let end = start + word.len();
        if text[start..end] != word[..] {
            continue;
        }
        let left_boundary = start == 0 || !text[start - 1].is_alphanumeric();
        let right_boundary = end == text.len() || !text[end].is_alphanumeric();
        if left_boundary && right_boundary {
            return true;
        }
    }
    false
}
/// Returns the subset of `packages` not found in the built-in whitelist
/// for `language` (original spelling preserved in the result).
///
/// Comparison is case-insensitive, normalizes '_' to '-', and ignores pip
/// extras (e.g. "requests[security]" checks as "requests"). Every package
/// counts as unknown for an unsupported language.
pub fn validate_against_whitelist(packages: &[String], language: &str) -> Vec<String> {
    let catalog = get_whitelist();
    let names: &[String] = match language {
        "python" => &catalog.python_packages,
        "node" => &catalog.node_packages,
        _ => &[],
    };
    let known: HashSet<String> = names.iter().map(|p| p.to_lowercase()).collect();

    let mut unknown = Vec::new();
    for pkg in packages {
        let normalized = pkg.to_lowercase().replace('_', "-");
        // Strip a pip extras suffix before the lookup.
        let base = normalized.split('[').next().unwrap_or(&normalized);
        if !known.contains(base) {
            unknown.push(pkg.clone());
        }
    }
    unknown
}
/// Synchronous resolution pipeline: a fresh lock file wins; otherwise the
/// compatibility string is scanned against the whitelist; otherwise the
/// result is empty.
///
/// A successful whitelist resolution is written back to the lock file;
/// write failures are deliberately ignored since resolution itself
/// succeeded. `allow_unknown` skips whitelist validation of the result.
pub fn resolve_packages_sync(
    skill_dir: &Path,
    compatibility: Option<&str>,
    language: &str,
    allow_unknown: bool,
) -> Result<ResolvedDependencies> {
    // 1. A valid lock file short-circuits everything else.
    if let Some(packages) = resolve_from_lock(skill_dir, compatibility) {
        tracing::debug!("Resolved from lock: {:?}", packages);
        return Ok(ResolvedDependencies {
            packages,
            resolver: ResolverKind::Lock,
            unknown_packages: Vec::new(),
        });
    }

    // 2. Whitelist scan of the compatibility string.
    let compat = compatibility.unwrap_or_default();
    if !compat.is_empty() {
        let packages = resolve_from_whitelist(compat, language);
        if !packages.is_empty() {
            let unknown_packages = if allow_unknown {
                Vec::new()
            } else {
                validate_against_whitelist(&packages, language)
            };
            // Best effort: a failed lock write must not fail resolution.
            let _ = write_lock(
                skill_dir,
                compatibility,
                language,
                &packages,
                &ResolverKind::Whitelist,
            );
            return Ok(ResolvedDependencies {
                packages,
                resolver: ResolverKind::Whitelist,
                unknown_packages,
            });
        }
    }

    // 3. Nothing to install.
    Ok(ResolvedDependencies {
        packages: Vec::new(),
        resolver: ResolverKind::None,
        unknown_packages: Vec::new(),
    })
}
/// In-memory view of the built-in whitelist: installable package names plus
/// alias -> canonical-install-name mappings, per language.
struct PackagesWhitelist {
    python_packages: Vec<String>,
    python_aliases: HashMap<String, String>,
    node_packages: Vec<String>,
    node_aliases: HashMap<String, String>,
}
/// Returns the shared package whitelist.
///
/// Built once on first use and cached for the process lifetime via
/// `OnceLock`; the previous implementation re-allocated every `String` in
/// the catalog on each call, and this function is hit by both whitelist
/// matching and validation. Returning `&'static` is source-compatible with
/// all callers, which only borrow the fields.
fn get_whitelist() -> &'static PackagesWhitelist {
    static WHITELIST: std::sync::OnceLock<PackagesWhitelist> = std::sync::OnceLock::new();
    WHITELIST.get_or_init(|| PackagesWhitelist {
        python_packages: PYTHON_PACKAGES.iter().map(|s| s.to_string()).collect(),
        python_aliases: PYTHON_ALIASES
            .iter()
            .map(|(k, v)| (k.to_string(), v.to_string()))
            .collect(),
        node_packages: NODE_PACKAGES.iter().map(|s| s.to_string()).collect(),
        node_aliases: NODE_ALIASES
            .iter()
            .map(|(k, v)| (k.to_string(), v.to_string()))
            .collect(),
    })
}
/// Python packages that may be auto-installed when mentioned in a skill's
/// compatibility string. Entries are canonical pip install names; common
/// import aliases live in `PYTHON_ALIASES`.
///
/// Note: "pytorch" is intentionally absent — the PyPI distribution named
/// `pytorch` is a placeholder that fails on install. The alias entry
/// ("pytorch", "torch") maps mentions of it to the installable `torch`
/// package; listing both here caused "pytorch" mentions to resolve to the
/// broken name alongside `torch`.
const PYTHON_PACKAGES: &[&str] = &[
    // HTTP clients
    "requests",
    "httpx",
    "aiohttp",
    "urllib3",
    "httplib2",
    // Data / scientific computing
    "numpy",
    "pandas",
    "scipy",
    "scikit-learn",
    "statsmodels",
    "polars",
    "pyarrow",
    "duckdb",
    "openpyxl",
    // Machine learning
    "tensorflow",
    "keras",
    "torch",
    "transformers",
    "xgboost",
    "lightgbm",
    "catboost",
    "onnx",
    "onnxruntime",
    // LLM / AI SDKs
    "openai",
    "anthropic",
    "langchain",
    "langgraph",
    "llama-index",
    // Plotting
    "matplotlib",
    "seaborn",
    "plotly",
    "bokeh",
    "altair",
    // Web frameworks / servers
    "flask",
    "django",
    "fastapi",
    "starlette",
    "uvicorn",
    "gunicorn",
    "sanic",
    "tornado",
    "bottle",
    "pyramid",
    // Scraping / parsing
    "beautifulsoup4",
    "lxml",
    "scrapy",
    "selenium",
    "playwright",
    "html5lib",
    "cssselect",
    "html2text",
    // Imaging
    "pillow",
    "opencv-python",
    "imageio",
    "scikit-image",
    // Config / serialization formats
    "pyyaml",
    "toml",
    "tomli",
    "python-dotenv",
    "configparser",
    // Databases
    "sqlalchemy",
    "psycopg2",
    "psycopg2-binary",
    "pymysql",
    "redis",
    "pymongo",
    "motor",
    "asyncpg",
    "aiosqlite",
    "peewee",
    "pyodps",
    // Cloud SDKs
    "boto3",
    "botocore",
    "google-cloud-storage",
    "google-auth",
    "azure-storage-blob",
    "azure-identity",
    "oss2",
    // Testing
    "pytest",
    "mock",
    "responses",
    "fakeredis",
    "factory-boy",
    // CLI / terminal
    "click",
    "typer",
    "argparse",
    "fire",
    "rich",
    "tqdm",
    "colorama",
    // Data modeling / validation
    "pydantic",
    "attrs",
    "dataclasses-json",
    "marshmallow",
    "cattrs",
    // Templating
    "jinja2",
    "mako",
    // Task queues
    "celery",
    "rq",
    "dramatiq",
    // Crypto / auth / SSH
    "cryptography",
    "pyjwt",
    "passlib",
    "bcrypt",
    "paramiko",
    // Logging
    "loguru",
    "structlog",
    // Async runtimes / IO
    "anyio",
    "trio",
    "aiofiles",
    // Dates & times
    "arrow",
    "pendulum",
    "python-dateutil",
    "pytz",
    // Encoding / fast JSON
    "chardet",
    "charset-normalizer",
    "orjson",
    "ujson",
    // Tooling
    "tenacity",
    "tox",
    "nox",
    "pre-commit",
    "mypy",
    "black",
    "ruff",
    "isort",
    "setuptools",
    "wheel",
    "pip",
    "poetry",
];
/// (alias, canonical pip name) pairs: common import names or shorthand that
/// map to the real installable package when found in a compatibility string.
const PYTHON_ALIASES: &[(&str, &str)] = &[
    ("cv2", "opencv-python"),
    ("PIL", "pillow"),
    ("sklearn", "scikit-learn"),
    ("bs4", "beautifulsoup4"),
    ("yaml", "pyyaml"),
    ("dotenv", "python-dotenv"),
    ("jwt", "pyjwt"),
    ("odps", "pyodps"),
    ("llamaindex", "llama-index"),
    ("skimage", "scikit-image"),
    ("pytorch", "torch"),
    ("tf", "tensorflow"),
];
/// Node packages that may be auto-installed when mentioned in a skill's
/// compatibility string. Entries are npm install names, including scoped
/// names ("@scope/pkg"), which the word-boundary matcher handles whole.
const NODE_PACKAGES: &[&str] = &[
    // HTTP clients
    "axios",
    "node-fetch",
    "got",
    "superagent",
    "ky",
    // Web frameworks
    "express",
    "koa",
    "fastify",
    "hapi",
    "nest",
    "next",
    // Utility libraries
    "lodash",
    "underscore",
    "ramda",
    "fp-ts",
    // Dates & times
    "moment",
    "dayjs",
    "date-fns",
    "luxon",
    // Scraping / browser automation
    "cheerio",
    "puppeteer",
    "playwright",
    // Databases / ORMs
    "mongoose",
    "sequelize",
    "knex",
    "prisma",
    "typeorm",
    "ioredis",
    "redis",
    // Cloud / AI SDKs
    "aws-sdk",
    "@aws-sdk/client-s3",
    "googleapis",
    "openai",
    "@anthropic-ai/sdk",
    // Testing
    "jest",
    "mocha",
    "chai",
    "vitest",
    "sinon",
    "@playwright/test",
    // CLI / terminal
    "commander",
    "yargs",
    "inquirer",
    "meow",
    "cac",
    "chalk",
    "ora",
    "boxen",
    "cli-table3",
    "figures",
    // Config
    "dotenv",
    "convict",
    // Crypto / IDs
    "jsonwebtoken",
    "bcrypt",
    "crypto-js",
    "uuid",
    "nanoid",
    // Realtime / images
    "socket.io",
    "ws",
    "sharp",
    "jimp",
    // Frontend frameworks
    "react",
    "vue",
    "svelte",
    "solid-js",
    "angular",
    // Bundlers / TypeScript tooling
    "webpack",
    "vite",
    "esbuild",
    "rollup",
    "parcel",
    "typescript",
    "ts-node",
    "tsx",
    // Validation
    "zod",
    "yup",
    "joi",
    "ajv",
    // Filesystem / process helpers
    "glob",
    "minimatch",
    "chokidar",
    "fs-extra",
    "debug",
    "winston",
    "pino",
    "p-limit",
    "p-queue",
    "p-retry",
    "execa",
    "cross-env",
    "cross-spawn",
    "agent-browser",
];
/// (alias, canonical npm name) pairs for Node packages.
const NODE_ALIASES: &[(&str, &str)] = &[("socket.io-client", "socket.io")];
#[cfg(test)]
mod tests {
    use super::*;

    // Whitelisted package names mentioned verbatim are detected.
    #[test]
    fn test_whitelist_matching_python() {
        let pkgs = resolve_from_whitelist("Requires Python 3.x with requests library", "python");
        assert!(pkgs.contains(&"requests".to_string()));
    }

    // Import aliases (cv2, PIL) resolve to their canonical pip names.
    #[test]
    fn test_whitelist_matching_aliases() {
        let pkgs = resolve_from_whitelist("Requires Python 3.x with cv2, PIL", "python");
        assert!(pkgs.contains(&"opencv-python".to_string()));
        assert!(pkgs.contains(&"pillow".to_string()));
    }

    // The node whitelist works the same way as the python one.
    #[test]
    fn test_whitelist_matching_node() {
        let pkgs = resolve_from_whitelist("Requires Node.js with axios, lodash", "node");
        assert!(pkgs.contains(&"axios".to_string()));
        assert!(pkgs.contains(&"lodash".to_string()));
    }

    // "request" must not match "requests": word-boundary semantics.
    #[test]
    fn test_whitelist_no_partial_match() {
        let pkgs = resolve_from_whitelist("Requires request handling", "python");
        assert!(!pkgs.contains(&"requests".to_string()));
    }

    // Non-whitelisted packages are reported; whitelisted ones are not.
    #[test]
    fn test_validate_against_whitelist() {
        let unknown = validate_against_whitelist(
            &["requests".to_string(), "my-custom-pkg".to_string()],
            "python",
        );
        assert_eq!(unknown, vec!["my-custom-pkg".to_string()]);
    }

    #[test]
    fn test_whitelist_matching_common_python_data_and_ai_packages() {
        let pkgs = resolve_from_whitelist(
            "Requires Python 3.x with pyodps, polars, pyarrow, openai and langchain",
            "python",
        );
        assert!(pkgs.contains(&"pyodps".to_string()));
        assert!(pkgs.contains(&"polars".to_string()));
        assert!(pkgs.contains(&"pyarrow".to_string()));
        assert!(pkgs.contains(&"openai".to_string()));
        assert!(pkgs.contains(&"langchain".to_string()));
    }

    #[test]
    fn test_whitelist_matching_python_aliases_for_odps_and_llamaindex() {
        let pkgs = resolve_from_whitelist("Requires Python 3.x with odps and llamaindex", "python");
        assert!(pkgs.contains(&"pyodps".to_string()));
        assert!(pkgs.contains(&"llama-index".to_string()));
    }

    // Scoped npm names (@scope/pkg) must match as whole tokens too.
    #[test]
    fn test_whitelist_matching_common_node_ai_packages() {
        let pkgs = resolve_from_whitelist(
            "Requires Node.js with openai, @anthropic-ai/sdk, and @playwright/test",
            "node",
        );
        assert!(pkgs.contains(&"openai".to_string()));
        assert!(pkgs.contains(&"@anthropic-ai/sdk".to_string()));
        assert!(pkgs.contains(&"@playwright/test".to_string()));
    }

    // Same input must always hash identically (lock staleness check relies on it).
    #[test]
    fn test_compatibility_hash_deterministic() {
        let h1 = compatibility_hash(Some("Requires Python 3.x"));
        let h2 = compatibility_hash(Some("Requires Python 3.x"));
        assert_eq!(h1, h2);
    }

    #[test]
    fn test_word_boundary_match() {
        assert!(is_word_boundary_match(
            "requires requests library",
            "requests"
        ));
        assert!(!is_word_boundary_match(
            "requires request handling",
            "requests"
        ));
        assert!(is_word_boundary_match("pandas, numpy", "pandas"));
        assert!(is_word_boundary_match("pandas, numpy", "numpy"));
    }
}
/// Abstraction over an LLM backend used to extract package names from a
/// compatibility string. Only available with the `async-resolve` feature.
#[cfg(feature = "async-resolve")]
#[async_trait::async_trait]
pub trait LlmProvider: Send + Sync {
    /// Sends `prompt` to `model` and returns the raw completion text, or
    /// `None` on any failure.
    async fn extract_packages(&self, model: &str, prompt: &str) -> Option<String>;
}
#[cfg(feature = "async-resolve")]
mod async_resolve {
    use super::*;

    /// Asks the LLM backend to name the installable packages implied by a
    /// compatibility string, then keeps only candidates that actually exist
    /// on the public registry (PyPI for Python, npm for Node).
    ///
    /// Returns `None` when the LLM answers `NONE`/empty, when no candidate
    /// can be parsed, or when no candidate survives registry verification.
    pub async fn resolve_from_llm<L: LlmProvider>(
        llm: &L,
        model: &str,
        compatibility: &str,
        language: &str,
    ) -> Option<Vec<String>> {
        // The prompt instructs the model to emit one bare package name per
        // line, or the literal NONE sentinel.
        let prompt = format!(
            "Extract the exact installable package names from this compatibility string.\n\
            Language: {}\n\
            Compatibility: \"{}\"\n\n\
            Rules:\n\
            - Only return package names that can be installed via pip (Python) or npm (Node.js).\n\
            - Do NOT include standard library modules (os, sys, json, etc.).\n\
            - Do NOT include language runtimes (Python, Node.js).\n\
            - Do NOT include system tools (git, docker, etc.).\n\
            - Return one package name per line, nothing else.\n\
            - If no installable packages, return NONE.\n\n\
            Output:",
            language, compatibility
        );
        let resp = llm.extract_packages(model, &prompt).await?;
        let text = resp.trim();
        if text.eq_ignore_ascii_case("NONE") || text.is_empty() {
            return None;
        }
        // One candidate per line. Strip surrounding decoration (bullets,
        // quotes, punctuation) but keep every character legal in a package
        // name — including '@' and '/', so scoped npm names such as
        // "@scope/pkg" survive. Previously the leading '@' was trimmed away,
        // which made registry verification of scoped packages always fail.
        let candidates: Vec<String> = text
            .lines()
            .map(|l| {
                l.trim().trim_matches(|c: char| {
                    !c.is_alphanumeric()
                        && c != '-'
                        && c != '_'
                        && c != '.'
                        && c != '@'
                        && c != '/'
                })
            })
            .filter(|l| !l.is_empty())
            .map(|l| l.to_lowercase())
            .collect();
        if candidates.is_empty() {
            return None;
        }
        // Build one HTTP client and reuse it for every lookup; the previous
        // version constructed a fresh client per candidate. If the client
        // cannot be built, no candidate can be verified, so resolution fails.
        let client = reqwest::Client::builder()
            .timeout(std::time::Duration::from_secs(5))
            .build()
            .ok()?;
        let mut verified = Vec::new();
        for pkg in &candidates {
            if verify_package(&client, pkg, language).await {
                verified.push(pkg.clone());
            } else {
                tracing::debug!("LLM-suggested package '{}' failed verification", pkg);
            }
        }
        if verified.is_empty() {
            None
        } else {
            Some(verified)
        }
    }

    /// Checks that `name` exists on the language's public registry via a
    /// HEAD request. Any failure — unknown language, network error, non-2xx
    /// status — counts as "does not exist".
    async fn verify_package(client: &reqwest::Client, name: &str, language: &str) -> bool {
        let url = match language {
            "python" => format!("https://pypi.org/pypi/{}/json", name),
            "node" => format!("https://registry.npmjs.org/{}", name),
            _ => return false,
        };
        match client.head(&url).send().await {
            Ok(resp) => resp.status().is_success(),
            Err(_) => false,
        }
    }

    /// Full async resolution pipeline: a fresh lock file wins; otherwise
    /// LLM extraction runs (when both a provider and a model are supplied);
    /// otherwise the whitelist scan; otherwise the result is empty.
    ///
    /// Successful LLM/whitelist resolutions are persisted back to the lock
    /// file; write failures are ignored since resolution itself succeeded.
    /// `allow_unknown` skips whitelist validation of the resolved set.
    pub async fn resolve_packages<L: LlmProvider>(
        skill_dir: &Path,
        compatibility: Option<&str>,
        language: &str,
        llm: Option<&L>,
        model: Option<&str>,
        allow_unknown: bool,
    ) -> Result<ResolvedDependencies> {
        // 1. A valid lock file short-circuits everything else.
        if let Some(packages) = resolve_from_lock(skill_dir, compatibility) {
            tracing::debug!("Resolved from lock: {:?}", packages);
            return Ok(ResolvedDependencies {
                packages,
                resolver: ResolverKind::Lock,
                unknown_packages: Vec::new(),
            });
        }
        let compat_str = compatibility.unwrap_or("");
        // 2. LLM extraction, only when a provider and model are configured.
        if !compat_str.is_empty() {
            if let (Some(client), Some(model)) = (llm, model) {
                match resolve_from_llm(client, model, compat_str, language).await {
                    Some(packages) if !packages.is_empty() => {
                        let unknown = if allow_unknown {
                            Vec::new()
                        } else {
                            validate_against_whitelist(&packages, language)
                        };
                        let _ = write_lock(
                            skill_dir,
                            compatibility,
                            language,
                            &packages,
                            &ResolverKind::Llm,
                        );
                        return Ok(ResolvedDependencies {
                            packages,
                            resolver: ResolverKind::Llm,
                            unknown_packages: unknown,
                        });
                    }
                    _ => {
                        tracing::debug!("LLM inference returned no packages, falling through");
                    }
                }
            }
        }
        // 3. Whitelist scan as the deterministic fallback.
        if !compat_str.is_empty() {
            let packages = resolve_from_whitelist(compat_str, language);
            if !packages.is_empty() {
                let unknown = if allow_unknown {
                    Vec::new()
                } else {
                    validate_against_whitelist(&packages, language)
                };
                let _ = write_lock(
                    skill_dir,
                    compatibility,
                    language,
                    &packages,
                    &ResolverKind::Whitelist,
                );
                return Ok(ResolvedDependencies {
                    packages,
                    resolver: ResolverKind::Whitelist,
                    unknown_packages: unknown,
                });
            }
        }
        // 4. Nothing to install.
        Ok(ResolvedDependencies {
            packages: Vec::new(),
            resolver: ResolverKind::None,
            unknown_packages: Vec::new(),
        })
    }
}
#[cfg(feature = "async-resolve")]
pub use async_resolve::{resolve_from_llm, resolve_packages};