1use anyhow::Result;
19use std::collections::{HashMap, HashSet};
20use std::path::Path;
21
/// Outcome of dependency resolution for a skill.
#[derive(Debug, Clone)]
pub struct ResolvedDependencies {
    /// Installable package names, ready to hand to the package manager.
    pub packages: Vec<String>,
    /// Which resolution strategy produced `packages`.
    pub resolver: ResolverKind,
    /// Resolved packages that are absent from the built-in whitelist.
    /// Empty when unknown packages are explicitly allowed.
    pub unknown_packages: Vec<String>,
}
34
/// Strategy that produced a resolution result. Its `Display` form is the
/// lowercase token stored in the `.skilllite.lock` "resolver" field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ResolverKind {
    /// Loaded from a fresh `.skilllite.lock` file.
    Lock,
    /// Extracted by an LLM and verified against the package registry.
    Llm,
    /// Matched against the built-in package whitelist.
    Whitelist,
    /// No packages could be resolved.
    None,
}
43
44impl std::fmt::Display for ResolverKind {
45 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46 match self {
47 Self::Lock => write!(f, "lock"),
48 Self::Llm => write!(f, "llm"),
49 Self::Whitelist => write!(f, "whitelist"),
50 Self::None => write!(f, "none"),
51 }
52 }
53}
54
55pub fn resolve_from_lock(skill_dir: &Path, compatibility: Option<&str>) -> Option<Vec<String>> {
59 let lock_path = skill_dir.join(".skilllite.lock");
60 let content = skilllite_fs::read_file(&lock_path).ok()?;
61 let lock: serde_json::Value = serde_json::from_str(&content).ok()?;
62
63 let current_hash = compatibility_hash(compatibility);
64 if lock.get("compatibility_hash")?.as_str()? != current_hash {
65 tracing::debug!("Lock file stale: hash mismatch");
66 return None;
67 }
68
69 let arr = lock.get("resolved_packages")?.as_array()?;
70 let packages: Vec<String> = arr
71 .iter()
72 .filter_map(|v| v.as_str().map(String::from))
73 .collect();
74 if packages.is_empty() {
75 None
76 } else {
77 Some(packages)
78 }
79}
80
81pub fn write_lock(
83 skill_dir: &Path,
84 compatibility: Option<&str>,
85 language: &str,
86 packages: &[String],
87 resolver: &ResolverKind,
88) -> Result<()> {
89 let mut sorted = packages.to_vec();
90 sorted.sort();
91
92 let lock = serde_json::json!({
93 "compatibility_hash": compatibility_hash(compatibility),
94 "language": language,
95 "resolved_packages": sorted,
96 "resolved_at": chrono::Utc::now().to_rfc3339(),
97 "resolver": resolver.to_string(),
98 });
99
100 let lock_path = skill_dir.join(".skilllite.lock");
101 skilllite_fs::write_file(&lock_path, &(serde_json::to_string_pretty(&lock)? + "\n"))?;
102 Ok(())
103}
104
105fn compatibility_hash(compat: Option<&str>) -> String {
106 use sha2::{Digest, Sha256};
107 let mut h = Sha256::new();
108 h.update(compat.unwrap_or("").as_bytes());
109 hex::encode(h.finalize())
110}
111
112pub fn resolve_from_whitelist(compatibility: &str, language: &str) -> Vec<String> {
116 let whitelist = get_whitelist();
117 let compat_lower = compatibility.to_lowercase();
118
119 let (packages, aliases) = match language {
120 "python" => (&whitelist.python_packages, &whitelist.python_aliases),
121 "node" => (&whitelist.node_packages, &whitelist.node_aliases),
122 _ => return Vec::new(),
123 };
124
125 let mut matched = Vec::new();
126
127 for pkg in packages {
129 if is_word_boundary_match(&compat_lower, &pkg.to_lowercase()) {
130 matched.push(pkg.clone());
131 }
132 }
133
134 for (alias, canonical) in aliases {
136 if is_word_boundary_match(&compat_lower, &alias.to_lowercase())
137 && !matched.contains(canonical)
138 {
139 matched.push(canonical.clone());
140 }
141 }
142
143 matched
144}
145
/// Reports whether `word` occurs in `text` delimited by word boundaries:
/// the characters immediately before and after the occurrence (if any)
/// must be non-alphanumeric. Comparison is exact (callers lowercase both
/// sides beforehand); works on `char`s, so multi-byte text is handled.
fn is_word_boundary_match(text: &str, word: &str) -> bool {
    let needle: Vec<char> = word.chars().collect();
    let haystack: Vec<char> = text.chars().collect();

    // A needle longer than the haystack can never occur.
    if needle.len() > haystack.len() {
        return false;
    }

    for start in 0..=(haystack.len() - needle.len()) {
        let end = start + needle.len();
        if haystack[start..end] != needle[..] {
            continue;
        }
        let left_boundary = start == 0 || !haystack[start - 1].is_alphanumeric();
        let right_boundary = end == haystack.len() || !haystack[end].is_alphanumeric();
        if left_boundary && right_boundary {
            return true;
        }
    }
    false
}
171
172pub fn validate_against_whitelist(packages: &[String], language: &str) -> Vec<String> {
177 let whitelist = get_whitelist();
178 let known: HashSet<String> = match language {
179 "python" => whitelist
180 .python_packages
181 .iter()
182 .map(|p| p.to_lowercase())
183 .collect(),
184 "node" => whitelist
185 .node_packages
186 .iter()
187 .map(|p| p.to_lowercase())
188 .collect(),
189 _ => HashSet::new(),
190 };
191
192 packages
193 .iter()
194 .filter(|p| {
195 let normalized = p.to_lowercase().replace('_', "-");
196 let base = normalized.split('[').next().unwrap_or(&normalized);
198 !known.contains(base)
199 })
200 .cloned()
201 .collect()
202}
203
204pub fn resolve_packages_sync(
210 skill_dir: &Path,
211 compatibility: Option<&str>,
212 language: &str,
213 allow_unknown: bool,
214) -> Result<ResolvedDependencies> {
215 if let Some(packages) = resolve_from_lock(skill_dir, compatibility) {
217 tracing::debug!("Resolved from lock: {:?}", packages);
218 return Ok(ResolvedDependencies {
219 packages,
220 resolver: ResolverKind::Lock,
221 unknown_packages: Vec::new(),
222 });
223 }
224
225 let compat_str = compatibility.unwrap_or("");
227 if !compat_str.is_empty() {
228 let packages = resolve_from_whitelist(compat_str, language);
229 if !packages.is_empty() {
230 let unknown = if allow_unknown {
231 Vec::new()
232 } else {
233 validate_against_whitelist(&packages, language)
234 };
235
236 let _ = write_lock(
238 skill_dir,
239 compatibility,
240 language,
241 &packages,
242 &ResolverKind::Whitelist,
243 );
244
245 return Ok(ResolvedDependencies {
246 packages,
247 resolver: ResolverKind::Whitelist,
248 unknown_packages: unknown,
249 });
250 }
251 }
252
253 Ok(ResolvedDependencies {
254 packages: Vec::new(),
255 resolver: ResolverKind::None,
256 unknown_packages: Vec::new(),
257 })
258}
259
/// Owned view of the built-in package whitelist tables.
struct PackagesWhitelist {
    // Known installable Python package names.
    python_packages: Vec<String>,
    // Alias (typically the import name) -> canonical Python package.
    python_aliases: HashMap<String, String>,
    // Known installable npm package names.
    node_packages: Vec<String>,
    // Alias -> canonical npm package.
    node_aliases: HashMap<String, String>,
}
268
269fn get_whitelist() -> PackagesWhitelist {
270 PackagesWhitelist {
271 python_packages: PYTHON_PACKAGES.iter().map(|s| s.to_string()).collect(),
272 python_aliases: PYTHON_ALIASES
273 .iter()
274 .map(|(k, v)| (k.to_string(), v.to_string()))
275 .collect(),
276 node_packages: NODE_PACKAGES.iter().map(|s| s.to_string()).collect(),
277 node_aliases: NODE_ALIASES
278 .iter()
279 .map(|(k, v)| (k.to_string(), v.to_string()))
280 .collect(),
281 }
282}
283
/// Built-in whitelist of well-known, installable PyPI package names.
/// Matched (case-insensitively, on word boundaries) against a skill's
/// compatibility string by `resolve_from_whitelist`.
const PYTHON_PACKAGES: &[&str] = &[
    // HTTP clients
    "requests",
    "httpx",
    "aiohttp",
    "urllib3",
    "httplib2",
    // Data science / dataframes
    "numpy",
    "pandas",
    "scipy",
    "scikit-learn",
    "statsmodels",
    "polars",
    "pyarrow",
    "duckdb",
    "openpyxl",
    // Machine learning / AI
    "tensorflow",
    "keras",
    "torch",
    "pytorch",
    "transformers",
    "xgboost",
    "lightgbm",
    "catboost",
    "onnx",
    "onnxruntime",
    "openai",
    "anthropic",
    "langchain",
    "langgraph",
    "llama-index",
    // Visualization
    "matplotlib",
    "seaborn",
    "plotly",
    "bokeh",
    "altair",
    // Web frameworks / servers
    "flask",
    "django",
    "fastapi",
    "starlette",
    "uvicorn",
    "gunicorn",
    "sanic",
    "tornado",
    "bottle",
    "pyramid",
    // Scraping / HTML parsing
    "beautifulsoup4",
    "lxml",
    "scrapy",
    "selenium",
    "playwright",
    "html5lib",
    "cssselect",
    "html2text",
    // Imaging
    "pillow",
    "opencv-python",
    "imageio",
    "scikit-image",
    // Config / serialization formats
    "pyyaml",
    "toml",
    "tomli",
    "python-dotenv",
    "configparser",
    // Databases / drivers
    "sqlalchemy",
    "psycopg2",
    "psycopg2-binary",
    "pymysql",
    "redis",
    "pymongo",
    "motor",
    "asyncpg",
    "aiosqlite",
    "peewee",
    "pyodps",
    // Cloud SDKs
    "boto3",
    "botocore",
    "google-cloud-storage",
    "google-auth",
    "azure-storage-blob",
    "azure-identity",
    "oss2",
    // Testing
    "pytest",
    "mock",
    "responses",
    "fakeredis",
    "factory-boy",
    // CLI / terminal
    "click",
    "typer",
    "argparse",
    "fire",
    "rich",
    "tqdm",
    "colorama",
    // Data modeling / validation
    "pydantic",
    "attrs",
    "dataclasses-json",
    "marshmallow",
    "cattrs",
    // Templating
    "jinja2",
    "mako",
    // Task queues
    "celery",
    "rq",
    "dramatiq",
    // Crypto / auth
    "cryptography",
    "pyjwt",
    "passlib",
    "bcrypt",
    "paramiko",
    // Logging
    "loguru",
    "structlog",
    // Async runtimes / IO
    "anyio",
    "trio",
    "aiofiles",
    // Dates, text, and misc utilities
    "arrow",
    "pendulum",
    "python-dateutil",
    "pytz",
    "chardet",
    "charset-normalizer",
    "orjson",
    "ujson",
    "tenacity",
    "tox",
    "nox",
    "pre-commit",
    "mypy",
    "black",
    "ruff",
    "isort",
    "setuptools",
    "wheel",
    "pip",
    "poetry",
];
438
/// Alias -> canonical PyPI package mapping. Keys are names people write
/// in compatibility strings (usually the Python import name); values are
/// entries in `PYTHON_PACKAGES`.
// NOTE(review): "pytorch" appears both here (alias -> "torch") and in
// PYTHON_PACKAGES, so a string mentioning "pytorch" resolves to both
// "pytorch" and "torch" — confirm this is intended.
const PYTHON_ALIASES: &[(&str, &str)] = &[
    ("cv2", "opencv-python"),
    ("PIL", "pillow"),
    ("sklearn", "scikit-learn"),
    ("bs4", "beautifulsoup4"),
    ("yaml", "pyyaml"),
    ("dotenv", "python-dotenv"),
    ("jwt", "pyjwt"),
    ("odps", "pyodps"),
    ("llamaindex", "llama-index"),
    ("skimage", "scikit-image"),
    ("pytorch", "torch"),
    ("tf", "tensorflow"),
];
454
/// Built-in whitelist of well-known, installable npm package names.
/// Matched (case-insensitively, on word boundaries) against a skill's
/// compatibility string by `resolve_from_whitelist`.
const NODE_PACKAGES: &[&str] = &[
    // HTTP clients
    "axios",
    "node-fetch",
    "got",
    "superagent",
    "ky",
    // Web frameworks
    "express",
    "koa",
    "fastify",
    "hapi",
    "nest",
    "next",
    // Utility libraries
    "lodash",
    "underscore",
    "ramda",
    "fp-ts",
    // Dates
    "moment",
    "dayjs",
    "date-fns",
    "luxon",
    // Scraping / browser automation
    "cheerio",
    "puppeteer",
    "playwright",
    // Databases / ORMs
    "mongoose",
    "sequelize",
    "knex",
    "prisma",
    "typeorm",
    // Redis clients
    "ioredis",
    "redis",
    // Cloud / AI SDKs
    "aws-sdk",
    "@aws-sdk/client-s3",
    "googleapis",
    "openai",
    "@anthropic-ai/sdk",
    // Testing
    "jest",
    "mocha",
    "chai",
    "vitest",
    "sinon",
    "@playwright/test",
    // CLI argument parsing
    "commander",
    "yargs",
    "inquirer",
    "meow",
    "cac",
    // Terminal UI
    "chalk",
    "ora",
    "boxen",
    "cli-table3",
    "figures",
    // Configuration
    "dotenv",
    "convict",
    // Auth / crypto / ids
    "jsonwebtoken",
    "bcrypt",
    "crypto-js",
    "uuid",
    "nanoid",
    // Realtime / websockets
    "socket.io",
    "ws",
    // Image processing
    "sharp",
    "jimp",
    // Frontend frameworks
    "react",
    "vue",
    "svelte",
    "solid-js",
    "angular",
    // Bundlers / build tools
    "webpack",
    "vite",
    "esbuild",
    "rollup",
    "parcel",
    // TypeScript tooling
    "typescript",
    "ts-node",
    "tsx",
    // Validation
    "zod",
    "yup",
    "joi",
    "ajv",
    // Filesystem / process / misc
    "glob",
    "minimatch",
    "chokidar",
    "fs-extra",
    "debug",
    "winston",
    "pino",
    "p-limit",
    "p-queue",
    "p-retry",
    "execa",
    "cross-env",
    "cross-spawn",
    "agent-browser",
];
570
571const NODE_ALIASES: &[(&str, &str)] = &[("socket.io-client", "socket.io")];
573
#[cfg(test)]
mod tests {
    //! Unit tests for whitelist matching, alias resolution, whitelist
    //! validation, hash determinism, and word-boundary matching.
    use super::*;

    #[test]
    fn test_whitelist_matching_python() {
        let pkgs = resolve_from_whitelist("Requires Python 3.x with requests library", "python");
        assert!(pkgs.contains(&"requests".to_string()));
    }

    #[test]
    fn test_whitelist_matching_aliases() {
        let pkgs = resolve_from_whitelist("Requires Python 3.x with cv2, PIL", "python");
        assert!(pkgs.contains(&"opencv-python".to_string()));
        assert!(pkgs.contains(&"pillow".to_string()));
    }

    #[test]
    fn test_whitelist_matching_node() {
        let pkgs = resolve_from_whitelist("Requires Node.js with axios, lodash", "node");
        assert!(pkgs.contains(&"axios".to_string()));
        assert!(pkgs.contains(&"lodash".to_string()));
    }

    #[test]
    fn test_whitelist_no_partial_match() {
        // "request" must not match the "requests" package.
        let pkgs = resolve_from_whitelist("Requires request handling", "python");
        assert!(!pkgs.contains(&"requests".to_string()));
    }

    #[test]
    fn test_validate_against_whitelist() {
        let unknown = validate_against_whitelist(
            &["requests".to_string(), "my-custom-pkg".to_string()],
            "python",
        );
        assert_eq!(unknown, vec!["my-custom-pkg".to_string()]);
    }

    #[test]
    fn test_whitelist_matching_common_python_data_and_ai_packages() {
        let pkgs = resolve_from_whitelist(
            "Requires Python 3.x with pyodps, polars, pyarrow, openai and langchain",
            "python",
        );
        assert!(pkgs.contains(&"pyodps".to_string()));
        assert!(pkgs.contains(&"polars".to_string()));
        assert!(pkgs.contains(&"pyarrow".to_string()));
        assert!(pkgs.contains(&"openai".to_string()));
        assert!(pkgs.contains(&"langchain".to_string()));
    }

    #[test]
    fn test_whitelist_matching_python_aliases_for_odps_and_llamaindex() {
        let pkgs = resolve_from_whitelist("Requires Python 3.x with odps and llamaindex", "python");
        assert!(pkgs.contains(&"pyodps".to_string()));
        assert!(pkgs.contains(&"llama-index".to_string()));
    }

    #[test]
    fn test_whitelist_matching_common_node_ai_packages() {
        let pkgs = resolve_from_whitelist(
            "Requires Node.js with openai, @anthropic-ai/sdk, and @playwright/test",
            "node",
        );
        assert!(pkgs.contains(&"openai".to_string()));
        assert!(pkgs.contains(&"@anthropic-ai/sdk".to_string()));
        assert!(pkgs.contains(&"@playwright/test".to_string()));
    }

    #[test]
    fn test_compatibility_hash_deterministic() {
        let h1 = compatibility_hash(Some("Requires Python 3.x"));
        let h2 = compatibility_hash(Some("Requires Python 3.x"));
        assert_eq!(h1, h2);
    }

    #[test]
    fn test_word_boundary_match() {
        assert!(is_word_boundary_match(
            "requires requests library",
            "requests"
        ));
        assert!(!is_word_boundary_match(
            "requires request handling",
            "requests"
        ));
        assert!(is_word_boundary_match("pandas, numpy", "pandas"));
        assert!(is_word_boundary_match("pandas, numpy", "numpy"));
    }
}
666
/// Abstraction over an LLM backend used by the async resolver to extract
/// package names from a skill's compatibility string.
#[cfg(feature = "async-resolve")]
#[async_trait::async_trait]
pub trait LlmProvider: Send + Sync {
    /// Sends `prompt` to `model` and returns the raw completion text,
    /// or `None` if the call failed.
    async fn extract_packages(&self, model: &str, prompt: &str) -> Option<String>;
}
678
#[cfg(feature = "async-resolve")]
mod async_resolve {
    use super::*;

    /// Asks the LLM to extract installable package names from a
    /// compatibility string, then verifies each candidate against the
    /// public registry (PyPI / npm).
    ///
    /// Returns `None` when the LLM call fails, reports no packages, or
    /// every candidate fails registry verification.
    pub async fn resolve_from_llm<L: LlmProvider>(
        llm: &L,
        model: &str,
        compatibility: &str,
        language: &str,
    ) -> Option<Vec<String>> {
        let prompt = format!(
            "Extract the exact installable package names from this compatibility string.\n\
            Language: {}\n\
            Compatibility: \"{}\"\n\n\
            Rules:\n\
            - Only return package names that can be installed via pip (Python) or npm (Node.js).\n\
            - Do NOT include standard library modules (os, sys, json, etc.).\n\
            - Do NOT include language runtimes (Python, Node.js).\n\
            - Do NOT include system tools (git, docker, etc.).\n\
            - Return one package name per line, nothing else.\n\
            - If no installable packages, return NONE.\n\n\
            Output:",
            language, compatibility
        );

        let resp = llm.extract_packages(model, &prompt).await?;
        let text = resp.trim();

        // The prompt asks for the literal sentinel "NONE" when nothing
        // is installable.
        if text.eq_ignore_ascii_case("NONE") || text.is_empty() {
            return None;
        }

        // Strip decoration (bullets, quotes, punctuation) from each line
        // while keeping characters legal in package names. '@' and '/'
        // are kept so scoped npm packages (e.g. "@anthropic-ai/sdk")
        // survive trimming; previously they were stripped, so scoped
        // packages always failed registry verification.
        let candidates: Vec<String> = text
            .lines()
            .map(|l| {
                l.trim().trim_matches(|c: char| {
                    !c.is_alphanumeric()
                        && c != '-'
                        && c != '_'
                        && c != '.'
                        && c != '@'
                        && c != '/'
                })
            })
            .filter(|l| !l.is_empty())
            .map(|l| l.to_lowercase())
            .collect();

        if candidates.is_empty() {
            return None;
        }

        // Build one HTTP client and reuse it for every candidate rather
        // than constructing a client per package.
        let client = reqwest::Client::builder()
            .timeout(std::time::Duration::from_secs(5))
            .build()
            .ok()?;

        let mut verified = Vec::new();
        for pkg in &candidates {
            if verify_package(&client, pkg, language).await {
                verified.push(pkg.clone());
            } else {
                tracing::debug!("LLM-suggested package '{}' failed verification", pkg);
            }
        }

        if verified.is_empty() {
            None
        } else {
            Some(verified)
        }
    }

    /// Checks that `name` exists on the language's public registry via a
    /// HEAD request. Any request failure counts as "not verified".
    async fn verify_package(client: &reqwest::Client, name: &str, language: &str) -> bool {
        let url = match language {
            "python" => format!("https://pypi.org/pypi/{}/json", name),
            "node" => format!("https://registry.npmjs.org/{}", name),
            _ => return false,
        };

        match client.head(&url).send().await {
            Ok(resp) => resp.status().is_success(),
            Err(_) => false,
        }
    }

    /// Async resolution pipeline: lock file first, then LLM inference
    /// (when a provider and model are supplied), then whitelist
    /// matching. Mirrors `resolve_packages_sync` with the extra LLM
    /// stage inserted between lock and whitelist.
    pub async fn resolve_packages<L: LlmProvider>(
        skill_dir: &Path,
        compatibility: Option<&str>,
        language: &str,
        llm: Option<&L>,
        model: Option<&str>,
        allow_unknown: bool,
    ) -> Result<ResolvedDependencies> {
        // Fast path: a valid lock file answers without LLM or matching.
        if let Some(packages) = resolve_from_lock(skill_dir, compatibility) {
            tracing::debug!("Resolved from lock: {:?}", packages);
            return Ok(ResolvedDependencies {
                packages,
                resolver: ResolverKind::Lock,
                unknown_packages: Vec::new(),
            });
        }

        let compat_str = compatibility.unwrap_or("");

        // LLM stage — only when both a provider and a model are given.
        if !compat_str.is_empty() {
            if let (Some(client), Some(model)) = (llm, model) {
                match resolve_from_llm(client, model, compat_str, language).await {
                    Some(packages) if !packages.is_empty() => {
                        let unknown = if allow_unknown {
                            Vec::new()
                        } else {
                            validate_against_whitelist(&packages, language)
                        };

                        // Best-effort cache; a write failure must not
                        // fail resolution.
                        let _ = write_lock(
                            skill_dir,
                            compatibility,
                            language,
                            &packages,
                            &ResolverKind::Llm,
                        );

                        return Ok(ResolvedDependencies {
                            packages,
                            resolver: ResolverKind::Llm,
                            unknown_packages: unknown,
                        });
                    }
                    _ => {
                        tracing::debug!("LLM inference returned no packages, falling through");
                    }
                }
            }
        }

        // Whitelist fallback stage.
        if !compat_str.is_empty() {
            let packages = resolve_from_whitelist(compat_str, language);
            if !packages.is_empty() {
                let unknown = if allow_unknown {
                    Vec::new()
                } else {
                    validate_against_whitelist(&packages, language)
                };

                let _ = write_lock(
                    skill_dir,
                    compatibility,
                    language,
                    &packages,
                    &ResolverKind::Whitelist,
                );

                return Ok(ResolvedDependencies {
                    packages,
                    resolver: ResolverKind::Whitelist,
                    unknown_packages: unknown,
                });
            }
        }

        // Nothing resolved by any stage.
        Ok(ResolvedDependencies {
            packages: Vec::new(),
            resolver: ResolverKind::None,
            unknown_packages: Vec::new(),
        })
    }
}
848
849#[cfg(feature = "async-resolve")]
850pub use async_resolve::{resolve_from_llm, resolve_packages};