Skip to main content

seshat_core/
dependency.rs

1//! Unified dependency domain taxonomy and package classification.
2//!
3//! Provides a single [`DependencyDomain`] enum that classifies dependencies
4//! by their functional role, plus [`classify_domain`] — the **single source of
5//! truth** for mapping package names to domains. Both the scanner (manifest
6//! analysis) and the detectors (usage analysis) call this function.
7
8use serde::{Deserialize, Serialize};
9
10use crate::ir::Language;
11
12/// Functional domain a dependency belongs to.
13///
14/// Covers the union of all categories previously split across
15/// `DependencyCategory` (scanner) and the old `DependencyDomain` (detectors).
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
17#[serde(rename_all = "snake_case")]
18pub enum DependencyDomain {
19    /// HTTP clients (reqwest, axios, httpx, etc.).
20    Http,
21    /// Web frameworks (actix-web, express, flask, django, axum, rocket, etc.).
22    WebFramework,
23    /// Logging and observability (tracing, winston, loguru, etc.).
24    Logging,
25    /// Testing frameworks and utilities (jest, pytest, proptest, etc.).
26    Testing,
27    /// Input validation and schema enforcement (zod, pydantic, validator, etc.).
28    Validation,
29    /// Serialization and deserialization (serde, protobuf, msgpack, etc.).
30    Serialization,
31    /// Database clients and ORMs (sqlx, prisma, sqlalchemy, etc.).
32    Database,
33    /// CLI argument parsing (clap, commander, click, etc.).
34    Cli,
35    /// Async runtimes and utilities (tokio, asyncio, trio, etc.).
36    AsyncRuntime,
37    /// Cryptography and security (ring, bcrypt, hashlib, etc.).
38    Crypto,
39    /// General-purpose utility libraries.
40    Utilities,
41    /// Could not be classified into any known domain.
42    Unknown,
43}
44
45impl DependencyDomain {
46    /// Human-readable name used in finding descriptions.
47    pub fn as_str(self) -> &'static str {
48        match self {
49            Self::Http => "HTTP",
50            Self::WebFramework => "web framework",
51            Self::Logging => "logging",
52            Self::Testing => "testing",
53            Self::Validation => "validation",
54            Self::Serialization => "serialization",
55            Self::Database => "database",
56            Self::Cli => "CLI",
57            Self::AsyncRuntime => "async runtime",
58            Self::Crypto => "crypto",
59            Self::Utilities => "utilities",
60            Self::Unknown => "unknown",
61        }
62    }
63}
64
65// ---------------------------------------------------------------------------
66// Module-path helpers (single source of truth)
67// ---------------------------------------------------------------------------
68
/// Extract the top-level package/module name from an import path or
/// callee, regardless of the source language's separator.
///
/// The result is everything before the first separator character. The
/// separators recognised are:
/// - `:` — Rust: `tracing::subscriber` → `tracing`, `crate::foo` → `crate`
/// - `.` — Python: `logging.config` → `logging`
/// - `/` — npm: `@scope/package` → `@scope`, `lodash/fp` → `lodash`
/// - ` ` — a space also terminates the name, in any language.
///
/// When `module` contains no separator it is returned unchanged. When it
/// *starts* with a separator (for example `".relative"`), the result is the
/// empty string.
///
/// This is the single helper for "what is the top-level package name of
/// this thing?" — replacing several bespoke `split("::").next().unwrap_or(...)`
/// chains spread across the detectors.
///
/// The returned slice borrows from `module`; no allocation is performed.
/// Non-ASCII input is handled correctly: the cut is always made on a
/// character boundary.
///
/// # Examples
///
/// ```
/// use seshat_core::dependency::top_level_module;
///
/// assert_eq!(top_level_module("tracing"), "tracing");
/// assert_eq!(top_level_module("tracing::subscriber"), "tracing");
/// assert_eq!(top_level_module("logging.config"), "logging");
/// assert_eq!(top_level_module("@scope/pkg"), "@scope");
/// assert_eq!(top_level_module("crate::foo::bar"), "crate");
/// assert_eq!(top_level_module("héllo::world"), "héllo");
/// ```
pub fn top_level_module(module: &str) -> &str {
    // `str::find` reports a *byte* offset that is guaranteed to sit on a char
    // boundary, so it is safe to slice with. (A char *count*, as produced by
    // `chars().position(..)`, is not a valid slice index once the prefix
    // contains multi-byte characters.)
    match module.find(|c: char| matches!(c, ' ' | ':' | '.' | '/')) {
        Some(end) => &module[..end],
        None => module,
    }
}
102
/// Report whether `module` belongs to the Python standard library.
///
/// Only the root segment (the text before the first `.`) is considered, and
/// it is compared case-sensitively against a curated list of stdlib
/// top-level modules. The list is hand-maintained and not exhaustive.
///
/// Heuristic detectors use this to skip stdlib imports — e.g. `traceback`,
/// `unittest.mock`, `logging.config` should not surface as "Possible logging
/// library (name heuristic)" or "Testing-related import (heuristic)", since
/// they are language built-ins rather than project-internal or third-party
/// code.
///
/// # Examples
///
/// ```
/// use seshat_core::dependency::is_python_stdlib_module;
///
/// assert!(is_python_stdlib_module("logging"));
/// assert!(is_python_stdlib_module("logging.config"));
/// assert!(is_python_stdlib_module("traceback"));
/// assert!(is_python_stdlib_module("unittest.mock"));
/// assert!(!is_python_stdlib_module("loguru"));
/// assert!(!is_python_stdlib_module("waltchat"));
/// ```
pub fn is_python_stdlib_module(module: &str) -> bool {
    // Curated stdlib roots, kept in alphabetical order for easy maintenance.
    const STDLIB_ROOTS: &[&str] = &[
        "__future__",
        "abc",
        "argparse",
        "ast",
        "asyncio",
        "base64",
        "bisect",
        "builtins",
        "calendar",
        "cmath",
        "codecs",
        "collections",
        "concurrent",
        "configparser",
        "contextlib",
        "copy",
        "csv",
        "ctypes",
        "dataclasses",
        "datetime",
        "decimal",
        "difflib",
        "dis",
        "email",
        "enum",
        "errno",
        "fcntl",
        "fileinput",
        "fnmatch",
        "fractions",
        "functools",
        "gc",
        "getpass",
        "gettext",
        "glob",
        "gzip",
        "hashlib",
        "heapq",
        "hmac",
        "html",
        "http",
        "importlib",
        "inspect",
        "io",
        "ipaddress",
        "itertools",
        "json",
        "keyword",
        "linecache",
        "locale",
        "logging",
        "lzma",
        "math",
        "mimetypes",
        "multiprocessing",
        "numbers",
        "operator",
        "os",
        "pathlib",
        "platform",
        "pprint",
        "queue",
        "random",
        "re",
        "secrets",
        "select",
        "shelve",
        "shlex",
        "shutil",
        "signal",
        "site",
        "socket",
        "sqlite3",
        "ssl",
        "stat",
        "string",
        "struct",
        "subprocess",
        "sys",
        "syslog",
        "tempfile",
        "textwrap",
        "threading",
        "time",
        "timeit",
        "traceback",
        "types",
        "typing",
        "unicodedata",
        "unittest",
        "urllib",
        "uuid",
        "venv",
        "warnings",
        "weakref",
        "xml",
        "zipfile",
        "zipimport",
        "zlib",
    ];

    // Root segment = everything before the first dot (or the whole string).
    let root = match module.find('.') {
        Some(dot) => &module[..dot],
        None => module,
    };
    STDLIB_ROOTS.contains(&root)
}
229
230// ---------------------------------------------------------------------------
231// Word-boundary keyword matching (shared by heuristic classifiers)
232// ---------------------------------------------------------------------------
233
/// True when any of `keywords` appears in `name` at a word boundary.
///
/// Word boundaries: start-of-string, the byte after `_` / `-`, or a
/// camelCase transition (lowercase byte → uppercase byte). The boundary
/// rules are ASCII-only — non-ASCII bytes never count as a separator or as
/// part of a camelCase transition — and the function never panics on
/// non-ASCII input, in either `name` or the keywords.
///
/// Matching is case-insensitive with respect to `name` only: `name` is
/// ASCII-lowercased before searching, while each keyword is compared
/// verbatim. Keywords should therefore be given in lower case; a keyword
/// containing an ASCII upper-case letter can never match.
///
/// This is the **single source of truth** for the heuristic boundary
/// rules. Used by [`crate`]'s consumers in two parallel classifiers:
/// `dependency_usage::classify_heuristic_domain` (which scans multiple
/// keyword groups, one per domain) and `test_patterns::is_heuristic_test_dep`.
/// Keeping the rule in one place prevents the two from drifting.
///
/// Empty keywords are skipped: they never match.
///
/// # Examples
///
/// ```
/// use seshat_core::dependency::matches_keyword_at_boundary;
///
/// // start-of-string boundary
/// assert!(matches_keyword_at_boundary("ormlib", &["orm"]));
/// // `_` separator boundary
/// assert!(matches_keyword_at_boundary("my_orm_lib", &["orm"]));
/// // `-` separator boundary
/// assert!(matches_keyword_at_boundary("my-orm-lib", &["orm"]));
/// // camelCase transition boundary
/// assert!(matches_keyword_at_boundary("myOrmLib", &["orm"]));
/// // substring inside another word — NOT a boundary match
/// assert!(!matches_keyword_at_boundary("format", &["orm"]));
/// // empty keyword: never matches (and never loops)
/// assert!(!matches_keyword_at_boundary("anything", &[""]));
/// ```
pub fn matches_keyword_at_boundary(name: &str, keywords: &[&str]) -> bool {
    // ASCII lowercasing preserves byte length, so an offset found in `lower`
    // addresses the same position in the original `name` bytes.
    let lower = name.to_ascii_lowercase();
    let bytes = name.as_bytes();
    for kw in keywords {
        if kw.is_empty() {
            continue;
        }
        let mut search_start = 0usize;
        while let Some(pos) = lower[search_start..].find(kw) {
            let abs_pos = search_start + pos;
            let prev = abs_pos.checked_sub(1).and_then(|i| bytes.get(i)).copied();
            let curr = bytes.get(abs_pos).copied();
            // The camelCase check reads the ORIGINAL bytes: `lower` has lost
            // the case information needed to see the transition.
            let is_boundary = abs_pos == 0
                || prev.is_some_and(|b| b == b'_' || b == b'-')
                || (prev.is_some_and(|b| b.is_ascii_lowercase())
                    && curr.is_some_and(|b| b.is_ascii_uppercase()));
            if is_boundary {
                return true;
            }
            // Resume one whole character past this match so overlapping
            // occurrences are still examined. Stepping by a single byte would
            // land inside a multi-byte character when the match starts with
            // one, and slicing there panics.
            let step = lower[abs_pos..].chars().next().map_or(1, char::len_utf8);
            search_start = abs_pos + step;
        }
    }
    false
}
291
292// ---------------------------------------------------------------------------
293// Package → Domain classification (single source of truth)
294// ---------------------------------------------------------------------------
295
296/// Classify a package name into its functional domain for the given language.
297///
298/// The name is **normalised** internally — lowercased and hyphens replaced with
299/// underscores — so both manifest names (`"serde-json"`) and import-path names
300/// (`"serde_json"`) resolve correctly.
301///
302/// Returns `None` when the package does not appear in any known list.
303///
304/// # Examples
305///
306/// ```
307/// use seshat_core::ir::Language;
308/// use seshat_core::dependency::{DependencyDomain, classify_domain};
309///
310/// assert_eq!(classify_domain("reqwest", Language::Rust), Some(DependencyDomain::Http));
311/// assert_eq!(classify_domain("serde-json", Language::Rust), Some(DependencyDomain::Serialization));
312/// assert_eq!(classify_domain("my-custom-lib", Language::Rust), None);
313/// ```
314pub fn classify_domain(package: &str, language: Language) -> Option<DependencyDomain> {
315    let normalised = package.to_lowercase().replace('-', "_");
316    match language {
317        Language::Rust => classify_rust(&normalised),
318        Language::TypeScript | Language::JavaScript => classify_js_ts(&normalised),
319        Language::Python => classify_python(&normalised),
320    }
321}
322
323fn classify_rust(name: &str) -> Option<DependencyDomain> {
324    match name {
325        // HTTP clients
326        "reqwest" | "hyper" | "ureq" | "curl" | "attohttpc" | "isahc" | "tonic" | "prost"
327        | "tower" | "tower_http" => Some(DependencyDomain::Http),
328        // Web frameworks
329        "actix_web" | "axum" | "warp" | "rocket" | "tide" | "poem" | "salvo" | "ntex" => {
330            Some(DependencyDomain::WebFramework)
331        }
332        // Logging
333        "tracing" | "tracing_subscriber" | "tracing_log" | "log" | "env_logger"
334        | "pretty_env_logger" | "slog" | "flexi_logger" => Some(DependencyDomain::Logging),
335        // Testing
336        "proptest" | "quickcheck" | "rstest" | "criterion" | "test_case" | "mockall"
337        | "wiremock" | "assert_cmd" | "assert_fs" | "assert_matches" | "pretty_assertions"
338        | "insta" | "tempfile" => Some(DependencyDomain::Testing),
339        // Validation
340        "validator" | "garde" | "schemars" => Some(DependencyDomain::Validation),
341        // Serialization
342        "serde" | "serde_json" | "serde_yaml" | "serde_toml" | "toml" | "bincode" | "ciborium"
343        | "postcard" | "rmp_serde" | "ron" | "csv" => Some(DependencyDomain::Serialization),
344        // Database
345        "sqlx" | "diesel" | "sea_orm" | "rusqlite" | "tokio_postgres" | "deadpool_postgres"
346        | "mongodb" | "redis" | "surrealdb" => Some(DependencyDomain::Database),
347        // CLI
348        "clap" | "structopt" | "argh" | "pico_args" | "bpaf" => Some(DependencyDomain::Cli),
349        // Async runtime
350        "tokio" | "async_std" | "smol" | "futures" | "async_trait" | "rayon" | "crossbeam"
351        | "crossbeam_channel" => Some(DependencyDomain::AsyncRuntime),
352        // Crypto
353        "sha2" | "ring" | "rustls" | "openssl" | "aes" | "argon2" | "bcrypt" | "hmac" => {
354            Some(DependencyDomain::Crypto)
355        }
356        // Utilities
357        "uuid" | "chrono" | "time" | "url" | "bytes" | "indexmap" | "dashmap" | "parking_lot"
358        | "once_cell" | "lazy_static" | "anyhow" | "thiserror" | "eyre" | "itertools" | "regex"
359        | "rand" => Some(DependencyDomain::Utilities),
360        _ => None,
361    }
362}
363
364fn classify_js_ts(name: &str) -> Option<DependencyDomain> {
365    match name {
366        // HTTP clients
367        "axios"
368        | "node_fetch"
369        | "got"
370        | "ky"
371        | "superagent"
372        | "undici"
373        | "socket_io"
374        | "socket_io_client"
375        | "socket.io"
376        | "socket.io_client"
377        | "graphql"
378        | "@apollo/client"
379        | "urql"
380        | "@tanstack/react_query"
381        | "react_query"
382        | "@tanstack/query_core"
383        | "swr" => Some(DependencyDomain::Http),
384        // Web frameworks
385        "express" | "fastify" | "koa" | "hapi" | "next" | "hono" | "nest" | "nuxt" | "react"
386        | "vue" | "angular" | "svelte" | "remix" | "astro" => Some(DependencyDomain::WebFramework),
387        // Logging
388        "winston" | "pino" | "bunyan" | "morgan" | "log4js" | "loglevel" | "debug" | "signale"
389        | "consola" => Some(DependencyDomain::Logging),
390        // Testing
391        "jest"
392        | "mocha"
393        | "vitest"
394        | "ava"
395        | "jasmine"
396        | "chai"
397        | "sinon"
398        | "cypress"
399        | "playwright"
400        | "testing_library"
401        | "@testing_library/react"
402        | "@testing_library/jest_dom"
403        | "supertest"
404        | "nock"
405        | "msw" => Some(DependencyDomain::Testing),
406        // Validation
407        "zod" | "joi" | "yup" | "ajv" | "class_validator" | "superstruct" | "io_ts" | "valibot" => {
408            Some(DependencyDomain::Validation)
409        }
410        // Serialization
411        "protobufjs" | "avro_js" | "msgpack" | "@msgpack/msgpack" | "flatbuffers" => {
412            Some(DependencyDomain::Serialization)
413        }
414        // Database
415        "prisma" | "@prisma/client" | "typeorm" | "sequelize" | "knex" | "mongoose"
416        | "drizzle_orm" | "pg" | "mysql2" | "better_sqlite3" | "ioredis" | "redis" => {
417            Some(DependencyDomain::Database)
418        }
419        // CLI
420        "commander" | "yargs" | "meow" | "cac" | "citty" | "oclif" | "inquirer" => {
421            Some(DependencyDomain::Cli)
422        }
423        // Utilities
424        "zustand"
425        | "redux"
426        | "@reduxjs/toolkit"
427        | "recoil"
428        | "jotai"
429        | "mobx"
430        | "xstate"
431        | "react_router"
432        | "react_router_dom"
433        | "@tanstack/react_router"
434        | "lodash"
435        | "ramda"
436        | "underscore"
437        | "immer"
438        | "date_fns"
439        | "dayjs"
440        | "moment"
441        | "luxon"
442        | "dotenv"
443        | "cross_env"
444        | "@sentry/react"
445        | "@sentry/nextjs"
446        | "@sentry/node" => Some(DependencyDomain::Utilities),
447        _ => None,
448    }
449}
450
451fn classify_python(name: &str) -> Option<DependencyDomain> {
452    // Note: several names here (`logging`, `asyncio`, `hashlib`, `sqlite3`, `json`,
453    // `argparse`, `unittest`, `pickle`) are Python stdlib modules and will be filtered
454    // out by `is_python_stdlib_or_relative` in the parser before they ever reach
455    // this function. They are kept in the match for completeness and for any
456    // caller that bypasses the parser filter (e.g. tests or future heuristics).
457    match name {
458        // HTTP clients
459        "requests" | "httpx" | "aiohttp" | "urllib3" | "httplib2" | "websockets"
460        | "websocket_client" | "python_socketio" | "grpcio" | "grpcio_tools" => {
461            Some(DependencyDomain::Http)
462        }
463        // Web frameworks
464        "flask" | "django" | "fastapi" | "starlette" | "tornado" | "sanic" | "pyramid"
465        | "bottle" | "litestar" | "blacksheep" => Some(DependencyDomain::WebFramework),
466        // Logging (loguru / structlog are third-party; `logging` is stdlib but kept for completeness)
467        "logging" | "loguru" | "structlog" => Some(DependencyDomain::Logging),
468        // Testing (unittest is stdlib but kept for completeness)
469        "pytest" | "unittest" | "nose" | "hypothesis" | "mock" | "unittest_mock" | "faker"
470        | "factory_boy" | "responses" | "pytest_mock" | "pytest_asyncio" | "tox" | "coverage"
471        | "pytest_cov" => Some(DependencyDomain::Testing),
472        // Validation
473        "pydantic" | "marshmallow" | "cerberus" | "attrs" | "voluptuous" | "cattrs" => {
474            Some(DependencyDomain::Validation)
475        }
476        // Serialization (json/pickle are stdlib but kept for completeness)
477        "json" | "msgpack" | "protobuf" | "avro" | "pickle" | "pyyaml" | "toml" | "orjson"
478        | "ujson" => Some(DependencyDomain::Serialization),
479        // Database (sqlite3 is stdlib but kept for completeness)
480        "sqlalchemy" | "psycopg2" | "asyncpg" | "pymongo" | "redis" | "peewee" | "tortoise"
481        | "tortoise_orm" | "databases" | "sqlite3" | "alembic" | "aioredis" | "motor" | "neo4j"
482        | "py2neo" | "pinecone" | "qdrant_client" | "chromadb" | "weaviate_client" | "pymilvus"
483        | "elasticsearch" | "opensearch_py" => Some(DependencyDomain::Database),
484        // CLI (argparse is stdlib but kept for completeness)
485        "click" | "argparse" | "typer" | "fire" | "docopt" | "rich" => Some(DependencyDomain::Cli),
486        // Async runtime (asyncio is stdlib but kept for completeness)
487        "asyncio" | "trio" | "anyio" | "uvloop" | "twisted" | "celery" | "dramatiq" | "uvicorn"
488        | "gunicorn" | "hypercorn" | "daphne" => Some(DependencyDomain::AsyncRuntime),
489        // Crypto (hashlib is stdlib but kept for completeness)
490        "cryptography" | "pycryptodome" | "hashlib" | "passlib" | "bcrypt" | "itsdangerous"
491        | "jwt" | "python_jose" | "authlib" => Some(DependencyDomain::Crypto),
492        // Utilities — AI/ML, data science, cloud, misc popular libs
493        "openai"
494        | "anthropic"
495        | "cohere"
496        | "google_generativeai"
497        | "google_genai"
498        | "langchain"
499        | "langchain_core"
500        | "langchain_openai"
501        | "langchain_anthropic"
502        | "langfuse"
503        | "litellm"
504        | "transformers"
505        | "sentence_transformers"
506        | "pandas"
507        | "numpy"
508        | "scipy"
509        | "polars"
510        | "pyarrow"
511        | "boto3"
512        | "botocore"
513        | "aiobotocore"
514        | "google_cloud_storage"
515        | "azure_storage_blob"
516        | "jinja2"
517        | "mako"
518        | "tenacity"
519        | "backoff"
520        | "retry"
521        | "paramiko"
522        | "fabric"
523        | "pillow"
524        | "pil"
525        | "cv2"
526        | "opencv_python"
527        | "stripe"
528        | "sendgrid"
529        | "sqlglot"
530        | "alembic_utils" => Some(DependencyDomain::Utilities),
531        _ => None,
532    }
533}
534
535// ---------------------------------------------------------------------------
536// tsconfig.json path aliases
537// ---------------------------------------------------------------------------
538
539/// A single TypeScript `compilerOptions.paths` mapping, with each target
540/// already joined to `baseUrl` and normalised to forward slashes.
541///
542/// `pattern` and `targets` are stored **verbatim** (no case-folding, no
543/// hyphen normalisation) — TS module specifiers must match the literal text,
544/// consistent with the JS/TS workspace-name handling elsewhere.
545///
546/// A pattern contains at most one `*` wildcard (TS's rule). When present, the
547/// captured substring is substituted into each target's `*`.
548#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
549pub struct PathAlias {
550    /// The import-specifier pattern, e.g. `"@app/*"` or `"@config"`.
551    pub pattern: String,
552    /// Candidate target paths (baseUrl-joined), e.g. `["src/*"]`. Tried in
553    /// declared order; the first that resolves to a real file wins.
554    pub targets: Vec<String>,
555}
556
557impl PathAlias {
558    /// Length of the literal prefix before the `*` wildcard, used to rank
559    /// matches by specificity (longest prefix wins, matching `tsc`). Exact
560    /// (non-wildcard) patterns are treated as strictly more specific than any
561    /// wildcard pattern with the same prefix.
562    fn specificity(&self) -> usize {
563        match self.pattern.split_once('*') {
564            Some((prefix, _)) => prefix.len(),
565            None => self.pattern.len() + 1,
566        }
567    }
568
569    /// If `module` matches this alias, return its candidate target paths with
570    /// the wildcard capture substituted (in declared target order). Pure string
571    /// rewriting — the caller resolves each candidate against the real file set.
572    ///
573    /// Returns `None` when the pattern does not match `module`.
574    pub fn rewrite(&self, module: &str) -> Option<Vec<String>> {
575        match self.pattern.split_once('*') {
576            Some((prefix, suffix)) => {
577                if module.len() >= prefix.len() + suffix.len()
578                    && module.starts_with(prefix)
579                    && module.ends_with(suffix)
580                {
581                    let captured = &module[prefix.len()..module.len() - suffix.len()];
582                    Some(
583                        self.targets
584                            .iter()
585                            .map(|t| match t.split_once('*') {
586                                Some((tp, ts)) => format!("{tp}{captured}{ts}"),
587                                None => t.clone(),
588                            })
589                            .collect(),
590                    )
591                } else {
592                    None
593                }
594            }
595            None => (module == self.pattern).then(|| self.targets.clone()),
596        }
597    }
598}
599
600/// Resolve an import `module` against a set of path aliases, returning the
601/// substituted candidate target paths of the **most specific** matching alias
602/// (longest literal prefix wins, matching `tsc`'s resolution order). The result
603/// is independent of the aliases' declaration order.
604///
605/// Returns an empty `Vec` when no alias matches.
606pub fn resolve_path_alias(module: &str, aliases: &[PathAlias]) -> Vec<String> {
607    let mut order: Vec<usize> = (0..aliases.len()).collect();
608    order.sort_by_key(|&i| std::cmp::Reverse(aliases[i].specificity()));
609    for i in order {
610        if let Some(candidates) = aliases[i].rewrite(module) {
611            return candidates;
612        }
613    }
614    Vec::new()
615}
616
617// ---------------------------------------------------------------------------
618// Tests
619// ---------------------------------------------------------------------------
620
621#[cfg(test)]
622mod tests {
623    use super::*;
624
625    // -- Rust --
626
627    #[test]
628    fn rust_http_clients() {
629        assert_eq!(
630            classify_domain("reqwest", Language::Rust),
631            Some(DependencyDomain::Http)
632        );
633        assert_eq!(
634            classify_domain("hyper", Language::Rust),
635            Some(DependencyDomain::Http)
636        );
637    }
638
639    #[test]
640    fn rust_web_frameworks() {
641        assert_eq!(
642            classify_domain("axum", Language::Rust),
643            Some(DependencyDomain::WebFramework)
644        );
645        // Hyphenated name normalises to underscore.
646        assert_eq!(
647            classify_domain("actix-web", Language::Rust),
648            Some(DependencyDomain::WebFramework)
649        );
650    }
651
652    #[test]
653    fn rust_logging() {
654        assert_eq!(
655            classify_domain("tracing", Language::Rust),
656            Some(DependencyDomain::Logging)
657        );
658        assert_eq!(
659            classify_domain("log", Language::Rust),
660            Some(DependencyDomain::Logging)
661        );
662        assert_eq!(
663            classify_domain("tracing-subscriber", Language::Rust),
664            Some(DependencyDomain::Logging)
665        );
666    }
667
668    #[test]
669    fn rust_testing() {
670        assert_eq!(
671            classify_domain("proptest", Language::Rust),
672            Some(DependencyDomain::Testing)
673        );
674        assert_eq!(
675            classify_domain("pretty_assertions", Language::Rust),
676            Some(DependencyDomain::Testing)
677        );
678        assert_eq!(
679            classify_domain("tempfile", Language::Rust),
680            Some(DependencyDomain::Testing)
681        );
682    }
683
684    #[test]
685    fn rust_serialization() {
686        assert_eq!(
687            classify_domain("serde", Language::Rust),
688            Some(DependencyDomain::Serialization)
689        );
690        assert_eq!(
691            classify_domain("serde-json", Language::Rust),
692            Some(DependencyDomain::Serialization)
693        );
694        assert_eq!(
695            classify_domain("serde_json", Language::Rust),
696            Some(DependencyDomain::Serialization)
697        );
698    }
699
700    #[test]
701    fn rust_database() {
702        assert_eq!(
703            classify_domain("sqlx", Language::Rust),
704            Some(DependencyDomain::Database)
705        );
706        assert_eq!(
707            classify_domain("sea-orm", Language::Rust),
708            Some(DependencyDomain::Database)
709        );
710        assert_eq!(
711            classify_domain("rusqlite", Language::Rust),
712            Some(DependencyDomain::Database)
713        );
714    }
715
716    #[test]
717    fn rust_async_runtime() {
718        assert_eq!(
719            classify_domain("tokio", Language::Rust),
720            Some(DependencyDomain::AsyncRuntime)
721        );
722        assert_eq!(
723            classify_domain("async-std", Language::Rust),
724            Some(DependencyDomain::AsyncRuntime)
725        );
726    }
727
728    #[test]
729    fn rust_crypto() {
730        assert_eq!(
731            classify_domain("ring", Language::Rust),
732            Some(DependencyDomain::Crypto)
733        );
734    }
735
736    // -- JS/TS --
737
738    #[test]
739    fn js_ts_http_clients() {
740        assert_eq!(
741            classify_domain("axios", Language::TypeScript),
742            Some(DependencyDomain::Http)
743        );
744        assert_eq!(
745            classify_domain("node-fetch", Language::JavaScript),
746            Some(DependencyDomain::Http)
747        );
748    }
749
750    #[test]
751    fn js_ts_web_frameworks() {
752        assert_eq!(
753            classify_domain("express", Language::JavaScript),
754            Some(DependencyDomain::WebFramework)
755        );
756        assert_eq!(
757            classify_domain("react", Language::TypeScript),
758            Some(DependencyDomain::WebFramework)
759        );
760        assert_eq!(
761            classify_domain("hono", Language::TypeScript),
762            Some(DependencyDomain::WebFramework)
763        );
764    }
765
766    #[test]
767    fn js_ts_testing() {
768        assert_eq!(
769            classify_domain("jest", Language::TypeScript),
770            Some(DependencyDomain::Testing)
771        );
772        assert_eq!(
773            classify_domain("vitest", Language::TypeScript),
774            Some(DependencyDomain::Testing)
775        );
776        assert_eq!(
777            classify_domain("cypress", Language::JavaScript),
778            Some(DependencyDomain::Testing)
779        );
780    }
781
782    #[test]
783    fn js_ts_database() {
784        assert_eq!(
785            classify_domain("prisma", Language::TypeScript),
786            Some(DependencyDomain::Database)
787        );
788        assert_eq!(
789            classify_domain("drizzle-orm", Language::TypeScript),
790            Some(DependencyDomain::Database)
791        );
792    }
793
794    // -- Python --
795
796    #[test]
797    fn python_http_clients() {
798        assert_eq!(
799            classify_domain("requests", Language::Python),
800            Some(DependencyDomain::Http)
801        );
802        assert_eq!(
803            classify_domain("httpx", Language::Python),
804            Some(DependencyDomain::Http)
805        );
806    }
807
808    #[test]
809    fn python_web_frameworks() {
810        assert_eq!(
811            classify_domain("django", Language::Python),
812            Some(DependencyDomain::WebFramework)
813        );
814        assert_eq!(
815            classify_domain("fastapi", Language::Python),
816            Some(DependencyDomain::WebFramework)
817        );
818    }
819
820    #[test]
821    fn python_testing() {
822        assert_eq!(
823            classify_domain("pytest", Language::Python),
824            Some(DependencyDomain::Testing)
825        );
826        assert_eq!(
827            classify_domain("hypothesis", Language::Python),
828            Some(DependencyDomain::Testing)
829        );
830    }
831
832    #[test]
833    fn python_database() {
834        assert_eq!(
835            classify_domain("sqlalchemy", Language::Python),
836            Some(DependencyDomain::Database)
837        );
838        assert_eq!(
839            classify_domain("asyncpg", Language::Python),
840            Some(DependencyDomain::Database)
841        );
842    }
843
844    #[test]
845    fn python_async_runtime() {
846        assert_eq!(
847            classify_domain("asyncio", Language::Python),
848            Some(DependencyDomain::AsyncRuntime)
849        );
850        assert_eq!(
851            classify_domain("trio", Language::Python),
852            Some(DependencyDomain::AsyncRuntime)
853        );
854    }
855
856    #[test]
857    fn python_crypto() {
858        assert_eq!(
859            classify_domain("cryptography", Language::Python),
860            Some(DependencyDomain::Crypto)
861        );
862    }
863
864    #[test]
865    fn python_utilities_ai_ml() {
866        assert_eq!(
867            classify_domain("openai", Language::Python),
868            Some(DependencyDomain::Utilities)
869        );
870        assert_eq!(
871            classify_domain("anthropic", Language::Python),
872            Some(DependencyDomain::Utilities)
873        );
874        assert_eq!(
875            classify_domain("langchain", Language::Python),
876            Some(DependencyDomain::Utilities)
877        );
878        assert_eq!(
879            classify_domain("pandas", Language::Python),
880            Some(DependencyDomain::Utilities)
881        );
882        assert_eq!(
883            classify_domain("numpy", Language::Python),
884            Some(DependencyDomain::Utilities)
885        );
886        assert_eq!(
887            classify_domain("boto3", Language::Python),
888            Some(DependencyDomain::Utilities)
889        );
890    }
891
892    #[test]
893    fn python_async_runtime_extended() {
894        assert_eq!(
895            classify_domain("celery", Language::Python),
896            Some(DependencyDomain::AsyncRuntime)
897        );
898        assert_eq!(
899            classify_domain("uvicorn", Language::Python),
900            Some(DependencyDomain::AsyncRuntime)
901        );
902    }
903
904    #[test]
905    fn python_database_extended() {
906        assert_eq!(
907            classify_domain("aioredis", Language::Python),
908            Some(DependencyDomain::Database)
909        );
910        assert_eq!(
911            classify_domain("neo4j", Language::Python),
912            Some(DependencyDomain::Database)
913        );
914        assert_eq!(
915            classify_domain("qdrant-client", Language::Python),
916            Some(DependencyDomain::Database)
917        );
918    }
919
920    #[test]
921    fn python_http_extended() {
922        assert_eq!(
923            classify_domain("websockets", Language::Python),
924            Some(DependencyDomain::Http)
925        );
926        assert_eq!(
927            classify_domain("grpcio", Language::Python),
928            Some(DependencyDomain::Http)
929        );
930    }
931
932    #[test]
933    fn js_ts_utilities() {
934        assert_eq!(
935            classify_domain("zustand", Language::TypeScript),
936            Some(DependencyDomain::Utilities)
937        );
938        assert_eq!(
939            classify_domain("redux", Language::TypeScript),
940            Some(DependencyDomain::Utilities)
941        );
942        assert_eq!(
943            classify_domain("lodash", Language::JavaScript),
944            Some(DependencyDomain::Utilities)
945        );
946        assert_eq!(
947            classify_domain("date-fns", Language::TypeScript),
948            Some(DependencyDomain::Utilities)
949        );
950        assert_eq!(
951            classify_domain("dayjs", Language::TypeScript),
952            Some(DependencyDomain::Utilities)
953        );
954    }
955
956    #[test]
957    fn js_ts_http_extended() {
958        assert_eq!(
959            classify_domain("socket.io-client", Language::TypeScript),
960            Some(DependencyDomain::Http)
961        );
962        assert_eq!(
963            classify_domain("swr", Language::TypeScript),
964            Some(DependencyDomain::Http)
965        );
966    }
967
968    #[test]
969    fn rust_utilities() {
970        assert_eq!(
971            classify_domain("uuid", Language::Rust),
972            Some(DependencyDomain::Utilities)
973        );
974        assert_eq!(
975            classify_domain("chrono", Language::Rust),
976            Some(DependencyDomain::Utilities)
977        );
978        assert_eq!(
979            classify_domain("anyhow", Language::Rust),
980            Some(DependencyDomain::Utilities)
981        );
982        assert_eq!(
983            classify_domain("thiserror", Language::Rust),
984            Some(DependencyDomain::Utilities)
985        );
986    }
987
988    #[test]
989    fn rust_http_extended() {
990        assert_eq!(
991            classify_domain("tonic", Language::Rust),
992            Some(DependencyDomain::Http)
993        );
994        assert_eq!(
995            classify_domain("tower", Language::Rust),
996            Some(DependencyDomain::Http)
997        );
998    }
999
1000    // -- Cross-cutting --
1001
1002    #[test]
1003    fn unknown_returns_none() {
1004        assert_eq!(classify_domain("my-custom-lib", Language::Rust), None);
1005        assert_eq!(
1006            classify_domain("internal-utils", Language::TypeScript),
1007            None
1008        );
1009        assert_eq!(classify_domain("my_app", Language::Python), None);
1010    }
1011
1012    #[test]
1013    fn hyphen_underscore_normalization() {
1014        // Both forms resolve to the same domain.
1015        assert_eq!(
1016            classify_domain("serde-json", Language::Rust),
1017            classify_domain("serde_json", Language::Rust)
1018        );
1019        assert_eq!(
1020            classify_domain("actix-web", Language::Rust),
1021            classify_domain("actix_web", Language::Rust)
1022        );
1023        assert_eq!(
1024            classify_domain("node-fetch", Language::JavaScript),
1025            classify_domain("node_fetch", Language::JavaScript)
1026        );
1027    }
1028
1029    #[test]
1030    fn case_insensitive() {
1031        assert_eq!(
1032            classify_domain("Reqwest", Language::Rust),
1033            Some(DependencyDomain::Http)
1034        );
1035        assert_eq!(
1036            classify_domain("AXIOS", Language::TypeScript),
1037            Some(DependencyDomain::Http)
1038        );
1039    }
1040
1041    // ---- matches_keyword_at_boundary ----
1042
1043    #[test]
1044    fn keyword_boundary_start_of_string() {
1045        assert!(matches_keyword_at_boundary("ormlib", &["orm"]));
1046        assert!(matches_keyword_at_boundary("test_helper", &["test"]));
1047    }
1048
1049    #[test]
1050    fn keyword_boundary_after_separator() {
1051        assert!(matches_keyword_at_boundary("my_orm_lib", &["orm"]));
1052        assert!(matches_keyword_at_boundary("my-orm-lib", &["orm"]));
1053        assert!(matches_keyword_at_boundary("a_test_b", &["test"]));
1054    }
1055
1056    #[test]
1057    fn keyword_boundary_camel_case() {
1058        assert!(matches_keyword_at_boundary("myOrmLib", &["orm"]));
1059        assert!(matches_keyword_at_boundary("notTestLib", &["test"]));
1060    }
1061
1062    #[test]
1063    fn keyword_substring_inside_word_does_not_match() {
1064        // The whole point of the boundary check: substrings inside
1065        // longer words must NOT trigger.
1066        assert!(!matches_keyword_at_boundary("format", &["orm"]));
1067        assert!(!matches_keyword_at_boundary("request_id", &["test"]));
1068        assert!(!matches_keyword_at_boundary("timestamp", &["test"]));
1069        assert!(!matches_keyword_at_boundary("inspect", &["spec"]));
1070    }
1071
1072    #[test]
1073    fn keyword_empty_keyword_does_not_loop_or_match() {
1074        // Defensive: `find("")` returns Some(0) and would loop forever.
1075        // The helper must skip empty keywords without scanning.
1076        assert!(!matches_keyword_at_boundary("anything", &[""]));
1077        // Mixed list: empty entries are silently skipped, real ones still hit.
1078        assert!(matches_keyword_at_boundary("orm_lib", &["", "orm", ""]));
1079    }
1080
1081    #[test]
1082    fn keyword_empty_keyword_list_returns_false() {
1083        assert!(!matches_keyword_at_boundary("orm_lib", &[]));
1084    }
1085
1086    #[test]
1087    fn keyword_non_ascii_input_degrades_gracefully() {
1088        // Cyrillic / mixed UTF-8: must not panic, must not match.
1089        assert!(!matches_keyword_at_boundary("ормлиб", &["orm"]));
1090        // ASCII keyword inside non-ASCII surroundings — boundary checks
1091        // operate on raw bytes; we only require no panic.
1092        let _ = matches_keyword_at_boundary("İorm_lib", &["orm"]);
1093    }
1094
1095    #[test]
1096    fn keyword_multiple_keywords_first_match_wins() {
1097        // The function returns on the first hit; order in the slice
1098        // doesn't change correctness, just early-exit timing.
1099        assert!(matches_keyword_at_boundary("my_log_pkg", &["http", "log"]));
1100        assert!(matches_keyword_at_boundary("my_log_pkg", &["log", "http"]));
1101    }
1102
1103    // -- PathAlias --
1104
1105    fn alias(pattern: &str, targets: &[&str]) -> PathAlias {
1106        PathAlias {
1107            pattern: pattern.to_owned(),
1108            targets: targets.iter().map(|s| (*s).to_owned()).collect(),
1109        }
1110    }
1111
1112    #[test]
1113    fn alias_wildcard_substitutes_capture() {
1114        let a = alias("@app/*", &["src/*"]);
1115        assert_eq!(a.rewrite("@app/utils"), Some(vec!["src/utils".to_owned()]));
1116        assert_eq!(
1117            a.rewrite("@app/foo/bar"),
1118            Some(vec!["src/foo/bar".to_owned()])
1119        );
1120    }
1121
1122    #[test]
1123    fn alias_wildcard_requires_prefix_and_suffix() {
1124        let a = alias("@app/*", &["src/*"]);
1125        assert_eq!(a.rewrite("@other/utils"), None);
1126        // The capture must be non-degenerate: bare prefix without a suffix
1127        // char still matches with an empty capture (TS allows `@app/` → `src/`).
1128        assert_eq!(a.rewrite("@app/"), Some(vec!["src/".to_owned()]));
1129    }
1130
1131    #[test]
1132    fn alias_exact_match_only() {
1133        let a = alias("@config", &["src/config/index.ts"]);
1134        assert_eq!(
1135            a.rewrite("@config"),
1136            Some(vec!["src/config/index.ts".to_owned()])
1137        );
1138        assert_eq!(a.rewrite("@config/extra"), None);
1139    }
1140
1141    #[test]
1142    fn alias_multiple_targets_preserve_order() {
1143        let a = alias("@app/*", &["src/*", "generated/*"]);
1144        assert_eq!(
1145            a.rewrite("@app/x"),
1146            Some(vec!["src/x".to_owned(), "generated/x".to_owned()])
1147        );
1148    }
1149
1150    #[test]
1151    fn alias_target_without_wildcard_is_verbatim() {
1152        let a = alias("@app/*", &["src/shim.ts"]);
1153        assert_eq!(
1154            a.rewrite("@app/anything"),
1155            Some(vec!["src/shim.ts".to_owned()])
1156        );
1157    }
1158
1159    #[test]
1160    fn resolve_picks_most_specific_regardless_of_order() {
1161        // `@app/feature/*` is more specific than `@app/*`; it must win even
1162        // when declared after the broader pattern.
1163        let aliases = vec![
1164            alias("@app/*", &["src/*"]),
1165            alias("@app/feature/*", &["src/feature/impl/*"]),
1166        ];
1167        assert_eq!(
1168            resolve_path_alias("@app/feature/x", &aliases),
1169            vec!["src/feature/impl/x".to_owned()]
1170        );
1171        assert_eq!(
1172            resolve_path_alias("@app/other", &aliases),
1173            vec!["src/other".to_owned()]
1174        );
1175    }
1176
1177    #[test]
1178    fn resolve_no_match_is_empty() {
1179        let aliases = vec![alias("@app/*", &["src/*"])];
1180        assert!(resolve_path_alias("react", &aliases).is_empty());
1181        assert!(resolve_path_alias("@app/x", &[]).is_empty());
1182    }
1183
1184    #[test]
1185    fn path_alias_json_roundtrips() {
1186        let aliases = vec![alias("@app/*", &["src/*"]), alias("@config", &["c.ts"])];
1187        let json = serde_json::to_string(&aliases).unwrap();
1188        let back: Vec<PathAlias> = serde_json::from_str(&json).unwrap();
1189        assert_eq!(aliases, back);
1190    }
1191}