Skip to main content

seshat_core/
dependency.rs

1//! Unified dependency domain taxonomy and package classification.
2//!
3//! Provides a single [`DependencyDomain`] enum that classifies dependencies
4//! by their functional role, plus [`classify_domain`] — the **single source of
5//! truth** for mapping package names to domains. Both the scanner (manifest
6//! analysis) and the detectors (usage analysis) call this function.
7
8use serde::{Deserialize, Serialize};
9
10use crate::ir::Language;
11
/// Functional domain a dependency belongs to.
///
/// Covers the union of all categories previously split across
/// `DependencyCategory` (scanner) and the old `DependencyDomain` (detectors).
///
/// The serde representation uses `snake_case` variant names (for example
/// `WebFramework` is written as `"web_framework"`). For the label shown to
/// humans, use [`DependencyDomain::as_str`] instead.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DependencyDomain {
    /// HTTP clients (reqwest, axios, httpx, etc.).
    Http,
    /// Web frameworks (actix-web, express, flask, django, axum, rocket, etc.).
    WebFramework,
    /// Logging and observability (tracing, winston, loguru, etc.).
    Logging,
    /// Testing frameworks and utilities (jest, pytest, proptest, etc.).
    Testing,
    /// Input validation and schema enforcement (zod, pydantic, validator, etc.).
    Validation,
    /// Serialization and deserialization (serde, protobuf, msgpack, etc.).
    Serialization,
    /// Database clients and ORMs (sqlx, prisma, sqlalchemy, etc.).
    Database,
    /// CLI argument parsing (clap, commander, click, etc.).
    Cli,
    /// Async runtimes and utilities (tokio, asyncio, trio, etc.).
    AsyncRuntime,
    /// Cryptography and security (ring, bcrypt, hashlib, etc.).
    Crypto,
    /// General-purpose utility libraries.
    Utilities,
    /// Could not be classified into any known domain.
    Unknown,
}
44
45impl DependencyDomain {
46    /// Human-readable name used in finding descriptions.
47    pub fn as_str(self) -> &'static str {
48        match self {
49            Self::Http => "HTTP",
50            Self::WebFramework => "web framework",
51            Self::Logging => "logging",
52            Self::Testing => "testing",
53            Self::Validation => "validation",
54            Self::Serialization => "serialization",
55            Self::Database => "database",
56            Self::Cli => "CLI",
57            Self::AsyncRuntime => "async runtime",
58            Self::Crypto => "crypto",
59            Self::Utilities => "utilities",
60            Self::Unknown => "unknown",
61        }
62    }
63}
64
65// ---------------------------------------------------------------------------
66// Module-path helpers (single source of truth)
67// ---------------------------------------------------------------------------
68
/// Extract the top-level package/module name from an import path or
/// callee, regardless of the source language's separator.
///
/// Returns the prefix of `module` that precedes the first space, `:`, `.`
/// or `/`. When none of those characters occurs, the whole input is
/// returned unchanged. This covers every separator the four supported
/// languages use today:
/// - Rust: `tracing::subscriber` → `tracing`, `crate::foo` → `crate`
/// - Python: `logging.config` → `logging`
/// - npm: `@scope/package` → `@scope`, `lodash/fp` → `lodash`
/// - All: anything after the first space is dropped (`foo bar` → `foo`).
///
/// An input that *starts* with a separator (for example a relative Python
/// import such as `.pkg`) yields the empty string.
///
/// Non-ASCII names are handled correctly: the cut is made at the byte
/// offset of the separator, never at a character count.
///
/// This is the single helper for "what is the top-level package name of
/// this thing?" — replacing several bespoke `split("::").next().unwrap_or(...)`
/// chains spread across the detectors.
///
/// # Examples
///
/// ```
/// use seshat_core::dependency::top_level_module;
///
/// assert_eq!(top_level_module("tracing"), "tracing");
/// assert_eq!(top_level_module("tracing::subscriber"), "tracing");
/// assert_eq!(top_level_module("logging.config"), "logging");
/// assert_eq!(top_level_module("@scope/pkg"), "@scope");
/// assert_eq!(top_level_module("crate::foo::bar"), "crate");
/// assert_eq!(top_level_module("café.config"), "café");
/// ```
pub fn top_level_module(module: &str) -> &str {
    // `str::find` reports a *byte* offset, which is what slicing requires.
    // Counting characters instead would cut multi-byte names in the wrong
    // place, or panic when the count lands inside a character.
    match module.find(|c: char| matches!(c, ' ' | ':' | '.' | '/')) {
        Some(end) => &module[..end],
        None => module,
    }
}
102
/// Check whether `module` is a Python standard-library top-level package.
///
/// Splits on `.` to get the root segment, then matches it (exactly and
/// case-sensitively) against a curated list of stdlib modules. The list is
/// deliberately not exhaustive. Used by heuristic detectors to skip stdlib
/// imports — e.g. `traceback`, `unittest.mock`, `logging.config` should
/// not surface as "Possible logging library (name heuristic)" or
/// "Testing-related import (heuristic)" since they're language built-ins,
/// not project-internal nor third-party.
///
/// The list contains every module that `classify_python` describes as
/// stdlib (`logging`, `asyncio`, `hashlib`, `sqlite3`, `json`, `argparse`,
/// `unittest`, `pickle`), so the two functions agree on what counts as a
/// built-in.
///
/// # Examples
///
/// ```
/// use seshat_core::dependency::is_python_stdlib_module;
///
/// assert!(is_python_stdlib_module("logging"));
/// assert!(is_python_stdlib_module("logging.config"));
/// assert!(is_python_stdlib_module("traceback"));
/// assert!(is_python_stdlib_module("unittest.mock"));
/// assert!(is_python_stdlib_module("pickle"));
/// assert!(!is_python_stdlib_module("loguru"));
/// assert!(!is_python_stdlib_module("waltchat"));
/// ```
pub fn is_python_stdlib_module(module: &str) -> bool {
    // Only the first dotted segment identifies the package.
    let root = module.split('.').next().unwrap_or(module);
    matches!(
        root,
        "__future__"
            | "abc"
            | "argparse"
            | "ast"
            | "asyncio"
            | "base64"
            | "bisect"
            | "builtins"
            | "calendar"
            | "cmath"
            | "codecs"
            | "collections"
            | "concurrent"
            | "configparser"
            | "contextlib"
            | "copy"
            | "csv"
            | "ctypes"
            | "dataclasses"
            | "datetime"
            | "decimal"
            | "difflib"
            | "dis"
            | "email"
            | "enum"
            | "errno"
            | "fcntl"
            | "fileinput"
            | "fnmatch"
            | "fractions"
            | "functools"
            | "gc"
            | "getpass"
            | "gettext"
            | "glob"
            | "gzip"
            | "hashlib"
            | "heapq"
            | "hmac"
            | "html"
            | "http"
            | "importlib"
            | "inspect"
            | "io"
            | "ipaddress"
            | "itertools"
            | "json"
            | "keyword"
            | "linecache"
            | "locale"
            | "logging"
            | "lzma"
            | "math"
            | "mimetypes"
            | "multiprocessing"
            | "numbers"
            | "operator"
            | "os"
            | "pathlib"
            | "pickle"
            | "platform"
            | "pprint"
            | "queue"
            | "random"
            | "re"
            | "secrets"
            | "select"
            | "shelve"
            | "shlex"
            | "shutil"
            | "signal"
            | "site"
            | "socket"
            | "sqlite3"
            | "ssl"
            | "stat"
            | "string"
            | "struct"
            | "subprocess"
            | "sys"
            | "syslog"
            | "tempfile"
            | "textwrap"
            | "threading"
            | "time"
            | "timeit"
            | "traceback"
            | "types"
            | "typing"
            | "unicodedata"
            | "unittest"
            | "urllib"
            | "uuid"
            | "venv"
            | "warnings"
            | "weakref"
            | "xml"
            | "zipfile"
            | "zipimport"
            | "zlib"
    )
}
229
230// ---------------------------------------------------------------------------
231// Word-boundary keyword matching (shared by heuristic classifiers)
232// ---------------------------------------------------------------------------
233
/// True when any of `keywords` appears in `name` at a word boundary.
///
/// Word boundaries: start-of-string, the byte after `_` / `-`, or a
/// camelCase transition (lowercase byte → uppercase byte). The boundary
/// rules are ASCII-only — non-ASCII bytes degrade gracefully (their
/// boundary checks return false), and the scan always advances by whole
/// characters, so neither `name` nor a keyword containing multi-byte
/// characters can cause a panic.
///
/// `name` is ASCII-lowercased before searching, while keywords are used
/// as given. Keywords are therefore expected to be lowercase: one that
/// contains an ASCII uppercase letter can never match.
///
/// This is the **single source of truth** for the heuristic boundary
/// rules. Used by [`crate`]'s consumers in two parallel classifiers:
/// `dependency_usage::classify_heuristic_domain` (which scans multiple
/// keyword groups, one per domain) and `test_patterns::is_heuristic_test_dep`.
/// Keeping the rule in one place prevents the two from drifting.
///
/// Empty keywords are skipped to avoid an infinite loop on `find("")`.
///
/// # Examples
///
/// ```
/// use seshat_core::dependency::matches_keyword_at_boundary;
///
/// // start-of-string boundary
/// assert!(matches_keyword_at_boundary("ormlib", &["orm"]));
/// // `_` separator boundary
/// assert!(matches_keyword_at_boundary("my_orm_lib", &["orm"]));
/// // `-` separator boundary
/// assert!(matches_keyword_at_boundary("my-orm-lib", &["orm"]));
/// // camelCase transition boundary
/// assert!(matches_keyword_at_boundary("myOrmLib", &["orm"]));
/// // substring inside another word — NOT a boundary match
/// assert!(!matches_keyword_at_boundary("format", &["orm"]));
/// // empty keyword: never matches (and never loops)
/// assert!(!matches_keyword_at_boundary("anything", &[""]));
/// ```
pub fn matches_keyword_at_boundary(name: &str, keywords: &[&str]) -> bool {
    // ASCII lowercasing keeps every byte offset intact, so a hit found in
    // `lower` can be inspected at the same offset in the original-case bytes.
    let lower = name.to_ascii_lowercase();
    let bytes = name.as_bytes();
    for kw in keywords {
        // No first character means the keyword is empty: skip it.
        let Some(first) = kw.chars().next() else {
            continue;
        };
        // Every hit starts with the keyword's first character, so stepping
        // past a rejected hit by that character's UTF-8 width always lands
        // on a character boundary. For ASCII keywords this is one byte,
        // which still lets overlapping occurrences be examined.
        let step = first.len_utf8();
        let mut search_start = 0usize;
        while let Some(pos) = lower[search_start..].find(*kw) {
            let abs_pos = search_start + pos;
            // Case matters for the camelCase rule, so look at the original bytes.
            let prev = abs_pos.checked_sub(1).and_then(|i| bytes.get(i)).copied();
            let curr = bytes.get(abs_pos).copied();
            let is_boundary = abs_pos == 0
                || prev.is_some_and(|b| b == b'_' || b == b'-')
                || (prev.is_some_and(|b| b.is_ascii_lowercase())
                    && curr.is_some_and(|b| b.is_ascii_uppercase()));
            if is_boundary {
                return true;
            }
            search_start = abs_pos + step;
        }
    }
    false
}
291
292// ---------------------------------------------------------------------------
293// Package → Domain classification (single source of truth)
294// ---------------------------------------------------------------------------
295
296/// Classify a package name into its functional domain for the given language.
297///
298/// The name is **normalised** internally — lowercased and hyphens replaced with
299/// underscores — so both manifest names (`"serde-json"`) and import-path names
300/// (`"serde_json"`) resolve correctly.
301///
302/// Returns `None` when the package does not appear in any known list.
303///
304/// # Examples
305///
306/// ```
307/// use seshat_core::ir::Language;
308/// use seshat_core::dependency::{DependencyDomain, classify_domain};
309///
310/// assert_eq!(classify_domain("reqwest", Language::Rust), Some(DependencyDomain::Http));
311/// assert_eq!(classify_domain("serde-json", Language::Rust), Some(DependencyDomain::Serialization));
312/// assert_eq!(classify_domain("my-custom-lib", Language::Rust), None);
313/// ```
314pub fn classify_domain(package: &str, language: Language) -> Option<DependencyDomain> {
315    let normalised = package.to_lowercase().replace('-', "_");
316    match language {
317        Language::Rust => classify_rust(&normalised),
318        Language::TypeScript | Language::JavaScript => classify_js_ts(&normalised),
319        Language::Python => classify_python(&normalised),
320    }
321}
322
323fn classify_rust(name: &str) -> Option<DependencyDomain> {
324    match name {
325        // HTTP clients
326        "reqwest" | "hyper" | "ureq" | "curl" | "attohttpc" | "isahc" | "tonic" | "prost"
327        | "tower" | "tower_http" => Some(DependencyDomain::Http),
328        // Web frameworks
329        "actix_web" | "axum" | "warp" | "rocket" | "tide" | "poem" | "salvo" | "ntex" => {
330            Some(DependencyDomain::WebFramework)
331        }
332        // Logging
333        "tracing" | "tracing_subscriber" | "tracing_log" | "log" | "env_logger"
334        | "pretty_env_logger" | "slog" | "flexi_logger" => Some(DependencyDomain::Logging),
335        // Testing
336        "proptest" | "quickcheck" | "rstest" | "criterion" | "test_case" | "mockall"
337        | "wiremock" | "assert_cmd" | "assert_fs" | "assert_matches" | "pretty_assertions"
338        | "insta" | "tempfile" => Some(DependencyDomain::Testing),
339        // Validation
340        "validator" | "garde" | "schemars" => Some(DependencyDomain::Validation),
341        // Serialization
342        "serde" | "serde_json" | "serde_yaml" | "serde_toml" | "toml" | "bincode" | "ciborium"
343        | "postcard" | "rmp_serde" | "ron" | "csv" => Some(DependencyDomain::Serialization),
344        // Database
345        "sqlx" | "diesel" | "sea_orm" | "rusqlite" | "tokio_postgres" | "deadpool_postgres"
346        | "mongodb" | "redis" | "surrealdb" => Some(DependencyDomain::Database),
347        // CLI
348        "clap" | "structopt" | "argh" | "pico_args" | "bpaf" => Some(DependencyDomain::Cli),
349        // Async runtime
350        "tokio" | "async_std" | "smol" | "futures" | "async_trait" | "rayon" | "crossbeam"
351        | "crossbeam_channel" => Some(DependencyDomain::AsyncRuntime),
352        // Crypto
353        "sha2" | "ring" | "rustls" | "openssl" | "aes" | "argon2" | "bcrypt" | "hmac" => {
354            Some(DependencyDomain::Crypto)
355        }
356        // Utilities
357        "uuid" | "chrono" | "time" | "url" | "bytes" | "indexmap" | "dashmap" | "parking_lot"
358        | "once_cell" | "lazy_static" | "anyhow" | "thiserror" | "eyre" | "itertools" | "regex"
359        | "rand" => Some(DependencyDomain::Utilities),
360        _ => None,
361    }
362}
363
364fn classify_js_ts(name: &str) -> Option<DependencyDomain> {
365    match name {
366        // HTTP clients
367        "axios"
368        | "node_fetch"
369        | "got"
370        | "ky"
371        | "superagent"
372        | "undici"
373        | "socket_io"
374        | "socket_io_client"
375        | "socket.io"
376        | "socket.io_client"
377        | "graphql"
378        | "@apollo/client"
379        | "urql"
380        | "@tanstack/react_query"
381        | "react_query"
382        | "@tanstack/query_core"
383        | "swr" => Some(DependencyDomain::Http),
384        // Web frameworks
385        "express" | "fastify" | "koa" | "hapi" | "next" | "hono" | "nest" | "nuxt" | "react"
386        | "vue" | "angular" | "svelte" | "remix" | "astro" => Some(DependencyDomain::WebFramework),
387        // Logging
388        "winston" | "pino" | "bunyan" | "morgan" | "log4js" | "loglevel" | "debug" | "signale"
389        | "consola" => Some(DependencyDomain::Logging),
390        // Testing
391        "jest"
392        | "mocha"
393        | "vitest"
394        | "ava"
395        | "jasmine"
396        | "chai"
397        | "sinon"
398        | "cypress"
399        | "playwright"
400        | "testing_library"
401        | "@testing_library/react"
402        | "@testing_library/jest_dom"
403        | "supertest"
404        | "nock"
405        | "msw" => Some(DependencyDomain::Testing),
406        // Validation
407        "zod" | "joi" | "yup" | "ajv" | "class_validator" | "superstruct" | "io_ts" | "valibot" => {
408            Some(DependencyDomain::Validation)
409        }
410        // Serialization
411        "protobufjs" | "avro_js" | "msgpack" | "@msgpack/msgpack" | "flatbuffers" => {
412            Some(DependencyDomain::Serialization)
413        }
414        // Database
415        "prisma" | "@prisma/client" | "typeorm" | "sequelize" | "knex" | "mongoose"
416        | "drizzle_orm" | "pg" | "mysql2" | "better_sqlite3" | "ioredis" | "redis" => {
417            Some(DependencyDomain::Database)
418        }
419        // CLI
420        "commander" | "yargs" | "meow" | "cac" | "citty" | "oclif" | "inquirer" => {
421            Some(DependencyDomain::Cli)
422        }
423        // Utilities
424        "zustand"
425        | "redux"
426        | "@reduxjs/toolkit"
427        | "recoil"
428        | "jotai"
429        | "mobx"
430        | "xstate"
431        | "react_router"
432        | "react_router_dom"
433        | "@tanstack/react_router"
434        | "lodash"
435        | "ramda"
436        | "underscore"
437        | "immer"
438        | "date_fns"
439        | "dayjs"
440        | "moment"
441        | "luxon"
442        | "dotenv"
443        | "cross_env"
444        | "@sentry/react"
445        | "@sentry/nextjs"
446        | "@sentry/node" => Some(DependencyDomain::Utilities),
447        _ => None,
448    }
449}
450
451fn classify_python(name: &str) -> Option<DependencyDomain> {
452    // Note: several names here (`logging`, `asyncio`, `hashlib`, `sqlite3`, `json`,
453    // `argparse`, `unittest`, `pickle`) are Python stdlib modules and will be filtered
454    // out by `is_python_stdlib_or_relative` in the parser before they ever reach
455    // this function. They are kept in the match for completeness and for any
456    // caller that bypasses the parser filter (e.g. tests or future heuristics).
457    match name {
458        // HTTP clients
459        "requests" | "httpx" | "aiohttp" | "urllib3" | "httplib2" | "websockets"
460        | "websocket_client" | "python_socketio" | "grpcio" | "grpcio_tools" => {
461            Some(DependencyDomain::Http)
462        }
463        // Web frameworks
464        "flask" | "django" | "fastapi" | "starlette" | "tornado" | "sanic" | "pyramid"
465        | "bottle" | "litestar" | "blacksheep" => Some(DependencyDomain::WebFramework),
466        // Logging (loguru / structlog are third-party; `logging` is stdlib but kept for completeness)
467        "logging" | "loguru" | "structlog" => Some(DependencyDomain::Logging),
468        // Testing (unittest is stdlib but kept for completeness)
469        "pytest" | "unittest" | "nose" | "hypothesis" | "mock" | "unittest_mock" | "faker"
470        | "factory_boy" | "responses" | "pytest_mock" | "pytest_asyncio" | "tox" | "coverage"
471        | "pytest_cov" => Some(DependencyDomain::Testing),
472        // Validation
473        "pydantic" | "marshmallow" | "cerberus" | "attrs" | "voluptuous" | "cattrs" => {
474            Some(DependencyDomain::Validation)
475        }
476        // Serialization (json/pickle are stdlib but kept for completeness)
477        "json" | "msgpack" | "protobuf" | "avro" | "pickle" | "pyyaml" | "toml" | "orjson"
478        | "ujson" => Some(DependencyDomain::Serialization),
479        // Database (sqlite3 is stdlib but kept for completeness)
480        "sqlalchemy" | "psycopg2" | "asyncpg" | "pymongo" | "redis" | "peewee" | "tortoise"
481        | "tortoise_orm" | "databases" | "sqlite3" | "alembic" | "aioredis" | "motor" | "neo4j"
482        | "py2neo" | "pinecone" | "qdrant_client" | "chromadb" | "weaviate_client" | "pymilvus"
483        | "elasticsearch" | "opensearch_py" => Some(DependencyDomain::Database),
484        // CLI (argparse is stdlib but kept for completeness)
485        "click" | "argparse" | "typer" | "fire" | "docopt" | "rich" => Some(DependencyDomain::Cli),
486        // Async runtime (asyncio is stdlib but kept for completeness)
487        "asyncio" | "trio" | "anyio" | "uvloop" | "twisted" | "celery" | "dramatiq" | "uvicorn"
488        | "gunicorn" | "hypercorn" | "daphne" => Some(DependencyDomain::AsyncRuntime),
489        // Crypto (hashlib is stdlib but kept for completeness)
490        "cryptography" | "pycryptodome" | "hashlib" | "passlib" | "bcrypt" | "itsdangerous"
491        | "jwt" | "python_jose" | "authlib" => Some(DependencyDomain::Crypto),
492        // Utilities — AI/ML, data science, cloud, misc popular libs
493        "openai"
494        | "anthropic"
495        | "cohere"
496        | "google_generativeai"
497        | "google_genai"
498        | "langchain"
499        | "langchain_core"
500        | "langchain_openai"
501        | "langchain_anthropic"
502        | "langfuse"
503        | "litellm"
504        | "transformers"
505        | "sentence_transformers"
506        | "pandas"
507        | "numpy"
508        | "scipy"
509        | "polars"
510        | "pyarrow"
511        | "boto3"
512        | "botocore"
513        | "aiobotocore"
514        | "google_cloud_storage"
515        | "azure_storage_blob"
516        | "jinja2"
517        | "mako"
518        | "tenacity"
519        | "backoff"
520        | "retry"
521        | "paramiko"
522        | "fabric"
523        | "pillow"
524        | "pil"
525        | "cv2"
526        | "opencv_python"
527        | "stripe"
528        | "sendgrid"
529        | "sqlglot"
530        | "alembic_utils" => Some(DependencyDomain::Utilities),
531        _ => None,
532    }
533}
534
535// ---------------------------------------------------------------------------
536// Tests
537// ---------------------------------------------------------------------------
538
539#[cfg(test)]
540mod tests {
541    use super::*;
542
543    // -- Rust --
544
545    #[test]
546    fn rust_http_clients() {
547        assert_eq!(
548            classify_domain("reqwest", Language::Rust),
549            Some(DependencyDomain::Http)
550        );
551        assert_eq!(
552            classify_domain("hyper", Language::Rust),
553            Some(DependencyDomain::Http)
554        );
555    }
556
557    #[test]
558    fn rust_web_frameworks() {
559        assert_eq!(
560            classify_domain("axum", Language::Rust),
561            Some(DependencyDomain::WebFramework)
562        );
563        // Hyphenated name normalises to underscore.
564        assert_eq!(
565            classify_domain("actix-web", Language::Rust),
566            Some(DependencyDomain::WebFramework)
567        );
568    }
569
570    #[test]
571    fn rust_logging() {
572        assert_eq!(
573            classify_domain("tracing", Language::Rust),
574            Some(DependencyDomain::Logging)
575        );
576        assert_eq!(
577            classify_domain("log", Language::Rust),
578            Some(DependencyDomain::Logging)
579        );
580        assert_eq!(
581            classify_domain("tracing-subscriber", Language::Rust),
582            Some(DependencyDomain::Logging)
583        );
584    }
585
586    #[test]
587    fn rust_testing() {
588        assert_eq!(
589            classify_domain("proptest", Language::Rust),
590            Some(DependencyDomain::Testing)
591        );
592        assert_eq!(
593            classify_domain("pretty_assertions", Language::Rust),
594            Some(DependencyDomain::Testing)
595        );
596        assert_eq!(
597            classify_domain("tempfile", Language::Rust),
598            Some(DependencyDomain::Testing)
599        );
600    }
601
602    #[test]
603    fn rust_serialization() {
604        assert_eq!(
605            classify_domain("serde", Language::Rust),
606            Some(DependencyDomain::Serialization)
607        );
608        assert_eq!(
609            classify_domain("serde-json", Language::Rust),
610            Some(DependencyDomain::Serialization)
611        );
612        assert_eq!(
613            classify_domain("serde_json", Language::Rust),
614            Some(DependencyDomain::Serialization)
615        );
616    }
617
618    #[test]
619    fn rust_database() {
620        assert_eq!(
621            classify_domain("sqlx", Language::Rust),
622            Some(DependencyDomain::Database)
623        );
624        assert_eq!(
625            classify_domain("sea-orm", Language::Rust),
626            Some(DependencyDomain::Database)
627        );
628        assert_eq!(
629            classify_domain("rusqlite", Language::Rust),
630            Some(DependencyDomain::Database)
631        );
632    }
633
634    #[test]
635    fn rust_async_runtime() {
636        assert_eq!(
637            classify_domain("tokio", Language::Rust),
638            Some(DependencyDomain::AsyncRuntime)
639        );
640        assert_eq!(
641            classify_domain("async-std", Language::Rust),
642            Some(DependencyDomain::AsyncRuntime)
643        );
644    }
645
646    #[test]
647    fn rust_crypto() {
648        assert_eq!(
649            classify_domain("ring", Language::Rust),
650            Some(DependencyDomain::Crypto)
651        );
652    }
653
654    // -- JS/TS --
655
656    #[test]
657    fn js_ts_http_clients() {
658        assert_eq!(
659            classify_domain("axios", Language::TypeScript),
660            Some(DependencyDomain::Http)
661        );
662        assert_eq!(
663            classify_domain("node-fetch", Language::JavaScript),
664            Some(DependencyDomain::Http)
665        );
666    }
667
668    #[test]
669    fn js_ts_web_frameworks() {
670        assert_eq!(
671            classify_domain("express", Language::JavaScript),
672            Some(DependencyDomain::WebFramework)
673        );
674        assert_eq!(
675            classify_domain("react", Language::TypeScript),
676            Some(DependencyDomain::WebFramework)
677        );
678        assert_eq!(
679            classify_domain("hono", Language::TypeScript),
680            Some(DependencyDomain::WebFramework)
681        );
682    }
683
684    #[test]
685    fn js_ts_testing() {
686        assert_eq!(
687            classify_domain("jest", Language::TypeScript),
688            Some(DependencyDomain::Testing)
689        );
690        assert_eq!(
691            classify_domain("vitest", Language::TypeScript),
692            Some(DependencyDomain::Testing)
693        );
694        assert_eq!(
695            classify_domain("cypress", Language::JavaScript),
696            Some(DependencyDomain::Testing)
697        );
698    }
699
700    #[test]
701    fn js_ts_database() {
702        assert_eq!(
703            classify_domain("prisma", Language::TypeScript),
704            Some(DependencyDomain::Database)
705        );
706        assert_eq!(
707            classify_domain("drizzle-orm", Language::TypeScript),
708            Some(DependencyDomain::Database)
709        );
710    }
711
712    // -- Python --
713
714    #[test]
715    fn python_http_clients() {
716        assert_eq!(
717            classify_domain("requests", Language::Python),
718            Some(DependencyDomain::Http)
719        );
720        assert_eq!(
721            classify_domain("httpx", Language::Python),
722            Some(DependencyDomain::Http)
723        );
724    }
725
726    #[test]
727    fn python_web_frameworks() {
728        assert_eq!(
729            classify_domain("django", Language::Python),
730            Some(DependencyDomain::WebFramework)
731        );
732        assert_eq!(
733            classify_domain("fastapi", Language::Python),
734            Some(DependencyDomain::WebFramework)
735        );
736    }
737
738    #[test]
739    fn python_testing() {
740        assert_eq!(
741            classify_domain("pytest", Language::Python),
742            Some(DependencyDomain::Testing)
743        );
744        assert_eq!(
745            classify_domain("hypothesis", Language::Python),
746            Some(DependencyDomain::Testing)
747        );
748    }
749
750    #[test]
751    fn python_database() {
752        assert_eq!(
753            classify_domain("sqlalchemy", Language::Python),
754            Some(DependencyDomain::Database)
755        );
756        assert_eq!(
757            classify_domain("asyncpg", Language::Python),
758            Some(DependencyDomain::Database)
759        );
760    }
761
762    #[test]
763    fn python_async_runtime() {
764        assert_eq!(
765            classify_domain("asyncio", Language::Python),
766            Some(DependencyDomain::AsyncRuntime)
767        );
768        assert_eq!(
769            classify_domain("trio", Language::Python),
770            Some(DependencyDomain::AsyncRuntime)
771        );
772    }
773
774    #[test]
775    fn python_crypto() {
776        assert_eq!(
777            classify_domain("cryptography", Language::Python),
778            Some(DependencyDomain::Crypto)
779        );
780    }
781
782    #[test]
783    fn python_utilities_ai_ml() {
784        assert_eq!(
785            classify_domain("openai", Language::Python),
786            Some(DependencyDomain::Utilities)
787        );
788        assert_eq!(
789            classify_domain("anthropic", Language::Python),
790            Some(DependencyDomain::Utilities)
791        );
792        assert_eq!(
793            classify_domain("langchain", Language::Python),
794            Some(DependencyDomain::Utilities)
795        );
796        assert_eq!(
797            classify_domain("pandas", Language::Python),
798            Some(DependencyDomain::Utilities)
799        );
800        assert_eq!(
801            classify_domain("numpy", Language::Python),
802            Some(DependencyDomain::Utilities)
803        );
804        assert_eq!(
805            classify_domain("boto3", Language::Python),
806            Some(DependencyDomain::Utilities)
807        );
808    }
809
810    #[test]
811    fn python_async_runtime_extended() {
812        assert_eq!(
813            classify_domain("celery", Language::Python),
814            Some(DependencyDomain::AsyncRuntime)
815        );
816        assert_eq!(
817            classify_domain("uvicorn", Language::Python),
818            Some(DependencyDomain::AsyncRuntime)
819        );
820    }
821
822    #[test]
823    fn python_database_extended() {
824        assert_eq!(
825            classify_domain("aioredis", Language::Python),
826            Some(DependencyDomain::Database)
827        );
828        assert_eq!(
829            classify_domain("neo4j", Language::Python),
830            Some(DependencyDomain::Database)
831        );
832        assert_eq!(
833            classify_domain("qdrant-client", Language::Python),
834            Some(DependencyDomain::Database)
835        );
836    }
837
838    #[test]
839    fn python_http_extended() {
840        assert_eq!(
841            classify_domain("websockets", Language::Python),
842            Some(DependencyDomain::Http)
843        );
844        assert_eq!(
845            classify_domain("grpcio", Language::Python),
846            Some(DependencyDomain::Http)
847        );
848    }
849
850    #[test]
851    fn js_ts_utilities() {
852        assert_eq!(
853            classify_domain("zustand", Language::TypeScript),
854            Some(DependencyDomain::Utilities)
855        );
856        assert_eq!(
857            classify_domain("redux", Language::TypeScript),
858            Some(DependencyDomain::Utilities)
859        );
860        assert_eq!(
861            classify_domain("lodash", Language::JavaScript),
862            Some(DependencyDomain::Utilities)
863        );
864        assert_eq!(
865            classify_domain("date-fns", Language::TypeScript),
866            Some(DependencyDomain::Utilities)
867        );
868        assert_eq!(
869            classify_domain("dayjs", Language::TypeScript),
870            Some(DependencyDomain::Utilities)
871        );
872    }
873
874    #[test]
875    fn js_ts_http_extended() {
876        assert_eq!(
877            classify_domain("socket.io-client", Language::TypeScript),
878            Some(DependencyDomain::Http)
879        );
880        assert_eq!(
881            classify_domain("swr", Language::TypeScript),
882            Some(DependencyDomain::Http)
883        );
884    }
885
886    #[test]
887    fn rust_utilities() {
888        assert_eq!(
889            classify_domain("uuid", Language::Rust),
890            Some(DependencyDomain::Utilities)
891        );
892        assert_eq!(
893            classify_domain("chrono", Language::Rust),
894            Some(DependencyDomain::Utilities)
895        );
896        assert_eq!(
897            classify_domain("anyhow", Language::Rust),
898            Some(DependencyDomain::Utilities)
899        );
900        assert_eq!(
901            classify_domain("thiserror", Language::Rust),
902            Some(DependencyDomain::Utilities)
903        );
904    }
905
906    #[test]
907    fn rust_http_extended() {
908        assert_eq!(
909            classify_domain("tonic", Language::Rust),
910            Some(DependencyDomain::Http)
911        );
912        assert_eq!(
913            classify_domain("tower", Language::Rust),
914            Some(DependencyDomain::Http)
915        );
916    }
917
918    // -- Cross-cutting --
919
920    #[test]
921    fn unknown_returns_none() {
922        assert_eq!(classify_domain("my-custom-lib", Language::Rust), None);
923        assert_eq!(
924            classify_domain("internal-utils", Language::TypeScript),
925            None
926        );
927        assert_eq!(classify_domain("my_app", Language::Python), None);
928    }
929
930    #[test]
931    fn hyphen_underscore_normalization() {
932        // Both forms resolve to the same domain.
933        assert_eq!(
934            classify_domain("serde-json", Language::Rust),
935            classify_domain("serde_json", Language::Rust)
936        );
937        assert_eq!(
938            classify_domain("actix-web", Language::Rust),
939            classify_domain("actix_web", Language::Rust)
940        );
941        assert_eq!(
942            classify_domain("node-fetch", Language::JavaScript),
943            classify_domain("node_fetch", Language::JavaScript)
944        );
945    }
946
947    #[test]
948    fn case_insensitive() {
949        assert_eq!(
950            classify_domain("Reqwest", Language::Rust),
951            Some(DependencyDomain::Http)
952        );
953        assert_eq!(
954            classify_domain("AXIOS", Language::TypeScript),
955            Some(DependencyDomain::Http)
956        );
957    }
958
959    // ---- matches_keyword_at_boundary ----
960
961    #[test]
962    fn keyword_boundary_start_of_string() {
963        assert!(matches_keyword_at_boundary("ormlib", &["orm"]));
964        assert!(matches_keyword_at_boundary("test_helper", &["test"]));
965    }
966
967    #[test]
968    fn keyword_boundary_after_separator() {
969        assert!(matches_keyword_at_boundary("my_orm_lib", &["orm"]));
970        assert!(matches_keyword_at_boundary("my-orm-lib", &["orm"]));
971        assert!(matches_keyword_at_boundary("a_test_b", &["test"]));
972    }
973
974    #[test]
975    fn keyword_boundary_camel_case() {
976        assert!(matches_keyword_at_boundary("myOrmLib", &["orm"]));
977        assert!(matches_keyword_at_boundary("notTestLib", &["test"]));
978    }
979
980    #[test]
981    fn keyword_substring_inside_word_does_not_match() {
982        // The whole point of the boundary check: substrings inside
983        // longer words must NOT trigger.
984        assert!(!matches_keyword_at_boundary("format", &["orm"]));
985        assert!(!matches_keyword_at_boundary("request_id", &["test"]));
986        assert!(!matches_keyword_at_boundary("timestamp", &["test"]));
987        assert!(!matches_keyword_at_boundary("inspect", &["spec"]));
988    }
989
990    #[test]
991    fn keyword_empty_keyword_does_not_loop_or_match() {
992        // Defensive: `find("")` returns Some(0) and would loop forever.
993        // The helper must skip empty keywords without scanning.
994        assert!(!matches_keyword_at_boundary("anything", &[""]));
995        // Mixed list: empty entries are silently skipped, real ones still hit.
996        assert!(matches_keyword_at_boundary("orm_lib", &["", "orm", ""]));
997    }
998
999    #[test]
1000    fn keyword_empty_keyword_list_returns_false() {
1001        assert!(!matches_keyword_at_boundary("orm_lib", &[]));
1002    }
1003
1004    #[test]
1005    fn keyword_non_ascii_input_degrades_gracefully() {
1006        // Cyrillic / mixed UTF-8: must not panic, must not match.
1007        assert!(!matches_keyword_at_boundary("ормлиб", &["orm"]));
1008        // ASCII keyword inside non-ASCII surroundings — boundary checks
1009        // operate on raw bytes; we only require no panic.
1010        let _ = matches_keyword_at_boundary("İorm_lib", &["orm"]);
1011    }
1012
1013    #[test]
1014    fn keyword_multiple_keywords_first_match_wins() {
1015        // The function returns on the first hit; order in the slice
1016        // doesn't change correctness, just early-exit timing.
1017        assert!(matches_keyword_at_boundary("my_log_pkg", &["http", "log"]));
1018        assert!(matches_keyword_at_boundary("my_log_pkg", &["log", "http"]));
1019    }
1020}