Skip to main content

tirith_core/rules/
threatintel.rs

1use std::net::Ipv4Addr;
2
3use crate::extract::ExtractedUrl;
4use crate::threatdb::{self, Ecosystem, ThreatDb};
5use crate::tokenize::{Segment, ShellType};
6use crate::verdict::{Evidence, Finding, RuleId, Severity};
7
8/// A reference to a package extracted from a shell command.
9#[derive(Debug, Clone, PartialEq, Eq)]
10pub struct PackageRef {
11    pub ecosystem: Ecosystem,
12    pub name: String,
13    pub version: Option<String>,
14}
15
/// Split a `name<sep>version` string (e.g. `serde@1.0` or `rails:7.0`).
///
/// The split happens at the first occurrence of `sep`. Returns
/// `(name, Some(version))` when `sep` is present and followed by a non-empty
/// version, `(name, None)` when the version part is empty, and `(s, None)`
/// when `sep` does not occur at all.
fn split_name_version(s: &str, sep: char) -> (&str, Option<String>) {
    // `split_once` steps over the separator by its real UTF-8 width, so a
    // multi-byte `sep` can never leave the slice in the middle of a character
    // (manual `pos + 1` slicing would panic there).
    match s.split_once(sep) {
        Some((name, version)) if !version.is_empty() => (name, Some(version.to_string())),
        Some((name, _)) => (name, None),
        None => (s, None),
    }
}
36
37/// Extract package references from tokenized shell segments.
38///
39/// Recognizes install/add commands for: pip, npm, yarn, pnpm, bun, npx,
40/// cargo, gem, go, composer, dotnet.
41///
42/// Skips flags (tokens starting with `-`) and known non-package arguments
43/// like `--index-url <url>`, `--save-dev`, etc.
44pub fn extract_packages(segments: &[Segment]) -> Vec<PackageRef> {
45    let mut packages = Vec::new();
46
47    for seg in segments {
48        let cmd = match &seg.command {
49            Some(c) => c.to_lowercase(),
50            None => continue,
51        };
52
53        // Strip path prefix: `/usr/bin/pip3` -> `pip3`.
54        let cmd_name = cmd.rsplit('/').next().unwrap_or(&cmd);
55
56        match cmd_name {
57            "pip" | "pip3" | "uv" => {
58                extract_pip_packages(&seg.args, &mut packages);
59            }
60            "npm" | "npx" | "yarn" | "pnpm" | "bun" => {
61                extract_npm_packages(cmd_name, &seg.args, &mut packages);
62            }
63            "cargo" => {
64                extract_cargo_packages(&seg.args, &mut packages);
65            }
66            "gem" => {
67                extract_gem_packages(&seg.args, &mut packages);
68            }
69            "go" => {
70                extract_go_packages(&seg.args, &mut packages);
71            }
72            "composer" => {
73                extract_composer_packages(&seg.args, &mut packages);
74            }
75            "dotnet" => {
76                extract_dotnet_packages(&seg.args, &mut packages);
77            }
78            "mvn" | "gradle" | "gradlew" => {
79                extract_maven_packages(&seg.args, &mut packages);
80            }
81            _ => {}
82        }
83    }
84
85    packages
86}
87
/// Flags for pip that consume the next argument (so it should be skipped).
///
/// Only the space-separated form (`--flag value`) needs this table; the
/// `--flag=value` form is a single token and is skipped as an ordinary flag.
/// A value-taking option missing from this list makes its value look like a
/// package name, so the list errs on the side of completeness. All entries are
/// written in lowercase.
const PIP_ARG_FLAGS: &[&str] = &[
    // Index / source selection
    "--index-url",
    "-i",
    "--extra-index-url",
    "--find-links",
    "-f",
    "--trusted-host",
    "--proxy",
    "--cert",
    "--client-cert",
    "--keyring-provider",
    // Requirement inputs
    "--constraint",
    "-c",
    "--requirement",
    "-r",
    "--editable",
    "-e",
    // Install locations
    "--target",
    "-t",
    "--root",
    "--prefix",
    "--src",
    "--build",
    "-b",
    "--cache-dir",
    "--python",
    // Build / resolver configuration
    "--config-settings",
    "--global-option",
    "--install-option",
    "--upgrade-strategy",
    "--no-binary",
    "--only-binary",
    "--use-feature",
    "--use-deprecated",
    // Target platform selection
    "--platform",
    "--python-version",
    "--implementation",
    "--abi",
    // Behaviour / output
    "--retries",
    "--timeout",
    "--exists-action",
    "--root-user-action",
    "--progress-bar",
    "--report",
    "--log",
];
118
119fn extract_pip_packages(args: &[String], packages: &mut Vec<PackageRef>) {
120    let mut iter = args.iter();
121    let mut found_install = false;
122    while let Some(arg) = iter.next() {
123        let lower = arg.to_lowercase();
124        if !found_install {
125            if lower == "install" {
126                found_install = true;
127            }
128            continue;
129        }
130
131        if arg.starts_with('-') {
132            if PIP_ARG_FLAGS.contains(&lower.as_str()) {
133                let _ = iter.next();
134            }
135            continue;
136        }
137
138        // VCS / URL / local path installs aren't registry packages — skip.
139        if arg.contains("://") || lower.starts_with("git+") {
140            continue;
141        }
142        if arg.contains('/') || arg.contains('\\') || arg.starts_with('.') {
143            continue;
144        }
145
146        // pip spec shapes we handle: foo==1.2.3, foo>=1.0, foo~=2.0, foo!=1.0,
147        // foo[extra]==1.0, foo[a,b]>=1.0.
148        let pkg_str = arg.as_str();
149
150        // Strip extras: `foo[bar,baz]==1.0` -> name=`foo`, rest=`==1.0`.
151        let (name_part, rest) = if let Some(bracket_pos) = pkg_str.find('[') {
152            if let Some(close_pos) = pkg_str[bracket_pos..].find(']') {
153                let name = &pkg_str[..bracket_pos];
154                let after = &pkg_str[bracket_pos + close_pos + 1..];
155                (name, after)
156            } else {
157                (pkg_str, "")
158            }
159        } else {
160            let split_pos = pkg_str
161                .find("==")
162                .or_else(|| pkg_str.find(">="))
163                .or_else(|| pkg_str.find("<="))
164                .or_else(|| pkg_str.find("~="))
165                .or_else(|| pkg_str.find("!="))
166                .or_else(|| pkg_str.find('>'))
167                .or_else(|| pkg_str.find('<'));
168            if let Some(pos) = split_pos {
169                (&pkg_str[..pos], &pkg_str[pos..])
170            } else {
171                (pkg_str, "")
172            }
173        };
174
175        if name_part.is_empty() {
176            continue;
177        }
178
179        let version = extract_pip_version(rest);
180        let normalized = normalize_pypi_name(name_part);
181
182        packages.push(PackageRef {
183            ecosystem: Ecosystem::PyPI,
184            name: normalized,
185            version,
186        });
187    }
188}
189
/// Normalize a PyPI package name the way the index does (PEP 503).
///
/// The name is lowercased and every run of `-`, `_` and `.` is replaced by a
/// single `-`, so `Foo__Bar`, `foo.bar` and `foo-bar` all become `foo-bar`.
/// Collapsing runs matters for detection: the index treats `evil__pkg` and
/// `evil-pkg` as the same project, so both spellings must produce the same
/// lookup key.
fn normalize_pypi_name(name: &str) -> String {
    let mut normalized = String::with_capacity(name.len());
    let mut in_separator_run = false;

    for c in name.to_lowercase().chars() {
        if matches!(c, '-' | '_' | '.') {
            // Emit one `-` for the whole run of separators.
            if !in_separator_run {
                normalized.push('-');
            }
            in_separator_run = true;
        } else {
            normalized.push(c);
            in_separator_run = false;
        }
    }

    normalized
}
197
/// Extract the pinned version from a pip version specifier.
///
/// Only equality pins produce a version: `==1.2.3` and the arbitrary-equality
/// form `===1.2.3`. Range operators (`>=`, `~=`, `!=`, ...) return `None`.
/// Anything after the pinned version (further comma-separated clauses, or an
/// environment marker introduced by `;`) is dropped, so `==1.0,!=1.1` yields
/// `1.0`. Surrounding whitespace is ignored.
fn extract_pip_version(spec: &str) -> Option<String> {
    let spec = spec.trim_start();
    // Try `===` first; otherwise its third `=` would end up in the version.
    let pinned = spec
        .strip_prefix("===")
        .or_else(|| spec.strip_prefix("=="))?;

    let version = pinned
        .split(|c: char| c == ',' || c == ';')
        .next()
        .unwrap_or("")
        .trim();

    if version.is_empty() {
        None
    } else {
        Some(version.to_string())
    }
}
209
/// Flags for npm/yarn/pnpm/bun that consume the next argument.
///
/// Only the space-separated form (`--flag value`) needs this table; the
/// `--flag=value` form is a single token and is skipped as an ordinary flag.
/// A value-taking flag missing from this list makes its value look like a
/// package name. All entries are written in lowercase.
const NPM_ARG_FLAGS: &[&str] = &[
    // Registry / publishing
    "--registry",
    "--tag",
    "--scope",
    "--otp",
    // Workspace / project selection
    "--workspace",
    "-w",
    "--filter",
    "--prefix",
    "--cwd",
    "--dir",
    "--modules-folder",
    // Configuration
    "--cache",
    "--userconfig",
    "--loglevel",
    "--omit",
    "--include",
];
220
221fn extract_npm_packages(cmd_name: &str, args: &[String], packages: &mut Vec<PackageRef>) {
222    let mut iter = args.iter().peekable();
223    let mut found_subcmd = false;
224
225    // npx is special: `npx foo` runs `foo` directly. `--package`/-p can override,
226    // in which case the first positional is an entry point, not a package.
227    if cmd_name == "npx" {
228        let mut has_explicit_package = false;
229        while let Some(arg) = iter.next() {
230            if arg.starts_with('-') {
231                if arg == "--package" || arg == "-p" {
232                    if let Some(pkg_arg) = iter.next() {
233                        if let Some(pr) = parse_npm_package_spec(pkg_arg) {
234                            packages.push(pr);
235                            has_explicit_package = true;
236                        }
237                    }
238                }
239                continue;
240            }
241            if !has_explicit_package {
242                if let Some(pr) = parse_npm_package_spec(arg) {
243                    packages.push(pr);
244                }
245            }
246            break;
247        }
248        return;
249    }
250
251    while let Some(arg) = iter.next() {
252        let lower = arg.to_lowercase();
253        if !found_subcmd {
254            if matches!(lower.as_str(), "install" | "i" | "add") {
255                found_subcmd = true;
256            }
257            continue;
258        }
259
260        if arg.starts_with('-') {
261            let lower_ref = lower.as_str();
262            if NPM_ARG_FLAGS.contains(&lower_ref) {
263                let _ = iter.next();
264            }
265            continue;
266        }
267
268        if arg.contains("://") || arg.starts_with('.') || arg.starts_with('/') {
269            continue;
270        }
271
272        if let Some(pr) = parse_npm_package_spec(arg) {
273            packages.push(pr);
274        }
275    }
276}
277
278/// Parse an npm-style package spec: `@scope/name@version` or `name@version`.
279fn parse_npm_package_spec(spec: &str) -> Option<PackageRef> {
280    if spec.is_empty() {
281        return None;
282    }
283
284    let (name, version) = if spec.starts_with('@') {
285        // Scoped package: @scope/name@version
286        // Find the version @ after the scope
287        if let Some(slash_pos) = spec.find('/') {
288            let after_scope = &spec[slash_pos + 1..];
289            if let Some(at_pos) = after_scope.find('@') {
290                let full_name = &spec[..slash_pos + 1 + at_pos];
291                let ver = &after_scope[at_pos + 1..];
292                (full_name, if ver.is_empty() { None } else { Some(ver) })
293            } else {
294                (spec, None)
295            }
296        } else {
297            // Invalid scoped package (no slash)
298            return None;
299        }
300    } else if let Some(at_pos) = spec.find('@') {
301        let name = &spec[..at_pos];
302        let ver = &spec[at_pos + 1..];
303        (name, if ver.is_empty() { None } else { Some(ver) })
304    } else {
305        (spec, None)
306    };
307
308    if name.is_empty() {
309        return None;
310    }
311
312    Some(PackageRef {
313        ecosystem: Ecosystem::Npm,
314        name: name.to_string(),
315        version: version.map(|v| v.to_string()),
316    })
317}
318
319fn extract_cargo_packages(args: &[String], packages: &mut Vec<PackageRef>) {
320    let mut iter = args.iter();
321    let mut found_subcmd = false;
322
323    while let Some(arg) = iter.next() {
324        let lower = arg.to_lowercase();
325        if !found_subcmd {
326            if matches!(lower.as_str(), "install" | "add") {
327                found_subcmd = true;
328            }
329            continue;
330        }
331
332        if arg.starts_with('-') {
333            if matches!(
334                lower.as_str(),
335                "--version"
336                    | "--vers"
337                    | "--git"
338                    | "--branch"
339                    | "--tag"
340                    | "--rev"
341                    | "--path"
342                    | "--registry"
343                    | "--index"
344                    | "--features"
345                    | "-F"
346                    | "--target-dir"
347                    | "--root"
348                    | "--jobs"
349                    | "-j"
350                    | "--rename"
351            ) {
352                // `--version` / `--vers` carries the version of the previously seen package.
353                if lower == "--version" || lower == "--vers" {
354                    if let Some(ver) = iter.next() {
355                        if let Some(last) = packages.last_mut() {
356                            if last.ecosystem == Ecosystem::Crates && last.version.is_none() {
357                                last.version = Some(ver.to_string());
358                            }
359                        }
360                    }
361                } else {
362                    let _ = iter.next();
363                }
364                continue;
365            }
366            continue;
367        }
368
369        // git URLs and local paths aren't crates.io packages.
370        if arg.contains("://") || arg.starts_with('.') || arg.contains('/') {
371            continue;
372        }
373
374        // `cargo add foo@1.0.0` form.
375        let (name, version) = split_name_version(arg, '@');
376
377        if !name.is_empty() {
378            packages.push(PackageRef {
379                ecosystem: Ecosystem::Crates,
380                name: name.to_string(),
381                version,
382            });
383        }
384    }
385}
386
387fn extract_gem_packages(args: &[String], packages: &mut Vec<PackageRef>) {
388    let mut iter = args.iter();
389    let mut found_install = false;
390
391    while let Some(arg) = iter.next() {
392        let lower = arg.to_lowercase();
393        if !found_install {
394            if lower == "install" {
395                found_install = true;
396            }
397            continue;
398        }
399
400        if arg.starts_with('-') {
401            if matches!(
402                lower.as_str(),
403                "--version" | "-v" | "--source" | "--platform" | "--install-dir" | "-i"
404            ) {
405                // `--version` / `-v` carries the version of the previously seen gem.
406                if lower == "--version" || lower == "-v" {
407                    if let Some(ver) = iter.next() {
408                        if let Some(last) = packages.last_mut() {
409                            if last.ecosystem == Ecosystem::RubyGems && last.version.is_none() {
410                                last.version = Some(ver.to_string());
411                            }
412                        }
413                    }
414                } else {
415                    let _ = iter.next();
416                }
417                continue;
418            }
419            continue;
420        }
421
422        // `gem install rails:7.0` form (also accepts bare name).
423        let (name, version) = split_name_version(arg, ':');
424
425        if !name.is_empty() {
426            packages.push(PackageRef {
427                ecosystem: Ecosystem::RubyGems,
428                name: name.to_string(),
429                version,
430            });
431        }
432    }
433}
434
435fn extract_go_packages(args: &[String], packages: &mut Vec<PackageRef>) {
436    let mut found_subcmd = false;
437
438    for arg in args {
439        let lower = arg.to_lowercase();
440        if !found_subcmd {
441            if matches!(lower.as_str(), "get" | "install") {
442                found_subcmd = true;
443            }
444            continue;
445        }
446
447        if arg.starts_with('-') {
448            continue;
449        }
450
451        // `go get github.com/user/pkg@v1.2.3` form.
452        let (name, version) = split_name_version(arg, '@');
453
454        if !name.is_empty() {
455            packages.push(PackageRef {
456                ecosystem: Ecosystem::Go,
457                name: name.to_string(),
458                version,
459            });
460        }
461    }
462}
463
464fn extract_composer_packages(args: &[String], packages: &mut Vec<PackageRef>) {
465    let mut found_require = false;
466
467    for arg in args {
468        if !found_require {
469            if arg.to_lowercase() == "require" {
470                found_require = true;
471            }
472            continue;
473        }
474
475        if arg.starts_with('-') {
476            continue;
477        }
478
479        // `composer require vendor/package:^1.0` form.
480        let (name, version) = split_name_version(arg, ':');
481
482        if !name.is_empty() {
483            packages.push(PackageRef {
484                ecosystem: Ecosystem::Packagist,
485                name: name.to_string(),
486                version,
487            });
488        }
489    }
490}
491
492fn extract_dotnet_packages(args: &[String], packages: &mut Vec<PackageRef>) {
493    let mut iter = args.iter();
494    let mut found_add = false;
495    let mut found_package = false;
496
497    while let Some(arg) = iter.next() {
498        let lower = arg.to_lowercase();
499        if !found_add {
500            if lower == "add" {
501                found_add = true;
502            }
503            continue;
504        }
505
506        // `dotnet add package <name>` — skip the project file arg
507        if !found_package {
508            if lower == "package" {
509                found_package = true;
510            }
511            continue;
512        }
513
514        if arg.starts_with('-') {
515            if lower == "--version" || lower == "-v" {
516                if let Some(ver) = iter.next() {
517                    if let Some(last) = packages.last_mut() {
518                        if last.ecosystem == Ecosystem::NuGet && last.version.is_none() {
519                            last.version = Some(ver.to_string());
520                        }
521                    }
522                }
523                continue;
524            }
525            if matches!(lower.as_str(), "--source" | "-s" | "--framework" | "-f") {
526                let _ = iter.next();
527            }
528            continue;
529        }
530
531        packages.push(PackageRef {
532            ecosystem: Ecosystem::NuGet,
533            name: arg.to_string(),
534            version: None,
535        });
536    }
537}
538
539/// Extract Maven/Gradle dependency coordinates from command arguments.
540///
541/// Handles `mvn dependency:get -Dartifact=group:artifact:version` and
542/// `gradle` dependency notation `group:artifact:version`.
543fn extract_maven_packages(args: &[String], packages: &mut Vec<PackageRef>) {
544    for arg in args {
545        // mvn form: `-Dartifact=group:artifact:version[:packaging[:classifier]]`.
546        if let Some(coord) = arg.strip_prefix("-Dartifact=") {
547            let parts: Vec<&str> = coord.splitn(4, ':').collect();
548            if parts.len() >= 2 {
549                let name = format!("{}:{}", parts[0], parts[1]);
550                let version = parts.get(2).and_then(|v| {
551                    if v.is_empty() {
552                        None
553                    } else {
554                        Some(v.to_string())
555                    }
556                });
557                packages.push(PackageRef {
558                    ecosystem: Ecosystem::Maven,
559                    name,
560                    version,
561                });
562            }
563            continue;
564        }
565
566        if arg.starts_with('-') {
567            continue;
568        }
569
570        // Gradle dependency notation: `group:artifact:version` (at least one colon required).
571        let parts: Vec<&str> = arg.splitn(4, ':').collect();
572        if parts.len() >= 2 && !parts[0].is_empty() && !parts[1].is_empty() {
573            let name = format!("{}:{}", parts[0], parts[1]);
574            let version = parts.get(2).and_then(|v| {
575                if v.is_empty() {
576                    None
577                } else {
578                    Some(v.to_string())
579                }
580            });
581            packages.push(PackageRef {
582                ecosystem: Ecosystem::Maven,
583                name,
584                version,
585            });
586        }
587    }
588}
589
/// Extract an IPv4 address from a shell token.
///
/// Handles:
/// - Bare IP: `1.2.3.4`
/// - user@IP: `user@1.2.3.4`
/// - IP:port: `1.2.3.4:22`
/// - user@IP:port: `user@1.2.3.4:22`
/// - scp/rsync style remotes: `user@1.2.3.4:/path`, `1.2.3.4:file`
/// - Bracketed hosts: `[1.2.3.4]:22`
///
/// Does NOT match:
/// - IPv6 addresses
/// - Non-IP text
/// - IPs embedded inside URLs (those are handled by URL extraction)
///
/// Returns `None` when no IPv4 host can be parsed.
pub fn extract_ipv4_from_token(token: &str) -> Option<Ipv4Addr> {
    /// Parse the host part of `host[:rest]` as IPv4.
    ///
    /// Only the text before the first `:` is considered. A dotted quad never
    /// contains a colon, and the first group of an IPv6 literal never contains
    /// three dots, so IPv6 input cannot be mistaken for IPv4 here.
    fn host_to_ipv4(host_and_rest: &str) -> Option<Ipv4Addr> {
        let host = host_and_rest.split(':').next().unwrap_or(host_and_rest);
        // Some formats wrap the host with `[...]`; strip before parsing.
        host.trim_matches(|c| c == '[' || c == ']')
            .parse::<Ipv4Addr>()
            .ok()
    }

    // The last `@` normally ends the user part (`user:pass@host`).
    let after_last_at = token.rsplit_once('@').map_or(token, |(_, rest)| rest);

    host_to_ipv4(after_last_at).or_else(|| {
        // A remote path may itself contain `@` (`user@1.2.3.4:/dir/a@b`); in
        // that case the host follows the first `@` instead.
        token
            .split_once('@')
            .and_then(|(_, rest)| host_to_ipv4(rest))
    })
}
627
628/// Map threat-DB confidence to finding severity.
629fn confidence_to_severity(c: threatdb::Confidence) -> Severity {
630    match c {
631        threatdb::Confidence::Confirmed => Severity::Critical,
632        threatdb::Confidence::Medium => Severity::Medium,
633        threatdb::Confidence::Low => Severity::Medium,
634    }
635}
636
637fn hostname_rule_for_source(source: threatdb::ThreatSource) -> (RuleId, Severity, &'static str) {
638    match source {
639        threatdb::ThreatSource::Urlhaus => (
640            RuleId::ThreatMaliciousUrl,
641            Severity::High,
642            "malicious hostname",
643        ),
644        threatdb::ThreatSource::PhishingArmy | threatdb::ThreatSource::PhishTank => (
645            RuleId::ThreatPhishingUrl,
646            Severity::High,
647            "phishing hostname",
648        ),
649        threatdb::ThreatSource::ThreatFoxIoc => {
650            (RuleId::ThreatThreatFoxIoc, Severity::High, "IOC hostname")
651        }
652        // Package/IP-oriented sources and FireHOL aren't expected on hostname
653        // records, but enumerate them explicitly so the compiler flags any new
654        // variant added later instead of falling through a `_` arm.
655        threatdb::ThreatSource::OssfMalicious
656        | threatdb::ThreatSource::DatadogMalicious
657        | threatdb::ThreatSource::FeodoTracker
658        | threatdb::ThreatSource::EcosystemsTyposquat
659        | threatdb::ThreatSource::CisaKev
660        | threatdb::ThreatSource::FireholIp
661        | threatdb::ThreatSource::TorExit => (
662            RuleId::ThreatMaliciousUrl,
663            Severity::High,
664            "malicious hostname",
665        ),
666    }
667}
668
669fn ip_rule_for_source(source: threatdb::ThreatSource) -> (RuleId, Severity, &'static str) {
670    match source {
671        threatdb::ThreatSource::TorExit => {
672            (RuleId::ThreatTorExitNode, Severity::Medium, "Tor exit node")
673        }
674        threatdb::ThreatSource::ThreatFoxIoc => {
675            (RuleId::ThreatThreatFoxIoc, Severity::High, "IOC IP")
676        }
677        // Same exhaustive-match rationale as the hostname mapping above.
678        threatdb::ThreatSource::OssfMalicious
679        | threatdb::ThreatSource::DatadogMalicious
680        | threatdb::ThreatSource::FeodoTracker
681        | threatdb::ThreatSource::EcosystemsTyposquat
682        | threatdb::ThreatSource::CisaKev
683        | threatdb::ThreatSource::Urlhaus
684        | threatdb::ThreatSource::PhishingArmy
685        | threatdb::ThreatSource::PhishTank
686        | threatdb::ThreatSource::FireholIp => {
687            (RuleId::ThreatMaliciousIp, Severity::High, "malicious IP")
688        }
689    }
690}
691
692/// Check input against the local threat intelligence database.
693///
694/// Fail-open: if `db` is `None` (no DB file loaded), returns an empty Vec
695/// and does not block the command. All lookups are in-memory binary search
696/// with no network I/O.
697pub fn check(
698    input: &str,
699    shell: ShellType,
700    extracted: &[ExtractedUrl],
701    db: Option<&ThreatDb>,
702) -> Vec<Finding> {
703    let db = match db {
704        Some(d) => d,
705        // Fail-open: no DB loaded means no findings, never block the user.
706        None => return Vec::new(),
707    };
708
709    let mut findings = Vec::new();
710
711    let segments = crate::tokenize::tokenize(input, shell);
712    let packages = extract_packages(&segments);
713
714    for pkg in &packages {
715        let db_eco = pkg.ecosystem;
716
717        if let Some(m) = db.check_package(db_eco, &pkg.name, pkg.version.as_deref()) {
718            findings.push(Finding {
719                rule_id: RuleId::ThreatMaliciousPackage,
720                severity: confidence_to_severity(m.confidence),
721                title: format!("Known malicious {} package: {}", pkg.ecosystem, pkg.name),
722                description: format!(
723                    "Package '{}' in {} is flagged as malicious by {}. {}",
724                    pkg.name,
725                    pkg.ecosystem,
726                    m.source.label(),
727                    if m.all_versions_malicious {
728                        "All versions are affected."
729                    } else {
730                        "Specific version(s) affected."
731                    }
732                ),
733                evidence: vec![Evidence::ThreatIntel {
734                    source: m.source.label().to_string(),
735                    threat_type: "malicious_package".to_string(),
736                    confidence: m.confidence,
737                    reference: m.reference_url,
738                }],
739                human_view: None,
740                agent_view: None,
741                mitre_id: None,
742                custom_rule_id: None,
743            });
744            // A confirmed-malicious package already explains itself — don't pile on typosquat findings.
745            continue;
746        }
747
748        if let Some(t) = db.check_typosquat(db_eco, &pkg.name) {
749            findings.push(Finding {
750                rule_id: RuleId::ThreatPackageTyposquat,
751                severity: Severity::High,
752                title: format!("Confirmed typosquat: {} → {}", pkg.name, t.target_name),
753                description: format!(
754                    "Package '{}' in {} is a confirmed typosquat of '{}' \
755                     (source: ecosyste.ms typosquatting dataset).",
756                    pkg.name, pkg.ecosystem, t.target_name
757                ),
758                evidence: vec![Evidence::ThreatIntel {
759                    source: "ecosyste.ms Typosquats".to_string(),
760                    threat_type: "typosquat".to_string(),
761                    confidence: threatdb::Confidence::Confirmed,
762                    reference: None,
763                }],
764                human_view: None,
765                agent_view: None,
766                mitre_id: None,
767                custom_rule_id: None,
768            });
769        }
770
771        if let Some((popular_name, distance)) = db.check_popular_distance(db_eco, &pkg.name) {
772            findings.push(Finding {
773                rule_id: RuleId::ThreatPackageSimilarName,
774                severity: Severity::Medium,
775                title: format!(
776                    "Package name similar to popular package: {} ≈ {}",
777                    pkg.name, popular_name
778                ),
779                description: format!(
780                    "Package '{}' in {} is within edit distance {} of popular package '{}'. \
781                     This could indicate a typosquatting attempt.",
782                    pkg.name, pkg.ecosystem, distance, popular_name
783                ),
784                evidence: vec![Evidence::ThreatIntel {
785                    source: "popular package names".to_string(),
786                    threat_type: "similar_name".to_string(),
787                    confidence: threatdb::Confidence::Low,
788                    reference: None,
789                }],
790                human_view: None,
791                agent_view: None,
792                mitre_id: None,
793                custom_rule_id: None,
794            });
795        }
796    }
797
798    let mut checked_ips = std::collections::HashSet::new();
799    for url_info in extracted {
800        if let Some(host) = url_info.parsed.host() {
801            if let Some(m) = db.check_hostname(host) {
802                let (rule_id, severity, threat_type) = hostname_rule_for_source(m.source);
803                findings.push(Finding {
804                    rule_id,
805                    severity,
806                    title: format!("Threat intelligence hostname match: {}", host),
807                    description: format!(
808                        "Hostname '{}' appears in threat intelligence feed ({}).",
809                        host,
810                        m.source.label()
811                    ),
812                    evidence: vec![Evidence::ThreatIntel {
813                        source: m.source.label().to_string(),
814                        threat_type: threat_type.to_string(),
815                        confidence: m.confidence,
816                        reference: m.reference_url,
817                    }],
818                    human_view: None,
819                    agent_view: None,
820                    mitre_id: None,
821                    custom_rule_id: None,
822                });
823            }
824
825            // URL host may itself be an IP literal (e.g. `https://203.0.113.50/payload`).
826            if let Ok(ip) = host.parse::<std::net::Ipv4Addr>() {
827                if checked_ips.insert(ip) {
828                    if let Some(m) = db.check_ip(ip) {
829                        let (rule_id, severity, threat_type) = ip_rule_for_source(m.source);
830                        findings.push(Finding {
831                            rule_id,
832                            severity,
833                            title: format!("Threat intelligence IP match in URL: {}", ip),
834                            description: format!(
835                                "IP address {} (from URL) is flagged by {}.",
836                                ip,
837                                m.source.label()
838                            ),
839                            evidence: vec![Evidence::ThreatIntel {
840                                source: m.source.label().to_string(),
841                                threat_type: threat_type.to_string(),
842                                confidence: m.confidence,
843                                reference: m.reference_url,
844                            }],
845                            human_view: None,
846                            agent_view: None,
847                            mitre_id: None,
848                            custom_rule_id: None,
849                        });
850                    }
851                }
852            }
853        }
854    }
855
856    // IP literals in command tokens — ssh/scp/nc and friends.
857    for seg in &segments {
858        for arg in &seg.args {
859            if let Some(ip) = extract_ipv4_from_token(arg) {
860                if checked_ips.insert(ip) {
861                    if let Some(m) = db.check_ip(ip) {
862                        let (rule_id, severity, threat_type) = ip_rule_for_source(m.source);
863                        findings.push(Finding {
864                            rule_id,
865                            severity,
866                            title: format!("Threat intelligence IP match: {}", ip),
867                            description: format!(
868                                "IP address {} is flagged by {}.",
869                                ip,
870                                m.source.label()
871                            ),
872                            evidence: vec![Evidence::ThreatIntel {
873                                source: m.source.label().to_string(),
874                                threat_type: threat_type.to_string(),
875                                confidence: m.confidence,
876                                reference: m.reference_url,
877                            }],
878                            human_view: None,
879                            agent_view: None,
880                            mitre_id: None,
881                            custom_rule_id: None,
882                        });
883                    }
884                }
885            }
886        }
887    }
888
889    findings
890}
891
#[cfg(test)]
mod tests {
    //! Unit tests for package extraction, IPv4 token parsing, the fail-open
    //! behaviour of `check`, and the source -> rule/severity mappings.

    use super::*;
    use crate::threatdb::ThreatSource;
    use crate::tokenize;

    /// Tokenize `cmd` as a POSIX shell line and return the packages found in it.
    fn packages_in(cmd: &str) -> Vec<PackageRef> {
        extract_packages(&tokenize::tokenize(cmd, ShellType::Posix))
    }

    /// Like [`packages_in`], but asserts that exactly one package was found
    /// and hands it back.
    fn sole_package(cmd: &str) -> PackageRef {
        let mut found = packages_in(cmd);
        assert_eq!(found.len(), 1);
        found.remove(0)
    }

    /// Borrow the package names, in extraction order.
    fn names(pkgs: &[PackageRef]) -> Vec<&str> {
        pkgs.iter().map(|p| p.name.as_str()).collect()
    }

    // ---- pip / pip3 / uv ------------------------------------------------

    #[test]
    fn pip_install_single() {
        let pkg = sole_package("pip install requests");
        assert_eq!(pkg.ecosystem, Ecosystem::PyPI);
        assert_eq!(pkg.name, "requests");
        assert!(pkg.version.is_none());
    }

    #[test]
    fn pip_install_with_version() {
        let pkg = sole_package("pip install requests==2.31.0");
        assert_eq!(pkg.name, "requests");
        assert_eq!(pkg.version.as_deref(), Some("2.31.0"));
    }

    #[test]
    fn pip_install_version_range_not_exact() {
        let pkg = sole_package("pip install requests>=2.0");
        assert_eq!(pkg.name, "requests");
        // A range specifier is not an exact pin, so no version is recorded;
        // only `==` yields one.
        assert!(pkg.version.is_none());
    }

    #[test]
    fn pip3_install() {
        let pkg = sole_package("pip3 install flask");
        assert_eq!(pkg.ecosystem, Ecosystem::PyPI);
        assert_eq!(pkg.name, "flask");
    }

    #[test]
    fn uv_install() {
        let pkg = sole_package("uv install numpy");
        assert_eq!(pkg.ecosystem, Ecosystem::PyPI);
        assert_eq!(pkg.name, "numpy");
    }

    #[test]
    fn pip_install_multiple() {
        let pkgs = packages_in("pip install requests flask django");
        assert_eq!(names(&pkgs), ["requests", "flask", "django"]);
    }

    #[test]
    fn pip_install_with_extras() {
        let pkg = sole_package("pip install requests[security]==2.31.0");
        assert_eq!(pkg.name, "requests");
        assert_eq!(pkg.version.as_deref(), Some("2.31.0"));
    }

    #[test]
    fn pip_install_skips_flags() {
        let pkg = sole_package("pip install --index-url https://pypi.org/simple/ requests");
        assert_eq!(pkg.name, "requests");
    }

    #[test]
    fn pip_install_skips_url() {
        let pkg = sole_package("pip install git+https://github.com/user/repo.git requests");
        assert_eq!(pkg.name, "requests");
    }

    #[test]
    fn pip_install_skips_local_path() {
        let pkg = sole_package("pip install ./local_pkg requests");
        assert_eq!(pkg.name, "requests");
    }

    #[test]
    fn pip_normalizes_name() {
        let pkg = sole_package("pip install My_Package.Name");
        assert_eq!(pkg.name, "my-package-name");
    }

    #[test]
    fn pip_no_install_subcommand() {
        assert!(packages_in("pip freeze").is_empty());
    }

    // ---- npm / yarn / pnpm / bun / npx ----------------------------------

    #[test]
    fn npm_install_single() {
        let pkg = sole_package("npm install lodash");
        assert_eq!(pkg.ecosystem, Ecosystem::Npm);
        assert_eq!(pkg.name, "lodash");
        assert!(pkg.version.is_none());
    }

    #[test]
    fn npm_install_with_version() {
        let pkg = sole_package("npm install lodash@4.17.21");
        assert_eq!(pkg.name, "lodash");
        assert_eq!(pkg.version.as_deref(), Some("4.17.21"));
    }

    #[test]
    fn npm_install_scoped() {
        let pkg = sole_package("npm install @angular/core@16.0.0");
        assert_eq!(pkg.name, "@angular/core");
        assert_eq!(pkg.version.as_deref(), Some("16.0.0"));
    }

    #[test]
    fn npm_install_scoped_no_version() {
        let pkg = sole_package("npm install @types/node");
        assert_eq!(pkg.name, "@types/node");
        assert!(pkg.version.is_none());
    }

    #[test]
    fn npm_i_shorthand() {
        let pkg = sole_package("npm i express");
        assert_eq!(pkg.name, "express");
    }

    #[test]
    fn yarn_add() {
        let pkg = sole_package("yarn add react@18.2.0");
        assert_eq!(pkg.ecosystem, Ecosystem::Npm);
        assert_eq!(pkg.name, "react");
        assert_eq!(pkg.version.as_deref(), Some("18.2.0"));
    }

    #[test]
    fn pnpm_add() {
        let pkg = sole_package("pnpm add vue");
        assert_eq!(pkg.ecosystem, Ecosystem::Npm);
        assert_eq!(pkg.name, "vue");
    }

    #[test]
    fn bun_add() {
        let pkg = sole_package("bun add elysia");
        assert_eq!(pkg.ecosystem, Ecosystem::Npm);
        assert_eq!(pkg.name, "elysia");
    }

    #[test]
    fn npx_package() {
        let pkg = sole_package("npx create-react-app my-app");
        assert_eq!(pkg.ecosystem, Ecosystem::Npm);
        assert_eq!(pkg.name, "create-react-app");
    }

    #[test]
    fn npx_scoped_package() {
        let pkg = sole_package("npx @angular/cli new my-app");
        assert_eq!(pkg.name, "@angular/cli");
    }

    #[test]
    fn npx_with_package_flag() {
        let pkg = sole_package("npx --package typescript tsc");
        assert_eq!(pkg.name, "typescript");
    }

    #[test]
    fn npm_install_skips_save_dev() {
        let pkg = sole_package("npm install --save-dev jest");
        assert_eq!(pkg.name, "jest");
    }

    #[test]
    fn npm_install_multiple() {
        let pkgs = packages_in("npm install react react-dom");
        assert_eq!(names(&pkgs), ["react", "react-dom"]);
    }

    // ---- cargo ----------------------------------------------------------

    #[test]
    fn cargo_install() {
        let pkg = sole_package("cargo install ripgrep");
        assert_eq!(pkg.ecosystem, Ecosystem::Crates);
        assert_eq!(pkg.name, "ripgrep");
        assert!(pkg.version.is_none());
    }

    #[test]
    fn cargo_add() {
        let pkg = sole_package("cargo add serde");
        assert_eq!(pkg.name, "serde");
    }

    #[test]
    fn cargo_add_with_version() {
        let pkg = sole_package("cargo add serde@1.0.193");
        assert_eq!(pkg.name, "serde");
        assert_eq!(pkg.version.as_deref(), Some("1.0.193"));
    }

    #[test]
    fn cargo_install_with_version_flag() {
        let pkg = sole_package("cargo install ripgrep --version 14.0.0");
        assert_eq!(pkg.name, "ripgrep");
        assert_eq!(pkg.version.as_deref(), Some("14.0.0"));
    }

    #[test]
    fn cargo_skips_git_url() {
        assert!(packages_in("cargo install --git https://github.com/user/repo").is_empty());
    }

    #[test]
    fn cargo_build_not_install() {
        assert!(packages_in("cargo build --release").is_empty());
    }

    // ---- gem ------------------------------------------------------------

    #[test]
    fn gem_install() {
        let pkg = sole_package("gem install rails");
        assert_eq!(pkg.ecosystem, Ecosystem::RubyGems);
        assert_eq!(pkg.name, "rails");
    }

    #[test]
    fn gem_install_with_version_flag() {
        let pkg = sole_package("gem install rails --version 7.0.0");
        assert_eq!(pkg.name, "rails");
        assert_eq!(pkg.version.as_deref(), Some("7.0.0"));
    }

    #[test]
    fn gem_install_with_colon_version() {
        let pkg = sole_package("gem install rails:7.0.0");
        assert_eq!(pkg.name, "rails");
        assert_eq!(pkg.version.as_deref(), Some("7.0.0"));
    }

    // ---- go -------------------------------------------------------------

    #[test]
    fn go_get() {
        let pkg = sole_package("go get github.com/gin-gonic/gin");
        assert_eq!(pkg.ecosystem, Ecosystem::Go);
        assert_eq!(pkg.name, "github.com/gin-gonic/gin");
        assert!(pkg.version.is_none());
    }

    #[test]
    fn go_get_with_version() {
        let pkg = sole_package("go get github.com/gin-gonic/gin@v1.9.1");
        assert_eq!(pkg.name, "github.com/gin-gonic/gin");
        assert_eq!(pkg.version.as_deref(), Some("v1.9.1"));
    }

    #[test]
    fn go_install() {
        let pkg = sole_package("go install golang.org/x/tools/gopls@latest");
        assert_eq!(pkg.name, "golang.org/x/tools/gopls");
        assert_eq!(pkg.version.as_deref(), Some("latest"));
    }

    // ---- composer -------------------------------------------------------

    #[test]
    fn composer_require() {
        let pkg = sole_package("composer require monolog/monolog");
        assert_eq!(pkg.ecosystem, Ecosystem::Packagist);
        assert_eq!(pkg.name, "monolog/monolog");
        assert!(pkg.version.is_none());
    }

    #[test]
    fn composer_require_with_version() {
        let pkg = sole_package("composer require monolog/monolog:^3.0");
        assert_eq!(pkg.name, "monolog/monolog");
        assert_eq!(pkg.version.as_deref(), Some("^3.0"));
    }

    // ---- dotnet ---------------------------------------------------------

    #[test]
    fn dotnet_add_package() {
        let pkg = sole_package("dotnet add package Newtonsoft.Json");
        assert_eq!(pkg.ecosystem, Ecosystem::NuGet);
        assert_eq!(pkg.name, "Newtonsoft.Json");
        assert!(pkg.version.is_none());
    }

    #[test]
    fn dotnet_add_package_with_version() {
        let pkg = sole_package("dotnet add package Newtonsoft.Json --version 13.0.3");
        assert_eq!(pkg.name, "Newtonsoft.Json");
        assert_eq!(pkg.version.as_deref(), Some("13.0.3"));
    }

    // ---- commands that install nothing / several segments ---------------

    #[test]
    fn no_packages_in_ls() {
        assert!(packages_in("ls -la").is_empty());
    }

    #[test]
    fn no_packages_in_echo() {
        assert!(packages_in("echo hello world").is_empty());
    }

    #[test]
    fn piped_commands_both_extracted() {
        let pkgs = packages_in("pip install requests && npm install lodash");
        assert_eq!(pkgs.len(), 2);
        let (first, second) = (&pkgs[0], &pkgs[1]);
        assert_eq!(first.ecosystem, Ecosystem::PyPI);
        assert_eq!(first.name, "requests");
        assert_eq!(second.ecosystem, Ecosystem::Npm);
        assert_eq!(second.name, "lodash");
    }

    // ---- IPv4 token extraction ------------------------------------------

    #[test]
    fn ipv4_bare() {
        assert_eq!(
            extract_ipv4_from_token("1.2.3.4"),
            Some(Ipv4Addr::from([1, 2, 3, 4]))
        );
    }

    #[test]
    fn ipv4_with_user() {
        assert_eq!(
            extract_ipv4_from_token("user@192.168.1.1"),
            Some(Ipv4Addr::from([192, 168, 1, 1]))
        );
    }

    #[test]
    fn ipv4_with_port() {
        assert_eq!(
            extract_ipv4_from_token("10.0.0.1:22"),
            Some(Ipv4Addr::from([10, 0, 0, 1]))
        );
    }

    #[test]
    fn ipv4_with_user_and_port() {
        assert_eq!(
            extract_ipv4_from_token("root@10.0.0.1:22"),
            Some(Ipv4Addr::from([10, 0, 0, 1]))
        );
    }

    #[test]
    fn ipv4_localhost() {
        assert_eq!(
            extract_ipv4_from_token("127.0.0.1"),
            Some(Ipv4Addr::from([127, 0, 0, 1]))
        );
    }

    #[test]
    fn ipv4_not_an_ip() {
        assert_eq!(extract_ipv4_from_token("hello"), None);
    }

    #[test]
    fn ipv4_partial_not_valid() {
        assert_eq!(extract_ipv4_from_token("1.2.3"), None);
    }

    #[test]
    fn ipv4_out_of_range() {
        assert_eq!(extract_ipv4_from_token("999.999.999.999"), None);
    }

    #[test]
    fn ipv6_not_matched() {
        for token in ["::1", "2001:db8::1", "fe80::1%eth0"] {
            assert!(extract_ipv4_from_token(token).is_none());
        }
    }

    #[test]
    fn ipv4_empty_string() {
        assert_eq!(extract_ipv4_from_token(""), None);
    }

    #[test]
    fn ipv4_in_brackets() {
        assert_eq!(
            extract_ipv4_from_token("[10.0.0.1]"),
            Some(Ipv4Addr::from([10, 0, 0, 1]))
        );
    }

    // ---- check() without a database -------------------------------------

    #[test]
    fn check_returns_empty_without_db() {
        assert!(
            check("pip install malicious-pkg", ShellType::Posix, &[], None).is_empty(),
            "check() must be fail-open without DB"
        );
    }

    // ---- source -> (rule, severity) mappings ----------------------------

    #[test]
    fn hostname_rule_urlhaus_maps_to_malicious_url() {
        let (rule_id, severity, _) = hostname_rule_for_source(ThreatSource::Urlhaus);
        assert_eq!(
            (rule_id, severity),
            (RuleId::ThreatMaliciousUrl, Severity::High)
        );
    }

    #[test]
    fn hostname_rule_phishing_sources_map_to_phishing_url() {
        for feed in [ThreatSource::PhishingArmy, ThreatSource::PhishTank] {
            let (rule_id, severity, _) = hostname_rule_for_source(feed);
            assert_eq!(
                (rule_id, severity),
                (RuleId::ThreatPhishingUrl, Severity::High)
            );
        }
    }

    #[test]
    fn hostname_rule_threatfox_maps_to_ioc() {
        let (rule_id, severity, _) = hostname_rule_for_source(ThreatSource::ThreatFoxIoc);
        assert_eq!(
            (rule_id, severity),
            (RuleId::ThreatThreatFoxIoc, Severity::High)
        );
    }

    #[test]
    fn ip_rule_tor_exit_maps_to_medium() {
        let (rule_id, severity, _) = ip_rule_for_source(ThreatSource::TorExit);
        assert_eq!(
            (rule_id, severity),
            (RuleId::ThreatTorExitNode, Severity::Medium)
        );
    }

    #[test]
    fn ip_rule_threatfox_maps_to_ioc() {
        let (rule_id, severity, _) = ip_rule_for_source(ThreatSource::ThreatFoxIoc);
        assert_eq!(
            (rule_id, severity),
            (RuleId::ThreatThreatFoxIoc, Severity::High)
        );
    }

    #[test]
    fn ip_rule_feodo_maps_to_malicious_ip() {
        let (rule_id, severity, _) = ip_rule_for_source(ThreatSource::FeodoTracker);
        assert_eq!(
            (rule_id, severity),
            (RuleId::ThreatMaliciousIp, Severity::High)
        );
    }
}