// aegis-scan 0.2.0
//
// Supply chain security CLI for npm — detect malicious packages before installing.

use crate::registry::package::{Maintainer, PackageMetadata};
use crate::types::{Finding, FindingCategory, Severity};
use std::collections::HashSet;

/// Analyzes npm package maintainer metadata for suspicious changes.
///
/// This analyzer works on registry metadata (not file contents), so it does
/// **not** implement the `Analyzer` trait.  Call [`MaintainerAnalyzer::analyze`]
/// directly with a [`PackageMetadata`] value.
///
/// The type is a field-less unit struct: it carries no state of its own, and
/// every finding is derived from the metadata handed to `analyze`.
pub struct MaintainerAnalyzer;

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/// Build the lowercase identity key used to compare maintainers.
///
/// The npm username wins when present; otherwise the email address is used;
/// when neither field is set the literal `"unknown"` is returned.
fn maintainer_key(m: &Maintainer) -> String {
    let identity = match (m.name.as_deref(), m.email.as_deref()) {
        (Some(username), _) => username,
        (None, Some(address)) => address,
        (None, None) => "unknown",
    };
    identity.to_lowercase()
}

/// Return the text following the final `'@'` in `email`, or `None` when the
/// address contains no `'@'` at all.
fn email_domain(email: &str) -> Option<&str> {
    let at = email.rfind('@')?;
    // '@' is a single byte, so `at + 1` is always a valid char boundary.
    Some(&email[at + 1..])
}

/// Very lightweight ISO-8601 parser — returns `(year, month, day)` or `None`.
///
/// Expects strings that start with `YYYY-MM-DD` (the npm registry format);
/// anything after the first ten bytes is ignored.
///
/// Returns `None` when the input is shorter than ten bytes, when a field is
/// not numeric, when a field offset falls inside a multi-byte character, or
/// when the month / day is outside `1..=12` / `1..=31`.
fn parse_ymd(ts: &str) -> Option<(i32, u32, u32)> {
    // `str::get` is the checked form of slicing: it yields `None` instead of
    // panicking when a range is out of bounds or splits a UTF-8 character.
    let y: i32 = ts.get(..4)?.parse().ok()?;
    let m: u32 = ts.get(5..7)?.parse().ok()?;
    let d: u32 = ts.get(8..10)?.parse().ok()?;

    // Reject values that cannot be a calendar date so callers never do
    // arithmetic on garbage.
    if !(1..=12).contains(&m) || !(1..=31).contains(&d) {
        return None;
    }
    Some((y, m, d))
}

/// Return the number of days between 1970-01-01 and the given proleptic
/// Gregorian date (negative for earlier dates).
///
/// Differences between two ordinals are exact day counts, so the value is
/// safe to use for short windows that straddle month or year boundaries.
/// The computation is the usual "days from civil" formulation: the year is
/// shifted to start in March so the leap day is the last day of the shifted
/// year, and whole 400-year eras (146 097 days each) are counted separately.
fn day_ordinal(y: i32, m: u32, d: u32) -> i64 {
    let m = i64::from(m);
    let d = i64::from(d);
    // January and February belong to the previous March-based year.
    let y = i64::from(y) - i64::from(m <= 2);

    let era = y.div_euclid(400);
    let year_of_era = y - era * 400; // 0..=399
    let month_from_march = (m + 9) % 12; // March = 0 … February = 11
    let day_of_year = (153 * month_from_march + 2) / 5 + d - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;

    // 719 468 is the day count from 0000-03-01 to 1970-01-01.
    era * 146_097 + day_of_era - 719_468
}

/// Report whether the calendar dates of `ts` and `reference_ts` lie at most
/// `days` apart (in either direction).
///
/// Both arguments are run through `parse_ymd`; if either one fails to parse
/// the answer is `false`.
fn is_within_days(ts: &str, reference_ts: &str, days: i64) -> bool {
    // The reference is parsed first and `ts` only when that succeeds.
    let gap = parse_ymd(reference_ts).and_then(|(ref_y, ref_m, ref_d)| {
        let (y, m, d) = parse_ymd(ts)?;
        Some(day_ordinal(ref_y, ref_m, ref_d) - day_ordinal(y, m, d))
    });
    gap.map_or(false, |delta| delta.abs() <= days)
}

/// Return versions sorted by their publish time (ascending), paired with their
/// timestamp.  Skips the special `created` / `modified` keys that npm includes
/// in the `time` map.
fn versions_by_time(meta: &PackageMetadata) -> Vec<(String, String)> {
    let skip: HashSet<&str> = ["created", "modified"].into_iter().collect();
    let mut pairs: Vec<(String, String)> = meta
        .time
        .iter()
        .filter(|(k, _)| !skip.contains(k.as_str()))
        .map(|(k, v)| (k.clone(), v.clone()))
        .collect();
    pairs.sort_by(|a, b| a.1.cmp(&b.1));
    pairs
}

// ---------------------------------------------------------------------------
// Analyzer
// ---------------------------------------------------------------------------

impl MaintainerAnalyzer {
    /// Analyze the package metadata for suspicious maintainer changes.
    pub fn analyze(&self, metadata: &PackageMetadata) -> Vec<Finding> {
        let mut findings: Vec<Finding> = Vec::new();

        let current_maintainers = match &metadata.maintainers {
            Some(m) if !m.is_empty() => m,
            _ => return findings,
        };

        let pkg_name = metadata.name.as_deref().unwrap_or("<unknown>");

        // ---- Derive a "today" reference from the most recent publish time ----
        let sorted_versions = versions_by_time(metadata);
        let latest_ts = sorted_versions.last().map(|(_, ts)| ts.as_str());

        // ---- LOW: single maintainer (bus factor) ----------------------------
        if current_maintainers.len() == 1 {
            findings.push(Finding {
                severity: Severity::Low,
                category: FindingCategory::MaintainerChange,
                title: "Single maintainer (bus factor risk)".into(),
                description: format!(
                    "Package `{pkg_name}` has only 1 maintainer ({}). \
                     If the account is compromised there is no second party to notice.",
                    maintainer_key(&current_maintainers[0]),
                ),
                file: None,
                line: None,
                snippet: None,
            });
        }

        // ---- Compare maintainers across the last two versions ---------------
        if sorted_versions.len() >= 2 {
            let prev_ver = &sorted_versions[sorted_versions.len() - 2].0;
            let latest_ver = &sorted_versions[sorted_versions.len() - 1].0;

            let prev_maintainers = metadata
                .versions
                .get(prev_ver)
                .and_then(|v| v.maintainers.as_ref());

            let latest_maintainers = metadata
                .versions
                .get(latest_ver)
                .and_then(|v| v.maintainers.as_ref())
                .or(Some(current_maintainers));

            if let (Some(prev), Some(curr)) = (prev_maintainers, latest_maintainers) {
                let prev_keys: HashSet<String> = prev.iter().map(maintainer_key).collect();
                let curr_keys: HashSet<String> = curr.iter().map(maintainer_key).collect();

                let added: Vec<&String> = curr_keys.difference(&prev_keys).collect();
                let removed: Vec<&String> = prev_keys.difference(&curr_keys).collect();

                // CRITICAL: ownership transferred to entirely new maintainer
                if !prev_keys.is_empty()
                    && !curr_keys.is_empty()
                    && prev_keys.is_disjoint(&curr_keys)
                {
                    findings.push(Finding {
                        severity: Severity::Critical,
                        category: FindingCategory::MaintainerChange,
                        title: "Complete ownership transfer".into(),
                        description: format!(
                            "Package `{pkg_name}` ownership was transferred between versions \
                             {prev_ver} and {latest_ver}. Previous maintainers ({}) were \
                             completely replaced by new maintainers ({}).",
                            prev_keys.iter().cloned().collect::<Vec<_>>().join(", "),
                            curr_keys.iter().cloned().collect::<Vec<_>>().join(", "),
                        ),
                        file: None,
                        line: None,
                        snippet: None,
                    });
                }

                // HIGH: all previous maintainers removed and replaced (full takeover)
                // (overlaps with critical — only emit if there IS some intersection)
                if !prev_keys.is_empty()
                    && !removed.is_empty()
                    && removed.len() == prev_keys.len()
                    && !prev_keys.is_disjoint(&curr_keys)
                {
                    findings.push(Finding {
                        severity: Severity::High,
                        category: FindingCategory::MaintainerChange,
                        title: "All previous maintainers removed".into(),
                        description: format!(
                            "Every maintainer present in version {prev_ver} was removed by \
                             version {latest_ver}. Removed: {}. Current: {}.",
                            removed
                                .iter()
                                .map(|s| s.as_str())
                                .collect::<Vec<_>>()
                                .join(", "),
                            curr_keys.iter().cloned().collect::<Vec<_>>().join(", "),
                        ),
                        file: None,
                        line: None,
                        snippet: None,
                    });
                }

                // HIGH: new maintainer added in the last 7 days
                if !added.is_empty() {
                    if let Some(ref_ts) = latest_ts {
                        let latest_publish_ts = &sorted_versions[sorted_versions.len() - 1].1;
                        // "today" is approximated as the most-recent publish time.
                        if is_within_days(latest_publish_ts, ref_ts, 7) {
                            findings.push(Finding {
                                severity: Severity::High,
                                category: FindingCategory::MaintainerChange,
                                title: "New maintainer added recently".into(),
                                description: format!(
                                    "New maintainer(s) ({}) were added to `{pkg_name}` in the \
                                     latest version published on {latest_publish_ts}.",
                                    added
                                        .iter()
                                        .map(|s| s.as_str())
                                        .collect::<Vec<_>>()
                                        .join(", "),
                                ),
                                file: None,
                                line: None,
                                snippet: None,
                            });
                        }
                    }
                }

                // MEDIUM: any new maintainer added (even if old ones remain)
                if !added.is_empty() {
                    findings.push(Finding {
                        severity: Severity::Medium,
                        category: FindingCategory::MaintainerChange,
                        title: "New maintainer added".into(),
                        description: format!(
                            "Maintainer(s) added between versions {prev_ver} and {latest_ver}: {}.",
                            added
                                .iter()
                                .map(|s| s.as_str())
                                .collect::<Vec<_>>()
                                .join(", "),
                        ),
                        file: None,
                        line: None,
                        snippet: None,
                    });
                }

                // MEDIUM: maintainer email domain changed
                check_email_domain_changes(prev, curr, pkg_name, &mut findings);
            }
        }

        // HIGH: maintainer with no other packages (brand-new npm account)
        // We cannot query the registry for other packages here, but we can flag
        // maintainers whose npm username looks like a throwaway (no email or
        // free-mail provider combined with a single-package context).  This is a
        // heuristic stand-in; a full implementation would query the npm user API.
        for m in current_maintainers {
            if let Some(email) = m.email.as_deref() {
                if email.is_empty() {
                    findings.push(Finding {
                        severity: Severity::High,
                        category: FindingCategory::MaintainerChange,
                        title: "Maintainer with no email".into(),
                        description: format!(
                            "Maintainer `{}` on `{pkg_name}` has no email address, which may \
                             indicate a brand-new or throwaway npm account.",
                            maintainer_key(m),
                        ),
                        file: None,
                        line: None,
                        snippet: None,
                    });
                }
            } else {
                findings.push(Finding {
                    severity: Severity::High,
                    category: FindingCategory::MaintainerChange,
                    title: "Maintainer with no email".into(),
                    description: format!(
                        "Maintainer `{}` on `{pkg_name}` has no email address, which may \
                         indicate a brand-new or throwaway npm account.",
                        maintainer_key(m),
                    ),
                    file: None,
                    line: None,
                    snippet: None,
                });
            }
        }

        findings
    }
}

/// Detect email domain changes between previous and current maintainer lists.
fn check_email_domain_changes(
    prev: &[Maintainer],
    curr: &[Maintainer],
    pkg_name: &str,
    findings: &mut Vec<Finding>,
) {
    // Build a map: maintainer key -> email domain for the previous version.
    let prev_domains: std::collections::HashMap<String, String> = prev
        .iter()
        .filter_map(|m| {
            let email = m.email.as_deref()?;
            let domain = email_domain(email)?;
            Some((maintainer_key(m), domain.to_lowercase()))
        })
        .collect();

    for m in curr {
        let key = maintainer_key(m);
        if let Some(old_domain) = prev_domains.get(&key) {
            if let Some(email) = m.email.as_deref() {
                if let Some(new_domain) = email_domain(email) {
                    let new_domain_lower = new_domain.to_lowercase();
                    if *old_domain != new_domain_lower {
                        findings.push(Finding {
                            severity: Severity::Medium,
                            category: FindingCategory::MaintainerChange,
                            title: "Maintainer email domain changed".into(),
                            description: format!(
                                "Maintainer `{key}` on `{pkg_name}` changed email domain \
                                 from @{old_domain} to @{new_domain_lower}.",
                            ),
                            file: None,
                            line: None,
                            snippet: None,
                        });
                    }
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::registry::package::{Maintainer, PackageMetadata, VersionInfo};
    use std::collections::HashMap;

    /// Maintainer fixture with both the username and the email populated.
    fn make_maintainer(name: &str, email: &str) -> Maintainer {
        Maintainer {
            email: Some(email.into()),
            name: Some(name.into()),
        }
    }

    /// Version entry that carries only a maintainer list.
    fn make_version(maintainers: Vec<Maintainer>) -> VersionInfo {
        VersionInfo {
            maintainers: Some(maintainers),
            name: None,
            version: None,
            description: None,
            dist: None,
            scripts: None,
            dependencies: None,
            dev_dependencies: None,
            extra: HashMap::new(),
        }
    }

    /// Empty metadata for a package named `test-pkg`.
    fn base_metadata() -> PackageMetadata {
        PackageMetadata {
            name: Some("test-pkg".into()),
            description: None,
            maintainers: None,
            dist_tags: None,
            versions: HashMap::new(),
            time: HashMap::new(),
            extra: HashMap::new(),
        }
    }

    /// Metadata with releases `1.0.0` (2026-01-01) and `1.0.1` (2026-03-30),
    /// each carrying the given maintainer list, plus the package-level list.
    fn two_release_metadata(
        package_level: Vec<Maintainer>,
        older: Vec<Maintainer>,
        newer: Vec<Maintainer>,
    ) -> PackageMetadata {
        let mut meta = base_metadata();
        meta.maintainers = Some(package_level);
        meta.versions.insert("1.0.0".into(), make_version(older));
        meta.versions.insert("1.0.1".into(), make_version(newer));
        meta.time
            .insert("1.0.0".into(), "2026-01-01T00:00:00Z".into());
        meta.time
            .insert("1.0.1".into(), "2026-03-30T00:00:00Z".into());
        meta
    }

    /// True when some finding has the given severity and a title containing
    /// `title_part`.
    fn has_finding(findings: &[Finding], severity: Severity, title_part: &str) -> bool {
        findings
            .iter()
            .any(|f| f.severity == severity && f.title.contains(title_part))
    }

    #[test]
    fn single_maintainer_bus_factor() {
        let mut meta = base_metadata();
        meta.maintainers = Some(vec![make_maintainer("alice", "alice@example.com")]);

        let findings = MaintainerAnalyzer.analyze(&meta);
        assert!(has_finding(&findings, Severity::Low, "Single maintainer"));
    }

    #[test]
    fn complete_ownership_transfer() {
        let meta = two_release_metadata(
            vec![make_maintainer("eve", "eve@evil.com")],
            vec![make_maintainer("alice", "alice@co.com")],
            vec![make_maintainer("eve", "eve@evil.com")],
        );

        let findings = MaintainerAnalyzer.analyze(&meta);
        assert!(has_finding(&findings, Severity::Critical, "ownership transfer"));
    }

    #[test]
    fn new_maintainer_added() {
        let alice_and_bob = || {
            vec![
                make_maintainer("alice", "alice@co.com"),
                make_maintainer("bob", "bob@co.com"),
            ]
        };
        let meta = two_release_metadata(
            alice_and_bob(),
            vec![make_maintainer("alice", "alice@co.com")],
            alice_and_bob(),
        );

        let findings = MaintainerAnalyzer.analyze(&meta);
        assert!(has_finding(&findings, Severity::Medium, "New maintainer added"));
    }

    #[test]
    fn email_domain_change() {
        let meta = two_release_metadata(
            vec![make_maintainer("alice", "alice@gmail.com")],
            vec![make_maintainer("alice", "alice@company.com")],
            vec![make_maintainer("alice", "alice@gmail.com")],
        );

        let findings = MaintainerAnalyzer.analyze(&meta);
        assert!(has_finding(&findings, Severity::Medium, "email domain changed"));
    }
}