1use std::net::Ipv4Addr;
33use std::path::{Path, PathBuf};
34use std::sync::atomic::{AtomicU64, Ordering};
35use std::sync::{Arc, OnceLock, RwLock};
36
37use ed25519_dalek::{Signature, VerifyingKey, PUBLIC_KEY_LENGTH, SIGNATURE_LENGTH};
38use sha2::{Digest, Sha256};
39use thiserror::Error;
40
41use crate::policy;
42use crate::util::levenshtein;
43
/// Magic bytes expected at offset 0 of every database file.
const MAGIC: &[u8; 8] = b"TIRITHDB";
/// The only on-disk format version `ThreatDb::from_bytes` accepts.
const FORMAT_VERSION: u32 = 1;
/// Size of the fixed header in bytes; files shorter than this are rejected,
/// and the signed body starts at this offset.
const HEADER_SIZE: usize = 172;
/// Offset of the Ed25519 signature inside the header. Everything before this
/// offset is part of the signed data.
const SIG_OFFSET: usize = 108;
/// Offset of the stored signer fingerprint inside the header.
const FINGERPRINT_OFFSET: usize = 76;
/// Length of the stored signer fingerprint (compared against a SHA-256 digest).
const FINGERPRINT_LEN: usize = 32;
/// File name of the primary database inside the data directory.
const DB_FILENAME: &str = "tirith-threatdb.dat";
/// File name of the optional supplemental database inside the data directory.
const SUPPLEMENTAL_DB_FILENAME: &str = "tirith-threatdb-supplemental.dat";
/// Minimum number of seconds between two mtime checks performed by the cache.
const MTIME_CHECK_INTERVAL_SECS: u64 = 60;

/// Public key used to verify database signatures, embedded at compile time.
static VERIFY_KEY_BYTES: &[u8; PUBLIC_KEY_LENGTH] =
    include_bytes!("../assets/keys/threatdb-verify.pub");
62
/// Package ecosystem a record belongs to.
///
/// The numeric discriminant is the one-byte tag used in the binary records
/// (see [`Ecosystem::from_u8`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum Ecosystem {
    Npm = 0,
    PyPI = 1,
    RubyGems = 2,
    Crates = 3,
    Go = 4,
    Maven = 5,
    NuGet = 6,
    Packagist = 7,
}

impl std::fmt::Display for Ecosystem {
    /// Writes the lowercase registry name (for example `crates.io`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Ecosystem::Npm => "npm",
            Ecosystem::PyPI => "pypi",
            Ecosystem::RubyGems => "rubygems",
            Ecosystem::Crates => "crates.io",
            Ecosystem::Go => "go",
            Ecosystem::Maven => "maven",
            Ecosystem::NuGet => "nuget",
            Ecosystem::Packagist => "packagist",
        };
        f.write_str(label)
    }
}

impl Ecosystem {
    /// Decodes the one-byte tag; unknown tags yield `None`.
    fn from_u8(v: u8) -> Option<Self> {
        // Indexed by discriminant, so the order must mirror the enum definition.
        const BY_TAG: [Ecosystem; 8] = [
            Ecosystem::Npm,
            Ecosystem::PyPI,
            Ecosystem::RubyGems,
            Ecosystem::Crates,
            Ecosystem::Go,
            Ecosystem::Maven,
            Ecosystem::NuGet,
            Ecosystem::Packagist,
        ];
        BY_TAG.get(usize::from(v)).copied()
    }

    /// Parses an ecosystem name case-insensitively.
    ///
    /// `crates.io`, `crates` and `cargo` all map to [`Ecosystem::Crates`];
    /// unrecognised names yield `None`.
    pub fn from_name(s: &str) -> Option<Self> {
        let lowered = s.to_lowercase();
        let eco = match lowered.as_str() {
            "npm" => Self::Npm,
            "pypi" => Self::PyPI,
            "rubygems" => Self::RubyGems,
            "crates.io" | "crates" | "cargo" => Self::Crates,
            "go" => Self::Go,
            "maven" => Self::Maven,
            "nuget" => Self::NuGet,
            "packagist" => Self::Packagist,
            _ => return None,
        };
        Some(eco)
    }
}
123
/// Feed a threat record originated from.
///
/// The numeric discriminant is the one-byte tag decoded by
/// `ThreatSource::from_u8`; human-readable names are provided by `label`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum ThreatSource {
    OssfMalicious = 0,
    DatadogMalicious = 1,
    FeodoTracker = 2,
    EcosystemsTyposquat = 3,
    CisaKev = 4,
    Urlhaus = 5,
    PhishingArmy = 6,
    PhishTank = 7,
    ThreatFoxIoc = 8,
    FireholIp = 9,
    TorExit = 10,
}
140
141impl ThreatSource {
142 fn from_u8(v: u8) -> Option<Self> {
143 match v {
144 0 => Some(Self::OssfMalicious),
145 1 => Some(Self::DatadogMalicious),
146 2 => Some(Self::FeodoTracker),
147 3 => Some(Self::EcosystemsTyposquat),
148 4 => Some(Self::CisaKev),
149 5 => Some(Self::Urlhaus),
150 6 => Some(Self::PhishingArmy),
151 7 => Some(Self::PhishTank),
152 8 => Some(Self::ThreatFoxIoc),
153 9 => Some(Self::FireholIp),
154 10 => Some(Self::TorExit),
155 _ => None,
156 }
157 }
158
159 pub fn label(&self) -> &'static str {
161 match self {
162 Self::OssfMalicious => "OSSF Malicious Packages",
163 Self::DatadogMalicious => "Datadog Malicious Packages",
164 Self::FeodoTracker => "Feodo Tracker",
165 Self::EcosystemsTyposquat => "ecosyste.ms Typosquats",
166 Self::CisaKev => "CISA KEV",
167 Self::Urlhaus => "URLhaus",
168 Self::PhishingArmy => "Phishing Army",
169 Self::PhishTank => "PhishTank",
170 Self::ThreatFoxIoc => "ThreatFox IOC",
171 Self::FireholIp => "FireHOL IP",
172 Self::TorExit => "Tor Exit Node",
173 }
174 }
175
176 pub fn default_confidence(self) -> Confidence {
179 match self {
180 Self::TorExit => Confidence::Medium,
181 Self::OssfMalicious
182 | Self::DatadogMalicious
183 | Self::FeodoTracker
184 | Self::EcosystemsTyposquat
185 | Self::CisaKev
186 | Self::Urlhaus
187 | Self::PhishingArmy
188 | Self::PhishTank
189 | Self::ThreatFoxIoc
190 | Self::FireholIp => Confidence::Confirmed,
191 }
192 }
193}
194
/// Confidence level attached to a threat match.
///
/// Variants are ordered from weakest to strongest; the discriminant is the
/// one-byte value decoded by `Confidence::from_u8`, and serde uses the
/// lowercase variant name.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, serde::Serialize, serde::Deserialize,
)]
#[serde(rename_all = "lowercase")]
#[repr(u8)]
pub enum Confidence {
    Low = 0,
    Medium = 1,
    Confirmed = 2,
}
206
207impl Confidence {
208 fn from_u8(v: u8) -> Option<Self> {
209 match v {
210 0 => Some(Self::Low),
211 1 => Some(Self::Medium),
212 2 => Some(Self::Confirmed),
213 _ => None,
214 }
215 }
216}
217
/// Result of a successful package, hostname or IP lookup.
#[derive(Debug, Clone)]
pub struct ThreatMatch {
    /// Ecosystem of the matched package; `None` for hostname and IP matches.
    pub ecosystem: Option<Ecosystem>,
    /// Matched package name, lowercased hostname, or textual IP address.
    pub name: String,
    /// Feed that reported the entry.
    pub source: ThreatSource,
    /// Confidence stored in the record (packages) or the source default
    /// (hostnames and IPs).
    pub confidence: Confidence,
    /// Reference string resolved from the string table, if the record has one.
    pub reference_url: Option<String>,
    /// Whether every version of the package is flagged; always `false` for
    /// hostname and IP matches.
    pub all_versions_malicious: bool,
}
232
/// Result of a successful typosquat lookup.
#[derive(Debug, Clone)]
pub struct TyposquatMatch {
    /// Ecosystem the lookup was performed in.
    pub ecosystem: Ecosystem,
    /// Name of the typosquatting package as stored in the record.
    pub malicious_name: String,
    /// Name of the package the typosquat imitates, as stored in the record.
    pub target_name: String,
}
240
/// Summary counters reported by `ThreatDb::stats`.
///
/// The version, timestamp and sequence fields come from the primary database;
/// the count and size fields are the sum of the primary and supplemental
/// databases.
#[derive(Debug, Clone, Default)]
pub struct ThreatDbStats {
    pub format_version: u32,
    pub build_timestamp: u64,
    pub build_sequence: u64,
    pub package_count: u32,
    pub hostname_count: u32,
    pub ip_count: u32,
    pub typosquat_count: u32,
    pub popular_count: u32,
    pub string_table_bytes: u32,
}
254
/// Errors produced while loading, parsing or writing a threat database.
#[derive(Debug, Error)]
pub enum ThreatDbError {
    /// The first eight bytes are not the expected magic.
    #[error("invalid magic: expected TIRITHDB")]
    InvalidMagic,
    /// The header carries a format version other than the supported one.
    #[error("unsupported format version {0}")]
    UnsupportedVersion(u32),
    /// The file is shorter than the fixed header; carries the actual length.
    #[error("file too small: {0} bytes, need at least {HEADER_SIZE}")]
    FileTooSmall(usize),
    /// A section described by the header extends past the end of the file.
    #[error("section offset/count out of bounds")]
    SectionOutOfBounds,
    #[error("invalid signature")]
    InvalidSignature,
    #[error("signer fingerprint mismatch")]
    FingerprintMismatch,
    /// The build sequence is not newer than the caller-supplied minimum.
    #[error("rollback detected: sequence {got} <= current {current}")]
    RollbackDetected { got: u64, current: u64 },
    /// Underlying file-system error.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// A header field or record could not be decoded; carries a byte offset.
    #[error("invalid record at offset {0}")]
    InvalidRecord(usize),
    #[error("string table offset out of bounds: {0}")]
    StringOutOfBounds(u32),
}
278
/// Bytes per package index entry: a `u32` record offset followed by a `u32` key hash.
const PKG_INDEX_ENTRY_SIZE: usize = 8;

/// Bytes per IP record: a `u32` address followed by a one-byte source tag.
const IP_RECORD_SIZE: usize = 5;

/// Bytes per typosquat index entry: a `u32` record offset followed by a `u32` key hash.
const TYPOSQUAT_INDEX_ENTRY_SIZE: usize = 8;

/// Bytes per popular-package index entry; the reader only uses the leading
/// `u32` record offset.
const POPULAR_INDEX_ENTRY_SIZE: usize = 8;

/// Bytes per hostname index entry: a `u32` record offset followed by a `u32` name hash.
const HOSTNAME_INDEX_ENTRY_SIZE: usize = 8;
311
/// 32-bit FNV-1a hash of `data` (xor each byte in, then multiply by the prime).
fn fnv1a_hash(data: &[u8]) -> u32 {
    const OFFSET_BASIS: u32 = 0x811c_9dc5;
    const PRIME: u32 = 0x0100_0193;

    data.iter().fold(OFFSET_BASIS, |acc, &byte| {
        (acc ^ u32::from(byte)).wrapping_mul(PRIME)
    })
}
321
322fn pkg_key_hash(eco: Ecosystem, name: &[u8]) -> u32 {
323 let mut buf = Vec::with_capacity(1 + name.len());
324 buf.push(eco as u8);
325 buf.extend_from_slice(name);
326 fnv1a_hash(&buf)
327}
328
/// Reads a little-endian `u16` at byte offset `off`.
///
/// Returns `None` when the two bytes are not fully inside `buf`, including
/// when `off + 2` would overflow `usize` (the addition is checked so that a
/// hostile offset cannot trigger an arithmetic panic).
fn read_u16_le(buf: &[u8], off: usize) -> Option<u16> {
    let end = off.checked_add(2)?;
    buf.get(off..end)
        .map(|b| u16::from_le_bytes([b[0], b[1]]))
}
333
/// Reads a little-endian `u32` at byte offset `off`.
///
/// Returns `None` when the four bytes are not fully inside `buf`, including
/// when `off + 4` would overflow `usize` (the addition is checked so that a
/// hostile offset cannot trigger an arithmetic panic).
fn read_u32_le(buf: &[u8], off: usize) -> Option<u32> {
    let end = off.checked_add(4)?;
    buf.get(off..end)
        .map(|b| u32::from_le_bytes([b[0], b[1], b[2], b[3]]))
}
338
/// Reads a little-endian `u64` at byte offset `off`.
///
/// Returns `None` when the eight bytes are not fully inside `buf`, including
/// when `off + 8` would overflow `usize` (the addition is checked so that a
/// hostile offset cannot trigger an arithmetic panic).
fn read_u64_le(buf: &[u8], off: usize) -> Option<u64> {
    let end = off.checked_add(8)?;
    buf.get(off..end)
        .map(|b| u64::from_le_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]))
}
343
/// Parsed, in-memory threat database.
///
/// Holds the raw file bytes plus the header fields decoded by `from_bytes`;
/// lookups read records directly out of `data`.
#[derive(Debug)]
pub struct ThreatDb {
    /// Entire file contents, header included.
    data: Vec<u8>,
    /// Optional overlay database that lookups consult when the primary has no
    /// applicable entry.
    supplemental: Option<Box<ThreatDb>>,
    format_version: u32,
    build_timestamp: u64,
    build_sequence: u64,
    // Section locations: byte offsets into `data` and entry counts, as read
    // from the header.
    pkg_index_offset: u32,
    pkg_index_count: u32,
    hostname_index_offset: u32,
    hostname_index_count: u32,
    ip_offset: u32,
    ip_count: u32,
    typosquat_index_offset: u32,
    typosquat_index_count: u32,
    popular_index_offset: u32,
    popular_index_count: u32,
    /// Byte offset of the string table inside `data`.
    string_table_offset: u32,
    /// Size of the string table in bytes.
    string_table_size: u32,
}
367
368impl ThreatDb {
369 pub fn from_bytes(data: Vec<u8>, min_sequence: u64) -> Result<Self, ThreatDbError> {
374 if data.len() < HEADER_SIZE {
375 return Err(ThreatDbError::FileTooSmall(data.len()));
376 }
377
378 if &data[0..8] != MAGIC {
380 return Err(ThreatDbError::InvalidMagic);
381 }
382
383 let err = || ThreatDbError::InvalidRecord(0);
385 let version = read_u32_le(&data, 8).ok_or_else(err)?;
386 if version != FORMAT_VERSION {
387 return Err(ThreatDbError::UnsupportedVersion(version));
388 }
389
390 let build_timestamp = read_u64_le(&data, 12).ok_or_else(err)?;
391 let build_sequence = read_u64_le(&data, 20).ok_or_else(err)?;
392
393 if min_sequence > 0 && build_sequence <= min_sequence {
395 return Err(ThreatDbError::RollbackDetected {
396 got: build_sequence,
397 current: min_sequence,
398 });
399 }
400
401 let pkg_index_offset = read_u32_le(&data, 28).ok_or_else(err)?;
403 let pkg_index_count = read_u32_le(&data, 32).ok_or_else(err)?;
404 let hostname_index_offset = read_u32_le(&data, 36).ok_or_else(err)?;
405 let hostname_index_count = read_u32_le(&data, 40).ok_or_else(err)?;
406 let ip_offset = read_u32_le(&data, 44).ok_or_else(err)?;
407 let ip_count = read_u32_le(&data, 48).ok_or_else(err)?;
408 let typosquat_index_offset = read_u32_le(&data, 52).ok_or_else(err)?;
409 let typosquat_index_count = read_u32_le(&data, 56).ok_or_else(err)?;
410 let popular_index_offset = read_u32_le(&data, 60).ok_or_else(err)?;
411 let popular_index_count = read_u32_le(&data, 64).ok_or_else(err)?;
412 let string_table_offset = read_u32_le(&data, 68).ok_or_else(err)?;
413 let string_table_size = read_u32_le(&data, 72).ok_or_else(err)?;
414
415 let len = data.len() as u64;
417 let check_section = |off: u32, count: u32, entry_size: usize| -> bool {
418 let end = off as u64 + count as u64 * entry_size as u64;
419 end <= len
420 };
421
422 if !check_section(ip_offset, ip_count, IP_RECORD_SIZE) {
424 return Err(ThreatDbError::SectionOutOfBounds);
425 }
426
427 if !check_section(pkg_index_offset, pkg_index_count, PKG_INDEX_ENTRY_SIZE) {
432 return Err(ThreatDbError::SectionOutOfBounds);
433 }
434 if !check_section(
435 hostname_index_offset,
436 hostname_index_count,
437 HOSTNAME_INDEX_ENTRY_SIZE,
438 ) {
439 return Err(ThreatDbError::SectionOutOfBounds);
440 }
441 if !check_section(
442 typosquat_index_offset,
443 typosquat_index_count,
444 TYPOSQUAT_INDEX_ENTRY_SIZE,
445 ) {
446 return Err(ThreatDbError::SectionOutOfBounds);
447 }
448 if !check_section(
449 popular_index_offset,
450 popular_index_count,
451 POPULAR_INDEX_ENTRY_SIZE,
452 ) {
453 return Err(ThreatDbError::SectionOutOfBounds);
454 }
455
456 if (string_table_offset as u64 + string_table_size as u64) > len {
458 return Err(ThreatDbError::SectionOutOfBounds);
459 }
460
461 Ok(Self {
462 data,
463 supplemental: None,
464 format_version: version,
465 build_timestamp,
466 build_sequence,
467 pkg_index_offset,
468 pkg_index_count,
469 hostname_index_offset,
470 hostname_index_count,
471 ip_offset,
472 ip_count,
473 typosquat_index_offset,
474 typosquat_index_count,
475 popular_index_offset,
476 popular_index_count,
477 string_table_offset,
478 string_table_size,
479 })
480 }
481
482 pub fn load_from_data_dir() -> Result<Self, ThreatDbError> {
484 let path = Self::default_path().ok_or_else(|| {
485 ThreatDbError::Io(std::io::Error::new(
486 std::io::ErrorKind::NotFound,
487 "cannot determine data directory",
488 ))
489 })?;
490 Self::load_from_path(&path, 0)
491 }
492
493 pub fn load_from_path(path: &Path, min_sequence: u64) -> Result<Self, ThreatDbError> {
495 let data = std::fs::read(path)?;
496 Self::from_bytes(data, min_sequence)
497 }
498
499 pub fn default_path() -> Option<PathBuf> {
504 if let Ok(p) = std::env::var("TIRITH_THREATDB_PATH") {
505 if !p.is_empty() {
506 return Some(PathBuf::from(p));
507 }
508 }
509 policy::data_dir().map(|d| d.join(DB_FILENAME))
510 }
511
512 pub fn supplemental_path() -> Option<PathBuf> {
515 if let Ok(p) = std::env::var("TIRITH_THREATDB_SUPPLEMENTAL_PATH") {
516 if !p.is_empty() {
517 return Some(PathBuf::from(p));
518 }
519 }
520 policy::data_dir().map(|d| d.join(SUPPLEMENTAL_DB_FILENAME))
521 }
522
523 fn with_supplemental(mut self, supplemental: Option<ThreatDb>) -> Self {
524 self.supplemental = supplemental.map(Box::new);
525 self
526 }
527
528 pub fn verify_signature(&self) -> Result<(), String> {
534 let key_fingerprint = Sha256::digest(VERIFY_KEY_BYTES);
536 let stored_fp = &self.data[FINGERPRINT_OFFSET..FINGERPRINT_OFFSET + FINGERPRINT_LEN];
537 if key_fingerprint.as_slice() != stored_fp {
538 return Err("signer fingerprint does not match embedded public key".to_string());
539 }
540
541 let verify_key = VerifyingKey::from_bytes(VERIFY_KEY_BYTES)
543 .map_err(|e| format!("invalid embedded public key: {e}"))?;
544
545 let sig_bytes = &self.data[SIG_OFFSET..SIG_OFFSET + SIGNATURE_LENGTH];
547 let signature = Signature::from_slice(sig_bytes)
548 .map_err(|e| format!("invalid signature in header: {e}"))?;
549
550 let mut signed_data = Vec::with_capacity(SIG_OFFSET + (self.data.len() - HEADER_SIZE));
552 signed_data.extend_from_slice(&self.data[..SIG_OFFSET]);
553 signed_data.extend_from_slice(&self.data[HEADER_SIZE..]);
554
555 use ed25519_dalek::Verifier;
556 verify_key
557 .verify(&signed_data, &signature)
558 .map_err(|_| "Ed25519 signature verification failed".to_string())
559 }
560
    /// Build timestamp read from the header (bytes 12..20).
    pub fn build_time(&self) -> u64 {
        self.build_timestamp
    }

    /// Build sequence number read from the header (bytes 20..28); this is the
    /// value the rollback checks compare.
    pub fn build_sequence(&self) -> u64 {
        self.build_sequence
    }
568
569 pub fn stats(&self) -> ThreatDbStats {
570 let overlay = self
571 .supplemental
572 .as_deref()
573 .map(|db| db.stats())
574 .unwrap_or_default();
575 ThreatDbStats {
576 format_version: self.format_version,
577 build_timestamp: self.build_timestamp,
578 build_sequence: self.build_sequence,
579 package_count: self.pkg_index_count + overlay.package_count,
580 hostname_count: self.hostname_index_count + overlay.hostname_count,
581 ip_count: self.ip_count + overlay.ip_count,
582 typosquat_count: self.typosquat_index_count + overlay.typosquat_count,
583 popular_count: self.popular_index_count + overlay.popular_count,
584 string_table_bytes: self.string_table_size + overlay.string_table_bytes,
585 }
586 }
587
588 fn read_string_table_entry(&self, offset: u32) -> Option<&str> {
589 if offset == 0xFFFF_FFFF {
590 return None;
591 }
592 let abs = self.string_table_offset as usize + offset as usize;
593 let len = read_u16_le(&self.data, abs)? as usize;
594 let start = abs + 2;
595 let end = start + len;
596 if end > self.data.len() {
597 return None;
598 }
599 std::str::from_utf8(&self.data[start..end]).ok()
600 }
601
602 fn pkg_index_entry(&self, idx: u32) -> Option<(u32, u32)> {
604 let base = self.pkg_index_offset as usize + idx as usize * PKG_INDEX_ENTRY_SIZE;
605 let data_off = read_u32_le(&self.data, base)?;
606 let hash = read_u32_le(&self.data, base + 4)?;
607 Some((data_off, hash))
608 }
609
610 fn parse_pkg_record(&self, off: usize) -> Option<PkgRecord<'_>> {
612 let eco = Ecosystem::from_u8(*self.data.get(off)?)?;
613 let name_len = read_u16_le(&self.data, off + 1)? as usize;
614 let name_start = off + 3;
615 let name_end = name_start + name_len;
616 if name_end + 4 > self.data.len() {
617 return None;
618 }
619 let name = std::str::from_utf8(&self.data[name_start..name_end]).ok()?;
620 let mut cursor = name_end;
621
622 let source = ThreatSource::from_u8(*self.data.get(cursor)?)?;
623 cursor += 1;
624 let confidence = Confidence::from_u8(*self.data.get(cursor)?)?;
625 cursor += 1;
626 let flags = *self.data.get(cursor)?;
627 cursor += 1;
628 let all_versions_malicious = (flags & 1) != 0;
629
630 let version_count = read_u16_le(&self.data, cursor)? as usize;
631 cursor += 2;
632
633 let mut versions = Vec::with_capacity(version_count);
634 for _ in 0..version_count {
635 let vlen = read_u16_le(&self.data, cursor)? as usize;
636 cursor += 2;
637 let vend = cursor + vlen;
638 if vend > self.data.len() {
639 return None;
640 }
641 let v = std::str::from_utf8(&self.data[cursor..vend]).ok()?;
642 versions.push(v);
643 cursor = vend;
644 }
645
646 let ref_offset = read_u32_le(&self.data, cursor)?;
647
648 Some(PkgRecord {
649 ecosystem: eco,
650 name,
651 source,
652 confidence,
653 all_versions_malicious,
654 versions,
655 reference_offset: ref_offset,
656 })
657 }
658
659 pub fn check_package(
665 &self,
666 eco: Ecosystem,
667 name: &str,
668 version: Option<&str>,
669 ) -> Option<ThreatMatch> {
670 let target_hash = pkg_key_hash(eco, name.as_bytes());
671
672 if let Some(idx) = self.binary_search_pkg_index(eco, name, target_hash) {
673 let (data_off, _) = self.pkg_index_entry(idx)?;
674 let rec = self.parse_pkg_record(data_off as usize)?;
675
676 match version {
678 Some(v) => {
679 if !rec.all_versions_malicious && !rec.versions.iter().any(|rv| rv == &v) {
680 return self
681 .supplemental
682 .as_deref()
683 .and_then(|db| db.check_package(eco, name, version));
684 }
685 }
686 None => {
687 if !rec.all_versions_malicious {
688 return self
689 .supplemental
690 .as_deref()
691 .and_then(|db| db.check_package(eco, name, version));
692 }
693 }
694 }
695
696 let reference_url = self
697 .read_string_table_entry(rec.reference_offset)
698 .map(String::from);
699
700 return Some(ThreatMatch {
701 ecosystem: Some(rec.ecosystem),
702 name: rec.name.to_string(),
703 source: rec.source,
704 confidence: rec.confidence,
705 reference_url,
706 all_versions_malicious: rec.all_versions_malicious,
707 });
708 }
709
710 self.supplemental
711 .as_deref()
712 .and_then(|db| db.check_package(eco, name, version))
713 }
714
715 fn binary_search_pkg_index(&self, eco: Ecosystem, name: &str, target_hash: u32) -> Option<u32> {
716 if self.pkg_index_count == 0 {
717 return None;
718 }
719 let mut lo: u32 = 0;
720 let mut hi: u32 = self.pkg_index_count;
721 while lo < hi {
722 let mid = lo + (hi - lo) / 2;
723 let (data_off, hash) = self.pkg_index_entry(mid)?;
724
725 match hash.cmp(&target_hash) {
727 std::cmp::Ordering::Less => lo = mid + 1,
728 std::cmp::Ordering::Greater => hi = mid,
729 std::cmp::Ordering::Equal => {
730 let rec = self.parse_pkg_record(data_off as usize)?;
732 match (rec.ecosystem as u8, rec.name).cmp(&(eco as u8, name)) {
733 std::cmp::Ordering::Equal => return Some(mid),
734 std::cmp::Ordering::Less => lo = mid + 1,
735 std::cmp::Ordering::Greater => hi = mid,
736 }
737 }
738 }
739 }
740 None
741 }
742
743 pub fn check_hostname(&self, host: &str) -> Option<ThreatMatch> {
745 if self.hostname_index_count == 0 {
746 return self
747 .supplemental
748 .as_deref()
749 .and_then(|db| db.check_hostname(host));
750 }
751 let normalized = host.to_ascii_lowercase();
752 let target_hash = fnv1a_hash(normalized.as_bytes());
753
754 let Some(idx) = self.binary_search_hostname_index(&normalized, target_hash) else {
755 return self
756 .supplemental
757 .as_deref()
758 .and_then(|db| db.check_hostname(host));
759 };
760 let base = self.hostname_index_offset as usize + idx as usize * HOSTNAME_INDEX_ENTRY_SIZE;
761 let data_off = read_u32_le(&self.data, base)? as usize;
762
763 let source = ThreatSource::from_u8(*self.data.get(data_off)?)?;
765 let name_len = read_u16_le(&self.data, data_off + 1)? as usize;
766 let name_start = data_off + 3;
767 let name_end = name_start + name_len;
768 if name_end > self.data.len() {
769 return None;
770 }
771
772 Some(ThreatMatch {
773 ecosystem: None,
774 name: normalized,
775 confidence: source.default_confidence(),
776 source,
777 reference_url: None,
778 all_versions_malicious: false,
779 })
780 }
781
782 fn binary_search_hostname_index(&self, normalized: &str, target_hash: u32) -> Option<u32> {
783 if self.hostname_index_count == 0 {
784 return None;
785 }
786 let mut lo: u32 = 0;
787 let mut hi: u32 = self.hostname_index_count;
788 while lo < hi {
789 let mid = lo + (hi - lo) / 2;
790 let base =
791 self.hostname_index_offset as usize + mid as usize * HOSTNAME_INDEX_ENTRY_SIZE;
792 let _data_off = read_u32_le(&self.data, base)?;
793 let hash = read_u32_le(&self.data, base + 4)?;
794 match hash.cmp(&target_hash) {
795 std::cmp::Ordering::Less => lo = mid + 1,
796 std::cmp::Ordering::Greater => hi = mid,
797 std::cmp::Ordering::Equal => {
798 let data_off = _data_off as usize;
800 let name_len = read_u16_le(&self.data, data_off + 1)? as usize;
801 let name_start = data_off + 3;
802 let name_end = name_start + name_len;
803 if name_end > self.data.len() {
804 return None;
805 }
806 let stored = std::str::from_utf8(&self.data[name_start..name_end]).ok()?;
807 match stored.cmp(normalized) {
808 std::cmp::Ordering::Equal => return Some(mid),
809 std::cmp::Ordering::Less => lo = mid + 1,
810 std::cmp::Ordering::Greater => hi = mid,
811 }
812 }
813 }
814 }
815 None
816 }
817
818 pub fn check_ip(&self, ip: Ipv4Addr) -> Option<ThreatMatch> {
820 if self.ip_count == 0 {
821 return self.supplemental.as_deref().and_then(|db| db.check_ip(ip));
822 }
823 let target = u32::from(ip);
824 let Some(idx) = self.binary_search_ip(target) else {
825 return self.supplemental.as_deref().and_then(|db| db.check_ip(ip));
826 };
827 let base = self.ip_offset as usize + idx as usize * IP_RECORD_SIZE;
828 let source = ThreatSource::from_u8(*self.data.get(base + 4)?)?;
829
830 Some(ThreatMatch {
831 ecosystem: None,
832 name: ip.to_string(),
833 confidence: source.default_confidence(),
834 source,
835 reference_url: None,
836 all_versions_malicious: false,
837 })
838 }
839
840 fn binary_search_ip(&self, target: u32) -> Option<u32> {
841 let mut lo: u32 = 0;
842 let mut hi: u32 = self.ip_count;
843 while lo < hi {
844 let mid = lo + (hi - lo) / 2;
845 let base = self.ip_offset as usize + mid as usize * IP_RECORD_SIZE;
846 let val = read_u32_le(&self.data, base)?;
847 match val.cmp(&target) {
848 std::cmp::Ordering::Less => lo = mid + 1,
849 std::cmp::Ordering::Greater => hi = mid,
850 std::cmp::Ordering::Equal => return Some(mid),
851 }
852 }
853 None
854 }
855
856 pub fn check_typosquat(&self, eco: Ecosystem, name: &str) -> Option<TyposquatMatch> {
858 if self.typosquat_index_count == 0 {
859 return self
860 .supplemental
861 .as_deref()
862 .and_then(|db| db.check_typosquat(eco, name));
863 }
864 let target_hash = pkg_key_hash(eco, name.as_bytes());
865 let Some(idx) = self.binary_search_typosquat_index(eco, name, target_hash) else {
866 return self
867 .supplemental
868 .as_deref()
869 .and_then(|db| db.check_typosquat(eco, name));
870 };
871 let base = self.typosquat_index_offset as usize + idx as usize * TYPOSQUAT_INDEX_ENTRY_SIZE;
872 let data_off = read_u32_le(&self.data, base)? as usize;
873
874 let _eco = Ecosystem::from_u8(*self.data.get(data_off)?)?;
877 let mut cursor = data_off + 1;
878 let mal_len = read_u16_le(&self.data, cursor)? as usize;
879 cursor += 2;
880 let mal_end = cursor + mal_len;
881 if mal_end > self.data.len() {
882 return None;
883 }
884 let malicious_name = std::str::from_utf8(&self.data[cursor..mal_end]).ok()?;
885 cursor = mal_end;
886
887 let tgt_len = read_u16_le(&self.data, cursor)? as usize;
888 cursor += 2;
889 let tgt_end = cursor + tgt_len;
890 if tgt_end > self.data.len() {
891 return None;
892 }
893 let target_name = std::str::from_utf8(&self.data[cursor..tgt_end]).ok()?;
894
895 Some(TyposquatMatch {
896 ecosystem: eco,
897 malicious_name: malicious_name.to_string(),
898 target_name: target_name.to_string(),
899 })
900 }
901
902 fn binary_search_typosquat_index(
903 &self,
904 eco: Ecosystem,
905 name: &str,
906 target_hash: u32,
907 ) -> Option<u32> {
908 let mut lo: u32 = 0;
909 let mut hi: u32 = self.typosquat_index_count;
910 while lo < hi {
911 let mid = lo + (hi - lo) / 2;
912 let base =
913 self.typosquat_index_offset as usize + mid as usize * TYPOSQUAT_INDEX_ENTRY_SIZE;
914 let _data_off = read_u32_le(&self.data, base)?;
915 let hash = read_u32_le(&self.data, base + 4)?;
916 match hash.cmp(&target_hash) {
917 std::cmp::Ordering::Less => lo = mid + 1,
918 std::cmp::Ordering::Greater => hi = mid,
919 std::cmp::Ordering::Equal => {
920 let data_off = _data_off as usize;
922 let rec_eco = Ecosystem::from_u8(*self.data.get(data_off)?)?;
923 let mal_len = read_u16_le(&self.data, data_off + 1)? as usize;
924 let mal_start = data_off + 3;
925 let mal_end = mal_start + mal_len;
926 if mal_end > self.data.len() {
927 return None;
928 }
929 let stored = std::str::from_utf8(&self.data[mal_start..mal_end]).ok()?;
930 match (rec_eco as u8, stored).cmp(&(eco as u8, name)) {
931 std::cmp::Ordering::Equal => return Some(mid),
932 std::cmp::Ordering::Less => lo = mid + 1,
933 std::cmp::Ordering::Greater => hi = mid,
934 }
935 }
936 }
937 }
938 None
939 }
940
941 pub fn check_popular_distance(&self, eco: Ecosystem, name: &str) -> Option<(String, usize)> {
944 let mut best: Option<(String, usize)> = None;
946 let max_distance = 1;
947
948 for i in 0..self.popular_index_count {
949 let base = self.popular_index_offset as usize + i as usize * POPULAR_INDEX_ENTRY_SIZE;
950 let data_off = match read_u32_le(&self.data, base) {
951 Some(v) => v as usize,
952 None => continue,
953 };
954
955 let rec_eco = match self.data.get(data_off).and_then(|&b| Ecosystem::from_u8(b)) {
957 Some(e) => e,
958 None => continue,
959 };
960 if rec_eco != eco {
961 continue;
962 }
963
964 let name_len = match read_u16_le(&self.data, data_off + 1) {
965 Some(l) => l as usize,
966 None => continue,
967 };
968 let name_start = data_off + 3;
969 let name_end = name_start + name_len;
970 if name_end > self.data.len() {
971 continue;
972 }
973 let popular_name = match std::str::from_utf8(&self.data[name_start..name_end]) {
974 Ok(s) => s,
975 Err(_) => continue,
976 };
977
978 if popular_name == name {
980 continue;
981 }
982
983 let dist = levenshtein(name, popular_name);
984 if dist <= max_distance {
985 match &best {
986 Some((_, d)) if dist < *d => {
987 best = Some((popular_name.to_string(), dist));
988 }
989 None => {
990 best = Some((popular_name.to_string(), dist));
991 }
992 _ => {}
993 }
994 }
995 }
996
997 let overlay = self
998 .supplemental
999 .as_deref()
1000 .and_then(|db| db.check_popular_distance(eco, name));
1001
1002 match (best, overlay) {
1004 (Some(a), Some(b)) if b.1 < a.1 => Some(b),
1005 (Some(a), _) => Some(a),
1006 (None, b) => b,
1007 }
1008 }
1009
1010 pub fn cached() -> Option<Arc<ThreatDb>> {
1017 let cache = CACHE.get_or_init(ThreatDbCache::new);
1018 cache.get()
1019 }
1020
1021 pub fn refresh_cache() {
1023 if let Some(cache) = CACHE.get() {
1024 cache.force_reload();
1025 }
1026 }
1027}
1028
/// Borrowed view of one decoded package record; string fields point into the
/// database's byte buffer.
struct PkgRecord<'a> {
    ecosystem: Ecosystem,
    name: &'a str,
    source: ThreatSource,
    confidence: Confidence,
    /// Bit 0 of the record's flags byte.
    all_versions_malicious: bool,
    /// Specific versions listed in the record.
    versions: Vec<&'a str>,
    /// String-table offset of the reference; `0xFFFF_FFFF` means none.
    reference_offset: u32,
}
1039
/// Process-wide database cache, created lazily by `ThreatDb::cached`.
static CACHE: OnceLock<ThreatDbCache> = OnceLock::new();
1041
/// Holds the currently loaded database and the bookkeeping used to decide
/// when to reload it from disk.
struct ThreatDbCache {
    /// Currently loaded database, if any.
    db: RwLock<Option<Arc<ThreatDb>>>,
    /// Unix time (seconds) of the last mtime check.
    last_mtime_check: AtomicU64,
    /// Combined mtime value (see `combined_mtime_epoch`) of the files the
    /// loaded database came from.
    loaded_mtime: AtomicU64,
}
1047
1048impl ThreatDbCache {
1049 fn new() -> Self {
1050 let cache = Self {
1051 db: RwLock::new(None),
1052 last_mtime_check: AtomicU64::new(0),
1053 loaded_mtime: AtomicU64::new(0),
1054 };
1055 cache.force_reload();
1057 cache
1058 }
1059
1060 fn get(&self) -> Option<Arc<ThreatDb>> {
1061 let now = unix_now();
1062 let last_check = self.last_mtime_check.load(Ordering::Relaxed);
1063 if now.saturating_sub(last_check) >= MTIME_CHECK_INTERVAL_SECS {
1064 self.last_mtime_check.store(now, Ordering::Relaxed);
1065 if let Some(file_mtime) = combined_mtime_epoch() {
1066 if file_mtime != self.loaded_mtime.load(Ordering::Relaxed) {
1067 self.reload(file_mtime);
1068 }
1069 }
1070 }
1071 self.db.read().ok()?.clone()
1072 }
1073
1074 fn force_reload(&self) {
1075 if let Some(file_mtime) = combined_mtime_epoch() {
1076 self.reload(file_mtime);
1077 }
1078 }
1079
1080 fn reload(&self, file_mtime: u64) {
1081 let min_seq = self
1082 .db
1083 .read()
1084 .ok()
1085 .and_then(|guard| guard.as_ref().map(|db| db.build_sequence))
1086 .unwrap_or(0);
1087
1088 match ThreatDb::load_from_data_dir() {
1089 Ok(primary_db) => {
1090 if let Err(e) = primary_db.verify_signature() {
1091 eprintln!(
1092 "tirith: warning: threat DB failed signature verification, ignoring update: {e}"
1093 );
1094 return;
1095 }
1096 let supplemental_db = ThreatDb::supplemental_path()
1101 .filter(|path| path.exists())
1102 .and_then(|path| match ThreatDb::load_from_path(&path, 0) {
1103 Ok(db) => Some(db),
1104 Err(e) => {
1105 eprintln!(
1106 "tirith: warning: failed to load supplemental threat DB {}: {e}",
1107 path.display()
1108 );
1109 None
1110 }
1111 });
1112 let new_db = primary_db.with_supplemental(supplemental_db);
1113 if new_db.build_sequence > min_seq || min_seq == 0 {
1116 if let Ok(mut guard) = self.db.write() {
1117 *guard = Some(Arc::new(new_db));
1118 self.loaded_mtime.store(file_mtime, Ordering::Relaxed);
1119 }
1120 }
1121 }
1122 Err(e) => {
1123 eprintln!("tirith: warning: failed to reload threat DB: {e}");
1124 }
1125 }
1126 }
1127}
1128
/// Current Unix time in whole seconds, or 0 if the system clock reads earlier
/// than the epoch.
fn unix_now() -> u64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
1135
1136fn file_mtime_epoch() -> Option<u64> {
1137 let path = ThreatDb::default_path()?;
1138 let meta = std::fs::metadata(&path).ok()?;
1139 meta.modified()
1140 .ok()?
1141 .duration_since(std::time::UNIX_EPOCH)
1142 .ok()
1143 .map(|d| d.as_secs())
1144}
1145
1146fn combined_mtime_epoch() -> Option<u64> {
1147 let primary = file_mtime_epoch();
1148 let supplemental = ThreatDb::supplemental_path()
1149 .and_then(|path| std::fs::metadata(path).ok())
1150 .and_then(|meta| meta.modified().ok())
1151 .and_then(|mtime| mtime.duration_since(std::time::UNIX_EPOCH).ok())
1152 .map(|d| d.as_secs())
1153 .unwrap_or(0);
1154
1155 primary
1158 .map(|mtime| mtime.rotate_left(13) ^ supplemental.rotate_left(29) ^ 0x5448_5245_4154_4442)
1159}
1160
/// Accumulates entries and serialises them into a signed database file.
///
/// Entries are collected with the `add_*` methods and emitted by `build` /
/// `write_to`.
pub struct ThreatDbWriter {
    build_timestamp: u64,
    build_sequence: u64,
    packages: Vec<WriterPkg>,
    hostnames: Vec<WriterHostname>,
    ips: Vec<WriterIp>,
    typosquats: Vec<WriterTyposquat>,
    popular: Vec<WriterPopular>,
    /// Interned reference strings shared by package records.
    string_table: StringTable,
}
1182
/// Package entry queued for writing.
struct WriterPkg {
    ecosystem: Ecosystem,
    name: String,
    versions: Vec<String>,
    source: ThreatSource,
    confidence: Confidence,
    all_versions_malicious: bool,
    /// String-table offset of the reference, or `0xFFFF_FFFF` when absent.
    reference_offset: u32,
}
1192
/// Hostname entry queued for writing; `name` is stored ASCII-lowercased by
/// `add_hostname`.
struct WriterHostname {
    name: String,
    source: ThreatSource,
}
1197
/// IPv4 entry queued for writing; `addr` is the address converted with
/// `u32::from`.
struct WriterIp {
    addr: u32,
    source: ThreatSource,
}
1202
/// Typosquat entry queued for writing: a malicious name and the package it
/// imitates.
struct WriterTyposquat {
    ecosystem: Ecosystem,
    malicious_name: String,
    target_name: String,
}
1208
/// Popular-package entry queued for writing.
struct WriterPopular {
    ecosystem: Ecosystem,
    name: String,
}
1213
/// Deduplicating string table used by the writer.
///
/// Each entry is a little-endian `u16` byte length followed by that many
/// UTF-8 bytes; entries are addressed by their byte offset in `data`.
struct StringTable {
    /// Serialised entries, back to back.
    data: Vec<u8>,
    /// Maps each interned string to the offset of its entry.
    index: std::collections::HashMap<String, u32>,
}

impl StringTable {
    /// Creates an empty table.
    fn new() -> Self {
        Self {
            data: Vec::new(),
            index: std::collections::HashMap::new(),
        }
    }

    /// Returns the offset of `s` in the table, appending it on first use.
    ///
    /// The length prefix is a `u16`, so at most `u16::MAX` bytes can be
    /// stored per entry. Longer strings are stored truncated at the last
    /// UTF-8 character boundary that fits, so the prefix always matches the
    /// stored bytes and the entry stays valid UTF-8. (Casting the length
    /// without clamping would write a prefix that disagrees with the
    /// payload.) Deduplication is keyed on the full, untruncated string.
    fn intern(&mut self, s: &str) -> u32 {
        if let Some(&off) = self.index.get(s) {
            return off;
        }

        let mut stored_len = s.len().min(usize::from(u16::MAX));
        // Index 0 is always a boundary, so this terminates.
        while !s.is_char_boundary(stored_len) {
            stored_len -= 1;
        }

        let off = self.data.len() as u32;
        self.data
            .extend_from_slice(&(stored_len as u16).to_le_bytes());
        self.data.extend_from_slice(&s.as_bytes()[..stored_len]);
        self.index.insert(s.to_string(), off);
        off
    }

    /// Serialised table contents.
    fn bytes(&self) -> &[u8] {
        &self.data
    }

    /// Size of the serialised table in bytes.
    fn len(&self) -> u32 {
        self.data.len() as u32
    }
}
1250
1251impl ThreatDbWriter {
1252 pub fn new(build_timestamp: u64, build_sequence: u64) -> Self {
1253 Self {
1254 build_timestamp,
1255 build_sequence,
1256 packages: Vec::new(),
1257 hostnames: Vec::new(),
1258 ips: Vec::new(),
1259 typosquats: Vec::new(),
1260 popular: Vec::new(),
1261 string_table: StringTable::new(),
1262 }
1263 }
1264
1265 #[allow(clippy::too_many_arguments)]
1266 pub fn add_package(
1267 &mut self,
1268 eco: Ecosystem,
1269 name: &str,
1270 versions: &[&str],
1271 source: ThreatSource,
1272 confidence: Confidence,
1273 all_versions_malicious: bool,
1274 reference: Option<&str>,
1275 ) {
1276 let ref_offset = match reference {
1277 Some(r) => self.string_table.intern(r),
1278 None => 0xFFFF_FFFF,
1279 };
1280 self.packages.push(WriterPkg {
1281 ecosystem: eco,
1282 name: name.to_string(),
1283 versions: versions.iter().map(|v| v.to_string()).collect(),
1284 source,
1285 confidence,
1286 all_versions_malicious,
1287 reference_offset: ref_offset,
1288 });
1289 }
1290
1291 pub fn add_hostname(&mut self, name: &str, source: ThreatSource) {
1292 self.hostnames.push(WriterHostname {
1293 name: name.to_ascii_lowercase(),
1294 source,
1295 });
1296 }
1297
1298 pub fn add_ip(&mut self, addr: Ipv4Addr, source: ThreatSource) {
1299 self.ips.push(WriterIp {
1300 addr: u32::from(addr),
1301 source,
1302 });
1303 }
1304
1305 pub fn add_typosquat(&mut self, eco: Ecosystem, malicious_name: &str, target_name: &str) {
1306 self.typosquats.push(WriterTyposquat {
1307 ecosystem: eco,
1308 malicious_name: malicious_name.to_string(),
1309 target_name: target_name.to_string(),
1310 });
1311 }
1312
1313 pub fn add_popular(&mut self, eco: Ecosystem, name: &str) {
1314 self.popular.push(WriterPopular {
1315 ecosystem: eco,
1316 name: name.to_string(),
1317 });
1318 }
1319
1320 pub fn write_to(
1322 mut self,
1323 path: &Path,
1324 signing_key: &ed25519_dalek::SigningKey,
1325 ) -> Result<(), ThreatDbError> {
1326 let bytes = self.build(signing_key)?;
1327 std::fs::write(path, bytes)?;
1328 Ok(())
1329 }
1330
1331 pub fn build(
1333 &mut self,
1334 signing_key: &ed25519_dalek::SigningKey,
1335 ) -> Result<Vec<u8>, ThreatDbError> {
1336 self.packages
1338 .sort_by(|a, b| (a.ecosystem as u8, &a.name).cmp(&(b.ecosystem as u8, &b.name)));
1339 self.packages
1340 .dedup_by(|a, b| a.ecosystem == b.ecosystem && a.name == b.name);
1341
1342 self.hostnames.sort_by(|a, b| a.name.cmp(&b.name));
1343 self.hostnames.dedup_by(|a, b| a.name == b.name);
1344
1345 self.ips.sort_by_key(|ip| ip.addr);
1346 self.ips.dedup_by_key(|ip| ip.addr);
1347
1348 self.typosquats.sort_by(|a, b| {
1349 (a.ecosystem as u8, &a.malicious_name).cmp(&(b.ecosystem as u8, &b.malicious_name))
1350 });
1351 self.typosquats
1352 .dedup_by(|a, b| a.ecosystem == b.ecosystem && a.malicious_name == b.malicious_name);
1353
1354 self.popular
1355 .sort_by(|a, b| (a.ecosystem as u8, &a.name).cmp(&(b.ecosystem as u8, &b.name)));
1356 self.popular
1357 .dedup_by(|a, b| a.ecosystem == b.ecosystem && a.name == b.name);
1358
1359 let mut pkg_data: Vec<u8> = Vec::new();
1360 let mut pkg_index: Vec<(u32, u32)> = Vec::new(); for pkg in &self.packages {
1363 let data_offset = (HEADER_SIZE + pkg_data.len()) as u32; let key_hash = pkg_key_hash(pkg.ecosystem, pkg.name.as_bytes());
1365
1366 pkg_data.push(pkg.ecosystem as u8);
1367 let name_bytes = pkg.name.as_bytes();
1368 pkg_data.extend_from_slice(&(name_bytes.len() as u16).to_le_bytes());
1369 pkg_data.extend_from_slice(name_bytes);
1370 pkg_data.push(pkg.source as u8);
1371 pkg_data.push(pkg.confidence as u8);
1372 let flags: u8 = if pkg.all_versions_malicious { 1 } else { 0 };
1373 pkg_data.push(flags);
1374 pkg_data.extend_from_slice(&(pkg.versions.len() as u16).to_le_bytes());
1375 for v in &pkg.versions {
1376 let vbytes = v.as_bytes();
1377 pkg_data.extend_from_slice(&(vbytes.len() as u16).to_le_bytes());
1378 pkg_data.extend_from_slice(vbytes);
1379 }
1380 pkg_data.extend_from_slice(&pkg.reference_offset.to_le_bytes());
1381
1382 pkg_index.push((data_offset, key_hash));
1383 }
1384
1385 let mut hostname_data: Vec<u8> = Vec::new();
1387 let mut hostname_index: Vec<(u32, u32)> = Vec::new();
1388
1389 for hn in &self.hostnames {
1390 let key_hash = fnv1a_hash(hn.name.as_bytes());
1391 let local_off = hostname_data.len();
1392
1393 hostname_data.push(hn.source as u8);
1394 let name_bytes = hn.name.as_bytes();
1395 hostname_data.extend_from_slice(&(name_bytes.len() as u16).to_le_bytes());
1396 hostname_data.extend_from_slice(name_bytes);
1397
1398 hostname_index.push((local_off as u32, key_hash));
1399 }
1400
1401 let mut typo_data: Vec<u8> = Vec::new();
1403 let mut typo_index: Vec<(u32, u32)> = Vec::new();
1404
1405 for ts in &self.typosquats {
1406 let local_off = typo_data.len();
1407 let key_hash = pkg_key_hash(ts.ecosystem, ts.malicious_name.as_bytes());
1408
1409 typo_data.push(ts.ecosystem as u8);
1410 let mal_bytes = ts.malicious_name.as_bytes();
1411 typo_data.extend_from_slice(&(mal_bytes.len() as u16).to_le_bytes());
1412 typo_data.extend_from_slice(mal_bytes);
1413 let tgt_bytes = ts.target_name.as_bytes();
1414 typo_data.extend_from_slice(&(tgt_bytes.len() as u16).to_le_bytes());
1415 typo_data.extend_from_slice(tgt_bytes);
1416
1417 typo_index.push((local_off as u32, key_hash));
1418 }
1419
1420 let mut popular_data: Vec<u8> = Vec::new();
1422 let mut popular_index: Vec<(u32, u32)> = Vec::new();
1423
1424 for pop in &self.popular {
1425 let local_off = popular_data.len();
1426 let key_hash = pkg_key_hash(pop.ecosystem, pop.name.as_bytes());
1427
1428 popular_data.push(pop.ecosystem as u8);
1429 let name_bytes = pop.name.as_bytes();
1430 popular_data.extend_from_slice(&(name_bytes.len() as u16).to_le_bytes());
1431 popular_data.extend_from_slice(name_bytes);
1432
1433 popular_index.push((local_off as u32, key_hash));
1434 }
1435
1436 let mut ip_data: Vec<u8> = Vec::with_capacity(self.ips.len() * IP_RECORD_SIZE);
1438 for ip in &self.ips {
1439 ip_data.extend_from_slice(&ip.addr.to_le_bytes());
1440 ip_data.push(ip.source as u8);
1441 }
1442
1443 let pkg_index_size = pkg_index.len() * PKG_INDEX_ENTRY_SIZE;
1448 let hostname_index_size = hostname_index.len() * HOSTNAME_INDEX_ENTRY_SIZE;
1449 let typo_index_size = typo_index.len() * TYPOSQUAT_INDEX_ENTRY_SIZE;
1450 let popular_index_size = popular_index.len() * POPULAR_INDEX_ENTRY_SIZE;
1451
1452 let mut offset = HEADER_SIZE;
1453
1454 let pkg_index_offset = offset as u32;
1455 offset += pkg_index_size;
1456 let pkg_data_offset = offset;
1457 offset += pkg_data.len();
1458
1459 let hostname_index_offset = offset as u32;
1460 offset += hostname_index_size;
1461 let hostname_data_offset = offset;
1462 offset += hostname_data.len();
1463
1464 let ip_data_offset = offset as u32;
1465 offset += ip_data.len();
1466
1467 let typo_index_offset = offset as u32;
1468 offset += typo_index_size;
1469 let typo_data_offset = offset;
1470 offset += typo_data.len();
1471
1472 let popular_index_offset = offset as u32;
1473 offset += popular_index_size;
1474 let popular_data_offset = offset;
1475 offset += popular_data.len();
1476
1477 let string_table_offset = offset as u32;
1478 for (data_off, _) in &mut pkg_index {
1482 let local_off = *data_off as usize - HEADER_SIZE;
1487 *data_off = (pkg_data_offset + local_off) as u32;
1488 }
1489
1490 for (data_off, _) in &mut hostname_index {
1491 *data_off = (hostname_data_offset + *data_off as usize) as u32;
1492 }
1493
1494 for (data_off, _) in &mut typo_index {
1495 *data_off = (typo_data_offset + *data_off as usize) as u32;
1496 }
1497
1498 for (data_off, _) in &mut popular_index {
1499 *data_off = (popular_data_offset + *data_off as usize) as u32;
1500 }
1501
1502 pkg_index.sort_by_key(|&(_, hash)| hash);
1507 hostname_index.sort_by_key(|&(_, hash)| hash);
1508 typo_index.sort_by_key(|&(_, hash)| hash);
1509
1510 let total_size = HEADER_SIZE
1511 + pkg_index_size
1512 + pkg_data.len()
1513 + hostname_index_size
1514 + hostname_data.len()
1515 + ip_data.len()
1516 + typo_index_size
1517 + typo_data.len()
1518 + popular_index_size
1519 + popular_data.len()
1520 + self.string_table.len() as usize;
1521
1522 let mut buf = vec![0u8; total_size];
1523
1524 buf[0..8].copy_from_slice(MAGIC);
1526 buf[8..12].copy_from_slice(&FORMAT_VERSION.to_le_bytes());
1527 buf[12..20].copy_from_slice(&self.build_timestamp.to_le_bytes());
1528 buf[20..28].copy_from_slice(&self.build_sequence.to_le_bytes());
1529 buf[28..32].copy_from_slice(&pkg_index_offset.to_le_bytes());
1530 buf[32..36].copy_from_slice(&(self.packages.len() as u32).to_le_bytes());
1531 buf[36..40].copy_from_slice(&hostname_index_offset.to_le_bytes());
1532 buf[40..44].copy_from_slice(&(self.hostnames.len() as u32).to_le_bytes());
1533 buf[44..48].copy_from_slice(&ip_data_offset.to_le_bytes());
1534 buf[48..52].copy_from_slice(&(self.ips.len() as u32).to_le_bytes());
1535 buf[52..56].copy_from_slice(&typo_index_offset.to_le_bytes());
1536 buf[56..60].copy_from_slice(&(self.typosquats.len() as u32).to_le_bytes());
1537 buf[60..64].copy_from_slice(&popular_index_offset.to_le_bytes());
1538 buf[64..68].copy_from_slice(&(self.popular.len() as u32).to_le_bytes());
1539 buf[68..72].copy_from_slice(&string_table_offset.to_le_bytes());
1540 buf[72..76].copy_from_slice(&self.string_table.len().to_le_bytes());
1541
1542 let fingerprint = Sha256::digest(signing_key.verifying_key().as_bytes());
1544 buf[FINGERPRINT_OFFSET..FINGERPRINT_OFFSET + FINGERPRINT_LEN].copy_from_slice(&fingerprint);
1545
1546 let mut pos = HEADER_SIZE;
1548
1549 for (data_off, hash) in &pkg_index {
1551 buf[pos..pos + 4].copy_from_slice(&data_off.to_le_bytes());
1552 buf[pos + 4..pos + 8].copy_from_slice(&hash.to_le_bytes());
1553 pos += PKG_INDEX_ENTRY_SIZE;
1554 }
1555 buf[pos..pos + pkg_data.len()].copy_from_slice(&pkg_data);
1557 pos += pkg_data.len();
1558
1559 for (data_off, hash) in &hostname_index {
1561 buf[pos..pos + 4].copy_from_slice(&data_off.to_le_bytes());
1562 buf[pos + 4..pos + 8].copy_from_slice(&hash.to_le_bytes());
1563 pos += HOSTNAME_INDEX_ENTRY_SIZE;
1564 }
1565 buf[pos..pos + hostname_data.len()].copy_from_slice(&hostname_data);
1567 pos += hostname_data.len();
1568
1569 buf[pos..pos + ip_data.len()].copy_from_slice(&ip_data);
1571 pos += ip_data.len();
1572
1573 for (data_off, hash) in &typo_index {
1575 buf[pos..pos + 4].copy_from_slice(&data_off.to_le_bytes());
1576 buf[pos + 4..pos + 8].copy_from_slice(&hash.to_le_bytes());
1577 pos += TYPOSQUAT_INDEX_ENTRY_SIZE;
1578 }
1579 buf[pos..pos + typo_data.len()].copy_from_slice(&typo_data);
1581 pos += typo_data.len();
1582
1583 for (data_off, hash) in &popular_index {
1585 buf[pos..pos + 4].copy_from_slice(&data_off.to_le_bytes());
1586 buf[pos + 4..pos + 8].copy_from_slice(&hash.to_le_bytes());
1587 pos += POPULAR_INDEX_ENTRY_SIZE;
1588 }
1589 buf[pos..pos + popular_data.len()].copy_from_slice(&popular_data);
1591 pos += popular_data.len();
1592
1593 let st = self.string_table.bytes();
1595 buf[pos..pos + st.len()].copy_from_slice(st);
1596
1597 let mut signed_data = Vec::with_capacity(SIG_OFFSET + (buf.len() - HEADER_SIZE));
1599 signed_data.extend_from_slice(&buf[..SIG_OFFSET]);
1600 signed_data.extend_from_slice(&buf[HEADER_SIZE..]);
1601
1602 use ed25519_dalek::Signer;
1603 let signature = signing_key.sign(&signed_data);
1604 buf[SIG_OFFSET..SIG_OFFSET + SIGNATURE_LENGTH].copy_from_slice(&signature.to_bytes());
1605
1606 Ok(buf)
1607 }
1608}
1609
1610#[cfg(test)]
1611mod tests {
1612 use super::*;
1613 use ed25519_dalek::SigningKey;
1614 use rand_core::OsRng;
1615 use std::sync::Mutex;
1616
1617 static ENV_LOCK: Mutex<()> = Mutex::new(());
1618
1619 fn build_test_db(signing_key: &SigningKey) -> ThreatDb {
1621 let mut writer = ThreatDbWriter::new(1700000000, 42);
1622
1623 writer.add_package(
1625 Ecosystem::Npm,
1626 "evil-package",
1627 &["1.0.0", "1.0.1"],
1628 ThreatSource::OssfMalicious,
1629 Confidence::Confirmed,
1630 false,
1631 Some("https://example.com/advisory/1"),
1632 );
1633 writer.add_package(
1634 Ecosystem::PyPI,
1635 "malware-pkg",
1636 &[],
1637 ThreatSource::DatadogMalicious,
1638 Confidence::Confirmed,
1639 true,
1640 None,
1641 );
1642 writer.add_package(
1643 Ecosystem::Npm,
1644 "borderline-pkg",
1645 &["2.0.0"],
1646 ThreatSource::OssfMalicious,
1647 Confidence::Medium,
1648 false,
1649 Some("https://example.com/advisory/2"),
1650 );
1651
1652 writer.add_ip(Ipv4Addr::new(192, 168, 1, 100), ThreatSource::FeodoTracker);
1654 writer.add_ip(Ipv4Addr::new(10, 0, 0, 1), ThreatSource::FeodoTracker);
1655 writer.add_ip(Ipv4Addr::new(203, 0, 113, 50), ThreatSource::FeodoTracker);
1656
1657 writer.add_typosquat(Ecosystem::Npm, "reacct", "react");
1659 writer.add_typosquat(Ecosystem::PyPI, "reqeusts", "requests");
1660
1661 writer.add_popular(Ecosystem::Npm, "react");
1663 writer.add_popular(Ecosystem::Npm, "express");
1664 writer.add_popular(Ecosystem::PyPI, "requests");
1665 writer.add_popular(Ecosystem::PyPI, "flask");
1666
1667 let bytes = writer.build(signing_key).expect("build failed");
1668 ThreatDb::from_bytes(bytes, 0).expect("load failed")
1669 }
1670
1671 #[test]
1672 fn test_round_trip_all_sections() {
1673 let key = SigningKey::generate(&mut OsRng);
1674 let db = build_test_db(&key);
1675
1676 let stats = db.stats();
1677 assert_eq!(stats.format_version, 1);
1678 assert_eq!(stats.build_timestamp, 1700000000);
1679 assert_eq!(stats.build_sequence, 42);
1680 assert_eq!(stats.package_count, 3);
1681 assert_eq!(stats.ip_count, 3);
1682 assert_eq!(stats.typosquat_count, 2);
1683 assert_eq!(stats.popular_count, 4);
1684 assert_eq!(stats.hostname_count, 0);
1685 }
1686
1687 #[test]
1688 fn test_package_version_in_list() {
1689 let key = SigningKey::generate(&mut OsRng);
1690 let db = build_test_db(&key);
1691
1692 let m = db
1693 .check_package(Ecosystem::Npm, "evil-package", Some("1.0.0"))
1694 .expect("should match");
1695 assert_eq!(m.source, ThreatSource::OssfMalicious);
1696 assert_eq!(m.confidence, Confidence::Confirmed);
1697 assert!(!m.all_versions_malicious);
1698 assert!(m.reference_url.is_some());
1699 }
1700
1701 #[test]
1702 fn test_package_version_not_in_list() {
1703 let key = SigningKey::generate(&mut OsRng);
1704 let db = build_test_db(&key);
1705
1706 assert!(db
1707 .check_package(Ecosystem::Npm, "evil-package", Some("2.0.0"))
1708 .is_none());
1709 }
1710
1711 #[test]
1712 fn test_package_no_version_all_malicious() {
1713 let key = SigningKey::generate(&mut OsRng);
1714 let db = build_test_db(&key);
1715
1716 let m = db
1717 .check_package(Ecosystem::PyPI, "malware-pkg", None)
1718 .expect("should match all-versions-malicious without version");
1719 assert!(m.all_versions_malicious);
1720 assert_eq!(m.source, ThreatSource::DatadogMalicious);
1721 }
1722
1723 #[test]
1724 fn test_package_no_version_not_all_malicious() {
1725 let key = SigningKey::generate(&mut OsRng);
1726 let db = build_test_db(&key);
1727
1728 assert!(
1729 db.check_package(Ecosystem::Npm, "evil-package", None)
1730 .is_none(),
1731 "should NOT match when no version provided and all_versions_malicious=false"
1732 );
1733 }
1734
1735 #[test]
1736 fn test_package_all_malicious_with_version() {
1737 let key = SigningKey::generate(&mut OsRng);
1738 let db = build_test_db(&key);
1739
1740 let m = db
1741 .check_package(Ecosystem::PyPI, "malware-pkg", Some("99.99.99"))
1742 .expect("all_versions_malicious should match any version");
1743 assert!(m.all_versions_malicious);
1744 }
1745
1746 #[test]
1747 fn test_package_missing() {
1748 let key = SigningKey::generate(&mut OsRng);
1749 let db = build_test_db(&key);
1750
1751 assert!(db
1752 .check_package(Ecosystem::Npm, "safe-package", Some("1.0.0"))
1753 .is_none());
1754 }
1755
1756 #[test]
1757 fn test_package_wrong_ecosystem() {
1758 let key = SigningKey::generate(&mut OsRng);
1759 let db = build_test_db(&key);
1760
1761 assert!(db
1762 .check_package(Ecosystem::PyPI, "evil-package", Some("1.0.0"))
1763 .is_none());
1764 }
1765
1766 #[test]
1767 fn test_package_medium_confidence() {
1768 let key = SigningKey::generate(&mut OsRng);
1769 let db = build_test_db(&key);
1770
1771 let m = db
1772 .check_package(Ecosystem::Npm, "borderline-pkg", Some("2.0.0"))
1773 .expect("should match");
1774 assert_eq!(m.confidence, Confidence::Medium);
1775 }
1776
1777 #[test]
1778 fn test_ip_found() {
1779 let key = SigningKey::generate(&mut OsRng);
1780 let db = build_test_db(&key);
1781
1782 let m = db
1783 .check_ip(Ipv4Addr::new(192, 168, 1, 100))
1784 .expect("should find IP");
1785 assert_eq!(m.source, ThreatSource::FeodoTracker);
1786 }
1787
1788 #[test]
1789 fn test_ip_not_found() {
1790 let key = SigningKey::generate(&mut OsRng);
1791 let db = build_test_db(&key);
1792
1793 assert!(db.check_ip(Ipv4Addr::new(8, 8, 8, 8)).is_none());
1794 }
1795
1796 #[test]
1797 fn test_ip_first_element() {
1798 let key = SigningKey::generate(&mut OsRng);
1799 let db = build_test_db(&key);
1800
1801 assert!(db.check_ip(Ipv4Addr::new(10, 0, 0, 1)).is_some());
1802 }
1803
1804 #[test]
1805 fn test_ip_last_element() {
1806 let key = SigningKey::generate(&mut OsRng);
1807 let db = build_test_db(&key);
1808
1809 assert!(db.check_ip(Ipv4Addr::new(203, 0, 113, 50)).is_some());
1810 }
1811
1812 #[test]
1813 fn test_typosquat_found() {
1814 let key = SigningKey::generate(&mut OsRng);
1815 let db = build_test_db(&key);
1816
1817 let m = db
1818 .check_typosquat(Ecosystem::Npm, "reacct")
1819 .expect("should find typosquat");
1820 assert_eq!(m.target_name, "react");
1821 }
1822
1823 #[test]
1824 fn test_typosquat_not_found() {
1825 let key = SigningKey::generate(&mut OsRng);
1826 let db = build_test_db(&key);
1827
1828 assert!(db.check_typosquat(Ecosystem::Npm, "react").is_none());
1829 }
1830
1831 #[test]
1832 fn test_typosquat_wrong_ecosystem() {
1833 let key = SigningKey::generate(&mut OsRng);
1834 let db = build_test_db(&key);
1835
1836 assert!(db.check_typosquat(Ecosystem::PyPI, "reacct").is_none());
1837 }
1838
1839 #[test]
1840 fn test_popular_distance_1() {
1841 let key = SigningKey::generate(&mut OsRng);
1842 let db = build_test_db(&key);
1843
1844 let result = db.check_popular_distance(Ecosystem::PyPI, "reqests");
1845 assert!(result.is_some(), "should find close match");
1846 let (name, dist) = result.unwrap();
1847 assert_eq!(name, "requests");
1848 assert_eq!(dist, 1);
1849 }
1850
1851 #[test]
1852 fn test_popular_exact_match_skipped() {
1853 let key = SigningKey::generate(&mut OsRng);
1854 let db = build_test_db(&key);
1855
1856 assert!(db.check_popular_distance(Ecosystem::Npm, "react").is_none());
1857 }
1858
1859 #[test]
1860 fn test_popular_distance_too_far() {
1861 let key = SigningKey::generate(&mut OsRng);
1862 let db = build_test_db(&key);
1863
1864 assert!(db.check_popular_distance(Ecosystem::Npm, "xyz").is_none());
1865 }
1866
1867 #[test]
1868 fn test_hostname_empty_section() {
1869 let key = SigningKey::generate(&mut OsRng);
1870 let db = build_test_db(&key);
1871
1872 assert!(db.check_hostname("evil.example.com").is_none());
1873 }
1874
1875 #[test]
1876 fn test_signature_valid() {
1877 let key = SigningKey::generate(&mut OsRng);
1878 let mut writer = ThreatDbWriter::new(1700000000, 1);
1879 writer.add_ip(Ipv4Addr::new(1, 2, 3, 4), ThreatSource::FeodoTracker);
1880
1881 let bytes = writer.build(&key).expect("build");
1885 let db = ThreatDb::from_bytes(bytes, 0).expect("load");
1886
1887 assert!(
1890 db.verify_signature().is_err(),
1891 "placeholder key should not verify real signature"
1892 );
1893 }
1894
1895 #[test]
1896 fn test_signature_corrupt_byte() {
1897 let key = SigningKey::generate(&mut OsRng);
1898 let mut writer = ThreatDbWriter::new(1700000000, 1);
1899 writer.add_ip(Ipv4Addr::new(1, 2, 3, 4), ThreatSource::FeodoTracker);
1900
1901 let mut bytes = writer.build(&key).expect("build");
1902
1903 if bytes.len() > HEADER_SIZE + 1 {
1905 bytes[HEADER_SIZE + 1] ^= 0xFF;
1906 }
1907
1908 let db = ThreatDb::from_bytes(bytes, 0).expect("load");
1909 assert!(
1910 db.verify_signature().is_err(),
1911 "corrupt data should fail verification"
1912 );
1913 }
1914
1915 #[test]
1916 fn test_signature_with_matching_key() {
1917 let key = SigningKey::generate(&mut OsRng);
1921 let mut writer = ThreatDbWriter::new(1700000000, 1);
1922 writer.add_ip(Ipv4Addr::new(1, 2, 3, 4), ThreatSource::FeodoTracker);
1923
1924 let bytes = writer.build(&key).expect("build");
1925
1926 let sig_bytes = &bytes[SIG_OFFSET..SIG_OFFSET + SIGNATURE_LENGTH];
1928 let signature = Signature::from_slice(sig_bytes).expect("parse sig");
1929
1930 let mut signed_data = Vec::new();
1931 signed_data.extend_from_slice(&bytes[..SIG_OFFSET]);
1932 signed_data.extend_from_slice(&bytes[HEADER_SIZE..]);
1933
1934 use ed25519_dalek::Verifier;
1935 assert!(
1936 key.verifying_key().verify(&signed_data, &signature).is_ok(),
1937 "signature should verify against signing key"
1938 );
1939 }
1940
1941 #[test]
1942 fn test_rollback_rejected() {
1943 let key = SigningKey::generate(&mut OsRng);
1944 let mut writer = ThreatDbWriter::new(1700000000, 5);
1945 writer.add_ip(Ipv4Addr::new(1, 2, 3, 4), ThreatSource::FeodoTracker);
1946 let bytes = writer.build(&key).expect("build");
1947
1948 let err = ThreatDb::from_bytes(bytes, 10).expect_err("should reject rollback");
1949 match err {
1950 ThreatDbError::RollbackDetected {
1951 got: 5,
1952 current: 10,
1953 } => {}
1954 other => panic!("expected RollbackDetected, got: {other}"),
1955 }
1956 }
1957
1958 #[test]
1959 fn test_rollback_equal_rejected() {
1960 let key = SigningKey::generate(&mut OsRng);
1961 let mut writer = ThreatDbWriter::new(1700000000, 10);
1962 writer.add_ip(Ipv4Addr::new(1, 2, 3, 4), ThreatSource::FeodoTracker);
1963 let bytes = writer.build(&key).expect("build");
1964
1965 let err = ThreatDb::from_bytes(bytes, 10).expect_err("equal sequence should be rejected");
1966 assert!(matches!(err, ThreatDbError::RollbackDetected { .. }));
1967 }
1968
1969 #[test]
1970 fn test_rollback_newer_accepted() {
1971 let key = SigningKey::generate(&mut OsRng);
1972 let mut writer = ThreatDbWriter::new(1700000000, 20);
1973 writer.add_ip(Ipv4Addr::new(1, 2, 3, 4), ThreatSource::FeodoTracker);
1974 let bytes = writer.build(&key).expect("build");
1975
1976 assert!(ThreatDb::from_bytes(bytes, 10).is_ok());
1977 }
1978
1979 #[test]
1980 fn test_invalid_magic() {
1981 let mut data = vec![0u8; HEADER_SIZE + 10];
1982 data[0..8].copy_from_slice(b"BADMAGIC");
1983 assert!(matches!(
1984 ThreatDb::from_bytes(data, 0),
1985 Err(ThreatDbError::InvalidMagic)
1986 ));
1987 }
1988
1989 #[test]
1990 fn test_file_too_small() {
1991 let data = vec![0u8; 10];
1992 assert!(matches!(
1993 ThreatDb::from_bytes(data, 0),
1994 Err(ThreatDbError::FileTooSmall(_))
1995 ));
1996 }
1997
1998 #[test]
1999 fn test_unsupported_version() {
2000 let mut data = vec![0u8; HEADER_SIZE + 10];
2001 data[0..8].copy_from_slice(MAGIC);
2002 data[8..12].copy_from_slice(&99u32.to_le_bytes()); assert!(matches!(
2004 ThreatDb::from_bytes(data, 0),
2005 Err(ThreatDbError::UnsupportedVersion(99))
2006 ));
2007 }
2008
2009 #[test]
2010 fn test_single_entry_db() {
2011 let key = SigningKey::generate(&mut OsRng);
2012 let mut writer = ThreatDbWriter::new(1700000000, 1);
2013 writer.add_package(
2014 Ecosystem::Crates,
2015 "only-pkg",
2016 &["0.1.0"],
2017 ThreatSource::OssfMalicious,
2018 Confidence::Confirmed,
2019 false,
2020 None,
2021 );
2022 let bytes = writer.build(&key).expect("build");
2023 let db = ThreatDb::from_bytes(bytes, 0).expect("load");
2024
2025 assert!(db
2026 .check_package(Ecosystem::Crates, "only-pkg", Some("0.1.0"))
2027 .is_some());
2028 assert!(db
2029 .check_package(Ecosystem::Crates, "other", Some("0.1.0"))
2030 .is_none());
2031 }
2032
2033 #[test]
2034 fn test_empty_db() {
2035 let key = SigningKey::generate(&mut OsRng);
2036 let mut writer = ThreatDbWriter::new(1700000000, 1);
2037 let bytes = writer.build(&key).expect("build");
2038 let db = ThreatDb::from_bytes(bytes, 0).expect("load");
2039
2040 assert!(db.check_package(Ecosystem::Npm, "anything", None).is_none());
2041 assert!(db.check_ip(Ipv4Addr::new(1, 2, 3, 4)).is_none());
2042 assert!(db.check_typosquat(Ecosystem::Npm, "anything").is_none());
2043 assert!(db.check_hostname("anything.com").is_none());
2044 assert!(db
2045 .check_popular_distance(Ecosystem::Npm, "anything")
2046 .is_none());
2047
2048 let stats = db.stats();
2049 assert_eq!(stats.package_count, 0);
2050 assert_eq!(stats.ip_count, 0);
2051 }
2052
2053 #[test]
2054 fn test_cache_returns_none_when_no_file() {
2055 let result = ThreatDb::cached();
2063 let _ = result;
2065 }
2066
2067 #[test]
2068 fn test_writer_deduplicates() {
2069 let key = SigningKey::generate(&mut OsRng);
2070 let mut writer = ThreatDbWriter::new(1700000000, 1);
2071
2072 writer.add_package(
2074 Ecosystem::Npm,
2075 "dupe-pkg",
2076 &["1.0.0"],
2077 ThreatSource::OssfMalicious,
2078 Confidence::Confirmed,
2079 false,
2080 None,
2081 );
2082 writer.add_package(
2083 Ecosystem::Npm,
2084 "dupe-pkg",
2085 &["2.0.0"],
2086 ThreatSource::DatadogMalicious,
2087 Confidence::Confirmed,
2088 false,
2089 None,
2090 );
2091
2092 writer.add_ip(Ipv4Addr::new(1, 2, 3, 4), ThreatSource::FeodoTracker);
2094 writer.add_ip(Ipv4Addr::new(1, 2, 3, 4), ThreatSource::FeodoTracker);
2095
2096 let bytes = writer.build(&key).expect("build");
2097 let db = ThreatDb::from_bytes(bytes, 0).expect("load");
2098
2099 assert_eq!(
2100 db.stats().package_count,
2101 1,
2102 "duplicate packages should be deduped"
2103 );
2104 assert_eq!(db.stats().ip_count, 1, "duplicate IPs should be deduped");
2105 }
2106
2107 #[test]
2108 fn test_supplemental_overlay_lookup_and_stats() {
2109 let key = SigningKey::generate(&mut OsRng);
2110
2111 let mut primary_writer = ThreatDbWriter::new(1700000000, 1);
2112 primary_writer.add_package(
2113 Ecosystem::Npm,
2114 "primary-pkg",
2115 &["1.0.0"],
2116 ThreatSource::OssfMalicious,
2117 Confidence::Confirmed,
2118 false,
2119 None,
2120 );
2121 let primary = ThreatDb::from_bytes(primary_writer.build(&key).expect("primary build"), 0)
2122 .expect("primary load");
2123
2124 let mut supplemental_writer = ThreatDbWriter::new(1700000001, 1);
2125 supplemental_writer.add_package(
2126 Ecosystem::PyPI,
2127 "overlay-pkg",
2128 &["2.0.0"],
2129 ThreatSource::DatadogMalicious,
2130 Confidence::Confirmed,
2131 false,
2132 None,
2133 );
2134 supplemental_writer.add_hostname("overlay.example", ThreatSource::Urlhaus);
2135 supplemental_writer.add_ip(Ipv4Addr::new(203, 0, 113, 77), ThreatSource::ThreatFoxIoc);
2136 supplemental_writer.add_typosquat(Ecosystem::Npm, "reacct", "react");
2137 supplemental_writer.add_popular(Ecosystem::Npm, "react");
2138
2139 let supplemental = ThreatDb::from_bytes(
2140 supplemental_writer.build(&key).expect("supplemental build"),
2141 0,
2142 )
2143 .expect("supplemental load");
2144
2145 let db = primary.with_supplemental(Some(supplemental));
2146
2147 assert!(db
2148 .check_package(Ecosystem::Npm, "primary-pkg", Some("1.0.0"))
2149 .is_some());
2150 assert!(db
2151 .check_package(Ecosystem::PyPI, "overlay-pkg", Some("2.0.0"))
2152 .is_some());
2153 assert!(db.check_hostname("overlay.example").is_some());
2154 assert!(db.check_ip(Ipv4Addr::new(203, 0, 113, 77)).is_some());
2155 assert!(db.check_typosquat(Ecosystem::Npm, "reacct").is_some());
2156 assert_eq!(
2157 db.check_popular_distance(Ecosystem::Npm, "reac"),
2158 Some(("react".to_string(), 1))
2159 );
2160
2161 let stats = db.stats();
2162 assert_eq!(stats.package_count, 2);
2163 assert_eq!(stats.hostname_count, 1);
2164 assert_eq!(stats.ip_count, 1);
2165 assert_eq!(stats.typosquat_count, 1);
2166 assert_eq!(stats.popular_count, 1);
2167 }
2168
2169 #[test]
2170 fn test_supplemental_overlay_falls_through_on_primary_version_mismatch() {
2171 let key = SigningKey::generate(&mut OsRng);
2172
2173 let mut primary_writer = ThreatDbWriter::new(1700000000, 1);
2174 primary_writer.add_package(
2175 Ecosystem::Npm,
2176 "shared-pkg",
2177 &["1.0.0"],
2178 ThreatSource::OssfMalicious,
2179 Confidence::Confirmed,
2180 false,
2181 None,
2182 );
2183 let primary = ThreatDb::from_bytes(primary_writer.build(&key).expect("primary build"), 0)
2184 .expect("primary load");
2185
2186 let mut supplemental_writer = ThreatDbWriter::new(1700000001, 1);
2187 supplemental_writer.add_package(
2188 Ecosystem::Npm,
2189 "shared-pkg",
2190 &["2.0.0"],
2191 ThreatSource::DatadogMalicious,
2192 Confidence::Confirmed,
2193 false,
2194 None,
2195 );
2196 let supplemental = ThreatDb::from_bytes(
2197 supplemental_writer.build(&key).expect("supplemental build"),
2198 0,
2199 )
2200 .expect("supplemental load");
2201
2202 let db = primary.with_supplemental(Some(supplemental));
2203 let threat = db
2204 .check_package(Ecosystem::Npm, "shared-pkg", Some("2.0.0"))
2205 .expect("supplemental version should match");
2206 assert_eq!(threat.source, ThreatSource::DatadogMalicious);
2207 }
2208
2209 #[test]
2210 fn test_combined_mtime_requires_primary_db() {
2211 let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
2212 let tmp = tempfile::tempdir().unwrap();
2213 let primary = tmp.path().join("primary.dat");
2214 let supplemental = tmp.path().join("supplemental.dat");
2215
2216 unsafe {
2217 std::env::set_var("TIRITH_THREATDB_PATH", &primary);
2218 std::env::set_var("TIRITH_THREATDB_SUPPLEMENTAL_PATH", &supplemental);
2219 }
2220
2221 assert_eq!(combined_mtime_epoch(), None);
2222
2223 std::fs::write(&supplemental, b"overlay").unwrap();
2224 assert_eq!(combined_mtime_epoch(), None);
2225
2226 std::fs::remove_file(&supplemental).unwrap();
2227 std::fs::write(&primary, b"primary").unwrap();
2228 let primary_only = combined_mtime_epoch().expect("primary mtime");
2229
2230 std::fs::write(&supplemental, b"overlay-updated").unwrap();
2231 let combined = combined_mtime_epoch().expect("combined mtime");
2232 assert_ne!(primary_only, combined);
2233
2234 unsafe {
2235 std::env::remove_var("TIRITH_THREATDB_PATH");
2236 std::env::remove_var("TIRITH_THREATDB_SUPPLEMENTAL_PATH");
2237 }
2238 }
2239
2240 #[test]
2241 fn test_string_table_deduplication() {
2242 let mut st = StringTable::new();
2243 let off1 = st.intern("https://example.com");
2244 let off2 = st.intern("https://example.com");
2245 let off3 = st.intern("https://other.com");
2246
2247 assert_eq!(off1, off2, "same string should return same offset");
2248 assert_ne!(
2249 off1, off3,
2250 "different strings should have different offsets"
2251 );
2252 }
2253
2254 #[test]
2255 fn test_reference_url_round_trip() {
2256 let key = SigningKey::generate(&mut OsRng);
2257 let mut writer = ThreatDbWriter::new(1700000000, 1);
2258 writer.add_package(
2259 Ecosystem::Npm,
2260 "ref-pkg",
2261 &["1.0.0"],
2262 ThreatSource::OssfMalicious,
2263 Confidence::Confirmed,
2264 false,
2265 Some("https://example.com/advisory/123"),
2266 );
2267 let bytes = writer.build(&key).expect("build");
2268 let db = ThreatDb::from_bytes(bytes, 0).expect("load");
2269
2270 let m = db
2271 .check_package(Ecosystem::Npm, "ref-pkg", Some("1.0.0"))
2272 .expect("should match");
2273 assert_eq!(
2274 m.reference_url.as_deref(),
2275 Some("https://example.com/advisory/123")
2276 );
2277 }
2278}