malwaredb_api/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2
3#![doc = include_str!("../README.md")]
4#![deny(missing_docs)]
5#![deny(clippy::all)]
6#![deny(clippy::pedantic)]
7#![forbid(unsafe_code)]
8
9/// Wrapper for fixed-size hash digests from hex strings
10pub mod digest;
11
12use std::fmt::{Display, Formatter};
13
14use chrono::serde::ts_seconds_option;
15use chrono::{DateTime, Utc};
16use serde::{Deserialize, Serialize};
17use zeroize::{Zeroize, ZeroizeOnDrop};
18
/// MDB version, captured at compile time from Cargo's `CARGO_PKG_VERSION`
pub const MDB_VERSION: &str = env!("CARGO_PKG_VERSION");
21
/// Header key (the name of the request header) used to present the API key to the server
pub const MDB_API_HEADER: &str = "mdb-api-key";
24
/// Login API endpoint, `POST`.
/// See [`GetAPIKeyRequest`] for the credentials a user logs in with.
pub const USER_LOGIN_URL: &str = "/v1/users/getkey";
27
/// User logs in with username and password.
///
/// Derives `Zeroize` and `ZeroizeOnDrop` so the credentials are wiped when the
/// value is dropped. `Debug` is not derived for this type.
#[derive(Deserialize, Serialize, Zeroize, ZeroizeOnDrop)]
pub struct GetAPIKeyRequest {
    /// Username
    pub user: String,

    /// User's password
    pub password: String,
}
37
/// Logout API endpoint, `GET`, authenticated.
pub const USER_LOGOUT_URL: &str = "/v1/users/clearkey";
40
/// Response includes the key, if the credentials were correct,
/// and possibly a message related to errors or warnings.
///
/// Derives `Zeroize` and `ZeroizeOnDrop` so the key is wiped when the value is
/// dropped. `Debug` is not derived for this type.
#[derive(Deserialize, Serialize, Zeroize, ZeroizeOnDrop)]
pub struct GetAPIKeyResponse {
    /// User's API key if successful
    pub key: Option<String>,

    /// Error or warning message from the server, if any
    pub message: Option<String>,
}
51
/// User's get self information API endpoint, `GET`, authenticated.
/// Use `EmptyAuthenticatingPost` to authenticate
// NOTE(review): `EmptyAuthenticatingPost` is not defined in this file and the
// endpoint is documented as GET; confirm the line above is still accurate.
pub const USER_INFO_URL: &str = "/v1/users/info";
55
/// Information about a user's own account, as returned to that user
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct GetUserInfoResponse {
    /// User's numeric ID
    pub id: u32,

    /// User's name
    pub username: String,

    /// User's group memberships, if any; empty when there are none
    pub groups: Vec<String>,

    /// User's available sample sources, if any; empty when there are none
    pub sources: Vec<String>,

    /// If the user is an admin
    pub is_admin: bool,

    /// When the account was created, in UTC
    pub created: DateTime<Utc>,

    /// User has read-only access, perhaps a guest or demo account
    pub is_readonly: bool,
}
80
/// Server information endpoint; the request is empty, `GET`, unauthenticated.
pub const SERVER_INFO: &str = "/v1/server/info";
83
/// Information about the server.
///
/// Sizes and uptime are carried as strings rather than numeric values.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
pub struct ServerInfo {
    /// Operating System used
    pub os_name: String,

    /// Memory footprint
    pub memory_used: String,

    /// MDB version
    pub mdb_version: String,

    /// Type and version of the database
    pub db_version: String,

    /// Size of the database on disk
    pub db_size: String,

    /// Total number of samples in `MalwareDB`
    pub num_samples: u64,

    /// Total users of `MalwareDB`
    pub num_users: u32,

    /// Uptime of `MalwareDB` in a human-readable format
    pub uptime: String,
}
111
/// Endpoint for the file types supported by `MalwareDB`; the request is empty, `GET`, unauthenticated.
pub const SUPPORTED_FILE_TYPES: &str = "/v1/server/types";
114
/// One record of a supported file type
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SupportedFileType {
    /// Common name of the file type
    pub name: String,

    /// Magic number bytes of the file type, each entry written in hex
    pub magic: Vec<String>,

    /// Whether the file type is executable
    pub is_executable: bool,

    /// Description of the file type, if one is available
    pub description: Option<String>,
}
130
/// The response listing the server's supported file types
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SupportedFileTypes {
    /// Supported file types
    pub types: Vec<SupportedFileType>,

    /// Optional server messages
    pub message: Option<String>,
}
140
/// Endpoint for listing the sources, per-user, `GET`, authenticated
pub const LIST_SOURCES: &str = "/v1/sources/list";
143
/// Source record: one origin from which samples were obtained
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SourceInfo {
    /// ID of the source
    pub id: u32,

    /// Name of the source
    pub name: String,

    /// Description of the source
    pub description: Option<String>,

    /// URL of the source, or where the files were found
    pub url: Option<String>,

    /// Creation date or first acquisition date of or from the source, in UTC
    pub first_acquisition: DateTime<Utc>,

    /// Whether the source holds malware
    // NOTE(review): `None` presumably means "unknown"; confirm against the server.
    pub malicious: Option<bool>,
}
165
/// Response to a request for the list of sources
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Sources {
    /// List of sources
    pub sources: Vec<SourceInfo>,

    /// Error message, if any
    pub message: Option<String>,
}
175
/// API endpoint for uploading a sample, `POST`, authenticated
pub const UPLOAD_SAMPLE: &str = "/v1/samples/upload";
178
/// New file sample being sent to `MalwareDB`
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct NewSample {
    /// The original file name, might not be known
    pub file_name: String,

    /// ID of the source for this sample
    pub source_id: u32,

    /// Base64 encoding of the binary file
    pub file_contents_b64: String,

    /// SHA-256 of the sample being sent, for server-side validation
    pub sha256: String,
}
194
/// API endpoint for downloading a sample, `GET`. The hash value goes at the end of the URL.
/// For example: `/v1/samples/download/aabbccddeeff0011223344556677889900`
/// Response is raw bytes of the file, or HTTP 404 if not found
pub const DOWNLOAD_SAMPLE: &str = "/v1/samples/download";
199
/// API endpoint for downloading a sample as a `CaRT` container file, `GET`.
/// Example: `/v1/samples/download/cart/aabbccddeeff0011223344556677889900`
/// Response is the file encoded in a `CaRT` container file, or HTTP 404 if not found
pub const DOWNLOAD_SAMPLE_CART: &str = "/v1/samples/download/cart";
204
/// API endpoint to get a report for a given sample; the hash goes at the end of the URL.
/// Example: `/v1/samples/report/aabbccddeeff0011223344556677889900`
pub const SAMPLE_REPORT: &str = "/v1/samples/report";
208
/// Virus Total hits summary
#[derive(Clone, Debug, Default, PartialEq, Deserialize, Serialize)]
pub struct VirusTotalSummary {
    /// Anti-Virus products which identified the sample as malicious
    pub hits: u32,

    /// Anti-Virus products available when last analyzed
    pub total: u32,

    /// Hit details in json format, if available; defaults to `None` when the
    /// field is absent from the input
    #[serde(default)]
    pub detail: Option<serde_json::Value>,

    /// Date of most recent analysis; defaults to `None` when absent and is
    /// (de)serialized through chrono's `ts_seconds_option`
    #[serde(default, with = "ts_seconds_option")]
    pub last_analysis_date: Option<DateTime<Utc>>,
}
226
// TODO: Add sections for parsed fields for documents, executables
/// All the data for a sample known to `MalwareDB`.
///
/// The cryptographic hashes are always present; the similarity hashes, the
/// `file` command output and the Virus Total summary are optional.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct Report {
    /// MD5 hash
    pub md5: String,

    /// SHA-1 hash
    pub sha1: String,

    /// SHA-256 hash
    pub sha256: String,

    /// SHA-384 hash
    pub sha384: String,

    /// SHA-512 hash
    pub sha512: String,

    /// LZJD similarity hash, if available
    /// <https://github.com/EdwardRaff/LZJD>
    pub lzjd: Option<String>,

    /// TLSH similarity hash, if available
    /// <https://github.com/trendmicro/tlsh>
    pub tlsh: Option<String>,

    /// `SSDeep` similarity hash, if available
    /// <https://ssdeep-project.github.io/ssdeep/index.html>
    pub ssdeep: Option<String>,

    /// Human hash, if available
    /// <https://github.com/zacharyvoase/humanhash>
    pub humanhash: Option<String>,

    /// The output from libmagic, aka the `file` command, if available
    /// <https://man7.org/linux/man-pages/man3/libmagic.3.html>
    pub filecommand: Option<String>,

    /// Sample size in bytes
    pub bytes: u32,

    /// Sample size in human-readable size (2048 becomes 2 kb, for example)
    pub size: String,

    /// Entropy of the file, values over 6.5 may indicate compression or encryption
    pub entropy: f32,

    /// Virus Total summary data, if enabled on the server; defaults to `None`
    /// when the field is absent from the input
    /// <https://www.virustotal.com>
    #[serde(default)]
    pub vt: Option<VirusTotalSummary>,
}
280
281impl Display for Report {
282    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
283        writeln!(f, "Size: {} bytes, or {}", self.bytes, self.size)?;
284        writeln!(f, "Entropy: {}", self.entropy)?;
285        if let Some(filecmd) = &self.filecommand {
286            writeln!(f, "File command: {filecmd}")?;
287        }
288        if let Some(vt) = &self.vt {
289            writeln!(f, "VT Hits: {}/{}", vt.hits, vt.total)?;
290        }
291        writeln!(f, "MD5: {}", self.md5)?;
292        writeln!(f, "SHA-1: {}", self.sha1)?;
293        writeln!(f, "SHA256: {}", self.sha256)
294    }
295}
296
/// API endpoint for finding samples which are similar to a specific file, `POST`, authenticated.
pub const SIMILAR_SAMPLES: &str = "/v1/samples/similar";
299
/// The similarity hash by which a sample is identified.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize)]
#[non_exhaustive]
pub enum SimilarityHashType {
    /// `SSDeep` similarity of the whole file
    SSDeep,

    /// `LZJD` similarity of the whole file
    LZJD,

    /// TLSH similarity of the whole file
    TLSH,

    /// `PEHash`, for PE32 files
    PEHash,

    /// Import Hash for executable files
    ImportHash,

    /// `SSDeep` fuzzy hash of the import data, for executable files
    FuzzyImportHash,
}
322
323impl SimilarityHashType {
324    /// For a similarity hash type, return:
325    /// * The database table & field which stores the hash
326    /// * If applicable, the similarity hash function (Postgres extension) which calculates the similarity
327    #[must_use]
328    pub fn get_table_field_simfunc(&self) -> (&'static str, Option<&'static str>) {
329        match self {
330            SimilarityHashType::SSDeep => ("file.ssdeep", Some("fuzzy_hash_compare")),
331            SimilarityHashType::LZJD => ("file.lzjd", Some("lzjd_compare")),
332            SimilarityHashType::TLSH => ("file.tlsh", Some("tlsh_compare")),
333            SimilarityHashType::PEHash => ("executable.pehash", None),
334            SimilarityHashType::ImportHash => ("executable.importhash", None),
335            SimilarityHashType::FuzzyImportHash => {
336                ("executable.importhashfuzzy", Some("fuzzy_hash_compare"))
337            }
338        }
339    }
340}
341
342impl Display for SimilarityHashType {
343    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
344        match self {
345            SimilarityHashType::SSDeep => write!(f, "SSDeep"),
346            SimilarityHashType::LZJD => write!(f, "LZJD"),
347            SimilarityHashType::TLSH => write!(f, "TLSH"),
348            SimilarityHashType::PEHash => write!(f, "PeHash"),
349            SimilarityHashType::ImportHash => write!(f, "Import Hash (IMPHASH)"),
350            SimilarityHashType::FuzzyImportHash => write!(f, "Fuzzy Import hash"),
351        }
352    }
353}
354
/// Requesting samples from `MalwareDB` by similarity hash
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SimilarSamplesRequest {
    /// The hashes of the requested sample, each paired with the type of hash it is
    pub hashes: Vec<(SimilarityHashType, String)>,
}
361
/// Relation between a similar sample and the hashes by which the sample is similar
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SimilarSample {
    /// The SHA-256 hash of the found sample
    pub sha256: String,

    /// Matches from the requested sample to this sample, as pairs of algorithm and score
    pub algorithms: Vec<(SimilarityHashType, f32)>,
}
371
/// Response indicating samples which are similar
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SimilarSamplesResponse {
    /// The similar samples which were found
    pub results: Vec<SimilarSample>,

    /// Possible messages from the server, if any
    pub message: Option<String>,
}
381
/// API endpoint for searching for files with some criteria
pub const SEARCH: &str = "/v1/search";
384
/// Search for a file by some criteria.
/// Specifying both a hash and file name is an AND operation!
#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
pub struct SearchRequest {
    /// Search for a file by partial hash: the type of hash to search, and the
    /// partial hash itself
    pub partial_hash: Option<(PartialHashSearchType, String)>,

    /// Search for a file by whole or partial file name
    pub file_name: Option<String>,

    /// Maximum number of results to return, 100 results or less.
    pub limit: u32,

    /// Get returned result by a hash type.
    /// [`PartialHashSearchType::Any`] results in SHA-256
    pub response: PartialHashSearchType,
}
402
403impl SearchRequest {
404    /// Ensure the search request is valid:
405    /// * The partial hash is valid hexidecimal
406    /// * At least a file path or partial hash is provided
407    #[must_use]
408    #[inline]
409    pub fn is_valid(&self) -> bool {
410        if let Some((_hash_type, partial_hash)) = &self.partial_hash {
411            let hex = hex::decode(partial_hash);
412            return hex.is_ok();
413        }
414
415        (self.partial_hash.is_some() || self.file_name.is_some()) && self.limit > 0
416    }
417}
418
419impl Default for SearchRequest {
420    fn default() -> Self {
421        Self {
422            partial_hash: None,
423            file_name: None,
424            limit: 100,
425            response: PartialHashSearchType::default(),
426        }
427    }
428}
429
/// Specify the type of hash when searching for a partial match.
///
/// The default is [`PartialHashSearchType::SHA256`].
#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
pub enum PartialHashSearchType {
    /// Search by any known hash type
    Any,

    /// Search only for MD5 hashes
    MD5,

    /// Search only for SHA-1 hashes
    SHA1,

    /// Search only for SHA-256 hashes
    #[default]
    SHA256,

    /// Search only for SHA-384 hashes
    SHA384,

    /// Search only for SHA-512 hashes
    SHA512,
}
452
453impl Display for PartialHashSearchType {
454    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
455        match self {
456            PartialHashSearchType::Any => write!(f, "any"),
457            PartialHashSearchType::MD5 => write!(f, "md5"),
458            PartialHashSearchType::SHA1 => write!(f, "sha1"),
459            PartialHashSearchType::SHA256 => write!(f, "sha256"),
460            PartialHashSearchType::SHA384 => write!(f, "sha384"),
461            PartialHashSearchType::SHA512 => write!(f, "sha512"),
462        }
463    }
464}
465
466impl TryInto<PartialHashSearchType> for &str {
467    type Error = String;
468
469    fn try_into(self) -> Result<PartialHashSearchType, Self::Error> {
470        match self {
471            "any" => Ok(PartialHashSearchType::Any),
472            "md5" => Ok(PartialHashSearchType::MD5),
473            "sha1" => Ok(PartialHashSearchType::SHA1),
474            "sha256" => Ok(PartialHashSearchType::SHA256),
475            "sha384" => Ok(PartialHashSearchType::SHA384),
476            "sha512" => Ok(PartialHashSearchType::SHA512),
477            x => Err(format!("Invalid hash type {x}")),
478        }
479    }
480}
481
482impl TryInto<PartialHashSearchType> for Option<&str> {
483    type Error = String;
484
485    fn try_into(self) -> Result<PartialHashSearchType, Self::Error> {
486        if let Some(hash) = self {
487            hash.try_into()
488        } else {
489            Ok(PartialHashSearchType::SHA256)
490        }
491    }
492}
493
/// API endpoint for listing labels
// NOTE(review): the previous doc text was a copy of the one for the
// similar-samples endpoint (and said `POST`). The HTTP method and authentication
// for this endpoint are not visible in this file; confirm and document them.
pub const LIST_LABELS: &str = "/v1/labels";
496
/// A label, used for sources and samples
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Label {
    /// Label ID
    pub id: u64,

    /// Label value
    pub name: String,

    /// Label parent, if the label has one
    pub parent: Option<String>,
}
509
/// A collection of labels, wrapping a `Vec` of [`Label`]; the default is empty
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct Labels(pub Vec<Label>);
513
514// Convenience functions
515impl Labels {
516    /// Number of labels
517    #[must_use]
518    pub fn len(&self) -> usize {
519        self.0.len()
520    }
521
522    /// If the labels are empty
523    #[must_use]
524    pub fn is_empty(&self) -> bool {
525        self.0.is_empty()
526    }
527}
528
529impl Display for Labels {
530    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
531        if self.is_empty() {
532            return writeln!(f, "No labels.");
533        }
534        for label in &self.0 {
535            let parent = if let Some(parent) = &label.parent {
536                format!(", parent: {parent}")
537            } else {
538                String::new()
539            };
540            writeln!(f, "{}: {}{parent}", label.id, label.name)?;
541        }
542        Ok(())
543    }
544}