malwaredb_api/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2
3#![doc = include_str!("../README.md")]
4#![deny(missing_docs)]
5#![deny(clippy::all)]
6#![deny(clippy::pedantic)]
7#![forbid(unsafe_code)]
8
9/// Wrapper for fixed-size hash digests from hex strings
10pub mod digest;
11
12use std::fmt::{Display, Formatter};
13
14use chrono::serde::ts_seconds_option;
15use chrono::{DateTime, Utc};
16use serde::{Deserialize, Serialize};
17use zeroize::{Zeroize, ZeroizeOnDrop};
18
19/// MDB version
20pub const MDB_VERSION: &str = env!("CARGO_PKG_VERSION");
21
/// Name of the header in which a client presents its API key to the server
pub const MDB_API_HEADER: &str = "mdb-api-key";
24
/// API endpoint for logging in, POST
pub const USER_LOGIN_URL: &str = "/v1/users/getkey";
27
28/// User logs in with username and password
29#[derive(Deserialize, Serialize, Zeroize, ZeroizeOnDrop)]
30pub struct GetAPIKeyRequest {
31    /// Username
32    pub user: String,
33
34    /// User's password
35    pub password: String,
36}
37
/// API endpoint for logging out, GET, authenticated.
pub const USER_LOGOUT_URL: &str = "/v1/users/clearkey";
40
41/// Response includes the key if the credentials were correct,
42/// and possibly show a message related to errors or warnings.
43#[derive(Deserialize, Serialize, Zeroize, ZeroizeOnDrop)]
44pub struct GetAPIKeyResponse {
45    /// User's API key if successful
46    pub key: Option<String>,
47
48    /// Error response
49    pub message: Option<String>,
50}
51
/// API endpoint for a user's account information, GET, authenticated.
/// Use `EmptyAuthenticatingPost` to authenticate
pub const USER_INFO_URL: &str = "/v1/users/info";
55
56/// User gets information about their account
57#[derive(Clone, Debug, Deserialize, Serialize)]
58pub struct GetUserInfoResponse {
59    /// User's numeric ID
60    pub id: u32,
61
62    /// User's name
63    pub username: String,
64
65    /// User's group memberships, if any
66    pub groups: Vec<String>,
67
68    /// User's available sample sources, if any
69    pub sources: Vec<String>,
70
71    /// If the user is an admin
72    pub is_admin: bool,
73
74    /// When the account was created
75    pub created: DateTime<Utc>,
76
77    /// User has read-only access, perhaps a guest or demo account
78    pub is_readonly: bool,
79}
80
/// API endpoint for server information, GET, unauthenticated. The request is empty.
pub const SERVER_INFO: &str = "/v1/server/info";
83
84/// Information about the server
85#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
86pub struct ServerInfo {
87    /// Operating System used
88    pub os_name: String,
89
90    /// Memory footprint
91    pub memory_used: String,
92
93    /// MDB version
94    pub mdb_version: semver::Version,
95
96    /// Type and version of the database
97    pub db_version: String,
98
99    /// Size of the database on disk
100    pub db_size: String,
101
102    /// Total number of samples in `MalwareDB`
103    pub num_samples: u64,
104
105    /// Total users of `MalwareDB`
106    pub num_users: u32,
107
108    /// Uptime of `MalwareDB` in a human-readable format
109    pub uptime: String,
110
111    /// The name of the Malware DB instance
112    pub instance_name: String,
113}
114
/// API endpoint for the file types supported by `MalwareDB`, `GET`, unauthenticated.
/// The request is empty.
pub const SUPPORTED_FILE_TYPES: &str = "/v1/server/types";
117
118/// One record of supported file types
119#[derive(Clone, Debug, Deserialize, Serialize)]
120pub struct SupportedFileType {
121    /// Common name of the file type
122    pub name: String,
123
124    /// Magic number bytes in hex of the file type
125    pub magic: Vec<String>,
126
127    /// Whether the file type is executable
128    pub is_executable: bool,
129
130    /// Description of the file type
131    pub description: Option<String>,
132}
133
134/// Server's supported types, the response
135#[derive(Clone, Debug, Deserialize, Serialize)]
136pub struct SupportedFileTypes {
137    /// Supported file types
138    pub types: Vec<SupportedFileType>,
139
140    /// Optional server messages
141    pub message: Option<String>,
142}
143
/// API endpoint for listing the sources available to the user, GET, authenticated
pub const LIST_SOURCES: &str = "/v1/sources/list";
146
147/// Source record
148#[derive(Clone, Debug, Deserialize, Serialize)]
149pub struct SourceInfo {
150    /// ID of the source
151    pub id: u32,
152
153    /// Name of the source
154    pub name: String,
155
156    /// Description of the source
157    pub description: Option<String>,
158
159    /// URL of the source, or where the files were found
160    pub url: Option<String>,
161
162    /// Creation date or first acquisition date of or from the source
163    pub first_acquisition: DateTime<Utc>,
164
165    /// Whether the source holds malware
166    pub malicious: Option<bool>,
167}
168
169/// Sources response for request for sources
170#[derive(Clone, Debug, Deserialize, Serialize)]
171pub struct Sources {
172    /// List of sources
173    pub sources: Vec<SourceInfo>,
174
175    /// Error message, if any
176    pub message: Option<String>,
177}
178
/// API endpoint for sample upload, POST, authenticated
pub const UPLOAD_SAMPLE: &str = "/v1/samples/upload";
181
182/// New file sample being sent to `MalwareDB`
183#[derive(Clone, Debug, Deserialize, Serialize)]
184pub struct NewSample {
185    /// The original file name, which might not be known. If it's not known,
186    /// use a hash or something like "unknown.bin".
187    pub file_name: String,
188
189    /// ID of the source for this sample
190    pub source_id: u32,
191
192    /// Base64 encoding of the binary file
193    pub file_contents_b64: String,
194
195    /// SHA-256 of the sample being sent, for server-side validation
196    pub sha256: String,
197}
198
/// API endpoint for sample download, GET. Append the hash value to the end of the URL.
/// Example: `/v1/samples/download/aabbccddeeff0011223344556677889900`
/// The response is the raw bytes of the file, or HTTP 404 if not found
pub const DOWNLOAD_SAMPLE: &str = "/v1/samples/download";
203
/// API endpoint for sample download as a `CaRT` container file, GET
/// Example: `/v1/samples/download/cart/aabbccddeeff0011223344556677889900`
/// The response is the file wrapped in a `CaRT` container file, or HTTP 404 if not found
pub const DOWNLOAD_SAMPLE_CART: &str = "/v1/samples/download/cart";
208
/// API endpoint for retrieving the report for a given sample
/// Example: `/v1/samples/report/aabbccddeeff0011223344556677889900`
pub const SAMPLE_REPORT: &str = "/v1/samples/report";
212
213/// Virus Total hits summary
214#[derive(Clone, Debug, Default, PartialEq, Deserialize, Serialize)]
215pub struct VirusTotalSummary {
216    /// Anti-Virus products which identified the sample as malicious
217    pub hits: u32,
218
219    /// Anti-Virus products available when last analyzed
220    pub total: u32,
221
222    /// Hit details in JSON format, if available
223    #[serde(default)]
224    pub detail: Option<serde_json::Value>,
225
226    /// Most recent analysis date, if available
227    #[serde(default, with = "ts_seconds_option")]
228    pub last_analysis_date: Option<DateTime<Utc>>,
229}
230
231// TODO: Add sections for parsed fields for documents, executables
232/// All the data for a sample known to `MalwareDB`
233#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
234pub struct Report {
235    ///MD5 hash
236    pub md5: String,
237
238    /// SHA-1 hash
239    pub sha1: String,
240
241    /// SHA-256 hash
242    pub sha256: String,
243
244    /// SHA-384 hash
245    pub sha384: String,
246
247    /// SHA-512 hash
248    pub sha512: String,
249
250    /// LZJD similarity hash, if available
251    /// <https://github.com/EdwardRaff/LZJD>
252    pub lzjd: Option<String>,
253
254    /// TLSH similarity hash, if available
255    /// <https://github.com/trendmicro/tlsh>
256    pub tlsh: Option<String>,
257
258    /// `SSDeep` similarity hash, if available
259    /// <https://ssdeep-project.github.io/ssdeep/index.html>
260    pub ssdeep: Option<String>,
261
262    /// Human hash
263    /// <https://github.com/zacharyvoase/humanhash>
264    pub humanhash: Option<String>,
265
266    /// The output from libmagic, aka the `file` command
267    /// <https://man7.org/linux/man-pages/man3/libmagic.3.html>
268    pub filecommand: Option<String>,
269
270    /// Sample size in bytes
271    pub bytes: u64,
272
273    /// Sample size in human-readable size (2048 becomes 2 kb, for example)
274    pub size: String,
275
276    /// Entropy of the file, values over 6.5 may indicate compression or encryption
277    pub entropy: f32,
278
279    /// Virus Total summary data, if enabled on the server
280    /// <https://www.virustotal.com>
281    #[serde(default)]
282    pub vt: Option<VirusTotalSummary>,
283}
284
285impl Display for Report {
286    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
287        writeln!(f, "Size: {} bytes, or {}", self.bytes, self.size)?;
288        writeln!(f, "Entropy: {}", self.entropy)?;
289        if let Some(filecmd) = &self.filecommand {
290            writeln!(f, "File command: {filecmd}")?;
291        }
292        if let Some(vt) = &self.vt {
293            writeln!(f, "VT Hits: {}/{}", vt.hits, vt.total)?;
294        }
295        writeln!(f, "MD5: {}", self.md5)?;
296        writeln!(f, "SHA-1: {}", self.sha1)?;
297        writeln!(f, "SHA256: {}", self.sha256)
298    }
299}
300
/// API endpoint for finding samples similar to a specific file, POST, authenticated.
pub const SIMILAR_SAMPLES: &str = "/v1/samples/similar";
303
304/// The hash by which a sample is identified
305#[derive(Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize)]
306#[non_exhaustive]
307pub enum SimilarityHashType {
308    /// `SSDeep` similarity of the whole file
309    SSDeep,
310
311    /// `LZJD` similarity of the whole file
312    LZJD,
313
314    /// TLSH similarity of the hole file
315    TLSH,
316
317    /// `PEHash`, for PE32 files
318    PEHash,
319
320    /// Import Hash for executable files
321    ImportHash,
322
323    /// `SSDeep` fuzzy hash of the import data, for executable files
324    FuzzyImportHash,
325}
326
327impl SimilarityHashType {
328    /// For a similarity hash type, return:
329    /// * The database table and field which stores the hash
330    /// * If applicable, the similarity hash function (Postgres extension) which calculates the similarity
331    #[must_use]
332    pub fn get_table_field_simfunc(&self) -> (&'static str, Option<&'static str>) {
333        match self {
334            SimilarityHashType::SSDeep => ("file.ssdeep", Some("fuzzy_hash_compare")),
335            SimilarityHashType::LZJD => ("file.lzjd", Some("lzjd_compare")),
336            SimilarityHashType::TLSH => ("file.tlsh", Some("tlsh_compare")),
337            SimilarityHashType::PEHash => ("executable.pehash", None),
338            SimilarityHashType::ImportHash => ("executable.importhash", None),
339            SimilarityHashType::FuzzyImportHash => {
340                ("executable.importhashfuzzy", Some("fuzzy_hash_compare"))
341            }
342        }
343    }
344}
345
346impl Display for SimilarityHashType {
347    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
348        match self {
349            SimilarityHashType::SSDeep => write!(f, "SSDeep"),
350            SimilarityHashType::LZJD => write!(f, "LZJD"),
351            SimilarityHashType::TLSH => write!(f, "TLSH"),
352            SimilarityHashType::PEHash => write!(f, "PeHash"),
353            SimilarityHashType::ImportHash => write!(f, "Import Hash (IMPHASH)"),
354            SimilarityHashType::FuzzyImportHash => write!(f, "Fuzzy Import hash"),
355        }
356    }
357}
358
359/// Requesting a sample from `MalwareDB` by similarity hash
360#[derive(Clone, Debug, Deserialize, Serialize)]
361pub struct SimilarSamplesRequest {
362    /// The hashes of the requested sample
363    pub hashes: Vec<(SimilarityHashType, String)>,
364}
365
366/// Relation between a similar sample and the hashes by which the sample is similar
367#[derive(Clone, Debug, Deserialize, Serialize)]
368pub struct SimilarSample {
369    /// The SHA-256 hash of the found sample
370    pub sha256: String,
371
372    /// Matches from the requested sample to this sample by algorithm and score
373    pub algorithms: Vec<(SimilarityHashType, f32)>,
374}
375
376/// Response indicating samples which are similar
377#[derive(Clone, Debug, Deserialize, Serialize)]
378pub struct SimilarSamplesResponse {
379    /// The responses
380    pub results: Vec<SimilarSample>,
381
382    /// Possible messages from the server, if any
383    pub message: Option<String>,
384}
385
/// API endpoint for searching for files by some criteria
pub const SEARCH: &str = "/v1/search";
388
389/// Search for a file by some criteria
390/// Specifying both a hash and file name is an AND operation!
391#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
392pub struct SearchRequest {
393    /// Search for a file by partial hash
394    pub partial_hash: Option<(PartialHashSearchType, String)>,
395
396    /// Search for a file by whole or partial file name
397    pub file_name: Option<String>,
398
399    /// Maximum number of results to return, 100 results or less.
400    pub limit: u32,
401
402    /// Get the returned result by a hash type.
403    /// [`PartialHashSearchType::Any`] results in SHA-256
404    pub response: PartialHashSearchType,
405}
406
407impl SearchRequest {
408    /// Ensure the search request is valid:
409    /// * The partial hash is valid hexidecimal
410    /// * At least a file path or partial hash is provided
411    #[must_use]
412    #[inline]
413    pub fn is_valid(&self) -> bool {
414        if let Some((_hash_type, partial_hash)) = &self.partial_hash {
415            let hex = hex::decode(partial_hash);
416            return hex.is_ok();
417        }
418
419        (self.partial_hash.is_some() || self.file_name.is_some()) && self.limit > 0
420    }
421}
422
423impl Default for SearchRequest {
424    fn default() -> Self {
425        Self {
426            partial_hash: None,
427            file_name: None,
428            limit: 100,
429            response: PartialHashSearchType::default(),
430        }
431    }
432}
433
434/// Specify the type of hash when searching for a partial match
435#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
436pub enum PartialHashSearchType {
437    /// Search by any known hash type
438    Any,
439
440    /// Search only for MD5 hashes
441    MD5,
442
443    /// Search only for SHA-1 hashes
444    SHA1,
445
446    /// Search only for SHA-256 hashes
447    #[default]
448    SHA256,
449
450    /// Search only for SHA-384 hashes
451    SHA384,
452
453    /// Search only for SHA-512 hashes
454    SHA512,
455}
456
457impl Display for PartialHashSearchType {
458    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
459        match self {
460            PartialHashSearchType::Any => write!(f, "any"),
461            PartialHashSearchType::MD5 => write!(f, "md5"),
462            PartialHashSearchType::SHA1 => write!(f, "sha1"),
463            PartialHashSearchType::SHA256 => write!(f, "sha256"),
464            PartialHashSearchType::SHA384 => write!(f, "sha384"),
465            PartialHashSearchType::SHA512 => write!(f, "sha512"),
466        }
467    }
468}
469
470impl TryInto<PartialHashSearchType> for &str {
471    type Error = String;
472
473    fn try_into(self) -> Result<PartialHashSearchType, Self::Error> {
474        match self {
475            "any" => Ok(PartialHashSearchType::Any),
476            "md5" => Ok(PartialHashSearchType::MD5),
477            "sha1" => Ok(PartialHashSearchType::SHA1),
478            "sha256" => Ok(PartialHashSearchType::SHA256),
479            "sha384" => Ok(PartialHashSearchType::SHA384),
480            "sha512" => Ok(PartialHashSearchType::SHA512),
481            x => Err(format!("Invalid hash type {x}")),
482        }
483    }
484}
485
486impl TryInto<PartialHashSearchType> for Option<&str> {
487    type Error = String;
488
489    fn try_into(self) -> Result<PartialHashSearchType, Self::Error> {
490        if let Some(hash) = self {
491            hash.try_into()
492        } else {
493            Ok(PartialHashSearchType::SHA256)
494        }
495    }
496}
497
/// API endpoint for listing labels.
/// NOTE(review): the earlier comment here duplicated the similar-samples endpoint's
/// description and said `POST`; confirm the HTTP method against the server's routes.
pub const LIST_LABELS: &str = "/v1/labels";
500
501/// A label, used for sources and samples
502#[derive(Clone, Debug, Deserialize, Serialize)]
503pub struct Label {
504    /// Label ID
505    pub id: u64,
506
507    /// Label value
508    pub name: String,
509
510    /// Label parent
511    pub parent: Option<String>,
512}
513
514/// One or more labels
515#[derive(Clone, Debug, Default, Deserialize, Serialize)]
516pub struct Labels(pub Vec<Label>);
517
518// Convenience functions
519impl Labels {
520    /// Number of labels
521    #[must_use]
522    pub fn len(&self) -> usize {
523        self.0.len()
524    }
525
526    /// If the labels are empty
527    #[must_use]
528    pub fn is_empty(&self) -> bool {
529        self.0.is_empty()
530    }
531}
532
533impl Display for Labels {
534    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
535        if self.is_empty() {
536            return writeln!(f, "No labels.");
537        }
538        for label in &self.0 {
539            let parent = if let Some(parent) = &label.parent {
540                format!(", parent: {parent}")
541            } else {
542                String::new()
543            };
544            writeln!(f, "{}: {}{parent}", label.id, label.name)?;
545        }
546        Ok(())
547    }
548}