malwaredb_api/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2
3#![doc = include_str!("../README.md")]
4#![deny(missing_docs)]
5#![forbid(unsafe_code)]
6
7/// Wrapper for fixed-size hash digests from hex strings
8pub mod digest;
9
10use std::fmt::{Display, Formatter};
11
12use chrono::serde::ts_seconds_option;
13use chrono::{DateTime, Utc};
14use serde::{Deserialize, Serialize};
15use zeroize::{Zeroize, ZeroizeOnDrop};
16
/// MDB version: the version of this crate, captured at compile time from
/// Cargo's `CARGO_PKG_VERSION` environment variable.
pub const MDB_VERSION: &str = env!("CARGO_PKG_VERSION");
19
/// Name of the request header used to present the API key to the server
pub const MDB_API_HEADER: &str = "mdb-api-key";
22
/// Login API endpoint, POST.
/// See [`GetAPIKeyRequest`] for the credentials a user logs in with.
pub const USER_LOGIN_URL: &str = "/v1/users/getkey";
25
/// User logs in with username and password.
///
/// Derives `Zeroize` and `ZeroizeOnDrop` so that the credentials are wiped
/// from memory when the value is dropped. `Debug` is not derived.
#[derive(Deserialize, Serialize, Zeroize, ZeroizeOnDrop)]
pub struct GetAPIKeyRequest {
    /// Username
    pub user: String,

    /// User's password
    pub password: String,
}
35
/// Logout API endpoint, GET, authenticated.
// NOTE(review): the path suggests this clears the user's API key on the server; confirm.
pub const USER_LOGOUT_URL: &str = "/v1/users/clearkey";
38
/// Response to a login request: includes the key if the credentials were
/// correct, and possibly a message related to errors or warnings.
///
/// Derives `Zeroize` and `ZeroizeOnDrop` so that the key is wiped from
/// memory when the value is dropped. `Debug` is not derived.
#[derive(Deserialize, Serialize, Zeroize, ZeroizeOnDrop)]
pub struct GetAPIKeyResponse {
    /// User's API key if successful
    pub key: Option<String>,

    /// Error or warning message, if any
    pub message: Option<String>,
}
49
// NOTE(review): `EmptyAuthenticatingPost` is not defined in the visible part of this
// file, and "Post" conflicts with the GET method stated below; confirm it is current.
/// User's get self information API endpoint, GET, authenticated.
/// Use `EmptyAuthenticatingPost` to authenticate.
pub const USER_INFO_URL: &str = "/v1/users/info";
53
/// Information about a user's own account, as returned to that user
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct GetUserInfoResponse {
    /// User's numeric ID
    pub id: i32,

    /// User's name
    pub username: String,

    /// User's group memberships, if any (may be empty)
    pub groups: Vec<String>,

    /// User's available sample sources, if any (may be empty)
    pub sources: Vec<String>,

    /// Whether the user is an admin
    pub is_admin: bool,

    /// When the account was created (UTC)
    pub created: DateTime<Utc>,

    /// User has read-only access, perhaps a guest or demo account
    pub is_readonly: bool,
}
78
/// Server information API endpoint, GET, unauthenticated. The request is empty.
pub const SERVER_INFO: &str = "/v1/server/info";
81
/// Information about the server.
///
/// Several values (memory, database size, uptime) are carried as
/// pre-formatted text rather than as numbers.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
pub struct ServerInfo {
    /// Operating System used
    pub os_name: String,

    /// Memory footprint, as text
    pub memory_used: String,

    /// MDB version
    pub mdb_version: String,

    /// Type and version of the database
    pub db_version: String,

    /// Size of the database on disk, as text
    pub db_size: String,

    /// Total number of samples in MalwareDB
    pub num_samples: u64,

    /// Total users of MalwareDB
    pub num_users: u32,

    /// Uptime of MalwareDB in a human readable format
    pub uptime: String,
}
109
/// API endpoint listing the file types supported by MalwareDB, GET,
/// unauthenticated. The request is empty.
pub const SUPPORTED_FILE_TYPES: &str = "/v1/server/types";
112
/// One record of a supported file type
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SupportedFileType {
    /// Common name of the file type
    pub name: String,

    /// Magic number bytes of the file type, as hex strings
    pub magic: Vec<String>,

    /// Whether the file type is executable
    pub is_executable: bool,

    /// Description of the file type, if available
    pub description: Option<String>,
}
128
/// The server's response listing its supported file types
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SupportedFileTypes {
    /// Supported file types
    pub types: Vec<SupportedFileType>,

    /// Optional server message
    pub message: Option<String>,
}
138
/// API endpoint for listing the sources available to the requesting user,
/// GET, authenticated
pub const LIST_SOURCES: &str = "/v1/sources/list";
141
/// Source record: describes one origin of samples
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SourceInfo {
    /// ID of the source
    pub id: u32,

    /// Name of the source
    pub name: String,

    /// Description of the source, if available
    pub description: Option<String>,

    /// URL of the source, or where the files were found, if available
    pub url: Option<String>,

    /// Creation date or first acquisition date of or from the source (UTC)
    pub first_acquisition: DateTime<Utc>,

    /// Whether the source holds malware; `None` when this is not specified
    pub malicious: Option<bool>,
}
163
/// Response to a request for the list of sources
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Sources {
    /// List of sources
    pub sources: Vec<SourceInfo>,

    /// Error message, if any
    pub message: Option<String>,
}
173
/// API endpoint for uploading a sample, POST, authenticated.
/// See [`NewSample`] for the data describing a sample being sent.
pub const UPLOAD_SAMPLE: &str = "/v1/samples/upload";
176
/// New file sample being sent to MalwareDB
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct NewSample {
    /// The original file name, might not be known
    // NOTE(review): the field is not optional; how an unknown name is represented is not shown here.
    pub file_name: String,

    /// ID of the source for this sample
    pub source_id: u32,

    /// Base64 encoding of the binary file
    pub file_contents_b64: String,

    /// SHA-256 of the sample being sent, for server-side validation
    pub sha256: String,
}
192
/// API endpoint for downloading a sample, GET. The hash value goes at the end of the URL.
/// For example: `/v1/samples/download/aabbccddeeff0011223344556677889900`
///
/// Response is the raw bytes of the file, or HTTP 404 if not found.
pub const DOWNLOAD_SAMPLE: &str = "/v1/samples/download";
197
/// API endpoint for downloading a sample as a CaRT file, GET. The hash value goes at the end of the URL.
/// For example: `/v1/samples/download/cart/aabbccddeeff0011223344556677889900`
///
/// Response is the file encoded in a CaRT container file, or HTTP 404 if not found.
pub const DOWNLOAD_SAMPLE_CART: &str = "/v1/samples/download/cart";
202
/// API endpoint to get a report for a given sample. The hash value goes at the end of the URL.
/// For example: `/v1/samples/report/aabbccddeeff0011223344556677889900`
pub const SAMPLE_REPORT: &str = "/v1/samples/report";
206
/// VirusTotal hits summary
#[derive(Clone, Debug, Default, PartialEq, Deserialize, Serialize)]
pub struct VirusTotalSummary {
    /// Anti-Virus products which identified the sample as malicious
    pub hits: u32,

    /// Anti-Virus products available when last analyzed
    pub total: u32,

    /// Hit details in json format, if available.
    /// Falls back to `None` when the field is missing from the input.
    #[serde(default)]
    pub detail: Option<serde_json::Value>,

    /// Date of most recent analysis.
    /// (De)serialized through chrono's `ts_seconds_option`, and falls back to
    /// `None` when the field is missing from the input.
    #[serde(default, with = "ts_seconds_option")]
    pub last_analysis_date: Option<DateTime<Utc>>,
}
224
// TODO: Add sections for parsed fields for documents, executables
/// All the data for a sample known to MalwareDB.
///
/// Hash values are carried as strings.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct Report {
    /// MD5 hash
    pub md5: String,

    /// SHA-1 hash
    pub sha1: String,

    /// SHA-256 hash
    pub sha256: String,

    /// SHA-384 hash
    pub sha384: String,

    /// SHA-512 hash
    pub sha512: String,

    /// LZJD similarity hash, if available
    /// <https://github.com/EdwardRaff/LZJD>
    pub lzjd: Option<String>,

    /// TLSH similarity hash, if available
    /// <https://github.com/trendmicro/tlsh>
    pub tlsh: Option<String>,

    /// SSDeep similarity hash, if available
    /// <https://ssdeep-project.github.io/ssdeep/index.html>
    pub ssdeep: Option<String>,

    /// Human hash, if available
    /// <https://github.com/zacharyvoase/humanhash>
    pub humanhash: Option<String>,

    /// The output from libmagic, aka the `file` command, if available
    /// <https://man7.org/linux/man-pages/man3/libmagic.3.html>
    pub filecommand: Option<String>,

    /// Sample size in bytes.
    /// As a `u32`, this cannot represent sizes of 4 GiB or more.
    pub bytes: u32,

    /// Sample size in human-readable size (2048 becomes 2 kb, for example)
    pub size: String,

    /// Entropy of the file, values over 6.5 may indicate compression or encryption
    pub entropy: f32,

    /// VirusTotal summary data, if enabled on the server.
    /// Falls back to `None` when the field is missing from the input.
    /// <https://www.virustotal.com>
    #[serde(default)]
    pub vt: Option<VirusTotalSummary>,
}
278
279impl Display for Report {
280    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
281        writeln!(f, "Size: {} bytes, or {}", self.bytes, self.size)?;
282        writeln!(f, "Entropy: {}", self.entropy)?;
283        if let Some(filecmd) = &self.filecommand {
284            writeln!(f, "File command: {filecmd}")?;
285        }
286        if let Some(vt) = &self.vt {
287            writeln!(f, "VT Hits: {}/{}", vt.hits, vt.total)?;
288        }
289        writeln!(f, "MD5: {}", self.md5)?;
290        writeln!(f, "SHA-1: {}", self.sha1)?;
291        writeln!(f, "SHA256: {}", self.sha256)
292    }
293}
294
/// API endpoint for finding samples which are similar to a specific file, POST, authenticated.
pub const SIMILAR_SAMPLES: &str = "/v1/samples/similar";
297
298/// The hash by which a sample is identified
299#[derive(Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize)]
300#[non_exhaustive]
301pub enum SimilarityHashType {
302    /// SSDeep similarity of the whole file
303    SSDeep,
304
305    /// LZJD similarity of the whole file
306    LZJD,
307
308    /// TLSH similarity of the hole file
309    TLSH,
310
311    /// PEHash, for PE32 files
312    PEHash,
313
314    /// Import Hash for executable files
315    ImportHash,
316
317    /// SSDeep fuzzy hash of the import data, for executable files
318    FuzzyImportHash,
319}
320
321impl SimilarityHashType {
322    /// For a similarity hash type, return:
323    /// * The database table & field which stores the hash
324    /// * If applicable, the similarity hash function (Postgres extension) which calculates the similarity
325    pub fn get_table_field_simfunc(&self) -> (&'static str, Option<&'static str>) {
326        match self {
327            SimilarityHashType::SSDeep => ("file.ssdeep", Some("fuzzy_hash_compare")),
328            SimilarityHashType::LZJD => ("file.lzjd", Some("lzjd_compare")),
329            SimilarityHashType::TLSH => ("file.tlsh", Some("tlsh_compare")),
330            SimilarityHashType::PEHash => ("executable.pehash", None),
331            SimilarityHashType::ImportHash => ("executable.importhash", None),
332            SimilarityHashType::FuzzyImportHash => {
333                ("executable.importhashfuzzy", Some("fuzzy_hash_compare"))
334            }
335        }
336    }
337}
338
339impl Display for SimilarityHashType {
340    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
341        match self {
342            SimilarityHashType::SSDeep => write!(f, "SSDeep"),
343            SimilarityHashType::LZJD => write!(f, "LZJD"),
344            SimilarityHashType::TLSH => write!(f, "TLSH"),
345            SimilarityHashType::PEHash => write!(f, "PeHash"),
346            SimilarityHashType::ImportHash => write!(f, "Import Hash (IMPHASH)"),
347            SimilarityHashType::FuzzyImportHash => write!(f, "Fuzzy Import hash"),
348        }
349    }
350}
351
/// Requesting samples from MalwareDB by similarity hash
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SimilarSamplesRequest {
    /// The hashes of the requested sample, each paired with the type of hash it is
    pub hashes: Vec<(SimilarityHashType, String)>,
}
358
/// Relation between a similar sample and the hashes by which the sample is similar
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SimilarSample {
    /// The SHA-256 hash of the found sample
    pub sha256: String,

    /// Matches from the requested sample to this sample by algorithm and score
    // NOTE(review): the range and scale of the score are not specified here; confirm against the server.
    pub algorithms: Vec<(SimilarityHashType, f32)>,
}
368
/// Response indicating samples which are similar
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SimilarSamplesResponse {
    /// The similar samples which were found
    pub results: Vec<SimilarSample>,

    /// Possible message from the server, if any
    pub message: Option<String>,
}
378
// NOTE(review): the HTTP method and authentication requirement for this endpoint
// are not established in this file; confirm against the server.
/// API endpoint for listing labels
pub const LIST_LABELS: &str = "/v1/labels";
381
/// A label, used for sources and samples
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Label {
    /// Label ID
    pub id: u64,

    /// Label value
    pub name: String,

    /// Label parent, if the label has one
    pub parent: Option<String>,
}
394
/// A collection of labels, wrapping a `Vec` of [`Label`]; may be empty
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct Labels(pub Vec<Label>);
398
399// Convenience functions
400impl Labels {
401    /// Number of labels
402    pub fn len(&self) -> usize {
403        self.0.len()
404    }
405
406    /// If the labels are empty
407    pub fn is_empty(&self) -> bool {
408        self.0.is_empty()
409    }
410}
411
412impl Display for Labels {
413    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
414        if self.is_empty() {
415            return writeln!(f, "No labels.");
416        }
417        for label in &self.0 {
418            let parent = if let Some(parent) = &label.parent {
419                format!(", parent: {parent}")
420            } else {
421                String::new()
422            };
423            writeln!(f, "{}: {}{parent}", label.id, label.name)?;
424        }
425        Ok(())
426    }
427}