malwaredb_api/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2
3#![doc = include_str!("../README.md")]
4#![deny(missing_docs)]
5#![deny(clippy::all)]
6#![deny(clippy::pedantic)]
7#![forbid(unsafe_code)]
8
9/// Wrapper for fixed-size cryptographic hash digests from hex strings
10pub mod digest;
11
12use std::error::Error;
13use std::fmt::{Display, Formatter};
14
15use chrono::serde::ts_seconds_option;
16use chrono::{DateTime, Utc};
17use serde::{Deserialize, Serialize};
18use zeroize::{Zeroize, ZeroizeOnDrop};
19
20/// MDB version
21pub const MDB_VERSION: &str = env!("CARGO_PKG_VERSION");
22
/// Name of the HTTP header in which the API key is presented to the server
pub const MDB_API_HEADER: &str = "mdb-api-key";
25
/// Endpoint for authentication, POST
pub const USER_LOGIN_URL: &str = "/v1/users/getkey";
28
/// Name of the endpoint for use with Multicast DNS (mDNS)
pub const MDNS_NAME: &str = "_malwaredb._tcp.local.";
31
32/// User authentication with username and password to get the API key
33#[derive(Deserialize, Serialize, Zeroize, ZeroizeOnDrop)]
34pub struct GetAPIKeyRequest {
35    /// Username
36    pub user: String,
37
38    /// User's password
39    pub password: String,
40}
41
/// Endpoint for logging out, which clears the user's API key, GET, authenticated.
pub const USER_LOGOUT_URL: &str = "/v1/users/clearkey";
44
45/// Respond to authentication with the key if the credentials were correct,
46/// and possibly show a message related to errors or warnings.
47#[derive(Deserialize, Serialize, Zeroize, ZeroizeOnDrop)]
48pub struct GetAPIKeyResponse {
49    /// User's API key if successful
50    pub key: String,
51
52    /// Error response
53    pub message: Option<String>,
54}
55
56/// For request types, wrap in this struct to handle some error conditions
57///
58/// All API endpoints use this response format EXCEPT:
59///   * [`USER_LOGOUT_URL`]
60///   * [`UPLOAD_SAMPLE_JSON_URL`]
61///   * [`UPLOAD_SAMPLE_CBOR_URL`]
62///   * [`DOWNLOAD_SAMPLE_URL`]
63///   * [`DOWNLOAD_SAMPLE_CART_URL`]
64#[derive(Clone, Debug, Deserialize, Serialize)]
65pub enum ServerResponse<D> {
66    /// Request successful
67    #[serde(alias = "success")]
68    Success(D),
69
70    /// Request unsuccessful
71    #[serde(alias = "error")]
72    Error(ServerError),
73}
74
75impl<D> ServerResponse<D> {
76    /// Unwrap a server response
77    ///
78    /// # Panics
79    ///
80    /// Will panic if the response is an error
81    #[inline]
82    pub fn unwrap(self) -> D {
83        match self {
84            ServerResponse::Success(d) => d,
85            ServerResponse::Error(e) => panic!("forced ServerResponse::unwrap() on error: {e}"),
86        }
87    }
88
89    /// Convert the server response into a traditional [`Result`] type
90    ///
91    /// # Errors
92    ///
93    /// The return error is the server error, if present
94    #[inline]
95    pub fn into_result(self) -> Result<D, ServerError> {
96        match self {
97            ServerResponse::Success(d) => Ok(d),
98            ServerResponse::Error(e) => Err(e),
99        }
100    }
101
102    /// If the server response was successful
103    #[inline]
104    pub const fn is_successful(&self) -> bool {
105        matches!(*self, ServerResponse::Success(_))
106    }
107
108    /// If the server response was not successful
109    #[inline]
110    pub const fn is_err(&self) -> bool {
111        matches!(*self, ServerResponse::Error(_))
112    }
113}
114
115/// Server error responses
116#[derive(Copy, Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Hash)]
117pub enum ServerError {
118    /// The server was asked for samples but doesn't store them
119    NoSamples,
120
121    /// The requested item was not found or the search yielded no results
122    NotFound,
123
124    /// Internal server error, details not disclosed to the client
125    ServerError,
126
127    /// Unauthorized: API key was not provided, or the user doesn't have access to the requested item(s)
128    Unauthorized,
129}
130
131impl Display for ServerError {
132    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
133        match self {
134            ServerError::NoSamples => write!(f, "NoSamples"),
135            ServerError::NotFound => write!(f, "NotFound"),
136            ServerError::ServerError => write!(f, "ServerError"),
137            ServerError::Unauthorized => write!(f, "Unauthorized"),
138        }
139    }
140}
141
142impl Error for ServerError {}
143
/// Endpoint for the user's account information, GET, authenticated
pub const USER_INFO_URL: &str = "/v1/users/info";
146
147/// User account information
148#[derive(Clone, Debug, Deserialize, Serialize)]
149pub struct GetUserInfoResponse {
150    /// User's numeric ID
151    pub id: u32,
152
153    /// User's name
154    pub username: String,
155
156    /// User's group memberships, if any
157    pub groups: Vec<String>,
158
159    /// User's available sample sources, if any
160    pub sources: Vec<String>,
161
162    /// If the user is an admin
163    pub is_admin: bool,
164
165    /// When the account was created
166    pub created: DateTime<Utc>,
167
168    /// User has read-only access, perhaps a guest or demo account
169    pub is_readonly: bool,
170}
171
/// Endpoint for server information; the request is empty, GET, unauthenticated.
pub const SERVER_INFO_URL: &str = "/v1/server/info";
174
175/// Information about the server
176#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
177pub struct ServerInfo {
178    /// Operating System used
179    pub os_name: String,
180
181    /// Memory footprint
182    pub memory_used: String,
183
184    /// MDB version
185    pub mdb_version: semver::Version,
186
187    /// Type and version of the database
188    pub db_version: String,
189
190    /// Size of the database on disk
191    pub db_size: String,
192
193    /// Total number of samples in Malware DB
194    pub num_samples: u64,
195
196    /// Total users of Malware DB
197    pub num_users: u32,
198
199    /// Uptime of Malware DB in a human-readable format
200    pub uptime: String,
201
202    /// The name of the Malware DB instance
203    pub instance_name: String,
204}
205
/// Endpoint for the file types supported by Malware DB; the request is empty, GET, unauthenticated.
pub const SUPPORTED_FILE_TYPES_URL: &str = "/v1/server/types";
208
209/// One record of supported file types
210#[derive(Clone, Debug, Deserialize, Serialize)]
211pub struct SupportedFileType {
212    /// Common name of the file type
213    pub name: String,
214
215    /// Magic number bytes in hex of the file type
216    pub magic: Vec<String>,
217
218    /// Whether the file type is executable
219    pub is_executable: bool,
220
221    /// Description of the file type
222    pub description: Option<String>,
223}
224
225/// Server's supported types, the response
226#[derive(Clone, Debug, Deserialize, Serialize)]
227pub struct SupportedFileTypes {
228    /// Supported file types
229    pub types: Vec<SupportedFileType>,
230
231    /// Optional server messages
232    pub message: Option<String>,
233}
234
/// Endpoint for listing sources, per-user, GET, authenticated
pub const LIST_SOURCES_URL: &str = "/v1/sources/list";
237
238/// Information about a sample source
239#[derive(Clone, Debug, Deserialize, Serialize)]
240pub struct SourceInfo {
241    /// ID of the source
242    pub id: u32,
243
244    /// Name of the source
245    pub name: String,
246
247    /// Description of the source
248    pub description: Option<String>,
249
250    /// URL of the source, or where the files were found
251    pub url: Option<String>,
252
253    /// Creation date or first acquisition date of or from the source
254    pub first_acquisition: DateTime<Utc>,
255
256    /// Whether the source holds malware
257    pub malicious: Option<bool>,
258}
259
260/// Sources response for request for sources
261#[derive(Clone, Debug, Deserialize, Serialize)]
262pub struct Sources {
263    /// List of sources
264    pub sources: Vec<SourceInfo>,
265
266    /// Error message, if any
267    pub message: Option<String>,
268}
269
/// Endpoint for uploading a sample encoded as JSON, POST, authenticated
pub const UPLOAD_SAMPLE_JSON_URL: &str = "/v1/samples/json/upload";
272
/// Endpoint for uploading a sample encoded as CBOR, POST, authenticated
pub const UPLOAD_SAMPLE_CBOR_URL: &str = "/v1/samples/cbor/upload";
275
276/// New file sample being sent to Malware DB via [`UPLOAD_SAMPLE_JSON_URL`]
277#[derive(Clone, Debug, Deserialize, Serialize)]
278pub struct NewSampleB64 {
279    /// The original file name, which might not be known. If it's not known,
280    /// use a hash or something like "unknown.bin".
281    pub file_name: String,
282
283    /// ID of the source for this sample
284    pub source_id: u32,
285
286    /// Base64 encoding of the binary file
287    pub file_contents_b64: String,
288
289    /// SHA-256 of the sample being sent, for server-side validation
290    pub sha256: String,
291}
292
293/// New file sample being sent to Malware DB via [`UPLOAD_SAMPLE_CBOR_URL`]
294#[derive(Clone, Debug, Deserialize, Serialize)]
295pub struct NewSampleBytes {
296    /// The original file name, which might not be known. If it's not known,
297    /// use a hash or something like "unknown.bin".
298    pub file_name: String,
299
300    /// ID of the source for this sample
301    pub source_id: u32,
302
303    /// Raw binary contents
304    pub file_contents: Vec<u8>,
305
306    /// SHA-256 of the sample being sent, for server-side validation
307    pub sha256: String,
308}
309
/// Endpoint for downloading a sample, GET. The hash value is appended to the URL.
///
/// Example: `/v1/samples/download/aabbccddeeff0011223344556677889900`
///
/// The response is the raw bytes of the file, or HTTP 404 if not found
pub const DOWNLOAD_SAMPLE_URL: &str = "/v1/samples/download";
314
/// Endpoint for downloading a sample as a `CaRT` container file, GET
///
/// Example: `/v1/samples/download/cart/aabbccddeeff0011223344556677889900`
///
/// The response is the file encoded in a `CaRT` container file, or HTTP 404 if not found
pub const DOWNLOAD_SAMPLE_CART_URL: &str = "/v1/samples/download/cart";
319
/// Endpoint for getting the report for a given sample
///
/// Example: `/v1/samples/report/aabbccddeeff0011223344556677889900`
pub const SAMPLE_REPORT_URL: &str = "/v1/samples/report";
323
324/// Virus Total hits summary for a specific sample
325#[derive(Clone, Debug, Default, PartialEq, Deserialize, Serialize)]
326pub struct VirusTotalSummary {
327    /// Anti-Virus products which identified the sample as malicious
328    pub hits: u32,
329
330    /// Anti-Virus products available when last analyzed
331    pub total: u32,
332
333    /// Hit details in JSON format, if available
334    #[serde(default)]
335    pub detail: Option<serde_json::Value>,
336
337    /// Most recent analysis date, if available
338    #[serde(default, with = "ts_seconds_option")]
339    pub last_analysis_date: Option<DateTime<Utc>>,
340}
341
342// TODO: Add sections for parsed fields for documents, executables
343/// Information for an individual sample
344#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
345pub struct Report {
346    /// MD-5 hash
347    pub md5: String,
348
349    /// SHA-1 hash
350    pub sha1: String,
351
352    /// SHA-256 hash
353    pub sha256: String,
354
355    /// SHA-384 hash
356    pub sha384: String,
357
358    /// SHA-512 hash
359    pub sha512: String,
360
361    /// LZJD similarity hash, if available
362    /// <https://github.com/EdwardRaff/LZJD>
363    pub lzjd: Option<String>,
364
365    /// TLSH similarity hash, if available
366    /// <https://github.com/trendmicro/tlsh>
367    pub tlsh: Option<String>,
368
369    /// `SSDeep` similarity hash, if available
370    /// <https://ssdeep-project.github.io/ssdeep/index.html>
371    pub ssdeep: Option<String>,
372
373    /// Human hash
374    /// <https://github.com/zacharyvoase/humanhash>
375    pub humanhash: Option<String>,
376
377    /// The output from libmagic, aka the `file` command
378    /// <https://man7.org/linux/man-pages/man3/libmagic.3.html>
379    pub filecommand: Option<String>,
380
381    /// Sample size in bytes
382    pub bytes: u64,
383
384    /// Sample size in human-readable size (2048 becomes 2 kb, for example)
385    pub size: String,
386
387    /// Entropy of the file, values over 6.5 may indicate compression or encryption
388    pub entropy: f32,
389
390    /// Virus Total summary data, if enabled on the server
391    /// <https://www.virustotal.com>
392    #[serde(default)]
393    pub vt: Option<VirusTotalSummary>,
394}
395
396impl Display for Report {
397    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
398        writeln!(f, "Size: {} bytes, or {}", self.bytes, self.size)?;
399        writeln!(f, "Entropy: {}", self.entropy)?;
400        if let Some(filecmd) = &self.filecommand {
401            writeln!(f, "File command: {filecmd}")?;
402        }
403        if let Some(vt) = &self.vt {
404            writeln!(f, "VT Hits: {}/{}", vt.hits, vt.total)?;
405        }
406        writeln!(f, "MD5: {}", self.md5)?;
407        writeln!(f, "SHA-1: {}", self.sha1)?;
408        writeln!(f, "SHA256: {}", self.sha256)
409    }
410}
411
/// Endpoint for finding samples similar to a specific file, POST, authenticated.
pub const SIMILAR_SAMPLES_URL: &str = "/v1/samples/similar";
414
415/// The hash by which a sample is identified
416#[derive(Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize)]
417#[non_exhaustive]
418pub enum SimilarityHashType {
419    /// `SSDeep` similarity of the whole file
420    SSDeep,
421
422    /// `LZJD` similarity of the whole file
423    LZJD,
424
425    /// TLSH similarity of the hole file
426    TLSH,
427
428    /// `PEHash`, for PE32 files
429    PEHash,
430
431    /// Import Hash for executable files
432    ImportHash,
433
434    /// `SSDeep` fuzzy hash of the import data, for executable files
435    FuzzyImportHash,
436}
437
438impl SimilarityHashType {
439    /// For a similarity hash type, return:
440    /// * The database table and field which stores the hash
441    /// * If applicable, the similarity hash function which calculates the similarity
442    #[must_use]
443    pub fn get_table_field_simfunc(&self) -> (&'static str, Option<&'static str>) {
444        match self {
445            SimilarityHashType::SSDeep => ("file.ssdeep", Some("fuzzy_hash_compare")),
446            SimilarityHashType::LZJD => ("file.lzjd", Some("lzjd_compare")),
447            SimilarityHashType::TLSH => ("file.tlsh", Some("tlsh_compare")),
448            SimilarityHashType::PEHash => ("executable.pehash", None),
449            SimilarityHashType::ImportHash => ("executable.importhash", None),
450            SimilarityHashType::FuzzyImportHash => {
451                ("executable.importhashfuzzy", Some("fuzzy_hash_compare"))
452            }
453        }
454    }
455}
456
457impl Display for SimilarityHashType {
458    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
459        match self {
460            SimilarityHashType::SSDeep => write!(f, "SSDeep"),
461            SimilarityHashType::LZJD => write!(f, "LZJD"),
462            SimilarityHashType::TLSH => write!(f, "TLSH"),
463            SimilarityHashType::PEHash => write!(f, "PeHash"),
464            SimilarityHashType::ImportHash => write!(f, "Import Hash (IMPHASH)"),
465            SimilarityHashType::FuzzyImportHash => write!(f, "Fuzzy Import hash"),
466        }
467    }
468}
469
470/// Requesting hashes of possible similar samples by similarity hash
471#[derive(Clone, Debug, Deserialize, Serialize)]
472pub struct SimilarSamplesRequest {
473    /// The hashes of the requested sample
474    pub hashes: Vec<(SimilarityHashType, String)>,
475}
476
477/// Relation between a similar sample and the hashes by which the sample is similar
478#[derive(Clone, Debug, Deserialize, Serialize)]
479pub struct SimilarSample {
480    /// The SHA-256 hash of the found sample
481    pub sha256: String,
482
483    /// Matches from the requested sample to this sample by algorithm and score
484    pub algorithms: Vec<(SimilarityHashType, f32)>,
485}
486
487/// Response indicating samples which are similar
488#[derive(Clone, Debug, Deserialize, Serialize)]
489pub struct SimilarSamplesResponse {
490    /// The responses
491    pub results: Vec<SimilarSample>,
492
493    /// Possible messages from the server, if any
494    pub message: Option<String>,
495}
496
/// API endpoint for searching for files with some criteria
pub const SEARCH_URL: &str = "/v1/search";
499
500/// Searching the next batch from a prior search, or the initial search
501#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
502pub enum SearchType {
503    /// The next batch of results from a prior search
504    Continuation(uuid::Uuid),
505
506    /// The initial search
507    Search(SearchRequestParameters),
508}
509
510/// Search for a file by some criteria, all of which are an AND operation:
511/// * Partial hash
512/// * Name of the sample
513/// * Type of the sample
514/// * Libmagic description of the sample
515/// * Labels applied to the sample
516#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
517pub struct SearchRequestParameters {
518    /// Search for a file by partial hash
519    pub partial_hash: Option<(PartialHashSearchType, String)>,
520
521    /// Search for a file by whole or partial file name
522    pub file_name: Option<String>,
523
524    /// Maximum number of results to return, 100 results or fewer.
525    pub limit: u32,
526
527    /// Optionally search for samples of a specific file type.
528    pub file_type: Option<String>,
529
530    /// Optionally search for samples based on `libmagic`, also known as the file command.
531    pub magic: Option<String>,
532
533    /// Optionally search for samples with specific label(s).
534    pub labels: Option<Vec<String>>,
535
536    /// Get the returned result by a hash type.
537    /// [`PartialHashSearchType::Any`] results in SHA-256
538    pub response: PartialHashSearchType,
539}
540
541impl SearchRequestParameters {
542    /// Ensure the search request is valid:
543    /// * The partial hash is valid hexidecimal, if present
544    /// * At least one search parameter is provided
545    /// * The limit must be greater than zero
546    #[must_use]
547    #[inline]
548    pub fn is_valid(&self) -> bool {
549        if self.limit == 0 {
550            return false;
551        }
552
553        if let Some((_hash_type, partial_hash)) = &self.partial_hash {
554            let hex = hex::decode(partial_hash);
555            return hex.is_ok();
556        }
557
558        self.partial_hash.is_some()
559            || self.file_name.is_some()
560            || self.file_type.is_some()
561            || self.magic.is_some()
562            || self.labels.is_some()
563    }
564}
565
566/// This trait implementation is provided as a convenience. This does not create a valid object.
567impl Default for SearchRequestParameters {
568    fn default() -> Self {
569        Self {
570            partial_hash: None,
571            file_name: None,
572            limit: 100,
573            labels: None,
574            file_type: None,
575            magic: None,
576            response: PartialHashSearchType::default(),
577        }
578    }
579}
580
581/// Search for a file by some criteria
582/// Specifying both a hash and file name is an AND operation!
583#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
584pub struct SearchRequest {
585    /// Search or continuation of a search
586    pub search: SearchType,
587}
588
589impl SearchRequest {
590    /// Ensure the search request is valid:
591    /// * The partial hash is valid hexidecimal
592    /// * At least a file path or partial hash is provided
593    #[must_use]
594    #[inline]
595    pub fn is_valid(&self) -> bool {
596        if let SearchType::Search(search) = &self.search {
597            search.is_valid()
598        } else {
599            true
600        }
601    }
602}
603
604/// Search result
605#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
606pub struct SearchResponse {
607    /// Hashes of samples which match the search criteria
608    pub hashes: Vec<String>,
609
610    /// Identifier for getting the next batch of results
611    pub pagination: Option<uuid::Uuid>,
612
613    /// The total number of samples which match the search criteria
614    pub total_results: u64,
615
616    /// Optional server messages
617    pub message: Option<String>,
618}
619
620/// Specify the type of hash when searching for a partial match
621#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
622pub enum PartialHashSearchType {
623    /// Search by any known hash type
624    Any,
625
626    /// Search only for MD5 hashes
627    MD5,
628
629    /// Search only for SHA-1 hashes
630    SHA1,
631
632    /// Search only for SHA-256 hashes
633    #[default]
634    SHA256,
635
636    /// Search only for SHA-384 hashes
637    SHA384,
638
639    /// Search only for SHA-512 hashes
640    SHA512,
641}
642
643impl Display for PartialHashSearchType {
644    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
645        match self {
646            PartialHashSearchType::Any => write!(f, "any"),
647            PartialHashSearchType::MD5 => write!(f, "md5"),
648            PartialHashSearchType::SHA1 => write!(f, "sha1"),
649            PartialHashSearchType::SHA256 => write!(f, "sha256"),
650            PartialHashSearchType::SHA384 => write!(f, "sha384"),
651            PartialHashSearchType::SHA512 => write!(f, "sha512"),
652        }
653    }
654}
655
656impl TryInto<PartialHashSearchType> for &str {
657    type Error = String;
658
659    fn try_into(self) -> Result<PartialHashSearchType, Self::Error> {
660        match self {
661            "any" => Ok(PartialHashSearchType::Any),
662            "md5" => Ok(PartialHashSearchType::MD5),
663            "sha1" => Ok(PartialHashSearchType::SHA1),
664            "sha256" => Ok(PartialHashSearchType::SHA256),
665            "sha384" => Ok(PartialHashSearchType::SHA384),
666            "sha512" => Ok(PartialHashSearchType::SHA512),
667            x => Err(format!("Invalid hash type {x}")),
668        }
669    }
670}
671
672impl TryInto<PartialHashSearchType> for Option<&str> {
673    type Error = String;
674
675    fn try_into(self) -> Result<PartialHashSearchType, Self::Error> {
676        if let Some(hash) = self {
677            hash.try_into()
678        } else {
679            Ok(PartialHashSearchType::SHA256)
680        }
681    }
682}
683
/// API endpoint for listing labels
pub const LIST_LABELS_URL: &str = "/v1/labels";
686
687/// A label, used for describing sources and/or samples
688#[derive(Clone, Debug, Deserialize, Serialize)]
689pub struct Label {
690    /// Label ID
691    pub id: u64,
692
693    /// Label value
694    pub name: String,
695
696    /// Label parent
697    pub parent: Option<String>,
698}
699
700/// One or more available labels
701#[derive(Clone, Debug, Default, Deserialize, Serialize)]
702pub struct Labels(pub Vec<Label>);
703
704// Convenience functions
705impl Labels {
706    /// Number of labels
707    #[must_use]
708    pub fn len(&self) -> usize {
709        self.0.len()
710    }
711
712    /// If the labels are empty
713    #[must_use]
714    pub fn is_empty(&self) -> bool {
715        self.0.is_empty()
716    }
717}
718
719impl Display for Labels {
720    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
721        if self.is_empty() {
722            return writeln!(f, "No labels.");
723        }
724        for label in &self.0 {
725            let parent = if let Some(parent) = &label.parent {
726                format!(", parent: {parent}")
727            } else {
728                String::new()
729            };
730            writeln!(f, "{}: {}{parent}", label.id, label.name)?;
731        }
732        Ok(())
733    }
734}