malwaredb-virustotal 0.2.0

Logic and datatypes for interacting with VirusTotal
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
/// Report details for a Linux/Unix/BSD file
pub mod elf;

/// Report details for a Mach-O file
pub mod macho;

/// Report details for a PE32 file
pub mod pe;

use crate::VirusTotalError;

use chrono::serde::{ts_seconds, ts_seconds_option};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Top-level response to a file report request.
///
/// The response carries either the report payload under the `data` key
/// or an error description under the `error` key.
// Clippy flags the size difference between the two variants; it is deliberately allowed here.
#[allow(clippy::large_enum_variant)]
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum FileReportRequestResponse {
    /// The request succeeded; holds information about the requested report
    Data(FileReportData),

    /// The request was not successful; holds the error message
    Error(VirusTotalError),
}

/// Successful file report request response contents
///
/// This is the payload of [`FileReportRequestResponse::Data`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FileReportData {
    /// The file report details, the interesting part
    pub attributes: ScanResultAttributes,

    /// Report type, probably "file"
    ///
    /// Stored under the key `type` in the serialized form, since `type` is a Rust keyword.
    #[serde(rename = "type")]
    pub record_type: String,

    /// Report ID, also the file's SHA-256 hash
    pub id: String,

    /// Link(s) to the file report, stored as a string-to-string map
    pub links: HashMap<String, String>,
}

/// All scan results
/// <https://virustotal.readme.io/reference/files>
///
/// Date fields are (de)serialized as Unix timestamps in whole seconds through chrono's
/// `ts_seconds` / `ts_seconds_option` helpers. Fields marked `#[serde(default)]` fall back
/// to their empty/`None` value when the key is missing from the report.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ScanResultAttributes {
    /// When the file was created, often spoofed by malware
    #[serde(default, with = "ts_seconds_option")]
    pub creation_date: Option<DateTime<Utc>>,

    /// List of tags related to the file's capabilities
    /// Requires VirusTotal Premium
    pub capabilities_tags: Option<Vec<String>>,

    /// Extracted malware configuration
    /// Requires VirusTotal Premium
    pub malware_config: Option<HashMap<String, String>>,

    /// A description of the file type
    pub type_description: String,

    /// Trend Micro's Locality Sensitive Hash: <https://tlsh.org/>
    pub tlsh: Option<String>,

    /// VirusTotal's custom algorithm for clustering similar files
    pub vhash: Option<String>,

    /// Trend Micro's ELF hash
    pub telfhash: Option<String>,

    /// Tags which may show further details of the file type
    pub type_tags: Vec<String>,

    /// Additional attribute tags; empty when the report has none
    #[serde(default)]
    pub tags: Vec<String>,

    /// File names this sample has had when submitted to VirusTotal
    pub names: Vec<String>,

    /// When the file was last modified
    #[serde(with = "ts_seconds")]
    pub last_modification_date: DateTime<Utc>,

    /// Another first seen field
    // NOTE(review): "itw" presumably stands for "in the wild" -- confirm against the VirusTotal docs.
    #[serde(default, with = "ts_seconds_option")]
    pub first_seen_itw_date: Option<DateTime<Utc>>,

    /// Type tags which can be used with VirusTotal Intelligence
    pub type_tag: String,

    /// The number of times the file has been submitted to VirusTotal
    pub times_submitted: u32,

    /// Votes from the VirusTotal user community whether the file is dangerous
    pub total_votes: Votes,

    /// Size of the file, in bytes
    pub size: u64,

    /// Community votes as to the nature of the threat of this file
    pub popular_threat_classification: Option<PopularThreatClassification>,

    /// When the file was last submitted to VirusTotal
    #[serde(with = "ts_seconds")]
    pub last_submission_date: DateTime<Utc>,

    /// Antivirus results, where the key is the name of the antivirus software product
    /// More info: <https://docs.virustotal.com/reference/analyses-object>
    pub last_analysis_results: HashMap<String, AnalysisResult>,

    /// Results from TrID, an attempt to identify the file type
    /// See <https://mark0.net/soft-trid-e.html>
    pub trid: Option<Vec<TrID>>,

    /// Another file type detection program
    pub detectiteasy: Option<DetectItEasy>,

    /// SHA-256 hash of the file
    pub sha256: String,

    /// File extension for this file type
    pub type_extension: Option<String>,

    /// When the file was last analyzed by VirusTotal
    #[serde(with = "ts_seconds")]
    pub last_analysis_date: DateTime<Utc>,

    /// The number of unique sources which have submitted this file
    pub unique_sources: u32,

    /// When the file was first submitted to VirusTotal
    #[serde(with = "ts_seconds")]
    pub first_submission_date: DateTime<Utc>,

    /// MD-5 hash of the file
    pub md5: String,

    /// SSDeep fuzzy hash of the file
    /// See <https://ssdeep-project.github.io/ssdeep/index.html>
    pub ssdeep: String,

    /// SHA-1 of the file
    pub sha1: String,

    /// The output from libmagic, the `file` command for this file
    pub magic: String,

    /// Antivirus results summary
    pub last_analysis_stats: LastAnalysisStats,

    /// Dictionary containing the number of matched Sigma rules group by its severity
    /// <https://blog.virustotal.com/2021/05/context-is-king-part-i-crowdsourced.html>
    /// <https://virustotal.readme.io/docs/crowdsourced-sigma-rules>
    #[serde(default)]
    pub sigma_analysis_summary: HashMap<String, serde_json::Value>,

    /// Sigma results, if available
    /// <https://blog.virustotal.com/2021/05/context-is-king-part-i-crowdsourced.html>
    /// <https://virustotal.readme.io/docs/crowdsourced-sigma-rules>
    #[serde(default)]
    pub sigma_analysis_stats: Option<SigmaAnalysisStats>,

    /// Results from VT's Sigma rules
    /// See <https://github.com/SigmaHQ/sigma/wiki/Rule-Creation-Guide>
    #[serde(default)]
    pub sigma_analysis_results: Vec<SigmaAnalysisResults>,

    /// Executables: Information on packers, if available
    #[serde(default)]
    pub packers: HashMap<String, String>,

    /// The most interesting name of all the file names used with this file
    pub meaningful_name: String,

    /// The file's reputation from all votes,
    /// see <https://support.virustotal.com/hc/en-us/articles/115002146769-Vote-comment>
    // NOTE(review): VirusTotal appears to describe reputation as a signed score; if negative
    // values occur, an unsigned field would make deserialization of the whole report fail.
    // Confirm against the API docs before relying on this type.
    pub reputation: u32,

    /// Mach-O details, if a Mach-O file (macOS, iOS, etc)
    /// This is a vector since there is a separate [`macho::MachoInfo`] struct per
    /// each architecture if this is a Fat Mach-O file.
    pub macho_info: Option<Vec<macho::MachoInfo>>,

    /// Portable Executable (PE) details, if a PE32 file (Windows, OS2)
    pub pe_info: Option<pe::PEInfo>,

    /// PE32: DotNet Assembly Information
    #[serde(default)]
    pub dot_net_assembly: Option<pe::dotnet::DotNetAssembly>,

    /// PE32: SHA-256 hash used by Microsoft's AppLocker to ensure the binary is unmodified
    #[serde(default)]
    pub authentihash: Option<String>,

    /// Executable and Linkable Format (ELF) details, if an ELF (Linux, *BSD, Haiku, Solaris, etc)
    #[serde(default)]
    pub elf_info: Option<elf::ElfInfo>,

    /// Executables: Signature information, varies by executable file type
    #[serde(default)]
    pub signature_info: HashMap<String, serde_json::Value>,

    /// Results from opening the file in various sandbox environments
    #[serde(default)]
    pub sandbox_verdicts: HashMap<String, SandboxVerdict>,

    /// Anything else not captured by this struct
    ///
    /// Because of `#[serde(flatten)]`, every key that no other field claims is collected
    /// here as raw JSON.
    #[serde(flatten)]
    pub extra: HashMap<String, serde_json::Value>,
}

/// Community votes whether a file is benign or malicious
///
/// Both counters are required when deserializing; neither has a default.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Votes {
    /// Votes that the file is harmless
    pub harmless: u32,

    /// Votes that the file is malicious
    pub malicious: u32,
}

/// Popular threat classification contains threat information pulled from antivirus results
/// <https://virustotal.readme.io/reference/popular_threat_classification>
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PopularThreatClassification {
    /// Popular threat category and name
    pub suggested_threat_label: String,

    /// Threat categories or types, if available; examples might be "ransomware" or "trojan"
    ///
    /// Empty when the key is missing from the report.
    #[serde(default)]
    pub popular_threat_category: Vec<PopularThreatClassificationInner>,

    /// Threat name(s) from antivirus results, if available
    ///
    /// Empty when the key is missing from the report.
    #[serde(default)]
    pub popular_threat_name: Vec<PopularThreatClassificationInner>,
}

/// Popular threat classification details
///
/// One entry of the category or name lists in [`PopularThreatClassification`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PopularThreatClassificationInner {
    /// Votes for this threat type
    pub count: u32,

    /// Type of threat
    pub value: String,
}

/// Result per each anti-virus product
///
/// Used as the value type of the `last_analysis_results` map in [`ScanResultAttributes`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AnalysisResult {
    /// Type of file or threat
    pub category: String,

    /// Anti-virus engine
    pub engine_name: String,

    /// Version of the antivirus engine, if reported
    pub engine_version: Option<String>,

    /// Name of the malware identified, if any
    pub result: Option<String>,

    /// Method for identifying the malware
    pub method: String,

    /// The date of the antivirus engine, if reported; kept as the raw string from the report
    pub engine_update: Option<String>,
}

/// File type based on TrID
/// <https://virustotal.readme.io/reference/files-object-trid>
/// <https://mark0.net/soft-trid-e.html>
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TrID {
    /// Detected file type
    pub file_type: String,

    /// Probability the file is of this type
    // NOTE(review): the scale (0-1 vs. 0-100) is not visible from this file -- confirm.
    pub probability: f32,
}

/// Output from Detect It Easy <https://github.com/horsicq/Detect-It-Easy>
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DetectItEasy {
    /// File type
    pub filetype: String,

    /// Details; empty when the key is missing from the report
    #[serde(default)]
    pub values: Vec<DetectItEasyValues>,
}

/// File type from Detect It Easy
/// <https://virustotal.readme.io/reference/detectiteasy>
/// <https://github.com/horsicq/Detect-It-Easy>
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DetectItEasyValues {
    /// Artifacts detected in the file, if any
    pub info: Option<String>,

    /// File type
    ///
    /// Stored under the key `type` in the serialized form, since `type` is a Rust keyword.
    #[serde(rename = "type")]
    pub detection_type: String,

    /// Name of the file
    // NOTE(review): this may actually be the name of the detected tool/packer rather than
    // of the file -- confirm against the VirusTotal reference.
    pub name: String,

    /// Version, if reported
    pub version: Option<String>,
}

/// Last Analysis Stats
///
/// Per-category counts of antivirus verdicts. Two keys use hyphens in the serialized
/// form (`type-unsupported`, `confirmed-timeout`) and are renamed to valid Rust identifiers.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LastAnalysisStats {
    /// Antivirus products which indicate this file is harmless
    pub harmless: u32,

    /// Antivirus products which don't support this file type
    #[serde(rename = "type-unsupported")]
    pub type_unsupported: u32,

    /// Antivirus products which indicate the file is suspicious
    pub suspicious: u32,

    /// Antivirus products which timed out trying to evaluate the file
    // NOTE(review): how this differs from `timeout` is not visible from this file -- confirm
    // against the VirusTotal reference.
    #[serde(rename = "confirmed-timeout")]
    pub confirmed_timeout: u32,

    /// Antivirus products which timed out trying to evaluate the file
    pub timeout: u32,

    /// Antivirus products which failed to analyze the file
    pub failure: u32,

    /// Antivirus products which indicate the file is malicious
    pub malicious: u32,

    /// Antivirus products which didn't detect a known malware type
    pub undetected: u32,
}

impl LastAnalysisStats {
    /// Number of antivirus products that produced a verdict for this file.
    ///
    /// Sums the harmless, suspicious, malicious and undetected counters; the error
    /// categories (including unsupported file type) are left out.
    pub fn av_count(&self) -> u32 {
        [
            self.harmless,
            self.suspicious,
            self.malicious,
            self.undetected,
        ]
        .iter()
        .sum()
    }

    /// Number of antivirus products that consider the file benign,
    /// i.e. the harmless and undetected counters combined.
    pub fn safe_count(&self) -> u32 {
        let Self {
            harmless,
            undetected,
            ..
        } = self;
        *harmless + *undetected
    }

    /// Number of antivirus products that hit an error on this file:
    /// unsupported type, confirmed timeout, timeout, or failure.
    pub fn error_count(&self) -> u32 {
        [
            self.type_unsupported,
            self.confirmed_timeout,
            self.timeout,
            self.failure,
        ]
        .iter()
        .sum()
    }

    /// To err on the side of caution, a file is only called benign if no antivirus
    /// product calls it malicious or suspicious.
    pub fn is_benign(&self) -> bool {
        matches!((self.malicious, self.suspicious), (0, 0))
    }
}

/// Sandbox verdicts, see <https://virustotal.readme.io/reference/sandbox_verdicts>
///
/// Used as the value type of the `sandbox_verdicts` map in [`ScanResultAttributes`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SandboxVerdict {
    /// Sandbox verdict
    pub category: SandboxVerdictCategory,

    /// Verdict confidence from 0 to 100.
    pub confidence: u8,

    /// Name of the sandbox environment
    pub sandbox_name: String,

    /// Raw sandbox verdicts; empty when the key is missing from the report
    #[serde(default)]
    pub malware_classification: Vec<String>,
}

/// Sandbox verdict categories, see <https://virustotal.readme.io/reference/sandbox_verdicts>
///
/// Each variant deserializes from its capitalized name (serde's default) or from the
/// all-lowercase spelling (the alias). Serialization emits the capitalized variant name.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum SandboxVerdictCategory {
    /// Sample was suspicious
    #[serde(alias = "suspicious")]
    Suspicious,

    /// Sample was malicious
    #[serde(alias = "malicious")]
    Malicious,

    /// Sample was harmless
    #[serde(alias = "harmless")]
    Harmless,

    /// Threat not detected
    #[serde(alias = "undetected")]
    Undetected,
}

/// Sigma analysis stats
/// <https://virustotal.readme.io/reference/sigma_analysis_stats>
/// <https://virustotal.readme.io/docs/crowdsourced-sigma-rules>
///
/// All four severity counters are required when deserializing.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SigmaAnalysisStats {
    /// Number of matched low severity rules.
    pub low: u64,

    /// Number of matched medium severity rules.
    pub medium: u64,

    /// Number of matched high severity rules.
    pub high: u64,

    /// Number of matched critical severity rules.
    pub critical: u64,
}

/// Sigma analysis results
/// <https://virustotal.readme.io/reference/sigma_analysis_results>
/// <https://virustotal.readme.io/docs/crowdsourced-sigma-rules>
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SigmaAnalysisResults {
    /// Sigma rule title
    pub rule_title: String,

    /// Sigma rule source description
    pub rule_source: String,

    /// The `HashMap` likely has one field: "values" which is another map of event data
    ///
    /// The values are kept as raw JSON because their shape is not modeled here.
    pub match_context: Vec<HashMap<String, serde_json::Value>>,
}

#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Each stored VirusTotal report must deserialize into the data variant, expose the
    /// format-specific section matching its file type, and leave nothing in `extra`.
    #[rstest]
    #[case::rtf(include_str!("../../testdata/fff40032c3dc062147c530e3a0a5c7e6acda4d1f1369fbc994cddd3c19a2de88.json"), "Rich Text Format")]
    #[case::com(include_str!("../../testdata/0001a1252300b4732e4a010a5dd13a291dcb8b0ebee6febedb5152dfb0bcd488.json"), "DOS COM")]
    #[case::word(include_str!("../../testdata/001015aafcae8a6942366cbb0e7d39c0738752a7800c41ea1c655d47b0a4d04c.json"), "MS Word Document")]
    #[case::exedotnet(include_str!("../../testdata/417c06700c3e899f0554654102fa064385bf1d3ecec32471ac488096d81bf38c.json"), "Win32 EXE")] // .Net
    #[case::macho(include_str!("../../testdata/b8e7a581d85807ea6659ea2f681bd16d5baa7017ff144aa3030aefba9cbcdfd3.json"), "Mach-O")]
    #[case::exe(include_str!("../../testdata/ddecc35aa198f401948c73a0d53fd93c4ecb770198ad7db308de026745c56b71.json"), "Win32 EXE")]
    #[case::elf(include_str!("../../testdata/de10ba5e5402b46ea975b5cb8a45eb7df9e81dc81012fd4efd145ed2dce3a740.json"), "ELF")]
    fn deserialize_valid_report(#[case] report: &str, #[case] file_type: &str) {
        let parsed: FileReportRequestResponse =
            serde_json::from_str(report).expect("failed to deserialize VT report");

        // Anything other than the data variant means the fixture was not a report.
        let data = match parsed {
            FileReportRequestResponse::Data(data) => data,
            FileReportRequestResponse::Error(_) => panic!("File wasn't a report!"),
        };
        let attributes = &data.attributes;

        // Executable formats must carry their format-specific details.
        match file_type {
            "Mach-O" => assert!(attributes.macho_info.is_some()),
            "Win32 EXE" => assert!(attributes.pe_info.is_some()),
            "ELF" => assert!(attributes.elf_info.is_some()),
            _ => {}
        }

        println!("{data:?}");
        assert_eq!(attributes.type_description, file_type);
        assert_eq!(data.record_type, "file");

        // Print any unmodeled keys before failing, so a failure shows what is missing.
        attributes.extra.iter().for_each(|(key, value)| {
            println!("KEY: {key}");
            println!("VALUE: {value}\n\n");
        });
        assert!(attributes.extra.is_empty());
    }

    /// Error payloads must deserialize into the error variant, never the data variant.
    #[rstest]
    #[case(include_str!("../../testdata/not_found.json"))]
    #[case(include_str!("../../testdata/wrong_key.json"))]
    fn deserialize_errors(#[case] contents: &str) {
        let parsed: FileReportRequestResponse =
            serde_json::from_str(contents).expect("failed to deserialize VT error response");

        if let FileReportRequestResponse::Data(_) = parsed {
            panic!("Should have been an error type!");
        }
    }
}