malwaredb-virustotal 0.0.8

Logic and datatypes for interacting with VirusTotal
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
pub mod elf;
pub mod macho;
pub mod pe;

use crate::VirusTotalError;

use chrono::serde::{ts_seconds, ts_seconds_option};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Top-level body of a file report response.
///
/// The JSON object carries either a `data` key holding the report or an
/// `error` key holding the error details; each key maps to one variant.
// Clippy flags the size gap between the two variants; the lint is silenced
// here rather than boxing the larger payload.
#[allow(clippy::large_enum_variant)]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum FileReportRequestResponse {
    /// The report for the requested file (JSON key `data`)
    #[serde(rename = "data")]
    Data(FileReportData),
    /// The request failed (JSON key `error`)
    #[serde(rename = "error")]
    Error(VirusTotalError),
}

/// Contents of the `data` object of a successful file report response.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FileReportData {
    /// The scan results and metadata for the file
    pub attributes: ScanResultAttributes,

    /// Kind of record, taken from the JSON key `type`
    /// (the bundled sample reports all use `"file"`)
    #[serde(rename = "type")]
    pub record_type: String,

    /// Identifier of the record
    // NOTE(review): presumably the file's SHA-256 hash; confirm against the API docs.
    pub id: String,

    /// Named links related to this record
    // NOTE(review): presumably link name -> URL (e.g. "self"); confirm against the API docs.
    pub links: HashMap<String, String>,
}

/// All scan results and metadata for a file: the `attributes` object of a file report.
///
/// Fields that are `Option` or marked `#[serde(default)]` may be missing from the
/// response; every other field must be present for deserialization to succeed.
/// Keys which have no dedicated field are collected into [`ScanResultAttributes::extra`].
///
/// See <https://virustotal.readme.io/reference/files>
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ScanResultAttributes {
    /// When the file was created, often spoofed by malware
    /// (Unix timestamp in seconds on the wire)
    #[serde(default, with = "ts_seconds_option")]
    pub creation_date: Option<DateTime<Utc>>,

    /// List of tags related to the file's capabilities.
    /// Requires VirusTotal Premium
    pub capabilities_tags: Option<Vec<String>>,

    /// Extracted malware configuration.
    /// Requires VirusTotal Premium
    pub malware_config: Option<HashMap<String, String>>,

    /// A description of the file type
    pub type_description: String,

    /// Trend Micro's Locality Sensitive Hash: <https://tlsh.org/>
    pub tlsh: Option<String>,

    /// VirusTotal's custom algorithm for clustering similar files
    pub vhash: Option<String>,

    /// Trend Micro's ELF hash
    pub telfhash: Option<String>,

    /// Tags which may show further details of the file type
    pub type_tags: Vec<String>,

    /// Additional attribute tags, empty if the response has none
    #[serde(default)]
    pub tags: Vec<String>,

    /// File names this sample has had when submitted to VirusTotal
    pub names: Vec<String>,

    /// When the file was last modified (Unix timestamp in seconds on the wire)
    #[serde(with = "ts_seconds")]
    pub last_modification_date: DateTime<Utc>,

    /// Another first seen field (Unix timestamp in seconds on the wire)
    // NOTE(review): `itw` presumably stands for "in the wild"; confirm against the API docs.
    #[serde(default, with = "ts_seconds_option")]
    pub first_seen_itw_date: Option<DateTime<Utc>>,

    /// Type tag which can be used with VirusTotal Intelligence
    pub type_tag: String,

    /// The number of times the file has been submitted to VirusTotal
    pub times_submitted: u32,

    /// Votes from the VirusTotal user community whether the file is dangerous
    pub total_votes: Votes,

    /// Size of the file, in bytes
    pub size: u64,

    /// Community votes as to the nature of the threat of this file
    pub popular_threat_classification: Option<PopularThreatClassification>,

    /// When the file was last submitted to VirusTotal
    /// (Unix timestamp in seconds on the wire)
    #[serde(with = "ts_seconds")]
    pub last_submission_date: DateTime<Utc>,

    /// Antivirus results, where the key is the name of the antivirus software product.
    /// More info: <https://docs.virustotal.com/reference/analyses-object>
    pub last_analysis_results: HashMap<String, AnalysisResult>,

    /// Results from TrID, an attempt to identify the file type.
    /// See <https://mark0.net/soft-trid-e.html>
    pub trid: Option<Vec<TrID>>,

    /// Results from Detect It Easy, another file type detection program
    pub detectiteasy: Option<DetectItEasy>,

    /// SHA-256 hash of the file
    pub sha256: String,

    /// File extension for this file type
    pub type_extension: Option<String>,

    /// When the file was last analyzed by VirusTotal
    /// (Unix timestamp in seconds on the wire)
    #[serde(with = "ts_seconds")]
    pub last_analysis_date: DateTime<Utc>,

    /// The number of unique sources which have submitted this file
    pub unique_sources: u32,

    /// When the file was first submitted to VirusTotal
    /// (Unix timestamp in seconds on the wire)
    #[serde(with = "ts_seconds")]
    pub first_submission_date: DateTime<Utc>,

    /// MD5 hash of the file
    pub md5: String,

    /// SSDeep fuzzy hash of the file.
    /// See <https://ssdeep-project.github.io/ssdeep/index.html>
    pub ssdeep: String,

    /// SHA-1 hash of the file
    pub sha1: String,

    /// The output from libmagic, the `file` command for this file
    pub magic: String,

    /// Antivirus results summary
    pub last_analysis_stats: LastAnalysisStats,

    /// Dictionary containing the number of matched Sigma rules grouped by severity,
    /// empty if the response has none
    #[serde(default)]
    pub sigma_analysis_summary: HashMap<String, serde_json::Value>,

    /// Counts of matched Sigma rules per severity level, if present
    #[serde(default)]
    pub sigma_analysis_stats: Option<SigmaAnalysisStats>,

    /// Results from VT's Sigma rules, empty if the response has none.
    /// See <https://github.com/SigmaHQ/sigma/wiki/Rule-Creation-Guide>
    #[serde(default)]
    pub sigma_analysis_results: Vec<SigmaAnalysisResults>,

    /// Executables: Information on packers, if available
    #[serde(default)]
    pub packers: HashMap<String, String>,

    /// The most interesting name of all the file names used with this file
    pub meaningful_name: String,

    /// The file's reputation from all votes,
    /// see <https://support.virustotal.com/hc/en-us/articles/115002146769-Vote-comment>
    // NOTE(review): an unsigned type rejects negative values during deserialization;
    // confirm VirusTotal never reports a negative reputation score.
    pub reputation: u32,

    /// Mach-O details, if a Mach-O file (macOS, iOS, etc).
    /// This is a vector since there is a separate `macho::MachoInfo` struct per
    /// each architecture if this is a Fat Mach-O file.
    pub macho_info: Option<Vec<macho::MachoInfo>>,

    /// Portable Executable (PE) details, if a PE32 file (Windows, OS2)
    pub pe_info: Option<pe::PEInfo>,

    /// PE32: DotNet Assembly Information
    #[serde(default)]
    pub dot_net_assembly: Option<pe::dotnet::DotNetAssembly>,

    /// PE32: SHA-256 hash used by Microsoft's AppLocker to ensure the binary is unmodified
    #[serde(default)]
    pub authentihash: Option<String>,

    /// Executable and Linkable Format (ELF) details, if an ELF (Linux, *BSD, Haiku, Solaris, etc)
    #[serde(default)]
    pub elf_info: Option<elf::ElfInfo>,

    /// Executables: Signature information, varies by executable file type
    #[serde(default)]
    pub signature_info: HashMap<String, serde_json::Value>,

    /// Results from opening the file in various sandbox environments
    // NOTE(review): the map key is presumably the sandbox name; confirm against the API docs.
    #[serde(default)]
    pub sandbox_verdicts: HashMap<String, SandboxVerdict>,

    /// Anything else not captured by this struct: every key without a dedicated
    /// field above is collected here as raw JSON
    #[serde(flatten)]
    pub extra: HashMap<String, serde_json::Value>,
}

/// Tally of votes on a file, split by verdict.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Votes {
    /// Votes that the file is harmless
    pub harmless: u32,

    /// Votes that the file is malicious
    pub malicious: u32,
}

/// Summary of how the threat posed by a file is most commonly classified.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PopularThreatClassification {
    /// The suggested label for this threat
    pub suggested_threat_label: String,

    /// Threat categories with their counts, empty if the response has none
    #[serde(default)]
    pub popular_threat_category: Vec<PopularThreatClassificationInner>,

    /// Threat names with their counts, empty if the response has none
    #[serde(default)]
    pub popular_threat_name: Vec<PopularThreatClassificationInner>,
}

/// One entry of a [`PopularThreatClassification`] list: a threat value and its count.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PopularThreatClassificationInner {
    /// Votes for this threat type
    pub count: u32,

    /// Type of threat
    pub value: String,
}

/// Result from a single antivirus product.
///
/// The optional fields are `None` when the response omits them or sets them to `null`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AnalysisResult {
    /// Type of file or threat
    pub category: String,

    /// Name of the antivirus engine
    pub engine_name: String,

    /// Version of the antivirus engine
    pub engine_version: Option<String>,

    /// Name of the malware identified
    pub result: Option<String>,

    /// Method for identifying the malware
    pub method: String,

    /// The date of the antivirus engine, kept as the raw string from the response
    pub engine_update: Option<String>,
}

/// One candidate file type reported by TrID.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TrID {
    /// Detected file type
    pub file_type: String,

    /// Probability the file is of this type
    // NOTE(review): presumably a percentage (0-100) rather than a 0-1 fraction; confirm.
    pub probability: f32,
}

/// Output from Detect It Easy, see <https://github.com/horsicq/Detect-It-Easy>
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DetectItEasy {
    /// File type as reported by Detect It Easy
    pub filetype: String,

    /// Individual detections, empty if the response has none
    #[serde(default)]
    pub values: Vec<DetectItEasyValues>,
}

/// A single detection entry in the Detect It Easy output.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DetectItEasyValues {
    /// Additional information about the detection, if any
    pub info: Option<String>,

    /// Kind of detection, taken from the JSON key `type`
    // NOTE(review): presumably values such as "Compiler", "Linker" or "Packer"; confirm.
    #[serde(rename = "type")]
    pub detection_type: String,

    /// Name of the detected item
    pub name: String,

    /// Version of the detected item, if reported
    pub version: Option<String>,
}

/// Summary of the most recent antivirus analysis: how many products fell
/// into each outcome. All eight counters must be present in the response.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LastAnalysisStats {
    /// Antivirus products which indicate this file is harmless
    pub harmless: u32,

    /// Antivirus products which don't support this file type
    /// (JSON key `type-unsupported`)
    #[serde(rename = "type-unsupported")]
    pub type_unsupported: u32,

    /// Antivirus products which indicate the file is suspicious
    pub suspicious: u32,

    /// Antivirus products which timed out trying to evaluate the file
    /// (JSON key `confirmed-timeout`)
    // NOTE(review): the distinction from `timeout` is not documented here; confirm with the API docs.
    #[serde(rename = "confirmed-timeout")]
    pub confirmed_timeout: u32,

    /// Antivirus products which timed out trying to evaluate the file
    pub timeout: u32,

    /// Antivirus products which failed to analyze the file
    pub failure: u32,

    /// Antivirus products which indicate the file is malicious
    pub malicious: u32,

    /// Antivirus products which didn't detect a known malware type
    pub undetected: u32,
}

impl LastAnalysisStats {
    /// Return the number of antivirus products which could have evaluated this file,
    /// and exclude errors, including unsupported file type.
    ///
    /// The sum saturates at `u32::MAX` instead of overflowing.
    pub fn av_count(&self) -> u32 {
        Self::saturating_total(&[
            self.harmless,
            self.suspicious,
            self.malicious,
            self.undetected,
        ])
    }

    /// Return the number of antivirus products which think the file is benign,
    /// which is harmless and undetected.
    ///
    /// The sum saturates at `u32::MAX` instead of overflowing.
    pub fn safe_count(&self) -> u32 {
        Self::saturating_total(&[self.harmless, self.undetected])
    }

    /// Return the number of antivirus products which had errors for this file:
    /// unsupported file type, both kinds of timeout, and failures.
    ///
    /// The sum saturates at `u32::MAX` instead of overflowing.
    pub fn error_count(&self) -> u32 {
        Self::saturating_total(&[
            self.type_unsupported,
            self.confirmed_timeout,
            self.timeout,
            self.failure,
        ])
    }

    /// In an effort to err on the side of caution, call a file benign only if no
    /// antivirus products call it malicious or suspicious.
    pub fn is_benign(&self) -> bool {
        self.malicious == 0 && self.suspicious == 0
    }

    /// Add up the counters without overflowing.
    ///
    /// The counters come straight from a deserialized response, so plain `+`
    /// could panic (debug builds) or silently wrap (release builds) on
    /// malformed data; saturating keeps the result well-defined.
    fn saturating_total(counts: &[u32]) -> u32 {
        counts
            .iter()
            .fold(0, |total, &count| total.saturating_add(count))
    }
}

/// Verdict from one sandbox environment after opening the file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SandboxVerdict {
    /// Category assigned to the file by the sandbox
    pub category: String,

    /// Verdict confidence from 0 to 100.
    pub confidence: u8,

    /// Name of the sandbox which produced this verdict
    pub sandbox_name: String,

    /// Malware classifications from the sandbox, empty if the response has none
    #[serde(default)]
    pub malware_classification: Vec<String>,
}

/// Counters for Sigma analysis, one per severity level.
// NOTE(review): presumably the number of matched Sigma rules at each severity; confirm.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SigmaAnalysisStats {
    /// Count at low severity
    pub low: u64,
    /// Count at medium severity
    pub medium: u64,
    /// Count at high severity
    pub high: u64,
    /// Count at critical severity
    pub critical: u64,
}

/// A single Sigma rule match for a file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SigmaAnalysisResults {
    /// Sigma rule title
    pub rule_title: String,

    /// Sigma rule source description
    pub rule_source: String,

    /// Context of each match, kept as raw JSON.
    /// The `HashMap` likely has one field: "values" which is another map of event data
    pub match_context: Vec<HashMap<String, serde_json::Value>>,
}

#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Each sample report must deserialize into the `Data` variant, carry the
    /// expected type description and format-specific details, and leave no
    /// key uncaptured in `extra`.
    #[rstest]
    #[case(include_str!("../../testdata/fff40032c3dc062147c530e3a0a5c7e6acda4d1f1369fbc994cddd3c19a2de88.json"), "Rich Text Format")]
    #[case(include_str!("../../testdata/0001a1252300b4732e4a010a5dd13a291dcb8b0ebee6febedb5152dfb0bcd488.json"), "DOS COM")]
    #[case(include_str!("../../testdata/001015aafcae8a6942366cbb0e7d39c0738752a7800c41ea1c655d47b0a4d04c.json"), "MS Word Document")]
    #[case(include_str!("../../testdata/417c06700c3e899f0554654102fa064385bf1d3ecec32471ac488096d81bf38c.json"), "Win32 EXE")] // .Net
    #[case(include_str!("../../testdata/b8e7a581d85807ea6659ea2f681bd16d5baa7017ff144aa3030aefba9cbcdfd3.json"), "Mach-O")]
    #[case(include_str!("../../testdata/ddecc35aa198f401948c73a0d53fd93c4ecb770198ad7db308de026745c56b71.json"), "Win32 EXE")]
    #[case(include_str!("../../testdata/de10ba5e5402b46ea975b5cb8a45eb7df9e81dc81012fd4efd145ed2dce3a740.json"), "ELF")]
    fn deserialize_valid_report(#[case] report: &str, #[case] file_type: &str) {
        let parsed: FileReportRequestResponse =
            serde_json::from_str(report).expect("failed to deserialize VT report");

        // Anything other than a report payload is a test failure.
        let data = match parsed {
            FileReportRequestResponse::Data(data) => data,
            FileReportRequestResponse::Error(_) => panic!("File wasn't a report!"),
        };

        // Executable formats must come with their format-specific details.
        match file_type {
            "Mach-O" => assert!(data.attributes.macho_info.is_some()),
            "Win32 EXE" => assert!(data.attributes.pe_info.is_some()),
            "ELF" => assert!(data.attributes.elf_info.is_some()),
            _ => {}
        }

        println!("{data:?}");
        assert_eq!(data.attributes.type_description, file_type);
        assert_eq!(data.record_type, "file");

        // Print whatever was not captured so a failure below is easy to diagnose.
        data.attributes.extra.iter().for_each(|(key, value)| {
            println!("KEY: {key}");
            println!("VALUE: {value}\n\n");
        });
        assert!(data.attributes.extra.is_empty());
    }

    /// Error responses must deserialize into the `Error` variant.
    #[rstest]
    #[case(include_str!("../../testdata/not_found.json"))]
    #[case(include_str!("../../testdata/wrong_key.json"))]
    fn deserialize_errors(#[case] contents: &str) {
        let parsed: FileReportRequestResponse =
            serde_json::from_str(contents).expect("failed to deserialize VT error response");

        if let FileReportRequestResponse::Data(_) = parsed {
            panic!("Should have been an error type!");
        }
    }
}