1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
use malwaredb_api::{SupportedFileType, SupportedFileTypes};
use malwaredb_types::utils::EntropyCalc;

use fuzzyhash::FuzzyHash;
use human_hash::humanize;
use magic::cookie::DatabasePaths;
use malwaredb_lzjd::{LZDict, Murmur3HashState};
use md5::Md5;
use sha1::Sha1;
use sha2::{Digest, Sha256, Sha384, Sha512};
use tlsh_fixed::TlshBuilder;
use tracing::error;
use uuid::Uuid;

/// Hashes and descriptive attributes computed from a file's contents.
///
/// Instances are produced by [`FileMetadata::new`], which fills in every
/// field from a byte slice and an optional file name.
#[derive(Debug)]
pub struct FileMetadata {
    /// File name, if one was supplied; stored ASCII-lowercased by `new`.
    pub name: Option<String>,
    /// Length of the contents in bytes.
    pub size: u32,
    /// Entropy of the contents as reported by `EntropyCalc::entropy`.
    pub entropy: f32,
    /// Hex-encoded SHA-1 digest of the contents.
    pub sha1: String,
    /// Hex-encoded SHA-256 digest of the contents.
    pub sha256: String,
    /// Hex-encoded SHA-384 digest of the contents.
    pub sha384: String,
    /// Hex-encoded SHA-512 digest of the contents.
    pub sha512: String,
    /// Hex-encoded MD5 digest of the contents.
    pub md5: String,
    /// LZJD similarity hash in its string form; `new` always populates it.
    pub lzjd: Option<String>,
    /// SSDeep fuzzy hash in its string form; `new` always populates it.
    pub ssdeep: Option<String>,
    /// SDHash similarity hash; `new` currently always leaves this `None`.
    pub sdhash: Option<String>,
    /// TLSH similarity hash, or `None` when the TLSH builder fails to
    /// produce one for the given contents.
    pub tlsh: Option<String>,
    /// Human-readable hash derived from the MD5 digest (parsed as a UUID)
    /// via `human_hash::humanize`.
    pub humanhash: String,
    /// Description returned by libmagic for the contents; an empty string
    /// when libmagic could not be opened, loaded, or queried.
    pub file_command: String,
}

impl FileMetadata {
    pub fn new(contents: &[u8], name: Option<&str>) -> Self {
        let mut sha1 = Sha1::new();
        sha1.update(contents);
        let sha1 = sha1.finalize();

        let mut sha256 = Sha256::new();
        sha256.update(contents);
        let sha256 = sha256.finalize();

        let mut sha384 = Sha384::new();
        sha384.update(contents);
        let sha384 = sha384.finalize();

        let mut sha512 = Sha512::new();
        sha512.update(contents);
        let sha512 = sha512.finalize();

        let mut md5 = Md5::new();
        md5.update(contents);
        let md5 = md5.finalize();

        let build_hasher = Murmur3HashState::default();
        let lzjd_str =
            LZDict::from_bytes_stream(contents.iter().copied(), &build_hasher).to_string();

        let mut builder = TlshBuilder::new(
            tlsh_fixed::BucketKind::Bucket256,
            tlsh_fixed::ChecksumKind::ThreeByte,
            tlsh_fixed::Version::Version4,
        );

        builder.update(contents);

        let tlsh = if let Ok(hasher) = builder.build() {
            Some(hasher.hash())
        } else {
            None
        };

        let md5 = hex::encode(md5);
        let uuid = Uuid::parse_str(&md5).unwrap();

        let file_command = {
            if let Ok(cookie) = magic::Cookie::open(magic::cookie::Flags::ERROR) {
                let db_paths = DatabasePaths::default();
                if let Ok(cookie) = cookie.load(&db_paths) {
                    if let Ok(output) = cookie.buffer(contents) {
                        output
                    } else {
                        error!("LibMagic: failed to get output for buffer");
                        "".into()
                    }
                } else {
                    error!("LibMagic: failed to load signature database");
                    "".into()
                }
            } else {
                error!("LibMagic: failed to get handle");
                "".into()
            }
        };

        Self {
            name: name.map(|n| n.to_ascii_lowercase()),
            size: contents.len() as u32,
            entropy: contents.entropy(),
            sha1: hex::encode(sha1),
            sha256: hex::encode(sha256),
            sha384: hex::encode(sha384),
            sha512: hex::encode(sha512),
            md5,
            lzjd: Some(lzjd_str),
            ssdeep: Some(FuzzyHash::new(contents).to_string()),
            sdhash: None,
            tlsh,
            humanhash: humanize(&uuid, 4),
            file_command,
        }
    }
}

/// A file type known to the system, identified by its magic numbers.
#[derive(Debug, Clone)]
pub struct FileType {
    /// Database ID number
    pub id: i32,

    /// Friendly name
    pub name: String,

    /// Description of the type
    pub description: Option<String>,

    /// Magic numbers as bytes.
    /// These are the first few bytes of the file which identify its type.
    /// Some types have more than one possible magic number, though it's rare.
    pub magic: Vec<Vec<u8>>,

    /// Whether or not this file is executable on some system.
    /// Assumption: if not executable, it's a document.
    pub executable: bool,
}

/// Newtype wrapper around a list of [`FileType`] entries, used so the whole
/// list can be converted into `SupportedFileTypes` through a `From` impl.
pub struct FileTypes(pub Vec<FileType>);

impl From<FileType> for SupportedFileType {
    fn from(value: FileType) -> Self {
        Self {
            name: value.name,
            magic: value.magic.iter().map(hex::encode).collect(),
            is_executable: value.executable,
            description: value.description,
        }
    }
}

impl From<FileTypes> for SupportedFileTypes {
    fn from(value: FileTypes) -> Self {
        Self {
            types: value.0.into_iter().map(|t| t.into()).collect(),
            message: None,
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use std::str::FromStr;

    /// Builds metadata for a sample ELF binary and checks that each
    /// similarity hash is present and compares as identical to itself.
    #[test]
    fn meta_and_sim_hashes() {
        let sample: &[u8] = include_bytes!("../../../types/testdata/elf/elf_haiku_x86");
        let FileMetadata {
            lzjd,
            tlsh,
            ssdeep,
            humanhash,
            file_command,
            ..
        } = FileMetadata::new(sample, Some("elf_haiku_x86"));

        assert!(lzjd.is_some());
        assert!(tlsh.is_some());
        assert!(ssdeep.is_some());

        let ssdeep = ssdeep.unwrap();
        let tlsh = tlsh.unwrap();
        let lzjd = lzjd.unwrap();

        println!("LZJD: {lzjd}");
        println!("Tlsh: {tlsh}");
        println!("SSDeep: {ssdeep}");
        println!("Human hash: {}", humanhash);
        println!("File command: {}", file_command);

        // A hash compared against itself must report a perfect match.
        assert_eq!(FuzzyHash::compare(ssdeep.clone(), ssdeep).unwrap(), 100);

        let parsed_tlsh =
            tlsh_fixed::Tlsh::from_str(&tlsh).expect("failed to convert tlsh string to object");
        assert_eq!(parsed_tlsh.diff(&parsed_tlsh, true), 0);

        let parsed_lzjd =
            LZDict::from_base64_string(&lzjd).expect("failed to convert lzjd string to object");
        assert_eq!(parsed_lzjd.jaccard_similarity(&parsed_lzjd), 1.0);
    }
}