use malwaredb_api::{SupportedFileType, SupportedFileTypes};
use malwaredb_types::utils::EntropyCalc;
use fuzzyhash::FuzzyHash;
use human_hash::humanize;
use magic::cookie::DatabasePaths;
use malwaredb_lzjd::{LZDict, Murmur3HashState};
use md5::Md5;
use sha1::Sha1;
use sha2::{Digest, Sha256, Sha384, Sha512};
use tlsh_fixed::TlshBuilder;
use tracing::error;
use uuid::Uuid;
/// Hashes and descriptive attributes computed from a single file's contents.
///
/// [`FileMetadata::new`] fills in every field from a byte slice and an
/// optional file name.
#[derive(Debug, Clone)]
pub struct FileMetadata {
/// File name, if one was supplied; `new` stores it ASCII-lowercased.
pub name: Option<String>,
/// Length of the contents in bytes.
pub size: u32,
/// Entropy of the contents, as returned by `EntropyCalc::entropy`.
pub entropy: f32,
/// Hex-encoded SHA-1 digest of the contents.
pub sha1: String,
/// Hex-encoded SHA-256 digest of the contents.
pub sha256: String,
/// Hex-encoded SHA-384 digest of the contents.
pub sha384: String,
/// Hex-encoded SHA-512 digest of the contents.
pub sha512: String,
/// Hex-encoded MD5 digest of the contents.
pub md5: String,
/// LZJD similarity hash in its string form (parseable with
/// `LZDict::from_base64_string`); `new` always sets `Some`.
pub lzjd: Option<String>,
/// SSDeep fuzzy hash string; `new` always sets `Some`.
pub ssdeep: Option<String>,
/// TLSH similarity hash string; `None` when the TLSH builder fails to
/// produce a hash for the contents.
pub tlsh: Option<String>,
/// Human-readable hash produced by `humanize` from the MD5 digest
/// interpreted as a UUID.
pub humanhash: String,
/// Description of the contents reported by libmagic; `new` leaves this
/// empty when libmagic cannot be opened, loaded, or queried.
pub file_command: String,
}
impl FileMetadata {
pub fn new(contents: &[u8], name: Option<&str>) -> Self {
let mut sha1 = Sha1::new();
sha1.update(contents);
let sha1 = sha1.finalize();
let mut sha256 = Sha256::new();
sha256.update(contents);
let sha256 = sha256.finalize();
let mut sha384 = Sha384::new();
sha384.update(contents);
let sha384 = sha384.finalize();
let mut sha512 = Sha512::new();
sha512.update(contents);
let sha512 = sha512.finalize();
let mut md5 = Md5::new();
md5.update(contents);
let md5 = md5.finalize();
let build_hasher = Murmur3HashState::default();
let lzjd_str =
LZDict::from_bytes_stream(contents.iter().copied(), &build_hasher).to_string();
let mut builder = TlshBuilder::new(
tlsh_fixed::BucketKind::Bucket256,
tlsh_fixed::ChecksumKind::ThreeByte,
tlsh_fixed::Version::Version4,
);
builder.update(contents);
let tlsh = if let Ok(hasher) = builder.build() {
Some(hasher.hash())
} else {
None
};
let md5 = hex::encode(md5);
let uuid = Uuid::parse_str(&md5).unwrap();
let file_command = {
if let Ok(cookie) = magic::Cookie::open(magic::cookie::Flags::ERROR) {
let db_paths = DatabasePaths::default();
if let Ok(cookie) = cookie.load(&db_paths) {
if let Ok(output) = cookie.buffer(contents) {
output
} else {
error!("LibMagic: failed to get output for buffer");
"".into()
}
} else {
error!("LibMagic: failed to load signature database");
"".into()
}
} else {
error!("LibMagic: failed to get handle");
"".into()
}
};
Self {
name: name.map(|n| n.to_ascii_lowercase()),
size: contents.len() as u32,
entropy: contents.entropy(),
sha1: hex::encode(sha1),
sha256: hex::encode(sha256),
sha384: hex::encode(sha384),
sha512: hex::encode(sha512),
md5,
lzjd: Some(lzjd_str),
ssdeep: Some(FuzzyHash::new(contents).to_string()),
tlsh,
humanhash: humanize(&uuid, 4),
file_command,
}
}
}
/// Description of one file type, convertible into the API's
/// `SupportedFileType`.
#[derive(Debug, Clone)]
pub struct FileType {
/// Numeric identifier; not carried over when converting to
/// `SupportedFileType`. NOTE(review): presumably a database row ID — confirm.
pub id: i32,
/// Name of the file type.
pub name: String,
/// Optional free-form description of the file type.
pub description: Option<String>,
/// Byte sequences associated with this type; each one is hex-encoded when
/// converting to `SupportedFileType`. NOTE(review): presumably the
/// magic-number signatures that identify the type — confirm.
pub magic: Vec<Vec<u8>>,
/// Whether the type is executable; becomes `is_executable` in the API type.
pub executable: bool,
}
/// Newtype around a list of [`FileType`] values, used to convert the whole
/// list into the API's `SupportedFileTypes`.
pub struct FileTypes(pub Vec<FileType>);
impl From<FileType> for SupportedFileType {
fn from(value: FileType) -> Self {
Self {
name: value.name,
magic: value.magic.iter().map(hex::encode).collect(),
is_executable: value.executable,
description: value.description,
}
}
}
impl From<FileTypes> for SupportedFileTypes {
fn from(value: FileTypes) -> Self {
Self {
types: value.0.into_iter().map(|t| t.into()).collect(),
message: None,
}
}
}
#[cfg(test)]
mod test {
    use super::*;
    use std::str::FromStr;

    /// Builds metadata for a bundled ELF sample and checks that every
    /// similarity hash is present, can be parsed back, and compares as
    /// identical to itself.
    #[test]
    fn meta_and_sim_hashes() {
        let sample: &[u8] = include_bytes!("../../../types/testdata/elf/elf_haiku_x86");
        let FileMetadata {
            lzjd,
            ssdeep,
            tlsh,
            humanhash,
            file_command,
            ..
        } = FileMetadata::new(sample, Some("elf_haiku_x86"));

        // All three similarity hashes must have been produced.
        assert!(lzjd.is_some());
        assert!(tlsh.is_some());
        assert!(ssdeep.is_some());

        let ssdeep = ssdeep.unwrap();
        let tlsh = tlsh.unwrap();
        let lzjd = lzjd.unwrap();

        println!("LZJD: {lzjd}");
        println!("Tlsh: {tlsh}");
        println!("SSDeep: {ssdeep}");
        println!("Human hash: {}", humanhash);
        println!("File command: {}", file_command);

        // Each hash compared against itself must report a perfect match.
        assert_eq!(FuzzyHash::compare(ssdeep.clone(), ssdeep).unwrap(), 100);

        let tlsh =
            tlsh_fixed::Tlsh::from_str(&tlsh).expect("failed to convert tlsh string to object");
        assert_eq!(tlsh.diff(&tlsh, true), 0);

        let lzjd =
            LZDict::from_base64_string(&lzjd).expect("failed to convert lzjd string to object");
        assert_eq!(lzjd.jaccard_similarity(&lzjd), 1.0);
    }
}