assemblyline_models/datastore/
file.rs

1
2use chrono::{DateTime, Utc};
3use md5::Digest;
4use serde::{Serialize, Deserialize};
5use serde_with::{DeserializeFromStr, SerializeDisplay};
6use struct_metadata::Described;
7
8use crate::{ElasticMeta, Readable};
9use crate::types::{ExpandingClassification, SSDeepHash, Sha1, Sha256, Text, MD5};
10
11/// Model of File
12#[derive(Debug, Serialize, Deserialize, Described, Clone)]
13#[metadata_type(ElasticMeta)]
14#[metadata(index=true, store=true)]
15pub struct File {
16    /// Timestamp indicating when the file was archived.
17    pub archive_ts: Option<DateTime<Utc>>,
18    /// Dotted ASCII representation of the first 64 bytes of the file
19    #[metadata(index=false, store=false)]
20    pub ascii: String,
21    /// Classification of the file
22    #[serde(flatten)]
23    pub classification: ExpandingClassification,
24    /// Entropy of the file
25    pub entropy: f32,
26    /// Expiry timestamp
27    #[metadata(store=false)]
28    pub expiry_ts: Option<DateTime<Utc>>,
29    /// Is this an image from an Image Result Section?
30    #[serde(default)]
31    pub is_section_image: bool,
32    /// Is this a file generated by a service?
33    #[serde(default)]
34    pub is_supplementary: bool,
35    /// Hex dump of the first 64 bytes of the file
36    #[metadata(index=false, store=false)]
37    pub hex: String,
38    /// List of labels of the file
39    #[serde(default)]
40    #[metadata(copyto="__text__")]
41    pub labels: Vec<String>,
42    /// Categories of label
43    #[serde(default)]
44    pub label_categories: LabelCategories,
45    /// MD5 of the file
46    #[metadata(copyto="__text__")]
47    pub md5: MD5,
48    /// Output from libmagic related to the file
49    #[metadata(store=false)]
50    pub magic: String,
51    /// MIME type of the file as identified by libmagic
52    #[metadata(store=false)]
53    pub mime: Option<String>,
54    /// Details about when the file was seen
55    #[serde(default)]
56    pub seen: Seen,
57    /// SHA1 hash of the file
58    #[metadata(copyto="__text__")]
59    pub sha1: Sha1,
60    /// SHA256 hash of the file
61    #[metadata(copyto="__text__")]
62    pub sha256: Sha256,
63    /// Size of the file in bytes
64    #[metadata(mapping="long")]
65    pub size: u64,
66    /// SSDEEP hash of the file
67    #[metadata(store=false)]
68    pub ssdeep: SSDeepHash,
69    /// Type of file as identified by Assemblyline
70    #[serde(rename = "type")]
71    #[metadata(copyto="__text__")]
72    pub file_type: String,
73    /// TLSH hash of the file"
74    #[metadata(copyto="__text__")]
75    pub tlsh: Option<String>,
76    /// Was loaded from the archive
77    #[serde(default)]
78    #[metadata(index=false, store=false)]
79    pub from_archive: bool,
80    /// URI structure to speed up specialty file searching
81    #[serde(default)]
82    pub uri_info: Option<URIInfo>,
83    /// List of comments made on a file
84    #[serde(default)]
85    pub comments: Vec<Comment>,
86}
87
/// Lets a random `File` be drawn from rand's `StandardUniform` distribution.
#[cfg(feature = "rand")]
impl rand::distr::Distribution<File> for rand::distr::StandardUniform {
    /// Draws 1000 random bytes from `rng`, then builds a `File` record
    /// describing that payload via `File::gen_for_sample`.
    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> File {
        // One `random()` call per byte, in order, exactly 1000 times.
        let payload: Vec<u8> = std::iter::repeat_with(|| rng.random())
            .take(1000)
            .collect();
        File::gen_for_sample(&payload, rng)
    }
}
98
99impl File {
100    pub fn gen_for_sample<R: rand::Rng + ?Sized>(data: &[u8], rng: &mut R) -> File {
101        let sha256 = hex::encode(sha2::Sha256::new().chain_update(data).finalize());
102        let sha1 = hex::encode(sha1::Sha1::new().chain_update(data).finalize());
103        let md5 = hex::encode(md5::Md5::new().chain_update(data).finalize());
104
105        File {
106            archive_ts: None,
107            ascii: String::from_iter(data.iter().take(64).map(|byte| if byte.is_ascii() { *byte as char } else { '.' })),
108            classification: ExpandingClassification::try_unrestricted().unwrap(),
109            entropy: rng.random_range(0.0..1.0),
110            expiry_ts: None,
111            is_section_image: rng.random(),
112            is_supplementary: rng.random(),
113            hex: String::from_iter(data.iter().take(64).map(|byte| if byte.is_ascii() { *byte as char } else { '.' })),
114            labels: vec![],
115            label_categories: Default::default(),
116            md5: md5.parse().unwrap(),
117            magic: "Binary data".to_string(),
118            mime: Some("application/octet-stream".to_string()),
119            seen: Seen { count: 1, first: chrono::Utc::now(), last: chrono::Utc::now() },
120            sha1: sha1.parse().unwrap(),
121            sha256: sha256.parse().unwrap(),
122            size: data.len() as u64,
123            ssdeep: rng.random(),
124            file_type: "unknown".to_string(),
125            tlsh: None,
126            from_archive: false,
127            uri_info: None,
128            comments: vec![],
129        }
130    }
131}
132
/// `Readable` implementation for `File`.
impl Readable for File {
    /// Stores the given flag in this record's `from_archive` field.
    fn set_from_archive(&mut self, from_archive: bool) {
        self.from_archive = from_archive;
    }
}
138
139/// URI Information Model
140#[derive(Debug, Serialize, Deserialize, Described, Clone, PartialEq, Eq)]
141#[metadata_type(ElasticMeta)]
142#[metadata(index=true, store=true)]
143pub struct URIInfo {
144    /// full URI
145    pub uri: String,
146
147    // https://www.rfc-editor.org/rfc/rfc1808.html#section-2.1
148    pub scheme: String,
149    pub netloc: String,
150    pub path: Option<String>,
151    pub params: Option<String>,
152    pub query: Option<String>,
153    pub fragment: Option<String>,
154
155    // Ease-of-use elements
156    pub username: Option<String>,
157    pub password: Option<String>,
158    pub hostname: String,
159    pub port: Option<u16>,
160}
161
162/// File Seen Model
163#[derive(Debug, Serialize, Deserialize, Described, Clone)]
164#[metadata_type(ElasticMeta)]
165#[metadata(index=true, store=true)]
166pub struct Seen {
167    /// How many times have we seen this file?
168    #[serde(default = "default_seen_count")]
169    #[metadata(mapping="integer")]
170    pub count: u64,
171    /// First seen timestamp
172    #[serde(default = "default_now")]
173    pub first: DateTime<Utc>,
174    /// Last seen timestamp
175    #[serde(default = "default_now")]
176    pub last: DateTime<Utc>,
177}
178
/// Returns `1`, the default sighting count.
fn default_seen_count() -> u64 {
    1
}
180fn default_now() -> DateTime<Utc> { Utc::now() }
181
182impl Default for Seen {
183    fn default() -> Self {
184        Self {
185            count: default_seen_count(),
186            first: default_now(),
187            last: default_now()
188        }
189    }
190}
191
192
193/// Label Categories Model
194#[derive(Debug, Serialize, Deserialize, Described, Clone, Default)]
195#[serde(default)]
196#[metadata_type(ElasticMeta)]
197#[metadata(index=true, store=true)]
198pub struct LabelCategories {
199    /// List of extra informational labels about the file
200    pub info: Vec<String>,
201    /// List of labels related to the technique used by the file and the signatures that hits on it.
202    pub technique: Vec<String>,
203    /// List of labels related to attribution of this file (implant name, actor, campain...)
204    pub attribution: Vec<String>,
205}
206
207/// Comment Model
208#[derive(Debug, Serialize, Deserialize, Described, Clone)]
209#[metadata_type(ElasticMeta)]
210#[metadata(index=true, store=false)]
211pub struct Comment {
212    /// Comment ID
213    pub cid: String,
214    /// Username of the user who made the comment
215    pub uname: String,
216    /// Datetime the comment was made on
217    #[serde(default="Utc::now")]
218    #[metadata(store=true)]
219    pub date: DateTime<Utc>,
220    /// Text of the comment written by the author
221    pub text: Text,
222    /// List of reactions made on a comment
223    #[serde(default)]
224    pub reactions: Vec<Reaction>,
225}
226
227/// Reaction Model
228#[derive(Debug, Serialize, Deserialize, Described, Clone)]
229#[metadata_type(ElasticMeta)]
230#[metadata(index=true, store=false)]
231pub struct Reaction {
232    /// Icon of the user who made the reaction
233    pub icon: ReactionsTypes,
234    /// Username of the user who made the reaction
235    pub uname: String,
236}
237
238#[derive(SerializeDisplay, DeserializeFromStr, strum::Display, strum::EnumString, Described, PartialEq, Eq, Debug, Clone, Copy)]
239#[metadata_type(ElasticMeta)]
240#[metadata(mapping="keyword")]
241#[strum(serialize_all = "snake_case")]
242pub enum ReactionsTypes {
243    ThumbsUp, 
244    ThumbsDown, 
245    Love, 
246    Smile, 
247    Surprised, 
248    Party
249}