Skip to main content

assemblyline_models/datastore/
file.rs

1
2use chrono::{DateTime, Utc};
3use md5::Digest;
4use serde::{Serialize, Deserialize};
5use serde_with::{DeserializeFromStr, SerializeDisplay};
6use struct_metadata::Described;
7
8#[cfg(feature = "rand")]
9use rand::RngExt;
10
11use crate::{ElasticMeta, Readable};
12use crate::types::{ExpandingClassification, SSDeepHash, Sha1, Sha256, Text, MD5};
13
/// Model of File
///
/// Fields are indexed and stored by default (`index=true, store=true` on the
/// struct); individual fields override this with their own `#[metadata(...)]`
/// attribute.
#[derive(Debug, Serialize, Deserialize, Described, Clone)]
#[metadata_type(ElasticMeta)]
#[metadata(index=true, store=true)]
pub struct File {
    /// Timestamp indicating when the file was archived.
    pub archive_ts: Option<DateTime<Utc>>,
    /// Dotted ASCII representation of the first 64 bytes of the file
    #[metadata(index=false, store=false)]
    pub ascii: String,
    /// Classification of the file
    // Flattened: the classification's own fields are (de)serialized directly
    // on the file object rather than under a nested `classification` key.
    #[serde(flatten)]
    pub classification: ExpandingClassification,
    /// Entropy of the file
    pub entropy: f32,
    /// Expiry timestamp
    #[metadata(store=false)]
    pub expiry_ts: Option<DateTime<Utc>>,
    /// Is this an image from an Image Result Section?
    // Defaults to `false` when absent from the input.
    #[serde(default)]
    pub is_section_image: bool,
    /// Is this a file generated by a service?
    // Defaults to `false` when absent from the input.
    #[serde(default)]
    pub is_supplementary: bool,
    /// Hex dump of the first 64 bytes of the file
    #[metadata(index=false, store=false)]
    pub hex: String,
    /// List of labels of the file
    // Defaults to an empty list when absent from the input.
    #[serde(default)]
    #[metadata(copyto="__text__")]
    pub labels: Vec<String>,
    /// Categories of label
    #[serde(default)]
    pub label_categories: LabelCategories,
    /// MD5 of the file
    #[metadata(copyto="__text__")]
    pub md5: MD5,
    /// Output from libmagic related to the file
    #[metadata(store=false)]
    pub magic: String,
    /// MIME type of the file as identified by libmagic
    #[metadata(store=false)]
    pub mime: Option<String>,
    /// Details about when the file was seen
    // Defaults to `Seen::default()` when absent from the input.
    #[serde(default)]
    pub seen: Seen,
    /// SHA1 hash of the file
    #[metadata(copyto="__text__")]
    pub sha1: Sha1,
    /// SHA256 hash of the file
    #[metadata(copyto="__text__")]
    pub sha256: Sha256,
    /// Size of the file in bytes
    #[metadata(mapping="long")]
    pub size: u64,
    /// SSDEEP hash of the file
    #[metadata(store=false)]
    pub ssdeep: SSDeepHash,
    /// Type of file as identified by Assemblyline
    // Serialized under the key `type`; `type` is a Rust keyword, hence the
    // different field name.
    #[serde(rename = "type")]
    #[metadata(copyto="__text__")]
    pub file_type: String,
    /// TLSH hash of the file
    #[metadata(copyto="__text__")]
    pub tlsh: Option<String>,
    /// Was loaded from the archive
    // Neither indexed nor stored; written through `Readable::set_from_archive`.
    #[serde(default)]
    #[metadata(index=false, store=false)]
    pub from_archive: bool,
    /// URI structure to speed up specialty file searching
    #[serde(default)]
    pub uri_info: Option<URIInfo>,
    /// List of comments made on a file
    #[serde(default)]
    pub comments: Vec<Comment>,
}
90
#[cfg(feature = "rand")]
impl rand::distr::Distribution<File> for rand::distr::StandardUniform {
    /// Draw a random [`File`]: 1000 random bytes are taken from `rng` and
    /// handed, together with the same `rng`, to [`File::gen_for_sample`].
    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> File {
        // One `random()` call per byte, in order, so the generator is
        // consumed exactly as a push-per-iteration loop would consume it.
        let payload: Vec<u8> = std::iter::repeat_with(|| rng.random())
            .take(1000)
            .collect();
        File::gen_for_sample(&payload, rng)
    }
}
101
// Gated like the `RngExt` import this impl relies on (`random`,
// `random_range`); without the gate the crate cannot build when the `rand`
// feature is disabled.
#[cfg(feature = "rand")]
impl File {
    /// Build a sample [`File`] record describing `data`.
    ///
    /// The digests (`md5`, `sha1`, `sha256`), `size`, and the `ascii` / `hex`
    /// previews are computed from `data`. `entropy`, `is_section_image`,
    /// `is_supplementary` and `ssdeep` are drawn from `rng`. Every other field
    /// is a fixed placeholder (no labels, no comments, type `"unknown"`, ...).
    ///
    /// # Panics
    ///
    /// Panics if `ExpandingClassification::try_unrestricted()` returns an
    /// error.
    pub fn gen_for_sample<R: rand::Rng + ?Sized>(data: &[u8], rng: &mut R) -> File {
        let sha256 = hex::encode(sha2::Sha256::new().chain_update(data).finalize());
        let sha1 = hex::encode(sha1::Sha1::new().chain_update(data).finalize());
        let md5 = hex::encode(md5::Md5::new().chain_update(data).finalize());

        // Both previews cover at most the first 64 bytes of the file.
        let head = &data[..data.len().min(64)];

        // Dotted ASCII: keep printable characters (space through `~`) and
        // replace everything else, control characters included, with '.'.
        let ascii: String = head
            .iter()
            .map(|&byte| {
                if byte.is_ascii_graphic() || byte == b' ' {
                    char::from(byte)
                } else {
                    '.'
                }
            })
            .collect();

        // Hex dump of the same bytes (two lowercase hex digits per byte).
        let hex_dump = hex::encode(head);

        // A file seen once has identical first/last timestamps.
        let now = chrono::Utc::now();

        // NOTE: the rng-backed fields are listed in the same relative order as
        // before (entropy, is_section_image, is_supplementary, ssdeep) so a
        // seeded generator yields the same values for them.
        File {
            archive_ts: None,
            ascii,
            classification: ExpandingClassification::try_unrestricted().unwrap(),
            entropy: rng.random_range(0.0..1.0),
            expiry_ts: None,
            is_section_image: rng.random(),
            is_supplementary: rng.random(),
            hex: hex_dump,
            labels: vec![],
            label_categories: Default::default(),
            // The digests above are freshly produced hex strings; parsing them
            // into their typed wrappers is expected to succeed.
            md5: md5.parse().unwrap(),
            magic: "Binary data".to_string(),
            mime: Some("application/octet-stream".to_string()),
            seen: Seen { count: 1, first: now, last: now },
            sha1: sha1.parse().unwrap(),
            sha256: sha256.parse().unwrap(),
            size: data.len() as u64,
            ssdeep: rng.random(),
            file_type: "unknown".to_string(),
            tlsh: None,
            from_archive: false,
            uri_info: None,
            comments: vec![],
        }
    }
}
135
/// Lets generic read paths mark a `File` with where it was loaded from.
impl Readable for File {
    /// Record whether this document was loaded from the archive by storing
    /// the flag in the `from_archive` field.
    fn set_from_archive(&mut self, from_archive: bool) {
        self.from_archive = from_archive;
    }
}
141
/// URI Information Model
///
/// A URI together with its individual components.
#[derive(Debug, Serialize, Deserialize, Described, Clone, PartialEq, Eq)]
#[metadata_type(ElasticMeta)]
#[metadata(index=true, store=true)]
pub struct URIInfo {
    /// full URI
    pub uri: String,

    // https://www.rfc-editor.org/rfc/rfc1808.html#section-2.1
    // The six fields below use the component names from the RFC section
    // linked above.
    /// Scheme component of the URI
    pub scheme: String,
    /// Network location component of the URI
    pub netloc: String,
    /// Path component of the URI, if any
    pub path: Option<String>,
    /// Parameters component of the URI, if any
    pub params: Option<String>,
    /// Query component of the URI, if any
    pub query: Option<String>,
    /// Fragment component of the URI, if any
    pub fragment: Option<String>,

    // Ease-of-use elements
    // NOTE(review): presumably the pieces of `netloc` split out individually;
    // confirm against the code that populates this struct.
    /// Username, if any
    pub username: Option<String>,
    /// Password, if any
    pub password: Option<String>,
    /// Hostname
    pub hostname: String,
    /// Port number, if any
    pub port: Option<u16>,
}
164
/// File Seen Model
///
/// Every field has a serde default, so any subset of them may be missing from
/// the input: `count` falls back to `default_seen_count()` and both timestamps
/// fall back to `default_now()`.
#[derive(Debug, Serialize, Deserialize, Described, Clone)]
#[metadata_type(ElasticMeta)]
#[metadata(index=true, store=true)]
pub struct Seen {
    /// How many times have we seen this file?
    #[serde(default = "default_seen_count")]
    #[metadata(mapping="integer")]
    pub count: u64,
    /// First seen timestamp
    #[serde(default = "default_now")]
    pub first: DateTime<Utc>,
    /// Last seen timestamp
    #[serde(default = "default_now")]
    pub last: DateTime<Utc>,
}
181
/// Serde default for `Seen::count`: a file is counted as seen once.
fn default_seen_count() -> u64 {
    1
}
/// Serde default for the `Seen` timestamps: the current UTC time at the moment
/// of the call.
fn default_now() -> DateTime<Utc> { Utc::now() }
184
185impl Default for Seen {
186    fn default() -> Self {
187        Self {
188            count: default_seen_count(),
189            first: default_now(),
190            last: default_now()
191        }
192    }
193}
194
195
/// Label Categories Model
///
/// The struct-level `#[serde(default)]` means any category missing from the
/// input is filled from `Default`, i.e. an empty list.
#[derive(Debug, Serialize, Deserialize, Described, Clone, Default)]
#[serde(default)]
#[metadata_type(ElasticMeta)]
#[metadata(index=true, store=true)]
pub struct LabelCategories {
    /// List of extra informational labels about the file
    pub info: Vec<String>,
    /// List of labels related to the technique used by the file and the signatures that hit on it.
    pub technique: Vec<String>,
    /// List of labels related to attribution of this file (implant name, actor, campaign...)
    pub attribution: Vec<String>,
}
209
/// Comment Model
///
/// Fields are indexed but not stored by default (`index=true, store=false`);
/// `date` overrides this and is stored.
#[derive(Debug, Serialize, Deserialize, Described, Clone)]
#[metadata_type(ElasticMeta)]
#[metadata(index=true, store=false)]
pub struct Comment {
    /// Comment ID
    pub cid: String,
    /// Username of the user who made the comment
    pub uname: String,
    /// Datetime the comment was made on
    // Falls back to the current UTC time when absent from the input.
    #[serde(default="Utc::now")]
    #[metadata(store=true)]
    pub date: DateTime<Utc>,
    /// Text of the comment written by the author
    pub text: Text,
    /// List of reactions made on a comment
    // Defaults to an empty list when absent from the input.
    #[serde(default)]
    pub reactions: Vec<Reaction>,
}
229
/// Reaction Model
///
/// A single reaction left on a comment: which icon was used and by whom.
#[derive(Debug, Serialize, Deserialize, Described, Clone)]
#[metadata_type(ElasticMeta)]
#[metadata(index=true, store=false)]
pub struct Reaction {
    /// Icon chosen by the user who made the reaction
    pub icon: ReactionsTypes,
    /// Username of the user who made the reaction
    pub uname: String,
}
240
/// Kinds of reaction that can be left on a comment.
///
/// Serialization goes through `Display` and deserialization through `FromStr`
/// (both derived by strum with `serialize_all = "snake_case"`), so each
/// variant travels as its snake_case name, e.g. `ThumbsUp` as `thumbs_up`.
#[derive(SerializeDisplay, DeserializeFromStr, strum::Display, strum::EnumString, Described, PartialEq, Eq, Debug, Clone, Copy)]
#[metadata_type(ElasticMeta)]
#[metadata(mapping="keyword")]
#[strum(serialize_all = "snake_case")]
pub enum ReactionsTypes {
    /// Serialized as `thumbs_up`
    ThumbsUp,
    /// Serialized as `thumbs_down`
    ThumbsDown,
    /// Serialized as `love`
    Love,
    /// Serialized as `smile`
    Smile,
    /// Serialized as `surprised`
    Surprised,
    /// Serialized as `party`
    Party
}