Skip to main content

assemblyline_models/messages/
task.rs

1use std::collections::HashMap;
2
3use log::debug;
4use md5::Digest;
5use serde::{Serialize, Deserialize};
6
7#[cfg(feature = "rand")]
8use rand::RngExt;
9
10use crate::datastore::tagging::TagValue;
11use crate::random_word;
12use crate::types::{ClassificationString, JsonMap, MD5, SSDeepHash, ServiceName, Sha1, Sha256, Sid, Wildcard};
13use crate::{datastore::file::URIInfo, config::ServiceSafelist};
14
15
16/// File Information
17#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
18pub struct FileInfo {
19    /// The output from libmagic which was used to determine the tag
20    pub magic: String,
21    /// MD5 of the file
22    pub md5: MD5,
23    /// The libmagic mime type
24    pub mime: Option<String>,
25    /// SHA1 hash of the file
26    pub sha1: Sha1,
27    /// SHA256 hash of the file
28    pub sha256: Sha256,
29    /// Size of the file in bytes
30    pub size: u64,
31    /// SSDEEP hash of the file"
32    #[serde(default)]
33    pub ssdeep: Option<SSDeepHash>,
34    /// TLSH hash of the file"
35    #[serde(default)]
36    pub tlsh: Option<String>,
37    /// Type of file as identified by Assemblyline
38    #[serde(rename="type")]
39    #[serde(default)]
40    pub file_type: String,
41    /// URI structure to speed up specialty file searching
42    #[serde(default)]
43    pub uri_info: Option<URIInfo>,
44}
45
#[cfg(feature = "rand")]
impl rand::distr::Distribution<FileInfo> for rand::distr::StandardUniform {
    /// Produces a `FileInfo` whose hashes, size and ssdeep value are random.
    ///
    /// The remaining fields are fixed: empty `magic`, no mime type, no TLSH,
    /// no URI info, and a file type of `"unknown"`.
    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> FileInfo {
        // Values are drawn in field-declaration order so that a seeded
        // generator always yields the same sequence of fields.
        let md5 = rng.random();
        let sha1 = rng.random();
        let sha256 = rng.random();
        let size = rng.random();
        let ssdeep = Some(rng.random());

        FileInfo {
            magic: String::new(),
            md5,
            mime: None,
            sha1,
            sha256,
            size,
            ssdeep,
            tlsh: None,
            file_type: String::from("unknown"),
            uri_info: None,
        }
    }
}
63
64/// Tag Item
65#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
66pub struct TagItem {
67    /// Type of tag item
68    #[serde(rename="type")]
69    pub tag_type: String,
70    ///Short version of tag type
71    pub short_type: String,
72    /// Value of tag item
73    pub value: TagValue,
74    /// Score of tag item
75    #[serde(skip_serializing_if="Option::is_none")]
76    pub score: Option<i32>,
77}
78
79
80/// Data Item
81#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
82pub struct DataItem {
83    pub name: String,
84    pub value: serde_json::Value,
85}
86
87/// Service Task Model
88#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
89pub struct Task {
90    /// A random ID to differentiate this task
91    pub task_id: u64,
92    /// Id of the dispatcher that issued this task
93    pub dispatcher: String,
94    pub dispatcher_address: String,
95    /// Submission ID
96    pub sid: Sid,
97    /// Metadata associated to the submission
98    pub metadata: HashMap<String, Wildcard>,
99    /// Minimum classification of the file being scanned
100    pub min_classification: ClassificationString,
101    /// File info block
102    pub fileinfo: FileInfo,
103    /// File name
104    pub filename: String,
105    /// Service name
106    pub service_name: ServiceName,
107    /// Service specific parameters
108    pub service_config: JsonMap,
109    /// File depth relative to initital submitted file
110    pub depth: u32,
111    /// Maximum number of files that submission can have
112    pub max_files: i32,
113    /// Task TTL
114    pub ttl: i32,
115
116    /// List of tags
117    pub tags: Vec<TagItem>,
118    /// Temporary submission data
119    pub temporary_submission_data: Vec<DataItem>,
120
121    /// Perform deep scanning
122    pub deep_scan: bool,
123
124    /// Whether the service cache should be ignored during the processing of this task
125    pub ignore_cache: bool,
126
127    /// Whether the service should ignore the dynamic recursion prevention or not
128    pub ignore_recursion_prevention: bool,
129
130    /// Should the service filter it's output?
131    pub ignore_filtering: bool,
132
133    /// Priority for processing order
134    pub priority: i32,
135
136    /// Safelisting configuration (as defined in global configuration)
137    #[serde(default="task_default_safelist_config")]
138    pub safelist_config: ServiceSafelist, // ", default={'enabled': False})
139}
140
#[cfg(feature = "rand")]
impl rand::distr::Distribution<Task> for rand::distr::StandardUniform {
    /// Produces a `Task` populated with random identifiers, flags and file info.
    ///
    /// Collections start out empty, the dispatcher address is fixed to
    /// `localhost:8080`, the classification is the unrestricted default, the
    /// TTL falls in `0..100`, and the safelist config is its `Default`.
    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> Task {
        // Every draw happens in field-declaration order; with a seeded
        // generator this keeps the produced task reproducible.
        let task_id = rng.random();
        let dispatcher = random_word(rng);
        let sid = rng.random();
        let fileinfo = rng.random();
        let filename = random_word(rng);
        let service_name = ServiceName::from_string(random_word(rng));
        let depth = rng.random();
        let max_files = rng.random();
        let ttl = rng.random_range(0..100);
        let deep_scan = rng.random();
        let ignore_cache = rng.random();
        let ignore_recursion_prevention = rng.random();
        let ignore_filtering = rng.random();
        let priority = rng.random();

        Task {
            task_id,
            dispatcher,
            dispatcher_address: String::from("localhost:8080"),
            sid,
            metadata: HashMap::new(),
            min_classification: ClassificationString::default_unrestricted(),
            fileinfo,
            filename,
            service_name,
            service_config: Default::default(),
            depth,
            max_files,
            ttl,
            tags: Vec::new(),
            temporary_submission_data: Vec::new(),
            deep_scan,
            ignore_cache,
            ignore_recursion_prevention,
            ignore_filtering,
            priority,
            safelist_config: ServiceSafelist::default(),
        }
    }
}
169
170pub fn task_default_safelist_config() -> ServiceSafelist {
171    ServiceSafelist {
172        enabled: false,
173        ..Default::default()
174    }
175}
176
177impl Task {
178    pub fn make_key(sid: Sid, service_name: &str, sha: &Sha256) -> String {
179        format!("{sid}_{service_name}_{sha}")
180    }
181
182    pub fn key(&self) -> String {
183        Self::make_key(self.sid, &self.service_name, &self.fileinfo.sha256)
184    }
185
186    pub fn signature(&self) -> TaskSignature {
187        TaskSignature {
188            task_id: self.task_id,
189            sid: self.sid,
190            service: self.service_name,
191            hash: self.fileinfo.sha256.clone()
192
193        }
194    }
195}
196
197pub fn generate_conf_key(service_tool_version: Option<&str>, task: Option<&Task>, partial: Option<bool>, force_ignore_cache: bool) -> Result<String, serde_json::Error> {
198    if let Some(task) = task {
199        let service_config = serde_json::to_string(&{
200            let mut pairs: Vec<_> = task.service_config.iter().collect();
201            pairs.sort_unstable_by_key(|row| row.0);
202            pairs
203        })?;
204
205        let submission_params_str = serde_json::to_string(&[
206            ("deep_scan", serde_json::json!(task.deep_scan)),
207            ("ignore_filtering", serde_json::json!(task.ignore_filtering)),
208            ("max_files", serde_json::json!(task.max_files)),
209            ("min_classification", serde_json::json!(task.min_classification)),
210        ])?;
211
212        let ignore_salt = if task.ignore_cache || force_ignore_cache || partial.unwrap_or_default() {
213             &rand::rng().random::<u128>().to_string()
214        } else {
215            "None"
216        };
217
218        let service_tool_version = service_tool_version.unwrap_or("None");
219        let total_str = format!("{service_tool_version}_{service_config}_{submission_params_str}_{ignore_salt}");
220
221        // get an md5 hash
222        let mut hasher = md5::Md5::new();
223        hasher.update(&total_str);
224        let hash = hasher.finalize();
225
226        // truncate it to 8 bytes and interpret it as a number
227        let number = u64::from_be_bytes(hash[0..8].try_into().unwrap());
228
229        let key = base62::encode(number);
230        debug!("Unhashed result config value {}/{}: {total_str} -> {key}", task.fileinfo.sha256, task.service_name);
231
232        // encode it as a string
233        Ok(key)
234    } else {
235        Ok("0".to_string())
236    }
237}
238
239
240#[derive(Hash, PartialEq, Eq)]
241pub struct TaskSignature {
242    pub task_id: u64,
243    pub sid: Sid,
244    pub service: ServiceName,
245    pub hash: Sha256,
246}
247
248
249
250/// Service Task Model
251#[derive(Serialize, Deserialize)]
252pub struct TaskToken {
253    pub task_id: u64,
254    pub dispatcher: String,
255}
256
257// ============================================================================
258//MARK: Responses
259
260#[derive(Serialize, Deserialize, Clone, Debug)]
261pub struct ResultSummary {
262    pub key: String,
263    pub drop: bool,
264    pub score: i32,
265    pub partial: bool,
266    pub children: Vec<(Sha256, String)>
267}
268
269#[derive(Serialize, Deserialize)]
270pub enum ServiceResponse {
271    Result(Box<ServiceResult>),
272    Error(Box<ServiceError>),
273}
274
275impl std::fmt::Debug for ServiceResponse {
276    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
277        f.debug_struct("ServiceResponse").field("result", &matches!(self, ServiceResponse::Result(..))).finish()
278    }
279}
280
281impl ServiceResponse {
282    pub fn sid(&self) -> Sid {
283        match self {
284            ServiceResponse::Result(item) => item.sid,
285            ServiceResponse::Error(item) => item.sid,
286        }
287    }
288
289    pub fn sha256(&self) -> Sha256 {
290        match self {
291            ServiceResponse::Result(item) => item.sha256.clone(),
292            ServiceResponse::Error(item) => item.service_task.fileinfo.sha256.clone(),
293        }
294    }
295
296    pub fn service_name(&self) -> ServiceName {
297        match self {
298            ServiceResponse::Result(item) => item.service_name,
299            ServiceResponse::Error(item) => item.service_task.service_name,
300        }
301    }
302}
303
304#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
305pub struct TagEntry {
306    pub score: i32,
307    #[serde(rename="type")]
308    pub tag_type: String,
309    pub value: TagValue,
310
311}
312
313#[derive(Serialize, Deserialize)]
314pub struct ServiceResult {
315    pub dynamic_recursion_bypass: Vec<Sha256>,
316    pub sid: Sid,
317    pub sha256: Sha256,
318    pub service_name: ServiceName,
319    pub service_version: String,
320    pub service_tool_version: Option<String>,
321    pub expiry_ts: Option<chrono::DateTime<chrono::Utc>>,
322    pub result_summary: ResultSummary,
323    pub tags: HashMap<String, TagEntry>,
324    pub extracted_names: HashMap<Sha256, String>,
325    pub temporary_data: JsonMap,
326    pub extra_errors: Vec<String>,
327}
328
329#[derive(Serialize, Deserialize)]
330pub struct ServiceError {
331    pub sid: Sid,
332    pub service_task: Task,
333    pub error: crate::datastore::Error,
334    pub error_key: String,
335}