assemblyline_models/messages/
task.rs

1use std::collections::HashMap;
2
3use md5::Digest;
4use rand::Rng;
5use serde::{Serialize, Deserialize};
6
7use crate::datastore::tagging::TagValue;
8use crate::random_word;
9use crate::types::{JsonMap, Sid, Wildcard, MD5, Sha1, Sha256, SSDeepHash};
10use crate::{datastore::file::URIInfo, config::ServiceSafelist};
11
12
13/// File Information
14#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
15pub struct FileInfo {
16    /// The output from libmagic which was used to determine the tag
17    pub magic: String,
18    /// MD5 of the file
19    pub md5: MD5,
20    /// The libmagic mime type
21    pub mime: Option<String>,
22    /// SHA1 hash of the file
23    pub sha1: Sha1,
24    /// SHA256 hash of the file
25    pub sha256: Sha256,
26    /// Size of the file in bytes
27    pub size: u64,
28    /// SSDEEP hash of the file"
29    pub ssdeep: Option<SSDeepHash>,
30    /// TLSH hash of the file"
31    pub tlsh: Option<String>,
32    /// Type of file as identified by Assemblyline
33    #[serde(rename="type")]
34    pub file_type: String,
35    /// URI structure to speed up specialty file searching
36    pub uri_info: Option<URIInfo>,
37}
38
39/// Tag Item
40#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
41pub struct TagItem {
42    /// Type of tag item
43    #[serde(rename="type")]
44    pub tag_type: String,
45    ///Short version of tag type
46    pub short_type: String,
47    /// Value of tag item
48    pub value: TagValue,
49    /// Score of tag item
50    #[serde(skip_serializing_if="Option::is_none")]
51    pub score: Option<i32>,
52}
53
54
55/// Data Item
56#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
57pub struct DataItem {
58    pub name: String,
59    pub value: serde_json::Value,
60}
61
62/// Service Task Model
63#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
64pub struct Task {
65    /// A random ID to differentiate this task
66    pub task_id: u64,
67    /// Id of the dispatcher that issued this task
68    pub dispatcher: String,
69    pub dispatcher_address: String,
70    /// Submission ID
71    pub sid: Sid,
72    /// Metadata associated to the submission
73    pub metadata: HashMap<String, Wildcard>,
74    /// Minimum classification of the file being scanned
75    pub min_classification: String,
76    /// File info block
77    pub fileinfo: FileInfo,
78    /// File name
79    pub filename: String,
80    /// Service name
81    pub service_name: String,
82    /// Service specific parameters
83    pub service_config: JsonMap,
84    /// File depth relative to initital submitted file
85    pub depth: u32,
86    /// Maximum number of files that submission can have
87    pub max_files: i32,
88    /// Task TTL
89    pub ttl: i32,
90
91    /// List of tags
92    pub tags: Vec<TagItem>,
93    /// Temporary submission data
94    pub temporary_submission_data: Vec<DataItem>,
95
96    /// Perform deep scanning
97    pub deep_scan: bool,
98
99    /// Whether the service cache should be ignored during the processing of this task
100    pub ignore_cache: bool, 
101
102    /// Whether the service should ignore the dynamic recursion prevention or not
103    pub ignore_recursion_prevention: bool,
104
105    /// Should the service filter it's output?
106    pub ignore_filtering: bool,
107
108    /// Priority for processing order
109    pub priority: i32,
110
111    /// Safelisting configuration (as defined in global configuration)
112    #[serde(default="task_default_safelist_config")]
113    pub safelist_config: ServiceSafelist, // ", default={'enabled': False})
114}
115
/// Lets `rng.random::<Task>()` produce a randomly populated [`Task`].
///
/// Only compiled when the `rand` feature is enabled.
#[cfg(feature = "rand")]
impl rand::distr::Distribution<Task> for rand::distr::StandardUniform {
    /// Builds a [`Task`] whose identifiers, hashes, numeric fields and flags are random.
    ///
    /// Collections are left empty, the dispatcher address is fixed to
    /// `localhost:8080` and the file type to `unknown`. Values are drawn from
    /// `rng` in the declaration order of the fields they end up in.
    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> Task {
        // Draws happen top to bottom; keep this order in sync with the field order
        // so a seeded generator keeps yielding the same task.
        let task_id: u64 = rng.random();
        let dispatcher = random_word(rng);
        let sid: Sid = rng.random();

        let fileinfo = FileInfo {
            magic: String::new(),
            md5: rng.random(),
            mime: None,
            sha1: rng.random(),
            sha256: rng.random(),
            size: rng.random(),
            ssdeep: Some(rng.random()),
            tlsh: None,
            file_type: String::from("unknown"),
            uri_info: None,
        };

        let filename = random_word(rng);
        let service_name = random_word(rng);
        let depth: u32 = rng.random();
        let max_files: i32 = rng.random();
        let ttl: i32 = rng.random_range(0..100);
        let deep_scan: bool = rng.random();
        let ignore_cache: bool = rng.random();
        let ignore_recursion_prevention: bool = rng.random();
        let ignore_filtering: bool = rng.random();
        let priority: i32 = rng.random();

        Task {
            task_id,
            dispatcher,
            dispatcher_address: String::from("localhost:8080"),
            sid,
            metadata: HashMap::new(),
            min_classification: String::new(),
            fileinfo,
            filename,
            service_name,
            service_config: Default::default(),
            depth,
            max_files,
            ttl,
            tags: Vec::new(),
            temporary_submission_data: Vec::new(),
            deep_scan,
            ignore_cache,
            ignore_recursion_prevention,
            ignore_filtering,
            priority,
            safelist_config: Default::default(),
        }
    }
}
155
156pub fn task_default_safelist_config() -> ServiceSafelist {
157    ServiceSafelist {
158        enabled: false,
159        ..Default::default()
160    }
161}
162
163impl Task {
164    pub fn make_key(sid: Sid, service_name: &str, sha: &Sha256) -> String {
165        format!("{sid}_{service_name}_{sha}")
166    }
167
168    pub fn key(&self) -> String {
169        Self::make_key(self.sid, &self.service_name, &self.fileinfo.sha256)
170    }
171
172    pub fn signature(&self) -> TaskSignature {
173        TaskSignature { 
174            task_id: self.task_id, 
175            sid: self.sid, 
176            service: self.service_name.clone(), 
177            hash: self.fileinfo.sha256.clone() 
178        }
179    } 
180}
181
182pub fn generate_conf_key(service_tool_version: Option<&str>, task: Option<&Task>, partial: Option<bool>) -> Result<String, serde_json::Error> {
183    if let Some(task) = task {
184        let service_config = serde_json::to_string(&{
185            let mut pairs: Vec<_> = task.service_config.iter().collect();
186            pairs.sort_unstable_by_key(|row| row.0);
187            pairs
188        })?;
189
190        let submission_params_str = serde_json::to_string(&[
191            ("deep_scan", serde_json::json!(task.deep_scan)),
192            ("ignore_filtering", serde_json::json!(task.ignore_filtering)),
193            ("max_files", serde_json::json!(task.max_files)),
194            ("min_classification", serde_json::json!(task.min_classification)),
195        ])?;
196
197        let ignore_salt = if task.ignore_cache || partial.unwrap_or_default() {
198             &rand::rng().random::<u128>().to_string()
199        } else {
200            "None"
201        };
202
203        let service_tool_version = service_tool_version.unwrap_or("None");
204        let total_str = format!("{service_tool_version}_{service_config}_{submission_params_str}_{ignore_salt}");
205
206        // get an md5 hash
207        let mut hasher = md5::Md5::new();
208        hasher.update(total_str);
209        let hash = hasher.finalize();
210        
211        // truncate it to 8 bytes and interpret it as a number
212        let number = u64::from_be_bytes(hash.as_slice()[0..8].try_into().unwrap());
213        
214        // encode it as a string
215        Ok(base62::encode(number))
216    } else {
217        Ok("0".to_string())
218    }
219}
220
221
222#[derive(Hash, PartialEq, Eq)]
223pub struct TaskSignature {
224    pub task_id: u64,
225    pub sid: Sid,
226    pub service: String,
227    pub hash: Sha256,
228}
229
230
231
232/// Service Task Model
233#[derive(Serialize, Deserialize)]
234pub struct TaskToken {
235    pub task_id: u64,
236    pub dispatcher: String,
237}
238
239// ============================================================================
240//MARK: Responses 
241
242#[derive(Serialize, Deserialize, Clone)]
243pub struct ResultSummary {
244    pub key: String,
245    pub drop: bool,
246    pub score: i32,
247    pub partial: bool,
248    pub children: Vec<(Sha256, String)>
249}
250
251#[derive(Serialize, Deserialize)]
252pub enum ServiceResponse {
253    Result(ServiceResult),
254    Error(ServiceError),
255}
256
257impl ServiceResponse {
258    pub fn sid(&self) -> Sid {
259        match self {
260            ServiceResponse::Result(item) => item.sid,
261            ServiceResponse::Error(item) => item.sid,
262        }
263    }
264
265    pub fn sha256(&self) -> Sha256 {
266        match self {
267            ServiceResponse::Result(item) => item.sha256.clone(),
268            ServiceResponse::Error(item) => item.service_task.fileinfo.sha256.clone(),
269        }
270    }
271
272    pub fn service_name(&self) -> &str {
273        match self {
274            ServiceResponse::Result(item) => &item.service_name,
275            ServiceResponse::Error(item) => &item.service_task.service_name,
276        }
277    }
278}
279
280#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
281pub struct TagEntry {
282    pub score: i32,    
283    #[serde(rename="type")]
284    pub tag_type: String,
285    pub value: TagValue,
286
287}
288
289#[derive(Serialize, Deserialize)]
290pub struct ServiceResult {
291    pub dynamic_recursion_bypass: Vec<Sha256>,
292    pub sid: Sid,
293    pub sha256: Sha256,
294    pub service_name: String,
295    pub service_version: String,
296    pub service_tool_version: Option<String>,
297    pub expiry_ts: Option<chrono::DateTime<chrono::Utc>>,
298    pub result_summary: ResultSummary,
299    pub tags: HashMap<String, TagEntry>,
300    pub extracted_names: HashMap<Sha256, String>,
301    pub temporary_data: JsonMap,
302    pub extra_errors: Vec<String>,
303}
304
305#[derive(Serialize, Deserialize)]
306pub struct ServiceError {
307    pub sid: Sid,
308    pub service_task: Task,
309    pub error: crate::datastore::Error,
310    pub error_key: String,
311}