assemblyline_models/datastore/
result.rs

1// from collections import defaultdict
2// from typing import Any, Dict
3
4// from assemblyline import odm
5// from assemblyline.common import forge
6// from assemblyline.common.caching import generate_conf_key
7// from assemblyline.common.dict_utils import flatten
8// from assemblyline.common.tagging import tag_dict_to_list
9// from assemblyline.odm.models.tagging import Tagging
10
11use std::collections::HashMap;
12
13use chrono::{DateTime, Utc};
14use serde::{Serialize, Deserialize};
15use serde_with::{SerializeDisplay, DeserializeFromStr};
16use struct_metadata::Described;
17
18use crate::datastore::tagging::LayoutError;
19use crate::messages::task::{generate_conf_key, TagEntry, Task};
20use crate::types::strings::Keyword;
21use crate::{random_word, ElasticMeta, Readable};
22use crate::types::{ClassificationString, ExpandingClassification, ServiceName, Sha256, Text};
23
24use super::tagging::Tagging;
25
26#[derive(SerializeDisplay, DeserializeFromStr, strum::Display, strum::EnumString, Debug, Described, Clone, PartialEq, Eq)]
27#[metadata_type(ElasticMeta)]
28#[strum(serialize_all = "SCREAMING_SNAKE_CASE")]
29pub enum BodyFormat {
30    Text,
31    MemoryDump,
32    GraphData,
33    Url,
34    Json,
35    KeyValue,
36    ProcessTree,
37    Table,
38    Image,
39    Multi,
40    OrderedKeyValue,
41    Timeline,
42}
43
44// This needs to match the PROMOTE_TO StringTable in
45// assemblyline-v4-service/assemblyline_v4_service/common/result.py.
46// Any updates here need to go in that StringTable also.
47#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
48#[metadata_type(ElasticMeta)]
49#[serde(rename_all="SCREAMING_SNAKE_CASE")]
50pub enum PromoteTo {
51    Screenshot,
52    Entropy,
53    UriParams
54}
55
56// constants = forge.get_constants()
57
58#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
59#[metadata_type(ElasticMeta)]
60#[metadata(index=true, store=false)]
61pub struct Attack {
62    /// ID
63    #[metadata(copyto="__text__")]
64    pub attack_id: String,
65    /// Pattern Name
66    #[metadata(copyto="__text__")]
67    pub pattern: String,
68    /// Categories
69    pub categories: Vec<String>,
70}
71
72/// Heuristic Signatures
73#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
74#[metadata_type(ElasticMeta)]
75#[metadata(index=true, store=false)]
76pub struct Signature {
77    /// Name of the signature that triggered the heuristic
78    #[metadata(copyto="__text__")]
79    pub name: String,
80    /// Number of times this signature triggered the heuristic
81    #[serde(default = "default_signature_frequency")]
82    pub frequency: i32,
83    /// Is the signature safelisted or not
84    #[serde(default)]
85    pub safe: bool,
86}
87
/// Serde fallback for `Signature::frequency` when the field is absent from the input.
///
/// Returns `1`.
fn default_signature_frequency() -> i32 {
    1
}
89
90/// Heuristic associated to the Section
91#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
92#[metadata_type(ElasticMeta)]
93#[metadata(index=true, store=false)]
94pub struct Heuristic {
95    /// ID of the heuristic triggered
96    #[metadata(copyto="__text__")]
97    pub heur_id: String,
98    /// Name of the heuristic
99    #[metadata(copyto="__text__")]
100    pub name: String,
101    /// List of Att&ck IDs related to this heuristic
102    #[serde(default)]
103    pub attack: Vec<Attack>,
104    /// List of signatures that triggered the heuristic
105    #[serde(default)]
106    pub signature: Vec<Signature>,
107    /// Calculated Heuristic score
108    pub score: i32,
109}
110
111/// Result Section
112#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
113#[metadata_type(ElasticMeta)]
114#[metadata(index=true, store=false)]
115pub struct Section {
116    /// Should the section be collapsed when displayed?
117    #[serde(default)]
118    pub auto_collapse: bool,
119    /// Text body of the result section
120    #[metadata(copyto="__text__")]
121    pub body: Option<Text>,
122    /// Classification of the section
123    pub classification: ClassificationString,
124    /// Type of body in this section
125    #[metadata(index=false)]
126    pub body_format: BodyFormat,
127    /// Configurations for the body of this section
128    #[metadata(index=false)]
129    pub body_config: Option<HashMap<String, serde_json::Value>>,
130    /// Depth of the section
131    #[metadata(index=false)]
132    pub depth: i32,
133    /// Heuristic used to score result section
134    pub heuristic: Option<Heuristic>,
135    /// List of tags associated to this section
136    #[serde(default)]
137    pub tags: Tagging,
138    /// List of safelisted tags
139    #[serde(default)]
140    #[metadata(store=false, mapping="flattenedobject")]
141    pub safelisted_tags: HashMap<String, Vec<Keyword>>,
142    /// Title of the section
143    #[metadata(copyto="__text__")]
144    pub title_text: Text,
145    /// This is the type of data that the current section should be promoted to.
146    pub promote_to: Option<PromoteTo>,
147}
148
149/// Result Body
150#[derive(Serialize, Deserialize, Debug, Default, Described, Clone, PartialEq, Eq)]
151#[metadata_type(ElasticMeta)]
152#[metadata(index=true, store=true)]
153pub struct ResultBody {
154    /// Aggregate of the score for all heuristics
155    #[serde(default)]
156    pub score: i32,
157    /// List of sections
158    #[serde(default)]
159    pub sections: Vec<Section>,
160}
161
162/// Service Milestones
163#[derive(Serialize, Deserialize, Debug, Default, Described, Clone, PartialEq, Eq)]
164#[metadata_type(ElasticMeta)]
165#[metadata(index=false, store=false)]
166pub struct Milestone {
167    /// Date the service started scanning
168    pub service_started: DateTime<Utc>,
169    /// Date the service finished scanning
170    pub service_completed: DateTime<Utc>,
171}
172
#[cfg(feature = "rand")]
impl rand::distr::Distribution<Milestone> for rand::distr::StandardUniform {
    /// Generate a random, internally consistent [`Milestone`].
    ///
    /// The start is 1 to 199 hours before the current time and the completion
    /// follows the start by 1 to 899 seconds, so completion is always after start.
    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> Milestone {
        let now = chrono::Utc::now();

        // Draw order (hours first, then seconds) is kept so a seeded RNG
        // yields the same values as before.
        let hours_ago: i64 = rng.random_range(1..200);
        let started = now - chrono::TimeDelta::hours(hours_ago);

        let run_seconds: i64 = rng.random_range(1..900);
        let completed = started + chrono::TimeDelta::seconds(run_seconds);

        Milestone {
            service_started: started,
            service_completed: completed,
        }
    }
}
184
185
186/// File related to the Response
187#[derive(Serialize, Deserialize, Debug, Described, Clone, Eq)]
188#[metadata_type(ElasticMeta)]
189#[metadata(index=true, store=false)]
190pub struct File {
191    /// Name of the file
192    #[metadata(copyto="__text__")]
193    pub name: String,
194    /// SHA256 of the file
195    #[metadata(copyto="__text__")]
196    pub sha256: Sha256,
197    /// Description of the file
198    #[metadata(copyto="__text__")]
199    pub description: Text,
200    /// Classification of the file
201    pub classification: ClassificationString,
202    /// Is this an image used in an Image Result Section?
203    #[serde(default)]
204    pub is_section_image: bool,
205    /// File relation to parent, if any.
206    #[serde(default = "default_file_parent_relation")]
207    pub parent_relation: Text,
208    /// Allow file to be analysed during Dynamic Analysis even if Dynamic Recursion Prevention is enabled.
209    #[serde(default)]
210    pub allow_dynamic_recursion: bool,
211}
212
213
214impl PartialEq for File {
215    fn eq(&self, other: &Self) ->bool {
216        self.sha256 == other.sha256
217    }
218}
219
220impl File {
221    pub fn new(sha256: Sha256, name: String) -> Self {
222        File {
223            name,
224            sha256,
225            description: Default::default(),
226            classification: ClassificationString::default_unrestricted(),
227            is_section_image: false,
228            parent_relation: Default::default(),
229            allow_dynamic_recursion: false
230        }
231    }
232}
233
234fn default_file_parent_relation() -> Text { Text("EXTRACTED".to_owned()) }
235
236/// Response Body of Result
237#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
238#[metadata_type(ElasticMeta)]
239#[metadata(index=true, store=true)]
240pub struct ResponseBody {
241    /// Milestone block
242    #[serde(default)]
243    pub milestones: Milestone,
244    /// Version of the service
245    #[metadata(store=false)]
246    pub service_version: String,
247    /// Name of the service that scanned the file
248    #[metadata(copyto="__text__")]
249    pub service_name: ServiceName,
250    /// Tool version of the service
251    #[serde(default)]
252    #[metadata(copyto="__text__")]
253    pub service_tool_version: Option<String>,
254    /// List of supplementary files
255    #[serde(default)]
256    pub supplementary: Vec<File>,
257    /// List of extracted files
258    #[serde(default)]
259    pub extracted: Vec<File>,
260    /// Context about the service
261    #[serde(default)]
262    #[metadata(index=false, store=false)]
263    pub service_context: Option<String>,
264    /// Debug info about the service
265    #[serde(default)]
266    #[metadata(index=false, store=false)]
267    pub service_debug_info: Option<String>,
268}
269
270
#[cfg(feature = "rand")]
impl rand::distr::Distribution<ResponseBody> for rand::distr::StandardUniform {
    /// Generate a randomized [`ResponseBody`].
    ///
    /// Milestones, service version and service name are random; the tool
    /// version, file lists, context and debug info are left empty / `None`.
    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> ResponseBody {
        // Bound in the same order the fields were originally initialised so
        // the RNG is consumed identically: milestones, version, then name.
        let milestones: Milestone = rng.random();
        let service_version = random_word(rng);
        let service_name = ServiceName::from_string(random_word(rng));

        ResponseBody {
            milestones,
            service_version,
            service_name,
            service_tool_version: None,
            supplementary: Vec::new(),
            extracted: Vec::new(),
            service_context: None,
            service_debug_info: None,
        }
    }
}
286
287
288/// Result Model
289#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
290#[metadata_type(ElasticMeta)]
291#[metadata(index=true, store=true)]
292pub struct Result {
293    /// Timestamp indicating when the result was archived.
294    pub archive_ts: Option<DateTime<Utc>>,
295    /// Aggregate classification for the result
296    #[serde(flatten)]
297    pub classification: ExpandingClassification,
298    /// Date at which the result object got created
299    #[serde(default="chrono::Utc::now")]
300    pub created: DateTime<Utc>,
301    /// Expiry timestamp
302    #[metadata(store=false)]
303    pub expiry_ts: Option<DateTime<Utc>>,
304    /// The body of the response from the service
305    pub response: ResponseBody,
306    /// The result body
307    #[serde(default)]
308    pub result: ResultBody,
309    /// SHA256 of the file the result object relates to
310    #[metadata(store=false)]
311    pub sha256: Sha256,
312    /// What type information is given along with this result
313    #[serde(rename = "type")]
314    pub result_type: Option<String>,
315    /// ???
316    pub size: Option<i32>,
317    /// Use to not pass to other stages after this run
318    #[serde(default)]
319    pub drop_file: bool,
320    /// Invalidate the current result cache creation
321    #[serde(default)]
322    pub partial: bool,
323    /// Was loaded from the archive
324    #[serde(default)]
325    #[metadata(index=false)]
326    pub from_archive: bool,
327}
328
#[cfg(feature = "rand")]
impl rand::distr::Distribution<Result> for rand::distr::StandardUniform {
    /// Generate a randomized [`Result`].
    ///
    /// The response body and the sha256 are random, `created` is the current
    /// time, the classification is the unrestricted one, and everything else
    /// is empty, `None` or `false`.
    ///
    /// # Panics
    /// Panics if `ExpandingClassification::try_unrestricted()` does not yield a value.
    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> Result {
        let classification = ExpandingClassification::try_unrestricted().unwrap();
        let created = chrono::Utc::now();

        // Same draw order as the original field initialisers:
        // response first, then sha256.
        let response: ResponseBody = rng.random();
        let sha256: Sha256 = rng.random();

        Result {
            archive_ts: None,
            classification,
            created,
            expiry_ts: None,
            response,
            result: ResultBody::default(),
            sha256,
            result_type: None,
            size: None,
            drop_file: false,
            partial: false,
            from_archive: false,
        }
    }
}
348
impl Readable for Result {
    /// Record whether this result was loaded from the archive by
    /// overwriting the `from_archive` flag with the given value.
    fn set_from_archive(&mut self, from_archive: bool) {
        self.from_archive = from_archive;
    }
}
354
355impl Result {
356    pub fn build_key(&self, task: Option<&Task>) -> std::result::Result<String, serde_json::Error> {
357        Self::help_build_key(
358            &self.sha256,
359            &self.response.service_name,
360            &self.response.service_version,
361            self.is_empty(),
362            self.partial,
363            self.response.service_tool_version.as_deref(),
364            task
365        )
366    }
367
368    pub fn help_build_key(sha256: &Sha256, service_name: &str, service_version: &str, is_empty: bool, partial: bool, service_tool_version: Option<&str>, task: Option<&Task>) -> std::result::Result<String, serde_json::Error> {
369        let mut key_list = vec![
370            sha256.to_string(),
371            service_name.replace('.', "_"),
372            format!("v{}", service_version.replace('.', "_")),
373            format!("c{}", generate_conf_key(service_tool_version, task, Some(partial))?),
374        ];
375
376        if is_empty {
377            key_list.push("e".to_owned())
378        }
379
380        Ok(key_list.join("."))
381    }
382
383    pub fn scored_tag_dict(&self) -> std::result::Result<HashMap<String, TagEntry>, LayoutError> {
384        let mut tags: HashMap<String, TagEntry> = Default::default();
385        // Save the tags and their score
386        for section in &self.result.sections {
387            for tag in section.tags.to_list(None)? {
388                let key = format!("{}:{}", tag.tag_type, tag.value);
389                let entry = tags.entry(key).or_insert(tag);
390                if let Some(heuristic) = &section.heuristic {
391                    entry.score += heuristic.score;
392                }
393            }
394        }
395        Ok(tags)
396    }
397
398    pub fn is_empty(&self) -> bool {
399        self.response.extracted.is_empty() && self.response.supplementary.is_empty() && self.result.sections.is_empty() && self.result.score == 0 && !self.partial
400    }
401}