Skip to main content

assemblyline_models/datastore/
result.rs

1// from collections import defaultdict
2// from typing import Any, Dict
3
4// from assemblyline import odm
5// from assemblyline.common import forge
6// from assemblyline.common.caching import generate_conf_key
7// from assemblyline.common.dict_utils import flatten
8// from assemblyline.common.tagging import tag_dict_to_list
9// from assemblyline.odm.models.tagging import Tagging
10
11use std::collections::HashMap;
12
13use chrono::{DateTime, Utc};
14use serde::{Serialize, Deserialize};
15use serde_with::{SerializeDisplay, DeserializeFromStr};
16use struct_metadata::Described;
17
18use crate::datastore::tagging::LayoutError;
19use crate::messages::task::{generate_conf_key, TagEntry, Task};
20use crate::types::strings::Keyword;
21use crate::{random_word, ElasticMeta, Readable};
22use crate::types::{ClassificationString, ExpandingClassification, ServiceName, Sha256, Text};
23
24use super::tagging::Tagging;
25
26#[derive(SerializeDisplay, DeserializeFromStr, strum::Display, strum::EnumString, Debug, Described, Clone, PartialEq, Eq)]
27#[metadata_type(ElasticMeta)]
28#[strum(serialize_all = "SCREAMING_SNAKE_CASE")]
29pub enum BodyFormat {
30    Text,
31    MemoryDump,
32    GraphData,
33    Url,
34    Json,
35    KeyValue,
36    ProcessTree,
37    Table,
38    Image,
39    Multi,
40    OrderedKeyValue,
41    Timeline,
42    Sandbox
43}
44
45// This needs to match the PROMOTE_TO StringTable in
46// assemblyline-v4-service/assemblyline_v4_service/common/result.py.
47// Any updates here need to go in that StringTable also.
48#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
49#[metadata_type(ElasticMeta)]
50#[serde(rename_all="SCREAMING_SNAKE_CASE")]
51pub enum PromoteTo {
52    Screenshot,
53    Entropy,
54    UriParams
55}
56
57// constants = forge.get_constants()
58
59#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
60#[metadata_type(ElasticMeta)]
61#[metadata(index=true, store=false)]
62pub struct Attack {
63    /// ID
64    #[metadata(copyto="__text__")]
65    pub attack_id: String,
66    /// Pattern Name
67    #[metadata(copyto="__text__")]
68    pub pattern: String,
69    /// Categories
70    pub categories: Vec<String>,
71}
72
73/// Heuristic Signatures
74#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
75#[metadata_type(ElasticMeta)]
76#[metadata(index=true, store=false)]
77pub struct Signature {
78    /// Name of the signature that triggered the heuristic
79    #[metadata(copyto="__text__")]
80    pub name: String,
81    /// Number of times this signature triggered the heuristic
82    #[serde(default = "default_signature_frequency")]
83    pub frequency: i32,
84    /// Is the signature safelisted or not
85    #[serde(default)]
86    pub safe: bool,
87}
88
/// Serde fallback for `Signature::frequency`: a signature that arrives without
/// an explicit count is recorded as having triggered once.
fn default_signature_frequency() -> i32 {
    1
}
90
91/// Heuristic associated to the Section
92#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
93#[metadata_type(ElasticMeta)]
94#[metadata(index=true, store=false)]
95pub struct Heuristic {
96    /// ID of the heuristic triggered
97    #[metadata(copyto="__text__")]
98    pub heur_id: String,
99    /// Name of the heuristic
100    #[metadata(copyto="__text__")]
101    pub name: String,
102    /// List of Att&ck IDs related to this heuristic
103    #[serde(default)]
104    pub attack: Vec<Attack>,
105    /// List of signatures that triggered the heuristic
106    #[serde(default)]
107    pub signature: Vec<Signature>,
108    /// Calculated Heuristic score
109    pub score: i32,
110}
111
112/// Result Section
113#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
114#[metadata_type(ElasticMeta)]
115#[metadata(index=true, store=false)]
116pub struct Section {
117    /// Should the section be collapsed when displayed?
118    #[serde(default)]
119    pub auto_collapse: bool,
120    /// Text body of the result section
121    #[metadata(copyto="__text__")]
122    pub body: Option<Text>,
123    /// Classification of the section
124    pub classification: ClassificationString,
125    /// Type of body in this section
126    #[metadata(index=false)]
127    pub body_format: BodyFormat,
128    /// Configurations for the body of this section
129    #[metadata(index=false)]
130    pub body_config: Option<HashMap<String, serde_json::Value>>,
131    /// Depth of the section
132    #[metadata(index=false)]
133    pub depth: i32,
134    /// Heuristic used to score result section
135    pub heuristic: Option<Heuristic>,
136    /// List of tags associated to this section
137    #[serde(default)]
138    pub tags: Tagging,
139    /// List of safelisted tags
140    #[serde(default)]
141    #[metadata(store=false, mapping="flattenedobject")]
142    pub safelisted_tags: HashMap<String, Vec<Keyword>>,
143    /// Title of the section
144    #[metadata(copyto="__text__")]
145    pub title_text: Text,
146    /// This is the type of data that the current section should be promoted to.
147    pub promote_to: Option<PromoteTo>,
148}
149
150/// Result Body
151#[derive(Serialize, Deserialize, Debug, Default, Described, Clone, PartialEq, Eq)]
152#[metadata_type(ElasticMeta)]
153#[metadata(index=true, store=true)]
154pub struct ResultBody {
155    /// Aggregate of the score for all heuristics
156    #[serde(default)]
157    pub score: i32,
158    /// List of sections
159    #[serde(default)]
160    pub sections: Vec<Section>,
161}
162
163/// Service Milestones
164#[derive(Serialize, Deserialize, Debug, Default, Described, Clone, PartialEq, Eq)]
165#[metadata_type(ElasticMeta)]
166#[metadata(index=false, store=false)]
167pub struct Milestone {
168    /// Date the service started scanning
169    pub service_started: DateTime<Utc>,
170    /// Date the service finished scanning
171    pub service_completed: DateTime<Utc>,
172}
173
#[cfg(feature = "rand")]
impl rand::distr::Distribution<Milestone> for rand::distr::StandardUniform {
    /// Produces a random milestone pair for test data: the start lies between
    /// 1 and 199 hours before the current time, and completion follows the
    /// start by 1 to 899 seconds, so `service_completed` is always later.
    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> Milestone {
        // The clock read and the two draws happen in this order: clock, hours, seconds.
        let now = chrono::Utc::now();
        let hours_ago = chrono::TimeDelta::hours(rng.random_range(1..200));
        let run_time = chrono::TimeDelta::seconds(rng.random_range(1..900));

        let service_started = now - hours_ago;
        let service_completed = service_started + run_time;
        Milestone { service_started, service_completed }
    }
}
185
186
187/// File related to the Response
188#[derive(Serialize, Deserialize, Debug, Described, Clone, Eq)]
189#[metadata_type(ElasticMeta)]
190#[metadata(index=true, store=false)]
191pub struct File {
192    /// Name of the file
193    #[metadata(copyto="__text__")]
194    pub name: String,
195    /// SHA256 of the file
196    #[metadata(copyto="__text__")]
197    pub sha256: Sha256,
198    /// Description of the file
199    #[metadata(copyto="__text__")]
200    pub description: Text,
201    /// Classification of the file
202    pub classification: ClassificationString,
203    /// Is this an image used in an Image Result Section?
204    #[serde(default)]
205    pub is_section_image: bool,
206    /// File relation to parent, if any.
207    #[serde(default = "default_file_parent_relation")]
208    pub parent_relation: Text,
209    /// Allow file to be analysed during Dynamic Analysis even if Dynamic Recursion Prevention is enabled.
210    #[serde(default)]
211    pub allow_dynamic_recursion: bool,
212}
213
214
215impl PartialEq for File {
216    fn eq(&self, other: &Self) ->bool {
217        self.sha256 == other.sha256
218    }
219}
220
221impl File {
222    pub fn new(sha256: Sha256, name: String) -> Self {
223        File {
224            name,
225            sha256,
226            description: Default::default(),
227            classification: ClassificationString::default_unrestricted(),
228            is_section_image: false,
229            parent_relation: Default::default(),
230            allow_dynamic_recursion: false
231        }
232    }
233}
234
235fn default_file_parent_relation() -> Text { Text("EXTRACTED".to_owned()) }
236
237/// Response Body of Result
238#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
239#[metadata_type(ElasticMeta)]
240#[metadata(index=true, store=true)]
241pub struct ResponseBody {
242    /// Milestone block
243    #[serde(default)]
244    pub milestones: Milestone,
245    /// Version of the service
246    #[metadata(store=false)]
247    pub service_version: String,
248    /// Name of the service that scanned the file
249    #[metadata(copyto="__text__")]
250    pub service_name: ServiceName,
251    /// Tool version of the service
252    #[serde(default)]
253    #[metadata(copyto="__text__")]
254    pub service_tool_version: Option<String>,
255    /// List of supplementary files
256    #[serde(default)]
257    pub supplementary: Vec<File>,
258    /// List of extracted files
259    #[serde(default)]
260    pub extracted: Vec<File>,
261    /// Context about the service
262    #[serde(default)]
263    #[metadata(index=false, store=false)]
264    pub service_context: Option<String>,
265    /// Debug info about the service
266    #[serde(default)]
267    #[metadata(index=false, store=false)]
268    pub service_debug_info: Option<String>,
269}
270
271
#[cfg(feature = "rand")]
impl rand::distr::Distribution<ResponseBody> for rand::distr::StandardUniform {
    /// Produces a random response body for test data: random milestones,
    /// service version and service name; no tool version, no files, no
    /// context and no debug info.
    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> ResponseBody {
        // Draw in field order: milestones, version, name.
        let milestones: Milestone = rng.random();
        let service_version: String = random_word(rng);
        let service_name = ServiceName::from_string(random_word(rng));

        ResponseBody {
            milestones,
            service_version,
            service_name,
            service_tool_version: None,
            supplementary: Vec::new(),
            extracted: Vec::new(),
            service_context: None,
            service_debug_info: None,
        }
    }
}
287
288
289/// Result Model
290#[derive(Serialize, Deserialize, Debug, Described, Clone, PartialEq, Eq)]
291#[metadata_type(ElasticMeta)]
292#[metadata(index=true, store=true)]
293pub struct Result {
294    /// Timestamp indicating when the result was archived.
295    pub archive_ts: Option<DateTime<Utc>>,
296    /// Aggregate classification for the result
297    #[serde(flatten)]
298    pub classification: ExpandingClassification,
299    /// Date at which the result object got created
300    #[serde(default="chrono::Utc::now")]
301    pub created: DateTime<Utc>,
302    /// Expiry timestamp
303    #[metadata(store=false)]
304    pub expiry_ts: Option<DateTime<Utc>>,
305    /// The body of the response from the service
306    pub response: ResponseBody,
307    /// The result body
308    #[serde(default)]
309    pub result: ResultBody,
310    /// SHA256 of the file the result object relates to
311    #[metadata(store=false)]
312    pub sha256: Sha256,
313    /// What type information is given along with this result
314    #[serde(rename = "type")]
315    pub result_type: Option<String>,
316    /// ???
317    pub size: Option<i32>,
318    /// Use to not pass to other stages after this run
319    #[serde(default)]
320    pub drop_file: bool,
321    /// Invalidate the current result cache creation
322    #[serde(default)]
323    pub partial: bool,
324    /// Was loaded from the archive
325    #[serde(default)]
326    #[metadata(index=false)]
327    pub from_archive: bool,
328}
329
#[cfg(feature = "rand")]
impl rand::distr::Distribution<Result> for rand::distr::StandardUniform {
    /// Produces a random result for test data: unrestricted classification,
    /// creation time set to the current time, a random response and hash, an
    /// empty result body, and every optional field and flag left unset.
    ///
    /// # Panics
    /// Panics if `ExpandingClassification::try_unrestricted()` fails.
    fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> Result {
        let classification = ExpandingClassification::try_unrestricted().unwrap();
        let created = chrono::Utc::now();
        // The response is drawn before the hash.
        let response: ResponseBody = rng.random();
        let sha256: Sha256 = rng.random();

        Result {
            archive_ts: None,
            classification,
            created,
            expiry_ts: None,
            response,
            result: ResultBody::default(),
            sha256,
            result_type: None,
            size: None,
            partial: false,
            drop_file: false,
            from_archive: false,
        }
    }
}
349
350impl Readable for Result {
351    fn set_from_archive(&mut self, from_archive: bool) {
352        self.from_archive = from_archive;
353    }
354}
355
356impl Result {
357    pub fn build_key(&self, task: Option<&Task>) -> std::result::Result<String, serde_json::Error> {
358        Self::help_build_key(
359            &self.sha256,
360            &self.response.service_name,
361            &self.response.service_version,
362            self.is_empty(),
363            self.partial,
364            self.response.service_tool_version.as_deref(),
365            task
366        )
367    }
368
369    pub fn help_build_key(sha256: &Sha256, service_name: &str, service_version: &str, is_empty: bool, partial: bool, service_tool_version: Option<&str>, task: Option<&Task>) -> std::result::Result<String, serde_json::Error> {
370        let mut key_list = vec![
371            sha256.to_string(),
372            service_name.replace('.', "_"),
373            format!("v{}", service_version.replace('.', "_")),
374            format!("c{}", generate_conf_key(service_tool_version, task, Some(partial))?),
375        ];
376
377        if is_empty {
378            key_list.push("e".to_owned())
379        }
380
381        Ok(key_list.join("."))
382    }
383
384    pub fn scored_tag_dict(&self) -> std::result::Result<HashMap<String, TagEntry>, LayoutError> {
385        let mut tags: HashMap<String, TagEntry> = Default::default();
386        // Save the tags and their score
387        for section in &self.result.sections {
388            for tag in section.tags.to_list(None)? {
389                let key = format!("{}:{}", tag.tag_type, tag.value);
390                let entry = tags.entry(key).or_insert(tag);
391                if let Some(heuristic) = &section.heuristic {
392                    entry.score += heuristic.score;
393                }
394            }
395        }
396        Ok(tags)
397    }
398
399    pub fn is_empty(&self) -> bool {
400        self.response.extracted.is_empty() && self.response.supplementary.is_empty() && self.result.sections.is_empty() && self.result.score == 0 && !self.partial
401    }
402}