Skip to main content

assemblyline_models/datastore/
tagging.rs

1use std::collections::HashMap;
2use std::sync::LazyLock;
3
4use serde::{Deserialize, Serialize};
5use serde_json::Value;
6use struct_metadata::{Described, MetadataKind};
7
8use crate::messages::task::TagEntry;
9use crate::types::ja4::is_ja4;
10use crate::types::md5::is_md5;
11use crate::types::sha1::is_sha1;
12use crate::types::sha256::is_sha256;
13use crate::types::json_validation::{transform_string_with, validate_lowercase, validate_lowercase_with, validate_number, validate_rule_mapping, validate_string, validate_string_with, validate_uppercase, validate_uppercase_with};
14use crate::types::ssdeep::is_ssdeep_hash;
15use crate::types::strings::{check_domain, check_email, check_uri, is_ip, is_mac, is_phone_number, is_unc_path, is_uri_path};
16use crate::types::JsonMap;
17use crate::ElasticMeta;
18
19// MARK: Tag Value
20/// A thin wrapper over the generic JSON value type to enforce tag specific behaviours we want
21#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
22#[serde(transparent)]
23pub struct TagValue(serde_json::Value);
24
25// When we convert tags to strings we don't want to include quotes on raw strings
26impl std::fmt::Display for TagValue {
27    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28        match &self.0 {
29            serde_json::Value::String(string) => f.write_str(string),
30            other => f.write_fmt(format_args!("{other}"))
31        }
32    }
33}
34
35impl From<&str> for TagValue {
36    fn from(value: &str) -> Self {
37        Self(serde_json::Value::String(value.to_owned()))
38    }
39}
40
41impl From<String> for TagValue {
42    fn from(value: String) -> Self {
43        Self(serde_json::Value::String(value))
44    }
45}
46
47// MARK: Tag Processors
/// Selects how the value of a tag is validated or normalized.
///
/// Every variant maps to exactly one validation helper in [`TagProcessor::apply`].
/// The enum carries no data, so it is cheap to copy and can be compared and hashed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TagProcessor {
    // Generic strings
    /// Handled by `validate_string`.
    String,
    /// Handled by `validate_uppercase`.
    Uppercase,
    /// Handled by `validate_lowercase`.
    Lowercase,

    // Special purpose strings
    /// String checked with `is_phone_number`.
    PhoneNumber,
    /// Mapping value, shaped like `HashMap<String, Vec<String>>`.
    RuleMapping,
    /// String passed through `check_domain`.
    Domain,
    /// String checked with `is_ip`.
    IpAddress,
    /// String passed through `check_uri`.
    Uri,
    /// String checked with `is_mac`.
    Mac,
    /// String checked with `is_unc_path`.
    UNCPath,
    /// String checked with `is_uri_path`.
    UriPath,
    /// String passed through `check_email`.
    EmailAddress,

    // hashes
    /// String checked with `is_sha256`.
    Sha256,
    /// String checked with `is_sha1`.
    Sha1,
    /// String checked with `is_md5`.
    MD5,
    /// String checked with `is_ssdeep_hash`.
    SSDeepHash,
    /// String checked with `is_ja4`.
    JA4,

    // numbers
    /// Number validated as a `u16`.
    U16,
    /// Number validated as an `i32`.
    I32,
    // I64,
}
78
79
80impl TagProcessor {
81    pub fn apply(&self, value: serde_json::Value) -> Result<serde_json::Value, serde_json::Value> {
82        match self {
83            TagProcessor::String => validate_string(value),
84            TagProcessor::Uppercase => validate_uppercase(value),
85            TagProcessor::Lowercase => validate_lowercase(value),
86            TagProcessor::PhoneNumber => validate_string_with(value, is_phone_number),
87            TagProcessor::RuleMapping => validate_rule_mapping(value),
88            TagProcessor::Domain => transform_string_with(value, |domain| check_domain(domain).ok()),
89            TagProcessor::IpAddress => validate_uppercase_with(value, is_ip),
90            TagProcessor::Uri => transform_string_with(value, |uri| check_uri(uri).ok()),
91            TagProcessor::Mac => validate_lowercase_with(value, is_mac),
92            TagProcessor::UNCPath => validate_string_with(value, is_unc_path),
93            TagProcessor::UriPath => validate_string_with(value, is_uri_path),
94            TagProcessor::EmailAddress => transform_string_with(value, |email| check_email(email).ok()),
95            TagProcessor::Sha256 => validate_lowercase_with(value, is_sha256),
96            TagProcessor::Sha1 => validate_lowercase_with(value, is_sha1),
97            TagProcessor::MD5 => validate_lowercase_with(value, is_md5),
98            TagProcessor::SSDeepHash => validate_string_with(value, is_ssdeep_hash),
99            TagProcessor::JA4 => validate_lowercase_with(value, is_ja4),
100            TagProcessor::U16 => validate_number::<u16>(value),
101            TagProcessor::I32 => validate_number::<i32>(value),
102            // TagProcessor::I64 => validate_number::<i64>(value),
103        }
104    }
105}
106
107// MARK: Tag Information
108#[derive(Debug)]
109pub struct TagInformation {
110    name: &'static [&'static str],
111    description: &'static str,
112    processor: TagProcessor,
113}
114
115impl Eq for TagInformation {}
116
117impl PartialEq for TagInformation {
118    fn eq(&self, other: &Self) -> bool {
119        self.name == other.name
120    }
121}
122
123impl std::hash::Hash for TagInformation {
124    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
125        self.name.hash(state);
126    }
127}
128
129impl TagInformation {
130    const fn new(name: &'static [&'static str], description: &'static str, processor: TagProcessor) -> Self {
131        Self {
132            name,
133            description,
134            processor
135        }
136    }
137
138    pub fn full_path(&self) -> String {
139        self.name.join(".")
140    }
141
142    pub fn metadata_type(&self) -> struct_metadata::Descriptor<ElasticMeta> {
143        use struct_metadata::{Kind, Descriptor};
144        let metadata = ElasticMeta{copyto: Some("__text__"), ..Default::default()};
145        let mut desc = match self.processor {
146            TagProcessor::RuleMapping => struct_metadata::Descriptor {
147                docs: None,
148                metadata,
149                kind: Kind::Mapping(
150                    Box::new(Descriptor { docs: None, metadata: Default::default(), kind: Kind::String }),
151                    Box::new(Descriptor { docs: None, metadata: Default::default(), kind: Kind::Sequence(
152                        Box::new(Descriptor { docs: None, metadata: ElasticMeta{copyto: Some("__text__"), ..Default::default()}, kind: Kind::String }),
153                    )})
154                )
155            },
156            // TagProcessor::I64 => struct_metadata::Descriptor { docs: None, metadata: Default::default(), kind: Kind::I64},
157            TagProcessor::U16 => struct_metadata::Descriptor { docs: None, metadata: Default::default(), kind: Kind::U16},
158            TagProcessor::I32 => struct_metadata::Descriptor { docs: None, metadata: Default::default(), kind: Kind::I32},
159            TagProcessor::IpAddress => struct_metadata::Descriptor {
160                docs: None,
161                metadata: ElasticMeta { mapping: Some("ip"), ..metadata },
162                kind: Kind::String
163            },
164            TagProcessor::SSDeepHash => struct_metadata::Descriptor {
165                docs: None,
166                metadata: ElasticMeta{mapping: Some("text"), analyzer: Some("text_fuzzy"), ..metadata},
167                kind: Kind::String
168            },
169            TagProcessor::Lowercase | TagProcessor::Sha1 | TagProcessor::MD5 | TagProcessor::Sha256 => struct_metadata::Descriptor {
170                docs: None,
171                metadata: ElasticMeta { normalizer: Some("lowercase_normalizer"), ..metadata },
172                kind: Kind::String
173            },
174            _ => struct_metadata::Descriptor { docs: None, metadata, kind: Kind::String }
175        };
176
177        desc.docs = Some(vec![self.description]);
178        desc
179    }
180
181    pub fn inner_mapping(&self) -> bool {
182        matches!(self.processor, TagProcessor::RuleMapping)
183    }
184}
185
186
187// MARK: Tag List
188/// The list of all tags we are willing to accept.
189/// This includes their path within a tagging dict, a textual description and how they should be processed for validation or normalization
190static ALL_VALID_TAGS: [TagInformation; 214] = [
191    TagInformation::new(&["attribution", "actor"], "Attribution Actor", TagProcessor::Uppercase),
192    TagInformation::new(&["attribution", "campaign"], "Attribution Campaign", TagProcessor::Uppercase),
193    TagInformation::new(&["attribution", "category"], "Attribution Category", TagProcessor::Uppercase),
194    TagInformation::new(&["attribution", "exploit"], "Attribution Exploit", TagProcessor::Uppercase),
195    TagInformation::new(&["attribution", "implant"], "Attribution Implant", TagProcessor::Uppercase),
196    TagInformation::new(&["attribution", "family"], "Attribution Family", TagProcessor::Uppercase),
197    TagInformation::new(&["attribution", "network"], "Attribution Network", TagProcessor::Uppercase),
198
199    TagInformation::new(&["av", "heuristic"], "List of heuristics", TagProcessor::String),
200    TagInformation::new(&["av", "virus_name"], "Collection of virus names identified by antivirus tools", TagProcessor::String),
201
202    TagInformation::new(&["cert", "extended_key_usage"], "Extended Key Usage", TagProcessor::String),
203    TagInformation::new(&["cert", "issuer"], "Issuer", TagProcessor::String),
204    TagInformation::new(&["cert", "key_usage"], "Key Usage", TagProcessor::String),
205    TagInformation::new(&["cert", "owner"], "Owner", TagProcessor::String),
206    TagInformation::new(&["cert", "serial_no"], "Serial Number", TagProcessor::String),
207    TagInformation::new(&["cert", "signature_algo"], "Signature Algorithm", TagProcessor::String),
208    TagInformation::new(&["cert", "subject"], "Subject Name", TagProcessor::String),
209    TagInformation::new(&["cert", "subject_alt_name"], "Alternative Subject Name", TagProcessor::String),
210    TagInformation::new(&["cert", "thumbprint"], "Thumbprint", TagProcessor::String),
211    TagInformation::new(&["cert", "valid", "start"], "Start date of certificate validity", TagProcessor::String),
212    TagInformation::new(&["cert", "valid", "end"], "End date of certificate validity", TagProcessor::String),
213    TagInformation::new(&["cert", "version"], "Version", TagProcessor::String),
214
215    TagInformation::new(&["code", "sha256"], "Code Tagging: Sha256 of code", TagProcessor::Sha256),
216
217    TagInformation::new(&["dynamic", "autorun_location"], "Autorun location", TagProcessor::String),
218    TagInformation::new(&["dynamic", "dos_device"], "DOS Device", TagProcessor::String),
219    TagInformation::new(&["dynamic", "mutex"], "Mutex", TagProcessor::String),
220    TagInformation::new(&["dynamic", "registry_key"], "Registy Keys", TagProcessor::String),
221    TagInformation::new(&["dynamic", "process", "command_line"], "Commandline", TagProcessor::String),
222    TagInformation::new(&["dynamic", "process", "file_name"], "Filename", TagProcessor::String),
223    TagInformation::new(&["dynamic", "process", "file_path"], "Filepath", TagProcessor::String),
224    TagInformation::new(&["dynamic", "process", "module_name"], "Module name", TagProcessor::String),
225    TagInformation::new(&["dynamic", "process", "module_path"], "Module path", TagProcessor::String),
226    TagInformation::new(&["dynamic", "process", "shortcut"], "Shortcut", TagProcessor::String),
227    TagInformation::new(&["dynamic", "signature", "category"], "Signature Category", TagProcessor::String),
228    TagInformation::new(&["dynamic", "signature", "family"], "Signature Family", TagProcessor::String),
229    TagInformation::new(&["dynamic", "signature", "name"], "Signature Name", TagProcessor::String),
230    TagInformation::new(&["dynamic", "ssdeep", "cls_ids"], "CLSIDs", TagProcessor::SSDeepHash),
231    TagInformation::new(&["dynamic", "ssdeep", "dynamic_classes"], "Dynamic Classes", TagProcessor::SSDeepHash),
232    TagInformation::new(&["dynamic", "ssdeep", "regkeys"], "Registry Keys", TagProcessor::SSDeepHash),
233    TagInformation::new(&["dynamic", "window", "cls_ids"], "CLSIDs", TagProcessor::String),
234    TagInformation::new(&["dynamic", "window", "dynamic_classes"], "Dynamic Classes", TagProcessor::String),
235    TagInformation::new(&["dynamic", "window", "regkeys"], "Registry Keys", TagProcessor::String),
236    TagInformation::new(&["dynamic", "operating_system", "platform"], "Platform", TagProcessor::String),
237    TagInformation::new(&["dynamic", "operating_system", "version"], "Version", TagProcessor::String),
238    TagInformation::new(&["dynamic", "operating_system", "processor"], "Processor", TagProcessor::String),
239    TagInformation::new(&["dynamic", "processtree_id"], "Process Tree ID", TagProcessor::String),
240
241    TagInformation::new(&["info", "phone_number"], "Phone Number", TagProcessor::PhoneNumber),
242    TagInformation::new(&["info", "password"], "Suspected Password", TagProcessor::String),
243
244    TagInformation::new(&["file", "ancestry"], "File Genealogy", TagProcessor::String),
245    TagInformation::new(&["file", "behavior"], "File Behaviour", TagProcessor::String),
246    TagInformation::new(&["file", "compiler"], "Compiler of File", TagProcessor::String),
247    TagInformation::new(&["file", "config"], "File Configuration", TagProcessor::String),
248    TagInformation::new(&["file", "date", "creation"], "File Creation Date", TagProcessor::String),
249    TagInformation::new(&["file", "date", "last_modified"], "File Last Modified Date", TagProcessor::String),
250    TagInformation::new(&["file", "elf", "libraries"], "ELF File Properties: Libraries", TagProcessor::String),
251    TagInformation::new(&["file", "elf", "interpreter"], "ELF File Properties: Interpreter", TagProcessor::String),
252    TagInformation::new(&["file", "elf", "sections", "name"], "ELF File Properties: Section Name", TagProcessor::String),
253    TagInformation::new(&["file", "elf", "segments", "type"], "ELF File Properties: Segment Types", TagProcessor::String),
254    TagInformation::new(&["file", "elf", "notes", "name"], "ELF File Properties: Note name", TagProcessor::String),
255    TagInformation::new(&["file", "elf", "notes", "type"], "ELF File Properties: Note type", TagProcessor::String),
256    TagInformation::new(&["file", "elf", "notes", "type_core"], "ELF File Properties: Note type core", TagProcessor::String),
257    TagInformation::new(&["file", "lib"], "File Libraries", TagProcessor::String),
258    TagInformation::new(&["file", "lsh"], "File LSH hashes", TagProcessor::String),
259    TagInformation::new(&["file", "name", "anomaly"], "File Anomaly Name", TagProcessor::String),
260    TagInformation::new(&["file", "name", "extracted"], "File Extracted Name", TagProcessor::String),
261    TagInformation::new(&["file", "path"], "File Path", TagProcessor::String),
262    TagInformation::new(&["file", "rule"], "Rule/Signature File", TagProcessor::RuleMapping),
263    TagInformation::new(&["file", "string", "api"], "File API Strings", TagProcessor::String),
264    TagInformation::new(&["file", "string", "blacklisted"], "File Known Bad Strings", TagProcessor::String),
265    TagInformation::new(&["file", "string", "decoded"], "File Decoded Strings", TagProcessor::String),
266    TagInformation::new(&["file", "string", "extracted"], "File Extracted Strings", TagProcessor::String),
267    TagInformation::new(&["file", "apk", "activity"], "APK File Properties: Activity", TagProcessor::String),
268    TagInformation::new(&["file", "apk", "app", "label"], "APK File Properties: APK Application Information: Label", TagProcessor::String),
269    TagInformation::new(&["file", "apk", "app", "version"], "APK File Properties: APK Application Information: Version", TagProcessor::String),
270    TagInformation::new(&["file", "apk", "feature"], "APK File Properties: Features", TagProcessor::String),
271    TagInformation::new(&["file", "apk", "locale"], "APK File Properties: Locale", TagProcessor::String),
272    TagInformation::new(&["file", "apk", "permission"], "APK File Properties: Permissions", TagProcessor::String),
273    TagInformation::new(&["file", "apk", "pkg_name"], "APK File Properties: Package Name", TagProcessor::String),
274    TagInformation::new(&["file", "apk", "provides_component"], "APK File Properties: Components Provided", TagProcessor::String),
275    TagInformation::new(&["file", "apk", "sdk", "min"], "APK File Properties: APK SDK minimum OS required", TagProcessor::String),
276    TagInformation::new(&["file", "apk", "sdk", "target"], "APK File Properties: APK SDK target OS", TagProcessor::String),
277    TagInformation::new(&["file", "apk", "used_library"], "APK File Properties: Libraries Used", TagProcessor::String),
278    TagInformation::new(&["file", "jar", "main_class"], "JAR File Properties: Main Class", TagProcessor::String),
279    TagInformation::new(&["file", "jar", "main_package"], "JAR File Properties: Main Package", TagProcessor::String),
280    TagInformation::new(&["file", "jar", "imported_package"], "JAR File Properties: Imported package", TagProcessor::String),
281    TagInformation::new(&["file", "img", "exif_tool", "creator_tool"], "Image File Properties: Exiftool Information: Image Creation Tool", TagProcessor::String),
282    TagInformation::new(&["file", "img", "exif_tool", "derived_document_id"], "Image File Properties: Exiftool Information: Derived Document ID", TagProcessor::String),
283    TagInformation::new(&["file", "img", "exif_tool", "document_id"], "Image File Properties: Exiftool Information: Document ID", TagProcessor::String),
284    TagInformation::new(&["file", "img", "exif_tool", "instance_id"], "Image File Properties: Exiftool Information: Instance ID", TagProcessor::String),
285    TagInformation::new(&["file", "img", "exif_tool", "toolkit"], "Image File Properties: Exiftool Information: Toolkit", TagProcessor::String),
286    TagInformation::new(&["file", "img", "mega_pixels"], "Image File Properties: Megapixels", TagProcessor::String),
287    TagInformation::new(&["file", "img", "mode"], "Image File Properties: Image Mode", TagProcessor::String),
288    TagInformation::new(&["file", "img", "size"], "Image File Properties: Image Size", TagProcessor::String),
289    TagInformation::new(&["file", "img", "sorted_metadata_hash"], "Image File Properties: Sorted Metadata Hash", TagProcessor::String),
290    TagInformation::new(&["file", "ole", "macro", "sha256"], "OLE File Properties: OLE Macro: SHA256 of Macro", TagProcessor::Sha256),
291    TagInformation::new(&["file", "ole", "macro", "suspicious_string"], "OLE File Properties: OLE Macro: Suspicious Strings", TagProcessor::String),
292    TagInformation::new(&["file", "ole", "summary", "author"], "OLE File Properties: OLE Summary: Author", TagProcessor::String),
293    TagInformation::new(&["file", "ole", "summary", "codepage"], "OLE File Properties: OLE Summary: Code Page", TagProcessor::String),
294    TagInformation::new(&["file", "ole", "summary", "comment"], "OLE File Properties: OLE Summary: Comment", TagProcessor::String),
295    TagInformation::new(&["file", "ole", "summary", "company"], "OLE File Properties: OLE Summary: Company", TagProcessor::String),
296    TagInformation::new(&["file", "ole", "summary", "create_time"], "OLE File Properties: OLE Summary: Creation Time", TagProcessor::String),
297    TagInformation::new(&["file", "ole", "summary", "last_printed"], "OLE File Properties: OLE Summary: Date Last Printed", TagProcessor::String),
298    TagInformation::new(&["file", "ole", "summary", "last_saved_by"], "OLE File Properties: OLE Summary: User Last Saved By", TagProcessor::String),
299    TagInformation::new(&["file", "ole", "summary", "last_saved_time"], "OLE File Properties: OLE Summary: Date Last Saved", TagProcessor::String),
300    TagInformation::new(&["file", "ole", "summary", "manager"], "OLE File Properties: OLE Summary: Manager", TagProcessor::String),
301    TagInformation::new(&["file", "ole", "summary", "subject"], "OLE File Properties: OLE Summary: Subject", TagProcessor::String),
302    TagInformation::new(&["file", "ole", "summary", "title"], "OLE File Properties: OLE Summary: Title", TagProcessor::String),
303    TagInformation::new(&["file", "ole", "clsid"], "OLE File Properties: CLSID", TagProcessor::String),
304    TagInformation::new(&["file", "ole", "dde_link"], "OLE File Properties: DDE Link", TagProcessor::String),
305    TagInformation::new(&["file", "ole", "fib_timestamp"], "OLE File Properties: FIB Timestamp", TagProcessor::String),
306    TagInformation::new(&["file", "pe", "api_vector"], "PE File Properties: API Vector", TagProcessor::String),
307    TagInformation::new(&["file", "pe", "authenticode", "spc_sp_opus_info", "program_name"], "PE File Properties: PE Authenticode Information: Program name", TagProcessor::String),
308    TagInformation::new(&["file", "pe", "debug", "guid"], "PE File Properties: PE Debug Information: GUID", TagProcessor::String),
309    TagInformation::new(&["file", "pe", "exports", "function_name"], "PE File Properties: PE Exports Information: Function Name", TagProcessor::String),
310    TagInformation::new(&["file", "pe", "exports", "module_name"], "PE File Properties: PE Exports Information: Module Name", TagProcessor::String),
311    TagInformation::new(&["file", "pe", "imports", "fuzzy"], "PE File Properties: PE Imports Information: Fuzzy", TagProcessor::SSDeepHash),
312    TagInformation::new(&["file", "pe", "imports", "md5"], "PE File Properties: PE Imports Information: MD5", TagProcessor::MD5),
313    TagInformation::new(&["file", "pe", "imports", "imphash"], "PE File Properties: PE Imports Information: Imphash", TagProcessor::MD5),
314    TagInformation::new(&["file", "pe", "imports", "sorted_fuzzy"], "PE File Properties: PE Imports Information: Sorted Fuzzy", TagProcessor::SSDeepHash),
315    TagInformation::new(&["file", "pe", "imports", "sorted_sha1"], "PE File Properties: PE Imports Information: Sorted SHA1", TagProcessor::Sha1),
316    TagInformation::new(&["file", "pe", "imports", "gimphash"], "PE File Properties: PE Imports Information: Go Import hash", TagProcessor::Sha256),
317    TagInformation::new(&["file", "pe", "imports", "suspicious"], "PE File Properties: PE Imports Information: Suspicious", TagProcessor::String),
318    TagInformation::new(&["file", "pe", "linker", "timestamp"], "PE File Properties: PE Linker Information: timestamp", TagProcessor::String),
319    TagInformation::new(&["file", "pe", "oep", "bytes"], "PE File Properties: PE OEP Information: Bytes", TagProcessor::String),
320    TagInformation::new(&["file", "pe", "oep", "hexdump"], "PE File Properties: PE OEP Information: Hex Dump", TagProcessor::String),
321    TagInformation::new(&["file", "pe", "pdb_filename"], "PE File Properties: PDB Filename", TagProcessor::String),
322    TagInformation::new(&["file", "pe", "resources", "language"], "PE File Properties: PE Resources Information: Language", TagProcessor::String),
323    TagInformation::new(&["file", "pe", "resources", "name"], "PE File Properties: PE Resources Information: Name", TagProcessor::String),
324    TagInformation::new(&["file", "pe", "rich_header", "hash"], "PE File Properties: PE Rich Header Information: Hash", TagProcessor::String),
325    TagInformation::new(&["file", "pe", "sections", "hash"], "PE File Properties: PE Sections Information: Hash", TagProcessor::String),
326    TagInformation::new(&["file", "pe", "sections", "name"], "PE File Properties: PE Sections Information: Name", TagProcessor::String),
327    TagInformation::new(&["file", "pe", "versions", "description"], "PE File Properties: PE Versions Information: Description", TagProcessor::String),
328    TagInformation::new(&["file", "pe", "versions", "filename"], "PE File Properties: PE Versions Information: Filename", TagProcessor::String),
329    TagInformation::new(&["file", "pdf", "date", "modified"], "PDF File Properties: PDF Date Information: Date Modified", TagProcessor::String),
330    TagInformation::new(&["file", "pdf", "date", "pdfx"], "PDF File Properties: PDF Date Information: PDFx", TagProcessor::String),
331    TagInformation::new(&["file", "pdf", "date", "source_modified"], "PDF File Properties: PDF Date Information: Date Source Modified", TagProcessor::String),
332    TagInformation::new(&["file", "pdf", "javascript", "sha1"], "PDF File Properties: PDF Javascript Information: SHA1 of javascript", TagProcessor::Sha1),
333    TagInformation::new(&["file", "pdf", "stats", "sha1"], "PDF File Properties: PDF Statistics Information: SHA1 of statistics", TagProcessor::Sha1),
334    TagInformation::new(&["file", "plist", "installer_url"], "PList File Properties: Installer URL", TagProcessor::String),
335    TagInformation::new(&["file", "plist", "min_os_version"], "PList File Properties: Minimum OS Version", TagProcessor::String),
336    TagInformation::new(&["file", "plist", "requests_open_access"], "PList File Properties: Requests Open Access", TagProcessor::String),
337    TagInformation::new(&["file", "plist", "build", "machine_os"], "PList File Properties: Build Information: Machine OS", TagProcessor::String),
338    TagInformation::new(&["file", "plist", "cf_bundle", "development_region"], "PList File Properties: CF Bundle Information: Development Region", TagProcessor::String),
339    TagInformation::new(&["file", "plist", "cf_bundle", "display_name"], "PList File Properties: CF Bundle Information: Display Name", TagProcessor::String),
340    TagInformation::new(&["file", "plist", "cf_bundle", "executable"], "PList File Properties: CF Bundle Information: Executable Name", TagProcessor::String),
341    TagInformation::new(&["file", "plist", "cf_bundle", "identifier"], "PList File Properties: CF Bundle Information: Identifier Name", TagProcessor::String),
342    TagInformation::new(&["file", "plist", "cf_bundle", "name"], "PList File Properties: CF Bundle Information: Bundle Name", TagProcessor::String),
343    TagInformation::new(&["file", "plist", "cf_bundle", "pkg_type"], "PList File Properties: CF Bundle Information: Package Type", TagProcessor::String),
344    TagInformation::new(&["file", "plist", "cf_bundle", "signature"], "PList File Properties: CF Bundle Information: Signature", TagProcessor::String),
345    TagInformation::new(&["file", "plist", "cf_bundle", "url_scheme"], "PList File Properties: CF Bundle Information: URL Scheme", TagProcessor::String),
346    TagInformation::new(&["file", "plist", "cf_bundle", "version", "long"], "PList File Properties: CF Bundle Information: Bundle Version Information: Long Version", TagProcessor::String),
347    TagInformation::new(&["file", "plist", "cf_bundle", "version", "short"], "PList File Properties: CF Bundle Information: Bundle Version Information: Short Version", TagProcessor::String),
348    TagInformation::new(&["file", "plist", "dt", "compiler"], "PList File Properties: DT Information: Compiler", TagProcessor::String),
349    TagInformation::new(&["file", "plist", "dt", "platform", "build"], "PList File Properties: DT Information: Platform Information: Build", TagProcessor::String),
350    TagInformation::new(&["file", "plist", "dt", "platform", "name"], "PList File Properties: DT Information: Platform Information: Name", TagProcessor::String),
351    TagInformation::new(&["file", "plist", "dt", "platform", "version"], "PList File Properties: DT Information: Platform Information: Version", TagProcessor::String),
352    TagInformation::new(&["file", "plist", "ls", "background_only"], "PList File Properties: LS Information: Background Only", TagProcessor::String),
353    TagInformation::new(&["file", "plist", "ls", "min_system_version"], "PList File Properties: LS Information: Minimum System Versuion", TagProcessor::String),
354    TagInformation::new(&["file", "plist", "ns", "apple_script_enabled"], "PList File Properties: NS Information: Apple Script Enabled", TagProcessor::String),
355    TagInformation::new(&["file", "plist", "ns", "principal_class"], "PList File Properties: NS Information: Principal Class", TagProcessor::String),
356    TagInformation::new(&["file", "plist", "ui", "background_modes"], "PList File Properties: UI Information: Background Modes", TagProcessor::String),
357    TagInformation::new(&["file", "plist", "ui", "requires_persistent_wifi"], "PList File Properties: UI Information: Requires Persistent WIFI", TagProcessor::String),
358    TagInformation::new(&["file", "plist", "wk", "app_bundle_identifier"], "PList File Properties: WK Information: App Bundle ID", TagProcessor::String),
359    TagInformation::new(&["file", "powershell", "cmdlet"], "PowerShell File Properties: Cmdlet", TagProcessor::String),
360    TagInformation::new(&["file", "shortcut", "command_line"], "Shortcut File Properties: Command Line", TagProcessor::String),
361    TagInformation::new(&["file", "shortcut", "icon_location"], "Shortcut File Properties: Icon Location", TagProcessor::String),
362    TagInformation::new(&["file", "shortcut", "machine_id"], "Shortcut File Properties: Machine ID", TagProcessor::String),
363    TagInformation::new(&["file", "shortcut", "tracker_mac"], "Shortcut File Properties: Possible MAC address from the Tracker block", TagProcessor::String),
364    TagInformation::new(&["file", "swf", "header", "frame", "count"], "SWF File Properties: Header Information: Header Frame Information: Number of Frames", TagProcessor::I32),
365    TagInformation::new(&["file", "swf", "header", "frame", "rate"], "SWF File Properties: Header Information: Header Frame Information: Speed of Animation", TagProcessor::String),
366    TagInformation::new(&["file", "swf", "header", "frame", "size"], "SWF File Properties: Header Information: Header Frame Information: Size of Frame", TagProcessor::String),
367    TagInformation::new(&["file", "swf", "header", "version"], "SWF File Properties: Header Information: Version", TagProcessor::String),
368    TagInformation::new(&["file", "swf", "tags_ssdeep"], "SWF File Properties: Tags SSDeep", TagProcessor::SSDeepHash),
369
370    TagInformation::new(&["network", "attack"], "Network: Attack", TagProcessor::String),
371    TagInformation::new(&["network", "dynamic", "domain"], "Network: Dynamic IOCs: Domain", TagProcessor::Domain),
372    TagInformation::new(&["network", "dynamic", "ip"], "Network: Dynamic IOCs: IP", TagProcessor::IpAddress),
373    TagInformation::new(&["network", "dynamic", "unc_path"], "Network: Dynamic IOCs: UNC Path", TagProcessor::UNCPath),
374    TagInformation::new(&["network", "dynamic", "uri"], "Network: Dynamic IOCs: URI", TagProcessor::Uri),
375    TagInformation::new(&["network", "dynamic", "uri_path"], "Network: Dynamic IOCs: URI Path", TagProcessor::UriPath),
376    TagInformation::new(&["network", "email", "address"], "Network: Email: Email Address", TagProcessor::EmailAddress),
377    TagInformation::new(&["network", "email", "date"], "Network: Email: Date", TagProcessor::String),
378    TagInformation::new(&["network", "email", "subject"], "Network: Email: Subject", TagProcessor::String),
379    TagInformation::new(&["network", "email", "msg_id"], "Network: Email: Message ID", TagProcessor::String),
380    TagInformation::new(&["network", "mac_address"], "Network: MAC Address", TagProcessor::Mac),
381    TagInformation::new(&["network", "port"], "Network: Port", TagProcessor::U16),
382    TagInformation::new(&["network", "protocol"], "Network: Protocol", TagProcessor::String),
383    TagInformation::new(&["network", "signature", "signature_id"], "Network: Signatures: ID", TagProcessor::String),
384    TagInformation::new(&["network", "signature", "message"], "Network: Signatures: Message", TagProcessor::String),
385    TagInformation::new(&["network", "static", "domain"], "Network: Static IOCs: Domain", TagProcessor::Domain),
386    TagInformation::new(&["network", "static", "ip"], "Network: Static IOCs: IP", TagProcessor::IpAddress),
387    TagInformation::new(&["network", "static", "unc_path"], "Network: Static IOCs: UNC Path", TagProcessor::UNCPath),
388    TagInformation::new(&["network", "static", "uri"], "Network: Static IOCs: URI", TagProcessor::Uri),
389    TagInformation::new(&["network", "static", "uri_path"], "Network: Static IOCs: URI Path", TagProcessor::UriPath),
390    TagInformation::new(&["network", "tls", "ja3_hash"], "Network: TLS: JA3 Hash", TagProcessor::Lowercase),
391    TagInformation::new(&["network", "tls", "ja3_string"], "Network: TLS: JA3 String", TagProcessor::String),
392    TagInformation::new(&["network", "tls", "ja3s_hash"], "Network: TLS: JA3S Hash", TagProcessor::Lowercase),
393    TagInformation::new(&["network", "tls", "ja3s_string"], "Network: TLS: JA3S String", TagProcessor::String),
394    TagInformation::new(&["network", "tls", "ja4_hash"], "Network: TLS: JA4 Hash", TagProcessor::JA4),
395    TagInformation::new(&["network", "tls", "ja4s_hash"], "Network: TLS: JA4S Hash", TagProcessor::String),
396    TagInformation::new(&["network", "tls", "sni"], "Network: TLS: SNI", TagProcessor::String),
397    TagInformation::new(&["network", "user_agent"], "Network: User Agent", TagProcessor::String),
398
399    TagInformation::new(&["source"], "Source", TagProcessor::String),
400
401    TagInformation::new(&["technique", "comms_routine"], "Technique: Communication Routine", TagProcessor::String),
402    TagInformation::new(&["technique", "config"], "Technique: Configuration", TagProcessor::String),
403    TagInformation::new(&["technique", "crypto"], "Technique: Cryptography", TagProcessor::String),
404    TagInformation::new(&["technique", "exploit"], "Technique: Technique", TagProcessor::String),
405    TagInformation::new(&["technique", "keylogger"], "Technique: Keylogger", TagProcessor::String),
406    TagInformation::new(&["technique", "macro"], "Technique: Macro", TagProcessor::String),
407    TagInformation::new(&["technique", "masking_algo"], "Technique: Masking Algorithm", TagProcessor::String),
408    TagInformation::new(&["technique", "obfuscation"], "Technique: Obfuscation", TagProcessor::String),
409    TagInformation::new(&["technique", "packer"], "Technique: Packer", TagProcessor::String),
410    TagInformation::new(&["technique", "persistence"], "Technique: Persistence", TagProcessor::String),
411    TagInformation::new(&["technique", "shellcode"], "Technique: Shell Code", TagProcessor::String),
412    TagInformation::new(&["technique", "string"], "Technique: String", TagProcessor::String),
413
414    TagInformation::new(&["vector"], "Vector", TagProcessor::String),
415];
416
417
418pub fn get_tag_information(label: &str) -> Option<&'static TagInformation> {
419    static TAGS: LazyLock<HashMap<String, &'static TagInformation>> = LazyLock::new(|| {
420        let mut table: HashMap<String, &'static TagInformation> = Default::default();
421        for tag in &ALL_VALID_TAGS {
422            if let Some(collision) = table.insert(tag.full_path(), tag) {
423                panic!("Collision on tag name: {}", collision.full_path());
424            }
425        }
426        table
427    });
428    TAGS.get(label).copied()
429}
430
431// MARK: Nested Tag Container
432/// Container for a dictionary set of tags
433#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, Eq)]
434#[serde(transparent)]
435pub struct Tagging(JsonMap);
436
437impl Described<ElasticMeta> for Tagging {
438    fn metadata() -> struct_metadata::Descriptor<ElasticMeta> {
439
440        let mut catagories: HashMap<&'static str, Vec<_>> = HashMap::new();
441        for tag in &ALL_VALID_TAGS {
442            catagories.entry(tag.name[0]).or_default().push((&tag.name[1..], tag))
443        }
444
445        fn make_entry(label: &'static str, elements: &[(&[&'static str], &TagInformation)]) -> struct_metadata::Entry<ElasticMeta> {
446            if elements.len() == 1 && elements[0].0.is_empty() {
447                struct_metadata::Entry {
448                    label,
449                    docs: None,
450                    metadata: ElasticMeta { index: Some(true), store: Some(false), ..Default::default() },
451                    type_info: elements[0].1.metadata_type(),
452                    has_default: false,
453                    aliases: &[],
454                }
455            } else {
456
457                let mut catagories: HashMap<&'static str, Vec<_>> = HashMap::new();
458                for (path, tag) in elements {
459                    catagories.entry(path[0]).or_default().push((&path[1..], *tag))
460                }
461
462                let mut children = vec![];
463                let metadata = ElasticMeta::default();
464                for (label, elements) in catagories {
465                    let mut entry = make_entry(label, &elements);
466                    entry.metadata.forward_propagate_entry_defaults(&metadata, &entry.type_info.metadata);
467                    children.push(entry)
468                }
469
470                let type_info = struct_metadata::Descriptor {
471                    docs: None,
472                    metadata,
473                    kind: struct_metadata::Kind::Struct {
474                        name: label,
475                        children,
476                    },
477                };
478
479                struct_metadata::Entry {
480                    label,
481                    docs: None,
482                    metadata: Default::default(),
483                    type_info,
484                    has_default: false,
485                    aliases: &[]
486                }
487            }
488        }
489
490        let mut children = vec![];
491        let metadata = ElasticMeta::default();
492        for (label, elements) in catagories {
493            let mut entry = make_entry(label, &elements);
494            entry.metadata.forward_propagate_entry_defaults(&metadata, &entry.type_info.metadata);
495            children.push(entry);
496        }
497
498        struct_metadata::Descriptor {
499            docs: None,
500            metadata,
501            kind: struct_metadata::Kind::Struct {
502                name: "Tagging",
503                children,
504            },
505        }
506    }
507}
508
509#[derive(Debug, thiserror::Error)]
510#[error("The tagging data had an unsupported layout.")]
511pub struct LayoutError;
512
513impl Tagging {
514
515//     pub fn flatten(&self) -> Result<FlatTags, serde_json::Error> {
516//         let data = serde_json::to_value(self)?;
517//         let serde_json::Value::Object(data) = data else {
518//             return Err(serde_json::Error::custom("struct must become object"))
519//         };
520//         Ok(flatten_tags(data, None))
521//     }
522
523    pub fn to_list(&self, _safelisted: Option<bool>) -> Result<Vec<TagEntry>, LayoutError> {
524
525        fn flatten_inner(output: &mut Vec<TagEntry>, path: &[&str], data: &JsonMap) -> Result<(), ()> {
526            for (key, value) in data {
527                let mut path = Vec::from(path);
528                path.push(key);
529
530                match value {
531                    serde_json::Value::Object(map) => {
532                        flatten_inner(output, &path, map)?;
533                    },
534                    serde_json::Value::Array(values) => {
535                        let path = path.join(".");
536                        for value in values {
537                            output.push(TagEntry {
538                                score: 0,
539                                tag_type: path.clone(),
540                                value: TagValue(value.clone())
541                            })
542                            // {'safelisted': safelisted, 'type': k, 'value': t, 'short_type': k.rsplit(".", 1)[-1]})
543                        }
544                    },
545                    _ => return Err(())
546                }
547            }
548            Ok(())
549        }
550
551        let mut output = vec![];
552        if flatten_inner(&mut output, &[], &self.0).is_err() {
553            return Err(LayoutError)
554        }
555
556        Ok(output)
557    }
558}
559
560
561// MARK: Flat Tag Container
562/// List of validated tags
563#[derive(Debug, Default)]
564pub struct FlatTags {
565    tags: HashMap<&'static TagInformation, Vec<TagValue>>,
566}
567
568impl IntoIterator for FlatTags {
569    type Item = (&'static TagInformation, Vec<TagValue>);
570    type IntoIter = <HashMap<&'static TagInformation, Vec<TagValue>> as std::iter::IntoIterator>::IntoIter;
571    fn into_iter(self) -> Self::IntoIter { self.tags.into_iter() }
572}
573
574impl std::ops::Deref for FlatTags {
575    type Target = HashMap<&'static TagInformation, Vec<TagValue>>;
576
577    fn deref(&self) -> &Self::Target { &self.tags }
578}
579
580impl std::ops::DerefMut for FlatTags {
581    fn deref_mut(&mut self) -> &mut Self::Target { &mut self.tags }
582}
583
584#[derive(Debug, thiserror::Error)]
585#[error("A tag name collision on {0} prevented a tagging document from being completed")]
586pub struct TagNameCollision(String);
587
588
589impl FlatTags {
590    pub fn to_tagging(self) -> Result<Tagging, TagNameCollision> {
591        let mut output = JsonMap::default();
592
593        // let mut groups: HashMap<&'static str, Vec<(&[&'static str], _, _)>> = HashMap::new();
594        // for (tag, value) in self.tags {
595        //     let path = tag.name;
596        //     let items = groups.entry(path[0]).or_default();
597        //     items.push((&path[1..], tag, value))
598        // }
599
600        fn insert(info: &'static TagInformation, output: &mut JsonMap, name: &[&str], values: Vec<TagValue>) -> Result<(), TagNameCollision> {
601            if name.len() == 1 {
602                if info.inner_mapping() {
603                    let outer = output.entry(name[0]).or_insert_with(|| Value::Object(Default::default()));
604                    match outer.as_object_mut() {
605                        Some(outer) => {
606                            for value in values {
607                                if let Value::Object(value) = value.0 {
608                                    for (subkey, value) in value {
609                                        let values = match value {
610                                            Value::Array(values) => values,
611                                            other => vec![other]
612                                        };
613
614                                        let inner = outer.entry(subkey)
615                                            .or_insert_with(|| Value::Array(vec![]));
616                                        match inner.as_array_mut() {
617                                            Some(obj) => { obj.extend(values); },
618                                            None => return Err(TagNameCollision(info.full_path())),
619                                        }
620                                    }
621                                } else {
622                                    return Err(TagNameCollision(info.full_path()))
623                                }
624                            }
625                            Ok(())
626                        },
627                        None => Err(TagNameCollision(info.full_path())),
628                    }
629                } else {
630                    let inner = output.entry(name[0])
631                        .or_insert_with(|| Value::Array(vec![]));
632                    match inner.as_array_mut() {
633                        Some(obj) => { obj.extend(values.into_iter().map(|tag| tag.0)); Ok(()) },
634                        None => Err(TagNameCollision(info.full_path())),
635                    }
636                }
637            } else {
638                let inner = output.entry(name[0])
639                    .or_insert_with(|| Value::Object(JsonMap::default()));
640                match inner.as_object_mut() {
641                    Some(obj) => insert(info, obj, &name[1..], values),
642                    None => Err(TagNameCollision(info.full_path())),
643                }
644            }
645        }
646        for (info, values) in self.tags {
647            insert(info, &mut output, info.name, values)?;
648        }
649        Ok(Tagging(output))
650    }
651}
652
653pub fn load_tags_from_object(data: JsonMap) -> (FlatTags, Vec<(String, String)>) {
654
655    let mut accepted = FlatTags::default();
656    let mut rejected = vec![];
657
658    fn process(accepted: &mut FlatTags, rejected: &mut Vec<(String, String)>, path: &[&str], data: JsonMap) {
659        for (key, value) in data {
660            // build the label for the tag if it exists at this level of recursion
661            let mut path = Vec::from(path);
662            path.push(&key);
663            let label = path.join(".");
664
665            // Try to use this tag label
666            if let Some(tag) = get_tag_information(&label) {
667                if let serde_json::Value::Array(values) = value {
668                    for value in values {
669                        match tag.processor.apply(value) {
670                            Ok(value) => accepted.entry(tag).or_default().push(TagValue(value)),
671                            Err(value) => rejected.push((label.to_string(), TagValue(value).to_string()))
672                        }
673                    }
674                } else {
675                    match tag.processor.apply(value) {
676                        Ok(value) => accepted.entry(tag).or_default().push(TagValue(value)),
677                        Err(value) => rejected.push((label.to_string(), TagValue(value).to_string()))
678                    }
679                }
680                continue
681            }
682
683            // if we couldn't use that tag label, try to recurse
684            if let serde_json::Value::Object(data) = value {
685                process(accepted, rejected, &path, data);
686            } else {
687                rejected.push((label.to_string(), TagValue(value).to_string()))
688            }
689        }
690    }
691
692    process(&mut accepted, &mut rejected, &[], data);
693
694    (accepted, rejected)
695}
696
/// Load tags from a flat `full tag name -> values` table, validating every value.
///
/// Values of unknown tag names are rejected as they are; values of known tags are
/// run through the tag's processor. Returns the accepted tags along with a list of
/// `(name, value as text)` pairs for everything that was rejected.
#[cfg(test)]
pub fn load_tags_from_list(data: HashMap<String, Vec<serde_json::Value>>) -> (FlatTags, Vec<(String, String)>) {

    let mut accepted = FlatTags::default();
    let mut rejected = vec![];

    for (name, values) in data {
        let known = get_tag_information(&name);

        for value in values {
            // without a tag definition there is nothing to validate against
            let outcome = match known {
                Some(info) => info.processor.apply(value),
                None => Err(value),
            };

            match (known, outcome) {
                (Some(info), Ok(clean)) => accepted.entry(info).or_default().push(TagValue(clean)),
                (_, Ok(_)) => unreachable!("values of unknown tags are always rejected"),
                (_, Err(raw)) => rejected.push((name.clone(), TagValue(raw).to_string())),
            }
        }
    }

    (accepted, rejected)
}
723
724// MARK: Tests
725
/// Nested input -> flat tags -> nested document -> tag list
#[test]
fn tagging_forms_round_trip() {
    use serde_json::json;

    /// Values accepted for the tag named `label`
    fn values_of<'a>(tags: &'a FlatTags, label: &str) -> &'a Vec<TagValue> {
        tags.get(get_tag_information(label).unwrap()).unwrap()
    }

    /// List entry expected for a plain string value
    fn entry(tag_type: &str, value: &str) -> TagEntry {
        TagEntry { score: 0, tag_type: tag_type.to_owned(), value: TagValue::from(value) }
    }

    let nested = json!({
        "file": {
            "behavior": ["hop", "skip", "jump"],
            "rule": {
                "service_abc": ["RULE.a10", "RULE.a11"],
                "service_xyz": ["RULE.a10"]
            }
        },
        "attribution": {
            "actor": ["Randy"]
        }
    });
    let serde_json::Value::Object(nested) = nested else { panic!() };

    // convert input to flat tags
    let (flat, rejected) = load_tags_from_object(nested);
    assert!(rejected.is_empty());
    assert_eq!(flat.len(), 3);
    assert_eq!(*values_of(&flat, "attribution.actor"), vec![TagValue(json!("RANDY"))]);
    assert_eq!(
        *values_of(&flat, "file.behavior"),
        vec![TagValue(json!("hop")), TagValue(json!("skip")), TagValue(json!("jump"))]
    );
    assert_eq!(
        *values_of(&flat, "file.rule"),
        vec![TagValue(json!({"service_abc": ["RULE.a10", "RULE.a11"], "service_xyz": ["RULE.a10"]}))]
    );

    // convert flat tags to nested data
    let tagging = flat.to_tagging().unwrap();

    // convert nested data json
    let expected_document = json!({
        "file": {
            "behavior": ["hop", "skip", "jump"],
            "rule": {
                "service_abc": ["RULE.a10", "RULE.a11"],
                "service_xyz": ["RULE.a10"]
            }
        },
        "attribution": {
            "actor": ["RANDY"]
        }
    });
    assert_eq!(serde_json::to_value(&tagging).unwrap(), expected_document);

    // convert nested data to a list of tags
    let expected_list = vec![
        entry("attribution.actor", "RANDY"),
        entry("file.behavior", "hop"),
        entry("file.behavior", "skip"),
        entry("file.behavior", "jump"),
        entry("file.rule.service_abc", "RULE.a10"),
        entry("file.rule.service_abc", "RULE.a11"),
        entry("file.rule.service_xyz", "RULE.a10"),
    ];
    assert_eq!(tagging.to_list(None).unwrap(), expected_list);
}
781
782
/// Invalid tag names and invalid values must end up in the rejected list,
/// whether the tags come from a flat list or from a nested dictionary.
#[test]
fn tag_names() {
    use serde_json::{Value, json};

    // a mix of acceptable (string, number) and unacceptable (array, null) values
    let mixed = || vec![json!("abc"), json!("Big hats!"), json!([]), json!(100), Value::Null];
    let pair = |name: &str, value: &str| (name.to_string(), value.to_string());

    // both input forms are expected to accept exactly the same values
    let assert_accepted = |accepted: &FlatTags| {
        assert_eq!(accepted.len(), 2);
        assert_eq!(
            *accepted.get(get_tag_information("attribution.actor").unwrap()).unwrap(),
            vec![TagValue(json!("ABC")), TagValue(json!("BIG HATS!")), TagValue(json!("100"))]
        );
        assert_eq!(
            *accepted.get(get_tag_information("av.heuristic").unwrap()).unwrap(),
            vec![TagValue(json!("abc")), TagValue(json!("Big hats!")), TagValue(json!("100"))]
        );
    };

    // From a list
    let input: HashMap<String, Vec<Value>> = HashMap::from([
        ("attribution.actor".to_string(), mixed()),
        ("av.heuristic".to_string(), mixed()),
        ("av.heuristic.".to_string(), vec![json!("100000")]),
        (".av.heuristic".to_string(), vec![json!("100000")]),
        ("av".to_string(), vec![json!("100000")]),
        // input.insert("network.tls.ja3_hash".to_string(), vec![json!("abc"), json!("Big hats!"), json!([]), json!(100), json!(None)]);
    ]);

    let (accepted, mut rejected) = load_tags_from_list(input);
    assert_accepted(&accepted);

    assert_eq!(rejected.len(), 7);
    rejected.sort_unstable();
    assert_eq!(rejected, vec![
        pair(".av.heuristic", "100000"),
        pair("attribution.actor", "[]"),
        pair("attribution.actor", "null"),
        pair("av", "100000"),
        pair("av.heuristic", "[]"),
        pair("av.heuristic", "null"),
        pair("av.heuristic.", "100000"),
    ]);

    // from a dictionary
    let input = json!({
        "attribution": {
            "actor": ["abc", "Big hats!", [], 100, null]
        },
        "av": {
            "heuristic": ["abc", "Big hats!", [], 100, null]
        },
        "cert": 100000,
        "dynamic": ["abc", "Big hats!"]
    });
    let serde_json::Value::Object(input) = input else { panic!() };

    let (accepted, mut rejected) = load_tags_from_object(input);
    assert_accepted(&accepted);

    assert_eq!(rejected.len(), 6);
    rejected.sort_unstable();
    assert_eq!(rejected, vec![
        pair("attribution.actor", "[]"),
        pair("attribution.actor", "null"),
        pair("av.heuristic", "[]"),
        pair("av.heuristic", "null"),
        pair("cert", "100000"),
        pair("dynamic", r#"["abc","Big hats!"]"#),
    ]);
}
848
849
/// Test parsing of plain, lowercased and uppercased strings
#[test]
fn string_tag_parsing() {
    use serde_json::{Value, json};

    // (processor, expected form of "abc", expected form of "Big Hats!")
    let expectations = [
        (TagProcessor::String, "abc", "Big Hats!"),
        (TagProcessor::Lowercase, "abc", "big hats!"),
        (TagProcessor::Uppercase, "ABC", "BIG HATS!"),
    ];

    for (proc, abc, big_hats) in &expectations {
        // only the letter case of strings depends on the processor
        assert_eq!(proc.apply(json!("abc")), Ok(json!(*abc)));
        assert_eq!(proc.apply(json!("Big Hats!")), Ok(json!(*big_hats)));

        // arrays and null are refused, numbers are converted to their text form
        assert_eq!(proc.apply(json!([])), Err(json!([])));
        assert_eq!(proc.apply(json!(100)), Ok(json!("100")));
        assert_eq!(proc.apply(Value::Null), Err(Value::Null));
    }
}
877
/// Each hash processor must accept its own sample unchanged and refuse the samples of all the others
#[test]
fn hash_tag_parsing() {
    use serde_json::json;

    // processors[i] is expected to accept samples[i] and nothing else
    let processors = [
        TagProcessor::MD5,
        TagProcessor::Sha1,
        TagProcessor::Sha256,
        TagProcessor::SSDeepHash,
        TagProcessor::JA4,
    ];
    let samples = [
        "00000000000000000000000000000000",
        "0000000000000000000000000000000000000000",
        "0000000000000000000000000000000000000000000000000000000000000000",
        "24576:c+bnyhC57zhu0Nbs2p/ojPgZmAnShaLOZHzYX20:zwQB3bN/MkNbOZS20",
        "t13d1517h2_8daaf6152771_b0da82dd1658",
    ];

    for (row, proc) in processors.iter().enumerate() {
        for (column, sample) in samples.iter().enumerate() {
            let outcome = proc.apply(json!(*sample));
            if row == column {
                assert_eq!(outcome, Ok(json!(*sample)));
            } else {
                assert_eq!(outcome, Err(json!(*sample)));
            }
        }

        // numbers are never a valid hash
        assert_eq!(proc.apply(json!(999)), Err(json!(999)));
    }
}
923
/// Acceptance and normalisation of network related tag values
#[test]
fn network_tag_parsing() {
    /// One test case: the input and, when it should be accepted, the expected output.
    /// Rejected inputs are expected to be handed back unchanged.
    type Case<'a> = (&'a str, Option<&'a str>);

    /// The input is accepted as it is
    fn keeps(input: &str) -> Case<'_> { (input, Some(input)) }
    /// The input is accepted but comes back in a normalised form
    fn becomes<'a>(input: &'a str, output: &'a str) -> Case<'a> { (input, Some(output)) }
    /// The input is refused
    fn rejects(input: &str) -> Case<'_> { (input, None) }

    /// Run every case, in order, through the processor
    fn check(proc: &TagProcessor, cases: &[Case<'_>]) {
        for (input, accepted_as) in cases {
            let expected = match accepted_as {
                Some(output) => Ok(serde_json::json!(*output)),
                None => Err(serde_json::json!(*input)),
            };
            assert_eq!(proc.apply(serde_json::json!(*input)), expected);
        }
    }

    check(&TagProcessor::Domain, &[
        keeps("www.google.com"),
        becomes("www.GooGle.com", "www.google.com"),
        rejects("172.0.0.1"),
        rejects(""),
    ]);

    check(&TagProcessor::IpAddress, &[
        rejects("www.google.com"),
        rejects("www.GooGle.com"),
        keeps("172.0.0.1"),
        keeps("0.0.0.0"),
        keeps("127.0.0.0"),
        keeps("127.0.10.200"),
        keeps("255.255.255.255"),
        becomes("1234:5678:9ABC:0000:0000:1234:5678:9abc", "1234:5678:9ABC:0000:0000:1234:5678:9ABC"),
        // we don't want abbreviated, octal, hex, or padded ip addresses
        rejects("172.1"),
        rejects("256.0.0.1"),
        rejects("0.256.0.1"),
        rejects("0.0.256.1"),
        rejects("0.0.0.256"),
        rejects("172.0x1.0.1"),
        rejects("172.01.0.1"),
        rejects("172.1.0.00000000001"),
        rejects("0.0.0."),
        rejects("0.0.0.0."),
        rejects(".0.0.0"),
        rejects(".0.0.0.0"),
    ]);

    check(&TagProcessor::UNCPath, &[
        rejects("www.google.com"),
        rejects("www.GooGle.com"),
        rejects("172.0.0.1"),
        rejects("1234:5678:9ABC:0000:0000:1234:5678:9ABC"),
        keeps(r"\\ComputerName\SharedFolder\Resource"),
        keeps(r"\\hostname@SSL@100\SharedFolder\Resource"),
    ]);

    check(&TagProcessor::Uri, &[
        rejects("www.GooGle.com"),
        rejects("172.0.0.1"),
        rejects("1234:5678:9ABC:0000:0000:1234:5678:9abc"),
        keeps("method://172.0.0.1"),
        becomes("s3://1234:5678:9ABC:0000:0000:1234:5678:9abc", "s3://1234:5678:9ABC:0000:0000:1234:5678:9ABC"),
        keeps("https://www.google.com"),
        becomes("https://www.GooGle.com/hellow?x=100&x=red", "https://www.google.com/hellow?x=100&x=red"),
        keeps("https://172.0.0.1"),
        keeps("HTTPS://172.0.0.1/path/woith%20/components"),
        becomes("ftp://1234:5678:9ABC:0000:0000:1234:5678:9abc", "ftp://1234:5678:9ABC:0000:0000:1234:5678:9ABC"),
    ]);

    check(&TagProcessor::UriPath, &[
        rejects("www.google.com"),
        rejects("www.GooGle.com"),
        rejects("172.0.0.1"),
        rejects("1234:5678:9ABC:0000:0000:1234:5678:9ABC"),
        rejects(r"\\ComputerName\SharedFolder\Resource"),
        rejects(r"\\hostname@SSL@100\SharedFolder\Resource"),
        keeps(r"/path%20/words1/"),
    ]);

    check(&TagProcessor::EmailAddress, &[
        rejects("www.google.com"),
        rejects("www.GooGle.com"),
        rejects("172.0.0.1"),
        keeps("user@www.google.com"),
        becomes("user@www.GooGle.com", "user@www.google.com"),
        rejects("user@172.0.0.1"),
        keeps("john.doe@cyber.gc.ca"),
    ]);

    check(&TagProcessor::Mac, &[
        rejects("172.0.0.1"),
        rejects("1234:5678:9ABC:0000:0000:1234:5678:9ABC"),
        keeps("00:1b:63:84:45:e6"),
        becomes("00-1B-63-84-45-E6", "00-1b-63-84-45-e6"),
    ]);
}
1005
1006
/// Checks the phone-number and rule-mapping tag processors against a fixed
/// list of inputs, comparing each result with the expected `Ok`/`Err` value.
#[test]
fn misc_tag_parsing() {
    use serde_json::json;

    // Phone numbers: rejected inputs are expected back unchanged in the error.
    let phone = TagProcessor::PhoneNumber;
    let phone_cases = [
        (json!("abc"), Err(json!("abc"))),
        (json!([]), Err(json!([]))),
        (json!(100), Err(json!(100))),
        // algerian phone number
        // (json!("+213 21 55 55 55"), Err(json!(""))),
        (json!("+1 100 100 1000"), Ok(json!("+1 100 100 1000"))),
        // (json!("+61 5555 5555"), Ok(json!("+61 5555 5555"))),
    ];
    for (input, expected) in phone_cases {
        assert_eq!(phone.apply(input), expected);
    }

    // Rule mappings: only objects whose values are lists are expected to pass,
    // and the list items are expected to come back as strings.
    let mapping = TagProcessor::RuleMapping;
    let mapping_cases = [
        (json!("abc"), Err(json!("abc"))),
        (json!([]), Err(json!([]))),
        (json!(100), Err(json!(100))),
        (json!({}), Ok(json!({}))),
        (json!({"123": {}}), Err(json!({"123": {}}))),
        (json!({"123": []}), Ok(json!({"123": []}))),
        (json!({"123": [[], "abc", 12]}), Ok(json!({"123": ["[]", "abc", "12"]}))),
    ];
    for (input, expected) in mapping_cases {
        assert_eq!(mapping.apply(input), expected);
    }
}
1029
/// Checks the `I32` and `U16` tag processors: non-numeric inputs are expected
/// to be rejected, numeric strings are expected to come back as numbers, and
/// out-of-range numbers are expected back as their string form in the error.
#[test]
fn number_tag_parsing() {
    use serde_json::{Value, json};

    // Signed 32-bit: negative values fit, ten billion does not.
    let signed = TagProcessor::I32;
    let signed_cases = [
        (json!("abc"), Err(json!("abc"))),
        (json!([]), Err(json!([]))),
        (json!(100), Ok(json!(100))),
        (json!(-100), Ok(json!(-100))),
        (Value::Null, Err(Value::Null)),
        (json!("55"), Ok(json!(55))),
        (json!(10_000_000_000u64), Err(json!("10000000000"))),
    ];
    for (input, expected) in signed_cases {
        assert_eq!(signed.apply(input), expected);
    }

    // Unsigned 16-bit: negative values and one million are both out of range.
    let unsigned = TagProcessor::U16;
    let unsigned_cases = [
        (json!("abc"), Err(json!("abc"))),
        (json!([]), Err(json!([]))),
        (json!(100), Ok(json!(100))),
        (json!(-100), Err(json!("-100"))),
        (Value::Null, Err(Value::Null)),
        (json!("55"), Ok(json!(55))),
        (json!(1_000_000), Err(json!("1000000"))),
    ];
    for (input, expected) in unsigned_cases {
        assert_eq!(unsigned.apply(input), expected);
    }
}