assemblyline_models/datastore/
tagging.rs

1use std::collections::HashMap;
2use std::sync::LazyLock;
3
4use serde::{Deserialize, Serialize};
5use serde_json::Value;
6use struct_metadata::{Described, MetadataKind};
7
8use crate::messages::task::TagEntry;
9use crate::types::ja4::is_ja4;
10use crate::types::md5::is_md5;
11use crate::types::sha1::is_sha1;
12use crate::types::sha256::is_sha256;
13use crate::types::json_validation::{transform_string_with, validate_lowercase, validate_lowercase_with, validate_number, validate_rule_mapping, validate_string, validate_string_with, validate_uppercase, validate_uppercase_with};
14use crate::types::ssdeep::is_ssdeep_hash;
15use crate::types::strings::{check_domain, check_email, check_uri, is_ip, is_mac, is_phone_number, is_unc_path, is_uri_path};
16use crate::types::JsonMap;
17use crate::ElasticMeta;
18
19// MARK: Tag Value
20/// A thin wrapper over the generic JSON value type to enforce tag specific behaviours we want
21#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
22#[serde(transparent)]
23pub struct TagValue(serde_json::Value);
24
25// When we convert tags to strings we don't want to include quotes on raw strings
26impl std::fmt::Display for TagValue {
27    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28        match &self.0 {
29            serde_json::Value::String(string) => f.write_str(string),
30            other => f.write_fmt(format_args!("{other}"))
31        }
32    }
33}
34
35impl From<&str> for TagValue {
36    fn from(value: &str) -> Self {
37        Self(serde_json::Value::String(value.to_owned()))
38    }
39}
40
41impl From<String> for TagValue {
42    fn from(value: String) -> Self {
43        Self(serde_json::Value::String(value))
44    }
45}
46
47// MARK: Tag Processors
/// Selects how the value of a tag is checked and/or normalized.
///
/// Each variant is dispatched by `TagProcessor::apply` to one of the helpers
/// imported from `crate::types::json_validation`, optionally combined with a
/// format-specific checker.
#[derive(Debug)]
enum TagProcessor {
    // ---- Generic strings ----
    /// Handled by `validate_string`.
    String,
    /// Handled by `validate_uppercase`.
    Uppercase,
    /// Handled by `validate_lowercase`.
    Lowercase,

    // ---- Special purpose strings ----
    /// String checked with `is_phone_number`.
    PhoneNumber,
    /// Rule mapping, i.e. a `HashMap<String, Vec<String>>`; handled by `validate_rule_mapping`.
    RuleMapping,
    /// String passed through `check_domain`.
    Domain,
    /// String checked with `is_ip`.
    IpAddress,
    /// String passed through `check_uri`.
    Uri,
    /// String checked with `is_mac`.
    Mac,
    /// String checked with `is_unc_path`.
    UNCPath,
    /// String checked with `is_uri_path`.
    UriPath,
    /// String passed through `check_email`.
    EmailAddress,

    // ---- Hashes ----
    /// String checked with `is_sha256`.
    Sha256,
    /// String checked with `is_sha1`.
    Sha1,
    /// String checked with `is_md5`.
    MD5,
    /// String checked with `is_ssdeep_hash`.
    SSDeepHash,
    /// String checked with `is_ja4`.
    JA4,

    // ---- Numbers ----
    /// Number handled by `validate_number::<u16>`.
    U16,
    /// Number handled by `validate_number::<i32>`.
    I32,
    // I64,
}
78
79
80impl TagProcessor {
81    pub fn apply(&self, value: serde_json::Value) -> Result<serde_json::Value, serde_json::Value> {
82        match self {
83            TagProcessor::String => validate_string(value),
84            TagProcessor::Uppercase => validate_uppercase(value),
85            TagProcessor::Lowercase => validate_lowercase(value),
86            TagProcessor::PhoneNumber => validate_string_with(value, is_phone_number),
87            TagProcessor::RuleMapping => validate_rule_mapping(value),
88            TagProcessor::Domain => transform_string_with(value, |domain| check_domain(domain).ok()),
89            TagProcessor::IpAddress => validate_uppercase_with(value, is_ip),
90            TagProcessor::Uri => transform_string_with(value, |uri| check_uri(uri).ok()),
91            TagProcessor::Mac => validate_lowercase_with(value, is_mac),
92            TagProcessor::UNCPath => validate_string_with(value, is_unc_path),
93            TagProcessor::UriPath => validate_string_with(value, is_uri_path),
94            TagProcessor::EmailAddress => transform_string_with(value, |email| check_email(email).ok()),
95            TagProcessor::Sha256 => validate_lowercase_with(value, is_sha256),
96            TagProcessor::Sha1 => validate_lowercase_with(value, is_sha1),
97            TagProcessor::MD5 => validate_lowercase_with(value, is_md5),
98            TagProcessor::SSDeepHash => validate_string_with(value, is_ssdeep_hash),
99            TagProcessor::JA4 => validate_lowercase_with(value, is_ja4),
100            TagProcessor::U16 => validate_number::<u16>(value),
101            TagProcessor::I32 => validate_number::<i32>(value),
102            // TagProcessor::I64 => validate_number::<i64>(value),
103        }
104    }
105}
106
107// MARK: Tag Information
108#[derive(Debug)]
109pub struct TagInformation {
110    name: &'static [&'static str],
111    description: &'static str,
112    processor: TagProcessor,
113}
114
115impl Eq for TagInformation {}
116
117impl PartialEq for TagInformation {
118    fn eq(&self, other: &Self) -> bool {
119        self.name == other.name
120    }
121}
122
123impl std::hash::Hash for TagInformation {
124    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
125        self.name.hash(state);
126    }
127}
128
129impl TagInformation {
130    const fn new(name: &'static [&'static str], description: &'static str, processor: TagProcessor) -> Self {
131        Self {
132            name,
133            description,
134            processor
135        }
136    }
137
138    pub fn full_path(&self) -> String {
139        self.name.join(".")
140    }
141
142    pub fn metadata_type(&self) -> struct_metadata::Descriptor<ElasticMeta> {
143        use struct_metadata::{Kind, Descriptor};
144        let metadata = ElasticMeta{copyto: Some("__text__"), ..Default::default()};
145        let mut desc = match self.processor {
146            TagProcessor::RuleMapping => struct_metadata::Descriptor { 
147                docs: None, 
148                metadata, 
149                kind: Kind::Mapping(
150                    Box::new(Descriptor { docs: None, metadata: Default::default(), kind: Kind::String }), 
151                    Box::new(Descriptor { docs: None, metadata: Default::default(), kind: Kind::Sequence(
152                        Box::new(Descriptor { docs: None, metadata: ElasticMeta{copyto: Some("__text__"), ..Default::default()}, kind: Kind::String }),    
153                    )})
154                )
155            },
156            // TagProcessor::I64 => struct_metadata::Descriptor { docs: None, metadata: Default::default(), kind: Kind::I64},
157            TagProcessor::U16 => struct_metadata::Descriptor { docs: None, metadata: Default::default(), kind: Kind::U16},
158            TagProcessor::I32 => struct_metadata::Descriptor { docs: None, metadata: Default::default(), kind: Kind::I32},
159            TagProcessor::IpAddress => struct_metadata::Descriptor { 
160                docs: None, 
161                metadata: ElasticMeta { mapping: Some("ip"), ..metadata }, 
162                kind: Kind::String
163            },
164            TagProcessor::SSDeepHash => struct_metadata::Descriptor { 
165                docs: None, 
166                metadata: ElasticMeta{mapping: Some("text"), analyzer: Some("text_fuzzy"), ..metadata}, 
167                kind: Kind::String
168            },
169            TagProcessor::Lowercase | TagProcessor::Sha1 | TagProcessor::MD5 | TagProcessor::Sha256 => struct_metadata::Descriptor { 
170                docs: None, 
171                metadata: ElasticMeta { normalizer: Some("lowercase_normalizer"), ..metadata }, 
172                kind: Kind::String 
173            },            
174            _ => struct_metadata::Descriptor { docs: None, metadata, kind: Kind::String }
175        };
176
177        desc.docs = Some(vec![self.description]);
178        desc
179    }
180
181    pub fn inner_mapping(&self) -> bool {
182        matches!(self.processor, TagProcessor::RuleMapping)
183    }
184}
185
186
187// MARK: Tag List
188/// The list of all tags we are willing to accept. 
189/// This includes their path within a tagging dict, a textual description and how they should be processed for validation or normalization
190static ALL_VALID_TAGS: [TagInformation; 211] = [
191    TagInformation::new(&["attribution", "actor"], "Attribution Actor", TagProcessor::Uppercase),
192    TagInformation::new(&["attribution", "campaign"], "Attribution Campaign", TagProcessor::Uppercase),
193    TagInformation::new(&["attribution", "category"], "Attribution Category", TagProcessor::Uppercase),
194    TagInformation::new(&["attribution", "exploit"], "Attribution Exploit", TagProcessor::Uppercase),
195    TagInformation::new(&["attribution", "implant"], "Attribution Implant", TagProcessor::Uppercase),
196    TagInformation::new(&["attribution", "family"], "Attribution Family", TagProcessor::Uppercase),
197    TagInformation::new(&["attribution", "network"], "Attribution Network", TagProcessor::Uppercase),
198
199    TagInformation::new(&["av", "heuristic"], "List of heuristics", TagProcessor::String),
200    TagInformation::new(&["av", "virus_name"], "Collection of virus names identified by antivirus tools", TagProcessor::String),
201
202    TagInformation::new(&["cert", "extended_key_usage"], "Extended Key Usage", TagProcessor::String),
203    TagInformation::new(&["cert", "issuer"], "Issuer", TagProcessor::String),
204    TagInformation::new(&["cert", "key_usage"], "Key Usage", TagProcessor::String),
205    TagInformation::new(&["cert", "owner"], "Owner", TagProcessor::String),
206    TagInformation::new(&["cert", "serial_no"], "Serial Number", TagProcessor::String),
207    TagInformation::new(&["cert", "signature_algo"], "Signature Algorithm", TagProcessor::String),
208    TagInformation::new(&["cert", "subject"], "Subject Name", TagProcessor::String),
209    TagInformation::new(&["cert", "subject_alt_name"], "Alternative Subject Name", TagProcessor::String),
210    TagInformation::new(&["cert", "thumbprint"], "Thumbprint", TagProcessor::String),
211    TagInformation::new(&["cert", "valid", "start"], "Start date of certificate validity", TagProcessor::String),
212    TagInformation::new(&["cert", "valid", "end"], "End date of certificate validity", TagProcessor::String),
213    TagInformation::new(&["cert", "version"], "Version", TagProcessor::String),
214
215    TagInformation::new(&["code", "sha256"], "Code Tagging: Sha256 of code", TagProcessor::Sha256),
216
217    TagInformation::new(&["dynamic", "autorun_location"], "Autorun location", TagProcessor::String),
218    TagInformation::new(&["dynamic", "dos_device"], "DOS Device", TagProcessor::String),
219    TagInformation::new(&["dynamic", "mutex"], "Mutex", TagProcessor::String),
220    TagInformation::new(&["dynamic", "registry_key"], "Registy Keys", TagProcessor::String),
221    TagInformation::new(&["dynamic", "process", "command_line"], "Commandline", TagProcessor::String),
222    TagInformation::new(&["dynamic", "process", "file_name"], "Filename", TagProcessor::String),
223    TagInformation::new(&["dynamic", "process", "shortcut"], "Shortcut", TagProcessor::String),
224    TagInformation::new(&["dynamic", "signature", "category"], "Signature Category", TagProcessor::String),
225    TagInformation::new(&["dynamic", "signature", "family"], "Signature Family", TagProcessor::String),
226    TagInformation::new(&["dynamic", "signature", "name"], "Signature Name", TagProcessor::String),
227    TagInformation::new(&["dynamic", "ssdeep", "cls_ids"], "CLSIDs", TagProcessor::SSDeepHash),
228    TagInformation::new(&["dynamic", "ssdeep", "dynamic_classes"], "Dynamic Classes", TagProcessor::SSDeepHash),
229    TagInformation::new(&["dynamic", "ssdeep", "regkeys"], "Registry Keys", TagProcessor::SSDeepHash),
230    TagInformation::new(&["dynamic", "window", "cls_ids"], "CLSIDs", TagProcessor::String),
231    TagInformation::new(&["dynamic", "window", "dynamic_classes"], "Dynamic Classes", TagProcessor::String),
232    TagInformation::new(&["dynamic", "window", "regkeys"], "Registry Keys", TagProcessor::String),
233    TagInformation::new(&["dynamic", "operating_system", "platform"], "Platform", TagProcessor::String),
234    TagInformation::new(&["dynamic", "operating_system", "version"], "Version", TagProcessor::String),
235    TagInformation::new(&["dynamic", "operating_system", "processor"], "Processor", TagProcessor::String),
236    TagInformation::new(&["dynamic", "processtree_id"], "Process Tree ID", TagProcessor::String),
237
238    TagInformation::new(&["info", "phone_number"], "Phone Number", TagProcessor::PhoneNumber),
239    TagInformation::new(&["info", "password"], "Suspected Password", TagProcessor::String),
240
241    TagInformation::new(&["file", "ancestry"], "File Genealogy", TagProcessor::String),
242    TagInformation::new(&["file", "behavior"], "File Behaviour", TagProcessor::String),
243    TagInformation::new(&["file", "compiler"], "Compiler of File", TagProcessor::String),
244    TagInformation::new(&["file", "config"], "File Configuration", TagProcessor::String),
245    TagInformation::new(&["file", "date", "creation"], "File Creation Date", TagProcessor::String),
246    TagInformation::new(&["file", "date", "last_modified"], "File Last Modified Date", TagProcessor::String),
247    TagInformation::new(&["file", "elf", "libraries"], "ELF File Properties: Libraries", TagProcessor::String),
248    TagInformation::new(&["file", "elf", "interpreter"], "ELF File Properties: Interpreter", TagProcessor::String),
249    TagInformation::new(&["file", "elf", "sections", "name"], "ELF File Properties: Section Name", TagProcessor::String),
250    TagInformation::new(&["file", "elf", "segments", "type"], "ELF File Properties: Segment Types", TagProcessor::String),
251    TagInformation::new(&["file", "elf", "notes", "name"], "ELF File Properties: Note name", TagProcessor::String),
252    TagInformation::new(&["file", "elf", "notes", "type"], "ELF File Properties: Note type", TagProcessor::String),
253    TagInformation::new(&["file", "elf", "notes", "type_core"], "ELF File Properties: Note type core", TagProcessor::String),
254    TagInformation::new(&["file", "lib"], "File Libraries", TagProcessor::String),
255    TagInformation::new(&["file", "lsh"], "File LSH hashes", TagProcessor::String),
256    TagInformation::new(&["file", "name", "anomaly"], "File Anomaly Name", TagProcessor::String),
257    TagInformation::new(&["file", "name", "extracted"], "File Extracted Name", TagProcessor::String),
258    TagInformation::new(&["file", "path"], "File Path", TagProcessor::String),
259    TagInformation::new(&["file", "rule"], "Rule/Signature File", TagProcessor::RuleMapping),
260    TagInformation::new(&["file", "string", "api"], "File API Strings", TagProcessor::String),
261    TagInformation::new(&["file", "string", "blacklisted"], "File Known Bad Strings", TagProcessor::String),
262    TagInformation::new(&["file", "string", "decoded"], "File Decoded Strings", TagProcessor::String),
263    TagInformation::new(&["file", "string", "extracted"], "File Extracted Strings", TagProcessor::String),
264    TagInformation::new(&["file", "apk", "activity"], "APK File Properties: Activity", TagProcessor::String),
265    TagInformation::new(&["file", "apk", "app", "label"], "APK File Properties: APK Application Information: Label", TagProcessor::String),
266    TagInformation::new(&["file", "apk", "app", "version"], "APK File Properties: APK Application Information: Version", TagProcessor::String),
267    TagInformation::new(&["file", "apk", "feature"], "APK File Properties: Features", TagProcessor::String),
268    TagInformation::new(&["file", "apk", "locale"], "APK File Properties: Locale", TagProcessor::String),
269    TagInformation::new(&["file", "apk", "permission"], "APK File Properties: Permissions", TagProcessor::String),
270    TagInformation::new(&["file", "apk", "pkg_name"], "APK File Properties: Package Name", TagProcessor::String),
271    TagInformation::new(&["file", "apk", "provides_component"], "APK File Properties: Components Provided", TagProcessor::String),
272    TagInformation::new(&["file", "apk", "sdk", "min"], "APK File Properties: APK SDK minimum OS required", TagProcessor::String),
273    TagInformation::new(&["file", "apk", "sdk", "target"], "APK File Properties: APK SDK target OS", TagProcessor::String),
274    TagInformation::new(&["file", "apk", "used_library"], "APK File Properties: Libraries Used", TagProcessor::String),
275    TagInformation::new(&["file", "jar", "main_class"], "JAR File Properties: Main Class", TagProcessor::String),
276    TagInformation::new(&["file", "jar", "main_package"], "JAR File Properties: Main Package", TagProcessor::String),
277    TagInformation::new(&["file", "jar", "imported_package"], "JAR File Properties: Imported package", TagProcessor::String),
278    TagInformation::new(&["file", "img", "exif_tool", "creator_tool"], "Image File Properties: Exiftool Information: Image Creation Tool", TagProcessor::String),
279    TagInformation::new(&["file", "img", "exif_tool", "derived_document_id"], "Image File Properties: Exiftool Information: Derived Document ID", TagProcessor::String),
280    TagInformation::new(&["file", "img", "exif_tool", "document_id"], "Image File Properties: Exiftool Information: Document ID", TagProcessor::String),
281    TagInformation::new(&["file", "img", "exif_tool", "instance_id"], "Image File Properties: Exiftool Information: Instance ID", TagProcessor::String),
282    TagInformation::new(&["file", "img", "exif_tool", "toolkit"], "Image File Properties: Exiftool Information: Toolkit", TagProcessor::String),
283    TagInformation::new(&["file", "img", "mega_pixels"], "Image File Properties: Megapixels", TagProcessor::String),
284    TagInformation::new(&["file", "img", "mode"], "Image File Properties: Image Mode", TagProcessor::String),
285    TagInformation::new(&["file", "img", "size"], "Image File Properties: Image Size", TagProcessor::String),
286    TagInformation::new(&["file", "img", "sorted_metadata_hash"], "Image File Properties: Sorted Metadata Hash", TagProcessor::String),
287    TagInformation::new(&["file", "ole", "macro", "sha256"], "OLE File Properties: OLE Macro: SHA256 of Macro", TagProcessor::Sha256),
288    TagInformation::new(&["file", "ole", "macro", "suspicious_string"], "OLE File Properties: OLE Macro: Suspicious Strings", TagProcessor::String),
289    TagInformation::new(&["file", "ole", "summary", "author"], "OLE File Properties: OLE Summary: Author", TagProcessor::String),
290    TagInformation::new(&["file", "ole", "summary", "codepage"], "OLE File Properties: OLE Summary: Code Page", TagProcessor::String),
291    TagInformation::new(&["file", "ole", "summary", "comment"], "OLE File Properties: OLE Summary: Comment", TagProcessor::String),
292    TagInformation::new(&["file", "ole", "summary", "company"], "OLE File Properties: OLE Summary: Company", TagProcessor::String),
293    TagInformation::new(&["file", "ole", "summary", "create_time"], "OLE File Properties: OLE Summary: Creation Time", TagProcessor::String),
294    TagInformation::new(&["file", "ole", "summary", "last_printed"], "OLE File Properties: OLE Summary: Date Last Printed", TagProcessor::String),
295    TagInformation::new(&["file", "ole", "summary", "last_saved_by"], "OLE File Properties: OLE Summary: User Last Saved By", TagProcessor::String),
296    TagInformation::new(&["file", "ole", "summary", "last_saved_time"], "OLE File Properties: OLE Summary: Date Last Saved", TagProcessor::String),
297    TagInformation::new(&["file", "ole", "summary", "manager"], "OLE File Properties: OLE Summary: Manager", TagProcessor::String),
298    TagInformation::new(&["file", "ole", "summary", "subject"], "OLE File Properties: OLE Summary: Subject", TagProcessor::String),
299    TagInformation::new(&["file", "ole", "summary", "title"], "OLE File Properties: OLE Summary: Title", TagProcessor::String),
300    TagInformation::new(&["file", "ole", "clsid"], "OLE File Properties: CLSID", TagProcessor::String),
301    TagInformation::new(&["file", "ole", "dde_link"], "OLE File Properties: DDE Link", TagProcessor::String),
302    TagInformation::new(&["file", "ole", "fib_timestamp"], "OLE File Properties: FIB Timestamp", TagProcessor::String),
303    TagInformation::new(&["file", "pe", "api_vector"], "PE File Properties: API Vector", TagProcessor::String),
304    TagInformation::new(&["file", "pe", "authenticode", "spc_sp_opus_info", "program_name"], "PE File Properties: PE Authenticode Information: Program name", TagProcessor::String),
305    TagInformation::new(&["file", "pe", "debug", "guid"], "PE File Properties: PE Debug Information: GUID", TagProcessor::String),
306    TagInformation::new(&["file", "pe", "exports", "function_name"], "PE File Properties: PE Exports Information: Function Name", TagProcessor::String),
307    TagInformation::new(&["file", "pe", "exports", "module_name"], "PE File Properties: PE Exports Information: Module Name", TagProcessor::String),
308    TagInformation::new(&["file", "pe", "imports", "fuzzy"], "PE File Properties: PE Imports Information: Fuzzy", TagProcessor::SSDeepHash),
309    TagInformation::new(&["file", "pe", "imports", "md5"], "PE File Properties: PE Imports Information: MD5", TagProcessor::MD5),
310    TagInformation::new(&["file", "pe", "imports", "imphash"], "PE File Properties: PE Imports Information: Imphash", TagProcessor::MD5),
311    TagInformation::new(&["file", "pe", "imports", "sorted_fuzzy"], "PE File Properties: PE Imports Information: Sorted Fuzzy", TagProcessor::SSDeepHash),
312    TagInformation::new(&["file", "pe", "imports", "sorted_sha1"], "PE File Properties: PE Imports Information: Sorted SHA1", TagProcessor::Sha1),
313    TagInformation::new(&["file", "pe", "imports", "gimphash"], "PE File Properties: PE Imports Information: Go Import hash", TagProcessor::Sha256),
314    TagInformation::new(&["file", "pe", "imports", "suspicious"], "PE File Properties: PE Imports Information: Suspicious", TagProcessor::String),
315    TagInformation::new(&["file", "pe", "linker", "timestamp"], "PE File Properties: PE Linker Information: timestamp", TagProcessor::String),
316    TagInformation::new(&["file", "pe", "oep", "bytes"], "PE File Properties: PE OEP Information: Bytes", TagProcessor::String),
317    TagInformation::new(&["file", "pe", "oep", "hexdump"], "PE File Properties: PE OEP Information: Hex Dump", TagProcessor::String),
318    TagInformation::new(&["file", "pe", "pdb_filename"], "PE File Properties: PDB Filename", TagProcessor::String),
319    TagInformation::new(&["file", "pe", "resources", "language"], "PE File Properties: PE Resources Information: Language", TagProcessor::String),
320    TagInformation::new(&["file", "pe", "resources", "name"], "PE File Properties: PE Resources Information: Name", TagProcessor::String),
321    TagInformation::new(&["file", "pe", "rich_header", "hash"], "PE File Properties: PE Rich Header Information: Hash", TagProcessor::String),
322    TagInformation::new(&["file", "pe", "sections", "hash"], "PE File Properties: PE Sections Information: Hash", TagProcessor::String),
323    TagInformation::new(&["file", "pe", "sections", "name"], "PE File Properties: PE Sections Information: Name", TagProcessor::String),
324    TagInformation::new(&["file", "pe", "versions", "description"], "PE File Properties: PE Versions Information: Description", TagProcessor::String),
325    TagInformation::new(&["file", "pe", "versions", "filename"], "PE File Properties: PE Versions Information: Filename", TagProcessor::String),
326    TagInformation::new(&["file", "pdf", "date", "modified"], "PDF File Properties: PDF Date Information: Date Modified", TagProcessor::String),
327    TagInformation::new(&["file", "pdf", "date", "pdfx"], "PDF File Properties: PDF Date Information: PDFx", TagProcessor::String),
328    TagInformation::new(&["file", "pdf", "date", "source_modified"], "PDF File Properties: PDF Date Information: Date Source Modified", TagProcessor::String),
329    TagInformation::new(&["file", "pdf", "javascript", "sha1"], "PDF File Properties: PDF Javascript Information: SHA1 of javascript", TagProcessor::Sha1),
330    TagInformation::new(&["file", "pdf", "stats", "sha1"], "PDF File Properties: PDF Statistics Information: SHA1 of statistics", TagProcessor::Sha1),
331    TagInformation::new(&["file", "plist", "installer_url"], "PList File Properties: Installer URL", TagProcessor::String),
332    TagInformation::new(&["file", "plist", "min_os_version"], "PList File Properties: Minimum OS Version", TagProcessor::String),
333    TagInformation::new(&["file", "plist", "requests_open_access"], "PList File Properties: Requests Open Access", TagProcessor::String),
334    TagInformation::new(&["file", "plist", "build", "machine_os"], "PList File Properties: Build Information: Machine OS", TagProcessor::String),
335    TagInformation::new(&["file", "plist", "cf_bundle", "development_region"], "PList File Properties: CF Bundle Information: Development Region", TagProcessor::String),
336    TagInformation::new(&["file", "plist", "cf_bundle", "display_name"], "PList File Properties: CF Bundle Information: Display Name", TagProcessor::String),
337    TagInformation::new(&["file", "plist", "cf_bundle", "executable"], "PList File Properties: CF Bundle Information: Executable Name", TagProcessor::String),
338    TagInformation::new(&["file", "plist", "cf_bundle", "identifier"], "PList File Properties: CF Bundle Information: Identifier Name", TagProcessor::String),
339    TagInformation::new(&["file", "plist", "cf_bundle", "name"], "PList File Properties: CF Bundle Information: Bundle Name", TagProcessor::String),
340    TagInformation::new(&["file", "plist", "cf_bundle", "pkg_type"], "PList File Properties: CF Bundle Information: Package Type", TagProcessor::String),
341    TagInformation::new(&["file", "plist", "cf_bundle", "signature"], "PList File Properties: CF Bundle Information: Signature", TagProcessor::String),
342    TagInformation::new(&["file", "plist", "cf_bundle", "url_scheme"], "PList File Properties: CF Bundle Information: URL Scheme", TagProcessor::String),
343    TagInformation::new(&["file", "plist", "cf_bundle", "version", "long"], "PList File Properties: CF Bundle Information: Bundle Version Information: Long Version", TagProcessor::String),
344    TagInformation::new(&["file", "plist", "cf_bundle", "version", "short"], "PList File Properties: CF Bundle Information: Bundle Version Information: Short Version", TagProcessor::String),
345    TagInformation::new(&["file", "plist", "dt", "compiler"], "PList File Properties: DT Information: Compiler", TagProcessor::String),
346    TagInformation::new(&["file", "plist", "dt", "platform", "build"], "PList File Properties: DT Information: Platform Information: Build", TagProcessor::String),
347    TagInformation::new(&["file", "plist", "dt", "platform", "name"], "PList File Properties: DT Information: Platform Information: Name", TagProcessor::String),
348    TagInformation::new(&["file", "plist", "dt", "platform", "version"], "PList File Properties: DT Information: Platform Information: Version", TagProcessor::String),
349    TagInformation::new(&["file", "plist", "ls", "background_only"], "PList File Properties: LS Information: Background Only", TagProcessor::String),
350    TagInformation::new(&["file", "plist", "ls", "min_system_version"], "PList File Properties: LS Information: Minimum System Versuion", TagProcessor::String),
351    TagInformation::new(&["file", "plist", "ns", "apple_script_enabled"], "PList File Properties: NS Information: Apple Script Enabled", TagProcessor::String),
352    TagInformation::new(&["file", "plist", "ns", "principal_class"], "PList File Properties: NS Information: Principal Class", TagProcessor::String),
353    TagInformation::new(&["file", "plist", "ui", "background_modes"], "PList File Properties: UI Information: Background Modes", TagProcessor::String),
354    TagInformation::new(&["file", "plist", "ui", "requires_persistent_wifi"], "PList File Properties: UI Information: Requires Persistent WIFI", TagProcessor::String),
355    TagInformation::new(&["file", "plist", "wk", "app_bundle_identifier"], "PList File Properties: WK Information: App Bundle ID", TagProcessor::String),
356    TagInformation::new(&["file", "powershell", "cmdlet"], "PowerShell File Properties: Cmdlet", TagProcessor::String),
357    TagInformation::new(&["file", "shortcut", "command_line"], "Shortcut File Properties: Command Line", TagProcessor::String),
358    TagInformation::new(&["file", "shortcut", "icon_location"], "Shortcut File Properties: Icon Location", TagProcessor::String),
359    TagInformation::new(&["file", "shortcut", "machine_id"], "Shortcut File Properties: Machine ID", TagProcessor::String),
360    TagInformation::new(&["file", "shortcut", "tracker_mac"], "Shortcut File Properties: Possible MAC address from the Tracker block", TagProcessor::String),
361    TagInformation::new(&["file", "swf", "header", "frame", "count"], "SWF File Properties: Header Information: Header Frame Information: Number of Frames", TagProcessor::I32),
362    TagInformation::new(&["file", "swf", "header", "frame", "rate"], "SWF File Properties: Header Information: Header Frame Information: Speed of Animation", TagProcessor::String),
363    TagInformation::new(&["file", "swf", "header", "frame", "size"], "SWF File Properties: Header Information: Header Frame Information: Size of Frame", TagProcessor::String),
364    TagInformation::new(&["file", "swf", "header", "version"], "SWF File Properties: Header Information: Version", TagProcessor::String),
365    TagInformation::new(&["file", "swf", "tags_ssdeep"], "SWF File Properties: Tags SSDeep", TagProcessor::SSDeepHash),
366    
367    TagInformation::new(&["network", "attack"], "Network: Attack", TagProcessor::String),
368    TagInformation::new(&["network", "dynamic", "domain"], "Network: Dynamic IOCs: Domain", TagProcessor::Domain),
369    TagInformation::new(&["network", "dynamic", "ip"], "Network: Dynamic IOCs: IP", TagProcessor::IpAddress),
370    TagInformation::new(&["network", "dynamic", "unc_path"], "Network: Dynamic IOCs: UNC Path", TagProcessor::UNCPath),
371    TagInformation::new(&["network", "dynamic", "uri"], "Network: Dynamic IOCs: URI", TagProcessor::Uri),
372    TagInformation::new(&["network", "dynamic", "uri_path"], "Network: Dynamic IOCs: URI Path", TagProcessor::UriPath),
373    TagInformation::new(&["network", "email", "address"], "Network: Email: Email Address", TagProcessor::EmailAddress),
374    TagInformation::new(&["network", "email", "date"], "Network: Email: Date", TagProcessor::String),
375    TagInformation::new(&["network", "email", "subject"], "Network: Email: Subject", TagProcessor::String),
376    TagInformation::new(&["network", "email", "msg_id"], "Network: Email: Message ID", TagProcessor::String),
377    TagInformation::new(&["network", "mac_address"], "Network: MAC Address", TagProcessor::Mac),
378    TagInformation::new(&["network", "port"], "Network: Port", TagProcessor::U16),
379    TagInformation::new(&["network", "protocol"], "Network: Protocol", TagProcessor::String),
380    TagInformation::new(&["network", "signature", "signature_id"], "Network: Signatures: ID", TagProcessor::String),
381    TagInformation::new(&["network", "signature", "message"], "Network: Signatures: Message", TagProcessor::String),
382    TagInformation::new(&["network", "static", "domain"], "Network: Static IOCs: Domain", TagProcessor::Domain),
383    TagInformation::new(&["network", "static", "ip"], "Network: Static IOCs: IP", TagProcessor::IpAddress),
384    TagInformation::new(&["network", "static", "unc_path"], "Network: Static IOCs: UNC Path", TagProcessor::UNCPath),
385    TagInformation::new(&["network", "static", "uri"], "Network: Static IOCs: URI", TagProcessor::Uri),
386    TagInformation::new(&["network", "static", "uri_path"], "Network: Static IOCs: URI Path", TagProcessor::UriPath),
387    TagInformation::new(&["network", "tls", "ja3_hash"], "Network: TLS: JA3 Hash", TagProcessor::Lowercase),
388    TagInformation::new(&["network", "tls", "ja3_string"], "Network: TLS: JA3 String", TagProcessor::String),
389    TagInformation::new(&["network", "tls", "ja3s_hash"], "Network: TLS: JA3S Hash", TagProcessor::Lowercase),
390    TagInformation::new(&["network", "tls", "ja3s_string"], "Network: TLS: JA3S String", TagProcessor::String),
391    TagInformation::new(&["network", "tls", "ja4_hash"], "Network: TLS: JA4 Hash", TagProcessor::JA4),
392    TagInformation::new(&["network", "tls", "ja4s_hash"], "Network: TLS: JA4S Hash", TagProcessor::String),
393    TagInformation::new(&["network", "tls", "sni"], "Network: TLS: SNI", TagProcessor::String),
394    TagInformation::new(&["network", "user_agent"], "Network: User Agent", TagProcessor::String),
395
396    TagInformation::new(&["source"], "Source", TagProcessor::String),
397
398    TagInformation::new(&["technique", "comms_routine"], "Technique: Communication Routine", TagProcessor::String),
399    TagInformation::new(&["technique", "config"], "Technique: Configuration", TagProcessor::String),
400    TagInformation::new(&["technique", "crypto"], "Technique: Cryptography", TagProcessor::String),
401    TagInformation::new(&["technique", "exploit"], "Technique: Technique", TagProcessor::String),
402    TagInformation::new(&["technique", "keylogger"], "Technique: Keylogger", TagProcessor::String),
403    TagInformation::new(&["technique", "macro"], "Technique: Macro", TagProcessor::String),
404    TagInformation::new(&["technique", "masking_algo"], "Technique: Masking Algorithm", TagProcessor::String),
405    TagInformation::new(&["technique", "obfuscation"], "Technique: Obfuscation", TagProcessor::String),
406    TagInformation::new(&["technique", "packer"], "Technique: Packer", TagProcessor::String),
407    TagInformation::new(&["technique", "persistence"], "Technique: Persistence", TagProcessor::String),
408    TagInformation::new(&["technique", "shellcode"], "Technique: Shell Code", TagProcessor::String),
409    TagInformation::new(&["technique", "string"], "Technique: String", TagProcessor::String),
410
411    TagInformation::new(&["vector"], "Vector", TagProcessor::String),
412];
413
414
415pub fn get_tag_information(label: &str) -> Option<&'static TagInformation> {
416    static TAGS: LazyLock<HashMap<String, &'static TagInformation>> = LazyLock::new(|| {
417        let mut table: HashMap<String, &'static TagInformation> = Default::default();
418        for tag in &ALL_VALID_TAGS {
419            if let Some(collision) = table.insert(tag.full_path(), tag) {
420                panic!("Collision on tag name: {}", collision.full_path());
421            }
422        }
423        table
424    });
425    TAGS.get(label).copied()
426}
427
428// MARK: Nested Tag Container
/// Container for a dictionary set of tags.
///
/// This is a thin wrapper around a JSON object; because of `#[serde(transparent)]`
/// it serializes and deserializes exactly as the wrapped map does.
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
#[serde(transparent)]
pub struct Tagging(JsonMap);
433
434impl Described<ElasticMeta> for Tagging {
435    fn metadata() -> struct_metadata::Descriptor<ElasticMeta> {
436
437        let mut catagories: HashMap<&'static str, Vec<_>> = HashMap::new();
438        for tag in &ALL_VALID_TAGS {
439            catagories.entry(tag.name[0]).or_default().push((&tag.name[1..], tag))
440        }
441
442        fn make_entry(label: &'static str, elements: &[(&[&'static str], &TagInformation)]) -> struct_metadata::Entry<ElasticMeta> {
443            if elements.len() == 1 && elements[0].0.is_empty() {
444                struct_metadata::Entry {
445                    label,
446                    docs: None,
447                    metadata: ElasticMeta { index: Some(true), store: Some(false), ..Default::default() },
448                    type_info: elements[0].1.metadata_type(),
449                    has_default: false,
450                    aliases: &[],
451                }
452            } else {
453
454                let mut catagories: HashMap<&'static str, Vec<_>> = HashMap::new();
455                for (path, tag) in elements {
456                    catagories.entry(path[0]).or_default().push((&path[1..], *tag))
457                }
458
459                let mut children = vec![];
460                let metadata = ElasticMeta::default();
461                for (label, elements) in catagories {
462                    let mut entry = make_entry(label, &elements);
463                    entry.metadata.forward_propagate_entry_defaults(&metadata, &entry.type_info.metadata);
464                    children.push(entry)
465                }
466
467                let type_info = struct_metadata::Descriptor {
468                    docs: None,
469                    metadata,
470                    kind: struct_metadata::Kind::Struct { 
471                        name: label, 
472                        children, 
473                    },
474                };
475
476                struct_metadata::Entry {
477                    label,
478                    docs: None,
479                    metadata: Default::default(),
480                    type_info,
481                    has_default: false,
482                    aliases: &[]
483                }
484            }
485        }
486
487        let mut children = vec![];
488        let metadata = ElasticMeta::default();
489        for (label, elements) in catagories {
490            let mut entry = make_entry(label, &elements);
491            entry.metadata.forward_propagate_entry_defaults(&metadata, &entry.type_info.metadata);
492            children.push(entry);
493        }
494
495        struct_metadata::Descriptor {
496            docs: None,
497            metadata,
498            kind: struct_metadata::Kind::Struct { 
499                name: "Tagging", 
500                children, 
501            },
502        }
503    }
504}
505
/// Error returned by `Tagging::to_list` when the document contains a value
/// that is neither a JSON object nor a JSON array.
#[derive(Debug, thiserror::Error)]
#[error("The tagging data had an unsupported layout.")]
pub struct LayoutError;
509
510impl Tagging {
511
512//     pub fn flatten(&self) -> Result<FlatTags, serde_json::Error> {
513//         let data = serde_json::to_value(self)?;
514//         let serde_json::Value::Object(data) = data else {
515//             return Err(serde_json::Error::custom("struct must become object"))
516//         };
517//         Ok(flatten_tags(data, None))
518//     }
519
520    pub fn to_list(&self, _safelisted: Option<bool>) -> Result<Vec<TagEntry>, LayoutError> {
521
522        fn flatten_inner(output: &mut Vec<TagEntry>, path: &[&str], data: &JsonMap) -> Result<(), ()> {
523            for (key, value) in data {
524                let mut path = Vec::from(path);
525                path.push(key);
526
527                match value {
528                    serde_json::Value::Object(map) => {
529                        flatten_inner(output, &path, map)?;
530                    },
531                    serde_json::Value::Array(values) => {
532                        let path = path.join(".");
533                        for value in values {
534                            output.push(TagEntry { 
535                                score: 0, 
536                                tag_type: path.clone(),
537                                value: TagValue(value.clone()) 
538                            })
539                            // {'safelisted': safelisted, 'type': k, 'value': t, 'short_type': k.rsplit(".", 1)[-1]})
540                        }
541                    },
542                    _ => return Err(())
543                }
544            }
545            Ok(())
546        }
547
548        let mut output = vec![];
549        if flatten_inner(&mut output, &[], &self.0).is_err() {
550            return Err(LayoutError)
551        }
552
553        Ok(output)
554    }
555}
556
557
558// MARK: Flat Tag Container
/// List of validated tags
///
/// Maps each tag definition to the values that have been accepted for it.
#[derive(Debug, Default)]
pub struct FlatTags {
    // Keyed by the static tag definition; each key holds every accepted value for that tag
    tags: HashMap<&'static TagInformation, Vec<TagValue>>,
}
564
565impl IntoIterator for FlatTags {
566    type Item = (&'static TagInformation, Vec<TagValue>);
567    type IntoIter = <HashMap<&'static TagInformation, Vec<TagValue>> as std::iter::IntoIterator>::IntoIter;
568    fn into_iter(self) -> Self::IntoIter { self.tags.into_iter() }
569}
570
571impl std::ops::Deref for FlatTags {
572    type Target = HashMap<&'static TagInformation, Vec<TagValue>>;
573
574    fn deref(&self) -> &Self::Target { &self.tags }
575}
576
577impl std::ops::DerefMut for FlatTags {
578    fn deref_mut(&mut self) -> &mut Self::Target { &mut self.tags }
579}
580
/// Error returned by `FlatTags::to_tagging` when a tag cannot be placed in the
/// nested document because an incompatible value already sits on its path.
/// Carries the full path of the tag that was being inserted.
#[derive(Debug, thiserror::Error)]
#[error("A tag name collision on {0} prevented a tagging document from being completed")]
pub struct TagNameCollision(String);
584
585
586impl FlatTags {
587    pub fn to_tagging(self) -> Result<Tagging, TagNameCollision> {
588        let mut output = JsonMap::default();
589
590        // let mut groups: HashMap<&'static str, Vec<(&[&'static str], _, _)>> = HashMap::new();
591        // for (tag, value) in self.tags {
592        //     let path = tag.name;
593        //     let items = groups.entry(path[0]).or_default();
594        //     items.push((&path[1..], tag, value))
595        // }
596
597        fn insert(info: &'static TagInformation, output: &mut JsonMap, name: &[&str], values: Vec<TagValue>) -> Result<(), TagNameCollision> {
598            if name.len() == 1 {
599                if info.inner_mapping() {
600                    let outer = output.entry(name[0]).or_insert_with(|| Value::Object(Default::default()));
601                    match outer.as_object_mut() {
602                        Some(outer) => {
603                            for value in values {
604                                if let Value::Object(value) = value.0 {
605                                    for (subkey, value) in value {
606                                        let values = match value {
607                                            Value::Array(values) => values,
608                                            other => vec![other]
609                                        };
610
611                                        let inner = outer.entry(subkey)
612                                            .or_insert_with(|| Value::Array(vec![]));
613                                        match inner.as_array_mut() {
614                                            Some(obj) => { obj.extend(values); },
615                                            None => return Err(TagNameCollision(info.full_path())),
616                                        }
617                                    }
618                                } else {
619                                    return Err(TagNameCollision(info.full_path()))
620                                }
621                            }
622                            Ok(())
623                        },
624                        None => Err(TagNameCollision(info.full_path())),
625                    }
626                } else {
627                    let inner = output.entry(name[0])
628                        .or_insert_with(|| Value::Array(vec![]));
629                    match inner.as_array_mut() {
630                        Some(obj) => { obj.extend(values.into_iter().map(|tag| tag.0)); Ok(()) },
631                        None => Err(TagNameCollision(info.full_path())),
632                    }
633                }
634            } else {
635                let inner = output.entry(name[0])
636                    .or_insert_with(|| Value::Object(JsonMap::default()));
637                match inner.as_object_mut() {
638                    Some(obj) => insert(info, obj, &name[1..], values),
639                    None => Err(TagNameCollision(info.full_path())),
640                }
641            }
642        }
643        for (info, values) in self.tags {
644            insert(info, &mut output, info.name, values)?;
645        }
646        Ok(Tagging(output))
647    }
648}
649
650pub fn load_tags_from_object(data: JsonMap) -> (FlatTags, Vec<(String, String)>) {
651
652    let mut accepted = FlatTags::default();
653    let mut rejected = vec![];
654
655    fn process(accepted: &mut FlatTags, rejected: &mut Vec<(String, String)>, path: &[&str], data: JsonMap) {
656        for (key, value) in data {
657            // build the label for the tag if it exists at this level of recursion 
658            let mut path = Vec::from(path);
659            path.push(&key);
660            let label = path.join(".");
661
662            // Try to use this tag label
663            if let Some(tag) = get_tag_information(&label) {
664                if let serde_json::Value::Array(values) = value {
665                    for value in values {
666                        match tag.processor.apply(value) {
667                            Ok(value) => accepted.entry(tag).or_default().push(TagValue(value)),
668                            Err(value) => rejected.push((label.to_string(), TagValue(value).to_string()))
669                        }
670                    }
671                } else {
672                    match tag.processor.apply(value) {
673                        Ok(value) => accepted.entry(tag).or_default().push(TagValue(value)),
674                        Err(value) => rejected.push((label.to_string(), TagValue(value).to_string()))
675                    }
676                }
677                continue
678            }
679
680            // if we couldn't use that tag label, try to recurse
681            if let serde_json::Value::Object(data) = value {
682                process(accepted, rejected, &path, data);
683            } else {
684                rejected.push((label.to_string(), TagValue(value).to_string()))
685            }
686        }
687    }
688
689    process(&mut accepted, &mut rejected, &[], data);
690
691    (accepted, rejected)
692}
693
/// Validate tags given as a flat map from full dotted tag name to candidate values.
///
/// Values under an unknown name are all rejected; values under a known name are
/// each run through that tag's processor. Returns the accepted tags together with
/// the rejected `(name, value as string)` pairs.
#[cfg(test)]
pub fn load_tags_from_list(data: HashMap<String, Vec<serde_json::Value>>) -> (FlatTags, Vec<(String, String)>) {

    let mut accepted = FlatTags::default();
    let mut rejected = Vec::new();

    for (name, values) in data {
        let Some(info) = get_tag_information(&name) else {
            rejected.extend(values.into_iter().map(|value| (name.clone(), TagValue(value).to_string())));
            continue;
        };

        for value in values {
            match info.processor.apply(value) {
                Ok(clean) => accepted.entry(info).or_default().push(TagValue(clean)),
                Err(raw) => rejected.push((name.clone(), TagValue(raw).to_string())),
            }
        }
    }

    (accepted, rejected)
}
720
721// MARK: Tests
722
/// Load a nested document, convert it to flat tags, back to a nested document and
/// finally to a list, checking the content at every step.
#[test]
fn tagging_forms_round_trip() {
    use serde_json::json;

    // small builders to keep the expectations readable
    let wrap = |values: &[&str]| -> Vec<TagValue> {
        values.iter().map(|value| TagValue::from(*value)).collect()
    };
    let entry = |tag_type: &str, value: &str| TagEntry {
        score: 0,
        tag_type: tag_type.to_owned(),
        value: TagValue::from(value),
    };

    // the same document is used as input and as expected output, only the actor casing differs
    let rules = json!({
        "service_abc": ["RULE.a10", "RULE.a11"],
        "service_xyz": ["RULE.a10"]
    });
    let document = |actor: &str| json!({
        "file": {
            "behavior": ["hop", "skip", "jump"],
            "rule": rules.clone()
        },
        "attribution": {
            "actor": [actor]
        }
    });

    let input = document("Randy");
    let serde_json::Value::Object(input) = input else { panic!() };

    // convert input to flat tags
    let (accepted, rejected) = load_tags_from_object(input);
    assert!(rejected.is_empty());
    assert_eq!(accepted.len(), 3);

    let values_of = |label: &str| accepted.get(get_tag_information(label).unwrap()).unwrap().clone();
    assert_eq!(values_of("attribution.actor"), wrap(&["RANDY"]));
    assert_eq!(values_of("file.behavior"), wrap(&["hop", "skip", "jump"]));
    assert_eq!(values_of("file.rule"), vec![TagValue(rules.clone())]);

    // convert flat tags to nested data
    let tagging = accepted.to_tagging().unwrap();

    // convert nested data json
    assert_eq!(serde_json::to_value(&tagging).unwrap(), document("RANDY"));

    // convert nested data to a list of tags
    let list = tagging.to_list(None).unwrap();
    assert_eq!(list, vec![
        entry("attribution.actor", "RANDY"),
        entry("file.behavior", "hop"),
        entry("file.behavior", "skip"),
        entry("file.behavior", "jump"),
        entry("file.rule.service_abc", "RULE.a10"),
        entry("file.rule.service_abc", "RULE.a11"),
        entry("file.rule.service_xyz", "RULE.a10"),
    ]);
}
778
779
/// Check that invalid tag names and invalid values are rejected by both loaders
#[test]
fn tag_names() {
    use serde_json::{Value, json};

    // a mix of acceptable and unacceptable values for a string tag
    let mixed = || -> Vec<Value> {
        vec![json!("abc"), json!("Big hats!"), json!([]), json!(100), json!(Option::<()>::None)]
    };
    let pair = |label: &str, value: &str| (label.to_string(), value.to_string());
    let strings = |values: &[&str]| -> Vec<TagValue> {
        values.iter().map(|value| TagValue(json!(value))).collect()
    };

    // both loaders are expected to accept exactly the same tags
    let check_accepted = |accepted: &FlatTags| {
        assert_eq!(accepted.len(), 2);
        assert_eq!(*accepted.get(get_tag_information("attribution.actor").unwrap()).unwrap(), strings(&["ABC", "BIG HATS!", "100"]));
        assert_eq!(*accepted.get(get_tag_information("av.heuristic").unwrap()).unwrap(), strings(&["abc", "Big hats!", "100"]));
    };

    // From a list
    let input: HashMap<String, Vec<Value>> = HashMap::from([
        ("attribution.actor".to_string(), mixed()),
        ("av.heuristic".to_string(), mixed()),
        ("av.heuristic.".to_string(), vec![json!("100000")]),
        (".av.heuristic".to_string(), vec![json!("100000")]),
        ("av".to_string(), vec![json!("100000")]),
        // ("network.tls.ja3_hash".to_string(), mixed()),
    ]);

    let (accepted, mut rejected) = load_tags_from_list(input);
    check_accepted(&accepted);
    assert_eq!(rejected.len(), 7);

    rejected.sort_unstable();
    assert_eq!(rejected, vec![
        pair(".av.heuristic", "100000"),
        pair("attribution.actor", "[]"),
        pair("attribution.actor", "null"),
        pair("av", "100000"),
        pair("av.heuristic", "[]"),
        pair("av.heuristic", "null"),
        pair("av.heuristic.", "100000"),
    ]);

    // from a dictionary
    let input = json!({
        "attribution": {
            "actor": ["abc", "Big hats!", [], 100, null]
        },
        "av": {
            "heuristic": ["abc", "Big hats!", [], 100, null]
        },
        "cert": 100000,
        "dynamic": ["abc", "Big hats!"]
    });
    let serde_json::Value::Object(input) = input else { panic!() };

    let (accepted, mut rejected) = load_tags_from_object(input);
    check_accepted(&accepted);
    assert_eq!(rejected.len(), 6);

    rejected.sort_unstable();
    assert_eq!(rejected, vec![
        pair("attribution.actor", "[]"),
        pair("attribution.actor", "null"),
        pair("av.heuristic", "[]"),
        pair("av.heuristic", "null"),
        pair("cert", "100000"),
        pair("dynamic", r#"["abc","Big hats!"]"#),
    ]);

}
845
846
/// Test parsing plain, lowercased and uppercased strings
#[test]
fn string_tag_parsing() {
    use serde_json::{Value, json};

    // (processor, expected output for "abc", expected output for "Big Hats!")
    let cases = [
        (TagProcessor::String, "abc", "Big Hats!"),
        (TagProcessor::Lowercase, "abc", "big hats!"),
        (TagProcessor::Uppercase, "ABC", "BIG HATS!"),
    ];

    for (proc, plain, mixed) in cases {
        assert_eq!(proc.apply(json!("abc")), Ok(json!(plain)));
        assert_eq!(proc.apply(json!("Big Hats!")), Ok(json!(mixed)));
        // the remaining inputs are handled the same way by all three processors
        assert_eq!(proc.apply(json!([])), Err(json!([])));
        assert_eq!(proc.apply(json!(100)), Ok(json!("100")));
        assert_eq!(proc.apply(Value::Null), Err(Value::Null));
    }

}
874
/// Each hash processor must accept its own sample unchanged and reject every other
/// processor's sample, as well as a bare number.
#[test]
fn hash_tag_parsing() {
    use serde_json::json;

    // one valid sample per processor
    let samples = [
        (TagProcessor::MD5, "00000000000000000000000000000000"),
        (TagProcessor::Sha1, "0000000000000000000000000000000000000000"),
        (TagProcessor::Sha256, "0000000000000000000000000000000000000000000000000000000000000000"),
        (TagProcessor::SSDeepHash, "24576:c+bnyhC57zhu0Nbs2p/ojPgZmAnShaLOZHzYX20:zwQB3bN/MkNbOZS20"),
        (TagProcessor::JA4, "t13d1517h2_8daaf6152771_b0da82dd1658"),
    ];

    for (proc, own) in &samples {
        for (_, candidate) in &samples {
            let expected = if candidate == own {
                Ok(json!(candidate))
            } else {
                Err(json!(candidate))
            };
            assert_eq!(proc.apply(json!(candidate)), expected);
        }
        assert_eq!(proc.apply(json!(999)), Err(json!(999)));
    }

}
920
/// Check the network related processors against tables of inputs.
#[test]
fn network_tag_parsing() {
    use serde_json::json;

    // Each case is (input, Some(accepted output)) or (input, None) when the
    // input must be rejected and handed back unchanged.
    let check = |proc: TagProcessor, cases: &[(&str, Option<&str>)]| {
        for &(input, normalized) in cases {
            let expected = match normalized {
                Some(output) => Ok(json!(output)),
                None => Err(json!(input)),
            };
            assert_eq!(proc.apply(json!(input)), expected);
        }
    };

    check(TagProcessor::Domain, &[
        ("www.google.com", Some("www.google.com")),
        ("www.GooGle.com", Some("www.google.com")),
        ("172.0.0.1", None),
        ("", None),
    ]);

    check(TagProcessor::IpAddress, &[
        ("www.google.com", None),
        ("www.GooGle.com", None),
        ("172.0.0.1", Some("172.0.0.1")),
        ("1234:5678:9ABC:0000:0000:1234:5678:9abc", Some("1234:5678:9ABC:0000:0000:1234:5678:9ABC")),
    ]);

    check(TagProcessor::UNCPath, &[
        ("www.google.com", None),
        ("www.GooGle.com", None),
        ("172.0.0.1", None),
        ("1234:5678:9ABC:0000:0000:1234:5678:9ABC", None),
        (r"\\ComputerName\SharedFolder\Resource", Some(r"\\ComputerName\SharedFolder\Resource")),
        (r"\\hostname@SSL@100\SharedFolder\Resource", Some(r"\\hostname@SSL@100\SharedFolder\Resource")),
    ]);

    check(TagProcessor::Uri, &[
        ("www.GooGle.com", None),
        ("172.0.0.1", None),
        ("1234:5678:9ABC:0000:0000:1234:5678:9abc", None),
        ("method://172.0.0.1", Some("method://172.0.0.1")),
        ("s3://1234:5678:9ABC:0000:0000:1234:5678:9abc", Some("s3://1234:5678:9ABC:0000:0000:1234:5678:9ABC")),
        ("https://www.google.com", Some("https://www.google.com")),
        ("https://www.GooGle.com/hellow?x=100&x=red", Some("https://www.google.com/hellow?x=100&x=red")),
        ("https://172.0.0.1", Some("https://172.0.0.1")),
        ("HTTPS://172.0.0.1/path/woith%20/components", Some("HTTPS://172.0.0.1/path/woith%20/components")),
        ("ftp://1234:5678:9ABC:0000:0000:1234:5678:9abc", Some("ftp://1234:5678:9ABC:0000:0000:1234:5678:9ABC")),
    ]);

    check(TagProcessor::UriPath, &[
        ("www.google.com", None),
        ("www.GooGle.com", None),
        ("172.0.0.1", None),
        ("1234:5678:9ABC:0000:0000:1234:5678:9ABC", None),
        (r"\\ComputerName\SharedFolder\Resource", None),
        (r"\\hostname@SSL@100\SharedFolder\Resource", None),
        (r"/path%20/words1/", Some(r"/path%20/words1/")),
    ]);

    check(TagProcessor::EmailAddress, &[
        ("www.google.com", None),
        ("www.GooGle.com", None),
        ("172.0.0.1", None),
        ("user@www.google.com", Some("user@www.google.com")),
        ("user@www.GooGle.com", Some("user@www.google.com")),
        ("user@172.0.0.1", None),
        ("john.doe@cyber.gc.ca", Some("john.doe@cyber.gc.ca")),
    ]);

    check(TagProcessor::Mac, &[
        ("172.0.0.1", None),
        ("1234:5678:9ABC:0000:0000:1234:5678:9ABC", None),
        ("00:1b:63:84:45:e6", Some("00:1b:63:84:45:e6")),
        ("00-1B-63-84-45-E6", Some("00-1b-63-84-45-e6")),
    ]);

}
984
985
/// Check the phone number and rule mapping processors.
#[test]
fn misc_tag_parsing() {
    use serde_json::json;

    let phone_cases = [
        (json!("abc"), Err(json!("abc"))),
        (json!([]), Err(json!([]))),
        (json!(100), Err(json!(100))),
        // algerian phone number
        // (json!("+213 21 55 55 55"), Err(json!(""))),
        (json!("+1 100 100 1000"), Ok(json!("+1 100 100 1000"))),
        // (json!("+61 5555 5555"), Ok(json!("+61 5555 5555"))),
    ];
    for (input, expected) in phone_cases {
        assert_eq!(TagProcessor::PhoneNumber.apply(input), expected);
    }

    let mapping_cases = [
        (json!("abc"), Err(json!("abc"))),
        (json!([]), Err(json!([]))),
        (json!(100), Err(json!(100))),
        (json!({}), Ok(json!({}))),
        (json!({"123": {}}), Err(json!({"123": {}}))),
        (json!({"123": []}), Ok(json!({"123": []}))),
        (json!({"123": [[], "abc", 12]}), Ok(json!({"123": ["[]", "abc", "12"]}))),
    ];
    for (input, expected) in mapping_cases {
        assert_eq!(TagProcessor::RuleMapping.apply(input), expected);
    }
}
1008
/// Check the numeric processors, including values outside the range of the target type.
#[test]
fn number_tag_parsing() {
    use serde_json::{Value, json};

    let signed_cases = [
        (json!("abc"), Err(json!("abc"))),
        (json!([]), Err(json!([]))),
        (json!(100), Ok(json!(100))),
        (json!(-100), Ok(json!(-100))),
        (json!(Option::<()>::None), Err(Value::Null)),
        (json!("55"), Ok(json!(55))),
        (json!(10_000_000_000u64), Err(json!("10000000000"))),
    ];
    for (input, expected) in signed_cases {
        assert_eq!(TagProcessor::I32.apply(input), expected);
    }

    let unsigned_cases = [
        (json!("abc"), Err(json!("abc"))),
        (json!([]), Err(json!([]))),
        (json!(100), Ok(json!(100))),
        (json!(-100), Err(json!("-100"))),
        (json!(Option::<()>::None), Err(Value::Null)),
        (json!("55"), Ok(json!(55))),
        (json!(1_000_000), Err(json!("1000000"))),
    ];
    for (input, expected) in unsigned_cases {
        assert_eq!(TagProcessor::U16.apply(input), expected);
    }

}
1031}