1use std::fmt::Display;
2use std::str::FromStr;
3
4use serde::{Serialize, Deserialize};
5use serde_with::{SerializeDisplay, DeserializeFromStr};
6use struct_metadata::Described;
7
8pub mod datastore;
9pub mod config;
10pub mod messages;
11pub mod serialize;
12pub mod meta;
13pub mod types;
14
15pub use meta::ElasticMeta;
16pub use types::MD5;
17pub use types::Sha1;
18pub use types::classification::{ClassificationString, ExpandingClassification, disable_global_classification};
19
/// The sixteen lowercase hexadecimal digits in ascending order: `'0'`-`'9'` followed by `'a'`-`'f'`.
pub const HEXCHARS: [char; 16] = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'];
21
/// A value that can be deserialized for every input lifetime and that accepts
/// a `from_archive` flag after it has been built.
///
/// NOTE(review): what counts as an "archive" is decided by the callers, which
/// are not part of this file.
pub trait Readable: for <'de> Deserialize<'de> {
    /// Record the given `from_archive` flag on `self`.
    ///
    /// How (and whether) the flag is stored is left to the implementor.
    fn set_from_archive(&mut self, from_archive: bool);
}
25
26impl Readable for JsonMap {
27 fn set_from_archive(&mut self, from_archive: bool) {
28 self.insert("from_json".to_owned(), serde_json::json!(from_archive));
29 }
30}
31
/// Errors raised while parsing or validating the value types of this crate.
#[derive(Debug)]
pub enum ModelError {
    /// A value was rejected as a sha256; the payload is the rejected text.
    InvalidSha256(String),
    /// A value was rejected as an md5; the payload is shown in the error message.
    InvalidMd5(String),
    /// A value was rejected as a sha1; the payload is shown in the error message.
    InvalidSha1(String),
    /// A value was rejected as a sid; the payload is shown in the error message.
    InvalidSid(String),
    /// A value was rejected as an ssdeep hash; the payload is the rejected text.
    InvalidSSDeep(String),
    /// The classification engine has not been initialized.
    ClassificationNotInitialized,
    /// An invalid classification string was provided; may carry the underlying
    /// `assemblyline_markings` error.
    InvalidClassification(Option<assemblyline_markings::errors::Errors>),
}
42
43impl Display for ModelError {
44 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45 match self {
46 ModelError::InvalidSha256(content) => f.write_fmt(format_args!("Invalid value provided for a sha256: {content}")),
47 ModelError::InvalidMd5(content) => f.write_fmt(format_args!("Invalid value provided for a md5: {content}")),
48 ModelError::InvalidSha1(content) => f.write_fmt(format_args!("Invalid value provided for a sha1: {content}")),
49 ModelError::InvalidSid(content) => f.write_fmt(format_args!("Invalid value provided for a sid: {content}")),
50 ModelError::ClassificationNotInitialized => f.write_str("The classification engine has not been initialized."),
51 ModelError::InvalidClassification(_) => f.write_str("An invalid classification string was provided."),
52 ModelError::InvalidSSDeep(content) => f.write_fmt(format_args!("Invalid value provided for a ssdeep hash: {content}")),
53 }
54 }
55}
56
57impl From<base62::DecodeError> for ModelError {
58 fn from(value: base62::DecodeError) -> Self {
59 Self::InvalidSid(value.to_string())
60 }
61}
62
63impl From<assemblyline_markings::errors::Errors> for ModelError {
64 fn from(value: assemblyline_markings::errors::Errors) -> Self {
65 Self::InvalidClassification(Some(value))
66 }
67}
68
// Empty impl: every method falls back to the trait's defaults, so `source()`
// reports no cause, including for `InvalidClassification(Some(_))`.
impl std::error::Error for ModelError {}
70
/// A JSON object: string keys mapped to arbitrary `serde_json` values.
pub type JsonMap = serde_json::Map<String, serde_json::Value>;
73
// Newtype around the hex text of a sha256 value.
// The inner field is private; parsing through `FromStr` trims, lowercases and
// checks the text before a value is built.
#[derive(Debug, SerializeDisplay, DeserializeFromStr, Described, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[metadata(normalizer="lowercase_normalizer")]
#[metadata_type(ElasticMeta)]
pub struct Sha256(String);
79
80impl std::fmt::Display for Sha256 {
87 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
88 f.write_str(&self.0)
89 }
90}
91
92impl std::ops::Deref for Sha256 {
93 type Target = str;
94
95 fn deref(&self) -> &Self::Target {
96 &self.0
97 }
98}
99
100impl FromStr for Sha256 {
101 type Err = ModelError;
102
103 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
104 let hex = s.trim().to_ascii_lowercase();
105 if hex.len() != 64 || !hex.chars().all(|c|c.is_ascii_hexdigit()) {
106 return Err(ModelError::InvalidSha256(hex))
107 }
108 Ok(Sha256(hex))
109 }
110}
111
112impl TryFrom<&[u8]> for Sha256 {
113 type Error = ModelError;
114
115 fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
116 Self::from_str(&hex::encode(value))
117 }
118}
119
/// Build a string of `size` characters, each picked from [`HEXCHARS`] using `rng`.
///
/// One `random_range` draw is made per character, in output order.
#[cfg(feature = "rand")]
pub fn random_hex<R: rand::prelude::Rng + ?Sized>(rng: &mut R, size: usize) -> String {
    let mut digits = String::with_capacity(size);
    digits.extend((0..size).map(|_| HEXCHARS[rng.random_range(0..HEXCHARS.len())]));
    digits
}
129
#[cfg(feature = "rand")]
impl rand::distr::Distribution<Sha256> for rand::distr::StandardUniform {
    /// Produce a `Sha256` made of 64 characters drawn by `random_hex`.
    fn sample<R: rand::prelude::Rng + ?Sized>(&self, rng: &mut R) -> Sha256 {
        let digits = random_hex(rng, 64);
        Sha256(digits)
    }
}
136
// Identifier stored as a `u128`.
// Its text form (used by `Display`/`FromStr`, and therefore by the derived
// serde impls) is the base62 encoding of that number.
#[derive(SerializeDisplay, DeserializeFromStr, Debug, Described, Hash, PartialEq, Eq, Clone, Copy)]
#[metadata_type(ElasticMeta)]
#[metadata(mapping="keyword")]
pub struct Sid(u128);
142
143impl std::fmt::Display for Sid {
144 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
145 f.write_str(&base62::encode(self.0))
146 }
147}
148
149impl std::str::FromStr for Sid {
150 type Err = ModelError;
151
152 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
153 Ok(Sid(base62::decode(s)?))
154 }
155}
156
157impl Sid {
158 pub fn assign(&self, bins: usize) -> usize {
159 (self.0 % bins as u128) as usize
160 }
161}
162
#[cfg(feature = "rand")]
impl rand::distr::Distribution<Sid> for rand::distr::StandardUniform {
    /// Produce a `Sid` wrapping a `u128` drawn from `rng`.
    fn sample<R: rand::prelude::Rng + ?Sized>(&self, rng: &mut R) -> Sid {
        let raw: u128 = rng.random();
        Sid(raw)
    }
}
169
// Newtype around an owned `String` with a public inner field.
// The metadata attributes below request the "text" mapping.
#[derive(Serialize, Deserialize, Described, PartialEq, Eq, Debug, Clone, Default)]
#[metadata_type(ElasticMeta)]
#[metadata(mapping="text")]
pub struct Text(pub String);
174
175impl From<&str> for Text {
176 fn from(value: &str) -> Self {
177 Self(value.to_owned())
178 }
179}
180
181impl From<String> for Text {
182 fn from(value: String) -> Self {
183 Self(value)
184 }
185}
186
187impl From<Text> for String {
188 fn from(value: Text) -> String {
189 value.0
190 }
191}
192
193impl std::fmt::Display for Text {
194 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
195 f.write_str(&self.0)
196 }
197}
198
199impl Text {
200 pub fn as_str(&self) -> &str {
201 self.0.as_str()
202 }
203}
204
/// Plain `String` alias. The name suggests a UUID, but the alias enforces no format.
pub type Uuid = String;

/// Plain `String` alias. The name suggests a domain name, but the alias enforces no format.
pub type Domain = String;

/// Plain `String` alias. The name suggests a URI, but the alias enforces no format.
pub type Uri = String;

/// Plain `String` alias. The name suggests a platform label, but the alias enforces no format.
pub type Platform = String;

/// Plain `String` alias. The name suggests a processor label, but the alias enforces no format.
pub type Processor = String;
219
// Newtype around the text of an ssdeep hash.
// The inner field is private; parsing through `FromStr` checks the
// `digits:part:part` shape before a value is built.
#[derive(SerializeDisplay, DeserializeFromStr, Described, PartialEq, Eq, Debug, Clone)]
#[metadata_type(ElasticMeta)]
#[metadata(mapping="text", analyzer="text_fuzzy")]
pub struct SSDeepHash(String);
225
226impl std::fmt::Display for SSDeepHash {
227 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
228 f.write_str(&self.0)
229 }
230}
231
/// Return `true` when `value` is an ASCII letter, an ASCII digit, `'/'` or `'+'`.
pub fn is_ssdeep_char(value: char) -> bool {
    matches!(value, 'a'..='z' | 'A'..='Z' | '0'..='9' | '/' | '+')
}
235
236impl std::str::FromStr for SSDeepHash {
237 type Err = ModelError;
238
239 fn from_str(s: &str) -> Result<Self, Self::Err> {
240 let (numbers, hashes) = s.split_once(":").ok_or_else(||ModelError::InvalidSSDeep(s.to_owned()))?;
242 let (hasha, hashb) = hashes.split_once(":").ok_or_else(||ModelError::InvalidSSDeep(s.to_owned()))?;
243 if numbers.is_empty() || numbers.len() > 18 || numbers.chars().any(|c|!c.is_ascii_digit()) {
244 return Err(ModelError::InvalidSSDeep(s.to_owned()))
245 }
246 if hasha.len() > 64 || hasha.chars().any(|c|!is_ssdeep_char(c)) {
247 return Err(ModelError::InvalidSSDeep(s.to_owned()))
248 }
249 if hashb.len() > 64 || hashb.chars().any(|c|!is_ssdeep_char(c)) {
250 return Err(ModelError::InvalidSSDeep(s.to_owned()))
251 }
252 Ok(SSDeepHash(s.to_owned()))
253 }
254}
255
#[cfg(feature = "rand")]
impl rand::distr::Distribution<SSDeepHash> for rand::distr::StandardUniform {
    /// Produce a random `SSDeepHash`: a number in `0..10000`, then two
    /// alphanumeric strings of 0 to 63 characters each, joined by `':'`.
    fn sample<R: rand::prelude::Rng + ?Sized>(&self, rng: &mut R) -> SSDeepHash {
        use rand::distr::{Alphanumeric, SampleString};

        // Draws are made in the order the fields appear in the output.
        let leading = rng.random_range(0..10000);
        let first_len = rng.random_range(0..64);
        let first = Alphanumeric.sample_string(rng, first_len);
        let second_len = rng.random_range(0..64);
        let second = Alphanumeric.sample_string(rng, second_len);

        SSDeepHash(format!("{leading}:{first}:{second}"))
    }
}
271
/// Plain `String` alias. The name suggests a phone number, but the alias enforces no format.
pub type PhoneNumber = String;

/// Plain `String` alias. The name suggests a MAC address, but the alias enforces no format.
pub type Mac = String;

/// Plain `String` alias. The name suggests a UNC path, but the alias enforces no format.
pub type UNCPath = String;

/// Plain `String` alias. The name suggests the path part of a URI, but the alias enforces no format.
pub type UriPath = String;

/// Plain `String` alias. The name suggests an email address, but the alias enforces no format.
pub type Email = String;
286
// Fixed pool of words that `random_word` and `random_words` index into.
// Several words appear more than once, so a uniformly chosen index does not
// make every distinct word equally likely.
const WORDS: [&str; 187] = ["The", "Cyber", "Centre", "stays", "on", "the", "cutting", "edge", "of", "technology", "by",
    "working", "with", "commercial", "vendors", "of", "cyber", "security", "technology", "to", "support", "their",
    "development", "of", "enhanced", "cyber", "defence", "tools", "To", "do", "this", "our", "experts", "survey",
    "the", "cyber", "security", "market", "evaluate", "emerging", "technologies", "in", "order", "to", "determine",
    "their", "potential", "to", "improve", "cyber", "security", "across", "the", "country", "The", "Cyber", "Centre",
    "supports", "innovation", "by", "collaborating", "with", "all", "levels", "of", "government", "private", "industry",
    "academia", "to", "examine", "complex", "problems", "in", "cyber", "security", "We", "are", "constantly",
    "engaging", "partners", "to", "promote", "an", "open", "innovative", "environment", "We", "invite", "partners",
    "to", "work", "with", "us", "but", "also", "promote", "other", "Government", "of", "Canada", "innovation",
    "programs", "One", "of", "our", "key", "partnerships", "is", "with", "the", "Government", "of", "Canada", "Build",
    "in", "Canada", "Innovation", "Program", "BCIP", "The", "BCIP", "helps", "Canadian", "companies", "of", "all",
    "sizes", "transition", "their", "state", "of", "the", "art", "goods", "services", "from", "the", "laboratory",
    "to", "the", "marketplace", "For", "certain", "cyber", "security", "innovations", "the", "Cyber", "Centre",
    "performs", "the", "role", "of", "technical", "authority", "We", "evaluate", "participating", "companies",
    "new", "technology", "provide", "feedback", "in", "order", "to", "assist", "them", "in", "bringing", "their",
    "product", "to", "market", "To", "learn", "more", "about", "selling", "testing", "an", "innovation", "visit",
    "the", "BCIP", "website"];
304
/// Return an owned copy of one entry of `WORDS`, chosen by a single
/// `random_range` draw over its indices.
#[cfg(feature = "rand")]
pub fn random_word<R: rand::Rng + ?Sized>(prng: &mut R) -> String {
    let index = prng.random_range(0..WORDS.len());
    WORDS[index].to_owned()
}
309
/// Return `count` words, each chosen independently by [`random_word`].
///
/// The words are drawn in output order with one draw per word. Collecting from
/// a sized range lets the vector reserve its storage once instead of growing
/// as it is filled. A `count` of zero gives an empty vector.
#[cfg(feature = "rand")]
pub fn random_words<R: rand::Rng + ?Sized>(prng: &mut R, count: usize) -> Vec<String> {
    (0..count).map(|_| random_word(&mut *prng)).collect()
}
318
319
#[cfg(test)]
mod test {
    use std::fmt::Debug;
    use std::str::FromStr;

    use rand::distr::{Distribution, StandardUniform};
    use rand::Rng;

    use crate::{SSDeepHash, Sha1, Sha256, MD5};

    /// Number of random values checked by each test.
    const ROUNDS: usize = 100;

    /// Draw `ROUNDS` random values of `T` and check that each one survives a
    /// trip through `to_string` followed by `parse`.
    fn assert_round_trips<T>()
    where
        T: ToString + FromStr + PartialEq + Debug,
        <T as FromStr>::Err: Debug,
        StandardUniform: Distribution<T>,
    {
        let mut prng = rand::rng();
        for _ in 0..ROUNDS {
            let original: T = prng.random();
            let reparsed: T = original.to_string().parse().unwrap();
            assert_eq!(original, reparsed);
        }
    }

    #[test]
    fn random_ssdeep() {
        assert_round_trips::<SSDeepHash>();
    }

    #[test]
    fn random_sha256() {
        assert_round_trips::<Sha256>();
    }

    #[test]
    fn random_sha1() {
        assert_round_trips::<Sha1>();
    }

    #[test]
    fn random_md5() {
        assert_round_trips::<MD5>();
    }
}
362