assemblyline_models/
lib.rs

1use std::fmt::Display;
2use std::str::FromStr;
3
4use serde::{Serialize, Deserialize};
5use serde_with::{SerializeDisplay, DeserializeFromStr};
6use struct_metadata::Described;
7
8pub mod datastore;
9pub mod config;
10pub mod messages;
11pub mod serialize;
12pub mod meta;
13pub mod types;
14
15pub use meta::ElasticMeta;
16pub use types::MD5;
17pub use types::Sha1;
18pub use types::classification::{ClassificationString, ExpandingClassification, disable_global_classification};
19
/// The sixteen lowercase hexadecimal digits, ordered by value so that
/// `HEXCHARS[n]` is the digit for the nibble `n`.
pub const HEXCHARS: [char; 16] = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'];
21
/// A value that can be deserialized from input of any lifetime and that can
/// carry a "from archive" flag.
pub trait Readable: for <'de> Deserialize<'de> {
    /// Record the `from_archive` flag on this value.
    ///
    /// NOTE(review): presumably the flag tells whether the record was read
    /// from an archive store -- confirm against the callers.
    fn set_from_archive(&mut self, from_archive: bool);
}
25
26impl Readable for JsonMap {
27    fn set_from_archive(&mut self, from_archive: bool) {
28        self.insert("from_json".to_owned(), serde_json::json!(from_archive));
29    }
30}
31
/// Errors produced while parsing or validating the model types of this crate.
#[derive(Debug)]
pub enum ModelError {
    /// A string was rejected as a sha256; carries the rejected value.
    InvalidSha256(String),
    /// A string was rejected as an md5; carries the rejected value.
    InvalidMd5(String),
    /// A string was rejected as a sha1; carries the rejected value.
    InvalidSha1(String),
    /// A string was rejected as a sid; when built from a
    /// `base62::DecodeError` it carries that error's message.
    InvalidSid(String),
    /// A string was rejected as an ssdeep hash; carries the rejected value.
    InvalidSSDeep(String),
    /// The classification engine has not been initialized.
    ClassificationNotInitialized,
    /// An invalid classification string was provided; holds the underlying
    /// `assemblyline_markings` error when one is available.
    InvalidClassification(Option<assemblyline_markings::errors::Errors>),
}
42
43impl Display for ModelError {
44    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45        match self {
46            ModelError::InvalidSha256(content) => f.write_fmt(format_args!("Invalid value provided for a sha256: {content}")),
47            ModelError::InvalidMd5(content) => f.write_fmt(format_args!("Invalid value provided for a md5: {content}")),
48            ModelError::InvalidSha1(content) => f.write_fmt(format_args!("Invalid value provided for a sha1: {content}")),
49            ModelError::InvalidSid(content) => f.write_fmt(format_args!("Invalid value provided for a sid: {content}")),
50            ModelError::ClassificationNotInitialized => f.write_str("The classification engine has not been initialized."),
51            ModelError::InvalidClassification(_) => f.write_str("An invalid classification string was provided."),
52            ModelError::InvalidSSDeep(content) =>  f.write_fmt(format_args!("Invalid value provided for a ssdeep hash: {content}")),
53        }
54    }
55}
56
57impl From<base62::DecodeError> for ModelError {
58    fn from(value: base62::DecodeError) -> Self {
59        Self::InvalidSid(value.to_string())
60    }
61}
62
63impl From<assemblyline_markings::errors::Errors> for ModelError {
64    fn from(value: assemblyline_markings::errors::Errors) -> Self {
65        Self::InvalidClassification(Some(value))
66    }
67}
68
// Marker implementation: relies on the `Debug` derive and the `Display` impl
// of `ModelError`; all default `Error` methods are kept, so no `source()` is exposed.
impl std::error::Error for ModelError {}
70
/// Short name for serde json's basic map type: a JSON object keyed by
/// `String` whose values are arbitrary `serde_json::Value`s.
pub type JsonMap = serde_json::Map<String, serde_json::Value>;
73
/// sha256 hash of a file
///
/// Held as a hex string. Values created through `FromStr` are trimmed,
/// lower-cased and checked to be exactly 64 hexadecimal digits. Serialization
/// goes through `Display` and deserialization through `FromStr`.
#[derive(Debug, SerializeDisplay, DeserializeFromStr, Described, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[metadata(normalizer="lowercase_normalizer")]
#[metadata_type(ElasticMeta)]
pub struct Sha256(String);
79
80// impl Described<ElasticMeta> for internment::ArcIntern<String> {
81//     fn metadata() -> struct_metadata::Descriptor<ElasticMeta> {
82//         String::metadata()
83//     }
84// }
85
86impl std::fmt::Display for Sha256 {
87    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
88        f.write_str(&self.0)
89    }
90}
91
92impl std::ops::Deref for Sha256 {
93    type Target = str;
94
95    fn deref(&self) -> &Self::Target {
96        &self.0
97    }
98}
99
100impl FromStr for Sha256 {
101    type Err = ModelError;
102
103    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
104        let hex = s.trim().to_ascii_lowercase();
105        if hex.len() != 64 || !hex.chars().all(|c|c.is_ascii_hexdigit()) {
106            return Err(ModelError::InvalidSha256(hex))
107        }
108        Ok(Sha256(hex))
109    }
110}
111
112impl TryFrom<&[u8]> for Sha256 {
113    type Error = ModelError;
114
115    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
116        Self::from_str(&hex::encode(value))
117    }
118}
119
/// Generate a string of `size` random lowercase hexadecimal digits.
///
/// One index is drawn from `rng` per digit, in output order.
#[cfg(feature = "rand")]
pub fn random_hex<R: rand::prelude::Rng + ?Sized>(rng: &mut R, size: usize) -> String {
    (0..size)
        .map(|_| HEXCHARS[rng.random_range(0..HEXCHARS.len())])
        .collect()
}
129
#[cfg(feature = "rand")]
impl rand::distr::Distribution<Sha256> for rand::distr::StandardUniform {
    /// Sample a random `Sha256` made of 64 random lowercase hex digits.
    fn sample<R: rand::prelude::Rng + ?Sized>(&self, rng: &mut R) -> Sha256 {
        let digits = random_hex(rng, 64);
        Sha256(digits)
    }
}
136
/// Validated uuid type with base62 encoding
///
/// The identifier is held as a `u128`; its text form (used by `Display`,
/// `FromStr` and therefore serialization) is the base62 encoding of that number.
#[derive(SerializeDisplay, DeserializeFromStr, Debug, Described, Hash, PartialEq, Eq, Clone, Copy)]
#[metadata_type(ElasticMeta)]
#[metadata(mapping="keyword")]
pub struct Sid(u128);
142
143impl std::fmt::Display for Sid {
144    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
145        f.write_str(&base62::encode(self.0))
146    }
147}
148
149impl std::str::FromStr for Sid {
150    type Err = ModelError;
151
152    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
153        Ok(Sid(base62::decode(s)?))
154    }
155}
156
157impl Sid {
158    pub fn assign(&self, bins: usize) -> usize {
159        (self.0 % bins as u128) as usize
160    }
161}
162
#[cfg(feature = "rand")]
impl rand::distr::Distribution<Sid> for rand::distr::StandardUniform {
    /// Sample a `Sid` from a uniformly random `u128`.
    fn sample<R: rand::prelude::Rng + ?Sized>(&self, rng: &mut R) -> Sid {
        let raw: u128 = rng.random();
        Sid(raw)
    }
}
169
/// A string wrapper whose metadata declares the `text` mapping.
///
/// The inner `String` is public and is not validated in any way.
#[derive(Serialize, Deserialize, Described, PartialEq, Eq, Debug, Clone, Default)]
#[metadata_type(ElasticMeta)]
#[metadata(mapping="text")]
pub struct Text(pub String);
174
175impl From<&str> for Text {
176    fn from(value: &str) -> Self {
177        Self(value.to_owned())
178    }
179}
180
181impl From<String> for Text {
182    fn from(value: String) -> Self {
183        Self(value)
184    }
185}
186
187impl From<Text> for String {
188    fn from(value: Text) -> String {
189        value.0
190    }
191}
192
193impl std::fmt::Display for Text {
194    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
195        f.write_str(&self.0)
196    }
197}
198
199impl Text {
200    pub fn as_str(&self) -> &str {
201        self.0.as_str()
202    }
203}
204
/// Unvalidated uuid type: a plain `String` alias, no checks are applied.
pub type Uuid = String;

/// Unvalidated domain type: a plain `String` alias, no checks are applied.
pub type Domain = String;

/// Unvalidated uri type: a plain `String` alias, no checks are applied.
pub type Uri = String;

/// Unvalidated platform type: a plain `String` alias, no checks are applied.
pub type Platform = String;

/// Unvalidated processor type: a plain `String` alias, no checks are applied.
pub type Processor = String;
219
/// Validated ssdeep type
///
/// Holds the hash in its text form `<digits>:<hash>:<hash>`. Values created
/// through `FromStr` have been checked against that layout; serialization goes
/// through `Display` and deserialization through `FromStr`.
#[derive(SerializeDisplay, DeserializeFromStr, Described, PartialEq, Eq, Debug, Clone)]
#[metadata_type(ElasticMeta)]
#[metadata(mapping="text", analyzer="text_fuzzy")]
pub struct SSDeepHash(String);
225
226impl std::fmt::Display for SSDeepHash {
227    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
228        f.write_str(&self.0)
229    }
230}
231
/// Tell whether `value` may appear in the hash sections of an ssdeep string:
/// an ASCII letter, an ASCII digit, `/` or `+`.
pub fn is_ssdeep_char(value: char) -> bool {
    matches!(value, 'a'..='z' | 'A'..='Z' | '0'..='9' | '/' | '+')
}
235
236impl std::str::FromStr for SSDeepHash {
237    type Err = ModelError;
238
239    fn from_str(s: &str) -> Result<Self, Self::Err> {
240        // SSDEEP_REGEX = r"^[0-9]{1,18}:[a-zA-Z0-9/+]{0,64}:[a-zA-Z0-9/+]{0,64}$"
241        let (numbers, hashes) = s.split_once(":").ok_or_else(||ModelError::InvalidSSDeep(s.to_owned()))?;
242        let (hasha, hashb) = hashes.split_once(":").ok_or_else(||ModelError::InvalidSSDeep(s.to_owned()))?;
243        if numbers.is_empty() || numbers.len() > 18 || numbers.chars().any(|c|!c.is_ascii_digit()) {
244            return Err(ModelError::InvalidSSDeep(s.to_owned()))
245        }
246        if hasha.len() > 64 || hasha.chars().any(|c|!is_ssdeep_char(c)) {
247            return Err(ModelError::InvalidSSDeep(s.to_owned()))
248        }
249        if hashb.len() > 64 || hashb.chars().any(|c|!is_ssdeep_char(c)) {
250            return Err(ModelError::InvalidSSDeep(s.to_owned()))
251        }
252        Ok(SSDeepHash(s.to_owned()))
253    }
254}
255
#[cfg(feature = "rand")]
impl rand::distr::Distribution<SSDeepHash> for rand::distr::StandardUniform {
    /// Sample a random, well-formed ssdeep hash.
    ///
    /// The leading number is drawn from `0..10000` and each hash section is
    /// 0 to 63 random alphanumeric characters.
    fn sample<R: rand::prelude::Rng + ?Sized>(&self, rng: &mut R) -> SSDeepHash {
        use rand::distr::{Alphanumeric, SampleString};

        // Draw order matters for reproducibility with seeded generators:
        // number, first length, first hash, second length, second hash.
        let block_size = rng.random_range(0..10000);
        let first_len = rng.random_range(0..64);
        let first = Alphanumeric.sample_string(rng, first_len);
        let second_len = rng.random_range(0..64);
        let second = Alphanumeric.sample_string(rng, second_len);

        SSDeepHash(format!("{block_size}:{first}:{second}"))
    }
}
271
/// Unvalidated phone number type: a plain `String` alias, no checks are applied.
pub type PhoneNumber = String;

/// Unvalidated MAC type: a plain `String` alias, no checks are applied.
pub type Mac = String;

/// Unvalidated UNCPath type: a plain `String` alias, no checks are applied.
pub type UNCPath = String;

/// Unvalidated UriPath type: a plain `String` alias, no checks are applied.
pub type UriPath = String;

/// Unvalidated Email type: a plain `String` alias, no checks are applied.
pub type Email = String;
286
/// Pool of words that `random_word` and `random_words` draw from.
/// Entries repeat, so some words are more likely to be picked than others.
const WORDS: [&str; 187] = ["The", "Cyber", "Centre", "stays", "on", "the", "cutting", "edge", "of", "technology", "by", 
    "working", "with", "commercial", "vendors", "of", "cyber", "security", "technology", "to", "support", "their", 
    "development", "of", "enhanced", "cyber", "defence", "tools", "To", "do", "this", "our", "experts", "survey", 
    "the", "cyber", "security", "market", "evaluate", "emerging", "technologies", "in", "order", "to", "determine", 
    "their", "potential", "to", "improve", "cyber", "security", "across", "the", "country", "The", "Cyber", "Centre", 
    "supports", "innovation", "by", "collaborating", "with", "all", "levels", "of", "government", "private", "industry", 
    "academia", "to", "examine", "complex", "problems", "in", "cyber", "security", "We", "are", "constantly", 
    "engaging", "partners", "to", "promote", "an", "open", "innovative", "environment", "We", "invite", "partners", 
    "to", "work", "with", "us", "but", "also", "promote", "other", "Government", "of", "Canada", "innovation", 
    "programs", "One", "of", "our", "key", "partnerships", "is", "with", "the", "Government", "of", "Canada", "Build", 
    "in", "Canada", "Innovation", "Program", "BCIP", "The", "BCIP", "helps", "Canadian", "companies", "of", "all", 
    "sizes", "transition", "their", "state", "of", "the", "art", "goods", "services", "from", "the", "laboratory", 
    "to", "the", "marketplace", "For", "certain", "cyber", "security", "innovations", "the", "Cyber", "Centre", 
    "performs", "the", "role", "of", "technical", "authority", "We", "evaluate", "participating", "companies", 
    "new", "technology", "provide", "feedback", "in", "order", "to", "assist", "them", "in", "bringing", "their", 
    "product", "to", "market", "To", "learn", "more", "about", "selling", "testing", "an", "innovation", "visit", 
    "the", "BCIP", "website"];
304
/// Pick one entry of `WORDS` at a uniformly random index and return it as an
/// owned `String`.
#[cfg(feature = "rand")]
pub fn random_word<R: rand::Rng + ?Sized>(prng: &mut R) -> String {
    let index = prng.random_range(0..WORDS.len());
    String::from(WORDS[index])
}
309
/// Return `count` entries of `WORDS`, each picked at an independent, uniformly
/// random index (repeats are possible).
#[cfg(feature = "rand")]
pub fn random_words<R: rand::Rng + ?Sized>(prng: &mut R, count: usize) -> Vec<String> {
    (0..count)
        .map(|_| WORDS[prng.random_range(0..WORDS.len())].to_string())
        .collect()
}
318
319
#[cfg(test)]
mod test {
    use std::fmt::{Debug, Display};
    use std::str::FromStr;

    use rand::distr::{Distribution, StandardUniform};
    use rand::Rng;

    use crate::{SSDeepHash, Sha1, Sha256, MD5};

    /// Sample 100 random values of `T` and check that each one survives a
    /// `to_string` followed by `parse` unchanged.
    fn assert_string_round_trip<T>()
    where
        T: Debug + Display + FromStr + PartialEq,
        <T as FromStr>::Err: Debug,
        StandardUniform: Distribution<T>,
    {
        let mut prng = rand::rng();
        for _ in 0..100 {
            let sample: T = prng.random();
            let reparsed: T = sample.to_string().parse().unwrap();
            assert_eq!(sample, reparsed);
        }
    }

    #[test]
    fn random_ssdeep() {
        assert_string_round_trip::<SSDeepHash>();
    }

    #[test]
    fn random_sha256() {
        assert_string_round_trip::<Sha256>();
    }

    #[test]
    fn random_sha1() {
        assert_string_round_trip::<Sha1>();
    }

    #[test]
    fn random_md5() {
        assert_string_round_trip::<MD5>();
    }
}
362