1use bitcoin::{
4 is_valid_bitcoin_p2pkh_address, is_valid_bitcoin_p2sh_address, is_valid_bitcoin_p2wpkh_address,
5 is_valid_bitcoin_p2wsh_address, is_valid_litecoin_p2wpkh_address,
6};
7use helpers::{
8 bytes_to_string, dec_u8, defanged_colon, defanged_period, hex_u16, is_base58, is_bech32,
9 is_multispace, is_not_digit, is_not_hex_digit,
10};
11use nom::{
12 branch::alt,
13 bytes::complete::{is_not, tag, tag_no_case, take_till, take_while},
14 character::{
15 complete::{alphanumeric1, hex_digit1, multispace0, multispace1},
16 is_alphanumeric,
17 },
18 combinator::{complete, opt},
19 error::{make_error, ErrorKind},
20 multi::{many1, separated_list0, separated_list1},
21 sequence::preceded,
22 Err, IResult, Parser,
23};
24use std::{
25 net::{Ipv4Addr, Ipv6Addr},
26 sync::LazyLock,
27};
28
29mod bitcoin;
30mod helpers;
31
32static TLD_EXTRACTOR: LazyLock<tldextract::TldExtractor> =
33 LazyLock::new(|| tldextract::TldExtractor::new(Default::default()));
34
35#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, serde::Serialize)]
39#[serde(tag = "kind", content = "value", rename_all = "snake_case")]
40pub enum Indicator {
41 Url(String),
43 Domain(String),
45 File(String),
47 Email(String),
49 Ipv4(Ipv4Addr),
51 Ipv6(Ipv6Addr),
53 Sha512(String),
55 Sha256(String),
57 Sha1(String),
59 Md5(String),
61 BitcoinP2pkhAddress(String),
63 BitcoinP2shAddress(String),
65 BitcoinP2wpkhAddress(String),
67 BitcoinP2wshAddress(String),
69 LitecoinP2pkhAddress(String),
70 LitecoinP2wpkhAddress(String),
72}
73
74pub fn extract_indicators(input: &[u8]) -> IResult<&[u8], Vec<Indicator>> {
78 let (input, _) = multispace0(input)?;
79 let (input, indicator) = complete(separated_list0(
80 opt(is_not(" \t\r\n")).and(multispace1),
81 opt(extract_indicator),
82 ))(input)?;
83
84 let mut indicators = indicator.into_iter().flatten().collect::<Vec<Indicator>>();
85 indicators.sort();
86 indicators.dedup();
87
88 Ok((input, indicators))
89}
90
91pub fn extract_indicator(input: &[u8]) -> IResult<&[u8], Indicator> {
95 alt((
96 extract_url,
97 extract_email,
98 extract_ipv4,
99 extract_ipv6,
100 extract_hash,
101 extract_domain,
102 extract_bitcoin_p2pkh_address,
103 extract_bitcoin_p2sh_address,
104 extract_bitcoin_p2wpkh_address,
105 extract_bitcoin_p2wsh_address,
106 extract_litecoin_p2wpkh_address,
107 ))(input)
108}
109
110fn extract_url(input: &[u8]) -> IResult<&[u8], Indicator> {
111 let (input, scheme) = alt((tag_no_case("https"), tag_no_case("http")))(input)?;
112 let (input, _) = defanged_colon(input)?;
113 let (input, _) = tag("//")(input)?;
114 let (input, host) = separated_list1(defanged_period, alt((alphanumeric1, tag("-"))))(input)?;
115 let (input, rest) = take_till(is_multispace)(input)?;
116
117 Ok((
118 input,
119 Indicator::Url(format!(
120 "{}://{}{}",
121 std::str::from_utf8(scheme).unwrap(),
122 host.into_iter()
123 .map(|s| std::str::from_utf8(s).unwrap())
124 .collect::<Vec<&str>>()
125 .join("."),
126 std::str::from_utf8(rest).unwrap()
127 )),
128 ))
129}
130
131fn extract_domain(input: &[u8]) -> IResult<&[u8], Indicator> {
132 let (input, data) = separated_list1(defanged_period, alt((alphanumeric1, tag("-"))))(input)?;
133
134 if data.len() < 2 {
135 return Err(Err::Error(make_error(input, ErrorKind::Verify)));
136 }
137
138 let potential_domain = data
139 .into_iter()
140 .map(|s| std::str::from_utf8(s).unwrap())
141 .collect::<Vec<&str>>()
142 .join(".");
143
144 let Ok(tld) = TLD_EXTRACTOR.extract(&potential_domain) else {
145 return Ok((input, Indicator::File(potential_domain)));
146 };
147
148 if tld.domain.is_some() && tld.suffix.is_some() {
149 return Ok((
150 input,
151 Indicator::Domain(format!(
152 "{}{}.{}",
153 if let Some(subdomain) = tld.subdomain.as_ref() {
154 format!("{}.", subdomain)
155 } else {
156 "".to_string()
157 },
158 tld.domain.unwrap(),
159 tld.suffix.unwrap()
160 )),
161 ));
162 }
163
164 Ok((input, Indicator::File(potential_domain)))
165}
166
167fn extract_email(input: &[u8]) -> IResult<&[u8], Indicator> {
168 let (input, _) = opt(take_while(|c| {
169 c != b'.' && c != b'-' && c != b'_' && c != b'+' && !is_alphanumeric(c) && !is_multispace(c)
170 }))(input)?;
171
172 let (input, email) =
173 many1(alt((alphanumeric1, tag("."), tag("-"), tag("_"), tag("+"))))(input)?;
174 let (input, _) = tag("@")(input)?;
175 let (input, first_part) = many1(alt((alphanumeric1, tag("-"))))(input)?;
176 let (input, domain) = preceded(defanged_period, many1(alt((alphanumeric1, tag("-")))))(input)?;
177
178 Ok((
179 input,
180 Indicator::Email(format!(
181 "{}@{}.{}",
182 std::str::from_utf8(&email.concat()).unwrap(),
183 std::str::from_utf8(&first_part.concat()).unwrap(),
184 std::str::from_utf8(&domain.concat()).unwrap()
185 )),
186 ))
187}
188
189fn extract_ipv4(input: &[u8]) -> IResult<&[u8], Indicator> {
190 let (input, _) = opt(take_while(|c| is_not_digit(c) && !is_multispace(c)))(input)?;
191
192 let (input, octects) = separated_list1(defanged_period, dec_u8)(input)?;
193 if octects.len() != 4 {
194 return Err(Err::Error(make_error(input, ErrorKind::Verify)));
195 }
196
197 let ipv4_addr = Ipv4Addr::new(octects[0], octects[1], octects[2], octects[3]);
198 Ok((input, Indicator::Ipv4(ipv4_addr)))
199}
200
201fn extract_ipv6(input: &[u8]) -> IResult<&[u8], Indicator> {
202 let (input, _) = opt(take_while(|c| {
203 c != b':' && !is_alphanumeric(c) && !is_multispace(c)
204 }))(input)?;
205
206 let (input, hexes) = separated_list1(defanged_colon, hex_u16)(input)?;
207 if hexes.len() != 8 {
208 return Err(Err::Error(make_error(input, ErrorKind::Verify)));
209 }
210
211 let ipv6_addr = Ipv6Addr::new(
212 hexes[0], hexes[1], hexes[2], hexes[3], hexes[4], hexes[5], hexes[6], hexes[7],
213 );
214 Ok((input, Indicator::Ipv6(ipv6_addr)))
215}
216
217fn extract_hash(input: &[u8]) -> IResult<&[u8], Indicator> {
218 let (input, _) = opt(take_while(|c| is_not_hex_digit(c) && !is_multispace(c)))(input)?;
219
220 let (input, hash) = hex_digit1(input)?;
221
222 match hash.len() {
223 32 => Ok((input, Indicator::Md5(bytes_to_string(hash)))),
224 40 => Ok((input, Indicator::Sha1(bytes_to_string(hash)))),
225 64 => Ok((input, Indicator::Sha256(bytes_to_string(hash)))),
226 128 => Ok((input, Indicator::Sha512(bytes_to_string(hash)))),
227 _ => Err(Err::Error(make_error(input, ErrorKind::Verify))),
228 }
229}
230
231fn extract_bitcoin_p2pkh_address(input: &[u8]) -> IResult<&[u8], Indicator> {
232 let (input, _) = opt(take_while(|c| c != b'1' && !is_multispace(c)))(input)?;
233
234 let (input, prefix) = tag("1")(input)?;
235 let (input, address) = is_base58(input)?;
236
237 if is_valid_bitcoin_p2pkh_address(address) {
238 Ok((
239 input,
240 Indicator::BitcoinP2pkhAddress(bytes_to_string(&[prefix, address].concat())),
241 ))
242 } else {
243 Err(Err::Error(make_error(input, ErrorKind::Verify)))
244 }
245}
246
247fn extract_bitcoin_p2sh_address(input: &[u8]) -> IResult<&[u8], Indicator> {
248 let (input, _) = opt(take_while(|c| c != b'3' && !is_multispace(c)))(input)?;
249
250 let (input, prefix) = tag("3")(input)?;
251 let (input, address) = is_base58(input)?;
252
253 if is_valid_bitcoin_p2sh_address(address) {
254 Ok((
255 input,
256 Indicator::BitcoinP2shAddress(bytes_to_string(&[prefix, address].concat())),
257 ))
258 } else {
259 Err(Err::Error(make_error(input, ErrorKind::Verify)))
260 }
261}
262
263fn extract_bitcoin_p2wpkh_address(input: &[u8]) -> IResult<&[u8], Indicator> {
264 let (input, _) = opt(take_while(|c| c != b'b' && c != b't' && !is_multispace(c)))(input)?;
265
266 let (input, prefix) = alt((tag_no_case("bc1"), tag_no_case("tb1")))(input)?;
267 let (input, address) = is_bech32(input)?;
268
269 let address = &[prefix, address].concat();
270
271 if is_valid_bitcoin_p2wpkh_address(address) {
272 Ok((
273 input,
274 Indicator::BitcoinP2wpkhAddress(bytes_to_string(address)),
275 ))
276 } else {
277 Err(Err::Error(make_error(input, ErrorKind::Verify)))
278 }
279}
280
281fn extract_bitcoin_p2wsh_address(input: &[u8]) -> IResult<&[u8], Indicator> {
282 let (input, _) = opt(take_while(|c| c != b'b' && c != b't' && !is_multispace(c)))(input)?;
283
284 let (input, prefix) = alt((tag_no_case("bc1"), tag_no_case("tb1")))(input)?;
285 let (input, address) = is_bech32(input)?;
286
287 let address = &[prefix, address].concat();
288
289 if is_valid_bitcoin_p2wsh_address(address) {
290 Ok((
291 input,
292 Indicator::BitcoinP2wshAddress(bytes_to_string(address)),
293 ))
294 } else {
295 Err(Err::Error(make_error(input, ErrorKind::Verify)))
296 }
297}
298
299fn extract_litecoin_p2wpkh_address(input: &[u8]) -> IResult<&[u8], Indicator> {
300 let (input, _) = opt(take_while(|c| c != b'l' && !is_multispace(c)))(input)?;
301
302 let (input, prefix) = tag("ltc1")(input)?;
303 let (input, address) = is_bech32(input)?;
304
305 let address = &[prefix, address].concat();
306
307 if is_valid_litecoin_p2wpkh_address(address) {
308 Ok((
309 input,
310 Indicator::LitecoinP2wpkhAddress(bytes_to_string(address)),
311 ))
312 } else {
313 Err(Err::Error(make_error(input, ErrorKind::Verify)))
314 }
315}
316
#[cfg(test)]
mod tests {
    use super::*;

    // Fixtures shared by the "single" and "labelled" variants of each test.
    const LTC_ADDRESS: &str = "ltc1q8c6fshw2dlwun7ekn9qwf37cu2rn755u9ym7p0";
    const BTC_ADDRESS: &str = "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa";
    const MD5_HEX: &str = "d41d8cd98f00b204e9800998ecf8427e";
    const SHA1_HEX: &str = "da39a3ee5e6b4b0d3255bfef95601890afd80709";
    const SHA256_HEX: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
    const SHA512_HEX: &str = "f1d9d8f153ec808a44cd63fb85f7cb811845b1596e46e78febd8c8b505a9b7d3a242c98b2b51261e5402f37334beefd7ba4066873a6dc56cd030cf29f4aef6dc";

    /// Asserts that `extract_indicator` consumes all of `input` and yields `expected`.
    #[track_caller]
    fn assert_single(input: &str, expected: Indicator) {
        assert_eq!(
            extract_indicator(input.as_bytes()),
            Ok(("".as_bytes(), expected))
        );
    }

    /// Asserts that `extract_indicators` consumes all of `input` and yields `expected`.
    #[track_caller]
    fn assert_all(input: &str, expected: Vec<Indicator>) {
        assert_eq!(
            extract_indicators(input.as_bytes()),
            Ok(("".as_bytes(), expected))
        );
    }

    /// The first four octets of the loopback address used by the IPv4 tests.
    fn loopback() -> Indicator {
        Indicator::Ipv4(Ipv4Addr::new(127, 0, 0, 1))
    }

    #[test]
    fn test_extract_single_litecoin_p2wpkh_address() {
        assert_single(
            LTC_ADDRESS,
            Indicator::LitecoinP2wpkhAddress(LTC_ADDRESS.to_string()),
        );
    }

    #[test]
    fn test_extract_single_bitcoin_pubkey() {
        assert_single(
            BTC_ADDRESS,
            Indicator::BitcoinP2pkhAddress(BTC_ADDRESS.to_string()),
        );
    }

    #[test]
    fn test_extract_single_url() {
        assert_single(
            "http://www.example.com/foo/bar",
            Indicator::Url("http://www.example.com/foo/bar".to_string()),
        );
    }

    #[test]
    fn test_extract_single_ipv4_with_garbage() {
        assert_single("asdf127.0.0.1", loopback());
    }

    #[test]
    fn test_extract_single_ipv4() {
        assert_single("127.0.0.1", loopback());
    }

    #[test]
    fn test_extract_single_partially_defanged_ipv4() {
        assert_single("127[.]0.0[.]1", loopback());
    }

    #[test]
    fn test_extract_single_fully_defanged_ipv4() {
        assert_single("127[.]0[.]0[.]1", loopback());
    }

    #[test]
    fn test_extract_single_partially_defanged_ipv6() {
        assert_single(
            "2001:0db8[:]85a3[:]0000:0000[:]8a2e:0370:7334",
            Indicator::Ipv6(Ipv6Addr::new(
                0x2001, 0x0db8, 0x85a3, 0x0000, 0x0000, 0x8a2e, 0x0370, 0x7334,
            )),
        );
    }

    #[test]
    fn test_extract_md5() {
        assert_all(
            &format!("MD5 hash: {MD5_HEX}"),
            vec![Indicator::Md5(MD5_HEX.to_string())],
        );
    }

    #[test]
    fn test_extract_single_md5() {
        assert_single(MD5_HEX, Indicator::Md5(MD5_HEX.to_string()));
    }

    #[test]
    fn test_extract_sha1() {
        assert_all(
            &format!("SHA1 hash: {SHA1_HEX}"),
            vec![Indicator::Sha1(SHA1_HEX.to_string())],
        );
    }

    #[test]
    fn test_extract_single_sha1() {
        assert_single(SHA1_HEX, Indicator::Sha1(SHA1_HEX.to_string()));
    }

    #[test]
    fn test_extract_sha256() {
        assert_all(
            &format!("SHA256 hash: {SHA256_HEX}"),
            vec![Indicator::Sha256(SHA256_HEX.to_string())],
        );
    }

    #[test]
    fn test_extract_single_sha256() {
        assert_single(SHA256_HEX, Indicator::Sha256(SHA256_HEX.to_string()));
    }

    #[test]
    fn test_extract_sha512() {
        assert_all(
            &format!("SHA512 hash: {SHA512_HEX}"),
            vec![Indicator::Sha512(SHA512_HEX.to_string())],
        );
    }

    #[test]
    fn test_extract_single_sha512() {
        assert_single(SHA512_HEX, Indicator::Sha512(SHA512_HEX.to_string()));
    }

    #[test]
    fn test_multiple_indicators() {
        let input = r#" Domain: AM6P194CA0000.outlook.office.com
        Domain: AMS0EPF000000A0.eurprd01.prod.outlook.com
        Domain: me512.com
        File: 1.0
        File: 15.21.7897.01
        File: 15.26.7918.123
        File: AA6P194CA0000.EURP001.PROD.OUTLOOK.COM
        File: CC3PR84AB3445.LAPE210.PROD.OUTLOOK.COM
        Email: 8ab3fa386978525c7fd59cb135f0fbc598c8@outlook.com
        Email: ALLOW@OUTLOOK.COM
        Email: allow@outlook.com
        Ipv4: 10.167.20.233
        Ipv4: 96.21.95.53
        BitcoinP2pkhAddress: 15N6Q12yFN3xa8ChqXDWWGgZPYcZdoTyRa
        LitecoinP2wpkhAddress: ltc1q8c6fshw2dlwun7ekn9qwf37cu2rn755u9ym7p0"#;

        let domain = |name: &str| Indicator::Domain(name.to_string());
        let file = |name: &str| Indicator::File(name.to_string());
        let email = |address: &str| Indicator::Email(address.to_string());

        let expected = vec![
            domain("AM6P194CA0000.outlook.office.com"),
            domain("AMS0EPF000000A0.eurprd01.prod.outlook.com"),
            domain("me512.com"),
            file("1.0"),
            file("15.21.7897.01"),
            file("15.26.7918.123"),
            file("AA6P194CA0000.EURP001.PROD.OUTLOOK.COM"),
            file("CC3PR84AB3445.LAPE210.PROD.OUTLOOK.COM"),
            email("8ab3fa386978525c7fd59cb135f0fbc598c8@outlook.com"),
            email("ALLOW@OUTLOOK.COM"),
            email("allow@outlook.com"),
            Indicator::Ipv4(Ipv4Addr::new(10, 167, 20, 233)),
            Indicator::Ipv4(Ipv4Addr::new(96, 21, 95, 53)),
            Indicator::BitcoinP2pkhAddress("15N6Q12yFN3xa8ChqXDWWGgZPYcZdoTyRa".to_string()),
            Indicator::LitecoinP2wpkhAddress(LTC_ADDRESS.to_string()),
        ];

        let (rest, found) = extract_indicators(input.as_bytes()).unwrap();

        // Only membership is checked: the result may legitimately hold further indicators.
        for indicator in &expected {
            assert!(
                found.contains(indicator),
                "indicator {indicator:?} doesn't match"
            );
        }

        assert_eq!(rest.len(), 0);
    }
}