Skip to main content

yara_x/modules/vt/
mod.rs

1/*! Implementation of the `vt` module.
2
3This is a VirusTotal-specific module that provides additional context and metadata
4about files, URLs, IP addresses and domains scanned in VirusTotal.
5*/
6
7mod bitsquatting;
8mod homoglyphs;
9mod interleaved;
10mod typos;
11
12use std::net::IpAddr;
13use std::ops::BitAnd;
14use std::rc::Rc;
15use std::sync::LazyLock;
16
17use bstr::BStr;
18use ipnet::IpNet;
19use protobuf::EnumFull;
20
21use crate::modules::prelude::*;
22use crate::modules::protos::titan::*;
23use crate::modules::protos::vtnet::enriched_domain::Permutation;
24use crate::modules::vt::bitsquatting::bitsquatting;
25use crate::modules::vt::homoglyphs::is_homoglyph;
26use crate::modules::vt::interleaved::interleaved;
27use crate::modules::vt::typos::{
28    doubling, insertion, omission, replacement, swap, vowel_swap,
29};
30use crate::types::Struct;
31
32static BITSQUATTING: LazyLock<i64> = LazyLock::new(|| {
33    Struct::enum_value_i64(&Permutation::BITSQUATTING.descriptor()).unwrap()
34});
35
36static TYPO: LazyLock<i64> = LazyLock::new(|| {
37    Struct::enum_value_i64(&Permutation::TYPO.descriptor()).unwrap()
38});
39
40static HYPHENATION: LazyLock<i64> = LazyLock::new(|| {
41    Struct::enum_value_i64(&Permutation::HYPHENATION.descriptor()).unwrap()
42});
43
44static HOMOGLYPH: LazyLock<i64> = LazyLock::new(|| {
45    Struct::enum_value_i64(&Permutation::HOMOGLYPH.descriptor()).unwrap()
46});
47
48static SUBDOMAIN: LazyLock<i64> = LazyLock::new(|| {
49    Struct::enum_value_i64(&Permutation::SUBDOMAIN.descriptor()).unwrap()
50});
51
52#[module_main]
53fn main(
54    _data: &[u8],
55    _meta: Option<&[u8]>,
56) -> Result<LiveHuntData, ModuleError> {
57    Ok(LiveHuntData::new())
58}
59
60#[module_export(method_of = "vt.net.EnrichedIP")]
61fn in_range(
62    ctx: &mut ScanContext,
63    ip: Rc<Struct>,
64    cidr: RuntimeString,
65) -> bool {
66    let cidr =
67        match cidr.to_str(ctx).ok().and_then(|s| s.parse::<IpNet>().ok()) {
68            Some(cidr) => cidr,
69            None => return false,
70        };
71
72    let ip = ip.field_by_name("raw").unwrap().type_value.as_string();
73
74    let ip = match ip.to_str().ok().and_then(|s| s.parse::<IpAddr>().ok()) {
75        Some(ip) => ip,
76        None => return false,
77    };
78
79    cidr.contains(&ip)
80}
81
82#[module_export(name = "permutation_of", method_of = "vt.net.EnrichedDomain")]
83fn all_permutations(
84    ctx: &mut ScanContext,
85    domain: Rc<Struct>,
86    target: RuntimeString,
87) -> bool {
88    permutations(ctx, domain, target, 0x1F)
89}
90
91#[module_export(name = "permutation_of", method_of = "vt.net.EnrichedDomain")]
92fn permutations(
93    ctx: &mut ScanContext,
94    scanned_domain: Rc<Struct>,
95    legitimate_domain: RuntimeString,
96    permutation_kinds: i64,
97) -> bool {
98    let scanned_domain =
99        scanned_domain.field_by_name("raw").unwrap().type_value.as_string();
100
101    let scanned_domain = match parse_domain(scanned_domain.as_bstr()) {
102        Some(d) => d,
103        None => return false,
104    };
105
106    let legit_domain = match parse_domain(legitimate_domain.as_bstr(ctx)) {
107        Some(s) => s,
108        None => return false,
109    };
110
111    // The domain is not a permutation of itself.
112    if scanned_domain == legit_domain {
113        return false;
114    }
115
116    // Both domains must have the same TLD.
117    if scanned_domain.tld != legit_domain.tld {
118        return false;
119    }
120
121    let scanned_prefix = scanned_domain.prefix;
122    let scanned_domain = match scanned_domain.domain {
123        Some(d) => d,
124        None => return false,
125    };
126
127    let legit_prefix = legit_domain.prefix;
128    let legit_domain = match legit_domain.domain {
129        Some(d) => d,
130        None => return false,
131    };
132
133    if TYPO.bitand(&permutation_kinds) != 0
134        && (insertion(legit_domain, scanned_domain)
135            || omission(legit_domain, scanned_domain)
136            || replacement(legit_domain, scanned_domain)
137            || doubling(legit_domain, scanned_domain)
138            || swap(legit_domain, scanned_domain)
139            || vowel_swap(legit_domain, scanned_domain))
140    {
141        return true;
142    }
143
144    if HOMOGLYPH.bitand(&permutation_kinds) != 0
145        && is_homoglyph(legit_domain, scanned_domain)
146    {
147        return true;
148    }
149
150    if BITSQUATTING.bitand(&permutation_kinds) != 0
151        && bitsquatting(legit_domain, scanned_domain)
152    {
153        return true;
154    }
155
156    if SUBDOMAIN.bitand(&permutation_kinds) != 0
157        && let (Some(legit), Some(scanned)) = (legit_prefix, scanned_prefix)
158            && interleaved(legit, scanned, '.') {
159                return true;
160            }
161
162    if HYPHENATION.bitand(&permutation_kinds) != 0
163        && interleaved(legit_domain, scanned_domain, '-')
164    {
165        return true;
166    }
167
168    false
169}
170
171/// Parses a domain name and returns its parts. For instance,
172/// for `www.virustotal.com` it returns:
173///
174/// ```text
175/// DomainParts {
176///   prefix: Some("www.virustotal"),
177///   subdomain: Some("www"),
178///   domain: Some("virustotal"),
179///   tld: "com",
180/// }
181/// ```
182///
183/// Returns `None` if the argument is not a valid domain name.
184fn parse_domain(domain: &BStr) -> Option<DomainParts<'_>> {
185    let domain_len = domain.len();
186    let suffix_len = psl::suffix(domain)?.as_bytes().len();
187    let tld = domain[domain_len - suffix_len..].to_str().ok()?;
188    let suffix_plus_dot = suffix_len + 1;
189
190    if domain_len <= suffix_plus_dot {
191        return Some(DomainParts {
192            prefix: None,
193            subdomain: None,
194            domain: None,
195            tld,
196        });
197    }
198
199    let prefix = domain.get(..domain_len - suffix_plus_dot)?.to_str().ok()?;
200
201    let (mut subdomain, mut domain) = match prefix.rsplit_once('.') {
202        Some((subdomain, domain)) => (Some(subdomain), Some(domain)),
203        None => (None, Some(prefix)),
204    };
205
206    // The psl::suffix function can incorrectly parse domains like
207    // "www.gov.uk", returning "www" as the domain and "gov.uk" as the public
208    // suffix. This happens because both "gov.uk" and "uk" are valid public
209    // suffixes, leading to ambiguity:
210    //
211    // Possible interpretations:
212    // - "www.gov.uk": subdomain="www", domain="gov", suffix="uk" (correct)
213    // - "www.gov.uk": subdomain="", domain="www", suffix="gov.uk" (incorrect)
214    //
215    // However, for "www.tfl.gov.uk":
216    // - subdomain="www.tfl", domain="gov", suffix="uk" (incorrect)
217    // - subdomain="www", domain="tfl", suffix="gov.uk" (correct)
218    //
219    // This workaround checks for common subdomains (e.g., "www") and correctly
220    // assigns the domain and subdomain fields to handle these cases.
221    if matches!(
222        domain,
223        Some("www")
224            | Some("ftp")
225            | Some("m")
226            | Some("mail")
227            | Some("webmail")
228            | Some("ns1")
229            | Some("ns2")
230    ) {
231        subdomain = domain;
232        domain = None;
233    }
234
235    Some(DomainParts { prefix: Some(prefix), subdomain, domain, tld })
236}
237
/// Parts of a domain name, as produced by `parse_domain`.
///
/// All fields borrow from the string the domain was parsed from.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct DomainParts<'a> {
    /// Everything that precedes the dot and the public suffix, e.g.
    /// `www.virustotal` for `www.virustotal.com`. `None` when nothing
    /// precedes the suffix.
    pub prefix: Option<&'a str>,
    /// The subdomain, e.g. `www` for `www.virustotal.com`.
    pub subdomain: Option<&'a str>,
    /// The domain label, e.g. `virustotal` for `www.virustotal.com`.
    pub domain: Option<&'a str>,
    /// The public suffix, e.g. `com` or `gov.uk`.
    pub tld: &'a str,
}
245
#[cfg(test)]
mod tests {
    use crate::modules::protos::titan::LiveHuntData;
    use crate::modules::vt::{parse_domain, DomainParts};
    use crate::{Compiler, Scanner};
    use bstr::BStr;
    use protobuf::text_format::parse_from_str;

    /// Compiles `rule` with the `ip_address` and `file` features enabled,
    /// scans an empty buffer and returns the number of matching rules.
    ///
    /// When `vt_meta` is provided, it is parsed as a `LiveHuntData` message
    /// in protobuf text format and handed to the scanner as module output.
    ///
    /// Panics if the metadata cannot be parsed, the rule does not compile,
    /// or the scan fails.
    fn count_matches(rule: &str, vt_meta: Option<&str>) -> usize {
        let mut compiler = Compiler::new();

        compiler
            .enable_feature("ip_address")
            .enable_feature("file")
            .add_source(rule)
            .unwrap();

        let rules = compiler.build();
        let mut scanner = Scanner::new(&rules);

        if let Some(vt_meta) = vt_meta {
            let vt_meta = parse_from_str::<LiveHuntData>(vt_meta).unwrap();
            scanner.set_module_output(Box::new(vt_meta)).unwrap();
        }

        // The scan results borrow the scanner, so the count is bound to a
        // local to make sure they are dropped before the scanner is.
        let count = scanner.scan(b"").unwrap().matching_rules().len();
        count
    }

    /// Returns true if `vt.net.domain.permutation_of(legit_domain)` matches
    /// when the scanned domain is `scanned_domain`.
    fn squatting(legit_domain: &str, scanned_domain: &str) -> bool {
        let vt_meta =
            format!("net {{ domain {{ raw: \"{}\" }} }}", scanned_domain);

        let rule = format!(
            r#"
           import "vt"
           rule test {{
             condition:
               vt.net.domain.permutation_of("{}")
           }}"#,
            legit_domain
        );

        count_matches(rule.as_str(), Some(vt_meta.as_str())) == 1
    }

    #[test]
    fn in_range_ipv4() {
        let vt_meta = r#"
                meta {
                    itw {
                        ip {
                            raw: "142.250.184.164"
                        }
                    }
                }
                net {
                    ip {
                        raw: "192.168.1.100"
                    }
                }"#;

        let rule = r#"
           import "vt"
           rule test {
             condition:
               vt.net.ip.raw == "192.168.1.100"
               and vt.metadata.itw.ip.raw == "142.250.184.164"
               and vt.net.ip.in_range("192.168.1.100/32")
               and vt.net.ip.in_range("192.168.1.1/17")
               and vt.net.ip.in_range("192.168.1.0/24")
               and not vt.net.ip.in_range("192.168.1.0/32")
               and not vt.net.ip.in_range("192.168.1.0/31")
               and vt.metadata.itw.ip.in_range("142.250.184.164/20")
               and vt.metadata.itw.ip.in_range("142.250.176.0/20")
           }"#;

        assert_eq!(count_matches(rule, Some(vt_meta)), 1);
    }

    #[test]
    fn in_range_ipv6() {
        let vt_meta = r#"
                meta {
                    itw {
                        ip {
                            raw: "2001:db8::1"
                        }
                    }
                }
                net {
                    ip {
                        raw: "2001:0DB8:7654:0010:FEDC:0000:0000:3210"
                    }
                }"#;

        let rule = r#"
           import "vt"
           rule test {
             condition:
               vt.net.ip.raw == "2001:0DB8:7654:0010:FEDC:0000:0000:3210"
               and vt.metadata.itw.ip.raw == "2001:db8::1"
               and vt.net.ip.in_range("2001:db8::1/32")
               and not vt.net.ip.in_range("2001:db8::1/34")
               and vt.metadata.itw.ip.in_range("2001:db8::1/64")
           }"#;

        assert_eq!(count_matches(rule, Some(vt_meta)), 1);
    }

    #[test]
    fn permutation_constants() {
        let rule = r#"
           import "vt"
           rule test {
             condition:
               vt.Domain.Permutation.ALL == vt.Domain.Permutation.TYPO
                | vt.Domain.Permutation.HYPHENATION
                | vt.Domain.Permutation.HOMOGLYPH
                | vt.Domain.Permutation.SUBDOMAIN
                | vt.Domain.Permutation.BITSQUATTING
           }"#;

        // No module output is needed, the rule only compares constants.
        assert_eq!(count_matches(rule, None), 1);
    }

    #[test]
    fn test_parse_domain() {
        assert_eq!(
            parse_domain(BStr::new("www.google.com")),
            Some(DomainParts {
                prefix: Some("www.google"),
                subdomain: Some("www"),
                domain: Some("google"),
                tld: "com"
            })
        );

        assert_eq!(
            parse_domain(BStr::new("gov.uk")),
            Some(DomainParts {
                prefix: None,
                subdomain: None,
                domain: None,
                tld: "gov.uk"
            })
        );

        assert_eq!(
            parse_domain(BStr::new("www.gov.uk")),
            Some(DomainParts {
                prefix: Some("www"),
                subdomain: Some("www"),
                domain: None,
                tld: "gov.uk"
            })
        );

        assert_eq!(
            parse_domain(BStr::new("ftp.gov.uk")),
            Some(DomainParts {
                prefix: Some("ftp"),
                subdomain: Some("ftp"),
                domain: None,
                tld: "gov.uk"
            })
        );

        assert_eq!(
            parse_domain(BStr::new("www.ncbi.nlm.nih.gov")),
            Some(DomainParts {
                prefix: Some("www.ncbi.nlm.nih"),
                subdomain: Some("www.ncbi.nlm"),
                domain: Some("nih"),
                tld: "gov"
            })
        );
    }

    #[test]
    fn test_squatting() {
        // the `b` was omitted.
        assert!(squatting("bankofamerica.com", "ankofamerica.com"));
        // the `o` was omitted.
        assert!(squatting("bankofamerica.com", "bankfamerica.com"));
        // the `k` is repeated.
        assert!(squatting("bankofamerica.com", "bankkofamerica.com"));
        // the `l` was inserted.
        assert!(squatting("bankofamerica.com", "banklofamerica.com"));
        // `q` is close to `a` on the keyboard.
        assert!(squatting("bankofamerica.com", "bqnkofamerica.com"));
        // `ɑ` (U+0251) is a homoglyph of `a`.
        assert!(squatting("bankofamerica.com", "bɑnkofamerica.com"));
        // transposition of `a` and `b`.
        assert!(squatting("bankofamerica.com", "abnkofamerica.com"));
        // insertion of hyphens.
        assert!(squatting("bankofamerica.com", "bank-of-america.com"));
        // the `e` was replaced with `d`, which is close on the keyboard.
        assert!(squatting("bankofamerica.com", "bankofamdrica.com"));
        // the vowel `a` was replaced with `o`.
        assert!(squatting("bankofamerica.com", "bonkofamerica.com"));
        // bitsquatting, the `k` and the `c` differ in one bit.
        assert!(squatting("bankofamerica.com", "bancofamerica.com"));
        // subdomain
        assert!(squatting("bankofamerica.com", "bankof.america.com"));
        assert!(squatting("bankofamerica.com", "bank.of.america.com"));

        // test some negative cases
        assert!(!squatting("www.google.com", "notifications.google.com"));
        assert!(!squatting("www.ing.com", "www.ncbi.nlm.nih.gov"));
        assert!(!squatting("www.google.com", "www.goggle.es"));
        assert!(!squatting("www.google.com", "www.goore.com"));
    }
}