Skip to main content

luaur_analysis/methods/
lint_format_string_check_string_match.rs

1use crate::records::lint_format_string::LintFormatString;
2
3impl crate::records::lint_format_string::LintFormatString {
4    #[inline]
5    pub fn check_string_match(
6        &self,
7        data: *const core::ffi::c_char,
8        size: usize,
9        out_captures: *mut i32,
10    ) -> *const core::ffi::c_char {
11        let magic = b"^$()%.[]*+-?)";
12        let classes = b"acdglpsuwxz";
13
14        let mut open_captures: alloc::vec::Vec<i32> = alloc::vec::Vec::new();
15        let mut total_captures: i32 = 0;
16
17        let mut i: usize = 0;
18        unsafe {
19            while i < size {
20                if *data.add(i) == b'%' as i8 {
21                    i += 1;
22
23                    if i == size {
24                        return c"unfinished character class".as_ptr();
25                    }
26
27                    let ch = *data.add(i);
28                    if self.is_digit(ch) {
29                        if ch == b'0' as i8 {
30                            return c"invalid capture reference, must be 1-9".as_ptr();
31                        }
32
33                        let capture_index = (ch as u8 - b'0') as i32;
34
35                        if capture_index > total_captures {
36                            return c"invalid capture reference, must refer to a valid capture"
37                                .as_ptr();
38                        }
39
40                        for &open in &open_captures {
41                            if open == capture_index {
42                                return c"invalid capture reference, must refer to a closed capture".as_ptr();
43                            }
44                        }
45                    } else if self.is_alpha(ch) {
46                        if ch == b'b' as i8 {
47                            if i + 2 >= size {
48                                return c"missing brace characters for balanced match".as_ptr();
49                            }
50                            i += 2;
51                        } else if ch == b'f' as i8 {
52                            if i + 1 >= size || *data.add(i + 1) != b'[' as i8 {
53                                return c"missing set after a frontier pattern".as_ptr();
54                            }
55                            // we can parse the set with the regular logic
56                        } else {
57                            // lower case lookup - upper case for every character class is defined as its inverse
58                            if !classes.contains(&((ch as u8) | b' ')) {
59                                return c"invalid character class, must refer to a defined class or its inverse".as_ptr();
60                            }
61                        }
62                    } else {
63                        // technically % can escape any non-alphanumeric character but this is error-prone
64                        if !magic.contains(&(ch as u8)) {
65                            return c"expected a magic character after %".as_ptr();
66                        }
67                    }
68                } else if *data.add(i) == b'[' as i8 {
69                    let mut j = i + 1;
70
71                    // empty patterns don't exist as per grammar rules, so we skip leading ^ and ]
72                    if j < size && *data.add(j) == b'^' as i8 {
73                        j += 1;
74                    }
75                    if j < size && *data.add(j) == b']' as i8 {
76                        j += 1;
77                    }
78
79                    // scan for the end of the pattern
80                    while j < size && *data.add(j) != b']' as i8 {
81                        // % escapes the next character
82                        if j + 1 < size && *data.add(j) == b'%' as i8 {
83                            j += 1;
84                        }
85                        j += 1;
86                    }
87
88                    if j == size {
89                        return c"expected ] at the end of the string to close a set".as_ptr();
90                    }
91
92                    let error =
93                        self.check_string_match_set(data.add(i + 1), j - i - 1, magic, classes);
94                    if !error.is_null() {
95                        return error;
96                    }
97
98                    debug_assert!(*data.add(j) == b']' as i8);
99                    i = j;
100                } else if *data.add(i) == b'(' as i8 {
101                    total_captures += 1;
102                    open_captures.push(total_captures);
103                } else if *data.add(i) == b')' as i8 {
104                    if open_captures.is_empty() {
105                        return c"unexpected ) without a matching (".as_ptr();
106                    }
107                    open_captures.pop();
108                }
109
110                i += 1;
111            }
112        }
113
114        if !open_captures.is_empty() {
115            return c"expected ) at the end of the string to close a capture".as_ptr();
116        }
117
118        if !out_captures.is_null() {
119            unsafe {
120                *out_captures = total_captures;
121            }
122        }
123
124        core::ptr::null()
125    }
126}