keyhog_scanner/multiline/
config.rs1use regex::Regex;
2use std::sync::LazyLock;
3
/// Upper bound (2 MiB) on the total input size, in bytes. `should_passthrough`
/// reports `true` for any input strictly longer than this.
const MAX_MULTILINE_PREPROCESS_BYTES: usize = 2 << 20;
/// Upper bound (64 KiB) on the length of a single line, in bytes.
/// `should_passthrough` reports `true` as soon as one line is strictly longer.
const MAX_MULTILINE_LINE_BYTES: usize = 64 << 10;
6
/// One entry of an offset-to-line table: a half-open byte range
/// `start_offset..end_offset` together with the line number reported for
/// offsets that fall inside it.
#[derive(Debug, Clone)]
pub struct LineMapping {
    /// First byte offset covered by this entry (inclusive).
    pub start_offset: usize,
    /// Byte offset at which this entry stops (exclusive).
    pub end_offset: usize,
    /// Line number reported for the range. `PreprocessedText::passthrough`
    /// numbers lines starting at 1.
    pub line_number: usize,
}
17
18#[derive(Debug, Clone)]
20pub struct PreprocessedText {
21 pub text: String,
23 pub original_end: usize,
25 pub mappings: Vec<LineMapping>,
27}
28
29impl PreprocessedText {
30 pub fn line_for_offset(&self, offset: usize) -> Option<usize> {
39 let idx = self.mappings.partition_point(|m| m.start_offset <= offset);
40 if idx == 0 {
41 return None;
42 }
43 let m = &self.mappings[idx - 1];
44 if offset < m.end_offset {
45 Some(m.line_number)
46 } else {
47 None
48 }
49 }
50
51 pub fn passthrough(text: &str) -> Self {
53 let mut mappings = Vec::new();
54 let mut offset = 0;
55 for (line_idx, line) in text.split('\n').enumerate() {
56 let end = offset + line.len();
57 mappings.push(LineMapping {
58 line_number: line_idx + 1,
59 start_offset: offset,
60 end_offset: end + 1,
61 });
62 offset = end + 1;
63 }
64 if let Some(last) = mappings.last_mut() {
65 last.end_offset = text.len();
66 }
67 let original_end = text.len();
68 Self {
69 text: text.to_string(),
70 original_end,
71 mappings,
72 }
73 }
74}
75
/// Settings for multi-line preprocessing.
///
/// NOTE(review): the code that consumes these fields is not in this file, so
/// the per-field notes below are inferred from the field names - confirm
/// against the preprocessor before relying on them.
#[derive(Debug, Clone)]
pub struct MultilineConfig {
    /// Presumably the maximum number of lines joined into one logical line.
    pub max_join_lines: usize,
    /// Presumably toggles joining of Python-style adjacent string literals.
    pub python_implicit: bool,
    /// Presumably toggles joining of lines ending in a backslash.
    pub backslash_continuation: bool,
    /// Presumably toggles joining of `+`-concatenated string pieces.
    pub plus_concatenation: bool,
    /// Presumably toggles handling of backtick template literals.
    pub template_literals: bool,
}

impl Default for MultilineConfig {
    /// Every boolean switch is on and `max_join_lines` is 10.
    fn default() -> Self {
        MultilineConfig {
            max_join_lines: 10,
            python_implicit: true,
            backslash_continuation: true,
            plus_concatenation: true,
            template_literals: true,
        }
    }
}
102
103pub(crate) fn has_concatenation_indicators(text: &str) -> bool {
105 let trimmed = text.trim_start();
106 if trimmed.starts_with('{')
107 || trimmed.starts_with('[')
108 || trimmed.starts_with("<?xml")
109 || trimmed.starts_with('<')
110 {
111 return false;
112 }
113
114 let bytes = text.as_bytes();
115
116 if bytes.len() > 4096 {
118 let has_secret_keyword = memchr::memmem::find(bytes, b"ecret").is_some()
119 || memchr::memmem::find(bytes, b"oken").is_some()
120 || memchr::memmem::find(bytes, b"assword").is_some()
121 || memchr::memmem::find(bytes, b"api_key").is_some()
122 || memchr::memmem::find(bytes, b"API_KEY").is_some()
123 || memchr::memmem::find(bytes, b"redential").is_some();
124 if !has_secret_keyword {
125 return false;
126 }
127 }
128
129 let has_explicit_concat = text.contains("\" +") || text.contains("' +");
130 let has_backslash_cont = text.contains("\" \\") || text.contains("' \\");
131 let has_template = memchr::memchr(b'`', bytes).is_some();
132 let has_paste =
136 text.contains("paste0(") || text.contains("paste(") || text.contains("concat!(");
137 let has_implicit = bytes.windows(3).any(|window| {
138 (window[0] == b'"' && window[1] == b' ' && window[2] == b'"')
139 || (window[0] == b'\'' && window[1] == b' ' && window[2] == b'\'')
140 || (window[0] == b'"'
141 && window[1] == b'\n'
142 && (window[2] == b'"' || window[2] == b' ' || window[2] == b'\t'))
143 || (window[0] == b'\''
144 && window[1] == b'\n'
145 && (window[2] == b'\'' || window[2] == b' ' || window[2] == b'\t'))
146 });
147 if !has_explicit_concat
148 && !has_backslash_cont
149 && !has_template
150 && !has_paste
151 && !has_implicit
152 && !has_var_ref_concatenation(text)
153 {
154 return false;
155 }
156
157 for line in text.lines() {
158 let trimmed = line.trim();
159 if trimmed.ends_with('+')
160 || trimmed.starts_with('+')
161 || trimmed.starts_with("+ ")
162 || trimmed.contains("paste0(")
163 || trimmed.contains("paste(")
164 || trimmed.contains("concat!(")
165 || trimmed.contains("\" +")
166 || trimmed.contains("' +")
167 || trimmed.contains("+ \"")
168 || trimmed.contains("+ '")
169 || (trimmed.ends_with('\\') && !trimmed.ends_with("\\\\"))
170 || trimmed.contains("\" \"")
171 || trimmed.contains("' '")
172 || has_var_ref_concat_line(trimmed)
173 || (trimmed.ends_with('`') && trimmed.matches('`').count() == 1)
174 || trimmed.contains("${\"")
181 || trimmed.contains("${'")
182 || trimmed.contains("}${")
189 {
190 return true;
191 }
192 }
193
194 false
195}
196
197fn has_var_ref_concatenation(text: &str) -> bool {
202 text.lines().any(has_var_ref_concat_line)
203}
204
205fn has_var_ref_concat_line(line: &str) -> bool {
206 if !line.contains('+') {
220 return false;
221 }
222 static VAR_REF_CONCAT_RE: LazyLock<Option<Regex>> = LazyLock::new(|| {
223 Regex::new(
224 r#"(?i)^\s*[a-z0-9_\-\.]{2,64}\s*[:=]\s*[a-z0-9_\-]{2,32}(?:\s*\+\s*[a-z0-9_\-]{2,32}){1,8}\s*;?\s*$"#,
225 )
226 .ok()
227 });
228 VAR_REF_CONCAT_RE
229 .as_ref()
230 .is_some_and(|re| re.is_match(line))
231}
232
233pub(crate) fn should_passthrough(text: &str) -> bool {
234 text.len() > MAX_MULTILINE_PREPROCESS_BYTES
235 || text
236 .lines()
237 .any(|line| line.len() > MAX_MULTILINE_LINE_BYTES)
238 || !has_concatenation_indicators(text)
239}