/// Whole-input size ceiling (2 MiB): `should_passthrough` reports `true` for
/// any text whose byte length is strictly greater than this.
const MAX_MULTILINE_PREPROCESS_BYTES: usize = 2 << 20;
/// Per-line size ceiling (64 KiB): `should_passthrough` reports `true` as soon
/// as one line's byte length is strictly greater than this.
const MAX_MULTILINE_LINE_BYTES: usize = 64 << 10;
/// Associates a half-open byte range `[start_offset, end_offset)` with a line
/// number. `PreprocessedText::line_for_offset` searches a list of these.
#[derive(Debug, Clone)]
pub struct LineMapping {
/// First byte offset covered by this mapping (inclusive).
pub start_offset: usize,
/// Byte offset one past the covered range (exclusive).
pub end_offset: usize,
/// Line number reported for offsets inside the range; 1-based when the
/// mapping is produced by `PreprocessedText::passthrough`.
pub line_number: usize,
}
/// A piece of text together with a table that translates byte offsets in it
/// back to line numbers.
#[derive(Debug, Clone)]
pub struct PreprocessedText {
    /// The text the offsets in `mappings` refer to.
    pub text: String,
    /// Set to `text.len()` by [`PreprocessedText::passthrough`].
    /// NOTE(review): how other constructors populate this is not visible here.
    pub original_end: usize,
    /// Offset ranges and the line number each one resolves to.
    pub mappings: Vec<LineMapping>,
}

impl PreprocessedText {
    /// Returns the line number of the first mapping whose half-open range
    /// `[start_offset, end_offset)` contains `offset`, or `None` when no
    /// mapping covers it.
    pub fn line_for_offset(&self, offset: usize) -> Option<usize> {
        for entry in &self.mappings {
            if (entry.start_offset..entry.end_offset).contains(&offset) {
                return Some(entry.line_number);
            }
        }
        None
    }

    /// Wraps `text` unchanged, producing one mapping per `'\n'`-separated
    /// segment with 1-based line numbers.
    ///
    /// Every range includes the newline that terminates its line; the final
    /// range is clamped to `text.len()` because no newline follows it.
    pub fn passthrough(text: &str) -> Self {
        let total_len = text.len();
        let mut next_start = 0;

        let mut mappings: Vec<LineMapping> = text
            .split('\n')
            .zip(1..)
            .map(|(segment, line_number)| {
                let start_offset = next_start;
                // Step over the segment and its trailing '\n'.
                next_start += segment.len() + 1;
                LineMapping {
                    start_offset,
                    end_offset: next_start,
                    line_number,
                }
            })
            .collect();

        // The last segment has no trailing newline, so its end would overshoot.
        if let Some(tail) = mappings.last_mut() {
            tail.end_offset = total_len;
        }

        PreprocessedText {
            text: text.to_string(),
            original_end: total_len,
            mappings,
        }
    }
}
/// Switches and limits for multiline joining.
///
/// NOTE(review): the code that consumes these fields is not visible in this
/// part of the file; the per-field notes below are inferred from the names
/// and should be confirmed against that code.
#[derive(Debug, Clone)]
pub struct MultilineConfig {
    /// Presumably the maximum number of lines joined together; defaults to 10.
    pub max_join_lines: usize,
    /// Presumably toggles Python-style implicit literal joining; on by default.
    pub python_implicit: bool,
    /// Presumably toggles trailing-backslash continuation; on by default.
    pub backslash_continuation: bool,
    /// Presumably toggles `+` concatenation handling; on by default.
    pub plus_concatenation: bool,
    /// Presumably toggles template-literal handling; on by default.
    pub template_literals: bool,
}

impl Default for MultilineConfig {
    /// Builds a configuration with every toggle switched on and
    /// `max_join_lines` set to 10.
    fn default() -> Self {
        let enabled = true;
        MultilineConfig {
            max_join_lines: 10,
            python_implicit: enabled,
            backslash_continuation: enabled,
            plus_concatenation: enabled,
            template_literals: enabled,
        }
    }
}
/// Cheap heuristic that reports whether `text` shows signs of string literals
/// being concatenated or continued across lines.
///
/// The checks run in this order, and the first one that fails returns `false`:
///
/// 1. Text whose first non-whitespace character is `{`, `[` or `<` is rejected
///    outright.
/// 2. Text longer than 4096 bytes must contain at least one credential-like
///    keyword stem (`secret`, `token`, `password`, `api_key`, `credential`),
///    in lower-case, Title-case or ALL-CAPS form.
/// 3. The whole text must contain at least one coarse marker: a quote followed
///    by ` +` or ` \`, a backtick, `paste0(`, or two quote characters of the
///    same kind separated by a space/newline (or a quote, a newline and then
///    indentation).
/// 4. At least one trimmed line must match one of the per-line patterns
///    (leading/trailing `+`, `paste(`/`paste0(`, quote-adjacent `+`, a single
///    trailing backslash, adjacent literals, or a lone trailing backtick).
///
/// Returns `true` only when all four stages pass.
pub(crate) fn has_concatenation_indicators(text: &str) -> bool {
    // Above this size the input must mention a credential-like keyword.
    const KEYWORD_GATE_BYTES: usize = 4096;
    // The stems drop the first letter so `secret` and `Secret` both match; the
    // all-caps forms cover constant-style names such as `AWS_SECRET_ACCESS_KEY`
    // or `GITHUB_TOKEN`, which the gate previously only honoured for `API_KEY`.
    const SECRET_KEYWORDS: [&[u8]; 10] = [
        b"ecret",
        b"ECRET",
        b"oken",
        b"OKEN",
        b"assword",
        b"ASSWORD",
        b"api_key",
        b"API_KEY",
        b"redential",
        b"REDENTIAL",
    ];

    // `<` already covers an `<?xml` prologue, so no separate check is needed.
    let leading = text.trim_start();
    if leading.starts_with(|c: char| matches!(c, '{' | '[' | '<')) {
        return false;
    }

    let bytes = text.as_bytes();
    if bytes.len() > KEYWORD_GATE_BYTES
        && !SECRET_KEYWORDS
            .iter()
            .any(|&keyword| memchr::memmem::find(bytes, keyword).is_some())
    {
        return false;
    }

    // Coarse whole-text pre-check.
    //
    // NOTE(review): this is not a superset of the per-line patterns below.
    // `paste(`, `+ "` / `+ '` and a bare leading/trailing `+` or trailing `\`
    // only count when some other marker here fires, and the cross-line
    // adjacent-literal marker here has no per-line counterpart. Left as-is;
    // confirm whether that asymmetry is intended.
    let quote_then_plus = text.contains("\" +") || text.contains("' +");
    let quote_then_backslash = text.contains("\" \\") || text.contains("' \\");
    let has_backtick = memchr::memchr(b'`', bytes).is_some();
    let has_paste0 = text.contains("paste0(");
    let adjacent_literals = bytes.windows(3).any(|window| {
        let (quote, separator, next) = (window[0], window[1], window[2]);
        if quote != b'"' && quote != b'\'' {
            return false;
        }
        match separator {
            // `"a" "b"` on one line.
            b' ' => next == quote,
            // A literal closing a line, followed by another literal or indentation.
            b'\n' => next == quote || next == b' ' || next == b'\t',
            _ => false,
        }
    });
    if !(quote_then_plus
        || quote_then_backslash
        || has_backtick
        || has_paste0
        || adjacent_literals)
    {
        return false;
    }

    text.lines().map(str::trim).any(|line| {
        line.ends_with('+')
            // Also covers the `"+ "` prefix form.
            || line.starts_with('+')
            || line.contains("paste0(")
            || line.contains("paste(")
            || line.contains("\" +")
            || line.contains("' +")
            || line.contains("+ \"")
            || line.contains("+ '")
            // A single trailing backslash continues the line; `\\` is an escaped backslash.
            || (line.ends_with('\\') && !line.ends_with("\\\\"))
            || line.contains("\" \"")
            || line.contains("' '")
            // A lone backtick at end of line opens a multi-line template literal.
            || (line.ends_with('`') && line.matches('`').count() == 1)
    })
}
/// Decides whether `text` should skip multiline preprocessing.
///
/// Returns `true` when any of the following holds, checked in this order:
/// the text is longer than `MAX_MULTILINE_PREPROCESS_BYTES`, some line is
/// longer than `MAX_MULTILINE_LINE_BYTES`, or `has_concatenation_indicators`
/// reports `false` for it.
pub(crate) fn should_passthrough(text: &str) -> bool {
    // Cheapest test first: a length comparison needs no scan of the content.
    if text.len() > MAX_MULTILINE_PREPROCESS_BYTES {
        return true;
    }

    let has_oversized_line = text
        .lines()
        .map(str::len)
        .any(|line_len| line_len > MAX_MULTILINE_LINE_BYTES);
    if has_oversized_line {
        return true;
    }

    !has_concatenation_indicators(text)
}