use regex::Regex;
use std::sync::LazyLock;
/// Largest input, in bytes (2 MiB), that is considered for multiline
/// preprocessing; `should_passthrough` returns `true` for anything longer.
const MAX_MULTILINE_PREPROCESS_BYTES: usize = 2 * 1024 * 1024;
/// Longest single line, in bytes (64 KiB), that is considered for multiline
/// preprocessing; `should_passthrough` returns `true` if any line exceeds it.
const MAX_MULTILINE_LINE_BYTES: usize = 64 * 1024;
/// Lazily compiled, case-insensitive pattern for a whole line that assigns a
/// `+`-joined chain of bare identifiers to a key.
///
/// A matching line has, in order (whitespace allowed between the parts):
/// - a key of 2-64 characters drawn from letters, digits, `_`, `-` and `.`;
/// - a `:` or `=` separator;
/// - a first operand of 2-32 characters drawn from letters, digits, `_` and `-`;
/// - between one and eight further `+ operand` groups;
/// - an optional trailing `;`.
///
/// The value is `None` if the pattern fails to compile.
static VAR_REF_CONCAT_RE: LazyLock<Option<Regex>> = LazyLock::new(|| {
Regex::new(
r#"(?i)^\s*[a-z0-9_\-\.]{2,64}\s*[:=]\s*[a-z0-9_\-]{2,32}(?:\s*\+\s*[a-z0-9_\-]{2,32}){1,8}\s*;?\s*$"#,
)
// Turn a compile error into `None` rather than panicking.
.ok()
});
pub(crate) fn warm_runtime_regexes() {
let _ = VAR_REF_CONCAT_RE.as_ref();
}
/// Associates a half-open offset range `[start_offset, end_offset)` with a
/// line number.
///
/// As built by `PreprocessedText::passthrough`, offsets are byte offsets into
/// the text and line numbers start at 1.
#[derive(Debug, Clone)]
pub struct LineMapping {
/// First offset covered by this mapping (inclusive).
pub start_offset: usize,
/// Offset one past the last covered position (exclusive; see
/// `PreprocessedText::line_for_offset`).
pub end_offset: usize,
/// Line number reported for offsets inside the range.
pub line_number: usize,
}
/// Text together with the mappings needed to translate offsets in that text
/// to line numbers.
#[derive(Debug, Clone)]
pub struct PreprocessedText<'a> {
/// The text itself, borrowed or owned.
pub text: std::borrow::Cow<'a, str>,
/// `passthrough` sets this to `text.len()`.
/// NOTE(review): its meaning for transformed text is not visible here - confirm.
pub original_end: usize,
/// Offset-range to line-number mappings; `line_for_offset` searches them
/// with `partition_point`, so they are expected to be ordered by
/// `start_offset`.
pub mappings: Vec<LineMapping>,
}
impl<'a> PreprocessedText<'a> {
pub fn line_for_offset(&self, offset: usize) -> Option<usize> {
let idx = self.mappings.partition_point(|m| m.start_offset <= offset);
if idx == 0 {
return None;
}
let m = &self.mappings[idx - 1];
if offset < m.end_offset {
Some(m.line_number)
} else {
None
}
}
pub fn passthrough(text: impl Into<std::borrow::Cow<'a, str>>) -> Self {
let text: std::borrow::Cow<'a, str> = text.into();
let mut mappings = Vec::new();
let mut offset = 0;
for (line_idx, line) in text.split('\n').enumerate() {
let end = offset + line.len();
mappings.push(LineMapping {
line_number: line_idx + 1,
start_offset: offset,
end_offset: end + 1,
});
offset = end + 1;
}
if let Some(last) = mappings.last_mut() {
last.end_offset = text.len();
}
let original_end = text.len();
Self {
text,
original_end,
mappings,
}
}
}
/// Options for multiline preprocessing.
///
/// The `Default` implementation sets `max_join_lines` to 10 and every flag to
/// `true`.
/// NOTE(review): the code that consumes these options is not visible here, so
/// the per-field descriptions below are inferred from the names - confirm.
#[derive(Debug, Clone)]
pub struct MultilineConfig {
/// Presumably the maximum number of lines joined into one logical line.
pub max_join_lines: usize,
/// Presumably enables Python-style implicit adjacent-literal concatenation.
pub python_implicit: bool,
/// Presumably enables joining of lines ending in a backslash.
pub backslash_continuation: bool,
/// Presumably enables joining of `+`-concatenated string pieces.
pub plus_concatenation: bool,
/// Presumably enables handling of backtick template literals.
pub template_literals: bool,
}
impl Default for MultilineConfig {
fn default() -> Self {
Self {
max_join_lines: 10,
python_implicit: true,
backslash_continuation: true,
plus_concatenation: true,
template_literals: true,
}
}
}
/// Cheap pre-filter reporting whether `text` contains anything that looks like
/// string concatenation or a line continuation.
///
/// The check runs in stages; the first three can only reject:
///
/// 1. Text whose first non-whitespace character is `{`, `[` or `<` is
///    rejected.
/// 2. Text longer than 4096 bytes is rejected unless it contains one of a few
///    secret-related keyword fragments (`ecret`, `oken`, `assword`, ...).
/// 3. A whole-text scan must find at least one coarse indicator: a backtick,
///    a quote followed by ` +` or ` \`, a `paste0(` / `paste(` / `concat!(`
///    call, two quotes separated by a space, a quote directly followed by a
///    newline and then a quote, space or tab, or a line accepted by
///    `has_var_ref_concat_line`.
/// 4. A per-line scan over trimmed lines must then find a concrete indicator.
///
/// Returns `true` only when stage 4 finds a matching line.
pub(crate) fn has_concatenation_indicators(text: &str) -> bool {
    // Keyword fragments required in large inputs. Leading letters are omitted
    // so both capitalizations of the first letter match.
    const SECRET_KEYWORD_FRAGMENTS: [&[u8]; 6] = [
        b"ecret",
        b"oken",
        b"assword",
        b"api_key",
        b"API_KEY",
        b"redential",
    ];
    // Substrings that count as a coarse indicator anywhere in the text.
    const WHOLE_TEXT_NEEDLES: [&str; 7] = [
        "\" +", "' +", "\" \\", "' \\", "paste0(", "paste(", "concat!(",
    ];
    // Substrings that count as a concrete indicator inside a single line.
    const LINE_NEEDLES: [&str; 12] = [
        "paste0(", "paste(", "concat!(", "\" +", "' +", "+ \"", "+ '", "\" \"", "' '", "${\"",
        "${'", "}${",
    ];

    // Stage 1: JSON-like and markup-like inputs are never candidates.
    // `<` also covers `<?xml` prologs, so no separate check is needed.
    if matches!(
        text.trim_start().chars().next(),
        Some('{' | '[' | '<')
    ) {
        return false;
    }

    let bytes = text.as_bytes();

    // Stage 2: large inputs must mention a secret-related keyword.
    if bytes.len() > 4096 {
        let mentions_secret = SECRET_KEYWORD_FRAGMENTS
            .iter()
            .any(|&fragment| memchr::memmem::find(bytes, fragment).is_some());
        if !mentions_secret {
            return false;
        }
    }

    // Stage 3: coarse whole-text scan. The checks are ordered roughly from
    // cheapest to most expensive and short-circuit on the first hit.
    let has_coarse_indicator = memchr::memchr(b'`', bytes).is_some()
        || WHOLE_TEXT_NEEDLES
            .iter()
            .any(|&needle| text.contains(needle))
        || bytes.windows(3).any(|window| {
            matches!(
                window,
                // Same-kind quotes separated by a single space.
                [b'"', b' ', b'"']
                    | [b'\'', b' ', b'\'']
                    // A quote ending a line, followed by a quote or indentation.
                    | [b'"', b'\n', b'"' | b' ' | b'\t']
                    | [b'\'', b'\n', b'\'' | b' ' | b'\t']
            )
        })
        || has_var_ref_concatenation(text);
    if !has_coarse_indicator {
        return false;
    }

    // Stage 4: some trimmed line must carry a concrete indicator.
    text.lines().map(str::trim).any(|line| {
        // `+` at either end of the line (covers a leading "+ " as well).
        line.starts_with('+')
            || line.ends_with('+')
            // A single trailing backslash, but not an escaped one.
            || (line.ends_with('\\') && !line.ends_with("\\\\"))
            || LINE_NEEDLES.iter().any(|&needle| line.contains(needle))
            // A line whose only backtick is its last character.
            || (line.ends_with('`') && line.matches('`').count() == 1)
            || has_var_ref_concat_line(line)
    })
}
fn has_var_ref_concatenation(text: &str) -> bool {
text.lines().any(has_var_ref_concat_line)
}
/// Returns `true` if `line` matches `VAR_REF_CONCAT_RE`.
///
/// Lines without a `+` are rejected before the regex is consulted, and a
/// regex that failed to compile (`None`) never matches.
fn has_var_ref_concat_line(line: &str) -> bool {
    // Cheap substring test first: the pattern cannot match without a `+`.
    line.contains('+')
        && matches!(VAR_REF_CONCAT_RE.as_ref(), Some(pattern) if pattern.is_match(line))
}
/// Decides whether `text` should skip multiline preprocessing.
///
/// Returns `true` when the text exceeds `MAX_MULTILINE_PREPROCESS_BYTES`, when
/// any single line exceeds `MAX_MULTILINE_LINE_BYTES`, or when
/// `has_concatenation_indicators` finds nothing to join.
pub(crate) fn should_passthrough(text: &str) -> bool {
    // Size limits are checked first so oversized input is never scanned for
    // indicators.
    if text.len() > MAX_MULTILINE_PREPROCESS_BYTES {
        return true;
    }

    let has_oversized_line = text
        .lines()
        .any(|line| line.len() > MAX_MULTILINE_LINE_BYTES);
    if has_oversized_line {
        return true;
    }

    !has_concatenation_indicators(text)
}