/// Inputs longer than this many bytes (2 MiB) are returned unchanged by
/// `preprocess_multiline` instead of being scanned for concatenations.
const MAX_MULTILINE_PREPROCESS_BYTES: usize = 2 * 1024 * 1024;
/// If any single line is longer than this many bytes (64 KiB),
/// `preprocess_multiline` returns the whole input unchanged.
const MAX_MULTILINE_LINE_BYTES: usize = 64 * 1024;
/// Associates a half-open byte range of preprocessed text with the 1-based
/// line number it is attributed to.
#[derive(Debug, Clone)]
pub struct LineMapping {
/// First byte offset covered by this mapping (inclusive).
pub start_offset: usize,
/// Byte offset one past the last byte covered by this mapping (exclusive).
pub end_offset: usize,
/// 1-based line number reported for offsets inside the range.
pub line_number: usize,
}
/// Result of preprocessing: the text to scan plus the offset-to-line mappings
/// that describe it.
#[derive(Debug, Clone)]
pub struct PreprocessedText {
/// The text to scan. `preprocess_multiline` may append joined lines after
/// the original input.
pub text: String,
/// Byte length of the original input; the constructors in this file always
/// set it to the input's `len()`.
pub original_end: usize,
/// Offset ranges of `text` and the line number each one maps to.
pub mappings: Vec<LineMapping>,
}
impl PreprocessedText {
pub fn line_for_offset(&self, offset: usize) -> Option<usize> {
self.mappings
.iter()
.find(|m| offset >= m.start_offset && offset < m.end_offset)
.map(|m| m.line_number)
}
pub fn passthrough(text: &str) -> Self {
let mut mappings = Vec::new();
let mut offset = 0;
for (line_idx, line) in text.split('\n').enumerate() {
let end = offset + line.len();
mappings.push(LineMapping {
line_number: line_idx + 1, start_offset: offset,
end_offset: end + 1, });
offset = end + 1; }
if let Some(last) = mappings.last_mut() {
last.end_offset = text.len();
}
let original_end = text.len();
Self {
text: text.to_string(),
original_end,
mappings,
}
}
}
/// Switches controlling which concatenation styles the preprocessor joins.
#[derive(Debug, Clone)]
pub struct MultilineConfig {
/// Upper bound on how many consecutive lines one chain may span.
pub max_join_lines: usize,
/// Join adjacent quoted literals found on a line (`"a" "b"`).
pub python_implicit: bool,
/// Treat a single trailing backslash as "continues on the next line".
pub backslash_continuation: bool,
/// Join literals combined with the `+` operator.
pub plus_concatenation: bool,
/// Extract the literal text of backtick-delimited template literals.
pub template_literals: bool,
}
/// Default configuration: every join strategy enabled, chains capped at 10 lines.
impl Default for MultilineConfig {
fn default() -> Self {
Self {
max_join_lines: 10,
python_implicit: true,
backslash_continuation: true,
plus_concatenation: true,
template_literals: true,
}
}
}
/// Cheap heuristic deciding whether `text` is worth running through the
/// multiline joiner.
///
/// Returns `false` immediately for text whose first non-whitespace character
/// is `{`, `[` or `<` (JSON- or markup-looking input). Otherwise a
/// whole-text prefilter looks for any concatenation hint, and only if one is
/// present are the individual lines inspected for a concrete pattern.
pub(crate) fn has_concatenation_indicators(text: &str) -> bool {
    // `<` also covers the `<?xml` prologue.
    if matches!(
        text.trim_start().chars().next(),
        Some('{') | Some('[') | Some('<')
    ) {
        return false;
    }

    let bytes = text.as_bytes();
    let has_explicit_concat = text.contains("\" +") || text.contains("' +");
    let has_backslash_cont = text.contains("\" \\") || text.contains("' \\");
    let has_template = bytes.contains(&b'`');
    // Both spellings are accepted by the per-line check below, so the
    // prefilter must let both through.
    let has_paste = text.contains("paste0(") || text.contains("paste(");
    // A closing quote followed by a space or newline and then another quote
    // (or indentation) hints at implicit adjacent-literal concatenation.
    let has_implicit = bytes.windows(3).any(|w| {
        (w[0] == b'"' && w[1] == b' ' && w[2] == b'"')
            || (w[0] == b'\'' && w[1] == b' ' && w[2] == b'\'')
            || (w[0] == b'"' && w[1] == b'\n' && (w[2] == b'"' || w[2] == b' ' || w[2] == b'\t'))
            || (w[0] == b'\'' && w[1] == b'\n' && (w[2] == b'\'' || w[2] == b' ' || w[2] == b'\t'))
    });
    if !(has_explicit_concat || has_backslash_cont || has_template || has_paste || has_implicit) {
        return false;
    }

    text.lines().map(str::trim).any(|t| {
        // Line starts or ends with the `+` operator.
        t.ends_with('+')
            || t.starts_with('+')
            // R-style concatenation helpers.
            || t.contains("paste0(")
            || t.contains("paste(")
            // A quote next to a `+`.
            || t.contains("\" +")
            || t.contains("' +")
            || t.contains("+ \"")
            || t.contains("+ '")
            // A single trailing backslash (a doubled one is an escaped backslash).
            || (t.ends_with('\\') && !t.ends_with("\\\\"))
            // Adjacent literals on one line.
            || t.contains("\" \"")
            || t.contains("' '")
            // A lone backtick at the end of the line: a template literal opens here.
            || (t.ends_with('`') && t.matches('`').count() == 1)
    })
}
/// Joins string literals that were split across lines or concatenation
/// operators, and appends the joined form after the original text.
///
/// The input is returned unchanged (via `passthrough_text`) when it exceeds
/// the size limits, shows no concatenation indicators, or starts with `{` or
/// `[`. Otherwise every chain of continued lines is collapsed into one line;
/// if the collapsed text differs from the input it is appended after a
/// newline, so the original offsets stay valid.
///
/// The returned mappings first cover the original text line by line and then,
/// if anything was appended, each joined line, attributed to the line on
/// which its chain started.
pub fn preprocess_multiline(text: &str, config: &MultilineConfig) -> PreprocessedText {
    let too_large = text.len() > MAX_MULTILINE_PREPROCESS_BYTES
        || text
            .lines()
            .any(|line| line.len() > MAX_MULTILINE_LINE_BYTES);
    if too_large || !has_concatenation_indicators(text) {
        return passthrough_text(text);
    }

    let lines: Vec<&str> = text.lines().collect();
    if lines.is_empty() {
        return PreprocessedText {
            text: String::new(),
            original_end: 0,
            mappings: Vec::new(),
        };
    }
    if matches!(text.trim_start().chars().next(), Some('{') | Some('[')) {
        return passthrough_text(text);
    }

    let mut result_lines: Vec<String> = Vec::new();
    let mut mappings: Vec<LineMapping> = Vec::new();
    // Byte offset, within the joined text, at which the next joined line starts.
    let mut current_offset: usize = 0;
    let mut i = 0;
    while i < lines.len() {
        let (joined_line, lines_consumed, line_mappings) =
            process_line_chain(&lines, i, config, current_offset);
        mappings.extend(line_mappings);
        // Every entry of `result_lines`, empty ones included, takes its own
        // length plus one separator byte in `join("\n")`. Skipping the
        // advance for empty lines would shift every later mapping.
        current_offset += joined_line.len() + 1;
        result_lines.push(joined_line);
        i += lines_consumed.max(1);
    }

    let joined_text = result_lines.join("\n");
    let original_end = text.len();
    let mut final_text = text.to_string();
    if joined_text != text && !joined_text.is_empty() {
        final_text.push('\n');
        final_text.push_str(&joined_text);
        // The joined section starts right after the separator newline.
        let append_start = original_end + 1;
        for mapping in &mut mappings {
            mapping.start_offset += append_start;
            mapping.end_offset += append_start;
        }
    }

    // Mappings for the untouched original text: one per '\n'-separated
    // segment, each covering its trailing newline where there is one.
    let mut original_mappings = Vec::new();
    let mut offset = 0;
    for (line_idx, line) in text.split('\n').enumerate() {
        let end = offset + line.len();
        original_mappings.push(LineMapping {
            line_number: line_idx + 1,
            start_offset: offset,
            end_offset: (end + 1).min(original_end),
        });
        offset = end + 1;
    }
    original_mappings.extend(mappings);

    PreprocessedText {
        text: final_text,
        original_end,
        mappings: original_mappings,
    }
}
/// Returns `text` unchanged together with one mapping per line.
///
/// Offsets are derived from the real byte length of each line *including*
/// its terminator, so they stay correct for `\r\n` input (`str::lines`
/// strips the `\r`, which would otherwise make every later offset drift by
/// one byte per line). Each mapping covers its line terminator, matching
/// `PreprocessedText::passthrough`, so every in-bounds offset resolves to a
/// line. Empty input yields no mappings.
fn passthrough_text(text: &str) -> PreprocessedText {
    let mut mappings = Vec::new();
    let mut offset = 0;
    // `split_inclusive('\n')` yields the same lines as `lines()` but keeps
    // the terminator, so segment lengths add up to exact byte offsets.
    for (i, segment) in text.split_inclusive('\n').enumerate() {
        let end = offset + segment.len();
        mappings.push(LineMapping {
            line_number: i + 1,
            start_offset: offset,
            end_offset: end,
        });
        offset = end;
    }
    PreprocessedText {
        text: text.to_string(),
        original_end: text.len(),
        mappings,
    }
}
/// Collapses the chain of continued lines that starts at `lines[start_idx]`.
///
/// Lines are fed to `extract_string_part` one after another; their extracted
/// parts are concatenated until a line reports that it does not continue,
/// `config.max_join_lines` lines have been used, or the input ends.
///
/// Returns `(joined, lines_consumed, mappings)`:
/// * `joined` — the concatenated parts (may be empty);
/// * `lines_consumed` — how many input lines were actually used, at least 1
///   so callers always make progress;
/// * `mappings` — empty when `joined` is empty, otherwise a single mapping
///   covering `base_offset..base_offset + joined.len()` attributed to the
///   1-based number of the chain's first line.
fn process_line_chain(
    lines: &[&str],
    start_idx: usize,
    config: &MultilineConfig,
    base_offset: usize,
) -> (String, usize, Vec<LineMapping>) {
    let mut joined = String::new();
    let mut lines_used = 0usize;

    for line in lines.iter().skip(start_idx).take(config.max_join_lines) {
        // The continuation type does not influence how parts are joined.
        let (part, continues, _continuation_type) =
            extract_string_part(line, config, lines_used > 0);
        joined.push_str(&part);
        lines_used += 1;
        if !continues {
            break;
        }
    }

    let mappings = if joined.is_empty() {
        Vec::new()
    } else {
        vec![LineMapping {
            start_offset: base_offset,
            end_offset: base_offset + joined.len(),
            line_number: start_idx + 1,
        }]
    };

    // Report only the lines that were really inspected. Counting one extra
    // when the loop stopped at the join limit would make the caller skip
    // the line that follows the chain.
    (joined, lines_used.max(1), mappings)
}
/// Which extraction rule produced the part returned by `extract_string_part`.
#[derive(Debug, PartialEq)]
enum ContinuationType {
/// No rule matched; the line was returned as-is.
None,
/// The line ended with a single trailing backslash.
Backslash,
/// The part came from `extract_plus_concatenation`.
PlusOperator,
/// The part came from `extract_function_concatenation` or
/// `extract_python_implicit_concatenation`.
Implicit,
/// The part came from `extract_template_literal_continuation`.
TemplateLiteral,
}
/// Extracts the string content of a single line and reports whether the
/// statement continues on the following line.
///
/// Rules are tried in a fixed priority order — trailing backslash, `+`
/// concatenation, `paste`/`paste0` calls, adjacent literals, template
/// literals — and the first one that matches decides the result. Rules
/// switched off in `config` are skipped (the `paste` rule has no switch).
/// When nothing matches the whole line is returned with `continues == false`.
fn extract_string_part(
    line: &str,
    config: &MultilineConfig,
    _is_continuation: bool,
) -> (String, bool, ContinuationType) {
    let stripped = line.trim();
    // A doubled trailing backslash is an escaped backslash, not a continuation.
    let single_trailing_backslash = stripped.ends_with('\\') && !stripped.ends_with("\\\\");
    if config.backslash_continuation && single_trailing_backslash {
        let right_trimmed = line.trim_end();
        let body = match right_trimmed.strip_suffix('\\') {
            Some(rest) => rest,
            None => line,
        }
        .trim_end();
        return (extract_string_content(body), true, ContinuationType::Backslash);
    }

    let via_plus = if config.plus_concatenation {
        extract_plus_concatenation(line)
    } else {
        None
    };
    if let Some((part, continues)) = via_plus {
        return (part, continues, ContinuationType::PlusOperator);
    }

    if let Some((part, continues)) = extract_function_concatenation(line) {
        return (part, continues, ContinuationType::Implicit);
    }

    let via_implicit = if config.python_implicit {
        extract_python_implicit_concatenation(line)
    } else {
        None
    };
    if let Some((part, continues)) = via_implicit {
        return (part, continues, ContinuationType::Implicit);
    }

    let via_template = if config.template_literals {
        extract_template_literal_continuation(line)
    } else {
        None
    };
    if let Some((part, continues)) = via_template {
        return (part, continues, ContinuationType::TemplateLiteral);
    }

    (line.to_string(), false, ContinuationType::None)
}
/// Returns the content of the first quoted literal found in `line`.
///
/// Quote styles are tried in the order `"`, `'`, `` ` ``; the first style
/// that yields a terminated literal wins. If none does, the trimmed line is
/// handed to `filter_line_content` instead.
fn extract_string_content(line: &str) -> String {
    let candidate = line.trim();
    ['"', '\'', '`']
        .iter()
        .find_map(|&quote| extract_quoted_content(candidate, quote, quote))
        .unwrap_or_else(|| filter_line_content(candidate))
}
/// Returns the content of the first literal in `s` that starts with `open`
/// and is terminated by an unescaped `close`, or `None` if there is no such
/// terminated literal.
///
/// Backslash escapes are kept verbatim (both the backslash and the escaped
/// character). When the literal carries a Python f-string prefix directly
/// before the opening quote, `{...}` interpolations are dropped and the
/// escapes `{{` / `}}` become single braces; otherwise braces are ordinary
/// characters.
fn extract_quoted_content(s: &str, open: char, close: char) -> Option<String> {
    let open_idx = s.find(open)?;
    // Only a prefix glued to the quote makes this an f-string; an `f`
    // somewhere earlier on the line (e.g. in `final` or `config`) does not.
    let is_fstring = has_fstring_prefix(&s[..open_idx]);
    let mut chars = s[open_idx + open.len_utf8()..].chars().peekable();

    let mut content = String::new();
    let mut escaped = false;
    while let Some(ch) = chars.next() {
        if escaped {
            content.push(ch);
            escaped = false;
        } else if ch == '\\' {
            escaped = true;
            content.push(ch);
        } else if ch == close {
            return Some(content);
        } else if is_fstring && ch == '{' {
            if chars.peek() == Some(&'{') {
                // `{{` is an escaped literal brace.
                chars.next();
                content.push('{');
            } else {
                // Skip the interpolation, honouring nested braces.
                let mut brace_depth = 1;
                for c in chars.by_ref() {
                    if c == '{' {
                        brace_depth += 1;
                    } else if c == '}' {
                        brace_depth -= 1;
                        if brace_depth == 0 {
                            break;
                        }
                    }
                }
            }
        } else if is_fstring && ch == '}' && chars.peek() == Some(&'}') {
            // `}}` is an escaped literal brace.
            chars.next();
            content.push('}');
        } else {
            content.push(ch);
        }
    }
    // The literal was never closed.
    None
}

/// Reports whether the text directly before an opening quote ends with a
/// Python f-string prefix (`f`, `F`, or `f` combined with `r`/`R`) that
/// stands as its own token.
fn has_fstring_prefix(before_quote: &str) -> bool {
    let prefix_len = before_quote
        .chars()
        .rev()
        .take_while(|c| c.is_ascii_alphabetic())
        .count();
    if prefix_len == 0 || prefix_len > 2 {
        return false;
    }
    // The prefix is pure ASCII, so its char count equals its byte length.
    let (head, prefix) = before_quote.split_at(before_quote.len() - prefix_len);
    let at_token_start = head
        .chars()
        .next_back()
        .map_or(true, |c| !(c.is_alphanumeric() || c == '_'));
    let f_count = prefix.chars().filter(|c| matches!(c, 'f' | 'F')).count();
    let r_count = prefix.chars().filter(|c| matches!(c, 'r' | 'R')).count();
    at_token_start && f_count == 1 && f_count + r_count == prefix_len
}
/// Reduces a line that has no quoted literal to its most relevant part.
///
/// Leading declaration keywords are stripped one after another in a fixed
/// order; then, if the line contains an assignment, only the trimmed
/// right-hand side is returned. The separators ` = `, `= ` and `=` are tried
/// in that order and the first one present decides where the line is cut.
/// Without any `=` the keyword-stripped line is returned.
fn filter_line_content(line: &str) -> String {
    const DECLARATION_KEYWORDS: [&str; 11] = [
        "const ", "let ", "var ", "val ", "final ", "static ", "string ", "String ", "auto ",
        "dim ", "my ",
    ];
    const ASSIGNMENT_SEPARATORS: [&str; 3] = [" = ", "= ", "="];

    let mut rest = line;
    for keyword in DECLARATION_KEYWORDS.iter() {
        rest = rest.trim_start_matches(*keyword);
    }

    for separator in ASSIGNMENT_SEPARATORS.iter() {
        if let Some(at) = rest.find(*separator) {
            let right_hand_side = &rest[at + separator.len()..];
            return right_hand_side.trim().to_string();
        }
    }

    rest.to_string()
}
/// Joins the operands of a `+` concatenation found on `line`.
///
/// The line is split at every `+` that sits outside a quoted literal, so a
/// `+` that is part of a string (for example in base64 data) no longer tears
/// that literal apart. Each operand is reduced with `extract_string_content`
/// and the results are concatenated. If the line's quotes are unbalanced the
/// quote tracking cannot be trusted, and the line is split at every `+`.
///
/// Returns `None` when there is no `+` to split on. The boolean is `true`
/// when the line ends with a `+` operator, i.e. the expression continues on
/// the next line.
fn extract_plus_concatenation(line: &str) -> Option<(String, bool)> {
    let trimmed = line.trim();
    let operands =
        split_on_unquoted_plus(trimmed).unwrap_or_else(|| trimmed.split('+').collect());
    if operands.len() < 2 {
        return None;
    }

    // `trimmed` has no trailing whitespace, so the last operand is empty
    // exactly when the line ends with a `+` operator.
    let continues = operands.last().map_or(false, |tail| tail.is_empty());

    let mut result = String::new();
    for operand in &operands {
        result.push_str(&extract_string_content(operand.trim()));
    }
    Some((result, continues))
}

/// Splits `text` at each `+` that is outside `"`, `'` and `` ` `` literals
/// (backslash escapes inside a literal are honoured). Returns `None` when a
/// literal is still open at the end of `text`.
fn split_on_unquoted_plus(text: &str) -> Option<Vec<&str>> {
    let mut operands = Vec::new();
    let mut operand_start = 0;
    let mut open_quote: Option<char> = None;
    let mut escaped = false;

    for (idx, ch) in text.char_indices() {
        match open_quote {
            Some(quote) => {
                if escaped {
                    escaped = false;
                } else if ch == '\\' {
                    escaped = true;
                } else if ch == quote {
                    open_quote = None;
                }
            }
            None => match ch {
                '"' | '\'' | '`' => open_quote = Some(ch),
                '+' => {
                    operands.push(&text[operand_start..idx]);
                    operand_start = idx + 1;
                }
                _ => {}
            },
        }
    }

    if open_quote.is_some() {
        return None;
    }
    operands.push(&text[operand_start..]);
    Some(operands)
}
/// Concatenates every quoted literal found on `line`.
///
/// Returns `None` when the line holds no terminated literal. The boolean is
/// always `false`: this rule never reports a continuation onto the next line.
fn extract_python_implicit_concatenation(line: &str) -> Option<(String, bool)> {
    let literals = extract_quoted_strings(line);
    if literals.is_empty() {
        None
    } else {
        Some((literals.concat(), false))
    }
}
/// Concatenates the quoted arguments of a `paste(` / `paste0(` call.
///
/// Returns `None` unless the trimmed line mentions one of the two calls and
/// contains at least two quoted literals. The boolean is always `false`.
fn extract_function_concatenation(line: &str) -> Option<(String, bool)> {
    let candidate = line.trim();
    let mentions_paste = ["paste0(", "paste("]
        .iter()
        .any(|&call| candidate.contains(call));
    if !mentions_paste {
        return None;
    }

    let literals = extract_quoted_strings(candidate);
    if literals.len() >= 2 {
        Some((literals.concat(), false))
    } else {
        None
    }
}
/// Collects the contents of every terminated `"..."` or `'...'` literal in
/// `line`, in order of appearance.
///
/// Backslash escapes are kept verbatim. An opening quote that is never closed
/// is skipped, and scanning resumes with the character right after it.
fn extract_quoted_strings(line: &str) -> Vec<String> {
    /// Reads a literal whose body starts at `from`; returns its content and
    /// the index of the closing delimiter, or `None` if it is never closed.
    fn read_literal(symbols: &[char], from: usize, delimiter: char) -> Option<(String, usize)> {
        let mut literal = String::new();
        let mut escaped = false;
        for (idx, &symbol) in symbols.iter().enumerate().skip(from) {
            if escaped {
                literal.push(symbol);
                escaped = false;
            } else if symbol == '\\' {
                literal.push(symbol);
                escaped = true;
            } else if symbol == delimiter {
                return Some((literal, idx));
            } else {
                literal.push(symbol);
            }
        }
        None
    }

    let symbols: Vec<char> = line.chars().collect();
    let mut found = Vec::new();
    let mut cursor = 0;
    while cursor < symbols.len() {
        let delimiter = symbols[cursor];
        if matches!(delimiter, '"' | '\'') {
            if let Some((literal, close_idx)) = read_literal(&symbols, cursor + 1, delimiter) {
                found.push(literal);
                // Jump to the closing quote; the increment below steps past it.
                cursor = close_idx;
            }
        }
        cursor += 1;
    }
    found
}
fn extract_template_literal_continuation(line: &str) -> Option<(String, bool)> {
let trimmed = line.trim();
if !trimmed.contains('`') {
return None;
}
let backtick_count = trimmed.chars().filter(|&c| c == '`').count();
let continues = backtick_count % 2 == 1;
let mut result = String::new();
let mut in_template = false;
let mut chars = trimmed.chars().peekable();
while let Some(ch) = chars.next() {
if ch == '`' {
in_template = !in_template;
continue;
}
if in_template && ch == '$' && chars.peek() == Some(&'{') {
chars.next(); let mut brace_depth = 1;
for c in chars.by_ref() {
if c == '{' {
brace_depth += 1;
} else if c == '}' {
brace_depth -= 1;
if brace_depth == 0 {
break;
}
}
}
continue;
}
if in_template {
result.push(ch);
}
}
Some((result, continues))
}
// Unit tests: each case feeds a snippet written in one language's
// concatenation style through `preprocess_multiline` and checks that the
// expected fragments (or the fully joined literal) appear in the output text.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_python_backslash_continuation() {
let text = r#"key = 'sk-proj-' + \
'abcdef1234567890'"#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-proj-"));
assert!(preprocessed.text.contains("abcdef1234567890"));
assert!(preprocessed.text.contains("sk-proj-abcdef1234567890"));
}
#[test]
fn test_python_implicit_concatenation() {
let text = r#"api_key = "sk-" "live_" "abcdef123456""#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-live_abcdef123456"));
}
#[test]
fn test_javascript_plus_concatenation() {
let text = r#"const key = "sk-" +
"test_" +
"secret123";"#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-test_secret123"));
}
#[test]
fn test_javascript_template_literal() {
let text = r#"const key = `sk-proj-${id}abcdef123456`;"#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-proj-"));
assert!(preprocessed.text.contains("abcdef123456"));
}
#[test]
fn test_go_string_concatenation() {
let text = r#"apiKey := "sk-" +
"live_" +
"abcdef123456""#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-live_abcdef123456"));
}
#[test]
fn test_go_implicit_concatenation() {
let text = r#"apiKey := "sk-" "live_" "abcdef123456""#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-live_abcdef123456"));
}
#[test]
fn test_java_plus_concatenation() {
let text = r#"String apiKey = "sk-" +
"live_" +
"abcdef123456";"#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-live_abcdef123456"));
}
#[test]
fn test_csharp_plus_concatenation() {
let text = r#"var apiKey = "sk-" +
"live_" +
"abcdef123456";"#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-live_abcdef123456"));
}
#[test]
fn test_ruby_concatenation() {
let text = r#"api_key = "sk-" \
+ "live_" \
+ "abcdef123456""#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-live_abcdef123456"));
}
#[test]
fn test_rust_string_concatenation() {
let text = r#"let api_key = "sk-".to_string() +
"live_" +
"abcdef123456";"#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-live_abcdef123456"));
}
#[test]
fn test_multiline_openai_key() {
let text = r#"OPENAI_API_KEY = "sk-proj-" + \
"AbCdEfGhIjKlMnOpQrStUvWxYz" + \
"1234567890abcdefghij""#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-proj-"));
assert!(preprocessed.text.contains("AbCdEfGhIjKlMnOpQrStUvWxYz"));
}
#[test]
fn test_line_mapping_basic() {
let text = "line1\nline2\nline3";
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
let line1 = preprocessed.line_for_offset(0);
assert_eq!(line1, Some(1));
}
#[test]
fn test_empty_input() {
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline("", &config);
assert!(preprocessed.text.is_empty());
assert!(preprocessed.mappings.is_empty());
}
#[test]
fn test_single_line_no_concatenation() {
let text = r#"api_key = "sk-abcdef123456""#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-abcdef123456"));
}
#[test]
fn test_aws_key_multiline() {
let text = r#"AWS_ACCESS_KEY_ID = "AKIA" \
"IOSFODNN7EXAMPLE""#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("AKIAIOSFODNN7EXAMPLE"));
}
#[test]
fn test_github_token_multiline() {
let text = r#"const token = "ghp_" +
"xxxxxxxxxxxxxxxxxxxx" +
"xxxxxxxxxxxxxxxxxxxx";"#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("ghp_"));
assert!(preprocessed.text.contains("xxxxxxxxxxxxxxxxxxxx"));
}
#[test]
fn test_slack_token_multiline() {
let text =
r#"slack_token = "xoxb-" "1234567890" "-" "1234567890" "-" "abcdefghijABCDEFGHIJklmn""#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("xoxb-"));
assert!(preprocessed.text.contains("1234567890"));
}
#[test]
fn test_config_disables_features() {
let text = r#"key = "part1" + "part2""#;
let config = MultilineConfig {
plus_concatenation: false,
..Default::default()
};
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("part1"));
assert!(preprocessed.text.contains("part2"));
}
#[test]
fn test_single_line_plus_concatenation() {
let text = r#"token = "xoxb-1234567890-" + "1234567890-" + "abcdefghijABCDEFGHIJklmn""#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
eprintln!("Input: {}", text);
eprintln!("Output: {}", preprocessed.text);
assert!(preprocessed.text.contains("xoxb-1234567890-"));
assert!(preprocessed.text.contains("1234567890-"));
assert!(preprocessed.text.contains("abcdefghijABCDEFGHIJklmn"));
}
#[test]
fn test_python_fstring_interpolation() {
let text = r#"key = f"sk-proj-{prefix}abcdef123456""#;
let content = extract_quoted_content(r#"f"sk-proj-{prefix}abcdef123456""#, '"', '"');
assert_eq!(
content.as_deref(),
Some("sk-proj-abcdef123456"),
"f-string interpolation should be stripped, literals preserved"
);
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-proj-"));
}
#[test]
fn test_python_fstring_multiline_concat() {
let text = r#"key = f"sk-proj-" + \
f"{org_id}abcdef123456""#;
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-proj-"));
assert!(preprocessed.text.contains("abcdef123456"));
}
#[test]
fn test_go_raw_literal_via_backtick() {
let text = "apiKey := `sk-live-abcdef123456`";
let config = MultilineConfig::default();
let preprocessed = preprocess_multiline(text, &config);
assert!(preprocessed.text.contains("sk-live-abcdef123456"));
}
}