Skip to main content

keyhog_scanner/
multiline.rs

1//! Multi-line string concatenation preprocessor.
2//!
3//! Detects and joins string concatenation patterns across lines for multiple languages.
4//! This allows the scanner to detect secrets that are split across lines using various
5//! concatenation syntaxes.
6
/// Upper bound on the input size in bytes (2 MiB); larger inputs are returned
/// as a passthrough without multiline preprocessing.
const MAX_MULTILINE_PREPROCESS_BYTES: usize = 2 << 20;
/// Upper bound on a single line's length in bytes (64 KiB); input containing a
/// longer line is returned as a passthrough without multiline preprocessing.
const MAX_MULTILINE_LINE_BYTES: usize = 64 << 10;
9
/// Ties a byte range of the preprocessed text back to the original line it
/// came from.
///
/// The range is half-open: `start_offset` is included, `end_offset` is not.
///
/// # Examples
///
/// ```rust,ignore
/// use keyhog_scanner::multiline::LineMapping;
/// let _ = std::mem::size_of::<LineMapping>();
/// ```
#[derive(Debug, Clone)]
pub struct LineMapping {
    /// First byte of the range in the preprocessed text (inclusive).
    pub start_offset: usize,
    /// One past the last byte of the range in the preprocessed text (exclusive).
    pub end_offset: usize,
    /// Line number in the original text, counted from 1.
    pub line_number: usize,
}
28
29/// Result of preprocessing text for multi-line concatenation.
30///
31/// The `text` field contains the **original text unchanged**, followed by any
32/// multiline-joined segments appended after a separator. This ensures:
33///
34/// 1. Structural regex patterns (`secret_key = "..."`) match in the original text.
35/// 2. Multiline-joined secrets (`"sk-proj-" + "abc..."`) match in the appended segments.
36/// 3. No double-scanning or heuristic thresholds are needed.
37///
38/// # Examples
39///
40/// ```rust
41/// use keyhog_scanner::multiline::PreprocessedText;
42///
43/// let value = PreprocessedText::passthrough("secret");
44/// assert_eq!(value.line_for_offset(0), Some(1));
45/// ```
46#[derive(Debug, Clone)]
47pub struct PreprocessedText {
48    /// Original text + appended multiline-joined segments
49    pub text: String,
50    /// Byte offset where the appended joined segments start (= original text length)
51    pub original_end: usize,
52    /// Mapping from offsets in text to original line numbers
53    pub mappings: Vec<LineMapping>,
54}
55
56impl PreprocessedText {
57    /// Get the original line number for a given offset in the joined text.
58    /// Map a byte offset in preprocessed text back to an original line number.
59    ///
60    /// # Examples
61    ///
62    /// ```rust
63    /// use keyhog_scanner::multiline::PreprocessedText;
64    ///
65    /// let value = PreprocessedText::passthrough("secret");
66    /// assert_eq!(value.line_for_offset(0), Some(1));
67    /// ```
68    pub fn line_for_offset(&self, offset: usize) -> Option<usize> {
69        self.mappings
70            .iter()
71            .find(|m| offset >= m.start_offset && offset < m.end_offset)
72            .map(|m| m.line_number)
73    }
74
75    /// Create a passthrough (no preprocessing) — one mapping per line.
76    /// Build a preprocessed representation with a one-line identity mapping.
77    ///
78    /// # Examples
79    ///
80    /// ```rust
81    /// use keyhog_scanner::multiline::PreprocessedText;
82    ///
83    /// let value = PreprocessedText::passthrough("secret");
84    /// assert_eq!(value.text, "secret");
85    /// ```
86    pub fn passthrough(text: &str) -> Self {
87        let mut mappings = Vec::new();
88        let mut offset = 0;
89        for (line_idx, line) in text.split('\n').enumerate() {
90            let end = offset + line.len();
91            mappings.push(LineMapping {
92                line_number: line_idx + 1, // 1-indexed
93                start_offset: offset,
94                end_offset: end + 1, // +1 for the \n
95            });
96            offset = end + 1; // skip past \n
97        }
98        if let Some(last) = mappings.last_mut() {
99            last.end_offset = text.len();
100        }
101        let original_end = text.len();
102        Self {
103            text: text.to_string(),
104            original_end,
105            mappings,
106        }
107    }
108}
109
/// Switches and limits controlling multiline concatenation recovery.
///
/// # Examples
///
/// ```rust
/// use keyhog_scanner::multiline::MultilineConfig;
///
/// let config = MultilineConfig::default();
/// assert!(config.python_implicit);
/// ```
#[derive(Debug, Clone)]
pub struct MultilineConfig {
    /// Upper bound on how many lines one concatenation chain may span.
    pub max_join_lines: usize,
    /// Recognise Python-style implicit concatenation of adjacent string literals.
    pub python_implicit: bool,
    /// Recognise a trailing backslash as a line continuation.
    pub backslash_continuation: bool,
    /// Recognise explicit concatenation with the `+` operator.
    pub plus_concatenation: bool,
    /// Recognise JavaScript template literals.
    pub template_literals: bool,
}
133
134impl Default for MultilineConfig {
135    fn default() -> Self {
136        Self {
137            max_join_lines: 10,
138            python_implicit: true,
139            backslash_continuation: true,
140            plus_concatenation: true,
141            template_literals: true,
142        }
143    }
144}
145
/// Decide whether `text` is worth running through the multiline joiner.
///
/// The check has three stages:
///
/// 1. Text whose first non-whitespace character is `{`, `[` or `<` is treated
///    as structured data (JSON / TOML / XML / HTML) and rejected. YAML is
///    deliberately not rejected here.
/// 2. A cheap whole-text gate requires at least one concatenation hint: a quote
///    followed by ` +` or ` \`, a backtick, `paste0(`, or two same-kind quotes
///    separated by a space or a line break. Bare quotes or plus signs are not
///    enough, since they appear in almost every source file.
/// 3. Each trimmed line is then tested for an actual concatenation shape.
pub(crate) fn has_concatenation_indicators(text: &str) -> bool {
    // Stage 1: structured-data fast path. `<` also covers a leading `<?xml`.
    if matches!(
        text.trim_start().chars().next(),
        Some('{') | Some('[') | Some('<')
    ) {
        return false;
    }

    // Stage 2: whole-text gate.
    let bytes = text.as_bytes();
    let substring_hint = ["\" +", "' +", "\" \\", "' \\", "paste0("]
        .iter()
        .any(|needle| text.contains(*needle));
    let gate_open = substring_hint
        || bytes.contains(&b'`')
        || bytes.windows(3).any(|window| {
            let (quote, gap, next) = (window[0], window[1], window[2]);
            let is_quote = quote == b'"' || quote == b'\'';
            is_quote
                && match gap {
                    // Same line: `" "` or `' '`.
                    b' ' => next == quote,
                    // Across a line break: the next line starts with the same
                    // quote or with indentation.
                    b'\n' => next == quote || next == b' ' || next == b'\t',
                    _ => false,
                }
        });
    if !gate_open {
        return false;
    }

    // Stage 3: per-line shapes.
    text.lines().map(str::trim).any(|line| {
        // A `+` at either end of the line signals a multi-line `+` chain.
        let plus_at_edge = line.ends_with('+') || line.starts_with('+');
        let paste_call = line.contains("paste0(") || line.contains("paste(");
        // `"a" + "b"` in the middle of a line.
        let plus_next_to_quote = line.contains("\" +")
            || line.contains("' +")
            || line.contains("+ \"")
            || line.contains("+ '");
        // A single trailing backslash continues the line; a doubled one does not.
        let trailing_backslash = line.ends_with('\\') && !line.ends_with("\\\\");
        let adjacent_literals = line.contains("\" \"") || line.contains("' '");
        // The line's only backtick is its last character.
        let lone_trailing_backtick = line.ends_with('`') && line.matches('`').count() == 1;

        plus_at_edge
            || paste_call
            || plus_next_to_quote
            || trailing_backslash
            || adjacent_literals
            || lone_trailing_backtick
    })
}
219
220/// Preprocess text to join multi-line string concatenations.
221///
222/// This function detects various concatenation patterns across multiple languages:
223/// - Python: implicit concatenation of adjacent strings, backslash continuation
224/// - JavaScript/TypeScript: + operator, template literals, backslash continuation
225/// - Ruby: + operator, backslash continuation, line continuation without operator
226/// - Go: implicit concatenation of adjacent strings, + operator
227/// - Rust: + operator for strings, implicit array concatenation (less common)
228/// - Java/C#: + operator for string concatenation
229///
230/// Returns the preprocessed text with a mapping from joined offsets back to original line numbers.
231/// Join adjacent string fragments and continuations before scanning.
232///
233/// # Examples
234///
235/// ```rust
236/// use keyhog_scanner::multiline::{MultilineConfig, preprocess_multiline};
237///
238/// let value = preprocess_multiline("\"abc\" + \"123\"", &MultilineConfig::default());
239/// assert!(value.text.contains("abc123"));
240/// ```
241pub fn preprocess_multiline(text: &str, config: &MultilineConfig) -> PreprocessedText {
242    if text.len() > MAX_MULTILINE_PREPROCESS_BYTES
243        || text
244            .lines()
245            .any(|line| line.len() > MAX_MULTILINE_LINE_BYTES)
246    {
247        return passthrough_text(text);
248    }
249
250    // Fast path: skip preprocessing if no concatenation indicators present
251    if !has_concatenation_indicators(text) {
252        return passthrough_text(text);
253    }
254    let lines: Vec<&str> = text.lines().collect();
255    if lines.is_empty() {
256        return PreprocessedText {
257            text: String::new(),
258            original_end: 0,
259            mappings: Vec::new(),
260        };
261    }
262
263    // Fast path: content that starts with { or [ is likely JSON/data — pass through.
264    // The multiline preprocessor's string extraction mangles JSON structure.
265    let first_nonwhite = text.trim_start().chars().next().unwrap_or(' ');
266    if first_nonwhite == '{' || first_nonwhite == '[' {
267        return passthrough_text(text);
268    }
269
270    let mut result_lines: Vec<String> = Vec::new();
271    let mut mappings: Vec<LineMapping> = Vec::new();
272    let mut current_offset: usize = 0;
273
274    let mut i = 0;
275    while i < lines.len() {
276        let (joined_line, lines_consumed, line_mappings) =
277            process_line_chain(&lines, i, config, current_offset);
278
279        if !joined_line.is_empty() {
280            // Track the mapping for this joined line
281            let total_len = joined_line.len();
282            for mapping in line_mappings {
283                mappings.push(mapping);
284            }
285            current_offset += total_len + 1; // +1 for newline
286        }
287
288        result_lines.push(joined_line);
289        i += lines_consumed.max(1);
290    }
291
292    let joined_text = result_lines.join("\n");
293
294    // Build the final text: original text + separator + joined segments.
295    // This ensures structural patterns match in the original, AND multiline-joined
296    // secrets match in the appended segments. No double-scanning needed.
297    let original_end = text.len();
298    let mut final_text = text.to_string();
299
300    // Only append joined segments that differ from the original lines
301    // (i.e., segments that were actually joined from multiple lines)
302    if joined_text != text && !joined_text.is_empty() {
303        final_text.push('\n');
304        final_text.push_str(&joined_text);
305
306        // Remap the appended joined text offsets
307        let append_start = original_end + 1; // +1 for the separator newline
308        for mapping in &mut mappings {
309            mapping.start_offset += append_start;
310            mapping.end_offset += append_start;
311        }
312    }
313
314    // Build mappings for the ORIGINAL text (first part)
315    let mut original_mappings = Vec::new();
316    let mut offset = 0;
317    for (line_idx, line) in text.split('\n').enumerate() {
318        let end = offset + line.len();
319        original_mappings.push(LineMapping {
320            line_number: line_idx + 1,
321            start_offset: offset,
322            end_offset: (end + 1).min(original_end),
323        });
324        offset = end + 1;
325    }
326
327    // Combine: original mappings first, then joined mappings
328    original_mappings.extend(mappings);
329
330    PreprocessedText {
331        text: final_text,
332        original_end,
333        mappings: original_mappings,
334    }
335}
336
337fn passthrough_text(text: &str) -> PreprocessedText {
338    let mut mappings = Vec::new();
339    let mut offset = 0;
340    for (i, line) in text.lines().enumerate() {
341        mappings.push(LineMapping {
342            line_number: i + 1,
343            start_offset: offset,
344            end_offset: offset + line.len(),
345        });
346        offset += line.len() + 1;
347    }
348    let original_end = text.len();
349    PreprocessedText {
350        text: text.to_string(),
351        original_end,
352        mappings,
353    }
354}
355
356/// Process a potential chain of concatenated lines starting at the given index.
357/// Returns (joined_line, number_of_lines_consumed, line_mappings).
358fn process_line_chain(
359    lines: &[&str],
360    start_idx: usize,
361    config: &MultilineConfig,
362    base_offset: usize,
363) -> (String, usize, Vec<LineMapping>) {
364    let mut joined_parts: Vec<String> = Vec::new();
365    let mut line_mappings: Vec<LineMapping> = Vec::new();
366    let mut current_idx = start_idx;
367    let mut current_offset = base_offset;
368    // Track the original starting line for the entire joined result
369    let original_start_line = start_idx + 1;
370
371    while current_idx < lines.len() && (current_idx - start_idx) < config.max_join_lines {
372        let line = lines[current_idx];
373        let line_number = current_idx + 1;
374
375        // Check if this line continues a concatenation chain
376        let (part, continues, continuation_type) =
377            extract_string_part(line, config, current_idx > start_idx);
378
379        if current_idx == start_idx {
380            // First line in the chain
381            if !part.is_empty() {
382                let part_start = current_offset;
383                let part_len = part.len();
384                joined_parts.push(part);
385                line_mappings.push(LineMapping {
386                    start_offset: part_start,
387                    end_offset: part_start + part_len,
388                    line_number,
389                });
390                current_offset += part_len;
391            }
392
393            // If first line doesn't continue, we're done
394            if !continues {
395                break;
396            }
397        } else {
398            // Subsequent line in a chain
399            if continuation_type == ContinuationType::Backslash {
400                // Backslash continuation: the entire line continues
401                // We need to handle the case where the backslash continues
402                // but there might be string content before it
403                if !part.is_empty() {
404                    let part_start = current_offset;
405                    let part_len = part.len();
406                    joined_parts.push(part);
407                    line_mappings.push(LineMapping {
408                        start_offset: part_start,
409                        end_offset: part_start + part_len,
410                        line_number,
411                    });
412                    current_offset += part_len;
413                }
414            } else if continuation_type == ContinuationType::PlusOperator
415                || continuation_type == ContinuationType::Implicit
416            {
417                // + operator or implicit concatenation
418                if !part.is_empty() {
419                    let part_start = current_offset;
420                    let part_len = part.len();
421                    joined_parts.push(part);
422                    line_mappings.push(LineMapping {
423                        start_offset: part_start,
424                        end_offset: part_start + part_len,
425                        line_number,
426                    });
427                    current_offset += part_len;
428                }
429            } else if !part.is_empty() {
430                let part_start = current_offset;
431                let part_len = part.len();
432                joined_parts.push(part);
433                line_mappings.push(LineMapping {
434                    start_offset: part_start,
435                    end_offset: part_start + part_len,
436                    line_number,
437                });
438                current_offset += part_len;
439            }
440
441            if !continues {
442                break;
443            }
444        }
445
446        current_idx += 1;
447    }
448
449    let joined = joined_parts.join("");
450
451    // Create a single mapping entry for the entire joined line
452    // pointing to the original starting line
453    let final_mappings = if joined.is_empty() {
454        Vec::new()
455    } else {
456        vec![LineMapping {
457            start_offset: base_offset,
458            end_offset: base_offset + joined.len(),
459            line_number: original_start_line,
460        }]
461    };
462
463    let lines_consumed = (current_idx - start_idx) + 1;
464    (joined, lines_consumed, final_mappings)
465}
466
/// Which concatenation rule `extract_string_part` matched for a line.
#[derive(Debug, PartialEq)]
enum ContinuationType {
    /// No rule matched; the line is passed through unchanged.
    None,
    /// The line ends with a single trailing backslash.
    Backslash,
    /// The line contains `+` concatenation.
    PlusOperator,
    /// Adjacent string literals, or string arguments of a `paste`/`paste0` call.
    Implicit,
    /// The line contains a backtick template literal.
    TemplateLiteral,
}
475
476/// Extract the string part from a line and determine if it continues.
477/// Returns (extracted_part, continues, continuation_type).
478fn extract_string_part(
479    line: &str,
480    config: &MultilineConfig,
481    _is_continuation: bool,
482) -> (String, bool, ContinuationType) {
483    let trimmed = line.trim();
484
485    // Check for backslash continuation at end of line.
486    // Only treat a single trailing `\` as continuation — `\\` (escaped backslash)
487    // is a literal backslash, NOT a line continuation.
488    if config.backslash_continuation && trimmed.ends_with('\\') && !trimmed.ends_with("\\\\") {
489        // Strip exactly one trailing backslash (not all of them).
490        let without_backslash = line
491            .trim_end()
492            .strip_suffix('\\')
493            .unwrap_or(line)
494            .trim_end();
495        let part = extract_string_content(without_backslash);
496        return (part, true, ContinuationType::Backslash);
497    }
498
499    // Check for + operator continuation
500    if config.plus_concatenation {
501        // Match patterns like: "str" + or 'str' + or var + "str"
502        if let Some((part, continues)) = extract_plus_concatenation(line) {
503            return (part, continues, ContinuationType::PlusOperator);
504        }
505    }
506
507    if let Some((part, continues)) = extract_function_concatenation(line) {
508        return (part, continues, ContinuationType::Implicit);
509    }
510
511    // Check for Python-style implicit concatenation
512    if config.python_implicit
513        && let Some((part, continues)) = extract_python_implicit_concatenation(line)
514    {
515        return (part, continues, ContinuationType::Implicit);
516    }
517
518    // NOTE: Parenthesized implicit concatenation (Python `key = ("str"\n"str")`)
519    // is not yet supported. It requires a state machine to track parenthesis depth
520    // across lines, which the current line-by-line architecture doesn't support.
521    // This is a known limitation — tracked for a future refactor.
522
523    // Check for template literal continuation (JavaScript)
524    if config.template_literals
525        && let Some((part, continues)) = extract_template_literal_continuation(line)
526    {
527        return (part, continues, ContinuationType::TemplateLiteral);
528    }
529
530    // Regular line — pass through UNCHANGED.
531    // Only concatenation chains get transformed.
532    (line.to_string(), false, ContinuationType::None)
533}
534
535/// Extract string content handling various quote types.
536fn extract_string_content(line: &str) -> String {
537    let trimmed = line.trim();
538
539    // Try to extract content from quoted strings
540    // Handle single quotes, double quotes, and backticks
541    for (open, close) in [('"', '"'), ('\'', '\''), ('`', '`')] {
542        if let Some(content) = extract_quoted_content(trimmed, open, close) {
543            return content;
544        }
545    }
546
547    // If no quoted content found, return the trimmed line
548    // but filter out common non-secret parts
549    filter_line_content(trimmed)
550}
551
/// Extract the contents of the first `open`…`close` quoted string in `s`.
///
/// Everything before the first `open` character (variable names, operators,
/// string prefixes such as `f`, `r`, `b`, `rf`) is skipped. Backslash escapes
/// are kept verbatim, backslash included, and an escaped quote does not close
/// the string. Returns `None` when `s` has no `open` character or the string
/// is never closed.
///
/// If an `f` or `F` occurs anywhere before the opening quote the string is
/// treated as a Python f-string: `{expr}` interpolations are dropped while the
/// surrounding literal text is kept, so `f"sk-{prefix}abcdef123"` yields
/// `sk-abcdef123`. Doubled braces are escapes for a literal brace, so
/// `f"a{{b}}c"` yields `a{b}c`.
///
/// NOTE(review): the `f` test looks at the whole prefix, so an identifier that
/// merely contains an `f` also switches f-string handling on.
fn extract_quoted_content(s: &str, open: char, close: char) -> Option<String> {
    let quote_at = s.find(open)?;
    let is_fstring = s[..quote_at].contains(|c: char| c == 'f' || c == 'F');
    let mut chars = s[quote_at + open.len_utf8()..].chars().peekable();

    let mut content = String::new();
    let mut escaped = false;

    while let Some(ch) = chars.next() {
        if escaped {
            content.push(ch);
            escaped = false;
        } else if ch == '\\' {
            escaped = true;
            content.push(ch);
        } else if ch == close {
            return Some(content);
        } else if is_fstring && (ch == '{' || ch == '}') && chars.peek() == Some(&ch) {
            // `{{` / `}}` are f-string escapes for one literal brace. Consume
            // the second brace so it is not mistaken for an interpolation.
            chars.next();
            content.push(ch);
        } else if is_fstring && ch == '{' {
            // Drop an interpolation `{expr}`, honouring nested braces.
            let mut brace_depth = 1;
            for c in chars.by_ref() {
                if c == '{' {
                    brace_depth += 1;
                } else if c == '}' {
                    brace_depth -= 1;
                    if brace_depth == 0 {
                        break;
                    }
                }
            }
        } else {
            content.push(ch);
        }
    }

    None // Unclosed string
}
611
/// Reduce an unquoted line to the part most likely to hold a value.
///
/// Leading declaration keywords (`const `, `let `, `var `, ...) are stripped in
/// a fixed order, each one as often as it repeats. If the remainder contains an
/// assignment, the trimmed text after it is returned; the separators ` = `,
/// `= ` and `=` are tried in that order and the first one present decides.
/// Without any `=` the keyword-stripped line is returned as is.
fn filter_line_content(line: &str) -> String {
    const DECLARATION_KEYWORDS: [&str; 11] = [
        "const ", "let ", "var ", "val ", "final ", "static ", "string ", "String ", "auto ",
        "dim ", "my ",
    ];
    const ASSIGNMENT_SEPARATORS: [&str; 3] = [" = ", "= ", "="];

    let remainder = DECLARATION_KEYWORDS
        .iter()
        .fold(line, |rest, keyword| rest.trim_start_matches(*keyword));

    ASSIGNMENT_SEPARATORS
        .iter()
        .find_map(|separator| {
            remainder
                .find(*separator)
                .map(|at| remainder[at + separator.len()..].trim().to_string())
        })
        .unwrap_or_else(|| remainder.to_string())
}
646
647/// Extract content from a + operator concatenation.
648/// Handles multiple + operators on the same line.
649/// Returns (extracted_part, continues).
650fn extract_plus_concatenation(line: &str) -> Option<(String, bool)> {
651    let trimmed = line.trim();
652
653    // Pattern: ... + "string" or ... + 'string' or ... + `string`
654    // or: "string" + ...
655
656    // Check if line ends with + (indicates continuation)
657    let ends_with_plus = trimmed.ends_with('+');
658
659    // Check if line has any + operators
660    if !trimmed.contains('+') {
661        return None;
662    }
663
664    // Split by + and extract string content from each part
665    let parts: Vec<&str> = trimmed.split('+').collect();
666    if parts.len() < 2 {
667        return None;
668    }
669
670    let mut result = String::new();
671    for part in &parts {
672        let content = extract_string_content(part.trim());
673        if !content.is_empty() {
674            result.push_str(&content);
675        }
676    }
677
678    Some((result, ends_with_plus))
679}
680
681/// Extract content from Python-style implicit concatenation.
682/// Returns (extracted_part, continues).
683fn extract_python_implicit_concatenation(line: &str) -> Option<(String, bool)> {
684    let parts = extract_quoted_strings(line);
685
686    if parts.is_empty() {
687        return None;
688    }
689
690    // Join all adjacent string parts
691    let joined = parts.join("");
692    Some((joined, false))
693}
694
695fn extract_function_concatenation(line: &str) -> Option<(String, bool)> {
696    let trimmed = line.trim();
697    if !trimmed.contains("paste0(") && !trimmed.contains("paste(") {
698        return None;
699    }
700
701    let parts = extract_quoted_strings(trimmed);
702    if parts.len() < 2 {
703        return None;
704    }
705
706    Some((parts.join(""), false))
707}
708
/// Collect the contents of every closed single- or double-quoted string in `line`.
///
/// Backslash escapes are kept verbatim, backslash included, and an escaped
/// quote does not close the string. A quote that is never closed contributes
/// nothing; scanning simply resumes at the character after it.
fn extract_quoted_strings(line: &str) -> Vec<String> {
    let chars: Vec<char> = line.chars().collect();
    let mut found = Vec::new();
    let mut pos = 0;

    while let Some(&quote) = chars.get(pos) {
        pos += 1;
        if quote != '"' && quote != '\'' {
            continue;
        }

        // `pos` now points at the first character after the opening quote.
        let mut body = String::new();
        let mut after_backslash = false;
        for (distance, &ch) in chars[pos..].iter().enumerate() {
            if after_backslash {
                after_backslash = false;
            } else if ch == '\\' {
                after_backslash = true;
            } else if ch == quote {
                found.push(body);
                // Resume right after the closing quote.
                pos += distance + 1;
                break;
            }
            body.push(ch);
        }
    }

    found
}
743
/// Extract the literal text of backtick template literals on one line.
///
/// Text between backticks is kept and `${...}` interpolations (nested braces
/// included) are dropped; text outside backticks is ignored.
///
/// Returns `None` when the line has no backtick. Otherwise returns
/// `(literal_text, continues)`, where `continues` is true when the number of
/// backticks is odd, i.e. a template literal is still open at the end of the line.
fn extract_template_literal_continuation(line: &str) -> Option<(String, bool)> {
    let body = line.trim();
    let backticks = body.matches('`').count();
    if backticks == 0 {
        return None;
    }

    let mut literal = String::new();
    let mut inside = false;
    let mut stream = body.chars().peekable();

    while let Some(ch) = stream.next() {
        match ch {
            '`' => inside = !inside,
            '$' if inside && stream.peek() == Some(&'{') => {
                // Swallow the `{` and everything up to its matching `}`.
                stream.next();
                let mut depth = 1usize;
                for inner in stream.by_ref() {
                    match inner {
                        '{' => depth += 1,
                        '}' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        _ => {}
                    }
                }
            }
            other if inside => literal.push(other),
            _ => {}
        }
    }

    Some((literal, backticks % 2 == 1))
}
795
796#[cfg(test)]
797mod tests {
798    use super::*;
799
800    #[test]
801    fn test_python_backslash_continuation() {
802        let text = r#"key = 'sk-proj-' + \
803    'abcdef1234567890'"#;
804
805        let config = MultilineConfig::default();
806        let preprocessed = preprocess_multiline(text, &config);
807
808        assert!(preprocessed.text.contains("sk-proj-"));
809        assert!(preprocessed.text.contains("abcdef1234567890"));
810        assert!(preprocessed.text.contains("sk-proj-abcdef1234567890"));
811    }
812
813    #[test]
814    fn test_python_implicit_concatenation() {
815        let text = r#"api_key = "sk-" "live_" "abcdef123456""#;
816
817        let config = MultilineConfig::default();
818        let preprocessed = preprocess_multiline(text, &config);
819
820        assert!(preprocessed.text.contains("sk-live_abcdef123456"));
821    }
822
823    #[test]
824    fn test_javascript_plus_concatenation() {
825        let text = r#"const key = "sk-" +
826    "test_" +
827    "secret123";"#;
828
829        let config = MultilineConfig::default();
830        let preprocessed = preprocess_multiline(text, &config);
831
832        assert!(preprocessed.text.contains("sk-test_secret123"));
833    }
834
835    #[test]
836    fn test_javascript_template_literal() {
837        // Template literals with interpolation - each part is extracted
838        // Template literals that continue with backslash or have content after interpolation
839        let text = r#"const key = `sk-proj-${id}abcdef123456`;"#;
840
841        let config = MultilineConfig::default();
842        let preprocessed = preprocess_multiline(text, &config);
843
844        assert!(preprocessed.text.contains("sk-proj-"));
845        assert!(preprocessed.text.contains("abcdef123456"));
846    }
847
848    #[test]
849    fn test_go_string_concatenation() {
850        let text = r#"apiKey := "sk-" +
851    "live_" +
852    "abcdef123456""#;
853
854        let config = MultilineConfig::default();
855        let preprocessed = preprocess_multiline(text, &config);
856
857        assert!(preprocessed.text.contains("sk-live_abcdef123456"));
858    }
859
860    #[test]
861    fn test_go_implicit_concatenation() {
862        let text = r#"apiKey := "sk-" "live_" "abcdef123456""#;
863
864        let config = MultilineConfig::default();
865        let preprocessed = preprocess_multiline(text, &config);
866
867        assert!(preprocessed.text.contains("sk-live_abcdef123456"));
868    }
869
870    #[test]
871    fn test_java_plus_concatenation() {
872        let text = r#"String apiKey = "sk-" +
873    "live_" +
874    "abcdef123456";"#;
875
876        let config = MultilineConfig::default();
877        let preprocessed = preprocess_multiline(text, &config);
878
879        assert!(preprocessed.text.contains("sk-live_abcdef123456"));
880    }
881
882    #[test]
883    fn test_csharp_plus_concatenation() {
884        let text = r#"var apiKey = "sk-" +
885    "live_" +
886    "abcdef123456";"#;
887
888        let config = MultilineConfig::default();
889        let preprocessed = preprocess_multiline(text, &config);
890
891        assert!(preprocessed.text.contains("sk-live_abcdef123456"));
892    }
893
894    #[test]
895    fn test_ruby_concatenation() {
896        let text = r#"api_key = "sk-" \
897    + "live_" \
898    + "abcdef123456""#;
899
900        let config = MultilineConfig::default();
901        let preprocessed = preprocess_multiline(text, &config);
902
903        assert!(preprocessed.text.contains("sk-live_abcdef123456"));
904    }
905
906    #[test]
907    fn test_rust_string_concatenation() {
908        let text = r#"let api_key = "sk-".to_string() +
909    "live_" +
910    "abcdef123456";"#;
911
912        let config = MultilineConfig::default();
913        let preprocessed = preprocess_multiline(text, &config);
914
915        assert!(preprocessed.text.contains("sk-live_abcdef123456"));
916    }
917
918    #[test]
919    fn test_multiline_openai_key() {
920        // Real-world pattern: OpenAI API key split across lines
921        let text = r#"OPENAI_API_KEY = "sk-proj-" + \
922    "AbCdEfGhIjKlMnOpQrStUvWxYz" + \
923    "1234567890abcdefghij""#;
924
925        let config = MultilineConfig::default();
926        let preprocessed = preprocess_multiline(text, &config);
927
928        assert!(preprocessed.text.contains("sk-proj-"));
929        assert!(preprocessed.text.contains("AbCdEfGhIjKlMnOpQrStUvWxYz"));
930    }
931
932    #[test]
933    fn test_line_mapping_basic() {
934        let text = "line1\nline2\nline3";
935        let config = MultilineConfig::default();
936        let preprocessed = preprocess_multiline(text, &config);
937
938        let line1 = preprocessed.line_for_offset(0);
939        assert_eq!(line1, Some(1));
940    }
941
942    #[test]
943    fn test_empty_input() {
944        let config = MultilineConfig::default();
945        let preprocessed = preprocess_multiline("", &config);
946
947        assert!(preprocessed.text.is_empty());
948        assert!(preprocessed.mappings.is_empty());
949    }
950
951    #[test]
952    fn test_single_line_no_concatenation() {
953        let text = r#"api_key = "sk-abcdef123456""#;
954        let config = MultilineConfig::default();
955        let preprocessed = preprocess_multiline(text, &config);
956
957        assert!(preprocessed.text.contains("sk-abcdef123456"));
958    }
959
960    #[test]
961    fn test_aws_key_multiline() {
962        // AWS key split with backslash continuation
963        let text = r#"AWS_ACCESS_KEY_ID = "AKIA" \
964    "IOSFODNN7EXAMPLE""#;
965
966        let config = MultilineConfig::default();
967        let preprocessed = preprocess_multiline(text, &config);
968
969        assert!(preprocessed.text.contains("AKIAIOSFODNN7EXAMPLE"));
970    }
971
972    #[test]
973    fn test_github_token_multiline() {
974        // GitHub token split with + operator
975        let text = r#"const token = "ghp_" +
976    "xxxxxxxxxxxxxxxxxxxx" +
977    "xxxxxxxxxxxxxxxxxxxx";"#;
978
979        let config = MultilineConfig::default();
980        let preprocessed = preprocess_multiline(text, &config);
981
982        assert!(preprocessed.text.contains("ghp_"));
983        assert!(preprocessed.text.contains("xxxxxxxxxxxxxxxxxxxx"));
984    }
985
986    #[test]
987    fn test_slack_token_multiline() {
988        // Slack token with implicit concatenation
989        let text =
990            r#"slack_token = "xoxb-" "1234567890" "-" "1234567890" "-" "abcdefghijABCDEFGHIJklmn""#;
991
992        let config = MultilineConfig::default();
993        let preprocessed = preprocess_multiline(text, &config);
994
995        assert!(preprocessed.text.contains("xoxb-"));
996        assert!(preprocessed.text.contains("1234567890"));
997    }
998
999    #[test]
1000    fn test_config_disables_features() {
1001        let text = r#"key = "part1" + "part2""#;
1002
1003        // With plus concatenation disabled
1004        let config = MultilineConfig {
1005            plus_concatenation: false,
1006            ..Default::default()
1007        };
1008        let preprocessed = preprocess_multiline(text, &config);
1009
1010        assert!(preprocessed.text.contains("part1"));
1011        assert!(preprocessed.text.contains("part2"));
1012    }
1013
1014    #[test]
1015    fn test_single_line_plus_concatenation() {
1016        // Test single-line + concatenation (like JS/Python inline string joining)
1017        let text = r#"token = "xoxb-1234567890-" + "1234567890-" + "abcdefghijABCDEFGHIJklmn""#;
1018
1019        let config = MultilineConfig::default();
1020        let preprocessed = preprocess_multiline(text, &config);
1021
1022        eprintln!("Input: {}", text);
1023        eprintln!("Output: {}", preprocessed.text);
1024
1025        assert!(preprocessed.text.contains("xoxb-1234567890-"));
1026        assert!(preprocessed.text.contains("1234567890-"));
1027        assert!(preprocessed.text.contains("abcdefghijABCDEFGHIJklmn"));
1028    }
1029
1030    #[test]
1031    fn test_python_fstring_interpolation() {
1032        // Python f-string: interpolation should be skipped, literal parts preserved
1033        let text = r#"key = f"sk-proj-{prefix}abcdef123456""#;
1034        let content = extract_quoted_content(r#"f"sk-proj-{prefix}abcdef123456""#, '"', '"');
1035        assert_eq!(
1036            content.as_deref(),
1037            Some("sk-proj-abcdef123456"),
1038            "f-string interpolation should be stripped, literals preserved"
1039        );
1040
1041        let config = MultilineConfig::default();
1042        let preprocessed = preprocess_multiline(text, &config);
1043        // The original text should still be present
1044        assert!(preprocessed.text.contains("sk-proj-"));
1045    }
1046
1047    #[test]
1048    fn test_python_fstring_multiline_concat() {
1049        // f-string split with + operator
1050        let text = r#"key = f"sk-proj-" + \
1051    f"{org_id}abcdef123456""#;
1052
1053        let config = MultilineConfig::default();
1054        let preprocessed = preprocess_multiline(text, &config);
1055
1056        assert!(preprocessed.text.contains("sk-proj-"));
1057        assert!(preprocessed.text.contains("abcdef123456"));
1058    }
1059
1060    #[test]
1061    fn test_go_raw_literal_via_backtick() {
1062        // Go raw string literals use backticks — handled by template literal extractor
1063        let text = "apiKey := `sk-live-abcdef123456`";
1064
1065        let config = MultilineConfig::default();
1066        let preprocessed = preprocess_multiline(text, &config);
1067
1068        assert!(preprocessed.text.contains("sk-live-abcdef123456"));
1069    }
1070}