1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
// SC2059: Don't use variables in the printf format string. Use printf '..%s..' "$foo"
//
// Using variables in printf format strings can lead to format string injection vulnerabilities.
// If the variable contains format specifiers like %s, %d, or %n, they will be interpreted
// by printf, potentially causing crashes, information leaks, or arbitrary code execution.
//
// Examples:
// Bad:
// printf "$format" "$value" // Format string injection
// printf "Value: $var\n" // Variable expansion in format
// printf "$msg" // Direct variable as format
//
// Good:
// printf '%s\n' "$value" // Literal format string
// printf 'Value: %s\n' "$var" // Literal format with %s
// printf '%s' "$msg" // Safe variable output
//
// Security Impact:
// - Format string vulnerabilities (arbitrary memory read/write)
// - Information disclosure
// - Denial of service (crashes)
// - Potential code execution in some implementations
//
// Note: Always use literal format strings with printf. Use %s to safely output variables.
use crate::linter::{Diagnostic, LintResult, Severity, Span};
use regex::Regex;
/// Matches a `printf` invocation whose first argument *starts* with a variable
/// expansion, either bare or double-quoted: `printf $fmt`, `printf "$fmt"`,
/// `printf "${fmt}"`.
///
/// The match begins at the `printf` keyword and ends right after the variable
/// reference (a closing quote, if any, is not part of the match).
static PRINTF_WITH_VAR: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    // `\b` stops command names that merely end in `printf` (e.g. `eprintf`)
    // from matching.
    // Only an optional *double* quote may precede the `$`: inside single quotes
    // the shell performs no expansion, so `printf '$var'` is a literal format
    // string and must not be reported.
    Regex::new(r#"\bprintf\s+"?(\$[a-zA-Z_][a-zA-Z0-9_]*|\$\{[a-zA-Z_][a-zA-Z0-9_]*\})"#)
        .expect("PRINTF_WITH_VAR is a valid regex literal")
});
/// Matches a `printf` invocation whose double-quoted format string contains a
/// variable expansion anywhere inside it: `printf "Value: $var\n"`,
/// `printf "Value: ${var}\n"`.
///
/// The match begins at the `printf` keyword and ends right after the last
/// variable reference found before the first closing double quote.
static PRINTF_WITH_EXPANSION: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
    // `\b` stops command names that merely end in `printf` from matching.
    // Both `$name` and `${name...}` are recognised; for the braced form the
    // closing `}` is optional so that `${name:-default}` is still detected.
    Regex::new(
        r#"\bprintf\s+"[^"]*\$(?:\{[a-zA-Z_][a-zA-Z0-9_]*\}?|[a-zA-Z_][a-zA-Z0-9_]*)"#,
    )
    .expect("PRINTF_WITH_EXPANSION is a valid regex literal")
});
/// Lints `source` for SC2059: variables used inside a `printf` format string.
///
/// The input is examined line by line. Lines whose first non-whitespace
/// character is `#` are treated as comments and skipped. For every other line
/// the `PRINTF_WITH_VAR` pattern is tried first and `PRINTF_WITH_EXPANSION` is
/// used only when the first one does not match, so a line yields at most one
/// diagnostic.
///
/// The reported span stays on a single line; the line number is 1-based and
/// the columns are the regex match's start and end byte offsets plus one.
pub fn check(source: &str) -> LintResult {
    const MESSAGE: &str =
        "Don't use variables in the printf format string. Use printf '..%s..' \"$foo\"";

    let mut result = LintResult::new();

    for (index, text) in source.lines().enumerate() {
        // Comment lines never produce diagnostics.
        if text.trim_start().starts_with('#') {
            continue;
        }

        // Prefer the "variable is the format" pattern; fall back to the
        // "variable embedded in a quoted format" pattern.
        let hit = PRINTF_WITH_VAR
            .find(text)
            .or_else(|| PRINTF_WITH_EXPANSION.find(text));
        let Some(hit) = hit else {
            continue;
        };

        let row = index + 1;
        result.add(Diagnostic::new(
            "SC2059",
            Severity::Error,
            MESSAGE.to_string(),
            Span::new(row, hit.start() + 1, row, hit.end() + 1),
        ));
    }

    result
}
#[cfg(test)]
mod tests {
    use super::*;

    // ===== Detection tests: variable used as / inside the format string =====

    #[test]
    fn test_sc2059_variable_as_format() {
        let code = r#"printf "$format" "value""#;
        let result = check(code);
        assert_eq!(result.diagnostics.len(), 1);
        assert_eq!(result.diagnostics[0].code, "SC2059");
        assert_eq!(result.diagnostics[0].severity, Severity::Error);
    }

    #[test]
    fn test_sc2059_braced_variable() {
        let code = r#"printf "${fmt}" "data""#;
        let result = check(code);
        assert_eq!(result.diagnostics.len(), 1);
    }

    #[test]
    fn test_sc2059_variable_expansion_in_format() {
        let code = r#"printf "Value: $var\n""#;
        let result = check(code);
        assert_eq!(result.diagnostics.len(), 1);
    }

    #[test]
    fn test_sc2059_direct_variable() {
        let code = r#"printf "$msg""#;
        let result = check(code);
        assert_eq!(result.diagnostics.len(), 1);
    }

    // ===== Negative tests: literal format strings must not be reported =====

    #[test]
    fn test_sc2059_literal_format_ok() {
        let code = r#"printf '%s\n' "$value""#;
        let result = check(code);
        // Literal format string is safe
        assert_eq!(result.diagnostics.len(), 0);
    }

    #[test]
    fn test_sc2059_literal_with_percent_ok() {
        let code = r#"printf 'Value: %s\n' "$var""#;
        let result = check(code);
        // Literal format with %s placeholder is safe
        assert_eq!(result.diagnostics.len(), 0);
    }

    #[test]
    fn test_sc2059_no_variables_ok() {
        let code = r#"printf 'Hello, World!\n'"#;
        let result = check(code);
        // No variables, safe
        assert_eq!(result.diagnostics.len(), 0);
    }

    #[test]
    fn test_sc2059_comment_ok() {
        let code = r#"# printf "$format" "value""#;
        let result = check(code);
        // Comment lines are skipped entirely
        assert_eq!(result.diagnostics.len(), 0);
    }

    #[test]
    fn test_sc2059_single_quotes_ok() {
        let code = r#"printf 'Format: %s' "$value""#;
        let result = check(code);
        // Single quotes prevent expansion, safe
        assert_eq!(result.diagnostics.len(), 0);
    }

    #[test]
    fn test_sc2059_multiple_args_with_literal() {
        let code = r#"printf '%s %s\n' "$a" "$b""#;
        let result = check(code);
        // Literal format with multiple %s placeholders
        assert_eq!(result.diagnostics.len(), 0);
    }

    // ===== Mutation Coverage Tests - Iteration 1 =====
    // These 7 tests target the missed mutants from baseline (41.7% kill rate)
    // All 7 missed mutants are arithmetic column calculation mutations
    // Target: 90%+ kill rate (11/12 mutants caught)

    #[test]
    fn test_mutation_sc2059_printf_var_start_col_exact() {
        // MUTATION: replace `+` with `*` in the start-column calculation
        // (PRINTF_WITH_VAR path).
        // The span begins at the `printf` keyword (match offset 0), so the
        // 1-based start column is 1; a `* 1` mutant would report 0.
        let bash_code = "printf $fmt arg";
        let result = check(bash_code);
        assert_eq!(result.diagnostics.len(), 1);
        let span = result.diagnostics[0].span;
        assert_eq!(
            span.start_col, 1,
            "Start column must use +1, not *1 (would be 0 with *)"
        );
    }

    #[test]
    fn test_mutation_sc2059_printf_var_end_col_exact() {
        // MUTATION: replace `+` with `*` or `-` in the end-column calculation
        // (PRINTF_WITH_VAR path).
        // The match `printf $fmt` is 11 bytes long, so the exclusive end offset
        // is 11 and the reported end column is 11 + 1 = 12.
        let bash_code = "printf $fmt";
        let result = check(bash_code);
        assert_eq!(result.diagnostics.len(), 1);
        let span = result.diagnostics[0].span;
        assert_eq!(span.end_col, 12, "End column must use +1, not *1 or -1");
    }

    #[test]
    fn test_mutation_sc2059_printf_expansion_start_col_exact() {
        // MUTATION: replace `+` with `*` in the start-column calculation
        // (PRINTF_WITH_EXPANSION path).
        // The span begins at the `printf` keyword, not at the opening quote,
        // so the expected start column is 1.
        let bash_code = r#"printf "hello $name""#;
        let result = check(bash_code);
        assert_eq!(result.diagnostics.len(), 1);
        let span = result.diagnostics[0].span;
        assert_eq!(
            span.start_col, 1,
            "Start column calculation must use +1, not *1"
        );
    }

    #[test]
    fn test_mutation_sc2059_printf_expansion_end_col_exact() {
        // MUTATION: replace `+` with `*` or `-` in the end-column calculation
        // (PRINTF_WITH_EXPANSION path).
        // CRITICAL: the variable must be INSIDE the format string, after other text.
        // printf "$var"      is matched by PRINTF_WITH_VAR, not PRINTF_WITH_EXPANSION
        // printf "text $var" is matched by PRINTF_WITH_EXPANSION only
        let bash_code = r#"printf "text $var""#; // Variable inside format string
        let result = check(bash_code);
        assert_eq!(result.diagnostics.len(), 1);
        let span = result.diagnostics[0].span;
        // Input:       printf "text $var"  (18 bytes, offsets 0-17)
        // Regex match: printf "text $var   (17 bytes, closing quote excluded)
        // Exclusive match end = 17
        // With +1: end_col = 18
        // With *1: end_col = 17
        assert_eq!(
            span.end_col, 18,
            "End column must use +1 (would be 17 with *1)"
        );
    }

    #[test]
    fn test_mutation_sc2059_line_num_calculation() {
        // MUTATION: replace `+` with `*` in the line-number calculation.
        // Tests line number conversion (0-indexed → 1-indexed).
        let bash_code = "# comment\nprintf $var"; // printf on line 2
        let result = check(bash_code);
        assert_eq!(result.diagnostics.len(), 1);
        assert_eq!(
            result.diagnostics[0].span.start_line, 2,
            "Line number must use +1 (0-indexed → 1-indexed)"
        );
    }

    #[test]
    fn test_mutation_sc2059_column_positions_with_offset() {
        // Tests column calculations with leading whitespace.
        // Four leading spaces put `printf` at column 5; the match
        // `printf $fmt` (11 bytes) ends at offset 15, so end_col is 16.
        let bash_code = "    printf $fmt";
        let result = check(bash_code);
        assert_eq!(result.diagnostics.len(), 1);
        let span = result.diagnostics[0].span;
        assert_eq!(span.start_col, 5, "Should account for leading spaces");
        assert_eq!(span.end_col, 16, "End column should be start + length");
    }

    #[test]
    fn test_mutation_sc2059_expansion_column_accuracy() {
        // Tests PRINTF_WITH_EXPANSION pattern column accuracy.
        let bash_code = r#"printf "test $var""#;
        let result = check(bash_code);
        assert_eq!(result.diagnostics.len(), 1);
        let span = result.diagnostics[0].span;
        // The span starts at the `printf` keyword and has positive length.
        assert!(span.end_col > span.start_col, "End must be after start");
        assert_eq!(span.start_col, 1, "Should start at printf");
    }

    // ===== Property-Based Tests - Arithmetic Invariants =====
    // These property tests catch arithmetic mutations (+ → *, + → -) that unit tests miss.
    // Every generated input contains a flaggable printf, so each property first
    // asserts that a diagnostic exists instead of silently passing when none is
    // reported.
    mod property_tests {
        use super::*;
        use proptest::prelude::*;

        proptest! {
            #![proptest_config(proptest::test_runner::Config::with_cases(10))]

            #[test]
            fn prop_column_positions_always_valid(
                var_name in "[a-z]{1,10}",
                leading_spaces in 0usize..20
            ) {
                // PROPERTY: Column positions must always be >= 1 (1-indexed)
                let spaces = " ".repeat(leading_spaces);
                let bash_code = format!("{}printf ${}", spaces, var_name);
                let result = check(&bash_code);
                prop_assert!(
                    !result.diagnostics.is_empty(),
                    "Expected a diagnostic for {:?}", bash_code
                );
                let span = &result.diagnostics[0].span;
                // INVARIANT: Columns are 1-indexed, never 0 or negative
                prop_assert!(span.start_col >= 1, "Start column must be >= 1, got {}", span.start_col);
                prop_assert!(span.end_col >= 1, "End column must be >= 1, got {}", span.end_col);
                // INVARIANT: End must be after start
                prop_assert!(span.end_col > span.start_col,
                    "End col ({}) must be > start col ({})", span.end_col, span.start_col);
            }

            #[test]
            fn prop_line_numbers_always_valid(
                var_name in "[a-z]{1,10}",
                comment_lines in prop::collection::vec("# comment.*", 0..5)
            ) {
                // PROPERTY: Line numbers must always be >= 1 (1-indexed)
                let mut bash_code = comment_lines.join("\n");
                if !bash_code.is_empty() {
                    bash_code.push('\n');
                }
                bash_code.push_str(&format!("printf ${}", var_name));
                let result = check(&bash_code);
                prop_assert!(
                    !result.diagnostics.is_empty(),
                    "Expected a diagnostic for {:?}", bash_code
                );
                let span = &result.diagnostics[0].span;
                // INVARIANT: Lines are 1-indexed, never 0 or negative
                prop_assert!(span.start_line >= 1, "Line number must be >= 1, got {}", span.start_line);
                prop_assert!(span.end_line >= 1, "Line number must be >= 1, got {}", span.end_line);
            }

            #[test]
            fn prop_span_length_reasonable(
                var_name in "[a-z]{1,10}"
            ) {
                // PROPERTY: Span length should be reasonable (not negative, not huge)
                let bash_code = format!("printf ${}", var_name);
                let result = check(&bash_code);
                prop_assert!(
                    !result.diagnostics.is_empty(),
                    "Expected a diagnostic for {:?}", bash_code
                );
                let span = &result.diagnostics[0].span;
                let span_length = span.end_col.saturating_sub(span.start_col);
                // INVARIANT: Span length must be positive and reasonable
                prop_assert!(span_length > 0, "Span length must be > 0");
                prop_assert!(span_length < 1000, "Span length {} seems unreasonable", span_length);
            }

            #[test]
            fn prop_expansion_pattern_columns_valid(
                var_name in "[a-z]{1,10}",
                prefix in "[a-z ]{0,10}"
            ) {
                // PROPERTY: a variable inside a double-quoted format must have valid column positions
                let bash_code = format!("printf \"{}${}\"", prefix, var_name);
                let result = check(&bash_code);
                prop_assert!(
                    !result.diagnostics.is_empty(),
                    "Expected a diagnostic for {:?}", bash_code
                );
                let span = &result.diagnostics[0].span;
                // INVARIANT: Columns are 1-indexed and ordered correctly
                prop_assert!(span.start_col >= 1);
                prop_assert!(span.end_col > span.start_col);
                // INVARIANT: Span covers at least `printf "$x` (10 bytes; the
                // closing quote is not part of the match)
                prop_assert!(span.end_col - span.start_col >= 10);
            }
        }
    }
}