Skip to main content

perl_corpus/
codegen.rs

1//! Randomized Perl code generation utilities.
2
3use proptest::prelude::*;
4use proptest::strategy::BoxedStrategy;
5use proptest::test_runner::{Config, RngAlgorithm, TestRng, TestRunner};
6use rand::rngs::StdRng;
7use rand::seq::SliceRandom;
8use rand::{RngExt, SeedableRng};
9use std::time::{SystemTime, UNIX_EPOCH};
10
11use crate::r#gen;
12
/// Preamble used by `CodegenOptions::default()`: enables `strict` and
/// `warnings`, followed by a blank line before the generated statements.
const DEFAULT_PREAMBLE: &str = "use strict;\nuse warnings;\n\n";
14
/// Families of Perl constructs that the randomized generator can emit.
///
/// Each variant selects one source of generated snippets; callers choose the
/// families they want through `CodegenOptions::kinds`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum StatementKind {
    /// Small, valid building blocks: assignments, conditionals, and subs.
    Basic,
    /// Package and subroutine declarations, plus method calls.
    Declarations,
    /// Object-oriented code: `bless`, inheritance, and overloading.
    ObjectOriented,
    /// `qw(...)` and related list constructs.
    Qw,
    /// Quote-like operators (`q`, `qq`, `qx`, `qr`).
    QuoteLike,
    /// Heredocs placed in common contexts.
    Heredoc,
    /// Stress cases built around whitespace and comments.
    Whitespace,
    /// Loops together with loop-control statements.
    ControlFlow,
    /// `format` statements and their sections.
    Format,
    /// Glob expressions and patterns.
    Glob,
    /// `tie` / `untie` statements.
    Tie,
    /// I/O and filehandle statements.
    Io,
    /// Filetest operators, including stacked checks.
    Filetest,
    /// Calls to built-in functions (pack/unpack, split/join, and so on).
    Builtins,
    /// The list operators `map`, `grep`, and `sort`.
    ListOps,
    /// Expressions that focus on operators.
    Expressions,
    /// Regex matching, substitution, and transliteration.
    Regex,
    /// Inputs that are ambiguous or otherwise stressful for a parser.
    Ambiguity,
    /// Sigil-heavy variable access and dereferencing.
    Sigils,
    /// Compile-time phase blocks (BEGIN/CHECK/UNITCHECK/INIT/END).
    Phasers,
    /// Special and punctuation variables.
    SpecialVars,
}
61
/// Every `StatementKind` variant, listed in declaration order.
///
/// Backs `StatementKind::all()`. This is a hand-maintained literal: when a
/// variant is added to the enum, both the entry list and the array length
/// have to be updated here as well.
const STATEMENT_KINDS_ALL: [StatementKind; 21] = [
    StatementKind::Basic,
    StatementKind::Declarations,
    StatementKind::ObjectOriented,
    StatementKind::Qw,
    StatementKind::QuoteLike,
    StatementKind::Heredoc,
    StatementKind::Whitespace,
    StatementKind::ControlFlow,
    StatementKind::Format,
    StatementKind::Glob,
    StatementKind::Tie,
    StatementKind::Io,
    StatementKind::Filetest,
    StatementKind::Builtins,
    StatementKind::ListOps,
    StatementKind::Expressions,
    StatementKind::Regex,
    StatementKind::Ambiguity,
    StatementKind::Sigils,
    StatementKind::Phasers,
    StatementKind::SpecialVars,
];
85
86impl StatementKind {
87    /// Return all available statement kinds.
88    pub fn all() -> &'static [StatementKind] {
89        &STATEMENT_KINDS_ALL
90    }
91}
92
/// Options for randomized Perl code generation.
///
/// Consumed by `generate_perl_code_with_options`. `Default::default()` yields
/// 20 statements, a time-derived seed, the strict/warnings preamble, no
/// coverage guarantee, and every statement kind.
#[derive(Debug, Clone)]
pub struct CodegenOptions {
    /// Number of statements to generate. Zero produces only the preamble.
    pub statements: usize,
    /// Seed for deterministic output: the same options and seed produce the
    /// same generated text.
    pub seed: u64,
    /// Optional preamble prepended to output (e.g., `use strict;`).
    /// `None` emits no preamble at all.
    pub preamble: Option<String>,
    /// Ensure each selected statement kind appears at least once when possible.
    /// When fewer statements than kinds are requested, distinct kinds are
    /// picked instead of repeating any.
    pub ensure_coverage: bool,
    /// Statement kinds to include in generation. An empty list produces only
    /// the preamble.
    pub kinds: Vec<StatementKind>,
}
107
108impl Default for CodegenOptions {
109    fn default() -> Self {
110        Self {
111            statements: 20,
112            seed: default_seed(),
113            preamble: Some(DEFAULT_PREAMBLE.to_string()),
114            ensure_coverage: false,
115            kinds: StatementKind::all().to_vec(),
116        }
117    }
118}
119
120/// Generate random Perl code with a default statement count.
121pub fn generate_perl_code() -> String {
122    generate_perl_code_with_options(CodegenOptions::default())
123}
124
125/// Generate random Perl code with a specific statement count.
126pub fn generate_perl_code_with_statements(statements: usize) -> String {
127    generate_perl_code_with_options(CodegenOptions { statements, ..Default::default() })
128}
129
130/// Generate random Perl code with explicit statement count and seed.
131pub fn generate_perl_code_with_seed(statements: usize, seed: u64) -> String {
132    generate_perl_code_with_options(CodegenOptions { statements, seed, ..Default::default() })
133}
134
135/// Generate random Perl code with explicit options.
136pub fn generate_perl_code_with_options(options: CodegenOptions) -> String {
137    let mut rng = StdRng::seed_from_u64(options.seed);
138    let mut runner = TestRunner::new_with_rng(
139        Config::default(),
140        TestRng::from_seed(RngAlgorithm::ChaCha, &proptest_seed(options.seed)),
141    );
142    let strategies = build_strategies_for(&options.kinds);
143
144    let mut output = String::new();
145    if let Some(preamble) = options.preamble.as_deref() {
146        output.push_str(preamble);
147    }
148
149    if strategies.is_empty() || options.statements == 0 {
150        return output;
151    }
152
153    let indices = build_strategy_indices(
154        strategies.len(),
155        options.statements,
156        options.ensure_coverage,
157        &mut rng,
158    );
159
160    for (i, idx) in indices.into_iter().enumerate() {
161        let fallback = format!("my $var{} = {};", i, i);
162        let mut snippet = sample_strategy(&strategies[idx], &mut runner, &fallback);
163
164        if !snippet.ends_with('\n') {
165            snippet.push('\n');
166        }
167
168        output.push_str(&snippet);
169        output.push('\n');
170    }
171
172    output
173}
174
/// Derive a seed from the wall clock: whole seconds since the Unix epoch.
///
/// Falls back to `0` when the system clock reports a time before the epoch.
fn default_seed() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
181
/// Expand a 64-bit seed into the 32-byte seed handed to proptest's `TestRng`.
///
/// The output consists of four little-endian 64-bit lanes. Lane `i` holds
/// `seed + i * 0x9E3779B97F4A7C15` (wrapping), so the first lane is the seed
/// itself and each later lane differs from the previous one by that constant.
fn proptest_seed(seed: u64) -> [u8; 32] {
    const LANE_STRIDE: u64 = 0x9E3779B97F4A7C15;

    let mut expanded = [0u8; 32];
    let mut lane_value = seed;
    for lane in expanded.chunks_exact_mut(8) {
        lane.copy_from_slice(&lane_value.to_le_bytes());
        // Repeated wrapping addition equals `seed + i * LANE_STRIDE` mod 2^64.
        lane_value = lane_value.wrapping_add(LANE_STRIDE);
    }
    expanded
}
190
191fn build_strategies_for(kinds: &[StatementKind]) -> Vec<BoxedStrategy<String>> {
192    let mut strategies = Vec::new();
193
194    for kind in kinds {
195        match kind {
196            StatementKind::Basic => strategies.push(basic_statement().boxed()),
197            StatementKind::Declarations => {
198                strategies.push(r#gen::declarations::declaration_in_context().boxed());
199            }
200            StatementKind::ObjectOriented => {
201                strategies.push(r#gen::object_oriented::object_oriented_in_context().boxed());
202            }
203            StatementKind::Qw => {
204                let qw = r#gen::qw::qw_in_context();
205                let constants = r#gen::qw::use_constant_qw().prop_map(|(src, _)| src);
206                strategies.push(prop_oneof![qw, constants].boxed());
207            }
208            StatementKind::QuoteLike => {
209                let quote = r#gen::quote_like::quote_like_single()
210                    .prop_map(|expr| format!("my $text = {};\n", expr));
211                strategies.push(quote.boxed());
212            }
213            StatementKind::Heredoc => strategies.push(r#gen::heredoc::heredoc_in_context().boxed()),
214            StatementKind::Whitespace => {
215                let whitespace = r#gen::whitespace::whitespace_stress_test();
216                let commented = r#gen::whitespace::commented_code();
217                strategies.push(prop_oneof![whitespace, commented].boxed());
218            }
219            StatementKind::ControlFlow => {
220                strategies.push(r#gen::control_flow::loop_with_control().boxed());
221            }
222            StatementKind::Format => {
223                strategies.push(r#gen::format_statements::format_statement().boxed());
224            }
225            StatementKind::Glob => strategies.push(r#gen::glob::glob_in_context().boxed()),
226            StatementKind::Tie => strategies.push(r#gen::tie::tie_in_context().boxed()),
227            StatementKind::Io => strategies.push(r#gen::io::io_in_context().boxed()),
228            StatementKind::Filetest => {
229                strategies.push(r#gen::filetest::filetest_in_context().boxed());
230            }
231            StatementKind::Builtins => {
232                strategies.push(r#gen::builtins::builtin_in_context().boxed());
233            }
234            StatementKind::ListOps => {
235                strategies.push(r#gen::list_ops::list_op_in_context().boxed());
236            }
237            StatementKind::Expressions => {
238                strategies.push(r#gen::expressions::expression_in_context().boxed());
239            }
240            StatementKind::Regex => {
241                strategies.push(r#gen::regex::regex_in_context().boxed());
242            }
243            StatementKind::Ambiguity => {
244                strategies.push(r#gen::ambiguity::ambiguity_in_context().boxed());
245            }
246            StatementKind::Sigils => {
247                strategies.push(r#gen::sigils::sigil_in_context().boxed());
248            }
249            StatementKind::Phasers => {
250                strategies.push(r#gen::phasers::phaser_block().boxed());
251            }
252            StatementKind::SpecialVars => {
253                strategies.push(r#gen::special_vars::special_vars_in_context().boxed());
254            }
255        }
256    }
257
258    strategies
259}
260
261fn build_strategy_indices(
262    strategy_len: usize,
263    statements: usize,
264    ensure_coverage: bool,
265    rng: &mut StdRng,
266) -> Vec<usize> {
267    if strategy_len == 0 || statements == 0 {
268        return Vec::new();
269    }
270
271    let mut indices = Vec::with_capacity(statements);
272
273    if ensure_coverage {
274        let mut all_indices: Vec<usize> = (0..strategy_len).collect();
275        all_indices.shuffle(rng);
276
277        if statements <= strategy_len {
278            indices.extend(all_indices.into_iter().take(statements));
279            return indices;
280        }
281
282        indices.extend(all_indices);
283    }
284
285    while indices.len() < statements {
286        indices.push(rng.random_range(0..strategy_len));
287    }
288
289    indices
290}
291
292fn sample_strategy(
293    strategy: &BoxedStrategy<String>,
294    runner: &mut TestRunner,
295    fallback: &str,
296) -> String {
297    match strategy.new_tree(runner) {
298        Ok(tree) => tree.current(),
299        Err(_) => fallback.to_string(),
300    }
301}
302
303fn basic_statement() -> impl Strategy<Value = String> {
304    prop_oneof![
305        Just("my $x = 1;".to_string()),
306        Just("my @items = (1, 2, 3);".to_string()),
307        Just("my %map = (a => 1, b => 2);".to_string()),
308        Just("sub add { return $_[0] + $_[1]; }".to_string()),
309        Just("if ($x) { print $x; }".to_string()),
310        Just("my $msg = \"hello\"; print $msg;".to_string()),
311    ]
312}
313
// Unit and property tests for the code generator: seed determinism, handling
// of empty inputs, and the index-planning rules of `build_strategy_indices`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    // Same statement count and seed must produce byte-identical output.
    #[test]
    fn generated_code_is_stable_for_seed() {
        let first = generate_perl_code_with_seed(5, 42);
        let second = generate_perl_code_with_seed(5, 42);
        assert_eq!(first, second);
    }

    // With no kinds selected and no preamble there is nothing to emit.
    #[test]
    fn codegen_respects_empty_kinds() {
        let options = CodegenOptions {
            statements: 5,
            seed: 123,
            preamble: None,
            ensure_coverage: false,
            kinds: Vec::new(),
        };
        let code = generate_perl_code_with_options(options);
        assert!(code.is_empty());
    }

    // With coverage on and a budget larger than the strategy count, every
    // index must show up at least once.
    #[test]
    fn strategy_indices_cover_all_kinds_when_requested() {
        let mut rng = StdRng::seed_from_u64(7);
        let strategy_len = 6;
        let statements = 30;

        let indices = build_strategy_indices(strategy_len, statements, true, &mut rng);
        assert_eq!(indices.len(), statements);

        let unique: HashSet<usize> = indices.iter().copied().collect();
        assert_eq!(
            unique.len(),
            strategy_len,
            "expected all strategy indices to appear at least once"
        );
        assert!(unique.iter().all(|idx| *idx < strategy_len));
    }

    // With coverage on and a budget smaller than the strategy count, the
    // chosen indices must all be distinct.
    #[test]
    fn strategy_indices_do_not_repeat_when_budget_is_smaller_than_kind_count() {
        let mut rng = StdRng::seed_from_u64(11);
        let strategy_len = 10;
        let statements = 4;

        let indices = build_strategy_indices(strategy_len, statements, true, &mut rng);
        assert_eq!(indices.len(), statements);

        let unique: HashSet<usize> = indices.iter().copied().collect();
        assert_eq!(
            unique.len(),
            statements,
            "expected unique indices when sampling fewer statements than kinds"
        );
    }

    proptest! {
        // The plan always has the requested length and only valid indices,
        // regardless of the coverage flag.
        #[test]
        fn prop_strategy_indices_stay_in_bounds(
            strategy_len in 1usize..32,
            statements in 1usize..128,
            ensure_coverage in any::<bool>(),
            seed in any::<u64>(),
        ) {
            let mut rng = StdRng::seed_from_u64(seed);
            let indices = build_strategy_indices(strategy_len, statements, ensure_coverage, &mut rng);

            prop_assert_eq!(indices.len(), statements);
            prop_assert!(indices.iter().all(|idx| *idx < strategy_len));
        }

        // Whenever the budget is at least the strategy count, coverage mode
        // must hit every index.
        #[test]
        fn prop_strategy_indices_cover_all_when_requested(
            strategy_len in 1usize..24,
            extra_statements in 0usize..48,
            seed in any::<u64>(),
        ) {
            let statements = strategy_len + extra_statements;
            let mut rng = StdRng::seed_from_u64(seed);
            let indices = build_strategy_indices(strategy_len, statements, true, &mut rng);

            let mut seen = vec![false; strategy_len];
            for idx in indices {
                seen[idx] = true;
            }

            prop_assert!(seen.into_iter().all(|v| v));
        }

        // Identical options (including the seed) must yield identical text.
        #[test]
        fn prop_seeded_codegen_is_deterministic_for_options(
            statements in 0usize..40,
            seed in any::<u64>(),
            ensure_coverage in any::<bool>(),
        ) {
            let options = CodegenOptions {
                statements,
                seed,
                preamble: Some("use strict;\n".to_string()),
                ensure_coverage,
                kinds: vec![
                    StatementKind::Basic,
                    StatementKind::ControlFlow,
                    StatementKind::Regex,
                ],
            };

            let first = generate_perl_code_with_options(options.clone());
            let second = generate_perl_code_with_options(options);
            prop_assert_eq!(first, second);
        }

        // Basic snippets are single-line, so the number of non-blank lines
        // equals the number of requested statements.
        #[test]
        fn prop_basic_only_codegen_emits_requested_statement_count(
            statements in 0usize..50,
            seed in any::<u64>(),
        ) {
            let code = generate_perl_code_with_options(CodegenOptions {
                statements,
                seed,
                preamble: None,
                ensure_coverage: true,
                kinds: vec![StatementKind::Basic],
            });

            let non_empty_line_count = code.lines().filter(|line| !line.trim().is_empty()).count();
            prop_assert_eq!(non_empty_line_count, statements);
        }
    }
}