# rable 0.2.0
#
# A Rust implementation of the Parable bash parser — complete GNU Bash 5.3-compatible parsing with Python bindings
# Documentation
"""Generate test cases from bash-oracle fuzzing.

Runs the mutation fuzzer, filters to cases where both Rable and bash-oracle
succeed but produce different output, deduplicates by category, and writes
.tests files in tests/oracle/.
"""

import os
import random
import sys

# Put this file's own directory first on sys.path so that `import fuzz` below
# can resolve a module located alongside this script (presumably fuzz.py lives
# there; confirm against the repository layout). Must run before the import.
sys.path.insert(0, os.path.dirname(__file__))
import fuzz

# Output directory for the generated .tests files: `oracle/` next to this file.
ORACLE_DIR = os.path.join(os.path.dirname(__file__), "oracle")


def categorize(inp, oracle_out, rable_out):
    """Pick a coarse bucket name for a Rable / bash-oracle mismatch.

    The checks are heuristic substring tests on the fuzzed input and on the
    two parser outputs. They are tried in a fixed priority order; the first
    one that matches decides the category, and "other" is the fallback.
    """
    outputs_differ = oracle_out != rable_out

    # Oracle output spans several lines while Rable's is a single line that
    # contains a (semi ...) or (background ...) wrapper.
    rable_wrapped = "(semi" in rable_out or "(background" in rable_out
    if "\n" in oracle_out and "\n" not in rable_out and rable_wrapped:
        return "top_level_separation"

    # Any array-assignment syntax in the input.
    if "=(" in inp:
        return "array_normalization"

    # ANSI-C quoting survived in exactly one of the two outputs.
    if "$'" in inp and ("$'" in oracle_out) != ("$'" in rable_out):
        return "ansi_c_processing"

    # Backslash escapes in the input combined with any raw output difference.
    escape_markers = ("\\x", "\\U", "\\c", "\\\\")
    if outputs_differ and any(marker in inp for marker in escape_markers):
        return "ansi_c_escapes"

    # Both sides produced cond-and nodes but arranged them differently.
    if outputs_differ and "cond-and" in oracle_out and "cond-and" in rable_out:
        return "conditional_assoc"

    # Bracket in the input and the two parsers split it into a different
    # number of words.
    if "[" in inp and "word" in oracle_out and "word" in rable_out:
        if oracle_out.count("(word") != rable_out.count("(word"):
            return "word_boundaries"

    # Remaining input-driven buckets, highest priority first.
    input_buckets = (
        (("$(",), "cmdsub_formatting"),
        (("<(", ">("), "procsub_formatting"),
        (("<<",), "heredoc_formatting"),
    )
    for markers, label in input_buckets:
        if any(marker in inp for marker in markers):
            return label

    # A ">" shows up in both outputs.
    if ">" in oracle_out and ">" in rable_out:
        return "redirect_formatting"

    # Locale-translated string syntax in the input.
    if '$"' in inp:
        return "locale_strings"

    return "other"


def _next_diff(seeds, seen_inputs, max_input_len):
    """Run one fuzz trial and return the mismatch it found, if any.

    Picks a random seed, applies 1-3 rounds of ``fuzz.mutate`` and runs both
    parsers on the result.

    Args:
        seeds: Non-empty sequence of seed inputs to mutate.
        seen_inputs: Set of inputs already tried; updated in place.
        max_input_len: Mutated inputs longer than this are discarded.

    Returns:
        ``(mutated, oracle, rable)`` when both parsers succeed and their
        normalized outputs differ, otherwise ``None``.
    """
    mutated = random.choice(seeds)
    for _ in range(random.randint(1, 3)):
        mutated = fuzz.mutate(mutated)
    if len(mutated) > max_input_len or mutated in seen_inputs:
        return None
    seen_inputs.add(mutated)

    # A None or "<error>" result is treated as "oracle could not parse this".
    oracle = fuzz.run_oracle(mutated)
    if oracle is None or oracle == "<error>":
        return None

    # NOTE(review): unlike the oracle, a None result from run_rable is not
    # filtered here - confirm run_rable can never return None.
    rable = fuzz.run_rable(mutated)
    if rable == "<error>":
        return None

    if fuzz.normalize(rable) == fuzz.normalize(oracle):
        return None
    return mutated, oracle, rable


def _write_test_files(by_category, out_dir):
    """Write one ``<category>.tests`` file per category and return the test count.

    Only the input and the oracle output of each case are written; the Rable
    output is not stored. Files are written as UTF-8 so the result does not
    depend on the platform's locale encoding.
    """
    os.makedirs(out_dir, exist_ok=True)
    total_tests = 0
    for cat, cases in sorted(by_category.items()):
        fname = os.path.join(out_dir, f"{cat}.tests")
        with open(fname, "w", encoding="utf-8") as f:
            f.write(f"# Oracle-derived tests: {cat}\n")
            f.write("# Generated by fuzzing against bash-oracle\n")
            f.write(f"# {len(cases)} test cases\n\n")
            for j, (inp, oracle, _rable) in enumerate(cases, start=1):
                f.write(f"=== {cat} {j}\n")
                f.write(f"{inp}\n")
                f.write("---\n")
                f.write(f"{oracle}\n")
                f.write("---\n\n")
        total_tests += len(cases)
        print(f"  {cat}: {len(cases)} tests -> {fname}")
    return total_tests


def main(*, iterations=50000, seed=123, max_per_category=20, max_input_len=300):
    """Fuzz Rable against bash-oracle and write the mismatches as .tests files.

    Args:
        iterations: Number of fuzz trials to run.
        seed: Value passed to ``random.seed`` so runs are reproducible.
        max_per_category: Maximum number of cases kept per category.
        max_input_len: Mutated inputs longer than this are skipped.

    Exits with status 1 when bash-oracle is unavailable or when no seed
    inputs could be loaded.
    """
    if not fuzz.HAS_ORACLE:
        print("bash-oracle not found. Set BASH_ORACLE env var.")
        sys.exit(1)

    random.seed(seed)
    seeds = fuzz.load_test_inputs()
    if not seeds:
        # random.choice would raise IndexError on an empty sequence.
        print("No seed inputs loaded; nothing to fuzz.")
        sys.exit(1)
    print(f"Loaded {len(seeds)} seeds, fuzzing...")

    # Collect differences by category
    by_category = {}
    seen_inputs = set()

    for i in range(iterations):
        diff = _next_diff(seeds, seen_inputs, max_input_len)
        if diff is not None:
            bucket = by_category.setdefault(categorize(*diff), [])
            if len(bucket) < max_per_category:
                bucket.append(diff)

        # Report on every 10000th trial, whether or not that particular trial
        # produced a diff.
        if (i + 1) % 10000 == 0:
            total = sum(len(v) for v in by_category.values())
            print(f"  [{i+1}/{iterations}] {total} diffs across {len(by_category)} categories")

    total_tests = _write_test_files(by_category, ORACLE_DIR)

    print(f"\nGenerated {total_tests} tests across {len(by_category)} categories")


# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()