import os
import random
import sys
# Put this script's own directory at the front of the import search path
# before importing ``fuzz`` (presumably a sibling module living next to this
# file -- confirm), so the import does not depend on the current directory.
sys.path.insert(0, os.path.dirname(__file__))
import fuzz
# Output directory, located next to this script, that receives one
# ``<category>.tests`` file per mismatch category.
ORACLE_DIR = os.path.join(os.path.dirname(__file__), "oracle")
def categorize(inp, oracle_out, rable_out):
    """Assign a mismatch between two parser outputs to a coarse category.

    The checks are textual heuristics applied in a fixed priority order; the
    first one that matches wins.

    Args:
        inp: The shell source text that was fed to both tools.
        oracle_out: Output produced for ``inp`` by the oracle.
        rable_out: Output produced for ``inp`` by rable.

    Returns:
        A category name such as ``"top_level_separation"`` or
        ``"cmdsub_formatting"``; ``"other"`` when no heuristic applies.
    """
    # The oracle emitted several lines while rable folded everything into a
    # single-line ``(semi ...)`` / ``(background ...)`` node.
    oracle_is_multiline = "\n" in oracle_out
    rable_is_single_line = "\n" not in rable_out
    if oracle_is_multiline and rable_is_single_line:
        if "(semi" in rable_out or "(background" in rable_out:
            return "top_level_separation"

    if "=(" in inp:
        return "array_normalization"

    # Only one of the two outputs still carries the $'...' quoting.
    if "$'" in inp:
        if ("$'" in rable_out) != ("$'" in oracle_out):
            return "ansi_c_processing"

    outputs_differ = oracle_out != rable_out

    escape_markers = ("\\x", "\\U", "\\c", "\\\\")
    if outputs_differ and any(marker in inp for marker in escape_markers):
        return "ansi_c_escapes"

    if outputs_differ and "cond-and" in oracle_out and "cond-and" in rable_out:
        return "conditional_assoc"

    # Bracket-containing input where the two outputs split words differently.
    if "[" in inp and "word" in oracle_out and "word" in rable_out:
        if oracle_out.count("(word") != rable_out.count("(word"):
            return "word_boundaries"

    # Checks that depend on the input text alone, in priority order.
    input_marker_table = (
        (("$(",), "cmdsub_formatting"),
        (("<(", ">("), "procsub_formatting"),
        (("<<",), "heredoc_formatting"),
    )
    for markers, label in input_marker_table:
        if any(marker in inp for marker in markers):
            return label

    if ">" in oracle_out and ">" in rable_out:
        return "redirect_formatting"

    if '$"' in inp:
        return "locale_strings"

    return "other"
def _find_mismatch(seeds, seen_inputs):
    """Mutate one random seed and return it if the two tools disagree on it.

    Args:
        seeds: Non-empty sequence of seed inputs to pick from.
        seen_inputs: Set of inputs already tried; updated in place.

    Returns:
        ``(mutated, oracle, rable)`` when the normalized outputs differ, or
        ``None`` when the candidate is longer than 300 characters, was already
        tried, either output is ``None`` / ``"<error>"``, or the normalized
        outputs match.
    """
    mutated = random.choice(seeds)
    for _ in range(random.randint(1, 3)):
        mutated = fuzz.mutate(mutated)
    if len(mutated) > 300 or mutated in seen_inputs:
        return None
    seen_inputs.add(mutated)

    oracle = fuzz.run_oracle(mutated)
    if oracle is None or oracle == "<error>":
        return None
    rable = fuzz.run_rable(mutated)
    # Treat a missing rable result like the oracle's: a None here would
    # otherwise blow up later in the substring checks of categorize().
    if rable is None or rable == "<error>":
        return None
    if fuzz.normalize(rable) == fuzz.normalize(oracle):
        return None
    return mutated, oracle, rable


def _write_tests_file(cat, cases):
    """Write the cases of one category to ``ORACLE_DIR/<cat>.tests``.

    Only the input and the oracle output of each case are stored. Returns the
    path of the file that was written.
    """
    fname = os.path.join(ORACLE_DIR, f"{cat}.tests")
    # Explicit UTF-8: fuzzed inputs may contain non-ASCII characters, which the
    # platform default encoding is not guaranteed to be able to represent.
    with open(fname, "w", encoding="utf-8") as f:
        f.write(f"# Oracle-derived tests: {cat}\n")
        f.write("# Generated by fuzzing against bash-oracle\n")
        f.write(f"# {len(cases)} test cases\n\n")
        for j, (inp, oracle, _rable) in enumerate(cases):
            f.write(f"=== {cat} {j+1}\n")
            f.write(f"{inp}\n")
            f.write("---\n")
            f.write(f"{oracle}\n")
            f.write("---\n\n")
    return fname


def main():
    """Fuzz rable against bash-oracle and save the mismatches as test files.

    Runs 50000 mutation rounds with a fixed random seed, keeps at most 20
    mismatching inputs per category (see ``categorize``), and writes one
    ``<category>.tests`` file per category into ``ORACLE_DIR``.

    Exits with status 1 when the oracle is unavailable or no seed inputs
    could be loaded.
    """
    if not fuzz.HAS_ORACLE:
        print("bash-oracle not found. Set BASH_ORACLE env var.")
        sys.exit(1)

    random.seed(123)
    seeds = fuzz.load_test_inputs()
    if not seeds:
        # random.choice() raises an opaque IndexError on an empty sequence.
        print("No seed inputs loaded; nothing to fuzz.")
        sys.exit(1)
    print(f"Loaded {len(seeds)} seeds, fuzzing...")

    by_category = {}
    seen_inputs = set()
    n = 50000
    for i in range(n):
        case = _find_mismatch(seeds, seen_inputs)
        if case is not None:
            bucket = by_category.setdefault(categorize(*case), [])
            if len(bucket) < 20:
                bucket.append(case)
        # Report on every 10000th iteration, including the (common) ones
        # where the candidate was skipped or produced no mismatch.
        if (i + 1) % 10000 == 0:
            total = sum(len(v) for v in by_category.values())
            print(f" [{i+1}/{n}] {total} diffs across {len(by_category)} categories")

    os.makedirs(ORACLE_DIR, exist_ok=True)
    total_tests = 0
    for cat, cases in sorted(by_category.items()):
        fname = _write_tests_file(cat, cases)
        total_tests += len(cases)
        print(f" {cat}: {len(cases)} tests -> {fname}")
    print(f"\nGenerated {total_tests} tests across {len(by_category)} categories")
# Start the fuzzing run only when this file is executed as a script, so that
# importing the module (e.g. to reuse ``categorize``) has no side effects
# beyond the definitions above.
if __name__ == "__main__":
    main()