import dataclasses
import os
import re
import sys
from collections import defaultdict
from typing import Optional

import pyclip
@dataclasses.dataclass
class Entry:
    """A single named constant collected from the scanned headers."""

    # Name the constant is emitted under (the result of make_variant_name).
    variant_name: str
    # Comment text found next to the definition; None when the definition had
    # no comment or was matched in its enum-style ``NAME = value`` form.
    comment: Optional[str]
@dataclasses.dataclass
class ConstEntry:
    """An associated constant that aliases an enum variant shared by several names."""

    # Comment of the original definition, or None when it had none.
    comment: Optional[str]
    # Evaluated value of the constant; it is used to look up the variant the
    # alias points at and is formatted with hex(), so it is expected to be an
    # integer rather than the textual expression.
    value: int
def snake_to_pascal_case(snake_case_str: str) -> str:
    """Return *snake_case_str* converted to PascalCase.

    Underscores become word breaks, every word is title-cased with
    ``str.title`` and the words are glued back together.  ``str.title`` also
    starts a new word after any non-letter, so a letter following a digit is
    capitalised as well (``"ppc64le"`` -> ``"Ppc64Le"``).
    """
    spaced = snake_case_str.replace("_", " ")
    words = spaced.title().split(" ")
    return "".join(words)
def make_variant_name(c_define_name: str, bitflags: bool) -> str:
    """Return the output name for a C constant name.

    In bitflags mode the name is kept exactly as written; otherwise it is
    converted to PascalCase with snake_to_pascal_case.
    """
    return c_define_name if bitflags else snake_to_pascal_case(c_define_name)
def _read_header_lines():
    """Return the raw lines of every header file that is scanned for constants.

    Reads all regular files in the binutils-gdb ``include/elf`` and ``elfcpp``
    directories below ``$HOME/Documents`` and then ``/usr/include/elf.h``.

    Raises:
        KeyError: if the ``HOME`` environment variable is not set.
        OSError: if a directory or file cannot be read.
    """
    home = os.environ['HOME']
    raw_lines = []
    for directory in (
        f'{home}/Documents/binutils-gdb/include/elf',
        f'{home}/Documents/binutils-gdb/elfcpp',
    ):
        # os.listdir() yields names in arbitrary order; sorting keeps the
        # "first definition wins" de-duplication reproducible between runs.
        for file_name in sorted(os.listdir(directory)):
            path = os.path.join(directory, file_name)
            if not os.path.isfile(path):
                continue
            # Undecodable bytes are replaced so that they cannot abort the scan.
            with open(path, 'r', errors='replace') as f:
                raw_lines += f.readlines()
    with open('/usr/include/elf.h', 'r', errors='replace') as f:
        raw_lines += f.readlines()
    return raw_lines


def _join_logical_lines(raw_lines):
    """Merge backslash continuations and fold ``//`` comment lines into the next line."""
    logical_lines = []
    i = 0
    while i < len(raw_lines):
        cur = raw_lines[i]
        i += 1
        # A trailing backslash (optionally followed by whitespace) continues
        # the definition on the next physical line.
        while i < len(raw_lines):
            m = re.match(r'^(.*)\\\s*$', cur)
            if m is None:
                break
            cur = m.group(1) + raw_lines[i]
            i += 1
        # A line that holds only a `//` comment is dropped and its text is
        # appended to the following line as a `/* ... */` comment, which is
        # the comment form the `#define` pattern understands.
        while i < len(raw_lines):
            m = re.match(r'^\s*//(.*)', cur)
            if m is None:
                break
            cur = raw_lines[i] + f' /* {m.group(1)} */'
            i += 1
        logical_lines.append(cur)
    return logical_lines


def _disambiguate_leading_digit(raw_name, comment):
    """Return *raw_name* with a prefix so that it no longer starts with a digit."""
    if raw_name.lower().startswith('68k'):
        return f'm{raw_name}'
    if raw_name.startswith('386'):
        return f'i{raw_name}'
    if raw_name.startswith('390'):
        return f's{raw_name}'
    if comment is None:
        raise Exception(f'variant name {repr(raw_name)} starts with a digit and has no comment')
    # Otherwise fall back to the first word of the comment when it is one of
    # the accepted vendor names.
    comment_first_word = comment.split(' ')[0].lower()
    if comment_first_word in ['intel', 'motorola', 'renesas', 'freescale', 'wdc', 'nxp']:
        return f'{comment_first_word}_{raw_name}'
    raise Exception(f'variant name {repr(raw_name)} starts with a digit, comment is {comment}')


def _collect_entries(lines, prefix, bitflags):
    """Group every ``<prefix>_*`` constant found in *lines* by its raw value text."""
    escaped_prefix = re.escape(prefix)
    # `#define [OLD_]<prefix>_<NAME> <value> [/* comment */]`
    define_re = re.compile(
        rf'^#define\s+(OLD_)?{escaped_prefix}_([A-Za-z0-9_]+)\s+([^/]+)(?:\s*/\*(.*)\*)?'
    )
    # `<prefix>_<NAME> = <value>` as written inside a C/C++ enum.  The pattern
    # is deliberately not anchored with `$`: a folded `//` comment sits after
    # the newline of the line, and an end anchor would reject such lines and
    # silently drop the constant.
    enum_re = re.compile(rf'^\s*{escaped_prefix}_([A-Za-z0-9_]+)\s*=\s*(.*)')

    entries_of_value_str = defaultdict(list)
    seen_raw_names = set()
    seen_variant_names = set()
    for line in lines:
        m = define_re.match(line)
        if m is not None:
            old_marker, raw_name, value_str, comment = m.groups()
            if old_marker:
                raw_name = 'OLD_' + raw_name
        else:
            m = enum_re.match(line)
            if m is None:
                continue
            raw_name, value_str = m.groups()
            comment = None
        value_str = value_str.strip()
        # `<prefix>_NUM` entries are never emitted.
        if raw_name.lower() == 'num':
            continue
        if comment is not None:
            comment = comment.strip()
        # The first definition of a name wins; later ones are ignored.
        if raw_name in seen_raw_names:
            continue
        seen_raw_names.add(raw_name)
        if raw_name[0].isdigit():
            raw_name = _disambiguate_leading_digit(raw_name, comment)
        variant_name = make_variant_name(raw_name, bitflags)
        if variant_name not in seen_variant_names:
            seen_variant_names.add(variant_name)
            entries_of_value_str[value_str].append(Entry(variant_name, comment))
    return entries_of_value_str


def _clean_value_expr(raw_value):
    """Strip a trailing ``//`` comment and the enum-entry comma from a value expression."""
    value_str = raw_value
    m = re.fullmatch('(.*)//.*', value_str)
    if m is not None:
        value_str = m.group(1)
    value_str = value_str.strip()
    if value_str.endswith(','):
        value_str = value_str[:-1]
    return value_str


def _evaluate_value(raw_value, owners, raw_value_of_name, prefix, bitflags):
    """Evaluate a C value expression, expanding references to other constants.

    *owners* is only used to name the affected constants in error messages.
    """
    reference_re = re.compile(rf'(OLD_)?{re.escape(prefix)}_([A-Z0-9a-z_]+)')

    def substitute(match):
        raw_name = (match.group(1) or '') + match.group(2)
        referenced_name = make_variant_name(raw_name, bitflags)
        if referenced_name not in raw_value_of_name:
            raise Exception(f'failed to replace {raw_name} in variant {owners}')
        # Parenthesise so that operator precedence inside the referenced
        # expression cannot leak into the surrounding one.
        return f'({_clean_value_expr(raw_value_of_name[referenced_name])})'

    value_str = _clean_value_expr(raw_value)
    # Referenced values may reference further constants, so expand repeatedly.
    # The bound stops definitions that (indirectly) reference themselves.
    for _ in range(16):
        value_str, substitutions = reference_re.subn(substitute, value_str)
        if substitutions == 0:
            break
    else:
        raise Exception(f'cyclic reference while expanding {raw_value!r} for variant {owners}')

    # Python has no U/L/UL/LL integer suffixes; drop them from decimal and
    # hexadecimal literals.
    value_str = re.sub(r'\b(0[xX][0-9A-Fa-f]+|[0-9]+)[uUlL]+\b', r'\1', value_str)
    try:
        # NOTE(security): eval() runs text taken from the scanned headers.
        # That is acceptable only because they are local, trusted files;
        # builtins are withheld since plain arithmetic does not need them.
        return eval(value_str, {'__builtins__': {}}, {})
    except Exception as err:
        raise Exception(f'failed to evaluate {value_str!r} for variant {owners}') from err


def _render(entries_of_value, bitflags):
    """Render the grouped constants as Rust-style source text."""
    out = []
    enum_consts = {}
    enum_variant_name_of_value = {}
    for value, entries in entries_of_value.items():
        if bitflags:
            # Bitflags mode: one `const` item per name, duplicates allowed.
            for entry in entries:
                if entry.comment is not None:
                    out.append(f'/// {entry.comment}\n')
                out.append(f'const {entry.variant_name} = {hex(value)};\n')
            continue

        # Enum mode: names sharing a value become one `_AOrB` variant, and
        # each individual name is remembered as an alias constant.
        full_name = 'Or'.join(entry.variant_name for entry in entries)
        if len(entries) > 1:
            full_name = '_' + full_name
            if any(entry.comment is not None for entry in entries):
                comment_contents = [
                    entry.comment if entry.comment is not None else entry.variant_name
                    for entry in entries
                ]
                comment_contents = [
                    content.strip().removesuffix('.') for content in comment_contents
                ]
                comment = ' Or '.join(comment_contents)
                out.append(f'/// {comment}\n')
            for entry in entries:
                const_name = entry.variant_name
                if const_name in enum_consts and enum_consts[const_name].value != value:
                    raise Exception(f'multiple values for const name {const_name}: {[value, enum_consts[const_name].value]}')
                enum_consts[const_name] = ConstEntry(entry.comment, value)
        else:
            comment = entries[0].comment
            if comment is not None:
                out.append(f'/// {comment}\n')
        out.append(f'{full_name} = {hex(value)},\n')
        enum_variant_name_of_value[value] = full_name

    if enum_consts:
        # Close the enum body and emit the aliases in an `impl` block whose
        # type name is the placeholder REPLACE.
        out.append('}\n')
        out.append('impl REPLACE {\n')
        for const_name, const_entry in enum_consts.items():
            if const_entry.comment is not None:
                out.append(f'/// {const_entry.comment}\n')
            variant_name = enum_variant_name_of_value[const_entry.value]
            out.append('#[allow(non_upper_case_globals)]\n')
            out.append(f'pub const {const_name} : Self = Self::{variant_name};\n')
        # NOTE(review): the closing brace is emitted only together with the
        # impl block - confirm this matches the intended paste target.
        out.append('}\n')
    return ''.join(out).replace('Aarch64', 'AArch64')


def extract_constants(prefix: str, bitflags: bool):
    """Extract all ``<prefix>_*`` constants from the ELF headers to the clipboard.

    The headers are scanned for ``#define <prefix>_NAME value``,
    ``#define OLD_<prefix>_NAME value`` and enum-style ``<prefix>_NAME = value``
    definitions.  Each value expression is evaluated (references to other
    constants with the same prefix are expanded first), constants are grouped
    by their evaluated value, and the generated Rust-style text is copied to
    the clipboard with ``pyclip``.

    Args:
        prefix: Constant prefix without the trailing underscore.
        bitflags: When true, emit one ``const NAME = 0x..;`` line per constant
            and keep the C names.  Otherwise emit PascalCase enum variants,
            merging names that share a value into a single ``_AOrB`` variant
            plus alias constants in an ``impl REPLACE`` block.

    Raises:
        KeyError: if the ``HOME`` environment variable is not set.
        OSError: if a header directory or file cannot be read.
        Exception: if a name starting with a digit cannot be disambiguated, a
            referenced constant is unknown, or a value cannot be evaluated.
    """
    lines = _join_logical_lines(_read_header_lines())
    entries_of_value_str = _collect_entries(lines, prefix, bitflags)

    # Variant names are unique across all groups, so one dict replaces a scan
    # over every group for each reference that has to be expanded.
    raw_value_of_name = {
        entry.variant_name: raw_value
        for raw_value, entries in entries_of_value_str.items()
        for entry in entries
    }

    entries_of_value = defaultdict(list)
    for raw_value, entries in entries_of_value_str.items():
        owners = ', '.join(entry.variant_name for entry in entries)
        value = _evaluate_value(raw_value, owners, raw_value_of_name, prefix, bitflags)
        entries_of_value[value].extend(entries)

    pyclip.copy(_render(entries_of_value, bitflags))
def main():
    """Command-line entry point: ``extract.py <prefix> [bitflags]``.

    The first argument is the constant prefix.  Bitflags mode is selected only
    when the second argument is exactly ``bitflags``; any other value (or no
    second argument) selects enum mode.  With a wrong number of arguments the
    usage text is written to stderr and the process exits with status 1
    instead of continuing into an IndexError.
    """
    if len(sys.argv) not in [2, 3]:
        print('usage: extract.py <prefix> [bitflags]', file=sys.stderr)
        sys.exit(1)
    bitflags = len(sys.argv) > 2 and sys.argv[2] == 'bitflags'
    extract_constants(sys.argv[1], bitflags)


if __name__ == '__main__':
    main()