require 'fileutils'
require 'json'

require 'ffi/clang'
# Reopens the ffi-clang binding module to declare the :storage_class enum and
# bind clang_Cursor_getStorageClass as Lib.get_storage_class.
module FFI::Clang::Lib
  # Flat list of name, value pairs in the form FFI's `enum` accepts.
  storage_class_pairs = [
    :invalid,                0,
    :none,                   1,
    :extern,                 2,
    :static,                 3,
    :private_extern,         4,
    :opencl_workgroup_local, 5,
    :auto,                   6,
    :register,               7,
  ]
  enum :storage_class, storage_class_pairs

  # Takes a CXCursor by value and returns a :storage_class enum member.
  attach_function(
    :get_storage_class,
    :clang_Cursor_getStorageClass,
    [FFI::Clang::Lib::CXCursor.by_value],
    :storage_class
  )
end
# Adds storage-class helpers to FFI::Clang::Cursor.
module FFI::Clang
  class Cursor
    # Returns the :storage_class enum value that Lib.get_storage_class reports
    # for @cursor.
    def storage_class
      Lib.get_storage_class(@cursor)
    end

    # Returns true when the storage class is :extern or :private_extern,
    # false otherwise.
    def has_external_storage
      %i[extern private_extern].include?(storage_class)
    end
  end
end
class Runner
attr_reader :unresolved
attr_reader :code_for_resolve
# Creates the empty bookkeeping tables used by run, deep_resolve and write_out,
# and derives the two working directories from the command line.
#
# ARGV[0] becomes @basepath and ARGV[1] becomes @out_path; both are made
# absolute and get a trailing '/'. File.absolute_path raises if either
# argument is missing.
def initialize
  # Filled by run.
  @file_analysis = {}
  @global_method_to_base_filename = {}
  @file_to_method_and_pos = {}
  @external_variables = []

  # Filled by deep_resolve.
  @resolved_static_by_base_filename = {}
  @resolved_global = []
  @symbols_to_output = {}
  @include_files_to_output = []
  @unresolved = []

  # Filled by blocklist and mock.
  @blocklist = []
  @mock = {}

  @basepath, @out_path = ARGV.values_at(0, 1).map { |dir| File.absolute_path(dir) + '/' }
end
# Registers a symbol name on the blocklist. deep_resolve prints an error and
# exits as soon as it is asked to resolve a blocklisted name.
#
# Returns the blocklist array.
def blocklist(symbol)
  @blocklist.push(symbol)
end
# Registers replacement C source for a symbol. deep_resolve does not follow
# the references of a mocked symbol, and write_out emits the given code in
# place of the original definition.
#
# Returns the code that was stored.
def mock(symbol, code)
  @mock.store(symbol, code)
end
# Builds the list of C files to analyse under @basepath, analyses each one
# (reusing a cached result when FileAnalysis.restore finds one) and merges the
# per-file results into the runner-wide tables.
#
# If the file list contains exactly one file, that file is analysed from
# scratch, the path of the saved analysis is printed and the process exits
# with status 1.
def run
  include_patterns = [
    'src/backend/**/*.c',
    'src/common/**/*.c',
    'src/port/**/*.c',
    'src/timezone/**/*.c',
    'src/pl/plpgsql/src/*.c',
    'contrib/pgcrypto/*.c',
  ]
  excluded_paths = [
    'src/backend/libpq/be-secure-openssl.c',
    'src/backend/utils/adt/levenshtein.c',
    'src/backend/utils/adt/like_match.c',
    'src/backend/utils/adt/jsonpath_scan.c',
    'src/backend/utils/misc/guc-file.c',
    'src/backend/utils/sort/qsort_tuple.c',
    'src/backend/bootstrap/bootscanner.c',
    'src/backend/regex/regc_color.c',
    'src/backend/regex/regc_cvec.c',
    'src/backend/regex/regc_lex.c',
    'src/backend/regex/regc_pg_locale.c',
    'src/backend/regex/regc_locale.c',
    'src/backend/regex/regc_nfa.c',
    'src/backend/regex/rege_dfa.c',
    'src/backend/replication/repl_scanner.c',
    'src/backend/replication/libpqwalreceiver/libpqwalreceiver.c',
    'src/backend/replication/syncrep_scanner.c',
    'src/backend/port/posix_sema.c',
    'src/common/fe_memutils.c',
    'src/common/restricted_token.c',
    'src/common/unicode/norm_test.c',
    'src/backend/utils/mb/win866.c',
    'src/backend/utils/mb/win1251.c',
    'src/backend/utils/mb/iso.c',
    'src/port/dirent.c',
    'src/port/win32error.c',
    'src/port/win32env.c',
    'src/port/win32security.c',
    'src/port/gettimeofday.c',
    'src/port/strlcpy.c',
    'src/port/strlcat.c',
    'src/port/unsetenv.c',
    'src/port/getaddrinfo.c',
    'src/port/getrusage.c',
    'src/port/pg_crc32c_armv8.c',
    'src/port/pg_crc32c_armv8_choose.c',
    'src/backend/jit/llvm/llvmjit_expr.c',
    'src/backend/jit/llvm/llvmjit_deform.c',
    'src/backend/jit/llvm/llvmjit.c',
    'src/backend/libpq/be-secure-gssapi.c',
    'src/common/protocol_openssl.c',
  ]
  exclude_patterns = [
    'src/backend/port/dynloader/*.c',
    'src/backend/port/win32/*.c',
    'src/backend/port/win32_*.c',
    'src/backend/snowball/**/*.c',
  ]

  candidates = include_patterns.flat_map { |pattern| Dir.glob(@basepath + pattern) }
  rejected = excluded_paths.map { |relative| @basepath + relative } +
             exclude_patterns.flat_map { |pattern| Dir.glob(@basepath + pattern) }
  files = candidates - rejected

  # Single-file mode: analyse, report where the result was saved, and stop.
  if files.size == 1
    only_file = files.first
    puts format('Analysing single file: %s', only_file)
    saved_to = analyze_file(only_file).save
    puts format('Result: %s', saved_to)
    exit 1
  end

  # Symbols for which a second global definition is never reported.
  tolerated_duplicates = ['main', 'Pg_magic_func', 'pg_open_tzfile', '_PG_init']

  files.each do |file|
    print '.'
    analysis = FileAnalysis.restore(file, @basepath) || analyze_file(file)
    analysis.save
    @file_analysis[file] = analysis

    analysis.symbol_to_file.each_key do |symbol|
      next if analysis.static_symbols.include?(symbol)

      previous_owner = @global_method_to_base_filename[symbol]
      # NOTE(review): every path stored here comes from a '*.c' glob, so the
      # end_with?('c') test looks like it can never let this message through;
      # confirm whether the duplicate report is meant to be disabled.
      if previous_owner && !tolerated_duplicates.include?(symbol) && !previous_owner.end_with?('c')
        puts format('Error processing %s, symbol %s already defined by %s', file, symbol, previous_owner)
      end
      @global_method_to_base_filename[symbol] = file
    end

    @file_to_method_and_pos.merge!(analysis.file_to_symbol_positions)
    @external_variables.concat(analysis.external_variables)
  end

  puts "\nFinished parsing"
end
# Result of analysing one C source file, with JSON persistence so that a
# later run can reuse it instead of re-parsing the file.
class FileAnalysis
  attr_accessor :references, :static_symbols, :symbol_to_file,
                :file_to_symbol_positions, :external_variables, :included_files

  # filename - path of the analysed C file.
  # basepath - prefix that is stripped from filename to build the cache path.
  # The remaining arguments pre-populate the corresponding accessors.
  def initialize(filename, basepath, references = {}, static_symbols = [],
                 symbol_to_file = {}, file_to_symbol_positions = {}, external_variables = [],
                 included_files = [])
    @filename = filename
    @basepath = basepath
    @references = references
    @static_symbols = static_symbols
    @symbol_to_file = symbol_to_file
    @file_to_symbol_positions = file_to_symbol_positions
    @external_variables = external_variables
    @included_files = included_files
  end

  # Writes the analysis as pretty-printed JSON to the cache location, creating
  # the directory when needed.
  #
  # Returns the path of the file that was written.
  def save
    json = JSON.pretty_generate({
      references: @references,
      static_symbols: @static_symbols,
      symbol_to_file: @symbol_to_file,
      file_to_symbol_positions: @file_to_symbol_positions,
      external_variables: @external_variables,
      included_files: @included_files,
    })
    file = self.class.analysis_filename(@filename, @basepath)
    FileUtils.mkdir_p(File.dirname(file))
    File.write(file, json)
    file
  end

  # Loads a previously saved analysis for filename.
  #
  # Returns a FileAnalysis, or nil when no cache file exists.
  def self.restore(filename, basepath)
    json = File.read(analysis_filename(filename, basepath))
    hsh = JSON.parse(json)
    new(filename, basepath, hsh['references'], hsh['static_symbols'],
        hsh['symbol_to_file'], hsh['file_to_symbol_positions'], hsh['external_variables'],
        hsh['included_files'])
  rescue Errno::ENOENT
    nil
  end

  # Maps a source path to its cache path: ./tmp/analysis/<path relative to
  # basepath>, with a trailing ".c" replaced by ".json".
  #
  # This stays public because #save calls it through self.class. basepath is
  # escaped so regex metacharacters in a directory name are matched literally,
  # and only a literal ".c" suffix is rewritten.
  def self.analysis_filename(filename, basepath)
    relative = filename.sub(/\A#{Regexp.escape(basepath)}/, '').sub(/\.c\z/, '.json')
    File.absolute_path('./tmp/analysis') + '/' + relative
  end
end
# Parses one C file with libclang and returns a FileAnalysis describing it.
#
# Recorded for every function definition and every variable without external
# storage that sits directly under the translation unit and is located in the
# same directory as `file` (or in a file ending in '_impl.h'):
#   * symbol_to_file and file_to_symbol_positions (start/end offsets),
#   * static_symbols for internal linkage,
#   * external_variables for non-const variables that are not function
#     pointers,
#   * references: spellings of decl-ref and call expressions inside the symbol
#     whose definition is not a direct child of the symbol itself.
#
# The compiler flags are fixed: they shell out to `xcrun` for the SDK path and
# point at /usr/local/opt/openssl/include.
#
# Raises RuntimeError when a recorded symbol has a linkage other than
# :external or :internal.
def analyze_file(file)
  index = FFI::Clang::Index.new(true, true)
  compiler_args = [
    '-I', @basepath + 'src/include',
    '-I', '/usr/local/opt/openssl/include',
    '-I', `xcrun --sdk macosx --show-sdk-path`.strip + '/usr/include',
    '-DDLSUFFIX=".bundle"',
    '-msse4.2',
    '-g',
    '-DUSE_ASSERT_CHECKING',
  ]
  translation_unit = index.parse_translation_unit(file, compiler_args)
  root_cursor = translation_unit.cursor
  analysis = FileAnalysis.new(file, @basepath)

  # Keep only headers that live under @basepath, excluding the file itself.
  project_includes = []
  translation_unit.inclusions do |included_file, _inclusions|
    project_includes << included_file if included_file.start_with?(@basepath) && included_file != file
  end
  analysis.included_files = project_includes.uniq.sort

  root_cursor.visit_children do |node, node_parent|
    node_file = node.location.file
    in_scope = node_file && (File.dirname(file) == File.dirname(node_file) || node_file.end_with?('_impl.h'))
    next :recurse unless in_scope && node_parent.kind == :cursor_translation_unit

    function_definition = node.kind == :cursor_function && node.definition?
    owned_variable = node.kind == :cursor_variable && !node.has_external_storage
    next :recurse unless function_definition || owned_variable

    name = node.spelling
    analysis.symbol_to_file[name] = node_file

    linkage = node.linkage
    case linkage
    when :external
      # Globally visible symbol: nothing extra to record.
    when :internal
      (analysis.static_symbols << name).uniq!
    else
      fail format('Unknown linkage: %s', linkage.inspect)
    end

    extent = node.extent
    start_offset = extent.start.offset
    end_offset = extent.end.offset
    # Variables get one extra character past the reported extent
    # (presumably the terminating ';' -- TODO confirm).
    end_offset += 1 if node.kind == :cursor_variable

    if node.kind == :cursor_variable && %i[external internal].include?(linkage) &&
       !node.type.const_qualified? && !node.type.array_element_type.const_qualified? &&
       node.type.pointee.kind != :type_function_proto
      analysis.external_variables << name
    end

    (analysis.file_to_symbol_positions[node_file] ||= {})[name] = [start_offset, end_offset]

    node.visit_children do |child, _child_parent|
      # Skip anything whose definition is a direct child of this symbol.
      unless child.definition.semantic_parent == node
        if child.kind == :cursor_decl_ref_expr || child.kind == :cursor_call_expr
          ((analysis.references[name] ||= []) << child.spelling).uniq!
        end
      end
      :recurse
    end

    :recurse
  end

  analysis
end
# Recursion depth beyond which deep_resolve gives up and exits.
RESOLVE_MAX_DEPTH = 100

# Marks method_name for output and recursively does the same for every symbol
# it references, according to the analysis data collected by run.
#
# method_name               - symbol to resolve.
# depth                     - current recursion depth.
# trail                     - names of the callers that led here (for errors).
# global_resolved_by_parent - global symbols already queued by the callers.
# static_resolved_by_parent - static symbols already queued by the callers.
# static_base_filename      - file whose analysis must be used when the symbol
#                             is static; nil means look the symbol up in the
#                             global table.
#
# Symbols with no known file or no implementation are appended to @unresolved.
# Mocked symbols are recorded for output but their references are not followed.
# Exits with status 1 when a blocklisted symbol is reached or the depth limit
# is exceeded; raises RuntimeError when the analysis for the file is missing.
def deep_resolve(method_name, depth: 0, trail: [], global_resolved_by_parent: [], static_resolved_by_parent: [], static_base_filename: nil)
  if @blocklist.include?(method_name)
    puts 'ERROR: Hit blocklist entry ' + method_name
    puts 'Trail: ' + trail.inspect
    exit 1
  end

  if depth > RESOLVE_MAX_DEPTH
    puts 'ERROR: Exceeded max depth'
    puts method_name.inspect
    puts trail.inspect
    exit 1
  end

  base_filename = static_base_filename || @global_method_to_base_filename[method_name]
  unless base_filename
    (@unresolved << method_name).uniq!
    return
  end

  analysis = @file_analysis[base_filename]
  fail "could not find analysis data for #{base_filename}" if analysis.nil?

  implementation_filename = analysis.symbol_to_file[method_name]
  unless implementation_filename
    (@unresolved << method_name).uniq!
    return
  end

  (@symbols_to_output[implementation_filename] ||= []) << method_name
  @include_files_to_output = (@include_files_to_output + analysis.included_files).uniq

  return if @mock.key?(method_name)

  referenced = analysis.references[method_name] || []
  static_refs, global_refs = referenced.partition { |name| analysis.static_symbols.include?(name) }

  @resolved_static_by_base_filename[base_filename] ||= []
  resolved_static = @resolved_static_by_base_filename[base_filename]

  # Drop whatever the callers or an earlier resolution already queued, then
  # claim the remainder before recursing so nested calls skip them too.
  global_dependents = (global_refs - global_resolved_by_parent).reject { |name| @resolved_global.include?(name) }
  static_dependents = (static_refs - static_resolved_by_parent).reject { |name| resolved_static.include?(name) }
  @resolved_global.concat(global_dependents)
  resolved_static.concat(static_dependents)

  next_trail = trail + [method_name]
  queued_globals = global_resolved_by_parent + global_dependents
  queued_statics = static_resolved_by_parent + static_dependents

  global_dependents.each do |symbol|
    deep_resolve(symbol, depth: depth + 1, trail: next_trail, global_resolved_by_parent: queued_globals)
  end

  # Static symbols are only meaningful within the file that defines them, so
  # the current file is passed along explicitly.
  static_dependents.each do |symbol|
    deep_resolve(
      symbol, depth: depth + 1, trail: next_trail,
      global_resolved_by_parent: queued_globals,
      static_resolved_by_parent: queued_statics,
      static_base_filename: base_filename
    )
  end
end
# Tells whether filename is one of the C files that are treated as include
# files (regc_*/rege_* and a fixed list of generated or template sources) or a
# header whose name ends in "_impl.h".
#
# Returns the matched tail of the path (truthy) or nil.
#
# The dot in "_impl.h" is matched literally and both patterns are anchored at
# the very end of the string.
def special_include_file?(filename)
  filename[%r{/(reg[ce]_\w+|guc-file|qsort_tuple|repl_scanner|levenshtein|bootscanner|like_match)\.c\z}] ||
    filename[%r{/\w+_impl\.h\z}]
end
# Writes the reduced source tree into @out_path.
#
# For every file in @symbols_to_output a copy is produced in which the code of
# all recorded symbols that were NOT selected is cut out (only preprocessor
# directives inside the cut range are kept), mocked symbols are replaced by
# their mock code, and selected external variables are marked `__thread`.
# Afterwards every file in @include_files_to_output is copied, with `__thread`
# added to extern declarations of the variables converted above.
#
# Raises RuntimeError when a recorded position lies beyond the end of a file.
#
# NOTE(review): recorded offsets are used as character indexes into the string
# returned by File.read; if the analysis stores byte offsets, files containing
# multi-byte characters would be cut at the wrong place -- confirm.
def write_out
all_thread_local_variables = []
@symbols_to_output.each do |filename, symbols|
file_thread_local_variables = []
# Start from every recorded symbol position in this file and remove the ones
# that belong to symbols we keep; what remains is the code to cut out.
dead_positions = (@file_to_method_and_pos[filename] || {}).dup
symbols.each do |symbol|
# Mocked symbols and external variables stay in dead_positions on purpose:
# the loop below rewrites them instead of copying them verbatim.
next if @mock.key?(symbol)
next if @external_variables.include?(symbol)
alive_pos = dead_positions[symbol]
# Removes every entry that shares the kept symbol's position, not just the
# entry stored under its own name.
dead_positions.delete_if { |_,pos| pos == alive_pos }
end
full_code = File.read(filename)
# Header comment listing the symbols that were requested from this file.
str = "/*--------------------------------------------------------------------\n"
str += " * Symbols referenced in this file:\n"
symbols.each do |symbol|
str += format(" * - %s\n", symbol)
end
str += " *--------------------------------------------------------------------\n"
str += " */\n\n"
next_start_pos = 0
# Walks the ranges in hash order; this relies on dead_positions being ordered
# by position within the file.
dead_positions.each do |symbol, pos|
fail format("Position overrun for %s in %s, next_start_pos (%d) > file length (%d)", symbol, filename, next_start_pos, full_code.size) if next_start_pos > full_code.size
fail format("Position overrun for %s in %s, dead position pos[0]-1 (%d) > file length (%d)", symbol, filename, pos[0]-1, full_code.size) if pos[0]-1 > full_code.size
# Copy everything between the previous range and this one. The range starts
# one character before the recorded start offset.
str += full_code[next_start_pos...(pos[0]-1)]
skipped_code = full_code[(pos[0]-1)...pos[1]]
if @mock.key?(symbol)
# Replace the original definition with the registered mock code.
str += "\n" + @mock[symbol] + "\n"
elsif @external_variables.include?(symbol) && symbols.include?(symbol)
# A requested variable: keep its definition but make it thread-local.
file_thread_local_variables << symbol
if skipped_code.include?('static')
str += "\n" + skipped_code.strip.gsub('static', 'static __thread') + "\n"
else
str += "\n__thread " + skipped_code.strip + "\n"
end
else
# Unwanted symbol: drop its code but keep any preprocessor directives
# (including backslash-continued lines) found inside the removed range.
str += "\n" + skipped_code.scan(/^(#\s*(?:include|define|undef|if|ifdef|ifndef|else|endif))((?:[^\n]*\\\s*\n)*)([^\n]*)$/m).map { |m| m.compact.join }.join("\n")
end
next_start_pos = pos[1]
end
# Remainder of the file after the last removed range.
str += full_code[next_start_pos..-1]
# Add __thread to extern/PGDLLIMPORT declarations of the converted variables
# that appear in this same file.
file_thread_local_variables.each do |variable|
str.gsub!(/(PGDLLIMPORT|extern)\s+(const|volatile)?\s*(\w+)\s+(\*{0,2})#{variable}(\[\])?;/, "\\1 __thread \\2 \\3 \\4#{variable}\\5;")
end
all_thread_local_variables += file_thread_local_variables
# Special include files keep their base name; everything else is flattened to
# its path relative to @basepath with '/' replaced by '_'.
if special_include_file?(filename)
out_name = File.basename(filename)
else
out_name = filename.gsub(%r{^#{@basepath}}, '').gsub('/', '_')
end
File.write(@out_path + out_name, str)
end
@include_files_to_output.each do |include_file|
# Special include files were already written by the loop above if needed.
next if special_include_file?(include_file)
# Headers under src/include keep their path below @out_path (minus 'src/');
# any other header is placed directly in @out_path/include/.
if include_file.start_with?(@basepath + 'src/include')
out_file = @out_path + include_file.gsub(%r{^#{@basepath}src/}, '')
else
out_file = @out_path + 'include/' + File.basename(include_file)
end
code = File.read(include_file)
# Keep header declarations consistent with the definitions made thread-local.
all_thread_local_variables.each do |variable|
code.gsub!(/(PGDLLIMPORT|extern)\s+(const|volatile)?\s*(\w+)\s+(\*{0,2})#{variable}(\[\])?;/, "\\1 __thread \\2 \\3 \\4#{variable}\\5;")
end
FileUtils.mkdir_p File.dirname(out_file)
File.write(out_file, code)
end
end
end
# Driver: analyse the source tree, configure the blocklist and mocks, resolve
# the required entry points and write the result.
#
# Each statement is on its own line. Several runner.mock calls had been
# joined onto single lines without a separator, which Ruby cannot parse.
runner = Runner.new
runner.run

# Symbols that must never be pulled in; deep_resolve aborts when it reaches one.
runner.blocklist('SearchSysCache')
runner.blocklist('heap_open')
runner.blocklist('relation_open')
runner.blocklist('RelnameGetRelid')
runner.blocklist('ProcessClientWriteInterrupt')
runner.blocklist('typeStringToTypeName')
runner.blocklist('LWLockAcquire')
runner.blocklist('SPI_freeplan')
runner.blocklist('get_ps_display')
runner.blocklist('pq_beginmessage')

# Replacement implementations; references of mocked symbols are not followed.
runner.mock('ProcessInterrupts', 'void ProcessInterrupts(void) {}')
runner.mock('PqCommMethods', 'const PQcommMethods *PqCommMethods = NULL;')
runner.mock('proc_exit', 'void proc_exit(int code) { printf("Terminating process due to FATAL error\n"); exit(1); }')
runner.mock('send_message_to_server_log', 'static void send_message_to_server_log(ErrorData *edata) {}')
runner.mock('send_message_to_frontend', 'static void send_message_to_frontend(ErrorData *edata) {}')
runner.mock('format_type_be', 'char * format_type_be(Oid type_oid) { return pstrdup("-"); }')
runner.mock('build_row_from_class', 'static PLpgSQL_row *build_row_from_class(Oid classOid) { return NULL; }')
runner.mock('plpgsql_build_datatype', 'PLpgSQL_type * plpgsql_build_datatype(Oid typeOid, int32 typmod, Oid collation, TypeName *origtypname) { PLpgSQL_type *typ; typ = (PLpgSQL_type *) palloc0(sizeof(PLpgSQL_type)); typ->typname = pstrdup("UNKNOWN"); typ->ttype = PLPGSQL_TTYPE_SCALAR; return typ; }')
runner.mock('parse_datatype', 'static PLpgSQL_type * parse_datatype(const char *string, int location) { PLpgSQL_type *typ; typ = (PLpgSQL_type *) palloc0(sizeof(PLpgSQL_type)); typ->typname = pstrdup(string); typ->ttype = strcmp(string, "RECORD") == 0 ? PLPGSQL_TTYPE_REC : PLPGSQL_TTYPE_SCALAR; return typ; }')
runner.mock('get_collation_oid', 'Oid get_collation_oid(List *name, bool missing_ok) { return -1; }')
runner.mock('plpgsql_parse_wordtype', 'PLpgSQL_type * plpgsql_parse_wordtype(char *ident) { return NULL; }')
runner.mock('plpgsql_parse_wordrowtype', 'PLpgSQL_type * plpgsql_parse_wordrowtype(char *ident) { return NULL; }')
runner.mock('plpgsql_parse_cwordtype', 'PLpgSQL_type * plpgsql_parse_cwordtype(List *idents) { return NULL; }')
runner.mock('plpgsql_parse_cwordrowtype', 'PLpgSQL_type * plpgsql_parse_cwordrowtype(List *idents) { return NULL; }')
runner.mock('function_parse_error_transpose', 'bool function_parse_error_transpose(const char *prosrc) { return false; }')
runner.mock('free_expr', "static void free_expr(PLpgSQL_expr *expr) {}")
runner.mock('make_return_stmt', %(
static PLpgSQL_stmt *
make_return_stmt(int location)
{
PLpgSQL_stmt_return *new;
Assert(plpgsql_curr_compile->fn_rettype == VOIDOID);
new = palloc0(sizeof(PLpgSQL_stmt_return));
new->cmd_type = PLPGSQL_STMT_RETURN;
new->lineno = plpgsql_location_to_lineno(location);
new->expr = NULL;
new->retvarno = -1;
int tok = yylex();
if (tok != ';')
{
plpgsql_push_back_token(tok);
new->expr = read_sql_expression(';', ";");
}
return (PLpgSQL_stmt *) new;
}
))

# Entry points whose transitive dependencies are written to the output tree.
runner.deep_resolve('raw_parser')
runner.deep_resolve('plpgsql_compile_inline')
runner.deep_resolve('plpgsql_free_function_memory')
runner.deep_resolve('SetDatabaseEncoding')
runner.deep_resolve('MemoryContextInit')
runner.deep_resolve('AllocSetContextCreate')
runner.deep_resolve('MemoryContextSwitchTo')
runner.deep_resolve('CurrentMemoryContext')
runner.deep_resolve('MemoryContextDelete')
runner.deep_resolve('AllocSetDeleteFreeList')
runner.deep_resolve('palloc0')
runner.deep_resolve('CopyErrorData')
runner.deep_resolve('FlushErrorState')
runner.deep_resolve('bms_first_member')
runner.deep_resolve('bms_free')
runner.deep_resolve('bms_next_member')
runner.deep_resolve('bms_num_members')
runner.deep_resolve('makeBitString')
runner.deep_resolve('pg_toupper')
runner.deep_resolve('pg_qsort')
runner.deep_resolve('raw_expression_tree_walker')
runner.deep_resolve('hash_bytes')
runner.deep_resolve('MemoryContextAllocExtended')
runner.deep_resolve('pg_printf')

runner.write_out