from io import StringIO
import argparse
import ci.git as git
import glob
import os
import pandas as pd
import subprocess
def main():
    """Entry point: parse the command line and build the performance report."""
    __build_report(__read_args())
def __read_args() -> argparse.Namespace:
    """Parse the command-line arguments of the reporting utility.

    All three options are mandatory. The report code concatenates ``libos``
    into strings and joins ``log_dir`` into a glob pattern, so leaving either
    out would otherwise surface later as an opaque ``TypeError`` instead of a
    usage message.

    Returns:
        The parsed namespace with ``branch``, ``libos`` and ``log_dir``.

    Raises:
        SystemExit: raised by argparse when a required option is missing or
            an unknown option is given.
    """
    description: str = 'CI Utility for reporting performance statistics of Demikernel.'
    parser = argparse.ArgumentParser(prog='plot.py', description=description)
    parser.add_argument('--branch', required=True, help='Current branch name.')
    parser.add_argument('--libos', required=True, help='LibOS Name.')
    parser.add_argument('--log-dir', required=True, help='The directory where the logs are stored.')
    return parser.parse_args()
def __build_report(args):
    """Produce the full performance report for the parsed arguments.

    Looks up the head commit of ``args.branch``, echoes the libos name and
    commit id, collects the performance data found under ``args.log_dir``,
    prints it as a table, renders a flame graph and finally stores the data
    in ``perf_data.csv``.
    """
    head_commit = git.get_head_commit(args.branch)

    # Echo the identifying information before any heavy work starts.
    print('libos = ' + args.libos)
    print('commit id = ' + head_commit)

    stats = __get_perf_data(args.log_dir)
    __print_perf_data(stats)
    __create_flame_graph(args.libos, head_commit, stats)

    # The CSV is written last, after the flame graph has been rendered.
    stats.to_csv('perf_data.csv', index=False, header=True)
def __get_perf_data(log_dir):
    """Collect and average the profiler output found under ``log_dir``.

    Every file matching ``system-test*stdout*`` anywhere below ``log_dir`` is
    converted to collapsed-stack CSV rows. Rows sharing the same collapsed
    stack are averaged and rounded to two decimals.

    Returns:
        A DataFrame with one row per distinct collapsed stack.
    """
    pattern = os.path.join(log_dir, '**', 'system-test*stdout*')
    stack_rows = []
    for log_path in glob.glob(pattern, recursive=True):
        with open(log_path, 'r') as log_file:
            __populate_collapsed_stacks(stack_rows, log_file)

    column_names = [
        'collapsed_stack',
        'num_calls',
        'percent_time',
        'cycles_per_call',
        'nanoseconds_per_call',
    ]
    csv_buffer = StringIO('\n'.join(stack_rows))
    raw_df = pd.read_csv(csv_buffer, names=column_names)

    # Average the metrics of identical stacks seen across files and threads.
    averaged = raw_df.groupby(by='collapsed_stack').mean()
    return averaged.round(2).reset_index()
def __populate_collapsed_stacks(collapsed_stacks, file):
    """Append one collapsed-stack CSV row per profiler row found in ``file``.

    Rows are processed thread by thread, in file order. The call depth of
    each row decides how much of the running stack is kept before the row's
    function name is pushed; the stack is then joined with ``;`` and emitted
    together with the row's metrics.

    Args:
        collapsed_stacks: list that receives the generated CSV rows (mutated
            in place).
        file: iterable of text lines holding the profiler output.
    """
    frame = __get_file_df(file)
    for tid in frame['thread_id'].unique():
        rows_of_thread = frame[frame['thread_id'] == tid]
        frames = []
        for _, row in rows_of_thread.iterrows():
            depth = row['call_depth']
            if depth == 1:
                # A depth-1 row starts a brand new stack.
                frames = []
            else:
                # Keep only the ancestors of the current row.
                frames = frames[:depth-1]
            frames.append(row['function_name'])
            collapsed_stack = ";".join(frames)
            collapsed_stacks.append(f"{collapsed_stack},{row['num_calls']},{row['percent_time']},{row['cycles_per_call']},{row['nanoseconds_per_call']}")
def __get_file_df(file):
    """Parse the profiler lines of ``file`` into a DataFrame.

    The first CSV field of every profiler line is a run of ``+`` characters;
    the number of ``+`` is stored as the integer ``call_depth``.

    Args:
        file: iterable of text lines (e.g. an open log file).

    Returns:
        A DataFrame with the columns ``call_depth``, ``thread_id``,
        ``function_name``, ``num_calls``, ``percent_time``,
        ``cycles_per_call`` and ``nanoseconds_per_call``. The frame is empty
        (but carries the same columns) when the file holds no profiler lines.
    """
    column_names = ['call_depth', 'thread_id', 'function_name', 'num_calls',
                    'percent_time', 'cycles_per_call', 'nanoseconds_per_call']
    lines = __extract_perf_lines(file)
    if not lines:
        # pd.read_csv raises EmptyDataError on empty input, which would abort
        # the whole report because of a single log without profiler output.
        empty_df = pd.DataFrame(columns=column_names)
        empty_df['call_depth'] = empty_df['call_depth'].astype(int)
        return empty_df

    file_df = pd.read_csv(
        StringIO('\n'.join(lines)),
        delimiter=',',
        names=column_names)
    # Turn the '+' marker string into a numeric nesting depth.
    file_df['call_depth'] = file_df['call_depth'].apply(lambda x: x.count('+')).astype(int)
    return file_df
def __extract_perf_lines(file):
    """Return the lines of ``file`` that start with ``+``, in order.

    Lines are returned unchanged (including any trailing newline); every
    other line is dropped.
    """
    return [entry for entry in file if entry.startswith('+')]
def __print_perf_data(perf_df):
    """Print the performance data as a markdown table.

    Rows are ordered by descending call count, then cycles per call,
    nanoseconds per call and percent time; floats are shown with two
    decimals and the index is omitted.
    """
    metric_order = ['num_calls', 'cycles_per_call', 'nanoseconds_per_call', 'percent_time']
    # The table shows the stack first, followed by the metrics in sort order.
    visible_columns = ['collapsed_stack'] + metric_order
    ranked = perf_df.sort_values(by=metric_order, ascending=False)
    table = ranked[visible_columns].to_markdown(floatfmt='.2f', index=False)
    print(table)
def __create_flame_graph(libos, commit_id, perf_df) -> None:
    """Render ``flamegraph.svg`` from the collected performance data.

    The ``collapsed_stack`` and ``percent_time`` columns are written,
    space-separated and without a header, to ``flamegraph_input.txt``. That
    file is then passed to ``/tmp/FlameGraph/flamegraph.pl``, whose standard
    output is stored as ``flamegraph.svg``.

    Args:
        libos: libos name shown in the graph title.
        commit_id: commit id shown in the graph subtitle.
        perf_df: DataFrame holding at least the ``collapsed_stack`` and
            ``percent_time`` columns.

    Raises:
        subprocess.CalledProcessError: if flamegraph.pl exits with a non-zero
            status.
    """
    perf_df[['collapsed_stack', 'percent_time']].to_csv(
        'flamegraph_input.txt', index=False, sep=' ', header=False)

    command = ['/tmp/FlameGraph/flamegraph.pl', 'flamegraph_input.txt',
               '--countname', 'percent_time',
               '--title', "libos = " + libos,
               '--subtitle', "commit id = " + commit_id]

    # The context manager guarantees the SVG handle is flushed and closed,
    # even when flamegraph.pl fails and check=True raises.
    with open('flamegraph.svg', 'w') as svg_file:
        subprocess.run(command, check=True, stdout=svg_file)
# Run the report only when this file is executed as a script.
if __name__ == "__main__":
    main()