import json
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sys
import math
def smooth(series, window=5):
    """Return a centered rolling mean of *series*.

    Args:
        series: pandas Series to smooth.
        window: Number of consecutive points averaged for each output
            value. Defaults to 5.

    Returns:
        A new Series with the same index as *series*. Positions whose
        centered window does not fit entirely inside the data (the first
        and last ``window // 2`` points for an odd window) are NaN, because
        ``rolling`` requires a full window by default.
    """
    return series.rolling(window=window, center=True).mean()
def plot(dfs, cols, line_styles, id_var, title):
    """Draw smoothed, min-max normalized columns of several frames on one chart.

    Every column in ``cols`` is passed through ``smooth`` and then scaled to
    [0, 1] using the minimum and maximum of that column's smoothed values
    taken across *all* frames, so the same column is directly comparable
    between files. The chart is shown with ``plt.show()``.

    The input frames are left untouched: smoothing is done on local series,
    so calling this function repeatedly on the same frames does not compound
    the smoothing or leak NaN edges into later computations.

    Args:
        dfs: Sequence of DataFrames. Each must contain ``id_var`` and every
            column in ``cols``, and carry ``attrs['filename']`` (used in the
            title).
        cols: Column names to draw.
        line_styles: Mapping from column name to a matplotlib linestyle.
        id_var: Name of the column used for the x axis.
        title: First line of the chart title; one ``File i: <filename>``
            line per frame is appended beneath it.
    """
    # Apply the theme before creating the figure so the very first chart is
    # styled the same way as later ones (style settings are picked up when
    # the axes are created).
    sns.set_theme(style="whitegrid")
    fig = plt.figure(figsize=(16, 10))
    ax = fig.add_subplot(111)
    file_colors = sns.color_palette("husl", len(dfs))

    # Smooth into local series instead of writing back into the caller's
    # frames; one dict of smoothed columns per input frame.
    smoothed = [{col: smooth(df[col]) for col in cols} for df in dfs]

    for col in cols:
        # Normalization bounds are shared by all files for this column.
        all_values = pd.concat([per_file[col] for per_file in smoothed])
        col_min = all_values.min()
        col_max = all_values.max()
        span = col_max - col_min

        for i, df in enumerate(dfs):
            values = smoothed[i][col]
            if col_max > col_min:
                normalized = (values - col_min) / span
            else:
                # Constant (or all-NaN) column: avoid dividing by zero.
                normalized = values * 0

            ax.plot(
                df[id_var],
                normalized,
                label=f"{col} (File {i})",
                linestyle=line_styles[col],
                color=file_colors[i],
                linewidth=2,
            )

    ax.set_ylabel("Normalized Value [0,1]")
    full_title = f"{title}\n" + "\n".join(
        f"File {i}: {df.attrs['filename']}" for i, df in enumerate(dfs)
    )
    ax.set_title(full_title, pad=20)
    ax.legend(loc='center right', bbox_to_anchor=(0.98, 0.5))
    plt.tight_layout()
    plt.show()
# Script body: load every JSON file named on the command line into a
# DataFrame and show three charts (raw columns vs map size, raw columns vs
# log2 map size, and derived ratio columns vs map size).
if len(sys.argv) < 2:
    # Usage errors belong on stderr so they never mix with regular output.
    print(f'usage: {sys.argv[0]} FILE1.json [FILE2.json ...]', file=sys.stderr)
    sys.exit(2)

dfs = []
for filename in sys.argv[1:]:
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default locale encoding when reading it.
    with open(filename, "r", encoding="utf-8") as f:
        data = json.load(f)
    # Each file is expected to hold its records under the top-level "data" key.
    df = pd.DataFrame(data["data"])
    # Remember where the frame came from; plot() prints it in the title.
    df.attrs['filename'] = filename
    dfs.append(df)

# Raw measurements: one line style per column, one colour per file.
line_styles = {
    'flush_time': 'solid',
    'cache_size': 'dashed',
    'cache_bytes': 'dotted',
}
# The plotted columns are exactly the ones that have a line style.
cols = list(line_styles)
plot(dfs, cols, line_styles, id_var='map_size', title='Raw vs map size')

# Same columns against a logarithmic x axis.
for df in dfs:
    df['lg_map_size'] = df['map_size'].apply(math.log2)
plot(dfs, cols, line_styles, id_var='lg_map_size', title='Raw vs log2 map size')

# Derived ratios between the raw columns.
line_styles = {
    'flush_time_per_cache_size': 'solid',
    'flush_time_per_cache_byte': 'dashed',
    'cache_byte_per_cache_size': 'dotted',
}
for df in dfs:
    df['flush_time_per_cache_size'] = df['flush_time'] / df['cache_size']
    df['flush_time_per_cache_byte'] = df['flush_time'] / df['cache_bytes']
    df['cache_byte_per_cache_size'] = df['cache_bytes'] / df['cache_size']
cols = list(line_styles)
plot(dfs, cols, line_styles, id_var='map_size', title='Relative vs map size')