import json
import os
from random import randint
import shutil
import time
from typing import Tuple
def sum_digits(f: str) -> int:
    """Return the sum of the hexadecimal digit values in ``f``.

    Args:
        f: A string consisting of hexadecimal digits (``0-9``, ``a-f``,
            ``A-F``). An empty string yields ``0``.

    Returns:
        The sum of every character's value when read as a single base-16
        digit, e.g. ``"ff"`` -> ``30``.

    Raises:
        ValueError: If a character cannot be parsed as a base-16 digit.
    """
    # int(c, 16) parses one hex digit directly; the previous
    # eval("0x" + c) compiled and executed Python source for every character.
    return sum(int(c, 16) for c in f)
def generate_file_and_content() -> Tuple[str, str]:
    """Generate a random file name together with the content to store in it.

    Returns:
        A ``(name, content)`` pair. ``name`` is a random 36-bit value
        formatted as nine zero-padded lowercase hex digits; ``content`` is
        the decimal string of ``sum_digits(name)``, so the content can be
        recomputed from the name alone.
    """
    # randint() includes both endpoints, so the upper bound must be
    # 2**36 - 1; 2**36 itself would format as ten hex digits instead of nine.
    r = randint(0, (1 << 36) - 1)
    f = f"{r:09x}"
    n = sum_digits(f)
    return f, str(n)
# Timing records for every benchmark step, in the order the steps were started.
steps = []


def new_step(title: str):
    """Close the timing record of the running step and open a new one.

    If a step is already recorded, its ``ended_at`` timestamp and
    ``elapsed_ms`` duration are filled in and reported on stdout. A new
    record holding ``seq``, ``title`` and ``started_at`` is then appended
    to the module-level ``steps`` list.

    Args:
        title: Human-readable name of the step that is starting.
    """
    index = len(steps)
    if index > 0:
        previous = steps[-1]
        previous["ended_at"] = time.time()
        previous["elapsed_ms"] = int(
            (previous["ended_at"] - previous["started_at"]) * 1000
        )
        print(f"Step {index - 1} took {previous['elapsed_ms']:,} ms")
    print(f"Step {index}: {title}")
    record = {"seq": index, "title": title}
    # Take the start timestamp last so it excludes the bookkeeping above.
    record["started_at"] = time.time()
    steps.append(record)
if __name__ == "__main__":
    # Benchmark: compare a git-like sharded layout
    # ("git-like/<first 2 hex chars>/<remaining chars>") with a flat layout
    # ("naive/<name>") for creating, reading back and deleting `repo_size`
    # small files. Every phase is timed through new_step().

    # Start every run from empty directories.
    for directory in ("git-like", "naive"):
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.mkdir(directory)

    repo_size = 200_000
    rounds = 3  # number of init/search(/remove) cycles
    sample_size = 2000  # files per layout remembered for the search phase
    d1_pairs = {}  # name -> expected content, sampled from the git-like layout
    d2_pairs = {}  # name -> expected content, sampled from the naive layout

    for i in range(rounds):
        new_step(f"init git-like-base with {repo_size} files")
        for _ in range(repo_size):
            file, content = generate_file_and_content()
            parent = os.path.join("git-like", file[:2])
            if len(d1_pairs) < sample_size:
                d1_pairs[file] = content
            # The shard directory is created lazily on first use.
            if not os.path.exists(parent):
                os.mkdir(parent)
            path = os.path.join(parent, file[2:])
            with open(path, "w") as f:
                f.write(content)

        new_step(f"init naive-base with {repo_size} files")
        for _ in range(repo_size):
            file, content = generate_file_and_content()
            if len(d2_pairs) < sample_size:
                d2_pairs[file] = content
            path = os.path.join("naive", file)
            with open(path, "w") as f:
                f.write(content)

        new_step("search git-like")
        for key, value in d1_pairs.items():
            path = os.path.join("git-like", key[:2], key[2:])
            with open(path, "r") as f:
                stored = int(f.read().strip())
            # Explicit check rather than `assert`, so the read-back (the work
            # being timed) is not stripped when running under `python -O`.
            if stored != int(value):
                raise AssertionError(f"{path}: expected {value}, found {stored}")

        new_step("search naive")
        for key, value in d2_pairs.items():
            path = os.path.join("naive", key)
            with open(path, "r") as f:
                stored = int(f.read().strip())
            if stored != int(value):
                raise AssertionError(f"{path}: expected {value}, found {stored}")

        # The trees written by the final round are left on disk; earlier
        # rounds time their removal and reset the samples.
        if i != rounds - 1:
            new_step("rm -r git-like")
            shutil.rmtree("git-like")
            os.mkdir("git-like")
            d1_pairs = {}

            new_step("rm -r naive")
            shutil.rmtree("naive")
            os.mkdir("naive")
            d2_pairs = {}

    # Opening a final step closes the timing record of the last real step.
    new_step("complete!")
    result = {
        "repo_size": repo_size,
        "steps": steps,
    }
    print(json.dumps(result, indent=4))
# Hard-coded result dict with the same shape as the `result` printed by the
# __main__ block: "repo_size" plus a "steps" list of timing records.
# Presumably pasted from an earlier run of this benchmark -- TODO confirm.
# In this data the "init" and "rm -r" steps each take roughly 28-35 s and the
# "search" steps 45-138 ms.
# NOTE: `r` is rebound by the following `r = {...}` assignment and no code
# visible here reads it, so this value appears to be kept for reference only.
r = {
"repo_size": 200000,
"steps": [
{
"seq": 0,
"title": "init git-like-base with 200000 files",
"started_at": 1748338102.189761,
"ended_at": 1748338132.6686232,
"elapsed_ms": 30478
}, {
"seq": 1,
"title": "init naive-base with 200000 files",
"started_at": 1748338132.668639,
"ended_at": 1748338160.474258,
"elapsed_ms": 27805
}, {
"seq": 2,
"title": "search git-like",
"started_at": 1748338160.474273,
"ended_at": 1748338160.596063,
"elapsed_ms": 121
}, {
"seq": 3,
"title": "search naive",
"started_at": 1748338160.596082,
"ended_at": 1748338160.6898549,
"elapsed_ms": 93
}, {
"seq": 4,
"title": "rm -r git-like",
"started_at": 1748338160.6898718,
"ended_at": 1748338190.342999,
"elapsed_ms": 29653
}, {
"seq": 5,
"title": "rm -r naive",
"started_at": 1748338190.343023,
"ended_at": 1748338220.748424,
"elapsed_ms": 30405
}, {
"seq": 6,
"title": "init git-like-base with 200000 files",
"started_at": 1748338220.74845,
"ended_at": 1748338254.644025,
"elapsed_ms": 33895
}, {
"seq": 7,
"title": "init naive-base with 200000 files",
"started_at": 1748338254.644044,
"ended_at": 1748338283.4599879,
"elapsed_ms": 28815
}, {
"seq": 8,
"title": "search git-like",
"started_at": 1748338283.4600031,
"ended_at": 1748338283.5988421,
"elapsed_ms": 138
}, {
"seq": 9,
"title": "search naive",
"started_at": 1748338283.59886,
"ended_at": 1748338283.697797,
"elapsed_ms": 98
}, {
"seq": 10,
"title": "rm -r git-like",
"started_at": 1748338283.697817,
"ended_at": 1748338318.950847,
"elapsed_ms": 35253
}, {
"seq": 11,
"title": "rm -r naive",
"started_at": 1748338318.950868,
"ended_at": 1748338348.511057,
"elapsed_ms": 29560
}, {
"seq": 12,
"title": "init git-like-base with 200000 files",
"started_at": 1748338348.5110872,
"ended_at": 1748338382.013751,
"elapsed_ms": 33502
}, {
"seq": 13,
"title": "init naive-base with 200000 files",
"started_at": 1748338382.013765,
"ended_at": 1748338411.272249,
"elapsed_ms": 29258
}, {
"seq": 14,
"title": "search git-like",
"started_at": 1748338411.272267,
"ended_at": 1748338411.317322,
"elapsed_ms": 45
}, {
"seq": 15,
"title": "search naive",
"started_at": 1748338411.317347,
"ended_at": 1748338411.4253461,
"elapsed_ms": 107
}, {
"seq": 16,
"title": "complete!",
"started_at": 1748338411.425366
}
]
}
# Second hard-coded result dict, same shape as the `result` printed by the
# __main__ block; presumably pasted from another run -- TODO confirm.
# This assignment rebinds `r`, replacing the dict assigned just above.
# In this data the "init" steps take roughly 5.1-5.8 s, the "rm -r" steps
# 1.1-1.4 s and every "search" step 10 ms.
r = {
"repo_size": 200000,
"steps": [
{
"seq": 0,
"title": "init git-like-base with 200000 files",
"started_at": 1748338686.9525535,
"ended_at": 1748338692.7156608,
"elapsed_ms": 5763
},
{
"seq": 1,
"title": "init naive-base with 200000 files",
"started_at": 1748338692.7156796,
"ended_at": 1748338698.0063965,
"elapsed_ms": 5290
},
{
"seq": 2,
"title": "search git-like",
"started_at": 1748338698.0064123,
"ended_at": 1748338698.0172935,
"elapsed_ms": 10
},
{
"seq": 3,
"title": "search naive",
"started_at": 1748338698.0173044,
"ended_at": 1748338698.0274107,
"elapsed_ms": 10
},
{
"seq": 4,
"title": "rm -r git-like",
"started_at": 1748338698.027416,
"ended_at": 1748338699.3780425,
"elapsed_ms": 1350
},
{
"seq": 5,
"title": "rm -r naive",
"started_at": 1748338699.3780665,
"ended_at": 1748338700.4909148,
"elapsed_ms": 1112
},
{
"seq": 6,
"title": "init git-like-base with 200000 files",
"started_at": 1748338700.4909394,
"ended_at": 1748338705.9839892,
"elapsed_ms": 5493
},
{
"seq": 7,
"title": "init naive-base with 200000 files",
"started_at": 1748338705.9840052,
"ended_at": 1748338711.1196747,
"elapsed_ms": 5135
},
{
"seq": 8,
"title": "search git-like",
"started_at": 1748338711.1196904,
"ended_at": 1748338711.130551,
"elapsed_ms": 10
},
{
"seq": 9,
"title": "search naive",
"started_at": 1748338711.1305625,
"ended_at": 1748338711.1407018,
"elapsed_ms": 10
},
{
"seq": 10,
"title": "rm -r git-like",
"started_at": 1748338711.140707,
"ended_at": 1748338712.3045957,
"elapsed_ms": 1163
},
{
"seq": 11,
"title": "rm -r naive",
"started_at": 1748338712.30462,
"ended_at": 1748338713.4315445,
"elapsed_ms": 1126
},
{
"seq": 12,
"title": "init git-like-base with 200000 files",
"started_at": 1748338713.4315727,
"ended_at": 1748338718.9001195,
"elapsed_ms": 5468
},
{
"seq": 13,
"title": "init naive-base with 200000 files",
"started_at": 1748338718.9001346,
"ended_at": 1748338724.0638602,
"elapsed_ms": 5163
},
{
"seq": 14,
"title": "search git-like",
"started_at": 1748338724.0638769,
"ended_at": 1748338724.0747116,
"elapsed_ms": 10
},
{
"seq": 15,
"title": "search naive",
"started_at": 1748338724.0747228,
"ended_at": 1748338724.084739,
"elapsed_ms": 10
},
{
"seq": 16,
"title": "complete!",
"started_at": 1748338724.0847442
}
]
}