import os
from random import randint
from subprocess import TimeoutExpired
from utils import (
cargo_run,
count_files,
goto_root,
mk_and_cd_tmp_dir,
rand_word,
write_string,
)
def external_bases():
    """Exercise merging several independently built bases into one root base.

    Steps, all driven through ``cargo_run`` (a helper from ``utils``):

    1. Create a ``root`` directory inside a fresh temp dir and run ``init``.
    2. For each of 3-8 randomly chosen sub-bases ``base_{i}``:
       - ``init`` it, configure it, and add 3-8 files of random words;
       - start a ``build`` with a 1-second timeout and require that it
         times out, then run ``check --recover``;
       - rebuild with ``sleep_after_llm_call`` set to 0 and assert that the
         processed-file count reported by ``count_files`` equals the
         number of files added;
       - go back to the root and ``merge`` the sub-base into it.
    3. From the root, run ``tfidf`` on the first 16 characters of every
       document and assert the matching file name appears in the output.

    Side effects: changes the process working directory and creates
    directories/files on disk.

    Raises:
        Exception: if the interrupted build finishes before the timeout.
        AssertionError: if a file count or a tfidf lookup does not match.
    """
    goto_root()
    mk_and_cd_tmp_dir()
    os.mkdir("root")
    os.chdir("root")
    cargo_run(["init"])

    # Maps the first 16 characters of each generated document to its file
    # name; used at the end to query the merged root.
    prefixes = {}
    base_count = randint(3, 8)

    for i in range(base_count):
        dir_name = f"base_{i}"
        os.mkdir(dir_name)
        os.chdir(dir_name)
        cargo_run(["init"])
        cargo_run(["check"])
        cargo_run(["config", "--set", "model", "dummy"])
        # A non-zero sleep here is what makes the first build below slow
        # enough to hit the timeout -- presumably milliseconds per call;
        # confirm against the tool's config docs.
        cargo_run(["config", "--set", "sleep_after_llm_call", "200"])
        cargo_run(["config", "--set", "chunk_size", "8000"])
        cargo_run(["config", "--set", "strict_file_reader", "true"])
        file_count = randint(3, 8)

        for j in range(file_count):
            file_name = f"base_{i}_doc_{j}.txt"
            long_doc = " ".join(rand_word() for _ in range(randint(2000, 8000)))
            prefix = long_doc[:16]
            prefixes[prefix] = file_name
            write_string(file_name, long_doc)
            cargo_run(["add", file_name])

        cargo_run(["check"])

        # Interrupt the build on purpose, then verify the base can recover.
        try:
            cargo_run(["build"], timeout=1.0)
        except TimeoutExpired:
            pass
        else:
            raise Exception("The build should have timed out")

        cargo_run(["check", "--recover"])
        cargo_run(["config", "--set", "sleep_after_llm_call", "0"])
        cargo_run(["check"])
        cargo_run(["build"])
        cargo_run(["check"])

        # Only the third value returned by count_files is needed here.
        _, _, processed_files = count_files()
        assert processed_files == file_count, (
            f"{dir_name}: expected {file_count} processed files, "
            f"got {processed_files}"
        )

        # Back in the root, pull the freshly built sub-base in.
        os.chdir("..")
        cargo_run(["merge", dir_name])
        cargo_run(["check"])

    # Every document added to any sub-base must be findable from the root.
    for prefix, file in prefixes.items():
        tfidf_result = cargo_run(["tfidf", prefix], stdout=True)
        assert file in tfidf_result, (
            f"{file} not found in tfidf output for query {prefix!r}"
        )