import os
import shutil
from random import randint
import re
from subprocess import TimeoutExpired
from utils import (
cargo_run,
count_chunks,
count_files,
get_commit_hash,
goto_root,
ls_recursive,
send_message,
)
def end_to_end(test_model: str):
    """Drive the CLI through a full knowledge-base lifecycle inside `docs/`.

    The scenario runs, in order: init, config set/get, add/rm of files,
    interrupted builds followed by recovery, full builds, chunk/term
    inspection, removing and re-adding a single file, deliberately breaking
    the `.ragit/files` directory and recovering it, and finally two queries.
    `check` is run after almost every step.

    Args:
        test_model: Value written to the `model` config key; it is also
            expected to appear in the output of `config --get model`.

    Raises:
        AssertionError: If any intermediate expectation does not hold.
        Whatever `cargo_run` raises when a command fails (presumably on a
        nonzero exit status unless `check=False` is passed -- confirm in
        `utils`).
    """
    goto_root()
    os.chdir("docs")
    files = ls_recursive("txt") + ls_recursive("md")

    # Start from a clean slate: drop any knowledge-base left by a previous run.
    if ".ragit" in os.listdir():
        shutil.rmtree(".ragit")

    # The rest of the scenario expects a non-trivial number of documents.
    assert len(files) > 10

    # step 1: init
    cargo_run(["init"])
    cargo_run(["check"])

    # step 2: set/get config
    cargo_run(["config", "--set", "model", "dummy"])
    cargo_run(["check"])
    assert "dummy" in cargo_run(["config", "--get", "model"], stdout=True)
    cargo_run(["config", "--set", "model", test_model])
    assert test_model in cargo_run(["config", "--get", "model"], stdout=True)
    cargo_run(["config", "--set", "sleep_after_llm_call", "null"])
    cargo_run(["config", "--set", "sleep_after_llm_call", "2000"])
    # An invalid value must be rejected and must not overwrite the valid one.
    assert cargo_run(["config", "--set", "sleep_after_llm_call", "this_is_not_a_number"], check=False) != 0
    assert "2000" in cargo_run(["config", "--get", "sleep_after_llm_call"], stdout=True)
    cargo_run(["config", "--set", "dump_log", "true"])
    cargo_run(["config", "--set", "strict_file_reader", "true"])
    cargo_run(["check"])

    # step 2.1: the same config must be readable from a sub-directory.
    os.mkdir("tmp")
    os.chdir("tmp")
    try:
        assert test_model in cargo_run(["config", "--get", "model"], stdout=True)
        cargo_run(["check"])
    finally:
        # Always leave and remove the scratch directory so that a failure
        # here does not make the next run's `os.mkdir("tmp")` fail.
        os.chdir("..")
        shutil.rmtree("tmp")

    cargo_run(["config", "--get-all"])

    # step 3: add every file, remove every file, then add them all again.
    cargo_run(["add", *files])
    cargo_run(["check"])
    file_count, _, _ = count_files()
    assert file_count == len(files)

    cargo_run(["rm", *files])
    cargo_run(["check"])
    file_count, _, _ = count_files()
    assert file_count == 0

    cargo_run(["add", *files])
    cargo_run(["check"])
    file_count, _, _ = count_files()
    assert file_count == len(files)

    # step 4: start a build with a randomised 8.0-10.0 second timeout, three
    # times. A timeout is tolerated on purpose; `check --recover` must succeed
    # after each attempt.
    for _ in range(3):
        try:
            cargo_run(["build"], timeout=8.0 + randint(0, 20) / 10)
        except TimeoutExpired:
            pass

        cargo_run(["check", "--recover"])

    # step 5: full build; a second build on top of it must also pass `check`.
    cargo_run(["config", "--set", "sleep_after_llm_call", "null"])
    cargo_run(["config", "--set", "summary_after_build", "true"])
    cargo_run(["build"])
    cargo_run(["check"])
    cargo_run(["build"])
    cargo_run(["check"])

    # step 6: collect the 32-hex-digit chunk uids and look for the term
    # "ragit" in the `ls-terms` dump of at least one chunk.
    chunks = cargo_run(["ls-chunks", "--abbrev=32"], stdout=True)
    chunk_uids = [
        uid_match.group(1)
        for line in chunks.split("\n")
        if (uid_match := re.match(r"^uid\:\s([0-9a-f]{32})$", line)) is not None
    ]

    has_ragit_in_tfidf = False

    # No short-circuit: `ls-terms` is run for every chunk, even after a hit.
    for chunk_uid in chunk_uids:
        tfidf_dump = cargo_run(["ls-terms", chunk_uid], stdout=True)
        has_ragit_in_tfidf = has_ragit_in_tfidf or "ragit" in tfidf_dump

    assert has_ragit_in_tfidf

    # step 7: removing one file drops exactly one file and at least one chunk.
    file_count_prev, _, _ = count_files()
    chunk_count_prev = count_chunks()
    assert file_count_prev == len(files)

    cargo_run(["rm", files[0]])
    cargo_run(["check"])
    file_count_next, _, _ = count_files()
    chunk_count_next = count_chunks()
    assert file_count_prev == file_count_next + 1
    assert chunk_count_prev > chunk_count_next

    # Re-adding restores the file count, but the chunk count stays lower
    # until the next build.
    cargo_run(["add", files[0]])
    cargo_run(["check"])
    file_count, _, _ = count_files()
    chunk_count = count_chunks()
    assert file_count_prev == file_count
    assert chunk_count_prev > chunk_count

    cargo_run(["build"])
    summary = cargo_run(["summary", "--cached"], stdout=True)
    send_message(f"--- summary of the knowledge-base ---\n\n{summary}")
    cargo_run(["meta", "--set", "git-hash", get_commit_hash()])
    cargo_run(["check"])

    # After the rebuild the chunk count is back to its earlier value.
    chunk_count = count_chunks()
    assert chunk_count_prev == chunk_count

    # step 8: break the knowledge-base by deleting everything under
    # `.ragit/files`; `check` must fail, `check --recover` must repair it.
    os.chdir(".ragit/files")
    assert len(os.listdir()) > 0

    for file_index in os.listdir():
        shutil.rmtree(file_index)

    os.chdir("../..")
    assert cargo_run(["check"], check=False) != 0
    cargo_run(["check", "--recover"])
    cargo_run(["check"])
    cargo_run(["gc", "--logs"])

    # step 9: query in normal and super-rerank modes and report the answers.
    query = cargo_run(["query", "What makes ragit special?"], stdout=True)
    send_message(f"--- What makes ragit special? ---\n\n{query}")
    query = cargo_run(["query", "--super-rerank", "What makes ragit special?"], stdout=True)
    send_message(f"--- What makes ragit special? (super rerank mode) ---\n\n{query}")