from abbrev import abbrev
from add_and_rm import add_and_rm
from add_and_rm2 import add_and_rm2
from archive import archive
from audit import audit
from erroneous_llm import erroneous_llm
from cannot_read_images import cannot_read_images
from cargo_tests import cargo_tests
from cargo_features import cargo_features
from cat_file import cat_file
from clean_up_erroneous_chunk import clean_up_erroneous_chunk
from cli import cli
from clone import clone
from clone_empty import clone_empty
from config import config
from csv_reader import csv_reader
from empty import empty
from end_to_end import end_to_end
from external_bases import external_bases
from extract_keywords import extract_keywords
from fetch_models import fetch_models
from generous_file_reader import generous_file_reader
from gh_issue_20 import gh_issue_20
from ignore import ignore
from ii import ii
from images import images
from images2 import images2
from images3 import images3
from korean import korean
from logs import logs
from ls import ls
from ls_dedup import ls_dedup
from ls_queries import ls_queries
from many_chunks import many_chunks
from many_jobs import many_jobs
from markdown_reader import markdown_reader
from merge import merge
from meta import meta
from migrate import migrate
from migrate2 import migrate2
from migrate3 import migrate3
from models_init import models_init, test_home_config_override
from orphan_process import orphan_process
from outside import outside
from pdf import pdf
from pdl import pdl
from pdl_escape import pdl_escape
from prompts import prompts
from pull import pull
from pull_ragithub import pull_ragithub
from query_options import query_options
from query_with_schema import query_with_schema
from ragit_api import ragit_api
from real_repos import real_repos
from real_repos_regression import real_repos_regression
from recover import recover
from retrieve_chunks import retrieve_chunks
from server import server
from server_ai_model import server_ai_model
from server_chat import server_chat
from server_file_tree import server_file_tree
from server_permission import server_permission
from subdir import subdir
from summary import summary
from svg import svg
from symlink import symlink
from tfidf import tfidf
from web_images import web_images
from write_lock import write_lock
from datetime import datetime
import os
from random import seed as rand_seed
import sys
from utils import (
clean,
clean_test_output,
get_commit_hash,
get_coverage,
get_ragit_version,
goto_root,
recv_message,
reset_message,
)
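# Collects toolchain and environment info (cargo, rustc, Python, OS) for the
# test report. Each probe falls back to an error string instead of raising, so
# a missing toolchain never aborts the test run.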
def get_platform_info() -> dict[str, str]:
import platform
import subprocess

result = {}
try:
result["cargo_version"] = subprocess.run(["cargo", "version"], capture_output=True, text=True, check=True).stdout.strip()
except Exception as e:
result["cargo_version"] = f"cannot get cargo_version: {e}"
try:
result["rustc_version"] = subprocess.run(["rustc", "--version"], capture_output=True, text=True, check=True).stdout.strip()
except Exception as e:
result["rustc_version"] = f"cannot get rustc_version: {e}"
try:
result["python_version"] = platform.python_version()
except Exception as e:
result["python_version"] = f"cannot get python_version: {e}"
try:
result["platform"] = platform.platform()
except Exception as e:
result["platform"] = f"cannot get platform: {e}"
return result
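# Example shape of the returned dict (illustrative values; the exact strings
# depend on the machine running the tests):
#   {
#       "cargo_version": "cargo 1.75.0 (...)",
#       "rustc_version": "rustc 1.75.0 (...)",
#       "python_version": "3.11.4",
#       "platform": "Linux-6.1.0-x86_64-with-glibc2.36",
#   }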
help_message = """
Commands
end_to_end [model=dummy] run `end_to_end` test
It simulates a basic workflow of ragit: init, add, build and
query. It runs on a real dataset: the documents of ragit.
external_bases run `external_bases` test
It creates a bunch of knowledge-bases and runs
`rag merge` on them. It also checks whether `rag tfidf`
can successfully retrieve a chunk from multiple
knowledge-bases.
merge run `merge` test
It's like `external_bases` test, but with `--prefix` option.
abbrev run `abbrev` test
It tests `--abbrev` option.
add_and_rm run `add_and_rm` test
It runs tons of `rag add` and `rag rm` with different options.
add_and_rm2 run `add_and_rm2` test
Like `add_and_rm`, but it's more focused on `rag rm`.
ignore run `ignore` test
It tests whether `rag add` respects `.ragignore` or
`.gitignore`.
archive run `archive` test
It runs `archive-create` and `archive-extract` and checks
whether the extracted knowledge-base is identical to the
original one.
recover run `recover` test
It checks whether 1) `rag check` fails on a broken
knowledge-base and 2) `rag check --recover` can fix a broken
knowledge-base.
cannot_read_images run `cannot_read_images` test
Some models can read images while some cannot. It tests what
happens when a model that cannot read images tries to manage
a knowledge-base with an image.
clone run `clone` test
It creates a knowledge-base, pushes, clones and checks it.
It runs a local `ragit-server` in this repository.
clone_empty run `clone_empty` test
It creates an empty repository in ragit-server, clones the
repository (which is not an error), adds some chunks to it,
and pushes it back to the server.
pull run `pull` test
It creates a repository, pushes and pulls it, and
sees if it works.
pull_ragithub run `pull_ragithub` test
Most other tests, including `pull`, run ragit-server on
localhost, but this one clones and pulls a knowledge-base
from ragithub. I want to make sure that the latest version
is always compatible with ragithub.
server run `server` test
It tests endpoints related to a repository. It first pushes a
repository and fetches data (chunks, images, files, ...) from
the server.
server_ai_model run `server_ai_model` test
It tests endpoints related to ai models. It creates/updates and
reads ai models. Deletion is WIP.
server_chat [model] run `server_chat` test
It tests chat-related endpoints of ragit-server.
server_file_tree run `server_file_tree` test
It tests the `/{user}/{repo}/file-content` api of ragit-server.
server_permission run `server_permission` test
It creates users and repositories with different permissions
and sends requests with/without api keys.
fetch_models run `fetch_models` test
It's like `server_ai_model`, but it updates the local
`models.json` instead of the server's.
query_options [model] run `query_options` test
It tests various option flags of `rag query`.
query_with_schema [model] run `query_with_schema` test
It tests `--schema` flag of `rag query`.
cli run `cli` test
It tests whether the cli parser can parse arguments
correctly. It also creates invalid inputs and sees if the
cli parser can successfully reject them.
outside run `outside` test
It tests whether ragit can successfully reject files outside
a knowledge-base.
migrate run `migrate` test
It checks out git to v0.2.0, creates a knowledge-base, and
runs `migrate` until the knowledge-base is migrated to the
newest version.
Since it runs `git checkout`, it may mess up your working
tree. If you have uncommitted changes, this test will do
nothing and fail.
migrate2 run `migrate2` test
Like `migrate`, but clones knowledge-bases from web instead
of creating a mock knowledge-base.
migrate3 run `migrate3` test
It creates knowledge-bases with different versions of ragit.
Then it makes sure that the versions can clone/push to the
latest version of ragit-server.
config run `config` test
I have added new configs in ragit 0.3.5, and I want to see
if they're compatible with older versions.
gh_issue_20 run `gh_issue_20` test
https://github.com/baehyunsol/ragit/issues/20
It tests `-C` option.
many_chunks run `many_chunks` test
It creates a lot of small files and sees if ragit can
handle the files correctly. It also tests interrupting
`rag build`.
erroneous_llm run `erroneous_llm` test
It tests how `rag build` behaves when the LLM server is
unstable.
many_jobs [model=dummy] [jobs=999]
run `many_jobs` test
`rag build` by default runs with many processes, and a
multi-process program may introduce many unexpected bugs.
It runs `rag build` with many processes and sees if it works.
You'd better run it on a machine with many cores.
ls run `ls` test
It runs `ls-chunks`, `ls-files`, `ls-images`, `ls-terms` and
`tfidf` with a bunch of different options.
ls_dedup run `ls_dedup` test
Some commands match chunk uids and file uids at the same
time, and a chunk and a file may share the same uid prefix.
It tests whether ragit deduplicates uids correctly in such
cases.
ls_queries run `ls_queries` test
It runs `ls-queries` multiple times with different options,
and checks if it behaves as expected. It also tests other
commands that deal with query histories. I implemented an
extra test case instead of updating the `ls` test, because
the test is getting too long.
logs [model] run `logs` test
It checks if `rag config --set dump_log true` and
`rag gc --logs` work correctly.
meta run `meta` test
It runs `rag meta`-family commands and sees if they work.
empty [model=dummy] run `empty` test
It sees if ragit can handle an empty file correctly.
symlink run `symlink` test
It tests whether ragit can handle symlinks correctly
without falling into infinite loops.
ii run `ii` test
It creates an inverted index and tests it.
cat_file run `cat_file` test
generous_file_reader run `generous_file_reader` test
If some files are broken, ragit is supposed to
skip the broken files and continue processing the
valid files.
clean_up_erroneous_chunk run `clean_up_erroneous_chunk` test
It's an edge case in `generous_file_reader`.
audit [model] run `audit` test
summary run `summary` test
It tests `rag summary`. It doesn't evaluate the quality of the
generated summary. It only checks whether the cli options of
the command work as expected. The `end_to_end` test runs the command
and dumps the output, so you can check that out if you want to
see the quality of a summary.
images run `images` test
It creates a markdown file with images and checks
whether the markdown reader can parse the file
correctly.
images2 [model] run `images2` test
It tests whether models can generate image-description
files correctly.
NOTE: It uses the vision capability of the model.
Make sure that the model has one.
images3 [model] run `images3` test
Other tests test images in markdown files, but they
don't test image file readers. This one does.
pdf [model] run `pdf` test
It tests the pdf reader.
You have to use a vision language model!
pdl [model] run `pdl` test
It tests `rag pdl` command.
pdl_escape run `pdl_escape` test
The pdl engine uses tera under the hood and modifies tera's
default escape function. It tests whether the modified
escape function works.
svg [model] run `svg` test
It tests the svg reader.
You have to use a vision language model!
web_images [model] run `web_images` test
It tests whether ragit can fetch images from web.
extract_keywords [model] run `extract_keywords` test
It tests whether `rag extract-keywords` command works.
orphan_process run `orphan_process` test
It reproduces gh issue #9.
https://github.com/baehyunsol/ragit/issues/9
write_lock run `write_lock` test
It reproduces gh issue #8.
https://github.com/baehyunsol/ragit/issues/8
markdown_reader run `markdown_reader` test
I have found many bugs in `markdown_reader_v0`. The bugs are
reproduced in this test. If you find a new one, please add that
to this test.
csv_reader run `csv_reader` test
real_repos [repo=all] run `real_repos` test
It clones real git repos from the web and builds
knowledge-bases of the repos.
This is a very important test because it's the exact use
case of ragit that I have in mind.
The test code uses the dummy model and only tests file
readers. If you want to use real models, you have to run
the main function of `real_repos.py`.
real_repos_regression run `real_repos_regression` test
I ran `python3 tests.py real_repos` and was surprised to see
it throwing so many errors. Many of them were ragit's fault. So
I created this test, which tries to reproduce all the errors
found in the `real_repos` test.
prompts [model=dummy] run `prompts` test
It's the smallest set of commands that parse and execute all
the `.pdl` files in the `prompts/` directory.
retrieve_chunks [model] run `retrieve_chunks` test
It tests the `rag retrieve-chunks` command.
subdir run `subdir` test
It checks whether `ragit` is smart enough to find `.ragit/`
from any directory.
tfidf run `tfidf` test
It creates a bunch of lorem-ipsum files and sees if `rag tfidf`
can retrieve files correctly. It also tests tfidf searches on
cjk strings.
korean run `korean` test
It runs ragit with/without "korean" feature and makes sure that
the tokenizer behaves differently.
ragit_api [model] run `ragit_api` test
It asks "what's your name" to the model. It returns OK if the
api call was successful. It doesn't care about the content of
the model's response.
cargo_tests run `cargo test` on all the crates
It also makes sure that cargo emits no warnings.
cargo_features run `cargo_features` test
Ragit has many cargo features. This test compiles ragit with
all the possible combinations of features and makes sure that
they all compile.
models_init run `models_init` test
It tests the initialization of `models.json` and
model selection in `api.json`.
all run all tests
It dumps the test result to `tests/result.json`.
"""
if __name__ == "__main__":
no_clean = "--no-clean" in sys.argv
args = [arg for arg in sys.argv if arg != "--no-clean"]
seed = [arg for arg in args if arg.startswith("--seed=")]
if len(seed) > 0:
args = [arg for arg in args if arg not in seed]
seed = int(seed[0].split("=")[1])
else:
now = datetime.now()
seed = int(f"{now.year:04}{now.month:02}{now.day:02}{now.hour:02}{now.minute:02}{now.second:02}")
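# e.g. 2026-01-02 03:04:05 -> seed 20260102030405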
command = args[1] if len(args) > 1 else None
test_model = args[2] if len(args) > 2 else None
rand_seed(seed)
try:
if command == "end_to_end":
test_model = test_model or "dummy"
end_to_end(test_model=test_model)
elif command == "external_bases":
external_bases()
elif command == "merge":
merge()
elif command == "abbrev":
abbrev()
elif command == "add_and_rm":
add_and_rm()
elif command == "add_and_rm2":
add_and_rm2()
elif command == "ignore":
ignore()
elif command == "recover":
recover()
elif command == "cannot_read_images":
cannot_read_images()
elif command == "clone":
clone()
elif command == "clone_empty":
clone_empty()
elif command == "pull":
pull()
elif command == "pull_ragithub":
pull_ragithub()
elif command == "server":
server()
elif command == "server_ai_model":
server_ai_model()
elif command == "server_chat":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
server_chat(test_model=test_model)
elif command == "server_file_tree":
server_file_tree()
elif command == "server_permission":
server_permission()
elif command == "fetch_models":
fetch_models()
elif command == "query_options":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
query_options(test_model=test_model)
elif command == "query_with_schema":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
query_with_schema(test_model=test_model)
elif command == "cli":
cli()
elif command == "outside":
outside()
elif command == "audit":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
audit(test_model=test_model)
elif command == "summary":
summary()
elif command == "migrate":
migrate()
elif command == "migrate2":
migrate2()
elif command == "migrate3":
migrate3()
elif command == "config":
config()
elif command == "gh_issue_20":
gh_issue_20()
elif command == "archive":
archive()
elif command == "many_chunks":
many_chunks()
elif command == "erroneous_llm":
erroneous_llm()
elif command == "many_jobs":
jobs = int(args[3]) if len(args) > 3 else 999
test_model = test_model or "dummy"
many_jobs(test_model=test_model, jobs=jobs)
elif command == "ls":
ls()
elif command == "ls_dedup":
ls_dedup()
elif command == "ls_queries":
ls_queries()
elif command == "logs":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
logs(test_model=test_model)
elif command == "meta":
meta()
elif command == "symlink":
symlink()
elif command == "empty":
test_model = test_model or "dummy"
empty(test_model)
elif command == "ii":
ii()
elif command == "cat_file":
cat_file()
elif command == "generous_file_reader":
generous_file_reader()
elif command == "clean_up_erroneous_chunk":
clean_up_erroneous_chunk()
elif command == "images":
images()
elif command == "images2":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
images2(test_model=test_model)
elif command == "images3":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
images3(test_model=test_model)
elif command == "pdf":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
pdf(test_model=test_model)
elif command == "pdl":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
pdl(test_model=test_model)
elif command == "pdl_escape":
pdl_escape()
elif command == "svg":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
svg(test_model=test_model)
elif command == "web_images":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
web_images(test_model=test_model)
elif command == "extract_keywords":
if test_model is None:
print("Please specify which model to run the tests with.")
sys.exit(1)
extract_keywords(test_model=test_model)
elif command == "orphan_process":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
orphan_process(test_model=test_model)
elif command == "write_lock":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
write_lock(test_model=test_model)
elif command == "markdown_reader":
markdown_reader()
elif command == "csv_reader":
csv_reader()
elif command == "real_repos":
repo = "all" if len(args) < 3 else args[2]
real_repos(repo=repo)
elif command == "real_repos_regression":
real_repos_regression()
elif command == "prompts":
test_model = test_model or "dummy"
prompts(test_model=test_model)
elif command == "retrieve_chunks":
if test_model is None or test_model == "dummy":
print("Please specify which model to run the tests with. You cannot run this test with a dummy model.")
sys.exit(1)
retrieve_chunks(test_model=test_model)
elif command == "subdir":
subdir()
elif command == "tfidf":
tfidf()
elif command == "korean":
korean()
elif command == "ragit_api":
if test_model is None:
print("Please specify which model to run the tests with.")
sys.exit(1)
ragit_api(test_model=test_model)
elif command == "cargo_tests":
cargo_tests()
elif command == "cargo_features":
cargo_features()
elif command == "models_init":
models_init()
test_home_config_override()
elif command == "all":
import json
import time
import traceback
tests = [
("cargo_tests", cargo_tests),
("cargo_features", cargo_features),
("abbrev", abbrev),
("add_and_rm", add_and_rm),
("add_and_rm2", add_and_rm2),
("ignore", ignore),
("recover", recover),
("cannot_read_images", cannot_read_images),
("clone", clone),
("clone_empty", clone_empty),
("pull", pull),
("pull_ragithub", pull_ragithub),
("server", server),
("server_ai_model", server_ai_model),
("server_permission", server_permission),
("server_file_tree", server_file_tree),
("fetch_models", fetch_models),
("cli", cli),
("outside", outside),
("archive", archive),
("many_chunks", many_chunks),
("erroneous_llm", erroneous_llm),
("pdl_escape", pdl_escape),
("many_jobs", many_jobs),
("ls", ls),
("ls_dedup", ls_dedup),
("ls_queries", ls_queries),
("meta", meta),
("symlink", symlink),
("gh_issue_20", gh_issue_20),
("ii", ii),
("cat_file", cat_file),
("generous_file_reader", generous_file_reader),
("clean_up_erroneous_chunk", clean_up_erroneous_chunk),
("images", images),
("markdown_reader", markdown_reader),
("csv_reader", csv_reader),
("real_repos", real_repos),
("real_repos_regression", real_repos_regression),
("subdir", subdir),
("tfidf", tfidf),
("korean", korean),
("merge", merge),
("summary", summary),
("external_bases", external_bases),
("end_to_end dummy", lambda: end_to_end(test_model="dummy")),
("end_to_end gpt-oss-20b-groq", lambda: end_to_end(test_model="gpt-oss-20b-groq")),
("end_to_end gpt-5-mini", lambda: end_to_end(test_model="gpt-5-mini")),
("audit gpt-oss-20b-groq", lambda: audit(test_model="gpt-oss-20b-groq")),
("logs gpt-oss-20b-groq", lambda: logs(test_model="gpt-oss-20b-groq")),
("prompts dummy", lambda: prompts(test_model="dummy")),
("prompts gpt-5-mini", lambda: prompts(test_model="gpt-5-mini")),
("prompts gpt-oss-20b-groq", lambda: prompts(test_model="gpt-oss-20b-groq")),
("prompts gemini-2.0-flash", lambda: prompts(test_model="gemini-2.0-flash")),
("prompts claude-4.5-haiku", lambda: prompts(test_model="claude-4.5-haiku")),
("retrieve_chunks claude-4.5-haiku", lambda: retrieve_chunks(test_model="claude-4.5-haiku")),
("retrieve_chunks gpt-oss-20b-groq", lambda: retrieve_chunks(test_model="gpt-oss-20b-groq")),
("empty dummy", lambda: empty(test_model="dummy")),
("empty gpt-oss-20b-groq", lambda: empty(test_model="gpt-oss-20b-groq")),
("server_chat gpt-oss-20b-groq", lambda: server_chat(test_model="gpt-oss-20b-groq")),
("server_chat gemini-2.0-flash", lambda: server_chat(test_model="gemini-2.0-flash")),
("images2 gpt-5-mini", lambda: images2(test_model="gpt-5-mini")),
("images3 gpt-5-mini", lambda: images3(test_model="gpt-5-mini")),
("pdl gpt-5-mini", lambda: pdl(test_model="gpt-5-mini")),
("pdf gpt-5-mini", lambda: pdf(test_model="gpt-5-mini")),
("svg gpt-5-mini", lambda: svg(test_model="gpt-5-mini")),
("web_images gpt-5-mini", lambda: web_images(test_model="gpt-5-mini")),
("images2 claude-4.5-haiku", lambda: images2(test_model="claude-4.5-haiku")),
("extract_keywords dummy", lambda: extract_keywords(test_model="dummy")),
("extract_keywords gpt-5-mini", lambda: extract_keywords(test_model="gpt-5-mini")),
("orphan_process gpt-oss-20b-groq", lambda: orphan_process(test_model="gpt-oss-20b-groq")),
("write_lock gpt-oss-20b-groq", lambda: write_lock(test_model="gpt-oss-20b-groq")),
("ragit_api command-r", lambda: ragit_api(test_model="command-r")),
("query_options gpt-oss-20b-groq", lambda: query_options(test_model="gpt-oss-20b-groq")),
("query_with_schema gpt-oss-20b-groq", lambda: query_with_schema(test_model="gpt-oss-20b-groq")),
("models_init", models_init),
("test_home_config_override", test_home_config_override),
("config", config), ("migrate", migrate),
("migrate2", migrate2),
("migrate3", migrate3),
]
started_at = datetime.now()
has_error = False
result = {
"meta": {
"complete": False,
"started_at": str(started_at),
"commit": get_commit_hash(),
"platform": get_platform_info(),
"ragit_version": get_ragit_version(),
"rand_seed": seed,
},
"tests": {},
"result": {
"total": len(tests),
"complete": 0,
"pass": 0,
"fail": 0,
"remaining": len(tests),
},
}
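# `result.json` is rewritten after every test so that an interrupted run still
# leaves a readable partial report. Each entry of result["tests"] ends up
# looking roughly like this (illustrative values):
#   {"seq": 3, "pass": true, "error": null, "elapsed_ms": 1234, "message": "..."}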
with open("result.json", "w") as f:
f.write(json.dumps(result, indent=4, ensure_ascii=True))
for seq, (name, test) in enumerate(tests):
print(f"running `{name}`...", flush=True)
try:
start = time.time()
reset_message()
rand_seed(seed)
test()
except Exception as e:
has_error = True
result["tests"][name] = {
"seq": seq,
"pass": False,
"error": clean_test_output(str(e) + "\n" + traceback.format_exc()),
"elapsed_ms": int((time.time() - start) * 1000),
}
result["result"]["fail"] += 1
else:
result["tests"][name] = {
"seq": seq,
"pass": True,
"error": None,
"elapsed_ms": int((time.time() - start) * 1000),
}
result["result"]["pass"] += 1
finally:
result["result"]["complete"] += 1
result["result"]["remaining"] -= 1
result["coverage"] = get_coverage()
if name in result["tests"]:
result["tests"][name]["message"] = recv_message()
if not no_clean:
try:
clean()
except Exception as e:
result["tests"][name]["cleanup_error"] = str(e) + "\n" + traceback.format_exc()
goto_root()
os.chdir("tests")
with open("result.json", "w") as f:
f.write(json.dumps(result, indent=4, ensure_ascii=True))
ended_at = datetime.now()
result["meta"]["ended_at"] = str(ended_at)
result["meta"]["elapsed_ms"] = (ended_at - started_at).seconds * 1000 + (ended_at - started_at).microseconds // 1000
result["meta"]["complete"] = True
goto_root()
os.chdir("tests")
result = json.dumps(result, indent=4, ensure_ascii=True)
print(result)
with open("result.json", "w") as f:
f.write(result)
if has_error:
sys.exit(1)
else:
print("invalid command:", command)
print(help_message)
finally:
if not no_clean:
clean()