import argparse
import pathlib
import re
import shutil
import typing as T
# Metric names recognised by this tool.  `main` searches every HTML report
# for each name written as `.<name>` and reuses the name for the matching
# output subdirectory, so the exact spelling here matters.
METRICS = [
    "cognitive",
    "sloc",
    "ploc",
    "lloc",
    "cloc",
    "blank",
    "cyclomatic",
    "halstead",
    "nom",
    "nexits",
    "nargs",
]
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser of the tool."""
    parser = argparse.ArgumentParser(
        prog="split-minimal-tests",
        description="This tool splits HTML minimal-tests, produced by "
        "a software called `json-minimal-tests`, into distinct directories "
        "depending on metric differences.",
        epilog="The source code of this program can be found on "
        "GitHub at https://github.com/mozilla/rust-code-analysis",
    )
    parser.add_argument(
        "--input",
        "-i",
        type=pathlib.Path,
        required=True,
        help="Input directory containing HTML minimal tests.",
    )
    parser.add_argument(
        "--output",
        "-o",
        type=pathlib.Path,
        required=True,
        help="Path to the output directory.",
    )
    parser.add_argument(
        "--threshold",
        "-t",
        type=int,
        help="Maximum number of considered minimal tests for a metric.",
    )
    return parser


def _group_by_metric(
    paths: T.Iterable[pathlib.Path], metric_names: T.Iterable[str]
) -> T.Dict[str, T.List[pathlib.Path]]:
    """Map each metric name to the files that mention it outside `<pre>`."""
    # DOTALL lets `.` cross newlines, so multi-line <pre> blocks are removed
    # as a whole (presumably they hold code snippets that could contain a
    # metric name by accident -- confirm against the report generator).
    pre_block = re.compile(r"<pre>.*?</pre>", re.DOTALL)
    grouped: T.Dict[str, T.List[pathlib.Path]] = {
        name: [] for name in metric_names
    }
    for path in paths:
        # Only ASCII markers are searched for, so undecodable bytes can be
        # replaced safely instead of aborting the whole run.
        text = path.read_text(encoding="utf-8", errors="replace")
        text = pre_block.sub("", text)
        for name, files in grouped.items():
            # A literal substring test; no regular expression is needed.
            if f".{name}" in text:
                files.append(path)
    return grouped


def main(argv: T.Optional[T.Sequence[str]] = None) -> None:
    """Copy HTML minimal tests into one subdirectory per metric.

    Every `*.html` file directly inside the input directory is read, its
    `<pre>...</pre>` sections are dropped, and the remaining text is searched
    for `.<metric>` for each name in `METRICS`.  A file is copied into
    `<output>/<metric>/` for every metric it mentions; directories are only
    created for metrics with at least one matching file.

    Args:
        argv: Command-line arguments without the program name.  When `None`
            (the default) the arguments are taken from `sys.argv`.

    Raises:
        SystemExit: On invalid arguments, a missing input directory or a
            negative `--threshold` (reported through `argparse`).
    """
    parser = _build_parser()
    args = parser.parse_args(argv)
    if not args.input.is_dir():
        parser.error(f"input directory does not exist: {args.input}")
    if args.threshold is not None and args.threshold < 0:
        # A negative slice bound would silently drop files from the end.
        parser.error("--threshold must not be negative")

    args.output.mkdir(parents=True, exist_ok=True)

    # glob() yields entries in file-system order; sorting makes the subset
    # kept by --threshold reproducible between runs and machines.
    html_files = sorted(args.input.glob("*.html"))
    grouped = _group_by_metric(html_files, METRICS)

    # An omitted threshold and a threshold of 0 both mean "no limit".
    limit = args.threshold or None
    for metric_name, metric_files in grouped.items():
        if not metric_files:
            continue
        metric_path = args.output / metric_name
        metric_path.mkdir(parents=True, exist_ok=True)
        for path in metric_files[:limit]:
            shutil.copy(path, metric_path)
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()