import argparse
import os
import sys
from pathlib import Path
from utils import check_tool, run
from jcvi.apps.base import cleanup, logger
def main():
p = argparse.ArgumentParser(description="Run pbsim + ccs pipeline.")
p.add_argument(
"genome", type=Path, help="Reference genome FASTA (e.g., parents.genome.fa)"
)
p.add_argument(
"--errhmm",
default="~/code/pbsim3/data/ERRHMM-SEQUEL.model",
help="ERRHMM model for pbsim (default: %(default)s)",
)
p.add_argument(
"--depth", type=int, default=50, help="pbsim --depth (default: %(default)s)"
)
p.add_argument(
"--pass-num",
type=int,
default=8,
help="pbsim --pass-num (default: %(default)s)",
)
p.add_argument(
"--length-mean",
type=int,
default=15000,
help="pbsim --length-mean (default: %(default)s)",
)
p.add_argument(
"--prefix",
default=None,
help="Output prefix (defaults to basename(GENOME) without .fa)",
)
p.add_argument(
"--pbsim",
default="pbsim",
help="pbsim executable name/path (default: %(default)s)",
)
p.add_argument(
"--ccs", default="ccs", help="pbccs executable name/path (default: %(default)s)"
)
args = p.parse_args()
if not args.genome.exists():
sys.exit(f"ERROR: Genome not found: {args.genome}")
genome_dir = args.genome.parent.resolve()
genome = args.genome.name
cwd = os.getcwd()
os.chdir(genome_dir)
check_tool(args.pbsim)
check_tool(args.ccs)
if args.prefix is not None:
prefix = args.prefix
else:
name = args.genome.name
prefix = (
name[:-3] if name.endswith(".fa") else Path(name).stem
)
errhmm_path = os.path.expanduser(args.errhmm)
run(
[
args.pbsim,
"--strategy",
"wgs",
"--method",
"errhmm",
"--errhmm",
errhmm_path,
"--depth",
str(args.depth),
"--pass-num",
str(args.pass_num),
"--length-mean",
str(args.length_mean),
"--prefix",
prefix,
"--id-prefix",
prefix + "_",
"--genome",
genome,
]
)
cleanup(f"{prefix}_0001.maf.gz", f"{prefix}_0001.ref")
bam_in = f"{prefix}_0001.bam"
ccs_out = f"{prefix}_0001.ccs.fastq.gz"
threads = os.cpu_count() * 2 // args.jobs
run([args.ccs, bam_in, ccs_out, "-j", str(threads)])
cleanup(f"{prefix}_0001.ccs.zmw_metrics.json.gz")
logger.info("Done.")
logger.info(" BAM: %s", bam_in)
logger.info(" CCS: %s", ccs_out)
os.chdir(cwd)
if __name__ == "__main__":
main()