category,tool,task_description,required_patterns
alignment,bwa,"align reads.fastq to ref.fa with 8 threads","mem;-t 8"
alignment,bwa,"build BWA index for genome.fa","index"
alignment,bwa-mem2,"align paired reads R1.fastq R2.fastq to reference.fa with read group ID sample1","mem;-R"
alignment,bowtie2,"align R1.fastq.gz and R2.fastq.gz to index bt2_index with 4 threads","-x;-1;-2"
alignment,bowtie2,"build bowtie2 index from genome.fa","bowtie2-build"
alignment,minimap2,"align long reads reads.fastq.gz to reference.fa with 16 threads","-t"
alignment,minimap2,"align ONT cDNA reads to reference genome for splice-aware mapping","-ax splice"
alignment,hisat2,"align paired reads R1.fq.gz R2.fq.gz to HISAT2 index genome_ht2 with 8 threads","-x;-1;-2"
alignment,STAR,"align paired reads R1.fq.gz R2.fq.gz to STAR index in star_idx/ and output coordinate-sorted BAM","--genomeDir;--readFilesIn;SortedByCoordinate"
alignment,STAR,"generate STAR genome index from genome.fa and genes.gtf with 8 threads","--runMode genomeGenerate;--genomeFastaFiles;--sjdbGTFfile"
qc,fastp,"quality trim paired reads R1.fastq.gz R2.fastq.gz with adapter auto-detection and 8 threads","--in1;--in2;--detect_adapter_for_pe"
qc,fastp,"trim single-end reads input.fq.gz and filter reads shorter than 50bp","--in1;--length_required"
qc,fastqc,"run quality check on sample_R1.fastq.gz with 4 threads and save to qc_output/","-t;-o"
qc,trimmomatic,"trim paired-end reads R1.fq.gz R2.fq.gz with leading:3 trailing:3 slidingwindow:4:20","PE;LEADING;TRAILING;SLIDINGWINDOW"
qc,cutadapt,"trim Illumina TruSeq adapters from paired reads R1.fq R2.fq and discard reads shorter than 20bp","-a;-o;-m"
qc,multiqc,"aggregate QC reports in results/ directory and output to multiqc_output/","-o"
sam-bam,samtools,"sort aligned.bam by coordinate and output to sorted.bam using 4 threads","sort;-o"
sam-bam,samtools,"index sorted.bam","index"
sam-bam,samtools,"view only mapped reads from input.bam and output as BAM","view;-F 4;-b"
sam-bam,samtools,"compute alignment statistics for aligned.bam","flagstat"
sam-bam,samtools,"merge sample1.bam sample2.bam sample3.bam into merged.bam","merge"
sam-bam,samtools,"generate depth information for sample.bam and output to depth.txt","depth"
sam-bam,samtools,"extract reads from region chr1:1000-2000 of sorted.bam","view;chr1"
sam-bam,picard,"mark duplicates in input.bam and output to dedup.bam with metrics file","MarkDuplicates;INPUT"
interval-ops,bedtools,"intersect peaks.bed with genes.bed and report only overlapping regions","intersect;-a;-b"
interval-ops,bedtools,"compute genome coverage from aligned.bam and output bedGraph","genomecov;-ibam;-bg"
interval-ops,bedtools,"sort input.bed by chromosome and position","sort;-i"
interval-ops,bedtools,"merge overlapping intervals in peaks.bed","merge;-i"
variant-calling,gatk,"call variants in gVCF mode on sample.bam against reference.fa","HaplotypeCaller;-ERC GVCF"
variant-calling,gatk,"apply base quality score recalibration to sample.bam using recal.table and reference.fa","ApplyBQSR;-R"
variant-calling,gatk,"combine gVCF files sample1.g.vcf.gz and sample2.g.vcf.gz for joint genotyping","CombineGVCFs"
variant-calling,bcftools,"call SNVs and indels from input.bcf with ploidy 2","call;-m"
variant-calling,bcftools,"filter VCF to keep only variants with QUAL > 30 and DP > 10","filter;QUAL"
variant-calling,bcftools,"compute VCF statistics for variants.vcf.gz","stats"
variant-calling,freebayes,"call variants in sample.bam against reference.fa with min mapping quality 20","-f;--min-mapping-quality"
variant-calling,deepvariant,"call variants from sample.bam with reference.fa using WGS model and 8 threads","--model_type;--ref;--reads"
structural-variants,delly,"call structural variants from sample.bam against reference.fa","call;-g"
structural-variants,manta,"configure and run SV calling for sample.bam with reference.fa","--bam;--referenceFasta"
quantification,featureCounts,"count reads in aligned.bam against annotation.gtf for paired-end data with 8 threads","-a;-p"
quantification,featureCounts,"count reads at gene level from aligned.bam using annotation.gtf with strand-specific counting (reverse)","-a;-s 2"
quantification,salmon,"quantify expression from R1.fastq.gz and R2.fastq.gz against index salmon_index","quant;-1;-2"
quantification,salmon,"build salmon index from transcripts.fa","index;-t"
quantification,kallisto,"quantify paired-end reads R1.fq.gz R2.fq.gz against transcriptome index and output to results/ with 100 bootstraps","quant;-i;-b"
quantification,htseq-count,"count reads in aligned.bam using genes.gtf with reverse strand mode","-s reverse"
quantification,stringtie,"assemble transcripts from aligned.bam using reference annotation genes.gtf with 8 threads","-G;-p"
metagenomics,kraken2,"classify paired reads R1.fastq.gz R2.fastq.gz against database /db/kraken2 and write report to report.txt","--db;--paired;--report"
metagenomics,bracken,"estimate abundance from kraken2 report.txt with database /db/kraken2 and read length 150","-d;-i;-r 150"
metagenomics,metaphlan,"profile metagenome from reads.fastq.gz with 8 threads and output to profile.txt","--input_type;--nproc;-o"
metagenomics,megahit,"assemble metagenome from paired reads R1.fq.gz R2.fq.gz with 16 threads and output to assembly/","-1;-2;-t;-o"
metagenomics,spades,"assemble metagenome from paired reads using meta mode with 8 threads","--meta"
epigenomics,macs3,"call narrow peaks from treatment.bam with control.bam using genome size hs","callpeak;-t;-c;-g hs"
epigenomics,macs3,"call broad peaks from ChIP-seq treatment.bam with control.bam","callpeak;--broad"
epigenomics,deeptools,"compute read coverage across the genome from sample.bam normalized by RPKM with 8 threads","bamCoverage;--normalizeUsing RPKM"
epigenomics,deeptools,"plot heatmap of signal around TSS from matrix.gz","plotHeatmap;-m"
epigenomics,bismark,"align bisulfite-seq reads R1.fq.gz R2.fq.gz to bisulfite genome in bismark_idx/","--genome;-1;-2"
single-cell,cellranger,"count single-cell 3' gene expression from fastqs/ directory with reference transcriptome in refdata/","count;--fastqs;--transcriptome"
single-cell,STARsolo,"align 10x Chromium v3 scRNA-seq reads R1.fq.gz R2.fq.gz to STAR index in star_idx/","--soloType;--genomeDir"
single-cell,velocyto,"count spliced/unspliced reads from sample.bam using genes.gtf and output to velocyto/","run"
assembly,flye,"assemble nanopore reads reads.fastq.gz with estimated genome size 5m","--nano-raw;--genome-size"
assembly,hifiasm,"assemble PacBio HiFi reads reads.fq.gz with 16 threads","-t"
assembly,quast,"evaluate genome assembly assembly.fasta with reference genome reference.fa","-r"
annotation,prokka,"annotate bacterial genome assembly.fa with genus Escherichia and output to annotation/","--genus;--outdir"
annotation,snpEff,"annotate variants in variants.vcf using hg38 database","hg38"
annotation,blast,"search protein query.fa against database nr with evalue 1e-5 and output format 6","blastp;-evalue;-outfmt 6"
annotation,diamond,"search protein query.fa against nr.dmnd in sensitive mode with 8 threads","blastp;--sensitive;-p"
sequence-ops,seqkit,"extract sequences longer than 1000bp from input.fasta","seq;--min-len"
sequence-ops,seqkit,"compute statistics for input.fastq.gz","stats"
sequence-ops,seqkit,"subsample 10000 reads from input.fastq.gz","sample;-n"
phylogenetics,mafft,"perform multiple sequence alignment on sequences.fa with auto strategy","--auto"
phylogenetics,iqtree2,"infer maximum likelihood tree from alignment.fa with 1000 ultrafast bootstraps and auto model selection","-s;-B 1000;-m"
format-conversion,samtools,"convert SAM to BAM format from input.sam","view;-b"
format-conversion,bcftools,"convert BCF to VCF format from input.bcf","view"
format-conversion,bedtools,"convert BAM to BED format from aligned.bam","bamtobed;-i"