# Top-level CLI metadata: the program name and its one-line description.
name: ./rustybam
about: "Mitchell R. Vollger's alignment utilities"
# Arguments declared at the top level of the CLI.
args:
- threads:
    # -t / --threads <value>
    short: t
    long: threads
    about: Number of threads to use for decompressing
    takes_value: true
    # NOTE(review): `global: true` presumably makes this option available to
    # every subcommand as well - confirm against the CLI parser's docs.
    global: true
# Subcommands: one sequence entry per subcommand, each holding its own
# `about` text and `args` list.
subcommands:
- stats:
    about: Get percent identity stats from a sam/bam/cram or PAF (add --paf)
    args:
    # Positional input (index 1).
    - BAM:
        about: sam/bam/cram file
        index: 1
    # -q / --qbed: declared without `takes_value`.
    - qbed:
        short: q
        long: qbed
        about: Print query coordinates first
    # -p / --paf: declared without `takes_value`.
    - paf:
        short: p
        long: paf
        about: The input is paf format (must have cg tag with extended cigar or cs tag).
# nucfreq subcommand: per-position base frequencies.
- nucfreq:
    about: Get the frequencies of each bp at each position.
    args:
    # Positional input (index 1).
    - BAM:
        about: sam/bam/cram file
        index: 1
    # -r / --region <value>
    - region:
        short: r
        long: region
        takes_value: true
        about: Print nucfreq info from the input region e.g "chr1:1-1000"
    # -b / --bed <value>
    - bed:
        short: b
        long: bed
        takes_value: true
        about: Print nucfreq info from regions in the bed file, output is optionally tagged using the 4th column
    # -s / --small: declared without `takes_value`.
    - small:
        short: s
        long: small
        about: smaller output format
# suns subcommand: extract SUN intervals from a fasta.
- suns:
    about: Extract the intervals in a genome (fasta) that are made up of SUNs
    args:
    # -k / --kmer-size <value>
    - kmer-size:
        short: k
        long: kmer-size
        takes_value: true
        about: The size of the required unique kmer
    # -m / --max-size <value>
    - max-size:
        short: m
        long: max-size
        takes_value: true
        about: The maximum size SUN interval to report
    # Positional input (index 1).
    - fasta:
        about: a fasta file
        index: 1
    # -v / --validate: declared without `takes_value`.
    - validate:
        short: v
        long: validate
        about: Confirm all the SUNs (very slow) only for debugging.
# repeat subcommand: longest repeat length per position in a fasta.
- repeat:
    about: Report the longest repeat length at every position in a fasta.
    args:
    # -m / --min <value>
    - min:
        short: m
        long: min
        takes_value: true
        about: The smallest repeat length to report
    # Positional input (index 1).
    - fasta:
        about: a fasta file
        index: 1
# liftover subcommand: lift bed coordinates across a PAF alignment.
- liftover:
    about: liftover target sequence coordinates onto query sequence using a PAF
    args:
    # Positional input (index 1).
    - paf:
        about: PAF file from minimap2 or unimap. Must have the cg tag, and n matches will be zero unless the cigar uses =X.
        index: 1
    # -b / --bed <value>; the only argument here marked `required`.
    - bed:
        required: true
        short: b
        long: bed
        takes_value: true
        about: Bed file of regions to liftover
    # -q / --qbed: declared without `takes_value`.
    - qbed:
        short: q
        long: qbed
        about: The bed contains query coordinates to be lifted (note the query in the original PAF will become the target in the output)
    # -l / --largest: declared without `takes_value`.
    - largest:
        short: l
        long: largest
        about: Only return the largest liftover
# bedlength subcommand: count base pairs in a bed file.
- bedlength:
    about: count basepairs in a bed file
    args:
    # Positional input (index 1); no `about` text is defined for it.
    - bed:
        index: 1
    # -r / --readable: declared without `takes_value`.
    - readable:
        short: r
        long: readable
        about: make the output human readable (Mbp)
# breakpaf subcommand: split paf records at large indels.
- breakpaf:
    about: break up paf on indels of a certain size
    args:
    # Positional input (index 1); no `about` text is defined for it.
    - paf:
        index: 1
    # -s / --max-size <value>
    - max-size:
        short: s
        long: max-size
        about: Maximum indel size to keep in the paf
        takes_value: true
# fastq-split subcommand: split a fastq read from stdin into several files.
- fastq-split:
    about: reads in a fastq from stdin and divides into files (can compress by adding .gz)
    args:
    # No short/long/index is given, only `min_values: 1`.
    # NOTE(review): presumably a positional taking one or more output file
    # names - confirm against the CLI parser's handling of `min_values`.
    - fastq:
        min_values: 1
# fasta-split subcommand: split a fasta read from stdin into several files.
- fasta-split:
    about: reads in a fasta from stdin and divides into files (can compress by adding .gz)
    args:
    # No short/long/index is given, only `min_values: 1`.
    # NOTE(review): presumably a positional taking one or more output file
    # names - confirm against the CLI parser's handling of `min_values`.
    - fasta:
        min_values: 1
# orient subcommand: re-orient paf records toward the forward strand.
- orient:
    about: orient paf records so that most of the bases are in the forward direction
    args:
    # Positional input (index 1); no `about` text is defined for it.
    - paf:
        index: 1
# getfasta subcommand: extract bed regions from a fasta.
- getfasta:
    about: Mimic bedtools getfasta but allow for bgzip in both bed and fasta inputs.
    args:
    # -b / --bed <value>; required.
    - bed:
        required: true
        short: b
        long: bed
        takes_value: true
        about: Bed file of regions to extract
    # -f / --fi <value>; required. The long name is `fi`, not `fasta`.
    - fasta:
        required: true
        short: f
        long: fi
        takes_value: true
        about: Fasta file to extract sequences from
    # -s / --strand: declared without `takes_value`.
    - strand:
        short: s
        long: strand
        about: revcomp sequence if the strand is "-"
    # -n / --name: declared without `takes_value`.
    - name:
        # Quoted because a bare `n` is a boolean under YAML 1.1 implicit
        # typing; the quotes keep it a one-character string on every parser.
        short: "n"
        long: name
        about: Add the name (4th column) to the header of the fasta output