# rustybam 0.1.33
#
# bioinformatics toolkit in rust
# Documentation
# Root of the clap command definition: program name, one-line summary,
# root-level options, and the list of subcommands.
# NOTE(review): the name string carries a leading "./" - confirm intended.
name: './rustybam'
about: "Mitchell R. Vollger's alignment utilities"
args:
  # -t / --threads <value>; declared global.
  - threads:
      about: 'Number of threads to use for decompressing'
      short: 't'
      long: 'threads'
      global: true
      takes_value: true
# One list entry per subcommand.
subcommands:
  # stats: percent identity statistics for an alignment file.
  - stats:
      about: 'Get percent identity stats from a sam/bam/cram or PAF (add --paf)'
      args:
        # First positional argument (index 1).
        - BAM:
            index: 1
            about: 'sam/bam/cram file'
        # -q / --qbed: no takes_value, so it is a bare switch.
        - qbed:
            about: 'Print query coordinates first'
            long: 'qbed'
            short: 'q'
        # -p / --paf: bare switch declaring the input format.
        - paf:
            about: 'The input is paf format (must have cg tag with extended cigar or cs tag).'
            long: 'paf'
            short: 'p'
  # nucfreq: per-position base frequencies.
  - nucfreq:
      about: 'Get the frequencies of each bp at each position.'
      args:
        # First positional argument (index 1).
        - BAM:
            index: 1
            about: 'sam/bam/cram file'
        # -r / --region <value>: a single region string.
        - region:
            about: 'Print nucfreq info from the input region e.g "chr1:1-1000"'
            long: 'region'
            short: 'r'
            takes_value: true
        # -b / --bed <value>: regions supplied through a bed file.
        - bed:
            about: 'Print nucfreq info from regions in the bed file, output is optionally tagged using the 4th column'
            long: 'bed'
            short: 'b'
            takes_value: true
        # -s / --small: bare switch.
        - small:
            about: 'smaller output format'
            long: 'small'
            short: 's'
  # suns: report intervals of a fasta that are made up of SUNs.
  - suns:
      about: 'Extract the intervals in a genome (fasta) that are made up of SUNs'
      args:
        # -k / --kmer-size <value>
        - kmer-size:
            about: 'The size of the required unique kmer'
            long: 'kmer-size'
            short: 'k'
            takes_value: true
        # -m / --max-size <value>
        - max-size:
            about: 'The maximum size SUN interval to report'
            long: 'max-size'
            short: 'm'
            takes_value: true
        # First positional argument (index 1).
        - fasta:
            index: 1
            about: 'a fasta file'
        # -v / --validate: bare switch.
        - validate:
            about: 'Confirm all the SUNs (very slow) only for debugging.'
            long: 'validate'
            short: 'v'
  # repeat: longest repeat length at each position of a fasta.
  - repeat:
      about: 'Report the longest repeat length at every position in a fasta.'
      args:
        # -m / --min <value>
        - min:
            about: 'The smallest repeat length to report'
            long: 'min'
            short: 'm'
            takes_value: true
        # First positional argument (index 1).
        - fasta:
            index: 1
            about: 'a fasta file'
  # liftover: map bed coordinates across a PAF alignment.
  - liftover:
      about: 'liftover target sequence coordinates onto query sequence using a PAF'
      args:
        # First positional argument (index 1).
        - paf:
            index: 1
            about: 'PAF file from minimap2 or unimap. Must have the cg tag, and n matches will be zero unless the cigar uses =X.'
        # -b / --bed <value>: the only required option of this subcommand.
        - bed:
            about: 'Bed file of regions to liftover'
            long: 'bed'
            short: 'b'
            takes_value: true
            required: true
        # -q / --qbed: bare switch.
        - qbed:
            about: 'The bed contains query coordinates to be lifted (note the query in the original PAF will become the target in the output)'
            long: 'qbed'
            short: 'q'
        # -l / --largest: bare switch.
        - largest:
            about: 'Only return the largest liftover'
            long: 'largest'
            short: 'l'
  # bedlength: total the base pairs covered by a bed file.
  - bedlength:
      about: 'count basepairs in a bed file'
      args:
        # First positional argument (index 1). Help text added so the
        # argument is described like the other positionals in this file.
        - bed:
            about: 'a bed file'
            index: 1
        # -r / --readable: bare switch.
        - readable:
            about: 'make the output human readable (Mbp)'
            short: 'r'
            long: 'readable'
  # breakpaf: split PAF records at indels above a size threshold.
  - breakpaf:
      about: 'break up paf on indels of a certain size'
      args:
        # First positional argument (index 1). Help text added so the
        # argument is described like the other positionals in this file.
        - paf:
            about: 'a PAF file'
            index: 1
        # -s / --max-size <value>
        - max-size:
            about: 'Maximum indel size to keep in the paf'
            short: 's'
            long: 'max-size'
            takes_value: true
  # fastq-split: fastq arrives on stdin and is divided across files.
  - fastq-split:
      about: 'reads in a fastq from stdin and divides into files (can compress by adding .gz)'
      args:
        # Declared with no short/long/index and min_values: 1.
        # NOTE(review): presumably parsed as a positional list of the files
        # to write; help text added on that assumption - confirm.
        - fastq:
            about: 'fastq file(s) to divide the reads into (add .gz to compress)'
            min_values: 1
  # fasta-split: fasta arrives on stdin and is divided across files.
  - fasta-split:
      about: 'reads in a fasta from stdin and divides into files (can compress by adding .gz)'
      args:
        # Declared with no short/long/index and min_values: 1.
        # NOTE(review): presumably parsed as a positional list of the files
        # to write; help text added on that assumption - confirm.
        - fasta:
            about: 'fasta file(s) to divide the records into (add .gz to compress)'
            min_values: 1
  # orient: flip PAF records so most bases are in the forward direction.
  - orient:
      about: 'orient paf records so that most of the bases are in the forward direction'
      args:
        # First positional argument (index 1). Help text added so the
        # argument is described like the other positionals in this file.
        - paf:
            about: 'a PAF file'
            index: 1
  # getfasta: extract bed regions from a fasta.
  - getfasta:
      about: 'Mimic bedtools getfasta but allow for bgzip in both bed and fasta inputs.'
      args:
        # -b / --bed <value>, required.
        - bed:
            about: 'Bed file of regions to extract'
            short: 'b'
            long: 'bed'
            takes_value: true
            required: true
        # -f / --fi <value>, required. The long name is "fi", not "fasta".
        - fasta:
            about: 'Fasta file to extract sequences from'
            short: 'f'
            long: 'fi'
            takes_value: true
            required: true
        # -s / --strand: bare switch.
        - strand:
            about: 'revcomp sequence if the strand is "-"'
            short: 's'
            long: 'strand'
        # -n / --name: bare switch. The short value is quoted on purpose:
        # a bare n is a boolean in YAML 1.1 and would not load as a string.
        - name:
            about: 'Add the name (4th column) to the header of the fasta output'
            short: 'n'
            long: 'name'