import sys
import re
import enum
import argparse
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from scipy.constants.constants import alpha
class Distance(enum.Enum):
edit = 1
gap_linear = 2
gap_affine = 3
gap_affine_2p = 4
class Penalties:
def __init__(self):
self.distance = Distance.edit
self.match = 0
self.mismatch = 0
self.indel = 0
self.gap_open1 = 0
self.gap_extend1 = 0
self.gap_open2 = 0
self.gap_extend2 = 0
def cigar_compute_score_edit(cigar_vector,penalties):
score = 0
for op in cigar_vector:
if op[1] in "DI": score += int(op[0])
if op[1] in "X": score += int(op[0])
return score
def cigar_compute_score_linear(cigar_vector,penalties):
score = 0
for op in cigar_vector:
if op[1] == "M": score -= int(op[0]) * penalties.match
if op[1] == "X": score -= int(op[0]) * penalties.mismatch
if op[1] in "DI": score -= int(op[0]) * penalties.indel
return score
def cigar_compute_score_affine(cigar_vector,penalties):
score = 0
for op in cigar_vector:
if op[1] == "M": score -= int(op[0]) * penalties.match
if op[1] == "X": score -= int(op[0]) * penalties.mismatch
if op[1] in "DI": score -= penalties.gap_open1 + int(op[0]) * penalties.gap_extend1
return score
def cigar_compute_score_affine_2p(cigar_vector,penalties):
score = 0
for op in cigar_vector:
if op[1] == "M": score -= int(op[0]) * penalties.match
if op[1] == "X": score -= int(op[0]) * penalties.mismatch
if op[1] in "DI":
score1 = penalties.gap_open1 + int(op[0]) * penalties.gap_extend1
score2 = penalties.gap_open2 + int(op[0]) * penalties.gap_extend2
score -= min(score1,score2)
return score
def cigar_compute_score(cigar,penalties):
cigar_vector = re.findall(r'(\d+)([MXDI])',cigar)
if penalties.distance == Distance.edit:
return cigar_compute_score_edit(cigar_vector,penalties)
elif penalties.distance == Distance.gap_linear:
return cigar_compute_score_linear(cigar_vector,penalties)
elif penalties.distance == Distance.gap_affine:
return cigar_compute_score_affine(cigar_vector,penalties)
elif penalties.distance == Distance.gap_affine_2p:
return cigar_compute_score_affine_2p(cigar_vector,penalties)
def rescore_alignments(input_path,output_path,penalties,verbose):
input_file = open(input_path,"rt")
output_file = open(output_path,"wt")
while True:
line = input_file.readline()
if not line: break fields = line.split()
cigar = fields[1] if len(fields)<=2 else fields[5]
score = cigar_compute_score(cigar,penalties)
output_file.write("%d\t%s\n" % (score,cigar))
input_file.close()
output_file.close()
parser = argparse.ArgumentParser()
parser.add_argument('-i','--input',required=True,help='Input file (*.alg)')
parser.add_argument('-o','--output',required=True,help='Output file (*.alg)')
parser.add_argument('-e','--edit',
help='Score alignments using edit distance (--edit)')
parser.add_argument('-l','--gap-linear',
help='Score alignments using gap-linear distance (--gap-linear=M,X,I)')
parser.add_argument('-g','--gap-affine',
help='Score alignments using gap-affine distance (--gap-affine=M,X,O,E)')
parser.add_argument('-G','--gap-affine-2p',
help='Score alignments using gap-affine-2p distance (--gap-affine=M,X,O1,E1,O2,E2)')
parser.add_argument('-v','--verbose',action='store_true',default=False,
help='Display comparison differences (default=disable)')
parser.add_argument('-H',action='store_true',dest="human_readable",default=False)
args = parser.parse_args()
penalties = Penalties()
if args.edit is not None:
pass elif args.gap_linear is not None:
values = args.gap_linear.split(',')
penalties.distance = Distance.gap_linear
penalties.match = int(values[0])
penalties.mismatch = int(values[1])
penalties.indel = int(values[2])
elif args.gap_affine is not None:
values = args.gap_affine.split(',')
penalties.distance = Distance.gap_affine
penalties.match = int(values[0])
penalties.mismatch = int(values[1])
penalties.gap_open1 = int(values[2])
penalties.gap_extend1 = int(values[3])
elif args.gap_affine_2p is not None:
values = args.gap_affine_2p.split(',')
penalties.distance = Distance.gap_affine_2p
penalties.match = int(values[0])
penalties.mismatch = int(values[1])
penalties.gap_open1 = int(values[2])
penalties.gap_extend1 = int(values[3])
penalties.gap_open2 = int(values[4])
penalties.gap_extend2 = int(values[5])
else:
print("[WFAReScoreAlignments] No distance provided. Using edit-distance (default)");
if penalties.match > 0:
print("[WFACompareAlignments] Match penalty must be negative or zero")
exit(-1)
if penalties.mismatch < 0 or \
penalties.mismatch < 0 or \
penalties.gap_open1 < 0 or \
penalties.gap_extend1 < 0 or \
penalties.gap_open2 < 0 or \
penalties.gap_extend2 < 0:
print("[WFACompareAlignments] Penalties must be positive or zero")
exit(-1)
rescore_alignments(args.input,args.output,penalties,args.verbose)