import os
import utls
from Bio import SeqIO as SqIO
from bx.align import maf
import numpy as nmpy
from math import sqrt
def mn():
(cr_wrk_dr, ast_dr, prgrm_dr, cnd_prgrm_dr) = utls.gt_drs()
pstv_algn_fls = [
ast_dr + "/h_spns_m_mscls_hmlg_lncrnas_2_hmlg_m_mscls_ncrnas.maf",
ast_dr + "/fldlgn_h_spns_m_mscls_hmlg_lncrnas_2_hmlg_m_mscls_ncrnas.dat",
]
ngtv_algn_fls = [
ast_dr + "/shfl_h_spns_m_mscls_hmlg_lncrnas_2_hmlg_m_mscls_ncrnas.maf",
ast_dr + "/fldlgn_shfl_h_spns_m_mscls_hmlg_lncrnas_2_hmlg_m_mscls_ncrnas.dat",
]
rf_sq_fl = ast_dr + "/hmlg_m_mscls_ncrnas.fa"
hmlg_rlts = ast_dr + "/h_spns_m_mscls_hmlg_lncrnas.dat"
hmlg_rlts = utls.gt_prs_hmlg_rlts(hmlg_rlts)
rf_sq_nm = len([rc for rc in SqIO.parse(rf_sq_fl, "fasta")])
for pstv_algn_fl, ngtv_algn_fl in zip(pstv_algn_fls, ngtv_algn_fls):
pstv_algns = gt_prs_fldlgn_algns(pstv_algn_fl) if pstv_algn_fl.find("fldlgn") != -1 else utls.gt_prs_algns(pstv_algn_fl)
ngtv_algns = gt_prs_fldlgn_algns(ngtv_algn_fl) if ngtv_algn_fl.find("fldlgn") != -1 else utls.gt_prs_algns(ngtv_algn_fl)
tp = fp = tn = fn = 0.
fnd_algns = []
for pstv_algn in pstv_algns:
algn_pr = (pstv_algn[1], pstv_algn[2])
if algn_pr in fnd_algns:
continue
if utls.is_gn_prdctd(hmlg_rlts[pstv_algn[2]], pstv_algn[1], rf_sq_fl):
tp += 1
else:
fp += 1
fnd_algns.append(algn_pr)
fp += rf_sq_nm - tp
fnd_algns = []
for ngtv_algn in ngtv_algns:
algn_pr = (ngtv_algn[1], ngtv_algn[2])
if algn_pr in fnd_algns:
continue
if not utls.is_gn_prdctd(hmlg_rlts[ngtv_algn[2]], ngtv_algn[1], rf_sq_fl):
tn += 1
else:
fn += 1
fnd_algns.append(algn_pr)
tn += rf_sq_nm - fn
print tp, fp, tn, fn
prcsn = tp / (tp + fp)
rcl = tp / (tp + fn)
f_ms = 2 * rcl * prcsn / (rcl + prcsn)
print f_ms
def gt_prs_fldlgn_algns(algn_fl):
prs_algns = []
inpt_hndl = open(algn_fl, "rU")
lns = inpt_hndl.readlines()
for ln in lns:
if ln.startswith("; ALIGNMENT_LIST"):
id_pr = ln.split()
id_pr = (id_pr[2], id_pr[3])
prs_algns.append((0, id_pr[1], id_pr[0]))
elif ln.startswith("; FOLDALIGN_SCORE"):
prs_algns[-1] = (int(ln.split()[2]), prs_algns[-1][1], prs_algns[-1][2])
return prs_algns
if __name__ == "__main__":
mn()