import os
import matplotlib
from matplotlib import pylab
import numpy
from math import log
from clint.textui import colored
from itertools import combinations
def get_dir_paths():
    """Return the directory paths used by the surrounding scripts.

    Returns:
        A 4-tuple ``(current_work_dir_path, asset_dir_path,
        program_dir_path, conda_program_dir_path)`` where

        * ``current_work_dir_path`` is ``os.getcwd()``;
        * ``asset_dir_path`` is the parent of the working directory with
          ``"/assets"`` appended;
        * ``program_dir_path`` is ``"/home/masaki/prgrms"`` when the working
          directory contains ``"/home/masaki"`` and ``"/usr/local"``
          otherwise;
        * ``conda_program_dir_path`` is the ``ancnd/envs/rsrch`` directory
          below the selected program directory.
    """
    cwd = os.getcwd()
    parent_dir = os.path.split(cwd)[0]
    # The install prefix is chosen by whether we run below /home/masaki.
    if "/home/masaki" in cwd:
        program_root = "/home/masaki/prgrms"
        conda_env_root = "/home/masaki/prgrms/ancnd/envs/rsrch"
    else:
        program_root = "/usr/local"
        conda_env_root = "/usr/local/ancnd/envs/rsrch"
    return (cwd, parent_dir + "/assets", program_root, conda_env_root)
def init_matplotlib():
    """Apply the shared matplotlib settings used for figures.

    Sets the legend, axis-label, axis-title and tick-label font sizes to
    ``"x-large"`` through ``pylab.rcParams`` and sets ``ps.fonttype`` to 42
    in ``matplotlib.rcParams``.
    """
    font_size_keys = (
        "legend.fontsize",
        "axes.labelsize",
        "axes.titlesize",
        "xtick.labelsize",
        "ytick.labelsize",
    )
    pylab.rcParams.update({key: "x-large" for key in font_size_keys})
    # NOTE(review): 42 selects Type 42 (TrueType) font embedding according to
    # matplotlib's rcParams documentation -- confirm against the installed version.
    matplotlib.rcParams["ps.fonttype"] = 42
def get_nums_of_gaps_in_front_of_chars(sa, num_of_records, sa_len):
    """Count, per alignment row, the gaps seen up to each column.

    Args:
        sa: Indexable collection whose items expose a ``seq`` attribute that
            can be indexed by column; a ``"-"`` character marks a gap.
        num_of_records: Number of rows of ``sa`` to process.
        sa_len: Number of columns to process in every row.

    Returns:
        An integer ``numpy`` array of shape ``(num_of_records, sa_len)``.
        Entry ``[i][j]`` is the number of ``"-"`` characters in
        ``sa[i].seq[0]`` through ``sa[i].seq[j]``, column ``j`` included.
    """
    gap_counts = numpy.zeros((num_of_records, sa_len), dtype=int)
    for row in range(num_of_records):
        seq = sa[row].seq
        gaps_so_far = 0
        for col in range(sa_len):
            if seq[col] == "-":
                gaps_so_far += 1
            # A running total replaces the explicit look-back at column col - 1.
            gap_counts[row][col] = gaps_so_far
    return gap_counts
def get_css_string(css_file_path):
    """Read a structure string from the second-to-last line of a file.

    The returned value is the third whitespace-separated field of the
    second-to-last line.
    NOTE(review): presumably that line is a consensus-structure annotation
    of an alignment file -- confirm against the files actually produced.

    Args:
        css_file_path: Path of the text file to read.

    Returns:
        The third whitespace-separated field of the second-to-last line.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If the file has fewer than two lines, or the
            second-to-last line has fewer than three fields.
    """
    # The context manager closes the handle even when parsing fails.
    with open(css_file_path) as css_file:
        lines = css_file.readlines()
    return lines[-2].split()[2]
def get_ss(ss_string):
    """Convert a dot-bracket string into a list of base-pair index pairs.

    Args:
        ss_string: String in which ``"("`` opens a pair and ``")"`` closes
            the most recently opened one; every other character is ignored.

    Returns:
        A list of ``(open_index, close_index)`` tuples ordered from the pair
        that closes last to the pair that closes first.

    Raises:
        IndexError: If a ``")"`` appears with no unmatched ``"("`` before it.
    """
    open_positions = []
    pairs = []
    for (pos, char) in enumerate(ss_string):
        if char == "(":
            open_positions.append(pos)
        elif char == ")":
            pairs.append((open_positions.pop(), pos))
    # Appending and reversing once yields the same order as inserting every
    # pair at the front, without the quadratic cost of front insertion.
    pairs.reverse()
    return pairs
def print_color_coded_css_with_sa(css, css_string, sa, bpp_mats, nums_of_gaps_in_front_of_chars, num_of_records, sa_len):
    """Print an alignment and its structure string, coloring paired columns.

    Every row of ``sa`` (upper-cased) and then ``css_string`` are printed,
    one per line. Characters start out black; for each pair ``(i, j)`` in
    ``css`` the two columns are re-colored in all rows by the mean base-pair
    probability of the pair: red for >= 0.5, yellow for >= 0.25, green for
    >= 0.125, cyan for >= 0.0625 and blue otherwise.

    Args:
        css: Iterable of ``(i, j)`` alignment-column pairs.
        css_string: Structure string printed below the alignment rows.
        sa: Indexable collection whose items expose a string ``seq``.
        bpp_mats: Per-row matrices indexed as ``bpp_mats[k][a][b]`` with
            positions expressed without gaps.
        nums_of_gaps_in_front_of_chars: Per-row gap counts, indexed
            ``[k][column]``, subtracted from a column index to obtain the
            position without gaps.
        num_of_records: Number of rows of ``sa`` to use.
        sa_len: Not used by this function.
    """
    # Thresholds are checked from highest to lowest; blue is the fallback.
    color_steps = (
        (0.5, colored.red),
        (0.5 ** 2, colored.yellow),
        (0.5 ** 3, colored.green),
        (0.5 ** 4, colored.cyan),
    )
    rows = [[colored.black(residue) for residue in sa[row].seq.upper()] for row in range(num_of_records)]
    rows.append([colored.black(symbol) for symbol in css_string])
    for (left, right) in css:
        bpp_total = 0
        for row in range(num_of_records):
            gap_counts = nums_of_gaps_in_front_of_chars[row]
            # NOTE(review): when the column itself is a gap, the resulting
            # position points at the preceding residue (or -1 for a leading
            # gap) -- confirm this is intended.
            ungapped_left = left - gap_counts[left]
            ungapped_right = right - gap_counts[right]
            bpp = bpp_mats[row][ungapped_left][ungapped_right]
            # Only strictly positive probabilities contribute to the mean.
            if bpp > 0:
                bpp_total += bpp
        mean_bpp = bpp_total / num_of_records
        paint = colored.blue
        for (threshold, candidate) in color_steps:
            if mean_bpp >= threshold:
                paint = candidate
                break
        # The extra row (index num_of_records) is the structure string itself.
        for row in range(num_of_records + 1):
            for col in (left, right):
                rows[row][col] = paint(colored.clean(rows[row][col]))
    for row_chars in rows:
        line = ""
        for piece in row_chars:
            line += piece
        print(line)
def get_bpp_mats(bpp_mat_file_path, seq_lens):
    """Parse base-pair probability matrices from a text file.

    Only lines starting with a digit or with ``">"`` are considered; they
    are consumed two at a time. The first line of a pair supplies the RNA id
    (the integer after its first character) and the second line holds
    space-separated ``row,column,probability`` entries.

    Args:
        bpp_mat_file_path: Path of the file to parse.
        seq_lens: Object indexable by RNA id that gives the sequence length
            used as the side length of that RNA's matrix.

    Returns:
        A dict mapping each RNA id to a square ``numpy`` float array that is
        zero everywhere except at the listed ``[row, column]`` entries.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If an id or entry field cannot be converted to a number.
        IndexError: If an entry has fewer than three comma-separated fields
            or an index falls outside the matrix.
    """
    # The context manager closes the handle even when parsing fails.
    with open(bpp_mat_file_path) as bpp_mat_file:
        lines = [line for line in bpp_mat_file if line[:1].isdigit() or line.startswith(">")]
    bpp_mats = {}
    # zip() drops an unpaired trailing line, exactly like stepping by two up
    # to len(lines) - 1.
    for (header, data) in zip(lines[0::2], lines[1::2]):
        rna_id = int(header[1:])
        seq_len = seq_lens[rna_id]
        bpp_mat = numpy.zeros((seq_len, seq_len))
        for entry in data.strip().split(" "):
            fields = entry.split(",")
            (row, col, bpp) = (int(fields[0]), int(fields[1]), float(fields[2]))
            bpp_mat[row, col] = bpp
        bpp_mats[rna_id] = bpp_mat
    return bpp_mats
def print_color_coded_sss(sss, ss_strings, bpp_mats, records, num_of_records):
    """Print each sequence above its structure string, coloring base pairs.

    All characters start out black. For each of the first ``num_of_records``
    records, both positions of every pair ``(j, k)`` in ``sss[i]`` are
    re-colored, in the sequence and in the structure string, by
    ``bpp_mats[i][j, k]``: red for >= 0.5, yellow for >= 0.25, green for
    >= 0.125, cyan for >= 0.0625 and blue otherwise. Each sequence line is
    then printed, followed by its structure line.

    Args:
        sss: Per-record iterables of ``(j, k)`` position pairs.
        ss_strings: Per-record structure strings.
        bpp_mats: Per-record matrices indexed as ``bpp_mats[i][j, k]``.
        records: Iterable of objects exposing an iterable ``seq``.
        num_of_records: Number of leading records whose pairs are colored.
    """
    # Thresholds are checked from highest to lowest; blue is the fallback.
    color_steps = (
        (0.5, colored.red),
        (0.5 ** 2, colored.yellow),
        (0.5 ** 3, colored.green),
        (0.5 ** 4, colored.cyan),
    )
    seq_rows = [[colored.black(residue) for residue in record.seq] for record in records]
    ss_rows = [[colored.black(symbol) for symbol in ss_string] for ss_string in ss_strings]
    for rec in range(num_of_records):
        for (left, right) in sss[rec]:
            bpp = bpp_mats[rec][left, right]
            paint = colored.blue
            for (threshold, candidate) in color_steps:
                if bpp >= threshold:
                    paint = candidate
                    break
            for pos in (left, right):
                plain_residue = colored.clean(seq_rows[rec][pos])
                plain_symbol = colored.clean(ss_rows[rec][pos])
                seq_rows[rec][pos] = paint(plain_residue)
                ss_rows[rec][pos] = paint(plain_symbol)
    for (seq_row, ss_row) in zip(seq_rows, ss_rows):
        seq_line = ""
        for piece in seq_row:
            seq_line += piece
        ss_line = ""
        for piece in ss_row:
            ss_line += piece
        print(seq_line)
        print(ss_line)
def get_ss_strings(ss_file_path):
    """Collect one structure string per 7-line record of a file.

    The file is read in records of seven lines. From each record, the first
    whitespace-separated token of the sixth line is taken. A record is
    started at every seventh line except when that line is the last line of
    the file.

    Args:
        ss_file_path: Path of the text file to read.

    Returns:
        A list with one string per record, in file order.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a started record has no sixth line, or that line
            contains no token.
    """
    # The context manager closes the handle even when parsing fails.
    with open(ss_file_path) as ss_file:
        lines = ss_file.readlines()
    return [lines[start + 5].split()[0] for start in range(0, len(lines) - 1, 7)]
def get_sss(ss_strings):
    """Convert each structure string into its list of base pairs.

    Args:
        ss_strings: Iterable of structure strings accepted by ``get_ss``.

    Returns:
        A list holding the ``get_ss`` result for every input string, in
        input order.
    """
    return [get_ss(ss_string) for ss_string in ss_strings]