def gen_thermo_array():
    """Print a 4x4x4x4 nested list populated from the table below.

    Every cell starts as ``None``.  For each entry of ``thermo_dg_dict``
    the two-letter pair key is converted to four indices by
    ``parse_old_thermo_key_to_index`` and the cell at that position is set
    to the string ``"Some(<value>)"``.  The finished nested list is written
    to stdout with ``print``; nothing is returned.

    NOTE(review): the ``Some(...)``/``None`` rendering looks like it is meant
    to be pasted into source code of another language -- confirm with the
    consumer of this output.
    """
    # Fresh inner lists are created on every iteration, so no two rows
    # share storage.
    table = [
        [[[None] * 4 for _ in range(4)] for _ in range(4)]
        for _ in range(4)
    ]
    thermo_dg_dict = {
        ("AA", "AT"): 0.69,
        ("AA", "CT"): 1.33,
        ("AA", "GT"): 0.74,
        ("AA", "TA"): 0.61,
        ("AA", "TC"): 0.88,
        ("AA", "TG"): 0.14,
        ("AA", "TT"): -1.0,
        ("AC", "AG"): 0.17,
        ("AC", "CG"): 0.47,
        ("AC", "GG"): -0.52,
        ("AC", "TA"): 0.77,
        ("AC", "TC"): 1.33,
        ("AC", "TG"): -1.44,
        ("AC", "TT"): 0.64,
        ("AG", "AC"): 0.43,
        ("AG", "CC"): 0.79,
        ("AG", "GC"): 0.11,
        ("AG", "TA"): 0.02,
        ("AG", "TC"): -1.28,
        ("AG", "TG"): -0.13,
        ("AG", "TT"): 0.71,
        ("AT", "AA"): 0.61,
        ("AT", "CA"): 0.77,
        ("AT", "GA"): 0.02,
        ("AT", "TA"): -0.88,
        ("AT", "TC"): 0.73,
        ("AT", "TG"): 0.07,
        ("AT", "TT"): 0.69,
        ("CA", "AT"): 0.92,
        ("CA", "CT"): 1.05,
        ("CA", "GA"): 0.43,
        ("CA", "GC"): 0.75,
        ("CA", "GG"): 0.03,
        ("CA", "GT"): -1.45,
        ("CA", "TT"): 0.75,
        ("CC", "AG"): 0.81,
        ("CC", "CG"): 0.79,
        ("CC", "GA"): 0.79,
        ("CC", "GC"): 0.7,
        ("CC", "GG"): -1.84,
        ("CC", "GT"): 0.62,
        ("CC", "TG"): 0.98,
        ("CG", "AC"): 0.75,
        ("CG", "CC"): 0.7,
        ("CG", "GA"): 0.11,
        ("CG", "GC"): -2.17,
        ("CG", "GG"): -0.11,
        ("CG", "GT"): -0.47,
        ("CG", "TC"): 0.4,
        ("CT", "AA"): 0.88,
        ("CT", "CA"): 1.33,
        ("CT", "GA"): -1.28,
        ("CT", "GC"): 0.4,
        ("CT", "GG"): -0.32,
        ("CT", "GT"): -0.12,
        ("CT", "TA"): 0.73,
        ("GA", "AT"): 0.42,
        ("GA", "CA"): 0.17,
        ("GA", "CC"): 0.81,
        ("GA", "CG"): -0.25,
        ("GA", "CT"): -1.3,
        ("GA", "GT"): 0.44,
        ("GA", "TT"): 0.34,
        ("GC", "AG"): -0.25,
        ("GC", "CA"): 0.47,
        ("GC", "CC"): 0.79,
        ("GC", "CG"): -2.24,
        ("GC", "CT"): 0.62,
        ("GC", "GG"): -1.11,
        ("GC", "TG"): -0.59,
        ("GG", "AC"): 0.03,
        ("GG", "CA"): -0.52,
        ("GG", "CC"): -1.84,
        ("GG", "CG"): -1.11,
        ("GG", "CT"): 0.08,
        ("GG", "GC"): -0.11,
        ("GG", "TC"): -0.32,
        ("GT", "AA"): 0.14,
        ("GT", "CA"): -1.44,
        ("GT", "CC"): 0.98,
        ("GT", "CG"): -0.59,
        ("GT", "CT"): 0.45,
        ("GT", "GA"): -0.13,
        ("GT", "TA"): 0.07,
        ("TA", "AA"): 0.69,
        ("TA", "AC"): 0.92,
        ("TA", "AG"): 0.42,
        ("TA", "AT"): -0.58,
        ("TA", "CT"): 0.97,
        ("TA", "GT"): 0.43,
        ("TA", "TT"): 0.68,
        ("TC", "AA"): 1.33,
        ("TC", "AC"): 1.05,
        ("TC", "AG"): -1.3,
        ("TC", "AT"): 0.97,
        ("TC", "CG"): 0.62,
        ("TC", "GG"): 0.08,
        ("TC", "TG"): 0.45,
        ("TG", "AA"): 0.74,
        ("TG", "AC"): -1.45,
        ("TG", "AG"): 0.44,
        ("TG", "AT"): 0.43,
        ("TG", "CC"): 0.62,
        ("TG", "GC"): -0.47,
        ("TG", "TC"): -0.12,
        ("TT", "AA"): -1.0,
        ("TT", "AC"): 0.75,
        ("TT", "AG"): 0.34,
        ("TT", "AT"): 0.68,
        ("TT", "CA"): 0.64,
        ("TT", "GA"): 0.71,
        ("TT", "TA"): 0.69,
    }
    for pair_key, dg in thermo_dg_dict.items():
        w, x, y, z = parse_old_thermo_key_to_index(pair_key)
        table[w][x][y][z] = f"Some({dg})"
    print(table)
# Index assigned to each base letter; the key-parsing helpers in this file
# use it to turn letters into positions in the nested lookup lists.
base_to_encode = dict(A=0, T=3, C=1, G=2)
def parse_old_thermo_key_to_index(k):
    """Translate a pair-of-pairs key into four list indices.

    ``k`` is indexed as ``k[0][0]``, ``k[0][1]``, ``k[1][0]``, ``k[1][1]``
    (for example ``("AA", "AT")``); each of those letters is looked up in
    ``base_to_encode``.

    Returns:
        A four-element list of the encoded indices, in the order above.

    Raises:
        KeyError: if a letter is not present in ``base_to_encode``.
    """
    # Outer loop walks the two halves of the key, inner loop the two letters
    # of each half, giving the same left-to-right lookup order as writing the
    # four lookups out by hand.
    return [
        base_to_encode[k[half][pos]]
        for half in (0, 1)
        for pos in (0, 1)
    ]
def parse_overhang_key_to_index(k):
    """Translate an overhang key into three list indices.

    ``k`` is indexed as ``k[0][0]``, ``k[0][1]`` and ``k[1]`` (for example
    ``(("A", "T"), "A")``); each of those letters is looked up in
    ``base_to_encode``.

    Returns:
        A three-element list of the encoded indices, in the order above.

    Raises:
        KeyError: if a letter is not present in ``base_to_encode``.
    """
    return [
        base_to_encode[k[0][0]],
        base_to_encode[k[0][1]],
        base_to_encode[k[1]],
    ]
def create_seq1_overhang_array():
    """Print a 4x4x4 nested list populated from ``seq1_overhang_dg_dict``.

    Every cell starts as ``None``.  Each dictionary key is converted to
    three indices by ``parse_overhang_key_to_index`` and the cell at that
    position is set to the string ``"Some(<value>)"``.  The finished nested
    list is written to stdout with ``print``; nothing is returned.
    """
    # Fresh inner lists per iteration, so rows never alias one another.
    table = [[[None] * 4 for _ in range(4)] for _ in range(4)]
    seq1_overhang_dg_dict = {
        (("A", "T"), "A"): -0.51,
        (("C", "G"), "A"): -0.96,
        (("G", "C"), "A"): -0.58,
        (("T", "A"), "A"): -0.51,
        (("A", "T"), "C"): -0.42,
        (("C", "G"), "C"): -0.52,
        (("G", "C"), "C"): -0.34,
        (("T", "A"), "C"): -0.02,
        (("A", "T"), "G"): -0.62,
        (("C", "G"), "G"): -0.72,
        (("G", "C"), "G"): -0.56,
        (("T", "A"), "G"): 0.48,
        (("A", "T"), "T"): -0.71,
        (("C", "G"), "T"): -0.58,
        (("G", "C"), "T"): -0.61,
        (("T", "A"), "T"): -0.10,
    }
    for overhang_key, dg in seq1_overhang_dg_dict.items():
        x, y, z = parse_overhang_key_to_index(overhang_key)
        table[x][y][z] = f"Some({dg})"
    print(table)
def create_seq2_overhang_array():
    """Print a 4x4x4 nested list populated from ``seq2_overhang_dg_dict``.

    Every cell starts as ``None``.  Each dictionary key is converted to
    three indices by ``parse_overhang_key_to_index`` and the cell at that
    position is set to the string ``"Some(<value>)"``.  The finished nested
    list is written to stdout with ``print``; nothing is returned.
    """
    # Fresh inner lists per iteration, so rows never alias one another.
    table = [[[None] * 4 for _ in range(4)] for _ in range(4)]
    seq2_overhang_dg_dict = {
        (("A", "T"), "A"): -0.48,
        (("C", "G"), "A"): -0.92,
        (("G", "C"), "A"): -0.82,
        (("T", "A"), "A"): -0.12,
        (("A", "T"), "C"): -0.19,
        (("C", "G"), "C"): -0.23,
        (("G", "C"), "C"): -0.31,
        (("T", "A"), "C"): 0.28,
        (("A", "T"), "G"): -0.50,
        (("C", "G"), "G"): -0.44,
        (("G", "C"), "G"): -0.01,
        (("T", "A"), "G"): -0.01,
        (("A", "T"), "T"): -0.29,
        (("C", "G"), "T"): -0.35,
        (("G", "C"), "T"): -0.52,
        (("T", "A"), "T"): 0.13,
    }
    for overhang_key, dg in seq2_overhang_dg_dict.items():
        x, y, z = parse_overhang_key_to_index(overhang_key)
        table[x][y][z] = f"Some({dg})"
    print(table)
def main():
    """Run the three table printers in a fixed order.

    The 4-D table is printed first, then the seq2 overhang table, then the
    seq1 overhang table.
    """
    printers = (
        gen_thermo_array,
        create_seq2_overhang_array,
        create_seq1_overhang_array,
    )
    for print_table in printers:
        print_table()


# Only emit the tables when executed as a script, not when imported.
if __name__ == "__main__":
    main()