block-aligner 0.5.1

SIMD-accelerated library for computing global and X-drop affine gap penalty sequence-to-sequence or sequence-to-profile alignments using an adaptive block-based algorithm.
Documentation
import os

import pandas as pd

# Download the listed substitution matrices from NCBI and rewrite each one as
# a bracketed, comma-separated table of integers: one line per row character
# 'A'..'[' and one value per column character 'A'..'`'. Cells that the
# downloaded matrix does not provide are filled with `min_val`.

# Remote directory holding the matrices and the local directory (relative to
# the current working directory) that receives the converted files.
base_remote_path = "ftp://ftp.ncbi.nlm.nih.gov/blast/matrices/"
base_local_path = "../matrices/"
names = [
    "BLOSUM45",
    "BLOSUM50",
    "BLOSUM62",
    "BLOSUM80",
    "BLOSUM90",
    "PAM100",
    "PAM120",
    "PAM160",
    "PAM200",
    "PAM250",
]

# Score written into every padding cell.
min_val = -128

# Row and column labels the output must contain; note: '[' = 'A' + 26.
# Rows cover 27 consecutive characters starting at 'A', columns cover 32.
row_labels = [chr(ord("A") + i) for i in range(27)]
col_labels = [chr(ord("A") + i) for i in range(32)]

os.makedirs(base_local_path, exist_ok=True)

for name in names:
    path = base_remote_path + name
    # sep=r"\s+" is the documented equivalent of the deprecated
    # delim_whitespace=True option.
    df = pd.read_csv(path, sep=r"\s+", comment="#")
    # The '*' row and column are not part of the output table.
    df = df.drop(index="*", columns="*")

    # Pad missing rows first so that columns added afterwards also cover them.
    for c in row_labels:
        if c not in df.index:
            df.loc[c, :] = min_val

    for c in col_labels:
        if c not in df.columns:
            df.loc[:, c] = min_val

    # alphabetically sort the amino acids
    df = df.sort_index(axis=0).sort_index(axis=1)

    # Ensure every column is integer-typed before the values are formatted.
    df = df.astype(int)

    # Format the rows by hand instead of going through to_csv(): its default
    # line terminator is os.linesep, which would make the ",\n" row endings
    # platform dependent.
    rows = [",".join(map(str, row)) for row in df.to_numpy().tolist()]
    res = "[" + "".join(row + ",\n" for row in rows) + "]"
    res_path = base_local_path + name

    print(name)
    print(df)
    print()

    with open(res_path, "w") as f:
        f.write(res)