nom-pdb 0.0.9

PDB parser implemented with nom
Documentation
import sys
import json
import os

# The [MODRES](http://www.wwpdb.org/documentation/file-format-content/format33/sect3.html#MODRES)
# record provides descriptions of modifications (e.g., chemical or post-translational) to protein and nucleic acid residues. Included are correlations between residue names given
# in a PDB entry and standard residues.
# # Record Format
# | COLUMNS | DATA TYPE    | FIELD    | DEFINITION                               |
# | ------- | ------------ | -------- | ---------------------------------------- |
# | 1 -  6  | Record name  | "MODRES" |                                          |
# | 8 - 11  | IDcode       | idCode   | ID code of this entry.                   |
# | 13 - 15 | Residue name | resName  | Residue name used in this entry.         |   !
# | 17      | Character    | chainID  | Chain identifier.                        |
# | 19 - 22 | Integer      | seqNum   | Sequence number.                         |
# | 23      | AChar        | iCode    | Insertion code.                          |
# | 25 - 27 | Residue name | stdRes   | Standard residue name.                   |   !
# | 30 - 70 | String       | comment  | Description of the residue modification. |   !


PROCESSED = os.path.join(
    os.path.dirname(__file__), "processed.json")
MODRES = os.path.join(
    os.path.dirname(__file__), "modres.json")
ANB = os.path.join(
    os.path.dirname(__file__), "ambiguous.json")

with open(PROCESSED) as f:
    processed = json.load(f)
with open(MODRES) as f:
    modres = json.load(f)
with open(ANB) as f:
    anb = json.load(f)
PDB_DIR = sys.argv[1] if len(
    sys.argv) >= 2 else "/run/media/tianyi/LaCie/pdb/decompressed"
for fn in [f for f in os.listdir(PDB_DIR) if not (f in processed)]:
    print(fn)
    with open(fn) as f:
        read = [False]
        while line := f.readline():
            if line[:6] == "MODRES":
                resname = line[12:15]
                stdres = line[24:27]
                desc = line[29:].strip()
                if anb.get(resname):
                    if [stdres, desc] not in anb[resname]:
                        anb[resname].append([stdres, desc])
                elif modres.get(resname):
                    if modres[resname][1] != desc:
                        anb[resname] = [modres[resname][:2], [stdres, desc]]
                        del modres[resname]
                    else:
                        modres[resname][2] += 1
                else:
                    modres[resname] = [stdres, desc, 1]
                read[0] = True
            else:
                if read[0]:
                    break
        processed.append(fn)

with open(MODRES, "w") as f:
    json.dump(modres, f)
with open(PROCESSED, "w") as f:
    json.dump(processed, f)
with open(ANB, "w") as f:
    json.dump(anb, f)