#!/usr/bin/python
# -*- coding: utf-8 -*-
#--------------------------------------------------------------------------------
#process_pdb.py v1.0, Copyright Bjoern Olausson
#--------------------------------------------------------------------------------
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or
#(at your option) any later version.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#To view the license visit
#http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
#or write to
#Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#--------------------------------------------------------------------------------
#--------------------------------------------------------------------------------
#
#This tool gets extended as I need som more functionality.
#
# Currently it is intended to
# -fix missing HETATM record for spcific residue names (see CONFIG section)
# -fix Engineered residues (see CONFIG section)
# -removes all residues not defined in the CONFIG section
# -renumber ATOM and RESIDUE IDs
# -Create PDB and PATCH for CHARMM from a PDB generated via H++ (http://biophysics.cs.vt.edu/H++/) to get proper protonation state
#
# USE WITH CARE since all parts were written for a special purpose and might fail on others
#
#I know I should have used dictionaries ;-)
#################################CONFIG############################
#Valid Aminoacids
AA = ( 'ALA', 'ARG', 'ASN', 'ASP', 'ASX', 'CYS', 'GLU', 'GLN', 'GLX', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRY', 'TYR', 'VAL' )
#VALID Nucleoacids
NA = ( 'ADE', 'CYT', 'GUA', 'THY', 'URA' )

#Heteroatoms _I_ like to be valid (every Tuple in HET needs a counterpart in HETID)
HET = ( 'HOH', 'MG', 'TIP3' )
#Heteroatom ID
HETID = ( 'W', 'M', 'W' )

#Engineered residues (every Tuple in HET needs a counterpart in HETID)
EN = ( 'CSP', 'GMA', 'HIP', 'MLY', 'MSE', 'PDS', 'PHL', 'PTR', 'SEP', 'TPO' )
#Replacement for engineered residues according to charmm-gui
ENR = ( 'CYS', 'GLU2', 'HSD', 'LYS', 'MET', 'ASP', 'ASP', 'TYR1', 'SER1', 'THR1' )

#Pprotonizable residues
PRES = ( 'ASP', 'GLU', 'LYS', 'HIS' )

#Set protonation state for HIS | Protonated His --> HSP | neutral HIS, proton on ND1 --> HSD | neutral His, proton on NE2 --> HSE
HISTIDIN = "HSD"

#Misc. residues to save in seperate file when splitting the PDP
MISC = ( 'CYT' )
###################################################################

import sys, os, shutil, string, glob, subprocess, itertools, time, tempfile

from Bio.PDB import PDBIO
from Bio.PDB.PDBIO import Select
pdbwrite = PDBIO()
from Bio.PDB.PDBParser import PDBParser
pdbparse = PDBParser(PERMISSIVE=1)

from optparse import OptionParser
parser = OptionParser()

from sets import Set

parser.add_option(
	"-i",
	"--input",
	action="store",
	dest="I",
	help="PDB-File to mod",
	metavar="some.pdb"
)

parser.add_option(
	"-r",
	"--renumber",
	action="store",
	dest="R",
	help="""Use the keywords "atm" "res" "all" to renumber only atoms, residues or both """,
	metavar="{atm, res, all}"
)

parser.add_option(
	"-o",
	"--offset",
	action="store",
	dest="O",
	default=0,
	help="""Use an offset to renumber the residues""",
	metavar="INT",
	type="int"
)

parser.add_option(
	"-c",
	"--clean",
	action="store_true",
	dest="C",
	help="Clean the PDB",
)

parser.add_option(
	"-w",
	"--water",
	action="store_true",
	dest="W",
	help="Fix PDB water (HOH) to be charmm complient (TIP3)",
)

parser.add_option(
	"-s",
	"--split",
	action="store_true",
	dest="S",
	help="Split PDB file with multible chains into multible files each containing a single chain. Additionaly all HETATMs are stored in a seperate file. If split fails, try to clean (-c) the pdb first",
)

parser.add_option(
	"-p",
	"--protlist",
	action="store_true",
	dest="P",
	help="List all protonizable residues in the Protein (ASP -> [ASPP], GLU -> [GLUP], LYS -> [LSN] , HIS -> [HSD, HSE, HSP])",
)

parser.add_option(
	"-g",
	"--genpatch",
	action="store_true",
	dest="G",
	help="Generate CHARMM patch for titrateable AAs from a H++ PDB (http://biophysics.cs.vt.edu/H++/)",
)

parser.add_option(
	"-m",
	"--chm",
	action="store_true",
	dest="M",
	help="Convert atomtypes to CHARMM atomtypes",
)

parser.add_option(
	"-t",
	"--test",
	action="store_true",
	dest="T",
	help="Just for testing stuff",
)

# instruct optparse to parse the program's command line
(options, args) = parser.parse_args()

# Checking for required otions
if not options.I:
		parser.error("You must provide a file to process (-i file.pdb)")

#Asign options to variables
INPUT = options.I
optCLEAN = options.C
optWATER = options.W
optRENUM = options.R
optSPLIT = options.S
optTEST = options.T
optPROTABLE = options.P
optCHM = options.M
optGENPATCH = options.G
OFFSET = options.O

#Change to the current working dir
path = os.getcwd()
#os.chdir(path)

#Original (oldPDB) and new (newPDB) PDB filename
oldPDB = os.path.splitext(INPUT)[0]


#parse the PDB-file
structure = pdbparse.get_structure( oldPDB , oldPDB+".pdb")
header = pdbparse.get_header()
trailer = pdbparse.get_trailer()

def cleanPDB(model) :
	for chains in model :
		if len(chains) == 0 :
			model.detach_child(chain.id)
		else :
			residues = chains.child_list
			for residue in residues :
				resn = residue.get_resname()
				if resn in AA :
					print "ATOM:", resn
				elif resn in NA :
					print "ATOM:", resn
				elif resn in EN :
					for ENi, ENRi in itertools.izip(EN, ENR) :
						if resn == ENi :
							print "ATOM: %s <---> %s (%s)" % (resn, ENRi, ENi)
							residue.resname = ENRi
				elif resn in HET :
					for HETi, HETIDi in itertools.izip(HET, HETID) :
						#print "HETATM:", resn, residue.id[0]
						if resn == HETi :
							resOLD = residue.id
							residue.id = ( HETIDi, residue.id[1], residue.id[2] )
							print "HETATM: %s ---> Fixed RECORD to HETATM" % (resn)
				else :
					print "INVALD:", residue, "REMOVED"
					chains.detach_child(residue.id)
		if len(chains) == 0 :
			model.detach_child(chain.id)
	writePDB(structure, "clean")

def renumRES(model) :
	for chains in model :
		residues = chains.child_list
		for residue in residues :
			newID = residue.get_id()[1] + OFFSET
			if newID <= 0:
				print "Residue number cant be <= 0"
				sys.exit(2)
			residue.id = ( residue.id[0], newID, residue.id[2] )
	writePDB(structure, "renum_residue-id")

def renumATM(model) :
	for chains in model :
		chains.set_id(A)
		for residues in chains:
			for atom in residues:
				newSERIAL = atom.get_serial_number() + OFFSET
				if newSERIAL < 0:
					print "Atom serial cant be <= 0"
					sys.exit(2)
				atom.set_serial_number(newSERIAL)
	writePDB(structure, "renum_atom-serial")

def splitPDB(model):
	class SelectChain(Select):
		def accept_chain(self, chain) :
			if chain.get_id() == self._chain_id :
				return True
			else:
				return False

	class SelectHETATM(Select):
		def accept_residue(self, res) :
			if res.id[0] in HETID and res.get_full_id()[2] == self._chain_id :
				return True
			else:
				return False
	class SelectMISCRE(Select):
		def accept_residue(self, res) :
			if res.resname in MISC and res.get_full_id()[2] == self._chain_id :
				return True
			else :
				return False

	for chains in model:
		chainID = chains.id
		savePROT = oldPDB+"_chain-"+chainID.lower()+"-prot.pdb"
		saveHETA = oldPDB+"_chain-"+chainID.lower()+"-hetatm.pdb"
		saveMISC = oldPDB+"_chain-"+chainID.lower()+"-misc.pdb"

		pdbwrite.set_structure(structure)

		chain = SelectChain()
		chain._chain_id = chains.id

		hetatm = SelectHETATM()
		hetatm._chain_id = chains.id

		miscre = SelectMISCRE()
		miscre._chain_id = chains.id

		pdbwrite.save(saveMISC, miscre)
		pdbwrite.save(saveHETA, hetatm)

		for residue in chains:
			if residue.id[0] in HETID or residue.resname in MISC :
				chains.detach_child(residue.id)
		pdbwrite.save(savePROT, chain)

def listProtRes(model) :
	protonizable = set()
	for chains in model.child_list :
		for residues in chains.child_list :
			if residues.resname in PRES :
				protonizable.add(residues.resname)
	for item in protonizable :
		if item == "ASP" :
			print item, " | Protonated aspartic acid, proton on od2 --> ASPP"
		elif item == "HIS" :
			print item, " | Protonated His --> HSP | neutral HIS, proton on ND1 --> HSD | neutral His, proton on NE2 --> HSE"
		elif item == "GLU" :
			print item, " | Protonated glutamic acid, proton on oe2--> GLUP"
		elif item == "LYS" :
			print item, " | Neutral --> LSN"

def convCHM(model) :
# Pay attention to the whitespaces to make the Atomtypes allway 4 chars long [ATOMNUM] [....] [RESID]!!
# Singel char: [.X..], Two chars: [.XY.], Three chars: [XYZ.], Four chars: [WXYZ]
	for chains in model :
		residues = chains.child_list
		first_residue = chains.child_list[0]
		last_residue = chains.child_list[-1]
		for residue in residues :
			if residue.resname == 'HOH':
				print "HOH(%s) --> %s" % (residue.id[1], "TIP3")
				residue.resname = 'TIP3'
				for atom in residue.child_list :
					if atom.name ==  "O" :
						print "TIP3(%s): O --> OH2" % (residue.id[1])
						residue['O'].fullname = 'OH2 '
			if residue.resname == 'HIS':
				print "HIS(%s) --> %s" % (residue.id[1], HISTIDIN)
				residue.resname = HISTIDIN

			elif residue.resname == 'ILE':
				for atom in residue.child_list :
					if atom.name ==  "CD1" :
						print "ILE(%s): CD1 --> CD" % (residue.id[1])
						residue['CD1'].fullname = ' CD '
					elif atom.name ==  "1HD1" :
						print "ILE(%s): 1HD1 --> HD1" % (residue.id[1])
						residue['1HD1'].fullname = 'HD1 '
					elif atom.name ==  "2HD1" :
						print "ILE(%s): 2HD1 --> HD2" % (residue.id[1])
						residue['2HD1'].fullname = 'HD2 '
					elif atom.name ==  "3HD1" :
						print "ILE(%s): 3HD1 --> HD3" % (residue.id[1])
						residue['3HD1'].fullname = 'HD3 '

			elif residue.resname == 'SER' :
				for atom in residue.child_list :
					if atom.name ==  "1HG" :
						print "SER(%s): 1HG --> HG1" % (residue.id[1])
						residue['1HG'].fullname = 'HG1 '

			elif residue.resname == 'MET' :
				for atom in residue.child_list :
					if atom.name ==  "SE" :
						print "MET(%s): SE --> SD" % (residue.id[1])
						residue['SE'].fullname = ' SD '

			#for atom in first_residue :
				#if atom.name ==  "1HT" :
					#print "Patching first residue: 1HT --> HT1"
					#residue['1HT'].fullname = 'HT1 '
				#elif atom.name ==  "2HT" :
					#print "Patching first residue: 2HT --> HT2"
					#residue['2HT'].fullname = 'HT2 '
				#elif atom.name ==  "3HT" :
					#print "Patching first residue: 3HT --> HT3"
					#residue['3HT'].fullname = 'HT3 '
			#if last_residue.resname != "HOH" :
				#for atom in last_residue :
					#if atom.name ==  "OXT" :
						#print "Patching last residue: OXT --> OT1"
						#residue['OXT'].fullname = 'OT1 '
					#elif atom.name == "O" :
						#print "Patching last residue: O --> OT2"
						#residue['O'].fullname = 'OT2 '
	writePDB(structure, "charmm")
	filename = oldPDB + '-charmm.pdb'
	in_f = open(filename)
	out_f = tempfile.NamedTemporaryFile()
	for line in in_f:
		out_f.write(line.replace('TIP3A', 'TIP3 '))
	os.remove(filename)
	os.link(out_f.name, filename)

def generatePATCH(model) :
	'''Histidine
	positive HIS, proton HD1 on ND1 and proton HE2 on NE2 --> HSP
	neutral HIS, proton HD1 on ND1 --> HSD
	neutral HIS, proton HE2 on NE2 --> HSE

	Aspartic Acid
	neutral ASP, proton 2HD on OD2 --> ASPP

	Glutamic acid
	neutral GLU, proton 2HE on OE2 --> GLUP

	Lysine
	neutral LYS, NO proton 3HZ on NZ --> LSN'''

	TITRATEABLE = {"ASP": ["ASPP"], "GLU": ["GLUP"], "LYS": ["LSN"], "HIS": ["HSP", "HSD", "HSE"]}
	PROTONS = {"ASP": ["2HD"], "GLU": ["2HE"], "LYS": ["1HZ", "2HZ", "3HZ"], "HIS": ["HD1", "HE2"]}
	CHAIN_DICT = {}
	CHAIN_COUNT = 1
	for chains in model.child_list :
		CHAINNAME = chains.get_id()
		if CHAINNAME == " ":
			CHAINNAME = "PRO%(COUNT)s" %{"COUNT": CHAIN_COUNT}
			#chains.id = CHAINNAME
		CHAIN = chains
		RESIDUE_DICT = {"ASPP": [], "GLUP": [], "LSN": [], "HSP": [], "HSD": [], "HSE": []}
		for residue in chains.child_list :
			RES = [residue.get_id()[1], residue.get_resname()]
			RESN = RES[1]
			RESID =RES[0]
			if RESN in TITRATEABLE :
				ATOMLIST = []
				for atom in residue.child_list:
					ATOMNAME = atom.get_fullname().strip()
					if ATOMNAME in PROTONS[RESN]:
						ATOMLIST.append(ATOMNAME)

				if RESN == "HIS" and len(ATOMLIST) == 2:
					RESN_NEW = TITRATEABLE[RESN][0]
					#print "Changing %(RESN_OLD)s:%(RESID)s to %(RESN_NEW)s:%(RESID)s" %{"RESN_OLD": RESN, "RESID": RESID, "RESN_NEW": RESN_NEW}
					residue.resname = RESN_NEW
					RESIDUE_DICT[RESN_NEW].append(RESID)
				elif RESN == "HIS" and len(ATOMLIST) == 1 and ATOMLIST[0] == "HD1":
					RESN_NEW = TITRATEABLE[RESN][1]
					#print "Changing %(RESN_OLD)s:%(RESID)s to %(RESN_NEW)s:%(RESID)s" %{"RESN_OLD": RESN, "RESID": RESID, "RESN_NEW": RESN_NEW}
					residue.resname = RESN_NEW
					RESIDUE_DICT[RESN_NEW].append(RESID)
				elif RESN == "HIS" and len(ATOMLIST) == 1 and ATOMLIST[0] == "HE2":
					RESN_NEW = TITRATEABLE[RESN][2]
					#print "Changing %(RESN_OLD)s:%(RESID)s to %(RESN_NEW)s:%(RESID)s" %{"RESN_OLD": RESN, "RESID": RESID, "RESN_NEW": RESN_NEW}
					residue.resname = RESN_NEW
					RESIDUE_DICT[RESN_NEW].append(RESID)
				elif RESN == "LYS" and len(ATOMLIST) > 0:
					if len(ATOMLIST) == 2:
						RESN_NEW = TITRATEABLE[RESN][0]
						#print "Changing %(RESN_OLD)s:%(RESID)s to %(RESN_NEW)s:%(RESID)s" %{"RESN_OLD": RESN, "RESID": RESID, "RESN_NEW": RESN_NEW}
						RESIDUE_DICT[RESN_NEW].append(RESID)
				elif RESN == "GLU" and len(ATOMLIST) > 0:
					RESN_NEW = TITRATEABLE[RESN][0]
					#print "Changing %(RESN_OLD)s:%(RESID)s to %(RESN_NEW)s:%(RESID)s" %{"RESN_OLD": RESN, "RESID": RESID, "RESN_NEW": RESN_NEW}
					RESIDUE_DICT[RESN_NEW].append(RESID)
				elif RESN == "ASP" and len(ATOMLIST) > 0:
					RESN_NEW = TITRATEABLE[RESN][0]
					#print "Changing %(RESN_OLD)s:%(RESID)s to %(RESN_NEW)s:%(RESID)s" %{"RESN_OLD": RESN, "RESID": RESID, "RESN_NEW": RESN_NEW}
					RESIDUE_DICT[RESN_NEW].append(RESID)
				else:
					pass
			CHAIN_DICT[CHAIN] = RESIDUE_DICT
		CHAIN_COUNT += 1
	# Print the suggested patch
	print "Suggested patch for CHARMM:"
	print "--------------------------------- [START] ---------------------------------"
	CHANGED_HIS={"HSD": [], "HSE": [], "HSP": []}
	for CHAIN in CHAIN_DICT:
		RESN_DICT = CHAIN_DICT[CHAIN]
		for RESN in RESN_DICT:
			RESIDS = RESN_DICT[RESN]
			if len(RESIDS) > 0 and RESN in ["LSN", "GLUP", "ASPP"]:
				print "patch %(PATCH)s %(CHAIN)s %(RESIDS)s setup warn" %{"PATCH": RESN, "CHAIN": CHAIN, "RESIDS": " ".join(map(str, RESIDS))}
				print "autogenerate angles dihedrals\n"
			if RESN in ["HSP", "HSD", "HSE"]:
				CHANGED_HIS[RESN].append(" ".join(map(str, RESIDS)))
				SAVE_CORRECTED_HIS = True
	print "--------------------------------- [STOP] ---------------------------------"
	if SAVE_CORRECTED_HIS:
		print "Saving structure with corrected HIS residue names:"
		for VALUE in CHANGED_HIS:
			print "%(RESN)s: %(RESID)s" %{"RESN": VALUE, "RESID": ", ".join(map(str, CHANGED_HIS[VALUE]))}
		writePDB(structure, "titrated")


def writePDB(struc, suffix) :
	pdbwrite.set_structure(struc)
	newname = "%(original)s-%(suffix)s.pdb" %{"original": oldPDB, "suffix": suffix}
	print "Saving new file to %(newname)s" %{"newname": newname}
	pdbwrite.save(newname)

if optCLEAN :
	cleanPDB(structure[0])
if optRENUM == "res" :
	renumRES(structure[0])
if optRENUM == "atm" :
	renumATM(structure[0])
if optSPLIT :
	splitPDB(structure[0])
if optCHM :
	print "converting to CHARMM format"
	convCHM(structure[0])
#else :
#	writePDB(structure)
if optGENPATCH:
	generatePATCH(structure[0])

if optPROTABLE :
	listProtRes(structure[0])

if optTEST :
	for chains in structure[0] :
		residues = chains.child_list
		for residue in residues :
			print residue.id[0]


#a.get_name()       # atom name (spaces stripped, e.g. "CA")
#a.get_id()         # id (equals atom name)
#a.get_coord()      # atomic coordinates
#a.get_bfactor()    # B factor
#a.get_occupancy()  # occupancy
#a.get_altloc()     # alternative location specifie
#a.get_sigatm()     # std. dev. of atomic parameters
#a.get_siguij()     # std. dev. of anisotropic B factor
#a.get_anisou()     # anisotropic B factor
#a.get_fullname()   # atom name (with spaces, e.g. ".CA.")