Bjoern Olausson

pdb2charmm PDF Print E-mail
  
Tuesday, 03 March 2009 14:09

  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. #--------------------------------------------------------------------------------
  4. #process_pdb.py v0.1, Copyright Bjoern Olausson
  5. #--------------------------------------------------------------------------------
  6. #This program is free software; you can redistribute it and/or modify
  7. #it under the terms of the GNU General Public License as published by
  8. #the Free Software Foundation; either version 2 of the License, or
  9. #(at your option) any later version.
  10. #
  11. #This program is distributed in the hope that it will be useful,
  12. #but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. #GNU General Public License for more details.
  15. #
  16. #To view the license visit
  17. #http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
  18. #or write to
  19. #Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  20. #--------------------------------------------------------------------------------
  21. #--------------------------------------------------------------------------------
  22. #
  23. #This tool gets extended as I need som more functionality.
  24. #
  25. # Currently it is intended to
  26. # -fix missing HETATM record for spcific residue names (see CONFIG section)
  27. # -fix Engineered residues (see CONFIG section)
  28. # -removes all residues not defined in the CONFIG section
  29. #
  30. #I know I should have used dictionaries ;-)
  31. #################################CONFIG############################
  32. #Valid Aminoacids
  33. AA = ( 'ALA', 'ARG', 'ASN', 'ASP', 'ASX', 'CYS', 'GLU', 'GLN', 'GLX', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRY', 'TYR', 'VAL' )
  34. #VALID Nucleoacids
  35. NA = ( 'ADE', 'CYT', 'GUA', 'THY', 'URA' )
  36.  
  37. #Heteroatoms _I_ like to be valid none (every Tuple in HET needs a counterpart in HETID)
  38. HET = ( 'HOH', 'MG', 'TIP3' )
  39. #Heteroatom ID
  40. HETID = ( 'W', 'M', 'W' )
  41.  
  42. #Engineered residues (every Tuple in HET needs a counterpart in HETID)
  43. EN = ( 'CSP', 'GMA', 'HIP', 'MLY', 'MSE', 'PDS', 'PHL', 'PTR', 'SEP', 'TPO' )
  44. #Replacement for engineered residues according to charmm-gui
  45. ENR = ( 'CYS', 'GLU2', 'HSD', 'LYS', 'MET', 'ASP', 'ASP', 'TYR1', 'SER1', 'THR1' )
  46.  
  47. #Pprotonizable residues
  48. PRES = ( 'ASP', 'GLU', 'LYS', 'HIS' )
  49.  
  50. #Set protonation state for HIS | Protonated His --> HSP | neutral HIS, proton on ND1 --> HSD | neutral His, proton on NE2 --> HSE
  51. HISTIDIN = "HSD"
  52.  
  53. #Misc. residues to save in seperate file when splitting the PDP
  54. MISC = ( 'CYT' )
  55. ###################################################################
  56.  
  57. import sys, os, shutil, string, glob, subprocess, itertools, time, tempfile
  58.  
  59. from Bio.PDB import PDBIO
  60. from Bio.PDB.PDBIO import Select
  61. pdbwrite = PDBIO()
  62. from Bio.PDB.PDBParser import PDBParser
  63. pdbparse = PDBParser(PERMISSIVE=1)
  64.  
  65. from optparse import OptionParser
  66. parser = OptionParser()
  67.  
  68. from sets import Set
  69.  
  70. parser.add_option(
  71. "-i",
  72. "--input",
  73. action="store",
  74. dest="I",
  75. help="PDB-File to mod",
  76. metavar="INPUTFILE"
  77. )
  78.  
  79. parser.add_option(
  80. "-r",
  81. "--renumber",
  82. action="store",
  83. dest="R",
  84. help="""Use the keywords "atm" "res" "all" to renumber only atoms, residues or both """,
  85. metavar="RENUM"
  86. )
  87.  
  88. parser.add_option(
  89. "-c",
  90. "--clean",
  91. action="store_true",
  92. dest="C",
  93. help="Clean the PDB",
  94. metavar="CLEAN"
  95. )
  96.  
  97. parser.add_option(
  98. "-w",
  99. "--water",
  100. action="store_true",
  101. dest="W",
  102. help="Fix PDB water (HOH) to be charmm complient (TIP3)",
  103. metavar="WATER",
  104. )
  105.  
  106. parser.add_option(
  107. "-s",
  108. "--split",
  109. action="store_true",
  110. dest="S",
  111. help="Split PDB file with multible chains into multible files each containing a single chain. Additionaly all HETATMs are stored in a seperate file. If split fails, try to clean (-c) the pdb first",
  112. metavar="WATER",
  113. )
  114.  
  115. parser.add_option(
  116. "-p",
  117. "--protlist",
  118. action="store_true",
  119. dest="P",
  120. help="List all protonizable residues in the Protein (ASP -> [ASPP], GLU -> [GLUP], LYS -> [LSN] , HIS -> [HSD, HSE, HSP])",
  121. metavar="PROTABLE",
  122. )
  123.  
  124. parser.add_option(
  125. "-m",
  126. "--chm",
  127. action="store_true",
  128. dest="M",
  129. help="Convert atomtypes to CHARMM atomtypes",
  130. metavar="CHM",
  131. )
  132.  
  133. parser.add_option(
  134. "-t",
  135. "--test",
  136. action="store_true",
  137. dest="T",
  138. help="Just for testing stuff",
  139. metavar="TEST",
  140. )
  141. # instruct optparse to parse the program's command line
  142. (options, args) = parser.parse_args()
  143.  
  144. # Checking for required otions
  145. if not options.I:
  146. parser.error("You must provide a file to process (-i file.pdb)")
  147.  
  148. #Asign options to variables
  149. INPUT = options.I
  150. optCLEAN = options.C
  151. optWATER = options.W
  152. optRENUM = options.R
  153. optSPLIT = options.S
  154. optTEST = options.T
  155. optPROTABLE = options.P
  156. optCHM = options.M
  157.  
  158. #Change to the current working dir
  159. path = os.getcwd()
  160. os.chdir(path)
  161.  
  162. #Original (oldPDB) and new (newPDB) PDB filename
  163. oldPDB = os.path.splitext(INPUT)[0]
  164.  
  165.  
  166. #parse the PDB-file
  167. structure = pdbparse.get_structure( oldPDB , oldPDB+".pdb")
  168. header = pdbparse.get_header()
  169. trailer = pdbparse.get_trailer()
  170.  
  171. def cleanPDB(model) :
  172. for chains in model :
  173. if len(chains) == 0 :
  174. model.detach_child(chain.id)
  175. else :
  176. residues = chains.child_list
  177. for residue in residues :
  178. resn = residue.get_resname()
  179. if resn in AA :
  180. print "ATOM:", resn
  181.  
  182. elif resn in NA :
  183. print "ATOM:", resn
  184.  
  185. elif resn in EN :
  186. for ENi, ENRi in itertools.izip(EN, ENR) :
  187. if resn == ENi :
  188. print "ATOM: %s <---> %s (%s)" % (resn, ENRi, ENi)
  189. residue.resname = ENRi
  190.  
  191. elif resn in HET :
  192. for HETi, HETIDi in itertools.izip(HET, HETID) :
  193. #print "HETATM:", resn, residue.id[0]
  194. if resn == HETi :
  195. resOLD = residue.id
  196. residue.id = ( HETIDi, residue.id[1], residue.id[2] )
  197. print "HETATM: %s ---> Fixed RECORD to HETATM" % (resn)
  198. else :
  199. print "INVALD:", residue, "REMOVED"
  200. chains.detach_child(residue.id)
  201. if len(chains) == 0 :
  202. model.detach_child(chain.id)
  203. writePDB(structure, "clean")
  204.  
  205. def renumRES(model) :
  206. for chains in model :
  207. residues = chains.child_list
  208. RSIDUEoffset=residues[0].id[1] - 1
  209. RESIDUEid = 1
  210. for residue in residues :
  211. newID = residue.id[1] - RSIDUEoffset
  212. residue.id = ( residue.id[0], RESIDUEid, residue.id[2] )
  213. RESIDUEid = RESIDUEid + 1
  214. print "Residue OFFSET was -%s" % (RSIDUEoffset)
  215. writePDB(structure, "renum")
  216.  
  217. def renumATM(model) :
  218. print "Not implemented yet"
  219.  
  220. def splitPDB(model):
  221. class SelectChain(Select):
  222. def accept_chain(self, chain) :
  223. if chain.get_id() == self._chain_id :
  224. return True
  225. else:
  226. return False
  227.  
  228. class SelectHETATM(Select):
  229. def accept_residue(self, res) :
  230. if res.id[0] in HETID and res.get_full_id()[2] == self._chain_id :
  231. return True
  232. else:
  233. return False
  234. class SelectMISCRE(Select):
  235. def accept_residue(self, res) :
  236. if res.resname in MISC and res.get_full_id()[2] == self._chain_id :
  237. return True
  238. else :
  239. return False
  240.  
  241. for chains in model:
  242. chainID = chains.id
  243. savePROT = oldPDB+"_chain-"+chainID.lower()+"-prot.pdb"
  244. saveHETA = oldPDB+"_chain-"+chainID.lower()+"-hetatm.pdb"
  245. saveMISC = oldPDB+"_chain-"+chainID.lower()+"-misc.pdb"
  246.  
  247. pdbwrite.set_structure(structure)
  248.  
  249. chain = SelectChain()
  250. chain._chain_id = chains.id
  251.  
  252. hetatm = SelectHETATM()
  253. hetatm._chain_id = chains.id
  254.  
  255. miscre = SelectMISCRE()
  256. miscre._chain_id = chains.id
  257.  
  258. pdbwrite.save(saveMISC, miscre)
  259. pdbwrite.save(saveHETA, hetatm)
  260.  
  261. for residue in chains:
  262. if residue.id[0] in HETID or residue.resname in MISC :
  263. chains.detach_child(residue.id)
  264. pdbwrite.save(savePROT, chain)
  265.  
  266. def listProtRes(model) :
  267. protonizable = set()
  268. for chains in model.child_list :
  269. for residues in chains.child_list :
  270. if residues.resname in PRES :
  271. protonizable.add(residues.resname)
  272. for item in protonizable :
  273. if item == "ASP" :
  274. print item, " | Protonated aspartic acid, proton on od2 --> ASPP"
  275. elif item == "HIS" :
  276. print item, " | Protonated His --> HSP | neutral HIS, proton on ND1 --> HSD | neutral His, proton on NE2 --> HSE"
  277. elif item == "GLU" :
  278. print item, " | Protonated glutamic acid, proton on oe2--> GLUP"
  279. elif item == "LYS" :
  280. print item, " | Neutral --> LSN"
  281.  
  282. def convCHM(model) :
  283. # Pay attention to the whitespaces to make the Atomtypes allway 4 chars long [ATOMNUM] [....] [RESID]!!
  284. # Singel char: [.X..], Two chars: [.XY.], Three chars: [XYZ.], Four chars: [WXYZ]
  285. for chains in model :
  286. residues = chains.child_list
  287. first_residue = chains.child_list[0]
  288. last_residue = chains.child_list[-1]
  289. for residue in residues :
  290. if residue.resname == 'HOH':
  291. print "HOH(%s) --> %s" % (residue.id[1], "TIP3")
  292. residue.resname = 'TIP3'
  293. for atom in residue.child_list :
  294. if atom.name ==  "O" :
  295. print "TIP3(%s): O --> OH2" % (residue.id[1])
  296. residue['O'].fullname = 'OH2 '
  297. if residue.resname == 'HIS':
  298. print "HIS(%s) --> %s" % (residue.id[1], HISTIDIN)
  299. residue.resname = HISTIDIN
  300.  
  301. elif residue.resname == 'ILE':
  302. for atom in residue.child_list :
  303. if atom.name ==  "CD1" :
  304. print "ILE(%s): CD1 --> CD" % (residue.id[1])
  305. residue['CD1'].fullname = ' CD '
  306. elif atom.name ==  "1HD1" :
  307. print "ILE(%s): 1HD1 --> HD1" % (residue.id[1])
  308. residue['1HD1'].fullname = 'HD1 '
  309. elif atom.name ==  "2HD1" :
  310. print "ILE(%s): 2HD1 --> HD2" % (residue.id[1])
  311. residue['2HD1'].fullname = 'HD2 '
  312. elif atom.name ==  "3HD1" :
  313. print "ILE(%s): 3HD1 --> HD3" % (residue.id[1])
  314. residue['3HD1'].fullname = 'HD3 '
  315.  
  316. elif residue.resname == 'SER' :
  317. for atom in residue.child_list :
  318. if atom.name ==  "1HG" :
  319. print "SER(%s): 1HG --> HG1" % (residue.id[1])
  320. residue['1HG'].fullname = 'HG1 '
  321.  
  322. elif residue.resname == 'MET' :
  323. for atom in residue.child_list :
  324. if atom.name ==  "SE" :
  325. print "MET(%s): SE --> SD" % (residue.id[1])
  326. residue['SE'].fullname = ' SD '
  327.  
  328. #for atom in first_residue :
  329. #if atom.name ==  "1HT" :
  330. #print "Patching first residue: 1HT --> HT1"
  331. #residue['1HT'].fullname = 'HT1 '
  332. #elif atom.name ==  "2HT" :
  333. #print "Patching first residue: 2HT --> HT2"
  334. #residue['2HT'].fullname = 'HT2 '
  335. #elif atom.name ==  "3HT" :
  336. #print "Patching first residue: 3HT --> HT3"
  337. #residue['3HT'].fullname = 'HT3 '
  338. #if last_residue.resname != "HOH" :
  339. #for atom in last_residue :
  340. #if atom.name ==  "OXT" :
  341. #print "Patching last residue: OXT --> OT1"
  342. #residue['OXT'].fullname = 'OT1 '
  343. #elif atom.name == "O" :
  344. #print "Patching last residue: O --> OT2"
  345. #residue['O'].fullname = 'OT2 '
  346. writePDB(structure, "charmm")
  347.  
  348. filename = oldPDB + '-charmm.pdb'
  349. in_f = open(filename)
  350. out_f = tempfile.NamedTemporaryFile()
  351. for line in in_f:
  352. out_f.write(line.replace('TIP3A', 'TIP3 '))
  353. os.remove(filename)
  354. os.link(out_f.name, filename)
  355.  
  356. def writePDB(struc, suffix) :
  357. pdbwrite.set_structure(struc)
  358. pdbwrite.save(oldPDB+"-"+suffix+".pdb")
  359.  
  360. if optCLEAN :
  361. cleanPDB(structure[0])
  362. if optRENUM == "res" :
  363. renumRES(structure[0])
  364. if optRENUM == "atm" :
  365. renumATM(structure[0])
  366. if optSPLIT :
  367. splitPDB(structure[0])
  368. if optCHM :
  369. print "converting to CHARMM format"
  370. convCHM(structure[0])
  371. #else :
  372. #    writePDB(structure)
  373.  
  374. if optPROTABLE :
  375. listProtRes(structure[0])
  376.  
  377. if optTEST :
  378. for chains in structure[0] :
  379. residues = chains.child_list
  380. for residue in residues :
  381. print residue.id[0]
  382.  
  383.  
  384. #a.get_name()       # atom name (spaces stripped, e.g. "CA")
  385. #a.get_id()         # id (equals atom name)
  386. #a.get_coord()      # atomic coordinates
  387. #a.get_bfactor()    # B factor
  388. #a.get_occupancy()  # occupancy
  389. #a.get_altloc()     # alternative location specifie
  390. #a.get_sigatm()     # std. dev. of atomic parameters
  391. #a.get_siguij()     # std. dev. of anisotropic B factor
  392. #a.get_anisou()     # anisotropic B factor
  393. #a.get_fullname()   # atom name (with spaces, e.g. ".CA.")</p>
  394. <p>

Attachments:
FileFile sizeLast Modified
Download this file (pdb2charmm.py)pdb2charmm.py11 Kb07/09/09 10:50
Last Updated ( Thursday, 09 July 2009 10:51 )
 

Add comment


Security code
Refresh

6M Pixel

Banner

www. is deprecated

Banner

Play OGG

Banner