Viewing file: PDBParser.php (9.06 KB) -rw-rw-rw- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
<?php // // +----------------------------------------------------------------------+ // | PHP Version 4 | // +----------------------------------------------------------------------+ // | Copyright (c) 1997-2003 The PHP Group | // +----------------------------------------------------------------------+ // | This source file is subject to version 2.0 of the PHP license, | // | that is bundled with this package in the file LICENSE, and is | // | available at through the world-wide-web at | // | http://www.php.net/license/2_02.txt. | // | If you did not receive a copy of the PHP license and are unable to | // | obtain it through the world-wide-web, please send a note to | // | license@php.net so we can mail you a copy immediately. | // +----------------------------------------------------------------------+ // | Authors: Jesus M. Castagnetto <jmcastagnetto@php.net> | // +----------------------------------------------------------------------+ // // $Id: PDBParser.php,v 1.4 2003/01/04 11:56:25 mj Exp $ //
/** * A self-contained class to parse a PDB file into an array of residues * each containing an array of atoms * <br> * Useful when dealing with big PDB files, where using the Science_Chemistry_PDBFile * class will generate out of memory errors. * * @author Jesus M. Castagnetto <jmcastagnetto@php.net> * @version 1.0 * @access public * @package Science_Chemistry * @see Science_Chemistry_PDBFile */ class Science_Chemistry_PDBParser {
/** * PDB ID * * @var string * @access private */ var $pdb;
/** * Full path to PDB file * * @var string * @access private */ var $file;
/** * PDB file's date * * @var date * @access private */ var $date;
/** * PDB macromolecule(s) class * * @var string * @access private */ var $class;
/** * Array of macromolecules * * @var array * @access private */ var $macromolecules;
/** * Number of macromolecules * * @var int * @access private */ var $num_macromolecules;
/** * Constructor for the class, requires a PDB filename * * @param string $filename PDB filename * @param boolean $multi whether to parse all models in a multi-model file * @param boolean $meta whether to store the PDB file meta information * @param boolean $full whether to store the full set of fields per atom * @return object PDBParser * @access public * @see parseResidues() */ function Science_Chemistry_PDBParser($filename, $multi=false, $meta=false, $full=false) { if (!file_exists($filename)) return null; list($pdb,) = explode(".",basename($filename)); $this->pdb = $pdb; $this->file = realpath($filename); $arr = file($filename); // parsing the PDB file $month = array ( "JAN" => "01", "FEB" => "02", "MAR" => "03", "APR" => "04", "MAY" => "05", "JUN" => "06", "JUL" => "07", "AUG" => "08", "SEP" => "09", "OCT" => "10", "NOV" => "11", "DEC" => "12" ); $header_re = "/^HEADER[[:space:]]+(([^[:space:]]+ )+)[[:space:]]+"; $header_re .= "([0-9]{2}-[A-Z]{3}-[0-9]{2,4})[[:space:]]+[A-Z0-9]{4}/";
if (preg_match($header_re, $arr[0], &$regs)) { $this->class = trim($regs[1]); // put date in a more standard format $tmp = explode("-", $regs[3]); if ($tmp[2] <= 23) $year = 2000 + (int)$tmp[2]; else $year = 1900 + (int)$tmp[2]; $this->date = $year."-".$month[$tmp[1]]."-".$tmp[0]; } $flag = "nomodel"; $tmparr = array(); for ($i=0; $i < count($arr); $i++) { if (!trim($arr[$i])) continue; $rectype = strtok($arr[$i]," "); // check if we have multi-model file if ($rectype == "MODEL") continue;
// did we get a multi-model file and are parsing the end // of a model, if so, end parsing altogether if ($rectype == "ENDMDL") { if ($multi) { $this->macromolecules[] = $this->parseResidues(&$tmparr, $full); $this->num_macromolecules++; // = count($this->macromolecules); $tmparr = array(); } else { break; } continue; }
// accumulate atom records, put the rest into the meta array if ($rectype == "ATOM" || $rectype == "HETATM") $tmparr[] = $arr[$i]; elseif ($meta) $this->meta[$rectype][] = $arr[$i]; } if (!empty($tmparr)) { $this->macromolecules[] = $this->parseResidues(&$tmparr, $full); $this->num_macromolecules++; // = count($this->macromolecules); } }
/** * Makes the array of residues in the macromolecule * * @param array $records * @param boolean $full whether to store the full set of fields per atom * @see parseFile() * @see parseAtom() */ function parseResidues($records, $full) { $curr_res_id = ""; $residues = array(); $res_atoms = array(); for ($i=0; $i< count($records); $i++) { $atomrec =& $records[$i]; $res_name = trim(substr($atomrec,17,3)); $chain = trim(substr($atomrec,21,1)); $seq_num = (int) trim(substr($atomrec,22,4)); $res_id = $res_name.":".$seq_num.":".$chain; //if ($i == 0) // $curr_res_id = $res_id;
if ($res_id == $curr_res_id) { $res_atoms[] = $atomrec; if ($i != (count($records) - 1)) continue; }
if (($res_id != $curr_res_id) || ($i == (count($records) - 1)) ) { if (!empty($res_atoms)) { for ($j=0; $j < count($res_atoms); $j++) { $temp = $this->parseAtom($res_atoms[$j], $full, &$atomname); $residues[$curr_res_id][$atomname] = $temp; } } $curr_res_id = $res_id; $res_atoms = array($atomrec); } } return $residues; }
/** * Parses an atom record into an associative array * * @param string $atomrec PDB atom record * @param boolean $full whether to store the full set of fields per atom * @see parseResidues() */ function parseAtom($atomrec, $full, &$atomname) { $atom = array(); // process PDB atom record // no error checking, assumes correct and standard record $atom["RecName"] = trim(substr($atomrec,0,6)); $atom["SerNum"] = (int) trim(substr($atomrec,6,5)); $atom["AtomName"] = trim(substr($atomrec,12,4)); $atomname = $atom["AtomName"]; if ($full) { $atom["AltLoc"] = trim(substr($atomrec,16,1)); $atom["ResName"] = trim(substr($atomrec,17,3)); $atom["ChainID"] = trim(substr($atomrec,21,1)); $atom["ResSeqNum"] = (int) trim(substr($atomrec,22,4)); $atom["InsCode"] = trim(substr($atomrec,26,1)); $atom["Occupancy"] = (float) trim(substr($atomrec,54,6)); $atom["TempFactor"] = (float) trim(substr($atomrec,60,6)); $atom["SegmentID"] = trim(substr($atomrec,72,4)); $atom["Charge"] = (float)trim(substr($atomrec,78,2)); $atom["Element"] = trim(substr($atomrec,76,2)); } $atom["X"] = (double) trim(substr($atomrec,30,8)); $atom["Y"] = (double) trim(substr($atomrec,38,8)); $atom["Z"] = (double) trim(substr($atomrec,46,8)); return $atom; }
/** * Returns an array of residues with a particular name * from the indicated macromolecule index * * @param integer $macromol Index of the macromolecule in the $macromolecules array * @param string $resnam Residue name, e.g. HIS, CYS, etc. * @return array list of residues with the requested name * @access public * @see $macromolecules */ function getResidueList ($macromol, $resname) { $mol =& $this->macromolecules[$macromol]; $reslist = array(); if (!$mol) return $reslist; foreach($mol as $resid=>$atoms) { list($curr_res_name,,) = explode(":",$resid); // echo $curr_res_name."***\n"; if ($curr_res_name == $resname) $reslist[$resid] = $atoms; else continue; } return $reslist; }
} // end of PDBParser
// vim: expandtab: ts=4: sw=4 ?>
|