Skip to content

Commit bf95024

Browse files
johnmay
authored and egonw committed
Compress the PDB hetatm type_map by not storing the common C.sp2 and H types.
1 parent 9f8de9b commit bf95024

File tree

2 files changed

+18
-359942
lines changed

2 files changed

+18
-359942
lines changed

storage/pdb/src/main/java/org/openscience/cdk/io/PDBReader.java

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@
3131
import java.io.StringReader;
3232
import java.util.ArrayList;
3333
import java.util.HashMap;
34+
import java.util.HashSet;
3435
import java.util.Hashtable;
3536
import java.util.List;
3637
import java.util.Map;
38+
import java.util.Set;
3739

3840
import javax.vecmath.Point3d;
3941

@@ -108,6 +110,7 @@ public class PDBReader extends DefaultChemObjectReader {
108110
* names; for example "RFB.N13" maps to "N.planar3".
109111
*/
110112
private Map<String, String> hetDictionary;
113+
private Set<String> hetResidues;
111114

112115
private AtomTypeFactory cdkAtomTypeFactory;
113116

@@ -630,7 +633,7 @@ private PDBAtom readAtom(String cLine, int lineLength) throws CDKException {
630633
throw new RuntimeException("PDBReader error during readAtom(): line too short");
631634
}
632635

633-
boolean isHetatm = cLine.substring(0, 6).equals("HETATM");
636+
boolean isHetatm = cLine.startsWith("HETATM");
634637
String atomName = cLine.substring(12, 16).trim();
635638
String resName = cLine.substring(17, 20).trim();
636639
String symbol = parseAtomSymbol(cLine);
@@ -723,18 +726,27 @@ private String typeHetatm(String resName, String atomName) {
723726
cdkAtomTypeFactory = AtomTypeFactory.getInstance("org/openscience/cdk/dict/data/cdk-atom-types.owl",
724727
DefaultChemObjectBuilder.getInstance());
725728
}
729+
730+
// lookup the atom type using the residue and name, if the atom is a hydrogen
731+
// or carbon and is a known residue we default to the common H and C.sp2 cases
726732
String key = resName + "." + atomName;
727-
if (hetDictionary.containsKey(key)) {
728-
return hetDictionary.get(key);
729-
}
733+
String type = hetDictionary.get(key);
734+
if (type != null)
735+
return type;
736+
else if (atomName.startsWith("H"))
737+
return hetResidues.contains(resName) ? "H" : null;
738+
else if (hetResidues.contains(resName) && atomName.startsWith("C"))
739+
return hetResidues.contains(resName) ? "C.sp2" : null;
740+
730741
return null;
731742
}
732743

733744
private void readHetDictionary() {
734745
try {
735746
InputStream ins = getClass().getResourceAsStream(hetDictionaryPath);
736747
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(ins));
737-
hetDictionary = new HashMap<String, String>();
748+
hetDictionary = new HashMap<>();
749+
hetResidues = new HashSet<>();
738750
String line;
739751
while ((line = bufferedReader.readLine()) != null) {
740752
int colonIndex = line.indexOf(':');
@@ -746,6 +758,7 @@ private void readHetDictionary() {
746758
} else {
747759
hetDictionary.put(typeKey, typeValue);
748760
}
761+
hetResidues.add(typeKey.split("\\.")[0]);
749762
}
750763
bufferedReader.close();
751764
} catch (IOException ioe) {

0 commit comments

Comments
 (0)