Skip to content

Commit 019fa96

Browse files
committed
Update to biojava 5.0 ligand and chain model
Clearly define ligand handling:
- Full-chain selections ('1abc.A') use all groups with that chain name, including waters and ligands.
- Residue selections ('1abc.A:1-100') do not include waters, and they may include ligands from other chains if they appear within the cutoff distance.

Also:
- Use StructureTools.addGroupToStructure more systematically
- Fix bug regarding entity copies
- Remove deprecated Chain.getChainID calls & replace them with the correct version
- Bug fixes regarding ligand definitions
- Revert earlier BioJava 4-specific test changes (better ligand model in 5)
- Really sweet use of Java 8 streams in SubstructureIdentifier.reduce
1 parent 820a924 commit 019fa96

File tree

7 files changed

+190
-206
lines changed

7 files changed

+190
-206
lines changed

biojava-integrationtest/src/test/java/org/biojava/nbio/structure/test/StructureToolsTest.java

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -105,21 +105,18 @@ public void testGetAtomsConsistency() throws IOException, StructureException{
105105
Structure hivA = cache.getStructure("1hiv.A");
106106
Atom[] caSa = StructureTools.getRepresentativeAtomArray(hivA);
107107
Atom[] caCa = StructureTools.getRepresentativeAtomArray(hivA.getChainByIndex(0));
108-
//TODO Residue 67 is PTM. In BioJava 4 this is treated as a ligand, so it
109-
// gets added to caSa. In BioJava 5 this should be fixed, so they
110-
// should match again.
111108
assertEquals("did not find the same number of Atoms from structure and from chain..",
112-
caSa.length-1,caCa.length);
109+
caSa.length,caCa.length);
113110
Structure hivB = cache.getStructure("1hiv.B");
114111
Atom[] caSb = StructureTools.getRepresentativeAtomArray(hivB);
115112
Atom[] caCb = StructureTools.getRepresentativeAtomArray(hivB.getChainByIndex(0));
116113
assertEquals("did not find the same number of Atoms from structure and from chain..",
117-
caSb.length-1,caCb.length);
114+
caSb.length,caCb.length);
118115
//Both chains have to be the same size (A and B)
119-
assertEquals(99,caSa.length-1);
116+
assertEquals(99,caSa.length);
120117
assertEquals("did not find the same number of Atoms in both chains...",
121-
caSa.length-1,caCb.length);
122-
assertEquals(99,caSa.length-1);
118+
caSa.length,caCb.length);
119+
assertEquals(99,caSa.length);
123120

124121
ChemCompGroupFactory.setChemCompProvider(provider);
125122
}
@@ -273,30 +270,35 @@ public void testRevisedConvention() throws IOException, StructureException{
273270

274271
String name11 = "4hhb.A";
275272
Structure s = cache.getStructure(name11);
276-
assertTrue(s.getChains().size() == 1);
273+
assertEquals(1,s.getPolyChains().size());
274+
assertEquals(3,s.getChains().size()); // protein, HEM, water
277275

278276

279277
String name12 = "4hhb.A:";
280278
s = cache.getStructure(name12);
281-
assertTrue(s.getChains().size() == 1);
279+
assertEquals(1,s.getPolyChains().size());
280+
assertEquals(3,s.getChains().size());
282281

283282
String name13 = "4hhb.A_";
284283
s = cache.getStructure(name13);
285-
assertTrue(s.getChains().size() == 1);
284+
assertEquals(1,s.getPolyChains().size());
285+
assertEquals(3,s.getChains().size());
286286

287287
String name9 = "4hhb.C_1-83";
288288
String chainId = "C";
289289
s = cache.getStructure(name9);
290+
assertEquals(1,s.getPolyChains().size());
291+
assertEquals(2,s.getChains().size()); // drops waters
290292

291-
assertTrue(s.getChains().size() == 1);
292293
Chain c = s.getPolyChainByPDB(chainId);
293294
assertEquals(c.getName(),chainId);
294295
Atom[] ca = StructureTools.getRepresentativeAtomArray(s);
295296
assertEquals(83,ca.length);
296297

297298
String name10 = "4hhb.C_1-83,A_1-10";
298299
s = cache.getStructure(name10);
299-
assertTrue(s.getChains().size() == 2);
300+
assertEquals(2,s.getPolyChains().size());
301+
assertEquals(3,s.getChains().size()); // Includes C heme
300302
ca = StructureTools.getRepresentativeAtomArray(s);
301303
assertEquals(93, ca.length);
302304

@@ -416,9 +418,7 @@ public void testGetSubRangesInsertionCodes() throws StructureException {
416418
// range including insertion
417419
range = "H:35-37"; //includes 36A
418420
substr = StructureTools.getSubRanges(structure3, range);
419-
// Because we are loading from PDB, TYS I:363 is recognized as a ligand
420-
// rather than a PTM, so it gets included here
421-
assertEquals("Wrong number of chains in "+range, 2, substr.size());
421+
assertEquals("Wrong number of chains in "+range, 1, substr.size());
422422

423423
chain = substr.getChainByIndex(0);
424424

@@ -428,7 +428,7 @@ public void testGetSubRangesInsertionCodes() throws StructureException {
428428
// end with insertion
429429
range = "H:35-36A";
430430
substr = StructureTools.getSubRanges(structure3, range);
431-
assertEquals("Wrong number of chains in "+range, 2, substr.size());
431+
assertEquals("Wrong number of chains in "+range, 1, substr.size());
432432

433433
chain = substr.getChainByIndex(0);
434434

biojava-structure/src/main/java/org/biojava/nbio/structure/Structure.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,7 @@ public interface Structure extends Cloneable {
722722
* Request a particular entity by its entity id (mol id in legacy PDB format)
723723
*
724724
* @param entityId the number of the entity
725-
* @return an entity
725+
* @return an entity, or null if the molId was not found
726726
*/
727727
EntityInfo getEntityById(int entityId);
728728

biojava-structure/src/main/java/org/biojava/nbio/structure/StructureImpl.java

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -203,36 +203,37 @@ public Group findGroup(String chainName, String pdbResnum, int modelnr)
203203

204204
// first we need to gather all groups with the author id chainName: polymers, non-polymers and waters
205205
Chain polyChain = getPolyChainByPDB(chainName, modelnr);
206-
List<Group> groups = new ArrayList<>();
206+
if(polyChain != null) {
207+
List<Group> groups = new ArrayList<>();
207208

208-
groups.addAll(polyChain.getAtomGroups());
209+
groups.addAll(polyChain.getAtomGroups());
209210

210211

211-
// there can be more thatn one non-poly chain for a given author id
212-
for (Chain chain: getNonPolyChainsByPDB(chainName, modelnr)) {
213-
groups.addAll(chain.getAtomGroups());
214-
}
215-
216-
Chain water = getWaterChainByPDB(chainName, modelnr);
217-
218-
if (water!=null)
219-
groups.addAll(water.getAtomGroups());
220-
212+
// there can be more than one non-poly chain for a given author id
213+
for (Chain chain: getNonPolyChainsByPDB(chainName, modelnr)) {
214+
groups.addAll(chain.getAtomGroups());
215+
}
221216

217+
Chain water = getWaterChainByPDB(chainName, modelnr);
222218

223-
// now iterate over all groups
224-
// in order to find the amino acid that has this pdbRenum.
219+
if (water!=null)
220+
groups.addAll(water.getAtomGroups());
225221

226-
for (Group g : groups) {
227-
String rnum = g.getResidueNumber().toString();
228-
//System.out.println(g + " >" + rnum + "< >" + pdbResnum + "<");
229-
// we only mutate amino acids
230-
// and ignore hetatoms and nucleotides in this case
231-
if (rnum.equals(pdbResnum)) {
232-
return g;
222+
223+
224+
// now iterate over all groups
225+
// in order to find the group that has this pdbResnum.
226+
227+
for (Group g : groups) {
228+
String rnum = g.getResidueNumber().toString();
229+
//System.out.println(g + " >" + rnum + "< >" + pdbResnum + "<");
230+
// we only mutate amino acids
231+
// and ignore hetatoms and nucleotides in this case
232+
if (rnum.equals(pdbResnum)) {
233+
return g;
234+
}
233235
}
234236
}
235-
236237
throw new StructureException("could not find group " + pdbResnum +
237238
" in chain " + chainName);
238239
}

biojava-structure/src/main/java/org/biojava/nbio/structure/StructureTools.java

Lines changed: 62 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,6 @@
4545
import org.biojava.nbio.structure.contact.Grid;
4646
import org.biojava.nbio.structure.io.FileParsingParameters;
4747
import org.biojava.nbio.structure.io.PDBFileParser;
48-
import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
49-
import org.biojava.nbio.structure.io.mmcif.chem.ResidueType;
5048
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
5149
import org.biojava.nbio.structure.io.util.FileDownloadUtils;
5250
import org.slf4j.Logger;
@@ -488,8 +486,7 @@ public static List<Group> getLigandsByProximity(Collection<Group> target, Atom[]
488486
continue;
489487
}
490488

491-
ChemComp chemComp = g.getChemComp();
492-
if(chemComp != null && chemComp.isStandard() ) {
489+
if(g.isPolymeric() ) {
493490
// Polymers aren't ligands
494491
continue;
495492
}
@@ -526,33 +523,52 @@ public static List<Group> getLigandsByProximity(Collection<Group> target, Atom[]
526523
* being added to the new chain
527524
* @return the chain g was added to
528525
*/
529-
public static Chain addGroupToStructure(Structure s, Group g, Chain chainGuess, boolean clone ) {
530-
// Find or create the chain
531-
String chainId = g.getChainId();
532-
assert !chainId.isEmpty();
533-
Chain chain;
534-
if(chainGuess != null && chainGuess.getId() == chainId) {
535-
// previously guessed chain
536-
chain = chainGuess;
537-
} else {
538-
// Try to guess
539-
chain = s.getChain(chainId);
540-
if(chain == null) {
541-
// no chain found
542-
chain = new ChainImpl();
543-
chain.setId(chainId);
544-
chain.setName(g.getChain().getName());
545-
s.addChain(chain);
526+
public static Chain addGroupToStructure(Structure s, Group g, int model, Chain chainGuess, boolean clone ) {
527+
synchronized(s) {
528+
// Find or create the chain
529+
String chainId = g.getChainId();
530+
assert !chainId.isEmpty();
531+
Chain chain;
532+
if(chainGuess != null && chainGuess.getId() == chainId) {
533+
// previously guessed chain
534+
chain = chainGuess;
535+
} else {
536+
// Try to guess
537+
chain = s.getChain(chainId, model);
538+
if(chain == null) {
539+
// no chain found
540+
chain = new ChainImpl();
541+
chain.setId(chainId);
542+
543+
Chain oldChain = g.getChain();
544+
chain.setName(oldChain.getName());
545+
546+
EntityInfo oldEntityInfo = oldChain.getEntityInfo();
547+
548+
EntityInfo newEntityInfo = s.getEntityById(oldEntityInfo.getMolId());
549+
if( newEntityInfo == null ) {
550+
newEntityInfo = new EntityInfo(oldEntityInfo);
551+
s.addEntityInfo(newEntityInfo);
552+
}
553+
newEntityInfo.addChain(chain);
554+
chain.setEntityInfo(newEntityInfo);
555+
556+
// TODO Do the seqres need to be cloned too? -SB 2016-10-7
557+
chain.setSeqResGroups(oldChain.getSeqResGroups());
558+
chain.setSeqMisMatches(oldChain.getSeqMisMatches());
559+
560+
s.addChain(chain,model);
561+
}
546562
}
563+
564+
// Add cloned group
565+
if(clone) {
566+
g = (Group)g.clone();
567+
}
568+
chain.addGroup(g);
569+
570+
return chain;
547571
}
548-
549-
// Add cloned group
550-
if(clone) {
551-
g = (Group)g.clone();
552-
}
553-
chain.addGroup(g);
554-
555-
return chain;
556572
}
557573

558574
/**
@@ -563,10 +579,10 @@ public static Chain addGroupToStructure(Structure s, Group g, Chain chainGuess,
563579
* @param clone Indicates whether the input groups should be cloned before
564580
* being added to the new chain
565581
*/
566-
public static void addGroupsToStructure(Structure s, Collection<Group> groups, boolean clone) {
582+
public static void addGroupsToStructure(Structure s, Collection<Group> groups, int model, boolean clone) {
567583
Chain chainGuess = null;
568584
for(Group g : groups) {
569-
chainGuess = addGroupToStructure(s, g, chainGuess, clone);
585+
chainGuess = addGroupToStructure(s, g, model, chainGuess, clone);
570586
}
571587
}
572588

@@ -1492,7 +1508,7 @@ public static final Group getGroupByPDBResidueNumber(Structure struc,
14921508
throw new IllegalArgumentException("Null argument(s).");
14931509
}
14941510

1495-
Chain chain = struc.findChain(pdbResNum.getChainName());
1511+
Chain chain = struc.getPolyChainByPDB(pdbResNum.getChainName());
14961512

14971513
return chain.getGroupByPDB(pdbResNum);
14981514
}
@@ -1857,8 +1873,6 @@ public static List<Group> filterLigands(List<Group> allGroups) {
18571873
List<Group> groups = new ArrayList<Group>();
18581874
for (Group g : allGroups) {
18591875

1860-
ChemComp cc = g.getChemComp();
1861-
18621876
if ( g.isPolymeric())
18631877
continue;
18641878

@@ -1932,6 +1946,7 @@ public static Structure getStructure(String name, PDBFileParser parser,
19321946
/**
19331947
* @deprecated use {@link Chain#isProtein()} instead.
19341948
*/
1949+
@Deprecated
19351950
public static boolean isProtein(Chain c) {
19361951

19371952
return c.isProtein();
@@ -1940,26 +1955,30 @@ public static boolean isProtein(Chain c) {
19401955
/**
19411956
* @deprecated use {@link Chain#isNucleicAcid()} instead.
19421957
*/
1958+
@Deprecated
19431959
public static boolean isNucleicAcid(Chain c) {
19441960
return c.isNucleicAcid();
19451961
}
19461962

19471963
/**
19481964
* @deprecated use {@link Chain#getPredominantGroupType()} instead.
19491965
*/
1966+
@Deprecated
19501967
public static GroupType getPredominantGroupType(Chain c) {
19511968
return c.getPredominantGroupType();
19521969
}
19531970

19541971
/**
19551972
* @deprecated use {@link Chain#isWaterOnly()} instead.
19561973
*/
1974+
@Deprecated
19571975
public static boolean isChainWaterOnly(Chain c) {
19581976
return c.isWaterOnly();
19591977
}
19601978

19611979
/** @deprecated use {@link Chain#isPureNonPolymer()} instead.
19621980
*/
1981+
@Deprecated
19631982
public static boolean isChainPureNonPolymer(Chain c) {
19641983

19651984
return c.isPureNonPolymer();
@@ -2014,10 +2033,11 @@ public static void expandNcsOps(Structure structure) {
20142033

20152034
for (Chain c:structure.getChains()) {
20162035
Chain clonedChain = (Chain)c.clone();
2017-
String newChainId = c.getChainID()+i+"n";
2018-
clonedChain.setChainID(newChainId);
2019-
clonedChain.setInternalChainID(newChainId);
2020-
setChainIdsInResidueNumbers(clonedChain, newChainId);
2036+
String newChainId = c.getId()+i+"n";
2037+
String newChainName = c.getName()+i+"n";
2038+
clonedChain.setId(newChainId);
2039+
clonedChain.setName(newChainName);
2040+
setChainIdsInResidueNumbers(clonedChain, newChainName);
20212041
Calc.transform(clonedChain, m);
20222042
chainsToAdd.add(clonedChain);
20232043
c.getEntityInfo().addChain(clonedChain);
@@ -2034,15 +2054,15 @@ public static void expandNcsOps(Structure structure) {
20342054
* Used when cloning chains and resetting their ids: one needs to take care of
20352055
* resetting the ids within residue numbers too.
20362056
* @param c
2037-
* @param newChainId
2057+
* @param newChainName
20382058
*/
2039-
private static void setChainIdsInResidueNumbers(Chain c, String newChainId) {
2059+
private static void setChainIdsInResidueNumbers(Chain c, String newChainName) {
20402060
for (Group g:c.getAtomGroups()) {
2041-
g.setResidueNumber(newChainId, g.getResidueNumber().getSeqNum(), g.getResidueNumber().getInsCode());
2061+
g.setResidueNumber(newChainName, g.getResidueNumber().getSeqNum(), g.getResidueNumber().getInsCode());
20422062
}
20432063
for (Group g:c.getSeqResGroups()) {
20442064
if (g.getResidueNumber()==null) continue;
2045-
g.setResidueNumber(newChainId, g.getResidueNumber().getSeqNum(), g.getResidueNumber().getInsCode());
2065+
g.setResidueNumber(newChainName, g.getResidueNumber().getSeqNum(), g.getResidueNumber().getInsCode());
20462066
}
20472067
}
20482068

0 commit comments

Comments
 (0)