-
Notifications
You must be signed in to change notification settings - Fork 397
Fix #703: Recover from empty structure files in PDB_CACHE_DIR #774
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5c1a335
4785d57
c3cec91
b207d34
6ea9ae7
62fb2a4
f5b325f
13f267c
c1c3c30
0c10a2e
08ab3e1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,6 +42,7 @@ | |
| import org.biojava.nbio.core.util.InputStreamProvider; | ||
| import org.biojava.nbio.structure.align.util.HTTPConnectionTools; | ||
| import org.biojava.nbio.structure.align.util.UserConfiguration; | ||
| import org.biojava.nbio.structure.io.LocalPDBDirectory; | ||
| import org.biojava.nbio.structure.io.mmcif.model.ChemComp; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
@@ -85,32 +86,37 @@ public class DownloadChemCompProvider implements ChemCompProvider { | |
| protectedIDs.add("AUX"); | ||
| protectedIDs.add("NUL"); | ||
| } | ||
|
|
||
| private static ChemCompProvider fallback = null; // Fallback provider if the download fails | ||
|
|
||
| /** by default we will download only some of the files. User has to request that all files should be downloaded... | ||
| * | ||
| */ | ||
| boolean downloadAll = false; | ||
|
|
||
| public DownloadChemCompProvider(){ | ||
| logger.debug("Initialising DownloadChemCompProvider"); | ||
|
|
||
| // note that path is static, so this is just to make sure that all non-static methods will have path initialised | ||
| initPath(); | ||
| this(null); | ||
| } | ||
|
|
||
| public DownloadChemCompProvider(String cacheFilePath){ | ||
| logger.debug("Initialising DownloadChemCompProvider"); | ||
|
|
||
| // note that path is static, so this is just to make sure that all non-static methods will have path initialised | ||
| path = new File(cacheFilePath); | ||
| if(cacheFilePath != null) { | ||
| path = new File(cacheFilePath); | ||
| } | ||
| } | ||
|
|
||
| private static void initPath(){ | ||
|
|
||
| /** | ||
| * Get this provider's cache path | ||
| * @return | ||
| */ | ||
| public static File getPath(){ | ||
| if (path==null) { | ||
| UserConfiguration config = new UserConfiguration(); | ||
| path = new File(config.getCacheFilePath()); | ||
| } | ||
| return path; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -127,7 +133,7 @@ public void checkDoFirstInstall(){ | |
|
|
||
| // this makes sure there is a file separator between every component, | ||
| // if path has a trailing file separator or not, it will work for both cases | ||
| File dir = new File(path, CHEM_COMP_CACHE_DIRECTORY); | ||
| File dir = new File(getPath(), CHEM_COMP_CACHE_DIRECTORY); | ||
| File f = new File(dir, "components.cif.gz"); | ||
|
|
||
| if ( ! f.exists()) { | ||
|
|
@@ -161,7 +167,7 @@ private void split() throws IOException { | |
|
|
||
| logger.info("Installing individual chem comp files ..."); | ||
|
|
||
| File dir = new File(path, CHEM_COMP_CACHE_DIRECTORY); | ||
| File dir = new File(getPath(), CHEM_COMP_CACHE_DIRECTORY); | ||
| File f = new File(dir, "components.cif.gz"); | ||
|
|
||
|
|
||
|
|
@@ -212,7 +218,7 @@ private void split() throws IOException { | |
| */ | ||
| private void writeID(String contents, String currentID) throws IOException{ | ||
|
|
||
| String localName = DownloadChemCompProvider.getLocalFileName(currentID); | ||
| String localName = getLocalFileName(currentID); | ||
|
|
||
| try ( PrintWriter pw = new PrintWriter(new GZIPOutputStream(new FileOutputStream(localName))) ) { | ||
|
|
||
|
|
@@ -272,7 +278,10 @@ public ChemComp getChemComp(String recordName) { | |
|
|
||
| ChemComp chemComp = dict.getChemComp(recordName); | ||
|
|
||
| return chemComp; | ||
| // May be null if the file was corrupt. Fall back on ReducedChemCompProvider in that case | ||
| if(chemComp != null) { | ||
| return chemComp; | ||
| } | ||
|
|
||
| } catch (IOException e) { | ||
|
|
||
|
|
@@ -296,9 +305,12 @@ public ChemComp getChemComp(String recordName) { | |
|
|
||
| // see https://github.com/biojava/biojava/issues/315 | ||
| // probably a network error happened. Try to use the ReducedChemCOmpProvider | ||
| ReducedChemCompProvider reduced = new ReducedChemCompProvider(); | ||
| if( fallback == null) { | ||
| fallback = new ReducedChemCompProvider(); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. creating a ReducedChemCompProvider is cheap (and idempotent), but let's cache it anyways. |
||
| } | ||
|
|
||
| return reduced.getChemComp(recordName); | ||
| logger.warn("Falling back to ReducedChemCompProvider for {}. This could indicate a network error.", recordName); | ||
| return fallback.getChemComp(recordName); | ||
|
|
||
| } | ||
|
|
||
|
|
@@ -313,16 +325,15 @@ public static String getLocalFileName(String recordName){ | |
| recordName = "_" + recordName; | ||
| } | ||
|
|
||
| initPath(); | ||
|
|
||
| File f = new File(path, CHEM_COMP_CACHE_DIRECTORY); | ||
| File f = new File(getPath(), CHEM_COMP_CACHE_DIRECTORY); | ||
| if (! f.exists()){ | ||
| logger.info("Creating directory " + f); | ||
|
|
||
| boolean success = f.mkdir(); | ||
| // we've checked in initPath that path is writable, so there's no need to check if it succeeds | ||
| // in the unlikely case that in the meantime it isn't writable at least we log an error | ||
| if (!success) logger.error("Directory {} could not be created",f); | ||
| if (!success) | ||
| logger.error("Directory {} could not be created",f); | ||
|
|
||
| } | ||
|
|
||
|
|
@@ -337,6 +348,14 @@ private static boolean fileExists(String recordName){ | |
|
|
||
| File f = new File(fileName); | ||
|
|
||
| // delete files that are too short to have contents | ||
| if( f.length() < LocalPDBDirectory.MIN_PDB_FILE_SIZE ) { | ||
| // Delete defensively. | ||
| // Note that if delete is unsuccessful, we re-download the file anyways | ||
| f.delete(); | ||
| return false; | ||
| } | ||
|
|
||
| return f.exists(); | ||
|
|
||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -212,6 +212,9 @@ public void parse(BufferedReader buf) | |
|
|
||
| // the first line is a data_PDBCODE line, test if this looks like a mmcif file | ||
| line = buf.readLine(); | ||
| while( line != null && (line.isEmpty() || line.startsWith(COMMENT_CHAR))) { | ||
| line = buf.readLine(); | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice to handle comments in first line! Is an empty first line also valid CIF?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think blank lines are permitted anywhere in CIF, but I could be wrong. I try to write permissive parsers. It seems like it shouldn't break the spec so we might as well be robust to it either way. |
||
| if (line == null || !line.startsWith(MMCIF_TOP_HEADER)){ | ||
| logger.error("This does not look like a valid mmCIF file! The first line should start with 'data_', but is: '" + line+"'"); | ||
| triggerDocumentEnd(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,9 +21,6 @@ | |
| */ | ||
| package org.biojava.nbio.structure.io.util; | ||
|
|
||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| import java.io.File; | ||
| import java.io.FileInputStream; | ||
| import java.io.FileOutputStream; | ||
|
|
@@ -36,6 +33,15 @@ | |
| import java.nio.channels.Channels; | ||
| import java.nio.channels.FileChannel; | ||
| import java.nio.channels.ReadableByteChannel; | ||
| import java.nio.file.FileVisitResult; | ||
| import java.nio.file.Files; | ||
| import java.nio.file.Path; | ||
| import java.nio.file.Paths; | ||
| import java.nio.file.SimpleFileVisitor; | ||
| import java.nio.file.attribute.BasicFileAttributes; | ||
|
|
||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| public class FileDownloadUtils { | ||
|
|
||
|
|
@@ -240,6 +246,41 @@ public static URLConnection prepareURLConnection(String url, int timeout) throws | |
| connection.setConnectTimeout(timeout); | ||
| return connection; | ||
| } | ||
|
|
||
| /** | ||
| * Recursively delete a folder & contents | ||
| * | ||
| * @param dir directory to delete | ||
| */ | ||
| public static void deleteDirectory(Path dir) throws IOException { | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Surprisingly this isn't an nio method and I couldn't find one in BioJava |
||
| if(dir == null || !Files.exists(dir)) | ||
| return; | ||
| Files.walkFileTree(dir, new SimpleFileVisitor<Path>() { | ||
| @Override | ||
| public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { | ||
| Files.delete(file); | ||
| return FileVisitResult.CONTINUE; | ||
| } | ||
|
|
||
| @Override | ||
| public FileVisitResult postVisitDirectory(Path dir, IOException e) throws IOException { | ||
| if (e != null) { | ||
| throw e; | ||
| } | ||
| Files.delete(dir); | ||
| return FileVisitResult.CONTINUE; | ||
| } | ||
| }); | ||
| } | ||
| /** | ||
| * Recursively delete a folder & contents | ||
| * | ||
| * @param dir directory to delete | ||
| */ | ||
| public static void deleteDirectory(String dir) throws IOException { | ||
| deleteDirectory(Paths.get(dir)); | ||
| } | ||
|
|
||
|
|
||
| public static void main(String[] args) { | ||
| String url; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a small change in behavior. If we can't read the downloaded chemcomp file, we now return a stub chemcomp rather than NULL. Previously the stub was returned for corrupt GZIP files (which threw an error) but not for malformed cif contents (which just returned null).