Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.net.HttpURLConnection;
import java.net.SocketTimeoutException;
import java.net.URL;
Expand All @@ -39,14 +42,21 @@
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Scanner;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class FileDownloadUtils {

private static final String SIZE_EXT = ".size";
private static final String HASH_EXT = ".hash";
private static final Logger logger = LoggerFactory.getLogger(FileDownloadUtils.class);

/**
 * Hashing algorithms that can label a hash validation file
 * (the constant name is used as the suffix of the hash file name).
 */
public enum Hash {
    /** MD5 digest. */
    MD5,
    /** SHA-1 digest. */
    SHA1,
    /** SHA-256 digest. */
    SHA256,
    /** The algorithm is not known / not specified. */
    UNKNOWN
}

/**
* Copy the content of file src to dst TODO since java 1.7 this is provided
* in java.nio.file.Files
Expand Down Expand Up @@ -154,13 +164,134 @@ public static void downloadFile(URL url, File destination) throws IOException {
}
}

logger.debug("Copying temp file {} to final location {}", tempFile, destination);
logger.debug("Copying temp file [{}] to final location [{}]", tempFile, destination);
copy(tempFile, destination);

// delete the tmp file
tempFile.delete();

}

/**
 * Creates validation files beside a file to be downloaded.<br>
 * Whenever possible, for a <code>file.ext</code> file, it creates
 * <code>file.ext.size</code> and <code>file.ext.hash_XXXX</code> in the same
 * folder where <code>file.ext</code> exists (XXXX is the name of the given <code>hash</code> algorithm).
 * If the file connection size could not be deduced from the URL, no size file is created.
 * If <code>hashURL</code> is <code>null</code>, no hash file is created.<br>
 * If the connection to <code>url</code> cannot be opened, a warning is logged and nothing is created.
 * @param url the remote file URL to download
 * @param localDestination the local file to download into
 * @param hashURL the URL of the hash file to download. Can be <code>null</code>.
 * @param hash The Hashing algorithm. Ignored if <code>hashURL</code> is <code>null</code>.
 * @throws IllegalArgumentException if <code>hashURL</code> is not <code>null</code>
 *         while <code>hash</code> is {@link Hash#UNKNOWN}
 */
public static void createValidationFiles(URL url, File localDestination, URL hashURL, Hash hash){
    try {
        URLConnection resourceConnection = url.openConnection();
        // Forward the caller's algorithm; passing Hash.UNKNOWN here would make
        // every call with a non-null hashURL fail with IllegalArgumentException.
        createValidationFiles(resourceConnection, localDestination, hashURL, hash);
    } catch (IOException e) {
        logger.warn("could not open connection to resource file due to exception: {}", e.getMessage());
    }
}
/**
 * Creates validation files beside a file to be downloaded.<br>
 * Whenever possible, for a <code>file.ext</code> file, it creates
 * <code>file.ext.size</code> and <code>file.ext.hash_XXXX</code> in the same
 * folder where <code>file.ext</code> exists (XXXX may be MD5, SHA1, or SHA256).
 * If the file connection size could not be deduced from the resourceUrlConnection
 * {@link URLConnection}, no size file is created.
 * If <code>hashURL</code> is <code>null</code>, no hash file is created.<br>
 * Failures to write the size file or to download the hash file are logged as warnings and not rethrown.<br>
 * <b>N.B.</b> None of the hashing algorithms is implemented (yet), because we did not need any of them yet.
 * @param resourceUrlConnection the remote file URLConnection to download
 * @param localDestination the local file to download into
 * @param hashURL the URL of the hash file to download. Can be <code>null</code>.
 * @param hash The Hashing algorithm. Ignored if <code>hashURL</code> is <code>null</code>.
 * @throws IllegalArgumentException if <code>hashURL</code> is not <code>null</code>
 *         while <code>hash</code> is {@link Hash#UNKNOWN}
 * @since 7.0.0
 */
public static void createValidationFiles(URLConnection resourceUrlConnection, File localDestination, URL hashURL, Hash hash){
    long size = resourceUrlConnection.getContentLengthLong();
    if (size == -1) {
        // -1 means the content length is not known for this connection.
        logger.warn("could not find expected file size for resource {}.", resourceUrlConnection.getURL());
    } else {
        logger.debug("Content-Length: {}", size);
        File sizeFile = new File(localDestination.getParentFile(), localDestination.getName() + SIZE_EXT);
        // try-with-resources closes the stream; no explicit close() is needed.
        try (PrintStream sizePrintStream = new PrintStream(sizeFile)) {
            sizePrintStream.print(size);
        } catch (FileNotFoundException e) {
            logger.warn("could not write size validation file due to exception: {}", e.getMessage());
        }
    }

    if (hashURL == null) {
        return;
    }

    if (hash == Hash.UNKNOWN) {
        throw new IllegalArgumentException("Hash URL given but algorithm is unknown");
    }
    try {
        // The hash file is named <file name>.hash_<ALGORITHM>, e.g. file.ext.hash_MD5
        File hashFile = new File(localDestination.getParentFile(), String.format("%s%s_%s", localDestination.getName(), HASH_EXT, hash));
        downloadFile(hashURL, hashFile);
    } catch (IOException e) {
        logger.warn("could not write validation hash file due to exception: {}", e.getMessage());
    }
}

/**
 * Validate a local file based on pre-existing metadata files for size and hash.<br>
 * If the passed in <code>localFile</code> parameter is a file named <code>file.ext</code>, the function searches in the same folder for:
 * <ul>
 * <li><code>file.ext.size</code>: If found, it compares the size stored in it to the length of <code>localFile</code> (in bytes).
 * A size file that does not start with a readable number is treated as a failed comparison.</li>
 * <li><code>file.ext.hash_XXXX (where XXXX is MD5, SHA1, or SHA256)</code>: If found, it compares the hash stored in it to the hash code of <code>localFile</code>.</li>
 * </ul>
 * If any of these comparisons fail, the function returns <code>false</code>. otherwise it returns true.
 * <p>
 * <b>N.B.</b> None of the 3 common verification hashing algorithms are implemented yet.
 * @param localFile The file to validate
 * @return <code>false</code> if any of the size or hash code metadata files exists but its contents does not match the expected value in the file, <code>true</code> otherwise.
 * @throws UnsupportedOperationException if a hash metadata file exists (hash validation is not yet implemented)
 * @since 7.0.0
 */
public static boolean validateFile(File localFile) {
    File sizeFile = new File(localFile.getParentFile(), localFile.getName() + SIZE_EXT);
    if (sizeFile.exists()) {
        // try-with-resources: the scanner is closed on every path, and there is
        // no close() on a null reference when the Scanner constructor itself fails.
        try (Scanner scanner = new Scanner(sizeFile)) {
            if (!scanner.hasNextLong()) {
                // Empty or garbled metadata cannot confirm the file; report it as
                // invalid instead of failing with an unchecked NoSuchElementException.
                logger.warn("could not validate size of file [{}] because size metadata file [{}] does not contain a valid size.", localFile, sizeFile);
                return false;
            }
            long expectedSize = scanner.nextLong();
            long actualSize = localFile.length();
            if (expectedSize != actualSize) {
                logger.warn("File [{}] size ({}) does not match expected size ({}).", localFile, actualSize, expectedSize);
                return false;
            }
        } catch (FileNotFoundException e) {
            logger.warn("could not validate size of file [{}] because no size metadata file exists.", localFile);
        }
    }

    // Resolve against the absolute path so that a bare relative file name
    // (whose getParentFile() is null) is still searched for in its real folder.
    File parentDir = localFile.getAbsoluteFile().getParentFile();
    // Plain string matching instead of a regex: file names may contain regex metacharacters.
    final String hashFilePrefix = localFile.getName() + HASH_EXT + "_";
    File[] hashFiles = parentDir == null ? null : parentDir.listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            if (!name.startsWith(hashFilePrefix)) {
                return false;
            }
            String suffix = name.substring(hashFilePrefix.length());
            return suffix.equals(Hash.MD5.name())
                    || suffix.equals(Hash.SHA1.name())
                    || suffix.equals(Hash.SHA256.name());
        }
    });
    // listFiles() returns null when the folder cannot be listed.
    if (hashFiles != null && hashFiles.length > 0) {
        String hashFileName = hashFiles[0].getName();
        String algo = hashFileName.substring(hashFilePrefix.length());
        switch (Hash.valueOf(algo)) {
        case MD5:
        case SHA1:
        case SHA256:
            throw new UnsupportedOperationException("Not yet implemented");
        case UNKNOWN:
        default: // Not reachable: the filter above only accepts the three algorithms
            throw new IllegalArgumentException("Hashing algorithm not known: " + algo);
        }
    }

    return true;
}

/**
* Converts path to Unix convention and adds a terminating slash if it was
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@
import static org.biojava.nbio.core.util.FileDownloadUtils.getFilePrefix;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URL;
import java.nio.file.Files;

import org.junit.jupiter.api.Nested;
Expand Down Expand Up @@ -190,4 +193,50 @@ void deleteFolderTree() throws IOException{
assertFalse(toDelete.exists());
}
}

@Nested
class CreateValidationFiles{

    /**
     * Downloads a small remote file into the temp folder and checks that
     * {@link FileDownloadUtils#validateFile(File)} reacts as expected to absent,
     * wrong and correct size metadata, and to the presence of a hash metadata file.
     * Requires network access to the source URL.
     */
    @Test
    void testValidationFiles() throws IOException{
        URL sourceUrl = new URL("https://ftp.wwpdb.org/pub/pdb/data/structures/divided/mmCIF/45/145d.cif.gz");
        File destFile = new File(System.getProperty("java.io.tmpdir"), "145d.cif.gz");
        File sizeFile = new File(destFile.getParentFile(), destFile.getName()+".size");
        File hashFile = new File(destFile.getParentFile(), destFile.getName()+".hash_MD5");
        System.out.println(destFile.getAbsolutePath());
        destFile.delete();
        sizeFile.delete();
        hashFile.delete();
        assertFalse(destFile.exists(), "couldn't delete dest file");
        assertFalse(sizeFile.exists(), "couldn't delete size file");
        assertFalse(hashFile.exists(), "couldn't delete hash file");

        try {
            FileDownloadUtils.downloadFile(sourceUrl, destFile);
            assertTrue(destFile.exists(), "couldn't create dest file");

            assertTrue(FileDownloadUtils.validateFile(destFile), "file detected to be invalid although there are no validation files");

            try (PrintStream wrongSize = new PrintStream(sizeFile)) {
                wrongSize.print(15); // some wrong size value
            }
            assertFalse(FileDownloadUtils.validateFile(destFile), "file not detected to be invalid although size value is wrong.");
            System.out.println("Just ignore the previous warning. It is expected.");

            FileDownloadUtils.createValidationFiles(sourceUrl, destFile, null, FileDownloadUtils.Hash.UNKNOWN);
            assertTrue(sizeFile.exists(), "couldn't create size file");
            assertTrue(FileDownloadUtils.validateFile(destFile), "file detected to be invalid although there is correct size validation file");

            try (PrintStream wrongHash = new PrintStream(hashFile)) {
                wrongHash.print("ABCD"); // some wrong hash value
            }
            //This is not yet implemented. I am using this test for documentation purpose.
            assertThrows(UnsupportedOperationException.class,
                    () -> FileDownloadUtils.validateFile(destFile),
                    "file not detected to be invalid although hash value is wrong.");
        } finally {
            // Clean up even when an assertion above fails.
            destFile.delete();
            sizeFile.delete();
            hashFile.delete();
        }
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ public static EcodDatabase getEcodDatabase(String version) {
}
} catch (IOException e) {
// For parsing errors, just use the requested version
// TODO What about corrupted downloading errors?? Amr
logger.warn("Could not get Ecod version, or file is corrupted", e);
return null;
}
}
logger.trace("Releasing EcodFactory lock after getting version "+version);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ private boolean domainsAvailable() {
try {
File f = getDomainFile();

if (!f.exists() || f.length() <= 0 )
if (! (f.exists() && FileDownloadUtils.validateFile(f)))
return false;

// Re-download old copies of "latest"
Expand All @@ -395,8 +395,8 @@ private boolean domainsAvailable() {
}

/**
* Downloads the domains file, overwriting any existing file
* @throws IOException
* Downloads the domains file +/- its validation metadata, overwriting any existing file
* @throws IOException in cases of file I/O, including failure to download a healthy (non-corrupted) file.
*/
private void downloadDomains() throws IOException {
domainsFileLock.writeLock().lock();
Expand All @@ -406,7 +406,10 @@ private void downloadDomains() throws IOException {
File localFile = getDomainFile();

logger.info("Downloading {} to: {}",domainsURL, localFile);
FileDownloadUtils.createValidationFiles(domainsURL, localFile, null, FileDownloadUtils.Hash.UNKNOWN);
FileDownloadUtils.downloadFile(domainsURL, localFile);
if(! FileDownloadUtils.validateFile(localFile))
throw new IOException("Downloaded file invalid: "+ localFile);
} catch (MalformedURLException e) {
logger.error("Malformed url: "+ url + DOMAINS_PATH + getDomainFilename(),e);
} finally {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ public Structure getStructureById(PdbId pdbId) throws IOException {
* for direct parsing.
* @param pdbId
* @return
* @throws IOException
* @throws IOException in cases of file I/O, including failure to download a healthy (non-corrupted) file.
*/
protected InputStream getInputStream(PdbId pdbId) throws IOException{

Expand All @@ -373,6 +373,9 @@ protected InputStream getInputStream(PdbId pdbId) throws IOException{
throw new IOException("Structure "+pdbId+" not found and unable to download.");
}

if(! FileDownloadUtils.validateFile(file))
throw new IOException("Downloaded file invalid: "+file);

InputStreamProvider isp = new InputStreamProvider();

InputStream inputStream = isp.getInputStream(file);
Expand All @@ -385,7 +388,7 @@ protected InputStream getInputStream(PdbId pdbId) throws IOException{
*
* Used to pre-fetch large numbers of structures.
* @param pdbId
* @throws IOException
* @throws IOException in cases of file I/O, including failure to download a healthy (non-corrupted) file.
*/
public void prefetchStructure(String pdbId) throws IOException {

Expand All @@ -395,6 +398,8 @@ public void prefetchStructure(String pdbId) throws IOException {
if(!file.exists()) {
throw new IOException("Structure "+pdbId+" not found and unable to download.");
}
if(! FileDownloadUtils.validateFile(file))
throw new IOException("Downloaded file invalid: "+file);
}

/**
Expand Down Expand Up @@ -525,14 +530,14 @@ protected File downloadStructure(PdbId pdbId) throws IOException {
}

/**
* Download a file from the ftp server, replacing any existing files if needed
* Download a file from the ftp server +/- its validation metadata, replacing any existing files if needed
* @param pdbId PDB ID
* @param pathOnServer Path on the FTP server, e.g. data/structures/divided/pdb
* @param obsolete Whether or not file should be saved to the obsolete location locally
* @param existingFile if not null and checkServerFileDate is true, the last modified date of the
* server file and this file will be compared to decide whether to download or not
* @return
* @throws IOException
* @throws IOException in cases of file I/O, including failure to download a healthy (non-corrupted) file.
*/
private File downloadStructure(PdbId pdbId, String pathOnServer, boolean obsolete, File existingFile)
throws IOException{
Expand Down Expand Up @@ -576,7 +581,10 @@ private File downloadStructure(PdbId pdbId, String pathOnServer, boolean obsolet
logger.info("Fetching " + ftp);
logger.info("Writing to "+ realFile);

FileDownloadUtils.createValidationFiles(url, realFile, null, FileDownloadUtils.Hash.UNKNOWN);
FileDownloadUtils.downloadFile(url, realFile);
if(! FileDownloadUtils.validateFile(realFile))
throw new IOException("Downloaded file invalid: "+realFile);

return realFile;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,15 @@ public static List<SiftsEntity> getSiftsMapping(String pdbId) throws IOException
if ( ! dest.exists()){
String u = String.format(fileLoc,pdbId);
URL url = new URL(u);
logger.debug("Downloading SIFTS file {} validation metadata.",url);
FileDownloadUtils.createValidationFiles(url, dest, null, FileDownloadUtils.Hash.UNKNOWN);
logger.debug("Downloading SIFTS file {} to {}",url,dest);
FileDownloadUtils.downloadFile(url, dest);
}

if(! FileDownloadUtils.validateFile(dest))
throw new IOException("Downloaded file invalid: "+dest);

InputStreamProvider prov = new InputStreamProvider();
InputStream is = prov.getInputStream(dest);
SiftsXMLParser parser = new SiftsXMLParser();
Expand Down
Loading