Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 81 additions & 41 deletions core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,28 @@
import java.io.StringWriter;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.jsonldjava.core.DocumentLoader;
import com.github.jsonldjava.core.JsonLdApi;
import com.github.jsonldjava.core.JsonLdProcessor;

import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.BOMInputStream;
import org.apache.http.Header;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
Expand All @@ -28,17 +42,8 @@
import org.apache.http.impl.client.cache.BasicHttpCacheStorage;
import org.apache.http.impl.client.cache.CacheConfig;
import org.apache.http.impl.client.cache.CachingHttpClientBuilder;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.jsonldjava.core.DocumentLoader;
import com.github.jsonldjava.core.JsonLdApi;
import com.github.jsonldjava.core.JsonLdProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Functions used to make loading, parsing, and serializing JSON easy using
Expand Down Expand Up @@ -66,6 +71,8 @@ public class JsonUtils {
private static final JsonFactory JSON_FACTORY = new JsonFactory(JSON_MAPPER);

private static volatile CloseableHttpClient DEFAULT_HTTP_CLIENT;
// Avoid possible endless loop when following alternate locations
private static final int MAX_LINKS_FOLLOW = 20;

static {
// Disable default Jackson behaviour to close
Expand Down Expand Up @@ -109,6 +116,10 @@ public static Object fromInputStream(InputStream input) throws IOException {
}
}
return fromInputStream(bOMInputStream, charset);
} finally {
if (input != null) {
input.close();
}
}
}

Expand Down Expand Up @@ -335,40 +346,69 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient)
final String protocol = url.getProtocol();
// We can only use the Apache HTTPClient for HTTP/HTTPS, so use the
// native java client for the others
CloseableHttpResponse response = null;
InputStream in = null;
try {
if (!protocol.equalsIgnoreCase("http") && !protocol.equalsIgnoreCase("https")) {
// Can't use the HTTP client for those!
// Fallback to Java's built-in JsonLdUrl handler. No need for
// Accept headers as it's likely to be file: or jar:
in = url.openStream();
} else {
final HttpUriRequest request = new HttpGet(url.toExternalForm());
// We prefer application/ld+json, but fallback to
// application/json
// or whatever is available
request.addHeader("Accept", ACCEPT_HEADER);

response = httpClient.execute(request);
final int status = response.getStatusLine().getStatusCode();
if (status != 200 && status != 203) {
throw new IOException("Can't retrieve " + url + ", status code: " + status);
}
in = response.getEntity().getContent();
if (!protocol.equalsIgnoreCase("http") && !protocol.equalsIgnoreCase("https")) {
// Can't use the HTTP client for those!
// Fall back to Java's built-in URL handler. No need for
// Accept headers as it's likely to be file: or jar:
return fromInputStream(url.openStream());
} else {
return fromJsonLdViaHttpUri(url, httpClient, 0);
}
}

private static Object fromJsonLdViaHttpUri(final URL url, final CloseableHttpClient httpClient, int linksFollowed)
throws IOException {
final HttpUriRequest request = new HttpGet(url.toExternalForm());
// We prefer application/ld+json, but fallback to application/json
// or whatever is available
request.addHeader("Accept", ACCEPT_HEADER);
try (CloseableHttpResponse response = httpClient.execute(request)) {
final int status = response.getStatusLine().getStatusCode();
if (status != 200 && status != 203) {
throw new IOException("Can't retrieve " + url + ", status code: " + status);
}
return fromInputStream(in);
} finally {
try {
if (in != null) {
in.close();
// follow alternate document location
// https://www.w3.org/TR/json-ld11/#alternate-document-location
URL alternateLink = alternateLink(url, response);
if (alternateLink != null) {
linksFollowed++;
if (linksFollowed > MAX_LINKS_FOLLOW) {
throw new IOException("Too many alternate links followed. This may indicate a cycle. Aborting.");
}
} finally {
if (response != null) {
response.close();
return fromJsonLdViaHttpUri(alternateLink, httpClient, linksFollowed);
}
return fromInputStream(response.getEntity().getContent());
}
}

/**
 * Looks for an alternate JSON-LD document location advertised by a response
 * whose own content is not JSON-LD.
 *
 * <p>
 * Every {@code Link} header of the response is inspected. The first
 * link-value that carries the {@code alternate} relation type together with
 * {@code type="application/ld+json"} wins; its target is resolved against
 * {@code url}, so absolute targets as well as relative ones (with or without
 * a leading slash) are handled.
 *
 * @param url
 *            The URL the response was retrieved from; used as the base for
 *            resolving the link target.
 * @param response
 *            The HTTP response to inspect.
 * @return The resolved alternate location, or {@code null} if the response
 *         has no entity or Content-Type, is already
 *         {@code application/ld+json}, or advertises no matching link.
 * @throws MalformedURLException
 *             If the advertised target cannot be resolved into a URL.
 */
private static URL alternateLink(URL url, CloseableHttpResponse response)
        throws MalformedURLException {
    if (response.getEntity() == null || response.getEntity().getContentType() == null) {
        return null;
    }
    final String contentType = response.getEntity().getContentType().getValue();
    // Parameters such as "; charset=utf-8" must not hide the fact that the
    // response already is JSON-LD.
    if (contentType == null || isJsonLdMediaType(contentType)) {
        return null;
    }
    for (Header header : response.getAllHeaders()) {
        if (!header.getName().equalsIgnoreCase("link") || header.getValue() == null) {
            continue;
        }
        // One header may carry several comma-separated link-values. Each
        // link-value starts with "<", and "<" cannot occur inside a URI, so
        // only split on commas that are followed by "<".
        for (String linkValue : header.getValue().split(",(?=\\s*<)")) {
            final String target = alternateJsonLdTarget(linkValue);
            if (target != null) {
                return new URL(url, target);
            }
        }
    }
    return null;
}

/**
 * Tells whether a media type, ignoring any parameters and letter case, is
 * {@code application/ld+json}.
 */
private static boolean isJsonLdMediaType(String mediaType) {
    final int parametersStart = mediaType.indexOf(';');
    final String essence = parametersStart < 0 ? mediaType
            : mediaType.substring(0, parametersStart);
    return essence.trim().equalsIgnoreCase("application/ld+json");
}

/**
 * Extracts the target of a single link-value if it is an alternate link of
 * type {@code application/ld+json}.
 *
 * @return The (non-empty) text between the angle brackets, or {@code null}
 *         if the link-value is malformed or is not a JSON-LD alternate link.
 */
private static String alternateJsonLdTarget(String linkValue) {
    final String trimmed = linkValue.trim();
    final int targetEnd = trimmed.indexOf('>');
    if (!trimmed.startsWith("<") || targetEnd < 0) {
        return null;
    }
    // Take the target before splitting the parameters so that a ';' inside
    // the URI does not break it apart.
    final String target = trimmed.substring(1, targetEnd).trim();
    boolean relAlternate = false;
    boolean jsonld = false;
    for (String parameter : trimmed.substring(targetEnd + 1).split(";")) {
        final int equalsSign = parameter.indexOf('=');
        if (equalsSign < 0) {
            continue;
        }
        final String name = parameter.substring(0, equalsSign).trim();
        String value = parameter.substring(equalsSign + 1).trim();
        // Parameter values may be given as a token or as a quoted string.
        if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
            value = value.substring(1, value.length() - 1).trim();
        }
        if (name.equalsIgnoreCase("rel")) {
            // rel may list several space-separated relation types
            for (String relation : value.split("\\s+")) {
                if (relation.equalsIgnoreCase("alternate")) {
                    relAlternate = true;
                }
            }
        } else if (name.equalsIgnoreCase("type") && isJsonLdMediaType(value)) {
            jsonld = true;
        }
    }
    return jsonld && relAlternate && !target.isEmpty() ? target : null;
}

/**
Expand All @@ -384,7 +424,7 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient)
* @throws IOException
* If there was an IO error during parsing.
*/
public static Object fromURLJavaNet(java.net.URL url) throws JsonParseException, IOException {
public static Object fromURLJavaNet(URL url) throws JsonParseException, IOException {
final HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
urlConn.addRequestProperty("Accept", ACCEPT_HEADER);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,119 +1,61 @@
package com.github.jsonldjava.core;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import com.github.jsonldjava.utils.JarCacheStorage;
import com.github.jsonldjava.utils.JsonUtils;

import org.apache.http.client.protocol.RequestAcceptEncoding;
import org.apache.http.client.protocol.ResponseContentEncoding;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.impl.client.cache.BasicHttpCacheStorage;
import org.apache.http.impl.client.cache.CacheConfig;
import org.apache.http.impl.client.cache.CachingHttpClientBuilder;
import org.junit.Ignore;
import org.junit.Test;

import com.github.jsonldjava.utils.JarCacheStorage;

public class MinimalSchemaOrgRegressionTest {

private static final String ACCEPT_HEADER = "application/ld+json, application/json;q=0.9, application/javascript;q=0.5, text/javascript;q=0.5, text/plain;q=0.2, */*;q=0.1";

@Ignore("Java API does not have any way of redirecting automatically from HTTP to HTTPS, which breaks schema.org usage with it")
/**
* Tests getting JSON from schema.org with the HTTP Accept header set to
* {@value com.github.jsonldjava.utils.JsonUtils#ACCEPT_HEADER}.
*/
@Test
public void testHttpURLConnection() throws Exception {
public void testApacheHttpClient() throws Exception {
final URL url = new URL("http://schema.org/");
final boolean followRedirectsSetting = HttpURLConnection.getFollowRedirects();
try {
HttpURLConnection.setFollowRedirects(true);
final HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
urlConn.setInstanceFollowRedirects(true);
urlConn.addRequestProperty("Accept", ACCEPT_HEADER);

final InputStream directStream = urlConn.getInputStream();
verifyInputStream(directStream);
} finally {
HttpURLConnection.setFollowRedirects(followRedirectsSetting);
}
// Common CacheConfig for both the JarCacheStorage and the underlying
// BasicHttpCacheStorage
final CacheConfig cacheConfig = CacheConfig.custom().setMaxCacheEntries(1000)
.setMaxObjectSize(1024 * 128).build();

final CloseableHttpClient httpClient = CachingHttpClientBuilder.create()
// allow caching
.setCacheConfig(cacheConfig)
// Wrap the local JarCacheStorage around a BasicHttpCacheStorage
.setHttpCacheStorage(new JarCacheStorage(null, cacheConfig,
new BasicHttpCacheStorage(cacheConfig)))
// Support compressed data
// http://hc.apache.org/httpcomponents-client-ga/tutorial/html/httpagent.html#d5e1238
.addInterceptorFirst(new RequestAcceptEncoding())
.addInterceptorFirst(new ResponseContentEncoding())
.setRedirectStrategy(DefaultRedirectStrategy.INSTANCE)
// use system defaults for proxy etc.
.useSystemProperties().build();

Object content = JsonUtils.fromURL(url, httpClient);
checkBasicConditions(content.toString());
}

private void verifyInputStream(InputStream directStream) throws IOException {
assertNotNull("InputStream was null", directStream);
final StringWriter output = new StringWriter();
try {
IOUtils.copy(directStream, output, StandardCharsets.UTF_8);
} finally {
directStream.close();
output.flush();
}
final String outputString = output.toString();
// System.out.println(outputString);
private void checkBasicConditions(final String outputString) {
// Test for some basic conditions without including the JSON/JSON-LD
// parsing code here
// assertTrue(outputString, outputString.endsWith("}"));
assertTrue(outputString, outputString.endsWith("}"));
assertFalse("Output string should not be empty: " + outputString.length(),
outputString.isEmpty());
assertTrue("Unexpected length: " + outputString.length(), outputString.length() > 100000);
}

@Test
public void testApacheHttpClient() throws Exception {
final URL url = new URL("http://schema.org/");
// Common CacheConfig for both the JarCacheStorage and the underlying
// BasicHttpCacheStorage
final CacheConfig cacheConfig = CacheConfig.custom().setMaxCacheEntries(1000)
.setMaxObjectSize(1024 * 128).build();

final CloseableHttpClient httpClient = CachingHttpClientBuilder.create()
// allow caching
.setCacheConfig(cacheConfig)
// Wrap the local JarCacheStorage around a BasicHttpCacheStorage
.setHttpCacheStorage(new JarCacheStorage(null, cacheConfig,
new BasicHttpCacheStorage(cacheConfig)))
// Support compressed data
// http://hc.apache.org/httpcomponents-client-ga/tutorial/html/httpagent.html#d5e1238
.addInterceptorFirst(new RequestAcceptEncoding())
.addInterceptorFirst(new ResponseContentEncoding())
.setRedirectStrategy(DefaultRedirectStrategy.INSTANCE)
// use system defaults for proxy etc.
.useSystemProperties().build();

try {
final HttpUriRequest request = new HttpGet(url.toExternalForm());
// We prefer application/ld+json, but fallback to application/json
// or whatever is available
request.addHeader("Accept", ACCEPT_HEADER);

final CloseableHttpResponse response = httpClient.execute(request);
try {
final int status = response.getStatusLine().getStatusCode();
if (status != 200 && status != 203) {
throw new IOException("Can't retrieve " + url + ", status code: " + status);
}
final InputStream content = response.getEntity().getContent();
verifyInputStream(content);
} finally {
if (response != null) {
response.close();
}
}
} finally {
if (httpClient != null) {
httpClient.close();
}
}
}


}