Skip to content

Commit 2e1135d

Browse files
committed
Follow alternate document location
Implements https://www.w3.org/TR/json-ld11/#alternate-document-location. Resolves #289.
1 parent c867a0a commit 2e1135d

File tree

2 files changed

+63
-39
lines changed

2 files changed

+63
-39
lines changed

core/src/main/java/com/github/jsonldjava/utils/JsonUtils.java

Lines changed: 55 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,17 @@
99
import java.io.StringWriter;
1010
import java.io.Writer;
1111
import java.net.HttpURLConnection;
12+
import java.net.MalformedURLException;
1213
import java.nio.charset.Charset;
1314
import java.nio.charset.StandardCharsets;
15+
import java.net.URL;
1416
import java.util.List;
1517
import java.util.Map;
1618

1719
import org.apache.commons.io.ByteOrderMark;
1820
import org.apache.commons.io.IOUtils;
1921
import org.apache.commons.io.input.BOMInputStream;
22+
import org.apache.http.Header;
2023
import org.apache.http.client.methods.CloseableHttpResponse;
2124
import org.apache.http.client.methods.HttpGet;
2225
import org.apache.http.client.methods.HttpUriRequest;
@@ -344,18 +347,7 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient)
344347
// Accept headers as it's likely to be file: or jar:
345348
in = url.openStream();
346349
} else {
347-
final HttpUriRequest request = new HttpGet(url.toExternalForm());
348-
// We prefer application/ld+json, but fallback to
349-
// application/json
350-
// or whatever is available
351-
request.addHeader("Accept", ACCEPT_HEADER);
352-
353-
response = httpClient.execute(request);
354-
final int status = response.getStatusLine().getStatusCode();
355-
if (status != 200 && status != 203) {
356-
throw new IOException("Can't retrieve " + url + ", status code: " + status);
357-
}
358-
in = response.getEntity().getContent();
350+
in = getJsonLdViaHttpUri(url, httpClient, response);
359351
}
360352
return fromInputStream(in);
361353
} finally {
@@ -371,6 +363,56 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient)
371363
}
372364
}
373365

366+
private static InputStream getJsonLdViaHttpUri(final URL url, final CloseableHttpClient httpClient,
367+
CloseableHttpResponse response) throws IOException {
368+
final HttpUriRequest request = new HttpGet(url.toExternalForm());
369+
// We prefer application/ld+json, but fallback to application/json
370+
// or whatever is available
371+
request.addHeader("Accept", ACCEPT_HEADER);
372+
response = httpClient.execute(request);
373+
374+
final int status = response.getStatusLine().getStatusCode();
375+
if (status != 200 && status != 203) {
376+
throw new IOException("Can't retrieve " + url + ", status code: " + status);
377+
}
378+
// follow alternate document location
379+
// https://www.w3.org/TR/json-ld11/#alternate-document-location
380+
URL alternateLink = alternateLink(url, response);
381+
if (alternateLink != null) {
382+
return getJsonLdViaHttpUri(alternateLink, httpClient, response);
383+
}
384+
return response.getEntity().getContent();
385+
}
386+
387+
private static URL alternateLink(URL url, CloseableHttpResponse response)
388+
throws MalformedURLException, IOException {
389+
if (response.getEntity().getContentLength() > 0
390+
&& !response.getEntity().getContentType().getValue().equals("application/ld+json")) {
391+
for (Header header : response.getAllHeaders()) {
392+
if (header.getName().equalsIgnoreCase("link")) {
393+
String alternateLink = "";
394+
boolean relAlternate = false;
395+
boolean jsonld = false;
396+
for (String value : header.getValue().split(";")) {
397+
if (value.trim().startsWith("<")) {
398+
alternateLink = value.replaceAll("<(.*)>", "$1");
399+
}
400+
if (value.trim().startsWith("type=\"application/ld+json\"")) {
401+
jsonld = true;
402+
}
403+
if (value.trim().startsWith("rel=\"alternate\"")) {
404+
relAlternate = true;
405+
}
406+
}
407+
if (jsonld && relAlternate && !alternateLink.isEmpty()) {
408+
return new URL(url.getProtocol() + "://" + url.getAuthority() + alternateLink);
409+
}
410+
}
411+
}
412+
}
413+
return null;
414+
}
415+
374416
/**
375417
* Fallback method directly using the {@link java.net.HttpURLConnection}
376418
* class for cases where servers do not interoperate correctly with Apache
@@ -384,7 +426,7 @@ public static Object fromURL(java.net.URL url, CloseableHttpClient httpClient)
384426
* @throws IOException
385427
* If there was an IO error during parsing.
386428
*/
387-
public static Object fromURLJavaNet(java.net.URL url) throws JsonParseException, IOException {
429+
public static Object fromURLJavaNet(URL url) throws JsonParseException, IOException {
388430
final HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
389431
urlConn.addRequestProperty("Accept", ACCEPT_HEADER);
390432

core/src/test/java/com/github/jsonldjava/core/MinimalSchemaOrgRegressionTest.java

Lines changed: 8 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.junit.Test;
2727

2828
import com.github.jsonldjava.utils.JarCacheStorage;
29+
import com.github.jsonldjava.utils.JsonUtils;
2930

3031
public class MinimalSchemaOrgRegressionTest {
3132

@@ -59,10 +60,13 @@ private void verifyInputStream(InputStream directStream) throws IOException {
5960
output.flush();
6061
}
6162
final String outputString = output.toString();
62-
// System.out.println(outputString);
63+
checkBasicConditions(outputString);
64+
}
65+
66+
private void checkBasicConditions(final String outputString) {
6367
// Test for some basic conditions without including the JSON/JSON-LD
6468
// parsing code here
65-
// assertTrue(outputString, outputString.endsWith("}"));
69+
assertTrue(outputString, outputString.endsWith("}"));
6670
assertFalse("Output string should not be empty: " + outputString.length(),
6771
outputString.isEmpty());
6872
assertTrue("Unexpected length: " + outputString.length(), outputString.length() > 100000);
@@ -90,30 +94,8 @@ public void testApacheHttpClient() throws Exception {
9094
// use system defaults for proxy etc.
9195
.useSystemProperties().build();
9296

93-
try {
94-
final HttpUriRequest request = new HttpGet(url.toExternalForm());
95-
// We prefer application/ld+json, but fallback to application/json
96-
// or whatever is available
97-
request.addHeader("Accept", ACCEPT_HEADER);
98-
99-
final CloseableHttpResponse response = httpClient.execute(request);
100-
try {
101-
final int status = response.getStatusLine().getStatusCode();
102-
if (status != 200 && status != 203) {
103-
throw new IOException("Can't retrieve " + url + ", status code: " + status);
104-
}
105-
final InputStream content = response.getEntity().getContent();
106-
verifyInputStream(content);
107-
} finally {
108-
if (response != null) {
109-
response.close();
110-
}
111-
}
112-
} finally {
113-
if (httpClient != null) {
114-
httpClient.close();
115-
}
116-
}
97+
Object content = JsonUtils.fromURL(url, httpClient);
98+
checkBasicConditions(content.toString());
11799
}
118100

119101
}

0 commit comments

Comments
 (0)