let OAIClient do all the nasty work for listing records, remove netty-http client, use JDK HTTP client

This commit is contained in:
Jörg Prante 2021-02-03 11:57:25 +01:00
parent b9df4d8199
commit e66af3ab74
23 changed files with 303 additions and 415 deletions

View file

@ -25,11 +25,6 @@ ext {
subprojects {
apply plugin: 'java-library'
dependencies {
testImplementation "org.xbib:bibliographic-character-sets:${project.property('xbib-bibliographic-character-sets.version')}"
}
apply from: rootProject.file('gradle/ide/idea.gradle')
apply from: rootProject.file('gradle/compile/java.gradle')
apply from: rootProject.file('gradle/test/junit5.gradle')

View file

@ -1,10 +1,8 @@
group = org.xbib
name = oai
version = 2.4.1
version = 2.5.0
gradle.wrapper.version = 6.6.1
xbib-content.version = 2.6.2
xbib-netty-http.version = 4.1.58.0
xbib-marc.version = 2.4.0
xbib-bibliographic-character-sets.version = 2.0.0
tcnative.version = 2.0.36.Final

View file

@ -1,7 +1,5 @@
dependencies {
api project(':oai-common')
api "org.xbib:netty-http-client:${project.property('xbib-netty-http.version')}"
implementation "io.netty:netty-tcnative-boringssl-static:${project.property('tcnative.version')}"
testImplementation "org.xbib:marc:${project.property('xbib-marc.version')}"
testImplementation "org.xbib:bibliographic-character-sets:${project.property('xbib-bibliographic-character-sets.version')}"
}

View file

@ -3,11 +3,13 @@ module org.xbib.oai.client {
exports org.xbib.oai.client.getrecord;
exports org.xbib.oai.client.identify;
exports org.xbib.oai.client.listidentifiers;
exports org.xbib.oai.client.listmetadataformats;
exports org.xbib.oai.client.listrecords;
exports org.xbib.oai.client.listsets;
requires org.xbib.oai;
requires org.xbib.net.url;
requires org.xbib.netty.http.common;
requires org.xbib.content.xml;
requires java.xml;
requires java.logging;
requires java.net.http;
}

View file

@ -1,19 +1,20 @@
package org.xbib.oai.client;
import org.xbib.net.URL;
import org.xbib.oai.OAIConstants;
import org.xbib.oai.OAIRequest;
import org.xbib.oai.util.ResumptionToken;
import java.time.Instant;
import java.time.format.DateTimeFormatter;
import java.util.LinkedHashMap;
import java.util.Map;
/**
* Client OAI request.
*/
public abstract class AbstractOAIRequest implements OAIRequest {
private final URL.Builder urlBuilder;
private final Map<String, String> params;
private DateTimeFormatter dateTimeFormatter;
@ -29,24 +30,20 @@ public abstract class AbstractOAIRequest implements OAIRequest {
private boolean retry;
protected AbstractOAIRequest(URL url) {
this.urlBuilder = URL.builder()
.scheme(url.getScheme())
.host(url.getHost())
.port(url.getPort())
.path(url.getPath());
}
public URL getURL() {
return urlBuilder.build();
protected AbstractOAIRequest() {
this.params = new LinkedHashMap<>();
}
protected void addParameter(String name, String value) {
if (value != null && !value.isEmpty()) {
urlBuilder.queryParam(name, value).build();
params.put(name, value);
}
}
/**
 * Returns the query parameters collected for this request.
 *
 * @return the parameter map held by this request; this is the internal
 *         map itself, not a copy
 */
public Map<String, String> getParams() {
    return this.params;
}
@Override
public void setSet(String set) {
this.set = set;

View file

@ -1,6 +1,5 @@
package org.xbib.oai.client;
import org.xbib.netty.http.common.HttpResponse;
import org.xbib.oai.OAIResponse;
import org.xbib.oai.exceptions.OAIException;
@ -11,5 +10,5 @@ import java.io.Writer;
*/
public abstract class AbstractOAIResponse implements OAIResponse {
public abstract void receivedResponse(HttpResponse message, Writer writer) throws OAIException;
public abstract void receivedResponse(String message, int status, String contentType, String retryAfter, Writer writer) throws OAIException;
}

View file

@ -3,25 +3,47 @@ package org.xbib.oai.client;
import org.xbib.net.URL;
import org.xbib.oai.client.getrecord.GetRecordRequest;
import org.xbib.oai.client.identify.IdentifyRequest;
import org.xbib.oai.client.identify.IdentifyResponse;
import org.xbib.oai.client.listidentifiers.ListIdentifiersRequest;
import org.xbib.oai.client.listmetadataformats.ListMetadataFormatsRequest;
import org.xbib.oai.client.listrecords.ListRecordsRequest;
import org.xbib.oai.client.listrecords.ListRecordsResponse;
import org.xbib.oai.client.listsets.ListSetsRequest;
import org.xbib.oai.util.ResumptionToken;
import org.xbib.oai.xml.SimpleMetadataHandler;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.format.DateTimeFormatter;
import java.util.function.Consumer;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* OAI client.
*/
public class OAIClient implements AutoCloseable {
public class OAIClient {
private final URL url;
private static final Logger logger = Logger.getLogger(OAIClient.class.getName());
public OAIClient(URL url) {
this.url = url;
}
private final String baseURL;
public URL getURL() {
return url;
private final HttpClient httpClient;
/**
 * Creates a client for the OAI repository reachable at the given base URL.
 * The underlying JDK HTTP client always follows redirects.
 *
 * @param baseURL the base URL of the repository, must not be {@code null}
 * @throws NullPointerException if {@code baseURL} is {@code null}
 */
public OAIClient(String baseURL) {
    // Fail fast: a null base URL is otherwise not noticed until the first request is built from it.
    this.baseURL = java.util.Objects.requireNonNull(baseURL, "baseURL");
    this.httpClient = HttpClient.newBuilder()
            .followRedirects(HttpClient.Redirect.ALWAYS)
            // Bound connection establishment so an unreachable repository cannot block a harvest forever.
            .connectTimeout(java.time.Duration.ofSeconds(60))
            .build();
}
/**
@ -29,10 +51,25 @@ public class OAIClient implements AutoCloseable {
* Some of the information returned is required as part of the OAI-PMH.
* Repositories may also employ the Identify verb to return additional
* descriptive information.
* @return identify request
* @return identify response
*/
public IdentifyRequest newIdentifyRequest() {
return new IdentifyRequest(url);
public IdentifyResponse identify() throws IOException, InterruptedException {
    // Target URI = base URL plus the query parameters of a fresh Identify request.
    URL.Builder target = URL.from(baseURL).mutator();
    new IdentifyRequest().getParams().forEach(target::queryParam);
    URI uri = URI.create(target.build().toExternalForm());
    HttpRequest get = HttpRequest.newBuilder()
            .uri(uri)
            .header("accept", "utf-8")
            .GET()
            .build();
    HttpResponse<String> reply = httpClient.send(get, HttpResponse.BodyHandlers.ofString());
    // Hand body, status and the two relevant headers to the response object for parsing;
    // the writer is a throw-away buffer that is not returned to the caller.
    IdentifyResponse result = new IdentifyResponse();
    result.receivedResponse(
            reply.body(),
            reply.statusCode(),
            reply.headers().firstValue("content-type").orElse(null),
            reply.headers().firstValue("retry-after").orElse(null),
            new StringWriter());
    return result;
}
/**
@ -42,7 +79,7 @@ public class OAIClient implements AutoCloseable {
* @return list metadata formats request
*/
public ListMetadataFormatsRequest newListMetadataFormatsRequest() {
return new ListMetadataFormatsRequest(url);
return new ListMetadataFormatsRequest();
}
/**
@ -51,7 +88,7 @@ public class OAIClient implements AutoCloseable {
* @return list sets request
*/
public ListSetsRequest newListSetsRequest() {
return new ListSetsRequest(url);
return new ListSetsRequest();
}
/**
@ -65,7 +102,7 @@ public class OAIClient implements AutoCloseable {
*
*/
public ListIdentifiersRequest newListIdentifiersRequest() {
return new ListIdentifiersRequest(url);
return new ListIdentifiersRequest();
}
/**
@ -80,7 +117,7 @@ public class OAIClient implements AutoCloseable {
* @return get record request
*/
public GetRecordRequest newGetRecordRequest() {
return new GetRecordRequest(url);
return new GetRecordRequest();
}
/**
@ -91,10 +128,109 @@ public class OAIClient implements AutoCloseable {
* attribute of "deleted" if a record matching the arguments
* specified in the request has been deleted. No metadata
* will be present for records with deleted status.
* @return list records request
*/
public ListRecordsRequest newListRecordsRequest() {
return new ListRecordsRequest(url);
public void listRecords(String metadataPrefix,
                        String set,
                        DateTimeFormatter dateTimeFormatter,
                        Instant from,
                        Instant until,
                        Writer writer,
                        SimpleMetadataHandler handler) {
    // Null arguments are simply not applied to the request. The handler, if given,
    // is registered on the request of every page; every response is passed on
    // together with the caller's writer.
    ListRecordsRequest listRecordsRequest =
            firstListRecordsRequest(metadataPrefix, set, dateTimeFormatter, from, until);
    // A null request means: no resumption token was returned, or a failure ended the harvest.
    while (listRecordsRequest != null) {
        try {
            if (handler != null) {
                listRecordsRequest.addHandler(handler);
            }
            ListRecordsResponse listRecordsResponse = new ListRecordsResponse(listRecordsRequest);
            HttpRequest httpRequest = toHttpRequest(listRecordsRequest);
            logger.log(Level.INFO,"sending " + httpRequest);
            HttpResponse<String> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
            listRecordsResponse.receivedResponse(httpResponse.body(), httpResponse.statusCode(),
                    firstHeader(httpResponse, "content-type"), firstHeader(httpResponse, "retry-after"), writer);
            logger.log(Level.FINE, "response headers = " + httpResponse.headers() +
                    " resumption-token = " + listRecordsResponse.getResumptionToken());
            listRecordsRequest = resume(listRecordsRequest, listRecordsResponse.getResumptionToken());
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            logger.log(Level.SEVERE, e.getMessage(), e);
            listRecordsRequest = null;
        } catch (Exception e) {
            // Any other failure is logged and ends the harvest; it is not propagated.
            logger.log(Level.SEVERE, e.getMessage(), e);
            listRecordsRequest = null;
        }
    }
}

/**
 * Lists records page by page and hands the raw bytes of every response to a consumer.
 * Each response is additionally passed to a {@link ListRecordsResponse} (with a
 * throw-away writer) so that the resumption token for the next page can be obtained.
 * Failures are logged and end the loop; they are not propagated to the caller.
 *
 * @param metadataPrefix the metadata prefix, not set if {@code null}
 * @param set the set, not set if {@code null}
 * @param dateTimeFormatter the date/time formatter, not set if {@code null}
 * @param from the lower bound, not set if {@code null}
 * @param until the upper bound, not set if {@code null}
 * @param consumer receives a stream over the response bytes of each page, may be {@code null}
 */
public void listRecords(String metadataPrefix,
                        String set,
                        DateTimeFormatter dateTimeFormatter,
                        Instant from,
                        Instant until,
                        Consumer<InputStream> consumer) {
    ListRecordsRequest listRecordsRequest =
            firstListRecordsRequest(metadataPrefix, set, dateTimeFormatter, from, until);
    while (listRecordsRequest != null) {
        try {
            StringWriter sw = new StringWriter();
            ListRecordsResponse listRecordsResponse = new ListRecordsResponse(listRecordsRequest);
            HttpRequest httpRequest = toHttpRequest(listRecordsRequest);
            logger.log(Level.INFO,"sending " + httpRequest);
            HttpResponse<byte[]> httpResponse = httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofByteArray());
            byte[] body = httpResponse.body();
            listRecordsResponse.receivedResponse(new String(body, StandardCharsets.UTF_8), httpResponse.statusCode(),
                    firstHeader(httpResponse, "content-type"), firstHeader(httpResponse, "retry-after"), sw);
            if (consumer != null) {
                consumer.accept(new ByteArrayInputStream(body));
            }
            logger.log(Level.FINE, "response headers = " + httpResponse.headers() +
                    " resumption-token = " + listRecordsResponse.getResumptionToken());
            listRecordsRequest = resume(listRecordsRequest, listRecordsResponse.getResumptionToken());
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            logger.log(Level.SEVERE, e.getMessage(), e);
            listRecordsRequest = null;
        } catch (Exception e) {
            // Any other failure is logged and ends the harvest; it is not propagated.
            logger.log(Level.SEVERE, e.getMessage(), e);
            listRecordsRequest = null;
        }
    }
}

/** Creates the initial ListRecords request, applying only the arguments that are not null. */
private static ListRecordsRequest firstListRecordsRequest(String metadataPrefix,
                                                          String set,
                                                          DateTimeFormatter dateTimeFormatter,
                                                          Instant from,
                                                          Instant until) {
    ListRecordsRequest listRecordsRequest = new ListRecordsRequest();
    if (metadataPrefix != null) {
        listRecordsRequest.setMetadataPrefix(metadataPrefix);
    }
    if (set != null) {
        listRecordsRequest.setSet(set);
    }
    if (dateTimeFormatter != null) {
        listRecordsRequest.setDateTimeFormatter(dateTimeFormatter);
    }
    if (from != null) {
        listRecordsRequest.setFrom(from);
    }
    if (until != null) {
        listRecordsRequest.setUntil(until);
    }
    return listRecordsRequest;
}

/** Builds the HTTP GET for the base URL extended by the query parameters of the given request. */
private HttpRequest toHttpRequest(ListRecordsRequest listRecordsRequest) {
    URL.Builder url = URL.from(baseURL).mutator();
    listRecordsRequest.getParams().forEach(url::queryParam);
    return HttpRequest.newBuilder()
            .uri(URI.create(url.build().toExternalForm()))
            // NOTE(review): "utf-8" is a charset, not a media type; kept unchanged to preserve the request as sent so far.
            .header("accept", "utf-8")
            .GET()
            .build();
}

/** Returns the first value of the named response header, or null if the header is absent. */
private static String firstHeader(HttpResponse<?> httpResponse, String name) {
    return httpResponse.headers().firstValue(name).orElse(null);
}
public IdentifyRequest resume(IdentifyRequest request, ResumptionToken<?> token) {
@ -105,7 +241,7 @@ public class OAIClient implements AutoCloseable {
if (token == null) {
return null;
}
IdentifyRequest nextRequest = newIdentifyRequest();
IdentifyRequest nextRequest = new IdentifyRequest();
nextRequest.setResumptionToken(token);
return nextRequest;
}
@ -118,7 +254,7 @@ public class OAIClient implements AutoCloseable {
if (token == null) {
return null;
}
ListRecordsRequest nextRequest = newListRecordsRequest();
ListRecordsRequest nextRequest = new ListRecordsRequest();
nextRequest.setResumptionToken(token);
return nextRequest;
}
@ -175,8 +311,4 @@ public class OAIClient implements AutoCloseable {
return nextRequest;
}
@Override
public void close() {
// nothing to close
}
}

View file

@ -1,6 +1,5 @@
package org.xbib.oai.client.getrecord;
import org.xbib.net.URL;
import org.xbib.oai.client.AbstractOAIRequest;
/**
@ -8,8 +7,8 @@ import org.xbib.oai.client.AbstractOAIRequest;
*/
public class GetRecordRequest extends AbstractOAIRequest {
public GetRecordRequest(URL url) {
super(url);
/**
 * Creates a GetRecord request with the verb parameter already set.
 */
public GetRecordRequest() {
    // the no-argument superclass constructor runs implicitly
    addParameter(VERB_PARAMETER, GET_RECORD);
}
}

View file

@ -1,6 +1,5 @@
package org.xbib.oai.client.getrecord;
import org.xbib.netty.http.common.HttpResponse;
import org.xbib.oai.client.AbstractOAIResponse;
import java.io.Writer;
@ -11,7 +10,7 @@ import java.io.Writer;
public class GetRecordResponse extends AbstractOAIResponse {
@Override
public void receivedResponse(HttpResponse message, Writer writer) {
public void receivedResponse(String message, int status, String contentType, String retryAfter, Writer writer) {
// not implemented yet
// (all arguments are ignored and nothing is written to the writer)
}
}

View file

@ -1,6 +1,5 @@
package org.xbib.oai.client.identify;
import org.xbib.net.URL;
import org.xbib.oai.client.AbstractOAIRequest;
/**
@ -8,8 +7,8 @@ import org.xbib.oai.client.AbstractOAIRequest;
*/
public class IdentifyRequest extends AbstractOAIRequest {
public IdentifyRequest(URL url) {
super(url);
/**
 * Creates an Identify request with the verb parameter already set.
 */
public IdentifyRequest() {
    // the no-argument superclass constructor runs implicitly
    addParameter(VERB_PARAMETER, IDENTIFY);
}
}

View file

@ -3,7 +3,6 @@ package org.xbib.oai.client.identify;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xbib.netty.http.common.HttpResponse;
import org.xbib.oai.client.AbstractOAIResponse;
import org.xbib.oai.exceptions.OAIException;
import org.xml.sax.InputSource;
@ -13,7 +12,6 @@ import java.io.IOException;
import java.io.StringReader;
import java.io.Writer;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
@ -32,7 +30,7 @@ public class IdentifyResponse extends AbstractOAIResponse {
private String protocolVersion;
private List<String> adminEmails = new ArrayList<>();
private final List<String> adminEmails = new ArrayList<>();
private Date earliestDatestamp;
@ -43,11 +41,11 @@ public class IdentifyResponse extends AbstractOAIResponse {
private String compression;
@Override
public void receivedResponse(HttpResponse message, Writer writer) {
public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) {
try {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource(new StringReader(message.getBodyAsString(StandardCharsets.UTF_8)));
InputSource is = new InputSource(new StringReader(message));
Document doc = db.parse(is);
setGranularity(getString("granularity", doc.getDocumentElement()));
} catch (ParserConfigurationException | SAXException | IOException e) {

View file

@ -1,6 +1,5 @@
package org.xbib.oai.client.listidentifiers;
import org.xbib.net.URL;
import org.xbib.oai.client.AbstractOAIRequest;
/**
@ -8,8 +7,8 @@ import org.xbib.oai.client.AbstractOAIRequest;
*/
public class ListIdentifiersRequest extends AbstractOAIRequest {
public ListIdentifiersRequest(URL url) {
super(url);
/**
 * Creates a ListIdentifiers request with the verb parameter already set.
 */
public ListIdentifiersRequest() {
    // the no-argument superclass constructor runs implicitly
    addParameter(VERB_PARAMETER, LIST_IDENTIFIERS);
}
}

View file

@ -1,6 +1,5 @@
package org.xbib.oai.client.listidentifiers;
import org.xbib.netty.http.common.HttpResponse;
import org.xbib.oai.client.AbstractOAIResponse;
import org.xbib.oai.exceptions.OAIException;
@ -12,7 +11,7 @@ import java.io.Writer;
public class ListIdentifiersResponse extends AbstractOAIResponse {
@Override
public void receivedResponse(HttpResponse message, Writer writer) throws OAIException {
public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) throws OAIException {
    // not implemented yet
    // (all arguments are ignored and nothing is written to the writer)
}
}

View file

@ -1,6 +1,5 @@
package org.xbib.oai.client.listmetadataformats;
import org.xbib.net.URL;
import org.xbib.oai.client.AbstractOAIRequest;
/**
@ -8,8 +7,8 @@ import org.xbib.oai.client.AbstractOAIRequest;
*/
public class ListMetadataFormatsRequest extends AbstractOAIRequest {
public ListMetadataFormatsRequest(URL url) {
super(url);
/**
 * Creates a ListMetadataFormats request with the verb parameter already set.
 */
public ListMetadataFormatsRequest() {
    // the no-argument superclass constructor runs implicitly
    addParameter(VERB_PARAMETER, LIST_METADATA_FORMATS);
}
}

View file

@ -1,6 +1,5 @@
package org.xbib.oai.client.listmetadataformats;
import org.xbib.netty.http.common.HttpResponse;
import org.xbib.oai.client.AbstractOAIResponse;
import org.xbib.oai.exceptions.OAIException;
@ -12,7 +11,7 @@ import java.io.Writer;
public class ListMetadataFormatsResponse extends AbstractOAIResponse {
@Override
public void receivedResponse(HttpResponse message, Writer writer) throws OAIException {
public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) throws OAIException {
// not implemented yet
// (all arguments are ignored and nothing is written to the writer)
}
}

View file

@ -1,6 +1,5 @@
package org.xbib.oai.client.listrecords;
import org.xbib.net.URL;
import org.xbib.oai.OAIConstants;
import org.xbib.oai.client.AbstractOAIRequest;
import org.xbib.oai.xml.MetadataHandler;
@ -15,8 +14,8 @@ public class ListRecordsRequest extends AbstractOAIRequest {
private List<MetadataHandler> handlers = new LinkedList<>();
public ListRecordsRequest(URL url) {
super(url);
/**
 * Creates a ListRecords request with the verb parameter already set.
 */
public ListRecordsRequest() {
    // the no-argument superclass constructor runs implicitly
    addParameter(OAIConstants.VERB_PARAMETER, LIST_RECORDS);
}
public ListRecordsRequest addHandler(MetadataHandler handler) {

View file

@ -2,7 +2,6 @@ package org.xbib.oai.client.listrecords;
import org.xbib.content.xml.transform.TransformerURIResolver;
import org.xbib.content.xml.util.XMLUtil;
import org.xbib.netty.http.common.HttpResponse;
import org.xbib.oai.client.AbstractOAIResponse;
import org.xbib.oai.exceptions.BadVerbException;
import org.xbib.oai.exceptions.BadArgumentException;
@ -14,7 +13,6 @@ import org.xml.sax.InputSource;
import java.io.StringReader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
@ -71,17 +69,10 @@ public class ListRecordsResponse extends AbstractOAIResponse {
}
@Override
public void receivedResponse(HttpResponse message, Writer writer) throws OAIException {
String content = message.getBodyAsString(StandardCharsets.UTF_8);
int status = message.getStatus().getCode();
public void receivedResponse(String message, int status, String contentType, String retryAfter, Writer writer) throws OAIException {
if (status == 503) {
long secs = retryAfterMillis / 1000;
if (message.getHeaders() != null) {
for (String retryAfterHeader : RETRY_AFTER_HEADERS) {
String retryAfter = message.getHeaders().getHeader(retryAfterHeader);
if (retryAfter == null) {
continue;
}
if (retryAfter != null) {
secs = Long.parseLong(retryAfter);
if (!isDigits(retryAfter)) {
// parse RFC date, e.g. Fri, 31 Dec 1999 23:59:59 GMT
@ -89,7 +80,6 @@ public class ListRecordsResponse extends AbstractOAIResponse {
secs = ChronoUnit.SECONDS.between(instant, Instant.now());
}
}
}
request.setRetry(true);
try {
if (secs > 0L) {
@ -108,19 +98,20 @@ public class ListRecordsResponse extends AbstractOAIResponse {
}
}
if (status != 200) {
throw new OAIException("status = " + status + " response = " + content);
throw new OAIException("status = " + status + " response = " + message);
}
// activate XSLT only if OAI XML content type is returned
String contentType = message.getHeaders().getHeader("content-type");
if (contentType != null && !contentType.startsWith("text/xml")) {
throw new OAIException("no XML content type in response: " + contentType);
}
// the filterreader allows access to the resumption token
this.filterreader = new ListRecordsFilterReader(request, this);
if (message != null) {
try {
TransformerFactory transformerFactory = TransformerFactory.newInstance();
transformerFactory.setURIResolver(new TransformerURIResolver("xsl"));
Transformer transformer = transformerFactory.newTransformer();
Source source = new SAXSource(filterreader, new InputSource(new StringReader(XMLUtil.sanitize(content))));
Source source = new SAXSource(filterreader, new InputSource(new StringReader(XMLUtil.sanitize(message))));
StreamResult streamResult = new StreamResult(writer);
transformer.transform(source, streamResult);
if ("noRecordsMatch".equals(error)) {
@ -141,6 +132,7 @@ public class ListRecordsResponse extends AbstractOAIResponse {
throw new OAIException(t);
}
}
}
private boolean isDigits(String str) {
for (int i = 0; i < str.length(); i++) {

View file

@ -1,6 +1,5 @@
package org.xbib.oai.client.listsets;
import org.xbib.net.URL;
import org.xbib.oai.client.AbstractOAIRequest;
/**
@ -8,8 +7,8 @@ import org.xbib.oai.client.AbstractOAIRequest;
*/
public class ListSetsRequest extends AbstractOAIRequest {
public ListSetsRequest(URL url) {
super(url);
/**
 * Creates a ListSets request with the verb parameter already set.
 */
public ListSetsRequest() {
    // the no-argument superclass constructor runs implicitly
    addParameter(VERB_PARAMETER, LIST_SETS);
}

View file

@ -1,6 +1,5 @@
package org.xbib.oai.client.listsets;
import org.xbib.netty.http.common.HttpResponse;
import org.xbib.oai.client.AbstractOAIResponse;
import org.xbib.oai.exceptions.OAIException;
@ -12,7 +11,7 @@ import java.io.Writer;
public class ListSetsResponse extends AbstractOAIResponse {
@Override
public void receivedResponse(HttpResponse message, Writer writer) throws OAIException {
public void receivedResponse(String message, int statusCode, String contentType, String retryAfter, Writer writer) throws OAIException {
// not implemented yet
// (all arguments are ignored and nothing is written to the writer)
}
}

View file

@ -1,22 +1,13 @@
package org.xbib.oai.client;
import io.netty.handler.codec.http.HttpHeaderNames;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.Test;
import org.xbib.net.URL;
import org.xbib.netty.http.client.Client;
import org.xbib.netty.http.client.api.Request;
import org.xbib.oai.client.identify.IdentifyRequest;
import org.xbib.oai.client.identify.IdentifyResponse;
import org.xbib.oai.client.listrecords.ListRecordsRequest;
import org.xbib.oai.client.listrecords.ListRecordsResponse;
import org.xbib.oai.xml.SimpleMetadataHandler;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
@ -24,34 +15,14 @@ import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
*
*/
class ArxivClientTest {
private static final Logger logger = Logger.getLogger(ArxivClientTest.class.getName());
@Test
void testListRecordsArxiv() {
final URL url = URL.create("http://export.arxiv.org/oai2/");
try (Client httpClient = Client.builder()
.setConnectTimeoutMillis(60 * 1000)
.setReadTimeoutMillis(60 * 1000)
.build();
OAIClient client = new OAIClient(url)) {
IdentifyRequest identifyRequest = client.newIdentifyRequest();
IdentifyResponse identifyResponse = new IdentifyResponse();
Request request = Request.get()
.url(identifyRequest.getURL())
.addHeader(HttpHeaderNames.ACCEPT.toString(), "utf-8")
.setResponseListener(resp -> {
logger.log(Level.INFO,
" body = " + resp.getBodyAsString(StandardCharsets.UTF_8));
StringWriter sw = new StringWriter();
identifyResponse.receivedResponse(resp, sw);
})
.build();
httpClient.execute(request).get();
void testListRecordsArxivWithJdkClient() throws Exception {
OAIClient oaiClient = new OAIClient("http://export.arxiv.org/oai2/");
IdentifyResponse identifyResponse = oaiClient.identify();
String granularity = identifyResponse.getGranularity();
logger.log(Level.INFO, "granularity = " + granularity);
DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ?
@ -59,42 +30,15 @@ class ArxivClientTest {
// ArXiv wants us to wait 20 secs between *every* HTTP request, so we must wait here
logger.log(Level.INFO,"waiting 20 seconds");
Thread.sleep(20 * 1000L);
ListRecordsRequest listRecordsRequest = client.newListRecordsRequest();
listRecordsRequest.setDateTimeFormatter(dateTimeFormatter);
listRecordsRequest.setFrom(Instant.parse("2016-11-01T00:00:00Z"));
listRecordsRequest.setUntil(Instant.parse("2016-11-02T00:00:00Z"));
listRecordsRequest.setMetadataPrefix("arXiv");
Handler handler = new Handler();
File file = File.createTempFile("arxiv.", ".xml");
file.deleteOnExit();
FileWriter fileWriter = new FileWriter(file);
while (listRecordsRequest != null) {
try {
listRecordsRequest.addHandler(handler);
ListRecordsResponse listRecordsResponse = new ListRecordsResponse(listRecordsRequest);
logger.log(Level.INFO,"sending " + listRecordsRequest.getURL());
request = Request.get()
.url(listRecordsRequest.getURL())
.addHeader(HttpHeaderNames.ACCEPT.toString(), "utf-8")
.setResponseListener(resp -> {
listRecordsResponse.receivedResponse(resp, fileWriter);
logger.log(Level.FINE, "response headers = " + resp.getHeaders() +
" resumption-token = " + listRecordsResponse.getResumptionToken());
})
.build();
httpClient.execute(request).get();
listRecordsRequest = client.resume(listRecordsRequest, listRecordsResponse.getResumptionToken());
} catch (IOException e) {
logger.log(Level.SEVERE, e.getMessage(), e);
listRecordsRequest = null;
}
}
oaiClient.listRecords("arXiv", null,
dateTimeFormatter, Instant.parse("2016-11-01T00:00:00Z"), Instant.parse("2016-11-02T00:00:00Z"), fileWriter, handler);
fileWriter.close();
logger.log(Level.INFO, "count = " + handler.count());
assertTrue(handler.count() > 0L);
} catch (Exception e) {
logger.log(Level.SEVERE, e.getMessage(), e);
}
}
static class Handler extends SimpleMetadataHandler {

View file

@ -1,23 +1,13 @@
package org.xbib.oai.client;
import io.netty.handler.codec.http.HttpHeaderNames;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.xbib.marc.Marc;
import org.xbib.marc.json.MarcJsonWriter;
import org.xbib.marc.xml.MarcContentHandler;
import org.xbib.net.URL;
import org.xbib.netty.http.client.Client;
import org.xbib.netty.http.client.api.Request;
import org.xbib.oai.client.identify.IdentifyRequest;
import org.xbib.oai.client.identify.IdentifyResponse;
import org.xbib.oai.client.listrecords.ListRecordsRequest;
import org.xbib.oai.client.listrecords.ListRecordsResponse;
import org.xbib.oai.exceptions.OAIException;
import java.io.IOException;
import java.io.StringWriter;
import java.net.ConnectException;
import java.nio.charset.StandardCharsets;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
@ -25,68 +15,29 @@ import java.util.EnumSet;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
*
*/
class BundeskunsthalleTest {
private static final Logger logger = Logger.getLogger(BundeskunsthalleTest.class.getName());
@Test
@Disabled("takes long time")
void testListRecords() {
URL url = URL.create("https://www.bundeskunsthalle.de/cgi-bin/bib/oai-pmh");
try (Client httpClient = Client.builder()
.setConnectTimeoutMillis(60 * 1000)
.setReadTimeoutMillis(60 * 1000)
.build();
OAIClient oaiClient = new OAIClient(url)) {
IdentifyRequest identifyRequest = oaiClient.newIdentifyRequest();
IdentifyResponse identifyResponse = new IdentifyResponse();
Request request = Request.get()
.url(identifyRequest.getURL())
.addHeader(HttpHeaderNames.ACCEPT.toString(), "utf-8")
.setFollowRedirect(true)
.setResponseListener(resp -> {
logger.log(Level.INFO,
"status = " + resp.getStatus() +
" body = " + resp.getBodyAsString(StandardCharsets.UTF_8));
StringWriter sw = new StringWriter();
identifyResponse.receivedResponse(resp, sw);
})
.build();
httpClient.execute(request).get();
// @Disabled("takes long time")
void testListRecords() throws Exception {
OAIClient oaiClient = new OAIClient("https://www.bundeskunsthalle.de/cgi-bin/bib/oai-pmh");
IdentifyResponse identifyResponse = oaiClient.identify();
String granularity = identifyResponse.getGranularity();
logger.log(Level.INFO, "granularity = " + granularity);
DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ?
DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("UTC")) : null;
ListRecordsRequest listRecordsRequest = oaiClient.newListRecordsRequest();
listRecordsRequest.setDateTimeFormatter(dateTimeFormatter);
listRecordsRequest.setMetadataPrefix("marcxml");
try (MarcJsonWriter writer = new MarcJsonWriter("build/bk-bulk%d.jsonl", 1000,
EnumSet.of(MarcJsonWriter.Style.ELASTICSEARCH_BULK), 65536, false)
.setIndex("testindex", "testtype")) {
.setIndex("bk", "type")) {
writer.startDocument();
writer.beginCollection();
while (listRecordsRequest != null) {
try {
ListRecordsResponse listRecordsResponse = new ListRecordsResponse(listRecordsRequest);
logger.log(Level.INFO, "sending " + listRecordsRequest.getURL());
request = Request.get()
.url(listRecordsRequest.getURL())
.addHeader(HttpHeaderNames.ACCEPT.toString(), "utf-8")
.setFollowRedirect(true)
.setTimeoutInMillis(60 * 1000)
.setResponseListener(resp -> {
logger.log(Level.FINE,
"status = " + resp.getStatus() +
" headers = " + resp.getHeaders() +
" resumptiontoken = " + listRecordsResponse.getResumptionToken());
StringWriter sw = new StringWriter();
listRecordsResponse.receivedResponse(resp, sw);
oaiClient.listRecords("marcxml", null,
dateTimeFormatter, null, null, inputStream -> {
try {
Marc.builder()
.setInputStream(resp.getBodyAsStream())
.setInputStream(inputStream)
.setCharset(StandardCharsets.UTF_8)
.setContentHandler(new MarcContentHandler()
.setFormat("MarcXML")
@ -98,23 +49,10 @@ class BundeskunsthalleTest {
} catch (IOException e) {
throw new OAIException("MARC parser exception: " + e.getMessage(), e);
}
})
.build();
httpClient.execute(request).get();
listRecordsRequest = oaiClient.resume(listRecordsRequest, listRecordsResponse.getResumptionToken());
} catch (ConnectException e) {
logger.log(Level.WARNING, e.getMessage(), e);
} catch (IOException e) {
logger.log(Level.SEVERE, e.getMessage(), e);
listRecordsRequest = null;
}
}
});
writer.endCollection();
writer.endDocument();
}
logger.log(Level.INFO, "completed");
} catch (Exception e) {
logger.log(Level.WARNING, e.getMessage(), e);
}
}
}

View file

@ -1,88 +1,40 @@
package org.xbib.oai.client;
import io.netty.handler.codec.http.HttpHeaderNames;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.Test;
import org.xbib.net.URL;
import org.xbib.netty.http.client.Client;
import org.xbib.netty.http.client.api.Request;
import org.xbib.oai.client.identify.IdentifyRequest;
import org.xbib.oai.client.identify.IdentifyResponse;
import org.xbib.oai.client.listrecords.ListRecordsRequest;
import org.xbib.oai.client.listrecords.ListRecordsResponse;
import org.xbib.oai.xml.SimpleMetadataHandler;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringWriter;
import java.net.ConnectException;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
*
*/
class DNBClientTest {
private static final Logger logger = Logger.getLogger(DNBClientTest.class.getName());
// Harvests DNB "bib" records in PicaPlus-xml for 2016-01-01..2016-01-10 into
// build/dnb-bib-pica.xml and asserts that the handler counted at least one record.
// NOTE(review): this span is a diff rendering without +/- markers. Two variants of the
// method are interleaved (two signatures appear below), so it does not compile as shown.
@Test
// Variant using the netty-http Client: the Identify request is built and executed by hand.
void testBibdat() {
URL url = URL.create("http://services.dnb.de/oai/repository");
// Both the HTTP client and the OAIClient are resources of this try-with-resources.
try (Client httpClient = Client.builder()
.setConnectTimeoutMillis(60 * 1000)
.setReadTimeoutMillis(60 * 1000)
.build();
OAIClient oaiClient = new OAIClient(url)) {
IdentifyRequest identifyRequest = oaiClient.newIdentifyRequest();
IdentifyResponse identifyResponse = new IdentifyResponse();
Request request = Request.get()
.url(identifyRequest.getURL())
.setResponseListener(resp -> {
logger.log(Level.INFO, resp.getBodyAsString(StandardCharsets.UTF_8));
// The StringWriter is handed to receivedResponse and not read afterwards in this method.
StringWriter sw = new StringWriter();
identifyResponse.receivedResponse(resp, sw);
})
.build();
httpClient.execute(request).get();
// Variant using only OAIClient: identify() returns the IdentifyResponse directly.
void testBibdat() throws Exception {
// NOTE(review): oaiClient is not declared in a try-with-resources here, unlike the
// variant above - confirm whether OAIClient still has to be closed.
OAIClient oaiClient = new OAIClient("http://services.dnb.de/oai/repository");
IdentifyResponse identifyResponse = oaiClient.identify();
String granularity = identifyResponse.getGranularity();
logger.log(Level.INFO, "granularity = " + granularity);
// Manual request setup: date window, set and metadata prefix are set one by one.
ListRecordsRequest listRecordsRequest = oaiClient.newListRecordsRequest();
listRecordsRequest.setFrom(Instant.parse("2016-01-01T00:00:00Z"));
listRecordsRequest.setUntil(Instant.parse("2016-01-10T00:00:00Z"));
listRecordsRequest.setSet("bib");
listRecordsRequest.setMetadataPrefix("PicaPlus-xml");
// A yyyy-MM-dd formatter in UTC is used only when the repository reports "YYYY-MM-DD"
// granularity; otherwise null is passed on (presumably selecting a default - confirm).
DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ?
DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("UTC")) : null;
Handler handler = new Handler();
File file = new File("build/dnb-bib-pica.xml");
// NOTE(review): FileWriter(File) encodes with the platform default charset on JDKs
// before 18; consider an explicit UTF-8 charset for the XML output.
// This writer is closed only by the explicit close() call further down.
FileWriter fileWriter = new FileWriter(file);
// Manual paging: repeat until the request variable becomes null.
while (listRecordsRequest != null) {
try {
ListRecordsResponse listRecordsResponse = new ListRecordsResponse(listRecordsRequest);
listRecordsRequest.addHandler(handler);
request = Request.get()
.url(listRecordsRequest.getURL())
// NOTE(review): "utf-8" is sent as the Accept header value; it names a charset,
// not a media type - possibly Accept-Charset was intended.
.addHeader(HttpHeaderNames.ACCEPT.toString(), "utf-8")
.setResponseListener(resp -> listRecordsResponse.receivedResponse(resp, fileWriter))
.build();
httpClient.execute(request).get();
// The next request is derived from the resumption token of the response just received.
listRecordsRequest = oaiClient.resume(listRecordsRequest, listRecordsResponse.getResumptionToken());
} catch (ConnectException e) {
// Only logged; listRecordsRequest is left unchanged, so the loop runs again with it.
logger.log(Level.WARNING, e.getMessage(), e);
} catch (IOException e) {
// Logged, then the request is nulled, which ends the while loop.
logger.log(Level.SEVERE, e.getMessage(), e);
listRecordsRequest = null;
// OAIClient-only variant: a single listRecords call replaces the manual loop and the
// writer is closed by try-with-resources. The arguments mirror the values set on the
// request above (prefix, set, formatter, from, until), followed by writer and handler.
try (FileWriter fileWriter = new FileWriter(file)) {
oaiClient.listRecords("PicaPlus-xml", "bib",
dateTimeFormatter, Instant.parse("2016-01-01T00:00:00Z"), Instant.parse("2016-01-10T00:00:00Z"),
fileWriter, handler);
}
}
fileWriter.close();
logger.log(Level.INFO, "count=" + handler.count());
assertTrue(handler.count() > 0);
} catch (Exception e) {
// NOTE(review): this catch-all logs a fixed message and does not pass the exception to
// the logger, so any Exception raised inside the try is reported only as "skipped".
logger.log(Level.SEVERE, "skipped, HTTP exception");
}
}
static class Handler extends SimpleMetadataHandler {

View file

@ -1,18 +1,9 @@
package org.xbib.oai.client;
import io.netty.handler.codec.http.HttpHeaderNames;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.xbib.net.URL;
import org.xbib.netty.http.client.Client;
import org.xbib.netty.http.client.api.Request;
import org.xbib.oai.client.identify.IdentifyRequest;
import org.xbib.oai.client.identify.IdentifyResponse;
import org.xbib.oai.client.listrecords.ListRecordsRequest;
import org.xbib.oai.client.listrecords.ListRecordsResponse;
import org.xbib.oai.xml.SimpleMetadataHandler;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.file.Files;
import java.nio.file.Paths;
@ -23,66 +14,28 @@ import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
*
*/
import static org.junit.jupiter.api.Assertions.assertTrue;
class DOAJClientTest {
private static final Logger logger = Logger.getLogger(DOAJClientTest.class.getName());
// Harvests oai_dc records from https://doaj.org/oai into build/doaj.xml and logs the
// number of records counted by the handler. Disabled by default ("takes long time").
// NOTE(review): this span is a diff rendering without +/- markers. Two variants of the
// method are interleaved (two signatures appear below), so it does not compile as shown.
@Test
@Disabled("takes long time")
// Variant using the netty-http Client: requests are built and executed by hand.
void testListRecordsDOAJ() {
URL url = URL.create("https://doaj.org/oai");
// Both the HTTP client and the OAIClient are resources of this try-with-resources.
try (Client httpClient = Client.builder()
.setConnectTimeoutMillis(60 * 1000)
.setReadTimeoutMillis(60 * 1000)
.build();
OAIClient oaiClient = new OAIClient(url)) {
IdentifyRequest identifyRequest = oaiClient.newIdentifyRequest();
IdentifyResponse identifyResponse = new IdentifyResponse();
Request request = Request.get()
.url(url.resolve(identifyRequest.getURL()))
.addHeader(HttpHeaderNames.ACCEPT.toString(), "utf-8")
.setResponseListener(resp -> {
// The StringWriter is handed to receivedResponse and not read afterwards in this method.
StringWriter sw = new StringWriter();
identifyResponse.receivedResponse(resp, sw);
})
.build();
httpClient.execute(request).get();
// Variant using only OAIClient: identify() returns the IdentifyResponse directly.
void testListRecordsDOAJ() throws Exception {
// NOTE(review): oaiClient is not declared in a try-with-resources here, unlike the
// variant above - confirm whether OAIClient still has to be closed.
OAIClient oaiClient = new OAIClient("https://doaj.org/oai");
IdentifyResponse identifyResponse = oaiClient.identify();
String granularity = identifyResponse.getGranularity();
logger.log(Level.INFO, "granularity = " + granularity);
// A yyyy-MM-dd formatter in GMT is used only when the repository reports "YYYY-MM-DD"
// granularity; otherwise null is passed on (presumably selecting a default - confirm).
DateTimeFormatter dateTimeFormatter = "YYYY-MM-DD".equals(granularity) ?
DateTimeFormatter.ofPattern("yyyy-MM-dd").withZone(ZoneId.of("GMT")) : null;
// Manual request setup: window 2008-01-01 .. 2018-01-01, prefix oai_dc, no set.
ListRecordsRequest listRecordsRequest = oaiClient.newListRecordsRequest();
listRecordsRequest.setDateTimeFormatter(dateTimeFormatter);
listRecordsRequest.setFrom(Instant.parse("2008-01-01T00:00:00Z"));
listRecordsRequest.setUntil(Instant.parse("2018-01-01T00:00:00Z"));
listRecordsRequest.setMetadataPrefix("oai_dc");
Handler handler = new Handler();
// Files.newBufferedWriter(Path) encodes as UTF-8; the writer is closed by try-with-resources.
try (Writer writer = Files.newBufferedWriter(Paths.get("build/doaj.xml"))) {
// Manual paging: repeat until the request variable becomes null.
while (listRecordsRequest != null) {
ListRecordsResponse listRecordsResponse = new ListRecordsResponse(listRecordsRequest);
listRecordsRequest.addHandler(handler);
logger.log(Level.INFO, "sending " + listRecordsRequest.getURL());
request = Request.get()
.url(url.resolve(listRecordsRequest.getURL()))
.addHeader(HttpHeaderNames.ACCEPT.toString(), "utf-8")
.setResponseListener(resp -> {
listRecordsResponse.receivedResponse(resp, writer);
// NOTE(review): the "{}" below is part of a concatenated string, so it is logged
// literally rather than being replaced by the token.
logger.log(Level.FINE, "response headers = " + resp.getHeaders() +
" resumption-token = {}" + listRecordsResponse.getResumptionToken());
})
.build();
httpClient.execute(request).get();
// The next request is derived from the resumption token of the response just received.
listRecordsRequest = oaiClient.resume(listRecordsRequest, listRecordsResponse.getResumptionToken());
}
// OAIClient-only variant: a single listRecords call replaces the manual loop.
// The window here is 2021-01-01 .. 2021-02-01, not the 2008..2018 window set above.
// The second argument is null; the manual variant never called setSet, so this
// presumably means "no set restriction" - confirm against OAIClient.listRecords.
oaiClient.listRecords("oai_dc", null,
dateTimeFormatter,Instant.parse("2021-01-01T00:00:00Z"), Instant.parse("2021-02-01T00:00:00Z"),
writer, handler);
}
logger.log(Level.INFO, "count = " + handler.count());
} catch (Exception e) {
// Logs the exception with its stack trace; nothing is rethrown from this handler.
logger.log(Level.SEVERE, e.getMessage(), e);
}
// Requires that the handler counted at least one record.
assertTrue(handler.count() > 0);
}
static class Handler extends SimpleMetadataHandler {